From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/hipe/Makefile | 63 + lib/hipe/TODO | 130 + lib/hipe/amd64/Makefile | 127 + lib/hipe/amd64/hipe_amd64_assemble.erl | 19 + lib/hipe/amd64/hipe_amd64_defuse.erl | 20 + lib/hipe/amd64/hipe_amd64_encode.erl | 1484 ++++++ lib/hipe/amd64/hipe_amd64_frame.erl | 20 + lib/hipe/amd64/hipe_amd64_liveness.erl | 20 + lib/hipe/amd64/hipe_amd64_main.erl | 20 + lib/hipe/amd64/hipe_amd64_pp.erl | 20 + lib/hipe/amd64/hipe_amd64_ra.erl | 20 + lib/hipe/amd64/hipe_amd64_ra_finalise.erl | 20 + lib/hipe/amd64/hipe_amd64_ra_ls.erl | 20 + lib/hipe/amd64/hipe_amd64_ra_naive.erl | 20 + lib/hipe/amd64/hipe_amd64_ra_postconditions.erl | 20 + .../amd64/hipe_amd64_ra_sse2_postconditions.erl | 188 + lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl | 20 + lib/hipe/amd64/hipe_amd64_registers.erl | 288 ++ lib/hipe/amd64/hipe_amd64_spill_restore.erl | 20 + lib/hipe/amd64/hipe_amd64_x87.erl | 20 + lib/hipe/amd64/hipe_rtl_to_amd64.erl | 20 + lib/hipe/arm/Makefile | 116 + lib/hipe/arm/TODO | 20 + lib/hipe/arm/hipe_arm.erl | 380 ++ lib/hipe/arm/hipe_arm.hrl | 124 + lib/hipe/arm/hipe_arm_assemble.erl | 665 +++ lib/hipe/arm/hipe_arm_cfg.erl | 131 + lib/hipe/arm/hipe_arm_defuse.erl | 157 + lib/hipe/arm/hipe_arm_encode.erl | 994 ++++ lib/hipe/arm/hipe_arm_finalise.erl | 73 + lib/hipe/arm/hipe_arm_frame.erl | 639 +++ lib/hipe/arm/hipe_arm_liveness_gpr.erl | 38 + lib/hipe/arm/hipe_arm_main.erl | 58 + lib/hipe/arm/hipe_arm_pp.erl | 351 ++ lib/hipe/arm/hipe_arm_ra.erl | 56 + lib/hipe/arm/hipe_arm_ra_finalise.erl | 285 ++ lib/hipe/arm/hipe_arm_ra_ls.erl | 56 + lib/hipe/arm/hipe_arm_ra_naive.erl | 29 + lib/hipe/arm/hipe_arm_ra_postconditions.erl | 278 ++ lib/hipe/arm/hipe_arm_registers.erl | 207 + lib/hipe/arm/hipe_rtl_to_arm.erl | 836 ++++ lib/hipe/cerl/Makefile | 107 + lib/hipe/cerl/cerl_cconv.erl | 777 +++ lib/hipe/cerl/cerl_closurean.erl | 862 ++++ lib/hipe/cerl/cerl_hipe_primops.hrl | 88 + lib/hipe/cerl/cerl_hipeify.erl | 655 +++ lib/hipe/cerl/cerl_hybrid_transform.erl | 153 + lib/hipe/cerl/cerl_lib.erl | 462 ++ lib/hipe/cerl/cerl_messagean.erl | 1105 +++++ lib/hipe/cerl/cerl_pmatch.erl | 624 +++ lib/hipe/cerl/cerl_prettypr.erl | 883 ++++ lib/hipe/cerl/cerl_to_icode.erl | 2717 +++++++++++ lib/hipe/cerl/cerl_typean.erl | 1003 ++++ lib/hipe/cerl/erl_bif_types.erl | 5021 ++++++++++++++++++++ lib/hipe/cerl/erl_types.erl | 3847 +++++++++++++++ lib/hipe/doc/Makefile | 29 + lib/hipe/doc/html/.gitignore | 0 lib/hipe/doc/overview.edoc | 9 + lib/hipe/doc/pdf/.gitignore | 0 lib/hipe/doc/src/Makefile | 113 + lib/hipe/doc/src/book.xml | 38 + lib/hipe/doc/src/fascicules.xml | 12 + lib/hipe/doc/src/make.dep | 13 + lib/hipe/doc/src/notes.xml | 350 ++ lib/hipe/doc/src/part_notes.xml | 35 + lib/hipe/ebin/.gitignore | 0 lib/hipe/flow/Makefile | 105 + lib/hipe/flow/cfg.hrl | 53 + lib/hipe/flow/cfg.inc | 949 ++++ lib/hipe/flow/ebb.inc | 247 + lib/hipe/flow/hipe_bb.erl | 81 + lib/hipe/flow/hipe_bb.hrl | 30 + lib/hipe/flow/hipe_dominators.erl | 715 +++ lib/hipe/flow/hipe_gen_cfg.erl | 37 + lib/hipe/flow/liveness.inc | 332 ++ lib/hipe/icode/Makefile | 144 + lib/hipe/icode/hipe_beam_to_icode.erl | 2326 +++++++++ lib/hipe/icode/hipe_icode.erl | 1820 +++++++ lib/hipe/icode/hipe_icode.hrl | 188 + lib/hipe/icode/hipe_icode_bincomp.erl | 178 + lib/hipe/icode/hipe_icode_callgraph.erl | 217 + lib/hipe/icode/hipe_icode_cfg.erl | 203 + lib/hipe/icode/hipe_icode_coordinator.erl | 274 ++ lib/hipe/icode/hipe_icode_ebb.erl | 30 + lib/hipe/icode/hipe_icode_exceptions.erl | 474 ++ lib/hipe/icode/hipe_icode_fp.erl | 1043 ++++ lib/hipe/icode/hipe_icode_heap_test.erl | 200 + lib/hipe/icode/hipe_icode_inline_bifs.erl | 240 + lib/hipe/icode/hipe_icode_instruction_counter.erl | 135 + lib/hipe/icode/hipe_icode_liveness.erl | 101 + lib/hipe/icode/hipe_icode_mulret.erl | 1323 ++++++ lib/hipe/icode/hipe_icode_pp.erl | 303 ++ lib/hipe/icode/hipe_icode_primops.erl | 963 ++++ lib/hipe/icode/hipe_icode_primops.hrl | 40 + lib/hipe/icode/hipe_icode_range.erl | 1966 ++++++++ lib/hipe/icode/hipe_icode_split_arith.erl | 553 +++ lib/hipe/icode/hipe_icode_ssa.erl | 98 + lib/hipe/icode/hipe_icode_ssa_const_prop.erl | 728 +++ lib/hipe/icode/hipe_icode_ssa_copy_prop.erl | 41 + lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl | 1444 ++++++ lib/hipe/icode/hipe_icode_type.erl | 2266 +++++++++ lib/hipe/icode/hipe_icode_type.hrl | 25 + lib/hipe/info | 2 + lib/hipe/main/Makefile | 117 + lib/hipe/main/hipe.app.src | 222 + lib/hipe/main/hipe.appup.src | 19 + lib/hipe/main/hipe.erl | 1555 ++++++ lib/hipe/main/hipe.hrl.src | 322 ++ lib/hipe/main/hipe_main.erl | 549 +++ lib/hipe/misc/Makefile | 113 + lib/hipe/misc/hipe_consttab.erl | 503 ++ lib/hipe/misc/hipe_consttab.hrl | 27 + lib/hipe/misc/hipe_data_pp.erl | 158 + lib/hipe/misc/hipe_gensym.erl | 244 + lib/hipe/misc/hipe_pack_constants.erl | 211 + lib/hipe/misc/hipe_sdi.erl | 378 ++ lib/hipe/misc/hipe_sdi.hrl | 25 + lib/hipe/native.mk | 5 + lib/hipe/opt/Makefile | 101 + lib/hipe/opt/hipe_schedule.erl | 1489 ++++++ lib/hipe/opt/hipe_schedule_prio.erl | 58 + lib/hipe/opt/hipe_spillmin.erl | 111 + lib/hipe/opt/hipe_spillmin_color.erl | 556 +++ lib/hipe/opt/hipe_spillmin_scan.erl | 559 +++ lib/hipe/opt/hipe_target_machine.erl | 93 + lib/hipe/opt/hipe_ultra_mod2.erl | 239 + lib/hipe/opt/hipe_ultra_prio.erl | 304 ++ lib/hipe/ppc/Makefile | 120 + lib/hipe/ppc/hipe_ppc.erl | 415 ++ lib/hipe/ppc/hipe_ppc.hrl | 118 + lib/hipe/ppc/hipe_ppc_assemble.erl | 603 +++ lib/hipe/ppc/hipe_ppc_cfg.erl | 131 + lib/hipe/ppc/hipe_ppc_defuse.erl | 145 + lib/hipe/ppc/hipe_ppc_encode.erl | 1558 ++++++ lib/hipe/ppc/hipe_ppc_finalise.erl | 65 + lib/hipe/ppc/hipe_ppc_frame.erl | 657 +++ lib/hipe/ppc/hipe_ppc_liveness_all.erl | 38 + lib/hipe/ppc/hipe_ppc_liveness_fpr.erl | 34 + lib/hipe/ppc/hipe_ppc_liveness_gpr.erl | 38 + lib/hipe/ppc/hipe_ppc_main.erl | 51 + lib/hipe/ppc/hipe_ppc_pp.erl | 350 ++ lib/hipe/ppc/hipe_ppc_ra.erl | 56 + lib/hipe/ppc/hipe_ppc_ra_finalise.erl | 271 ++ lib/hipe/ppc/hipe_ppc_ra_ls.erl | 56 + lib/hipe/ppc/hipe_ppc_ra_naive.erl | 29 + lib/hipe/ppc/hipe_ppc_ra_postconditions.erl | 243 + lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl | 130 + lib/hipe/ppc/hipe_ppc_registers.erl | 246 + lib/hipe/ppc/hipe_rtl_to_ppc.erl | 1249 +++++ lib/hipe/prebuild.skip | 1 + lib/hipe/regalloc/Makefile | 123 + lib/hipe/regalloc/hipe_adj_list.erl | 143 + lib/hipe/regalloc/hipe_amd64_specific.erl | 20 + lib/hipe/regalloc/hipe_amd64_specific_sse2.erl | 175 + lib/hipe/regalloc/hipe_amd64_specific_x87.erl | 20 + lib/hipe/regalloc/hipe_arm_specific.erl | 168 + lib/hipe/regalloc/hipe_coalescing_regalloc.erl | 1029 ++++ lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl | 806 ++++ lib/hipe/regalloc/hipe_ig.erl | 776 +++ lib/hipe/regalloc/hipe_ig_moves.erl | 81 + lib/hipe/regalloc/hipe_ls_regalloc.erl | 788 +++ lib/hipe/regalloc/hipe_moves.erl | 165 + lib/hipe/regalloc/hipe_node_sets.erl | 48 + lib/hipe/regalloc/hipe_optimistic_regalloc.erl | 2043 ++++++++ lib/hipe/regalloc/hipe_ppc_specific.erl | 168 + lib/hipe/regalloc/hipe_ppc_specific_fp.erl | 146 + lib/hipe/regalloc/hipe_reg_worklists.erl | 360 ++ lib/hipe/regalloc/hipe_regalloc_loop.erl | 68 + lib/hipe/regalloc/hipe_sparc_specific.erl | 168 + lib/hipe/regalloc/hipe_sparc_specific_fp.erl | 146 + lib/hipe/regalloc/hipe_spillcost.erl | 101 + lib/hipe/regalloc/hipe_spillcost.hrl | 27 + lib/hipe/regalloc/hipe_temp_map.erl | 125 + lib/hipe/regalloc/hipe_x86_specific.erl | 203 + lib/hipe/regalloc/hipe_x86_specific_x87.erl | 164 + lib/hipe/rtl/Makefile | 142 + lib/hipe/rtl/hipe_icode2rtl.erl | 727 +++ lib/hipe/rtl/hipe_rtl.erl | 1655 +++++++ lib/hipe/rtl/hipe_rtl.hrl | 61 + lib/hipe/rtl/hipe_rtl_arch.erl | 612 +++ lib/hipe/rtl/hipe_rtl_arith.inc | 177 + lib/hipe/rtl/hipe_rtl_arith_32.erl | 50 + lib/hipe/rtl/hipe_rtl_arith_64.erl | 38 + lib/hipe/rtl/hipe_rtl_binary.erl | 80 + lib/hipe/rtl/hipe_rtl_binary_construct.erl | 1363 ++++++ lib/hipe/rtl/hipe_rtl_binary_match.erl | 1134 +++++ lib/hipe/rtl/hipe_rtl_cfg.erl | 201 + lib/hipe/rtl/hipe_rtl_cleanup_const.erl | 85 + lib/hipe/rtl/hipe_rtl_exceptions.erl | 120 + lib/hipe/rtl/hipe_rtl_lcm.erl | 1696 +++++++ lib/hipe/rtl/hipe_rtl_liveness.erl | 145 + lib/hipe/rtl/hipe_rtl_mk_switch.erl | 985 ++++ lib/hipe/rtl/hipe_rtl_primops.erl | 1259 +++++ lib/hipe/rtl/hipe_rtl_ssa.erl | 93 + lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl | 357 ++ lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl | 1082 +++++ lib/hipe/rtl/hipe_rtl_ssapre.erl | 1679 +++++++ lib/hipe/rtl/hipe_rtl_symbolic.erl | 99 + lib/hipe/rtl/hipe_rtl_varmap.erl | 161 + lib/hipe/rtl/hipe_tagscheme.erl | 1209 +++++ lib/hipe/sparc/Makefile | 120 + lib/hipe/sparc/hipe_rtl_to_sparc.erl | 972 ++++ lib/hipe/sparc/hipe_sparc.erl | 407 ++ lib/hipe/sparc/hipe_sparc.hrl | 116 + lib/hipe/sparc/hipe_sparc_assemble.erl | 588 +++ lib/hipe/sparc/hipe_sparc_cfg.erl | 134 + lib/hipe/sparc/hipe_sparc_defuse.erl | 143 + lib/hipe/sparc/hipe_sparc_encode.erl | 476 ++ lib/hipe/sparc/hipe_sparc_finalise.erl | 138 + lib/hipe/sparc/hipe_sparc_frame.erl | 636 +++ lib/hipe/sparc/hipe_sparc_liveness_all.erl | 38 + lib/hipe/sparc/hipe_sparc_liveness_fpr.erl | 34 + lib/hipe/sparc/hipe_sparc_liveness_gpr.erl | 38 + lib/hipe/sparc/hipe_sparc_main.erl | 58 + lib/hipe/sparc/hipe_sparc_pp.erl | 342 ++ lib/hipe/sparc/hipe_sparc_ra.erl | 56 + lib/hipe/sparc/hipe_sparc_ra_finalise.erl | 254 + lib/hipe/sparc/hipe_sparc_ra_ls.erl | 56 + lib/hipe/sparc/hipe_sparc_ra_naive.erl | 29 + lib/hipe/sparc/hipe_sparc_ra_postconditions.erl | 222 + lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl | 120 + lib/hipe/sparc/hipe_sparc_registers.erl | 291 ++ lib/hipe/ssa/hipe_ssa.inc | 978 ++++ lib/hipe/ssa/hipe_ssa_const_prop.inc | 522 ++ lib/hipe/ssa/hipe_ssa_copy_prop.inc | 198 + lib/hipe/ssa/hipe_ssa_liveness.inc | 328 ++ lib/hipe/tools/Makefile | 111 + lib/hipe/tools/hipe_ceach.erl | 74 + lib/hipe/tools/hipe_jit.erl | 87 + lib/hipe/tools/hipe_profile.erl | 191 + lib/hipe/tools/hipe_timer.erl | 159 + lib/hipe/tools/hipe_tool.erl | 513 ++ lib/hipe/util/Makefile | 109 + lib/hipe/util/hipe_digraph.erl | 238 + lib/hipe/util/hipe_dot.erl | 217 + lib/hipe/util/hipe_timing.erl | 131 + lib/hipe/util/hipe_vectors.erl | 111 + lib/hipe/util/hipe_vectors.hrl | 28 + lib/hipe/vsn.mk | 1 + lib/hipe/x86/Makefile | 134 + lib/hipe/x86/NOTES.OPTIM | 200 + lib/hipe/x86/NOTES.RA | 32 + lib/hipe/x86/TODO | 31 + lib/hipe/x86/hipe_rtl_to_x86.erl | 865 ++++ lib/hipe/x86/hipe_x86.erl | 496 ++ lib/hipe/x86/hipe_x86.hrl | 116 + lib/hipe/x86/hipe_x86_assemble.erl | 1014 ++++ lib/hipe/x86/hipe_x86_cfg.erl | 147 + lib/hipe/x86/hipe_x86_defuse.erl | 160 + lib/hipe/x86/hipe_x86_encode.erl | 1302 +++++ lib/hipe/x86/hipe_x86_encode.txt | 213 + lib/hipe/x86/hipe_x86_frame.erl | 687 +++ lib/hipe/x86/hipe_x86_liveness.erl | 57 + lib/hipe/x86/hipe_x86_main.erl | 70 + lib/hipe/x86/hipe_x86_postpass.erl | 276 ++ lib/hipe/x86/hipe_x86_pp.erl | 350 ++ lib/hipe/x86/hipe_x86_ra.erl | 99 + lib/hipe/x86/hipe_x86_ra_finalise.erl | 335 ++ lib/hipe/x86/hipe_x86_ra_ls.erl | 85 + lib/hipe/x86/hipe_x86_ra_naive.erl | 409 ++ lib/hipe/x86/hipe_x86_ra_postconditions.erl | 452 ++ lib/hipe/x86/hipe_x86_ra_x87_ls.erl | 63 + lib/hipe/x86/hipe_x86_registers.erl | 254 + lib/hipe/x86/hipe_x86_spill_restore.erl | 345 ++ lib/hipe/x86/hipe_x86_x87.erl | 635 +++ 265 files changed, 102773 insertions(+) create mode 100644 lib/hipe/Makefile create mode 100644 lib/hipe/TODO create mode 100644 lib/hipe/amd64/Makefile create mode 100644 lib/hipe/amd64/hipe_amd64_assemble.erl create mode 100644 lib/hipe/amd64/hipe_amd64_defuse.erl create mode 100644 lib/hipe/amd64/hipe_amd64_encode.erl create mode 100644 lib/hipe/amd64/hipe_amd64_frame.erl create mode 100644 lib/hipe/amd64/hipe_amd64_liveness.erl create mode 100644 lib/hipe/amd64/hipe_amd64_main.erl create mode 100644 lib/hipe/amd64/hipe_amd64_pp.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra_finalise.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra_ls.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra_naive.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra_postconditions.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl create mode 100644 lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl create mode 100644 lib/hipe/amd64/hipe_amd64_registers.erl create mode 100644 lib/hipe/amd64/hipe_amd64_spill_restore.erl create mode 100644 lib/hipe/amd64/hipe_amd64_x87.erl create mode 100644 lib/hipe/amd64/hipe_rtl_to_amd64.erl create mode 100644 lib/hipe/arm/Makefile create mode 100644 lib/hipe/arm/TODO create mode 100644 lib/hipe/arm/hipe_arm.erl create mode 100644 lib/hipe/arm/hipe_arm.hrl create mode 100644 lib/hipe/arm/hipe_arm_assemble.erl create mode 100644 lib/hipe/arm/hipe_arm_cfg.erl create mode 100644 lib/hipe/arm/hipe_arm_defuse.erl create mode 100644 lib/hipe/arm/hipe_arm_encode.erl create mode 100644 lib/hipe/arm/hipe_arm_finalise.erl create mode 100644 lib/hipe/arm/hipe_arm_frame.erl create mode 100644 lib/hipe/arm/hipe_arm_liveness_gpr.erl create mode 100644 lib/hipe/arm/hipe_arm_main.erl create mode 100644 lib/hipe/arm/hipe_arm_pp.erl create mode 100644 lib/hipe/arm/hipe_arm_ra.erl create mode 100644 lib/hipe/arm/hipe_arm_ra_finalise.erl create mode 100644 lib/hipe/arm/hipe_arm_ra_ls.erl create mode 100644 lib/hipe/arm/hipe_arm_ra_naive.erl create mode 100644 lib/hipe/arm/hipe_arm_ra_postconditions.erl create mode 100644 lib/hipe/arm/hipe_arm_registers.erl create mode 100644 lib/hipe/arm/hipe_rtl_to_arm.erl create mode 100644 lib/hipe/cerl/Makefile create mode 100644 lib/hipe/cerl/cerl_cconv.erl create mode 100644 lib/hipe/cerl/cerl_closurean.erl create mode 100644 lib/hipe/cerl/cerl_hipe_primops.hrl create mode 100644 lib/hipe/cerl/cerl_hipeify.erl create mode 100644 lib/hipe/cerl/cerl_hybrid_transform.erl create mode 100644 lib/hipe/cerl/cerl_lib.erl create mode 100644 lib/hipe/cerl/cerl_messagean.erl create mode 100644 lib/hipe/cerl/cerl_pmatch.erl create mode 100644 lib/hipe/cerl/cerl_prettypr.erl create mode 100644 lib/hipe/cerl/cerl_to_icode.erl create mode 100644 lib/hipe/cerl/cerl_typean.erl create mode 100644 lib/hipe/cerl/erl_bif_types.erl create mode 100644 lib/hipe/cerl/erl_types.erl create mode 100644 lib/hipe/doc/Makefile create mode 100644 lib/hipe/doc/html/.gitignore create mode 100644 lib/hipe/doc/overview.edoc create mode 100644 lib/hipe/doc/pdf/.gitignore create mode 100644 lib/hipe/doc/src/Makefile create mode 100644 lib/hipe/doc/src/book.xml create mode 100644 lib/hipe/doc/src/fascicules.xml create mode 100644 lib/hipe/doc/src/make.dep create mode 100644 lib/hipe/doc/src/notes.xml create mode 100644 lib/hipe/doc/src/part_notes.xml create mode 100644 lib/hipe/ebin/.gitignore create mode 100644 lib/hipe/flow/Makefile create mode 100644 lib/hipe/flow/cfg.hrl create mode 100644 lib/hipe/flow/cfg.inc create mode 100644 lib/hipe/flow/ebb.inc create mode 100644 lib/hipe/flow/hipe_bb.erl create mode 100644 lib/hipe/flow/hipe_bb.hrl create mode 100644 lib/hipe/flow/hipe_dominators.erl create mode 100644 lib/hipe/flow/hipe_gen_cfg.erl create mode 100644 lib/hipe/flow/liveness.inc create mode 100644 lib/hipe/icode/Makefile create mode 100644 lib/hipe/icode/hipe_beam_to_icode.erl create mode 100644 lib/hipe/icode/hipe_icode.erl create mode 100644 lib/hipe/icode/hipe_icode.hrl create mode 100644 lib/hipe/icode/hipe_icode_bincomp.erl create mode 100644 lib/hipe/icode/hipe_icode_callgraph.erl create mode 100644 lib/hipe/icode/hipe_icode_cfg.erl create mode 100644 lib/hipe/icode/hipe_icode_coordinator.erl create mode 100644 lib/hipe/icode/hipe_icode_ebb.erl create mode 100644 lib/hipe/icode/hipe_icode_exceptions.erl create mode 100644 lib/hipe/icode/hipe_icode_fp.erl create mode 100644 lib/hipe/icode/hipe_icode_heap_test.erl create mode 100644 lib/hipe/icode/hipe_icode_inline_bifs.erl create mode 100644 lib/hipe/icode/hipe_icode_instruction_counter.erl create mode 100644 lib/hipe/icode/hipe_icode_liveness.erl create mode 100644 lib/hipe/icode/hipe_icode_mulret.erl create mode 100755 lib/hipe/icode/hipe_icode_pp.erl create mode 100644 lib/hipe/icode/hipe_icode_primops.erl create mode 100644 lib/hipe/icode/hipe_icode_primops.hrl create mode 100644 lib/hipe/icode/hipe_icode_range.erl create mode 100644 lib/hipe/icode/hipe_icode_split_arith.erl create mode 100755 lib/hipe/icode/hipe_icode_ssa.erl create mode 100644 lib/hipe/icode/hipe_icode_ssa_const_prop.erl create mode 100644 lib/hipe/icode/hipe_icode_ssa_copy_prop.erl create mode 100644 lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl create mode 100644 lib/hipe/icode/hipe_icode_type.erl create mode 100644 lib/hipe/icode/hipe_icode_type.hrl create mode 100644 lib/hipe/info create mode 100644 lib/hipe/main/Makefile create mode 100644 lib/hipe/main/hipe.app.src create mode 100644 lib/hipe/main/hipe.appup.src create mode 100644 lib/hipe/main/hipe.erl create mode 100644 lib/hipe/main/hipe.hrl.src create mode 100644 lib/hipe/main/hipe_main.erl create mode 100644 lib/hipe/misc/Makefile create mode 100644 lib/hipe/misc/hipe_consttab.erl create mode 100644 lib/hipe/misc/hipe_consttab.hrl create mode 100644 lib/hipe/misc/hipe_data_pp.erl create mode 100644 lib/hipe/misc/hipe_gensym.erl create mode 100644 lib/hipe/misc/hipe_pack_constants.erl create mode 100644 lib/hipe/misc/hipe_sdi.erl create mode 100644 lib/hipe/misc/hipe_sdi.hrl create mode 100644 lib/hipe/native.mk create mode 100644 lib/hipe/opt/Makefile create mode 100644 lib/hipe/opt/hipe_schedule.erl create mode 100644 lib/hipe/opt/hipe_schedule_prio.erl create mode 100644 lib/hipe/opt/hipe_spillmin.erl create mode 100644 lib/hipe/opt/hipe_spillmin_color.erl create mode 100644 lib/hipe/opt/hipe_spillmin_scan.erl create mode 100644 lib/hipe/opt/hipe_target_machine.erl create mode 100644 lib/hipe/opt/hipe_ultra_mod2.erl create mode 100644 lib/hipe/opt/hipe_ultra_prio.erl create mode 100644 lib/hipe/ppc/Makefile create mode 100644 lib/hipe/ppc/hipe_ppc.erl create mode 100644 lib/hipe/ppc/hipe_ppc.hrl create mode 100644 lib/hipe/ppc/hipe_ppc_assemble.erl create mode 100644 lib/hipe/ppc/hipe_ppc_cfg.erl create mode 100644 lib/hipe/ppc/hipe_ppc_defuse.erl create mode 100644 lib/hipe/ppc/hipe_ppc_encode.erl create mode 100644 lib/hipe/ppc/hipe_ppc_finalise.erl create mode 100644 lib/hipe/ppc/hipe_ppc_frame.erl create mode 100644 lib/hipe/ppc/hipe_ppc_liveness_all.erl create mode 100644 lib/hipe/ppc/hipe_ppc_liveness_fpr.erl create mode 100644 lib/hipe/ppc/hipe_ppc_liveness_gpr.erl create mode 100644 lib/hipe/ppc/hipe_ppc_main.erl create mode 100644 lib/hipe/ppc/hipe_ppc_pp.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_finalise.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_ls.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_naive.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_postconditions.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl create mode 100644 lib/hipe/ppc/hipe_ppc_registers.erl create mode 100644 lib/hipe/ppc/hipe_rtl_to_ppc.erl create mode 100644 lib/hipe/prebuild.skip create mode 100644 lib/hipe/regalloc/Makefile create mode 100644 lib/hipe/regalloc/hipe_adj_list.erl create mode 100644 lib/hipe/regalloc/hipe_amd64_specific.erl create mode 100644 lib/hipe/regalloc/hipe_amd64_specific_sse2.erl create mode 100644 lib/hipe/regalloc/hipe_amd64_specific_x87.erl create mode 100644 lib/hipe/regalloc/hipe_arm_specific.erl create mode 100644 lib/hipe/regalloc/hipe_coalescing_regalloc.erl create mode 100644 lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl create mode 100644 lib/hipe/regalloc/hipe_ig.erl create mode 100644 lib/hipe/regalloc/hipe_ig_moves.erl create mode 100644 lib/hipe/regalloc/hipe_ls_regalloc.erl create mode 100644 lib/hipe/regalloc/hipe_moves.erl create mode 100644 lib/hipe/regalloc/hipe_node_sets.erl create mode 100644 lib/hipe/regalloc/hipe_optimistic_regalloc.erl create mode 100644 lib/hipe/regalloc/hipe_ppc_specific.erl create mode 100644 lib/hipe/regalloc/hipe_ppc_specific_fp.erl create mode 100644 lib/hipe/regalloc/hipe_reg_worklists.erl create mode 100644 lib/hipe/regalloc/hipe_regalloc_loop.erl create mode 100644 lib/hipe/regalloc/hipe_sparc_specific.erl create mode 100644 lib/hipe/regalloc/hipe_sparc_specific_fp.erl create mode 100644 lib/hipe/regalloc/hipe_spillcost.erl create mode 100644 lib/hipe/regalloc/hipe_spillcost.hrl create mode 100644 lib/hipe/regalloc/hipe_temp_map.erl create mode 100644 lib/hipe/regalloc/hipe_x86_specific.erl create mode 100644 lib/hipe/regalloc/hipe_x86_specific_x87.erl create mode 100644 lib/hipe/rtl/Makefile create mode 100644 lib/hipe/rtl/hipe_icode2rtl.erl create mode 100644 lib/hipe/rtl/hipe_rtl.erl create mode 100644 lib/hipe/rtl/hipe_rtl.hrl create mode 100644 lib/hipe/rtl/hipe_rtl_arch.erl create mode 100644 lib/hipe/rtl/hipe_rtl_arith.inc create mode 100644 lib/hipe/rtl/hipe_rtl_arith_32.erl create mode 100644 lib/hipe/rtl/hipe_rtl_arith_64.erl create mode 100644 lib/hipe/rtl/hipe_rtl_binary.erl create mode 100644 lib/hipe/rtl/hipe_rtl_binary_construct.erl create mode 100644 lib/hipe/rtl/hipe_rtl_binary_match.erl create mode 100644 lib/hipe/rtl/hipe_rtl_cfg.erl create mode 100644 lib/hipe/rtl/hipe_rtl_cleanup_const.erl create mode 100644 lib/hipe/rtl/hipe_rtl_exceptions.erl create mode 100644 lib/hipe/rtl/hipe_rtl_lcm.erl create mode 100644 lib/hipe/rtl/hipe_rtl_liveness.erl create mode 100644 lib/hipe/rtl/hipe_rtl_mk_switch.erl create mode 100644 lib/hipe/rtl/hipe_rtl_primops.erl create mode 100644 lib/hipe/rtl/hipe_rtl_ssa.erl create mode 100644 lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl create mode 100644 lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl create mode 100644 lib/hipe/rtl/hipe_rtl_ssapre.erl create mode 100644 lib/hipe/rtl/hipe_rtl_symbolic.erl create mode 100644 lib/hipe/rtl/hipe_rtl_varmap.erl create mode 100644 lib/hipe/rtl/hipe_tagscheme.erl create mode 100644 lib/hipe/sparc/Makefile create mode 100644 lib/hipe/sparc/hipe_rtl_to_sparc.erl create mode 100644 lib/hipe/sparc/hipe_sparc.erl create mode 100644 lib/hipe/sparc/hipe_sparc.hrl create mode 100644 lib/hipe/sparc/hipe_sparc_assemble.erl create mode 100644 lib/hipe/sparc/hipe_sparc_cfg.erl create mode 100644 lib/hipe/sparc/hipe_sparc_defuse.erl create mode 100644 lib/hipe/sparc/hipe_sparc_encode.erl create mode 100644 lib/hipe/sparc/hipe_sparc_finalise.erl create mode 100644 lib/hipe/sparc/hipe_sparc_frame.erl create mode 100644 lib/hipe/sparc/hipe_sparc_liveness_all.erl create mode 100644 lib/hipe/sparc/hipe_sparc_liveness_fpr.erl create mode 100644 lib/hipe/sparc/hipe_sparc_liveness_gpr.erl create mode 100644 lib/hipe/sparc/hipe_sparc_main.erl create mode 100644 lib/hipe/sparc/hipe_sparc_pp.erl create mode 100644 lib/hipe/sparc/hipe_sparc_ra.erl create mode 100644 lib/hipe/sparc/hipe_sparc_ra_finalise.erl create mode 100644 lib/hipe/sparc/hipe_sparc_ra_ls.erl create mode 100644 lib/hipe/sparc/hipe_sparc_ra_naive.erl create mode 100644 lib/hipe/sparc/hipe_sparc_ra_postconditions.erl create mode 100644 lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl create mode 100644 lib/hipe/sparc/hipe_sparc_registers.erl create mode 100644 lib/hipe/ssa/hipe_ssa.inc create mode 100644 lib/hipe/ssa/hipe_ssa_const_prop.inc create mode 100644 lib/hipe/ssa/hipe_ssa_copy_prop.inc create mode 100644 lib/hipe/ssa/hipe_ssa_liveness.inc create mode 100644 lib/hipe/tools/Makefile create mode 100644 lib/hipe/tools/hipe_ceach.erl create mode 100644 lib/hipe/tools/hipe_jit.erl create mode 100644 lib/hipe/tools/hipe_profile.erl create mode 100644 lib/hipe/tools/hipe_timer.erl create mode 100644 lib/hipe/tools/hipe_tool.erl create mode 100644 lib/hipe/util/Makefile create mode 100644 lib/hipe/util/hipe_digraph.erl create mode 100755 lib/hipe/util/hipe_dot.erl create mode 100644 lib/hipe/util/hipe_timing.erl create mode 100644 lib/hipe/util/hipe_vectors.erl create mode 100644 lib/hipe/util/hipe_vectors.hrl create mode 100644 lib/hipe/vsn.mk create mode 100644 lib/hipe/x86/Makefile create mode 100644 lib/hipe/x86/NOTES.OPTIM create mode 100644 lib/hipe/x86/NOTES.RA create mode 100644 lib/hipe/x86/TODO create mode 100644 lib/hipe/x86/hipe_rtl_to_x86.erl create mode 100644 lib/hipe/x86/hipe_x86.erl create mode 100644 lib/hipe/x86/hipe_x86.hrl create mode 100644 lib/hipe/x86/hipe_x86_assemble.erl create mode 100644 lib/hipe/x86/hipe_x86_cfg.erl create mode 100644 lib/hipe/x86/hipe_x86_defuse.erl create mode 100644 lib/hipe/x86/hipe_x86_encode.erl create mode 100644 lib/hipe/x86/hipe_x86_encode.txt create mode 100644 lib/hipe/x86/hipe_x86_frame.erl create mode 100644 lib/hipe/x86/hipe_x86_liveness.erl create mode 100644 lib/hipe/x86/hipe_x86_main.erl create mode 100644 lib/hipe/x86/hipe_x86_postpass.erl create mode 100644 lib/hipe/x86/hipe_x86_pp.erl create mode 100644 lib/hipe/x86/hipe_x86_ra.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_finalise.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_ls.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_naive.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_postconditions.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_x87_ls.erl create mode 100644 lib/hipe/x86/hipe_x86_registers.erl create mode 100644 lib/hipe/x86/hipe_x86_spill_restore.erl create mode 100644 lib/hipe/x86/hipe_x86_x87.erl (limited to 'lib/hipe') diff --git a/lib/hipe/Makefile b/lib/hipe/Makefile new file mode 100644 index 0000000000..be3a618e34 --- /dev/null +++ b/lib/hipe/Makefile @@ -0,0 +1,63 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +SHELL=/bin/sh + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +ifdef HIPE_ENABLED +HIPE_SUBDIRS = regalloc sparc ppc x86 amd64 arm opt tools +else +HIPE_SUBDIRS = +endif + +ALWAYS_SUBDIRS = misc main cerl icode flow util + +ifdef HIPE_ENABLED +# "rtl" below must be the first directory so that file rtl/hipe_literals.hrl +# which is needed by many other HiPE files is built first +SUB_DIRECTORIES = rtl $(ALWAYS_SUBDIRS) $(HIPE_SUBDIRS) +else +SUB_DIRECTORIES = $(ALWAYS_SUBDIRS) +endif + +# +# Default Subdir Targets +# +include $(ERL_TOP)/make/otp_subdir.mk + +# This overrides the default recursive-make edocs target in otp_subdir.mk +# It is not pretty, but it will have to do for now. +docs: + @if [ -d $(ERL_TOP)/lib/edoc/ebin ]; then \ + erl -noshell -pa $(ERL_TOP)/lib/edoc/ebin $(ERL_TOP)/lib/syntax_tools/ebin $(ERL_TOP)/lib/xmerl/ebin -run edoc_run application 'hipe' '"."' '[new,no_packages]' -s init stop ; \ + fi + +edocs: docs + +all-subdirs: + -for dir in $(SUB_DIRECTORIES); do \ + (cd $$dir; $(MAKE) $(MAKETARGET) EBIN=../ebin; cd ..); \ + done + +distclean: + $(MAKE) MAKETARGET="distclean" all-subdirs +realclean: + $(MAKE) MAKETARGET="realclean" all-subdirs + diff --git a/lib/hipe/TODO b/lib/hipe/TODO new file mode 100644 index 0000000000..f166472df6 --- /dev/null +++ b/lib/hipe/TODO @@ -0,0 +1,130 @@ +Bugfix +====== + P->current (Fix observable behaviour?) + New calling convention for guard bifs (Recognize at load time). + Long branches: + timer:tc(hipe,c,[megaco_text_parser,[{timeout,infinity}]]). + {4801210531, + {error,[{problem,too_long_branch},{address,3381732},{length,-828622}]}} + +Performance +=========== + + Better handling of multimove in regalloc. + Faster closure creation. (Can static fields be preallocated?) + Expand pseudo-ops before scheduler (SPARC) + Stack maps for SPARC + Make frames in Sparc not in RTL. + Coalesce spill locations. + +Feature +======= + + Stack traces from stack maps. + +Cleanup +======= + + Speedup renaming and other bottlenecks in the compiler. + Only calls with fail label should end basic blocks. + Remove fail-entry-points from RTL (sparc/x86). + Cleanup hipe_*_registers.erl and interface/rules with regalloc. + HiPE in bootstrap. + Cleanup and merge loaders. (Better handling of data.) + Re-examine switching code. + +Extensions +========== + + Design strategy for finding all processes holding a certain closure. + Design strategy for native code unloading. + mbufs: In guards -> throw away, in bifs -> trigger special GC. (fix for native.) + Unified heap + process optimization (+ PE). + Incremental GC. + + +Old list compiled by Thomas Lindgren (needs cleaning up) +======================================================== + +

Experimental implementations

+

RTL

+ + +

Unimplemented optimizations

+ +

Erlang/Core source-level-optimizations

+ + +

Icode-optimizations

+ + +

RTL-optimizations

+ + +

Sparc-optimizations

+ + +

Other optimizations

+ +Profile driven optimizations. + diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile new file mode 100644 index 0000000000..93e5f086d9 --- /dev/null +++ b/lib/hipe/amd64/Makefile @@ -0,0 +1,127 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. +MODULES=hipe_amd64_assemble \ + hipe_amd64_defuse \ + hipe_amd64_encode \ + hipe_amd64_frame \ + hipe_amd64_liveness \ + hipe_amd64_main \ + hipe_amd64_pp \ + hipe_amd64_ra \ + hipe_amd64_ra_finalise \ + hipe_amd64_ra_ls \ + hipe_amd64_ra_naive \ + hipe_amd64_ra_postconditions \ + hipe_amd64_ra_sse2_postconditions \ + hipe_amd64_ra_x87_ls \ + hipe_amd64_registers \ + hipe_amd64_spill_restore \ + hipe_amd64_x87 \ + hipe_rtl_to_amd64 + +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += -DHIPE_AMD64 +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. +$(EBIN)/hipe_amd64_assemble.beam: ../main/hipe.hrl ../rtl/hipe_literals.hrl ../x86/hipe_x86.hrl ../../kernel/src/hipe_ext_format.hrl ../misc/hipe_sdi.hrl ../x86/hipe_x86_assemble.erl +$(EBIN)/hipe_amd64_defuse.beam: ../x86/hipe_x86.hrl ../x86/hipe_x86_defuse.erl +$(EBIN)/hipe_amd64_frame.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_frame.erl ../rtl/hipe_literals.hrl +$(EBIN)/hipe_amd64_liveness.beam: ../flow/liveness.inc ../x86/hipe_x86_liveness.erl +$(EBIN)/hipe_amd64_main.beam: ../main/hipe.hrl ../x86/hipe_x86_main.erl +$(EBIN)/hipe_amd64_pp.beam: ../x86/hipe_x86.hrl ../x86/hipe_x86_pp.erl +$(EBIN)/hipe_amd64_ra.beam: ../main/hipe.hrl ../x86/hipe_x86_ra.erl +$(EBIN)/hipe_amd64_ra_dummy.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl +$(EBIN)/hipe_amd64_ra_finalise.beam: ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_finalise.erl +$(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl +$(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl +$(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl +$(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl +$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl +$(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl +$(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl +$(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl + +$(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/amd64/hipe_amd64_assemble.erl b/lib/hipe/amd64/hipe_amd64_assemble.erl new file mode 100644 index 0000000000..db3cfcc6bd --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_assemble.erl @@ -0,0 +1,19 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-include("../x86/hipe_x86_assemble.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_defuse.erl b/lib/hipe/amd64/hipe_amd64_defuse.erl new file mode 100644 index 0000000000..c48e80f3f1 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_defuse.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_defuse.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl new file mode 100644 index 0000000000..ee68dfb3b8 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_encode.erl @@ -0,0 +1,1484 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Copyright (C) 2000-2004 Mikael Pettersson +%%% Copyright (C) 2004 Daniel Luna +%%% +%%% This is the syntax of amd64 r/m operands: +%%% +%%% opnd ::= reg mod == 11 +%%% | MEM[ea] mod != 11 +%%% +%%% ea ::= disp32(reg) mod == 10, r/m != ESP +%%% | disp32 sib12 mod == 10, r/m == 100 +%%% | disp8(reg) mod == 01, r/m != ESP +%%% | disp8 sib12 mod == 01, r/m == 100 +%%% | (reg) mod == 00, r/m != ESP and EBP +%%% | sib0 mod == 00, r/m == 100 +%%% | disp32(%rip) mod == 00, r/m == 101 +%%% +%%% // sib0: mod == 00 +%%% sib0 ::= disp32(,index,scale) base == EBP, index != ESP +%%% | disp32 base == EBP, index == 100 +%%% | (base,index,scale) base != EBP, index != ESP +%%% | (base) base != EBP, index == 100 +%%% +%%% // sib12: mod == 01 or 10 +%%% sib12 ::= (base,index,scale) index != ESP +%%% | (base) index == 100 +%%% +%%% scale ::= 00 | 01 | 10 | 11 index << scale +%%% +%%% Notes: +%%% +%%% 1. ESP cannot be used as index register. +%%% 2. Use of ESP as base register requires a SIB byte. +%%% 3. disp(reg), when reg != ESP, can be represented without +%%% [r/m == reg] or with [r/m == 100, base == reg] a SIB byte. +%%% 4. disp32 can be represented without [mod == 00, r/m == 101] +%%% or with [mod == 00, r/m == 100, base == 101, index == 100] +%%% a SIB byte. +%%% 5. AMD64 and x86 interpret mod==00b r/m==101b EAs differently: +%%% on x86 the disp32 is an absolute address, but on AMD64 the +%%% disp32 is relative to the %rip of the next instruction. + +-module(hipe_amd64_encode). + +-export([% condition codes + cc/1, + % 8-bit registers + %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + % 32-bit registers + %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, + % operands + sindex/2, sib/1, sib/2, + ea_disp32_base/2, ea_disp32_sib/2, + ea_disp8_base/2, ea_disp8_sib/2, + ea_base/1, + ea_disp32_sindex/1, %%ea_disp32_sindex/2, + ea_sib/1, %ea_disp32_rip/1, + rm_reg/1, rm_mem/1, + % instructions + insn_encode/3, insn_sizeof/2]). + +%%-define(DO_HIPE_AMD64_ENCODE_TEST,true). +-ifdef(DO_HIPE_AMD64_ENCODE_TEST). +-export([dotest/0, dotest/1]). % for testing, don't use +-endif. + +-define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end). +%-define(ASSERT(F,G), []). + +%%% condition codes + +-define(CC_O, 2#0000). % overflow +-define(CC_NO, 2#0001). % no overflow +-define(CC_B, 2#0010). % below, =u +-define(CC_E, 2#0100). % equal +-define(CC_NE, 2#0101). % not equal +-define(CC_BE, 2#0110). % below or equal, <=u +-define(CC_A, 2#0111). % above, >u +-define(CC_S, 2#1000). % sign, + +-define(CC_NS, 2#1001). % not sign, - +-define(CC_PE, 2#1010). % parity even +-define(CC_PO, 2#1011). % parity odd +-define(CC_L, 2#1100). % less than, =s +-define(CC_LE, 2#1110). % less or equal, <=s +-define(CC_G, 2#1111). % greater than, >s + +cc(o) -> ?CC_O; +cc(no) -> ?CC_NO; +cc(b) -> ?CC_B; +cc(ae) -> ?CC_AE; +cc(e) -> ?CC_E; +cc(ne) -> ?CC_NE; +cc(be) -> ?CC_BE; +cc(a) -> ?CC_A; +cc(s) -> ?CC_S; +cc(ns) -> ?CC_NS; +cc(pe) -> ?CC_PE; +cc(po) -> ?CC_PO; +cc(l) -> ?CC_L; +cc(ge) -> ?CC_GE; +cc(le) -> ?CC_LE; +cc(g) -> ?CC_G. + +%%% 8-bit registers + +-define(AL, 2#000). +-define(CL, 2#001). +-define(DL, 2#010). +-define(BL, 2#011). +-define(AH, 2#100). +-define(CH, 2#101). +-define(DH, 2#110). +-define(BH, 2#111). + +%% al() -> ?AL. +%% cl() -> ?CL. +%% dl() -> ?DL. +%% bl() -> ?BL. +%% ah() -> ?AH. +%% ch() -> ?CH. +%% dh() -> ?DH. +%% bh() -> ?BH. + +%%% 32-bit registers + +-define(EAX, 2#000). +-define(ECX, 2#001). +-define(EDX, 2#010). +-define(EBX, 2#011). +-define(ESP, 2#100). +-define(EBP, 2#101). +-define(ESI, 2#110). +-define(EDI, 2#111). + +%% eax() -> ?EAX. +%% ecx() -> ?ECX. +%% edx() -> ?EDX. +%% ebx() -> ?EBX. +%% esp() -> ?ESP. +%% ebp() -> ?EBP. +%% esi() -> ?ESI. +%% edi() -> ?EDI. + +%%% r/m operands + +sindex(Scale, Index) when is_integer(Scale), is_integer(Index) -> + ?ASSERT(sindex, Scale >= 0), + ?ASSERT(sindex, Scale =< 3), + ?ASSERT(sindex, Index =/= ?ESP), + {sindex, Scale, Index}. + +-record(sib, {sindex_opt, base :: integer()}). +sib(Base) when is_integer(Base) -> #sib{sindex_opt=none, base=Base}. +sib(Base, Sindex) when is_integer(Base) -> #sib{sindex_opt=Sindex, base=Base}. + +ea_disp32_base(Disp32, Base) when is_integer(Base) -> + ?ASSERT(ea_disp32_base, Base =/= ?ESP), + {ea_disp32_base, Disp32, Base}. +ea_disp32_sib(Disp32, SIB) -> {ea_disp32_sib, Disp32, SIB}. +ea_disp8_base(Disp8, Base) when is_integer(Base) -> + ?ASSERT(ea_disp8_base, Base =/= ?ESP), + {ea_disp8_base, Disp8, Base}. +ea_disp8_sib(Disp8, SIB) -> {ea_disp8_sib, Disp8, SIB}. +ea_base(Base) when is_integer(Base) -> + ?ASSERT(ea_base, Base =/= ?ESP), + ?ASSERT(ea_base, Base =/= ?EBP), + {ea_base, Base}. +ea_disp32_sindex(Disp32) -> {ea_disp32_sindex, Disp32, none}. +%% ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}. +ea_sib(SIB) -> + ?ASSERT(ea_sib, SIB#sib.base =/= ?EBP), + {ea_sib, SIB}. +%ea_disp32_rip(Disp32) -> {ea_disp32_rip, Disp32}. + +rm_reg(Reg) -> {rm_reg, Reg}. +rm_mem(EA) -> {rm_mem, EA}. + +mk_modrm(Mod, RO, RM) -> + {rex([{r,RO}, {b,RM}]), + (Mod bsl 6) bor ((RO band 2#111) bsl 3) bor (RM band 2#111)}. + +mk_sib(Scale, Index, Base) -> + {rex([{x,Index}, {b,Base}]), + (Scale bsl 6) bor ((Index band 2#111) bsl 3) bor (Base band 2#111)}. + +rex(REXs) -> {rex, rex_(REXs)}. +rex_([]) -> 0; +rex_([{r8, Reg8}| Rest]) -> % 8 bit registers + case Reg8 of + {rm_mem, _} -> rex_(Rest); + 4 -> (1 bsl 8) bor rex_(Rest); + 5 -> (1 bsl 8) bor rex_(Rest); + 6 -> (1 bsl 8) bor rex_(Rest); + 7 -> (1 bsl 8) bor rex_(Rest); + X when is_integer(X) -> rex_(Rest) + end; +rex_([{w, REXW}| Rest]) -> % 64-bit mode + (REXW bsl 3) bor rex_(Rest); +rex_([{r, ModRM_regRegister}| Rest]) when is_integer(ModRM_regRegister) -> + REXR = if (ModRM_regRegister > 7) -> 1; + true -> 0 + end, + (REXR bsl 2) bor rex_(Rest); +rex_([{x, SIB_indexRegister}| Rest]) when is_integer(SIB_indexRegister) -> + REXX = if (SIB_indexRegister > 7) -> 1; + true -> 0 + end, + (REXX bsl 1) bor rex_(Rest); +rex_([{b, OtherRegister}| Rest]) when is_integer(OtherRegister) -> + %% ModRM r/m, SIB base or opcode reg + REXB = if (OtherRegister > 7) -> 1; + true -> 0 + end, + REXB bor rex_(Rest). + +le16(Word, Tail) -> + [Word band 16#FF, (Word bsr 8) band 16#FF | Tail]. + +le32(Word, Tail) when is_integer(Word) -> + [Word band 16#FF, (Word bsr 8) band 16#FF, + (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF | Tail]; +le32({Tag,Val}, Tail) -> % a relocatable datum + [{le32,Tag,Val} | Tail]. + +le64(Word, Tail) when is_integer(Word) -> + [ Word band 16#FF, (Word bsr 8) band 16#FF, + (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF, + (Word bsr 32) band 16#FF, (Word bsr 40) band 16#FF, + (Word bsr 48) band 16#FF, (Word bsr 56) band 16#FF | Tail]; +le64({Tag,Val}, Tail) -> + [{le64,Tag,Val} | Tail]. + +enc_sindex_opt({sindex,Scale,Index}) -> {Scale, Index}; +enc_sindex_opt(none) -> {2#00, 2#100}. + +enc_sib(#sib{sindex_opt=SindexOpt, base=Base}) -> + {Scale, Index} = enc_sindex_opt(SindexOpt), + mk_sib(Scale, Index, Base). + +enc_ea(EA, RO, Tail) -> + case EA of + {ea_disp32_base, Disp32, Base} -> + [mk_modrm(2#10, RO, Base) | le32(Disp32, Tail)]; + {ea_disp32_sib, Disp32, SIB} -> + [mk_modrm(2#10, RO, 2#100), enc_sib(SIB) | le32(Disp32, Tail)]; + {ea_disp8_base, Disp8, Base} -> + [mk_modrm(2#01, RO, Base), Disp8 | Tail]; + {ea_disp8_sib, Disp8, SIB} -> + [mk_modrm(2#01, RO, 2#100), enc_sib(SIB), Disp8 | Tail]; + {ea_base, Base} -> + [mk_modrm(2#00, RO, Base) | Tail]; + {ea_disp32_sindex, Disp32, SindexOpt} -> + {Scale, Index} = enc_sindex_opt(SindexOpt), + SIB = mk_sib(Scale, Index, 2#101), + MODRM = mk_modrm(2#00, RO, 2#100), + [MODRM, SIB | le32(Disp32, Tail)]; + {ea_sib, SIB} -> + [mk_modrm(2#00, RO, 2#100), enc_sib(SIB) | Tail]; + {ea_disp32_rip, Disp32} -> + [mk_modrm(2#00, RO, 2#101) | le32(Disp32, Tail)] + end. + +encode_rm(RM, RO, Tail) -> + case RM of + {rm_reg, Reg} -> [mk_modrm(2#11, RO, Reg) | Tail]; + {rm_mem, EA} -> enc_ea(EA, RO, Tail) + end. + +%% sizeof_ea(EA) -> +%% case element(1, EA) of +%% ea_disp32_base -> 5; +%% ea_disp32_sib -> 6; +%% ea_disp8_base -> 2; +%% ea_disp8_sib -> 3; +%% ea_base -> 1; +%% ea_disp32_sindex -> 6; +%% ea_sib -> 2; +%% ea_disp32_rip -> 5 +%% end. + +%% sizeof_rm(RM) -> +%% case RM of +%% {rm_reg, _} -> 1; +%% {rm_mem, EA} -> sizeof_ea(EA) +%% end. + +%%% x87 stack postitions + +-define(ST0, 2#000). +-define(ST1, 2#001). +-define(ST2, 2#010). +-define(ST3, 2#011). +-define(ST4, 2#100). +-define(ST5, 2#101). +-define(ST6, 2#110). +-define(ST7, 2#111). + +st(0) -> ?ST0; +st(1) -> ?ST1; +st(2) -> ?ST2; +st(3) -> ?ST3; +st(4) -> ?ST4; +st(5) -> ?ST5; +st(6) -> ?ST6; +st(7) -> ?ST7. + + +%%% Instructions +%%% +%%% Insn ::= {Op,Opnds} +%%% Opnds ::= {Opnd1,...,Opndn} (n >= 0) +%%% Opnd ::= eax | ax | al | 1 | cl +%%% | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8} +%%% | {rm32,RM32} | {rm16,RM16} | {rm8,RM8} +%%% | {rel32,Rel32} | {rel8,Rel8} +%%% | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8} +%%% | {cc,CC} +%%% | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8} +%%% | {ea,EA} + +-define(PFX_OPND_16BITS, 16#66). + +arith_binop_encode(SubOpcode, Opnds) -> + %% add, or, adc, sbb, and, sub, xor, cmp + case Opnds of + {eax, {imm32,Imm32}} -> + [16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#81 | encode_rm(RM32, SubOpcode, le32(Imm32, []))]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#83 | encode_rm(RM32, SubOpcode, [Imm8])]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#01 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> + [16#03 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + %% Below starts amd64 stuff with rex prefix + {rax, {imm32,Imm32}} -> + [rex([{w,1}]), 16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])]; + {{rm64,RM64}, {imm32,Imm32}} -> + [rex([{w,1}]), 16#81 + | encode_rm(RM64, SubOpcode, le32(Imm32, []))]; + {{rm64,RM64}, {imm8,Imm8}} -> + [rex([{w,1}]), 16#83 | encode_rm(RM64, SubOpcode, [Imm8])]; + {{rm64,RM64}, {reg64,Reg64}} -> + [rex([{w,1}]), 16#01 bor (SubOpcode bsl 3) + | encode_rm(RM64, Reg64, [])]; + {{reg64,Reg64}, {rm64,RM64}} -> + [rex([{w,1}]), 16#03 bor (SubOpcode bsl 3) + | encode_rm(RM64, Reg64, [])] + end. + +sse2_arith_binop_encode(Prefix, Opcode, {{xmm, XMM64}, {rm64fp, RM64}}) -> + %% addpd, cmpsd, divsd, maxsd, minsd, mulsd, sqrtsd, subsd + [Prefix, 16#0F, Opcode | encode_rm(RM64, XMM64, [])]. + +sse2_cvtsi2sd_encode({{xmm,XMM64}, {rm64,RM64}}) -> + [rex([{w, 1}]), 16#F2, 16#0F, 16#2A | encode_rm(RM64, XMM64, [])]. + +sse2_mov_encode(Opnds) -> + case Opnds of + {{xmm, XMM64}, {rm64fp, RM64}} -> % movsd + [16#F2, 16#0F, 16#10 | encode_rm(RM64, XMM64, [])]; + {{rm64fp, RM64}, {xmm, XMM64}} -> % movsd + [16#F2, 16#0F, 16#11 | encode_rm(RM64, XMM64, [])] +% {{xmm, XMM64}, {rm64, RM64}} -> % cvtsi2sd +% [rex([{w, 1}]), 16#F2, 16#0F, 16#2A | encode_rm(RM64, XMM64, [])] + end. + +%% arith_binop_sizeof(Opnds) -> +%% %% add, or, adc, sbb, and, sub, xor, cmp +%% case Opnds of +%% {eax, {imm32,_}} -> +%% 1 + 4; +%% {{rm32,RM32}, {imm32,_}} -> +%% 1 + sizeof_rm(RM32) + 4; +%% {{rm32,RM32}, {imm8,_}} -> +%% 1 + sizeof_rm(RM32) + 1; +%% {{rm32,RM32}, {reg32,_}} -> +%% 1 + sizeof_rm(RM32); +%% {{reg32,_}, {rm32,RM32}} -> +%% 1 + sizeof_rm(RM32) +%% end. + +bs_op_encode(Opcode, {{reg32,Reg32}, {rm32,RM32}}) -> % bsf, bsr + [16#0F, Opcode | encode_rm(RM32, Reg32, [])]. + +%% bs_op_sizeof({{reg32,_}, {rm32,RM32}}) -> % bsf, bsr +%% 2 + sizeof_rm(RM32). + +bswap_encode(Opnds) -> + case Opnds of + {{reg32,Reg32}} -> + [rex([{b, Reg32}]), 16#0F, 16#C8 bor (Reg32 band 2#111)]; + {{reg64,Reg64}} -> + [rex([{w, 1}, {b, Reg64}]), 16#0F, 16#C8 bor (Reg64 band 2#111)] + end. + +%% bswap_sizeof({{reg32,_}}) -> +%% 2. + +bt_op_encode(SubOpcode, Opnds) -> % bt, btc, btr, bts + case Opnds of + {{rm32,RM32}, {reg32,Reg32}} -> + [16#0F, 16#A3 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#0F, 16#BA | encode_rm(RM32, SubOpcode, [Imm8])] + end. + +%% bt_op_sizeof(Opnds) -> % bt, btc, btr, bts +%% case Opnds of +%% {{rm32,RM32}, {reg32,_}} -> +%% 2 + sizeof_rm(RM32); +%% {{rm32,RM32}, {imm8,_}} -> +%% 2 + sizeof_rm(RM32) + 1 +%% end. + +call_encode(Opnds) -> + case Opnds of + {{rel32,Rel32}} -> + [16#E8 | le32(Rel32, [])]; +%%% {{rm32,RM32}} -> +%%% [16#FF | encode_rm(RM32, 2#010, [])]; + {{rm64,RM64}} -> % Defaults to 64 bits on amd64 + [16#FF | encode_rm(RM64, 2#010, [])] + end. + +%% call_sizeof(Opnds) -> +%% case Opnds of +%% {{rel32,_}} -> +%% 1 + 4; +%% {{rm32,RM32}} -> +%% 1 + sizeof_rm(RM32) +%% end. + +cbw_encode({}) -> + [?PFX_OPND_16BITS, 16#98]. + +cbw_sizeof({}) -> + 2. + +nullary_op_encode(Opcode, {}) -> + %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std + [Opcode]. + +nullary_op_sizeof({}) -> + %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std + 1. + +cmovcc_encode({{cc,CC}, {reg32,Reg32}, {rm32,RM32}}) -> + [16#0F, 16#40 bor CC | encode_rm(RM32, Reg32, [])]. + +%% cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) -> +%% 2 + sizeof_rm(RM32). + +incdec_encode(SubOpcode, Opnds) -> % SubOpcode is either 0 or 1 + case Opnds of + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, SubOpcode, [])]; + {{rm64,RM64}} -> + [rex([{w, 1}]), 16#FF | encode_rm(RM64, SubOpcode, [])] + end. + +%% incdec_sizeof(Opnds) -> +%% case Opnds of +%% {{rm32,RM32}} -> +%% 1 + sizeof_rm(RM32); +%% {{reg32,_}} -> +%% 1 +%% end. + +arith_unop_encode(Opcode, Opnds) -> % div, idiv, mul, neg, not + case Opnds of + {{rm32,RM32}} -> + [16#F7 | encode_rm(RM32, Opcode, [])]; + {{rm64,RM64}} -> + [rex([{w,1}]), 16#F7 | encode_rm(RM64, Opcode, [])] + end. + +%% arith_unop_sizeof({{rm32,RM32}}) -> % div, idiv, mul, neg, not +%% 1 + sizeof_rm(RM32). + +enter_encode({{imm16,Imm16}, {imm8,Imm8}}) -> + [16#C8 | le16(Imm16, [Imm8])]. + +enter_sizeof({{imm16,_}, {imm8,_}}) -> + 1 + 2 + 1. + +imul_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> % *= rm32 + [16#F7 | encode_rm(RM32, 2#101, [])]; + {{rm64,RM64}} -> + [rex([{w,1}]), 16#F7 | encode_rm(RM64, 2#101, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> % reg *= rm32 + [16#0F, 16#AF | encode_rm(RM32, Reg32, [])]; + {{reg64,Reg64}, {rm64,RM64}} -> + [rex([{w,1}]), 16#0F, 16#AF | encode_rm(RM64, Reg64, [])]; + {{reg32,Reg32}, {rm32,RM32}, {imm8,Imm8}} -> % reg := rm32 * sext(imm8) + [16#6B | encode_rm(RM32, Reg32, [Imm8])]; + {{reg64,Reg64}, {rm64,RM64}, {imm8,Imm8}} -> + [rex([{w,1}]), 16#6B | encode_rm(RM64, Reg64, [Imm8])]; + {{reg32,Reg32}, {rm32,RM32}, {imm32,Imm32}} -> % reg := rm32 * imm32 + [16#69 | encode_rm(RM32, Reg32, le32(Imm32, []))]; + {{reg64,Reg64}, {rm64,RM64}, {imm32,Imm32}} -> + [rex([{w,1}]), 16#69 | encode_rm(RM64, Reg64, le32(Imm32, []))] + end. + +%% imul_sizeof(Opnds) -> +%% case Opnds of +%% {{rm32,RM32}} -> +%% 1 + sizeof_rm(RM32); +%% {{reg32,_}, {rm32,RM32}} -> +%% 2 + sizeof_rm(RM32); +%% {{reg32,_}, {rm32,RM32}, {imm8,_}} -> +%% 1 + sizeof_rm(RM32) + 1; +%% {{reg32,_}, {rm32,RM32}, {imm32,_}} -> +%% 1 + sizeof_rm(RM32) + 4 +%% end. + +jcc_encode(Opnds) -> + case Opnds of + {{cc,CC}, {rel8,Rel8}} -> + [16#70 bor CC, Rel8]; + {{cc,CC}, {rel32,Rel32}} -> + [16#0F, 16#80 bor CC | le32(Rel32, [])] + end. + +jcc_sizeof(Opnds) -> + case Opnds of + {{cc,_}, {rel8,_}} -> + 2; + {{cc,_}, {rel32,_}} -> + 2 + 4 + end. + +jmp8_op_encode(Opcode, {{rel8,Rel8}}) -> % jecxz, loop, loope, loopne + [Opcode, Rel8]. + +jmp8_op_sizeof({{rel8,_}}) -> % jecxz, loop, loope, loopne + 2. + +jmp_encode(Opnds) -> + case Opnds of + {{rel8,Rel8}} -> + [16#EB, Rel8]; + {{rel32,Rel32}} -> + [16#E9 | le32(Rel32, [])]; +%%% {{rm32,RM32}} -> +%%% [16#FF | encode_rm(RM32, 2#100, [])] + {{rm64,RM64}} -> + [16#FF | encode_rm(RM64, 2#100, [])] + end. + +%% jmp_sizeof(Opnds) -> +%% case Opnds of +%% {{rel8,_}} -> +%% 2; +%% {{rel32,_}} -> +%% 1 + 4; +%% {{rm32,RM32}} -> +%% 1 + sizeof_rm(RM32) +%% end. + +lea_encode({{reg32,Reg32}, {ea,EA}}) -> + [16#8D | enc_ea(EA, Reg32, [])]; +lea_encode({{reg64,Reg64}, {ea,EA}}) -> + [rex([{w, 1}]), 16#8D | enc_ea(EA, Reg64, [])]. + +%% lea_sizeof({{reg32,_}, {ea,EA}}) -> +%% 1 + sizeof_ea(EA). + +mov_encode(Opnds) -> + case Opnds of + {{rm8,RM8}, {reg8,Reg8}} -> + [rex([{r8, RM8}, {r8, Reg8}]), 16#88 | encode_rm(RM8, Reg8, [])]; + {{rm16,RM16}, {reg16,Reg16}} -> + [?PFX_OPND_16BITS, 16#89 | encode_rm(RM16, Reg16, [])]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#89 | encode_rm(RM32, Reg32, [])]; + {{rm64,RM64}, {reg64,Reg64}} -> + [rex([{w, 1}]), 16#89 | encode_rm(RM64, Reg64, [])]; + {{reg8,Reg8}, {rm8,RM8}} -> + [rex([{r8, RM8}, {r8, Reg8}]), 16#8A | + encode_rm(RM8, Reg8, [])]; + {{reg16,Reg16}, {rm16,RM16}} -> + [?PFX_OPND_16BITS, 16#8B | encode_rm(RM16, Reg16, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> + [16#8B | encode_rm(RM32, Reg32, [])]; + {{reg64,Reg64}, {rm64,RM64}} -> + [rex([{w, 1}]), 16#8B | encode_rm(RM64, Reg64, [])]; + {al, {moffs8,Moffs8}} -> + [16#A0 | le32(Moffs8, [])]; + {ax, {moffs16,Moffs16}} -> + [?PFX_OPND_16BITS, 16#A1 | le32(Moffs16, [])]; + {eax, {moffs32,Moffs32}} -> + [16#A1 | le32(Moffs32, [])]; + {rax, {moffs32,Moffs32}} -> + [rex([{w, 1}]), 16#A1 | le32(Moffs32, [])]; + {{moffs8,Moffs8}, al} -> + [16#A2 | le32(Moffs8, [])]; + {{moffs16,Moffs16}, ax} -> + [?PFX_OPND_16BITS, 16#A3 | le32(Moffs16, [])]; + {{moffs32,Moffs32}, eax} -> + [16#A3 | le32(Moffs32, [])]; + {{moffs32,Moffs32}, rax} -> + [rex([{w, 1}]), 16#A3 | le32(Moffs32, [])]; + {{reg8,Reg8}, {imm8,Imm8}} -> + [rex([{b, Reg8}, {r8, Reg8}]), 16#B0 bor (Reg8 band 2#111), Imm8]; + {{reg16,Reg16}, {imm16,Imm16}} -> + [?PFX_OPND_16BITS, rex([{b, Reg16}]), 16#B8 bor (Reg16 band 2#111) + | le16(Imm16, [])]; + {{reg32,Reg32}, {imm32,Imm32}} -> + [rex([{b, Reg32}]), 16#B8 bor (Reg32 band 2#111) + | le32(Imm32, [])]; + {{reg64,Reg64}, {imm64,Imm64}} -> + [rex([{w, 1}, {b, Reg64}]), 16#B8 bor (Reg64 band 2#111) + | le64(Imm64, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [rex([{r8, RM8}]), 16#C6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND_16BITS, 16#C7 | + encode_rm(RM16, 2#000, le16(Imm16, []))]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#C7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; + {{rm64,RM64}, {imm32,Imm32}} -> + [rex([{w, 1}]), 16#C7 | encode_rm(RM64, 2#000, le32(Imm32, []))] + end. + +%% mov_sizeof(Opnds) -> +%% case Opnds of +%% {{rm8,RM8}, {reg8,_}} -> +%% 1 + sizeof_rm(RM8); +%% {{rm16,RM16}, {reg16,_}} -> +%% 2 + sizeof_rm(RM16); +%% {{rm32,RM32}, {reg32,_}} -> +%% 1 + sizeof_rm(RM32); +%% {{reg8,_}, {rm8,RM8}} -> +%% 1 + sizeof_rm(RM8); +%% {{reg16,_}, {rm16,RM16}} -> +%% 2 + sizeof_rm(RM16); +%% {{reg32,_}, {rm32,RM32}} -> +%% 1 + sizeof_rm(RM32); +%% {al, {moffs8,_}} -> +%% 1 + 4; +%% {ax, {moffs16,_}} -> +%% 2 + 4; +%% {eax, {moffs32,_}} -> +%% 1 + 4; +%% {{moffs8,_}, al} -> +%% 1 + 4; +%% {{moffs16,_}, ax} -> +%% 2 + 4; +%% {{moffs32,_}, eax} -> +%% 1 + 4; +%% {{reg8,_}, {imm8,_}} -> +%% 2; +%% {{reg16,_}, {imm16,_}} -> +%% 2 + 2; +%% {{reg32,_}, {imm32,_}} -> +%% 1 + 4; +%% {{rm8,RM8}, {imm8,_}} -> +%% 1 + sizeof_rm(RM8) + 1; +%% {{rm16,RM16}, {imm16,_}} -> +%% 2 + sizeof_rm(RM16) + 2; +%% {{rm32,RM32}, {imm32,_}} -> +%% 1 + sizeof_rm(RM32) + 4 +%% end. + +movx_op_encode(Opcode, Opnds) -> % movsx, movzx + case Opnds of + {{reg16,Reg16}, {rm8,RM8}} -> + [?PFX_OPND_16BITS, rex([{r8, RM8}]), 16#0F, Opcode | + encode_rm(RM8, Reg16, [])]; + {{reg32,Reg32}, {rm8,RM8}} -> + [rex([{r8, RM8}]), 16#0F, Opcode | encode_rm(RM8, Reg32, [])]; + {{reg32,Reg32}, {rm16,RM16}} -> + [16#0F, Opcode bor 1 | encode_rm(RM16, Reg32, [])]; + {{reg64,Reg64}, {rm8,RM8}} -> + [rex([{w,1}]), 16#0F, Opcode | encode_rm(RM8, Reg64, [])]; + {{reg64,Reg64}, {rm16,RM16}} -> + [rex([{w,1}]), 16#0F, Opcode bor 1 | encode_rm(RM16, Reg64, [])]; + {{reg64,Reg64}, {rm32,RM32}} -> + %% This is magic... /Luna + [rex([{w,(1 band (Opcode bsr 3))}]), 16#63 | + encode_rm(RM32, Reg64, [])] + end. + +%% movx_op_sizeof(Opnds) -> +%% case Opnds of +%% {{reg16,_}, {rm8,RM8}} -> +%% 3 + sizeof_rm(RM8); +%% {{reg32,_}, {rm8,RM8}} -> +%% 1 + 2 + sizeof_rm(RM8); +%% {{reg32,_}, {rm16,RM16}} -> +%% 1 + 2 + sizeof_rm(RM16) +%% end. + +pop_encode(Opnds) -> + case Opnds of + {{rm64,RM64}} -> + [16#8F | encode_rm(RM64, 2#000, [])]; + {{reg64,Reg64}} -> + [rex([{b,Reg64}]),16#58 bor (Reg64 band 2#111)] + end. + +%% pop_sizeof(Opnds) -> +%% case Opnds of +%% {{rm32,RM32}} -> +%% 1 + sizeof_rm(RM32); +%% {{reg32,_}} -> +%% 1 +%% end. + +push_encode(Opnds) -> + case Opnds of +%%% {{rm32,RM32}} -> +%%% [16#FF | encode_rm(RM32, 2#110, [])]; + {{rm64,RM64}} -> + [16#FF | encode_rm(RM64, 2#110, [])]; +%%% {{reg32,Reg32}} -> +%%% [rex([{b, 1}]), 16#50 bor (Reg32 band 2#111)]; + {{reg64,Reg64}} -> + [rex([{b, Reg64}]), 16#50 bor (Reg64 band 2#111)]; + {{imm8,Imm8}} -> % sign-extended + [16#6A, Imm8]; + {{imm32,Imm32}} -> % Sign extended to 64 bits + [16#68 | le32(Imm32, [])] + end. + +%% push_sizeof(Opnds) -> +%% case Opnds of +%% {{rm32,RM32}} -> +%% 1 + sizeof_rm(RM32); +%% {{reg32,_}} -> +%% 1; +%% {{imm8,_}} -> +%% 2; +%% {{imm32,_}} -> +%% 1 + 4 +%% end. + +shift_op_encode(SubOpcode, Opnds) -> % rol, ror, rcl, rcr, shl, shr, sar + case Opnds of + {{rm32,RM32}, 1} -> + [16#D1 | encode_rm(RM32, SubOpcode, [])]; + {{rm32,RM32}, cl} -> + [16#D3 | encode_rm(RM32, SubOpcode, [])]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#C1 | encode_rm(RM32, SubOpcode, [Imm8])]; + {{rm64,RM64}, 1} -> + [rex([{w,1}]), 16#D1 | encode_rm(RM64, SubOpcode, [])]; + {{rm64,RM64}, cl} -> + [rex([{w,1}]), 16#D3 | encode_rm(RM64, SubOpcode, [])]; + {{rm64,RM64}, {imm8,Imm8}} -> + [rex([{w,1}]), 16#C1 | encode_rm(RM64, SubOpcode, [Imm8])] + end. + +%% shift_op_sizeof(Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr +%% case Opnds of +%% {{rm32,RM32}, 1} -> +%% 1 + sizeof_rm(RM32); +%% {{rm32,RM32}, cl} -> +%% 1 + sizeof_rm(RM32); +%% {{rm32,RM32}, {imm8,_Imm8}} -> +%% 1 + sizeof_rm(RM32) + 1 +%% end. + +ret_encode(Opnds) -> + case Opnds of + {} -> + [16#C3]; + {{imm16,Imm16}} -> + [16#C2 | le16(Imm16, [])] + end. + +ret_sizeof(Opnds) -> + case Opnds of + {} -> + 1; + {{imm16,_}} -> + 1 + 2 + end. + +setcc_encode({{cc,CC}, {rm8,RM8}}) -> + [rex([{r8, RM8}]), 16#0F, 16#90 bor CC | encode_rm(RM8, 2#000, [])]. + +%% setcc_sizeof({{cc,_}, {rm8,RM8}}) -> +%% 2 + sizeof_rm(RM8). + +shd_op_encode(Opcode, Opnds) -> + case Opnds of + {{rm32,RM32}, {reg32,Reg32}, {imm8,Imm8}} -> + [16#0F, Opcode | encode_rm(RM32, Reg32, [Imm8])]; + {{rm32,RM32}, {reg32,Reg32}, cl} -> + [16#0F, Opcode bor 1 | encode_rm(RM32, Reg32, [])] + end. + +%% shd_op_sizeof(Opnds) -> +%% case Opnds of +%% {{rm32,RM32}, {reg32,_}, {imm8,_}} -> +%% 2 + sizeof_rm(RM32) + 1; +%% {{rm32,RM32}, {reg32,_}, cl} -> +%% 2 + sizeof_rm(RM32) +%% end. + +test_encode(Opnds) -> + case Opnds of + {eax, {imm32,Imm32}} -> + [16#A9 | le32(Imm32, [])]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#85 | encode_rm(RM32, Reg32, [])] + end. + +%% test_sizeof(Opnds) -> +%% case Opnds of +%% {eax, {imm32,_}} -> +%% 1 + 4; +%% {{rm32,RM32}, {imm32,_}} -> +%% 1 + sizeof_rm(RM32) + 4; +%% {{rm32,RM32}, {reg32,_}} -> +%% 1 + sizeof_rm(RM32) +%% end. + +fild_encode(Opnds) -> + %% The operand cannot be a register! + {{rm64, RM64}} = Opnds, + [16#DB | encode_rm(RM64, 2#000, [])]. + +%% fild_sizeof(Opnds) -> +%% {{rm32, RM32}} = Opnds, +%% 1 + sizeof_rm(RM32). + +fld_encode(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DD | encode_rm(RM64fp, 2#000, [])]; + {{fpst, St}} -> + [16#D9, 16#C0 bor st(St)] + end. + +%% fld_sizeof(Opnds) -> +%% case Opnds of +%% {{rm64fp, RM64fp}} -> +%% 1 + sizeof_rm(RM64fp); +%% {{fpst, _}} -> +%% 2 +%% end. + +x87_comm_arith_encode(OpCode, Opnds) -> + %% fadd, fmul + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + [16#D8, (16#C0 bor (OpCode bsl 3)) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + [16#DC, (16#C0 bor (OpCode bsl 3)) bor st(St)] + end. + +x87_comm_arith_pop_encode(OpCode, Opnds) -> + %% faddp, fmulp + case Opnds of + [] -> + [16#DE, 16#C0 bor (OpCode bsl 3) bor st(1)]; + {{fpst,St},{fpst,0}} -> + [16#DE, 16#C0 bor (OpCode bsl 3) bor st(St)] + end. + +x87_arith_encode(OpCode, Opnds) -> + %% fdiv, fsub + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + OpCode0 = OpCode band 2#110, + [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + OpCode0 = OpCode bor 1, + [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +x87_arith_pop_encode(OpCode, Opnds) -> + %% fdivp, fsubp + OpCode0 = OpCode bor 1, + case Opnds of + [] -> + [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(1)]; + {{fpst,St}, {fpst,0}} -> + [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(St)] + end. + +x87_arith_rev_encode(OpCode, Opnds) -> + %% fdivr, fsubr + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + OpCode0 = OpCode bor 1, + [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + OpCode0 = OpCode band 2#110, + [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +x87_arith_rev_pop_encode(OpCode, Opnds) -> + %% fdivrp, fsubrp + OpCode0 = OpCode band 2#110, + case Opnds of + [] -> + [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(1)]; + {{fpst,St}, {fpst, 0}} -> + [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +%% x87_arith_sizeof(Opnds) -> +%% case Opnds of +%% {{rm64fp, RM64fp}} -> +%% 1 + sizeof_rm(RM64fp); +%% {{fpst,0}, {fpst,_}} -> +%% 2; +%% {{fpst,_}, {fpst,0}} -> +%% 2 +%% end. + +fst_encode(OpCode, Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DD | encode_rm(RM64fp, OpCode, [])]; + {{fpst, St}} -> + [16#DD, 16#C0 bor (OpCode bsl 3) bor st(St)] + end. + +%% fst_sizeof(Opnds) -> +%% case Opnds of +%% {{rm64fp, RM64fp}} -> +%% 1 + sizeof_rm(RM64fp); +%% {{fpst, _}} -> +%% 2 +%% end. + +fchs_encode() -> + [16#D9, 16#E0]. + +fchs_sizeof() -> + 2. + +ffree_encode({{fpst, St}})-> + [16#DD, 16#C0 bor st(St)]. + +ffree_sizeof() -> + 2. + +fwait_encode() -> + [16#9B]. + +fwait_sizeof() -> + 1. + +fxch_encode(Opnds) -> + case Opnds of + [] -> + [16#D9, 16#C8 bor st(1)]; + {{fpst, St}} -> + [16#D9, 16#C8 bor st(St)] + end. + +fxch_sizeof() -> + 2. + +insn_encode(Op, Opnds, Offset) -> + Bytes_and_REX = insn_encode_internal(Op, Opnds), + Bytes = fix_rex(Bytes_and_REX), + case has_relocs(Bytes) of + false -> % the common case + {Bytes, []}; + _ -> + fix_relocs(Bytes, Offset, [], []) + end. + +fix_rex(Bytes) -> + fix_rex(Bytes, 2#0100 bsl 4, []). + +fix_rex([{rex, REX} | Rest], REXAcc, Bytes) -> + fix_rex(Rest, REXAcc bor REX, Bytes); +fix_rex([{{rex, REX}, Byte} | Rest], REXAcc, Bytes) -> + fix_rex(Rest, REXAcc bor REX, [Byte | Bytes]); +fix_rex([Byte | Rest], REXAcc, Bytes) -> + fix_rex(Rest, REXAcc, [Byte | Bytes]); +fix_rex([], 2#01000000, Bytes) -> % no rex prefix + lists:reverse(Bytes); +fix_rex([], REX0, Bytes) -> % rex prefix... + REX = REX0 band 16#FF, % for 8 bit registers + [Head|Tail] = lists:reverse(Bytes), + case Head of + 16#66 -> % ...and 16 bit/sse2 prefix + [16#66, REX | Tail]; + 16#F2 -> % ...and sse2 prefix + [16#F2, REX | Tail]; + _ -> % ...only + [REX, Head | Tail] + end. + +has_relocs([{le32,_,_}|_]) -> true; +has_relocs([{le64,_,_}|_]) -> true; +has_relocs([_|Bytes]) -> has_relocs(Bytes); +has_relocs([]) -> false. + +fix_relocs([{le32,Tag,Val}|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+4, + [16#00, 16#00, 16#00, 16#00 | Code], + [{Tag,Offset,Val}|Relocs]); +fix_relocs([{le64,Tag,Val}|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+8, + [16#00, 16#00, 16#00, 16#00, + 16#00, 16#00, 16#00, 16#00 | Code], + [{Tag,Offset,Val}|Relocs]); +fix_relocs([Byte|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+1, [Byte|Code], Relocs); +fix_relocs([], _Offset, Code, Relocs) -> + {lists:reverse(Code), lists:reverse(Relocs)}. + +insn_encode_internal(Op, Opnds) -> + case Op of + 'adc' -> arith_binop_encode(2#010, Opnds); + 'add' -> arith_binop_encode(2#000, Opnds); + 'and' -> arith_binop_encode(2#100, Opnds); + 'bsf' -> bs_op_encode(16#BC, Opnds); + 'bsr' -> bs_op_encode(16#BD, Opnds); + 'bswap' -> bswap_encode(Opnds); + 'bt' -> bt_op_encode(2#100, Opnds); + 'btc' -> bt_op_encode(2#111, Opnds); + 'btr' -> bt_op_encode(2#110, Opnds); + 'bts' -> bt_op_encode(2#101, Opnds); + 'call' -> call_encode(Opnds); + 'cbw' -> cbw_encode(Opnds); + 'cdq' -> nullary_op_encode(16#99, Opnds); + 'clc' -> nullary_op_encode(16#F8, Opnds); + 'cld' -> nullary_op_encode(16#FC, Opnds); + 'cmc' -> nullary_op_encode(16#F5, Opnds); + 'cmovcc' -> cmovcc_encode(Opnds); + 'cmp' -> arith_binop_encode(2#111, Opnds); + 'cwde' -> nullary_op_encode(16#98, Opnds); + 'dec' -> incdec_encode(2#001, Opnds); + 'div' -> arith_unop_encode(2#110, Opnds); + 'enter' -> enter_encode(Opnds); + 'idiv' -> arith_unop_encode(2#111, Opnds); + 'imul' -> imul_encode(Opnds); + 'inc' -> incdec_encode(2#000, Opnds); + 'into' -> case get(hipe_target_arch) of + x86 -> nullary_op_encode(16#CE, Opnds); + amd64 -> exit({invalid_amd64_opcode, + hipe_amd64_encode__erl}) + end; + 'jcc' -> jcc_encode(Opnds); + 'jecxz' -> jmp8_op_encode(16#E3, Opnds); + 'jmp' -> jmp_encode(Opnds); + 'lea' -> lea_encode(Opnds); + 'leave' -> nullary_op_encode(16#C9, Opnds); + 'loop' -> jmp8_op_encode(16#E2, Opnds); + 'loope' -> jmp8_op_encode(16#E1, Opnds); + 'loopne' -> jmp8_op_encode(16#E0, Opnds); + 'mov' -> mov_encode(Opnds); + 'movsx' -> movx_op_encode(16#BE, Opnds); + 'movzx' -> movx_op_encode(16#B6, Opnds); + 'mul' -> arith_unop_encode(2#100, Opnds); + 'neg' -> arith_unop_encode(2#011, Opnds); + 'nop' -> nullary_op_encode(16#90, Opnds); + 'not' -> arith_unop_encode(2#010, Opnds); + 'or' -> arith_binop_encode(2#001, Opnds); + 'pop' -> pop_encode(Opnds); + 'prefix_fs' -> nullary_op_encode(16#64, Opnds); + 'push' -> push_encode(Opnds); + 'rcl' -> shift_op_encode(2#010, Opnds); + 'rcr' -> shift_op_encode(2#011, Opnds); + 'ret' -> ret_encode(Opnds); + 'rol' -> shift_op_encode(2#000, Opnds); + 'ror' -> shift_op_encode(2#001, Opnds); + 'sar' -> shift_op_encode(2#111, Opnds); + 'sbb' -> arith_binop_encode(2#011, Opnds); + 'setcc' -> setcc_encode(Opnds); + 'shl' -> shift_op_encode(2#100, Opnds); + 'shld' -> shd_op_encode(16#A4, Opnds); + 'shr' -> shift_op_encode(2#101, Opnds); + 'shrd' -> shd_op_encode(16#AC, Opnds); + 'stc' -> nullary_op_encode(16#F9, Opnds); + 'std' -> nullary_op_encode(16#FD, Opnds); + 'sub' -> arith_binop_encode(2#101, Opnds); + 'test' -> test_encode(Opnds); + 'xor' -> arith_binop_encode(2#110, Opnds); + + %% sse2 + 'addsd' -> sse2_arith_binop_encode(16#F2, 16#58, Opnds); + 'cmpsd' -> sse2_arith_binop_encode(16#F2, 16#C2, Opnds); + 'comisd' -> sse2_arith_binop_encode(16#66, 16#2F, Opnds); + 'cvtsi2sd' -> sse2_cvtsi2sd_encode(Opnds); + 'divsd' -> sse2_arith_binop_encode(16#F2, 16#5E, Opnds); + 'maxsd' -> sse2_arith_binop_encode(16#F2, 16#5F, Opnds); + 'minsd' -> sse2_arith_binop_encode(16#F2, 16#5D, Opnds); + 'movsd' -> sse2_mov_encode(Opnds); + 'mulsd' -> sse2_arith_binop_encode(16#F2, 16#59, Opnds); + 'sqrtsd' -> sse2_arith_binop_encode(16#F2, 16#51, Opnds); + 'subsd' -> sse2_arith_binop_encode(16#F2, 16#5C, Opnds); + 'ucomisd' -> sse2_arith_binop_encode(16#66, 16#2E, Opnds); + 'xorpd' -> sse2_arith_binop_encode(16#66, 16#57, Opnds); + %% End of sse2 + + %% x87 + 'fadd' -> x87_comm_arith_encode(2#000, Opnds); + 'faddp' -> x87_comm_arith_pop_encode(2#000, Opnds); + 'fchs' -> fchs_encode(); + 'fdiv' -> x87_arith_encode(2#110, Opnds); + 'fdivp' -> x87_arith_pop_encode(2#110, Opnds); + 'fdivr' -> x87_arith_rev_encode(2#111, Opnds); + 'fdivrp' -> x87_arith_rev_pop_encode(2#111, Opnds); + 'ffree' -> ffree_encode(Opnds); + 'fild' -> fild_encode(Opnds); + 'fld' -> fld_encode(Opnds); + 'fmul' -> x87_comm_arith_encode(2#001, Opnds); + 'fmulp' -> x87_comm_arith_pop_encode(2#001, Opnds); + 'fst' -> fst_encode(2#010, Opnds); + 'fstp' -> fst_encode(2#011, Opnds); + 'fsub' -> x87_arith_encode(2#100, Opnds); + 'fsubp' -> x87_arith_pop_encode(2#100, Opnds); + 'fsubr' -> x87_arith_rev_encode(2#101, Opnds); + 'fsubrp' -> x87_arith_rev_pop_encode(2#101, Opnds); + 'fwait' -> fwait_encode(); + 'fxch' -> fxch_encode(Opnds); + %% End of x87 + + _ -> exit({?MODULE,insn_encode,Op}) + end. + +insn_sizeof(Op, Opnds) -> + case Op of + 'cbw' -> cbw_sizeof(Opnds); + 'cdq' -> nullary_op_sizeof(Opnds); + 'clc' -> nullary_op_sizeof(Opnds); + 'cld' -> nullary_op_sizeof(Opnds); + 'cmc' -> nullary_op_sizeof(Opnds); + 'cwde' -> nullary_op_sizeof(Opnds); + 'enter' -> enter_sizeof(Opnds); + 'into' -> nullary_op_sizeof(Opnds); + 'jcc' -> jcc_sizeof(Opnds); + 'jecxz' -> jmp8_op_sizeof(Opnds); + 'leave' -> nullary_op_sizeof(Opnds); + 'loop' -> jmp8_op_sizeof(Opnds); + 'loope' -> jmp8_op_sizeof(Opnds); + 'loopne' -> jmp8_op_sizeof(Opnds); + 'nop' -> nullary_op_sizeof(Opnds); + 'prefix_fs' -> nullary_op_sizeof(Opnds); + 'ret' -> ret_sizeof(Opnds); + 'stc' -> nullary_op_sizeof(Opnds); + 'std' -> nullary_op_sizeof(Opnds); + +%% %% x87 +%% 'fadd' -> x87_arith_sizeof(Opnds); +%% 'faddp' -> x87_arith_sizeof(Opnds); + 'fchs' -> fchs_sizeof(); +%% 'fdiv' -> x87_arith_sizeof(Opnds); +%% 'fdivp' -> x87_arith_sizeof(Opnds); +%% 'fdivr' -> x87_arith_sizeof(Opnds); +%% 'fdivrp' -> x87_arith_sizeof(Opnds); + 'ffree' -> ffree_sizeof(); +%% 'fild' -> fild_sizeof(Opnds); +%% 'fld' -> fld_sizeof(Opnds); +%% 'fmul' -> x87_arith_sizeof(Opnds); +%% 'fmulp' -> x87_arith_sizeof(Opnds); +%% 'fst' -> fst_sizeof(Opnds); +%% 'fstp' -> fst_sizeof(Opnds); +%% 'fsub' -> x87_arith_sizeof(Opnds); +%% 'fsubp' -> x87_arith_sizeof(Opnds); +%% 'fsubr' -> x87_arith_sizeof(Opnds); +%% 'fsubrp' -> x87_arith_sizeof(Opnds); + 'fwait' -> fwait_sizeof(); + 'fxch' -> fxch_sizeof(); +%% %% End of x87 + _ -> %% Hack that is to be removed some day... Maybe... + {Bytes, _} = insn_encode(Op, Opnds, 0), + length(Bytes) +%% 'adc' -> arith_binop_sizeof(Opnds); +%% 'add' -> arith_binop_sizeof(Opnds); +%% 'and' -> arith_binop_sizeof(Opnds); +%% 'bsf' -> bs_op_sizeof(Opnds); +%% 'bsr' -> bs_op_sizeof(Opnds); +%% 'bswap' -> bswap_sizeof(Opnds); +%% 'bt' -> bt_op_sizeof(Opnds); +%% 'btc' -> bt_op_sizeof(Opnds); +%% 'btr' -> bt_op_sizeof(Opnds); +%% 'bts' -> bt_op_sizeof(Opnds); +%% 'call' -> call_sizeof(Opnds); +%% 'cmovcc' -> cmovcc_sizeof(Opnds); +%% 'cmp' -> arith_binop_sizeof(Opnds); +%% 'dec' -> incdec_sizeof(Opnds); +%% 'div' -> arith_unop_sizeof(Opnds); +%% 'idiv' -> arith_unop_sizeof(Opnds); +%% 'imul' -> imul_sizeof(Opnds); +%% 'inc' -> incdec_sizeof(Opnds); +%% 'jmp' -> jmp_sizeof(Opnds); +%% 'lea' -> lea_sizeof(Opnds); +%% 'mov' -> mov_sizeof(Opnds); +%% 'movsx' -> movx_op_sizeof(Opnds); +%% 'movzx' -> movx_op_sizeof(Opnds); +%% 'mul' -> arith_unop_sizeof(Opnds); +%% 'neg' -> arith_unop_sizeof(Opnds); +%% 'not' -> arith_unop_sizeof(Opnds); +%% 'or' -> arith_binop_sizeof(Opnds); +%% 'pop' -> pop_sizeof(Opnds); +%% 'push' -> push_sizeof(Opnds); +%% 'rcl' -> shift_op_sizeof(Opnds); +%% 'rcr' -> shift_op_sizeof(Opnds); +%% 'rol' -> shift_op_sizeof(Opnds); +%% 'ror' -> shift_op_sizeof(Opnds); +%% 'sar' -> shift_op_sizeof(Opnds); +%% 'sbb' -> arith_binop_sizeof(Opnds); +%% 'setcc' -> setcc_sizeof(Opnds); +%% 'shl' -> shift_op_sizeof(Opnds); +%% 'shld' -> shd_op_sizeof(Opnds); +%% 'shr' -> shift_op_sizeof(Opnds); +%% 'shrd' -> shd_op_sizeof(Opnds); +%% 'sub' -> arith_binop_sizeof(Opnds); +%% 'test' -> test_sizeof(Opnds); +%% 'xor' -> arith_binop_sizeof(Opnds); +%% _ -> exit({?MODULE,insn_sizeof,Op}) + end. + +%%===================================================================== +%% testing interface +%%===================================================================== + +-ifdef(DO_HIPE_AMD64_ENCODE_TEST). + +say(OS, Str) -> + file:write(OS, Str). + +digit16(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, "0x"), + say(OS, [digit16(Byte bsr 4)]), + say(OS, [digit16(Byte)]). + +init(OS) -> + say(OS, "\t.text\n"). + +say_bytes(OS, Byte0, Bytes0) -> + say_byte(OS, Byte0), + case Bytes0 of + [] -> + say(OS, "\n"); + [Byte1|Bytes1] -> + say(OS, ","), + say_bytes(OS, Byte1, Bytes1) + end. + +t(OS, Op, Opnds) -> + insn_sizeof(Op, Opnds), + {[Byte|Bytes],[]} = insn_encode(Op, Opnds, 0), + say(OS, "\t.byte "), + say_bytes(OS, Byte, Bytes). + +dotest1(OS) -> + init(OS), + % exercise all rm32 types + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_rip(16#87654321)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321,sindex(2#10,?EDI))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_base(?ECX)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_base(16#3,?ECX)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_base(16#87654321,?EBP)}}), + t(OS,call,{{rm32,rm_reg(?EAX)}}), + t(OS,call,{{rm32,rm_mem(ea_disp32_sindex(16#87654321,sindex(2#10,?EDI)))}}), + t(OS,call,{{rel32,-5}}), + % default parameters for the tests below + Word32 = 16#87654321, + Word16 = 16#F00F, + Word8 = 16#80, + Imm32 = {imm32,Word32}, + Imm16 = {imm16,Word16}, + Imm8 = {imm8,Word8}, + RM32 = {rm32,rm_reg(?EDX)}, + RM16 = {rm16,rm_reg(?EDX)}, + RM8 = {rm8,rm_reg(?EDX)}, + Rel32 = {rel32,Word32}, + Rel8 = {rel8,Word8}, + Moffs32 = {moffs32,Word32}, + Moffs16 = {moffs16,Word32}, + Moffs8 = {moffs8,Word32}, + CC = {cc,?CC_G}, + Reg32 = {reg32,?EAX}, + Reg16 = {reg16,?EAX}, + Reg8 = {reg8,?AH}, + EA = {ea,ea_base(?ECX)}, + % exercise each instruction definition + t(OS,'adc',{eax,Imm32}), + t(OS,'adc',{RM32,Imm32}), + t(OS,'adc',{RM32,Imm8}), + t(OS,'adc',{RM32,Reg32}), + t(OS,'adc',{Reg32,RM32}), + t(OS,'add',{eax,Imm32}), + t(OS,'add',{RM32,Imm32}), + t(OS,'add',{RM32,Imm8}), + t(OS,'add',{RM32,Reg32}), + t(OS,'add',{Reg32,RM32}), + t(OS,'and',{eax,Imm32}), + t(OS,'and',{RM32,Imm32}), + t(OS,'and',{RM32,Imm8}), + t(OS,'and',{RM32,Reg32}), + t(OS,'and',{Reg32,RM32}), + t(OS,'bsf',{Reg32,RM32}), + t(OS,'bsr',{Reg32,RM32}), + t(OS,'bswap',{Reg32}), + t(OS,'bt',{RM32,Reg32}), + t(OS,'bt',{RM32,Imm8}), + t(OS,'btc',{RM32,Reg32}), + t(OS,'btc',{RM32,Imm8}), + t(OS,'btr',{RM32,Reg32}), + t(OS,'btr',{RM32,Imm8}), + t(OS,'bts',{RM32,Reg32}), + t(OS,'bts',{RM32,Imm8}), + t(OS,'call',{Rel32}), + t(OS,'call',{RM32}), + t(OS,'cbw',{}), + t(OS,'cdq',{}), + t(OS,'clc',{}), + t(OS,'cld',{}), + t(OS,'cmc',{}), + t(OS,'cmovcc',{CC,Reg32,RM32}), + t(OS,'cmp',{eax,Imm32}), + t(OS,'cmp',{RM32,Imm32}), + t(OS,'cmp',{RM32,Imm8}), + t(OS,'cmp',{RM32,Reg32}), + t(OS,'cmp',{Reg32,RM32}), + t(OS,'cwde',{}), + t(OS,'dec',{RM32}), + t(OS,'dec',{Reg32}), + t(OS,'div',{RM32}), + t(OS,'enter',{Imm16,{imm8,3}}), + t(OS,'idiv',{RM32}), + t(OS,'imul',{RM32}), + t(OS,'imul',{Reg32,RM32}), + t(OS,'imul',{Reg32,RM32,Imm8}), + t(OS,'imul',{Reg32,RM32,Imm32}), + t(OS,'inc',{RM32}), + t(OS,'inc',{Reg32}), + t(OS,'into',{}), + t(OS,'jcc',{CC,Rel8}), + t(OS,'jcc',{CC,Rel32}), + t(OS,'jecxz',{Rel8}), + t(OS,'jmp',{Rel8}), + t(OS,'jmp',{Rel32}), + t(OS,'jmp',{RM32}), + t(OS,'lea',{Reg32,EA}), + t(OS,'leave',{}), + t(OS,'loop',{Rel8}), + t(OS,'loope',{Rel8}), + t(OS,'loopne',{Rel8}), + t(OS,'mov',{RM8,Reg8}), + t(OS,'mov',{RM16,Reg16}), + t(OS,'mov',{RM32,Reg32}), + t(OS,'mov',{Reg8,RM8}), + t(OS,'mov',{Reg16,RM16}), + t(OS,'mov',{Reg32,RM32}), + t(OS,'mov',{al,Moffs8}), + t(OS,'mov',{ax,Moffs16}), + t(OS,'mov',{eax,Moffs32}), + t(OS,'mov',{Moffs8,al}), + t(OS,'mov',{Moffs16,ax}), + t(OS,'mov',{Moffs32,eax}), + t(OS,'mov',{Reg8,Imm8}), + t(OS,'mov',{Reg16,Imm16}), + t(OS,'mov',{Reg32,Imm32}), + t(OS,'mov',{RM8,Imm8}), + t(OS,'mov',{RM16,Imm16}), + t(OS,'mov',{RM32,Imm32}), + t(OS,'movsx',{Reg16,RM8}), + t(OS,'movsx',{Reg32,RM8}), + t(OS,'movsx',{Reg32,RM16}), + t(OS,'movzx',{Reg16,RM8}), + t(OS,'movzx',{Reg32,RM8}), + t(OS,'movzx',{Reg32,RM16}), + t(OS,'mul',{RM32}), + t(OS,'neg',{RM32}), + t(OS,'nop',{}), + t(OS,'not',{RM32}), + t(OS,'or',{eax,Imm32}), + t(OS,'or',{RM32,Imm32}), + t(OS,'or',{RM32,Imm8}), + t(OS,'or',{RM32,Reg32}), + t(OS,'or',{Reg32,RM32}), + t(OS,'pop',{RM32}), + t(OS,'pop',{Reg32}), + t(OS,'push',{RM32}), + t(OS,'push',{Reg32}), + t(OS,'push',{Imm8}), + t(OS,'push',{Imm32}), + t(OS,'rcl',{RM32,1}), + t(OS,'rcl',{RM32,cl}), + t(OS,'rcl',{RM32,Imm8}), + t(OS,'rcr',{RM32,1}), + t(OS,'rcr',{RM32,cl}), + t(OS,'rcr',{RM32,Imm8}), + t(OS,'ret',{}), + t(OS,'ret',{Imm16}), + t(OS,'rol',{RM32,1}), + t(OS,'rol',{RM32,cl}), + t(OS,'rol',{RM32,Imm8}), + t(OS,'ror',{RM32,1}), + t(OS,'ror',{RM32,cl}), + t(OS,'ror',{RM32,Imm8}), + t(OS,'sar',{RM32,1}), + t(OS,'sar',{RM32,cl}), + t(OS,'sar',{RM32,Imm8}), + t(OS,'sbb',{eax,Imm32}), + t(OS,'sbb',{RM32,Imm32}), + t(OS,'sbb',{RM32,Imm8}), + t(OS,'sbb',{RM32,Reg32}), + t(OS,'sbb',{Reg32,RM32}), + t(OS,'setcc',{CC,RM8}), + t(OS,'shl',{RM32,1}), + t(OS,'shl',{RM32,cl}), + t(OS,'shl',{RM32,Imm8}), + t(OS,'shld',{RM32,Reg32,Imm8}), + t(OS,'shld',{RM32,Reg32,cl}), + t(OS,'shr',{RM32,1}), + t(OS,'shr',{RM32,cl}), + t(OS,'shr',{RM32,Imm8}), + t(OS,'shrd',{RM32,Reg32,Imm8}), + t(OS,'shrd',{RM32,Reg32,cl}), + t(OS,'stc',{}), + t(OS,'std',{}), + t(OS,'sub',{eax,Imm32}), + t(OS,'sub',{RM32,Imm32}), + t(OS,'sub',{RM32,Imm8}), + t(OS,'sub',{RM32,Reg32}), + t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{eax,Imm32}), + t(OS,'test',{RM32,Imm32}), + t(OS,'test',{RM32,Reg32}), + t(OS,'xor',{eax,Imm32}), + t(OS,'xor',{RM32,Imm32}), + t(OS,'xor',{RM32,Imm8}), + t(OS,'xor',{RM32,Reg32}), + t(OS,'xor',{Reg32,RM32}), + t(OS,'prefix_fs',{}), t(OS,'add',{{reg32,?EAX},{rm32,rm_mem(ea_disp32_rip(16#20))}}), + []. + +dotest() -> dotest1(group_leader()). % stdout == group_leader + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). +-endif. diff --git a/lib/hipe/amd64/hipe_amd64_frame.erl b/lib/hipe/amd64/hipe_amd64_frame.erl new file mode 100644 index 0000000000..2bc319f40f --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_frame.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_frame.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_liveness.erl b/lib/hipe/amd64/hipe_amd64_liveness.erl new file mode 100644 index 0000000000..be878773cb --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_liveness.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_liveness.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_main.erl b/lib/hipe/amd64/hipe_amd64_main.erl new file mode 100644 index 0000000000..4de7364170 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_main.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_main.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_pp.erl b/lib/hipe/amd64/hipe_amd64_pp.erl new file mode 100644 index 0000000000..93ed7b9073 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_pp.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_pp.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra.erl b/lib/hipe/amd64/hipe_amd64_ra.erl new file mode 100644 index 0000000000..b9ac0338f0 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_ra.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_finalise.erl b/lib/hipe/amd64/hipe_amd64_ra_finalise.erl new file mode 100644 index 0000000000..a6a787c340 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra_finalise.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_ra_finalise.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_ls.erl new file mode 100644 index 0000000000..7ff2a7c082 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra_ls.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_ra_ls.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_naive.erl b/lib/hipe/amd64/hipe_amd64_ra_naive.erl new file mode 100644 index 0000000000..194ea8b597 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra_naive.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_ra_naive.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl new file mode 100644 index 0000000000..ef3c284c45 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_ra_postconditions.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl new file mode 100644 index 0000000000..9ed3d01a56 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl @@ -0,0 +1,188 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_amd64_ra_sse2_postconditions). + +-export([check_and_rewrite/2]). + +-include("../x86/hipe_x86.hrl"). +-define(HIPE_INSTRUMENT_COMPILER, true). +-include("../main/hipe.hrl"). +-define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). + + +check_and_rewrite(AMD64Defun, Coloring) -> + %%io:format("Converting\n"), + TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2), + %%io:format("Rewriting\n"), + #defun{code=Code0} = AMD64Defun, + {Code1, DidSpill} = do_insns(Code0, TempMap, [], false), + {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}}, + DidSpill}. + +do_insns([I|Insns], TempMap, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap), + do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap) -> % Insn -> {Insn list, DidSpill} + case I of + #fmove{} -> + do_fmove(I, TempMap); + #fp_unop{} -> + do_fp_unop(I, TempMap); + #fp_binop{} -> + do_fp_binop(I, TempMap); + _ -> + %% All non sse2 ops + {[I], false} + end. + +%%% Fix an fp_binop. +do_fp_binop(I, TempMap) -> + #fp_binop{src=Src,dst=Dst} = I, + case is_mem_opnd(Dst, TempMap) of + true -> + Tmp = clone(Dst), + {[#fmove{src=Dst, dst=Tmp}, + I#fp_binop{src=Src,dst=Tmp}, + #fmove{src=Tmp,dst=Dst}], + true}; + false -> + {[I], false} + end. + +do_fp_unop(I, TempMap) -> + #fp_unop{arg=Arg} = I, + case is_mem_opnd(Arg, TempMap) of + true -> + Tmp = clone(Arg), + {[#fmove{src=Arg, dst=Tmp}, + I#fp_unop{arg=Tmp}, + #fmove{src=Tmp,dst=Arg}], + true}; + false -> + {[I], false} + end. + +%%% Fix an fmove op. +do_fmove(I, TempMap) -> + #fmove{src=Src,dst=Dst} = I, + case is_mem_opnd(Dst, TempMap) and is_mem_opnd(Src, TempMap) of + true -> + Tmp = clone(Src), + {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}], + true}; + false -> + {[I], false} + end. + +%%% Check if an operand denotes a memory cell (mem or pseudo). + +is_mem_opnd(Opnd, TempMap) -> + R = + case Opnd of + #x86_mem{} -> true; + #x86_temp{} -> + Reg = hipe_x86:temp_reg(Opnd), + case hipe_x86:temp_is_allocatable(Opnd) of + true -> + case tuple_size(TempMap) > Reg of + true -> + case + hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> false + end; + _ -> false + end; + false -> true + end; + _ -> false + end, + %% io:format("Op ~w mem: ~w\n",[Opnd,R]), + R. + +%%% Check if an operand is a spilled Temp. + +%%src_is_spilled(Src, TempMap) -> +%% case hipe_x86:is_temp(Src) of +%% true -> +%% Reg = hipe_x86:temp_reg(Src), +%% case hipe_x86:temp_is_allocatable(Src) of +%% true -> +%% case tuple_size(TempMap) > Reg of +%% true -> +%% case hipe_temp_map:is_spilled(Reg, TempMap) of +%% true -> +%% ?count_temp(Reg), +%% true; +%% false -> +%% false +%% end; +%% false -> +%% false +%% end; +%% false -> true +%% end; +%% false -> false +%% end. + +%% is_spilled(Temp, TempMap) -> +%% case hipe_x86:temp_is_allocatable(Temp) of +%% true -> +%% Reg = hipe_x86:temp_reg(Temp), +%% case tuple_size(TempMap) > Reg of +%% true -> +%% case hipe_temp_map:is_spilled(Reg, TempMap) of +%% true -> +%% ?count_temp(Reg), +%% true; +%% false -> +%% false +%% end; +%% false -> +%% false +%% end; +%% false -> true +%% end. + +%%% Make Reg a clone of Dst (attach Dst's type to Reg). + +clone(Dst) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + hipe_x86:mk_new_temp(Type). + +%%% Make a certain reg into a clone of Dst + +%% clone2(Dst, Reg) -> +%% Type = +%% case Dst of +%% #x86_mem{} -> hipe_x86:mem_type(Dst); +%% #x86_temp{} -> hipe_x86:temp_type(Dst) +%% end, +%% hipe_x86:mk_temp(Reg,Type). diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl new file mode 100644 index 0000000000..267f3335aa --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_ra_x87_ls.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl new file mode 100644 index 0000000000..4c49eeb00a --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -0,0 +1,288 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_amd64_registers). + +-export([ + all_precoloured/0, + allocatable/0, + allocatable_sse2/0, + allocatable_x87/0, + arg/1, + args/1, + call_clobbered/0, + fcalls/0, + float_size/0, + first_virtual/0, + heap_limit/0, + is_arg/1, + is_fixed/1, + is_precoloured/1, + is_precoloured_sse2/1, + is_precoloured_x87/1, + live_at_return/0, + nr_args/0, + proc_offset/1, + proc_pointer/0, + rax/0, + rcx/0, + ret/1, + sp/0, + sp_limit_offset/0, + reg_name/1, + alignment/0, + tailcall_clobbered/0, + temp0/0, + temp1/0, + %% fixed/0, + wordsize/0 + ]). + +-include("../rtl/hipe_literals.hrl"). + +-ifdef(AMD64_HP_IN_REGISTER). +-export([heap_pointer/0]). +-endif. + +-ifdef(AMD64_FCALLS_IN_REGISTER). +fcalls_offset() -> false. +-else. +fcalls_offset() -> ?P_FCALLS. +-define(AMD64_FCALLS_REGISTER,16). +-endif. + +-ifdef(AMD64_HEAP_LIMIT_IN_REGISTER). +heap_limit_offset() -> false. +-else. +-define(AMD64_HEAP_LIMIT_REGISTER, 17). +heap_limit_offset() -> ?P_HP_LIMIT. +-endif. + + +-define(RAX, 0). +-define(RCX, 1). +-define(RDX, 2). +-define(RBX, 3). +-define(RSP, 4). +-define(RBP, 5). +-define(RSI, 6). +-define(RDI, 7). +-define(R8 , 8). +-define(R9 , 9). +-define(R10, 10). +-define(R11, 11). +-define(R12, 12). +-define(R13, 13). +-define(R14, 14). +-define(R15, 15). +-define(FCALLS, ?AMD64_FCALLS_REGISTER). +-define(HEAP_LIMIT, ?AMD64_HEAP_LIMIT_REGISTER). +-define(LAST_PRECOLOURED, 17). + +-define(ARG0, ?RSI). +-define(ARG1, ?RDX). +-define(ARG2, ?RCX). +-define(ARG3, ?R8). +-define(ARG4, ?R9). +-define(ARG5, ?RDI). + +-define(TEMP0, ?R14). +-define(TEMP1, ?R13). + +-define(PROC_POINTER, ?RBP). + +reg_name(R) -> + case R of + ?RAX -> "%rax"; + ?RCX -> "%rcx"; + ?RDX -> "%rdx"; + ?RBX -> "%rbx"; + ?RSP -> "%rsp"; + ?RBP -> "%rbp"; + ?RSI -> "%rsi"; + ?RDI -> "%rdi"; + ?FCALLS -> "%fcalls"; + ?HEAP_LIMIT -> "%hplim"; + Other -> "%r" ++ integer_to_list(Other) + end. + +alignment() -> 8. + +float_size() -> 8. + +first_virtual() -> ?LAST_PRECOLOURED + 1. + +is_precoloured(X) -> X =< ?LAST_PRECOLOURED. + +is_precoloured_sse2(X) -> X =< 15. + +is_precoloured_x87(X) -> X =< 6. + +all_precoloured() -> + [?RAX, + ?RCX, + ?RDX, + ?RBX, + ?RSP, + ?RBP, + ?RSI, + ?RDI, + ?R8 , + ?R9 , + ?R10, + ?R11, + ?R12, + ?R13, + ?R14, + ?R15, + ?FCALLS, + ?HEAP_LIMIT]. + +rax() -> ?RAX. +rcx() -> ?RCX. +temp0() -> ?TEMP0. +temp1() -> ?TEMP1. +sp() -> ?RSP. +proc_pointer() -> ?PROC_POINTER. +fcalls() -> ?FCALLS. +heap_limit() -> ?HEAP_LIMIT. + + +-ifdef(AMD64_HP_IN_REGISTER). +-define(HEAP_POINTER, ?AMD64_HEAP_POINTER). +heap_pointer() -> ?HEAP_POINTER. +-define(LIST_HP_LIVE_AT_RETURN,[{?HEAP_POINTER,untagged}]). +is_heap_pointer(?HEAP_POINTER) -> true; +is_heap_pointer(_) -> false. +%% -define(LIST_HP_FIXED,[?HEAP_POINTER]). + +-else. +-define(HEAP_POINTER, -1). +is_heap_pointer(_) -> false. +%% -define(LIST_HP_FIXED,[]). +-define(LIST_HP_LIVE_AT_RETURN,[]). +-endif. + +proc_offset(?FCALLS) -> fcalls_offset(); +proc_offset(?HEAP_LIMIT) -> heap_limit_offset(); +proc_offset(_) -> false. + +sp_limit_offset() -> ?P_NSP_LIMIT. + +is_fixed(?RSP) -> true; +is_fixed(?PROC_POINTER) -> true; +is_fixed(?FCALLS) -> true; +is_fixed(?HEAP_LIMIT) -> true; +is_fixed(R) -> is_heap_pointer(R). + +%% fixed() -> +%% [?ESP, ?PROC_POINTER, ?FCALLS, ?HEAP_LIMIT | ?LIST_HP_FIXED]. + +allocatable() -> + [?RDX, ?RCX, ?RBX, ?RAX, ?RSI, ?RDI, + ?R8 , ?R9 , ?R10, ?R11, ?R12, ?R13, ?R14, ?R15] + -- [?FCALLS, ?HEAP_POINTER, ?HEAP_LIMIT]. + +allocatable_sse2() -> + [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15 + +allocatable_x87() -> + [0,1,2,3,4,5,6]. + +nr_args() -> ?AMD64_NR_ARG_REGS. + +arg(N) -> + if N < ?AMD64_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + 5 -> ?ARG5; + _ -> exit({?MODULE, arg, N}) + end; + true -> + exit({?MODULE, arg, N}) + end. + +is_arg(R) -> + case R of + ?ARG0 -> ?AMD64_NR_ARG_REGS > 0; + ?ARG1 -> ?AMD64_NR_ARG_REGS > 1; + ?ARG2 -> ?AMD64_NR_ARG_REGS > 2; + ?ARG3 -> ?AMD64_NR_ARG_REGS > 3; + ?ARG4 -> ?AMD64_NR_ARG_REGS > 4; + ?ARG5 -> ?AMD64_NR_ARG_REGS > 5; + _ -> false + end. + +args(Arity) when is_integer(Arity), Arity >= 0 -> + N = erlang:min(Arity, ?AMD64_NR_ARG_REGS), + args(N-1, []). + +args(I, Rest) when I < 0 -> Rest; +args(I, Rest) -> args(I-1, [arg(I) | Rest]). + +ret(N) -> + case N of + 0 -> ?RAX; + _ -> exit({?MODULE, ret, N}) + end. + +call_clobbered() -> + [{?RAX,tagged},{?RAX,untagged}, % does the RA strip the type or not? + {?RDX,tagged},{?RDX,untagged}, + {?RCX,tagged},{?RCX,untagged}, + {?RBX,tagged},{?RBX,untagged}, + {?RDI,tagged},{?RDI,untagged}, + {?RSI,tagged},{?RSI,untagged}, + {?R8 ,tagged},{?R8 ,untagged}, + {?R9 ,tagged},{?R9 ,untagged}, + {?R10,tagged},{?R10,untagged}, + {?R11,tagged},{?R11,untagged}, + {?R12,tagged},{?R12,untagged}, + {?R13,tagged},{?R13,untagged}, + {?R14,tagged},{?R14,untagged}, + {?R15,tagged},{?R15,untagged} + | fp_call_clobbered()] + -- + [{?FCALLS,tagged},{?FCALLS,untagged}, + {?HEAP_POINTER,tagged},{?HEAP_POINTER,untagged}, + {?HEAP_LIMIT,tagged},{?HEAP_LIMIT,untagged} + ]. + +fp_call_clobbered() -> %% sse2 since it has more registers than x87 + [{Reg,double} || Reg <- allocatable_sse2()]. + +tailcall_clobbered() -> % tailcall crapola needs two temps + [{?TEMP0,tagged},{?TEMP0,untagged}, + {?TEMP1,tagged},{?TEMP1,untagged} + | fp_call_clobbered()]. + +live_at_return() -> + [{?RSP,untagged} + ,{?PROC_POINTER,untagged} + ,{?FCALLS,untagged} + ,{?HEAP_LIMIT,untagged} + | ?LIST_HP_LIVE_AT_RETURN + ]. + +wordsize() -> 8. diff --git a/lib/hipe/amd64/hipe_amd64_spill_restore.erl b/lib/hipe/amd64/hipe_amd64_spill_restore.erl new file mode 100644 index 0000000000..56e3ffd24d --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_spill_restore.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_spill_restore.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_x87.erl b/lib/hipe/amd64/hipe_amd64_x87.erl new file mode 100644 index 0000000000..e7bf1c1866 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_x87.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_x86_x87.erl"). diff --git a/lib/hipe/amd64/hipe_rtl_to_amd64.erl b/lib/hipe/amd64/hipe_rtl_to_amd64.erl new file mode 100644 index 0000000000..17aef0eeac --- /dev/null +++ b/lib/hipe/amd64/hipe_rtl_to_amd64.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("../x86/hipe_rtl_to_x86.erl"). diff --git a/lib/hipe/arm/Makefile b/lib/hipe/arm/Makefile new file mode 100644 index 0000000000..571a1da0fc --- /dev/null +++ b/lib/hipe/arm/Makefile @@ -0,0 +1,116 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2005-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. +MODULES=hipe_arm \ + hipe_arm_assemble \ + hipe_arm_cfg \ + hipe_arm_defuse \ + hipe_arm_encode \ + hipe_arm_finalise \ + hipe_arm_frame \ + hipe_arm_liveness_gpr \ + hipe_arm_main \ + hipe_arm_pp \ + hipe_arm_ra \ + hipe_arm_ra_finalise \ + hipe_arm_ra_ls \ + hipe_arm_ra_naive \ + hipe_arm_ra_postconditions \ + hipe_arm_registers \ + hipe_rtl_to_arm + +HRL_FILES=hipe_arm.hrl +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. +$(EBIN)/hipe_arm_assemble.beam: ../main/hipe.hrl ../../kernel/src/hipe_ext_format.hrl ../rtl/hipe_literals.hrl +$(EBIN)/hipe_arm_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc +$(EBIN)/hipe_arm_frame.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_arm_liveness_gpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_arm_registers.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_rtl_to_arm.beam: ../rtl/hipe_rtl.hrl + +$(TARGET_FILES): hipe_arm.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/arm/TODO b/lib/hipe/arm/TODO new file mode 100644 index 0000000000..546d22737a --- /dev/null +++ b/lib/hipe/arm/TODO @@ -0,0 +1,20 @@ +Assembler: + +Peephole optimiser: +- Could e.g. turn "ldr lr,[sp,#OFF]; mov pc,lr" + into "ldr pc,[sp#OFF]", but then the LR save slot must + be in the caller's frame not the callee's. +- Also kill "mov r0,r0" which seems to occur often. + +hipe_arm: +- Handle more non-trivial immediates in mk_li/mk_load/mk_store. + See e.g. big_list, which has many 11-bit character constants. + +Floating point: +- Drop no_inline_fp. Implement FP ops as calls to C or ASM + primops. All FP values passed by reference in memory. + This should at least reduce consing costs. + +Linear scan: +- Do not hardcode temp1/temp2/temp3. Instead just take three + regs from (All\Fixed)\Params. (Ditto in PowerPC.) diff --git a/lib/hipe/arm/hipe_arm.erl b/lib/hipe/arm/hipe_arm.erl new file mode 100644 index 0000000000..391f84ca47 --- /dev/null +++ b/lib/hipe/arm/hipe_arm.erl @@ -0,0 +1,380 @@ +%%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm). +-export([ + mk_temp/2, + mk_new_temp/1, + mk_new_nonallocatable_temp/1, + is_temp/1, + temp_reg/1, + temp_type/1, + temp_is_allocatable/1, + temp_is_precoloured/1, + + mk_mfa/3, + + mk_prim/1, + is_prim/1, + prim_prim/1, + + mk_sdesc/4, + + mk_am2/3, + mk_am3/3, + + mk_alu/5, + + mk_b_fun/2, + + mk_b_label/2, + mk_b_label/1, + + mk_bl/3, + + mk_blx/2, + + mk_cmp/3, + + mk_comment/1, + + mk_label/1, + is_label/1, + label_label/1, + + mk_load/3, + mk_load/6, + + mk_ldrsb/2, + + mk_move/3, + mk_move/2, + + mk_pseudo_bc/4, + + mk_pseudo_call/4, + pseudo_call_contlab/1, + pseudo_call_funv/1, + pseudo_call_sdesc/1, + pseudo_call_linkage/1, + + mk_pseudo_call_prepare/1, + pseudo_call_prepare_nrstkargs/1, + + mk_pseudo_li/2, + + mk_pseudo_move/2, + is_pseudo_move/1, + pseudo_move_dst/1, + pseudo_move_src/1, + + mk_pseudo_switch/3, + + mk_pseudo_tailcall/4, + pseudo_tailcall_funv/1, + pseudo_tailcall_stkargs/1, + pseudo_tailcall_linkage/1, + + mk_pseudo_tailcall_prepare/0, + + mk_smull/4, + + mk_store/3, + mk_store/6, + + mk_pseudo_blr/0, + mk_bx/1, + mk_mflr/1, + mk_mtlr/1, + mk_lr/0, + mk_pc/0, + + mk_li/2, + mk_li/3, + + mk_addi/4, + + try_aluop_imm/2, + + mk_defun/8, + defun_mfa/1, + defun_formals/1, + defun_is_closure/1, + defun_is_leaf/1, + defun_code/1, + defun_data/1, + defun_var_range/1 + ]). + +-include("hipe_arm.hrl"). + +mk_temp(Reg, Type, Allocatable) -> + #arm_temp{reg=Reg, type=Type, allocatable=Allocatable}. +mk_temp(Reg, Type) -> mk_temp(Reg, Type, true). +mk_new_temp(Type, Allocatable) -> + mk_temp(hipe_gensym:get_next_var(arm), Type, Allocatable). +mk_new_temp(Type) -> mk_new_temp(Type, true). +mk_new_nonallocatable_temp(Type) -> mk_new_temp(Type, false). +is_temp(X) -> case X of #arm_temp{} -> true; _ -> false end. +temp_reg(#arm_temp{reg=Reg}) -> Reg. +temp_type(#arm_temp{type=Type}) -> Type. +temp_is_allocatable(#arm_temp{allocatable=A}) -> A. +temp_is_precoloured(#arm_temp{reg=Reg}) -> + hipe_arm_registers:is_precoloured_gpr(Reg). + +mk_mfa(M, F, A) -> #arm_mfa{m=M, f=F, a=A}. + +mk_prim(Prim) -> #arm_prim{prim=Prim}. +is_prim(X) -> case X of #arm_prim{} -> true; _ -> false end. +prim_prim(#arm_prim{prim=Prim}) -> Prim. + +mk_am2(Src, Sign, Offset) -> #am2{src=Src, sign=Sign, offset=Offset}. +mk_am3(Src, Sign, Offset) -> #am3{src=Src, sign=Sign, offset=Offset}. + +mk_alu(AluOp, S, Dst, Src, Am1) -> + #alu{aluop=AluOp, s=S, dst=Dst, src=Src, am1=Am1}. +mk_alu(AluOp, Dst, Src, Am1) -> mk_alu(AluOp, false, Dst, Src, Am1). + +mk_b_fun(Fun, Linkage) -> #b_fun{'fun'=Fun, linkage=Linkage}. + +mk_b_label(Cond, Label) -> #b_label{'cond'=Cond, label=Label}. +mk_b_label(Label) -> mk_b_label('al', Label). + +mk_bl(Fun, SDesc, Linkage) -> #bl{'fun'=Fun, sdesc=SDesc, linkage=Linkage}. + +mk_blx(Src, SDesc) -> #blx{src=Src, sdesc=SDesc}. + +mk_cmp(CmpOp, Src, Am1) -> #cmp{cmpop=CmpOp, src=Src, am1=Am1}. + +mk_sdesc(ExnLab, FSize, Arity, Live) -> + #arm_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}. + +mk_comment(Term) -> #comment{term=Term}. + +mk_label(Label) -> #label{label=Label}. +is_label(I) -> case I of #label{} -> true; _ -> false end. +label_label(#label{label=Label}) -> Label. + +mk_load(LdOp, Dst, Am2) -> #load{ldop=LdOp, dst=Dst, am2=Am2}. + +mk_load(LdOp, Dst, Base, Offset, Scratch, Rest) when is_integer(Offset) -> + {Sign,AbsOffset} = + if Offset < 0 -> {'-', -Offset}; + true -> {'+', Offset} + end, + if AbsOffset =< 4095 -> + Am2 = #am2{src=Base,sign=Sign,offset=AbsOffset}, + [mk_load(LdOp, Dst, Am2) | Rest]; + true -> + Index = + begin + DstReg = temp_reg(Dst), + BaseReg = temp_reg(Base), + if DstReg =/= BaseReg -> Dst; + true -> mk_scratch(Scratch) + end + end, + Am2 = #am2{src=Base,sign=Sign,offset=Index}, + mk_li(Index, AbsOffset, + [mk_load(LdOp, Dst, Am2) | Rest]) + end. + +mk_scratch(Scratch) -> + case Scratch of + 'temp2' -> mk_temp(hipe_arm_registers:temp2(), 'untagged'); + 'new' -> mk_new_temp('untagged') + end. + +mk_ldrsb(Dst, Am3) -> #ldrsb{dst=Dst, am3=Am3}. + +mk_move(MovOp, S, Dst, Am1) -> #move{movop=MovOp, s=S, dst=Dst, am1=Am1}. +mk_move(S, Dst, Am1) -> mk_move('mov', S, Dst, Am1). +mk_move(Dst, Am1) -> mk_move('mov', false, Dst, Am1). + +mk_pseudo_bc(Cond, TrueLab, FalseLab, Pred) -> + if Pred >= 0.5 -> + mk_pseudo_bc_simple(negate_cond(Cond), FalseLab, + TrueLab, 1.0-Pred); + true -> + mk_pseudo_bc_simple(Cond, TrueLab, FalseLab, Pred) + end. + +mk_pseudo_bc_simple(Cond, TrueLab, FalseLab, Pred) when Pred =< 0.5 -> + #pseudo_bc{'cond'=Cond, true_label=TrueLab, + false_label=FalseLab, pred=Pred}. + +negate_cond(Cond) -> + case Cond of + 'lt' -> 'ge'; % <, >= + 'ge' -> 'lt'; % >=, < + 'gt' -> 'le'; % >, <= + 'le' -> 'gt'; % <=, > + 'eq' -> 'ne'; % ==, != + 'ne' -> 'eq'; % !=, == + 'hi' -> 'ls'; % >u, <=u + 'ls' -> 'hi'; % <=u, >u + 'hs' -> 'lo'; % >=u, 'hs'; % =u + 'vs' -> 'vc'; % overflow, not_overflow + 'vc' -> 'vs' % not_overflow, overflow + end. + +mk_pseudo_call(FunV, SDesc, ContLab, Linkage) -> + #pseudo_call{funv=FunV, sdesc=SDesc, contlab=ContLab, linkage=Linkage}. +pseudo_call_funv(#pseudo_call{funv=FunV}) -> FunV. +pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc. +pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab. +pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage. + +mk_pseudo_call_prepare(NrStkArgs) -> + #pseudo_call_prepare{nrstkargs=NrStkArgs}. +pseudo_call_prepare_nrstkargs(#pseudo_call_prepare{nrstkargs=NrStkArgs}) -> + NrStkArgs. + +mk_pseudo_li(Dst, Imm) -> + #pseudo_li{dst=Dst, imm=Imm, label=hipe_gensym:get_next_label(arm)}. + +mk_pseudo_move(Dst, Src) -> #pseudo_move{dst=Dst, src=Src}. +is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. +pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. +pseudo_move_src(#pseudo_move{src=Src}) -> Src. + +mk_pseudo_switch(JTab, Index, Labels) -> + #pseudo_switch{jtab=JTab, index=Index, labels=Labels}. + +mk_pseudo_tailcall(FunV, Arity, StkArgs, Linkage) -> + #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage}. +pseudo_tailcall_funv(#pseudo_tailcall{funv=FunV}) -> FunV. +pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs. +pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage. + +mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}. + +mk_smull(DstLo, DstHi, Src1, Src2) -> + #smull{dstlo=DstLo, dsthi=DstHi, src1=Src1, src2=Src2}. + +mk_store(StOp, Src, Am2) -> #store{stop=StOp, src=Src, am2=Am2}. + +mk_store(StOp, Src, Base, Offset, Scratch, Rest) when is_integer(Offset) -> + {Sign,AbsOffset} = + if Offset < 0 -> {'-', -Offset}; + true -> {'+', Offset} + end, + if AbsOffset =< 4095 -> + Am2 = #am2{src=Base,sign=Sign,offset=AbsOffset}, + [mk_store(StOp, Src, Am2) | Rest]; + true -> + Index = mk_scratch(Scratch), + Am2 = #am2{src=Base,sign=Sign,offset=Index}, + mk_li(Index, AbsOffset, + [mk_store(StOp, Src, Am2) | Rest]) + end. + +mk_pseudo_blr() -> #pseudo_blr{}. +mk_bx(Src) -> #pseudo_bx{src=Src}. +mk_mflr(Dst) -> mk_move(Dst, mk_lr()). +mk_mtlr(Src) -> mk_move(mk_lr(), Src). +mk_lr() -> mk_temp(hipe_arm_registers:lr(), 'untagged'). +mk_pc() -> mk_temp(hipe_arm_registers:pc(), 'untagged'). + +%%% Load an integer constant into a register. +mk_li(Dst, Value) -> mk_li(Dst, Value, []). + +mk_li(Dst, Value, Rest) -> + %% XXX: expand to handle 2-instruction sequences + case try_aluop_imm('mov', Value) of + {NewMovOp,Am1} -> + [mk_move(NewMovOp, false, Dst, Am1) | Rest]; + [] -> + [mk_pseudo_li(Dst, Value) | Rest] + end. + +%%% Add an integer constant. Dst may equal Src, +%%% in which case temp2 may be clobbered. + +mk_addi(Dst, Src, Value, Rest) -> + case try_aluop_imm('add', Value) of + {NewAluOp,Am1} -> + [mk_alu(NewAluOp, Dst, Src, Am1) | Rest]; + [] -> + Tmp = + begin + DstReg = temp_reg(Dst), + SrcReg = temp_reg(Src), + if DstReg =:= SrcReg -> + mk_temp(hipe_arm_registers:temp2(), 'untagged'); + true -> Dst + end + end, + [mk_pseudo_li(Tmp, Value), mk_alu('add', Dst, Src, Tmp) | Rest] + end. + +try_aluop_imm(AluOp, Imm) -> % -> {NewAluOp,Am1} or [] + case imm_to_am1(Imm) of + (Am1={_Imm8,_Imm4}) -> {AluOp, Am1}; + [] -> + case invert_aluop_imm(AluOp, Imm) of + {NewAluOp,NewImm} -> + case imm_to_am1(NewImm) of + (Am1={_Imm8,_Imm4}) -> {NewAluOp, Am1}; + [] -> [] + end; + [] -> [] + end + end. + +invert_aluop_imm(AluOp, Imm) -> + case AluOp of + 'mov' -> {'mvn', bnot Imm}; + 'mvn' -> {'mov', bnot Imm}; + 'cmp' -> {'cmn', -Imm}; + 'cmn' -> {'cmp', -Imm}; + 'and' -> {'bic', bnot Imm}; + 'bic' -> {'and', bnot Imm}; + 'orr' -> {'orn', bnot Imm}; + 'orn' -> {'orr', bnot Imm}; + 'add' -> {'sub', -Imm}; + 'sub' -> {'add', -Imm}; + _ -> [] % no inverted form available + end. + +imm_to_am1(Imm) -> imm_to_am1(Imm band 16#FFFFFFFF, 16). +imm_to_am1(Imm, RotCnt) -> + if Imm >= 0, Imm =< 255 -> {Imm, RotCnt band 15}; + true -> + NewRotCnt = RotCnt - 1, + if NewRotCnt =:= 0 -> []; % full circle, no joy + true -> + NewImm = (Imm bsr 2) bor ((Imm band 3) bsl 30), + imm_to_am1(NewImm, NewRotCnt) + end + end. + +mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> + #defun{mfa=MFA, formals=Formals, code=Code, data=Data, + isclosure=IsClosure, isleaf=IsLeaf, + var_range=VarRange, label_range=LabelRange}. +defun_mfa(#defun{mfa=MFA}) -> MFA. +defun_formals(#defun{formals=Formals}) -> Formals. +defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure. +defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf. +defun_code(#defun{code=Code}) -> Code. +defun_data(#defun{data=Data}) -> Data. +defun_var_range(#defun{var_range=VarRange}) -> VarRange. diff --git a/lib/hipe/arm/hipe_arm.hrl b/lib/hipe/arm/hipe_arm.hrl new file mode 100644 index 0000000000..9ee2cb3d06 --- /dev/null +++ b/lib/hipe/arm/hipe_arm.hrl @@ -0,0 +1,124 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%%%-------------------------------------------------------------------- +%%% Basic Values: +%%% +%%% temp ::= #arm_temp{reg, type, allocatable} +%%% reg ::= +%%% type ::= tagged | untagged +%%% allocatable ::= true | false +%%% +%%% sdesc ::= #arm_sdesc{exnlab, fsize, arity, live} +%%% exnlab ::= [] | label +%%% fsize ::= int32 (frame size in words) +%%% live ::= (word offsets) +%%% arity ::= uint8 +%%% +%%% mfa ::= #arm_mfa{atom, atom, arity} +%%% prim ::= #arm_prim{atom} + +-record(arm_mfa, {m::atom(), f::atom(), a::arity()}). +-record(arm_prim, {prim}). +-record(arm_sdesc, {exnlab, fsize, arity::arity(), live}). +-record(arm_temp, {reg, type, allocatable}). + +%%% Instruction Operands: +%%% +%%% aluop ::= adc | add | and | bic | eor | orr | rsb | rsc | sbc | sub +%%% cmpop ::= cmn | cmp | tst | teq (alu with s flag and no dst) +%%% cond ::= eq | ne | hs | lo | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al +%%% ldop ::= ldr | ldrb (am2) +%%% movop ::= mov | mvn (alu with no src) +%%% stop ::= str | strb (am2) +%%% +%%% dst ::= temp +%%% src ::= temp +%%% +%%% s ::= true | false +%%% +%%% imm ::= +%%% +%%% Note: am1 represents all 11 variants of "Adressing Mode 1". +%%% +%%% am1 ::= {imm8,imm4} imm8 rotated right 2*imm4 bits +%%% | src +%%% | {src,rrx} +%%% | {src,shiftop,imm5} +%%% | {src,shiftop,src} +%%% shiftop ::= lsl | lsr | asr | ror +%%% +%%% Note: am2 can represent the first 3 variants of "Addressing Mode 2", +%%% i.e., not the pre- or post-indexed variants. +%%% +%%% am2 ::= #am2{src, sign, am2offset} +%%% am2offset ::= imm12 | src | {src,rrx} | {src,shiftop,imm5} +%%% sign ::= + | - +%%% +%%% Note: am3 can represent the first 2 variants of "Addressing Mode 3", +%%% i.e., not the pre- or post-indexed variants. +%%% +%%% am3 ::= #am3{src, sign, am3offset} +%%% am3offset ::= imm8 | src +%%% +%%% fun ::= mfa | prim +%%% funv ::= mfa | prim | temp +%%% +%%% immediate ::= int32 | atom | {label,label_type} +%%% label_type ::= constant | closure | c_const + +-record(am2, {src, sign, offset}). +-record(am3, {src, sign, offset}). + +%%% Instructions: + +-record(alu, {aluop, s, dst, src, am1}).% cond not included +-record(b_fun, {'fun', linkage}). % known tailcall; cond not included +-record(b_label, {'cond', label}). % local jump +-record(bl, {'fun', sdesc, linkage}). % known recursive call; cond not included +-record(blx, {src, sdesc}). % computed recursive call; cond not included +-record(cmp, {cmpop, src, am1}). % cond not included +-record(comment, {term}). +-record(label, {label}). +-record(load, {ldop, dst, am2}). % cond not included; ldrh/ldrsh not included +-record(ldrsb, {dst, am3}). % cond not included +-record(move, {movop, s, dst, am1}). % cond not included +-record(pseudo_bc, {'cond', true_label, false_label, pred}). +-record(pseudo_blr, {}). % alias for "mov pc,lr" to help cfg +-record(pseudo_bx, {src}). % alias for "mov pc,src" to help cfg +-record(pseudo_call, {funv, sdesc, contlab, linkage}). +-record(pseudo_call_prepare, {nrstkargs}). +-record(pseudo_li, {dst, imm, label}). % pre-generated label for use by the assembler +-record(pseudo_move, {dst, src}). +-record(pseudo_switch, {jtab, index, labels}). +-record(pseudo_tailcall, {funv, arity, stkargs, linkage}). +-record(pseudo_tailcall_prepare, {}). +-record(smull, {dstlo, dsthi, src1, src2}). % cond not included, s not included +-record(store, {stop, src, am2}). % cond not included; strh not included + +%%% Function definitions. + +-include("../misc/hipe_consttab.hrl"). + +-record(defun, {mfa :: mfa(), formals, code, + data :: hipe_consttab(), + isclosure :: boolean(), + isleaf :: boolean(), + var_range, label_range}). diff --git a/lib/hipe/arm/hipe_arm_assemble.erl b/lib/hipe/arm/hipe_arm_assemble.erl new file mode 100644 index 0000000000..2af786994e --- /dev/null +++ b/lib/hipe/arm/hipe_arm_assemble.erl @@ -0,0 +1,665 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_assemble). +-export([assemble/4]). + +-include("../main/hipe.hrl"). % for VERSION_STRING, when_option +-include("hipe_arm.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). +-include("../rtl/hipe_literals.hrl"). +-undef(ASSERT). +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +assemble(CompiledCode, Closures, Exports, Options) -> + print("****************** Assembling *******************\n", [], Options), + %% + Code = [{MFA, + hipe_arm:defun_code(Defun), + hipe_arm:defun_data(Defun)} + || {MFA, Defun} <- CompiledCode], + %% + {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = + hipe_pack_constants:pack_constants(Code, 4), + %% + {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = + encode(translate(Code, ConstMap), Options), + print("Total num bytes=~w\n", [CodeSize], Options), + %% + SC = hipe_pack_constants:slim_constmap(ConstMap), + DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap), + SSE = slim_sorted_exportmap(ExportMap,Closures,Exports), + SlimRefs = hipe_pack_constants:slim_refs(AccRefs), + Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC}, + ConstAlign, ConstSize, + SC, + DataRelocs, % nee LM, LabelMap + SSE, + CodeSize,CodeBinary,SlimRefs, + 0,[] % ColdCodeSize, SlimColdRefs + ]), + %% + Bin. + +%%% +%%% Assembly Pass 1. +%%% Process initial {MFA,Code,Data} list. +%%% Translate each MFA's body, choosing operand & instruction kinds. +%%% Manage placement of large immediates in the code segment. (ARM-specific) +%%% +%%% Assembly Pass 2. +%%% Perform short/long form optimisation for jumps. +%%% (Trivial on ARM.) +%%% +%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. +%%% + +translate(Code, ConstMap) -> + translate_mfas(Code, ConstMap, []). + +translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode) -> + {NewInsns,CodeSize,LabelMap} = translate_insns(Insns, MFA, ConstMap), + translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode]); +translate_mfas([], _ConstMap, NewCode) -> + lists:reverse(NewCode). + +translate_insns(Insns, MFA, ConstMap) -> + translate_insns(Insns, MFA, ConstMap, gb_trees:empty(), 0, [], + previous_empty(), pending_empty()). + +translate_insns([I|Is] = Insns, MFA, ConstMap, LabelMap, Address, NewInsns, PrevImms, PendImms) -> + IsNotFallthroughInsn = is_not_fallthrough_insn(I), + MustFlushPending = must_flush_pending(PendImms, Address), + {NewIs,Insns1,PendImms1,DoFlushPending} = + case {MustFlushPending,IsNotFallthroughInsn} of + {true,false} -> + %% To avoid having to create new symbolic labels, which is problematic + %% in the assembler, we emit a forward branch with an offset computed + %% from the size of the pending literals. + N = pending_size(PendImms), % N >= 1 since MustFlushPending is true + BranchOffset = N - 1, % in units of 32-bit words! + NewIs0 = [{b, {do_cond('al'),{imm24,BranchOffset}}, #comment{term='skip'}}], + %% io:format("~w: forced flush of pending literals in ~w at ~w\n", [?MODULE,MFA,Address]), + {NewIs0,Insns,PendImms,true}; + {_,_} -> + {NewIs0,PendImms0} = translate_insn(I, MFA, ConstMap, Address, PrevImms, PendImms), + {NewIs0,Is,PendImms0,IsNotFallthroughInsn} + end, + add_insns(NewIs, Insns1, MFA, ConstMap, LabelMap, Address, NewInsns, PrevImms, PendImms1, DoFlushPending); +translate_insns([], _MFA, _ConstMap, LabelMap, Address, NewInsns, PrevImms, PendImms) -> + {LabelMap1, Address1, NewInsns1, _PrevImms1} = % at end-of-function we ignore PrevImms1 + flush_pending(PendImms, LabelMap, Address, NewInsns, PrevImms), + {lists:reverse(NewInsns1), Address1, LabelMap1}. + +add_insns([I|Is], Insns, MFA, ConstMap, LabelMap, Address, NewInsns, PrevImms, PendImms, DoFlushPending) -> + NewLabelMap = + case I of + {'.label',L,_} -> + gb_trees:insert(L, Address, LabelMap); + _ -> + LabelMap + end, + Address1 = Address + insn_size(I), + add_insns(Is, Insns, MFA, ConstMap, NewLabelMap, Address1, [I|NewInsns], PrevImms, PendImms, DoFlushPending); +add_insns([], Insns, MFA, ConstMap, LabelMap, Address, NewInsns, PrevImms, PendImms, DoFlushPending) -> + {LabelMap1, Address1, NewInsns1, PrevImms1, PendImms1} = + case DoFlushPending of + true -> + {LabelMap0,Address0,NewInsns0,PrevImms0} = + flush_pending(PendImms, LabelMap, Address, NewInsns, PrevImms), + {LabelMap0,Address0,NewInsns0,PrevImms0,pending_empty()}; + false -> + PrevImms0 = expire_previous(PrevImms, Address), + {LabelMap,Address,NewInsns,PrevImms0,PendImms} + end, + translate_insns(Insns, MFA, ConstMap, LabelMap1, Address1, NewInsns1, PrevImms1, PendImms1). + +must_flush_pending(PendImms, Address) -> + case pending_firstref(PendImms) of + [] -> false; + LP0 -> + Distance = Address - LP0, + %% In "LP0: ldr R,[PC +/- imm12]", the PC value is LP0+8 so the + %% range for the ldr is [LP0-4084, LP0+4100] (32-bit alignment!). + %% LP0+4096 is the last point where we can emit a branch (4 bytes) + %% followed by the pending immediates. + %% + %% The translation of an individual instruction must not advance + %% . by more than 4 bytes, because that could cause us to miss + %% the point where PendImms must be flushed. + ?ASSERT(Distance =< 4096), + Distance =:= 4096 + end. + +flush_pending(PendImms, LabelMap, Address, Insns, PrevImms) -> + Address1 = Address + 4*pending_size(PendImms), + PrevImms1 = expire_previous(PrevImms, Address1), + {LabelMap1,Address1,Insns1,PrevImms2} = + flush_pending2(pending_to_list(PendImms), LabelMap, Address, Insns, PrevImms1), + PrevImms3 = expire_previous(PrevImms2, Address1), + {LabelMap1,Address1,Insns1,PrevImms3}. + +flush_pending2([{Lab,RelocOrInt,Imm}|Imms], LabelMap, Address, Insns, PrevImms) -> + PrevImms1 = previous_append(PrevImms, Address, Lab, Imm), + LabelMap1 = gb_trees:insert(Lab, Address, LabelMap), + {RelocOpt,LongVal} = + if is_integer(RelocOrInt) -> + {[],RelocOrInt}; + true -> + {[RelocOrInt],0} + end, + Insns1 = + [{'.long', LongVal, #comment{term=Imm}} | + RelocOpt ++ + [{'.label', Lab, #comment{term=Imm}} | + Insns]], + flush_pending2(Imms, LabelMap1, Address+4, Insns1, PrevImms1); +flush_pending2([], LabelMap, Address, Insns, PrevImms) -> + {LabelMap, Address, Insns, PrevImms}. + +expire_previous(PrevImms, CodeAddress) -> + case previous_findmin(PrevImms) of + [] -> PrevImms; + {ImmAddress,_Imm} -> + if CodeAddress - ImmAddress > 4084 -> + expire_previous(previous_delmin(PrevImms), CodeAddress); + true -> + PrevImms + end + end. + +is_not_fallthrough_insn(I) -> + case I of + #b_fun{} -> true; + #b_label{'cond'='al'} -> true; + %% bl and blx are not included since they return to ".+4" + %% a load to PC was originally a pseudo_switch insn + #load{dst=#arm_temp{reg=15,type=Type}} when Type =/= 'double' -> true; + %% a move to PC was originally a pseudo_blr or pseudo_bx insn + #move{dst=#arm_temp{reg=15,type=Type}} when Type =/= 'double' -> true; + _ -> false + end. + +insn_size(I) -> + case I of + {'.label',_,_} -> 0; + {'.reloc',_,_} -> 0; + _ -> 4 + end. + +translate_insn(I, MFA, ConstMap, Address, PrevImms, PendImms) -> + case I of + %% pseudo_li is the only insn using MFA, ConstMap, Address, PrevImms, or PendLits + #pseudo_li{} -> do_pseudo_li(I, MFA, ConstMap, Address, PrevImms, PendImms); + _ -> {translate_insn(I), PendImms} + end. + +translate_insn(I) -> % -> [{Op,Opnd,OrigI}] + case I of + #alu{} -> do_alu(I); + #b_fun{} -> do_b_fun(I); + #b_label{} -> do_b_label(I); + #bl{} -> do_bl(I); + #blx{} -> do_blx(I); + #cmp{} -> do_cmp(I); + #comment{} -> []; + #label{} -> do_label(I); + #load{} -> do_load(I); + #ldrsb{} -> do_ldrsb(I); + #move{} -> do_move(I); + %% pseudo_b: eliminated by finalise + %% pseudo_blr: eliminated by finalise + %% pseudo_call: eliminated by finalise + %% pseudo_call_prepare: eliminated by frame + %% pseudo_li: handled separately + %% pseudo_move: eliminated by frame + %% pseudo_switch: eliminated by finalise + %% pseudo_tailcall: eliminated by frame + %% pseudo_tailcall_prepare: eliminated by finalise + #smull{} -> do_smull(I); + #store{} -> do_store(I) + end. + +do_alu(I) -> + #alu{aluop=AluOp,s=S,dst=Dst,src=Src,am1=Am1} = I, + NewCond = do_cond('al'), + NewS = do_s(S), + NewDst = do_reg(Dst), + NewSrc = do_reg(Src), + NewAm1 = do_am1(Am1), + {NewI,NewOpnds} = {AluOp, {NewCond,NewS,NewDst,NewSrc,NewAm1}}, + [{NewI, NewOpnds, I}]. + +do_b_fun(I) -> + #b_fun{'fun'=Fun,linkage=Linkage} = I, + [{'.reloc', {b_fun,Fun,Linkage}, #comment{term='fun'}}, + {b, {do_cond('al'),{imm24,0}}, I}]. + +do_b_label(I) -> + #b_label{'cond'=Cond,label=Label} = I, + [{b, {do_cond(Cond),do_label_ref(Label)}, I}]. + +do_bl(I) -> + #bl{'fun'=Fun,sdesc=SDesc,linkage=Linkage} = I, + [{'.reloc', {b_fun,Fun,Linkage}, #comment{term='fun'}}, + {bl, {do_cond('al'),{imm24,0}}, I}, + {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}]. + +do_blx(I) -> + #blx{src=Src,sdesc=SDesc} = I, + [{blx, {do_cond('al'),do_reg(Src)}, I}, + {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}]. + +do_cmp(I) -> + #cmp{cmpop=CmpOp,src=Src,am1=Am1} = I, + NewCond = do_cond('al'), + NewSrc = do_reg(Src), + NewAm1 = do_am1(Am1), + [{CmpOp, {NewCond,NewSrc,NewAm1}, I}]. + +do_label(I) -> + #label{label=Label} = I, + [{'.label', Label, I}]. + +do_load(I) -> + #load{ldop=LdOp,dst=Dst,am2=Am2} = I, + NewCond = do_cond('al'), + NewDst = do_reg(Dst), + NewAm2 = do_am2(Am2), + [{LdOp, {NewCond,NewDst,NewAm2}, I}]. + +do_ldrsb(I) -> + #ldrsb{dst=Dst,am3=Am3} = I, + NewCond = do_cond('al'), + NewDst = do_reg(Dst), + NewAm3 = do_am3(Am3), + [{'ldrsb', {NewCond,NewDst,NewAm3}, I}]. + +do_move(I) -> + #move{movop=MovOp,s=S,dst=Dst,am1=Am1} = I, + NewCond = do_cond('al'), + NewS = do_s(S), + NewDst = do_reg(Dst), + NewAm1 = do_am1(Am1), + [{MovOp, {NewCond,NewS,NewDst,NewAm1}, I}]. + +do_pseudo_li(I, MFA, ConstMap, Address, PrevImms, PendImms) -> + #pseudo_li{dst=Dst,imm=Imm,label=Label0} = I, + {Label1,PendImms1} = + case previous_lookup(PrevImms, Imm) of + {value,Lab} -> {Lab,PendImms}; + none -> + case pending_lookup(PendImms, Imm) of + {value,Lab} -> {Lab,PendImms}; + none -> + RelocOrInt = + if is_integer(Imm) -> + %% This is for immediates that require too much work + %% to reconstruct using only arithmetic instructions. + Imm; + true -> + RelocData = + case Imm of + Atom when is_atom(Atom) -> + {load_atom, Atom}; + {Label,constant} -> + ConstNo = find_const({MFA,Label}, ConstMap), + {load_address, {constant,ConstNo}}; + {Label,closure} -> + {load_address, {closure,Label}}; + {Label,c_const} -> + {load_address, {c_const,Label}} + end, + {'.reloc', RelocData, #comment{term=reloc}} + end, + Lab = Label0, % preallocated: creating labels in the assembler doesn't work + {Lab, pending_append(PendImms, Address, Lab, RelocOrInt, Imm)} + end + end, + NewDst = do_reg(Dst), + {[{'.pseudo_li', {NewDst,do_label_ref(Label1)}, I}], PendImms1}. + +do_smull(I) -> + #smull{dstlo=DstLo,dsthi=DstHi,src1=Src1,src2=Src2} = I, + NewCond = do_cond('al'), + NewS = do_s(false), + NewDstLo = do_reg(DstLo), + NewDstHi = do_reg(DstHi), + NewSrc1 = do_reg(Src1), + NewSrc2 = do_reg(Src2), + [{'smull', {NewCond,NewS,NewDstLo,NewDstHi,NewSrc1,NewSrc2}, I}]. + +do_store(I) -> + #store{stop=StOp,src=Src,am2=Am2} = I, + NewCond = do_cond('al'), + NewSrc = do_reg(Src), + NewAm2 = do_am2(Am2), + [{StOp, {NewCond,NewSrc,NewAm2}, I}]. + +do_reg(#arm_temp{reg=Reg,type=Type}) + when is_integer(Reg), 0 =< Reg, Reg < 16, Type =/= 'double' -> + {r,Reg}. + +do_cond(Cond) -> {'cond',Cond}. + +do_s(S) -> {'s', case S of false -> 0; true -> 1 end}. + +do_label_ref(Label) when is_integer(Label) -> + {label,Label}. % symbolic, since offset is not yet computable + +do_am1(Am1) -> + case Am1 of + #arm_temp{} -> do_reg(Am1); + {Src1,'rrx'} -> {do_reg(Src1),'rrx'}; + {Src1,ShiftOp,Src2=#arm_temp{}} -> {do_reg(Src1),{ShiftOp,do_reg(Src2)}}; + {Src1,ShiftOp,Imm5} -> {do_reg(Src1),{ShiftOp,{imm5,Imm5}}}; + {Imm8,Imm4} -> {{imm8,Imm8},{imm4,Imm4}} + end. + +do_am2(#am2{src=Src,sign=Sign,offset=Offset}) -> + NewSrc = do_reg(Src), + case Offset of + #arm_temp{} -> {'register_offset',NewSrc,Sign,do_reg(Offset)}; + {Src3,'rrx'} -> {'scaled_register_offset',NewSrc,Sign,do_reg(Src3),'rrx'}; + {Src3,ShiftOp,Imm5} -> {'scaled_register_offset',NewSrc,Sign,do_reg(Src3),{ShiftOp,{imm5,Imm5}}}; + Imm12 -> {'immediate_offset',NewSrc,Sign,{imm12,Imm12}} + end. + +do_am3(#am3{src=Src,sign=Sign,offset=Offset}) -> + NewSrc = do_reg(Src), + case Offset of + #arm_temp{} -> {'register_offset',NewSrc,Sign,do_reg(Offset)}; + _ -> {'immediate_offset',NewSrc,Sign,{'imm8',Offset}} + end. + +%%% +%%% Assembly Pass 3. +%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. +%%% Translate to a single binary code segment. +%%% Collect relocation patches. +%%% Build ExportMap (MFA-to-address mapping). +%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). +%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. +%%% + +encode(Code, Options) -> + CodeSize = compute_code_size(Code, 0), + ExportMap = build_export_map(Code, 0, []), + {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), + CodeBinary = list_to_binary(lists:reverse(AccCode)), + ?ASSERT(CodeSize =:= byte_size(CodeBinary)), + CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), + {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. + +compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> + compute_code_size(Code, Size+CodeSize); +compute_code_size([], Size) -> Size. + +build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> + build_export_map(Code, Address+CodeSize, [{Address,M,F,A}|ExportMap]); +build_export_map([], _Address, ExportMap) -> ExportMap. + +combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> + NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), + combine_label_maps(Code, Address+CodeSize, NewCLM); +combine_label_maps([], _Address, CLM) -> CLM. + +merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> + NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), + merge_label_map(Rest, MFA, Address, NewCLM); +merge_label_map([], _MFA, _Address, CLM) -> CLM. + +encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> + print("Generating code for: ~w\n", [MFA], Options), + print("Offset | Opcode | Instruction\n", [], Options), + {Address1,Relocs1,AccCode1} = + encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), + ExpectedAddress = Address + CodeSize, + ?ASSERT(Address1 =:= ExpectedAddress), + print("Finished.\n", [], Options), + encode_mfas(Code, Address1, AccCode1, Relocs1, Options); +encode_mfas([], _Address, AccCode, Relocs, _Options) -> + {AccCode,Relocs}. + +encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case I of + {'.label',L,_} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ?ASSERT(Address =:= LabelAddress), % sanity check + print_insn(Address, [], I, Options), + encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); + {'.reloc',Data,_} -> + print_insn(Address, [], I, Options), + Reloc = encode_reloc(Data, Address, FunAddress, LabelMap), + encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); + {'.long',Value,_} -> + print_insn(Address, Value, I, Options), + Segment = <>, + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+4, FunAddress, LabelMap, Relocs, NewAccCode, Options); + _ -> + {Op,Arg,_} = fix_pc_refs(I, Address, FunAddress, LabelMap), + Word = hipe_arm_encode:insn_encode(Op, Arg), + print_insn(Address, Word, I, Options), + Segment = <>, + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+4, FunAddress, LabelMap, Relocs, NewAccCode, Options) + end; +encode_insns([], Address, _FunAddress, _LabelMap, Relocs, AccCode, _Options) -> + {Address,Relocs,AccCode}. + +encode_reloc(Data, Address, FunAddress, LabelMap) -> + case Data of + {b_fun,MFAorPrim,Linkage} -> + %% b and bl are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case Linkage of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + {PatchTypeExt, Address, untag_mfa_or_prim(MFAorPrim)}; + {load_atom,Atom} -> + {?LOAD_ATOM, Address, Atom}; + {load_address,X} -> + {?LOAD_ADDRESS, Address, X}; + {sdesc,SDesc} -> + #arm_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, + ExnRA = + case ExnLab of + [] -> []; % don't cons up a new one + ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress + end, + {?SDESC, Address, + ?STACK_DESC(ExnRA, FSize, Arity, Live)} + end. + +untag_mfa_or_prim(#arm_mfa{m=M,f=F,a=A}) -> {M,F,A}; +untag_mfa_or_prim(#arm_prim{prim=Prim}) -> Prim. + +fix_pc_refs(I, InsnAddress, FunAddress, LabelMap) -> + case I of + {b, {Cond,{label,L}}, OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + Imm24 = (LabelAddress - (InsnAddress+8)) div 4, + %% ensure Imm24 fits in a 24 bit sign-extended field + ?ASSERT(Imm24 =< 16#7FFFFF), + ?ASSERT(Imm24 >= -(16#800000)), + {b, {Cond,{imm24,Imm24 band 16#FFFFFF}}, OrigI}; + {'.pseudo_li', {Dst,{label,L}}, OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + Offset = LabelAddress - (InsnAddress+8), + {Sign,Imm12} = + if Offset < 0 -> {'-', -Offset}; + true -> {'+', Offset} + end, + ?ASSERT(Imm12 =< 16#FFF), + Am2 = {'immediate_offset',{r,15},Sign,{imm12,Imm12}}, + {ldr, {do_cond('al'),Dst,Am2}, OrigI}; + _ -> I + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_data_relocs(RefsFromConsts, LabelMap) -> + lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])). + +mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) -> + Map = [case Label of + {L,Pos} -> + Offset = find({MFA,L}, LabelMap), + {Pos,Offset}; + {sorted,Base,OrderedLabels} -> + {sorted, Base, [begin + Offset = find({MFA,L}, LabelMap), + {Order, Offset} + end + || {L,Order} <- OrderedLabels]} + end + || Label <- Labels], + %% msg("Map: ~w Map\n",[Map]), + mk_data_relocs(Rest, LabelMap, [Map,Acc]); +mk_data_relocs([],_,Acc) -> Acc. + +find({_MFA,_L} = MFAL, LabelMap) -> + gb_trees:get(MFAL, LabelMap). + +slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) -> + IsClosure = lists:member({M,F,A}, Closures), + IsExported = is_exported(F, A, Exports), + [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)]; +slim_sorted_exportmap([],_,_) -> []. + +is_exported(F, A, Exports) -> lists:member({F,A}, Exports). + +%%% +%%% Assembly listing support (pp_asm option). +%%% + +print(String, Arglist, Options) -> + ?when_option(pp_asm, Options, io:format(String, Arglist)). + +print_insn(Address, Word, I, Options) -> + ?when_option(pp_asm, Options, print_insn_2(Address, Word, I)). + +print_insn_2(Address, Word, {NewI,NewArgs,OrigI}) -> + io:format("~8.16.0b | ", [Address]), + print_code_list(word_to_bytes(Word), 0), + case NewI of + '.long' -> + io:format("\t.long ~.16x\n", [Word, "0x"]); + '.reloc' -> + io:format("\t.reloc ~w\n", [NewArgs]); + _ -> + hipe_arm_pp:pp_insn(OrigI) + end. + +word_to_bytes(W) -> + case W of + [] -> []; % label or other pseudo instruction + _ -> [(W bsr 24) band 16#FF, (W bsr 16) band 16#FF, + (W bsr 8) band 16#FF, W band 16#FF] + end. + +print_code_list([Byte|Rest], Len) -> + print_byte(Byte), + print_code_list(Rest, Len+1); +print_code_list([], Len) -> + fill_spaces(8-(Len*2)), + io:format(" | "). + +print_byte(Byte) -> + io:format("~2.16.0b", [Byte band 16#FF]). + +fill_spaces(N) when N > 0 -> + io:format(" "), + fill_spaces(N-1); +fill_spaces(0) -> + []. + +%%% +%%% Lookup a constant in a ConstMap. +%%% + +find_const({MFA,Label},[{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) -> + ConstNo; +find_const(N,[_|R]) -> + find_const(N,R); +find_const(C,[]) -> + ?EXIT({constant_not_found,C}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%% +%%% ADT for previous immediates. +%%% This is a queue (fifo) of the previously defined immediates, +%%% plus a mapping from these immediates to their labels. +%%% +-record(previous, {set, head, tail}). % INV: tail=[] if head=[] + +previous_empty() -> #previous{set=gb_trees:empty(), head=[], tail=[]}. + +previous_lookup(#previous{set=S}, Imm) -> gb_trees:lookup(Imm, S). + +previous_findmin(#previous{head=H}) -> + case H of + [X|_] -> X; + _ -> [] + end. + +previous_delmin(#previous{set=S, head=[{_Address,Imm}|H], tail=T}) -> + {NewH,NewT} = + case H of + [] -> {lists:reverse(T), []}; + _ -> {H, T} + end, + #previous{set=gb_trees:delete(Imm, S), head=NewH, tail=NewT}. + +previous_append(#previous{set=S, head=H, tail=T}, Address, Lab, Imm) -> + {NewH,NewT} = + case H of + [] -> {[{Address,Imm}], []}; + _ -> {H, [{Address,Imm}|T]} + end, + #previous{set=gb_trees:insert(Imm, Lab, S), head=NewH, tail=NewT}. + +%%% +%%% ADT for pending immediates. +%%% This is a queue (fifo) of immediates pending definition, +%%% plus a mapping from these immediates to their labels, +%%% and a recording of the first (lowest) code address referring +%%% to a pending immediate. +%%% +-record(pending, {set, list, firstref}). + +pending_empty() -> #pending{set=gb_trees:empty(), list=[], firstref=[]}. + +pending_to_list(#pending{list=L}) -> lists:reverse(L). + +pending_lookup(#pending{set=S}, Imm) -> gb_trees:lookup(Imm, S). + +pending_firstref(#pending{firstref=F}) -> F. + +pending_append(#pending{set=S, list=L, firstref=F}, Address, Lab, RelocOrInt, Imm) -> + #pending{set=gb_trees:insert(Imm, Lab, S), + list=[{Lab,RelocOrInt,Imm}|L], + firstref=case F of [] -> Address; _ -> F end}. + +pending_size(#pending{list=L}) -> length(L). diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl new file mode 100644 index 0000000000..984a3ccf9e --- /dev/null +++ b/lib/hipe/arm/hipe_arm_cfg.erl @@ -0,0 +1,131 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_cfg). + +-export([init/1, + labels/1, start_label/1, + succ/2, + bb/2, bb_add/3]). +-export([postorder/1]). +-export([linearise/1]). +-export([params/1, reverse_postorder/1]). +-export([arity/1]). % for linear scan +%%-export([redirect_jmp/3]). + +%%% these tell cfg.inc what to define (ugly as hell) +-define(BREADTH_ORDER,true). % for linear scan +-define(PARAMS_NEEDED,true). +-define(START_LABEL_UPDATE_NEEDED,true). + +-include("hipe_arm.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +init(Defun) -> + Code = hipe_arm:defun_code(Defun), + StartLab = hipe_arm:label_label(hd(Code)), + Data = hipe_arm:defun_data(Defun), + IsClosure = hipe_arm:defun_is_closure(Defun), + Name = hipe_arm:defun_mfa(Defun), + IsLeaf = hipe_arm:defun_is_leaf(Defun), + Formals = hipe_arm:defun_formals(Defun), + CFG0 = mk_empty_cfg(Name, StartLab, Data, IsClosure, IsLeaf, Formals), + take_bbs(Code, CFG0). + +is_branch(I) -> + case I of + #b_fun{} -> true; + #b_label{'cond'='al'} -> true; + #pseudo_bc{} -> true; + #pseudo_blr{} -> true; + #pseudo_bx{} -> true; + #pseudo_call{} -> true; + #pseudo_switch{} -> true; + #pseudo_tailcall{} -> true; + _ -> false + end. + +branch_successors(Branch) -> + case Branch of + #b_fun{} -> []; + #b_label{'cond'='al',label=Label} -> [Label]; + #pseudo_bc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; + #pseudo_blr{} -> []; + #pseudo_bx{} -> []; + #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=ExnLab}} -> + case ExnLab of + [] -> [ContLab]; + _ -> [ContLab,ExnLab] + end; + #pseudo_switch{labels=Labels} -> Labels; + #pseudo_tailcall{} -> [] + end. + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). +fails_to(_Instr) -> []. +-endif. + +-ifdef(notdef). +redirect_jmp(I, Old, New) -> + case I of + #b_label{label=Label} -> + if Old =:= Label -> I#b_label{label=New}; + true -> I + end; + #pseudo_bc{true_label=TrueLab, false_label=FalseLab} -> + I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New}; + true -> I + end, + if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; + true -> I1 + end; + %% handle pseudo_call too? + _ -> I + end. +-endif. + +mk_goto(Label) -> + hipe_arm:mk_b_label(Label). + +is_label(I) -> + hipe_arm:is_label(I). + +label_name(Label) -> + hipe_arm:label_label(Label). + +mk_label(Name) -> + hipe_arm:mk_label(Name). + +linearise(CFG) -> % -> defun, not insn list + MFA = function(CFG), + Formals = params(CFG), + Code = linearize_cfg(CFG), + Data = data(CFG), + VarRange = hipe_gensym:var_range(arm), + LabelRange = hipe_gensym:label_range(arm), + IsClosure = is_closure(CFG), + IsLeaf = is_leaf(CFG), + hipe_arm:mk_defun(MFA, Formals, IsClosure, IsLeaf, + Code, Data, VarRange, LabelRange). + +arity(CFG) -> + {_M, _F, A} = function(CFG), + A. diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl new file mode 100644 index 0000000000..8d6efebc21 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_defuse.erl @@ -0,0 +1,157 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_defuse). +-export([insn_def_all/1, insn_use_all/1]). +-export([insn_def_gpr/1, insn_use_gpr/1]). +-include("hipe_arm.hrl"). + +%%% +%%% Defs and uses for both general-purpose and floating-point registers. +%%% This is needed for the frame module, alas. +%%% +insn_def_all(I) -> + insn_def_gpr(I). + +insn_use_all(I) -> + insn_use_gpr(I). + +%%% +%%% Defs and uses for general-purpose (integer) registers only. +%%% +insn_def_gpr(I) -> + case I of + #alu{dst=Dst} -> [Dst]; + #load{dst=Dst} -> [Dst]; + #ldrsb{dst=Dst} -> [Dst]; + #move{dst=Dst} -> [Dst]; + #pseudo_call{} -> call_clobbered_gpr(); + #pseudo_li{dst=Dst} -> [Dst]; + #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); + #smull{dstlo=DstLo,dsthi=DstHi,src1=Src1} -> + %% ARM requires DstLo, DstHi, and Src1 to be distinct. + %% Add fake DEF of Src1 to prevent regalloc from reusing + %% it as DstLo or DstHi. + [DstLo, DstHi, Src1]; + _ -> [] + end. + +call_clobbered_gpr() -> + [hipe_arm:mk_temp(R, T) + || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()]. + +all_fp_pseudos() -> []. % XXX: for now + +tailcall_clobbered_gpr() -> + [hipe_arm:mk_temp(R, T) + || {R,T} <- hipe_arm_registers:tailcall_clobbered() ++ all_fp_pseudos()]. + +insn_use_gpr(I) -> + case I of + #alu{src=Src,am1=Am1} -> am1_use(Am1, [Src]); + #blx{src=Src} -> [Src]; + #cmp{src=Src,am1=Am1} -> am1_use(Am1, [Src]); + #load{am2=Am2} -> am2_use(Am2, []); + #ldrsb{am3=Am3} -> am3_use(Am3, []); + #move{am1=Am1} -> am1_use(Am1, []); + #pseudo_blr{} -> + LR = hipe_arm:mk_temp(hipe_arm_registers:lr(), 'untagged'), + RV = hipe_arm:mk_temp(hipe_arm_registers:return_value(), 'tagged'), + [RV, LR]; + #pseudo_bx{src=Src} -> + io:format("~w: whoa there! insn_use of ~w occurred\n", [?MODULE,I]), + [Src]; + #pseudo_call{funv=FunV,sdesc=#arm_sdesc{arity=Arity}} -> + funv_use(FunV, arity_use_gpr(Arity)); + #pseudo_move{src=Src} -> [Src]; + #pseudo_switch{jtab=JTabR,index=IndexR} -> addtemp(JTabR, [IndexR]); + #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} -> + addargs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity)))); + #smull{src1=Src1,src2=Src2} -> addtemp(Src1, [Src2]); + #store{src=Src,am2=Am2} -> am2_use(Am2, [Src]); + _ -> [] + end. + +addargs([Arg|Args], Set) -> + addargs(Args, addarg(Arg, Set)); +addargs([], Set) -> + Set. + +addarg(Arg, Set) -> + case Arg of + #arm_temp{} -> addtemp(Arg, Set); + _ -> Set + end. + +arity_use_gpr(Arity) -> + [hipe_arm:mk_temp(R, 'tagged') + || R <- hipe_arm_registers:args(Arity)]. + +funv_use(FunV, Set) -> + case FunV of + #arm_temp{} -> addtemp(FunV, Set); + _ -> Set + end. + +am1_use(Am1, Set) -> + case Am1 of + #arm_temp{} -> addtemp(Am1, Set); + {Src,rrx} -> addtemp(Src, Set); + {Src,_,ShiftArg} -> + Set1 = addtemp(Src, Set), + case ShiftArg of + #arm_temp{} -> addtemp(ShiftArg, Set1); + _ -> Set1 + end; + _ -> Set + end. + +am2_use(#am2{src=Src,offset=Am2Offset}, Set) -> + Set1 = addtemp(Src, Set), + case Am2Offset of + #arm_temp{} -> addtemp(Am2Offset, Set1); + {Src2,_} -> addtemp(Src2, Set1); + {Src2,_,_} -> addtemp(Src2, Set1); + _ -> Set1 + end. + +am3_use(#am3{src=Src,offset=Am3Offset}, Set) -> + Set1 = addtemp(Src, Set), + case Am3Offset of + #arm_temp{} -> addtemp(Am3Offset, Set1); + _ -> Set1 + end. + +%%% +%%% Auxiliary operations on sets of temps +%%% These sets are small. No point using gb_trees, right? +%%% + +addtemps([Arg|Args], Set) -> + addtemps(Args, addtemp(Arg, Set)); +addtemps([], Set) -> + Set. + +addtemp(Temp, Set) -> + case lists:member(Temp, Set) of + false -> [Temp|Set]; + _ -> Set + end. diff --git a/lib/hipe/arm/hipe_arm_encode.erl b/lib/hipe/arm/hipe_arm_encode.erl new file mode 100644 index 0000000000..19e507fdbd --- /dev/null +++ b/lib/hipe/arm/hipe_arm_encode.erl @@ -0,0 +1,994 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Encode symbolic ARM instructions to binary form. +%%% Copyright (C) 2005 Mikael Pettersson +%%% +%%% Implementation Notes: +%%% - The Thumb instruction set is a different entity, and is +%%% not and never will be supported by this module. +%%% - Instructions and instruction forms that are unpredictable +%%% or useless in User mode are not supported. They include: +%%% + Data Processing Instructions with S=1 and Rd=15. +%%% + The LDM(2), LDM(3), and STM(2) instructions. +%%% + MRS instructions that access the SPSR. +%%% + MSR instructions that access the SPSR. +%%% + The LDBRT, LDRT, STBRT, and STRT instructions. +%%% +%%% Instruction Operands: +%%% +%%% S ::= {s,0} | {s,1} +%%% L ::= {l,0} | {l,1} +%%% R ::= {r,RNum} +%%% CR ::= {cr,CRNum} +%%% +%%% Cond ::= {cond,CondName} +%%% CondName ::= eq | ne | cs | hs | cc | lo | mi | pl | vs +%%% | vc | hi | ls | ge | lt | gt | ge | al +%%% +%%% Imm ::= {imm,} for N in 4, 5, 8, 12, 16, 24, and 25 +%%% +%%% Am1ShifterOperand +%%% ::= {Imm8,Imm4} +%%% | Rm +%%% | {Rm,Am1ShiftOp} +%%% Am1ShiftOp ::= {ShiftOp,Imm5} +%%% | {ShiftOp,Rs} +%%% | rrx +%%% ShiftOp ::= lsl | lsr | asr | ror +%%% +%%% Am2LSWUBOperand ::= {immediate_offset,Rn,Sign,Imm12} +%%% | {register_offset,Rn,Sign,Rm} // redundant +%%% | {scaled_register_offset,Rn,Sign,Rm,Am2ShiftOp} +%%% | {immediate_pre_indexed,Rn,Sign,Imm12} +%%% | {register_pre_indexed,Rn,Sign,Rm} // redundant +%%% | {scaled_register_pre_indexed,Rn,Sign,Rm,Am2ShiftOp} +%%% | {immediate_post_indexed,Rn,Sign,Imm12} +%%% | {register_post_indexed,Rn,Sign,Rm} // redundant +%%% | {scaled_register_post_indexed,Rn,Sign,Rm,Am2ShiftOp} +%%% Am2ShiftOp ::= {ShiftOp,Imm5} +%%% | rrx +%%% Sign ::= + | - +%%% +%%% Am3MiscLSOperand::= {immediate_offset,Rn,Sign,Imm8} +%%% | {register_offset,Rn,Sign,Rm} +%%% | {immediate_pre_indexed,Rn,Sign,Imm8} +%%% | {register_pre_indexed,Rn,Sign,Rm} +%%% | {immediate_post_indexed,Rn,Sign,Imm8} +%%% | {register_post_indexed,Rn,Sign,Rm} +%%% +%%% Am4LSMultiple ::= ia | ib | da | db +%%% | fd | ed | fa | ea +%%% +%%% Am5LSCoprocessor::= {offset,Rn,Sign,Imm8} +%%% | {pre_indexed,Rn,Sign,Imm8} +%%% | {post_indexed,Rn,Sign,Imm8} +%%% | {unindexed,Rn,Imm8} + +-module(hipe_arm_encode). + +-export([insn_encode/2]). + +%%-define(TESTING,1). +-ifdef(TESTING). +-export([dotest/0, dotest/1]). +-endif. + +-define(ASSERT(G), + if G -> []; + true -> exit({assertion_failed,?MODULE,?LINE,??G}) + end). + +bf(LeftBit, RightBit, Value) -> + ?ASSERT(32 > LeftBit), + ?ASSERT(LeftBit >= RightBit), + ?ASSERT(RightBit >= 0), + ?ASSERT(Value >= 0), + ?ASSERT(Value < (1 bsl ((LeftBit - RightBit) + 1))), + Value bsl RightBit. + +-define(BF(LB,RB,V), bf(LB,RB,V)). +-define(BIT(Pos,Val), ?BF(Pos,Pos,Val)). +%%-define(BITS(N,Val), ?BF(N,0,Val)). + +%%% +%%% Addressing Modes +%%% + +am1_shifter_operand(Rn, Rd, ShifterOperand) -> + case ShifterOperand of + {{imm8,Imm8},{imm4,RotImm4}} -> + ?BIT(25,1) bor ?BF(11,8,RotImm4) bor ?BF(7,0,Imm8); + {r,Rm} -> + %% same as Rm LSL #0 + ?BF(3,0,Rm); + {{r,Rm},ShiftOp} -> + am1_shift_op(Rn, Rd, Rm, ShiftOp) bor ?BF(3,0,Rm) + end. + +am1_shift_op(_Rn, _Rd, _Rm, {ShiftOp,{imm5,ShiftImm5}}) -> + case ShiftOp of + 'ror' -> ?ASSERT(ShiftImm5 =/= 0); % denotes RRX form + _ -> [] + end, + ?BF(11,7,ShiftImm5) bor shift_op_bits65(ShiftOp); +am1_shift_op(Rn, Rd, Rm, {ShiftOp,{r,Rs}}) -> + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rs =/= 15), % UNPREDICTABLE + ?BF(11,8,Rs) bor shift_op_bits65(ShiftOp) bor ?BIT(4,1); +am1_shift_op(_Rn, _Rd, _Rm, 'rrx') -> + ?BF(6,5,2#11). + +shift_op_bits65(ShiftOp) -> + case ShiftOp of + 'lsl' -> ?BF(6,5,2#00); + 'lsr' -> ?BF(6,5,2#01); + 'asr' -> ?BF(6,5,2#10); + 'ror' -> ?BF(6,5,2#11) + end. + +sign('+') -> ?BIT(23,1); +sign('-') -> 0. + +am2_lswub(Rd, AddressingMode) -> + case AddressingMode of + {immediate_offset,{r,Rn},Sign,{imm12,Imm12}} -> + ?BIT(24,1) bor sign(Sign) bor ?BF(19,16,Rn) bor ?BF(11,0,Imm12); + {register_offset,{r,Rn},Sign,{r,Rm}} -> + %% same as scaled_register_offset LSL #0 + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?BIT(25,1) bor ?BIT(24,1) bor sign(Sign) bor ?BF(19,16,Rn) bor ?BF(3,0,Rm); + {scaled_register_offset,{r,Rn},Sign,{r,Rm},ShiftOp} -> + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?BIT(25,1) bor ?BIT(24,1) bor sign(Sign) bor ?BF(19,16,Rn) bor am2_shift_op(ShiftOp) bor ?BF(3,0,Rm); + {immediate_pre_indexed,{r,Rn},Sign,{imm12,Imm12}} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?BIT(24,1) bor sign(Sign) bor ?BIT(21,1) bor ?BF(19,16,Rn) bor ?BF(11,0,Imm12); + {register_pre_indexed,{r,Rn},Sign,{r,Rm}} -> + %% same as scaled_register_pre_indexed LSL #0 + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= Rm), % UNPREDICTABLE + ?BIT(25,1) bor ?BIT(24,1) bor sign(Sign) bor ?BIT(21,1) bor ?BF(19,16,Rn) bor ?BF(3,0,Rm); + {scaled_register_pre_indexed,{r,Rn},Sign,{r,Rm},ShiftOp} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= Rm), % UNPREDICTABLE + ?BIT(25,1) bor ?BIT(24,1) bor sign(Sign) bor ?BIT(21,1) bor ?BF(19,16,Rn) bor am2_shift_op(ShiftOp) bor ?BF(3,0,Rm); + {immediate_post_indexed,{r,Rn},Sign,{imm12,Imm12}} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + sign(Sign) bor ?BF(19,16,Rn) bor ?BF(11,0,Imm12); + {register_post_indexed,{r,Rn},Sign,{r,Rm}} -> + %% same as scaled_register_post_indexed LSL #0 + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?BIT(25,1) bor sign(Sign) bor ?BF(19,6,Rn) bor ?BF(3,0,Rm); + {scaled_register_post_indexed,{r,Rn},Sign,{r,Rm},ShiftOp} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= Rm), % UNPREDICTABLE + ?BIT(25,1) bor sign(Sign) bor ?BF(19,16,Rn) bor am2_shift_op(ShiftOp) bor ?BF(3,0,Rm) + end. + +am2_shift_op({ShiftOp,{imm5,ShiftImm5}}) -> + case ShiftOp of + 'ror' -> ?ASSERT(ShiftImm5 =/= 0); % denotes RRX form + _ -> [] + end, + ?BF(11,7,ShiftImm5) bor shift_op_bits65(ShiftOp); +am2_shift_op('rrx') -> + ?BF(6,5,2#11). + +am3_miscls(Rd, AddressingMode) -> + case AddressingMode of + {immediate_offset,{r,Rn},Sign,{imm8,Imm8}} -> + ?BIT(24,1) bor sign(Sign) bor ?BF(22,21,2#10) bor ?BF(19,16,Rn) bor ?BF(11,8,Imm8 bsr 4) bor ?BF(3,0,Imm8 band 2#1111); + {register_offset,{r,Rn},Sign,{r,Rm}} -> + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?BIT(24,1) bor sign(Sign) bor ?BF(22,21,2#00) bor ?BF(19,16,Rn) bor ?BF(3,0,Rm); + {immediate_pre_indexed,{r,Rn},Sign,{imm8,Imm8}} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?BIT(24,1) bor sign(Sign) bor ?BF(22,21,2#11) bor ?BF(19,16,Rn) bor ?BF(11,8,Imm8 bsr 4) bor ?BF(3,0,Imm8 band 2#1111); + {register_pre_indexed,{r,Rn},Sign,{r,Rm}} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= Rn), % UNPREDICTABLE + ?BIT(24,1) bor sign(Sign) bor ?BF(22,21,2#01) bor ?BF(19,16,Rn) bor ?BF(3,0,Rm); + {immediate_post_indexed,{r,Rn},Sign,{imm8,Imm8}} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?BIT(24,0) bor sign(Sign) bor ?BF(22,21,2#10) bor ?BF(19,16,Rn) bor ?BF(11,8,Imm8 bsr 4) bor ?BF(3,0,Imm8 band 2#1111); + {register_post_indexed,{r,Rn},Sign,{r,Rm}} -> + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= Rn), % UNPREDICTABLE + ?BIT(24,0) bor sign(Sign) bor ?BF(22,21,2#00) bor ?BF(19,16,Rn) bor ?BF(3,0,Rm) + end. + +am4_ls_multiple(L, AddressingMode) -> + case AddressingMode of + 'ia' -> ?BF(24,23,2#01); + 'ib' -> ?BF(24,23,2#11); + 'da' -> ?BF(24,23,2#00); + 'db' -> ?BF(24,23,2#10); + _ -> + %% context-sensitive alias crap + case {L,AddressingMode} of + {1,'fa'} -> ?BF(24,23,2#00); + {1,'fd'} -> ?BF(24,23,2#01); + {1,'ea'} -> ?BF(24,23,2#10); + {1,'ed'} -> ?BF(24,23,2#11); + {0,'ed'} -> ?BF(24,23,2#00); + {0,'ea'} -> ?BF(24,23,2#01); + {0,'fd'} -> ?BF(24,23,2#10); + {0,'fa'} -> ?BF(24,23,2#11) + end + end. + +am5_ls_coprocessor(AddressingMode) -> + case AddressingMode of + {offset,{r,Rn},Sign,{imm8,Imm8}} -> + ?BIT(24,1) bor sign(Sign) bor ?BF(19,16,Rn) bor ?BF(7,0,Imm8); + {pre_indexed,{r,Rn},Sign,{imm8,Imm8}} -> + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?BIT(24,1) bor sign(Sign) bor ?BIT(21,1) bor ?BF(19,16,Rn) bor ?BF(7,0,Imm8); + {post_indexed,{r,Rn},Sign,{imm8,Imm8}} -> + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + sign(Sign) bor ?BIT(21,1) bor ?BF(19,16,Rn) bor ?BF(7,0,Imm8); + {unindexed,{r,Rn},{imm8,Imm8}} -> + ?BIT(23,1) bor ?BF(19,16,Rn) bor ?BF(7,0,Imm8) + end. + +%%% + +'cond'(Cond) -> + case Cond of + 'eq' -> ?BF(31,28,2#0000); % equal + 'ne' -> ?BF(31,28,2#0001); % not equal + 'cs' -> ?BF(31,28,2#0010); % carry set + 'hs' -> ?BF(31,28,2#0010); % unsigned higher or same + 'cc' -> ?BF(31,28,2#0011); % carry clear + 'lo' -> ?BF(31,28,2#0011); % unsigned lower + 'mi' -> ?BF(31,28,2#0100); % minus/negative + 'pl' -> ?BF(31,28,2#0101); % plus/positive or zero + 'vs' -> ?BF(31,28,2#0110); % overflow + 'vc' -> ?BF(31,28,2#0111); % no overflow + 'hi' -> ?BF(31,28,2#1000); % unsigned higher + 'ls' -> ?BF(31,28,2#1001); % unsigned lower or same + 'ge' -> ?BF(31,28,2#1010); % signed greater than or equal + 'lt' -> ?BF(31,28,2#1011); % signed less than + 'gt' -> ?BF(31,28,2#1100); % signed greater than + 'le' -> ?BF(31,28,2#1101); % signed less than or equal + 'al' -> ?BF(31,28,2#1110) % always + end. + +%%% +%%% ARM Instructions +%%% + +data_processing_form(Cond, OpCode, S, Rn, Rd, ShifterOperand) -> + case S of + 1 -> ?ASSERT(Rd =/= 15); % UNPREDICTABLE in User or System mode + _ -> [] + end, + 'cond'(Cond) bor ?BF(24,21,OpCode) bor ?BIT(20,S) bor ?BF(19,16,Rn) bor ?BF(15,12,Rd) bor am1_shifter_operand(Rn,Rd,ShifterOperand). + +data_processing_form(OpCode, {{'cond',Cond},{s,S},{r,Rd},{r,Rn},ShifterOperand}) -> + data_processing_form(Cond, OpCode, S, Rn, Rd, ShifterOperand). + +adc(Opnds) -> data_processing_form(2#0101, Opnds). +add(Opnds) -> data_processing_form(2#0100, Opnds). +'and'(Opnds) -> data_processing_form(2#0000, Opnds). +bic(Opnds) -> data_processing_form(2#1110, Opnds). +eor(Opnds) -> data_processing_form(2#0001, Opnds). +orr(Opnds) -> data_processing_form(2#1100, Opnds). +rsb(Opnds) -> data_processing_form(2#0011, Opnds). +rsc(Opnds) -> data_processing_form(2#0111, Opnds). +sbc(Opnds) -> data_processing_form(2#0110, Opnds). +sub(Opnds) -> data_processing_form(2#0010, Opnds). + +cmp_form(OpCode, {{'cond',Cond},{r,Rn},ShifterOperand}) -> + data_processing_form(Cond, OpCode, 1, Rn, 0, ShifterOperand). + +cmn(Opnds) -> cmp_form(2#1011, Opnds). +cmp(Opnds) -> cmp_form(2#1010, Opnds). +teq(Opnds) -> cmp_form(2#1001, Opnds). +tst(Opnds) -> cmp_form(2#1000, Opnds). + +mov_form(OpCode, {{'cond',Cond},{s,S},{r,Rd},ShifterOperand}) -> + data_processing_form(Cond, OpCode, S, 0, Rd, ShifterOperand). + +mov(Opnds) -> mov_form(2#1101, Opnds). +mvn(Opnds) -> mov_form(2#1111, Opnds). + +%%% + +b_form(L, {{'cond',Cond},{imm24,Imm24}}) -> + 'cond'(Cond) bor ?BF(27,25,2#101) bor ?BIT(24,L) bor ?BF(23,0,Imm24). + +b(Opnds) -> b_form(0, Opnds). +bl(Opnds) -> b_form(1, Opnds). + +bkpt({{imm16,Imm16}}) -> + ?BF(31,28,2#1110) bor ?BF(27,20,2#00010010) bor ?BF(19,8,Imm16 bsr 4) bor ?BF(7,4,2#0111) bor ?BF(3,0,Imm16 band 2#1111). + +bx_form(SubOpcode, {{'cond',Cond},{r,Rm}}, IsBlx) -> + case IsBlx of + true -> ?ASSERT(Rm =/= 15); % UNPREDICTABLE + _ -> [] + end, + 'cond'(Cond) bor ?BF(27,20,2#00010010) bor ?BF(19,16,2#1111) bor ?BF(15,12,2#1111) bor ?BF(11,8,2#1111) bor ?BF(7,4,SubOpcode) bor ?BF(3,0,Rm). + +blx(Opnds) -> + case Opnds of + {{imm25,Imm25}} -> % u16-offset! + ?BF(31,28,2#1111) bor ?BF(27,25,2#101) bor ?BIT(24,Imm25 band 1) bor ?BF(23,0,Imm25 bsr 1); + _ -> + bx_form(2#0011, Opnds, true) + end. + +bx(Opnds) -> bx_form(2#0001, Opnds, false). + +cdp_form(Cond, CpOp4, CRn, CRd, CpNum, CpOp3, CRm) -> + Cond bor ?BF(27,24,2#1110) bor ?BF(23,20,CpOp4) bor ?BF(19,16,CRn) bor ?BF(15,12,CRd) bor ?BF(11,8,CpNum) bor ?BF(7,5,CpOp3) bor ?BF(3,0,CRm). + +cdp({{'cond',Cond},{cpnum,CpNum},{cpop4,CpOp4},{cr,CRd},{cr,CRn},{cr,CRm},{cpop3,CpOp3}}) -> + cdp_form('cond'(Cond), CpOp4, CRn, CRd, CpNum, CpOp3, CRm). + +cdp2({{cpnum,CpNum},{cpop4,CpOp4},{cr,CRd},{cr,CRn},{cr,CRm},{cpop3,CpOp3}}) -> + cdp_form(?BF(31,28,2#1111), CpOp4, CRn, CRd, CpNum, CpOp3, CRm). + +clz({{'cond',Cond},{r,Rd},{r,Rm}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + 'cond'(Cond) bor ?BF(27,20,2#00010110) bor ?BF(19,16,2#1111) bor ?BF(15,12,Rd) bor ?BF(11,8,2#1111) bor ?BF(7,4,2#0001) bor ?BF(3,0,Rm). + +ldstc_form(Cond, L, B20, CRd, CpNum, AddressingMode) -> + Cond bor ?BF(27,25,2#110) bor ?BIT(22,L) bor ?BIT(20,B20) bor ?BF(15,12,CRd) bor ?BF(11,8,CpNum) bor am5_ls_coprocessor(AddressingMode). + +ldstc(B20, {{'cond',Cond},{l,L},{cpnum,CpNum},{cr,CRd},AddressingMode}) -> + ldstc_form('cond'(Cond), L, B20, CRd, CpNum, AddressingMode). + +ldc(Opnds) -> ldstc(1, Opnds). +stc(Opnds) -> ldstc(0, Opnds). + +ldstc2(B20, {{l,L},{cpnum,CpNum},{cr,CRd},AddressingMode}) -> + ldstc_form(?BF(31,28,2#1111), L, B20, CRd, CpNum, AddressingMode). + +ldc2(Opnds) -> ldstc2(1, Opnds). +stc2(Opnds) -> ldstc2(0, Opnds). + +ldstm_form(Cond, AddressingMode, W, L, Rn, Registers) -> + RegisterList = register_list(Registers), + ?ASSERT(RegisterList =/= 0), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + case W of + 1 -> + BitRn = 1 bsl Rn, + case L of + 1 -> + %% LDM! Rn in Registers is UNPREDICTABLE + ?ASSERT((RegisterList band BitRn) =:= 0); + 0 -> + %% STM! Rn in Registers and not lowest is UNPREDICTABLE + case RegisterList band BitRn of + 0 -> []; + _ -> + ?ASSERT((RegisterList band (-RegisterList)) =:= BitRn) + end + end; + _ -> [] + end, + 'cond'(Cond) bor ?BF(27,25,2#100) bor am4_ls_multiple(L, AddressingMode) bor ?BIT(21,W) bor ?BIT(20,L) bor ?BF(19,16,Rn) bor ?BF(15,0,RegisterList). + +register_list(Registers) -> register_list(Registers, 0). + +register_list([{r,R}|Rs], Mask) -> register_list(Rs, Mask bor (1 bsl R)); +register_list([], Mask) -> Mask. + +ldstm(L, Opnds) -> + case Opnds of + {{'cond',Cond},AddressingMode,{r,Rn},'!',Registers} -> + ldstm_form(Cond, AddressingMode, 1, L, Rn, Registers); + {{'cond',Cond},AddressingMode,{r,Rn},Registers} -> + ldstm_form(Cond, AddressingMode, 0, L, Rn, Registers) + %% the ldm(2), ldm(3), and stm(2) forms are UNPREDICTABLE + %% in User or System mode + end. + +ldm(Opnds) -> ldstm(1, Opnds). +stm(Opnds) -> ldstm(0, Opnds). + +ldstr_form2(B, L, {{'cond',Cond},{r,Rd},AddressingMode}) -> + 'cond'(Cond) bor ?BF(27,26,2#01) bor am2_lswub(Rd, AddressingMode) bor ?BIT(22,B) bor ?BIT(20,L) bor ?BF(15,12,Rd). + +ldr(Opnds) -> ldstr_form2(0, 1, Opnds). +ldrb(Opnds) -> ldstr_form2(1, 1, Opnds). +str(Opnds) -> ldstr_form2(0, 0, Opnds). +strb(Opnds) -> ldstr_form2(1, 0, Opnds). + +ldstr_form3(L, SubOpcode, {{'cond',Cond},{r,Rd},AddressingMode}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + 'cond'(Cond) bor am3_miscls(Rd, AddressingMode) bor ?BIT(20,L) bor ?BF(15,12,Rd) bor ?BF(7,4,SubOpcode). + +ldrh(Opnds) -> ldstr_form3(1, 2#1011, Opnds). +ldrsb(Opnds) -> ldstr_form3(1, 2#1101, Opnds). +ldrsh(Opnds) -> ldstr_form3(1, 2#1111, Opnds). +strh(Opnds) -> ldstr_form3(0, 2#1011, Opnds). + +mcr_form(Cond, OP1, CRn, Rd, CpNum, OP2, CRm) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + Cond bor ?BF(27,24,2#1110) bor ?BF(23,21,OP1) bor ?BF(19,16,CRn) bor ?BF(15,12,Rd) bor ?BF(11,8,CpNum) bor ?BF(7,5,OP2) bor ?BIT(4,1) bor ?BF(3,0,CRm). + +mcr({{'cond',Cond},{cpnum,CpNum},{cpop3,OP1},{r,Rd},{cr,CRn},{cr,CRm},{cpop3,OP2}}) -> + mcr_form('cond'(Cond), OP1, CRn, Rd, CpNum, OP2, CRm). + +mcr2({{cpnum,CpNum},{cpop3,OP1},{r,Rd},{cr,CRn},{cr,CRm},{cpop3,OP2}}) -> + mcr_form(?BF(31,28,2#1111), OP1, CRn, Rd, CpNum, OP2, CRm). + +mla({{'cond',Cond},{s,S},{r,Rd},{r,Rm},{r,Rs},{r,Rn}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rs =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rd =/= Rm), % UNPREDICTABLE + 'cond'(Cond) bor ?BIT(21,1) bor ?BIT(20,S) bor ?BF(19,16,Rd) bor ?BF(15,12,Rn) bor ?BF(11,8,Rs) bor ?BF(7,4,2#1001) bor ?BF(3,0,Rm). + +mrc_form(Cond, OP1, CRn, Rd, CpNum, OP2, CRm) -> + Cond bor ?BF(27,24,2#1110) bor ?BF(23,21,OP1) bor ?BIT(20,1) bor ?BF(19,16,CRn) bor ?BF(15,12,Rd) bor ?BF(11,8,CpNum) bor ?BF(7,5,OP2) bor ?BIT(4,1) bor ?BF(3,0,CRm). + +mrc({{'cond',Cond},{cpnum,CpNum},{cpop3,OP1},{r,Rd},{cr,CRn},{cr,CRm},{cpop3,OP2}}) -> + mrc_form('cond'(Cond), OP1, CRn, Rd, CpNum, OP2, CRm). + +mrc2({{cpnum,CpNum},{cpop3,OP1},{r,Rd},{cr,CRn},{cr,CRm},{cpop3,OP2}}) -> + mrc_form(?BF(31,28,2#1111), OP1, CRn, Rd, CpNum, OP2, CRm). + +mrs({{'cond',Cond},{r,Rd},'cpsr'}) -> + %% the SPSR form is UNPREDICTABLE in User or System mode + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + 'cond'(Cond) bor ?BIT(24,1) bor ?BF(19,16,2#1111) bor ?BF(15,12,Rd). + +msr_form(Cond, FieldMask4, Operand) -> + 'cond'(Cond) bor ?BIT(24,1) bor ?BIT(21,1) bor ?BF(19,16,FieldMask4) bor ?BF(15,12,2#1111) bor Operand. + +msr(Opnds) -> + %% the SPSR form is UNPREDICTABLE in User or System mode + case Opnds of + {{'cond',Cond},'cpsr',{field_mask,FieldMask4},{imm8,Imm8},{imm4,RotImm4}} -> + msr_form(Cond, FieldMask4, ?BIT(25,1) bor ?BF(11,8,RotImm4) bor ?BF(7,0,Imm8)); + {{'cond',Cond},'cpsr',{field_mask,FieldMask4},{r,Rm}} -> + msr_form(Cond, FieldMask4, ?BF(3,0,Rm)) + end. + +mul({{'cond',Cond},{s,S},{r,Rd},{r,Rm},{r,Rs}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rs =/= 15), % UNPREDICTABLE + ?ASSERT(Rd =/= Rm), % UNPREDICTABLE + 'cond'(Cond) bor ?BIT(20,S) bor ?BF(19,16,Rd) bor ?BF(11,8,Rs) bor ?BF(7,4,2#1001) bor ?BF(3,0,Rm). + +ml_form2(OpCode, Cond, S, RdLo, RdHi, Rm, Rs) -> + ?ASSERT(RdHi =/= 15), % UNPREDICTABLE + ?ASSERT(RdLo =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rs =/= 15), % UNPREDICTABLE + ?ASSERT(RdHi =/= RdLo),% UNPREDICTABLE + ?ASSERT(RdHi =/= Rm), % UNPREDICTABLE + ?ASSERT(RdLo =/= Rm), % UNPREDICTABLE + 'cond'(Cond) bor ?BF(27,21,OpCode) bor ?BIT(20,S) bor ?BF(19,16,RdHi) bor ?BF(15,12,RdLo) bor ?BF(11,8,Rs) bor ?BF(7,4,2#1001) bor ?BF(3,0,Rm). + +ml_form(OpCode, {{'cond',Cond},{s,S},{r,RdLo},{r,RdHi},{r,Rm},{r,Rs}}) -> + ml_form2(OpCode, Cond, S, RdLo, RdHi, Rm, Rs). + +%%smlal(Opnds) -> ml_form(2#0000111, Opnds). +smull(Opnds) -> ml_form(2#0000110, Opnds). +umlal(Opnds) -> ml_form(2#0000101, Opnds). +umull(Opnds) -> ml_form(2#0000100, Opnds). + +swi({{'cond',Cond},{imm24,Imm24}}) -> + 'cond'(Cond) bor ?BF(27,24,2#1111) bor ?BF(23,0,Imm24). + +swp_form(B22, {{'cond',Cond},{r,Rd},{r,Rm},{r,Rn}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= Rm), % UNPREDICTABLE + ?ASSERT(Rn =/= Rd), % UNPREDICTABLE + 'cond'(Cond) bor ?BIT(24,1) bor ?BIT(22,B22) bor ?BF(19,16,Rn) bor ?BF(15,12,Rd) bor ?BF(7,4,2#1001) bor ?BF(3,0,Rm). + +swp(Opnds) -> swp_form(0, Opnds). +swpb(Opnds) -> swp_form(1, Opnds). + +%%% +%%% Enhanced DSP Extension Instructions +%%% + +ldstrd_form(OpCode, {{'cond',Cond},{r,Rd},AddressingMode}) -> + ?ASSERT(Rd =/= 14), % UNPREDICTABLE + ?ASSERT((Rd band 1) =:= 0), % UNDEFINED + %% XXX: unpredictable if write-back and base reg Rn equals Rd or Rd+1 + %% XXX: if is load then unpredictable if index reg Rm and Rm equals Rd or Rd+1 + 'cond'(Cond) bor am3_miscls(Rd, AddressingMode) bor ?BF(15,12,Rd) bor ?BF(7,4,OpCode). + +ldrd(Opnds) -> ldstrd_form(2#1101, Opnds). +strd(Opnds) -> ldstrd_form(2#1111, Opnds). + +mcrr({{'cond',Cond},{cpnum,CpNum},{cpop4,OP},{r,Rd},{r,Rn},{cr,CRm}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + 'cond'(Cond) bor ?BF(27,20,2#11000100) bor ?BF(19,16,Rn) bor ?BF(15,12,Rd) bor ?BF(11,8,CpNum) bor ?BF(7,4,OP) bor ?BF(3,0,CRm). + +mrrc({{'cond',Cond},{cpnum,CpNum},{cpop4,OP},{r,Rd},{r,Rn},{cr,CRm}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + ?ASSERT(Rd =/= Rn), % UNPREDICTABLE + 'cond'(Cond) bor ?BF(27,20,2#11000101) bor ?BF(19,16,Rn) bor ?BF(15,12,Rd) bor ?BF(11,8,CpNum) bor ?BF(7,4,OP) bor ?BF(3,0,CRm). + +pld({AddressingMode}) -> + AM = am2_lswub(42, AddressingMode), % 42 is a dummy reg nr + %% not all adressing modes are allowed: bit 24 must be 1 + %% and bit 21 must be 0 + ?ASSERT(((AM bsr 21) band 2#1001) =:= 2#1000), + 16#F550F000 bor AM. + +q_form(OpCode, {{'cond',Cond},{r,Rd},{r,Rm},{r,Rn}}) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + 'cond'(Cond) bor ?BF(27,20,OpCode) bor ?BF(19,16,Rn) bor ?BF(15,11,Rd) bor ?BF(7,4,2#0101) bor ?BF(3,0,Rm). + +qadd(Opnds) -> q_form(2#00010000, Opnds). +qdadd(Opnds) -> q_form(2#00010100, Opnds). +qdsub(Opnds) -> q_form(2#00010110, Opnds). +qsub(Opnds) -> q_form(2#00010010, Opnds). + +smlaxy_form(Cond, OpCode, Rd, Rn, Rs, Y, X, Rm) -> + ?ASSERT(Rd =/= 15), % UNPREDICTABLE + ?ASSERT(Rm =/= 15), % UNPREDICTABLE + ?ASSERT(Rs =/= 15), % UNPREDICTABLE + ?ASSERT(Rn =/= 15), % UNPREDICTABLE + 'cond'(Cond) bor ?BF(27,20,OpCode) bor ?BF(19,16,Rd) bor ?BF(15,12,Rn) bor ?BF(11,8,Rs) bor ?BIT(7,1) bor ?BIT(6,Y) bor ?BIT(5,X) bor ?BF(3,0,Rm). + +smla({{bt,X},{bt,Y},{'cond',Cond},{r,Rd},{r,Rm},{r,Rs},{r,Rn}}) -> + smlaxy_form(Cond, 2#00010000, Rd, Rn, Rs, Y, X, Rm). + +smlal(Opnds) -> % may be regular ARM or DSP insn :-( + case Opnds of + {{'cond',Cond},{s,S},{r,RdLo},{r,RdHi},{r,Rm},{r,Rs}} -> + ml_form2(2#0000111, Cond, S, RdLo, RdHi, Rm, Rs); + {{bt,X},{bt,Y},{'cond',Cond},{r,RdLo},{r,RdHi},{r,Rm},{r,Rs}} -> + ?ASSERT(RdLo =/= RdHi), % UNPREDICTABLE + smlaxy_form(Cond, 2#00010100, RdHi, RdLo, Rs, Y, X, Rm) + end. + +smlaw({{bt,Y},{'cond',Cond},{r,Rd},{r,Rm},{r,Rs},{r,Rn}}) -> + smlaxy_form(Cond, 2#00010010, Rd, Rn, Rs, Y, 0, Rm). + +smul({{bt,X},{bt,Y},{'cond',Cond},{r,Rd},{r,Rm},{r,Rs}}) -> + smlaxy_form(Cond, 2#00010110, Rd, 0, Rs, Y, X, Rm). + +smulw({{bt,Y},{'cond',Cond},{r,Rd},{r,Rm},{r,Rs}}) -> + smlaxy_form(Cond, 2#00010010, Rd, 0, Rs, Y, 1, Rm). + +%%% +%%% Main Encode Dispatch +%%% + +insn_encode(Op, Opnds) -> + case Op of + 'adc' -> adc(Opnds); + 'add' -> add(Opnds); + 'and' -> 'and'(Opnds); + 'b' -> b(Opnds); + 'bic' -> bic(Opnds); + 'bkpt' -> bkpt(Opnds); + 'bl' -> bl(Opnds); + 'blx' -> blx(Opnds); + 'bx' -> bx(Opnds); + 'cdp' -> cdp(Opnds); + 'cdp2' -> cdp2(Opnds); + 'clz' -> clz(Opnds); + 'cmn' -> cmn(Opnds); + 'cmp' -> cmp(Opnds); + 'eor' -> eor(Opnds); + 'ldc' -> ldc(Opnds); + 'ldc2' -> ldc2(Opnds); + 'ldm' -> ldm(Opnds); + 'ldr' -> ldr(Opnds); + 'ldrb' -> ldrb(Opnds); + 'ldrd' -> ldrd(Opnds); + %% ldrbt: omitted + 'ldrh' -> ldrh(Opnds); + 'ldrsb' -> ldrsb(Opnds); + 'ldrsh' -> ldrsh(Opnds); + %% ldrt: omitted + 'mcr' -> mcr(Opnds); + 'mcr2' -> mcr2(Opnds); + 'mcrr' -> mcrr(Opnds); + 'mla' -> mla(Opnds); + 'mov' -> mov(Opnds); + 'mrc' -> mrc(Opnds); + 'mrc2' -> mrc2(Opnds); + 'mrrc' -> mrrc(Opnds); + 'mrs' -> mrs(Opnds); + 'msr' -> msr(Opnds); + 'mul' -> mul(Opnds); + 'mvn' -> mvn(Opnds); + 'orr' -> orr(Opnds); + 'pld' -> pld(Opnds); + 'qadd' -> qadd(Opnds); + 'qdadd' -> qdadd(Opnds); + 'qdsub' -> qdsub(Opnds); + 'qsub' -> qsub(Opnds); + 'rsb' -> rsb(Opnds); + 'rsc' -> rsc(Opnds); + 'sbc' -> sbc(Opnds); + 'smla' -> smla(Opnds); + 'smlal' -> smlal(Opnds); % may be regular ARM or DSP insn :-( + 'smlaw' -> smlaw(Opnds); + 'smull' -> smull(Opnds); + 'smul' -> smul(Opnds); + 'smulw' -> smulw(Opnds); + 'stc' -> stc(Opnds); + 'stc2' -> stc2(Opnds); + 'stm' -> stm(Opnds); + 'str' -> str(Opnds); + 'strb' -> strb(Opnds); + %% strbt: omitted + 'strd' -> strd(Opnds); + 'strh' -> strh(Opnds); + %% strt: omitted + 'sub' -> sub(Opnds); + 'swi' -> swi(Opnds); + 'swp' -> swp(Opnds); + 'swpb' -> swpb(Opnds); + 'teq' -> teq(Opnds); + 'tst' -> tst(Opnds); + 'umlal' -> umlal(Opnds); + 'umull' -> umull(Opnds); + _ -> exit({?MODULE,insn_encode,Op}) + end. + +%%% +%%% Testing Interface +%%% + +-ifdef(TESTING). + +say(OS, Str) -> + file:write(OS, Str). + +hex_digit(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, [hex_digit(Byte bsr 4)]), + say(OS, [hex_digit(Byte)]). + +say_word(OS, Word) -> + say(OS, "0x"), + say_byte(OS, Word bsr 24), + say_byte(OS, Word bsr 16), + say_byte(OS, Word bsr 8), + say_byte(OS, Word). + +t(OS, Op, Opnds) -> + Word = insn_encode(Op, Opnds), + say(OS, "\t.long "), + say_word(OS, Word), + say(OS, "\n"). + +dotest1(OS) -> + say(OS, "\t.text\n\t.align 4\n"), + %% + Rn = {r,9}, + Rd = {r,8}, % must be even and less than 14 for some insns + Rm = {r,7}, + Rs = {r,6}, + RdLo = Rn, + RdHi = Rd, + Registers = [Rm,Rs,Rd], % must exclude Rn for some insns + CRd = {cr,15}, + CRn = {cr,14}, + CRm = {cr,13}, + BT0 = {bt,0}, + BT1 = {bt,1}, + CpNum = {cpnum,15}, + CpOp3 = {cpop3,16#3}, + CpOp4 = {cpop4,16#F}, + L0 = {l,0}, + L1 = {l,1}, + S0 = {s,0}, + S1 = {s,1}, + FieldMask4 = {field_mask,16#F}, + Imm4 = {imm4,16#F}, + Imm5 = {imm5,16#1F}, + Imm8 = {imm8,16#FF}, + Imm12 = {imm12,16#FFF}, + Imm16 = {imm16,16#FFFF}, + Imm24 = {imm24,16#FFFFF}, + Imm25 = {imm25,16#FFFFF1}, + %% + AM1_1 = {Imm8,Imm4}, + AM1_2 = Rm, + AM1_3_1 = {Rm,{'lsl',Imm5}}, + AM1_3_2 = {Rm,{'lsr',Imm5}}, + AM1_3_3 = {Rm,{'asr',Imm5}}, + AM1_3_4 = {Rm,{'ror',Imm5}}, + AM1_3_5 = {Rm,{'lsl',Rs}}, + AM1_3_6 = {Rm,{'lsr',Rs}}, + AM1_3_7 = {Rm,{'asr',Rs}}, + AM1_3_8 = {Rm,{'ror',Rs}}, + AM1_3_9 = {Rm,'rrx'}, + %% + AM2ShiftOp1 = {'lsl',Imm5}, + AM2ShiftOp2 = {'lsr',Imm5}, + AM2ShiftOp3 = {'asr',Imm5}, + AM2ShiftOp4 = {'ror',Imm5}, + AM2ShiftOp5 = 'rrx', + SignP = '+', + SignM = '-', + AM2_1_1 = {immediate_offset,Rn,SignP,Imm12}, + AM2_1_2 = {immediate_offset,Rn,SignM,Imm12}, + AM2_2_1 = {register_offset,Rn,SignP,Rm}, + AM2_2_2 = {register_offset,Rn,SignM,Rm}, + AM2_3_1 = {scaled_register_offset,Rn,SignP,Rm,AM2ShiftOp1}, + AM2_3_2 = {scaled_register_offset,Rn,SignM,Rm,AM2ShiftOp2}, + AM2_3_3 = {scaled_register_offset,Rn,SignP,Rm,AM2ShiftOp3}, + AM2_3_4 = {scaled_register_offset,Rn,SignM,Rm,AM2ShiftOp4}, + AM2_3_5 = {scaled_register_offset,Rn,SignP,Rm,AM2ShiftOp5}, + AM2_4_1 = {immediate_pre_indexed,Rn,SignP,Imm12}, + AM2_4_2 = {immediate_pre_indexed,Rn,SignM,Imm12}, + AM2_5_1 = {register_pre_indexed,Rn,SignP,Rm}, + AM2_5_2 = {register_pre_indexed,Rn,SignM,Rm}, + AM2_6_1 = {scaled_register_pre_indexed,Rn,SignP,Rm,AM2ShiftOp1}, + AM2_6_2 = {scaled_register_pre_indexed,Rn,SignM,Rm,AM2ShiftOp2}, + AM2_6_3 = {scaled_register_pre_indexed,Rn,SignP,Rm,AM2ShiftOp3}, + AM2_6_4 = {scaled_register_pre_indexed,Rn,SignM,Rm,AM2ShiftOp4}, + AM2_6_5 = {scaled_register_pre_indexed,Rn,SignP,Rm,AM2ShiftOp5}, + AM2_7_1 = {immediate_post_indexed,Rn,SignP,Imm12}, + AM2_7_2 = {immediate_post_indexed,Rn,SignM,Imm12}, + AM2_8_1 = {register_post_indexed,Rn,SignP,Rm}, + AM2_8_2 = {register_post_indexed,Rn,SignM,Rm}, + AM2_9_1 = {scaled_register_post_indexed,Rn,SignP,Rm,AM2ShiftOp1}, + AM2_9_2 = {scaled_register_post_indexed,Rn,SignM,Rm,AM2ShiftOp2}, + AM2_9_3 = {scaled_register_post_indexed,Rn,SignP,Rm,AM2ShiftOp3}, + AM2_9_4 = {scaled_register_post_indexed,Rn,SignM,Rm,AM2ShiftOp4}, + AM2_9_5 = {scaled_register_post_indexed,Rn,SignP,Rm,AM2ShiftOp5}, + %% + AM3_1_1 = {immediate_offset,Rn,SignP,Imm8}, + AM3_1_2 = {immediate_offset,Rn,SignM,Imm8}, + AM3_2_1 = {register_offset,Rn,SignP,Rm}, + AM3_2_2 = {register_offset,Rn,SignM,Rm}, + AM3_3_1 = {immediate_pre_indexed,Rn,SignP,Imm8}, + AM3_3_2 = {immediate_pre_indexed,Rn,SignM,Imm8}, + AM3_4_1 = {register_pre_indexed,Rn,SignP,Rm}, + AM3_4_2 = {register_pre_indexed,Rn,SignM,Rm}, + AM3_5_1 = {immediate_post_indexed,Rn,SignP,Imm8}, + AM3_5_2 = {immediate_post_indexed,Rn,SignM,Imm8}, + AM3_6_1 = {register_post_indexed,Rn,SignP,Rm}, + AM3_6_2 = {register_post_indexed,Rn,SignM,Rm}, + %% + AM4_1 = 'ia', + AM4_2 = 'ib', + AM4_3 = 'da', + AM4_4 = 'db', + AM4_5 = 'fa', + AM4_6 = 'fd', + AM4_7 = 'ea', + AM4_8 = 'ed', + %% + AM5_1_1 = {offset,Rn,SignP,Imm8}, + AM5_1_2 = {offset,Rn,SignM,Imm8}, + AM5_2_1 = {pre_indexed,Rn,SignP,Imm8}, + AM5_2_2 = {pre_indexed,Rn,SignM,Imm8}, + AM5_3_1 = {post_indexed,Rn,SignP,Imm8}, + AM5_3_2 = {post_indexed,Rn,SignM,Imm8}, + AM5_4 = {unindexed,Rn,Imm8}, + %% + Cond_eq = {'cond','eq'}, + Cond_ne = {'cond','ne'}, + Cond_cs = {'cond','cs'}, + Cond_hs = {'cond','hs'}, + Cond_cc = {'cond','cc'}, + Cond_lo = {'cond','lo'}, + Cond_mi = {'cond','mi'}, + Cond_pl = {'cond','pl'}, + Cond_vs = {'cond','vs'}, + Cond_vc = {'cond','vc'}, + Cond_hi = {'cond','hi'}, + Cond_ls = {'cond','ls'}, + Cond_ge = {'cond','ge'}, + Cond_lt = {'cond','lt'}, + Cond_gt = {'cond','gt'}, + Cond_le = {'cond','le'}, + Cond_al = {'cond','al'}, + %% + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_1}), % test all AM1 operands + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_2}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_1}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_2}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_3}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_4}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_5}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_6}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_7}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_8}), + t(OS,'adc',{Cond_al,S0,Rd,Rn,AM1_3_9}), + t(OS,'add',{Cond_al,S0,Rd,Rn,AM1_1}), % test all S operands + t(OS,'add',{Cond_al,S1,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_eq,S0,Rd,Rn,AM1_1}), % test all Cond operands + t(OS,'and',{Cond_ne,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_cs,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_hs,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_cc,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_lo,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_mi,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_pl,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_vs,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_vc,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_hi,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_ls,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_ge,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_lt,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_gt,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_le,S0,Rd,Rn,AM1_1}), + t(OS,'and',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'b',{Cond_al,Imm24}), + t(OS,'bic',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'bkpt',{Imm16}), + t(OS,'bl',{Cond_al,Imm24}), + t(OS,'blx',{Imm25}), + t(OS,'blx',{Cond_al,Rm}), + t(OS,'bx',{Cond_al,Rm}), + t(OS,'cdp',{Cond_al,CpNum,CpOp4,CRd,CRn,CRm,CpOp3}), + t(OS,'cdp2',{CpNum,CpOp4,CRd,CRn,CRm,CpOp3}), + t(OS,'clz',{Cond_al,Rd,Rm}), + t(OS,'cmn',{Cond_al,Rn,AM1_1}), + t(OS,'cmp',{Cond_al,Rn,AM1_1}), + t(OS,'eor',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'ldc',{Cond_al,L0,CpNum,CRd,AM5_1_1}), % test all AM5 operands + t(OS,'ldc',{Cond_al,L1,CpNum,CRd,AM5_1_2}), + t(OS,'ldc',{Cond_al,L0,CpNum,CRd,AM5_2_1}), + t(OS,'ldc',{Cond_al,L1,CpNum,CRd,AM5_2_2}), + t(OS,'ldc',{Cond_al,L0,CpNum,CRd,AM5_3_1}), + t(OS,'ldc',{Cond_al,L1,CpNum,CRd,AM5_3_2}), + t(OS,'ldc',{Cond_al,L0,CpNum,CRd,AM5_4}), + t(OS,'ldc2',{L0,CpNum,CRd,AM5_1_1}), + t(OS,'ldm',{Cond_al,AM4_1,Rn,'!',Registers}), + t(OS,'ldm',{Cond_al,AM4_1,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_2,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_3,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_4,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_5,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_6,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_7,Rn,Registers}), % test all AM4 operands + t(OS,'ldm',{Cond_al,AM4_8,Rn,Registers}), % test all AM4 operands + t(OS,'ldr',{Cond_al,Rd,AM2_1_1}), % test all AM2 operands + t(OS,'ldr',{Cond_al,Rd,AM2_1_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_2_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_2_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_3_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_3_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_3_3}), + t(OS,'ldr',{Cond_al,Rd,AM2_3_4}), + t(OS,'ldr',{Cond_al,Rd,AM2_3_5}), + t(OS,'ldr',{Cond_al,Rd,AM2_4_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_4_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_5_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_5_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_6_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_6_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_6_3}), + t(OS,'ldr',{Cond_al,Rd,AM2_6_4}), + t(OS,'ldr',{Cond_al,Rd,AM2_6_5}), + t(OS,'ldr',{Cond_al,Rd,AM2_7_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_7_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_8_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_8_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_9_1}), + t(OS,'ldr',{Cond_al,Rd,AM2_9_2}), + t(OS,'ldr',{Cond_al,Rd,AM2_9_3}), + t(OS,'ldr',{Cond_al,Rd,AM2_9_4}), + t(OS,'ldr',{Cond_al,Rd,AM2_9_5}), + t(OS,'ldrb',{Cond_al,Rd,AM2_1_1}), + t(OS,'ldrd',{Cond_al,Rd,AM3_1_1}), + t(OS,'ldrh',{Cond_al,Rd,AM3_1_1}), % test all AM3 operands + t(OS,'ldrh',{Cond_al,Rd,AM3_1_2}), + t(OS,'ldrh',{Cond_al,Rd,AM3_2_1}), + t(OS,'ldrh',{Cond_al,Rd,AM3_2_2}), + t(OS,'ldrh',{Cond_al,Rd,AM3_3_1}), + t(OS,'ldrh',{Cond_al,Rd,AM3_3_2}), + t(OS,'ldrh',{Cond_al,Rd,AM3_4_1}), + t(OS,'ldrh',{Cond_al,Rd,AM3_4_2}), + t(OS,'ldrh',{Cond_al,Rd,AM3_5_1}), + t(OS,'ldrh',{Cond_al,Rd,AM3_5_2}), + t(OS,'ldrh',{Cond_al,Rd,AM3_6_1}), + t(OS,'ldrh',{Cond_al,Rd,AM3_6_2}), + t(OS,'ldrsb',{Cond_al,Rd,AM3_1_1}), + t(OS,'ldrsh',{Cond_al,Rd,AM3_1_1}), + t(OS,'mcr',{Cond_al,CpNum,CpOp3,Rd,CRn,CRm,CpOp3}), + t(OS,'mcr2',{CpNum,CpOp3,Rd,CRn,CRm,CpOp3}), + t(OS,'mcrr',{Cond_al,CpNum,CpOp4,Rd,Rn,CRm}), + t(OS,'mla',{Cond_al,S0,Rd,Rm,Rs,Rn}), + t(OS,'mov',{Cond_al,S0,Rd,AM1_1}), + t(OS,'mrc',{Cond_al,CpNum,CpOp3,Rd,CRn,CRm,CpOp3}), + t(OS,'mrc2',{CpNum,CpOp3,Rd,CRn,CRm,CpOp3}), + t(OS,'mrrc',{Cond_al,CpNum,CpOp4,Rd,Rn,CRm}), + t(OS,'mrs',{Cond_al,Rd,'cpsr'}), + t(OS,'msr',{Cond_al,'cpsr',FieldMask4,Imm8,Imm4}), + t(OS,'msr',{Cond_al,'cpsr',FieldMask4,Rm}), + t(OS,'mul',{Cond_al,S0,Rd,Rm,Rs}), + t(OS,'mvn',{Cond_al,S1,Rd,AM1_1}), + t(OS,'orr',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'pld',{AM2_1_1}), + t(OS,'qadd',{Cond_al,Rd,Rm,Rn}), + t(OS,'qdadd',{Cond_al,Rd,Rm,Rn}), + t(OS,'qdsub',{Cond_al,Rd,Rm,Rn}), + t(OS,'qsub',{Cond_al,Rd,Rm,Rn}), + t(OS,'rsb',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'rsc',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'sbc',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'smla',{BT0,BT0,Cond_al,Rd,Rm,Rs,Rn}), + t(OS,'smla',{BT0,BT1,Cond_al,Rd,Rm,Rs,Rn}), + t(OS,'smla',{BT1,BT0,Cond_al,Rd,Rm,Rs,Rn}), + t(OS,'smla',{BT1,BT1,Cond_al,Rd,Rm,Rs,Rn}), + t(OS,'smlal',{Cond_al,S0,RdLo,RdHi,Rm,Rs}), + t(OS,'smlal',{BT0,BT1,Cond_al,RdLo,RdHi,Rm,Rs}), + t(OS,'smlaw',{BT1,Cond_al,Rd,Rm,Rs,Rn}), + t(OS,'smull',{Cond_al,S0,RdLo,RdHi,Rm,Rs}), + t(OS,'smul',{BT1,BT0,Cond_al,Rd,Rm,Rs}), + t(OS,'smulw',{BT1,Cond_al,Rd,Rm,Rs}), + t(OS,'stc',{Cond_al,L0,CpNum,CRd,AM5_1_1}), + t(OS,'stc2',{L0,CpNum,CRd,AM5_1_1}), + t(OS,'stm',{Cond_al,AM4_1,Rn,Registers}), + t(OS,'str',{Cond_al,Rd,AM2_1_1}), + t(OS,'strb',{Cond_al,Rd,AM2_1_1}), + t(OS,'strd',{Cond_al,Rd,AM3_1_1}), + t(OS,'strh',{Cond_al,Rd,AM3_1_1}), + t(OS,'sub',{Cond_al,S0,Rd,Rn,AM1_1}), + t(OS,'swi',{Cond_al,Imm24}), + t(OS,'swp',{Cond_al,Rd,Rm,Rn}), + t(OS,'swpb',{Cond_al,Rd,Rm,Rn}), + t(OS,'teq',{Cond_al,Rn,AM1_1}), + t(OS,'tst',{Cond_al,Rn,AM1_1}), + t(OS,'umlal',{Cond_al,S0,RdLo,RdHi,Rm,Rs}), + t(OS,'umull',{Cond_al,S0,RdLo,RdHi,Rm,Rs}), + []. + +dotest() -> dotest1(group_leader()). + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). + +-endif. diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl new file mode 100644 index 0000000000..38e3efd223 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_finalise.erl @@ -0,0 +1,73 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_finalise). +-export([finalise/1]). +-include("hipe_arm.hrl"). + +finalise(Defun) -> + #defun{code=Code0} = Defun, + Code1 = peep(expand(Code0)), + Defun#defun{code=Code1}. + +expand(Insns) -> + expand_list(Insns, []). + +expand_list([I|Insns], Accum) -> + expand_list(Insns, expand_insn(I, Accum)); +expand_list([], Accum) -> + lists:reverse(Accum). + +expand_insn(I, Accum) -> + case I of + #pseudo_bc{'cond'=Cond,true_label=TrueLab,false_label=FalseLab} -> + [hipe_arm:mk_b_label(FalseLab), + hipe_arm:mk_b_label(Cond, TrueLab) | + Accum]; + #pseudo_blr{} -> + [hipe_arm:mk_move(hipe_arm:mk_pc(), hipe_arm:mk_lr()) | Accum]; + #pseudo_bx{src=Src} -> + [hipe_arm:mk_move(hipe_arm:mk_pc(), Src) | Accum]; + #pseudo_call{funv=FunV,sdesc=SDesc,contlab=ContLab,linkage=Linkage} -> + [hipe_arm:mk_b_label(ContLab), + case FunV of + #arm_temp{} -> hipe_arm:mk_blx(FunV, SDesc); + _ -> hipe_arm:mk_bl(FunV, SDesc, Linkage) + end | + Accum]; + #pseudo_switch{jtab=JTab,index=Index} -> + PC = hipe_arm:mk_pc(), + Am2 = hipe_arm:mk_am2(JTab, '+', {Index,'lsl',2}), + [hipe_arm:mk_load('ldr', PC, Am2) | Accum]; + #pseudo_tailcall_prepare{} -> + Accum; + _ -> + [I|Accum] + end. + +peep(Insns) -> + peep_list(Insns, []). + +peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> + peep_list(Insns, Accum); +peep_list([I|Insns], Accum) -> + peep_list(Insns, [I|Accum]); +peep_list([], Accum) -> + lists:reverse(Accum). diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl new file mode 100644 index 0000000000..316aa2ef82 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_frame.erl @@ -0,0 +1,639 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_frame). +-export([frame/1]). + +-include("hipe_arm.hrl"). +-include("../rtl/hipe_literals.hrl"). + +-define(LIVENESS_ALL, hipe_arm_liveness_gpr). % since we have no FP yet + +frame(Defun) -> + Formals = fix_formals(hipe_arm:defun_formals(Defun)), + Temps0 = all_temps(hipe_arm:defun_code(Defun), Formals), + MinFrame = defun_minframe(Defun), + Temps = ensure_minframe(MinFrame, Temps0), + ClobbersLR = clobbers_lr(hipe_arm:defun_code(Defun)), + CFG0 = hipe_arm_cfg:init(Defun), + Liveness = ?LIVENESS_ALL:analyse(CFG0), + CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), + hipe_arm_cfg:linearise(CFG1). + +fix_formals(Formals) -> + fix_formals(hipe_arm_registers:nr_args(), Formals). + +fix_formals(0, Rest) -> Rest; +fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest); +fix_formals(_, []) -> []. + +do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> + Context = mk_context(Liveness, Formals, Temps, ClobbersLR), + CFG1 = do_blocks(CFG0, Context), + do_prologue(CFG1, Context). + +do_blocks(CFG, Context) -> + Labels = hipe_arm_cfg:labels(CFG), + do_blocks(Labels, CFG, Context). + +do_blocks([Label|Labels], CFG, Context) -> + Liveness = context_liveness(Context), + LiveOut = ?LIVENESS_ALL:liveout(Liveness, Label), + Block = hipe_arm_cfg:bb(CFG, Label), + Code = hipe_bb:code(Block), + NewCode = do_block(Code, LiveOut, Context), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_arm_cfg:bb_add(CFG, Label, NewBlock), + do_blocks(Labels, NewCFG, Context); +do_blocks([], CFG, _) -> + CFG. + +do_block(Insns, LiveOut, Context) -> + do_block(Insns, LiveOut, Context, context_framesize(Context), []). + +do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> + {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), + do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); +do_block([], _, Context, FPoff, RevCode) -> + FPoff0 = context_framesize(Context), + if FPoff =:= FPoff0 -> []; + true -> exit({?MODULE,do_block,FPoff}) + end, + lists:reverse(RevCode, []). + +do_insn(I, LiveOut, Context, FPoff) -> + case I of + #pseudo_blr{} -> + {do_pseudo_blr(I, Context, FPoff), context_framesize(Context)}; + #pseudo_call{} -> + do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_call_prepare{} -> + do_pseudo_call_prepare(I, FPoff); + #pseudo_move{} -> + {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_tailcall{} -> + {do_pseudo_tailcall(I, Context), context_framesize(Context)}; + _ -> + {[I], FPoff} + end. + +%%% +%%% Moves, with Dst or Src possibly a pseudo +%%% + +do_pseudo_move(I, Context, FPoff) -> + Dst = hipe_arm:pseudo_move_dst(I), + Src = hipe_arm:pseudo_move_src(I), + case temp_is_pseudo(Dst) of + true -> + Offset = pseudo_offset(Dst, FPoff, Context), + mk_store('str', Src, Offset, mk_sp(), []); + _ -> + case temp_is_pseudo(Src) of + true -> + Offset = pseudo_offset(Src, FPoff, Context), + mk_load('ldr', Dst, Offset, mk_sp(), []); + _ -> + [hipe_arm:mk_move(Dst, Src)] + end + end. + +pseudo_offset(Temp, FPoff, Context) -> + FPoff + context_offset(Context, Temp). + +%%% +%%% Return - deallocate frame and emit 'ret $N' insn. +%%% + +do_pseudo_blr(I, Context, FPoff) -> + %% XXX: perhaps use explicit pseudo_move;mtlr, + %% avoiding the need to hard-code Temp1 here + %% XXX: typically only one instruction between + %% the mtlr and the blr, ouch + restore_lr(FPoff, Context, + adjust_sp(FPoff + word_size() * context_arity(Context), + [I])). + +restore_lr(FPoff, Context, Rest) -> + case context_clobbers_lr(Context) of + false -> Rest; + true -> + LR = hipe_arm:mk_lr(), + mk_load('ldr', LR, FPoff - word_size(), mk_sp(), + Rest) + end. + +adjust_sp(N, Rest) -> + if N =:= 0 -> + Rest; + true -> + SP = mk_sp(), + hipe_arm:mk_addi(SP, SP, N, Rest) + end. + +%%% +%%% Recursive calls. +%%% + +do_pseudo_call_prepare(I, FPoff0) -> + %% Create outgoing arguments area on the stack. + NrStkArgs = hipe_arm:pseudo_call_prepare_nrstkargs(I), + Offset = NrStkArgs * word_size(), + {adjust_sp(-Offset, []), FPoff0 + Offset}. + +do_pseudo_call(I, LiveOut, Context, FPoff0) -> + #arm_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_arm:pseudo_call_sdesc(I), + FunV = hipe_arm:pseudo_call_funv(I), + LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)], + SDesc = mk_sdesc(ExnLab, Context, LiveTemps), + ContLab = hipe_arm:pseudo_call_contlab(I), + Linkage = hipe_arm:pseudo_call_linkage(I), + CallCode = [hipe_arm:mk_pseudo_call(FunV, SDesc, ContLab, Linkage)], + StkArity = erlang:max(0, OrigArity - hipe_arm_registers:nr_args()), + context_need_stack(Context, stack_need(FPoff0, StkArity, FunV)), + ArgsBytes = word_size() * StkArity, + {CallCode, FPoff0 - ArgsBytes}. + +stack_need(FPoff, StkArity, FunV) -> + case FunV of + #arm_prim{} -> FPoff; + #arm_mfa{m=M,f=F,a=A} -> + case erlang:is_builtin(M, F, A) of + true -> FPoff; + false -> stack_need_general(FPoff, StkArity) + end; + _ -> stack_need_general(FPoff, StkArity) + end. + +stack_need_general(FPoff, StkArity) -> + erlang:max(FPoff, FPoff + (?ARM_LEAF_WORDS - StkArity) * word_size()). + +%%% +%%% Create stack descriptors for call sites. +%%% + +mk_sdesc(ExnLab, Context, Temps) -> % for normal calls + Temps0 = only_tagged(Temps), + Live = mk_live(Context, Temps0), + Arity = context_arity(Context), + FSize = context_framesize(Context), + hipe_arm:mk_sdesc(ExnLab, (FSize div word_size())-1, Arity, + list_to_tuple(Live)). + +only_tagged(Temps)-> + [X || X <- Temps, hipe_arm:temp_type(X) =:= 'tagged']. + +mk_live(Context, Temps) -> + lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]). + +temp_to_slot(Context, Temp) -> + (context_framesize(Context) + context_offset(Context, Temp)) + div word_size(). + +mk_minimal_sdesc(Context) -> % for inc_stack_0 calls + hipe_arm:mk_sdesc([], 0, context_arity(Context), {}). + +%%% +%%% Tailcalls. +%%% + +do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context) + Arity = context_arity(Context), + Args = hipe_arm:pseudo_tailcall_stkargs(I), + FunV = hipe_arm:pseudo_tailcall_funv(I), + Linkage = hipe_arm:pseudo_tailcall_linkage(I), + {Insns, FPoff1} = do_tailcall_args(Args, Context), + context_need_stack(Context, FPoff1), + StkArity = length(Args), + FPoff2 = FPoff1 + (Arity - StkArity) * word_size(), + context_need_stack(Context, stack_need(FPoff2, StkArity, FunV)), + I2 = + case FunV of + #arm_temp{} -> + hipe_arm:mk_bx(FunV); + Fun -> + hipe_arm:mk_b_fun(Fun, Linkage) + end, + %% XXX: break out the LR restore, just like for pseudo_blr? + restore_lr(context_framesize(Context), Context, + Insns ++ adjust_sp(FPoff2, [I2])). + +do_tailcall_args(Args, Context) -> + FPoff0 = context_framesize(Context), + Arity = context_arity(Context), + FrameTop = word_size()*Arity, + DangerOff = FrameTop - word_size()*length(Args), + %% + Moves = mk_moves(Args, FrameTop, []), + %% + {Stores, Simple, Conflict} = + split_moves(Moves, Context, DangerOff, [], [], []), + %% sanity check (shouldn't trigger any more) + if DangerOff < -FPoff0 -> + exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0}); + true -> [] + end, + FPoff1 = FPoff0, + %% + {Pushes, Pops, FPoff2} = split_conflict(Conflict, FPoff1, [], []), + %% + TempReg = hipe_arm_registers:temp1(), + %% + {adjust_sp(-(FPoff2 - FPoff1), + simple_moves(Pushes, FPoff2, TempReg, + store_moves(Stores, FPoff2, TempReg, + simple_moves(Simple, FPoff2, TempReg, + simple_moves(Pops, FPoff2, TempReg, + []))))), + FPoff2}. + +mk_moves([Arg|Args], Off, Moves) -> + Off1 = Off - word_size(), + mk_moves(Args, Off1, [{Arg,Off1}|Moves]); +mk_moves([], _, Moves) -> + Moves. + +split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) -> + {Src,DstOff} = Move, + case src_is_pseudo(Src) of + false -> + split_moves(Moves, Context, DangerOff, [Move|Stores], + Simple, Conflict); + true -> + SrcOff = context_offset(Context, Src), + Type = typeof_temp(Src), + if SrcOff =:= DstOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, Conflict); + SrcOff >= DangerOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, [{SrcOff,DstOff,Type}|Conflict]); + true -> + split_moves(Moves, Context, DangerOff, Stores, + [{SrcOff,DstOff,Type}|Simple], Conflict) + end + end; +split_moves([], _, _, Stores, Simple, Conflict) -> + {Stores, Simple, Conflict}. + +split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Pops) -> + FPoff1 = FPoff + word_size(), + Push = {SrcOff,-FPoff1,Type}, + Pop = {-FPoff1,DstOff,Type}, + split_conflict(Conflict, FPoff1, [Push|Pushes], [Pop|Pops]); +split_conflict([], FPoff, Pushes, Pops) -> + {lists:reverse(Pushes), Pops, FPoff}. + +simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) -> + Temp = hipe_arm:mk_temp(TempReg, Type), + SP = mk_sp(), + LoadOff = FPoff+SrcOff, + StoreOff = FPoff+DstOff, + simple_moves(Moves, FPoff, TempReg, + mk_load('ldr', Temp, LoadOff, SP, + mk_store('str', Temp, StoreOff, SP, + Rest))); +simple_moves([], _, _, Rest) -> + Rest. + +store_moves([{Src,DstOff}|Moves], FPoff, TempReg, Rest) -> + %%Type = typeof_temp(Src), + SP = mk_sp(), + StoreOff = FPoff+DstOff, + {NewSrc,FixSrc} = + case hipe_arm:is_temp(Src) of + true -> + {Src, []}; + _ -> + Temp = hipe_arm:mk_temp(TempReg, 'untagged'), + {Temp, hipe_arm:mk_li(Temp, Src)} + end, + store_moves(Moves, FPoff, TempReg, + FixSrc ++ mk_store('str', NewSrc, StoreOff, SP, Rest)); +store_moves([], _, _, Rest) -> + Rest. + +%%% +%%% Contexts +%%% + +-record(context, {liveness, framesize, arity, map, clobbers_lr, ref_maxstack}). + +mk_context(Liveness, Formals, Temps, ClobbersLR) -> + {Map, MinOff} = mk_temp_map(Formals, ClobbersLR, Temps), + FrameSize = (-MinOff), + RefMaxStack = hipe_bifs:ref(FrameSize), + #context{liveness=Liveness, + framesize=FrameSize, arity=length(Formals), + map=Map, clobbers_lr=ClobbersLR, ref_maxstack=RefMaxStack}. + +context_need_stack(#context{ref_maxstack=RM}, N) -> + M = hipe_bifs:ref_get(RM), + if N > M -> hipe_bifs:ref_set(RM, N); + true -> [] + end. + +context_maxstack(#context{ref_maxstack=RM}) -> + hipe_bifs:ref_get(RM). + +context_arity(#context{arity=Arity}) -> + Arity. + +context_framesize(#context{framesize=FrameSize}) -> + FrameSize. + +context_liveness(#context{liveness=Liveness}) -> + Liveness. + +context_offset(#context{map=Map}, Temp) -> + tmap_lookup(Map, Temp). + +context_clobbers_lr(#context{clobbers_lr=ClobbersLR}) -> ClobbersLR. + +mk_temp_map(Formals, ClobbersLR, Temps) -> + {Map, 0} = enter_vars(Formals, word_size() * length(Formals), + tmap_empty()), + TempsList = tset_to_list(Temps), + AllTemps = + case ClobbersLR of + false -> TempsList; + true -> + RA = hipe_arm:mk_new_temp('untagged'), + [RA|TempsList] + end, + enter_vars(AllTemps, 0, Map). + +enter_vars([V|Vs], PrevOff, Map) -> + Off = + case hipe_arm:temp_type(V) of + 'double' -> PrevOff - 2*word_size(); + _ -> PrevOff - word_size() + end, + enter_vars(Vs, Off, tmap_bind(Map, V, Off)); +enter_vars([], Off, Map) -> + {Map, Off}. + +tmap_empty() -> + gb_trees:empty(). + +tmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +tmap_lookup(Map, Key) -> + gb_trees:get(Key, Map). + +%%% +%%% do_prologue: prepend stack frame allocation code. +%%% +%%% NewStart: +%%% temp1 = *(P + P_SP_LIMIT) +%%% temp2 = SP - MaxStack +%%% cmp temp2, temp1 +%%% if (ltu) goto IncStack else goto AllocFrame +%%% AllocFrame: +%%% SP = temp2 [if FrameSize == MaxStack] +%%% SP -= FrameSize [if FrameSize != MaxStack] +%%% *(SP + FrameSize-WordSize) = LR [if ClobbersLR] +%%% goto OldStart +%%% OldStart: +%%% ... +%%% IncStack: +%%% temp1 = LR +%%% bl inc_stack +%%% LR = temp1 +%%% goto NewStart + +do_prologue(CFG, Context) -> + MaxStack = context_maxstack(Context), + if MaxStack > 0 -> + FrameSize = context_framesize(Context), + OldStartLab = hipe_arm_cfg:start_label(CFG), + NewStartLab = hipe_gensym:get_next_label(arm), + %% + P = hipe_arm:mk_temp(hipe_arm_registers:proc_pointer(), 'untagged'), + Temp1 = mk_temp1(), + SP = mk_sp(), + %% + LR = hipe_arm:mk_lr(), + ClobbersLR = context_clobbers_lr(Context), + GotoOldStartCode = [hipe_arm:mk_b_label(OldStartLab)], + AllocFrameCodeTail = + case ClobbersLR of + false -> GotoOldStartCode; + true -> mk_store('str', LR, FrameSize-word_size(), SP, GotoOldStartCode) + end, + %% + Arity = context_arity(Context), + Guaranteed = erlang:max(0, (?ARM_LEAF_WORDS - Arity) * word_size()), + %% + {CFG1,NewStartCode} = + if MaxStack =< Guaranteed -> + %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameCode = adjust_sp(-FrameSize, AllocFrameCodeTail), + NewStartCode0 = AllocFrameCode, % no mflr needed + {CFG,NewStartCode0}; + true -> + %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameLab = hipe_gensym:get_next_label(arm), + IncStackLab = hipe_gensym:get_next_label(arm), + Temp2 = mk_temp2(), + %% + NewStartCodeTail2 = + [hipe_arm:mk_pseudo_bc('lo', IncStackLab, AllocFrameLab, 0.01)], + NewStartCodeTail1 = NewStartCodeTail2, % no mflr needed + NewStartCode0 = + mk_load('ldr', Temp1, ?P_NSP_LIMIT, P, + hipe_arm:mk_addi(Temp2, SP, -MaxStack, + [hipe_arm:mk_cmp('cmp', Temp2, Temp1) | + NewStartCodeTail1])), + %% + AllocFrameCode = + if MaxStack =:= FrameSize -> + %% io:format("~w: MaxStack =:= FrameSize =:= ~w :-)\n", [?MODULE,MaxStack]), + [hipe_arm:mk_move(SP, Temp2) | + AllocFrameCodeTail]; + true -> + %% io:format("~w: MaxStack ~w =/= FrameSize ~w :-(\n", [?MODULE,MaxStack,FrameSize]), + adjust_sp(-FrameSize, AllocFrameCodeTail) + end, + %% + IncStackCodeTail = + [hipe_arm:mk_bl(hipe_arm:mk_prim('inc_stack_0'), + mk_minimal_sdesc(Context), not_remote), + hipe_arm:mk_mtlr(Temp1), + hipe_arm:mk_b_label(NewStartLab)], + IncStackCode = + [hipe_arm:mk_mflr(Temp1) | IncStackCodeTail], % mflr always needed + %% + CFG0a = hipe_arm_cfg:bb_add(CFG, AllocFrameLab, + hipe_bb:mk_bb(AllocFrameCode)), + CFG0b = hipe_arm_cfg:bb_add(CFG0a, IncStackLab, + hipe_bb:mk_bb(IncStackCode)), + %% + {CFG0b,NewStartCode0} + end, + %% + CFG2 = hipe_arm_cfg:bb_add(CFG1, NewStartLab, + hipe_bb:mk_bb(NewStartCode)), + hipe_arm_cfg:start_label_update(CFG2, NewStartLab); + true -> + CFG + end. + +%%% Create a load instruction. +%%% May clobber Dst early for large offsets. In principle we could +%%% clobber TEMP2 if Dst =:= Base, but Dst =/= Base here in frame. + +mk_load(LdOp, Dst, Offset, Base, Rest) -> + hipe_arm:mk_load(LdOp, Dst, Base, Offset, 'error', Rest). + +%%% Create a store instruction. +%%% May clobber TEMP2 for large offsets. + +mk_store(StOp, Src, Offset, Base, Rest) -> + hipe_arm:mk_store(StOp, Src, Base, Offset, 'temp2', Rest). + +%%% typeof_temp -- what's temp's type? + +typeof_temp(Temp) -> + hipe_arm:temp_type(Temp). + +%%% Cons up an 'SP' Temp. + +mk_sp() -> + hipe_arm:mk_temp(hipe_arm_registers:stack_pointer(), 'untagged'). + +%%% Cons up a 'TEMP1' Temp. + +mk_temp1() -> + hipe_arm:mk_temp(hipe_arm_registers:temp1(), 'untagged'). + +%%% Cons up a 'TEMP2' Temp. + +mk_temp2() -> + hipe_arm:mk_temp(hipe_arm_registers:temp2(), 'untagged'). + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + hipe_arm:is_temp(Src) andalso temp_is_pseudo(Src). + +temp_is_pseudo(Temp) -> + not(hipe_arm:temp_is_precoloured(Temp)). + +%%% +%%% Detect if a Defun's body clobbers LR. +%%% + +clobbers_lr(Insns) -> + LRreg = hipe_arm_registers:lr(), + LRtagged = hipe_arm:mk_temp(LRreg, 'tagged'), + LRuntagged = hipe_arm:mk_temp(LRreg, 'untagged'), + clobbers_lr(Insns, LRtagged, LRuntagged). + +clobbers_lr([I|Insns], LRtagged, LRuntagged) -> + Defs = hipe_arm_defuse:insn_def_gpr(I), + case lists:member(LRtagged, Defs) of + true -> true; + false -> + case lists:member(LRuntagged, Defs) of + true -> true; + false -> clobbers_lr(Insns, LRtagged, LRuntagged) + end + end; +clobbers_lr([], _LRtagged, _LRuntagged) -> false. + +%%% +%%% Build the set of all temps used in a Defun's body. +%%% + +all_temps(Code, Formals) -> + S0 = find_temps(Code, tset_empty()), + S1 = tset_del_list(S0, Formals), + tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). + +find_temps([I|Insns], S0) -> + S1 = tset_add_list(S0, hipe_arm_defuse:insn_def_all(I)), + S2 = tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)), + find_temps(Insns, S2); +find_temps([], S) -> + S. + +tset_empty() -> + gb_sets:new(). + +tset_size(S) -> + gb_sets:size(S). + +tset_insert(S, T) -> + gb_sets:add_element(T, S). + +tset_add_list(S, Ts) -> + gb_sets:union(S, gb_sets:from_list(Ts)). + +tset_del_list(S, Ts) -> + gb_sets:subtract(S, gb_sets:from_list(Ts)). + +tset_filter(S, F) -> + gb_sets:filter(F, S). + +tset_to_list(S) -> + gb_sets:to_list(S). + +%%% +%%% Compute minimum permissible frame size, ignoring spilled temps. +%%% This is done to ensure that we won't have to adjust the frame size +%%% in the middle of a tailcall. +%%% + +defun_minframe(Defun) -> + MaxTailArity = body_mta(hipe_arm:defun_code(Defun), 0), + MyArity = length(fix_formals(hipe_arm:defun_formals(Defun))), + erlang:max(MaxTailArity - MyArity, 0). + +body_mta([I|Code], MTA) -> + body_mta(Code, insn_mta(I, MTA)); +body_mta([], MTA) -> + MTA. + +insn_mta(I, MTA) -> + case I of + #pseudo_tailcall{arity=Arity} -> + erlang:max(MTA, Arity - hipe_arm_registers:nr_args()); + _ -> MTA + end. + +%%% +%%% Ensure that we have enough temps to satisfy the minimum frame size, +%%% if necessary by prepending unused dummy temps. +%%% + +ensure_minframe(MinFrame, Temps) -> + ensure_minframe(MinFrame, tset_size(Temps), Temps). + +ensure_minframe(MinFrame, Frame, Temps) -> + if MinFrame > Frame -> + Temp = hipe_arm:mk_new_temp('untagged'), + ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp)); + true -> Temps + end. + +word_size() -> + 4. diff --git a/lib/hipe/arm/hipe_arm_liveness_gpr.erl b/lib/hipe/arm/hipe_arm_liveness_gpr.erl new file mode 100644 index 0000000000..cab81c47a1 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_liveness_gpr.erl @@ -0,0 +1,38 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_liveness_gpr). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_arm.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_arm_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_arm_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_arm_cfg:succ(CFG, L). +uses(Insn) -> hipe_arm_defuse:insn_use_gpr(Insn). +defines(Insn) -> hipe_arm_defuse:insn_def_gpr(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_arm:mk_temp(Reg, Type) + end, + hipe_arm_registers:live_at_return())). diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl new file mode 100644 index 0000000000..5243b3579e --- /dev/null +++ b/lib/hipe/arm/hipe_arm_main.erl @@ -0,0 +1,58 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_main). +-export([rtl_to_arm/3]). + +rtl_to_arm(MFA, RTL, Options) -> + Defun1 = hipe_rtl_to_arm:translate(RTL), + %% io:format("~w: after translate\n", [?MODULE]), + %% hipe_arm_pp:pp(Defun1), + Defun2 = hipe_arm_ra:ra(Defun1, Options), + %% io:format("~w: after regalloc\n", [?MODULE]), + %% hipe_arm_pp:pp(Defun2), + Defun3 = hipe_arm_frame:frame(Defun2), + %% io:format("~w: after frame\n", [?MODULE]), + %% hipe_arm_pp:pp(Defun3), + Defun4 = hipe_arm_finalise:finalise(Defun3), + %% io:format("~w: after finalise\n", [?MODULE]), + pp(Defun4, MFA, Options), + {native, arm, {unprofiled, Defun4}}. + +pp(Defun, MFA, Options) -> + case proplists:get_value(pp_native, Options) of + true -> + hipe_arm_pp:pp(Defun); + {only,Lst} when is_list(Lst) -> + case lists:member(MFA,Lst) of + true -> + hipe_arm_pp:pp(Defun); + false -> + ok + end; + {only,MFA} -> + hipe_arm_pp:pp(Defun); + {file,FileName} -> + {ok, File} = file:open(FileName, [write,append]), + hipe_arm_pp:pp(File, Defun), + ok = file:close(File); + _ -> + ok + end. diff --git a/lib/hipe/arm/hipe_arm_pp.erl b/lib/hipe/arm/hipe_arm_pp.erl new file mode 100644 index 0000000000..7ce8421994 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_pp.erl @@ -0,0 +1,351 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_pp). +-export([pp/1, pp/2, pp_insn/1]). + +-include("hipe_arm.hrl"). + +pp(Defun) -> + pp(standard_io, Defun). + +pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) -> + Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A), + io:format(Dev, "\t.text\n", []), + io:format(Dev, "\t.align 4\n", []), + io:format(Dev, "\t.global ~s\n", [Fname]), + io:format(Dev, "~s:\n", [Fname]), + pp_insns(Dev, Code, Fname), + io:format(Dev, "\t.rodata\n", []), + io:format(Dev, "\t.align 4\n", []), + hipe_data_pp:pp(Dev, Data, arm, Fname), + io:format(Dev, "\n", []). + +pp_insns(Dev, [I|Is], Fname) -> + pp_insn(Dev, I, Fname), + pp_insns(Dev, Is, Fname); +pp_insns(_, [], _) -> + []. + +pp_insn(I) -> + pp_insn(standard_io, I, ""). + +pp_insn(Dev, I, Pre) -> + case I of + #alu{aluop=AluOp, s=S, dst=Dst, src=Src, am1=Am1} -> + io:format(Dev, "\t~s~s ", [alu_op_name(AluOp), s_name(S)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_am1(Dev, Am1), + io:format(Dev, "\n", []); + #b_fun{'fun'=Fun, linkage=Linkage} -> + io:format(Dev, "\tb ", []), + pp_fun(Dev, Fun), + io:format(Dev, " # ~w\n", [Linkage]); + #b_label{'cond'=Cond, label=Label} -> + io:format(Dev, "\tb~s .~s_~w\n", [cond_name(Cond), Pre, Label]); + #bl{'fun'=Fun, sdesc=SDesc, linkage=Linkage} -> + io:format(Dev, "\tbl ", []), + pp_fun(Dev, Fun), + io:format(Dev, " #", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #blx{src=Src, sdesc=SDesc} -> + io:format(Dev, "\tblx ", []), + pp_temp(Dev, Src), + io:format(Dev, " # ", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, "\n", []); + #cmp{cmpop=CmpOp, src=Src, am1=Am1} -> + io:format(Dev, "\t~s ", [cmp_op_name(CmpOp)]), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_am1(Dev, Am1), + io:format(Dev, "\n", []); + #comment{term=Term} -> + io:format(Dev, "\t# ~p\n", [Term]); + #label{label=Label} -> + io:format(Dev, ".~s_~w:~n", [Pre, Label]); + #load{ldop=LdOp, dst=Dst, am2=Am2} -> + io:format(Dev, "\t~w ", [ldop_name(LdOp)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_am2(Dev, Am2), + io:format(Dev, "\n", []); + #ldrsb{dst=Dst, am3=Am3} -> + io:format(Dev, "\tldrsb ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_am3(Dev, Am3), + io:format(Dev, "\n", []); + #move{movop=MovOp, s=S, dst=Dst, am1=Am1} -> + io:format(Dev, "\t~s~s ", [mov_op_name(MovOp), s_name(S)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_am1(Dev, Am1), + io:format(Dev, "\n", []); + #pseudo_bc{'cond'=Cond, true_label=TrueLab, false_label=FalseLab, pred=Pred} -> + io:format(Dev, "\tpseudo_bc ~w, .~s_~w # .~s_~w ~.2f\n", + [cond_name(Cond), Pre, TrueLab, Pre, FalseLab, Pred]); + #pseudo_blr{} -> + io:format(Dev, "\tpseudo_blr\n", []); + #pseudo_bx{src=Src} -> + io:format(Dev, "\tpseudo_bx ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #pseudo_call{funv=FunV, sdesc=SDesc, contlab=ContLab, linkage=Linkage} -> + io:format(Dev, "\tpseudo_call ", []), + pp_funv(Dev, FunV), + io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #pseudo_call_prepare{nrstkargs=NrStkArgs} -> + SP = hipe_arm_registers:reg_name_gpr(hipe_arm_registers:stack_pointer()), + io:format(Dev, "\tsub ~s, ~s, ~w # pseudo_call_prepare\n", + [SP, SP, (4*NrStkArgs)]); + #pseudo_li{dst=Dst, imm=Imm} -> + io:format(Dev, "\tpseudo_li ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_imm(Dev, Imm), + io:format(Dev, "\n", []); + #pseudo_move{dst=Dst, src=Src} -> + io:format(Dev, "\tpseudo_move ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #pseudo_switch{jtab=JTab, index=Index, labels=Labels} -> + io:format(Dev, "\tpseudo_switch ", []), + pp_temp(Dev, JTab), + io:format(Dev, "[", []), + pp_temp(Dev, Index), + io:format(Dev, "]", []), + case Labels of + [] -> []; + _ -> + io:format(Dev, " #", []), + pp_labels(Dev, Labels, Pre) + end, + io:format(Dev, "\n", []); + #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> + io:format(Dev, "\tpseudo_tailcall ", []), + pp_funv(Dev, FunV), + io:format(Dev, "/~w (", [Arity]), + pp_args(Dev, StkArgs), + io:format(Dev, ") ~w\n", [Linkage]); + #pseudo_tailcall_prepare{} -> + io:format(Dev, "\tpseudo_tailcall_prepare\n", []); + #smull{dstlo=DstLo, dsthi=DstHi, src1=Src1, src2=Src2} -> + io:format(Dev, "\tsmull ", []), + pp_temp(Dev, DstLo), + io:format(Dev, ", ", []), + pp_temp(Dev, DstHi), + io:format(Dev, ", ", []), + pp_temp(Dev, Src1), + io:format(Dev, ", ", []), + pp_temp(Dev, Src2), + io:format(Dev, "\n", []); + #store{stop=StOp, src=Src, am2=Am2} -> + io:format(Dev, "\tstr~s ", [stop_suffix(StOp)]), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_am2(Dev, Am2), + io:format(Dev, "\n", []); + _ -> + exit({?MODULE, pp_insn, I}) + end. + +to_hex(N) -> + io_lib:format("~.16x", [N, "0x"]). + +pp_sdesc(Dev, Pre, #arm_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) -> + pp_sdesc_exnlab(Dev, Pre, ExnLab), + io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]), + pp_sdesc_live(Dev, Live), + io:format(Dev, "]", []). + +pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []); +pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]). + +pp_sdesc_live(_, {}) -> []; +pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1). + +pp_sdesc_live(Dev, Live, I) -> + io:format(Dev, "~s", [to_hex(element(I, Live))]), + if I < tuple_size(Live) -> + io:format(Dev, ",", []), + pp_sdesc_live(Dev, Live, I+1); + true -> [] + end. + +pp_labels(Dev, [Label|Labels], Pre) -> + io:format(Dev, " .~s_~w", [Pre, Label]), + pp_labels(Dev, Labels, Pre); +pp_labels(_, [], _) -> + []. + +pp_fun(Dev, Fun) -> + case Fun of + #arm_mfa{m=M, f=F, a=A} -> + io:format(Dev, "~w:~w/~w", [M, F, A]); + #arm_prim{prim=Prim} -> + io:format(Dev, "~w", [Prim]) + end. + +pp_funv(Dev, FunV) -> + case FunV of + #arm_temp{} -> + pp_temp(Dev, FunV); + Fun -> + pp_fun(Dev, Fun) + end. + +alu_op_name(Op) -> Op. + +cond_name(Cond) -> + case Cond of + 'al' -> ""; + _ -> Cond + end. + +s_name(S) -> + case S of + true -> "s"; + false -> "" + end. + +cmp_op_name(Op) -> Op. + +mov_op_name(Op) -> Op. + +ldop_name(LdOp) -> LdOp. + +stop_suffix(StOp) -> + case StOp of + 'str' -> ""; + 'strb' -> "b" + end. + +pp_temp(Dev, Temp=#arm_temp{reg=Reg, type=Type}) -> + case hipe_arm:temp_is_precoloured(Temp) of + true -> + Name = +%%% case Type of +%%% 'double' -> +%%% hipe_arm_registers:reg_name_fpr(Reg); +%%% _ -> + hipe_arm_registers:reg_name_gpr(Reg) +%%% end + , + io:format(Dev, "~s", [Name]); + false -> + Tag = + case Type of +%%% double -> "f"; + tagged -> "t"; + untagged -> "u" + end, + io:format(Dev, "~s~w", [Tag, Reg]) + end. + +pp_hex(Dev, Value) -> io:format(Dev, "~s", [to_hex(Value)]). + +pp_imm(Dev, Value) -> + if is_integer(Value) -> pp_hex(Dev, Value); + true -> io:format(Dev, "~w", [Value]) + end. + +pp_am1(Dev, Am1) -> + case Am1 of + #arm_temp{} -> + pp_temp(Dev, Am1); + {Src,rrx} -> + pp_temp(Dev, Src), + io:format(Dev, ", rrx", []); + {Src,ShiftOp,ShiftArg} -> + pp_temp(Dev, Src), + io:format(Dev, ", ~w ", [ShiftOp]), + case ShiftArg of + #arm_temp{} -> + pp_temp(Dev, ShiftArg); + Imm5 -> + io:format(Dev, "#~w", [Imm5]) + end; + {Imm8,Imm4} -> + io:format(Dev, "#~w, 2*~w", [Imm8,Imm4]) + end. + +pp_am2(Dev, #am2{src=Src,sign=Sign,offset=Am2Offset}) -> + io:format(Dev, "[", []), + pp_temp(Dev, Src), + io:format(Dev, ",~s", [sign_name(Sign)]), + case Am2Offset of + #arm_temp{} -> + pp_temp(Dev, Am2Offset); + {Src2,rrx} -> + pp_temp(Dev, Src2), + io:format(Dev, ", rrx", []); + {Src2,ShiftOp,Imm5} -> + pp_temp(Dev, Src2), + io:format(Dev, ",~w #~w", [ShiftOp,Imm5]); + Imm12 -> + io:format(Dev, "#~w", [Imm12]) + end, + io:format(Dev, "]", []). + +pp_am3(Dev, #am3{src=Src,sign=Sign,offset=Am3Offset}) -> + io:format(Dev, "[", []), + pp_temp(Dev, Src), + io:format(Dev, ",~s", [sign_name(Sign)]), + case Am3Offset of + #arm_temp{} -> pp_temp(Dev, Am3Offset); + Imm8 -> io:format(Dev, "~w", [Imm8]) + end, + io:format(Dev, "]", []). + +sign_name(Sign) -> + case Sign of + '+' -> ""; + '-' -> "-" + end. + +pp_arg(Dev, Arg) -> + case Arg of + #arm_temp{} -> + pp_temp(Dev, Arg); + _ -> + pp_hex(Dev, Arg) + end. + +pp_args(Dev, [A|As]) -> + pp_arg(Dev, A), + pp_comma_args(Dev, As); +pp_args(_, []) -> + []. + +pp_comma_args(Dev, [A|As]) -> + io:format(Dev, ", ", []), + pp_arg(Dev, A), + pp_comma_args(Dev, As); +pp_comma_args(_, []) -> + []. diff --git a/lib/hipe/arm/hipe_arm_ra.erl b/lib/hipe/arm/hipe_arm_ra.erl new file mode 100644 index 0000000000..bdd9e228e0 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_ra.erl @@ -0,0 +1,56 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_ra). +-export([ra/2]). + +ra(Defun0, Options) -> + %% hipe_arm_pp:pp(Defun0), + {Defun1, Coloring_fp, SpillIndex} + = case proplists:get_bool(inline_fp, Options) of +%% true -> +%% hipe_regalloc_loop:ra_fp(Defun0, Options, +%% hipe_coalescing_regalloc, +%% hipe_arm_specific_fp); + false -> + {Defun0,[],0} + end, + %% hipe_arm_pp:pp(Defun1), + {Defun2, Coloring} + = case proplists:get_value(regalloc, Options, coalescing) of + coalescing -> + ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + optimistic -> + ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + graph_color -> + ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + linear_scan -> + hipe_arm_ra_ls:ra(Defun1, SpillIndex, Options); + naive -> + hipe_arm_ra_naive:ra(Defun1, Coloring_fp, Options); + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) + end, + %% hipe_arm_pp:pp(Defun2), + hipe_arm_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + +ra(Defun, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_arm_specific). diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl new file mode 100644 index 0000000000..9edc362e90 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl @@ -0,0 +1,285 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_ra_finalise). +-export([finalise/3]). +-include("hipe_arm.hrl"). + +finalise(Defun, TempMap, _FPMap0=[]) -> + Code = hipe_arm:defun_code(Defun), + {_, SpillLimit} = hipe_arm:defun_var_range(Defun), + Map = mk_ra_map(TempMap, SpillLimit), + NewCode = ra_code(Code, Map, []), + Defun#defun{code=NewCode}. + +ra_code([I|Insns], Map, Accum) -> + ra_code(Insns, Map, [ra_insn(I, Map) | Accum]); +ra_code([], _Map, Accum) -> + lists:reverse(Accum). + +ra_insn(I, Map) -> + case I of + #alu{} -> ra_alu(I, Map); + #cmp{} -> ra_cmp(I, Map); + #load{} -> ra_load(I, Map); + #ldrsb{} -> ra_ldrsb(I, Map); + #move{} -> ra_move(I, Map); + #pseudo_call{} -> ra_pseudo_call(I, Map); + #pseudo_li{} -> ra_pseudo_li(I, Map); + #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_switch{} -> ra_pseudo_switch(I, Map); + #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); + #smull{} -> ra_smull(I, Map); + #store{} -> ra_store(I, Map); + _ -> I + end. + +ra_alu(I=#alu{dst=Dst,src=Src,am1=Am1}, Map) -> + NewDst = ra_temp(Dst, Map), + NewSrc = ra_temp(Src, Map), + NewAm1 = ra_am1(Am1, Map), + I#alu{dst=NewDst,src=NewSrc,am1=NewAm1}. + +ra_cmp(I=#cmp{src=Src,am1=Am1}, Map) -> + NewSrc = ra_temp(Src, Map), + NewAm1 = ra_am1(Am1, Map), + I#cmp{src=NewSrc,am1=NewAm1}. + +ra_load(I=#load{dst=Dst,am2=Am2}, Map) -> + NewDst = ra_temp(Dst, Map), + NewAm2 = ra_am2(Am2, Map), + I#load{dst=NewDst,am2=NewAm2}. + +ra_ldrsb(I=#ldrsb{dst=Dst,am3=Am3}, Map) -> + NewDst = ra_temp(Dst, Map), + NewAm3 = ra_am3(Am3, Map), + I#ldrsb{dst=NewDst,am3=NewAm3}. + +ra_move(I=#move{dst=Dst,am1=Am1}, Map) -> + NewDst = ra_temp(Dst, Map), + NewAm1 = ra_am1(Am1, Map), + I#move{dst=NewDst,am1=NewAm1}. + +ra_pseudo_call(I=#pseudo_call{funv=FunV}, Map) -> + NewFunV = ra_funv(FunV, Map), + I#pseudo_call{funv=NewFunV}. + +ra_pseudo_li(I=#pseudo_li{dst=Dst}, Map) -> + NewDst = ra_temp(Dst, Map), + I#pseudo_li{dst=NewDst}. + +ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) -> + NewDst = ra_temp(Dst, Map), + NewSrc = ra_temp(Src, Map), + I#pseudo_move{dst=NewDst,src=NewSrc}. + +ra_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, Map) -> + NewJTab = ra_temp(JTab, Map), + NewIndex = ra_temp(Index, Map), + I#pseudo_switch{jtab=NewJTab,index=NewIndex}. + +ra_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV,stkargs=StkArgs}, Map) -> + NewFunV = ra_funv(FunV, Map), + NewStkArgs = ra_args(StkArgs, Map), + I#pseudo_tailcall{funv=NewFunV,stkargs=NewStkArgs}. + +ra_smull(I=#smull{dstlo=DstLo,dsthi=DstHi,src1=Src1,src2=Src2}, Map) -> + NewDstLo = ra_temp(DstLo, Map), + NewDstHi = ra_temp(DstHi, Map), + NewSrc1 = ra_temp(Src1, Map), + NewSrc2 = ra_temp(Src2, Map), + I#smull{dstlo=NewDstLo,dsthi=NewDstHi,src1=NewSrc1,src2=NewSrc2}. + +ra_store(I=#store{src=Src,am2=Am2}, Map) -> + NewSrc = ra_temp(Src, Map), + NewAm2 = ra_am2(Am2, Map), + I#store{src=NewSrc,am2=NewAm2}. + +%%% Tailcall stack arguments. + +ra_args([Arg|Args], Map) -> + [ra_temp_or_imm(Arg, Map) | ra_args(Args, Map)]; +ra_args([], _) -> + []. + +ra_temp_or_imm(Arg, Map) -> + case hipe_arm:is_temp(Arg) of + true -> + ra_temp(Arg, Map); + false -> + Arg + end. + +%%% FunV, Am, and Temp operands. + +ra_funv(FunV, Map) -> + case FunV of + #arm_temp{} -> ra_temp(FunV, Map); + _ -> FunV + end. + +ra_am1(Am1, Map) -> + case Am1 of + #arm_temp{} -> + ra_temp(Am1, Map); + {Src2,rrx} -> + NewSrc2 = ra_temp(Src2, Map), + {NewSrc2,rrx}; + {Src2,ShiftOp,ShiftArg} -> + NewSrc2 = ra_temp(Src2, Map), + NewArg = + case ShiftArg of + #arm_temp{} -> ra_temp(ShiftArg, Map); + _ -> ShiftArg + end, + {NewSrc2,ShiftOp,NewArg}; + _ -> + Am1 + end. + +ra_am2(Am2=#am2{src=Src2,offset=Offset}, Map) -> + NewSrc2 = ra_temp(Src2, Map), + NewOffset = ra_am2offset(Offset, Map), + Am2#am2{src=NewSrc2,offset=NewOffset}. + +ra_am2offset(Offset, Map) -> + case Offset of + #arm_temp{} -> + ra_temp(Offset, Map); + {Src3,rrx} -> + NewSrc3 = ra_temp(Src3, Map), + {NewSrc3,rrx}; + {Src3,ShiftOp,Imm5} -> + NewSrc3 = ra_temp(Src3, Map), + {NewSrc3,ShiftOp,Imm5}; + _ -> + Offset + end. + +ra_am3(Am3=#am3{src=Src2,offset=Offset}, Map) -> + NewSrc2 = ra_temp(Src2, Map), + NewOffset = ra_am3offset(Offset, Map), + Am3#am3{src=NewSrc2,offset=NewOffset}. + +ra_am3offset(Offset, Map) -> + case Offset of + #arm_temp{} -> ra_temp(Offset, Map); + _ -> Offset + end. + +-ifdef(notdef). % for FP regalloc +ra_temp_fp(Temp, FPMap) -> + Reg = hipe_arm:temp_reg(Temp), + case hipe_arm:temp_type(Temp) of + 'double' -> + case hipe_arm_registers:is_precoloured_fpr(Reg) of + true -> Temp; + _ -> ra_temp_common(Reg, Temp, FPMap) + end + end. +-endif. + +ra_temp(Temp, Map) -> + Reg = hipe_arm:temp_reg(Temp), + case hipe_arm:temp_type(Temp) of + 'double' -> + exit({?MODULE,ra_temp,Temp}); + _ -> + case hipe_arm_registers:is_precoloured_gpr(Reg) of + true -> Temp; + _ -> ra_temp_common(Reg, Temp, Map) + end + end. + +ra_temp_common(Reg, Temp, Map) -> + case gb_trees:lookup(Reg, Map) of + {value,NewReg} -> Temp#arm_temp{reg=NewReg}; + _ -> Temp + end. + +mk_ra_map(TempMap, SpillLimit) -> + %% Build a partial map from pseudo to reg or spill. + %% Spills are represented as pseudos with indices above SpillLimit. + %% (I'd prefer to use negative indices, but that breaks + %% hipe_arm_registers:is_precoloured/1.) + %% The frame mapping proper is unchanged, since spills look just like + %% ordinary (un-allocated) pseudos. + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, is_precoloured_gpr), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + TempMap). + +conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> + %% From should be a pseudo, or a hard reg mapped to itself. + if is_integer(From), From =< SpillLimit -> + case hipe_arm_registers:IsPrecoloured(From) of + false -> []; + _ -> + case To of + {reg, From} -> []; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + %% NewReg should be a hard reg, or a pseudo mapped + %% to itself (formals are handled this way). + if is_integer(NewReg) -> + case hipe_arm_registers:IsPrecoloured(NewReg) of + true -> []; + _ -> if From =:= NewReg -> []; + true -> + exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of NewReg check + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. + if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(arm), + if MaxTempNum >= ToTempNum -> ok; + true -> hipe_gensym:set_var(arm, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end. + +-ifdef(notdef). % for FP regalloc +mk_ra_map_fp(FPMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_fpr), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FPMap). +-endif. diff --git a/lib/hipe/arm/hipe_arm_ra_ls.erl b/lib/hipe/arm/hipe_arm_ra_ls.erl new file mode 100644 index 0000000000..53bfd5b2a3 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_ra_ls.erl @@ -0,0 +1,56 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Linear Scan register allocator for ARM + +-module(hipe_arm_ra_ls). +-export([ra/3]). + +ra(Defun, SpillIndex, Options) -> + NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), + CFG = hipe_arm_cfg:init(NewDefun), + SpillLimit = hipe_arm_specific:number_of_temporaries(CFG), + alloc(NewDefun, SpillIndex, SpillLimit, Options). + +alloc(Defun, SpillIndex, SpillLimit, Options) -> + CFG = hipe_arm_cfg:init(Defun), + {Coloring, _NewSpillIndex} = + regalloc( + CFG, + hipe_arm_registers:allocatable_gpr()-- + [hipe_arm_registers:temp3(), + hipe_arm_registers:temp2(), + hipe_arm_registers:temp1()], + [hipe_arm_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + hipe_arm_specific), + {NewDefun, _DidSpill} = + hipe_arm_ra_postconditions:check_and_rewrite( + Defun, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), + {SpillMap, _NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + hipe_arm_specific, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), SpillMap), + {NewDefun, Coloring2}. + +regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> + hipe_ls_regalloc:regalloc( + CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). diff --git a/lib/hipe/arm/hipe_arm_ra_naive.erl b/lib/hipe/arm/hipe_arm_ra_naive.erl new file mode 100644 index 0000000000..786895f2ca --- /dev/null +++ b/lib/hipe/arm/hipe_arm_ra_naive.erl @@ -0,0 +1,29 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_ra_naive). +-export([ra/3]). + +-include("hipe_arm.hrl"). + +ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} + {NewDefun,_DidSpill} = + hipe_arm_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), + {NewDefun, []}. diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl new file mode 100644 index 0000000000..96b0d5733f --- /dev/null +++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl @@ -0,0 +1,278 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_ra_postconditions). + +-export([check_and_rewrite/3, check_and_rewrite2/3]). + +-include("hipe_arm.hrl"). + +check_and_rewrite(Defun, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), + check_and_rewrite2(Defun, TempMap, Allocator). + +check_and_rewrite2(Defun, TempMap, Allocator) -> + Strategy = strategy(Allocator), + #defun{code=Code0} = Defun, + {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), + VarRange = {0, hipe_gensym:get_var(arm)}, + {Defun#defun{code=Code1, var_range=VarRange}, + DidSpill}. + +strategy(Allocator) -> + case Allocator of + 'normal' -> 'new'; + 'linearscan' -> 'fixed'; + 'naive' -> 'fixed' + end. + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap, Strategy) -> + case I of + #alu{} -> do_alu(I, TempMap, Strategy); + #cmp{} -> do_cmp(I, TempMap, Strategy); + #load{} -> do_load(I, TempMap, Strategy); + #ldrsb{} -> do_ldrsb(I, TempMap, Strategy); + #move{} -> do_move(I, TempMap, Strategy); + #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy); + #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy); + #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #pseudo_switch{} -> do_pseudo_switch(I, TempMap, Strategy); + #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy); + #smull{} -> do_smull(I, TempMap, Strategy); + #store{} -> do_store(I, TempMap, Strategy); + _ -> {[I], false} + end. + +%%% Fix relevant instruction types. + +do_alu(I=#alu{dst=Dst,src=Src,am1=Am1}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixSrc,NewSrc,DidSpill2} = fix_src1(Src, TempMap, Strategy), + {FixAm1,NewAm1,DidSpill3} = fix_am1(Am1, TempMap, Strategy), + NewI = I#alu{dst=NewDst,src=NewSrc,am1=NewAm1}, + {FixSrc ++ FixAm1 ++ [NewI | FixDst], DidSpill1 or DidSpill2 or DidSpill3}. + +do_cmp(I=#cmp{src=Src,am1=Am1}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), + {FixAm1,NewAm1,DidSpill2} = fix_am1(Am1, TempMap, Strategy), + NewI = I#cmp{src=NewSrc,am1=NewAm1}, + {FixSrc ++ FixAm1 ++ [NewI], DidSpill1 or DidSpill2}. + +do_load(I=#load{dst=Dst,am2=Am2}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixAm2,NewAm2,DidSpill2} = fix_am2(Am2, TempMap, Strategy), + NewI = I#load{dst=NewDst,am2=NewAm2}, + {FixAm2 ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_ldrsb(I=#ldrsb{dst=Dst,am3=Am3}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixAm3,NewAm3,DidSpill2} = fix_am3(Am3, TempMap, Strategy), + NewI = I#ldrsb{dst=NewDst,am3=NewAm3}, + {FixAm3 ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_move(I=#move{dst=Dst,am1=Am1}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixAm1,NewAm1,DidSpill2} = fix_am1(Am1, TempMap, Strategy), + NewI = I#move{dst=NewDst,am1=NewAm1}, + {FixAm1 ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_pseudo_call(I=#pseudo_call{funv=FunV}, TempMap, Strategy) -> + {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy), + NewI = I#pseudo_call{funv=NewFunV}, + {FixFunV ++ [NewI], DidSpill}. + +do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap, Strategy), + NewI = I#pseudo_li{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) -> + %% Either Dst or Src (but not both) may be a pseudo temp. + %% pseudo_move and pseudo_tailcall are special cases: in + %% all other instructions, all temps must be non-pseudos + %% after register allocation. + case temp_is_spilled(Dst, TempMap) of + true -> % Src must not be a pseudo + {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), + NewI = I#pseudo_move{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}; + _ -> + {[I], false} + end. + +do_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, TempMap, Strategy) -> + {FixJTab,NewJTab,DidSpill1} = fix_src1(JTab, TempMap, Strategy), + {FixIndex,NewIndex,DidSpill2} = fix_src2(Index, TempMap, Strategy), + NewI = I#pseudo_switch{jtab=NewJTab,index=NewIndex}, + {FixJTab ++ FixIndex ++ [NewI], DidSpill1 or DidSpill2}. + +do_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV}, TempMap, Strategy) -> + {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy), + NewI = I#pseudo_tailcall{funv=NewFunV}, + {FixFunV ++ [NewI], DidSpill}. + +do_smull(I=#smull{dstlo=DstLo,dsthi=DstHi,src1=Src1,src2=Src2}, TempMap, Strategy) -> + %% ARM requires that DstLo, DstHi, and Src1 are distinct. + %% We furthermore require Src1 and Src2 to be different in the fixed strategy. + {FixDstLo,NewDstLo,DidSpill1} = fix_dst(DstLo, TempMap, Strategy), % temp1 + {FixDstHi,NewDstHi,DidSpill2} = fix_dst2(DstHi, TempMap, Strategy), % temp3 + {FixSrc1,NewSrc1,DidSpill3} = fix_src2(Src1, TempMap, Strategy), % temp2 + {FixSrc2,NewSrc2,DidSpill4} = fix_src1(Src2, TempMap, Strategy), % temp1; temp3 would be OK + NewI = I#smull{dstlo=NewDstLo,dsthi=NewDstHi,src1=NewSrc1,src2=NewSrc2}, + {FixSrc1 ++ FixSrc2 ++ [NewI | FixDstLo ++ FixDstHi], + DidSpill1 or DidSpill2 or DidSpill3 or DidSpill4}. + +do_store(I=#store{src=Src,am2=Am2}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), + {FixAm2,NewAm2,DidSpill2} = fix_am2(Am2, TempMap, Strategy), + NewI = I#store{src=NewSrc,am2=NewAm2}, + {FixSrc ++ FixAm2 ++ [NewI], DidSpill1 or DidSpill2}. + +%%% Fix Dst and Src operands. + +fix_funv(FunV, TempMap, Strategy) -> + case FunV of + #arm_temp{} -> fix_src3(FunV, TempMap, Strategy); + _ -> {[], FunV, false} + end. + +fix_am1(Am1, TempMap, Strategy) -> + case Am1 of + #arm_temp{} -> + fix_src2(Am1, TempMap, Strategy); + {Src2,rrx} -> + {Fix,New,DidSpill} = fix_src2(Src2, TempMap, Strategy), + {Fix, {New,rrx}, DidSpill}; + {Src2,ShiftOp,ShiftArg} -> + {FixSrc2,NewSrc2,DidSpill1} = fix_src2(Src2, TempMap, Strategy), + {FixArg,NewArg,DidSpill2} = + case ShiftArg of + #arm_temp{} -> fix_src3(ShiftArg, TempMap, Strategy); + _ -> {[], ShiftArg, false} + end, + %% order matters: FixArg may clobber temp2/Src2 + {FixArg ++ FixSrc2, {NewSrc2,ShiftOp,NewArg}, DidSpill1 or DidSpill2}; + _ -> {[], Am1, false} + end. + +fix_am2(Am2=#am2{src=Src2,offset=Offset}, TempMap, Strategy) -> + {FixSrc2,NewSrc2,DidSpill1} = fix_src2(Src2, TempMap, Strategy), + {FixOffset,NewOffset,DidSpill2} = fix_am2offset(Offset, TempMap, Strategy), + NewAm2 = Am2#am2{src=NewSrc2,offset=NewOffset}, + %% order matters: FixOffset may clobber temp2/Src2 + {FixOffset ++ FixSrc2, NewAm2, DidSpill1 or DidSpill2}. + +fix_am2offset(Offset, TempMap, Strategy) -> + case Offset of + #arm_temp{} -> + fix_src3(Offset, TempMap, Strategy); + {Src3,rrx} -> + {Fix,New,DidSpill} = fix_src3(Src3, TempMap, Strategy), + {Fix, {New,rrx}, DidSpill}; + {Src3,ShiftOp,Imm5} -> + {Fix,New,DidSpill} = fix_src3(Src3, TempMap, Strategy), + {Fix, {New,ShiftOp,Imm5}, DidSpill}; + _ -> + {[], Offset, false} + end. + +fix_am3(Am3=#am3{src=Src2,offset=Offset}, TempMap, Strategy) -> + {FixSrc2,NewSrc2,DidSpill1} = fix_src2(Src2, TempMap, Strategy), + {FixOffset,NewOffset,DidSpill2} = fix_am3offset(Offset, TempMap, Strategy), + NewAm3 = Am3#am3{src=NewSrc2,offset=NewOffset}, + %% order matters: FixOffset may clobber temp2/Src2 + {FixOffset ++ FixSrc2, NewAm3, DidSpill1 or DidSpill2}. + +fix_am3offset(Offset, TempMap, Strategy) -> + case Offset of + #arm_temp{} -> fix_src3(Offset, TempMap, Strategy); + _ -> {[], Offset, false} + end. + +fix_src1(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp1(Strategy)). + +temp1('new') -> []; +temp1('fixed') -> hipe_arm_registers:temp1(). + +fix_src2(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp2(Strategy)). + +temp2('new') -> []; +temp2('fixed') -> hipe_arm_registers:temp2(). + +fix_src3(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp3(Strategy)). + +temp3('new') -> []; +temp3('fixed') -> hipe_arm_registers:temp3(). + +fix_src(Src, TempMap, RegOpt) -> + case temp_is_spilled(Src, TempMap) of + true -> + NewSrc = clone(Src, RegOpt), + {[hipe_arm:mk_pseudo_move(NewSrc, Src)], + NewSrc, + true}; + _ -> + {[], Src, false} + end. + +fix_dst(Dst, TempMap, Strategy) -> + fix_dst_common(Dst, TempMap, temp1(Strategy)). + +fix_dst2(Dst, TempMap, Strategy) -> % only used for smull's DstHi + fix_dst_common(Dst, TempMap, temp3(Strategy)). + +fix_dst_common(Dst, TempMap, RegOpt) -> + case temp_is_spilled(Dst, TempMap) of + true -> + NewDst = clone(Dst, RegOpt), + {[hipe_arm:mk_pseudo_move(Dst, NewDst)], NewDst, true}; + _ -> + {[], Dst, false} + end. + +%%% Check if an operand is a pseudo-temp. + +temp_is_spilled(Temp, []) -> % special case for naive regalloc + not(hipe_arm:temp_is_precoloured(Temp)); +temp_is_spilled(Temp, TempMap) -> + case hipe_arm:temp_is_allocatable(Temp) of + true -> + Reg = hipe_arm:temp_reg(Temp), + tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap); + false -> true + end. + +%%% Make a certain reg into a clone of Temp. + +clone(Temp, RegOpt) -> + Type = hipe_arm:temp_type(Temp), + case RegOpt of + [] -> hipe_arm:mk_new_temp(Type); + Reg -> hipe_arm:mk_temp(Reg, Type) + end. diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl new file mode 100644 index 0000000000..ff6a163e9c --- /dev/null +++ b/lib/hipe/arm/hipe_arm_registers.erl @@ -0,0 +1,207 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_registers). + +-export([reg_name_gpr/1, + first_virtual/0, + is_precoloured_gpr/1, + all_precoloured/0, % for coalescing + return_value/0, + temp1/0, % C callee-save, not parameter, may be allocatable + temp2/0, % not parameter, must not be allocatable (frame) + temp3/0, % not parameter, may be allocatable + heap_pointer/0, + stack_pointer/0, + proc_pointer/0, + lr/0, + pc/0, + %% heap_limit/0, + %% fcalls/0, + allocatable_gpr/0, % for coalescing + is_fixed/1, % for graph coloring + nr_args/0, + arg/1, + args/1, + is_arg/1, % for linear scan + call_clobbered/0, + tailcall_clobbered/0, + live_at_return/0 + ]). + +-include("../rtl/hipe_literals.hrl"). + +-define(R0, 0). +-define(R1, 1). +-define(R2, 2). +-define(R3, 3). +-define(R4, 4). +-define(R5, 5). +-define(R6, 6). +-define(R7, 7). +-define(R8, 8). +-define(R9, 9). +-define(R10, 10). +-define(R11, 11). +-define(R12, 12). +-define(R13, 13). % XXX: see all_precoloured() +-define(R14, 14). +-define(R15, 15). +-define(LAST_PRECOLOURED, 15). % must handle both GPR and FPR ranges + +-define(ARG0, ?R1). +-define(ARG1, ?R2). +-define(ARG2, ?R3). +-define(ARG3, ?R4). +-define(ARG4, ?R5). +-define(ARG5, ?R6). + +-define(TEMP1, ?R8). % stores LR around inc_stack calls, must be C calleE-save +-define(TEMP2, ?R12). +-define(TEMP3, ?R7). + +-define(RETURN_VALUE, ?R0). +-define(HEAP_POINTER, ?R9). +-define(STACK_POINTER, ?R10). +-define(PROC_POINTER, ?R11). + +reg_name_gpr(R) -> [$r | integer_to_list(R)]. + +%%% Must handle both GPR and FPR ranges. +first_virtual() -> ?LAST_PRECOLOURED + 1. + +%%% These two tests have the same implementation, but that's +%%% not something we should cast in stone in the interface. +is_precoloured_gpr(R) -> R =< ?LAST_PRECOLOURED. + +all_precoloured() -> + %% XXX: R13 should be skipped as it never is used anywhere. + %% Unfortunately, gaps in the list of precoloured registers + %% cause the graph_color register allocator to create bogus + %% assignments for those "registers", which in turn causes + %% the "precoloured reg must map to itself" sanity check in + %% the frame module to signal errors. + [ ?R0, ?R1, ?R2, ?R3, ?R4, ?R5, ?R6, ?R7, + ?R8, ?R9, ?R10, ?R11, ?R12, ?R13, ?R14, ?R15]. + +return_value() -> ?RETURN_VALUE. + +temp1() -> ?TEMP1. +temp2() -> ?TEMP2. +temp3() -> ?TEMP3. % for base2 in storeix :-( + +heap_pointer() -> ?HEAP_POINTER. + +stack_pointer() -> ?STACK_POINTER. + +proc_pointer() -> ?PROC_POINTER. + +lr() -> ?R14. + +pc() -> ?R15. + +allocatable_gpr() -> + %% r9, r10, and r11 are fixed global registers. + %% r12 may be used by the frame module for large load/store offsets. + %% r13 is reserved for C. + %% r15 is the PC, and is not usable as a variable. + [ ?R0, ?R1, ?R2, ?R3, ?R4, ?R5, ?R6, ?R7, + ?R8, ?R14]. + +%% Needed for hipe_graph_coloring_regalloc. +%% Presumably true for Reg in AllPrecoloured \ Allocatable. +is_fixed(Reg) -> + case Reg of + ?HEAP_POINTER -> true; + ?STACK_POINTER -> true; + ?PROC_POINTER -> true; + %% The following cases are required for linear scan: + %% it gets confused if it sees a register which is + %% neither allocatable nor global (fixed or one of + %% the scratch registers set aside for linear scan). + ?R15 -> true; + ?R13 -> true; % XXX: see all_precoloured() + ?R12 -> true; + _ -> false + end. + +nr_args() -> ?ARM_NR_ARG_REGS. + +args(Arity) when is_integer(Arity) -> + N = erlang:min(Arity, ?ARM_NR_ARG_REGS), + args(N-1, []). + +args(-1, Rest) -> Rest; +args(I, Rest) -> args(I-1, [arg(I) | Rest]). + +arg(N) -> + if N < ?ARM_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + 5 -> ?ARG5; + _ -> exit({?MODULE, arg, N}) + end; + true -> + exit({?MODULE, arg, N}) + end. + +is_arg(R) -> + case R of + ?ARG0 -> ?ARM_NR_ARG_REGS > 0; + ?ARG1 -> ?ARM_NR_ARG_REGS > 1; + ?ARG2 -> ?ARM_NR_ARG_REGS > 2; + ?ARG3 -> ?ARM_NR_ARG_REGS > 3; + ?ARG4 -> ?ARM_NR_ARG_REGS > 4; + ?ARG5 -> ?ARM_NR_ARG_REGS > 5; + _ -> false + end. + +call_clobbered() -> % does the RA strip the type or not? + [{?R0,tagged},{?R0,untagged}, + {?R1,tagged},{?R1,untagged}, + {?R2,tagged},{?R2,untagged}, + {?R3,tagged},{?R3,untagged}, + {?R4,tagged},{?R4,untagged}, + {?R5,tagged},{?R5,untagged}, + {?R6,tagged},{?R6,untagged}, + {?R7,tagged},{?R7,untagged}, + {?R8,tagged},{?R8,untagged}, + %% R9 is fixed (HP) + %% R10 is fixed (NSP) + %% R11 is fixed (P) + {?R12,tagged},{?R12,untagged}, + %% R13 is reserved for C + {?R14,tagged},{?R14,untagged} + %% R15 is the non-allocatable PC + ]. + +tailcall_clobbered() -> % tailcall crapola needs one temp + [{?TEMP1,tagged},{?TEMP1,untagged}]. + +live_at_return() -> + [%%{?LR,untagged}, + {?HEAP_POINTER,untagged}, + {?STACK_POINTER,untagged}, + {?PROC_POINTER,untagged} + ]. diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl new file mode 100644 index 0000000000..a4dc5db978 --- /dev/null +++ b/lib/hipe/arm/hipe_rtl_to_arm.erl @@ -0,0 +1,836 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_rtl_to_arm). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl"). + +translate(RTL) -> + hipe_gensym:init(arm), + hipe_gensym:set_var(arm, hipe_arm_registers:first_virtual()), + hipe_gensym:set_label(arm, hipe_gensym:get_label(rtl)), + Map0 = vmap_empty(), + {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0), + OldData = hipe_rtl:rtl_data(RTL), + {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData), + {RegFormals,_} = split_args(Formals), + Code = + case RegFormals of + [] -> Code0; + _ -> [hipe_arm:mk_label(hipe_gensym:get_next_label(arm)) | + move_formals(RegFormals, Code0)] + end, + IsClosure = hipe_rtl:rtl_is_closure(RTL), + IsLeaf = hipe_rtl:rtl_is_leaf(RTL), + hipe_arm:mk_defun(hipe_rtl:rtl_fun(RTL), + Formals, + IsClosure, + IsLeaf, + Code, + NewData, + [], + []). + +conv_insn_list([H|T], Map, Data) -> + {NewH, NewMap, NewData1} = conv_insn(H, Map, Data), + %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]), + {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1), + {NewH ++ NewT, NewData2}; +conv_insn_list([], _, Data) -> + {[], Data}. + +conv_insn(I, Map, Data) -> + case I of + #alu{} -> conv_alu(I, Map, Data); + #alub{} -> conv_alub(I, Map, Data); + #branch{} -> conv_branch(I, Map, Data); + #call{} -> conv_call(I, Map, Data); + #comment{} -> conv_comment(I, Map, Data); + #enter{} -> conv_enter(I, Map, Data); + #goto{} -> conv_goto(I, Map, Data); + #label{} -> conv_label(I, Map, Data); + #load{} -> conv_load(I, Map, Data); + #load_address{} -> conv_load_address(I, Map, Data); + #load_atom{} -> conv_load_atom(I, Map, Data); + #move{} -> conv_move(I, Map, Data); + #return{} -> conv_return(I, Map, Data); + #store{} -> conv_store(I, Map, Data); + #switch{} -> conv_switch(I, Map, Data); + _ -> exit({?MODULE,conv_insn,I}) + end. + +conv_alu(I, Map, Data) -> + %% dst = src1 aluop src2 + {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map), + {Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0), + {Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1), + RtlAluOp = hipe_rtl:alu_op(I), + S = false, + I2 = mk_alu(S, Dst, Src1, RtlAluOp, Src2), + {I2, Map2, Data}. + +conv_shift(RtlShiftOp) -> + case RtlShiftOp of + 'sll' -> 'lsl'; + 'srl' -> 'lsr'; + 'sra' -> 'asr' + end. + +conv_arith(RtlAluOp) -> % RtlAluOp \ RtlShiftOp -> ArmArithOp + case RtlAluOp of + 'add' -> 'add'; + 'sub' -> 'sub'; + 'mul' -> 'mul'; + 'or' -> 'orr'; + 'and' -> 'and'; + 'xor' -> 'eor' + end. + +commute_arithop(ArithOp) -> + case ArithOp of + 'sub' -> 'rsb'; + _ -> ArithOp + end. + +mk_alu(S, Dst, Src1, RtlAluOp, Src2) -> + case hipe_rtl:is_shift_op(RtlAluOp) of + true -> + mk_shift(S, Dst, Src1, conv_shift(RtlAluOp), Src2); + false -> + mk_arith(S, Dst, Src1, conv_arith(RtlAluOp), Src2) + end. + +mk_shift(S, Dst, Src1, ShiftOp, Src2) -> + case hipe_arm:is_temp(Src1) of + true -> + case hipe_arm:is_temp(Src2) of + true -> + mk_shift_rr(S, Dst, Src1, ShiftOp, Src2); + _ -> + mk_shift_ri(S, Dst, Src1, ShiftOp, Src2) + end; + _ -> + case hipe_arm:is_temp(Src2) of + true -> + mk_shift_ir(S, Dst, Src1, ShiftOp, Src2); + _ -> + mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) + end + end. + +mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) -> + io:format("~w: RTL alu with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_shift_ri(S, Dst, Tmp, ShiftOp, Src2)). + +mk_shift_ir(S, Dst, Src1, ShiftOp, Src2) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_shift_rr(S, Dst, Tmp, ShiftOp, Src2)). + +mk_shift_ri(S, Dst, Src1, ShiftOp, Src2) when is_integer(Src2) -> + if Src2 >= 0, Src2 < 32 -> ok; + true -> io:format("~w: excessive immediate shift ~w\n", [?MODULE,Src2]) + end, + Am1 = {Src1,ShiftOp,Src2}, + [hipe_arm:mk_move(S, Dst, Am1)]. + +mk_shift_rr(S, Dst, Src1, ShiftOp, Src2) -> + Am1 = {Src1,ShiftOp,Src2}, + [hipe_arm:mk_move(S, Dst, Am1)]. + +mk_arith(S, Dst, Src1, ArithOp, Src2) -> + case hipe_arm:is_temp(Src1) of + true -> + case hipe_arm:is_temp(Src2) of + true -> + mk_arith_rr(S, Dst, Src1, ArithOp, Src2); + _ -> + mk_arith_ri(S, Dst, Src1, ArithOp, Src2) + end; + _ -> + case hipe_arm:is_temp(Src2) of + true -> + mk_arith_ir(S, Dst, Src1, ArithOp, Src2); + _ -> + mk_arith_ii(S, Dst, Src1, ArithOp, Src2) + end + end. + +mk_arith_ii(S, Dst, Src1, ArithOp, Src2) -> + io:format("~w: RTL alu with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_arith_ri(S, Dst, Tmp, ArithOp, Src2)). + +mk_arith_ir(S, Dst, Src1, ArithOp, Src2) -> + mk_arith_ri(S, Dst, Src2, commute_arithop(ArithOp), Src1). + +mk_arith_ri(S, Dst, Src1, ArithOp, Src2) -> + case ArithOp of + 'mul' -> % mul/smull only take reg/reg operands + Tmp = new_untagged_temp(), + mk_li(Tmp, Src2, + mk_arith_rr(S, Dst, Src1, ArithOp, Tmp)); + _ -> % add/sub/orr/and/eor have reg/am1 operands + {FixAm1,NewArithOp,Am1} = fix_aluop_imm(ArithOp, Src2), + FixAm1 ++ [hipe_arm:mk_alu(NewArithOp, S, Dst, Src1, Am1)] + end. + +mk_arith_rr(S, Dst, Src1, ArithOp, Src2) -> + case {ArithOp,S} of + {'mul',true} -> + %% To check for overflow in 32x32->32 multiplication: + %% smull Dst,TmpHi,Src1,Src2 + %% mov TmpSign,Dst,ASR #31 + %% cmp TmpSign,TmpHi + %% [bne OverflowLabel] + TmpHi = new_untagged_temp(), + TmpSign = new_untagged_temp(), + [hipe_arm:mk_smull(Dst, TmpHi, Src1, Src2), + hipe_arm:mk_move(TmpSign, {Dst,'asr',31}), + hipe_arm:mk_cmp('cmp', TmpSign, TmpHi)]; + _ -> + [hipe_arm:mk_alu(ArithOp, S, Dst, Src1, Src2)] + end. + +fix_aluop_imm(AluOp, Imm) -> % {FixAm1,NewAluOp,Am1} + case hipe_arm:try_aluop_imm(AluOp, Imm) of + {NewAluOp,Am1} -> {[], NewAluOp, Am1}; + [] -> + Tmp = new_untagged_temp(), + {mk_li(Tmp, Imm), AluOp, Tmp} + end. + +conv_alub(I, Map, Data) -> + %% dst = src1 aluop src2; if COND goto label + {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), + {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + RtlAluOp = hipe_rtl:alub_op(I), + Cond0 = conv_alub_cond(RtlAluOp, hipe_rtl:alub_cond(I)), + Cond = + case {RtlAluOp,Cond0} of + {'mul','vs'} -> 'ne'; % overflow becomes not-equal + {'mul','vc'} -> 'eq'; % no-overflow becomes equal + {'mul',_} -> exit({?MODULE,I}); + {_,_} -> Cond0 + end, + I2 = mk_pseudo_bc( + Cond, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + S = true, + I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2), + {I1 ++ I2, Map2, Data}. + +conv_branch(I, Map, Data) -> + %% = src1 - src2; if COND goto label + {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), + Cond = conv_branch_cond(hipe_rtl:branch_cond(I)), + I2 = mk_branch(Src1, Cond, Src2, + hipe_rtl:branch_true_label(I), + hipe_rtl:branch_false_label(I), + hipe_rtl:branch_pred(I)), + {I2, Map1, Data}. + +mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) -> + case hipe_arm:is_temp(Src1) of + true -> + case hipe_arm:is_temp(Src2) of + true -> + mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred); + _ -> + mk_branch_ri(Src1, Cond, Src2, TrueLab, FalseLab, Pred) + end; + _ -> + case hipe_arm:is_temp(Src2) of + true -> + NewCond = commute_cond(Cond), + mk_branch_ri(Src2, NewCond, Src1, TrueLab, FalseLab, Pred); + _ -> + mk_branch_ii(Src1, Cond, Src2, TrueLab, FalseLab, Pred) + end + end. + +mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) -> + io:format("~w: RTL branch with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Imm1, + mk_branch_ri(Tmp, Cond, Imm2, + TrueLab, FalseLab, Pred)). + +mk_branch_ri(Src, Cond, Imm, TrueLab, FalseLab, Pred) -> + {FixAm1,NewCmpOp,Am1} = fix_aluop_imm('cmp', Imm), + FixAm1 ++ mk_cmp_bc(NewCmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred). + +mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred) -> + mk_cmp_bc('cmp', Src1, Src2, Cond, TrueLab, FalseLab, Pred). + +mk_cmp_bc(CmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred) -> + [hipe_arm:mk_cmp(CmpOp, Src, Am1) | + mk_pseudo_bc(Cond, TrueLab, FalseLab, Pred)]. + +conv_call(I, Map, Data) -> + {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), + {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), + {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1), + ContLab = hipe_rtl:call_continuation(I), + ExnLab = hipe_rtl:call_fail(I), + Linkage = hipe_rtl:call_type(I), + I2 = mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage), + {I2, Map2, Data}. + +mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + case hipe_arm:is_prim(Fun) of + true -> + mk_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage); + false -> + mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) + end. + +mk_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) -> + case hipe_arm:prim_prim(Prim) of + %% no ARM-specific primops defined yet + _ -> + mk_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) + end. + +mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + %% The backend does not support pseudo_calls without a + %% continuation label, so we make sure each call has one. + {RealContLab, Tail} = + case mk_call_results(Dsts) of + [] -> + %% Avoid consing up a dummy basic block if the moves list + %% is empty, as is typical for calls to suspend/0. + %% This should be subsumed by a general "optimise the CFG" + %% module, and could probably be removed. + case ContLab of + [] -> + NewContLab = hipe_gensym:get_next_label(arm), + {NewContLab, [hipe_arm:mk_label(NewContLab)]}; + _ -> + {ContLab, []} + end; + Moves -> + %% Change the call to continue at a new basic block. + %% In this block move the result registers to the Dsts, + %% then continue at the call's original continuation. + NewContLab = hipe_gensym:get_next_label(arm), + case ContLab of + [] -> + %% This is just a fallthrough + %% No jump back after the moves. + {NewContLab, + [hipe_arm:mk_label(NewContLab) | + Moves]}; + _ -> + %% The call has a continuation. Jump to it. + {NewContLab, + [hipe_arm:mk_label(NewContLab) | + Moves ++ + [hipe_arm:mk_b_label(ContLab)]]} + end + end, + SDesc = hipe_arm:mk_sdesc(ExnLab, 0, length(Args), {}), + CallInsn = hipe_arm:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage), + {RegArgs,StkArgs} = split_args(Args), + mk_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])). + +mk_call_results([]) -> + []; +mk_call_results([Dst]) -> + RV = hipe_arm:mk_temp(hipe_arm_registers:return_value(), 'tagged'), + [hipe_arm:mk_pseudo_move(Dst, RV)]; +mk_call_results(Dsts) -> + exit({?MODULE,mk_call_results,Dsts}). + +mk_push_args(StkArgs, Tail) -> + case length(StkArgs) of + 0 -> + Tail; + NrStkArgs -> + [hipe_arm:mk_pseudo_call_prepare(NrStkArgs) | + mk_store_args(StkArgs, NrStkArgs * word_size(), Tail)] + end. + +mk_store_args([Arg|Args], PrevOffset, Tail) -> + Offset = PrevOffset - word_size(), + {Src,FixSrc} = + case hipe_arm:is_temp(Arg) of + true -> + {Arg, []}; + _ -> + Tmp = new_tagged_temp(), + {Tmp, mk_li(Tmp, Arg)} + end, + NewTail = hipe_arm:mk_store('str', Src, mk_sp(), Offset, 'new', Tail), + mk_store_args(Args, Offset, FixSrc ++ NewTail); +mk_store_args([], _, Tail) -> + Tail. + +conv_comment(I, Map, Data) -> + I2 = [hipe_arm:mk_comment(hipe_rtl:comment_text(I))], + {I2, Map, Data}. + +conv_enter(I, Map, Data) -> + {Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map), + {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0), + I2 = mk_enter(Fun, Args, hipe_rtl:enter_type(I)), + {I2, Map1, Data}. + +mk_enter(Fun, Args, Linkage) -> + Arity = length(Args), + {RegArgs,StkArgs} = split_args(Args), + move_actuals(RegArgs, + [hipe_arm:mk_pseudo_tailcall_prepare(), + hipe_arm:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]). + +conv_goto(I, Map, Data) -> + I2 = [hipe_arm:mk_b_label(hipe_rtl:goto_label(I))], + {I2, Map, Data}. + +conv_label(I, Map, Data) -> + I2 = [hipe_arm:mk_label(hipe_rtl:label_name(I))], + {I2, Map, Data}. + +conv_load(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), + {Base1, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), + LoadSize = hipe_rtl:load_size(I), + LoadSign = hipe_rtl:load_sign(I), + I2 = mk_load(Dst, Base1, Base2, LoadSize, LoadSign), + {I2, Map2, Data}. + +mk_load(Dst, Base1, Base2, LoadSize, LoadSign) -> + case {LoadSize,LoadSign} of + {byte,signed} -> + case hipe_arm:is_temp(Base1) of + true -> + case hipe_arm:is_temp(Base2) of + true -> + mk_ldrsb_rr(Dst, Base1, Base2); + _ -> + mk_ldrsb_ri(Dst, Base1, Base2) + end; + _ -> + case hipe_arm:is_temp(Base2) of + true -> + mk_ldrsb_ri(Dst, Base2, Base1); + _ -> + mk_ldrsb_ii(Dst, Base1, Base2) + end + end; + _ -> + LdOp = + case LoadSize of + byte -> 'ldrb'; + int32 -> 'ldr'; + word -> 'ldr' + end, + case hipe_arm:is_temp(Base1) of + true -> + case hipe_arm:is_temp(Base2) of + true -> + mk_load_rr(Dst, Base1, Base2, LdOp); + _ -> + mk_load_ri(Dst, Base1, Base2, LdOp) + end; + _ -> + case hipe_arm:is_temp(Base2) of + true -> + mk_load_ri(Dst, Base2, Base1, LdOp); + _ -> + mk_load_ii(Dst, Base1, Base2, LdOp) + end + end + end. + +mk_load_ii(Dst, Base1, Base2, LdOp) -> + io:format("~w: RTL load with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Base1, + mk_load_ri(Dst, Tmp, Base2, LdOp)). + +mk_load_ri(Dst, Base, Offset, LdOp) -> + hipe_arm:mk_load(LdOp, Dst, Base, Offset, 'new', []). + +mk_load_rr(Dst, Base1, Base2, LdOp) -> + Am2 = hipe_arm:mk_am2(Base1, '+', Base2), + [hipe_arm:mk_load(LdOp, Dst, Am2)]. + +mk_ldrsb_ii(Dst, Base1, Base2) -> + io:format("~w: RTL load signed byte with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Base1, + mk_ldrsb_ri(Dst, Tmp, Base2)). + +mk_ldrsb_ri(Dst, Base, Offset) when is_integer(Offset) -> + {Sign,AbsOffset} = + if Offset < 0 -> {'-', -Offset}; + true -> {'+', Offset} + end, + if AbsOffset =< 255 -> + Am3 = hipe_arm:mk_am3(Base, Sign, AbsOffset), + [hipe_arm:mk_ldrsb(Dst, Am3)]; + true -> + Index = new_untagged_temp(), + Am3 = hipe_arm:mk_am3(Base, Sign, Index), + mk_li(Index, AbsOffset, + [hipe_arm:mk_ldrsb(Dst, Am3)]) + end. + +mk_ldrsb_rr(Dst, Base1, Base2) -> + Am3 = hipe_arm:mk_am3(Base1, '+', Base2), + [hipe_arm:mk_ldrsb(Dst, Am3)]. + +conv_load_address(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map), + Addr = hipe_rtl:load_address_addr(I), + Type = hipe_rtl:load_address_type(I), + Src = {Addr,Type}, + I2 = [hipe_arm:mk_pseudo_li(Dst, Src)], + {I2, Map0, Data}. + +conv_load_atom(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map), + Src = hipe_rtl:load_atom_atom(I), + I2 = [hipe_arm:mk_pseudo_li(Dst, Src)], + {I2, Map0, Data}. + +conv_move(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), + {Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), + I2 = mk_move(Dst, Src, []), + {I2, Map1, Data}. + +mk_move(Dst, Src, Tail) -> + case hipe_arm:is_temp(Src) of + true -> [hipe_arm:mk_pseudo_move(Dst, Src) | Tail]; + _ -> mk_li(Dst, Src, Tail) + end. + +conv_return(I, Map, Data) -> + %% TODO: multiple-value returns + {[Arg], Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map), + I2 = mk_move(mk_rv(), Arg, + [hipe_arm:mk_pseudo_blr()]), + {I2, Map0, Data}. + +conv_store(I, Map, Data) -> + {Base, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), + {Offset, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), + StoreSize = hipe_rtl:store_size(I), + I2 = mk_store(Src, Base, Offset, StoreSize), + {I2, Map2, Data}. + +mk_store(Src, Base, Offset, StoreSize) -> + StOp = + case StoreSize of + byte -> 'strb'; + int32 -> 'str'; + word -> 'str' + end, + case hipe_arm:is_temp(Src) of + true -> + mk_store2(Src, Base, Offset, StOp); + _ -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src, + mk_store2(Tmp, Base, Offset, StOp)) + end. + +mk_store2(Src, Base, Offset, StOp) -> + case hipe_arm:is_temp(Offset) of + true -> + mk_store_rr(Src, Base, Offset, StOp); + _ -> + mk_store_ri(Src, Base, Offset, StOp) + end. + +mk_store_ri(Src, Base, Offset, StOp) -> + hipe_arm:mk_store(StOp, Src, Base, Offset, 'new', []). + +mk_store_rr(Src, Base, Index, StOp) -> + Am2 = hipe_arm:mk_am2(Base, '+', Index), + [hipe_arm:mk_store(StOp, Src, Am2)]. + +conv_switch(I, Map, Data) -> + Labels = hipe_rtl:switch_labels(I), + LMap = [{label,L} || L <- Labels], + {NewData, JTabLab} = + case hipe_rtl:switch_sort_order(I) of + [] -> + hipe_consttab:insert_block(Data, word, LMap); + SortOrder -> + hipe_consttab:insert_sorted_block( + Data, word, LMap, SortOrder) + end, + %% no immediates allowed here + {IndexR, Map1} = conv_dst(hipe_rtl:switch_src(I), Map), + JTabR = new_untagged_temp(), + I2 = + [hipe_arm:mk_pseudo_li(JTabR, {JTabLab,constant}), + hipe_arm:mk_pseudo_switch(JTabR, IndexR, Labels)], + {I2, Map1, NewData}. + +%%% Create a conditional branch. + +mk_pseudo_bc(Cond, TrueLabel, FalseLabel, Pred) -> + [hipe_arm:mk_pseudo_bc(Cond, TrueLabel, FalseLabel, Pred)]. + +%%% Load an integer constant into a register. + +mk_li(Dst, Value) -> mk_li(Dst, Value, []). + +mk_li(Dst, Value, Tail) -> + hipe_arm:mk_li(Dst, Value, Tail). + +%%% Convert an RTL condition code. + +conv_alub_cond(RtlAluOp, Cond) -> % may be unsigned, depends on aluop + %% Note: ARM has a non-standard definition of the Carry flag: + %% 'cmp', 'sub', and 'rsb' define Carry as the NEGATION of Borrow. + %% This means that the mapping between C/Z combinations and + %% conditions like "lower" and "higher" becomes non-standard. + %% (See conv_branch_cond/1 which maps ltu to lo/carry-clear, + %% while x86 maps ltu to b/carry-set.) + %% Here in conv_alub_cond/2 it means that the mapping of unsigned + %% conditions also has to consider the alu operator, since e.g. + %% 'add' and 'sub' behave differently with regard to Carry. + case {RtlAluOp, Cond} of % handle allowed alub unsigned conditions + {'add', 'ltu'} -> 'hs'; % add+ltu == unsigned overflow == carry set == hs + %% add more cases when needed + _ -> conv_cond(Cond) + end. + +conv_cond(Cond) -> % only signed + case Cond of + eq -> 'eq'; + ne -> 'ne'; + gt -> 'gt'; + ge -> 'ge'; + lt -> 'lt'; + le -> 'le'; + overflow -> 'vs'; + not_overflow -> 'vc' + end. + +conv_branch_cond(Cond) -> % may be unsigned + case Cond of + gtu -> 'hi'; + geu -> 'hs'; + ltu -> 'lo'; + leu -> 'ls'; + _ -> conv_cond(Cond) + end. + +%%% Commute an ARM condition code. + +commute_cond(Cond) -> % if x Cond y, then y commute_cond(Cond) x + case Cond of + 'eq' -> 'eq'; % ==, == + 'ne' -> 'ne'; % !=, != + 'gt' -> 'lt'; % >, < + 'ge' -> 'le'; % >=, <= + 'lt' -> 'gt'; % <, > + 'le' -> 'ge'; % <=, >= + 'hi' -> 'lo'; % >u, 'ls'; % >=u, <=u + 'lo' -> 'hi'; % u + 'ls' -> 'hs'; % <=u, >=u + %% vs/vc: n/a + _ -> exit({?MODULE,commute_cond,Cond}) + end. + +%%% Split a list of formal or actual parameters into the +%%% part passed in registers and the part passed on the stack. +%%% The parameters passed in registers are also tagged with +%%% the corresponding registers. + +split_args(Args) -> + split_args(0, hipe_arm_registers:nr_args(), Args, []). + +split_args(I, N, [Arg|Args], RegArgs) when I < N -> + Reg = hipe_arm_registers:arg(I), + Temp = hipe_arm:mk_temp(Reg, 'tagged'), + split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]); +split_args(_, _, StkArgs, RegArgs) -> + {RegArgs, StkArgs}. + +%%% Convert a list of actual parameters passed in +%%% registers (from split_args/1) to a list of moves. + +move_actuals([{Src,Dst}|Actuals], Rest) -> + move_actuals(Actuals, mk_move(Dst, Src, Rest)); +move_actuals([], Rest) -> + Rest. + +%%% Convert a list of formal parameters passed in +%%% registers (from split_args/1) to a list of moves. + +move_formals([{Dst,Src}|Formals], Rest) -> + move_formals(Formals, [hipe_arm:mk_pseudo_move(Dst, Src) | Rest]); +move_formals([], Rest) -> + Rest. + +%%% Convert a 'fun' operand (MFA, prim, or temp) + +conv_fun(Fun, Map) -> + case hipe_rtl:is_var(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case hipe_rtl:is_reg(Fun) of + true -> + conv_dst(Fun, Map); + false -> + if is_atom(Fun) -> + {hipe_arm:mk_prim(Fun), Map}; + true -> + {conv_mfa(Fun), Map} + end + end + end. + +%%% Convert an MFA operand. + +conv_mfa({M,F,A}) when is_atom(M), is_atom(F), is_integer(A) -> + hipe_arm:mk_mfa(M, F, A). + +%%% Convert an RTL source operand (imm/var/reg). +%%% Returns a temp or a naked integer. + +conv_src(Opnd, Map) -> + case hipe_rtl:is_imm(Opnd) of + true -> + Value = hipe_rtl:imm_value(Opnd), + if is_integer(Value) -> + {Value, Map} + end; + false -> + conv_dst(Opnd, Map) + end. + +conv_src_list([O|Os], Map) -> + {V, Map1} = conv_src(O, Map), + {Vs, Map2} = conv_src_list(Os, Map1), + {[V|Vs], Map2}; +conv_src_list([], Map) -> + {[], Map}. + +%%% Convert an RTL destination operand (var/reg). + +conv_dst(Opnd, Map) -> + {Name, Type} = + case hipe_rtl:is_var(Opnd) of + true -> + {hipe_rtl:var_index(Opnd), 'tagged'}; + false -> + case hipe_rtl:is_fpreg(Opnd) of + true -> + {hipe_rtl:fpreg_index(Opnd), 'double'}; + false -> + {hipe_rtl:reg_index(Opnd), 'untagged'} + end + end, + IsPrecoloured = + case Type of + 'double' -> false; %hipe_arm_registers:is_precoloured_fpr(Name); + _ -> hipe_arm_registers:is_precoloured_gpr(Name) + end, + case IsPrecoloured of + true -> + {hipe_arm:mk_temp(Name, Type), Map}; + false -> + case vmap_lookup(Map, Opnd) of + {value, NewTemp} -> + {NewTemp, Map}; + _ -> + NewTemp = hipe_arm:mk_new_temp(Type), + {NewTemp, vmap_bind(Map, Opnd, NewTemp)} + end + end. + +conv_dst_list([O|Os], Map) -> + {Dst, Map1} = conv_dst(O, Map), + {Dsts, Map2} = conv_dst_list(Os, Map1), + {[Dst|Dsts], Map2}; +conv_dst_list([], Map) -> + {[], Map}. + +conv_formals(Os, Map) -> + conv_formals(hipe_arm_registers:nr_args(), Os, Map, []). + +conv_formals(N, [O|Os], Map, Res) -> + Type = + case hipe_rtl:is_var(O) of + true -> 'tagged'; + _ -> 'untagged' + end, + Dst = + if N > 0 -> hipe_arm:mk_new_temp(Type); % allocatable + true -> hipe_arm:mk_new_nonallocatable_temp(Type) + end, + Map1 = vmap_bind(Map, O, Dst), + conv_formals(N-1, Os, Map1, [Dst|Res]); +conv_formals(_, [], Map, Res) -> + {lists:reverse(Res), Map}. + +%%% Create a temp representing the stack pointer register. + +mk_sp() -> + hipe_arm:mk_temp(hipe_arm_registers:stack_pointer(), 'untagged'). + +%%% Create a temp representing the return value register. + +mk_rv() -> + hipe_arm:mk_temp(hipe_arm_registers:return_value(), 'tagged'). + +%%% new_untagged_temp -- conjure up an untagged scratch reg + +new_untagged_temp() -> + hipe_arm:mk_new_temp('untagged'). + +%%% new_tagged_temp -- conjure up a tagged scratch reg + +new_tagged_temp() -> + hipe_arm:mk_new_temp('tagged'). + +%%% Map from RTL var/reg operands to temps. + +vmap_empty() -> + gb_trees:empty(). + +vmap_lookup(Map, Key) -> + gb_trees:lookup(Key, Map). + +vmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +word_size() -> + 4. diff --git a/lib/hipe/cerl/Makefile b/lib/hipe/cerl/Makefile new file mode 100644 index 0000000000..fb7ca1153b --- /dev/null +++ b/lib/hipe/cerl/Makefile @@ -0,0 +1,107 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2003-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = cerl_cconv cerl_closurean cerl_hipeify cerl_hybrid_transform \ + cerl_lib cerl_messagean cerl_pmatch cerl_prettypr cerl_to_icode \ + cerl_typean erl_bif_types erl_types + +HRL_FILES= cerl_hipe_primops.hrl +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +inline +warn_exported_vars +warn_unused_import +warn_missing_spec# +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/cerl + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/cerl + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +$(EBIN)/cerl_to_icode.beam: cerl_hipe_primops.hrl ../icode/hipe_icode_primops.hrl +$(EBIN)/cerl_hipeify.beam: cerl_hipe_primops.hrl +$(EBIN)/cerl_lambdalift.beam: cerl_hipe_primops.hrl +$(EBIN)/erl_bif_types.beam: ../icode/hipe_icode_primops.hrl diff --git a/lib/hipe/cerl/cerl_cconv.erl b/lib/hipe/cerl/cerl_cconv.erl new file mode 100644 index 0000000000..cf4d317b0d --- /dev/null +++ b/lib/hipe/cerl/cerl_cconv.erl @@ -0,0 +1,777 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @author Richard Carlsson +%% @copyright 2000-2004 Richard Carlsson +%% @doc Closure conversion of Core Erlang modules. This is done as a +%% step in the translation from Core Erlang down to HiPE Icode, and is +%% very much tied to the calling conventions used in HiPE native code. +%% @see cerl_to_icode + +%% Some information about function closures in Beam and HiPE: +%% +%% - In Beam, each fun-expression is lifted to a top-level function such +%% that the arity of the new function is equal to the arity of the fun +%% *plus* the number of free variables. The original fun-expression is +%% replaced by a call to 'make_fun' which takes the *label* of the new +%% function and the number of free variables as arguments (the arity +%% of the fun can be found via the label). When a call is made through +%% the closure, the free variables are extracted from the closure by +%% the 'call_fun' operation and are placed in the X registers +%% following the ones used for the normal parameters; then the call is +%% made to the function label. +%% +%% - In HiPE (when compiling from Beam bytecode), the Beam-to-Icode +%% translation rewrites the fun-functions (those referenced by +%% 'make_fun' operations) so that the code expects only the normal +%% parameters, plus *one* extra parameter containing the closure +%% itself, and then immediately extracts the free variables from the +%% closure - the code knows how many free variables it expects. +%% However, the arity part of the function name is *not* changed; +%% thus, the native code and the Beam code still use the same +%% fun-table entry. The arity value used in native-code 'make_fun' +%% operations should therefore be the same as in Beam, i.e., the sum +%% of the number of parameters and the number of free variables. + +-module(cerl_cconv). + +-export([transform/2]). +-export([core_transform/2]). + +-include("cerl_hipe_primops.hrl"). + +%% A descriptor for top-level and letrec-bound functions. (Top-level +%% functions always have an empty list of free variables.) The 'name' +%% field is the name of the lifted function, and is thus unique over the +%% whole module. + +-record(function, {name :: {atom(), arity()}, free}). + +%% A record for holding fun-information (if such information is attached +%% as an annotation on a fun, it should preferably be preserved). + +-record(fun_info, {name :: atom(), + id = 0 :: integer(), + hash = 0 :: integer()}). + +%% @spec core_transform(Module::cerl_records(), Options::[term()]) -> +%% cerl_records() +%% +%% @doc Transforms a module represented by records. See +%% transform/2 for details. +%% +%%

Use the compiler option {core_transform, cerl_cconv} +%% to insert this function as a compilation pass.

+%% +%% @see transform/2 + +-spec core_transform(cerl:cerl(), [term()]) -> cerl:cerl(). + +core_transform(M, Opts) -> + cerl:to_records(transform(cerl:from_records(M), Opts)). + + +%% @spec transform(Module::cerl(), Options::[term()]) -> cerl() +%% +%% cerl() = cerl:cerl() +%% +%% @doc Rewrites a Core Erlang module so that all fun-expressions +%% (lambda expressions) in the code are in top level function +%% definitions, and the operators of all `apply'-expressions are names +%% of such top-level functions. The primitive operations `make_fun' and +%% `call_fun' are inserted in the code to create and apply functional +%% values; this transformation is known as "Closure Conversion" +%% +%%

See the module {@link cerl_to_icode} for details.

+ +-spec transform(cerl:c_module(), [term()]) -> cerl:c_module(). + +transform(E, _Options) -> + M = cerl:module_name(E), + S0 = s__new(cerl:atom_val(M)), + {Defs1, S1} = module_defs(cerl:module_defs(E), env__new(), + ren__new(), S0), + Defs2 = lists:reverse(s__get_defs(S1) ++ Defs1), + cerl:update_c_module(E, M, cerl:module_exports(E), + cerl:module_attrs(E), Defs2). + +%% Note that the environment is defined on the renamed variables. + +expr(E, Env, Ren, S0) -> + case cerl:type(E) of + literal -> + {E, S0}; + var -> + var(E, Env, Ren, S0); + values -> + {Es, S1} = expr_list(cerl:values_es(E), Env, Ren, S0), + {cerl:update_c_values(E, Es), S1}; + cons -> + {E1, S1} = expr(cerl:cons_hd(E), Env, Ren, S0), + {E2, S2} = expr(cerl:cons_tl(E), Env, Ren, S1), + {cerl:update_c_cons(E, E1, E2), S2}; + tuple -> + {Es, S1} = expr_list(cerl:tuple_es(E), Env, Ren, S0), + {cerl:update_c_tuple(E, Es), S1}; + 'let' -> + {A, S1} = expr(cerl:let_arg(E), Env, Ren, S0), + Vs = cerl:let_vars(E), + {Vs1, Env1, Ren1} = bind_vars(Vs, Env, Ren), + {B, S2} = expr(cerl:let_body(E), Env1, Ren1, S1), + {cerl:update_c_let(E, Vs1, A, B), S2}; + seq -> + {A, S1} = expr(cerl:seq_arg(E), Env, Ren, S0), + {B, S2} = expr(cerl:seq_body(E), Env, Ren, S1), + {cerl:update_c_seq(E, A, B), S2}; + apply -> + apply_expr(E, Env, Ren, S0); + call -> + {M, S1} = expr(cerl:call_module(E), Env, Ren, S0), + {N, S2} = expr(cerl:call_name(E), Env, Ren, S1), + {As, S3} = expr_list(cerl:call_args(E), Env, Ren, S2), + {cerl:update_c_call(E, M, N, As), S3}; + primop -> + {As, S1} = expr_list(cerl:primop_args(E), Env, Ren, S0), + N = cerl:primop_name(E), + {cerl:update_c_primop(E, N, As), S1}; + 'case' -> + {A, S1} = expr(cerl:case_arg(E), Env, Ren, S0), + {Cs, S2} = expr_list(cerl:case_clauses(E), Env, Ren, S1), + {cerl:update_c_case(E, A, Cs), S2}; + clause -> + Vs = cerl:clause_vars(E), + {_, Env1, Ren1} = bind_vars(Vs, Env, Ren), + %% Visit patterns to rename variables. + Ps = pattern_list(cerl:clause_pats(E), Env1, Ren1), + {G, S1} = expr(cerl:clause_guard(E), Env1, Ren1, S0), + {B, S2} = expr(cerl:clause_body(E), Env1, Ren1, S1), + {cerl:update_c_clause(E, Ps, G, B), S2}; + 'fun' -> + fun_expr(E, Env, Ren, S0); + 'receive' -> + {Cs, S1} = expr_list(cerl:receive_clauses(E), Env, Ren, S0), + {T, S2} = expr(cerl:receive_timeout(E), Env, Ren, S1), + {A, S3} = expr(cerl:receive_action(E), Env, Ren, S2), + {cerl:update_c_receive(E, Cs, T, A), S3}; + 'try' -> + {A, S1} = expr(cerl:try_arg(E), Env, Ren, S0), + Vs = cerl:try_vars(E), + {Vs1, Env1, Ren1} = bind_vars(Vs, Env, Ren), + {B, S2} = expr(cerl:try_body(E), Env1, Ren1, S1), + Evs = cerl:try_evars(E), + {Evs1, Env2, Ren2} = bind_vars(Evs, Env, Ren), + {H, S3} = expr(cerl:try_handler(E), Env2, Ren2, S2), + {cerl:update_c_try(E, A, Vs1, B, Evs1, H), S3}; + 'catch' -> + {B, S1} = expr(cerl:catch_body(E), Env, Ren, S0), + {cerl:update_c_catch(E, B), S1}; + letrec -> + {Env1, Ren1, S1} = letrec_defs(cerl:letrec_defs(E), Env, + Ren, S0), + expr(cerl:letrec_body(E), Env1, Ren1, S1); + binary -> + {Segs, S1} = expr_list(cerl:binary_segments(E), Env, Ren, S0), + {cerl:update_c_binary(E, Segs),S1}; + bitstr -> + {E1,S1} = expr(cerl:bitstr_val(E), Env, Ren, S0), + {E2,S2} = expr(cerl:bitstr_size(E), Env, Ren, S1), + E3 = cerl:bitstr_unit(E), + E4 = cerl:bitstr_type(E), + E5 = cerl:bitstr_flags(E), + {cerl:update_c_bitstr(E, E1, E2, E3, E4, E5), S2} + end. + +expr_list([E | Es], Env, Ren, S0) -> + {E1, S1} = expr(E, Env, Ren, S0), + {Es1, S2} = expr_list(Es, Env, Ren, S1), + {[E1 | Es1], S2}; +expr_list([], _, _, S) -> + {[], S}. + +pattern(E, Env, Ren) -> + case cerl:type(E) of + literal -> + E; + var -> + cerl:update_c_var(E, ren__map(cerl:var_name(E), Ren)); + values -> + Es = pattern_list(cerl:values_es(E), Env, Ren), + cerl:update_c_values(E, Es); + cons -> + E1 = pattern(cerl:cons_hd(E), Env, Ren), + E2 = pattern(cerl:cons_tl(E), Env, Ren), + cerl:update_c_cons(E, E1, E2); + tuple -> + Es = pattern_list(cerl:tuple_es(E), Env, Ren), + cerl:update_c_tuple(E, Es); + binary -> + Es = pattern_list(cerl:binary_segments(E), Env, Ren), + cerl:update_c_binary(E, Es); + bitstr -> + E1 = pattern(cerl:bitstr_val(E), Env, Ren), + E2 = pattern(cerl:bitstr_size(E), Env, Ren), + E3 = cerl:bitstr_unit(E), + E4 = cerl:bitstr_type(E), + E5 = cerl:bitstr_flags(E), + cerl:update_c_bitstr(E, E1, E2, E3, E4, E5); + alias -> + V = pattern(cerl:alias_var(E), Env, Ren), + P = pattern(cerl:alias_pat(E), Env, Ren), + cerl:update_c_alias(E, V, P) + end. + +pattern_list([E | Es], Env, Ren) -> + [pattern(E, Env, Ren) | pattern_list(Es, Env, Ren)]; +pattern_list([], _, _) -> + []. + +%% First we set up the environment, binding the function names to the +%% corresponding descriptors. (For the top level functions, we don't +%% want to cause renaming.) After that, we can visit each function body +%% and return the new function definitions and the final state. + +module_defs(Ds, Env, Ren, S) -> + {Env1, S1} = bind_module_defs(Ds, Env, S), + module_defs_1(Ds, [], Env1, Ren, S1). + +bind_module_defs([{V, _F} | Ds], Env, S) -> + Name = cerl:var_name(V), + check_function_name(Name, S), + S1 = s__add_function_name(Name, S), + Info = #function{name = Name, free = []}, + Env1 = env__bind(Name, Info, Env), + bind_module_defs(Ds, Env1, S1); +bind_module_defs([], Env, S) -> + {Env, S}. + +%% Checking that top-level function names are not reused + +check_function_name(Name, S) -> + case s__is_function_name(Name, S) of + true -> + error_msg("multiple definitions of function `~w'.", [Name]), + exit(error); + false -> + ok + end. + +%% We must track which top-level function we are in, for name generation +%% purposes. + +module_defs_1([{V, F} | Ds], Ds1, Env, Ren, S) -> + S1 = s__enter_function(cerl:var_name(V), S), + %% The parameters should never need renaming, but this is easiest. + {Vs, Env1, Ren1} = bind_vars(cerl:fun_vars(F), Env, Ren), + {B, S2} = expr(cerl:fun_body(F), Env1, Ren1, S1), + F1 = cerl:update_c_fun(F, Vs, B), + module_defs_1(Ds, [{V, F1} | Ds1], Env, Ren, S2); +module_defs_1([], Ds, _, _, S) -> + {Ds, S}. + +%% First we must create the new function names and set up the +%% environment with descriptors for the letrec-bound functions. +%% +%% Since we never shadow variables, the free variables of any +%% letrec-bound fun can always be referenced directly wherever the +%% fun-variable itself is referenced - this is important when we create +%% direct calls to lifted letrec-bound functions, and is the main reason +%% why we do renaming. For example: +%% +%% 'f'/0 = fun () -> +%% let X = 42 in +%% letrec 'g'/1 = fun (Y) -> {X, Y} in +%% let X = 17 in +%% apply 'g'/1(X) +%% +%% will become something like +%% +%% 'f'/0 = fun () -> +%% let X = 42 in +%% let X1 = 17 in +%% apply 'g'/2(X1, X) +%% 'g'/2 = fun (Y, X) -> {X, Y} +%% +%% where the innermost X has been renamed so that the outermost X can be +%% referenced in the call to the lifted function 'g'/2. (Renaming must +%% of course also be applied also to letrec-bound function variables.) +%% +%% Furthermore, if some variable X occurs free in a fun 'f'/N, and 'f'/N +%% it its turn occurs free in a fun 'g'/M, then we transitively count X +%% as free in 'g'/M, even if it has no occurrence there. This allows us +%% to rewrite code such as the following: +%% +%% 'f'/0 = fun () -> +%% let X = 42 in +%% letrec 'g'/1 = fun (Y) -> {X, Y} +%% 'h'/1 = fun (Z) -> {'bar', apply 'g'/1(Z)} +%% in let X = 17 in +%% apply 'h'/1(X) +%% +%% into something like: +%% +%% 'f'/0 = fun () -> +%% let X = 42 in +%% let X1 = 17 in +%% apply 'h'/2(X1, X) +%% 'g'/2 = fun (Y, X) -> {X, Y} +%% 'h'/2 = fun (Z, X) -> {'bar', apply 'g'/2(Z, X)} +%% +%% which uses only direct calls. The drawback is that if the occurrence +%% of 'f'/N in 'g'/M instead would cause a closure to be created, then +%% that closure could have been formed earlier (at the point where 'f'/N +%% was defined), rather than passing on all the free variables of 'f'/N +%% into 'g'/M. Since we must know the interface to 'g'/M (i.e., the +%% total number of parameters) before we begin processing its body, and +%% the interface depends on what we do to the body (and functions can be +%% mutually recursive), this problem can only be solved by finding out +%% _what_ we are going to do before we can even define the interfaces of +%% the functions, by looking at _how_ variables are being referenced +%% when we look for free variables. Currently, we don't do that. + +letrec_defs(Ds, Env, Ren, S) -> + {Env1, Ren1, S1} = bind_letrec_defs(Ds, Env, Ren, S), + {Env1, Ren1, lift_letrec_defs(Ds, Env1, Ren1, S1)}. + +%% Note: it is important that we store the *renamed* free variables for +%% each function to be lifted. + +bind_letrec_defs(Ds, Env, Ren, S) -> + bind_letrec_defs(Ds, free_in_defs(Ds, Env, Ren), Env, Ren, S). + +bind_letrec_defs([{V, _F} | Ds], Free, Env, Ren, S) -> + Name = cerl:var_name(V), + {Env1, Ren1, S1} = bind_letrec_fun(Name, Free, Env, Ren, S), + bind_letrec_defs(Ds, Free, Env1, Ren1, S1); +bind_letrec_defs([], _Free, Env, Ren, S) -> + {Env, Ren, S}. + +bind_letrec_fun(Name = {_,A}, Free, Env, Ren, S) -> + A1 = A + length(Free), + {Name1, Ren1, S1} = rename_letrec_fun(Name, A1, Env, Ren, S), + Info = #function{name = Name1, free = Free}, + {env__bind(Name1, Info, Env), Ren1, S1}. + +%% Creating a new name for the lifted function that is informative, is +%% not in the environment, and is not already used for some other lifted +%% function. + +rename_letrec_fun(Name, NewArity, Env, Ren, S) -> + {New, S1} = new_letrec_fun_name(Name, NewArity, Env, S), + {New, ren__add(Name, New, Ren), s__add_function_name(New, S1)}. + +new_letrec_fun_name({N,_}, Arity, Env, S) -> + {FName, FArity} = s__get_function(S), + Base = fun_name_base(FName, FArity) + ++ "-letrec-" ++ atom_to_list(N) ++ "-", + %% We try the base as name first. This will usually work. + Name = {list_to_atom(Base), Arity}, + case env__is_defined(Name, Env) of + true -> + new_fun_name(Base, Arity, Env, S); + false -> + case s__is_function_name(Name, S) of + true -> + new_fun_name(Base, Arity, Env, S); + false -> + {Name, S} + end + end. + +%% Processing the actual functions of a letrec + +lift_letrec_defs([{V, F} | Ds], Env, Ren, S) -> + Info = env__get(ren__map(cerl:var_name(V), Ren), Env), + S1 = lift_letrec_fun(F, Info, Env, Ren, S), + lift_letrec_defs(Ds, Env, Ren, S1); +lift_letrec_defs([], _, _, S) -> + S. + +%% The direct calling convention for letrec-defined functions is to pass +%% the free variables as additional parameters. Note that the free +%% variables (if any) are already in the environment when we get here. +%% We only have to append them to the parameter list so that they are in +%% scope in the lifted function; they are already renamed. +%% +%% It should not be possible for the original parameters to clash with +%% the free ones (in that case they cannot be free), but we do the full +%% bind-and-rename anyway, since it's easiest. + +lift_letrec_fun(F, Info, Env, Ren, S) -> + {Vs, Env1, Ren1} = bind_vars(cerl:fun_vars(F), Env, Ren), + {B, S1} = expr(cerl:fun_body(F), Env1, Ren1, S), + Fs = [cerl:c_var(V) || V <- Info#function.free], + F1 = cerl:c_fun(Vs ++ Fs, B), + s__add_def(cerl:c_var(Info#function.name), F1, S1). + +%% This is a simple way of handling mutual recursion in a group of +%% letrec-definitions: classify a variable as free in all the functions +%% if it is free in any of them. (The preferred way would be to actually +%% take the transitive closure for each function.) + +free_in_defs(Ds, Env, Ren) -> + {Vs, Fs} = free_in_defs(Ds, [], [], Ren), + closure_vars(ordsets:subtract(Fs, Vs), Env, Ren). + +free_in_defs([{V, F} | Ds], Vs, Free, Ren) -> + Fs = cerl_trees:free_variables(F), + free_in_defs(Ds, [ren__map(cerl:var_name(V), Ren) | Vs], Fs ++ Free, + Ren); +free_in_defs([], Vs, Free, _Ren) -> + {ordsets:from_list(Vs), ordsets:from_list(Free)}. + +%% Replacing function variables with the free variables of the function + +closure_vars(Vs, Env, Ren) -> + closure_vars(Vs, [], Env, Ren). + +closure_vars([V = {_, _} | Vs], As, Env, Ren) -> + V1 = ren__map(V, Ren), + case env__lookup(V1, Env) of + {ok, #function{free = Vs1}} -> + closure_vars(Vs, Vs1 ++ As, Env, Ren); + _ -> + closure_vars(Vs, As, Env, Ren) + end; +closure_vars([V | Vs], As, Env, Ren) -> + closure_vars(Vs, [V | As], Env, Ren); +closure_vars([], As, _Env, _Ren) -> + ordsets:from_list(As). + +%% We use the no-shadowing strategy, renaming variables on the fly and +%% only when necessary to uphold the invariant. + +bind_vars(Vs, Env, Ren) -> + bind_vars(Vs, [], Env, Ren). + +bind_vars([V | Vs], Vs1, Env, Ren) -> + Name = cerl:var_name(V), + {Name1, Ren1} = rename_var(Name, Env, Ren), + bind_vars(Vs, [cerl:update_c_var(V, Name1) | Vs1], + env__bind(Name1, variable, Env), Ren1); +bind_vars([], Vs, Env, Ren) -> + {lists:reverse(Vs), Env, Ren}. + +rename_var(Name, Env, Ren) -> + case env__is_defined(Name, Env) of + false -> + {Name, Ren}; + true -> + New = env__new_name(Env), + {New, ren__add(Name, New, Ren)} + end. + +%% This handles variable references *except* in function application +%% operator positions (see apply_expr/4). +%% +%% The Beam compiler annotates function-variable references with 'id' +%% info, eventually transforming a direct reference such as "fun f/2" +%% into a new fun-expression "fun (X1,X2) -> apply f/2(X1,X2)" for which +%% the info is used to create the lifted function as for any other fun. +%% We do the same thing for function-bound variables. + +var(V, Env, Ren, S) -> + Name = ren__map(cerl:var_name(V), Ren), + case lookup_var(Name, Env) of + #function{name = F, free = Vs} -> + {_, Arity} = F, + Vs1 = make_vars(Arity), + C = cerl:c_apply(cerl:c_var(F), Vs1), + E = cerl:ann_c_fun(cerl:get_ann(V), Vs1, C), + fun_expr_1(E, Vs, Env, Ren, S); + variable -> + {cerl:update_c_var(V, Name), S} + end. + +lookup_var(V, Env) -> + case env__lookup(V, Env) of + {ok, X} -> + X; + error -> + error_msg("unbound variable `~P'.", [V, 5]), + exit(error) + end. + +make_vars(N) when N > 0 -> + [cerl:c_var(list_to_atom("X" ++ integer_to_list(N))) + | make_vars(N - 1)]; +make_vars(0) -> + []. + +%% All funs that are not bound by module or letrec definitions will be +%% rewritten to create explicit closures using "make fun". We don't +%% currently track ordinary let-bindings of funs, as in "let F = fun +%% ... in ...apply F(...)...". +%% +%% Note that we (currently) follow the Beam naming convention, including +%% the free variables in the arity of the name, even though the actual +%% function typically expects a different number of parameters. + +fun_expr(F, Env, Ren, S) -> + Free = closure_vars(cerl_trees:free_variables(F), Env, Ren), + Vs = [cerl:c_var(V) || V <- Free], + fun_expr_1(F, Vs, Env, Ren, S). + +fun_expr_1(F, Vs, Env, Ren, S) -> + Arity = cerl:fun_arity(F) + length(Vs), % for the name only + {Info, S1} = fun_info(F, Env, S), + Name = {Info#fun_info.name, Arity}, + S2 = lift_fun(Name, F, Vs, Env, Ren, S1), + {make_fun_primop(Name, Vs, Info, F, S2), S2}. + +make_fun_primop({Name, Arity}, Free, #fun_info{id = Id, hash = Hash}, + F, S) -> + Module = s__get_module_name(S), + cerl:update_c_primop(F, cerl:c_atom(?PRIMOP_MAKE_FUN), + [cerl:c_atom(Module), + cerl:c_atom(Name), + cerl:c_int(Arity), + cerl:c_int(Hash), + cerl:c_int(Id), + cerl:make_list(Free)]). + +%% Getting attached fun-info, if present; otherwise making it up. + +fun_info(E, Env, S) -> + case lists:keyfind(id, 1, cerl:get_ann(E)) of + {id, {Id, H, Name}} -> + %% io:fwrite("Got fun-info: ~w: {~w,~w}.\n", [Name,Id,H]), + {#fun_info{name = Name, id = Id, hash = H}, S}; + _ -> + io:fwrite("Warning - fun not annotated: " + "making up new name.\n"), % for now + {{Name,_Arity}, S1} = new_fun_name(E, Env, S), + {#fun_info{name = Name, id = 0, hash = 0}, S1} + end. + +fun_name_base(FName, FArity) -> + "-" ++ atom_to_list(FName) ++ "/" ++ integer_to_list(FArity). + +%% Generate a name for the new function, using a the same convention +%% that is used by the Beam compiler. +new_fun_name(F, Env, S) -> + {FName, FArity} = s__get_function(S), + Base = fun_name_base(FName, FArity) ++ "-fun-", + Arity = cerl:fun_arity(F), + new_fun_name(Base, Arity, Env, S). + +%% Creating a new function name that is not in the environment and is +%% not already used for some other lifted function. + +new_fun_name(Base, Arity, Env, S) -> + F = fun (N) -> + {list_to_atom(Base ++ integer_to_list(N)), Arity} + end, + new_fun_name(Base, Arity, Env, S, F). + +new_fun_name(Base, Arity, Env, S, F) -> + %% Note that repeated calls to env__new_function_name/2 will yield + %% different names even though Env and F are the same. + Name = env__new_function_name(F, Env), + case s__is_function_name(Name, S) of + true -> + new_fun_name(Base, Arity, Env, S, F); + false -> + {Name, S} + end. + +%% This lifts the fun to a new top-level function which uses the calling +%% convention for closures, with the closure itself as the final +%% parameter. Note that the free variables (if any) are already in the +%% environment. +%% +%% It should not be possible for the original parameters to clash with +%% the free ones (in that case they cannot be free), but we do the full +%% bind-and-rename anyway, since it's easiest. + +lift_fun(Name, F, Free, Env, Ren, S) -> + %% If the name is already in the list of top-level definitions, we + %% assume we have already generated this function, and do not need + %% to do it again (typically, this happens for 'fun f/n'-variables + %% that have been duplicated before being rewritten to actual + %% fun-expressions, and the name is taken from their annotations). + %% Otherwise, we add the name to the list. + case s__is_function_name(Name, S) of + true -> + S; + false -> + S1 = s__add_function_name(Name, S), + lift_fun_1(Name, F, Free, Env, Ren, S1) + end. + +lift_fun_1(Name, F, Free, Env, Ren, S) -> + %% (The original parameters must be added to the environment before + %% we generate the new variable for the closure parameter.) + {Vs, Env1, Ren1} = bind_vars(cerl:fun_vars(F), Env, Ren), + V = env__new_name(Env1), + Env2 = env__bind(V, variable, Env1), + {B, S1} = expr(cerl:fun_body(F), Env2, Ren1, S), + %% We unpack all free variables from the closure upon entering. + %% (Adding this to the body before we process it would introduce + %% unnecessary, although harmless, renaming of the free variables.) + Es = closure_elements(length(Free), cerl:c_var(V)), + B1 = cerl:c_let(Free, cerl:c_values(Es), B), + %% The closure itself is passed as the last argument. The new + %% function is annotated as being a closure-call entry point. + E = cerl:ann_c_fun([closure, {closure_orig_arity, cerl:fun_arity(F)}], Vs ++ [cerl:c_var(V)], B1), + s__add_def(cerl:c_var(Name), E, S1). + +closure_elements(N, V) -> + closure_elements(N, N + 1, V). + +closure_elements(0, _, _) -> []; +closure_elements(N, M, V) -> + [cerl:c_primop(cerl:c_atom(?PRIMOP_FUN_ELEMENT), + [cerl:c_int(M - N), V]) + | closure_elements(N - 1, M, V)]. + + +%% Function applications must be rewritten depending on the +%% operator. For a call to a known top-level function or letrec-bound +%% function, we make a direct call, passing the free variables as extra +%% parameters (we know they are in scope, since variables may not be +%% shadowed). Otherwise, we create an "apply fun" primop call that +%% expects a closure. + +apply_expr(E, Env, Ren, S) -> + {As, S1} = expr_list(cerl:apply_args(E), Env, Ren, S), + Op = cerl:apply_op(E), + case cerl:is_c_var(Op) of + true -> + Name = ren__map(cerl:var_name(Op), Ren), + case lookup_var(Name, Env) of + #function{name = F, free = Vs} -> + Vs1 = As ++ [cerl:c_var(V) || V <- Vs], + {cerl:update_c_apply(E, cerl:c_var(F), Vs1), S1}; + variable -> + apply_expr_1(E, Op, As, Env, Ren, S1) + end; + _ -> + apply_expr_1(E, Op, As, Env, Ren, S1) + end. + +%% Note that this primop call only communicates the necessary +%% information to the core-to-icode stage, which rewrites it to use the +%% real calling convention for funs. + +apply_expr_1(E, Op, As, Env, Ren, S) -> + {Op1, S1} = expr(Op, Env, Ren, S), + Call = cerl:update_c_primop(E, cerl:c_atom(?PRIMOP_APPLY_FUN), + [Op1, cerl:make_list(As)]), + {Call, S1}. + + +%% --------------------------------------------------------------------- +%% Environment + +env__new() -> + rec_env:empty(). + +env__bind(Key, Value, Env) -> + rec_env:bind(Key, Value, Env). + +env__lookup(Key, Env) -> + rec_env:lookup(Key, Env). + +env__get(Key, Env) -> + rec_env:get(Key, Env). + +env__is_defined(Key, Env) -> + rec_env:is_defined(Key, Env). + +env__new_name(Env) -> + rec_env:new_key(Env). + +env__new_function_name(F, Env) -> + rec_env:new_key(F, Env). + + +%% --------------------------------------------------------------------- +%% Renaming + +ren__new() -> + dict:new(). + +ren__add(Key, Value, Ren) -> + dict:store(Key, Value, Ren). + +ren__map(Key, Ren) -> + case dict:find(Key, Ren) of + {ok, Value} -> + Value; + error -> + Key + end. + + +%% --------------------------------------------------------------------- +%% State + +-record(state, {module :: module(), function :: {atom(), arity()}, + names, refs, defs = []}). + +s__new(Module) -> + #state{module = Module, names = sets:new(), refs = dict:new()}. + +s__add_function_name(Name, S) -> + S#state{names = sets:add_element(Name, S#state.names)}. + +s__is_function_name(Name, S) -> + sets:is_element(Name, S#state.names). + +s__get_module_name(S) -> + S#state.module. + +s__enter_function(F, S) -> + S#state{function = F}. + +s__get_function(S) -> + S#state.function. + +s__add_def(V, F, S) -> + S#state{defs = [{V, F} | S#state.defs]}. + +s__get_defs(S) -> + S#state.defs. + + +%% --------------------------------------------------------------------- +%% Reporting + +%% internal_error_msg(S) -> +%% internal_error_msg(S, []). + +%% internal_error_msg(S, Vs) -> +%% error_msg(lists:concat(["Internal error: ", S]), Vs). + +%% error_msg(S) -> +%% error_msg(S, []). + +error_msg(S, Vs) -> + error_logger:error_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs). + +%% warning_msg(S) -> +%% warning_msg(S, []). + +%% warning_msg(S, Vs) -> +%% info_msg(lists:concat(["warning: ", S]), Vs). + +%% info_msg(S) -> +%% info_msg(S, []). + +%% info_msg(S, Vs) -> +%% error_logger:info_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs). diff --git a/lib/hipe/cerl/cerl_closurean.erl b/lib/hipe/cerl/cerl_closurean.erl new file mode 100644 index 0000000000..12771668ac --- /dev/null +++ b/lib/hipe/cerl/cerl_closurean.erl @@ -0,0 +1,862 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ===================================================================== +%% Closure analysis of Core Erlang programs. +%% +%% Copyright (C) 2001-2002 Richard Carlsson +%% +%% Author contact: richardc@it.uu.se +%% ===================================================================== + +%% TODO: might need a "top" (`any') element for any-length value lists. + +-module(cerl_closurean). + +-export([analyze/1, annotate/1]). +%% The following functions are exported from this module since they +%% are also used by Dialyzer (file dialyzer/src/dialyzer_dep.erl) +-export([is_escape_op/2, is_escape_op/3, is_literal_op/2, is_literal_op/3]). + +-import(cerl, [ann_c_apply/3, ann_c_fun/3, ann_c_var/2, apply_args/1, + apply_op/1, atom_val/1, bitstr_size/1, bitstr_val/1, + binary_segments/1, c_letrec/2, c_seq/2, c_tuple/1, + c_nil/0, call_args/1, call_module/1, call_name/1, + case_arg/1, case_clauses/1, catch_body/1, clause_body/1, + clause_guard/1, clause_pats/1, cons_hd/1, cons_tl/1, + fun_body/1, fun_vars/1, get_ann/1, is_c_atom/1, + let_arg/1, let_body/1, let_vars/1, letrec_body/1, + letrec_defs/1, module_defs/1, module_defs/1, + module_exports/1, pat_vars/1, primop_args/1, + primop_name/1, receive_action/1, receive_clauses/1, + receive_timeout/1, seq_arg/1, seq_body/1, set_ann/2, + try_arg/1, try_body/1, try_vars/1, try_evars/1, + try_handler/1, tuple_es/1, type/1, values_es/1]). + +-import(cerl_trees, [get_label/1]). + +%% =========================================================================== + +-type label() :: integer() | 'top' | 'external' | 'external_call'. +-type ordset(X) :: [X]. % XXX: TAKE ME OUT +-type labelset() :: ordset(label()). +-type outlist() :: [labelset()] | 'none'. +-type escapes() :: labelset(). + +%% =========================================================================== +%% annotate(Tree) -> {Tree1, OutList, Outputs, Escapes, Dependencies, Parents} +%% +%% Tree = cerl:cerl() +%% +%% Analyzes `Tree' (see `analyze') and appends terms `{callers, +%% Labels}' and `{calls, Labels}' to the annotation list of each +%% fun-expression node and apply-expression node of `Tree', +%% respectively, where `Labels' is an ordered-set list of labels of +%% fun-expressions in `Tree', possibly also containing the atom +%% `external', corresponding to the dependency information derived +%% by the analysis. Any previous such annotations are removed from +%% `Tree'. `Tree1' is the modified tree; for details on `OutList', +%% `Outputs' , `Dependencies', `Escapes' and `Parents', see +%% `analyze'. +%% +%% Note: `Tree' must be annotated with labels in order to use this +%% function; see `analyze' for details. + +-spec annotate(cerl:cerl()) -> + {cerl:cerl(), outlist(), dict(), escapes(), dict(), dict()}. + +annotate(Tree) -> + {Xs, Out, Esc, Deps, Par} = analyze(Tree), + F = fun (T) -> + case type(T) of + 'fun' -> + L = get_label(T), + X = case dict:find(L, Deps) of + {ok, X1} -> X1; + error -> set__new() + end, + set_ann(T, append_ann(callers, + set__to_list(X), + get_ann(T))); + apply -> + L = get_label(T), + X = case dict:find(L, Deps) of + {ok, X1} -> X1; + error -> set__new() + end, + set_ann(T, append_ann(calls, + set__to_list(X), + get_ann(T))); + _ -> +%%% set_ann(T, []) % debug + T + end + end, + {cerl_trees:map(F, Tree), Xs, Out, Esc, Deps, Par}. + +append_ann(Tag, Val, [X | Xs]) -> + if tuple_size(X) >= 1, element(1, X) =:= Tag -> + append_ann(Tag, Val, Xs); + true -> + [X | append_ann(Tag, Val, Xs)] + end; +append_ann(Tag, Val, []) -> + [{Tag, Val}]. + +%% ===================================================================== +%% analyze(Tree) -> {OutList, Outputs, Escapes, Dependencies, Parents} +%% +%% Tree = cerl() +%% OutList = [LabelSet] | none +%% Outputs = dict(Label, OutList) +%% Escapes = LabelSet +%% Dependencies = dict(Label, LabelSet) +%% LabelSet = ordset(Label) +%% Label = integer() | top | external | external_call +%% Parents = dict(Label, Label) +%% +%% Analyzes a module or an expression represented by `Tree'. +%% +%% The returned `OutList' is a list of sets of labels of +%% fun-expressions which correspond to the possible closures in the +%% value list produced by `Tree' (viewed as an expression; the +%% "value" of a module contains its exported functions). The atom +%% `none' denotes missing or conflicting information. +%% +%% The atom `external' in any label set denotes any possible +%% function outside `Tree', including those in `Escapes'. The atom +%% `top' denotes the top-level expression `Tree'. +%% +%% `Outputs' is a mapping from the labels of fun-expressions in +%% `Tree' to corresponding lists of sets of labels of +%% fun-expressions (or the atom `none'), representing the possible +%% closures in the value lists returned by the respective +%% functions. +%% +%% `Dependencies' is a similar mapping from the labels of +%% fun-expressions and apply-expressions in `Tree' to sets of +%% labels of corresponding fun-expressions which may contain call +%% sites of the functions or be called from the call sites, +%% respectively. Any such label not defined in `Dependencies' +%% represents an unreachable function or a dead or faulty +%% application. +%% +%% `Escapes' is the set of labels of fun-expressions in `Tree' such +%% that corresponding closures may be accessed from outside `Tree'. +%% +%% `Parents' is a mapping from labels of fun-expressions in `Tree' +%% to the corresponding label of the nearest containing +%% fun-expression or top-level expression. This can be used to +%% extend the dependency graph, for certain analyses. +%% +%% Note: `Tree' must be annotated with labels (as done by the +%% function `cerl_trees:label/1') in order to use this function. +%% The label annotation `{label, L}' (where L should be an integer) +%% must be the first element of the annotation list of each node in +%% the tree. Instances of variables bound in `Tree' which denote +%% the same variable must have the same label; apart from this, +%% labels should be unique. Constant literals do not need to be +%% labeled. + +-record(state, {vars, out, dep, work, funs, par}). + +%% Note: In order to keep our domain simple, we assume that all remote +%% calls and primops return a single value, if any. + +%% We use the terms `closure', `label', `lambda' and `fun-expression' +%% interchangeably. The exact meaning in each case can be grasped from +%% the context. +%% +%% Rules: +%% 1) The implicit top level lambda escapes. +%% 2) A lambda returned by an escaped lambda also escapes. +%% 3) An escaped lambda can be passed an external lambda as argument. +%% 4) A lambda passed as argument to an external lambda also escapes. +%% 5) An argument passed to an unknown operation escapes. +%% 6) A call to an unknown operation can return an external lambda. +%% +%% Escaped lambdas become part of the set of external lambdas, but this +%% does not need to be represented explicitly. + +%% We wrap the given syntax tree T in a fun-expression labeled `top', +%% which is initially in the set of escaped labels. `top' will be +%% visited at least once. +%% +%% We create a separate function labeled `external', defined as: +%% "'external'/1 = fun (Escape) -> do apply 'external'/1(apply Escape()) +%% 'external'/1", which will represent any and all functions outside T, +%% and which returns itself, and contains a recursive call; this models +%% rules 2 and 4 above. It will be revisited if the set of escaped +%% labels changes, or at least once. Its parameter `Escape' is a +%% variable labeled `escape', which will hold the set of escaped labels. +%% initially it contains `top' and `external'. + +-spec analyze(cerl:cerl()) -> {outlist(), dict(), escapes(), dict(), dict()}. + +analyze(Tree) -> + %% Note that we use different name spaces for variable labels and + %% function/call site labels, so we can reuse some names here. We + %% assume that the labeling of Tree only uses integers, not atoms. + External = ann_c_var([{label, external}], {external, 1}), + Escape = ann_c_var([{label, escape}], 'Escape'), + ExtBody = c_seq(ann_c_apply([{label, loop}], External, + [ann_c_apply([{label, external_call}], + Escape, [])]), + External), + ExtFun = ann_c_fun([{label, external}], [Escape], ExtBody), +%%% io:fwrite("external fun:\n~s.\n", +%%% [cerl_prettypr:format(ExtFun, [noann])]), + Top = ann_c_var([{label, top}], {top, 0}), + TopFun = ann_c_fun([{label, top}], [], Tree), + + %% The "start fun" just makes the initialisation easier. It will not + %% be marked as escaped, and thus cannot be called. + StartFun = ann_c_fun([{label, start}], [], + c_letrec([{External, ExtFun}, {Top, TopFun}], + c_nil())), +%%% io:fwrite("start fun:\n~s.\n", +%%% [cerl_prettypr:format(StartFun, [noann])]), + + %% Gather a database of all fun-expressions in Tree and initialise + %% all their outputs and parameter variables. Bind all module- and + %% letrec-defined variables to their corresponding labels. + Funs0 = dict:new(), + Vars0 = dict:new(), + Out0 = dict:new(), + Empty = empty(), + F = fun (T, S = {Fs, Vs, Os}) -> + case type(T) of + 'fun' -> + L = get_label(T), + As = fun_vars(T), + {dict:store(L, T, Fs), + bind_vars_single(As, Empty, Vs), + dict:store(L, none, Os)}; + letrec -> + {Fs, bind_defs(letrec_defs(T), Vs), Os}; + module -> + {Fs, bind_defs(module_defs(T), Vs), Os}; + _ -> + S + end + end, + {Funs, Vars, Out} = cerl_trees:fold(F, {Funs0, Vars0, Out0}, + StartFun), + + %% Initialise Escape to the minimal set of escaped labels. + Vars1 = dict:store(escape, from_label_list([top, external]), Vars), + + %% Enter the fixpoint iteration at the StartFun. + St = loop(StartFun, start, #state{vars = Vars1, + out = Out, + dep = dict:new(), + work = init_work(), + funs = Funs, + par = dict:new()}), +%%% io:fwrite("dependencies: ~p.\n", +%%% [[{X, set__to_list(Y)} +%%% || {X, Y} <- dict:to_list(St#state.dep)]]), + {dict:fetch(top, St#state.out), + tidy_dict([start, top, external], St#state.out), + dict:fetch(escape, St#state.vars), + tidy_dict([loop], St#state.dep), + St#state.par}. + +tidy_dict([X | Xs], D) -> + tidy_dict(Xs, dict:erase(X, D)); +tidy_dict([], D) -> + D. + +loop(T, L, St0) -> +%%% io:fwrite("analyzing: ~w.\n", [L]), +%%% io:fwrite("work: ~w.\n", [St0#state.work]), + Xs0 = dict:fetch(L, St0#state.out), + {Xs, St1} = visit(fun_body(T), L, St0), + {W, M} = case equal(Xs0, Xs) of + true -> + {St1#state.work, St1#state.out}; + false -> +%%% io:fwrite("out (~w) changed: ~w <- ~w.\n", +%%% [L, Xs, Xs0]), + M1 = dict:store(L, Xs, St1#state.out), + case dict:find(L, St1#state.dep) of + {ok, S} -> + {add_work(set__to_list(S), St1#state.work), + M1}; + error -> + {St1#state.work, M1} + end + end, + St2 = St1#state{out = M}, + case take_work(W) of + {ok, L1, W1} -> + T1 = dict:fetch(L1, St2#state.funs), + loop(T1, L1, St2#state{work = W1}); + none -> + St2 + end. + +visit(T, L, St) -> + case type(T) of + literal -> + {[empty()], St}; + var -> + %% If a variable is not already in the store here, we + %% initialize it to empty(). + L1 = get_label(T), + Vars = St#state.vars, + case dict:find(L1, Vars) of + {ok, X} -> + {[X], St}; + error -> + X = empty(), + St1 = St#state{vars = dict:store(L1, X, Vars)}, + {[X], St1} + end; + 'fun' -> + %% Must revisit the fun also, because its environment might + %% have changed. (We don't keep track of such dependencies.) + L1 = get_label(T), + St1 = St#state{work = add_work([L1], St#state.work), + par = set_parent([L1], L, St#state.par)}, + {[singleton(L1)], St1}; + values -> + visit_list(values_es(T), L, St); + cons -> + {Xs, St1} = visit_list([cons_hd(T), cons_tl(T)], L, St), + {[join_single_list(Xs)], St1}; + tuple -> + {Xs, St1} = visit_list(tuple_es(T), L, St), + {[join_single_list(Xs)], St1}; + 'let' -> + {Xs, St1} = visit(let_arg(T), L, St), + Vars = bind_vars(let_vars(T), Xs, St1#state.vars), + visit(let_body(T), L, St1#state{vars = Vars}); + seq -> + {_, St1} = visit(seq_arg(T), L, St), + visit(seq_body(T), L, St1); + apply -> + {Xs, St1} = visit(apply_op(T), L, St), + {As, St2} = visit_list(apply_args(T), L, St1), + case Xs of + [X] -> + %% We store the dependency from the call site to the + %% called functions + Ls = set__to_list(X), + Out = St2#state.out, + Xs1 = join_list([dict:fetch(Lx, Out) || Lx <- Ls]), + St3 = call_site(Ls, L, As, St2), + L1 = get_label(T), + D = dict:store(L1, X, St3#state.dep), + {Xs1, St3#state{dep = D}}; + none -> + {none, St2} + end; + call -> + M = call_module(T), + F = call_name(T), + {_, St1} = visit(M, L, St), + {_, St2} = visit(F, L, St1), + {Xs, St3} = visit_list(call_args(T), L, St2), + remote_call(M, F, Xs, St3); + primop -> + As = primop_args(T), + {Xs, St1} = visit_list(As, L, St), + primop_call(atom_val(primop_name(T)), length(Xs), Xs, St1); + 'case' -> + {Xs, St1} = visit(case_arg(T), L, St), + visit_clauses(Xs, case_clauses(T), L, St1); + 'receive' -> + X = singleton(external), + {Xs1, St1} = visit_clauses([X], receive_clauses(T), L, St), + {_, St2} = visit(receive_timeout(T), L, St1), + {Xs2, St3} = visit(receive_action(T), L, St2), + {join(Xs1, Xs2), St3}; + 'try' -> + {Xs1, St1} = visit(try_arg(T), L, St), + X = singleton(external), + Vars = bind_vars(try_vars(T), [X], St1#state.vars), + {Xs2, St2} = visit(try_body(T), L, St1#state{vars = Vars}), + Evars = bind_vars(try_evars(T), [X, X, X], St2#state.vars), + {Xs3, St3} = visit(try_handler(T), L, St2#state{vars = Evars}), + {join(join(Xs1, Xs2), Xs3), St3}; + 'catch' -> + {_, St1} = visit(catch_body(T), L, St), + {[singleton(external)], St1}; + binary -> + {_, St1} = visit_list(binary_segments(T), L, St), + {[empty()], St1}; + bitstr -> + %% The other fields are constant literals. + {_, St1} = visit(bitstr_val(T), L, St), + {_, St2} = visit(bitstr_size(T), L, St1), + {none, St2}; + letrec -> + %% All the bound funs should be revisited, because the + %% environment might have changed. + Ls = [get_label(F) || {_, F} <- letrec_defs(T)], + St1 = St#state{work = add_work(Ls, St#state.work), + par = set_parent(Ls, L, St#state.par)}, + visit(letrec_body(T), L, St1); + module -> + %% All the exported functions escape, and can thus be passed + %% any external closures as arguments. We regard a module as + %% a tuple of function variables in the body of a `letrec'. + visit(c_letrec(module_defs(T), c_tuple(module_exports(T))), + L, St) + end. + +visit_clause(T, Xs, L, St) -> + Vars = bind_pats(clause_pats(T), Xs, St#state.vars), + {_, St1} = visit(clause_guard(T), L, St#state{vars = Vars}), + visit(clause_body(T), L, St1). + +%% We assume correct value-list typing. + +visit_list([T | Ts], L, St) -> + {Xs, St1} = visit(T, L, St), + {Xs1, St2} = visit_list(Ts, L, St1), + X = case Xs of + [X1] -> X1; + none -> none + end, + {[X | Xs1], St2}; +visit_list([], _L, St) -> + {[], St}. + +visit_clauses(Xs, [T | Ts], L, St) -> + {Xs1, St1} = visit_clause(T, Xs, L, St), + {Xs2, St2} = visit_clauses(Xs, Ts, L, St1), + {join(Xs1, Xs2), St2}; +visit_clauses(_, [], _L, St) -> + {none, St}. + +bind_defs([{V, F} | Ds], Vars) -> + bind_defs(Ds, dict:store(get_label(V), singleton(get_label(F)), + Vars)); +bind_defs([], Vars) -> + Vars. + +bind_pats(Ps, none, Vars) -> + bind_pats_single(Ps, empty(), Vars); +bind_pats(Ps, Xs, Vars) -> + if length(Xs) =:= length(Ps) -> + bind_pats_list(Ps, Xs, Vars); + true -> + bind_pats_single(Ps, empty(), Vars) + end. + +bind_pats_list([P | Ps], [X | Xs], Vars) -> + bind_pats_list(Ps, Xs, bind_vars_single(pat_vars(P), X, Vars)); +bind_pats_list([], [], Vars) -> + Vars. + +bind_pats_single([P | Ps], X, Vars) -> + bind_pats_single(Ps, X, bind_vars_single(pat_vars(P), X, Vars)); +bind_pats_single([], _X, Vars) -> + Vars. + +bind_vars(Vs, none, Vars) -> + bind_vars_single(Vs, empty(), Vars); +bind_vars(Vs, Xs, Vars) -> + if length(Vs) =:= length(Xs) -> + bind_vars_list(Vs, Xs, Vars); + true -> + bind_vars_single(Vs, empty(), Vars) + end. + +bind_vars_list([V | Vs], [X | Xs], Vars) -> + bind_vars_list(Vs, Xs, dict:store(get_label(V), X, Vars)); +bind_vars_list([], [], Vars) -> + Vars. + +bind_vars_single([V | Vs], X, Vars) -> + bind_vars_single(Vs, X, dict:store(get_label(V), X, Vars)); +bind_vars_single([], _X, Vars) -> + Vars. + +%% This handles a call site - adding dependencies and updating parameter +%% variables with respect to the actual parameters. The 'external' +%% function is handled specially, since it can get an arbitrary number +%% of arguments, which must be unified into a single argument. + +call_site(Ls, L, Xs, St) -> +%%% io:fwrite("call site: ~w -> ~w (~w).\n", [L, Ls, Xs]), + {D, W, V} = call_site(Ls, L, Xs, St#state.dep, St#state.work, + St#state.vars, St#state.funs), + St#state{dep = D, work = W, vars = V}. + +call_site([external | Ls], T, Xs, D, W, V, Fs) -> + D1 = add_dep(external, T, D), + X = join_single_list(Xs), + case bind_arg(escape, X, V) of + {V1, true} -> +%%% io:fwrite("escape changed: ~w <- ~w + ~w.\n", +%%% [dict:fetch(escape, V1), dict:fetch(escape, V), +%%% X]), + {W1, V2} = update_esc(set__to_list(X), W, V1, Fs), + call_site(Ls, T, Xs, D1, add_work([external], W1), V2, Fs); + {V1, false} -> + call_site(Ls, T, Xs, D1, W, V1, Fs) + end; +call_site([L | Ls], T, Xs, D, W, V, Fs) -> + D1 = add_dep(L, T, D), + Vs = fun_vars(dict:fetch(L, Fs)), + case bind_args(Vs, Xs, V) of + {V1, true} -> + call_site(Ls, T, Xs, D1, add_work([L], W), V1, Fs); + {V1, false} -> + call_site(Ls, T, Xs, D1, W, V1, Fs) + end; +call_site([], _, _, D, W, V, _) -> + {D, W, V}. + +%% Note that `visit' makes sure all lambdas are visited at least once. +%% For every called function, we add a dependency from the *called* +%% function to the function containing the call site. + +add_dep(Source, Target, Deps) -> + case dict:find(Source, Deps) of + {ok, X} -> + case set__is_member(Target, X) of + true -> + Deps; + false -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__add(Target, X), Deps) + end; + error -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__singleton(Target), Deps) + end. + +%% If the arity does not match the call, nothing is done here. + +bind_args(Vs, Xs, Vars) -> + if length(Vs) =:= length(Xs) -> + bind_args(Vs, Xs, Vars, false); + true -> + {Vars, false} + end. + +bind_args([V | Vs], [X | Xs], Vars, Ch) -> + L = get_label(V), + {Vars1, Ch1} = bind_arg(L, X, Vars, Ch), + bind_args(Vs, Xs, Vars1, Ch1); +bind_args([], [], Vars, Ch) -> + {Vars, Ch}. + +bind_args_single(Vs, X, Vars) -> + bind_args_single(Vs, X, Vars, false). + +bind_args_single([V | Vs], X, Vars, Ch) -> + L = get_label(V), + {Vars1, Ch1} = bind_arg(L, X, Vars, Ch), + bind_args_single(Vs, X, Vars1, Ch1); +bind_args_single([], _, Vars, Ch) -> + {Vars, Ch}. + +bind_arg(L, X, Vars) -> + bind_arg(L, X, Vars, false). + +bind_arg(L, X, Vars, Ch) -> + X0 = dict:fetch(L, Vars), + X1 = join_single(X, X0), + case equal_single(X0, X1) of + true -> + {Vars, Ch}; + false -> +%%% io:fwrite("arg (~w) changed: ~w <- ~w + ~w.\n", +%%% [L, X1, X0, X]), + {dict:store(L, X1, Vars), true} + end. + +%% This handles escapes from things like primops and remote calls. + +%% escape(none, St) -> +%% St; +escape([X], St) -> + Vars = St#state.vars, + X0 = dict:fetch(escape, Vars), + X1 = join_single(X, X0), + case equal_single(X0, X1) of + true -> + St; + false -> +%%% io:fwrite("escape changed: ~w <- ~w + ~w.\n", [X1, X0, X]), +%%% io:fwrite("updating escaping funs: ~w.\n", [set__to_list(X)]), + Vars1 = dict:store(escape, X1, Vars), + {W, Vars2} = update_esc(set__to_list(set__subtract(X, X0)), + St#state.work, Vars1, + St#state.funs), + St#state{work = add_work([external], W), vars = Vars2} + end. + +%% For all escaping lambdas, since they might be called from outside the +%% program, all their arguments may be an external lambda. (Note that we +%% only have to include the `external' label once per escaping lambda.) +%% If the escape set has changed, we need to revisit the `external' fun. + +update_esc(Ls, W, V, Fs) -> + update_esc(Ls, singleton(external), W, V, Fs). + +%% The external lambda is skipped here - the Escape variable is known to +%% contain `external' from the start. + +update_esc([external | Ls], X, W, V, Fs) -> + update_esc(Ls, X, W, V, Fs); +update_esc([L | Ls], X, W, V, Fs) -> + Vs = fun_vars(dict:fetch(L, Fs)), + case bind_args_single(Vs, X, V) of + {V1, true} -> + update_esc(Ls, X, add_work([L], W), V1, Fs); + {V1, false} -> + update_esc(Ls, X, W, V1, Fs) + end; +update_esc([], _, W, V, _) -> + {W, V}. + +set_parent([L | Ls], L1, D) -> + set_parent(Ls, L1, dict:store(L, L1, D)); +set_parent([], _L1, D) -> + D. + +%% Handle primop calls: (At present, we assume that all unknown primops +%% yield exactly one value. This might have to be changed.) + +primop_call(F, A, Xs, St0) -> + case is_pure_op(F, A) of + %% XXX: this case is currently not possible -- commented out. + %% true -> + %% case is_literal_op(F, A) of + %% true -> {[empty()], St0}; + %% false -> {[join_single_list(Xs)], St0} + %% end; + false -> + St1 = case is_escape_op(F, A) of + true -> escape([join_single_list(Xs)], St0); + false -> St0 + end, + case is_literal_op(F, A) of + true -> {none, St1}; + false -> {[singleton(external)], St1} + end + end. + +%% Handle remote-calls: (At present, we assume that all unknown calls +%% yield exactly one value. This might have to be changed.) + +remote_call(M, F, Xs, St) -> + case is_c_atom(M) andalso is_c_atom(F) of + true -> + remote_call_1(atom_val(M), atom_val(F), length(Xs), Xs, St); + false -> + %% Unknown function + {[singleton(external)], escape([join_single_list(Xs)], St)} + end. + +remote_call_1(M, F, A, Xs, St0) -> + case is_pure_op(M, F, A) of + true -> + case is_literal_op(M, F, A) of + true -> {[empty()], St0}; + false -> {[join_single_list(Xs)], St0} + end; + false -> + St1 = case is_escape_op(M, F, A) of + true -> escape([join_single_list(Xs)], St0); + false -> St0 + end, + case is_literal_op(M, F, A) of + true -> {[empty()], St1}; + false -> {[singleton(external)], St1} + end + end. + +%% Domain: none | [Vs], where Vs = set(integer()). + +join(none, Xs2) -> Xs2; +join(Xs1, none) -> Xs1; +join(Xs1, Xs2) -> + if length(Xs1) =:= length(Xs2) -> + join_1(Xs1, Xs2); + true -> + none + end. + +join_1([X1 | Xs1], [X2 | Xs2]) -> + [join_single(X1, X2) | join_1(Xs1, Xs2)]; +join_1([], []) -> + []. + +empty() -> set__new(). + +singleton(X) -> set__singleton(X). + +from_label_list(X) -> set__from_list(X). + +join_single(none, Y) -> Y; +join_single(X, none) -> X; +join_single(X, Y) -> set__union(X, Y). + +join_list([Xs | Xss]) -> + join(Xs, join_list(Xss)); +join_list([]) -> + none. + +join_single_list([X | Xs]) -> + join_single(X, join_single_list(Xs)); +join_single_list([]) -> + empty(). + +equal(none, none) -> true; +equal(none, _) -> false; +equal(_, none) -> false; +equal(X1, X2) -> equal_1(X1, X2). + +equal_1([X1 | Xs1], [X2 | Xs2]) -> + equal_single(X1, X2) andalso equal_1(Xs1, Xs2); +equal_1([], []) -> true; +equal_1(_, _) -> false. + +equal_single(X, Y) -> set__equal(X, Y). + +%% Set abstraction for label sets in the domain. + +set__new() -> []. + +set__singleton(X) -> [X]. + +set__to_list(S) -> S. + +set__from_list(S) -> ordsets:from_list(S). + +set__union(X, Y) -> ordsets:union(X, Y). + +set__add(X, S) -> ordsets:add_element(X, S). + +set__is_member(X, S) -> ordsets:is_element(X, S). + +set__subtract(X, Y) -> ordsets:subtract(X, Y). + +set__equal(X, Y) -> X =:= Y. + +%% A simple but efficient functional queue. + +queue__new() -> {[], []}. + +queue__put(X, {In, Out}) -> {[X | In], Out}. + +queue__get({In, [X | Out]}) -> {ok, X, {In, Out}}; +queue__get({[], _}) -> empty; +queue__get({In, _}) -> + [X | In1] = lists:reverse(In), + {ok, X, {[], In1}}. + +%% The work list - a queue without repeated elements. + +init_work() -> + {queue__new(), sets:new()}. + +add_work(Ls, {Q, Set}) -> + add_work(Ls, Q, Set). + +%% Note that the elements are enqueued in order. + +add_work([L | Ls], Q, Set) -> + case sets:is_element(L, Set) of + true -> + add_work(Ls, Q, Set); + false -> + add_work(Ls, queue__put(L, Q), sets:add_element(L, Set)) + end; +add_work([], Q, Set) -> + {Q, Set}. + +take_work({Queue0, Set0}) -> + case queue__get(Queue0) of + {ok, L, Queue1} -> + Set1 = sets:del_element(L, Set0), + {ok, L, {Queue1, Set1}}; + empty -> + none + end. + +%% Escape operators may let their arguments escape. Unless we know +%% otherwise, and the function is not pure, we assume this is the case. +%% Error-raising functions (fault/match_fail) are not considered as +%% escapes (but throw/exit are). Zero-argument functions need not be +%% listed. + +-spec is_escape_op(atom(), arity()) -> boolean(). + +is_escape_op(match_fail, 1) -> false; +is_escape_op(F, A) when is_atom(F), is_integer(A) -> true. + +-spec is_escape_op(module(), atom(), arity()) -> boolean(). + +is_escape_op(erlang, error, 1) -> false; +is_escape_op(erlang, error, 2) -> false; +is_escape_op(M, F, A) when is_atom(M), is_atom(F), is_integer(A) -> true. + +%% "Literal" operators will never return functional values even when +%% found in their arguments. Unless we know otherwise, we assume this is +%% not the case. (More functions can be added to this list, if needed +%% for better precision. Note that the result of `term_to_binary' still +%% contains an encoding of the closure.) + +-spec is_literal_op(atom(), arity()) -> boolean(). + +is_literal_op(match_fail, 1) -> true; +is_literal_op(F, A) when is_atom(F), is_integer(A) -> false. + +-spec is_literal_op(module(), atom(), arity()) -> boolean(). + +is_literal_op(erlang, '+', 2) -> true; +is_literal_op(erlang, '-', 2) -> true; +is_literal_op(erlang, '*', 2) -> true; +is_literal_op(erlang, '/', 2) -> true; +is_literal_op(erlang, '=:=', 2) -> true; +is_literal_op(erlang, '==', 2) -> true; +is_literal_op(erlang, '=/=', 2) -> true; +is_literal_op(erlang, '/=', 2) -> true; +is_literal_op(erlang, '<', 2) -> true; +is_literal_op(erlang, '=<', 2) -> true; +is_literal_op(erlang, '>', 2) -> true; +is_literal_op(erlang, '>=', 2) -> true; +is_literal_op(erlang, 'and', 2) -> true; +is_literal_op(erlang, 'or', 2) -> true; +is_literal_op(erlang, 'not', 1) -> true; +is_literal_op(erlang, length, 1) -> true; +is_literal_op(erlang, size, 1) -> true; +is_literal_op(erlang, fun_info, 1) -> true; +is_literal_op(erlang, fun_info, 2) -> true; +is_literal_op(erlang, fun_to_list, 1) -> true; +is_literal_op(erlang, throw, 1) -> true; +is_literal_op(erlang, exit, 1) -> true; +is_literal_op(erlang, error, 1) -> true; +is_literal_op(erlang, error, 2) -> true; +is_literal_op(M, F, A) when is_atom(M), is_atom(F), is_integer(A) -> false. + +%% Pure functions neither affect the state, nor depend on it. + +is_pure_op(F, A) when is_atom(F), is_integer(A) -> false. + +is_pure_op(M, F, A) -> erl_bifs:is_pure(M, F, A). + +%% ===================================================================== diff --git a/lib/hipe/cerl/cerl_hipe_primops.hrl b/lib/hipe/cerl/cerl_hipe_primops.hrl new file mode 100644 index 0000000000..36b1b62901 --- /dev/null +++ b/lib/hipe/cerl/cerl_hipe_primops.hrl @@ -0,0 +1,88 @@ +%% ========================-*-erlang-*-================================= +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Predefined Core Erlang primitive operations used by HiPE +%% +%% Copyright (C) 2000 Richard Carlsson +%% +%% Author contact: richardc@it.uu.se +%% ===================================================================== + +%% These definitions give the names of Core Erlang primops recognized by +%% HiPE. Many of them (e.g., 'not'/'and'/'or', and the type tests), are +%% not primops on the Icode level, but are inline-expanded by the +%% translation from Core Erlang to Icode, or are renamed/rewritten to a +%% corresponding ICode primop; they only exist to help the translation. + +%%-define(PRIMOP_IDENTITY, identity). % arity 1 +-define(PRIMOP_NOT, 'not'). % arity 1 +-define(PRIMOP_AND, 'and'). % arity 2 +-define(PRIMOP_OR, 'or'). % arity 2 +-define(PRIMOP_XOR, 'xor'). % arity 2 +-define(PRIMOP_ADD, '+'). % arity 2 +-define(PRIMOP_SUB, '-'). % arity 2 +-define(PRIMOP_NEG, neg). % arity 1 +-define(PRIMOP_MUL, '*'). % arity 2 +-define(PRIMOP_DIV, '/'). % arity 2 +-define(PRIMOP_INTDIV, 'div'). % arity 2 +-define(PRIMOP_REM, 'rem'). % arity 2 +-define(PRIMOP_BAND, 'band'). % arity 2 +-define(PRIMOP_BOR, 'bor'). % arity 2 +-define(PRIMOP_BXOR, 'bxor'). % arity 2 +-define(PRIMOP_BNOT, 'bnot'). % arity 1 +-define(PRIMOP_BSL, 'bsl'). % arity 2 +-define(PRIMOP_BSR, 'bsr'). % arity 2 +-define(PRIMOP_EQ, '=='). % arity 2 +-define(PRIMOP_NE, '/='). % arity 2 +-define(PRIMOP_EXACT_EQ, '=:='). % arity 2 +-define(PRIMOP_EXACT_NE, '=/='). % arity 2 +-define(PRIMOP_LT, '<'). % arity 2 +-define(PRIMOP_GT, '>'). % arity 2 +-define(PRIMOP_LE, '=<'). % arity 2 +-define(PRIMOP_GE, '>='). % arity 2 +-define(PRIMOP_IS_ATOM, 'is_atom'). % arity 1 +-define(PRIMOP_IS_BIGNUM, 'is_bignum'). % arity 1 +-define(PRIMOP_IS_BINARY, 'is_binary'). % arity 1 +-define(PRIMOP_IS_CONSTANT, 'is_constant'). % arity 1 +-define(PRIMOP_IS_FIXNUM, 'is_fixnum'). % arity 1 +-define(PRIMOP_IS_FLOAT, 'is_float'). % arity 1 +-define(PRIMOP_IS_FUNCTION, 'is_function'). % arity 1 +-define(PRIMOP_IS_INTEGER, 'is_integer'). % arity 1 +-define(PRIMOP_IS_LIST, 'is_list'). % arity 1 +-define(PRIMOP_IS_NUMBER, 'is_number'). % arity 1 +-define(PRIMOP_IS_PID, 'is_pid'). % arity 1 +-define(PRIMOP_IS_PORT, 'is_port'). % arity 1 +-define(PRIMOP_IS_REFERENCE, 'is_reference'). % arity 1 +-define(PRIMOP_IS_TUPLE, 'is_tuple'). % arity 1 +-define(PRIMOP_IS_RECORD, 'is_record'). % arity 3 +-define(PRIMOP_EXIT, exit). % arity 1 +-define(PRIMOP_THROW, throw). % arity 1 +-define(PRIMOP_ERROR, error). % arity 1,2 +-define(PRIMOP_RETHROW, raise). % arity 2 +-define(PRIMOP_RECEIVE_SELECT, receive_select). % arity 0 +-define(PRIMOP_RECEIVE_NEXT, receive_next). % arity 0 +-define(PRIMOP_ELEMENT, element). % arity 2 +-define(PRIMOP_DSETELEMENT, dsetelement). % arity 3 +-define(PRIMOP_MAKE_FUN, make_fun). % arity 6 +-define(PRIMOP_APPLY_FUN, apply_fun). % arity 2 +-define(PRIMOP_FUN_ELEMENT, closure_element). % arity 2 +-define(PRIMOP_SET_LABEL, set_label). % arity 1 +-define(PRIMOP_GOTO_LABEL, goto_label). % arity 1 +-define(PRIMOP_REDUCTION_TEST, reduction_test). % arity 0 +-define(PRIMOP_BS_CONTEXT_TO_BINARY, bs_context_to_binary). % arity 1 diff --git a/lib/hipe/cerl/cerl_hipeify.erl b/lib/hipe/cerl/cerl_hipeify.erl new file mode 100644 index 0000000000..8f6c3561c9 --- /dev/null +++ b/lib/hipe/cerl/cerl_hipeify.erl @@ -0,0 +1,655 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @author Richard Carlsson +%% @copyright 2000-2004 Richard Carlsson +%% @doc HiPE-ification of Core Erlang code. Prepares Core Erlang code +%% for translation to ICode. +%% @see cerl_to_icode + +-module(cerl_hipeify). + +-define(NO_UNUSED, true). + +-export([transform/2]). +-ifndef(NO_UNUSED). +-export([core_transform/2]). +-endif. + +-include("cerl_hipe_primops.hrl"). + +-record(ctxt, {class = expr}). + + +%% @spec core_transform(Module::cerl_records(), Options::[term()]) -> +%% cerl_records() +%% +%% @doc Transforms a module represented by records. See +%% transform/2 for details. +%% +%%

Use the compiler option {core_transform, +%% cerl_hipeify} to insert this function as a compilation +%% pass.

+%% +%% @see transform/2 + +-ifndef(NO_UNUSED). +core_transform(M, Opts) -> + cerl:to_records(transform(cerl:from_records(M), Opts)). +-endif. % NO_UNUSED +%% @clear + + +%% @spec transform(Module::cerl(), Options::[term()]) -> cerl() +%% +%% cerl() = cerl:cerl() +%% +%% @doc Rewrites a Core Erlang module to a form suitable for further +%% translation to HiPE Icode. See module cerl_to_icode for +%% details. +%% +%% @see cerl_to_icode +%% @see cerl_cconv + +-spec transform(cerl:c_module(), [term()]) -> cerl:c_module(). + +transform(E, Opts) -> + %% Start by closure converting the code + module(cerl_cconv:transform(E, Opts), Opts). + +module(E, Opts) -> + {Ds, Env, Ren} = add_defs(cerl:module_defs(E), env__new(), + ren__new()), + M = cerl:module_name(E), + S0 = s__new(cerl:atom_val(M)), + S = s__set_pmatch(proplists:get_value(pmatch, Opts, true), S0), + {Ds1, _} = defs(Ds, true, Env, Ren, S), + cerl:update_c_module(E, M, cerl:module_exports(E), + cerl:module_attrs(E), Ds1). + +%% Note that the environment is defined on the renamed variables. + +expr(E0, Env, Ren, Ctxt, S0) -> + %% Do peephole optimizations as we traverse the code. + E = cerl_lib:reduce_expr(E0), + case cerl:type(E) of + literal -> + {E, S0}; + var -> + variable(E, Env, Ren, Ctxt, S0); + values -> + {Es, S1} = expr_list(cerl:values_es(E), Env, Ren, Ctxt, S0), + {cerl:update_c_values(E, Es), S1}; + cons -> + {E1, S1} = expr(cerl:cons_hd(E), Env, Ren, Ctxt, S0), + {E2, S2} = expr(cerl:cons_tl(E), Env, Ren, Ctxt, S1), + {cerl:update_c_cons(E, E1, E2), S2}; + tuple -> + {Es, S1} = expr_list(cerl:tuple_es(E), Env, Ren, Ctxt, S0), + {cerl:update_c_tuple(E, Es), S1}; + 'let' -> + let_expr(E, Env, Ren, Ctxt, S0); + seq -> + {A, S1} = expr(cerl:seq_arg(E), Env, Ren, Ctxt, S0), + {B, S2} = expr(cerl:seq_body(E), Env, Ren, Ctxt, S1), + {cerl:update_c_seq(E, A, B), S2}; + apply -> + {Op, S1} = expr(cerl:apply_op(E), Env, Ren, Ctxt, S0), + {As, S2} = expr_list(cerl:apply_args(E), Env, Ren, Ctxt, S1), + {cerl:update_c_apply(E, Op, As), S2}; + call -> + {M, S1} = expr(cerl:call_module(E), Env, Ren, Ctxt, S0), + {N, S2} = expr(cerl:call_name(E), Env, Ren, Ctxt, S1), + {As, S3} = expr_list(cerl:call_args(E), Env, Ren, Ctxt, S2), + {rewrite_call(E, M, N, As, S3), S3}; + primop -> + {As, S1} = expr_list(cerl:primop_args(E), Env, Ren, Ctxt, S0), + N = cerl:primop_name(E), + {rewrite_primop(E, N, As, S1), S1}; + 'case' -> + case_expr(E, Env, Ren, Ctxt, S0); + 'fun' -> + Vs = cerl:fun_vars(E), + {Vs1, Env1, Ren1} = add_vars(Vs, Env, Ren), + {B, S1} = expr(cerl:fun_body(E), Env1, Ren1, Ctxt, S0), + {cerl:update_c_fun(E, Vs1, B), S1}; + 'receive' -> + receive_expr(E, Env, Ren, Ctxt, S0); + 'try' -> + {A, S1} = expr(cerl:try_arg(E), Env, Ren, Ctxt, S0), + Vs = cerl:try_vars(E), + {Vs1, Env1, Ren1} = add_vars(Vs, Env, Ren), + {B, S2} = expr(cerl:try_body(E), Env1, Ren1, Ctxt, S1), + Evs = cerl:try_evars(E), + {Evs1, Env2, Ren2} = add_vars(Evs, Env, Ren), + {H, S3} = expr(cerl:try_handler(E), Env2, Ren2, Ctxt, S2), + {cerl:update_c_try(E, A, Vs1, B, Evs1, H), S3}; + 'catch' -> + catch_expr(E, Env, Ren, Ctxt, S0); + letrec -> + {Ds, Env1, Ren1} = add_defs(cerl:letrec_defs(E), Env, Ren), + {Ds1, S1} = defs(Ds, false, Env1, Ren1, S0), + {B, S2} = expr(cerl:letrec_body(E), Env1, Ren1, Ctxt, S1), + {cerl:update_c_letrec(E, Ds1, B), S2}; + binary -> + {Segs, S1} = expr_list(cerl:binary_segments(E), Env, Ren, + Ctxt, S0), + {cerl:update_c_binary(E, Segs), S1}; + bitstr -> + {E1,S1} = expr(cerl:bitstr_val(E), Env, Ren, Ctxt, S0), + {E2,S2} = expr(cerl:bitstr_size(E), Env, Ren, Ctxt, S1), + E3 = cerl:bitstr_unit(E), + E4 = cerl:bitstr_type(E), + E5 = cerl:bitstr_flags(E), + {cerl:update_c_bitstr(E, E1, E2, E3, E4, E5), S2} + end. + +guard_expr(E, Env, Ren, Ctxt, S) -> + expr(E, Env, Ren, Ctxt#ctxt{class = guard}, S). + +expr_list(Es, Env, Ren, Ctxt, S0) -> + list(Es, Env, Ren, Ctxt, S0, fun expr/5). + +list([E | Es], Env, Ren, Ctxt, S0, F) -> + {E1, S1} = F(E, Env, Ren, Ctxt, S0), + {Es1, S2} = list(Es, Env, Ren, Ctxt, S1, F), + {[E1 | Es1], S2}; +list([], _, _, _, S, _) -> + {[], S}. + +pattern(E, Env, Ren) -> + case cerl:type(E) of + literal -> + E; + var -> + cerl:update_c_var(E, ren__map(cerl:var_name(E), Ren)); + values -> + Es = pattern_list(cerl:values_es(E), Env, Ren), + cerl:update_c_values(E, Es); + cons -> + E1 = pattern(cerl:cons_hd(E), Env, Ren), + E2 = pattern(cerl:cons_tl(E), Env, Ren), + cerl:update_c_cons(E, E1, E2); + tuple -> + Es = pattern_list(cerl:tuple_es(E), Env, Ren), + cerl:update_c_tuple(E, Es); + alias -> + V = pattern(cerl:alias_var(E), Env, Ren), + P = pattern(cerl:alias_pat(E), Env, Ren), + cerl:update_c_alias(E, V, P); + binary -> + Segs = pattern_list(cerl:binary_segments(E), Env, Ren), + cerl:update_c_binary(E, Segs); + bitstr -> + E1 = pattern(cerl:bitstr_val(E), Env, Ren), + E2 = pattern(cerl:bitstr_size(E), Env, Ren), + E3 = cerl:bitstr_unit(E), + E4 = cerl:bitstr_type(E), + E5 = cerl:bitstr_flags(E), + cerl:update_c_bitstr(E, E1, E2, E3, E4, E5) + end. + +pattern_list(ExprList, Env, Ren) -> + [pattern(E, Env, Ren) || E <- ExprList]. + +%% Visit the function body of each definition. We insert an explicit +%% reduction test at the start of each function. + +defs(Ds, Top, Env, Ren, S) -> + defs(Ds, [], Top, Env, Ren, S). + +defs([{V, F} | Ds], Ds1, Top, Env, Ren, S0) -> + S1 = case Top of + true -> s__enter_function(cerl:var_name(V), S0); + false -> S0 + end, + {B, S2} = expr(cerl:fun_body(F), Env, Ren, #ctxt{}, S1), + B1 = cerl:c_seq(cerl:c_primop(cerl:c_atom(?PRIMOP_REDUCTION_TEST), []), + B), + F1 = cerl:update_c_fun(F, cerl:fun_vars(F), B1), + defs(Ds, [{V, F1} | Ds1], Top, Env, Ren, S2); +defs([], Ds, _Top, _Env, _Ren, S) -> + {lists:reverse(Ds), S}. + +case_expr(E, Env, Ren, Ctxt, S0) -> + {A, S1} = expr(cerl:case_arg(E), Env, Ren, Ctxt, S0), + {Cs, S2} = clause_list(cerl:case_clauses(E), Env, Ren, Ctxt, S1), + case s__get_revisit(S2) of + false -> + {E1, Vs, S3} = pmatch(Cs, Env, Ren, Ctxt, S2), + {cerl:c_let(Vs, A, E1), S3}; + true -> + {cerl:c_case(A, Cs), S2} + end. + +%% Note: There is an ordering problem with switch-clauses and pattern +%% matching compilation. We must process any receive-clauses first, +%% making the message queue operations explicit, before we can do +%% pattern matching compilation. However, the latter can introduce new +%% expressions - in particular new guards - which also need processing. +%% Hence, we must process the clauses, then do pattern matching +%% compilation, and then re-visit the resulting expression with pattern +%% matching compilation disabled. + +pmatch(Cs, Env, _Ren, Ctxt, S0) -> + {E, Vs} = case s__get_pmatch(S0) of + true -> + cerl_pmatch:clauses(Cs, Env); + no_duplicates -> + put('cerl_pmatch_duplicate_code', never), + cerl_pmatch:clauses(Cs, Env); + duplicate_all -> + put('cerl_pmatch_duplicate_code', always), + cerl_pmatch:clauses(Cs, Env); + false -> + Vs0 = new_vars(cerl:clause_arity(hd(Cs)), Env), + {cerl:c_case(cerl:c_values(Vs0), Cs), Vs0} + end, + %% Revisit the resulting expression. Pass an empty renaming, since + %% all variables in E have already been properly renamed and must + %% not be renamed again by accident. + {E1, S1} = expr(E, Env, ren__new(), Ctxt, s__set_revisit(true, S0)), + {E1, Vs, s__set_revisit(false, S1)}. + +clause_list(Cs, Env, Ren, Ctxt, S) -> + list(Cs, Env, Ren, Ctxt, S, fun clause/5). + +clause(E, Env, Ren, Ctxt, S0) -> + Vs = cerl:clause_vars(E), + {_, Env1, Ren1} = add_vars(Vs, Env, Ren), + %% Visit patterns to rename variables. + Ps = pattern_list(cerl:clause_pats(E), Env1, Ren1), + {G, S1} = guard_expr(cerl:clause_guard(E), Env1, Ren1, Ctxt, S0), + {B, S2} = expr(cerl:clause_body(E), Env1, Ren1, Ctxt, S1), + {cerl:update_c_clause(E, Ps, G, B), S2}. + +%% We use the no-shadowing strategy, renaming variables on the fly and +%% only when necessary to uphold the invariant. + +add_vars(Vs, Env, Ren) -> + add_vars(Vs, [], Env, Ren). + +add_vars([V | Vs], Vs1, Env, Ren) -> + Name = cerl:var_name(V), + {Name1, Ren1} = rename(Name, Env, Ren), + add_vars(Vs, [cerl:update_c_var(V, Name1) | Vs1], + env__bind(Name1, variable, Env), Ren1); +add_vars([], Vs, Env, Ren) -> + {lists:reverse(Vs), Env, Ren}. + +rename(Name, Env, Ren) -> + case env__is_defined(Name, Env) of + false -> + {Name, Ren}; + true -> + New = env__new_name(Env), + {New, ren__add(Name, New, Ren)} + end. + +%% Setting up the environment for a list of letrec-bound definitions. + +add_defs(Ds, Env, Ren) -> + add_defs(Ds, [], Env, Ren). + +add_defs([{V, F} | Ds], Ds1, Env, Ren) -> + Name = cerl:var_name(V), + {Name1, Ren1} = + case env__is_defined(Name, Env) of + false -> + {Name, Ren}; + true -> + {N, A} = Name, + S = atom_to_list(N) ++ "_", + F1 = fun (Num) -> + {list_to_atom(S ++ integer_to_list(Num)), A} + end, + New = env__new_function_name(F1, Env), + {New, ren__add(Name, New, Ren)} + end, + add_defs(Ds, [{cerl:update_c_var(V, Name1), F} | Ds1], + env__bind(Name1, function, Env), Ren1); +add_defs([], Ds, Env, Ren) -> + {lists:reverse(Ds), Env, Ren}. + +%% We change remote calls to important built-in functions into primop +%% calls. In some cases (e.g., for the boolean operators), this is +%% mainly to allow the cerl_to_icode module to handle them more +%% straightforwardly. In most cases however, it is simply because they +%% are supposed to be represented as primop calls on the Icode level. + +rewrite_call(E, M, F, As, S) -> + case cerl:is_c_atom(M) andalso cerl:is_c_atom(F) of + true -> + case call_to_primop(cerl:atom_val(M), + cerl:atom_val(F), + length(As)) + of + {yes, ?PRIMOP_IS_RECORD} -> + %% Needs additional testing + [_, Tag, Arity] = As, + case (cerl:is_c_atom(Tag) andalso + cerl:is_c_int(Arity)) of + true -> + %% The primop might need further handling + N1 = cerl:c_atom(?PRIMOP_IS_RECORD), + E1 = cerl:update_c_primop(E, N1, As), + rewrite_primop(E1, N1, As, S); + false -> + cerl:update_c_call(E, M, F, As) + end; + {yes, N} -> + %% The primop might need further handling + N1 = cerl:c_atom(N), + E1 = cerl:update_c_primop(E, N1, As), + rewrite_primop(E1, N1, As, S); + no -> + cerl:update_c_call(E, M, F, As) + end; + false -> + cerl:update_c_call(E, M, F, As) + end. + +call_to_primop(erlang, 'not', 1) -> {yes, ?PRIMOP_NOT}; +call_to_primop(erlang, 'and', 2) -> {yes, ?PRIMOP_AND}; +call_to_primop(erlang, 'or', 2) -> {yes, ?PRIMOP_OR}; +call_to_primop(erlang, 'xor', 2) -> {yes, ?PRIMOP_XOR}; +call_to_primop(erlang, '+', 2) -> {yes, ?PRIMOP_ADD}; +%%call_to_primop(erlang, '+', 1) -> {yes, ?PRIMOP_IDENTITY}; +call_to_primop(erlang, '-', 2) -> {yes, ?PRIMOP_SUB}; +call_to_primop(erlang, '-', 1) -> {yes, ?PRIMOP_NEG}; +call_to_primop(erlang, '*', 2) -> {yes, ?PRIMOP_MUL}; +call_to_primop(erlang, '/', 2) -> {yes, ?PRIMOP_DIV}; +call_to_primop(erlang, 'div', 2) -> {yes, ?PRIMOP_INTDIV}; +call_to_primop(erlang, 'rem', 2) -> {yes, ?PRIMOP_REM}; +call_to_primop(erlang, 'band', 2) -> {yes, ?PRIMOP_BAND}; +call_to_primop(erlang, 'bor', 2) -> {yes, ?PRIMOP_BOR}; +call_to_primop(erlang, 'bxor', 2) -> {yes, ?PRIMOP_BXOR}; +call_to_primop(erlang, 'bnot', 1) -> {yes, ?PRIMOP_BNOT}; +call_to_primop(erlang, 'bsl', 2) -> {yes, ?PRIMOP_BSL}; +call_to_primop(erlang, 'bsr', 2) -> {yes, ?PRIMOP_BSR}; +call_to_primop(erlang, '==', 2) -> {yes, ?PRIMOP_EQ}; +call_to_primop(erlang, '/=', 2) -> {yes, ?PRIMOP_NE}; +call_to_primop(erlang, '=:=', 2) -> {yes, ?PRIMOP_EXACT_EQ}; +call_to_primop(erlang, '=/=', 2) -> {yes, ?PRIMOP_EXACT_NE}; +call_to_primop(erlang, '<', 2) -> {yes, ?PRIMOP_LT}; +call_to_primop(erlang, '>', 2) -> {yes, ?PRIMOP_GT}; +call_to_primop(erlang, '=<', 2) -> {yes, ?PRIMOP_LE}; +call_to_primop(erlang, '>=', 2) -> {yes, ?PRIMOP_GE}; +call_to_primop(erlang, is_atom, 1) -> {yes, ?PRIMOP_IS_ATOM}; +call_to_primop(erlang, is_binary, 1) -> {yes, ?PRIMOP_IS_BINARY}; +call_to_primop(erlang, is_constant, 1) -> {yes, ?PRIMOP_IS_CONSTANT}; +call_to_primop(erlang, is_float, 1) -> {yes, ?PRIMOP_IS_FLOAT}; +call_to_primop(erlang, is_function, 1) -> {yes, ?PRIMOP_IS_FUNCTION}; +call_to_primop(erlang, is_integer, 1) -> {yes, ?PRIMOP_IS_INTEGER}; +call_to_primop(erlang, is_list, 1) -> {yes, ?PRIMOP_IS_LIST}; +call_to_primop(erlang, is_number, 1) -> {yes, ?PRIMOP_IS_NUMBER}; +call_to_primop(erlang, is_pid, 1) -> {yes, ?PRIMOP_IS_PID}; +call_to_primop(erlang, is_port, 1) -> {yes, ?PRIMOP_IS_PORT}; +call_to_primop(erlang, is_reference, 1) -> {yes, ?PRIMOP_IS_REFERENCE}; +call_to_primop(erlang, is_tuple, 1) -> {yes, ?PRIMOP_IS_TUPLE}; +call_to_primop(erlang, internal_is_record, 3) -> {yes, ?PRIMOP_IS_RECORD}; +call_to_primop(erlang, is_record, 3) -> {yes, ?PRIMOP_IS_RECORD}; +call_to_primop(erlang, element, 2) -> {yes, ?PRIMOP_ELEMENT}; +call_to_primop(erlang, exit, 1) -> {yes, ?PRIMOP_EXIT}; +call_to_primop(erlang, throw, 1) -> {yes, ?PRIMOP_THROW}; +call_to_primop(erlang, error, 1) -> {yes, ?PRIMOP_ERROR}; +call_to_primop(erlang, error, 2) -> {yes, ?PRIMOP_ERROR}; +call_to_primop(M, F, A) when is_atom(M), is_atom(F), is_integer(A) -> no. + +%% Also, some primops (introduced by Erlang to Core Erlang translation +%% and possibly other stages) must be recognized and rewritten. + +rewrite_primop(E, N, As, S) -> + case {cerl:atom_val(N), As} of + {match_fail, [R]} -> + M = s__get_module_name(S), + {F, A} = s__get_function_name(S), + Stack = cerl:abstract([{M, F, A}]), + case cerl:type(R) of + tuple -> + %% Function clause failures have a special encoding + %% as '{function_clause, Arg1, ..., ArgN}'. + case cerl:tuple_es(R) of + [X | Xs] -> + case cerl:is_c_atom(X) of + true -> + case cerl:atom_val(X) of + function_clause -> + FStack = cerl:make_list( + [cerl:c_tuple( + [cerl:c_atom(M), + cerl:c_atom(F), + cerl:make_list(Xs)])]), + match_fail(E, X, FStack); + _ -> + match_fail(E, R, Stack) + end; + false -> + match_fail(E, R, Stack) + end; + _ -> + match_fail(E, R, Stack) + end; + _ -> + match_fail(E, R, Stack) + end; + _ -> + cerl:update_c_primop(E, N, As) + end. + +match_fail(E, R, Stack) -> + cerl:update_c_primop(E, cerl:c_atom(?PRIMOP_ERROR), [R, Stack]). + +%% Simple let-definitions (of degree 1) in guard context are always +%% inline expanded. This is allowable, since they cannot have side +%% effects, and it makes it easy to generate good code for boolean +%% expressions. It could cause repeated evaluations, but typically, +%% local definitions within guards are used exactly once. + +let_expr(E, Env, Ren, Ctxt, S) -> + if Ctxt#ctxt.class =:= guard -> + case cerl:let_vars(E) of + [V] -> + {Name, Ren1} = rename(cerl:var_name(V), Env, Ren), + Env1 = env__bind(Name, {expr, cerl:let_arg(E)}, Env), + expr(cerl:let_body(E), Env1, Ren1, Ctxt, S); + _ -> + let_expr_1(E, Env, Ren, Ctxt, S) + end; + true -> + let_expr_1(E, Env, Ren, Ctxt, S) + end. + +let_expr_1(E, Env, Ren, Ctxt, S0) -> + {A, S1} = expr(cerl:let_arg(E), Env, Ren, Ctxt, S0), + Vs = cerl:let_vars(E), + {Vs1, Env1, Ren1} = add_vars(Vs, Env, Ren), + {B, S2} = expr(cerl:let_body(E), Env1, Ren1, Ctxt, S1), + {cerl:update_c_let(E, Vs1, A, B), S2}. + +variable(E, Env, Ren, Ctxt, S) -> + V = ren__map(cerl:var_name(E), Ren), + if Ctxt#ctxt.class =:= guard -> + case env__lookup(V, Env) of + {ok, {expr, E1}} -> + expr(E1, Env, Ren, Ctxt, S); % inline + _ -> + %% Since we don't track all bindings when we revisit + %% guards, some names will not be in the environment. + variable_1(E, V, S) + end; + true -> + variable_1(E, V, S) + end. + +variable_1(E, V, S) -> + {cerl:update_c_var(E, V), S}. + +%% A catch-expression 'catch Expr' is rewritten as: +%% +%% try Expr +%% of (V) -> V +%% catch (T, V, E) -> +%% letrec 'wrap'/1 = fun (V) -> {'EXIT', V} +%% in case T of +%% 'throw' when 'true' -> V +%% 'exit' when 'true' -> 'wrap'/1(V) +%% V when 'true' -> +%% 'wrap'/1({V, erlang:get_stacktrace()}) +%% end + +catch_expr(E, Env, Ren, Ctxt, S) -> + T = cerl:c_var('T'), + V = cerl:c_var('V'), + X = cerl:c_var('X'), + W = cerl:c_var({wrap,1}), + G = cerl:c_call(cerl:c_atom('erlang'),cerl:c_atom('get_stacktrace'),[]), + Cs = [cerl:c_clause([cerl:c_atom('throw')], V), + cerl:c_clause([cerl:c_atom('exit')], cerl:c_apply(W, [V])), + cerl:c_clause([T], cerl:c_apply(W, [cerl:c_tuple([V,G])])) + ], + C = cerl:c_case(T, Cs), + F = cerl:c_fun([V], cerl:c_tuple([cerl:c_atom('EXIT'), V])), + H = cerl:c_letrec([{W,F}], C), + As = cerl:get_ann(E), + {B, S1} = expr(cerl:catch_body(E),Env, Ren, Ctxt, S), + {cerl:ann_c_try(As, B, [V], V, [T,V,X], H), S1}. + +%% Receive-expressions are rewritten as follows: +%% +%% receive +%% P1 when G1 -> B1 +%% ... +%% Pn when Gn -> Bn +%% after T -> A end +%% becomes: +%% receive +%% M when 'true' -> +%% case M of +%% P1 when G1 -> do primop RECEIVE_SELECT B1 +%% ... +%% Pn when Gn -> do primop RECEIVE_SELECT Bn +%% Pn+1 when 'true' -> primop RECEIVE_NEXT() +%% end +%% after T -> A end + +receive_expr(E, Env, Ren, Ctxt, S0) -> + case s__get_revisit(S0) of + false -> + Cs = receive_clauses(cerl:receive_clauses(E)), + {Cs1, S1} = clause_list(Cs, Env, Ren, Ctxt, S0), + {B, Vs, S2} = pmatch(Cs1, Env, Ren, Ctxt, S1), + {T, S3} = expr(cerl:receive_timeout(E), Env, Ren, Ctxt, S2), + {A, S4} = expr(cerl:receive_action(E), Env, Ren, Ctxt, S3), + {cerl:update_c_receive(E, [cerl:c_clause(Vs, B)], T, A), S4}; + true -> + %% we should never enter a receive-expression twice + {E, S0} + end. + +receive_clauses([C | Cs]) -> + Call = cerl:c_primop(cerl:c_atom(?PRIMOP_RECEIVE_SELECT), []), + B = cerl:c_seq(Call, cerl:clause_body(C)), + C1 = cerl:update_c_clause(C, cerl:clause_pats(C), + cerl:clause_guard(C), B), + [C1 | receive_clauses(Cs)]; +receive_clauses([]) -> + Call = cerl:c_primop(cerl:c_atom(?PRIMOP_RECEIVE_NEXT), []), + V = cerl:c_var('X'), % any name is ok + [cerl:c_clause([V], Call)]. + +new_vars(N, Env) -> + [cerl:c_var(V) || V <- env__new_names(N, Env)]. + +%% --------------------------------------------------------------------- +%% Environment + +env__new() -> + rec_env:empty(). + +env__bind(Key, Value, Env) -> + rec_env:bind(Key, Value, Env). + +%% env__get(Key, Env) -> +%% rec_env:get(Key, Env). + +env__lookup(Key, Env) -> + rec_env:lookup(Key, Env). + +env__is_defined(Key, Env) -> + rec_env:is_defined(Key, Env). + +env__new_name(Env) -> + rec_env:new_key(Env). + +env__new_names(N, Env) -> + rec_env:new_keys(N, Env). + +env__new_function_name(F, Env) -> + rec_env:new_key(F, Env). + +%% --------------------------------------------------------------------- +%% Renaming + +ren__new() -> + dict:new(). + +ren__add(Key, Value, Ren) -> + dict:store(Key, Value, Ren). + +ren__map(Key, Ren) -> + case dict:find(Key, Ren) of + {ok, Value} -> + Value; + error -> + Key + end. + +%% --------------------------------------------------------------------- +%% State + +%% pmatch = 'true' | 'false' | 'no_duplicates' | 'duplicate_all' + +-record(state, {module::atom(), + function::{atom(), 0..256}, + pmatch=true, + revisit = false}). + +s__new(Module) -> + #state{module = Module}. + +s__get_module_name(S) -> + S#state.module. + +s__enter_function(F, S) -> + S#state{function = F}. + +s__get_function_name(S) -> + S#state.function. + +s__set_pmatch(V, S) -> + S#state{pmatch = V}. + +s__get_pmatch(S) -> + S#state.pmatch. + +s__set_revisit(V, S) -> + S#state{revisit = V}. + +s__get_revisit(S) -> + S#state.revisit. diff --git a/lib/hipe/cerl/cerl_hybrid_transform.erl b/lib/hipe/cerl/cerl_hybrid_transform.erl new file mode 100644 index 0000000000..b248b0ccd0 --- /dev/null +++ b/lib/hipe/cerl/cerl_hybrid_transform.erl @@ -0,0 +1,153 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(cerl_hybrid_transform). + +%% Use compile option `{core_transform, cerl_hybrid_transform}' to +%% insert this as a compilation pass. + +-export([transform/2, core_transform/2]). + +-spec core_transform(cerl:cerl(), [term()]) -> cerl:cerl(). + +core_transform(Code, Opts) -> + cerl:to_records(transform(cerl:from_records(Code), Opts)). + +-spec transform(cerl:cerl(), [term()]) -> cerl:cerl(). + +transform(Code, _Opts) -> + Code0 = cerl_trees:map(fun unfold_literal/1, Code), + {Code1, _} = cerl_trees:label(Code0), + io:fwrite("Running hybrid heap analysis..."), + {T1,_} = statistics(runtime), + {Code2, _, Vars} = cerl_messagean:annotate(Code1), + {T2,_} = statistics(runtime), + io:fwrite("(~w ms), transform...", [T2 - T1]), + Code3 = rewrite(Code2, Vars), + io:fwrite("done.\n"), + cerl_trees:map(fun fold_literal/1, Code3). + +unfold_literal(T) -> + cerl:unfold_literal(T). + +fold_literal(T) -> + cerl:fold_literal(T). + +%% If escape-annotated: +%% {...} => hybrid:tuple([...]) +%% [H | T] => hybrid:cons(H, T) +%% +%% Wrapper for args to hybrid:cons/hybrid:tuple that may need copying: +%% hybrid:copy(A) + +rewrite(Node, Vars) -> + case cerl:type(Node) of + tuple -> + Es = rewrite_list(cerl:tuple_es(Node), Vars), + case is_escaping(Node) of + false -> + cerl:update_c_tuple(Node, Es); + true -> + Es1 = wrap(Es, Node, Vars), + cerl:update_c_call(Node, + cerl:abstract(hybrid), + cerl:abstract(tuple), + [cerl:make_list(Es1)]) +%%% cerl:update_c_call(Node, cerl:abstract(hybrid), +%%% cerl:abstract(tuple), Es1) + end; + cons -> + H = rewrite(cerl:cons_hd(Node), Vars), + T = rewrite(cerl:cons_tl(Node), Vars), + case is_escaping(Node) of + false -> + cerl:update_c_cons(Node, H, T); + true -> + Es = wrap([H, T], Node, Vars), + cerl:update_c_call(Node, + cerl:abstract(hybrid), + cerl:abstract(cons), + Es) + end; +%%% call -> +%%% M = rewrite(cerl:call_module(Node)), +%%% F = rewrite(cerl:call_name(Node)), +%%% As = rewrite_list(cerl:call_args(Node)), +%%% case cerl:is_c_atom(M) andalso cerl:is_c_atom(F) of +%%% true -> +%%% case {cerl:atom_val(M), cerl:atom_val(F), length(As)} of +%%% {erlang, '!', 2} -> +%%% cerl:update_c_call(Node, +%%% cerl:abstract(hipe_bifs), +%%% cerl:abstract(send), +%%% [cerl:make_list(As)]); +%%% _ -> +%%% cerl:update_c_call(Node, M, F, As) +%%% end; +%%% false -> +%%% cerl:update_c_call(Node, M, F, As) +%%% end; + clause -> + B = rewrite(cerl:clause_body(Node), Vars), + cerl:update_c_clause(Node, cerl:clause_pats(Node), + cerl:clause_guard(Node), B); + primop -> + case cerl:atom_val(cerl:primop_name(Node)) of + match_fail -> + Node; + _ -> + As = rewrite_list(cerl:primop_args(Node), Vars), + cerl:update_c_primop(Node, cerl:primop_name(Node), As) + end; + _T -> + case cerl:subtrees(Node) of + [] -> + Node; + Gs -> + cerl:update_tree(Node, [rewrite_list(Ns, Vars) + || Ns <- Gs]) + end + end. + +rewrite_list([N | Ns], Vars) -> + [rewrite(N, Vars) | rewrite_list(Ns, Vars)]; +rewrite_list([], _) -> + []. + +is_escaping(T) -> + lists:member(escapes, cerl:get_ann(T)). + +wrap(Es, Node, Vars) -> + L = cerl_trees:get_label(Node), + Xs = dict:fetch(L, Vars), + wrap(Es, Xs). + +wrap([E | Es], [{S, _} | Xs]) -> + case ordsets:is_element(unsafe, S) of +%% case cerl:type(E) =/= literal of + true -> + [cerl:c_call(cerl:abstract(hybrid), + cerl:abstract(copy), + [E]) + | wrap(Es, Xs)]; + false -> + [E | wrap(Es, Xs)] + end; +wrap([], _) -> + []. diff --git a/lib/hipe/cerl/cerl_lib.erl b/lib/hipe/cerl/cerl_lib.erl new file mode 100644 index 0000000000..83bb20e047 --- /dev/null +++ b/lib/hipe/cerl/cerl_lib.erl @@ -0,0 +1,462 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% @doc Utility functions for Core Erlang abstract syntax trees. +%% +%%

Syntax trees are defined in the module cerl.

+%% +%% @type cerl() = cerl:cerl() + +-module(cerl_lib). + +-define(NO_UNUSED, true). + +-export([is_safe_expr/2, reduce_expr/1, is_simple_clause/1, + is_bool_switch/1, bool_switch_cases/1]). +-ifndef(NO_UNUSED). +-export([is_safe_expr/1, is_pure_expr/1, is_pure_expr/2, + make_bool_switch/3]). +-endif. + + +%% Test if a clause has a single pattern and an always-true guard. + +-spec is_simple_clause(cerl:c_clause()) -> boolean(). + +is_simple_clause(C) -> + case cerl:clause_pats(C) of + [_P] -> + G = cerl:clause_guard(C), + case cerl_clauses:eval_guard(G) of + {value, true} -> true; + _ -> false + end; + _ -> false + end. + +%% Creating an if-then-else construct that can be recognized as such. +%% `Test' *must* be guaranteed to return a boolean. + +-ifndef(NO_UNUSED). +make_bool_switch(Test, True, False) -> + Cs = [cerl:c_clause([cerl:c_atom(true)], True), + cerl:c_clause([cerl:c_atom(false)], False)], + cerl:c_case(Test, Cs). +-endif. + +%% A boolean switch cannot have a catch-all; only true/false branches. + +-spec is_bool_switch([cerl:c_clause()]) -> boolean(). + +is_bool_switch([C1, C2]) -> + case is_simple_clause(C1) andalso is_simple_clause(C2) of + true -> + [P1] = cerl:clause_pats(C1), + [P2] = cerl:clause_pats(C2), + case cerl:is_c_atom(P1) andalso cerl:is_c_atom(P2) of + true -> + A1 = cerl:concrete(P1), + A2 = cerl:concrete(P2), + is_boolean(A1) andalso is_boolean(A2) + andalso A1 =/= A2; + false -> + false + end; + false -> + false + end; +is_bool_switch(_) -> + false. + +%% Returns the true-body and the false-body for boolean switch clauses. + +-spec bool_switch_cases([cerl:c_clause()]) -> {cerl:cerl(), cerl:cerl()}. + +bool_switch_cases([C1, C2]) -> + B1 = cerl:clause_body(C1), + B2 = cerl:clause_body(C2), + [P1] = cerl:clause_pats(C1), + case cerl:concrete(P1) of + true -> + {B1, B2}; + false -> + {B2, B1} + end. + +%% +%% The type of the check functions like the default check below - XXX: refine +%% +-type check_fun() :: fun((_, _) -> boolean()). + +%% The default function property check always returns `false': + +default_check(_Property, _Function) -> false. + + +%% @spec is_safe_expr(Expr::cerl()) -> boolean() +%% +%% @doc Returns `true' if `Expr' represents a "safe" Core Erlang +%% expression, otherwise `false'. An expression is safe if it always +%% completes normally and does not modify the state (although the return +%% value may depend on the state). +%% +%% Expressions of type `apply', `case', `receive' and `binary' are +%% always considered unsafe by this function. + +%% TODO: update cerl_inline to use these functions instead. + +-ifndef(NO_UNUSED). +is_safe_expr(E) -> + Check = fun default_check/2, + is_safe_expr(E, Check). +-endif. +%% @clear + +-spec is_safe_expr(cerl:cerl(), check_fun()) -> boolean(). + +is_safe_expr(E, Check) -> + case cerl:type(E) of + literal -> + true; + var -> + true; + 'fun' -> + true; + values -> + is_safe_expr_list(cerl:values_es(E), Check); + tuple -> + is_safe_expr_list(cerl:tuple_es(E), Check); + cons -> + case is_safe_expr(cerl:cons_hd(E), Check) of + true -> + is_safe_expr(cerl:cons_tl(E), Check); + false -> + false + end; + 'let' -> + case is_safe_expr(cerl:let_arg(E), Check) of + true -> + is_safe_expr(cerl:let_body(E), Check); + false -> + false + end; + letrec -> + is_safe_expr(cerl:letrec_body(E), Check); + seq -> + case is_safe_expr(cerl:seq_arg(E), Check) of + true -> + is_safe_expr(cerl:seq_body(E), Check); + false -> + false + end; + 'catch' -> + is_safe_expr(cerl:catch_body(E), Check); + 'try' -> + %% If the guarded expression is safe, the try-handler will + %% never be evaluated, so we need only check the body. If + %% the guarded expression is pure, but could fail, we also + %% have to check the handler. + case is_safe_expr(cerl:try_arg(E), Check) of + true -> + is_safe_expr(cerl:try_body(E), Check); + false -> + case is_pure_expr(cerl:try_arg(E), Check) of + true -> + case is_safe_expr(cerl:try_body(E), Check) of + true -> + is_safe_expr(cerl:try_handler(E), Check); + false -> + false + end; + false -> + false + end + end; + primop -> + Name = cerl:atom_val(cerl:primop_name(E)), + As = cerl:primop_args(E), + case Check(safe, {Name, length(As)}) of + true -> + is_safe_expr_list(As, Check); + false -> + false + end; + call -> + Module = cerl:call_module(E), + Name = cerl:call_name(E), + case cerl:is_c_atom(Module) and cerl:is_c_atom(Name) of + true -> + M = cerl:atom_val(Module), + F = cerl:atom_val(Name), + As = cerl:call_args(E), + case Check(safe, {M, F, length(As)}) of + true -> + is_safe_expr_list(As, Check); + false -> + false + end; + false -> + false % Call to unknown function + end; + _ -> + false + end. + +is_safe_expr_list([E | Es], Check) -> + case is_safe_expr(E, Check) of + true -> + is_safe_expr_list(Es, Check); + false -> + false + end; +is_safe_expr_list([], _Check) -> + true. + + +%% @spec (Expr::cerl()) -> bool() +%% +%% @doc Returns `true' if `Expr' represents a "pure" Core Erlang +%% expression, otherwise `false'. An expression is pure if it does not +%% affect the state, nor depend on the state, although its evaluation is +%% not guaranteed to complete normally for all input. +%% +%% Expressions of type `apply', `case', `receive' and `binary' are +%% always considered impure by this function. + +-ifndef(NO_UNUSED). +is_pure_expr(E) -> + Check = fun default_check/2, + is_pure_expr(E, Check). +-endif. +%% @clear + +is_pure_expr(E, Check) -> + case cerl:type(E) of + literal -> + true; + var -> + true; + 'fun' -> + true; + values -> + is_pure_expr_list(cerl:values_es(E), Check); + tuple -> + is_pure_expr_list(cerl:tuple_es(E), Check); + cons -> + case is_pure_expr(cerl:cons_hd(E), Check) of + true -> + is_pure_expr(cerl:cons_tl(E), Check); + false -> + false + end; + 'let' -> + case is_pure_expr(cerl:let_arg(E), Check) of + true -> + is_pure_expr(cerl:let_body(E), Check); + false -> + false + end; + letrec -> + is_pure_expr(cerl:letrec_body(E), Check); + seq -> + case is_pure_expr(cerl:seq_arg(E), Check) of + true -> + is_pure_expr(cerl:seq_body(E), Check); + false -> + false + end; + 'catch' -> + is_pure_expr(cerl:catch_body(E), Check); + 'try' -> + case is_pure_expr(cerl:try_arg(E), Check) of + true -> + case is_pure_expr(cerl:try_body(E), Check) of + true -> + is_pure_expr(cerl:try_handler(E), Check); + false -> + false + end; + false -> + false + end; + primop -> + Name = cerl:atom_val(cerl:primop_name(E)), + As = cerl:primop_args(E), + case Check(pure, {Name, length(As)}) of + true -> + is_pure_expr_list(As, Check); + false -> + false + end; + call -> + Module = cerl:call_module(E), + Name = cerl:call_name(E), + case cerl:is_c_atom(Module) and cerl:is_c_atom(Name) of + true -> + M = cerl:atom_val(Module), + F = cerl:atom_val(Name), + As = cerl:call_args(E), + case Check(pure, {M, F, length(As)}) of + true -> + is_pure_expr_list(As, Check); + false -> + false + end; + false -> + false % Call to unknown function + end; + _ -> + false + end. + +is_pure_expr_list([E | Es], Check) -> + case is_pure_expr(E, Check) of + true -> + is_pure_expr_list(Es, Check); + false -> + false + end; +is_pure_expr_list([], _Check) -> + true. + + +%% Peephole optimizations +%% +%% This is only intended to be a light-weight cleanup optimizer, +%% removing small things that may e.g. have been generated by other +%% optimization passes or in the translation from higher-level code. +%% It is not recursive in general - it only descends until it can do no +%% more work in the current context. +%% +%% To expose hidden cases of final expressions (enabling last call +%% optimization), we try to remove all trivial let-bindings (`let X = Y +%% in X', `let X = Y in Y', `let X = Y in let ... in ...', `let X = let +%% ... in ... in ...', etc.). We do not, however, try to recognize any +%% other similar cases, even for simple `case'-expressions like `case E +%% of X -> X end', or simultaneous multiple-value bindings. + +-spec reduce_expr(cerl:cerl()) -> cerl:cerl(). + +reduce_expr(E) -> + Check = fun default_check/2, + reduce_expr(E, Check). + +-spec reduce_expr(cerl:cerl(), check_fun()) -> cerl:cerl(). + +reduce_expr(E, Check) -> + case cerl:type(E) of + values -> + case cerl:values_es(E) of + [E1] -> + %% Not really an "optimization" in itself, but + %% enables other rewritings by removing the wrapper. + reduce_expr(E1, Check); + _ -> + E + end; + 'seq' -> + A = reduce_expr(cerl:seq_arg(E), Check), + B = reduce_expr(cerl:seq_body(E), Check), + %% `do ' is equivalent to `' if `' is + %% "safe" (cannot effect the behaviour in any way). + case is_safe_expr(A, Check) of + true -> + B; + false -> + case cerl:is_c_seq(B) of + true -> + %% Rewrite `do do ' to `do do + %% ' so that the "body" of the + %% outermost seq-operator is the expression + %% which produces the final result (i.e., + %% E3). This can make other optimizations + %% easier; see `let'. + B1 = cerl:seq_arg(B), + B2 = cerl:seq_body(B), + cerl:c_seq(cerl:c_seq(A, B1), B2); + false -> + cerl:c_seq(A, B) + end + end; + 'let' -> + A = reduce_expr(cerl:let_arg(E), Check), + case cerl:is_c_seq(A) of + true -> + %% `let X = do in Y' is equivalent to `do + %% let X = in Y'. Note that `' cannot + %% be a seq-operator, due to the `seq' optimization. + A1 = cerl:seq_arg(A), + A2 = cerl:seq_body(A), + E1 = cerl:update_c_let(E, cerl:let_vars(E), + A2, cerl:let_body(E)), + cerl:c_seq(A1, reduce_expr(E1, Check)); + false -> + B = reduce_expr(cerl:let_body(E), Check), + Vs = cerl:let_vars(E), + %% We give up if the body does not reduce to a + %% single variable. This is not a generic copy + %% propagation. + case cerl:type(B) of + var when length(Vs) =:= 1 -> + %% We have `let = in ': + [V] = Vs, + N1 = cerl:var_name(V), + N2 = cerl:var_name(B), + if N1 =:= N2 -> + %% `let X = in X' equals `' + A; + true -> + %% `let X = in Y' when X and Y + %% are different variables is + %% equivalent to `do Y'. + reduce_expr(cerl:c_seq(A, B), Check) + end; + literal -> + %% `let X = in T' when T is a literal + %% term is equivalent to `do T'. + reduce_expr(cerl:c_seq(A, B), Check); + _ -> + cerl:update_c_let(E, Vs, A, B) + end + end; + 'try' -> + %% Get rid of unnecessary try-expressions. + A = reduce_expr(cerl:try_arg(E), Check), + B = reduce_expr(cerl:try_body(E), Check), + case is_safe_expr(A, Check) of + true -> + B; + false -> + cerl:update_c_try(E, A, cerl:try_vars(E), B, + cerl:try_evars(E), + cerl:try_handler(E)) + end; + 'catch' -> + %% Just a simpler form of try-expressions. + B = reduce_expr(cerl:catch_body(E), Check), + case is_safe_expr(B, Check) of + true -> + B; + false -> + cerl:update_c_catch(E, B) + end; + _ -> + E + end. diff --git a/lib/hipe/cerl/cerl_messagean.erl b/lib/hipe/cerl/cerl_messagean.erl new file mode 100644 index 0000000000..0753376e7d --- /dev/null +++ b/lib/hipe/cerl/cerl_messagean.erl @@ -0,0 +1,1105 @@ +%% ===================================================================== +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Message analysis of Core Erlang programs. +%% +%% Copyright (C) 2002 Richard Carlsson +%% +%% Author contact: richardc@it.uu.se +%% ===================================================================== + +%% TODO: might need a "top" (`any') element for any-length value lists. + +-module(cerl_messagean). + +-export([annotate/1]). + +-import(cerl, [alias_pat/1, alias_var/1, ann_c_var/2, ann_c_fun/3, + apply_args/1, apply_op/1, atom_val/1, bitstr_size/1, + bitstr_val/1, binary_segments/1, c_letrec/2, + ann_c_tuple/2, c_nil/0, call_args/1, call_module/1, + call_name/1, case_arg/1, case_clauses/1, catch_body/1, + clause_body/1, clause_guard/1, clause_pats/1, cons_hd/1, + cons_tl/1, fun_body/1, fun_vars/1, get_ann/1, int_val/1, + is_c_atom/1, is_c_int/1, let_arg/1, let_body/1, + let_vars/1, letrec_body/1, letrec_defs/1, module_defs/1, + module_defs/1, module_exports/1, pat_vars/1, + primop_args/1, primop_name/1, receive_action/1, + receive_clauses/1, receive_timeout/1, seq_arg/1, + seq_body/1, set_ann/2, try_arg/1, try_body/1, try_vars/1, + try_evars/1, try_handler/1, tuple_es/1, type/1, + values_es/1]). + +-import(cerl_trees, [get_label/1]). + +-define(DEF_LIMIT, 4). + +%% -export([test/1, test1/1, ttest/1]). + +%% ttest(F) -> +%% {T, _} = cerl_trees:label(user_default:read(F)), +%% {Time0, _} = erlang:statistics(runtime), +%% analyze(T), +%% {Time1, _} = erlang:statistics(runtime), +%% Time1 - Time0. + +%% test(F) -> +%% {T, _} = cerl_trees:label(user_default:read(F)), +%% {Time0, _} = erlang:statistics(runtime), +%% {Esc, _Vars} = analyze(T), +%% {Time1, _} = erlang:statistics(runtime), +%% io:fwrite("messages: ~p.\n", [Esc]), +%% Set = sets:from_list(Esc), +%% H = fun (Node, Ctxt, Cont) -> +%% Doc = case get_ann(Node) of +%% [{label, L} | _] -> +%% B = sets:is_element(L, Set), +%% bf(Node, Ctxt, Cont, B); +%% _ -> +%% bf(Node, Ctxt, Cont, false) +%% end, +%% case type(Node) of +%% cons -> color(Doc); +%% tuple -> color(Doc); +%% _ -> Doc +%% end +%% end, +%% {ok, FD} = file:open("out.html",[write]), +%% Txt = cerl_prettypr:format(T, [{hook, H},{user,false}]), +%% io:put_chars(FD, "
\n"),
+%%     io:put_chars(FD, html(Txt)),
+%%     io:put_chars(FD, "
\n"), +%% file:close(FD), +%% {ok, Time1 - Time0}. + +%% test1(F) -> +%% {T, _} = cerl_trees:label(user_default:read(F)), +%% {Time0, _} = erlang:statistics(runtime), +%% {T1, Esc, Vars} = annotate(T), +%% {Time1, _} = erlang:statistics(runtime), +%% io:fwrite("messages: ~p.\n", [Esc]), +%% %%% io:fwrite("vars: ~p.\n", [[X || X <- dict:to_list(Vars)]]), +%% T2 = hhl_transform:transform(T1, Vars), +%% Set = sets:from_list(Esc), +%% H = fun (Node, Ctxt, Cont) -> +%% case get_ann(Node) of +%% [{label, L} | _] -> +%% B = sets:is_element(L, Set), +%% bf(Node, Ctxt, Cont, B); +%% _ -> +%% bf(Node, Ctxt, Cont, false) +%% end +%% end, +%% {ok, FD} = file:open("out.html",[write]), +%% Txt = cerl_prettypr:format(T2, [{hook, H},{user,false}]), +%% io:put_chars(FD, "
\n"),
+%%     io:put_chars(FD, html(Txt)),
+%%     io:put_chars(FD, "
\n"), +%% file:close(FD), +%% {ok, Time1 - Time0}. + +%% html(Cs) -> +%% html(Cs, []). + +%% html([$#, $< | Cs], As) -> +%% html_1(Cs, [$< | As]); +%% html([$< | Cs], As) -> +%% html(Cs, ";tl&" ++ As); +%% html([$> | Cs], As) -> +%% html(Cs, ";tg&" ++ As); +%% html([$& | Cs], As) -> +%% html(Cs, ";pma&" ++ As); +%% html([C | Cs], As) -> +%% html(Cs, [C | As]); +%% html([], As) -> +%% lists:reverse(As). + +%% html_1([$> | Cs], As) -> +%% html(Cs, [$> | As]); +%% html_1([C | Cs], As) -> +%% html_1(Cs, [C | As]). + +%% bf(Node, Ctxt, Cont, B) -> +%% B0 = cerl_prettypr:get_ctxt_user(Ctxt), +%% if B /= B0 -> +%% Ctxt1 = cerl_prettypr:set_ctxt_user(Ctxt, B), +%% Doc = Cont(Node, Ctxt1), +%% case B of +%% true -> +%% Start = "", +%% End = ""; +%% false -> +%% Start = "", +%% End = "" +%% end, +%% markup(Doc, Start, End); +%% true -> +%% Cont(Node, Ctxt) +%% end. + +%% color(Doc) -> +%% % Doc. +%% markup(Doc, "", ""). + +%% markup(Doc, Start, End) -> +%% prettypr:beside( +%% prettypr:null_text([$# | Start]), +%% prettypr:beside(Doc, +%% prettypr:null_text([$# | End]))). + + +%% ===================================================================== +%% annotate(Tree) -> {Tree1, Escapes, Vars} +%% +%% Tree = cerl:cerl() +%% +%% Analyzes `Tree' (see `analyze') and appends a term 'escapes', to +%% the annotation list of each constructor expression node and of +%% `Tree', corresponding to the escape information derived by the +%% analysis. Any previous such annotations are removed from `Tree'. +%% `Tree1' is the modified tree; for details on `OutList', +%% `Outputs' , `Dependencies', `Escapes' and `Parents', see +%% `analyze'. +%% +%% Note: `Tree' must be annotated with labels in order to use this +%% function; see `analyze' for details. + +-type label() :: integer() | 'external' | 'top'. +-type ordset(X) :: [X]. % XXX: TAKE ME OUT + +-spec annotate(cerl:cerl()) -> {cerl:cerl(), ordset(label()), dict()}. + +annotate(Tree) -> + {Esc0, Vars} = analyze(Tree), + Esc = sets:from_list(Esc0), + F = fun (T) -> + case type(T) of + literal -> T; +%%% var -> +%%% L = get_label(T), +%%% T1 = ann_escape(T, L, Esc), +%%% X = dict:fetch(L, Vars), +%%% set_ann(T1, append_ann({s,X}, get_ann(T1))); + _ -> + L = get_label(T), + ann_escape(T, L, Esc) + end + end, + {cerl_trees:map(F, Tree), Esc0, Vars}. + +ann_escape(T, L, Esc) -> + case sets:is_element(L, Esc) of + true -> + set_ann(T, append_ann(escapes, get_ann(T))); + false -> + T + end. + +append_ann(Tag, [X | Xs]) -> + if tuple_size(X) >= 1, element(1, X) =:= Tag -> + append_ann(Tag, Xs); + true -> + [X | append_ann(Tag, Xs)] + end; +append_ann(Tag, []) -> + [Tag]. + + +%% ===================================================================== +%% analyze(Tree) -> Escapes +%% +%% Tree = cerl:cerl() +%% Escapes = ordset(Label) +%% Label = integer() | external | top +%% +%% Analyzes a module or an expression represented by `Tree'. +%% +%% `Escapes' is the set of labels of constructor expressions in +%% `Tree' such that the created values may be accessed from outside +%% `Tree'. +%% +%% Note: `Tree' must be annotated with labels (as done by the +%% function `cerl_trees:label/1') in order to use this function. +%% The label annotation `{label, L}' (where L should be an integer) +%% must be the first element of the annotation list of each node in +%% the tree. Instances of variables bound in `Tree' which denote +%% the same variable must have the same label; apart from this, +%% labels should be unique. Constant literals do not need to be +%% labeled. + +-record(state, {vars, out, dep, work, funs, k}). + +%% Note: We assume that all remote calls and primops return a single +%% value. + +%% The analysis determines which objects (identified by the +%% corresponding "cons-point" labels in the code) are likely to be +%% passed in a message. (If so, we say that they "escape".) It is always +%% safe to assume either case, because the send operation will assure +%% that things are copied if necessary. This analysis tries to +%% anticipate that copying will be done. +%% +%% Rules: +%% 1) An object passed as message argument (or part of such an +%% argument) to a known send-operation, will probably be a message. +%% 2) A received value is always a message (safe). +%% 3) The external function can return any object (unsafe). +%% 4) A function called from the external function can receive any +%% object (unsafe) as argument. +%% 5) Unknown functions/operations can return any object (unsafe). + +%% We wrap the given syntax tree T in a fun-expression labeled `top', +%% which is initially in the set of escaped labels. `top' will be +%% visited at least once. +%% +%% We create a separate function labeled `external', defined as: +%% "'external'/1 = fun () -> Any", which will represent any and all +%% functions outside T, and which returns the 'unsafe' value. + +analyze(Tree) -> + analyze(Tree, ?DEF_LIMIT). + +analyze(Tree, Limit) -> + {_, _, Esc, Dep, _Par} = cerl_closurean:analyze(Tree), +%%% io:fwrite("dependencies: ~w.\n", [dict:to_list(Dep)]), + analyze(Tree, Limit, Dep, Esc). + +analyze(Tree, Limit, Dep0, Esc0) -> + %% Note that we use different name spaces for variable labels and + %% function/call site labels, so we can reuse some names here. We + %% assume that the labeling of Tree only uses integers, not atoms. + Any = ann_c_var([{label, any}], 'Any'), + External = ann_c_var([{label, external}], {external, 1}), + ExtFun = ann_c_fun([{label, external}], [], Any), +%%% io:fwrite("external fun:\n~s.\n", +%%% [cerl_prettypr:format(ExtFun, [noann, {paper, 80}])]), + Top = ann_c_var([{label, top}], {top, 0}), + TopFun = ann_c_fun([{label, top}], [], Tree), + + %% The "start fun" just makes the initialisation easier. It is not + %% itself in the call graph. + StartFun = ann_c_fun([{label, start}], [], + c_letrec([{External, ExtFun}, {Top, TopFun}], + c_nil())), +%%% io:fwrite("start fun:\n~s.\n", +%%% [cerl_prettypr:format(StartFun, [{paper, 80}])]), + + %% Initialise the Any and Escape variables. Gather a database of all + %% fun-expressions in Tree and initialise their outputs and parameter + %% variables. All escaping functions can receive any values as + %% inputs. Bind all module- and letrec-defined variables to their + %% corresponding labels. + Esc = sets:from_list(Esc0), + Unsafe = unsafe(), + Empty = empty(), + Funs0 = dict:new(), + Vars0 = dict:store(escape, empty(), + dict:store(any, Unsafe, dict:new())), + Out0 = dict:new(), + F = fun (T, S = {Fs, Vs, Os}) -> + case type(T) of + 'fun' -> + L = get_label(T), + As = fun_vars(T), + X = case sets:is_element(L, Esc) of + true -> Unsafe; + false -> Empty + end, + {dict:store(L, T, Fs), + bind_vars_single(As, X, Vs), + dict:store(L, none, Os)}; + letrec -> + {Fs, bind_defs(letrec_defs(T), Vs), Os}; + module -> + {Fs, bind_defs(module_defs(T), Vs), Os}; + _ -> + S + end + end, + {Funs, Vars, Out} = cerl_trees:fold(F, {Funs0, Vars0, Out0}, StartFun), + + %% Add the dependency for the loop in 'external': + Dep = add_dep(loop, external, Dep0), + + %% Enter the fixpoint iteration at the StartFun. + St = loop(StartFun, start, #state{vars = Vars, + out = Out, + dep = Dep, + work = init_work(), + funs = Funs, + k = Limit}), + Ms = labels(dict:fetch(escape, St#state.vars)), + {Ms, St#state.vars}. + +loop(T, L, St0) -> +%%% io:fwrite("analyzing: ~w.\n",[L]), +%%% io:fwrite("work: ~w.\n", [St0#state.work]), + Xs0 = dict:fetch(L, St0#state.out), + {Xs1, St1} = visit(fun_body(T), L, St0), + Xs = limit(Xs1, St1#state.k), + {W, M} = case equal(Xs0, Xs) of + true -> + {St1#state.work, St1#state.out}; + false -> +%%% io:fwrite("out (~w) changed: ~w <- ~w.\n", +%%% [L, Xs, Xs0]), + M1 = dict:store(L, Xs, St1#state.out), + case dict:find(L, St1#state.dep) of + {ok, S} -> + {add_work(set__to_list(S), St1#state.work), + M1}; + error -> + {St1#state.work, M1} + end + end, + St2 = St1#state{out = M}, + case take_work(W) of + {ok, L1, W1} -> + T1 = dict:fetch(L1, St2#state.funs), + loop(T1, L1, St2#state{work = W1}); + none -> + St2 + end. + +visit(T, L, St) -> +%%% io:fwrite("visiting: ~w.\n",[type(T)]), + case type(T) of + literal -> + %% This is (or should be) a constant, even if it's compound, + %% so it's bugger all whether it is sent or not. + case cerl:concrete(T) of + [] -> {[empty()], St}; + X when is_atom(X) -> {[empty()], St}; + X when is_integer(X) -> {[empty()], St}; + X when is_float(X) -> {[empty()], St}; + _ -> + exit({not_literal, T}) + end; + var -> + %% If a variable is not already in the store here, it must + %% be free in the program. + L1 = get_label(T), + Vars = St#state.vars, + case dict:find(L1, Vars) of + {ok, X} -> + {[X], St}; + error -> +%%% io:fwrite("free var: ~w.\n",[L1]), + X = unsafe(), + St1 = St#state{vars = dict:store(L1, X, Vars)}, + {[X], St1} + end; + 'fun' -> + %% Must revisit the fun also, because its environment might + %% have changed. (We don't keep track of such dependencies.) + L1 = get_label(T), + St1 = St#state{work = add_work([L1], St#state.work)}, + %% Currently, lambda expressions can only be locally + %% allocated, and therefore we have to force copying by + %% treating them as "unsafe" for now. + {[unsafe()], St1}; + %% {[singleton(L1)], St1}; + values -> + visit_list(values_es(T), L, St); + cons -> + {[X1, X2], St1} = visit_list([cons_hd(T), cons_tl(T)], L, St), + L1 = get_label(T), + X = make_cons(L1, X1, X2), + %% Also store the values of the elements. + Hd = get_hd(X), + Tl = get_tl(X), + St2 = St1#state{vars = dict:store(L1, [Hd, Tl], St1#state.vars)}, + {[X], St2}; + tuple -> + {Xs, St1} = visit_list(tuple_es(T), L, St), + L1 = get_label(T), + %% Also store the values of the elements. + St2 = St1#state{vars = dict:store(L1, Xs, St1#state.vars)}, + {[struct(L1, Xs)], St2}; + 'let' -> + {Xs, St1} = visit(let_arg(T), L, St), + Vars = bind_vars(let_vars(T), Xs, St1#state.vars), + visit(let_body(T), L, St1#state{vars = Vars}); + seq -> + {_, St1} = visit(seq_arg(T), L, St), + visit(seq_body(T), L, St1); + apply -> + {_F, St1} = visit(apply_op(T), L, St), + {As, St2} = visit_list(apply_args(T), L, St1), + L1 = get_label(T), + Ls = get_deps(L1, St#state.dep), + Out = St2#state.out, + Xs1 = join_list([dict:fetch(X, Out) || X <- Ls]), + {Xs1, call_site(Ls, As, St2)}; + call -> + M = call_module(T), + F = call_name(T), + As = call_args(T), + {_, St1} = visit(M, L, St), + {_, St2} = visit(F, L, St1), + {Xs, St3} = visit_list(As, L, St2), + L1 = get_label(T), + remote_call(M, F, Xs, As, L1, St3); + primop -> + As = primop_args(T), + {Xs, St1} = visit_list(As, L, St), + F = atom_val(primop_name(T)), + primop_call(F, length(Xs), Xs, As, St1); + 'case' -> + {Xs, St1} = visit(case_arg(T), L, St), + visit_clauses(Xs, case_clauses(T), L, St1); + 'receive' -> + %% The received value is of course a message, so it + %% is 'empty()', not 'unsafe()'. + X = empty(), + {Xs1, St1} = visit_clauses([X], receive_clauses(T), L, St), + {_, St2} = visit(receive_timeout(T), L, St1), + {Xs2, St3} = visit(receive_action(T), L, St2), + {join(Xs1, Xs2), St3}; + 'try' -> + {Xs1, St1} = visit(try_arg(T), L, St), + X = unsafe(), + Vars = bind_vars(try_vars(T), Xs1, St1#state.vars), + {Xs2, St2} = visit(try_body(T), L, St1#state{vars = Vars}), + EVars = bind_vars(try_evars(T), [X, X, X], St2#state.vars), + {Xs3, St3} = visit(try_handler(T), L, St2#state{vars = EVars}), + {join(Xs2, Xs3), St3}; + 'catch' -> + %% If we catch an exception, we can get unsafe data. + {Xs, St1} = visit(catch_body(T), L, St), + {join([unsafe()], Xs), St1}; + binary -> + %% Binaries are heap objects, but we don't have special + %% shared-heap allocation operators for them at the moment. + %% They must therefore be treated as unsafe. + {_, St1} = visit_list(binary_segments(T), L, St), + {[unsafe()], St1}; + bitstr -> + %% The other fields are constant literals. + {_, St1} = visit(bitstr_val(T), L, St), + {_, St2} = visit(bitstr_size(T), L, St1), + {none, St2}; + letrec -> + %% All the bound funs should be revisited, because the + %% environment might have changed. + Ls = [get_label(F) || {_, F} <- letrec_defs(T)], + St1 = St#state{work = add_work(Ls, St#state.work)}, + visit(letrec_body(T), L, St1); + module -> + %% We regard a module as a tuple of function variables in + %% the body of a `letrec'. + visit(c_letrec(module_defs(T), + ann_c_tuple([{label, get_label(T)}], + module_exports(T))), + L, St) + end. + +visit_clause(T, Xs, L, St) -> + Vars = bind_pats(clause_pats(T), Xs, St#state.vars), + {_, St1} = visit(clause_guard(T), L, St#state{vars = Vars}), + visit(clause_body(T), L, St1). + +%% We assume correct value-list typing. + +visit_list([T | Ts], L, St) -> + {Xs, St1} = visit(T, L, St), + {Xs1, St2} = visit_list(Ts, L, St1), + X = case Xs of + [X1] -> X1; + _ -> empty() + end, + {[X | Xs1], St2}; +visit_list([], _L, St) -> + {[], St}. + +visit_clauses(Xs, [T | Ts], L, St) -> + {Xs1, St1} = visit_clause(T, Xs, L, St), + {Xs2, St2} = visit_clauses(Xs, Ts, L, St1), + {join(Xs1, Xs2), St2}; +visit_clauses(_, [], _L, St) -> + {none, St}. + +bind_defs([{V, F} | Ds], Vars) -> + bind_defs(Ds, dict:store(get_label(V), singleton(get_label(F)), Vars)); +bind_defs([], Vars) -> + Vars. + +bind_pats(Ps, none, Vars) -> + bind_pats_single(Ps, empty(), Vars); +bind_pats(Ps, Xs, Vars) -> + if length(Xs) =:= length(Ps) -> + bind_pats_list(Ps, Xs, Vars); + true -> + bind_pats_single(Ps, empty(), Vars) + end. + +%% The lists might not be of the same length. + +bind_pats_list([P | Ps], [X | Xs], Vars) -> + bind_pats_list(Ps, Xs, bind_pat_vars(P, X, Vars)); +bind_pats_list(Ps, [], Vars) -> + bind_pats_single(Ps, empty(), Vars); +bind_pats_list([], _, Vars) -> + Vars. + +bind_pats_single([P | Ps], X, Vars) -> + bind_pats_single(Ps, X, bind_pat_vars(P, X, Vars)); +bind_pats_single([], _X, Vars) -> + Vars. + +bind_pat_vars(P, X, Vars) -> + case type(P) of + var -> + dict:store(get_label(P), X, Vars); + literal -> + Vars; + cons -> + bind_pats_list([cons_hd(P), cons_tl(P)], + [get_hd(X), get_tl(X)], Vars); + tuple -> + case elements(X) of + none -> + bind_vars_single(pat_vars(P), X, Vars); + Xs -> + bind_pats_list(tuple_es(P), Xs, Vars) + end; + binary -> + %% See the handling of binary-expressions. + bind_pats_single(binary_segments(P), unsafe(), Vars); + bitstr -> + %% See the handling of binary-expressions. + bind_pats_single([bitstr_val(P), bitstr_size(P)], + unsafe(), Vars); + alias -> + P1 = alias_pat(P), + Vars1 = bind_pat_vars(P1, X, Vars), + dict:store(get_label(alias_var(P)), X, Vars1) + end. + +%%% %% This is the "exact" version of list representation, which simply +%%% %% mimics the actual cons, head and tail operations. +%%% make_cons(L, X1, X2) -> +%%% struct(L1, [X1, X2]). +%%% get_hd(X) -> +%%% case elements(X) of +%%% none -> X; +%%% [X1 | _] -> X1; +%%% _ -> empty() +%%% end. +%%% get_tl(X) -> +%%% case elements(X) of +%%% none -> X; +%%% [_, X2 | _] -> X2; +%%% _ -> empty() +%%% end. + +%% This version does not unnecessarily confuse spine labels with element +%% labels, and is safe. However, it loses precision if cons cells are +%% used for other things than proper lists. + +make_cons(L, X1, X2) -> + %% join subtypes and cons locations + join_single(struct(L, [X1]), X2). + +get_hd(X) -> + case elements(X) of + none -> X; + [X1 | _] -> X1; % First element represents list subtype. + _ -> empty() + end. + +get_tl(X) -> X. % Tail of X has same type as X. + +bind_vars(Vs, none, Vars) -> + bind_vars_single(Vs, empty(), Vars); +bind_vars(Vs, Xs, Vars) -> + if length(Vs) =:= length(Xs) -> + bind_vars_list(Vs, Xs, Vars); + true -> + bind_vars_single(Vs, empty(), Vars) + end. + +bind_vars_list([V | Vs], [X | Xs], Vars) -> + bind_vars_list(Vs, Xs, dict:store(get_label(V), X, Vars)); +bind_vars_list([], [], Vars) -> + Vars. + +bind_vars_single([V | Vs], X, Vars) -> + bind_vars_single(Vs, X, dict:store(get_label(V), X, Vars)); +bind_vars_single([], _X, Vars) -> + Vars. + +%% This handles a call site, updating parameter variables with respect +%% to the actual parameters. The 'external' function is handled +%% specially, since it can get an arbitrary number of arguments. For our +%% purposes here, calls to the external function can be ignored. + +call_site(Ls, Xs, St) -> +%%% io:fwrite("call site: ~w -> ~w (~w).\n", [L, Ls, Xs]), + {W, V} = call_site(Ls, Xs, St#state.work, St#state.vars, + St#state.funs, St#state.k), + St#state{work = W, vars = V}. + +call_site([external | Ls], Xs, W, V, Fs, Limit) -> + call_site(Ls, Xs, W, V, Fs, Limit); +call_site([L | Ls], Xs, W, V, Fs, Limit) -> + Vs = fun_vars(dict:fetch(L, Fs)), + case bind_args(Vs, Xs, V, Limit) of + {V1, true} -> + call_site(Ls, Xs, add_work([L], W), V1, Fs, Limit); + {V1, false} -> + call_site(Ls, Xs, W, V1, Fs, Limit) + end; +call_site([], _, W, V, _, _) -> + {W, V}. + +add_dep(Source, Target, Deps) -> + case dict:find(Source, Deps) of + {ok, X} -> + case set__is_member(Target, X) of + true -> + Deps; + false -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__add(Target, X), Deps) + end; + error -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__singleton(Target), Deps) + end. + +%% If the arity does not match the call, nothing is done here. + +bind_args(Vs, Xs, Vars, Limit) -> + if length(Vs) =:= length(Xs) -> + bind_args(Vs, Xs, Vars, Limit, false); + true -> + {Vars, false} + end. + +bind_args([V | Vs], [X | Xs], Vars, Limit, Ch) -> + L = get_label(V), + {Vars1, Ch1} = bind_arg(L, X, Vars, Limit, Ch), + bind_args(Vs, Xs, Vars1, Limit, Ch1); +bind_args([], [], Vars, _Limit, Ch) -> + {Vars, Ch}. + +%% bind_arg(L, X, Vars, Limit) -> +%% bind_arg(L, X, Vars, Limit, false). + +bind_arg(L, X, Vars, Limit, Ch) -> + X0 = dict:fetch(L, Vars), + X1 = limit_single(join_single(X, X0), Limit), + case equal_single(X0, X1) of + true -> + {Vars, Ch}; + false -> +%%% io:fwrite("arg (~w) changed: ~w <- ~w + ~w.\n", +%%% [L, X1, X0, X]), + {dict:store(L, X1, Vars), true} + end. + +%% This handles escapes from things like primops and remote calls. + +escape(Xs, Ns, St) -> + escape(Xs, Ns, 1, St). + +escape([_ | Xs], Ns=[N1 | _], N, St) when is_integer(N1), N1 > N -> + escape(Xs, Ns, N + 1, St); +escape([X | Xs], [N | Ns], N, St) -> + Vars = St#state.vars, + X0 = dict:fetch(escape, Vars), + X1 = join_single(X, X0), + case equal_single(X0, X1) of + true -> + escape(Xs, Ns, N + 1, St); + false -> +%%% io:fwrite("escape changed: ~w <- ~w + ~w.\n", [X1, X0, X]), + Vars1 = dict:store(escape, X1, Vars), + escape(Xs, Ns, N + 1, St#state{vars = Vars1}) + end; +escape(Xs, [_ | Ns], N, St) -> + escape(Xs, Ns, N + 1, St); +escape(_, _, _, St) -> + St. + +%% Handle primop calls: (At present, we assume that all unknown calls +%% yield exactly one value. This might have to be changed.) + +primop_call(F, A, Xs, _As, St0) -> + %% St1 = case is_escape_op(F, A) of + %% [] -> St0; + %% Ns -> escape(Xs, Ns, St0) + %% end, + St1 = St0, + case is_imm_op(F, A) of + true -> + {[empty()], St1}; + false -> + call_unknown(Xs, St1) + end. + +%% Handle remote-calls: (At present, we assume that all unknown calls +%% yield exactly one value. This might have to be changed.) + +remote_call(M, F, Xs, As, L, St) -> + case is_c_atom(M) andalso is_c_atom(F) of + true -> + remote_call_1(atom_val(M), atom_val(F), length(Xs), + Xs, As, L, St); + false -> + %% Unknown function + call_unknown(Xs, St) + end. + +%% When calling an unknown function, we assume that the result does +%% *not* contain any of the constructors in its arguments (but it could +%% return locally allocated data that we don't know about). Note that +%% even a "pure" function can still cons up new data. + +call_unknown(_Xs, St) -> + {[unsafe()], St}. + +%% We need to handle some important standard functions in order to get +%% decent precision. +%% TODO: foldl, map, mapfoldl + +remote_call_1(erlang, hd, 1, [X], _As, _L, St) -> + {[get_hd(X)], St}; +remote_call_1(erlang, tl, 1, [X], _As, _L, St) -> + {[get_tl(X)], St}; +remote_call_1(erlang, element, 2, [_,X], [N|_], _L, St) -> + case elements(X) of + none -> {[X], St}; + Xs -> + case is_c_int(N) of + true -> + N1 = int_val(N), + if is_integer(N1), 1 =< N1, N1 =< length(Xs) -> + {[nth(N1, Xs)], St}; + true -> + {none, St} + end; + false -> + %% Even if we don't know which element is selected, + %% we know that the top level is never part of the + %% returned value. + {[join_single_list(Xs)], St} + end + end; +remote_call_1(erlang, setelement, 3, [_,X, Y], [N|_], L, St) -> + %% The constructor gets the label of the call operation. + case elements(X) of + none -> {[join_single(singleton(L), join_single(X, Y))], St}; + Xs -> + case is_c_int(N) of + true -> + N1 = int_val(N), + if is_integer(N1), 1 =< N1, N1 =< length(Xs) -> + Xs1 = set_nth(N1, Y, Xs), + {[struct(L, Xs1)], St}; + true -> + {none, St} + end; + false -> + %% Even if we don't know which element is selected, + %% we know that the top level is never part of the + %% returned value (a new tuple is always created). + Xs1 = [join_single(Y, X1) || X1 <- Xs], + {[struct(L, Xs1)], St} + end + end; +remote_call_1(erlang, '++', 2, [X1,X2], _As, _L, St) -> + %% Note: this is unsafe for non-proper lists! (See make_cons/3). + %% No safe version is implemented. + {[join_single(X1, X2)], St}; +remote_call_1(erlang, '--', 2, [X1,_X2], _As, _L, St) -> + {[X1], St}; +remote_call_1(lists, append, 2, Xs, As, L, St) -> + remote_call_1(erlang, '++', 2, Xs, As, L, St); +remote_call_1(lists, subtract, 2, Xs, As, L, St) -> + remote_call_1(erlang, '--', 2, Xs, As, L, St); +remote_call_1(M, F, A, Xs, _As, _L, St0) -> + St1 = case is_escape_op(M, F, A) of + [] -> St0; + Ns -> escape(Xs, Ns, St0) + end, + case is_imm_op(M, F, A) of + true -> + {[empty()], St1}; + false -> + call_unknown(Xs, St1) + end. + +%% 1-based n:th-element list selector and update function. + +nth(1, [X | _Xs]) -> X; +nth(N, [_X | Xs]) when N > 1 -> nth(N - 1, Xs). + +set_nth(1, Y, [_X | Xs]) -> [Y | Xs]; +set_nth(N, Y, [X | Xs]) when N > 1 -> [X | set_nth(N - 1, Y, Xs)]. + +%% Domain: none | [V], where V = {S, none} | {S, [V]}, S = set(integer()). + +join(none, Xs2) -> Xs2; +join(Xs1, none) -> Xs1; +join(Xs1, Xs2) -> + if length(Xs1) =:= length(Xs2) -> + join_1(Xs1, Xs2); + true -> + none + end. + +join_1([X1 | Xs1], [X2 | Xs2]) -> + [join_single(X1, X2) | join_1(Xs1, Xs2)]; +join_1([], []) -> + []. + +join_list([Xs | Xss]) -> + join(Xs, join_list(Xss)); +join_list([]) -> + none. + +empty() -> {set__new(), []}. + +singleton(X) -> {set__singleton(X), []}. + +struct(X, Xs) -> {set__singleton(X), Xs}. + +elements({_, Xs}) -> Xs. + +unsafe() -> {set__singleton(unsafe), none}. + +equal(none, none) -> true; +equal(none, _) -> false; +equal(_, none) -> false; +equal(X1, X2) -> equal_1(X1, X2). + +equal_1([X1 | Xs1], [X2 | Xs2]) -> + equal_single(X1, X2) andalso equal_1(Xs1, Xs2); +equal_1([], []) -> true; +equal_1(_, _) -> false. + +equal_single({S1, none}, {S2, none}) -> + set__equal(S1, S2); +equal_single({_, none}, _) -> + false; +equal_single(_, {_, none}) -> + false; +equal_single({S1, Vs1}, {S2, Vs2}) -> + set__equal(S1, S2) andalso equal_single_lists(Vs1, Vs2). + +equal_single_lists([X1 | Xs1], [X2 | Xs2]) -> + equal_single(X1, X2) andalso equal_single_lists(Xs1, Xs2); +equal_single_lists([], []) -> + true; +equal_single_lists(_, _) -> + false. + +join_single({S, none}, V) -> + {set__union(S, labels(V)), none}; +join_single(V, {S, none}) -> + {set__union(S, labels(V)), none}; +join_single({S1, Vs1}, {S2, Vs2}) -> + {set__union(S1, S2), join_single_lists(Vs1, Vs2)}. + +join_single_list([V | Vs]) -> + join_single(V, join_single_list(Vs)); +join_single_list([]) -> + empty(). + +%% If one list has more elements that the other, and N is the length of +%% the longer list, then the result has N elements. + +join_single_lists([V1], [V2]) -> + [join_single(V1, V2)]; +join_single_lists([V1 | Vs1], [V2 | Vs2]) -> + [join_single(V1, V2) | join_single_lists(Vs1, Vs2)]; +join_single_lists([], Vs) -> Vs; +join_single_lists(Vs, []) -> Vs. + +collapse(V) -> + {labels(V), none}. + +%% collapse_list([]) -> +%% empty(); +%% collapse_list(Vs) -> +%% {labels_list(Vs), none}. + +labels({S, none}) -> S; +labels({S, []}) -> S; +labels({S, Vs}) -> set__union(S, labels_list(Vs)). + +labels_list([V]) -> + labels(V); +labels_list([V | Vs]) -> + set__union(labels(V), labels_list(Vs)). + +limit(none, _K) -> none; +limit(X, K) -> limit_list(X, K). + +limit_list([X | Xs], K) -> + [limit_single(X, K) | limit_list(Xs, K)]; +limit_list([], _) -> + []. + +limit_single({_, none} = V, _K) -> + V; +limit_single({_, []} = V, _K) -> + V; +limit_single({S, Vs}, K) when K > 0 -> + {S, limit_list(Vs, K - 1)}; +limit_single(V, _K) -> + collapse(V). + +%% Set abstraction for label sets in the domain. + +%% set__is_empty([]) -> true; +%% set__is_empty(_) -> false. + +set__new() -> []. + +set__singleton(X) -> [X]. + +set__to_list(S) -> S. + +%% set__from_list(S) -> ordsets:from_list(S). + +set__union(X, Y) -> ordsets:union(X, Y). + +set__add(X, S) -> ordsets:add_element(X, S). + +set__is_member(X, S) -> ordsets:is_element(X, S). + +%% set__subtract(X, Y) -> ordsets:subtract(X, Y). + +set__equal(X, Y) -> X =:= Y. + +%% A simple but efficient functional queue. + +queue__new() -> {[], []}. + +queue__put(X, {In, Out}) -> {[X | In], Out}. + +queue__get({In, [X | Out]}) -> {ok, X, {In, Out}}; +queue__get({[], _}) -> empty; +queue__get({In, _}) -> + [X | In1] = lists:reverse(In), + {ok, X, {[], In1}}. + +%% The work list - a queue without repeated elements. + +init_work() -> + {queue__new(), sets:new()}. + +add_work(Ls, {Q, Set}) -> + add_work(Ls, Q, Set). + +%% Note that the elements are enqueued in order. + +add_work([L | Ls], Q, Set) -> + case sets:is_element(L, Set) of + true -> + add_work(Ls, Q, Set); + false -> + add_work(Ls, queue__put(L, Q), sets:add_element(L, Set)) + end; +add_work([], Q, Set) -> + {Q, Set}. + +take_work({Queue0, Set0}) -> + case queue__get(Queue0) of + {ok, L, Queue1} -> + Set1 = sets:del_element(L, Set0), + {ok, L, {Queue1, Set1}}; + empty -> + none + end. + +get_deps(L, Dep) -> + case dict:find(L, Dep) of + {ok, Ls} -> Ls; + error -> [] + end. + +%% Escape operators may let their arguments escape. For this analysis, +%% only send-operations are considered as causing escapement, and only +%% in specific arguments. + +%% is_escape_op(_F, _A) -> []. + +-spec is_escape_op(module(), atom(), arity()) -> [arity()]. + +is_escape_op(erlang, '!', 2) -> [2]; +is_escape_op(erlang, send, 2) -> [2]; +is_escape_op(erlang, spawn, 1) -> [1]; +is_escape_op(erlang, spawn, 3) -> [3]; +is_escape_op(erlang, spawn, 4) -> [4]; +is_escape_op(erlang, spawn_link, 3) -> [3]; +is_escape_op(erlang, spawn_link, 4) -> [4]; +is_escape_op(_M, _F, _A) -> []. + +%% "Immediate" operators will never return heap allocated data. This is +%% of course true for operators that never return, like 'exit/1'. (Note +%% that floats are always heap allocated objects, and that most integer +%% arithmetic can return a bignum on the heap.) + +-spec is_imm_op(atom(), arity()) -> boolean(). + +is_imm_op(match_fail, 1) -> true; +is_imm_op(_, _) -> false. + +-spec is_imm_op(module(), atom(), arity()) -> boolean(). + +is_imm_op(erlang, self, 0) -> true; +is_imm_op(erlang, '=:=', 2) -> true; +is_imm_op(erlang, '==', 2) -> true; +is_imm_op(erlang, '=/=', 2) -> true; +is_imm_op(erlang, '/=', 2) -> true; +is_imm_op(erlang, '<', 2) -> true; +is_imm_op(erlang, '=<', 2) -> true; +is_imm_op(erlang, '>', 2) -> true; +is_imm_op(erlang, '>=', 2) -> true; +is_imm_op(erlang, 'and', 2) -> true; +is_imm_op(erlang, 'or', 2) -> true; +is_imm_op(erlang, 'xor', 2) -> true; +is_imm_op(erlang, 'not', 1) -> true; +is_imm_op(erlang, is_alive, 0) -> true; +is_imm_op(erlang, is_atom, 1) -> true; +is_imm_op(erlang, is_binary, 1) -> true; +is_imm_op(erlang, is_builtin, 3) -> true; +is_imm_op(erlang, is_constant, 1) -> true; +is_imm_op(erlang, is_float, 1) -> true; +is_imm_op(erlang, is_function, 1) -> true; +is_imm_op(erlang, is_integer, 1) -> true; +is_imm_op(erlang, is_list, 1) -> true; +is_imm_op(erlang, is_number, 1) -> true; +is_imm_op(erlang, is_pid, 1) -> true; +is_imm_op(erlang, is_port, 1) -> true; +is_imm_op(erlang, is_process_alive, 1) -> true; +is_imm_op(erlang, is_reference, 1) -> true; +is_imm_op(erlang, is_tuple, 1) -> true; +is_imm_op(erlang, length, 1) -> true; % never a bignum +is_imm_op(erlang, list_to_atom, 1) -> true; +is_imm_op(erlang, node, 0) -> true; +is_imm_op(erlang, node, 1) -> true; +is_imm_op(erlang, throw, 1) -> true; +is_imm_op(erlang, exit, 1) -> true; +is_imm_op(erlang, error, 1) -> true; +is_imm_op(erlang, error, 2) -> true; +is_imm_op(_, _, _) -> false. diff --git a/lib/hipe/cerl/cerl_pmatch.erl b/lib/hipe/cerl/cerl_pmatch.erl new file mode 100644 index 0000000000..3bc93e80dd --- /dev/null +++ b/lib/hipe/cerl/cerl_pmatch.erl @@ -0,0 +1,624 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @author Richard Carlsson +%% @copyright 2000-2006 Richard Carlsson +%% +%% @doc Core Erlang pattern matching compiler. +%% +%%

For reference, see Simon L. Peyton Jones "The Implementation of +%% Functional Programming Languages", chapter 5 (by Phil Wadler).

+%% +%% @type cerl() = cerl:cerl(). +%% Abstract Core Erlang syntax trees. +%% @type cerl_records() = cerl:cerl_records(). +%% An explicit record representation of Core Erlang syntax trees. + +-module(cerl_pmatch). + +-define(NO_UNUSED, true). + +-export([clauses/2]). +-ifndef(NO_UNUSED). +-export([transform/2, core_transform/2, expr/2]). +-endif. + +-import(lists, [all/2, splitwith/2, foldr/3, keysort/2, foldl/3, + mapfoldl/3]). + +-define(binary_id, {binary}). +-define(cons_id, {cons}). +-define(tuple_id, {tuple}). +-define(literal_id(V), V). + + +%% @spec core_transform(Module::cerl_records(), Options::[term()]) -> +%% cerl_records() +%% +%% @doc Transforms a module represented by records. See +%% transform/2 for details. +%% +%%

Use the compiler option {core_transform, cerl_pmatch} +%% to insert this function as a compilation pass.

+%% +%% @see transform/2 + +-ifndef(NO_UNUSED). +core_transform(M, Opts) -> + cerl:to_records(transform(cerl:from_records(M), Opts)). +-endif. % NO_UNUSED +%% @clear + + +%% @spec transform(Module::cerl(), Options::[term()]) -> cerl() +%% +%% @doc Rewrites all case-clauses in Module. +%% receive-clauses are not affected. Currently, no options +%% are available. +%% +%% @see clauses/2 +%% @see expr/2 +%% @see core_transform/2 + +-ifndef(NO_UNUSED). +transform(M, _Opts) -> + expr(M, env__empty()). +-endif. % NO_UNUSED +%% @clear + + +%% @spec clauses(Clauses::[Clause], Env) -> {Expr, Vars} +%% Clause = cerl() +%% Expr = cerl() +%% Vars = [cerl()] +%% Env = rec_env:environment() +%% +%% @doc Rewrites a sequence of clauses to an equivalent expression, +%% removing as much repeated testing as possible. Returns a pair +%% {Expr, Vars}, where Expr is the resulting +%% expression, and Vars is a list of new variables (i.e., +%% not already in the given environment) to be bound to the arguments to +%% the switch. The following is a typical example (assuming +%% E is a Core Erlang case expression): +%%
+%%   handle_case(E, Env) ->
+%%       Cs = case_clauses(E),
+%%       {E1, Vs} = cerl_pmatch(Cs, Env),
+%%       c_let(Vs, case_arg(E), E1).
+%% 
+%% +%%

The environment is used for generating new variables which do not +%% shadow existing bindings.

+%% +%% @see rec_env +%% @see expr/2 +%% @see transform/2 + +-spec clauses([cerl:cerl()], rec_env:environment()) -> + {cerl:cerl(), [cerl:cerl()]}. + +clauses(Cs, Env) -> + clauses(Cs, none, Env). + +clauses([C | _] = Cs, Else, Env) -> + Vs = new_vars(cerl:clause_arity(C), Env), + E = match(Vs, Cs, Else, add_vars(Vs, Env)), + {E, Vs}. + +%% The implementation very closely follows that described in the book. + +match([], Cs, Else, _Env) -> + %% If the "default action" is the atom 'none', it is simply not + %% added; otherwise it is put in the body of a final catch-all + %% clause (which is often removed by the below optimization). + Cs1 = if Else =:= none -> Cs; + true -> Cs ++ [cerl:c_clause([], Else)] + end, + %% This clause reduction is an important optimization. It selects a + %% clause body if possible, and otherwise just removes dead clauses. + case cerl_clauses:reduce(Cs1) of + {true, {C, []}} -> % if we get bindings, something is wrong! + cerl:clause_body(C); + {false, Cs2} -> + %% This happens when guards are nontrivial. + cerl:c_case(cerl:c_values([]), Cs2) + end; +match([V | _] = Vs, Cs, Else, Env) -> + foldr(fun (CsF, ElseF) -> + match_var_con(Vs, CsF, ElseF, Env) + end, + Else, + group([unalias(C, V) || C <- Cs], fun is_var_clause/1)). + +group([], _F) -> + []; +group([X | _] = Xs, F) -> + group(Xs, F, F(X)). + +group(Xs, F, P) -> + {First, Rest} = splitwith(fun (X) -> F(X) =:= P end, Xs), + [First | group(Rest, F)]. + +is_var_clause(C) -> + cerl:is_c_var(hd(cerl:clause_pats(C))). + +%% To avoid code duplication, if the 'Else' expression is too big, we +%% put it in a local function definition instead, and replace it with a +%% call. (Note that it is important that 'is_lightweight' does not yield +%% 'true' for a simple function application, or we will create a lot of +%% unnecessary extra functions.) + +match_var_con(Vs, Cs, none = Else, Env) -> + match_var_con_1(Vs, Cs, Else, Env); +match_var_con(Vs, Cs, Else, Env) -> + case is_lightweight(Else) of + true -> + match_var_con_1(Vs, Cs, Else, Env); + false -> + F = new_fvar("match_", 0, Env), + Else1 = cerl:c_apply(F, []), + Env1 = add_vars([F], Env), + cerl:c_letrec([{F, cerl:c_fun([], Else)}], + match_var_con_1(Vs, Cs, Else1, Env1)) + end. + +match_var_con_1(Vs, Cs, Else, Env) -> + case is_var_clause(hd(Cs)) of + true -> + match_var(Vs, Cs, Else, Env); + false -> + match_con(Vs, Cs, Else, Env) + end. + +match_var([V | Vs], Cs, Else, Env) -> + Cs1 = [begin + [P | Ps] = cerl:clause_pats(C), + G = make_let([P], V, cerl:clause_guard(C)), + B = make_let([P], V, cerl:clause_body(C)), + cerl:update_c_clause(C, Ps, G, B) + end + || C <- Cs], + match(Vs, Cs1, Else, Env). + +%% Since Erlang is dynamically typed, we must include the possibility +%% that none of the constructors in the group will match, and in that +%% case the "Else" code will be executed (unless it is 'none'), in the +%% body of a final catch-all clause. + +match_con([V | Vs], Cs, Else, Env) -> + case group_con(Cs) of + [{_, _, Gs}] -> + %% Don't create a group type switch if there is only one + %% such group + make_switch(V, [match_congroup(DG, Vs, CsG, Else, Env) + || {DG, _, CsG} <- Gs], + Else, Env); + Ts -> + Cs1 = [match_typegroup(T, V, Vs, Gs, Else, Env) + || {T, _, Gs} <- Ts], + make_switch(V, Cs1, Else, Env) + end. + + +match_typegroup(_T, _V, Vs, [{D, _, Cs}], Else, Env) when element(1, D) /= ?binary_id -> + %% Don't create a group type switch if there is only one constructor + %% in the group. (Note that this always happens for '[]'.) + %% Special case for binaries which always get a group switch + match_congroup(D, Vs, Cs, Else, Env); +match_typegroup(T, V, Vs, Gs, Else, Env) -> + Body = make_switch(V, [match_congroup(D, Vs, Cs, Else, Env) + || {D, _, Cs} <- Gs], + Else, Env), + typetest_clause(T, V, Body, Env). + +match_congroup({?binary_id, Segs}, Vs, Cs, _Else, Env) -> + Ref = get_unique(), + Guard = cerl:c_primop(cerl:c_atom(set_label), [cerl:c_int(Ref)]), + NewElse = cerl:c_primop(cerl:c_atom(goto_label), [cerl:c_int(Ref)]), + Body = match(Vs, Cs, NewElse, Env), + cerl:c_clause([make_pat(?binary_id, Segs)], Guard, Body); + +match_congroup({D, A}, Vs, Cs, Else, Env) -> + Vs1 = new_vars(A, Env), + Body = match(Vs1 ++ Vs, Cs, Else, add_vars(Vs1, Env)), + cerl:c_clause([make_pat(D, Vs1)], Body). + +make_switch(V, Cs, Else, Env) -> + cerl:c_case(V, if Else =:= none -> Cs; + true -> Cs ++ [cerl:c_clause([new_var(Env)], + Else)] + end). + +%% We preserve the relative order of different-type constructors as they +%% were originally listed. This is done by tracking the clause numbers. + +group_con(Cs) -> + {Cs1, _} = mapfoldl(fun (C, N) -> + [P | Ps] = cerl:clause_pats(C), + Ps1 = sub_pats(P) ++ Ps, + G = cerl:clause_guard(C), + B = cerl:clause_body(C), + C1 = cerl:update_c_clause(C, Ps1, G, B), + D = con_desc(P), + {{D, N, C1}, N + 1} + end, + 0, Cs), + %% Sort and group constructors. + Css = group(keysort(1, Cs1), fun ({D,_,_}) -> D end), + %% Sort each group "back" by line number, and move the descriptor + %% and line number to the wrapper for the group. + Gs = [finalize_congroup(C) || C <- Css], + %% Group by type only (put e.g. different-arity tuples together). + Gss = group(Gs, fun ({D,_,_}) -> con_desc_type(D) end), + %% Sort and wrap the type groups. + Ts = [finalize_typegroup(G) || G <- Gss], + %% Sort type-groups by first clause order + keysort(2, Ts). + +finalize_congroup(Cs) -> + [{D,N,_}|_] = Cs1 = keysort(2, Cs), + {D, N, [C || {_,_,C} <- Cs1]}. + +finalize_typegroup(Gs) -> + [{D,N,_}|_] = Gs1 = keysort(2, Gs), + {con_desc_type(D), N, Gs1}. + +%% Since Erlang clause patterns can contain "alias patterns", we must +%% eliminate these, by turning them into let-definitions in the guards +%% and bodies of the clauses. + +unalias(C, V) -> + [P | Ps] = cerl:clause_pats(C), + B = cerl:clause_body(C), + G = cerl:clause_guard(C), + unalias(P, V, Ps, B, G, C). + +unalias(P, V, Ps, B, G, C) -> + case cerl:type(P) of + alias -> + V1 = cerl:alias_var(P), + B1 = make_let([V1], V, B), + G1 = make_let([V1], V, G), + unalias(cerl:alias_pat(P), V, Ps, B1, G1, C); + _ -> + cerl:update_c_clause(C, [P | Ps], G, B) + end. + +%% Generating a type-switch clause + +typetest_clause([], _V, E, _Env) -> + cerl:c_clause([cerl:c_nil()], E); +typetest_clause(atom, V, E, _Env) -> + typetest_clause_1(is_atom, V, E); +typetest_clause(integer, V, E, _Env) -> + typetest_clause_1(is_integer, V, E); +typetest_clause(float, V, E, _Env) -> + typetest_clause_1(is_float, V, E); +typetest_clause(cons, _V, E, Env) -> + [V1, V2] = new_vars(2, Env), + cerl:c_clause([cerl:c_cons(V1, V2)], E); % there is no 'is cons' +typetest_clause(tuple, V, E, _Env) -> + typetest_clause_1(is_tuple, V, E); +typetest_clause(binary, V, E, _Env) -> + typetest_clause_1(is_binary, V, E). + +typetest_clause_1(T, V, E) -> + cerl:c_clause([V], cerl:c_call(cerl:c_atom('erlang'), + cerl:c_atom(T), [V]), E). + +%% This returns a constructor descriptor, to be used for grouping and +%% pattern generation. It consists of an identifier term and the arity. + +con_desc(E) -> + case cerl:type(E) of + cons -> {?cons_id, 2}; + tuple -> {?tuple_id, cerl:tuple_arity(E)}; + binary -> {?binary_id, cerl:binary_segments(E)}; + literal -> + case cerl:concrete(E) of + [_|_] -> {?cons_id, 2}; + T when is_tuple(T) -> {?tuple_id, tuple_size(T)}; + V -> {?literal_id(V), 0} + end; + _ -> + throw({bad_constructor, E}) + end. + +%% This returns the type class for a constructor descriptor, for +%% grouping of clauses. It does not distinguish between tuples of +%% different arity, nor between different values of atoms, integers and +%% floats. + +con_desc_type({?literal_id([]), _}) -> []; +con_desc_type({?literal_id(V), _}) when is_atom(V) -> atom; +con_desc_type({?literal_id(V), _}) when is_integer(V) -> integer; +con_desc_type({?literal_id(V), _}) when is_float(V) -> float; +con_desc_type({?cons_id, 2}) -> cons; +con_desc_type({?tuple_id, _}) -> tuple; +con_desc_type({?binary_id, _}) -> binary. + +%% This creates a new constructor pattern from a type descriptor and a +%% list of variables. + +make_pat(?cons_id, [V1, V2]) -> cerl:c_cons(V1, V2); +make_pat(?tuple_id, Vs) -> cerl:c_tuple(Vs); +make_pat(?binary_id, Segs) -> cerl:c_binary(Segs); +make_pat(?literal_id(Val), []) -> cerl:abstract(Val). + +%% This returns the list of subpatterns of a constructor pattern. + +sub_pats(E) -> + case cerl:type(E) of + cons -> + [cerl:cons_hd(E), cerl:cons_tl(E)]; + tuple -> + cerl:tuple_es(E); + binary -> + []; + literal -> + case cerl:concrete(E) of + [H|T] -> [cerl:abstract(H), cerl:abstract(T)]; + T when is_tuple(T) -> [cerl:abstract(X) + || X <- tuple_to_list(T)]; + _ -> [] + end; + _ -> + throw({bad_constructor_pattern, E}) + end. + +%% This avoids generating stupid things like "let X = ... in 'true'", +%% and "let X = Y in X", keeping the generated code cleaner. It also +%% prevents expressions from being considered "non-lightweight" when +%% code duplication is disallowed (see is_lightweight for details). + +make_let(Vs, A, B) -> + cerl_lib:reduce_expr(cerl:c_let(Vs, A, B)). + +%% --------------------------------------------------------------------- +%% Rewriting a module or other expression: + +%% @spec expr(Expression::cerl(), Env) -> cerl() +%% Env = rec_env:environment() +%% +%% @doc Rewrites all case-clauses in +%% Expression. receive-clauses are not +%% affected. +%% +%%

The environment is used for generating new variables which do not +%% shadow existing bindings.

+%% +%% @see clauses/2 +%% @see rec_env + +-ifndef(NO_UNUSED). +expr(E, Env) -> + case cerl:type(E) of + literal -> + E; + var -> + E; + values -> + Es = expr_list(cerl:values_es(E), Env), + cerl:update_c_values(E, Es); + cons -> + H = expr(cerl:cons_hd(E), Env), + T = expr(cerl:cons_tl(E), Env), + cerl:update_c_cons(E, H, T); + tuple -> + Es = expr_list(cerl:tuple_es(E), Env), + cerl:update_c_tuple(E, Es); + 'let' -> + A = expr(cerl:let_arg(E), Env), + Vs = cerl:let_vars(E), + Env1 = add_vars(Vs, Env), + B = expr(cerl:let_body(E), Env1), + cerl:update_c_let(E, Vs, A, B); + seq -> + A = expr(cerl:seq_arg(E), Env), + B = expr(cerl:seq_body(E), Env), + cerl:update_c_seq(E, A, B); + apply -> + Op = expr(cerl:apply_op(E), Env), + As = expr_list(cerl:apply_args(E), Env), + cerl:update_c_apply(E, Op, As); + call -> + M = expr(cerl:call_module(E), Env), + N = expr(cerl:call_name(E), Env), + As = expr_list(cerl:call_args(E), Env), + cerl:update_c_call(E, M, N, As); + primop -> + As = expr_list(cerl:primop_args(E), Env), + cerl:update_c_primop(E, cerl:primop_name(E), As); + 'case' -> + A = expr(cerl:case_arg(E), Env), + Cs = expr_list(cerl:case_clauses(E), Env), + {E1, Vs} = clauses(Cs, Env), + make_let(Vs, A, E1); + clause -> + Vs = cerl:clause_vars(E), + Env1 = add_vars(Vs, Env), + G = expr(cerl:clause_guard(E), Env1), + B = expr(cerl:clause_body(E), Env1), + cerl:update_c_clause(E, cerl:clause_pats(E), G, B); + 'fun' -> + Vs = cerl:fun_vars(E), + Env1 = add_vars(Vs, Env), + B = expr(cerl:fun_body(E), Env1), + cerl:update_c_fun(E, Vs, B); + 'receive' -> + %% NOTE: No pattern matching compilation is done here! The + %% receive-clauses and patterns cannot be staged as long as + %% we are working with "normal" Core Erlang. + Cs = expr_list(cerl:receive_clauses(E), Env), + T = expr(cerl:receive_timeout(E), Env), + A = expr(cerl:receive_action(E), Env), + cerl:update_c_receive(E, Cs, T, A); + 'try' -> + A = expr(cerl:try_arg(E), Env), + Vs = cerl:try_vars(E), + B = expr(cerl:try_body(E), add_vars(Vs, Env)), + Evs = cerl:try_evars(E), + H = expr(cerl:try_handler(E), add_vars(Evs, Env)), + cerl:update_c_try(E, A, Vs, B, Evs, H); + 'catch' -> + B = expr(cerl:catch_body(E), Env), + cerl:update_c_catch(E, B); + letrec -> + Ds = cerl:letrec_defs(E), + Env1 = add_defs(Ds, Env), + Ds1 = defs(Ds, Env1), + B = expr(cerl:letrec_body(E), Env1), + cerl:update_c_letrec(E, Ds1, B); + module -> + Ds = cerl:module_defs(E), + Env1 = add_defs(Ds, Env), + Ds1 = defs(Ds, Env1), + cerl:update_c_module(E, cerl:module_name(E), + cerl:module_exports(E), + cerl:module_attrs(E), Ds1) + end. + +expr_list(Es, Env) -> + [expr(E, Env) || E <- Es]. + +defs(Ds, Env) -> + [{V, expr(F, Env)} || {V, F} <- Ds]. +-endif. % NO_UNUSED +%% @clear + +%% --------------------------------------------------------------------- +%% Support functions + +new_var(Env) -> + Name = env__new_vname(Env), + cerl:c_var(Name). + +new_vars(N, Env) -> + [cerl:c_var(V) || V <- env__new_vnames(N, Env)]. + +new_fvar(A, N, Env) -> + Name = env__new_fname(A, N, Env), + cerl:c_var(Name). + +add_vars(Vs, Env) -> + foldl(fun (V, E) -> env__bind(cerl:var_name(V), [], E) end, Env, Vs). + +-ifndef(NO_UNUSED). +add_defs(Ds, Env) -> + foldl(fun ({V, _F}, E) -> + env__bind(cerl:var_name(V), [], E) + end, Env, Ds). +-endif. % NO_UNUSED + +%% This decides whether an expression is worth lifting out to a separate +%% function instead of duplicating the code. In other words, whether its +%% cost is about the same or smaller than that of a local function call. +%% Note that variables must always be "lightweight"; otherwise, they may +%% get lifted out of the case switch that introduces them. + +is_lightweight(E) -> + case get('cerl_pmatch_duplicate_code') of + never -> cerl:type(E) =:= var; % Avoids all code duplication + always -> true; % Does not lift code to new functions + _ -> is_lightweight_1(E) + end. + +is_lightweight_1(E) -> + case cerl:type(E) of + var -> true; + literal -> true; + 'fun' -> true; + values -> all(fun is_simple/1, cerl:values_es(E)); + cons -> is_simple(cerl:cons_hd(E)) + andalso is_simple(cerl:cons_tl(E)); + tuple -> all(fun is_simple/1, cerl:tuple_es(E)); + 'let' -> (is_simple(cerl:let_arg(E)) andalso + is_lightweight_1(cerl:let_body(E))); + seq -> (is_simple(cerl:seq_arg(E)) andalso + is_lightweight_1(cerl:seq_body(E))); + primop -> + all(fun is_simple/1, cerl:primop_args(E)); + apply -> + is_simple(cerl:apply_op(E)) + andalso all(fun is_simple/1, cerl:apply_args(E)); + call -> + is_simple(cerl:call_module(E)) + andalso is_simple(cerl:call_name(E)) + andalso all(fun is_simple/1, cerl:call_args(E)); + _ -> + %% The default is to lift the code to a new function. + false + end. + +%% "Simple" things have no (or negligible) runtime cost and are free +%% from side effects. + +is_simple(E) -> + case cerl:type(E) of + var -> true; + literal -> true; + values -> all(fun is_simple/1, cerl:values_es(E)); + _ -> false + end. + + +get_unique() -> + case get(unique_label) of + undefined -> + put(unique_label, 1), + 0; + N -> + put(unique_label, N+1), + N + end. + +%% --------------------------------------------------------------------- +%% Abstract datatype: environment() + +env__bind(Key, Val, Env) -> + rec_env:bind(Key, Val, Env). + +-ifndef(NO_UNUSED). +%% env__bind_recursive(Ks, Vs, F, Env) -> +%% rec_env:bind_recursive(Ks, Vs, F, Env). + +%% env__lookup(Key, Env) -> +%% rec_env:lookup(Key, Env). + +%% env__get(Key, Env) -> +%% rec_env:get(Key, Env). + +%% env__is_defined(Key, Env) -> +%% rec_env:is_defined(Key, Env). + +env__empty() -> + rec_env:empty(). +-endif. % NO_UNUSED + +env__new_vname(Env) -> + rec_env:new_key(Env). + +env__new_vnames(N, Env) -> + rec_env:new_keys(N, Env). + +env__new_fname(F, A, Env) -> + rec_env:new_key(fun (X) -> + S = integer_to_list(X), + {list_to_atom(F ++ S), A} + end, + Env). diff --git a/lib/hipe/cerl/cerl_prettypr.erl b/lib/hipe/cerl/cerl_prettypr.erl new file mode 100644 index 0000000000..fba9a48cda --- /dev/null +++ b/lib/hipe/cerl/cerl_prettypr.erl @@ -0,0 +1,883 @@ +%% ===================================================================== +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Core Erlang prettyprinter, using the 'prettypr' module. +%% +%% Copyright (C) 1999-2002 Richard Carlsson +%% +%% Author contact: richardc@it.uu.se +%% ===================================================================== +%% +%% @doc Core Erlang prettyprinter. +%% +%%

This module is a front end to the pretty-printing library module +%% prettypr, for text formatting of Core Erlang abstract +%% syntax trees defined by the module cerl.

+ +%% TODO: add printing of comments for `comment'-annotations? + +-module(cerl_prettypr). + +-define(NO_UNUSED, true). + +-export([format/1, format/2, annotate/3]). +-ifndef(NO_UNUSED). +-export([best/1, best/2, layout/1, layout/2, get_ctxt_paperwidth/1, + set_ctxt_paperwidth/2, get_ctxt_linewidth/1, + set_ctxt_linewidth/2, get_ctxt_hook/1, set_ctxt_hook/2, + get_ctxt_user/1, set_ctxt_user/2]). +-endif. + +-import(prettypr, [text/1, nest/2, above/2, beside/2, sep/1, par/1, + par/2, follow/3, follow/2, floating/1, empty/0]). + +-import(cerl, [abstract/1, alias_pat/1, alias_var/1, apply_args/1, + apply_op/1, atom_lit/1, binary_segments/1, bitstr_val/1, + bitstr_size/1, bitstr_unit/1, bitstr_type/1, + bitstr_flags/1, call_args/1, call_module/1, call_name/1, + case_arg/1, case_clauses/1, catch_body/1, c_atom/1, + c_binary/1, c_bitstr/5, c_int/1, clause_body/1, + clause_guard/1, clause_pats/1, concrete/1, cons_hd/1, + cons_tl/1, float_lit/1, fun_body/1, fun_vars/1, + get_ann/1, int_lit/1, is_c_cons/1, is_c_let/1, + is_c_nil/1, is_c_seq/1, is_print_string/1, let_arg/1, + let_body/1, let_vars/1, letrec_body/1, letrec_defs/1, + module_attrs/1, module_defs/1, module_exports/1, + module_name/1, primop_args/1, primop_name/1, + receive_action/1, receive_clauses/1, receive_timeout/1, + seq_arg/1, seq_body/1, string_lit/1, try_arg/1, + try_body/1, try_vars/1, try_evars/1, try_handler/1, + tuple_es/1, type/1, values_es/1, var_name/1]). + +-define(PAPER, 76). +-define(RIBBON, 45). +-define(NOUSER, undefined). +-define(NOHOOK, none). + +-type hook() :: 'none' | fun((cerl:cerl(), _, _) -> prettypr:document()). + +-record(ctxt, {line = 0 :: integer(), + body_indent = 4 :: non_neg_integer(), + sub_indent = 2 :: non_neg_integer(), + hook = ?NOHOOK :: hook(), + noann = false :: boolean(), + paper = ?PAPER :: integer(), + ribbon = ?RIBBON :: integer(), + user = ?NOUSER :: term()}). +-type context() :: #ctxt{}. + +%% ===================================================================== +%% The following functions examine and modify contexts: + +%% @spec (context()) -> integer() +%% @doc Returns the paper widh field of the prettyprinter context. +%% @see set_ctxt_paperwidth/2 + +-ifndef(NO_UNUSED). +get_ctxt_paperwidth(Ctxt) -> + Ctxt#ctxt.paper. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context(), integer()) -> context() +%% +%% @doc Updates the paper widh field of the prettyprinter context. +%% +%%

Note: changing this value (and passing the resulting context to a +%% continuation function) does not affect the normal formatting, but may +%% affect user-defined behaviour in hook functions.

+%% +%% @see get_ctxt_paperwidth/1 + +-ifndef(NO_UNUSED). +set_ctxt_paperwidth(Ctxt, W) -> + Ctxt#ctxt{paper = W}. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context()) -> integer() +%% @doc Returns the line widh field of the prettyprinter context. +%% @see set_ctxt_linewidth/2 + +-ifndef(NO_UNUSED). +get_ctxt_linewidth(Ctxt) -> + Ctxt#ctxt.ribbon. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context(), integer()) -> context() +%% +%% @doc Updates the line widh field of the prettyprinter context. +%% +%%

Note: changing this value (and passing the resulting context to a +%% continuation function) does not affect the normal formatting, but may +%% affect user-defined behaviour in hook functions.

+%% +%% @see get_ctxt_linewidth/1 + +-ifndef(NO_UNUSED). +set_ctxt_linewidth(Ctxt, W) -> + Ctxt#ctxt{ribbon = W}. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context()) -> hook() +%% @doc Returns the hook function field of the prettyprinter context. +%% @see set_ctxt_hook/2 + +-ifndef(NO_UNUSED). +get_ctxt_hook(Ctxt) -> + Ctxt#ctxt.hook. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context(), hook()) -> context() +%% @doc Updates the hook function field of the prettyprinter context. +%% @see get_ctxt_hook/1 + +-ifndef(NO_UNUSED). +set_ctxt_hook(Ctxt, Hook) -> + Ctxt#ctxt{hook = Hook}. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context()) -> term() +%% @doc Returns the user data field of the prettyprinter context. +%% @see set_ctxt_user/2 + +-ifndef(NO_UNUSED). +get_ctxt_user(Ctxt) -> + Ctxt#ctxt.user. +-endif. % NO_UNUSED +%% @clear + +%% @spec (context(), term()) -> context() +%% @doc Updates the user data field of the prettyprinter context. +%% @see get_ctxt_user/1 + +-ifndef(NO_UNUSED). +set_ctxt_user(Ctxt, X) -> + Ctxt#ctxt{user = X}. +-endif. % NO_UNUSED +%% @clear + + +%% ===================================================================== +%% @spec format(Tree::cerl()) -> string() +%% @equiv format(Tree, []) + +-spec format(cerl:cerl()) -> string(). + +format(Node) -> + format(Node, []). + + +%% ===================================================================== +%% @spec format(Tree::cerl(), Options::[term()]) -> string() +%% cerl() = cerl:cerl() +%% +%% @type hook() = (cerl(), context(), Continuation) -> document() +%% Continuation = (cerl(), context()) -> document(). +%% +%% A call-back function for user-controlled formatting. See format/2. +%% +%% @type context(). A representation of the current context of the +%% pretty-printer. Can be accessed in hook functions. +%% +%% @doc Prettyprint-formats a Core Erlang syntax tree as text. +%% +%%

Available options: +%%

+%%
{hook, none | hook()}
+%%
Unless the value is none, the given function +%% is called for every node; see below for details. The default +%% value is none.
+%% +%%
{noann, boolean()}
+%%
If the value is true, annotations on the code +%% are not printed. The default value is false.
+%% +%%
{paper, integer()}
+%%
Specifies the preferred maximum number of characters on any +%% line, including indentation. The default value is 76.
+%% +%%
{ribbon, integer()}
+%%
Specifies the preferred maximum number of characters on any +%% line, not counting indentation. The default value is 45.
+%% +%%
{user, term()}
+%%
User-specific data for use in hook functions. The default +%% value is undefined.
+%%

+%% +%%

A hook function (cf. the hook() type) is passed the current +%% syntax tree node, the context, and a continuation. The context can be +%% examined and manipulated by functions such as +%% get_ctxt_user/1 and set_ctxt_user/2. The +%% hook must return a "document" data structure (see +%% layout/2 and best/2); this may be +%% constructed in part or in whole by applying the continuation +%% function. For example, the following is a trivial hook: +%%

+%%     fun (Node, Ctxt, Cont) -> Cont(Node, Ctxt) end
+%% 
+%% which yields the same result as if no hook was given. +%% The following, however: +%%
+%%     fun (Node, Ctxt, Cont) ->
+%%         Doc = Cont(Node, Ctxt),
+%%         prettypr:beside(prettypr:text("<b>"),
+%%                         prettypr:beside(Doc,
+%%                                         prettypr:text("</b>")))
+%%     end
+%% 
+%% will place the text of any annotated node (regardless of the +%% annotation data) between HTML "boldface begin" and "boldface end" +%% tags. The function annotate/3 is exported for use in +%% hook functions.

+%% +%% @see cerl +%% @see format/1 +%% @see layout/2 +%% @see best/2 +%% @see annotate/3 +%% @see get_ctxt_user/1 +%% @see set_ctxt_user/2 + +-spec format(cerl:cerl(), [term()]) -> string(). + +format(Node, Options) -> + W = proplists:get_value(paper, Options, ?PAPER), + L = proplists:get_value(ribbon, Options, ?RIBBON), + prettypr:format(layout(Node, Options), W, L). + + +%% ===================================================================== +%% @spec best(Tree::cerl()) -> empty | document() +%% @equiv best(Node, []) + +-ifndef(NO_UNUSED). +best(Node) -> + best(Node, []). +-endif. % NO_UNUSED +%% @clear + + +%% ===================================================================== +%% @spec best(Tree::cerl(), Options::[term()]) -> +%% empty | document() +%% +%% @doc Creates a fixed "best" abstract layout for a Core Erlang syntax +%% tree. This is similar to the layout/2 function, except +%% that here, the final layout has been selected with respect to the +%% given options. The atom empty is returned if no such +%% layout could be produced. For information on the options, see the +%% format/2 function. +%% +%% @see best/1 +%% @see layout/2 +%% @see format/2 +%% @see prettypr:best/2 + +-ifndef(NO_UNUSED). +best(Node, Options) -> + W = proplists:get_value(paper, Options, ?PAPER), + L = proplists:get_value(ribbon, Options, ?RIBBON), + prettypr:best(layout(Node, Options), W, L). +-endif. % NO_UNUSED +%% @clear + + +%% ===================================================================== +%% @spec layout(Tree::cerl()) -> document() +%% @equiv layout(Tree, []) + +-ifndef(NO_UNUSED). +layout(Node) -> + layout(Node, []). +-endif. % NO_UNUSED +%% @clear + + +%% ===================================================================== +%% @spec annotate(document(), Terms::[term()], context()) -> document() +%% +%% @doc Adds an annotation containing Terms around the +%% given abstract document. This function is exported mainly for use in +%% hook functions; see format/2. +%% +%% @see format/2 + +-spec annotate(prettypr:document(), [term()], context()) -> prettypr:document(). + +annotate(Doc, As0, Ctxt) -> + case strip_line(As0) of + [] -> + Doc; + As -> + case Ctxt#ctxt.noann of + false -> + Es = seq(As, floating(text(",")), Ctxt, + fun lay_concrete/2), + follow(beside(floating(text("(")), Doc), + beside(text("-| ["), + beside(par(Es), floating(text("])")))), + Ctxt#ctxt.sub_indent); + true -> + Doc + end + end. + + +%% ===================================================================== +%% @spec layout(Tree::cerl(), Options::[term()]) -> document() +%% document() = prettypr:document() +%% +%% @doc Creates an abstract document layout for a syntax tree. The +%% result represents a set of possible layouts (cf. module +%% prettypr). For information on the options, see +%% format/2; note, however, that the paper and +%% ribbon options are ignored by this function. +%% +%%

This function provides a low-level interface to the pretty +%% printer, returning a flexible representation of possible layouts, +%% independent of the paper width eventually to be used for formatting. +%% This can be included as part of another document and/or further +%% processed directly by the functions in the prettypr +%% module, or used in a hook function (see format/2 for +%% details).

+%% +%% @see prettypr +%% @see format/2 +%% @see layout/1 + +-spec layout(cerl:cerl(), [term()]) -> prettypr:document(). + +layout(Node, Options) -> + lay(Node, + #ctxt{hook = proplists:get_value(hook, Options, ?NOHOOK), + noann = proplists:get_bool(noann, Options), + paper = proplists:get_value(paper, Options, ?PAPER), + ribbon = proplists:get_value(ribbon, Options, ?RIBBON), + user = proplists:get_value(user, Options)}). + +lay(Node, Ctxt) -> + case get_line(get_ann(Node)) of + none -> + lay_0(Node, Ctxt); + Line -> + if Line > Ctxt#ctxt.line -> + Ctxt1 = Ctxt#ctxt{line = Line}, + Txt = io_lib:format("% Line ~w",[Line]), +% beside(lay_0(Node, Ctxt1), floating(text(Txt))); + above(floating(text(Txt)), lay_0(Node, Ctxt1)); + true -> + lay_0(Node, Ctxt) + end + end. + +lay_0(Node, Ctxt) -> + case Ctxt#ctxt.hook of + ?NOHOOK -> + lay_ann(Node, Ctxt); + Hook -> + %% If there is a hook, we apply it. + Hook(Node, Ctxt, fun lay_ann/2) + end. + +%% This adds an annotation list (if nonempty) around a document, unless +%% the `noann' option is enabled. + +lay_ann(Node, Ctxt) -> + Doc = lay_1(Node, Ctxt), + As = get_ann(Node), + annotate(Doc, As, Ctxt). + +%% This part ignores annotations: + +lay_1(Node, Ctxt) -> + case type(Node) of + literal -> + lay_literal(Node, Ctxt); + var -> + lay_var(Node, Ctxt); + values -> + lay_values(Node, Ctxt); + cons -> + lay_cons(Node, Ctxt); + tuple -> + lay_tuple(Node, Ctxt); + 'let' -> + lay_let(Node, Ctxt); + seq -> + lay_seq(Node, Ctxt); + apply -> + lay_apply(Node, Ctxt); + call -> + lay_call(Node, Ctxt); + primop -> + lay_primop(Node, Ctxt); + 'case' -> + lay_case(Node, Ctxt); + clause -> + lay_clause(Node, Ctxt); + alias -> + lay_alias(Node, Ctxt); + 'fun' -> + lay_fun(Node, Ctxt); + 'receive' -> + lay_receive(Node, Ctxt); + 'try' -> + lay_try(Node, Ctxt); + 'catch' -> + lay_catch(Node, Ctxt); + letrec -> + lay_letrec(Node, Ctxt); + module -> + lay_module(Node, Ctxt); + binary -> + lay_binary(Node, Ctxt); + bitstr -> + lay_bitstr(Node, Ctxt) + end. + +lay_literal(Node, Ctxt) -> + case concrete(Node) of + V when is_atom(V) -> + text(atom_lit(Node)); + V when is_float(V) -> + text(tidy_float(float_lit(Node))); + V when is_integer(V) -> + %% Note that we do not even try to recognize values + %% that could represent printable characters - we + %% always print an integer. + text(int_lit(Node)); + V when is_binary(V) -> + lay_binary(c_binary([c_bitstr(abstract(B), + abstract(8), + abstract(1), + abstract(integer), + abstract([unsigned, big])) + || B <- binary_to_list(V)]), + Ctxt); + [] -> + text("[]"); + [_ | _] -> + %% `lay_cons' will check for strings. + lay_cons(Node, Ctxt); + V when is_tuple(V) -> + lay_tuple(Node, Ctxt) + end. + +lay_var(Node, Ctxt) -> + %% When formatting variable names, no two names should ever map to + %% the same string. We assume below that an atom representing a + %% variable name either has the character sequence of a proper + %% variable, or otherwise does not need single-quoting. + case var_name(Node) of + V when is_atom(V) -> + S = atom_to_list(V), + case S of + [C | _] when C >= $A, C =< $Z -> + %% Ordinary uppercase-prefixed names are printed + %% just as they are. + text(S); + [C | _] when C >= $\300, C =< $\336, C /= $\327 -> + %% These are also uppercase (ISO 8859-1). + text(S); + [$_| _] -> + %% If the name starts with '_' we keep the name as is. + text(S); + _ -> + %% Plain atom names are prefixed with a single "_". + %% E.g. 'foo' => "_foo". + text([$_ | S]) + end; + V when is_integer(V) -> + %% Integers are always simply prefixed with "_"; + %% e.g. 4711 => "_4711". + text([$_ | integer_to_list(V)]); + {N, A} when is_atom(N), is_integer(A) -> + %% Function names have no overlap problem. + beside(lay_noann(c_atom(atom_to_list(N)), Ctxt), + beside(text("/"), lay_noann(c_int(A), Ctxt))) + end. + +lay_values(Node, Ctxt) -> + lay_value_list(values_es(Node), Ctxt). + +lay_cons(Node, Ctxt) -> + case is_print_string(Node) of + true -> + lay_string(string_lit(Node), Ctxt); + false -> + beside(floating(text("[")), + beside(par(lay_list_elements(Node, Ctxt)), + floating(text("]")))) + end. + +lay_string(S, Ctxt) -> + %% S includes leading/trailing double-quote characters. The segment + %% width is 2/3 of the ribbon width - this seems to work well. + W = (Ctxt#ctxt.ribbon) * 2 div 3, + lay_string_1(S, length(S), W). + +lay_string_1(S, L, W) when L > W, W > 0 -> + %% Note that L is the minimum, not the exact, printed length. + case split_string(S, W - 1, L) of + {_, ""} -> + text(S); + {S1, S2} -> + above(text(S1 ++ "\""), + lay_string_1([$" | S2], L - W + 1, W)) + end; +lay_string_1(S, _L, _W) -> + text(S). + +split_string(Xs, N, L) -> + split_string_1(Xs, N, L, []). + +%% We only split strings at whitespace, if possible. We must make sure +%% we do not split an escape sequence. + +split_string_1([$\s | Xs], N, L, As) when N =< 0, L >= 5 -> + {lists:reverse([$\s | As]), Xs}; +split_string_1([$\t | Xs], N, L, As) when N =< 0, L >= 5 -> + {lists:reverse([$t, $\\ | As]), Xs}; +split_string_1([$\n | Xs], N, L, As) when N =< 0, L >= 5 -> + {lists:reverse([$n, $\\ | As]), Xs}; +split_string_1([$\\ | Xs], N, L, As) -> + split_string_2(Xs, N - 1, L - 1, [$\\ | As]); +split_string_1(Xs, N, L, As) when N =< -10, L >= 5 -> + {lists:reverse(As), Xs}; +split_string_1([X | Xs], N, L, As) -> + split_string_1(Xs, N - 1, L - 1, [X | As]); +split_string_1([], _N, _L, As) -> + {lists:reverse(As), ""}. + +split_string_2([$^, X | Xs], N, L, As) -> + split_string_1(Xs, N - 2, L - 2, [X, $^ | As]); +split_string_2([X1, X2, X3 | Xs], N, L, As) when + X1 >= $0, X1 =< $7, X2 >= $0, X2 =< $7, X3 >= $0, X3 =< $7 -> + split_string_1(Xs, N - 3, L - 3, [X3, X2, X1 | As]); +split_string_2([X1, X2 | Xs], N, L, As) when + X1 >= $0, X1 =< $7, X2 >= $0, X2 =< $7 -> + split_string_1(Xs, N - 2, L - 2, [X2, X1 | As]); +split_string_2([X | Xs], N, L, As) -> + split_string_1(Xs, N - 1, L - 1, [X | As]). + +lay_tuple(Node, Ctxt) -> + beside(floating(text("{")), + beside(par(seq(tuple_es(Node), floating(text(",")), + Ctxt, fun lay/2)), + floating(text("}")))). + +lay_let(Node, Ctxt) -> + V = lay_value_list(let_vars(Node), Ctxt), + D1 = par([follow(text("let"), + beside(V, floating(text(" ="))), + Ctxt#ctxt.sub_indent), + lay(let_arg(Node), Ctxt)], + Ctxt#ctxt.body_indent), + B = let_body(Node), + D2 = lay(B, Ctxt), + case is_c_let(B) of + true -> + sep([beside(D1, floating(text(" in"))), D2]); + false -> + sep([D1, beside(text("in "), D2)]) + end. + +lay_seq(Node, Ctxt) -> + D1 = beside(text("do "), lay(seq_arg(Node), Ctxt)), + B = seq_body(Node), + D2 = lay(B, Ctxt), + case is_c_seq(B) of + true -> + sep([D1, D2]); + false -> + sep([D1, nest(3, D2)]) + end. + +lay_apply(Node, Ctxt) -> + As = seq(apply_args(Node), floating(text(",")), Ctxt, + fun lay/2), + beside(follow(text("apply"), lay(apply_op(Node), Ctxt)), + beside(text("("), + beside(par(As), floating(text(")"))))). + +lay_call(Node, Ctxt) -> + As = seq(call_args(Node), floating(text(",")), Ctxt, + fun lay/2), + beside(follow(text("call"), + beside(beside(lay(call_module(Node), Ctxt), + floating(text(":"))), + lay(call_name(Node), Ctxt)), + Ctxt#ctxt.sub_indent), + beside(text("("), beside(par(As), + floating(text(")"))))). + +lay_primop(Node, Ctxt) -> + As = seq(primop_args(Node), floating(text(",")), Ctxt, + fun lay/2), + beside(follow(text("primop"), + lay(primop_name(Node), Ctxt), + Ctxt#ctxt.sub_indent), + beside(text("("), beside(par(As), + floating(text(")"))))). + +lay_case(Node, Ctxt) -> + Cs = seq(case_clauses(Node), none, Ctxt, fun lay/2), + sep([par([follow(text("case"), + lay(case_arg(Node), Ctxt), + Ctxt#ctxt.sub_indent), + text("of")], + Ctxt#ctxt.sub_indent), + nest(Ctxt#ctxt.sub_indent, + vertical(Cs)), + text("end")]). + +lay_clause(Node, Ctxt) -> + P = lay_value_list(clause_pats(Node), Ctxt), + G = lay(clause_guard(Node), Ctxt), + H = par([P, follow(follow(text("when"), G, + Ctxt#ctxt.sub_indent), + floating(text("->")))], + Ctxt#ctxt.sub_indent), + par([H, lay(clause_body(Node), Ctxt)], + Ctxt#ctxt.body_indent). + +lay_alias(Node, Ctxt) -> + follow(beside(lay(alias_var(Node), Ctxt), + text(" =")), + lay(alias_pat(Node), Ctxt), + Ctxt#ctxt.body_indent). + +lay_fun(Node, Ctxt) -> + Vs = seq(fun_vars(Node), floating(text(",")), + Ctxt, fun lay/2), + par([follow(text("fun"), + beside(text("("), + beside(par(Vs), + floating(text(") ->")))), + Ctxt#ctxt.sub_indent), + lay(fun_body(Node), Ctxt)], + Ctxt#ctxt.body_indent). + +lay_receive(Node, Ctxt) -> + Cs = seq(receive_clauses(Node), none, Ctxt, fun lay/2), + sep([text("receive"), + nest(Ctxt#ctxt.sub_indent, vertical(Cs)), + sep([follow(text("after"), + beside(lay(receive_timeout(Node), Ctxt), + floating(text(" ->"))), + Ctxt#ctxt.sub_indent), + nest(Ctxt#ctxt.sub_indent, + lay(receive_action(Node), Ctxt))])]). + +lay_try(Node, Ctxt) -> + Vs = lay_value_list(try_vars(Node), Ctxt), + Evs = lay_value_list(try_evars(Node), Ctxt), + sep([follow(text("try"), + lay(try_arg(Node), Ctxt), + Ctxt#ctxt.body_indent), + follow(beside(beside(text("of "), Vs), + floating(text(" ->"))), + lay(try_body(Node), Ctxt), + Ctxt#ctxt.body_indent), + follow(beside(beside(text("catch "), Evs), + floating(text(" ->"))), + lay(try_handler(Node), Ctxt), + Ctxt#ctxt.body_indent)]). + +lay_catch(Node, Ctxt) -> + follow(text("catch"), + lay(catch_body(Node), Ctxt), + Ctxt#ctxt.sub_indent). + +lay_letrec(Node, Ctxt) -> + Es = seq(letrec_defs(Node), none, Ctxt, fun lay_fdef/2), + sep([text("letrec"), + nest(Ctxt#ctxt.sub_indent, vertical(Es)), + beside(text("in "), lay(letrec_body(Node), Ctxt))]). + +lay_module(Node, Ctxt) -> + %% Note that the module name, exports and attributes may not + %% be annotated in the printed format. + Xs = seq(module_exports(Node), floating(text(",")), Ctxt, + fun lay_noann/2), + As = seq(module_attrs(Node), floating(text(",")), Ctxt, + fun lay_attrdef/2), + Es = seq(module_defs(Node), none, Ctxt, fun lay_fdef/2), + sep([follow(text("module"), + follow(lay_noann(module_name(Node), Ctxt), + beside(beside(text("["), par(Xs)), + floating(text("]")))), + Ctxt#ctxt.sub_indent), + nest(Ctxt#ctxt.sub_indent, + follow(text("attributes"), + beside(beside(text("["), par(As)), + floating(text("]"))), + Ctxt#ctxt.sub_indent)), + nest(Ctxt#ctxt.sub_indent, vertical(Es)), + text("end")]). + +lay_binary(Node, Ctxt) -> + beside(floating(text("#{")), + beside(sep(seq(binary_segments(Node), floating(text(",")), + Ctxt, fun lay_bitstr/2)), + floating(text("}#")))). + +lay_bitstr(Node, Ctxt) -> + Head = beside(floating(text("#<")), + beside(lay(bitstr_val(Node), Ctxt), + floating(text(">")))), + As = [bitstr_size(Node), + bitstr_unit(Node), + bitstr_type(Node), + bitstr_flags(Node)], + beside(Head, beside(floating(text("(")), + beside(sep(seq(As, floating(text(",")), + Ctxt, fun lay/2)), + floating(text(")"))))). + +%% In all places where "<...>"-sequences can occur, it is OK to +%% write 1-element sequences without the "<" and ">". + +lay_value_list([E], Ctxt) -> + lay(E, Ctxt); +lay_value_list(Es, Ctxt) -> + beside(floating(text("<")), + beside(par(seq(Es, floating(text(",")), + Ctxt, fun lay/2)), + floating(text(">")))). + +lay_noann(Node, Ctxt) -> + lay(Node, Ctxt#ctxt{noann = true}). + +lay_concrete(T, Ctxt) -> + lay(abstract(T), Ctxt). + +lay_attrdef({K, V}, Ctxt) -> + follow(beside(lay_noann(K, Ctxt), floating(text(" ="))), + lay_noann(V, Ctxt), + Ctxt#ctxt.body_indent). + +lay_fdef({N, F}, Ctxt) -> + par([beside(lay(N, Ctxt), floating(text(" ="))), + lay(F, Ctxt)], + Ctxt#ctxt.body_indent). + +lay_list_elements(Node, Ctxt) -> + T = cons_tl(Node), + A = case Ctxt#ctxt.noann of + false -> + get_ann(T); + true -> + [] + end, + H = lay(cons_hd(Node), Ctxt), + case is_c_cons(T) of + true when A =:= [] -> + [beside(H, floating(text(","))) + | lay_list_elements(T, Ctxt)]; + _ -> + case is_c_nil(T) of + true when A =:= [] -> + [H]; + _ -> + [H, beside(floating(text("| ")), + lay(T, Ctxt))] + end + end. + +seq([H | T], Separator, Ctxt, Fun) -> + case T of + [] -> + [Fun(H, Ctxt)]; + _ -> + [maybe_append(Separator, Fun(H, Ctxt)) + | seq(T, Separator, Ctxt, Fun)] + end; +seq([], _, _, _) -> + [empty()]. + +maybe_append(none, D) -> + D; +maybe_append(Suffix, D) -> + beside(D, Suffix). + +vertical([D]) -> + D; +vertical([D | Ds]) -> + above(D, vertical(Ds)); +vertical([]) -> + []. + +% horizontal([D]) -> +% D; +% horizontal([D | Ds]) -> +% beside(D, horizontal(Ds)); +% horizontal([]) -> +% []. + +tidy_float([$., C | Cs]) -> + [$., C | tidy_float_1(Cs)]; % preserve first decimal digit +tidy_float([$e | _] = Cs) -> + tidy_float_2(Cs); +tidy_float([C | Cs]) -> + [C | tidy_float(Cs)]; +tidy_float([]) -> + []. + +tidy_float_1([$0, $0, $0 | Cs]) -> + tidy_float_2(Cs); % cut mantissa at three consecutive zeros. +tidy_float_1([$e | _] = Cs) -> + tidy_float_2(Cs); +tidy_float_1([C | Cs]) -> + [C | tidy_float_1(Cs)]; +tidy_float_1([]) -> + []. + +tidy_float_2([$e, $+, $0]) -> []; +tidy_float_2([$e, $+, $0 | Cs]) -> tidy_float_2([$e, $+ | Cs]); +tidy_float_2([$e, $+ | _] = Cs) -> Cs; +tidy_float_2([$e, $-, $0]) -> []; +tidy_float_2([$e, $-, $0 | Cs]) -> tidy_float_2([$e, $- | Cs]); +tidy_float_2([$e, $- | _] = Cs) -> Cs; +tidy_float_2([$e | Cs]) -> tidy_float_2([$e, $+ | Cs]); +tidy_float_2([_ | Cs]) -> tidy_float_2(Cs); +tidy_float_2([]) -> []. + +get_line([L | _As]) when is_integer(L) -> + L; +get_line([_ | As]) -> + get_line(As); +get_line([]) -> + none. + +strip_line([A | As]) when is_integer(A) -> + strip_line(As); +strip_line([A | As]) -> + [A | strip_line(As)]; +strip_line([]) -> + []. + +%% ===================================================================== diff --git a/lib/hipe/cerl/cerl_to_icode.erl b/lib/hipe/cerl/cerl_to_icode.erl new file mode 100644 index 0000000000..362c427cbe --- /dev/null +++ b/lib/hipe/cerl/cerl_to_icode.erl @@ -0,0 +1,2717 @@ +%% -*- erlang-indent-level: 4 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @author Richard Carlsson +%% @copyright 2000-2006 Richard Carlsson +%% @doc Translation from Core Erlang to HiPE Icode. + +%% TODO: annotate Icode leaf functions as such. +%% TODO: add a pass to remove unnecessary reduction tests +%% TODO: generate branch prediction info? + +-module(cerl_to_icode). + +-define(NO_UNUSED, true). + +-export([module/2]). +-ifndef(NO_UNUSED). +-export([function/3, function/4, module/1]). +-endif. + +%% Added in an attempt to suppress message by Dialyzer, but I run into +%% an internal compiler error in the old inliner and commented it out. +%% The inlining is performed manually instead :-( - Kostis +%% -compile({inline, [{error_fun_value,1}]}). + +%% --------------------------------------------------------------------- +%% Macros and records + +%% Icode primitive operation names + +-include("../icode/hipe_icode_primops.hrl"). + +-define(OP_REDTEST, redtest). +-define(OP_CONS, cons). +-define(OP_TUPLE, mktuple). +-define(OP_ELEMENT, {erlang,element,2}). %% This has an MFA name +-define(OP_UNSAFE_HD, unsafe_hd). +-define(OP_UNSAFE_TL, unsafe_tl). +-define(OP_UNSAFE_ELEMENT(N), #unsafe_element{index=N}). +-define(OP_UNSAFE_SETELEMENT(N), #unsafe_update_element{index=N}). +-define(OP_CHECK_GET_MESSAGE, check_get_msg). +-define(OP_NEXT_MESSAGE, next_msg). +-define(OP_SELECT_MESSAGE, select_msg). +-define(OP_SET_TIMEOUT, set_timeout). +-define(OP_CLEAR_TIMEOUT, clear_timeout). +-define(OP_WAIT_FOR_MESSAGE, suspend_msg). +-define(OP_APPLY_FIXARITY(N), #apply_N{arity=N}). +-define(OP_MAKE_FUN(M, F, A, U, I), #mkfun{mfa={M,F,A}, magic_num=U, index=I}). +-define(OP_FUN_ELEMENT(N), #closure_element{n=N}). +-define(OP_BS_CONTEXT_TO_BINARY, {hipe_bs_primop,bs_context_to_binary}). + +%% Icode conditional tests + +-define(TEST_EQ, '=='). +-define(TEST_NE, '/='). +-define(TEST_EXACT_EQ, '=:='). +-define(TEST_EXACT_NE, '=/='). +-define(TEST_LT, '<'). +-define(TEST_GT, '>'). +-define(TEST_LE, '=<'). +-define(TEST_GE, '>='). +-define(TEST_WAIT_FOR_MESSAGE_OR_TIMEOUT, suspend_msg_timeout). + +%% Icode type tests + +-define(TYPE_ATOM(X), {atom, X}). +-define(TYPE_INTEGER(X), {integer, X}). +-define(TYPE_FIXNUM(X), {integer, X}). % for now +-define(TYPE_CONS, cons). +-define(TYPE_NIL, nil). +-define(TYPE_IS_N_TUPLE(N), {tuple, N}). +-define(TYPE_IS_ATOM, atom). +-define(TYPE_IS_BIGNUM, bignum). +-define(TYPE_IS_BINARY, binary). +-define(TYPE_IS_CONSTANT, constant). +-define(TYPE_IS_FIXNUM, fixnum). +-define(TYPE_IS_FLOAT, float). +-define(TYPE_IS_FUNCTION, function). +-define(TYPE_IS_INTEGER, integer). +-define(TYPE_IS_LIST, list). +-define(TYPE_IS_NUMBER, number). +-define(TYPE_IS_PID, pid). +-define(TYPE_IS_PORT, port). +-define(TYPE_IS_RECORD(Atom_, Size_), {record, Atom_, Size_}). +-define(TYPE_IS_REFERENCE, reference). +-define(TYPE_IS_TUPLE, tuple). + +%% Record definitions + +-record(ctxt, {final = false :: boolean(), + effect = false, + fail = [], % [] or fail-to label + class = expr, % expr | guard + line = 0, % current line number + 'receive' % undefined | #receive{} + }). + +-record('receive', {loop}). +-record(cerl_to_icode__var, {name}). +-record('fun', {label, vars}). + + +%% --------------------------------------------------------------------- +%% Code + + +%% @spec module(Module::cerl()) -> [icode()] +%% @equiv module(Module, []) + +-ifndef(NO_UNUSED). +module(E) -> + module(E, []). +-endif. +%% @clear + + +%% @spec module(Module::cerl(), Options::[term()]) -> [icode()] +%% +%% cerl() = cerl:cerl() +%% icode() = hipe_icode:icode() +%% +%% @doc Transforms a Core Erlang module to linear HiPE Icode. The result +%% is a list of Icode function definitions. Currently, no options are +%% available. +%% +%%

This function first calls the {@link cerl_hipeify:transform/2} +%% function on the module.

+%% +%%

Note: Except for the module name, which is included in the header +%% of each Icode function definition, the remaining information (exports +%% and attributes) associated with the module definition is not included +%% in the resulting Icode.

+%% +%% @see function/4 +%% @see cerl_hipeify:transform/1 + +%% -spec module(cerl:c_module(), [term()]) -> [{mfa(), hipe_icode:icode()}]. + +module(E, Options) -> + module_1(cerl_hipeify:transform(E, Options), Options). + +module_1(E, Options) -> + M = cerl:atom_val(cerl:module_name(E)), + if is_atom(M) -> + ok; + true -> + error_msg("bad module name: ~P.", [M, 5]), + throw(error) + end, + S0 = init(M), + S1 = s__set_pmatch(proplists:get_value(pmatch, Options), S0), + S2 = s__set_bitlevel_binaries(proplists:get_value( + bitlevel_binaries, Options), S1), + {Icode, _} = lists:mapfoldl(fun function_definition/2, + S2, cerl:module_defs(E)), + Icode. + +%% For now, we simply assume that all function bodies should have degree +%% one (i.e., return exactly one value). We clear the code ackumulator +%% before we start compiling each function. + +function_definition({V, F}, S) -> + S1 = s__set_code([], S), + {Icode, S2} = function_1(cerl:var_name(V), F, 1, S1), + {{icode_icode_name(Icode), Icode}, S2}. + +init(Module) -> + reset_label_counter(), + s__new(Module). + +%% @spec function(Module::atom(), Name::atom(), Function::cerl()) -> +%% icode() +%% @equiv function(Module, Name, Fun, 1) + +-ifndef(NO_UNUSED). +function(Module, Name, Fun) -> + function(Module, Name, Fun, 1). +-endif. % NO_UNUSED +%% @clear + +%% @spec function(Module::atom(), Name::{atom(), integer()}, +%% Fun::cerl(), Degree::integer()) -> icode() +%% +%% @doc Transforms a Core Erlang function to a HiPE Icode function +%% definition. `Fun' must represent a fun-expression, which may not +%% contain free variables. `Module' and `Name' specify the module and +%% function name of the resulting Icode function. Note that the arity +%% part of `Name' is not necessarily equivalent to the number of +%% parameters of `Fun' (this can happen e.g., for lifted closure +%% functions). +%% +%%

`Degree' specifies the number of values the function is expected +%% to return; this is typically 1 (one); cf. {@link function/3}.

+%% +%%

Notes: +%%

    +%%
  • This function assumes that the code has been transformed into a +%% very simple and explicit form, using the {@link cerl_hipeify} +%% module.
  • +%% +%%
  • Several primops (see "`cerl_hipe_primops.hrl'") are +%% detected by the translation and handled specially.
  • +%% +%%
  • Tail call optimization is handled, even when the call is +%% "hidden" by let-definitions.
  • +%% +%%
  • It is assumed that all `primop' calls in the code represent +%% Icode primops or macro instructions, and that all inter-module +%% calls (both calls to statically named functions, and dynamic +%% meta-calls) represent actual inter-module calls - not +%% primitive or built-in operations.
  • +%% +%%
  • The following special form: +%% ```case Test of +%% 'true' when 'true' -> True +%% 'false' when 'true' -> False +%% end''' +%% is recognized as an if-then-else switch where `Test' is known +%% to always yield 'true' or 'false'. Efficient jumping code is +%% generated for such expressions, in particular if nested. Note that +%% there must be exactly two clauses; order is not important.
  • +%% +%%
  • Compilation of clauses is simplistic. No pattern matching +%% compilation or similar optimizations is done at this stage. Guards +%% that are `true' or `false' are recognized as trivially true/false; +%% for all other guards, code will be generated. Catch-all clauses +%% (with `true' guard and variable-only patterns) are detected, and +%% any following clauses are discarded.
  • +%%

+%% +%%

Important: This function does not handle occurrences of +%% fun-expressions in the body of `Fun', nor `apply'-expressions whose +%% operators are not locally bound function variables. These must be +%% transformed away before this function is called, by closure +%% conversion ({@link cerl_cconv}) using the `make_fun' and `call_fun' +%% primitive operations to create and apply functional values.

+%% +%%

`receive'-expressions are expected to have a particular +%% form: +%%

    +%%
  • There must be exactly one clause, with the atom +%% `true' as guard, and only a single variable as pattern. +%% The variable will be bound to a message in the mailbox, and can be +%% referred to in the clause body.
  • +%% +%%
  • In the body of that clause, all paths must execute one of the +%% primitive operations `receive_select/0' or +%% `receive_next/0' before another +%% `receive'-statement might be executed. +%% `receive_select/0' always returns, but without a value, +%% while `receive_next/0' never returns, either causing +%% the nearest surrounding receive-expression to be re-tried with the +%% next message in the input queue, or timing out.
  • +%%

+%% +%% @see function/3 + +-include("cerl_hipe_primops.hrl"). + +%% Main translation function: + +-ifndef(NO_UNUSED). +function(Module, Name, Fun, Degree) -> + S = init(Module), + {Icode, _} = function_1(Name, Fun, Degree, S), + Icode. +-endif. % NO_UNUSED +%% @clear + +function_1(Name, Fun, Degree, S) -> + reset_var_counter(), + LowV = max_var(), + LowL = max_label(), + %% Create input variables for the function parameters, and a list of + %% target variables for the result of the function. + Args = cerl:fun_vars(Fun), + IcodeArity = length(Args), + Vs = make_vars(IcodeArity), + Vs1 = make_vars(IcodeArity), % input variable temporaries + Ts = make_vars(Degree), + + %% Initialise environment and context. + Env = bind_vars(Args, Vs, env__new()), + %% TODO: if the function returns no values, we can use effect mode + Ctxt = #ctxt{final = true, effect = false}, + %% Each basic block must begin with a label. Note that we + %% immediately transfer the input parameters to local variables, for + %% our self-recursive calling convention. + Start = new_label(), + Local = new_label(), + S1 = add_code([icode_label(Start)] + ++ make_moves(Vs, Vs1) + ++ [icode_label(Local)], + s__set_function(Name, S)), + S2 = expr(cerl:fun_body(Fun), Ts, Ctxt, Env, + s__set_local_entry({Local, Vs}, S1)), + + %% This creates an Icode function definition. The ranges of used + %% variables and labels below should be nonempty. Note that the + %% input variables for the Icode function are `Vs1', which will be + %% transferred to `Vs' (see above). + HighV = new_var(), % assure nonempty range + HighL = max_label(), + Closure = lists:member(closure, cerl:get_ann(Fun)), + Module = s__get_module(S2), + Code = s__get_code(S2), + Function = icode_icode(Module, Name, Vs1, Closure, Code, + {LowV, HighV}, {LowL, HighL}), + if Closure -> + {_, OrigArity} = + lists:keyfind(closure_orig_arity, 1, cerl:get_ann(Fun)), + {hipe_icode:icode_closure_arity_update(Function, + OrigArity), + S2}; + true -> {Function, S2} + end. + +%% --------------------------------------------------------------------- +%% Main expression handler + +expr(E, Ts, Ctxt, Env, S0) -> + %% Insert source code position information + case get_line(cerl:get_ann(E)) of + none -> + expr_1(E, Ts, Ctxt, Env, S0); + Line when Line > Ctxt#ctxt.line -> + Txt = "Line: " ++ integer_to_list(Line), + S1 = add_code([icode_comment(Txt)], S0), + expr_1(E, Ts, Ctxt#ctxt{line = Line}, Env, S1); + _ -> + expr_1(E, Ts, Ctxt, Env, S0) + end. + +expr_1(E, Ts, Ctxt, Env, S) -> + case cerl:type(E) of + var -> + expr_var(E, Ts, Ctxt, Env, S); + literal -> + expr_literal(E, Ts, Ctxt, S); + values -> + expr_values(E, Ts, Ctxt, Env, S); + tuple -> + %% (The unit tuple `{}' is a literal, handled above.) + expr_tuple(E, Ts, Ctxt, Env, S); + cons -> + expr_cons(E, Ts, Ctxt, Env, S); + 'let' -> + expr_let(E, Ts, Ctxt, Env, S); + seq -> + expr_seq(E, Ts, Ctxt, Env, S); + apply -> + expr_apply(E, Ts, Ctxt, Env, S); + call -> + expr_call(E, Ts, Ctxt, Env, S); + primop -> + expr_primop(E, Ts, Ctxt, Env, S); + 'case' -> + expr_case(E, Ts, Ctxt, Env, S); + 'receive' -> + expr_receive(E, Ts, Ctxt, Env, S); + 'try' -> + expr_try(E, Ts, Ctxt, Env, S); + binary -> + expr_binary(E, Ts, Ctxt, Env, S); + letrec -> + expr_letrec(E, Ts, Ctxt, Env, S); + 'fun' -> + error_msg("cannot handle fun-valued expressions; " + "must be closure converted."), + throw(error) + end. + +%% This is for when we need new target variables for all of the +%% expressions in the list, and evaluate them for value in a +%% non-tail-call context. + +expr_list(Es, Ctxt, Env, S) -> + Ctxt1 = Ctxt#ctxt{effect = false, final = false}, + lists:mapfoldl(fun (E0, S0) -> + V = make_var(), + {V, expr(E0, [V], Ctxt1, Env, S0)} + end, + S, Es). + +%% This is for when we already have the target variables. It is expected +%% that each expression in the list has degree one, so the result can be +%% assigned to the corresponding variable. + +exprs([E | Es], [V | Vs], Ctxt, Env, S) -> + S1 = expr(E, [V], Ctxt, Env, S), + exprs(Es, Vs, Ctxt, Env, S1); +exprs([], [], _Ctxt, _Env, S) -> + S; +exprs([], _, _Ctxt, _Env, S) -> + warning_low_degree(), + S; +exprs(_, [], _Ctxt, _Env, _S) -> + error_high_degree(), + throw(error). + +get_line([L | _As]) when is_integer(L) -> + L; +get_line([_ | As]) -> + get_line(As); +get_line([]) -> + none. + + +%% --------------------------------------------------------------------- +%% Variables + +expr_var(_E, _Ts, #ctxt{effect = true}, _Env, S) -> + S; +expr_var(E, Ts, Ctxt, Env, S) -> + Name = cerl:var_name(E), + case env__lookup(Name, Env) of + error -> + %% Either an undefined variable or an attempt to use a local + %% function name as a value. + case Name of + {N,A} when is_atom(N), is_integer(A) -> + %% error_fun_value(Name); + error_msg("cannot handle fun-values outside call context; " + "must be closure converted: ~P.", + [Name, 5]), + throw(error); + _ -> + error_msg("undefined variable: ~P.", [Name, 5]), + throw(error) + end; + {ok, #cerl_to_icode__var{name = V}} -> + case Ctxt#ctxt.final of + false -> + glue([V], Ts, S); + true -> + add_return([V], S) + end; + {ok, #'fun'{}} -> + %% A letrec-defined function name, used as a value. + %% error_fun_value(Name) + error_msg("cannot handle fun-values outside call context; " + "must be closure converted: ~P.", + [Name, 5]), + throw(error) + end. + +%% The function has been inlined manually above to suppress message by Dialyzer +%% error_fun_value(Name) -> +%% error_msg("cannot handle fun-values outside call context; " +%% "must be closure converted: ~P.", +%% [Name, 5]), +%% throw(error). + +%% --------------------------------------------------------------------- +%% This handles all constants, both atomic and compound: + +expr_literal(_E, _Ts, #ctxt{effect = true}, S) -> + S; +expr_literal(E, [V] = Ts, Ctxt, S) -> + Code = [icode_move(V, icode_const(cerl:concrete(E)))], + maybe_return(Ts, Ctxt, add_code(Code, S)); +expr_literal(E, Ts, _Ctxt, _S) -> + error_degree_mismatch(length(Ts), E), + throw(error). + +%% --------------------------------------------------------------------- +%% Multiple value aggregate + +expr_values(E, Ts, #ctxt{effect = true} = Ctxt, Env, S) -> + {_, S1} = exprs(cerl:values_es(E), Ts, Ctxt#ctxt{final = false}, + Env, S), + S1; +expr_values(E, Ts, Ctxt, Env, S) -> + S1 = exprs(cerl:values_es(E), Ts, Ctxt#ctxt{final = false}, Env, S), + maybe_return(Ts, Ctxt, S1). + +%% --------------------------------------------------------------------- +%% Nonconstant tuples + +expr_tuple(E, _Ts, #ctxt{effect = true} = Ctxt, Env, S) -> + {_Vs, S1} = expr_list(cerl:tuple_es(E), Ctxt, Env, S), + S1; +expr_tuple(E, [_V] = Ts, Ctxt, Env, S) -> + {Vs, S1} = expr_list(cerl:tuple_es(E), Ctxt, Env, S), + add_code(make_op(?OP_TUPLE, Ts, Vs, Ctxt), S1); +expr_tuple(E, Ts, _Ctxt, _Env, _S) -> + error_degree_mismatch(length(Ts), E), + throw(error). + +%% --------------------------------------------------------------------- +%% Nonconstant cons cells + +expr_cons(E, _Ts, #ctxt{effect = true} = Ctxt, Env, S) -> + {_Vs, S1} = expr_list([cerl:cons_hd(E), cerl:cons_tl(E)], Ctxt, Env, S), + S1; +expr_cons(E, [_V] = Ts, Ctxt, Env, S) -> + {Vs, S1} = expr_list([cerl:cons_hd(E), cerl:cons_tl(E)], Ctxt, Env, S), + add_code(make_op(?OP_CONS, Ts, Vs, Ctxt), S1); +expr_cons(E, Ts, _Ctxt, _Env, _S) -> + error_degree_mismatch(length(Ts), E), + throw(error). + +%% --------------------------------------------------------------------- +%% Let-expressions + +%% We want to make sure we are not easily tricked by expressions hidden +%% in contexts like "let X = Expr in X"; this should not destroy tail +%% call properties. + +expr_let(E, Ts, Ctxt, Env, S) -> + F = fun (BF, CtxtF, EnvF, SF) -> expr(BF, Ts, CtxtF, EnvF, SF) end, + expr_let_1(E, F, Ctxt, Env, S). + +expr_let_1(E, F, Ctxt, Env, S) -> + E1 = cerl_lib:reduce_expr(E), + case cerl:is_c_let(E1) of + true -> + expr_let_2(E1, F, Ctxt, Env, S); + false -> + %% Redispatch the new expression. + F(E1, Ctxt, Env, S) + end. + +expr_let_2(E, F, Ctxt, Env, S) -> + Vars = cerl:let_vars(E), + Vs = make_vars(length(Vars)), + S1 = expr(cerl:let_arg(E), Vs, + Ctxt#ctxt{effect = false, final = false}, Env, S), + Env1 = bind_vars(Vars, Vs, Env), + F(cerl:let_body(E), Ctxt, Env1, S1). + +%% --------------------------------------------------------------------- +%% Sequencing + +%% To compile a sequencing operator, we generate code for effect only +%% for the first expression (the "argument") and then use the +%% surrounding context for the second expression (the "body"). Note that +%% we always create a new dummy target variable; this is necessary for +%% many ICode operations, even if the result is not used. + +expr_seq(E, Ts, Ctxt, Env, S) -> + F = fun (BF, CtxtF, EnvF, SF) -> expr(BF, Ts, CtxtF, EnvF, SF) end, + expr_seq_1(E, F, Ctxt, Env, S). + +expr_seq_1(E, F, Ctxt, Env, S) -> + Ctxt1 = Ctxt#ctxt{effect = true, final = false}, + S1 = expr(cerl:seq_arg(E), [make_var()], Ctxt1, Env, S), + F(cerl:seq_body(E), Ctxt, Env, S1). + +%% --------------------------------------------------------------------- +%% Binaries + +-record(sz_var, {code, sz}). +-record(sz_const, {code, sz}). + +expr_binary(E, [V]=Ts, Ctxt, Env, S) -> + Offset = make_reg(), + Base = make_reg(), + Segs = cerl:binary_segments(E), + S1 = case do_size_code(Segs, S, Env, Ctxt) of + #sz_const{code = S0, sz = Size} -> + Primop = {hipe_bs_primop, {bs_init, Size, 0}}, + add_code([icode_call_primop([V, Base, Offset], Primop, [])], + S0); + #sz_var{code = S0, sz = SizeVar} -> + Primop = {hipe_bs_primop, {bs_init, 0}}, + add_code([icode_call_primop([V, Base, Offset], + Primop, [SizeVar])], + S0) + end, + Vars = make_vars(length(Segs)), + S2 = binary_segments(Segs, Vars, Ctxt, Env, S1, false, Base, Offset), + S3 = case s__get_bitlevel_binaries(S2) of + true -> + POp = {hipe_bs_primop, bs_final}, + add_code([icode_call_primop([V], POp, [V, Offset])], S2); + false -> + S2 + end, + maybe_return(Ts, Ctxt, S3). + +do_size_code(Segs, S, Env, Ctxt) -> + case do_size_code(Segs, S, Env, cerl:c_int(0), [], []) of + {[], [], Const, S1} -> + #sz_const{code = S1, sz = ((cerl:concrete(Const) + 7) div 8)}; + {Pairs, Bins, Const, S1} -> + V1 = make_var(), + S2 = add_code([icode_move(V1, icode_const(cerl:int_val(Const)))], S1), + {S3, SizeVar} = create_size_code(Pairs, Bins, Ctxt, V1, S2), + #sz_var{code = S3, sz = SizeVar} + end. + +do_size_code([Seg|Rest], S, Env, Const, Pairs, Bins) -> + Size = cerl:bitstr_size(Seg), + Unit = cerl:bitstr_unit(Seg), + Val = cerl:bitstr_val(Seg), + case calculate_size(Unit, Size, false, Env, S) of + {all,_, _, S} -> + Binary = make_var(), + S1 = expr(Val, [Binary], #ctxt{final=false}, Env, S), + do_size_code(Rest, S1, Env, Const, Pairs, [{all,Binary}|Bins]); + {NewVal, [], S, _} -> + do_size_code(Rest, S, Env, add_val(NewVal, Const), Pairs, Bins); + {UnitVal, [Var], S1, _} -> + do_size_code(Rest, S1, Env, Const, [{UnitVal,Var}|Pairs], Bins) + end; +do_size_code([], S, _Env, Const, Pairs, Bins) -> + {Pairs, Bins, Const, S}. + +add_val(NewVal, Const) -> + cerl:c_int(NewVal + cerl:concrete(Const)). + +create_size_code([{UnitVal, Var}|Rest], Bins, Ctxt, Old, S0) -> + Dst = make_var(), + S = make_bs_add(UnitVal, Old, Var, Dst, Ctxt, S0), + create_size_code(Rest, Bins, Ctxt, Dst, S); +create_size_code([], Bins, Ctxt, Old, S0) -> + Dst = make_var(), + S = make_bs_bits_to_bytes(Old, Dst, Ctxt, S0), + create_size_code(Bins, Ctxt, Dst, S). + +create_size_code([{all,Bin}|Rest], Ctxt, Old, S0) -> + Dst = make_var(), + S = make_binary_size(Old, Bin, Dst, Ctxt, S0), + create_size_code(Rest, Ctxt, Dst, S); +create_size_code([], _Ctxt, Dst, S) -> + {S, Dst}. + +make_bs_add(Unit, Old, Var, Dst, #ctxt{fail=FL, class=guard}, S0) -> + SL1 = new_label(), + SL2 = new_label(), + SL3 = new_label(), + Temp = make_var(), + add_code([icode_if('>=', [Var, icode_const(0)], SL1, FL), + icode_label(SL1), + icode_guardop([Temp], '*', [Var, icode_const(Unit)], SL2, FL), + icode_label(SL2), + icode_guardop([Dst], '+', [Temp, Old], SL3, FL), + icode_label(SL3)], S0); +make_bs_add(Unit, Old, Var, Dst, _Ctxt, S0) -> + SL = new_label(), + FL = new_label(), + Temp = make_var(), + add_code([icode_if('>=', [Var, icode_const(0)], SL, FL), + icode_label(FL), + icode_fail([icode_const(badarg)], error), + icode_label(SL), + icode_call_primop([Temp], '*', [Var, icode_const(Unit)]), + icode_call_primop([Dst], '+', [Temp, Old])], S0). + +make_bs_bits_to_bytes(Old, Dst, #ctxt{fail=FL, class=guard}, S0) -> + SL = new_label(), + add_code([icode_guardop([Dst], 'bsl', [Old, icode_const(3)], SL, FL), + icode_label(SL)], S0); +make_bs_bits_to_bytes(Old, Dst, _Ctxt, S0) -> + add_code([icode_call_primop([Dst], 'bsl', [Old, icode_const(3)])], S0). + +make_binary_size(Old, Bin, Dst, #ctxt{fail=FL, class=guard}, S0) -> + SL1 = new_label(), + SL2 = new_label(), + add_code([icode_guardop([Dst], {erlang, byte_size, 1}, [Bin], SL1, FL), + icode_label(SL1), + icode_guardop([Dst], '+', [Old, Dst], SL2, FL), + icode_label(SL2)], S0); +make_binary_size(Old, Bin, Dst, _Ctxt, S0) -> + add_code([icode_call_primop([Dst], {erlang, byte_size, 1}, [Bin]), + icode_call_primop([Dst], '+', [Old, Dst])], S0). + +binary_segments(SegList, TList, Ctxt=#ctxt{}, Env, S, Align, Base, + Offset) -> + case do_const_segs(SegList, TList, S, Align, Base, Offset) of + {[Seg|Rest], [T|Ts], S1} -> + {S2, NewAlign} = bitstr(Seg, [T], Ctxt, Env, S1, Align, + Base, Offset), + binary_segments(Rest, Ts, Ctxt, Env, S2, NewAlign, Base, Offset); + {[], [], S1} -> + S1 + end. + +do_const_segs(SegList, TList, S, _Align, Base, Offset) -> + case get_segs(SegList, TList, [], 0, {[], SegList, TList}) of + {[], SegList, TList} -> + {SegList, TList, S}; + {ConstSegs, RestSegs, RestT} -> + String = create_string(ConstSegs, <<>>, 0), + Name = {bs_put_string, String, length(String)}, + Primop = {hipe_bs_primop, Name}, + {RestSegs, RestT, + add_code([icode_call_primop([Offset], Primop, [Base, Offset])], + S)} + end. + +get_segs([Seg|Rest], [_|RestT], Acc, AccSize, BestPresent) -> + Size = cerl:bitstr_size(Seg), + Unit = cerl:bitstr_unit(Seg), + Val = cerl:bitstr_val(Seg), + case allowed(Size, Unit, Val, AccSize) of + {true, NewAccSize} -> + case Acc of + [] -> + get_segs(Rest, RestT, [Seg|Acc], NewAccSize, BestPresent); + _ -> + get_segs(Rest, RestT, [Seg|Acc], NewAccSize, + {lists:reverse([Seg|Acc]), Rest, RestT}) + end; + {possible, NewAccSize} -> + get_segs(Rest, RestT, [Seg|Acc], NewAccSize, BestPresent); + false -> + BestPresent + end; +get_segs([], [], _Acc, _AccSize, Best) -> + Best. + + +create_string([Seg|Rest], Bin, TotalSize) -> + Size = cerl:bitstr_size(Seg), + Unit = cerl:bitstr_unit(Seg), + NewSize = cerl:int_val(Size) * cerl:int_val(Unit), + LitVal = cerl:concrete(cerl:bitstr_val(Seg)), + LiteralFlags = cerl:bitstr_flags(Seg), + FlagVal = translate_flags(LiteralFlags, []), + NewTotalSize = NewSize + TotalSize, + Pad = (8 - NewTotalSize rem 8) rem 8, + NewBin = case cerl:concrete(cerl:bitstr_type(Seg)) of + integer -> + case {FlagVal band 2, FlagVal band 4} of + {2, 4} -> + <>; + {0, 4} -> + <>; + {2, 0} -> + <>; + {0, 0} -> + <> + end; + float -> + case FlagVal band 2 of + 2 -> + <>; + 0 -> + <> + end + end, + create_string(Rest, NewBin, NewTotalSize); + +create_string([], Bin, _Size) -> + binary_to_list(Bin). + +allowed(Size, Unit, Val, AccSize) -> + case {cerl:is_c_int(Size), cerl:is_literal(Val)} of + {true, true} -> + NewAccSize = cerl:int_val(Size) * cerl:int_val(Unit) + AccSize, + case NewAccSize rem 8 of + 0 -> + {true, NewAccSize}; + _ -> + {possible, NewAccSize} + end; + _ -> + false + end. + +bitstr(E, Ts, Ctxt, Env, S, Align, Base, Offset) -> + Size = cerl:bitstr_size(E), + Unit = cerl:bitstr_unit(E), + LiteralFlags = cerl:bitstr_flags(E), + Val = cerl:bitstr_val(E), + Type = cerl:concrete(cerl:bitstr_type(E)), + S0 = expr(Val, Ts, Ctxt#ctxt{final = false, effect = false}, Env, S), + ConstInfo = get_const_info(Val, Type), + Flags = translate_flags(LiteralFlags, Align), + SizeInfo = calculate_size(Unit, Size, false, Env, S0), + bitstr_gen_op(Ts, Ctxt, SizeInfo, ConstInfo, Type, Flags, Base, Offset). + +bitstr_gen_op([V], #ctxt{fail=FL, class=guard}, SizeInfo, ConstInfo, + Type, Flags, Base, Offset) -> + SL = new_label(), + case SizeInfo of + {all,_NewUnit, NewAlign, S1} -> + Type = binary, + Name = {bs_put_binary_all, Flags}, + Primop = {hipe_bs_primop, Name}, + {add_code([icode_guardop([Offset], Primop, + [V, Base, Offset], SL, FL), + icode_label(SL)], S1), NewAlign}; + {NewUnit, NewArgs, S1, NewAlign} -> + Args = [V|NewArgs] ++ [Base, Offset], + Name = + case Type of + integer -> + {bs_put_integer, NewUnit, Flags, ConstInfo}; + float -> + {bs_put_float, NewUnit, Flags, ConstInfo}; + binary -> + {bs_put_binary, NewUnit, Flags} + end, + Primop = {hipe_bs_primop, Name}, + {add_code([icode_guardop([Offset], Primop, Args, SL, FL), + icode_label(SL)], S1), NewAlign} + end; +bitstr_gen_op([V], _Ctxt, SizeInfo, ConstInfo, Type, Flags, Base, + Offset) -> + case SizeInfo of + {all, _NewUnit, NewAlign, S} -> + Type = binary, + Name = {bs_put_binary_all, Flags}, + Primop = {hipe_bs_primop, Name}, + {add_code([icode_call_primop([Offset], Primop, + [V, Base, Offset])], S), + NewAlign}; + {NewUnit, NewArgs, S, NewAlign} -> + Args = [V|NewArgs] ++ [Base, Offset], + Name = + case Type of + integer -> + {bs_put_integer, NewUnit, Flags, ConstInfo}; + float -> + {bs_put_float, NewUnit, Flags, ConstInfo}; + binary -> + {bs_put_binary, NewUnit, Flags} + end, + Primop = {hipe_bs_primop, Name}, + {add_code([icode_call_primop([Offset], Primop, Args)], S), + NewAlign} + end. + +%% --------------------------------------------------------------------- +%% Apply-expressions + +%% Note that the arity of the called function only depends on the length +%% of the argument list; the arity stated by the function name is +%% ignored. + +expr_apply(E, Ts, Ctxt, Env, S) -> + Op = cerl_lib:reduce_expr(cerl:apply_op(E)), + {Vs, S1} = expr_list(cerl:apply_args(E), Ctxt, Env, S), + case cerl:is_c_var(Op) of + true -> + case cerl:var_name(Op) of + {N, A} = V when is_atom(N), is_integer(A) -> + case env__lookup(V, Env) of + error -> + %% Assumed to be a function in the + %% current module; we don't check. + add_local_call(V, Vs, Ts, Ctxt, S1); + {ok, #'fun'{label = L, vars = Vs1}} -> + %% Call to a local letrec-bound function. + add_letrec_call(L, Vs1, Vs, Ctxt, S1); + {ok, #cerl_to_icode__var{}} -> + error_msg("cannot call via variable; must " + "be closure converted: ~P.", + [V, 5]), + throw(error) + end; + _ -> + error_nonlocal_application(Op), + throw(error) + end; + false -> + error_nonlocal_application(Op), + throw(error) + end. + +%% --------------------------------------------------------------------- +%% Call-expressions + +%% Unless we know the module and function names statically, we have to +%% go through the meta-call operator for a static number of arguments. + +expr_call(E, Ts, Ctxt, Env, S) -> + Module = cerl_lib:reduce_expr(cerl:call_module(E)), + Name = cerl_lib:reduce_expr(cerl:call_name(E)), + case cerl:is_c_atom(Module) and cerl:is_c_atom(Name) of + true -> + M = cerl:atom_val(Module), + F = cerl:atom_val(Name), + {Vs, S1} = expr_list(cerl:call_args(E), Ctxt, Env, S), + add_code(make_call(M, F, Ts, Vs, Ctxt), S1); + false -> + Args = cerl:call_args(E), + N = length(Args), + {Vs, S1} = expr_list([Module, Name | Args], Ctxt, Env, S), + add_code(make_op(?OP_APPLY_FIXARITY(N), Ts, Vs, Ctxt), S1) + end. + +%% --------------------------------------------------------------------- +%% Primop calls + +%% Core Erlang primop calls are generally mapped directly to Icode +%% primop calls, with a few exceptions (listed above), which are +%% expanded inline, sometimes depending on context. Note that primop +%% calls do not have specialized tail-call forms. + +expr_primop(E, Ts, Ctxt, Env, S) -> + Name = cerl:atom_val(cerl:primop_name(E)), + As = cerl:primop_args(E), + Arity = length(As), + expr_primop_0(Name, Arity, As, E, Ts, Ctxt, Env, S). + +expr_primop_0(Name, Arity, As, E, Ts, #ctxt{effect = true} = Ctxt, Env, + S) -> + case is_safe_op(Name, Arity) of + true -> + %% Just drop the operation; cf. 'expr_values(...)'. + {_, S1} = expr_list(As, Ctxt, Env, S), + S1; + false -> + expr_primop_1(Name, Arity, As, E, Ts, + Ctxt#ctxt{effect = false}, Env, S) + end; +expr_primop_0(Name, Arity, As, E, Ts, Ctxt, Env, S) -> + expr_primop_1(Name, Arity, As, E, Ts, Ctxt, Env, S). + +%% Some primops must be caught before their arguments are visited. + +expr_primop_1(?PRIMOP_MAKE_FUN, 6, As, _E, Ts, Ctxt, Env, S) -> + primop_make_fun(As, Ts, Ctxt, Env, S); +expr_primop_1(?PRIMOP_APPLY_FUN, 2, As, _E, Ts, Ctxt, Env, S) -> + primop_apply_fun(As, Ts, Ctxt, Env, S); +expr_primop_1(?PRIMOP_FUN_ELEMENT, 2, As, _E, Ts, Ctxt, Env, S) -> + primop_fun_element(As, Ts, Ctxt, Env, S); +expr_primop_1(?PRIMOP_DSETELEMENT, 3, As, _E, Ts, Ctxt, Env, S) -> + primop_dsetelement(As, Ts, Ctxt, Env, S); +expr_primop_1(?PRIMOP_RECEIVE_SELECT, 0, _As, _E, Ts, Ctxt, _Env, S) -> + primop_receive_select(Ts, Ctxt, S); +expr_primop_1(?PRIMOP_RECEIVE_NEXT, 0, _As, _E, _Ts, Ctxt, _Env, S) -> + primop_receive_next(Ctxt, S); +%%expr_primop_1(?PRIMOP_IDENTITY, 1, [A], _E, Ts, Ctxt, Env, S) -> +%% expr(A, Ts, Ctxt, Env, S); % used for unary plus +expr_primop_1(?PRIMOP_NEG, 1, [A], _, Ts, Ctxt, Env, S) -> + E = cerl:c_primop(cerl:c_atom('-'), [cerl:c_int(0), A]), + expr_primop(E, Ts, Ctxt, Env, S); +expr_primop_1(?PRIMOP_GOTO_LABEL, 1, [A], _, _Ts, _Ctxt, _Env, S) -> + primop_goto_label(A, S); +expr_primop_1(?PRIMOP_REDUCTION_TEST, 0, [], _, _Ts, Ctxt, _Env, S) -> + primop_reduction_test(Ctxt, S); +expr_primop_1(Name, Arity, As, E, Ts, Ctxt, Env, S) -> + case is_pure_op_aux(Name, Arity) of + true -> + boolean_expr(E, Ts, Ctxt, Env, S); + false -> + {Vs, S1} = expr_list(As, Ctxt, Env, S), + expr_primop_2(Name, Arity, Vs, Ts, Ctxt, S1) + end. + +expr_primop_2(?PRIMOP_ELEMENT, 2, Vs, Ts, Ctxt, S) -> + add_code(make_op(?OP_ELEMENT, Ts, Vs, Ctxt), S); +expr_primop_2(?PRIMOP_BS_CONTEXT_TO_BINARY, 1, Vs, Ts, Ctxt, S) -> + add_code(make_op(?OP_BS_CONTEXT_TO_BINARY, Ts, Vs, Ctxt), S); +expr_primop_2(?PRIMOP_EXIT, 1, [V], _Ts, Ctxt, S) -> + add_exit(V, Ctxt, S); +expr_primop_2(?PRIMOP_THROW, 1, [V], _Ts, Ctxt, S) -> + add_throw(V, Ctxt, S); +expr_primop_2(?PRIMOP_ERROR, 1, [V], _Ts, Ctxt, S) -> + add_error(V, Ctxt, S); +expr_primop_2(?PRIMOP_ERROR, 2, [V, F], _Ts, Ctxt, S) -> + add_error(V, F, Ctxt, S); +expr_primop_2(?PRIMOP_RETHROW, 2, [E, V], _Ts, Ctxt, S) -> + add_rethrow(E, V, Ctxt, S); +expr_primop_2(Name, _Arity, Vs, Ts, Ctxt, S) -> + %% Other ops are assumed to be recognized by the backend. + add_code(make_op(Name, Ts, Vs, Ctxt), S). + +%% All of M, F, and A must be literals with the right types. +%% V must represent a proper list. + +primop_make_fun([M, F, A, H, I, V] = As, [_T] = Ts, Ctxt, Env, S) -> + case cerl:is_c_atom(M) and + cerl:is_c_atom(F) and + cerl:is_c_int(A) and + cerl:is_c_int(H) and + cerl:is_c_int(I) and + cerl:is_c_list(V) of + true -> + Module = cerl:atom_val(M), + Name = cerl:atom_val(F), + Arity = cerl:int_val(A), + Hash = cerl:int_val(H), + Index = cerl:int_val(I), + {Vs, S1} = expr_list(cerl:list_elements(V), + Ctxt, Env, S), + add_code(make_op(?OP_MAKE_FUN(Module, Name, Arity, + Hash, Index), + Ts, Vs, Ctxt), + S1); + false -> + error_primop_badargs(?PRIMOP_MAKE_FUN, As), + throw(error) + end. + +%% V must represent a proper list. + +primop_apply_fun([F, V] = As, [_T] = Ts, Ctxt, Env, S) -> + case cerl:is_c_list(V) of + true -> + %% Note that the closure itself is passed as the last value. + {Vs, S1} = expr_list(cerl:list_elements(V) ++ [F], + Ctxt, Env, S), + case Ctxt#ctxt.final of + false -> + add_code([icode_call_fun(Ts, Vs)], S1); + true -> + add_code([icode_enter_fun(Vs)], S1) + end; + false -> + error_primop_badargs(?PRIMOP_APPLY_FUN, As), + throw(error) + end. + +primop_fun_element([N, F] = As, Ts, Ctxt, Env, S) -> + case cerl:is_c_int(N) of + true -> + V = make_var(), + S1 = expr(F, [V], Ctxt#ctxt{final = false, effect = false}, + Env, S), + add_code(make_op(?OP_FUN_ELEMENT(cerl:int_val(N)), + Ts, [V], Ctxt), + S1); + false -> + error_primop_badargs(?PRIMOP_FUN_ELEMENT, As), + throw(error) + end. + +primop_goto_label(A, S) -> + {Label,S1} = s__get_label(A, S), + add_code([icode_goto(Label)], S1). + +is_goto(E) -> + case cerl:type(E) of + primop -> + Name = cerl:atom_val(cerl:primop_name(E)), + As = cerl:primop_args(E), + Arity = length(As), + case {Name, Arity} of + {?PRIMOP_GOTO_LABEL, 1} -> + true; + _ -> + false + end; + _ -> + false + end. + +primop_reduction_test(Ctxt, S) -> + add_code(make_op(?OP_REDTEST, [], [], Ctxt), S). + +primop_dsetelement([N | As1] = As, Ts, Ctxt, Env, S) -> + case cerl:is_c_int(N) of + true -> + {Vs, S1} = expr_list(As1, Ctxt, Env, S), + add_code(make_op(?OP_UNSAFE_SETELEMENT(cerl:int_val(N)), + Ts, Vs, Ctxt), + S1); + false -> + error_primop_badargs(?PRIMOP_DSETELEMENT, As), + throw(error) + end. + +%% --------------------------------------------------------------------- +%% Try-expressions: + +%% We want to rewrite trivial things like `try A of X -> B catch ...', +%% where A is safe, into a simple let-binding `let X = A in B', avoiding +%% unnecessary try-blocks. (The `let' might become further simplified.) + +expr_try(E, Ts, Ctxt, Env, S) -> + F = fun (BF, CtxtF, EnvF, SF) -> expr(BF, Ts, CtxtF, EnvF, SF) end, + expr_try_1(E, F, Ctxt, Env, S). + +expr_try_1(E, F, Ctxt, Env, S) -> + A = cerl:try_arg(E), + case is_safe_expr(A) of + true -> + E1 = cerl:c_let(cerl:try_vars(E), A, cerl:try_body(E)), + expr_let_1(E1, F, Ctxt, Env, S); + false -> + expr_try_2(E, F, Ctxt, Env, S) + end. + +%% TODO: maybe skip begin_try/end_try and just use fail-labels... + +expr_try_2(E, F, Ctxt, Env, S) -> + Cont = new_continuation_label(Ctxt), + Catch = new_label(), + Next = new_label(), + S1 = add_code([icode_begin_try(Catch,Next),icode_label(Next)], S), + Vars = cerl:try_vars(E), + Vs = make_vars(length(Vars)), + Ctxt1 = Ctxt#ctxt{final = false}, + S2 = expr(cerl:try_arg(E), Vs, Ctxt1, Env, S1), + Env1 = bind_vars(Vars, Vs, Env), + S3 = add_code([icode_end_try()], S2), + S4 = F(cerl:try_body(E), Ctxt, Env1, S3), + S5 = add_continuation_jump(Cont, Ctxt, S4), + EVars = cerl:try_evars(E), + EVs = make_vars(length(EVars)), + Env2 = bind_vars(EVars, EVs, Env), + S6 = add_code([icode_label(Catch), icode_begin_handler(EVs)], S5), + S7 = F(cerl:try_handler(E), Ctxt, Env2, S6), + add_continuation_label(Cont, Ctxt, S7). + +%% --------------------------------------------------------------------- +%% Letrec-expressions (local goto-labels) + +%% We only handle letrec-functions as continuations. The fun-bodies are +%% always compiled in the same context as the main letrec-body. Note +%% that we cannot propagate "advanced" contexts like boolean-compilation +%% into the letrec body like we do for ordinary lets or seqs, since the +%% context for an individual local function would be depending on the +%% contexts of its call sites. + +expr_letrec(E, Ts, Ctxt, Env, S) -> + Ds = cerl:letrec_defs(E), + Env1 = add_defs(Ds, Env), + S1 = expr(cerl:letrec_body(E), Ts, Ctxt, Env1, S), + Next = new_continuation_label(Ctxt), + S2 = add_continuation_jump(Next, Ctxt, S1), + S3 = defs(Ds, Ts, Ctxt, Env1, S2), + add_continuation_label(Next, Ctxt, S3). + +add_defs([{V, _F} | Ds], Env) -> + {_, A} = cerl:var_name(V), + Vs = make_vars(A), + L = new_label(), + Env1 = bind_fun(V, L, Vs, Env), + add_defs(Ds, Env1); +add_defs([], Env) -> + Env. + +defs([{V, F} | Ds], Ts, Ctxt, Env, S) -> + Name = cerl:var_name(V), + #'fun'{label = L, vars = Vs} = env__get(Name, Env), + S1 = add_code([icode_label(L)], S), + Env1 = bind_vars(cerl:fun_vars(F), Vs, Env), + S2 = expr(cerl:fun_body(F), Ts, Ctxt, Env1, S1), + defs(Ds, Ts, Ctxt, Env, S2); +defs([], _Ts, _Ctxt, _Env, S) -> + S. + +%% --------------------------------------------------------------------- +%% Receive-expressions + +%% There may only be exactly one clause, which must be a trivial +%% catch-all with exactly one (variable) pattern. Each message will be +%% read from the mailbox and bound to the pattern variable; the body of +%% the clause must do the switching and call either of the primops +%% `receive_select/0' or `receive_next/0'. + +expr_receive(E, Ts, Ctxt, Env, S) -> + F = fun (BF, CtxtF, EnvF, SF) -> expr(BF, Ts, CtxtF, EnvF, SF) end, + expr_receive_1(E, F, Ctxt, Env, S). + +expr_receive_1(E, F, Ctxt, Env, S) -> + case cerl:receive_clauses(E) of + [C] -> + case cerl:clause_pats(C) of + [_] -> + case cerl_clauses:is_catchall(C) of + true -> + expr_receive_2(C, E, F, Ctxt, Env, S); + false -> + error_msg("receive-expression clause " + "must be a catch-all."), + throw(error) + end; + _ -> + error_msg("receive-expression clause must " + "have exactly one pattern."), + throw(error) + end; + _ -> + error_msg("receive-expressions must have " + "exactly one clause."), + throw(error) + end. + +%% There are a number of primitives to do the work involved in receiving +%% messages: +%% +%% if-tests: suspend_msg_timeout() +%% +%% primops: V = check_get_msg() +%% select_msg() +%% next_msg() +%% set_timeout(T) +%% clear_timeout() +%% suspend_msg() +%% +%% `check_get_msg' tests if the mailbox is empty or not, and if not it +%% reads the message currently pointed to by the implicit message pointer. +%% `select_msg' removes the current message from the mailbox, resets the +%% message pointer and clears any timeout. `next_msg' advances the +%% message pointer but does nothing else. `set_timeout(T)' sets up the +%% timeout mechanism *unless it is already set*. `suspend_msg' suspends +%% until a message has arrived and does not check for timeout. The test +%% `suspend_msg_timeout' suspends the process and upon resuming +%% execution selects the `true' branch if a message has arrived and the +%% `false' branch otherwise. `clear_timeout' resets the message pointer +%% when a timeout has occurred (the name is somewhat misleading). +%% +%% Note: the receiving of a message must be performed so that the +%% message pointer is always reset when the receive is done; thus, all +%% paths must go through either `select_msg' or `clear_timeout'. + +%% Recall that the `final' and `effect' context flags distribute over +%% the clauses *and* the timeout action (but not over the +%% timeout-expression, which is always executed for its value). + +%% This is the code we generate for a full receive: +%% +%% Loop: check_get_msg(Match, Wait) +%% Wait: set_timeout +%% suspend_msg_timeout(Loop, Timeout) +%% Timeout: clear_timeout +%% TIMEOUT-ACTION +%% goto Next +%% Match: RECEIVE-CLAUSES(Loop, Next) +%% Next: ... +%% +%% For a receive with infinity timout, we generate +%% +%% Wait: suspend_msg +%% goto Loop +%% +%% For a receive with zero timout, we generate +%% +%% Wait: clear_timeout +%% TIMEOUT-ACTION +%% goto Next + +expr_receive_2(C, E, F, Ctxt, Env, S0) -> + Expiry = cerl_lib:reduce_expr(cerl:receive_timeout(E)), + After = case cerl:is_literal(Expiry) of + true -> + cerl:concrete(Expiry); + false -> + undefined + end, + T = make_var(), % T will hold the timeout value + %% It would be harmless to generate code for `infinity', but we + %% might as well avoid it if we can. + S1 = if After =:= 'infinity' -> S0; + true -> + expr(Expiry, [T], + Ctxt#ctxt{final = false, effect = false}, + Env, S0) + end, + + %% This is the top of the receive-loop, which checks if the + %% mailbox is empty, and otherwise reads the next message. + Loop = new_label(), + Wait = new_label(), + Match = new_label(), + V = make_var(), + S2 = add_code([icode_label(Loop), + icode_call_primop([V], ?OP_CHECK_GET_MESSAGE, [], + Match, Wait), + icode_label(Wait)], S1), + + %% The wait-for-message section looks a bit different depending on + %% whether we actually need to set a timer or not. + Ctxt0 = #ctxt{}, + S3 = case After of + 'infinity' -> + %% Only wake up when we get new messages, and never + %% execute the expiry body. + add_code(make_op(?OP_WAIT_FOR_MESSAGE, [], [], Ctxt0) + ++ [icode_goto(Loop)], S2); + 0 -> + %% Zero limit - reset the message pointer (this is what + %% "clear timeout" does) and execute the expiry body. + add_code(make_op(?OP_CLEAR_TIMEOUT, [], [], Ctxt0), + S2); + _ -> + %% Other value - set the timer (if it is already set, + %% nothing is changed) and wait for a message or + %% timeout. Reset the message pointer upon timeout. + Timeout = new_label(), + add_code(make_op(?OP_SET_TIMEOUT, [], [T], Ctxt0) + ++ [make_if(?TEST_WAIT_FOR_MESSAGE_OR_TIMEOUT, + [], Loop, Timeout), + icode_label(Timeout)] + ++ make_op(?OP_CLEAR_TIMEOUT, [], [], Ctxt0), + S2) + end, + + %% We never generate code for the expiry body if the timeout value + %% is 'infinity' (and thus we know that it will not be used), mainly + %% because in this case it is possible (and legal) for the expiry + %% body to not have the expected degree. (Typically, it produces a + %% single constant value such as 'true', while the clauses may be + %% producing 2 or more values.) + Next = new_continuation_label(Ctxt), + S4 = if After =:= 'infinity' -> S3; + true -> + add_continuation_jump(Next, Ctxt, + F(cerl:receive_action(E), Ctxt, + Env, S3)) + end, + + %% When we compile the primitive operations that select the current + %% message or loop to try the next message (see the functions + %% 'primop_receive_next' and 'primop_receive_select'), we will use + %% the receive-loop label in the context (i.e., that of the nearest + %% enclosing receive expression). + Ctxt1 = Ctxt#ctxt{'receive' = #'receive'{loop = Loop}}, + + %% The pattern variable of the clause will be mapped to `V', which + %% holds the message, so it can be accessed in the clause body: + S5 = clauses([C], F, [V], Ctxt1, Env, + add_code([icode_label(Match)], S4)), + add_continuation_label(Next, Ctxt, S5). + +%% Primops supporting "expanded" receive-expressions on the Core level: + +primop_receive_next(#ctxt{'receive' = R} = Ctxt, S0) -> + case R of + #'receive'{loop = Loop} -> + %% Note that this has the same "problem" as the fail + %% instruction (see the 'add_fail' function), namely, that + %% it unexpectedly ends a basic block. The solution is the + %% same - add a dummy label if necessary. + S1 = add_code(make_op(?OP_NEXT_MESSAGE, [], [], #ctxt{}) + ++ [icode_goto(Loop)], S0), + add_new_continuation_label(Ctxt, S1); + _ -> + error_not_in_receive(?PRIMOP_RECEIVE_NEXT), + throw(error) + end. + +primop_receive_select(Ts, #ctxt{'receive' = R} = Ctxt, S) -> + case R of + #'receive'{} -> + add_code(make_op(?OP_SELECT_MESSAGE, Ts, [], Ctxt), S); + _ -> + error_not_in_receive(?PRIMOP_RECEIVE_SELECT), + throw(error) + end. + +%% --------------------------------------------------------------------- +%% Case expressions + +%% Typically, pattern matching compilation has split all switches into +%% separate groups of tuples, integers, atoms, etc., where each such +%% switch over a group of constructors is protected by a type test. +%% Thus, it is straightforward to generate switch instructions. (If no +%% pattern matching compilation has been done, we don't care about +%% efficiency anyway, so we don't spend any extra effort here.) + +expr_case(E, Ts, Ctxt, Env, S) -> + F = fun (BF, CtxtF, EnvF, SF) -> expr(BF, Ts, CtxtF, EnvF, SF) end, + expr_case_1(E, F, Ctxt, Env, S). + +expr_case_1(E, F, Ctxt, Env, S) -> + Cs = cerl:case_clauses(E), + A = cerl:case_arg(E), + case cerl_lib:is_bool_switch(Cs) of + true -> + %% An if-then-else with a known boolean argument + {True, False} = cerl_lib:bool_switch_cases(Cs), + bool_switch(A, True, False, F, Ctxt, Env, S); + false -> + Vs = make_vars(cerl:clause_arity(hd(Cs))), + Ctxt1 = Ctxt#ctxt{final = false, effect = false}, + S1 = expr(A, Vs, Ctxt1, Env, S), + expr_case_2(Vs, Cs, F, Ctxt, Env, S1) + end. + +%% Switching on a value + +expr_case_2(Vs, Cs, F, Ctxt, Env, S1) -> + case is_constant_switch(Cs) of + true -> + switch_val_clauses(Cs, F, Vs, Ctxt, Env, S1); + false -> + case is_tuple_switch(Cs) of + true -> + switch_tuple_clauses(Cs, F, Vs, Ctxt, Env, S1); + false -> + case is_binary_switch(Cs, S1) of + true -> + switch_binary_clauses(Cs, F, Vs, Ctxt, Env, S1); + false -> + clauses(Cs, F, Vs, Ctxt, Env, S1) + end + end + end. + +%% Check if a list of clauses represents a switch over a number (more +%% than 1) of constants (integers or atoms), or tuples (whose elements +%% are all variables) + +is_constant_switch(Cs) -> + is_switch(Cs, fun (P) -> (cerl:type(P) =:= literal) andalso + (is_integer(cerl:concrete(P)) + orelse is_atom(cerl:concrete(P))) end). + +is_tuple_switch(Cs) -> + is_switch(Cs, fun (P) -> cerl:is_c_tuple(P) andalso + all_vars(cerl:tuple_es(P)) end). + +is_binary_switch(Cs, S) -> + case s__get_pmatch(S) of + False when False =:= false; False =:= undefined -> + false; + Other when Other =:= duplicate_all; Other =:= no_duplicates; Other =:= true-> + is_binary_switch1(Cs, 0) + end. + +is_binary_switch1([C|Cs], N) -> + case cerl:clause_pats(C) of + [P] -> + case cerl:is_c_binary(P) of + true -> + is_binary_switch1(Cs, N + 1); + false -> + %% The final clause may be a catch-all. + Cs =:= [] andalso N > 0 andalso cerl:type(P) =:= var + end; + _ -> + false + end; +is_binary_switch1([], N) -> + N > 0. + +all_vars([E | Es]) -> + case cerl:is_c_var(E) of + true -> all_vars(Es); + false -> false + end; +all_vars([]) -> true. + +is_switch(Cs, F) -> + is_switch(Cs, F, 0). + +is_switch([C | Cs], F, N) -> + case cerl_lib:is_simple_clause(C) of + true -> + [P] = cerl:clause_pats(C), + case F(P) of + true -> + is_switch(Cs, F, N + 1); + false -> + %% The final clause may be a catch-all. + Cs =:= [] andalso N > 1 andalso cerl:type(P) =:= var + end; + false -> false + end; +is_switch([], _F, N) -> + N > 1. + +switch_val_clauses(Cs, F, Vs, Ctxt, Env, S) -> + switch_clauses(Cs, F, Vs, Ctxt, Env, + fun (P) -> cerl:concrete(P) end, + fun icode_switch_val/4, + fun val_clause_body/9, + S). + +val_clause_body(_N, _V, C, F, Next, _Fail, Ctxt, Env, S) -> + clause_body(C, F, Next, Ctxt, Env, S). + +switch_tuple_clauses(Cs, F, Vs, Ctxt, Env, S) -> + switch_clauses(Cs, F, Vs, Ctxt, Env, + fun (P) -> cerl:tuple_arity(P) end, + fun icode_switch_tuple_arity/4, + fun tuple_clause_body/9, + S). + +tuple_clause_body(N, V, C, F, Next, Fail, Ctxt, Env, S0) -> + Vs = make_vars(N), + S1 = tuple_elements(Vs, V, S0), + Es = cerl:tuple_es(hd(cerl:clause_pats(C))), + {Env1, S2} = patterns(Es, Vs, Fail, Env, S1), + clause_body(C, F, Next, Ctxt, Env1, S2). + +switch_clauses(Cs, F, [V], Ctxt, Env, GetVal, Switch, Body, S0) -> + Cs1 = [switch_clause(C, GetVal) || C <- Cs], + Cases = [{Val, L} || {Val, L, _} <- Cs1], + Default = [C || {default, C} <- Cs1], + Fail = new_label(), + S1 = add_code([Switch(V, Fail, length(Cases), Cases)], S0), + Next = new_continuation_label(Ctxt), + S3 = case Default of + [] -> add_default_case(Fail, Ctxt, S1); + [C] -> + %% Bind the catch-all variable (this always succeeds) + {Env1, S2} = patterns(cerl:clause_pats(C), [V], Fail, + Env, S1), + clause_body(C, F, Next, Ctxt, Env1, + add_code([icode_label(Fail)], S2)) + end, + S4 = switch_cases(Cs1, V, F, Next, Fail, Ctxt, Env, Body, S3), + add_continuation_label(Next, Ctxt, S4). + +switch_clause(C, F) -> + [P] = cerl:clause_pats(C), + L = new_label(), + case cerl:type(P) of + var -> {default, C}; + _ -> {icode_const(F(P)), L, C} + end. + +switch_binary_clauses(Cs, F, Vs, Ctxt, Env, S) -> + {Bins, Default} = get_binary_clauses(Cs), + Fail = new_label(), + Next = new_continuation_label(Ctxt), + S1 = binary_match(Bins, F, Vs, Next, Fail, Ctxt, Env, S), + S2 = case Default of + [] -> add_default_case(Fail, Ctxt, S1); + [C] -> + clause_body(C, F, Next, Ctxt, Env, + add_code([icode_label(Fail)], S1)) + end, + add_continuation_label(Next, Ctxt, S2). + +get_binary_clauses(Cs) -> + get_binary_clauses(Cs, []). + +get_binary_clauses([C|Cs], Acc) -> + [P] = cerl:clause_pats(C), + case cerl:is_c_binary(P) of + true -> + get_binary_clauses(Cs, [C|Acc]); + false -> + {lists:reverse(Acc),[C]} + end; +get_binary_clauses([], Acc) -> + {lists:reverse(Acc),[]}. + +switch_cases([{N, L, C} | Cs], V, F, Next, Fail, Ctxt, Env, Body, S0) -> + S1 = add_code([icode_label(L)], S0), + S2 = Body(icode_const_val(N), V, C, F, Next, Fail, Ctxt, Env, S1), + switch_cases(Cs, V, F, Next, Fail, Ctxt, Env, Body, S2); +switch_cases([_ | Cs], V, F, Next, Fail, Ctxt, Env, Body, S) -> + switch_cases(Cs, V, F, Next, Fail, Ctxt, Env, Body, S); +switch_cases([], _V, _F, _Next, _Fail, _Ctxt, _Env, _Body, S) -> + S. + +%% Recall that the `final' and `effect' context flags distribute over +%% the clause bodies. + +clauses(Cs, F, Vs, Ctxt, Env, S) -> + Next = new_continuation_label(Ctxt), + S1 = clauses_1(Cs, F, Vs, undefined, Next, Ctxt, Env, S), + add_continuation_label(Next, Ctxt, S1). + +clauses_1([C | Cs], F, Vs, Fail, Next, Ctxt, Env, S) -> + case cerl_clauses:is_catchall(C) of + true -> + %% The fail label will not actually be used in this case. + clause(C, F, Vs, Fail, Next, Ctxt, Env, S); + false -> + %% The previous `Fail' is not used here. + Fail1 = new_label(), + S1 = clause(C, F, Vs, Fail1, Next, Ctxt, Env, S), + S2 = add_code([icode_label(Fail1)], S1), + clauses_1(Cs, F, Vs, Fail1, Next, Ctxt, Env, S2) + end; +clauses_1([], _F, _Vs, Fail, _Next, Ctxt, _Env, S) -> + if Fail =:= undefined -> + L = new_label(), + add_default_case(L, Ctxt, S); + true -> + add_code([icode_goto(Fail)], S) % use existing label + end. + +%% The exact behaviour if all clauses fail is undefined; we generate an +%% 'internal_error' exception if this happens, which is safe and will +%% not get in the way of later analyses. (Continuing execution after the +%% `case', as in a C `switch' statement, would add a new possible path +%% to the program, which could destroy program properties.) Note that +%% this code is only generated if some previous stage has created a +%% switch over clauses without a final catch-all; this could be both +%% legal and non-redundant, e.g. if the last clause does pattern +%% matching to extract components of a (known) constructor. The +%% generated default-case code *should* be unreachable, but we need it +%% in order to have a safe fail-label. + +add_default_case(L, Ctxt, S) -> + S1 = add_code([icode_label(L)], S), + add_error(icode_const(internal_error), Ctxt, S1). + +clause(C, F, Vs, Fail, Next, Ctxt, Env, S) -> + G = cerl:clause_guard(C), + case cerl_clauses:eval_guard(G) of + {value, true} -> + {Env1, S1} = patterns(cerl:clause_pats(C), Vs, Fail, Env, + S), + clause_body(C, F, Next, Ctxt, Env1, S1); + {value, false} -> + add_code([icode_goto(Fail)], S); + _ -> + {Env1, S1} = patterns(cerl:clause_pats(C), Vs, Fail, Env, + S), + Succ = new_label(), + Ctxt1 = Ctxt#ctxt{final = false, + fail = Fail, + class = guard}, + S2 = boolean(G, Succ, Fail, Ctxt1, Env1, S1), + S3 = add_code([icode_label(Succ)], S2), + clause_body(C, F, Next, Ctxt, Env1, S3) + end. + +clause_body(C, F, Next, Ctxt, Env, S) -> + %% This check is inserted as a goto is always final + case is_goto(cerl:clause_body(C)) of + true -> + F(cerl:clause_body(C), Ctxt, Env, S); + false -> + S1 = F(cerl:clause_body(C), Ctxt, Env, S), + add_continuation_jump(Next, Ctxt, S1) + end. + +patterns([P | Ps], [V | Vs], Fail, Env, S) -> + {Env1, S1} = pattern(P, V, Fail, Env, S), + patterns(Ps, Vs, Fail, Env1, S1); +patterns([], [], _, Env, S) -> + {Env, S}. + +pattern(P, V, Fail, Env, S) -> + case cerl:type(P) of + var -> + {bind_var(P, V, Env), S}; + alias -> + {Env1, S1} = pattern(cerl:alias_pat(P), V, + Fail, Env, S), + {bind_var(cerl:alias_var(P), V, Env1), S1}; + literal -> + {Env, literal_pattern(P, V, Fail, S)}; + cons -> + cons_pattern(P, V, Fail, Env, S); + tuple -> + tuple_pattern(P, V, Fail, Env, S); + binary -> + binary_pattern(P, V, Fail, Env, S) + end. + +literal_pattern(P, V, Fail, S) -> + L = new_label(), + S1 = literal_pattern_1(P, V, Fail, L, S), + add_code([icode_label(L)], S1). + +literal_pattern_1(P, V, Fail, Next, S) -> + case cerl:concrete(P) of + X when is_atom(X) -> + add_code([make_type([V], ?TYPE_ATOM(X), Next, Fail)], + S); + X when is_integer(X) -> + add_code([make_type([V], ?TYPE_INTEGER(X), Next, Fail)], + S); + X when is_float(X) -> + V1 = make_var(), + L = new_label(), + %% First doing an "is float" test here might allow later + %% stages to use a specialized equality test. + add_code([make_type([V], ?TYPE_IS_FLOAT, L, Fail), + icode_label(L), + icode_move(V1, icode_const(X)), + make_if(?TEST_EQ, [V, V1], Next, Fail)], + S); + [] -> + add_code([make_type([V], ?TYPE_NIL, Next, Fail)], S); + X -> + %% Compound constants are compared with the generic exact + %% equality test. + V1 = make_var(), + add_code([icode_move(V1, icode_const(X)), + make_if(?TEST_EXACT_EQ, [V, V1], Next, Fail)], + S) + end. + +cons_pattern(P, V, Fail, Env, S) -> + V1 = make_var(), + V2 = make_var(), + Next = new_label(), + Ctxt = #ctxt{}, + S1 = add_code([make_type([V], ?TYPE_CONS, Next, Fail), + icode_label(Next)] + ++ make_op(?OP_UNSAFE_HD, [V1], [V], Ctxt) + ++ make_op(?OP_UNSAFE_TL, [V2], [V], Ctxt), + S), + patterns([cerl:cons_hd(P), cerl:cons_tl(P)], [V1, V2], + Fail, Env, S1). + +tuple_pattern(P, V, Fail, Env, S) -> + Es = cerl:tuple_es(P), + N = length(Es), + Vs = make_vars(N), + Next = new_label(), + S1 = add_code([make_type([V], ?TYPE_IS_N_TUPLE(N), Next, Fail), + icode_label(Next)], + S), + S2 = tuple_elements(Vs, V, S1), + patterns(Es, Vs, Fail, Env, S2). + +tuple_elements(Vs, V, S) -> + tuple_elements(Vs, V, #ctxt{}, 1, S). + +tuple_elements([V1 | Vs], V0, Ctxt, N, S) -> + Code = make_op(?OP_UNSAFE_ELEMENT(N), [V1], [V0], Ctxt), + tuple_elements(Vs, V0, Ctxt, N + 1, add_code(Code, S)); +tuple_elements([], _, _, _, S) -> + S. + +binary_pattern(P, V, Fail, Env, S) -> + L1 = new_label(), + Segs = cerl:binary_segments(P), + Arity = length(Segs), + Vars = make_vars(Arity), + MS = make_var(), + Primop1 = {hipe_bs_primop, {bs_start_match,0}}, + S1 = add_code([icode_guardop([MS], Primop1, [V], L1, Fail), + icode_label(L1)],S), + {Env1,S2} = bin_seg_patterns(Segs, Vars, MS, Fail, Env, S1, false), + L2 = new_label(), + Primop2 = {hipe_bs_primop, {bs_test_tail, 0}}, + {Env1, add_code([icode_guardop([], Primop2, [MS], L2, Fail), + icode_label(L2)], S2)}. + +bin_seg_patterns([Seg|Rest], [T|Ts], MS, Fail, Env, S, Align) -> + {{NewEnv, S1}, NewAlign} = bin_seg_pattern(Seg, T, MS, Fail, Env, S, Align), + bin_seg_patterns(Rest, Ts, MS, Fail, NewEnv, S1, NewAlign); + +bin_seg_patterns([], [], _MS, _Fail, Env, S, _Align) -> + {Env, S}. + +bin_seg_pattern(P, V, MS, Fail, Env, S, Align) -> + L = new_label(), + Size = cerl:bitstr_size(P), + Unit = cerl:bitstr_unit(P), + Type = cerl:concrete(cerl:bitstr_type(P)), + LiteralFlags = cerl:bitstr_flags(P), + T = cerl:bitstr_val(P), + Flags = translate_flags(LiteralFlags, Align), + case calculate_size(Unit, Size, false, Env, S) of + {all, NewUnit, NewAlign, S0} -> + Type = binary, + Name = {bs_get_binary_all_2, NewUnit, Flags}, + Primop = {hipe_bs_primop, Name}, + S1 = add_code([icode_guardop([V,MS], Primop, [MS], L, Fail), + icode_label(L)], S0), + {pattern(T, V, Fail, Env, S1), NewAlign}; + {NewUnit, Args, S0, NewAlign} -> + Name = + case Type of + integer -> + {bs_get_integer, NewUnit, Flags}; + float -> + {bs_get_float, NewUnit, Flags}; + binary -> + {bs_get_binary, NewUnit, Flags} + end, + Primop = {hipe_bs_primop, Name}, + S1 = add_code([icode_guardop([V,MS], Primop, [MS|Args], L, Fail), + icode_label(L)], S0), + {pattern(T, V, Fail, Env, S1), NewAlign} + end. + +%% --------------------------------------------------------------------- +%% Boolean expressions + +%% This generates code for a boolean expression (such as "primop +%% 'and'(X, Y)") in a normal expression context, when an actual `true' +%% or `false' value is to be computed. We set up a default fail-label +%% for generating a `badarg' error, unless we are in a guard. + +boolean_expr(E, [V], Ctxt=#ctxt{class = guard}, Env, S) -> + {Code, True, False} = make_bool_glue(V), + S1 = boolean(E, True, False, Ctxt, Env, S), + add_code(Code, S1); +boolean_expr(E, [V] = Ts, Ctxt, Env, S) -> + {Code, True, False} = make_bool_glue(V), + Fail = new_label(), + Cont = new_continuation_label(Ctxt), + Ctxt1 = Ctxt#ctxt{final = false, effect = false, fail = Fail}, + S1 = boolean(E, True, False, Ctxt1, Env, S), + S2 = maybe_return(Ts, Ctxt, add_code(Code, S1)), + S3 = add_continuation_jump(Cont, Ctxt, S2), + S4 = add_code([icode_label(Fail)], S3), + S5 = add_error(icode_const(badarg), Ctxt, S4), % can add dummy label + S6 = add_continuation_jump(Cont, Ctxt, S5), % avoid empty basic block + add_continuation_label(Cont, Ctxt, S6); +boolean_expr(_, [], _Ctxt, _Env, _S) -> + error_high_degree(), + throw(error); +boolean_expr(_, _, _Ctxt, _Env, _S) -> + error_low_degree(), + throw(error). + +%% This is for when we expect a boolean result in jumping code context, +%% but are not sure what the expression will produce, or we know that +%% the result is not a boolean and we just want error handling. + +expect_boolean_value(E, True, False, Ctxt, Env, S) -> + V = make_var(), + S1 = expr(E, [V], Ctxt#ctxt{final = false}, Env, S), + case Ctxt#ctxt.fail of + [] -> + %% No fail-label set - this means we are *sure* that the + %% result can only be 'true' or 'false'. + add_code([make_type([V], ?TYPE_ATOM(true), True, False)], + S1); + Fail -> + Next = new_label(), + add_code([make_type([V], ?TYPE_ATOM(true), True, Next), + icode_label(Next), + make_type([V], ?TYPE_ATOM(false), False, Fail)], + S1) + end. + +%% This generates code for a case-switch with exactly one 'true' branch +%% and one 'false' branch, and no other branches (not even a catch-all). +%% Note that E must be guaranteed to produce a boolean value for such a +%% switch to have been generated. + +bool_switch(E, TrueExpr, FalseExpr, F, Ctxt, Env, S) -> + Cont = new_continuation_label(Ctxt), + True = new_label(), + False = new_label(), + Ctxt1 = Ctxt#ctxt{final = false, effect = false}, + S1 = boolean(E, True, False, Ctxt1, Env, S), + S2 = add_code([icode_label(True)], S1), + S3 = F(TrueExpr, Ctxt, Env, S2), + S4 = add_continuation_jump(Cont, Ctxt, S3), + S5 = add_code([icode_label(False)], S4), + S6 = F(FalseExpr, Ctxt, Env, S5), + add_continuation_label(Cont, Ctxt, S6). + +%% This generates jumping code for booleans. If the fail-label is set, +%% it tells where to go in case a value turns out not to be a boolean. + +%% In strict boolean expressions, we set a flag to be checked if +%% necessary after both branches have been evaluated. An alternative +%% would be to duplicate the code for the second argument, for each +%% value ('true' or 'false') of the first argument. + +%% (Note that subexpressions are checked repeatedly to see if they are +%% safe - this is quadratic, but I don't expect booleans to be very +%% deeply nested.) + +%% Note that 'and', 'or' and 'xor' are strict (like all primops)! + +boolean(E0, True, False, Ctxt, Env, S) -> + E = cerl_lib:reduce_expr(E0), + case cerl:type(E) of + literal -> + case cerl:concrete(E) of + true -> + add_code([icode_goto(True)], S); + false -> + add_code([icode_goto(False)], S); + _ -> + expect_boolean_value(E, True, False, Ctxt, Env, S) + end; + values -> + case cerl:values_es(E) of + [E1] -> + boolean(E1, True, False, Ctxt, Env, S); + _ -> + error_msg("degree mismatch - expected boolean: ~P", + [E, 10]), + throw(error) + end; + primop -> + Name = cerl:atom_val(cerl:primop_name(E)), + As = cerl:primop_args(E), + Arity = length(As), + case {Name, Arity} of + {?PRIMOP_NOT, 1} -> + %% `not' simply switches true and false labels. + [A] = As, + boolean(A, False, True, Ctxt, Env, S); + {?PRIMOP_AND, 2} -> + strict_and(As, True, False, Ctxt, Env, S); + {?PRIMOP_OR, 2} -> + strict_or(As, True, False, Ctxt, Env, S); + {?PRIMOP_XOR, 2} -> + %% `xor' always needs to evaluate both arguments + strict_xor(As, True, False, Ctxt, Env, S); + _ -> + case is_comp_op(Name, Arity) of + true -> + comparison(Name, As, True, False, Ctxt, Env, + S); + false -> + case is_type_test(Name, Arity) of + true -> + type_test(Name, As, True, False, + Ctxt, Env, S); + false -> + expect_boolean_value(E, True, False, + Ctxt, Env, S) + end + end + end; + 'case' -> + %% Propagate boolean handling into clause bodies. + %% (Note that case switches assume fallthrough code in the + %% clause bodies, so we must add a dummy label as needed.) + F = fun (BF, CtxtF, EnvF, SF) -> + SF1 = boolean(BF, True, False, CtxtF, EnvF, SF), + add_new_continuation_label(CtxtF, SF1) + end, + S1 = expr_case_1(E, F, Ctxt, Env, S), + %% Add a final goto if necessary, to compensate for the + %% final continuation label of the case-expression. This + %% should be unreachable, so the value does not matter. + add_continuation_jump(False, Ctxt, S1); + seq -> + %% Propagate boolean handling into body. + F = fun (BF, CtxtF, EnvF, SF) -> + boolean(BF, True, False, CtxtF, EnvF, SF) + end, + expr_seq_1(E, F, Ctxt, Env, S); + 'let' -> + %% Propagate boolean handling into body. Note that we have + %% called 'cerl_lib:reduce_expr/1' above. + F = fun (BF, CtxtF, EnvF, SF) -> + boolean(BF, True, False, CtxtF, EnvF, SF) + end, + expr_let_1(E, F, Ctxt, Env, S); + 'try' -> + case Ctxt#ctxt.class of + guard -> + %% This *must* be a "protected" guard expression on + %% the form "try E of X -> X catch <...> -> 'false'" + %% (we could of course test if the handler body is + %% the atom 'false', etc.). + Ctxt1 = Ctxt#ctxt{fail = False}, + boolean(cerl:try_arg(E), True, False, Ctxt1, Env, S); + _ -> + %% Propagate boolean handling into the handler and body + %% (see propagation into case switches for comparison) + F = fun (BF, CtxtF, EnvF, SF) -> + boolean(BF, True, False, CtxtF, EnvF, SF) + end, + S1 = expr_try_1(E, F, Ctxt, Env, S), + add_continuation_jump(False, Ctxt, S1) + end; + _ -> + %% This handles everything else, including cases that are + %% known to not return a boolean. + expect_boolean_value(E, True, False, Ctxt, Env, S) + end. + +strict_and([A, B], True, False, Ctxt, Env, S) -> + V = make_var(), + {Glue, True1, False1} = make_bool_glue(V), + S1 = boolean(A, True1, False1, Ctxt, Env, S), + S2 = add_code(Glue, S1), + Test = new_label(), + S3 = boolean(B, Test, False, Ctxt, Env, S2), + add_code([icode_label(Test), + make_bool_test(V, True, False)], + S3). + +strict_or([A, B], True, False, Ctxt, Env, S) -> + V = make_var(), + {Glue, True1, False1} = make_bool_glue(V), + S1 = boolean(A, True1, False1, Ctxt, Env, S), + S2 = add_code(Glue, S1), + Test = new_label(), + S3 = boolean(B, True, Test, Ctxt, Env, S2), + add_code([icode_label(Test), + make_bool_test(V, True, False)], + S3). + +strict_xor([A, B], True, False, Ctxt, Env, S) -> + V = make_var(), + {Glue, True1, False1} = make_bool_glue(V), + S1 = boolean(A, True1, False1, Ctxt, Env, S), + S2 = add_code(Glue, S1), + Test1 = new_label(), + Test2 = new_label(), + S3 = boolean(B, Test1, Test2, Ctxt, Env, S2), + add_code([icode_label(Test1), + make_bool_test(V, False, True), + icode_label(Test2), + make_bool_test(V, True, False)], + S3). + +%% Primitive comparison operations are inline expanded as conditional +%% branches when part of a boolean expression, rather than made into +%% primop or guardop calls. Note that Without type information, we +%% cannot reduce equality tests like `Expr == true' to simply `Expr' +%% (and `Expr == false' to `not Expr'), because we are not sure that +%% Expr will yield a boolean - if it does not, the result of the +%% comparison should be `false'. + +comparison(Name, As, True, False, Ctxt, Env, S) -> + {Vs, S1} = expr_list(As, Ctxt, Env, S), + Test = comp_test(Name), + add_code([make_if(Test, Vs, True, False)], S1). + +comp_test(?PRIMOP_EQ) -> ?TEST_EQ; +comp_test(?PRIMOP_NE) -> ?TEST_NE; +comp_test(?PRIMOP_EXACT_EQ) -> ?TEST_EXACT_EQ; +comp_test(?PRIMOP_EXACT_NE) -> ?TEST_EXACT_NE; +comp_test(?PRIMOP_LT) -> ?TEST_LT; +comp_test(?PRIMOP_GT) -> ?TEST_GT; +comp_test(?PRIMOP_LE) -> ?TEST_LE; +comp_test(?PRIMOP_GE) -> ?TEST_GE. + +type_test(?PRIMOP_IS_RECORD, [T, A, N], True, False, Ctxt, Env, S) -> + is_record_test(T, A, N, True, False, Ctxt, Env, S); +type_test(Name, [A], True, False, Ctxt, Env, S) -> + V = make_var(), + S1 = expr(A, [V], Ctxt#ctxt{final = false, effect = false}, Env, S), + Test = type_test(Name), + add_code([make_type([V], Test, True, False)], S1). + +%% It turned out to be easiest to generate Icode directly for this. +is_record_test(T, A, N, True, False, Ctxt, Env, S) -> + case cerl:is_c_atom(A) andalso cerl:is_c_int(N) + andalso (cerl:concrete(N) > 0) of + true -> + V = make_var(), + Ctxt1 = Ctxt#ctxt{final = false, effect = false}, + S1 = expr(T, [V], Ctxt1, Env, S), + Atom = cerl:concrete(A), + Size = cerl:concrete(N), + add_code([make_type([V], ?TYPE_IS_RECORD(Atom, Size), True, False)], + S1); + false -> + error_primop_badargs(?PRIMOP_IS_RECORD, [T, A, N]), + throw(error) + end. + +type_test(?PRIMOP_IS_ATOM) -> ?TYPE_IS_ATOM; +type_test(?PRIMOP_IS_BIGNUM) -> ?TYPE_IS_BIGNUM; +type_test(?PRIMOP_IS_BINARY) -> ?TYPE_IS_BINARY; +type_test(?PRIMOP_IS_CONSTANT) -> ?TYPE_IS_CONSTANT; +type_test(?PRIMOP_IS_FIXNUM) -> ?TYPE_IS_FIXNUM; +type_test(?PRIMOP_IS_FLOAT) -> ?TYPE_IS_FLOAT; +type_test(?PRIMOP_IS_FUNCTION) -> ?TYPE_IS_FUNCTION; +type_test(?PRIMOP_IS_INTEGER) -> ?TYPE_IS_INTEGER; +type_test(?PRIMOP_IS_LIST) -> ?TYPE_IS_LIST; +type_test(?PRIMOP_IS_NUMBER) -> ?TYPE_IS_NUMBER; +type_test(?PRIMOP_IS_PID) -> ?TYPE_IS_PID; +type_test(?PRIMOP_IS_PORT) -> ?TYPE_IS_PORT; +type_test(?PRIMOP_IS_REFERENCE) -> ?TYPE_IS_REFERENCE; +type_test(?PRIMOP_IS_TUPLE) -> ?TYPE_IS_TUPLE. + +is_comp_op(?PRIMOP_EQ, 2) -> true; +is_comp_op(?PRIMOP_NE, 2) -> true; +is_comp_op(?PRIMOP_EXACT_EQ, 2) -> true; +is_comp_op(?PRIMOP_EXACT_NE, 2) -> true; +is_comp_op(?PRIMOP_LT, 2) -> true; +is_comp_op(?PRIMOP_GT, 2) -> true; +is_comp_op(?PRIMOP_LE, 2) -> true; +is_comp_op(?PRIMOP_GE, 2) -> true; +is_comp_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +is_bool_op(?PRIMOP_AND, 2) -> true; +is_bool_op(?PRIMOP_OR, 2) -> true; +is_bool_op(?PRIMOP_XOR, 2) -> true; +is_bool_op(?PRIMOP_NOT, 1) -> true; +is_bool_op(Op, A) when is_atom(Op), is_integer(A) -> false. + +is_type_test(?PRIMOP_IS_ATOM, 1) -> true; +is_type_test(?PRIMOP_IS_BIGNUM, 1) -> true; +is_type_test(?PRIMOP_IS_BINARY, 1) -> true; +is_type_test(?PRIMOP_IS_CONSTANT, 1) -> true; +is_type_test(?PRIMOP_IS_FIXNUM, 1) -> true; +is_type_test(?PRIMOP_IS_FLOAT, 1) -> true; +is_type_test(?PRIMOP_IS_FUNCTION, 1) -> true; +is_type_test(?PRIMOP_IS_INTEGER, 1) -> true; +is_type_test(?PRIMOP_IS_LIST, 1) -> true; +is_type_test(?PRIMOP_IS_NUMBER, 1) -> true; +is_type_test(?PRIMOP_IS_PID, 1) -> true; +is_type_test(?PRIMOP_IS_PORT, 1) -> true; +is_type_test(?PRIMOP_IS_REFERENCE, 1) -> true; +is_type_test(?PRIMOP_IS_TUPLE, 1) -> true; +is_type_test(?PRIMOP_IS_RECORD, 3) -> true; +is_type_test(Op, A) when is_atom(Op), is_integer(A) -> false. + + +%% --------------------------------------------------------------------- +%% Utility functions + +bind_var(V, Name, Env) -> + env__bind(cerl:var_name(V), #cerl_to_icode__var{name = Name}, Env). + +bind_vars([V | Vs], [X | Xs], Env) -> + bind_vars(Vs, Xs, bind_var(V, X, Env)); +bind_vars([], [], Env) -> + Env. + +bind_fun(V, L, Vs, Env) -> + env__bind(cerl:var_name(V), #'fun'{label = L, vars = Vs}, Env). + +add_code(Code, S) -> + s__add_code(Code, S). + +%% This inserts code when necessary for assigning the targets in the +%% first list to those in the second. + +glue([V1 | Vs1], [V2 | Vs2], S) -> + if V1 =:= V2 -> + S; + true -> + glue(Vs1, Vs2, add_code([icode_move(V2, V1)], S)) + end; +glue([], [], S) -> + S; +glue([], _, S) -> + warning_low_degree(), + S; +glue(_, [], _) -> + error_high_degree(), + throw(error). + +make_moves([V1 | Vs1], [V2 | Vs2]) -> + [icode_move(V1, V2) | make_moves(Vs1, Vs2)]; +make_moves([], []) -> + []. + +%% If the context signals `final', we generate a return instruction, +%% otherwise nothing happens. + +maybe_return(Ts, Ctxt, S) -> + case Ctxt#ctxt.final of + false -> + S; + true -> + add_return(Ts, S) + end. + +add_return(Ts, S) -> + add_code([icode_return(Ts)], S). + +new_continuation_label(Ctxt) -> + case Ctxt#ctxt.final of + false -> + new_label(); + true -> + undefined + end. + +add_continuation_label(Label, Ctxt, S) -> + case Ctxt#ctxt.final of + false -> + add_code([icode_label(Label)], S); + true -> + S + end. + +add_continuation_jump(Label, Ctxt, S) -> + case Ctxt#ctxt.final of + false -> + add_code([icode_goto(Label)], S); + true -> + S + end. + +%% This is used to insert a new dummy label (if necessary) when +%% a block is ended suddenly; cf. add_fail. +add_new_continuation_label(Ctxt, S) -> + add_continuation_label(new_continuation_label(Ctxt), Ctxt, S). + +add_local_call({Name, _Arity} = V, Vs, Ts, Ctxt, S) -> + Module = s__get_module(S), + case Ctxt#ctxt.final of + false -> + add_code([icode_call_local(Ts, Module, Name, Vs)], S); + true -> + Self = s__get_function(S), + if V =:= Self -> + %% Self-recursive tail call: + {Label, Vs1} = s__get_local_entry(S), + add_code(make_moves(Vs1, Vs) ++ [icode_goto(Label)], + S); + true -> + add_code([icode_enter_local(Module, Name, Vs)], S) + end + end. + +%% Note that this has the same "problem" as the fail instruction (see +%% the 'add_fail' function), namely, that it unexpectedly ends a basic +%% block. The solution is the same - add a dummy label if necessary. + +add_letrec_call(Label, Vs1, Vs, Ctxt, S) -> + S1 = add_code(make_moves(Vs1, Vs) ++ [icode_goto(Label)], S), + add_new_continuation_label(Ctxt, S1). + +add_exit(V, Ctxt, S) -> + add_fail([V], exit, Ctxt, S). + +add_throw(V, Ctxt, S) -> + add_fail([V], throw, Ctxt, S). + +add_error(V, Ctxt, S) -> + add_fail([V], error, Ctxt, S). + +add_error(V, F, Ctxt, S) -> + add_fail([V, F], error, Ctxt, S). + +add_rethrow(E, V, Ctxt, S) -> + add_fail([E, V], rethrow, Ctxt, S). + +%% Failing is special, because it can "suddenly" end the basic block, +%% even though the context was expecting the code to fall through, for +%% instance when you have a call to 'exit(X)' that is not in a tail call +%% context. In those cases a dummy label must therefore be added after +%% the fail instruction, to start a new (but unreachable) basic block. + +add_fail(Vs, Class, Ctxt, S0) -> + S1 = add_code([icode_fail(Vs, Class)], S0), + add_new_continuation_label(Ctxt, S1). + +%% We must add continuation- and fail-labels if we are in a guard context. + +make_op(Name, Ts, As, Ctxt) -> + case Ctxt#ctxt.final of + false -> + case Ctxt#ctxt.class of + guard -> + Next = new_label(), + [icode_guardop(Ts, Name, As, Next, Ctxt#ctxt.fail), + icode_label(Next)]; + _ -> + [icode_call_primop(Ts, Name, As)] + end; + true -> + [icode_enter_primop(Name, As)] + end. + +make_call(M, F, Ts, As, Ctxt) -> + case Ctxt#ctxt.final of + false -> + case Ctxt#ctxt.class of + guard -> + Next = new_label(), + [icode_call_remote(Ts, M, F, As, Next, + Ctxt#ctxt.fail, true), + icode_label(Next)]; + _ -> + [icode_call_remote(Ts, M, F, As)] + end; + true -> + %% A final call can't be in a guard anyway + [icode_enter_remote(M, F, As)] + end. + +%% Recognize useless tests that always go to the same label. This often +%% happens as an artefact of the translation. + +make_if(_, _, Label, Label) -> + icode_goto(Label); +make_if(Test, As, True, False) -> + icode_if(Test, As, True, False). + +make_type(_, _, Label, Label) -> + icode_goto(Label); +make_type(Vs, Test, True, False) -> + icode_type(Vs, Test, True, False). + +%% Creating glue code with true/false target labels for assigning a +%% corresponding 'true'/'false' value to a specific variable. Used as +%% glue between boolean jumping code and boolean values. + +make_bool_glue(V) -> + make_bool_glue(V, true, false). + +make_bool_glue(V, T, F) -> + False = new_label(), + True = new_label(), + Next = new_label(), + Code = [icode_label(False), + icode_move(V, icode_const(F)), + icode_goto(Next), + icode_label(True), + icode_move(V, icode_const(T)), + icode_label(Next)], + {Code, True, False}. + +make_bool_test(V, True, False) -> + make_type([V], ?TYPE_ATOM(true), True, False). + +%% Checking if an expression is safe + +is_safe_expr(E) -> + cerl_lib:is_safe_expr(E, fun function_check/2). + +function_check(safe, {Name, Arity}) -> + is_safe_op(Name, Arity); +function_check(safe, {Module, Name, Arity}) -> + erl_bifs:is_safe(Module, Name, Arity); +function_check(pure, {Name, Arity}) -> + is_pure_op(Name, Arity); +function_check(pure, {Module, Name, Arity}) -> + erl_bifs:is_pure(Module, Name, Arity); +function_check(_, _) -> + false. + +%% There are very few really safe operations (sigh!). If we have type +%% information, several operations could be rewritten into specialized +%% safe versions, such as '+'/2 -> add_integer/2. + +is_safe_op(N, A) -> + is_comp_op(N, A) orelse is_type_test(N, A). + +is_pure_op(?PRIMOP_ELEMENT, 2) -> true; +is_pure_op(?PRIMOP_MAKE_FUN, 6) -> true; +is_pure_op(?PRIMOP_FUN_ELEMENT, 2) -> true; +is_pure_op(?PRIMOP_ADD, 2) -> true; +is_pure_op(?PRIMOP_SUB, 2) -> true; +is_pure_op(?PRIMOP_NEG, 1) -> true; +is_pure_op(?PRIMOP_MUL, 2) -> true; +is_pure_op(?PRIMOP_DIV, 2) -> true; +is_pure_op(?PRIMOP_INTDIV, 2) -> true; +is_pure_op(?PRIMOP_REM, 2) -> true; +is_pure_op(?PRIMOP_BAND, 2) -> true; +is_pure_op(?PRIMOP_BOR, 2) -> true; +is_pure_op(?PRIMOP_BXOR, 2) -> true; +is_pure_op(?PRIMOP_BNOT, 1) -> true; +is_pure_op(?PRIMOP_BSL, 2) -> true; +is_pure_op(?PRIMOP_BSR, 2) -> true; +is_pure_op(?PRIMOP_EXIT, 1) -> true; +is_pure_op(?PRIMOP_THROW, 1) -> true; +is_pure_op(?PRIMOP_ERROR, 1) -> true; +is_pure_op(?PRIMOP_ERROR, 2) -> true; +is_pure_op(?PRIMOP_RETHROW, 2) -> true; +is_pure_op(N, A) -> is_pure_op_aux(N, A). + +is_pure_op_aux(N, A) -> + is_bool_op(N, A) orelse is_comp_op(N, A) orelse is_type_test(N, A). + +translate_flags(Flags, Align) -> + translate_flags1(cerl:concrete(Flags), Align). + +translate_flags1([A|Rest], Align) -> + case A of + signed -> + 4 + translate_flags1(Rest, Align); + little -> + 2 + translate_flags1(Rest, Align); + native -> + case hipe_rtl_arch:endianess() of + little -> + 2 + translate_flags1(Rest, Align); + big -> + translate_flags1(Rest, Align) + end; + _ -> + translate_flags1(Rest, Align) + end; +translate_flags1([], Align) -> + case Align of + 0 -> + 1; + _ -> + 0 + end. + +get_const_info(Val, integer) -> + case {cerl:is_c_var(Val), cerl:is_c_int(Val)} of + {true, _} -> + var; + {_, true} -> + pass; + _ -> + fail + end; +get_const_info(Val, float) -> + case {cerl:is_c_var(Val), cerl:is_c_float(Val)} of + {true, _} -> + var; + {_, true} -> + pass; + _ -> + fail + end; +get_const_info(_Val, _Type) -> + []. + +calculate_size(Unit, Var, Align, Env, S) -> + case cerl:is_c_atom(Var) of + true -> + {cerl:atom_val(Var), cerl:concrete(Unit), Align, S}; + false -> + case cerl:is_c_int(Var) of + true -> + NewVal = cerl:concrete(Var) * cerl:concrete(Unit), + NewAlign = + case Align of + false -> + false + %% Currently, all uses of the function are + %% with "Aligned == false", and this case + %% is commented out to shut up Dialyzer. + %% _ -> + %% (NewVal+Align) band 7 + end, + {NewVal, [], S, NewAlign}; + false -> + NewSize = make_var(), + S1 = expr(Var, [NewSize], #ctxt{final=false}, Env, S), + NewAlign = + case cerl:concrete(Unit) band 7 of + 0 -> + Align; + _ -> + false + end, + {cerl:concrete(Unit), [NewSize], S1, NewAlign} + end + end. + + +%% --------------------------------------------------------------------- +%% Environment (abstract datatype) + +env__new() -> + rec_env:empty(). + +env__bind(Key, Val, Env) -> + rec_env:bind(Key, Val, Env). + +env__lookup(Key, Env) -> + rec_env:lookup(Key, Env). + +env__get(Key, Env) -> + rec_env:get(Key, Env). + +%% env__new_integer_keys(N, Env) -> +%% rec_env:new_keys(N, Env). + + +%% --------------------------------------------------------------------- +%% State (abstract datatype) + +-record(state, {module, function, local, labels=gb_trees:empty(), + code = [], pmatch=true, bitlevel_binaries=false}). + +s__new(Module) -> + #state{module = Module}. + +s__get_module(S) -> + S#state.module. + +s__set_function(Name, S) -> + S#state{function = Name}. + +s__get_function(S) -> + S#state.function. + +s__set_local_entry(Info, S) -> + S#state{local = Info}. + +s__get_local_entry(S) -> + S#state.local. + +%% Generated code is kept in reverse order, to make adding fast. + +s__set_code(Code, S) -> + S#state{code = lists:reverse(Code)}. + +s__get_code(S) -> + lists:reverse(S#state.code). + +s__add_code(Code, S) -> + S#state{code = lists:reverse(Code, S#state.code)}. + +s__get_label(Ref, S) -> + Labels = S#state.labels, + case gb_trees:lookup(Ref, Labels) of + none -> + Label = new_label(), + S1 = S#state{labels=gb_trees:enter(Ref, Label, Labels)}, + {Label, S1}; + {value, Label} -> + {Label,S} + end. + +s__set_pmatch(V, S) -> + S#state{pmatch = V}. + +s__get_pmatch(S) -> + S#state.pmatch. + +s__set_bitlevel_binaries(true, S) -> + S#state{bitlevel_binaries = true}; +s__set_bitlevel_binaries(_, S) -> + S#state{bitlevel_binaries = false}. + +s__get_bitlevel_binaries(S) -> + S#state.bitlevel_binaries. +%% --------------------------------------------------------------------- +%%% Match label State + +%-record(mstate,{labels=gb_trees:empty()}). + +%get_correct_label(Alias, MState=#mstate{labels=Labels}) -> +% case gb_trees:lookup(Alias, Labels) of +% none -> +% LabelName=new_label(), +% {LabelName, MState#mstate{labels=gb_trees:insert(Alias, LabelName, Labels)}}; +% {value, LabelName} -> +% {LabelName, MState} +% end. + + +%% --------------------------------------------------------------------- +%% General utilities + +reset_var_counter() -> + hipe_gensym:set_var(0). + +reset_label_counter() -> + hipe_gensym:set_label(0). + +new_var() -> + hipe_gensym:get_next_var(). + +new_label() -> + hipe_gensym:get_next_label(). + +max_var() -> + hipe_gensym:get_var(). + +max_label() -> + hipe_gensym:get_label(). + +make_var() -> + icode_var(new_var()). + +make_vars(N) when N > 0 -> + [make_var() | make_vars(N - 1)]; +make_vars(0) -> + []. + +make_reg() -> + icode_reg(new_var()). + + +%% --------------------------------------------------------------------- +%% ICode interface + +icode_icode(M, {F, A}, Vs, Closure, C, V, L) -> + MFA = {M, F, A}, + hipe_icode:mk_icode(MFA, Vs, Closure, false, C, V, L). + +icode_icode_name(Icode) -> + hipe_icode:icode_fun(Icode). + +icode_comment(S) -> hipe_icode:mk_comment(S). + +icode_var(V) -> hipe_icode:mk_var(V). + +icode_reg(V) -> hipe_icode:mk_reg(V). + +icode_label(L) -> hipe_icode:mk_label(L). + +icode_move(V, D) -> hipe_icode:mk_move(V, D). + +icode_const(X) -> hipe_icode:mk_const(X). + +icode_const_val(X) -> hipe_icode:const_value(X). + +icode_call_local(Ts, M, N, Vs) -> + hipe_icode:mk_call(Ts, M, N, Vs, local). + +icode_call_remote(Ts, M, N, Vs) -> + hipe_icode:mk_call(Ts, M, N, Vs, remote). + +icode_call_remote(Ts, M, N, Vs, Cont, Fail, Guard) -> + hipe_icode:mk_call(Ts, M, N, Vs, remote, Cont, Fail, Guard). + +icode_enter_local(M, N, Vs) -> + hipe_icode:mk_enter(M, N, Vs, local). + +icode_enter_remote(M, N, Vs) -> + hipe_icode:mk_enter(M, N, Vs, remote). + +icode_call_fun(Ts, Vs) -> + icode_call_primop(Ts, call_fun, Vs). + +icode_enter_fun(Vs) -> + icode_enter_primop(enter_fun, Vs). + +icode_begin_try(L,Cont) -> hipe_icode:mk_begin_try(L,Cont). + +icode_end_try() -> hipe_icode:mk_end_try(). + +icode_begin_handler(Ts) -> hipe_icode:mk_begin_handler(Ts). + +icode_goto(L) -> hipe_icode:mk_goto(L). + +icode_return(Ts) -> hipe_icode:mk_return(Ts). + +icode_fail(Vs, C) -> hipe_icode:mk_fail(Vs, C). + +icode_guardop(Ts, Name, As, Succ, Fail) -> + hipe_icode:mk_guardop(Ts, Name, As, Succ, Fail). + +icode_call_primop(Ts, Name, As) -> hipe_icode:mk_primop(Ts, Name, As). + +icode_call_primop(Ts, Name, As, Succ, Fail) -> + hipe_icode:mk_primop(Ts, Name, As, Succ, Fail). + +icode_enter_primop(Name, As) -> hipe_icode:mk_enter_primop(Name, As). + +icode_if(Test, As, True, False) -> + hipe_icode:mk_if(Test, As, True, False). + +icode_type(Test, As, True, False) -> + hipe_icode:mk_type(Test, As, True, False). + +icode_switch_val(Arg, Fail, Length, Cases) -> + hipe_icode:mk_switch_val(Arg, Fail, Length, Cases). + +icode_switch_tuple_arity(Arg, Fail, Length, Cases) -> + SortedCases = lists:keysort(1, Cases), %% immitate BEAM compiler - Kostis + hipe_icode:mk_switch_tuple_arity(Arg, Fail, Length, SortedCases). + + +%% --------------------------------------------------------------------- +%% Error reporting + +error_not_in_receive(Name) -> + error_msg("primitive operation `~w' missing receive-context.", + [Name]). + +low_degree() -> + "degree of expression less than expected.". + +warning_low_degree() -> + warning_msg(low_degree()). + +error_low_degree() -> + error_msg(low_degree()). + +error_high_degree() -> + error_msg("degree of expression greater than expected."). + +error_degree_mismatch(N, E) -> + error_msg("expression does not have expected degree (~w): ~P.", + [N, E, 10]). + +error_nonlocal_application(Op) -> + error_msg("application operator not a local function: ~P.", + [Op, 10]). + +error_primop_badargs(Op, As) -> + error_msg("bad arguments to `~w' operation: ~P.", + [Op, As, 15]). + +%% internal_error_msg(S) -> +%% internal_error_msg(S, []). + +%% internal_error_msg(S, Vs) -> +%% error_msg(lists:concat(["Internal error: ", S]), Vs). + +error_msg(S) -> + error_msg(S, []). + +error_msg(S, Vs) -> + error_logger:error_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs). + +warning_msg(S) -> + warning_msg(S, []). + +warning_msg(S, Vs) -> + info_msg(lists:concat(["warning: ", S]), Vs). + +%% info_msg(S) -> +%% info_msg(S, []). + +info_msg(S, Vs) -> + error_logger:info_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs). + + +%% -------------------------------------------------------------------------- +%% Binary stuff + +binary_match([Clause|Clauses], F, [V], Next, Fail, Ctxt, Env, S) -> + Guard = cerl:clause_guard(Clause), + Body = cerl:clause_body(Clause), + [Pat] = cerl:clause_pats(Clause), + {FL,S1} = s__get_label(translate_label_primop(Guard),S), + {Env1,S2} = binary_pattern(Pat,V,FL,Env,S1), + S3 = F(Body, Ctxt, Env1, S2), + S4 = add_continuation_jump(Next, Ctxt, S3), + S5 = add_code([icode_label(FL)], S4), + binary_match(Clauses, F, [V], Next, Fail, Ctxt, Env, S5); +binary_match([], _F, _, _Next, Fail, _Ctxt, _Env, S) -> + add_code([icode_goto(Fail)], S). + +translate_label_primop(LabelPrimop) -> + ?PRIMOP_SET_LABEL = cerl:atom_val(cerl:primop_name(LabelPrimop)), + [Ref] = cerl:primop_args(LabelPrimop), + Ref. + + diff --git a/lib/hipe/cerl/cerl_typean.erl b/lib/hipe/cerl/cerl_typean.erl new file mode 100644 index 0000000000..ccd8903658 --- /dev/null +++ b/lib/hipe/cerl/cerl_typean.erl @@ -0,0 +1,1003 @@ +%% -*- erlang-indent-level: 4 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Type analysis of Core Erlang programs. +%% +%% Copyright (C) 2001-2002 Richard Carlsson +%% +%% Author contact: richardc@it.uu.se +%% +%% @doc Type analysis of Core Erlang programs. + +%% TODO: filters must handle conjunctions for better precision! +%% TODO: should get filters from patterns as well as guards. +%% TODO: unused functions are being included in the analysis. + +-module(cerl_typean). + +-export([core_transform/2, analyze/1, pp_hook/0]). +%%-export([analyze/2, analyze/5, annotate/1, annotate/2, annotate/5]). + +-import(erl_types, [t_any/0, t_atom/0, t_atom_vals/1, t_binary/0, + t_cons/2, t_cons_hd/1, t_cons_tl/1, t_float/0, + t_fun/0, t_fun/2, t_from_range/2, t_from_term/1, + t_inf/2, t_integer/0, + t_is_any/1, t_is_atom/1, t_is_cons/1, t_is_list/1, + t_is_maybe_improper_list/1, t_is_none/1, t_is_tuple/1, + t_limit/2, t_list_elements/1, t_maybe_improper_list/0, + t_none/0, t_number/0, t_pid/0, t_port/0, t_product/1, + t_reference/0, t_sup/2, t_to_tlist/1, t_tuple/0, t_tuple/1, + t_tuple_args/1, t_tuple_size/1, t_tuple_subtypes/1]). + +-import(cerl, [ann_c_fun/3, ann_c_var/2, alias_pat/1, alias_var/1, + apply_args/1, apply_op/1, atom_val/1, bitstr_size/1, + bitstr_val/1, bitstr_type/1, bitstr_flags/1, binary_segments/1, + c_letrec/2, c_nil/0, + c_values/1, call_args/1, call_module/1, call_name/1, + case_arg/1, case_clauses/1, catch_body/1, clause_body/1, + clause_guard/1, clause_pats/1, concrete/1, cons_hd/1, + cons_tl/1, fun_body/1, fun_vars/1, get_ann/1, int_val/1, + is_c_atom/1, is_c_int/1, let_arg/1, let_body/1, let_vars/1, + letrec_body/1, letrec_defs/1, module_defs/1, + module_defs/1, module_exports/1, pat_vars/1, + primop_args/1, primop_name/1, receive_action/1, + receive_clauses/1, receive_timeout/1, seq_arg/1, + seq_body/1, set_ann/2, try_arg/1, try_body/1, + try_evars/1, try_handler/1, try_vars/1, tuple_arity/1, + tuple_es/1, type/1, values_es/1, var_name/1]). + +-import(cerl_trees, [get_label/1]). + +-ifdef(DEBUG). +-define(ANNOTATE(X), case erl_types:t_to_string(X) of Q when length(Q) < 255 -> list_to_atom(Q); Q -> Q end). +-else. +-define(ANNOTATE(X), X). +-endif. + +%% Limit for type representation depth. +-define(DEF_LIMIT, 3). + + +%% @spec core_transform(Module::cerl_records(), Options::[term()]) -> +%% cerl_records() +%% +%% @doc Annotates a module represented by records with type +%% information. See annotate/1 for details. +%% +%%

Use the compiler option {core_transform, cerl_typean} +%% to insert this function as a compilation pass.

+%% +%% @see module/2 + +-spec core_transform(cerl:cerl(), [term()]) -> cerl:cerl(). + +core_transform(Code, _Opts) -> + {Code1, _} = cerl_trees:label(cerl:from_records(Code)), + %% io:fwrite("Running type analysis..."), + %% {T1,_} = statistics(runtime), + {Code2, _, _} = annotate(Code1), + %% {T2,_} = statistics(runtime), + %% io:fwrite("(~w ms).\n", [T2 - T1]), + cerl:to_records(Code2). + + +%% ===================================================================== +%% annotate(Tree) -> {Tree1, Type, Vars} +%% +%% Tree = cerl:cerl() +%% +%% Analyzes `Tree' (see `analyze') and appends terms `{type, Type}' +%% to the annotation list of each fun-expression node and +%% apply-expression node of `Tree', respectively, where `Labels' is +%% an ordered-set list of labels of fun-expressions in `Tree', +%% possibly also containing the atom `external', corresponding to +%% the dependency information derived by the analysis. Any previous +%% such annotations are removed from `Tree'. `Tree1' is the +%% modified tree; for details on `OutList', `Outputs' , +%% `Dependencies' and `Escapes', see `analyze'. +%% +%% Note: `Tree' must be annotated with labels in order to use this +%% function; see `analyze' for details. + +annotate(Tree) -> + annotate(Tree, ?DEF_LIMIT). + +annotate(Tree, Limit) -> + {_, _, Esc, Dep, Par} = cerl_closurean:analyze(Tree), + annotate(Tree, Limit, Esc, Dep, Par). + +annotate(Tree, Limit, Esc, Dep, Par) -> + {Type, Out, Vars} = analyze(Tree, Limit, Esc, Dep, Par), + DelAnn = fun (T) -> set_ann(T, delete_ann(type, get_ann(T))) end, + SetType = fun (T, Dict) -> + case dict:find(get_label(T), Dict) of + {ok, X} -> + case t_is_any(X) of + true -> + DelAnn(T); + false -> + set_ann(T, append_ann(type, + ?ANNOTATE(X), + get_ann(T))) + end; + error -> + DelAnn(T) + end + end, + F = fun (T) -> + case type(T) of + var -> + SetType(T, Vars); + apply -> + SetType(T, Out); + call -> + SetType(T, Out); + primop -> + SetType(T, Out); + 'fun' -> + SetType(T, Out); + _ -> + DelAnn(T) + end + end, + {cerl_trees:map(F, Tree), Type, Vars}. + +append_ann(Tag, Val, [X | Xs]) -> + if tuple_size(X) >= 1, element(1, X) =:= Tag -> + append_ann(Tag, Val, Xs); + true -> + [X | append_ann(Tag, Val, Xs)] + end; +append_ann(Tag, Val, []) -> + [{Tag, Val}]. + +delete_ann(Tag, [X | Xs]) -> + if tuple_size(X) >= 1, element(1, X) =:= Tag -> + delete_ann(Tag, Xs); + true -> + [X | delete_ann(Tag, Xs)] + end; +delete_ann(_, []) -> + []. + + +%% ===================================================================== +%% analyze(Tree) -> {OutList, Outputs, Dependencies} +%% +%% Tree = cerl:cerl() +%% OutList = [LabelSet] | none +%% Outputs = dict(integer(), OutList) +%% Dependencies = dict(integer(), LabelSet) +%% LabelSet = ordset(Label) +%% Label = integer() | external +%% +%% Analyzes a module or an expression represented by `Tree'. +%% +%% The returned `OutList' is a list of sets of labels of +%% fun-expressions which correspond to the possible closures in the +%% value list produced by `Tree' (viewed as an expression; the +%% "value" of a module contains its exported functions). The atom +%% `none' denotes missing or conflicting information. +%% +%% The atom `external' in any label set denotes any possible +%% function outside `Tree', including those in `Escapes'. +%% +%% `Outputs' is a mapping from the labels of fun-expressions in +%% `Tree' to corresponding lists of sets of labels of +%% fun-expressions (or the atom `none'), representing the possible +%% closures in the value lists returned by the respective +%% functions. +%% +%% `Dependencies' is a similar mapping from the labels of +%% fun-expressions and apply-expressions in `Tree' to sets of +%% labels of corresponding fun-expressions which may contain call +%% sites of the functions or be called from the call sites, +%% respectively. Any such label not defined in `Dependencies' +%% represents an unreachable function or a dead or faulty +%% application. +%% +%% `Escapes' is the set of labels of fun-expressions in `Tree' such +%% that corresponding closures may be accessed from outside `Tree'. +%% +%% Note: `Tree' must be annotated with labels (as done by the +%% function `cerl_trees:label/1') in order to use this function. +%% The label annotation `{label, L}' (where L should be an integer) +%% must be the first element of the annotation list of each node in +%% the tree. Instances of variables bound in `Tree' which denote +%% the same variable must have the same label; apart from this, +%% labels should be unique. Constant literals do not need to be +%% labeled. + +-record(state, {k, vars, out, dep, work, funs, envs}). + +%% Note: In order to keep our domain simple, we assume that all remote +%% calls and primops return a single value, if any. + +%% We wrap the given syntax tree T in a fun-expression labeled `top', +%% which is initially in the set of escaped labels. `top' will be +%% visited at least once. +%% +%% We create a separate function labeled `external', defined as: +%% "External = fun () -> Any", which will represent any and all +%% functions outside T, and whose return value has unknown type. + +-type label() :: integer() | 'external' | 'top'. +-type ordset(X) :: [X]. % XXX: TAKE ME OUT +-type labelset() :: ordset(label()). +-type outlist() :: [labelset()] | 'none'. + +-spec analyze(cerl:cerl()) -> {outlist(), dict(), dict()}. + +analyze(Tree) -> + analyze(Tree, ?DEF_LIMIT). + +analyze(Tree, Limit) -> + {_, _, Esc, Dep, Par} = cerl_closurean:analyze(Tree), + analyze(Tree, Limit, Esc, Dep, Par). + +analyze(Tree, Limit, Esc0, Dep0, Par) -> + %% Note that we use different name spaces for variable labels and + %% function/call site labels. We assume that the labeling of Tree + %% only uses integers, not atoms. + LabelExtL = [{label, external}], + External = ann_c_var(LabelExtL, {external, 1}), + ExtFun = ann_c_fun(LabelExtL, [], ann_c_var([{label, any}], 'Any')), +%%% io:fwrite("external fun:\n~s.\n", +%%% [cerl_prettypr:format(ExtFun, [noann, {paper, 80}])]), + LabelTopL = [{label, top}], + Top = ann_c_var(LabelTopL, {top, 0}), + TopFun = ann_c_fun(LabelTopL, [], Tree), + + %% The "start fun" just makes the initialisation easier. It is not + %% itself in the call graph. + StartFun = ann_c_fun([{label, start}], [], + c_letrec([{External, ExtFun}, {Top, TopFun}], + c_nil())), +%%% io:fwrite("start fun:\n~s.\n", +%%% [cerl_prettypr:format(StartFun, [{paper, 80}])]), + + %% Gather a database of all fun-expressions in Tree and initialise + %% their outputs and parameter variables. All escaping functions can + %% receive any values as inputs. Also add an extra dependency edge + %% from each fun-expression label to its parent fun-expression. +%%% io:fwrite("Escape: ~p.\n",[Esc0]), + Esc = sets:from_list(Esc0), + Any = t_any(), + None = t_none(), + Funs0 = dict:new(), + Vars0 = dict:store(any, Any, dict:new()), + Out0 = dict:store(top, None, + dict:store(external, None, dict:new())), + Envs0 = dict:store(top, dict:new(), + dict:store(external, dict:new(), dict:new())), + F = fun (T, S = {Fs, Vs, Os, Es}) -> + case type(T) of + 'fun' -> + L = get_label(T), + As = fun_vars(T), + X = case sets:is_element(L, Esc) of + true -> Any; + false -> None + end, + {dict:store(L, T, Fs), + bind_vars_single(As, X, Vs), + dict:store(L, None, Os), + dict:store(L, dict:new(), Es)}; + _ -> + S + end + end, + {Funs, Vars, Out, Envs} = cerl_trees:fold(F, {Funs0, Vars0, Out0, + Envs0}, StartFun), + + %% Add dependencies from funs to their parent funs. + Dep = lists:foldl(fun ({L, L1}, D) -> add_dep(L, L1, D) end, + Dep0, dict:to_list(Par)), + + %% Enter the fixpoint iteration at the StartFun. + St = loop(TopFun, top, #state{vars = Vars, + out = Out, + dep = Dep, + work = init_work(), + funs = Funs, + envs = Envs, + k = Limit}), + {dict:fetch(top, St#state.out), + tidy_dict([top, external], St#state.out), + tidy_dict([any], St#state.vars)}. + +tidy_dict([X | Xs], D) -> + tidy_dict(Xs, dict:erase(X, D)); +tidy_dict([], D) -> + D. + +loop(T, L, St0) -> +%%% io:fwrite("analyzing: ~w.\n",[L]), +%%% io:fwrite("work: ~w.\n", [Queue0]), + Env = dict:fetch(L, St0#state.envs), + X0 = dict:fetch(L, St0#state.out), + {X1, St1} = visit(fun_body(T), Env, St0), + X = limit(X1, St1#state.k), + {W, M} = case equal(X0, X) of + true -> + {St1#state.work, St1#state.out}; + false -> +%%% io:fwrite("out (~w) changed: ~s <- ~s.\n", +%%% [L, erl_types:t_to_string(X), +%%% erl_types:t_to_string(X0)]), + M1 = dict:store(L, X, St1#state.out), + case dict:find(L, St1#state.dep) of + {ok, S} -> +%%% io:fwrite("adding work: ~w.\n", [S]), + {add_work(S, St1#state.work), M1}; + error -> + {St1#state.work, M1} + end + end, + St2 = St1#state{out = M}, + case take_work(W) of + {ok, L1, W1} -> + T1 = dict:fetch(L1, St2#state.funs), + loop(T1, L1, St2#state{work = W1}); + none -> + St2 + end. + +visit(T, Env, St) -> + case type(T) of + literal -> + {t_from_term(concrete(T)), St}; + var -> + %% If a variable is not already in the store at this point, + %% we initialize it to 'none()'. + L = get_label(T), + Vars = St#state.vars, + case dict:find(L, Vars) of + {ok, X} -> + case dict:find(var_name(T), Env) of + {ok, X1} -> +%%% io:fwrite("filtered variable reference: ~w:~s.\n", +%%% [var_name(T), erl_types:t_to_string(X1)]), + {meet(X, X1), St}; + error -> + {X, St} + end; + error -> + X = t_none(), + Vars1 = dict:store(L, X, Vars), + St1 = St#state{vars = Vars1}, + {X, St1} + end; + 'fun' -> + %% Must revisit the fun also, because its environment might + %% have changed. (We don't keep track of such dependencies.) + L = get_label(T), + Xs = [dict:fetch(get_label(V), St#state.vars) + || V <- fun_vars(T)], + X = dict:fetch(L, St#state.out), + St1 = St#state{work = add_work([L], St#state.work), + envs = dict:store(L, Env, St#state.envs)}, + {t_fun(Xs, X), St1}; + values -> + {Xs, St1} = visit_list(values_es(T), Env, St), + {t_product(Xs), St1}; + cons -> + {[X1, X2], St1} = visit_list([cons_hd(T), cons_tl(T)], Env, St), + {t_cons(X1, X2), St1}; + tuple -> + {Xs, St1} = visit_list(tuple_es(T), Env, St), + {t_tuple(Xs), St1}; + 'let' -> + {X, St1} = visit(let_arg(T), Env, St), + LetVars = let_vars(T), + St1Vars = St1#state.vars, + Vars = case t_is_any(X) orelse t_is_none(X) of + true -> + bind_vars_single(LetVars, X, St1Vars); + false -> + bind_vars(LetVars, t_to_tlist(X), St1Vars) + end, + visit(let_body(T), Env, St1#state{vars = Vars}); + seq -> + {_, St1} = visit(seq_arg(T), Env, St), + visit(seq_body(T), Env, St1); + apply -> + {_F, St1} = visit(apply_op(T), Env, St), + {As, St2} = visit_list(apply_args(T), Env, St1), + L = get_label(T), + Ls = get_deps(L, St#state.dep), + Out = St2#state.out, + X = join_list([dict:fetch(L1, Out) || L1 <- Ls]), + Out1 = dict:store(L, X, Out), + {X, call_site(Ls, As, St2#state{out = Out1})}; + call -> + M = call_module(T), + F = call_name(T), + As = call_args(T), + {[X1, X2], St1} = visit_list([M, F], Env, St), + {Xs, St2} = visit_list(As, Env, St1), +%%% io:fwrite("call: ~w:~w(~w).\n",[X1,X2,Xs]), + X = case {t_atom_vals(X1), t_atom_vals(X2)} of + {[M1], [F1]} -> + A = length(As), +%%% io:fwrite("known call: ~w:~w/~w.\n", +%%% [M1, F1, A]), + call_type(M1, F1, A, Xs); + _ -> + t_any() + end, + L = get_label(T), + {X, St2#state{out = dict:store(L, X, St2#state.out)}}; + primop -> + As = primop_args(T), + {Xs, St1} = visit_list(As, Env, St), + F = atom_val(primop_name(T)), + A = length(As), + L = get_label(T), + X = primop_type(F, A, Xs), + {X, St1#state{out = dict:store(L, X, St1#state.out)}}; + 'case' -> + {X, St1} = visit(case_arg(T), Env, St), + Xs = case t_is_any(X) orelse t_is_none(X) of + true -> + [X || _ <- cerl:case_clauses(T)]; + false -> + t_to_tlist(X) + end, + join_visit_clauses(Xs, case_clauses(T), Env, St1); + 'receive' -> + Any = t_any(), + {X1, St1} = join_visit_clauses([Any], receive_clauses(T), + Env, St), + {X2, St2} = visit(receive_timeout(T), Env, St1), + case t_is_atom(X2) andalso (t_atom_vals(X2) =:= [infinity]) of + true -> + {X1, St2}; + false -> + {X3, St3} = visit(receive_action(T), Env, St2), + {join(X1, X3), St3} + end; + 'try' -> + {X, St1} = visit(try_arg(T), Env, St), + Any = t_any(), + Atom = t_atom(), + TryVars = try_vars(T), + St1Vars = St1#state.vars, + Vars = case t_is_any(X) orelse t_is_none(X) of + true -> + bind_vars_single(TryVars, X, St1Vars); + false -> + bind_vars(TryVars, t_to_tlist(X), St1Vars) + end, + {X1, St2} = visit(try_body(T), Env, St1#state{vars = Vars}), + EVars = bind_vars(try_evars(T), [Atom, Any, Any], St2#state.vars), + {X2, St3} = visit(try_handler(T), Env, St2#state{vars = EVars}), + {join(X1, X2), St3}; + 'catch' -> + {_, St1} = visit(catch_body(T), Env, St), + {t_any(), St1}; + binary -> + {_, St1} = visit_list(binary_segments(T), Env, St), + {t_binary(), St1}; + bitstr -> + %% The other fields are constant literals. + {_, St1} = visit(bitstr_val(T), Env, St), + {_, St2} = visit(bitstr_size(T), Env, St1), + {t_none(), St2}; + letrec -> + %% All the bound funs should be revisited, because the + %% environment might have changed. + Vars = bind_defs(letrec_defs(T), St#state.vars, + St#state.out), + Ls = [get_label(F) || {_, F} <- letrec_defs(T)], + St1 = St#state{work = add_work(Ls, St#state.work), + vars = Vars}, + visit(letrec_body(T), Env, St1); + module -> + %% We handle a module as a sequence of function variables in + %% the body of a `letrec'. + {_, St1} = visit(c_letrec(module_defs(T), + c_values(module_exports(T))), + Env, St), + {t_none(), St1} + end. + +visit_clause(T, Xs, Env, St) -> + Env1 = Env, + Vars = bind_pats(clause_pats(T), Xs, St#state.vars), + G = clause_guard(T), + {_, St1} = visit(G, Env1, St#state{vars = Vars}), + Env2 = guard_filters(G, Env1), + visit(clause_body(T), Env2, St1). + +%% We assume correct value-list typing. + +visit_list([T | Ts], Env, St) -> + {X, St1} = visit(T, Env, St), + {Xs, St2} = visit_list(Ts, Env, St1), + {[X | Xs], St2}; +visit_list([], _Env, St) -> + {[], St}. + +join_visit_clauses(Xs, [T | Ts], Env, St) -> + {X1, St1} = visit_clause(T, Xs, Env, St), + {X2, St2} = join_visit_clauses(Xs, Ts, Env, St1), + {join(X1, X2), St2}; +join_visit_clauses(_, [], _Env, St) -> + {t_none(), St}. + +bind_defs([{V, F} | Ds], Vars, Out) -> + Xs = [dict:fetch(get_label(V1), Vars) || V1 <- fun_vars(F)], + X = dict:fetch(get_label(F), Out), + bind_defs(Ds, dict:store(get_label(V), t_fun(Xs, X), Vars), Out); +bind_defs([], Vars, _Out) -> + Vars. + +bind_pats(Ps, Xs, Vars) -> + if length(Xs) =:= length(Ps) -> + bind_pats_list(Ps, Xs, Vars); + true -> + bind_pats_single(Ps, t_none(), Vars) + end. + +bind_pats_list([P | Ps], [X | Xs], Vars) -> + Vars1 = bind_pat_vars(P, X, Vars), + bind_pats_list(Ps, Xs, Vars1); +bind_pats_list([], [], Vars) -> + Vars. + +bind_pats_single([P | Ps], X, Vars) -> + bind_pats_single(Ps, X, bind_pat_vars(P, X, Vars)); +bind_pats_single([], _X, Vars) -> + Vars. + +bind_pat_vars(P, X, Vars) -> + case type(P) of + var -> + dict:store(get_label(P), X, Vars); + literal -> + Vars; + cons -> + case t_is_cons(X) of + true -> + %% If X is "nonempty proper list of X1", then the + %% head has type X1 and the tail has type "proper + %% list of X1". (If X is just "cons cell of X1", + %% then both head and tail have type X1.) + Vars1 = bind_pat_vars(cons_hd(P), t_cons_hd(X), + Vars), + bind_pat_vars(cons_tl(P), t_cons_tl(X), Vars1); + false -> + case t_is_list(X) of + true -> + %% If X is "proper list of X1", then the + %% head has type X1 and the tail has type + %% "proper list of X1", i.e., type X. + Vars1 = bind_pat_vars(cons_hd(P), + t_list_elements(X), + Vars), + bind_pat_vars(cons_tl(P), X, Vars1); + false -> + case t_is_maybe_improper_list(X) of + true -> + %% If X is "cons cell of X1", both + %% the head and tail have type X1. + X1 = t_list_elements(X), + Vars1 = bind_pat_vars(cons_hd(P), + X1, Vars), + bind_pat_vars(cons_tl(P), X1, + Vars1); + false -> + bind_vars_single(pat_vars(P), + top_or_bottom(X), + Vars) + end + end + end; + tuple -> + case t_is_tuple(X) of + true -> + case t_tuple_subtypes(X) of + unknown -> + bind_vars_single(pat_vars(P), top_or_bottom(X), + Vars); + [Tuple] -> + case t_tuple_size(Tuple) =:= tuple_arity(P) of + true -> + bind_pats_list(tuple_es(P), + t_tuple_args(Tuple), Vars); + + false -> + bind_vars_single(pat_vars(P), + top_or_bottom(X), Vars) + end; + List when is_list(List) -> + bind_vars_single(pat_vars(P), top_or_bottom(X), + Vars) + end; + false -> + bind_vars_single(pat_vars(P), top_or_bottom(X), Vars) + end; + binary -> + bind_pats_single(binary_segments(P), t_none(), Vars); + bitstr -> + %% Only the Value field is a new binding. Size is already + %% bound, and the other fields are constant literals. + %% We could create a filter for Size being an integer(). + Size = bitstr_size(P), + ValType = + case concrete(bitstr_type(P)) of + float -> t_float(); + binary -> t_binary(); + integer -> + case is_c_int(Size) of + false -> t_integer(); + true -> + SizeVal = int_val(Size), + Flags = concrete(bitstr_flags(P)), + case lists:member(signed, Flags) of + true -> + t_from_range(-(1 bsl (SizeVal - 1)), + 1 bsl (SizeVal - 1) - 1); + false -> + t_from_range(0,1 bsl SizeVal - 1) + end + end + end, + bind_pat_vars(bitstr_val(P), ValType, Vars); + alias -> + P1 = alias_pat(P), + Vars1 = bind_pat_vars(P1, X, Vars), + dict:store(get_label(alias_var(P)), pat_type(P1, Vars1), + Vars1) + end. + +pat_type(P, Vars) -> + case type(P) of + var -> + dict:fetch(get_label(P), Vars); + literal -> + t_from_term(concrete(P)); + cons -> + t_cons(pat_type(cons_hd(P), Vars), + pat_type(cons_tl(P), Vars)); + tuple -> + t_tuple([pat_type(E, Vars) || E <- tuple_es(P)]); + binary -> + t_binary(); + alias -> + pat_type(alias_pat(P), Vars) + end. + +bind_vars(Vs, Xs, Vars) -> + if length(Vs) =:= length(Xs) -> + bind_vars_list(Vs, Xs, Vars); + true -> + bind_vars_single(Vs, t_none(), Vars) + end. + +bind_vars_list([V | Vs], [X | Xs], Vars) -> + bind_vars_list(Vs, Xs, dict:store(get_label(V), X, Vars)); +bind_vars_list([], [], Vars) -> + Vars. + +bind_vars_single([V | Vs], X, Vars) -> + bind_vars_single(Vs, X, dict:store(get_label(V), X, Vars)); +bind_vars_single([], _X, Vars) -> + Vars. + +add_dep(Source, Target, Deps) -> + case dict:find(Source, Deps) of + {ok, X} -> + case set__is_member(Target, X) of + true -> + Deps; + false -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__add(Target, X), Deps) + end; + error -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__singleton(Target), Deps) + end. + +%% This handles a call site, updating parameter variables with respect +%% to the actual parameters. + +call_site(Ls, Xs, St) -> +%% io:fwrite("call site: ~w ~s.\n", +%% [Ls, erl_types:t_to_string(erl_types:t_product(Xs))]), + {W, V} = call_site(Ls, Xs, St#state.work, St#state.vars, + St#state.funs, St#state.k), + St#state{work = W, vars = V}. + +call_site([L | Ls], Xs, W, V, Fs, Limit) -> + Vs = fun_vars(dict:fetch(L, Fs)), + case bind_args(Vs, Xs, V, Limit) of + {V1, true} -> + call_site(Ls, Xs, add_work([L], W), V1, Fs, Limit); + {V1, false} -> + call_site(Ls, Xs, W, V1, Fs, Limit) + end; +call_site([], _, W, V, _, _) -> + {W, V}. + +%% If the arity does not match the call, nothing is done here. + +bind_args(Vs, Xs, Vars, Limit) -> + if length(Vs) =:= length(Xs) -> + bind_args(Vs, Xs, Vars, Limit, false); + true -> + {Vars, false} + end. + +bind_args([V | Vs], [X | Xs], Vars, Limit, Ch) -> + L = get_label(V), + {Vars1, Ch1} = bind_arg(L, X, Vars, Limit, Ch), + bind_args(Vs, Xs, Vars1, Limit, Ch1); +bind_args([], [], Vars, _Limit, Ch) -> + {Vars, Ch}. + +%% bind_arg(L, X, Vars, Limit) -> +%% bind_arg(L, X, Vars, Limit, false). + +bind_arg(L, X, Vars, Limit, Ch) -> + X0 = dict:fetch(L, Vars), + X1 = limit(join(X, X0), Limit), + case equal(X0, X1) of + true -> + {Vars, Ch}; + false -> +%%% io:fwrite("arg (~w) changed: ~s <- ~s + ~s.\n", +%%% [L, erl_types:t_to_string(X1), +%%% erl_types:t_to_string(X0), +%%% erl_types:t_to_string(X)]), + {dict:store(L, X1, Vars), true} + end. + +%% Domain: type(), defined in module `erl_types'. + +meet(X, Y) -> t_inf(X, Y). + +join(X, Y) -> t_sup(X, Y). + +join_list([Xs | Xss]) -> + join(Xs, join_list(Xss)); +join_list([]) -> + t_none(). + +equal(X, Y) -> X =:= Y. + +limit(X, K) -> t_limit(X, K). + +top_or_bottom(T) -> + case t_is_none(T) of + true -> + T; + false -> + t_any() + end. + +strict(Xs, T) -> + case erl_types:any_none(Xs) of + true -> + t_none(); + false -> + T + end. + +%% Set abstraction for label sets. + +%% set__new() -> []. + +set__singleton(X) -> [X]. + +%% set__to_list(S) -> S. + +%% set__from_list(S) -> ordsets:from_list(S). + +%% set__union(X, Y) -> ordsets:union(X, Y). + +set__add(X, S) -> ordsets:add_element(X, S). + +set__is_member(X, S) -> ordsets:is_element(X, S). + +%% set__subtract(X, Y) -> ordsets:subtract(X, Y). + +%% set__equal(X, Y) -> X =:= Y. + +%% A simple but efficient functional queue. + +queue__new() -> {[], []}. + +queue__put(X, {In, Out}) -> {[X | In], Out}. + +queue__get({In, [X | Out]}) -> {ok, X, {In, Out}}; +queue__get({[], _}) -> empty; +queue__get({In, _}) -> + [X | In1] = lists:reverse(In), + {ok, X, {[], In1}}. + +%% The work list - a queue without repeated elements. + +init_work() -> + {queue__put(external, queue__new()), sets:new()}. + +add_work(Ls, {Q, Set}) -> + add_work(Ls, Q, Set). + +%% Note that the elements are enqueued in order. + +add_work([L | Ls], Q, Set) -> + case sets:is_element(L, Set) of + true -> + add_work(Ls, Q, Set); + false -> + add_work(Ls, queue__put(L, Q), sets:add_element(L, Set)) + end; +add_work([], Q, Set) -> + {Q, Set}. + +take_work({Queue0, Set0}) -> + case queue__get(Queue0) of + {ok, L, Queue1} -> + Set1 = sets:del_element(L, Set0), + {ok, L, {Queue1, Set1}}; + empty -> + none + end. + +get_deps(L, Dep) -> + case dict:find(L, Dep) of + {ok, Ls} -> Ls; + error -> [] + end. + +%% Type information for built-in functions. We do not check that the +%% arguments have the correct type; if the call would actually fail, +%% rather than return a value, this is a safe overapproximation. + +primop_type(match_fail, 1, _) -> t_none(); +primop_type(_, _, Xs) -> strict(Xs, t_any()). + +call_type(M, F, A, Xs) -> + erl_bif_types:type(M, F, A, Xs). + +guard_filters(T, Env) -> + guard_filters(T, Env, dict:new()). + +guard_filters(T, Env, Vars) -> + case type(T) of + call -> + M = call_module(T), + F = call_name(T), + case is_c_atom(M) andalso is_c_atom(F) of + true -> + As = call_args(T), + case {atom_val(M), atom_val(F), length(As)} of + {erlang, 'and', 2} -> + [A1, A2] = As, + guard_filters(A1, guard_filters(A2, Env)); + {erlang, is_atom, 1} -> + filter(As, t_atom(), Env); + {erlang, is_binary, 1} -> + filter(As, t_binary(), Env); + {erlang, is_float, 1} -> + filter(As, t_float(), Env); + {erlang, is_function, 1} -> + filter(As, t_fun(), Env); + {erlang, is_integer, 1} -> + filter(As, t_integer(), Env); + {erlang, is_list, 1} -> + filter(As, t_maybe_improper_list(), Env); + {erlang, is_number, 1} -> + filter(As, t_number(), Env); + {erlang, is_pid, 1} -> + filter(As, t_pid(), Env); + {erlang, is_port, 1} -> + filter(As, t_port(), Env); + {erlang, is_reference, 1} -> + filter(As, t_reference(), Env); + {erlang, is_tuple, 1} -> + filter(As, t_tuple(), Env); + _ -> + Env + end; + false -> + Env + end; + var -> + case dict:find(var_name(T), Vars) of + {ok, T1} -> + guard_filters(T1, Env, Vars); + error -> + Env + end; + 'let' -> + case let_vars(T) of + [V] -> + guard_filters(let_body(T), Env, + dict:store(var_name(V), let_arg(T), + Vars)); + _ -> + Env + end; + values -> + case values_es(T) of + [T1] -> + guard_filters(T1, Env, Vars); + _ -> + Env + end; + _ -> + Env + end. + +filter(As, X, Env) -> + [A] = As, + case type(A) of + var -> + V = var_name(A), + case dict:find(V, Env) of + {ok, X1} -> + dict:store(V, meet(X, X1), Env); + error -> + dict:store(V, X, Env) + end; + _ -> + Env + end. + +%% Callback hook for cerl_prettypr: + +-spec pp_hook() -> fun((cerl:cerl(), _, fun((_,_) -> any())) -> any()). + +pp_hook() -> + fun pp_hook/3. + +pp_hook(Node, Ctxt, Cont) -> + As = cerl:get_ann(Node), + As1 = proplists:delete(type, proplists:delete(label, As)), + As2 = proplists:delete(typesig, proplists:delete(file, As1)), + D = Cont(cerl:set_ann(Node, []), Ctxt), + T = case proplists:lookup(type, As) of + {type, T0} -> T0; + none -> + case proplists:lookup(typesig, As) of + {typesig, T0} -> T0; + none -> t_any() + end + end, + D1 = case erl_types:t_is_any(T) of + true -> + D; + false -> + case cerl:is_literal(Node) of + true -> + D; + false -> + S = erl_types:t_to_string(T), + Q = prettypr:beside(prettypr:text("::"), + prettypr:text(S)), + prettypr:beside(D, Q) + end + end, + cerl_prettypr:annotate(D1, As2, Ctxt). + +%% ===================================================================== diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl new file mode 100644 index 0000000000..0f57a93a7c --- /dev/null +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -0,0 +1,5021 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ===================================================================== +%% Type information for Erlang Built-in functions (implemented in C) +%% +%% Copyright (C) 2002 Richard Carlsson +%% Copyright (C) 2006 Richard Carlsson, Tobias Lindahl and Kostis Sagonas +%% +%% Author contact: richardc@it.uu.se, tobiasl@it.uu.se, kostis@it.uu.se +%% ===================================================================== + +-module(erl_bif_types). + +%-define(BITS, (hipe_rtl_arch:word_size() * 8) - ?TAG_IMMED1_SIZE). +-define(BITS, 128). %This is only in bsl to convert answer to pos_inf/neg_inf. +-define(TAG_IMMED1_SIZE, 4). + +-export([type/3, type/4, arg_types/3, + is_known/3, structure_inspecting_args/3, infinity_add/2]). + +-import(erl_types, [number_max/1, + number_min/1, + t_any/0, + t_arity/0, + t_atom/0, + t_atom/1, + t_atoms/1, + t_atom_vals/1, + t_binary/0, + t_bitstr/0, + t_boolean/0, + t_byte/0, + t_char/0, + t_cons/0, + t_cons/2, + t_cons_hd/1, + t_cons_tl/1, + t_constant/0, + t_fixnum/0, + t_non_neg_fixnum/0, + t_pos_fixnum/0, + t_float/0, + t_from_range/2, + t_from_term/1, + t_fun/0, + t_fun/2, + t_fun_args/1, + t_fun_range/1, + t_identifier/0, + t_inf/2, + t_integer/0, + t_integer/1, + t_non_neg_fixnum/0, + t_non_neg_integer/0, + t_pos_integer/0, + t_integers/1, + t_iodata/0, + t_iolist/0, + t_is_any/1, + t_is_atom/1, + t_is_binary/1, + t_is_bitstr/1, + t_is_boolean/1, + t_is_cons/1, + t_is_constant/1, + t_is_float/1, + t_is_float/1, + t_is_fun/1, + t_is_integer/1, + t_is_integer/1, + t_is_list/1, + t_is_nil/1, + t_is_none/1, + t_is_none_or_unit/1, + t_is_number/1, + t_is_pid/1, + t_is_port/1, + t_is_maybe_improper_list/1, + t_is_reference/1, + t_is_string/1, + t_is_subtype/2, + t_is_tuple/1, + t_list/0, + t_list/1, + t_list_elements/1, + t_list_termination/1, + t_mfa/0, + t_nil/0, + t_node/0, + t_none/0, + t_nonempty_list/0, + t_nonempty_list/1, + t_number/0, + t_number_vals/1, + t_pid/0, + t_port/0, + t_maybe_improper_list/0, + t_reference/0, + t_string/0, + t_subtract/2, + t_sup/1, + t_sup/2, + t_tid/0, + t_timeout/0, + t_tuple/0, + t_tuple/1, + t_tuple_args/1, + t_tuple_size/1, + t_tuple_subtypes/1 + ]). + +-ifdef(DO_ERL_BIF_TYPES_TEST). +-export([test/0]). +-endif. + +%%============================================================================= + +-spec type(atom(), atom(), arity()) -> erl_types:erl_type(). + +type(M, F, A) -> + type(M, F, A, any_list(A)). + +%% Arguments should be checked for undefinedness, so we do not make +%% unnecessary overapproximations. + +-spec type(atom(), atom(), arity(), [erl_types:erl_type()]) -> erl_types:erl_type(). + +%%-- code --------------------------------------------------------------------- +type(code, add_path, 1, Xs) -> + strict(arg_types(code, add_path, 1), Xs, + fun (_) -> + t_sup(t_boolean(), + t_tuple([t_atom('error'), t_atom('bad_directory')])) + end); +type(code, add_patha, 1, Xs) -> + type(code, add_path, 1, Xs); +type(code, add_paths, 1, Xs) -> + strict(arg_types(code, add_paths, 1), Xs, fun(_) -> t_atom('ok') end); +type(code, add_pathsa, 1, Xs) -> + type(code, add_paths, 1, Xs); +type(code, add_pathsz, 1, Xs) -> + type(code, add_paths, 1, Xs); +type(code, add_pathz, 1, Xs) -> + type(code, add_path, 1, Xs); +type(code, all_loaded, 0, _) -> + t_list(t_tuple([t_atom(), t_code_loaded_fname_or_status()])); +type(code, compiler_dir, 0, _) -> + t_string(); +type(code, del_path, 1, Xs) -> + strict(arg_types(code, del_path, 1), Xs, + fun (_) -> + t_sup(t_boolean(), + t_tuple([t_atom('error'), t_atom('bad_name')])) + end); +type(code, delete, 1, Xs) -> + strict(arg_types(code, delete, 1), Xs, fun (_) -> t_boolean() end); +type(code, ensure_loaded, 1, Xs) -> + type(code, load_file, 1, Xs); +type(code, get_chunk, 2, Xs) -> + strict(arg_types(code, get_chunk, 2), Xs, + fun (_) -> t_sup(t_binary(), t_atom('undefined')) end); +type(code, get_object_code, 1, Xs) -> + strict(arg_types(code, get_object_code, 1), Xs, + fun (_) -> + t_sup(t_tuple([t_atom(), t_binary(), t_string()]), + t_atom('error')) + end); +type(code, get_path, 0, _) -> + t_list(t_string()); +type(code, is_loaded, 1, Xs) -> + strict(arg_types(code, is_loaded, 1), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('file'), t_code_loaded_fname_or_status()]), + t_atom('false')]) + end); +type(code, is_sticky, 1, Xs) -> + strict(arg_types(code, is_sticky, 1), Xs, fun (_) -> t_boolean() end); +type(code, is_module_native, 1, Xs) -> + strict(arg_types(code, is_module_native, 1), Xs, + fun (_) -> t_sup(t_boolean(), t_atom('undefined')) end); +type(code, lib_dir, 0, _) -> + t_string(); +type(code, lib_dir, 1, Xs) -> + strict(arg_types(code, lib_dir, 1), Xs, + fun (_) -> + t_sup(t_string(), + t_tuple([t_atom('error'), t_atom('bad_name')])) + end); +type(code, load_abs, 1, Xs) -> + strict(arg_types(code, load_abs, 1), Xs, + fun ([_File]) -> t_code_load_return(t_atom()) end); % XXX: cheating +type(code, load_abs, 2, Xs) -> + strict(arg_types(code, load_abs, 2), Xs, + fun ([_File,Mod]) -> t_code_load_return(Mod) end); +type(code, load_binary, 3, Xs) -> + strict(arg_types(code, load_binary, 3), Xs, + fun ([Mod,_File,_Bin]) -> t_code_load_return(Mod) end); +type(code, load_file, 1, Xs) -> + strict(arg_types(code, load_file, 1), Xs, + fun ([Mod]) -> t_code_load_return(Mod) end); +type(code, load_native_partial, 2, Xs) -> + strict(arg_types(code, load_native_partial, 2), Xs, + fun ([Mod,_Bin]) -> t_code_load_return(Mod) end); +type(code, load_native_sticky, 3, Xs) -> + strict(arg_types(code, load_native_sticky, 3), Xs, + fun ([Mod,_Bin,_]) -> t_code_load_return(Mod) end); +type(code, module_md5, 1, Xs) -> + strict(arg_types(code, module_md5, 1), Xs, + fun (_) -> t_sup(t_binary(), t_atom('undefined')) end); +type(code, make_stub_module, 3, Xs) -> + strict(arg_types(code, make_stub_module, 3), Xs, fun ([Mod,_,_]) -> Mod end); +type(code, priv_dir, 1, Xs) -> + strict(arg_types(code, priv_dir, 1), Xs, + fun (_) -> + t_sup(t_string(), t_tuple([t_atom('error'), t_atom('bad_name')])) + end); +type(code, purge, 1, Xs) -> + type(code, delete, 1, Xs); +type(code, rehash, 0, _) -> t_atom('ok'); +type(code, replace_path, 2, Xs) -> + strict(arg_types(code, replace_path, 2), Xs, + fun (_) -> + t_sup([t_atom('true'), + t_tuple([t_atom('error'), t_atom('bad_name')]), + t_tuple([t_atom('error'), t_atom('bad_directory')]), + t_tuple([t_atom('error'), + t_tuple([t_atom('badarg'), t_any()])])]) + end); +type(code, root_dir, 0, _) -> + t_string(); +type(code, set_path, 1, Xs) -> + strict(arg_types(code, set_path, 1), Xs, + fun (_) -> + t_sup([t_atom('true'), + t_tuple([t_atom('error'), t_atom('bad_path')]), + t_tuple([t_atom('error'), t_atom('bad_directory')])]) + end); +type(code, soft_purge, 1, Xs) -> + type(code, delete, 1, Xs); +type(code, stick_mod, 1, Xs) -> + strict(arg_types(code, stick_mod, 1), Xs, fun (_) -> t_atom('true') end); +type(code, unstick_mod, 1, Xs) -> + type(code, stick_mod, 1, Xs); +type(code, which, 1, Xs) -> + strict(arg_types(code, which, 1), Xs, + fun (_) -> + t_sup([t_code_loaded_fname_or_status(), + t_atom('non_existing')]) + end); +%%-- erl_ddll ----------------------------------------------------------------- +type(erl_ddll, demonitor, 1, Xs) -> + type(erlang, demonitor, 1, Xs); +type(erl_ddll, format_error_int, 1, Xs) -> + strict(arg_types(erl_ddll, format_error_int, 1), Xs, + fun (_) -> t_string() end); +type(erl_ddll, info, 2, Xs) -> + strict(arg_types(erl_ddll, info, 2), Xs, fun (_) -> t_atom() end); +type(erl_ddll, loaded_drivers, 0, _) -> + t_tuple([t_atom('ok'), t_list(t_string())]); +type(erl_ddll, monitor, 2, Xs) -> % return type is the same, though args are not + type(erlang, monitor, 2, Xs); +type(erl_ddll, try_load, 3, Xs) -> + strict(arg_types(erl_ddll, try_load, 3), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('ok'), t_atom('already_loaded')]), + t_tuple([t_atom('ok'), t_atom('loaded')]), + t_tuple([t_atom('ok'), + t_atom('pending_driver'), t_reference()]), + t_tuple([t_atom('error'), t_atom('inconsistent')]), + t_tuple([t_atom('error'), t_atom('permanent')])]) + end); +type(erl_ddll, try_unload, 2, Xs) -> + strict(arg_types(erl_ddll, try_unload, 2), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('ok'), t_atom('pending_process')]), + t_tuple([t_atom('ok'), t_atom('unloaded')]), + t_tuple([t_atom('ok'), t_atom('pending_driver')]), + t_tuple([t_atom('ok'), + t_atom('pending_driver'), t_reference()]), + t_tuple([t_atom('error'), t_atom('permanent')]), + t_tuple([t_atom('error'), t_atom('not_loaded')]), + t_tuple([t_atom('error'), + t_atom('not_loaded_by_this_process')])]) + end); +%%-- erlang ------------------------------------------------------------------- +type(erlang, halt, 0, _) -> t_none(); +type(erlang, halt, 1, _) -> t_none(); +type(erlang, exit, 1, _) -> t_none(); +%% Note that exit/2 sends an exit signal to another process. +type(erlang, exit, 2, _) -> t_atom('true'); +type(erlang, error, 1, _) -> t_none(); +type(erlang, error, 2, _) -> t_none(); +type(erlang, throw, 1, _) -> t_none(); +type(erlang, hibernate, 3, _) -> t_none(); +type(erlang, '==', 2, Xs = [X1, X2]) -> + case t_is_atom(X1) andalso t_is_atom(X2) of + true -> type(erlang, '=:=', 2, Xs); + false -> + case t_is_integer(X1) andalso t_is_integer(X2) of + true -> type(erlang, '=:=', 2, Xs); + false -> strict(Xs, t_boolean()) + end + end; +type(erlang, '/=', 2, Xs = [X1, X2]) -> + case t_is_atom(X1) andalso t_is_atom(X2) of + true -> type(erlang, '=/=', 2, Xs); + false -> + case t_is_integer(X1) andalso t_is_integer(X2) of + true -> type(erlang, '=/=', 2, Xs); + false -> strict(Xs, t_boolean()) + end + end; +type(erlang, '=:=', 2, Xs = [Lhs, Rhs]) -> + Ans = + case t_is_none(t_inf(Lhs, Rhs)) of + true -> t_atom('false'); + false -> + case t_is_atom(Lhs) andalso t_is_atom(Rhs) of + true -> + case {t_atom_vals(Lhs), t_atom_vals(Rhs)} of + {unknown, _} -> t_boolean(); + {_, unknown} -> t_boolean(); + {[X], [X]} -> t_atom('true'); + {LhsVals, RhsVals} -> + case lists:all(fun({X, Y}) -> X =/= Y end, + [{X, Y} || X <- LhsVals, Y <- RhsVals]) of + true -> t_atom('false'); + false -> t_boolean() + end + end; + false -> + case t_is_integer(Lhs) andalso t_is_integer(Rhs) of + false -> t_boolean(); + true -> + case {t_number_vals(Lhs), t_number_vals(Rhs)} of + {[X], [X]} when is_integer(X) -> t_atom('true'); + _ -> + LhsMax = number_max(Lhs), + LhsMin = number_min(Lhs), + RhsMax = number_max(Rhs), + RhsMin = number_min(Rhs), + Ans1 = (is_integer(LhsMin) + andalso is_integer(RhsMax) + andalso (LhsMin > RhsMax)), + Ans2 = (is_integer(LhsMax) + andalso is_integer(RhsMin) + andalso (RhsMin > LhsMax)), + case Ans1 orelse Ans2 of + true -> t_atom('false'); + false -> t_boolean() + end + end + end + end + end, + strict(Xs, Ans); +type(erlang, '=/=', 2, Xs = [Lhs, Rhs]) -> + Ans = + case t_is_none(t_inf(Lhs, Rhs)) of + true -> t_atom('true'); + false -> + case t_is_atom(Lhs) andalso t_is_atom(Rhs) of + true -> + case {t_atom_vals(Lhs), t_atom_vals(Rhs)} of + {unknown, _} -> t_boolean(); + {_, unknown} -> t_boolean(); + {[Val], [Val]} -> t_atom('false'); + {LhsVals, RhsVals} -> + t_sup([t_from_term(X =/= Y) || X <- LhsVals, Y <- RhsVals]) + end; + false -> + case t_is_integer(Lhs) andalso t_is_integer(Rhs) of + false -> t_boolean(); + true -> + LhsMax = number_max(Lhs), + LhsMin = number_min(Lhs), + RhsMax = number_max(Rhs), + RhsMin = number_min(Rhs), + Ans1 = (is_integer(LhsMin) andalso is_integer(RhsMax) + andalso (LhsMin > RhsMax)), + Ans2 = (is_integer(LhsMax) andalso is_integer(RhsMin) + andalso (RhsMin > LhsMax)), + case Ans1 orelse Ans2 of + true -> t_atom('true'); + false -> + if LhsMax =:= LhsMin, + RhsMin =:= RhsMax, + RhsMax =:= LhsMax -> t_atom('false'); + true -> t_boolean() + end + end + end + end + end, + strict(Xs, Ans); +type(erlang, '>', 2, Xs = [Lhs, Rhs]) -> + Ans = + case t_is_integer(Lhs) andalso t_is_integer(Rhs) of + true -> + LhsMax = number_max(Lhs), + LhsMin = number_min(Lhs), + RhsMax = number_max(Rhs), + RhsMin = number_min(Rhs), + T = t_atom('true'), + F = t_atom('false'), + if + is_integer(LhsMin), is_integer(RhsMax), LhsMin > RhsMax -> T; + is_integer(LhsMax), is_integer(RhsMin), RhsMin >= LhsMax -> F; + true -> t_boolean() + end; + false -> t_boolean() + end, + strict(Xs, Ans); +type(erlang, '>=', 2, Xs = [Lhs, Rhs]) -> + Ans = + case t_is_integer(Lhs) andalso t_is_integer(Rhs) of + true -> + LhsMax = number_max(Lhs), + LhsMin = number_min(Lhs), + RhsMax = number_max(Rhs), + RhsMin = number_min(Rhs), + T = t_atom('true'), + F = t_atom('false'), + if + is_integer(LhsMin), is_integer(RhsMax), LhsMin >= RhsMax -> T; + is_integer(LhsMax), is_integer(RhsMin), RhsMin > LhsMax -> F; + true -> t_boolean() + end; + false -> t_boolean() + end, + strict(Xs, Ans); +type(erlang, '<', 2, Xs = [Lhs, Rhs]) -> + Ans = + case t_is_integer(Lhs) andalso t_is_integer(Rhs) of + true -> + LhsMax = number_max(Lhs), + LhsMin = number_min(Lhs), + RhsMax = number_max(Rhs), + RhsMin = number_min(Rhs), + T = t_atom('true'), + F = t_atom('false'), + if + is_integer(LhsMax), is_integer(RhsMin), LhsMax < RhsMin -> T; + is_integer(LhsMin), is_integer(RhsMax), RhsMax =< LhsMin -> F; + true -> t_boolean() + end; + false -> t_boolean() + end, + strict(Xs, Ans); +type(erlang, '=<', 2, Xs = [Lhs, Rhs]) -> + Ans = + case t_is_integer(Lhs) andalso t_is_integer(Rhs) of + true -> + LhsMax = number_max(Lhs), + LhsMin = number_min(Lhs), + RhsMax = number_max(Rhs), + RhsMin = number_min(Rhs), + T = t_atom('true'), + F = t_atom('false'), + if + is_integer(LhsMax), is_integer(RhsMin), LhsMax =< RhsMin -> T; + is_integer(LhsMin), is_integer(RhsMax), RhsMax < LhsMin -> F; + true -> t_boolean() + end; + false -> t_boolean() + end, + strict(Xs, Ans); +type(erlang, '+', 1, Xs) -> + strict(arg_types(erlang, '+', 1), Xs, + fun ([X]) -> X end); +type(erlang, '-', 1, Xs) -> + strict(arg_types(erlang, '-', 1), Xs, + fun ([X]) -> + case t_is_integer(X) of + true -> type(erlang, '-', 2, [t_integer(0), X]); + false -> X + end + end); +type(erlang, '!', 2, Xs) -> + strict(arg_types(erlang, '!', 2), Xs, fun ([_, X2]) -> X2 end); +type(erlang, '+', 2, Xs) -> + strict(arg_types(erlang, '+', 2), Xs, + fun ([X1, X2]) -> + case arith('+', X1, X2) of + {ok, T} -> T; + error -> + case t_is_float(X1) orelse t_is_float(X2) of + true -> t_float(); + false -> t_number() + end + end + end); +type(erlang, '-', 2, Xs) -> + strict(arg_types(erlang, '-', 2), Xs, + fun ([X1, X2]) -> + case arith('-', X1, X2) of + {ok, T} -> T; + error -> + case t_is_float(X1) orelse t_is_float(X2) of + true -> t_float(); + false -> t_number() + end + end + end); +type(erlang, '*', 2, Xs) -> + strict(arg_types(erlang, '*', 2), Xs, + fun ([X1, X2]) -> + case arith('*', X1, X2) of + {ok, T} -> T; + error -> + case t_is_float(X1) orelse t_is_float(X2) of + true -> t_float(); + false -> t_number() + end + end + end); +type(erlang, '/', 2, Xs) -> + strict(arg_types(erlang, '/', 2), Xs, + fun (_) -> t_float() end); +type(erlang, 'div', 2, Xs) -> + strict(arg_types(erlang, 'div', 2), Xs, + fun ([X1, X2]) -> + case arith('div', X1, X2) of + error -> t_integer(); + {ok, T} -> T + end + end); +type(erlang, 'rem', 2, Xs) -> + strict(arg_types(erlang, 'rem', 2), Xs, + fun ([X1, X2]) -> + case arith('rem', X1, X2) of + error -> t_non_neg_integer(); + {ok, T} -> T + end + end); +type(erlang, '++', 2, Xs) -> + strict(arg_types(erlang, '++', 2), Xs, + fun ([X1, X2]) -> + case t_is_nil(X1) of + true -> X2; % even if X2 is not a list + false -> + case t_is_nil(X2) of + true -> X1; + false -> + E1 = t_list_elements(X1), + case t_is_cons(X1) of + true -> t_cons(E1, X2); + false -> + t_sup(X2, t_cons(E1, X2)) + end + end + end + end); +type(erlang, '--', 2, Xs) -> + %% We don't know which elements (if any) in X2 will be found and + %% removed from X1, even if they would have the same type. Thus, we + %% must assume that X1 can remain unchanged. However, if we succeed, + %% we know that X1 must be a proper list, but the result could + %% possibly be empty even if X1 is nonempty. + strict(arg_types(erlang, '--', 2), Xs, + fun ([X1, X2]) -> + case t_is_nil(X1) of + true -> t_nil(); + false -> + case t_is_nil(X2) of + true -> X1; + false -> t_list(t_list_elements(X1)) + end + end + end); +type(erlang, 'and', 2, Xs) -> + strict(arg_types(erlang, 'and', 2), Xs, fun (_) -> t_boolean() end); +type(erlang, 'or', 2, Xs) -> + strict(arg_types(erlang, 'or', 2), Xs, fun (_) -> t_boolean() end); +type(erlang, 'xor', 2, Xs) -> + strict(arg_types(erlang, 'xor', 2), Xs, fun (_) -> t_boolean() end); +type(erlang, 'not', 1, Xs) -> + strict(arg_types(erlang, 'not', 1), Xs, fun (_) -> t_boolean() end); +type(erlang, 'band', 2, Xs) -> + strict(arg_types(erlang, 'band', 2), Xs, + fun ([X1, X2]) -> + case arith('band', X1, X2) of + error -> t_integer(); + {ok, T} -> T + end + end); +%% The result is not wider than the smallest argument. We need to +%% kill any value-sets in the result. +%% strict(arg_types(erlang, 'band', 2), Xs, +%% fun ([X1, X2]) -> t_sup(t_inf(X1, X2), t_byte()) end); +type(erlang, 'bor', 2, Xs) -> + strict(arg_types(erlang, 'bor', 2), Xs, + fun ([X1, X2]) -> + case arith('bor', X1, X2) of + error -> t_integer(); + {ok, T} -> T + end + end); +%% The result is not wider than the largest argument. We need to +%% kill any value-sets in the result. +%% strict(arg_types(erlang, 'bor', 2), Xs, +%% fun ([X1, X2]) -> t_sup(t_sup(X1, X2), t_byte()) end); +type(erlang, 'bxor', 2, Xs) -> + strict(arg_types(erlang, 'bxor', 2), Xs, + fun ([X1, X2]) -> + case arith('bxor', X1, X2) of + error -> t_integer(); + {ok, T} -> T + end + end); +%% The result is not wider than the largest argument. We need to +%% kill any value-sets in the result. +%% strict(arg_types(erlang, 'bxor', 2), Xs, +%% fun ([X1, X2]) -> t_sup(t_sup(X1, X2), t_byte()) end); +type(erlang, 'bsr', 2, Xs) -> + strict(arg_types(erlang, 'bsr', 2), Xs, + fun ([X1, X2]) -> + case arith('bsr', X1, X2) of + error -> t_integer(); + {ok, T} -> T + end + end); +%% If the first argument is unsigned (which is the case for +%% characters and bytes), the result is never wider. We need to kill +%% any value-sets in the result. +%% strict(arg_types(erlang, 'bsr', 2), Xs, +%% fun ([X, _]) -> t_sup(X, t_byte()) end); +type(erlang, 'bsl', 2, Xs) -> + strict(arg_types(erlang, 'bsl', 2), Xs, + fun ([X1, X2]) -> + case arith('bsl', X1, X2) of + error -> t_integer(); + {ok, T} -> T + end + end); +%% Not worth doing anything special here. +%% strict(arg_types(erlang, 'bsl', 2), Xs, fun (_) -> t_integer() end); +type(erlang, 'bnot', 1, Xs) -> + strict(arg_types(erlang, 'bnot', 1), Xs, + fun ([X1]) -> + case arith('bnot', X1) of + error -> t_integer(); + {ok, T} -> T + end + end); +%% This returns (-X)-1, so it often gives a negative result. +%% strict(arg_types(erlang, 'bnot', 1), Xs, fun (_) -> t_integer() end); +type(erlang, abs, 1, Xs) -> + strict(arg_types(erlang, abs, 1), Xs, fun ([X]) -> X end); +type(erlang, append_element, 2, Xs) -> + strict(arg_types(erlang, append_element, 2), Xs, fun (_) -> t_tuple() end); +type(erlang, apply, 2, Xs) -> + Fun = fun ([X, _Y]) -> + case t_is_fun(X) of + true -> + t_fun_range(X); + false -> + t_any() + end + end, + strict(arg_types(erlang, apply, 2), Xs, Fun); +type(erlang, apply, 3, Xs) -> + strict(arg_types(erlang, apply, 3), Xs, fun (_) -> t_any() end); +type(erlang, atom_to_binary, 2, Xs) -> + strict(arg_types(erlang, atom_to_binary, 2), Xs, fun (_) -> t_binary() end); +type(erlang, atom_to_list, 1, Xs) -> + strict(arg_types(erlang, atom_to_list, 1), Xs, fun (_) -> t_string() end); +type(erlang, binary_to_atom, 2, Xs) -> + strict(arg_types(erlang, binary_to_atom, 2), Xs, fun (_) -> t_atom() end); +type(erlang, binary_to_existing_atom, 2, Xs) -> + type(erlang, binary_to_atom, 2, Xs); +type(erlang, binary_to_list, 1, Xs) -> + strict(arg_types(erlang, binary_to_list, 1), Xs, + fun (_) -> t_list(t_byte()) end); +type(erlang, binary_to_list, 3, Xs) -> + strict(arg_types(erlang, binary_to_list, 3), Xs, + fun (_) -> t_list(t_byte()) end); +type(erlang, binary_to_term, 1, Xs) -> + strict(arg_types(erlang, binary_to_term, 1), Xs, fun (_) -> t_any() end); +type(erlang, bitsize, 1, Xs) -> % XXX: TAKE OUT + type(erlang, bit_size, 1, Xs); +type(erlang, bit_size, 1, Xs) -> + strict(arg_types(erlang, bit_size, 1), Xs, + fun (_) -> t_non_neg_integer() end); +type(erlang, bitstr_to_list, 1, Xs) -> % XXX: TAKE OUT + type(erlang, bitstring_to_list, 1, Xs); +type(erlang, bitstring_to_list, 1, Xs) -> + strict(arg_types(erlang, bitstring_to_list, 1), Xs, + fun (_) -> t_list(t_sup(t_byte(), t_bitstr())) end); +type(erlang, bump_reductions, 1, Xs) -> + strict(arg_types(erlang, bump_reductions, 1), Xs, + fun (_) -> t_atom('true') end); +type(erlang, byte_size, 1, Xs) -> + strict(arg_types(erlang, byte_size, 1), Xs, + fun (_) -> t_non_neg_integer() end); +type(erlang, cancel_timer, 1, Xs) -> + strict(arg_types(erlang, cancel_timer, 1), Xs, + fun (_) -> t_sup(t_integer(), t_atom('false')) end); +type(erlang, check_process_code, 2, Xs) -> + strict(arg_types(erlang, check_process_code, 2), Xs, + fun (_) -> t_boolean() end); +type(erlang, concat_binary, 1, Xs) -> + strict(arg_types(erlang, concat_binary, 1), Xs, fun (_) -> t_binary() end); +type(erlang, crc32, 1, Xs) -> + strict(arg_types(erlang, crc32, 1), Xs, fun (_) -> t_integer() end); +type(erlang, crc32, 2, Xs) -> + strict(arg_types(erlang, crc32, 2), Xs, fun (_) -> t_integer() end); +type(erlang, crc32_combine, 3, Xs) -> + strict(arg_types(erlang, crc32_combine, 3), Xs, fun (_) -> t_integer() end); +type(erlang, date, 0, _) -> + t_date(); +type(erlang, decode_packet, 3, Xs) -> + strict(arg_types(erlang, decode_packet, 3), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('ok'), t_packet(), t_binary()]), + t_tuple([t_atom('more'), t_sup([t_non_neg_integer(), + t_atom('undefined')])]), + t_tuple([t_atom('error'), t_any()])]) + end); +type(erlang, delete_module, 1, Xs) -> + strict(arg_types(erlang, delete_module, 1), Xs, + fun (_) -> t_sup(t_atom('true'), t_atom('undefined')) end); +type(erlang, demonitor, 1, Xs) -> + strict(arg_types(erlang, demonitor, 1), Xs, fun (_) -> t_atom('true') end); +%% TODO: overapproximation -- boolean only if 'info' is part of arg2 otherwise 'true' +type(erlang, demonitor, 2, Xs) -> + strict(arg_types(erlang, demonitor, 2), Xs, fun (_) -> t_boolean() end); +type(erlang, disconnect_node, 1, Xs) -> + strict(arg_types(erlang, disconnect_node, 1), Xs, fun (_) -> t_boolean() end); +type(erlang, display, 1, _) -> t_atom('true'); +type(erlang, dist_exit, 3, Xs) -> + strict(arg_types(erlang, dist_exit, 3), Xs, fun (_) -> t_atom('true') end); +type(erlang, element, 2, Xs) -> + strict(arg_types(erlang, element, 2), Xs, + fun ([X1, X2]) -> + case t_tuple_subtypes(X2) of + unknown -> t_any(); + [_] -> + Sz = t_tuple_size(X2), + As = t_tuple_args(X2), + case t_number_vals(X1) of + unknown -> t_sup(As); + Ns when is_list(Ns) -> + Fun = fun + (N, X) when is_integer(N), 1 =< N, N =< Sz -> + t_sup(X, lists:nth(N, As)); + (_, X) -> + X + end, + lists:foldl(Fun, t_none(), Ns) + end; + Ts when is_list(Ts) -> + t_sup([type(erlang, element, 2, [X1, Y]) || Y <- Ts]) + end + end); +type(erlang, erase, 0, _) -> t_any(); +type(erlang, erase, 1, _) -> t_any(); +type(erlang, external_size, 1, _) -> t_integer(); +type(erlang, float, 1, Xs) -> + strict(arg_types(erlang, float, 1), Xs, fun (_) -> t_float() end); +type(erlang, float_to_list, 1, Xs) -> + strict(arg_types(erlang, float_to_list, 1), Xs, fun (_) -> t_string() end); +type(erlang, function_exported, 3, Xs) -> + strict(arg_types(erlang, function_exported, 3), Xs, + fun (_) -> t_boolean() end); +type(erlang, fun_info, 1, Xs) -> + strict(arg_types(erlang, fun_info, 1), Xs, + fun (_) -> t_list(t_tuple([t_atom(), t_any()])) end); +type(erlang, fun_info, 2, Xs) -> + strict(arg_types(erlang, fun_info, 2), Xs, + fun (_) -> t_tuple([t_atom(), t_any()]) end); +type(erlang, fun_to_list, 1, Xs) -> + strict(arg_types(erlang, fun_to_list, 1), Xs, fun (_) -> t_string() end); +type(erlang, garbage_collect, 0, _) -> t_atom('true'); +type(erlang, garbage_collect, 1, Xs) -> + strict(arg_types(erlang, garbage_collect, 1), Xs, fun (_) -> t_boolean() end); +type(erlang, get, 0, _) -> t_list(t_tuple(2)); +type(erlang, get, 1, _) -> t_any(); % | t_atom('undefined') +type(erlang, get_cookie, 0, _) -> t_atom(); % | t_atom('nocookie') +type(erlang, get_keys, 1, _) -> t_list(); +type(erlang, get_module_info, 1, Xs) -> + strict(arg_types(erlang, get_module_info, 1), Xs, + fun (_) -> + t_list(t_tuple([t_atom(), t_list(t_tuple([t_atom(), t_any()]))])) + end); +type(erlang, get_module_info, 2, Xs) -> + T_module_info_2_returns = + t_sup([t_atom(), + t_list(t_tuple([t_atom(), t_any()])), + t_list(t_tuple([t_atom(), t_arity(), t_integer()]))]), + strict(arg_types(erlang, get_module_info, 2), Xs, + fun ([Module, Item]) -> + case t_is_atom(Item) of + true -> + case t_atom_vals(Item) of + ['module'] -> t_inf(t_atom(), Module); + ['imports'] -> t_nil(); + ['exports'] -> t_list(t_tuple([t_atom(), t_arity()])); + ['functions'] -> t_list(t_tuple([t_atom(), t_arity()])); + ['attributes'] -> t_list(t_tuple([t_atom(), t_any()])); + ['compile'] -> t_list(t_tuple([t_atom(), t_any()])); + ['native_addresses'] -> % [{FunName, Arity, Address}] + t_list(t_tuple([t_atom(), t_arity(), t_integer()])); + List when is_list(List) -> + T_module_info_2_returns; + unknown -> + T_module_info_2_returns + end; + false -> + T_module_info_2_returns + end + end); +type(erlang, get_stacktrace, 0, _) -> + t_list(t_tuple([t_atom(), t_atom(), t_sup([t_arity(), t_list()])])); +type(erlang, group_leader, 0, _) -> t_pid(); +type(erlang, group_leader, 2, Xs) -> + strict(arg_types(erlang, group_leader, 2), Xs, + fun (_) -> t_atom('true') end); +type(erlang, hash, 2, Xs) -> + strict(arg_types(erlang, hash, 2), Xs, fun (_) -> t_integer() end); +type(erlang, hd, 1, Xs) -> + strict(arg_types(erlang, hd, 1), Xs, fun ([X]) -> t_cons_hd(X) end); +type(erlang, integer_to_list, 1, Xs) -> + strict(arg_types(erlang, integer_to_list, 1), Xs, + fun (_) -> t_string() end); +type(erlang, info, 1, Xs) -> type(erlang, system_info, 1, Xs); % alias +type(erlang, iolist_size, 1, Xs) -> + strict(arg_types(erlang, iolist_size, 1), Xs, + fun (_) -> t_non_neg_integer() end); +type(erlang, iolist_to_binary, 1, Xs) -> + strict(arg_types(erlang, iolist_to_binary, 1), Xs, + fun (_) -> t_binary() end); +type(erlang, is_alive, 0, _) -> t_boolean(); +type(erlang, is_atom, 1, Xs) -> + Fun = fun (X) -> check_guard(X, fun (Y) -> t_is_atom(Y) end, t_atom()) end, + strict(arg_types(erlang, is_atom, 1), Xs, Fun); +type(erlang, is_binary, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_binary(Y) end, t_binary()) + end, + strict(arg_types(erlang, is_binary, 1), Xs, Fun); +type(erlang, is_bitstr, 1, Xs) -> % XXX: TAKE OUT + type(erlang, is_bitstring, 1, Xs); +type(erlang, is_bitstring, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_bitstr(Y) end, t_bitstr()) + end, + strict(arg_types(erlang, is_bitstring, 1), Xs, Fun); +type(erlang, is_boolean, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_boolean(Y) end, t_boolean()) + end, + strict(arg_types(erlang, is_boolean, 1), Xs, Fun); +type(erlang, is_builtin, 3, Xs) -> + strict(arg_types(erlang, is_builtin, 3), Xs, fun (_) -> t_boolean() end); +type(erlang, is_constant, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_constant(Y) end, t_constant()) + end, + strict(arg_types(erlang, is_constant, 1), Xs, Fun); +type(erlang, is_float, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_float(Y) end, t_float()) + end, + strict(arg_types(erlang, is_float, 1), Xs, Fun); +type(erlang, is_function, 1, Xs) -> + Fun = fun (X) -> check_guard(X, fun (Y) -> t_is_fun(Y) end, t_fun()) end, + strict(arg_types(erlang, is_function, 1), Xs, Fun); +type(erlang, is_function, 2, Xs) -> + Fun = fun ([FunType, ArityType]) -> + case t_number_vals(ArityType) of + unknown -> t_boolean(); + [Val] -> + FunConstr = t_fun(any_list(Val), t_any()), + Fun2 = fun (X) -> + t_is_subtype(X, FunConstr) andalso (not t_is_none(X)) + end, + check_guard_single(FunType, Fun2, FunConstr); + IntList when is_list(IntList) -> t_boolean() %% true? + end + end, + strict(arg_types(erlang, is_function, 2), Xs, Fun); +type(erlang, is_integer, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_integer(Y) end, t_integer()) + end, + strict(arg_types(erlang, is_integer, 1), Xs, Fun); +type(erlang, is_list, 1, Xs) -> + Fun = fun (X) -> + Fun2 = fun (Y) -> t_is_maybe_improper_list(Y) end, + check_guard(X, Fun2, t_maybe_improper_list()) + end, + strict(arg_types(erlang, is_list, 1), Xs, Fun); +type(erlang, is_number, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_number(Y) end, t_number()) + end, + strict(arg_types(erlang, is_number, 1), Xs, Fun); +type(erlang, is_pid, 1, Xs) -> + Fun = fun (X) -> check_guard(X, fun (Y) -> t_is_pid(Y) end, t_pid()) end, + strict(arg_types(erlang, is_pid, 1), Xs, Fun); +type(erlang, is_port, 1, Xs) -> + Fun = fun (X) -> check_guard(X, fun (Y) -> t_is_port(Y) end, t_port()) end, + strict(arg_types(erlang, is_port, 1), Xs, Fun); +type(erlang, is_process_alive, 1, Xs) -> + strict(arg_types(erlang, is_process_alive, 1), Xs, + fun (_) -> t_boolean() end); +type(erlang, is_record, 2, Xs) -> + Fun = fun ([X, Y]) -> + case t_is_tuple(X) of + false -> + case t_is_none(t_inf(t_tuple(), X)) of + true -> t_atom('false'); + false -> t_boolean() + end; + true -> + case t_tuple_subtypes(X) of + unknown -> t_boolean(); + [Tuple] -> + case t_tuple_args(Tuple) of + %% any -> t_boolean(); + [Tag|_] -> + case t_is_atom(Tag) of + false -> + TagAtom = t_inf(Tag, t_atom()), + case t_is_none(TagAtom) of + true -> t_atom('false'); + false -> t_boolean() + end; + true -> + case t_atom_vals(Tag) of + [RealTag] -> + case t_atom_vals(Y) of + [RealTag] -> t_atom('true'); + _ -> t_boolean() + end; + _ -> t_boolean() + end + end + end; + List when length(List) >= 2 -> + t_sup([type(erlang, is_record, 2, [T, Y]) || T <- List]) + end + end + end, + strict(arg_types(erlang, is_record, 2), Xs, Fun); +type(erlang, is_record, 3, Xs) -> + Fun = fun ([X, Y, Z]) -> + Arity = t_number_vals(Z), + case t_is_tuple(X) of + false when length(Arity) =:= 1 -> + [RealArity] = Arity, + case t_is_none(t_inf(t_tuple(RealArity), X)) of + true -> t_atom('false'); + false -> t_boolean() + end; + false -> + case t_is_none(t_inf(t_tuple(), X)) of + true -> t_atom('false'); + false -> t_boolean() + end; + true when length(Arity) =:= 1 -> + [RealArity] = Arity, + case t_tuple_subtypes(X) of + unknown -> t_boolean(); + [Tuple] -> + case t_tuple_args(Tuple) of + %% any -> t_boolean(); + Args when length(Args) =:= RealArity -> + Tag = hd(Args), + case t_is_atom(Tag) of + false -> + TagAtom = t_inf(Tag, t_atom()), + case t_is_none(TagAtom) of + true -> t_atom('false'); + false -> t_boolean() + end; + true -> + case t_atom_vals(Tag) of + [RealTag] -> + case t_atom_vals(Y) of + [RealTag] -> t_atom('true'); + _ -> t_boolean() + end; + _ -> t_boolean() + end + end; + Args when length(Args) =/= RealArity -> + t_atom('false') + end; + [_, _|_] -> + t_boolean() + end; + true -> + t_boolean() + end + end, + strict(arg_types(erlang, is_record, 3), Xs, Fun); +type(erlang, is_reference, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_reference(Y) end, t_reference()) + end, + strict(arg_types(erlang, is_reference, 1), Xs, Fun); +type(erlang, is_tuple, 1, Xs) -> + Fun = fun (X) -> + check_guard(X, fun (Y) -> t_is_tuple(Y) end, t_tuple()) + end, + strict(arg_types(erlang, is_tuple, 1), Xs, Fun); +type(erlang, length, 1, Xs) -> + strict(arg_types(erlang, length, 1), Xs, fun (_) -> t_non_neg_fixnum() end); +type(erlang, link, 1, Xs) -> + strict(arg_types(erlang, link, 1), Xs, fun (_) -> t_atom('true') end); +type(erlang, list_to_atom, 1, Xs) -> + strict(arg_types(erlang, list_to_atom, 1), Xs, fun (_) -> t_atom() end); +type(erlang, list_to_binary, 1, Xs) -> + strict(arg_types(erlang, list_to_binary, 1), Xs, + fun (_) -> t_binary() end); +type(erlang, list_to_bitstr, 1, Xs) -> + type(erlang, list_to_bitstring, 1, Xs); +type(erlang, list_to_bitstring, 1, Xs) -> + strict(arg_types(erlang, list_to_bitstring, 1), Xs, + fun (_) -> t_bitstr() end); +type(erlang, list_to_existing_atom, 1, Xs) -> + strict(arg_types(erlang, list_to_existing_atom, 1), Xs, + fun (_) -> t_atom() end); +type(erlang, list_to_float, 1, Xs) -> + strict(arg_types(erlang, list_to_float, 1), Xs, fun (_) -> t_float() end); +type(erlang, list_to_integer, 1, Xs) -> + strict(arg_types(erlang, list_to_integer, 1), Xs, + fun (_) -> t_integer() end); +type(erlang, list_to_pid, 1, Xs) -> + strict(arg_types(erlang, list_to_pid, 1), Xs, fun (_) -> t_pid() end); +type(erlang, list_to_tuple, 1, Xs) -> + strict(arg_types(erlang, list_to_tuple, 1), Xs, fun (_) -> t_tuple() end); +type(erlang, loaded, 0, _) -> + t_list(t_atom()); +type(erlang, load_module, 2, Xs) -> + strict(arg_types(erlang, load_module, 2), Xs, + fun ([Mod,_Bin]) -> t_code_load_return(Mod) end); +type(erlang, localtime, 0, Xs) -> + type(erlang, universaltime, 0, Xs); % same +type(erlang, localtime_to_universaltime, 1, Xs) -> + type(erlang, universaltime_to_localtime, 1, Xs); % same +type(erlang, localtime_to_universaltime, 2, Xs) -> + strict(arg_types(erlang, localtime_to_universaltime, 2), Xs, % typecheck + fun ([X,_]) -> type(erlang, localtime_to_universaltime, 1, [X]) end); +type(erlang, make_fun, 3, Xs) -> + strict(arg_types(erlang, make_fun, 3), Xs, + fun ([_, _, Arity]) -> + case t_number_vals(Arity) of + [N] -> + case is_integer(N) andalso 0 =< N andalso N =< 255 of + true -> t_fun(N, t_any()); + false -> t_none() + end; + _Other -> t_fun() + end + end); +type(erlang, make_ref, 0, _) -> t_reference(); +type(erlang, make_tuple, 2, Xs) -> + strict(arg_types(erlang, make_tuple, 2), Xs, + fun ([Int, _]) -> + case t_number_vals(Int) of + [N] when is_integer(N), N >= 0 -> t_tuple(N); + _Other -> t_tuple() + end + end); +type(erlang, make_tuple, 3, Xs) -> + strict(arg_types(erlang, make_tuple, 3), Xs, + fun ([Int, _, _]) -> + case t_number_vals(Int) of + [N] when is_integer(N), N >= 0 -> t_tuple(N); + _Other -> t_tuple() + end + end); +type(erlang, match_spec_test, 3, Xs) -> + strict(arg_types(erlang, match_spec_test, 3), Xs, + fun (_) -> t_sup(t_tuple([t_atom('ok'), + t_any(), % it can be any term + t_list(t_atom('return_trace')), + t_match_spec_test_errors()]), + t_tuple([t_atom('error'), + t_match_spec_test_errors()])) end); +type(erlang, md5, 1, Xs) -> + strict(arg_types(erlang, md5, 1), Xs, fun (_) -> t_binary() end); +type(erlang, md5_final, 1, Xs) -> + strict(arg_types(erlang, md5_final, 1), Xs, fun (_) -> t_binary() end); +type(erlang, md5_init, 0, _) -> t_binary(); +type(erlang, md5_update, 2, Xs) -> + strict(arg_types(erlang, md5_update, 2), Xs, fun (_) -> t_binary() end); +type(erlang, memory, 0, _) -> t_list(t_tuple([t_atom(), t_non_neg_fixnum()])); +type(erlang, memory, 1, Xs) -> + strict(arg_types(erlang, memory, 1), Xs, + fun ([Type]) -> + case t_is_atom(Type) of + true -> t_non_neg_fixnum(); + false -> + case t_is_list(Type) of + true -> t_list(t_tuple([t_atom(), t_non_neg_fixnum()])); + false -> + t_sup(t_non_neg_fixnum(), + t_list(t_tuple([t_atom(), t_non_neg_fixnum()]))) + end + end + end); +type(erlang, module_loaded, 1, Xs) -> + strict(arg_types(erlang, module_loaded, 1), Xs, fun (_) -> t_boolean() end); +type(erlang, monitor, 2, Xs) -> + strict(arg_types(erlang, monitor, 2), Xs, fun (_) -> t_reference() end); +type(erlang, monitor_node, 2, Xs) -> + strict(arg_types(erlang, monitor_node, 2), Xs, + fun (_) -> t_atom('true') end); +type(erlang, monitor_node, 3, Xs) -> + strict(arg_types(erlang, monitor_node, 3), Xs, + fun (_) -> t_atom('true') end); +type(erlang, node, 0, _) -> t_node(); +type(erlang, node, 1, Xs) -> + strict(arg_types(erlang, node, 1), Xs, fun (_) -> t_node() end); +type(erlang, nodes, 0, _) -> t_list(t_node()); +type(erlang, nodes, 1, Xs) -> + strict(arg_types(erlang, nodes, 1), Xs, fun (_) -> t_list(t_node()) end); +type(erlang, now, 0, _) -> + t_time(); +type(erlang, open_port, 2, Xs) -> + strict(arg_types(erlang, open_port, 2), Xs, fun (_) -> t_port() end); +type(erlang, phash, 2, Xs) -> + strict(arg_types(erlang, phash, 2), Xs, fun (_) -> t_pos_integer() end); +type(erlang, phash2, 1, Xs) -> + strict(arg_types(erlang, phash2, 1), Xs, fun (_) -> t_non_neg_integer() end); +type(erlang, phash2, 2, Xs) -> + strict(arg_types(erlang, phash2, 2), Xs, fun (_) -> t_non_neg_integer() end); +type(erlang, pid_to_list, 1, Xs) -> + strict(arg_types(erlang, pid_to_list, 1), Xs, fun (_) -> t_string() end); +type(erlang, port_call, 3, Xs) -> + strict(arg_types(erlang, port_call, 3), Xs, fun (_) -> t_any() end); +type(erlang, port_close, 1, Xs) -> + strict(arg_types(erlang, port_close, 1), Xs, + fun (_) -> t_atom('true') end); +type(erlang, port_command, 2, Xs) -> + strict(arg_types(erlang, port_command, 2), Xs, + fun (_) -> t_atom('true') end); +type(erlang, port_command, 3, Xs) -> + strict(arg_types(erlang, port_command, 3), Xs, + fun (_) -> t_boolean() end); +type(erlang, port_connect, 2, Xs) -> + strict(arg_types(erlang, port_connect, 2), Xs, + fun (_) -> t_atom('true') end); +type(erlang, port_control, 3, Xs) -> + strict(arg_types(erlang, port_control, 3), Xs, + fun (_) -> t_sup(t_string(), t_binary()) end); +type(erlang, port_get_data, 1, Xs) -> + strict(arg_types(erlang, port_get_data, 1), Xs, fun (_) -> t_any() end); +type(erlang, port_info, 1, Xs) -> + strict(arg_types(erlang, port_info, 1), Xs, + fun (_) -> t_sup(t_atom('undefined'), t_list()) end); +type(erlang, port_info, 2, Xs) -> + strict(arg_types(erlang, port_info, 2), Xs, + fun ([_Port, Item]) -> + t_sup(t_atom('undefined'), + case t_atom_vals(Item) of + ['connected'] -> t_tuple([Item, t_pid()]); + ['id'] -> t_tuple([Item, t_integer()]); + ['input'] -> t_tuple([Item, t_integer()]); + ['links'] -> t_tuple([Item, t_list(t_pid())]); + ['name'] -> t_tuple([Item, t_string()]); + ['output'] -> t_tuple([Item, t_integer()]); + ['registered_name'] -> t_tuple([Item, t_atom()]); + List when is_list(List) -> + t_tuple([t_sup([t_atom(A) || A <- List]), + t_sup([t_atom(), t_integer(), + t_pid(), t_list(t_pid()), + t_string()])]); + unknown -> + [_, PosItem] = arg_types(erlang, port_info, 2), + t_tuple([PosItem, + t_sup([t_atom(), t_integer(), + t_pid(), t_list(t_pid()), + t_string()])]) + end) + end); +type(erlang, port_to_list, 1, Xs) -> + strict(arg_types(erlang, port_to_list, 1), Xs, fun (_) -> t_string() end); +type(erlang, ports, 0, _) -> t_list(t_port()); +type(erlang, port_set_data, 2, Xs) -> + strict(arg_types(erlang, port_set_data, 2), Xs, + fun (_) -> t_atom('true') end); +type(erlang, pre_loaded, 0, _) -> t_list(t_atom()); +type(erlang, process_display, 2, _) -> t_atom('true'); +type(erlang, process_flag, 2, Xs) -> + T_process_flag_returns = t_sup([t_boolean(), t_atom(), t_non_neg_integer()]), + strict(arg_types(erlang, process_flag, 2), Xs, + fun ([Flag, _Option]) -> + case t_is_atom(Flag) of + true -> + case t_atom_vals(Flag) of + ['error_handler'] -> t_atom(); + ['min_heap_size'] -> t_non_neg_integer(); + ['monitor_nodes'] -> t_boolean(); + ['priority'] -> t_process_priority_level(); + ['save_calls'] -> t_non_neg_integer(); + ['trap_exit'] -> t_boolean(); + List when is_list(List) -> + T_process_flag_returns; + unknown -> + T_process_flag_returns + end; + false -> % XXX: over-approximation if Flag is tuple + T_process_flag_returns + end + end); +type(erlang, process_flag, 3, Xs) -> + strict(arg_types(erlang, process_flag, 3), Xs, + fun (_) -> t_non_neg_integer() end); +type(erlang, process_info, 1, Xs) -> + strict(arg_types(erlang, process_info, 1), Xs, + fun (_) -> + t_sup(t_list(t_tuple([t_pinfo(), t_any()])), + t_atom('undefined')) + end); +type(erlang, process_info, 2, Xs) -> + %% we define all normal return values: the return when the process exists + %% t_nil() is the return for 'registered_name'; perhaps for more + T_process_info_2_normal_returns = + t_sup([t_tuple([t_pinfo_item(), t_any()]), t_nil()]), + strict(arg_types(erlang, process_info, 2), Xs, + fun ([_Pid, InfoItem]) -> + Ret = case t_is_atom(InfoItem) of + true -> + case t_atom_vals(InfoItem) of + ['backtrace'] -> t_tuple([InfoItem, t_binary()]); + ['current_function'] -> t_tuple([InfoItem, t_mfa()]); + ['dictionary'] -> t_tuple([InfoItem, t_list()]); + ['error_handler'] -> t_tuple([InfoItem, t_atom()]); + ['garbage_collection'] -> + t_tuple([InfoItem, t_list()]); + ['group_leader'] -> t_tuple([InfoItem, t_pid()]); + ['heap_size'] -> + t_tuple([InfoItem, t_non_neg_integer()]); + ['initial_call'] -> t_tuple([InfoItem, t_mfa()]); + ['last_calls'] -> + t_tuple([InfoItem, + t_sup(t_atom('false'), t_list())]); + ['links'] -> t_tuple([InfoItem, t_list(t_pid())]); + ['memory'] -> + t_tuple([InfoItem, t_non_neg_integer()]); + ['message_binary'] -> t_tuple([InfoItem, t_list()]); + ['message_queue_len'] -> + t_tuple([InfoItem, t_non_neg_integer()]); + ['messages'] -> t_tuple([InfoItem, t_list()]); + ['monitored_by'] -> + t_tuple([InfoItem, t_list(t_pid())]); + ['monitors'] -> + t_tuple([InfoItem, + t_list(t_sup(t_tuple([t_atom('process'), + t_pid()]), + t_tuple([t_atom('process'), + t_tuple([t_atom(), + t_atom()])])))]); + ['priority'] -> + t_tuple([InfoItem, t_process_priority_level()]); + ['reductions'] -> + t_tuple([InfoItem, t_non_neg_integer()]); + ['registered_name'] -> + t_sup(t_tuple([InfoItem, t_atom()]), t_nil()); + ['sequential_trace_token'] -> + t_tuple([InfoItem, t_any()]); %% Underspecified + ['stack_size'] -> + t_tuple([InfoItem, t_non_neg_integer()]); + ['status'] -> + t_tuple([InfoItem, t_process_status()]); + ['suspending'] -> + t_tuple([InfoItem, + t_list(t_tuple([t_pid(), + t_non_neg_integer(), + t_non_neg_integer()]))]); + ['total_heap_size'] -> + t_tuple([InfoItem, t_non_neg_integer()]); + ['trap_exit'] -> + t_tuple([InfoItem, t_boolean()]); + List when is_list(List) -> + T_process_info_2_normal_returns; + unknown -> + T_process_info_2_normal_returns + end; + false -> + case t_is_list(InfoItem) of + true -> + t_list(t_tuple([t_pinfo_item(), t_any()])); + false -> + t_sup(T_process_info_2_normal_returns, + t_list(t_tuple([t_pinfo_item(), t_any()]))) + end + end, + t_sup([Ret, t_atom('undefined')]) + end); +type(erlang, processes, 0, _) -> t_list(t_pid()); +type(erlang, purge_module, 1, Xs) -> + strict(arg_types(erlang, purge_module, 1), Xs, + fun (_) -> t_atom('true') end); +type(erlang, put, 2, Xs) -> + strict(arg_types(erlang, put, 2), Xs, fun (_) -> t_any() end); +type(erlang, raise, 3, _) -> t_none(); +type(erlang, read_timer, 1, Xs) -> + strict(arg_types(erlang, read_timer, 1), Xs, + fun (_) -> t_sup(t_non_neg_integer(), t_atom('false')) end); +type(erlang, ref_to_list, 1, Xs) -> + strict(arg_types(erlang, ref_to_list, 1), Xs, fun (_) -> t_string() end); +type(erlang, register, 2, Xs) -> + strict(arg_types(erlang, register, 2), Xs, fun (_) -> t_atom('true') end); +type(erlang, registered, 0, _) -> t_list(t_atom()); +type(erlang, resume_process, 1, Xs) -> + strict(arg_types(erlang, resume_process, 1), Xs, + fun (_) -> t_any() end); %% TODO: overapproximation -- fix this +type(erlang, round, 1, Xs) -> + strict(arg_types(erlang, round, 1), Xs, fun (_) -> t_integer() end); +type(erlang, self, 0, _) -> t_pid(); +type(erlang, send, 2, Xs) -> type(erlang, '!', 2, Xs); % alias +type(erlang, send, 3, Xs) -> + strict(arg_types(erlang, send, 3), Xs, + fun (_) -> t_sup(t_atom('ok'), t_sendoptions()) end); +type(erlang, send_after, 3, Xs) -> + strict(arg_types(erlang, send_after, 3), Xs, fun (_) -> t_reference() end); +type(erlang, seq_trace, 2, Xs) -> + strict(arg_types(erlang, seq_trace, 2), Xs, + fun (_) -> t_sup(t_seq_trace_info_returns(), t_tuple(5)) end); +type(erlang, seq_trace_info, 1, Xs) -> + strict(arg_types(erlang, seq_trace_info, 1), Xs, + fun ([Item]) -> + case t_atom_vals(Item) of + ['label'] -> + t_sup(t_tuple([Item, t_non_neg_integer()]), t_nil()); + ['serial'] -> + t_sup(t_tuple([Item, t_tuple([t_non_neg_integer(), + t_non_neg_integer()])]), + t_nil()); + ['send'] -> t_tuple([Item, t_boolean()]); + ['receive'] -> t_tuple([Item, t_boolean()]); + ['print'] -> t_tuple([Item, t_boolean()]); + ['timestamp'] -> t_tuple([Item, t_boolean()]); + List when is_list(List) -> + t_seq_trace_info_returns(); + unknown -> + t_seq_trace_info_returns() + end + end); +type(erlang, seq_trace_print, 1, Xs) -> + strict(arg_types(erlang, seq_trace_print, 1), Xs, fun (_) -> t_boolean() end); +type(erlang, seq_trace_print, 2, Xs) -> + strict(arg_types(erlang, seq_trace_print, 2), Xs, fun (_) -> t_boolean() end); +type(erlang, set_cookie, 2, Xs) -> + strict(arg_types(erlang, set_cookie, 2), Xs, fun (_) -> t_atom('true') end); +type(erlang, setelement, 3, Xs) -> + strict(arg_types(erlang, setelement, 3), Xs, + fun ([X1, X2, X3]) -> + case t_tuple_subtypes(X2) of + unknown -> t_tuple(); + [_] -> + Sz = t_tuple_size(X2), + As = t_tuple_args(X2), + case t_number_vals(X1) of + unknown -> + t_tuple([t_sup(X, X3) || X <- As]); + [N] when is_integer(N), 1 =< N, N =< Sz -> + t_tuple(list_replace(N, X3, As)); + [N] when is_integer(N), N < 1 -> + t_none(); + [N] when is_integer(N), N > Sz -> + t_none(); + Ns -> + Fun = fun (N, XL) when is_integer(N), 1 =< N, N =< Sz -> + X = lists:nth(N, XL), + Y = t_sup(X, X3), + list_replace(N, Y, XL); + (_, XL) -> + XL + end, + t_tuple(lists:foldl(Fun, As, Ns)) + end; + Ts when is_list(Ts) -> + t_sup([type(erlang, setelement, 3, [X1, Y, X3]) || Y <- Ts]) + end + end); +type(erlang, setnode, 2, Xs) -> + strict(arg_types(erlang, setnode, 2), Xs, fun (_) -> t_atom('true') end); +type(erlang, setnode, 3, Xs) -> + strict(arg_types(erlang, setnode, 3), Xs, fun (_) -> t_atom('true') end); +type(erlang, size, 1, Xs) -> + strict(arg_types(erlang, size, 1), Xs, fun (_) -> t_non_neg_integer() end); +type(erlang, spawn, 1, Xs) -> + strict(arg_types(erlang, spawn, 1), Xs, fun (_) -> t_pid() end); +type(erlang, spawn, 2, Xs) -> + strict(arg_types(erlang, spawn, 2), Xs, fun (_) -> t_pid() end); +type(erlang, spawn, 3, Xs) -> + strict(arg_types(erlang, spawn, 3), Xs, fun (_) -> t_pid() end); +type(erlang, spawn, 4, Xs) -> + strict(arg_types(erlang, spawn, 4), Xs, fun (_) -> t_pid() end); +type(erlang, spawn_link, 1, Xs) -> type(erlang, spawn, 1, Xs); % same +type(erlang, spawn_link, 2, Xs) -> type(erlang, spawn, 2, Xs); % same +type(erlang, spawn_link, 3, Xs) -> type(erlang, spawn, 3, Xs); % same +type(erlang, spawn_link, 4, Xs) -> type(erlang, spawn, 4, Xs); % same +type(erlang, spawn_opt, 1, Xs) -> + strict(arg_types(erlang, spawn_opt, 1), Xs, + fun ([Tuple]) -> + Fun = fun (TS) -> + [_, _, _, List] = t_tuple_args(TS), + t_spawn_opt_return(List) + end, + t_sup([Fun(TS) || TS <- t_tuple_subtypes(Tuple)]) + end); +type(erlang, spawn_opt, 2, Xs) -> + strict(arg_types(erlang, spawn_opt, 2), Xs, + fun ([_, List]) -> t_spawn_opt_return(List) end); +type(erlang, spawn_opt, 3, Xs) -> + strict(arg_types(erlang, spawn_opt, 3), Xs, + fun ([_, _, List]) -> t_spawn_opt_return(List) end); +type(erlang, spawn_opt, 4, Xs) -> + strict(arg_types(erlang, spawn_opt, 4), Xs, + fun ([_, _, _, List]) -> t_spawn_opt_return(List) end); +type(erlang, split_binary, 2, Xs) -> + strict(arg_types(erlang, split_binary, 2), Xs, + fun (_) -> t_tuple([t_binary(), t_binary()]) end); +type(erlang, start_timer, 3, Xs) -> + strict(arg_types(erlang, start_timer, 3), Xs, fun (_) -> t_reference() end); +type(erlang, statistics, 1, Xs) -> + strict(arg_types(erlang, statistics, 1), Xs, + fun ([Type]) -> + T_statistics_1 = t_sup([t_non_neg_integer(), + t_tuple([t_non_neg_integer(), + t_non_neg_integer()]), + %% When called with the argument 'io'. + t_tuple([t_tuple([t_atom('input'), + t_non_neg_integer()]), + t_tuple([t_atom('output'), + t_non_neg_integer()])]), + t_tuple([t_non_neg_integer(), + t_non_neg_integer(), + t_non_neg_integer()])]), + case t_atom_vals(Type) of + ['context_switches'] -> + t_tuple([t_non_neg_integer(), t_integer(0)]); + ['exact_reductions'] -> + t_tuple([t_non_neg_integer(), t_non_neg_integer()]); + ['garbage_collection'] -> + t_tuple([t_non_neg_integer(), + t_non_neg_integer(), + t_integer(0)]); + ['io'] -> + t_tuple([t_tuple([t_atom('input'), t_non_neg_integer()]), + t_tuple([t_atom('output'), t_non_neg_integer()])]); + ['reductions'] -> + t_tuple([t_non_neg_integer(), t_non_neg_integer()]); + ['run_queue'] -> + t_non_neg_integer(); + ['runtime'] -> + t_tuple([t_non_neg_integer(), t_integer(0)]); + ['wall_clock'] -> + t_tuple([t_non_neg_integer(), t_integer(0)]); + List when is_list(List) -> + T_statistics_1; + unknown -> + T_statistics_1 + end + end); +type(erlang, suspend_process, 1, Xs) -> + strict(arg_types(erlang, suspend_process, 1), Xs, + fun (_) -> t_atom('true') end); +type(erlang, suspend_process, 2, Xs) -> + strict(arg_types(erlang, suspend_process, 2), Xs, + fun (_) -> t_boolean() end); +type(erlang, system_flag, 2, Xs) -> + strict(arg_types(erlang, system_flag, 2), Xs, + fun ([Flag,_Value]) -> + %% this provides an overapproximation of all return values + T_system_flag_2 = t_sup([t_boolean(), + t_integer(), + t_sequential_tracer(), + t_system_cpu_topology(), + t_system_multi_scheduling()]), + case t_is_atom(Flag) of + true -> + case t_atom_vals(Flag) of + ['backtrace_depth'] -> + t_non_neg_fixnum(); + ['cpu_topology'] -> + t_system_cpu_topology(); + ['debug_flags'] -> + t_atom('true'); + ['display_items'] -> + t_non_neg_fixnum(); + ['fullsweep_after'] -> + t_non_neg_fixnum(); + ['min_heap_size'] -> + t_non_neg_fixnum(); + ['multi_scheduling'] -> + t_system_multi_scheduling(); + ['schedulers_online'] -> + t_pos_fixnum(); + ['scheduler_bind_type'] -> + t_scheduler_bind_type_results(); + ['sequential_tracer'] -> + t_sequential_tracer(); + ['trace_control_word'] -> + t_integer(); + List when is_list(List) -> + T_system_flag_2; + unknown -> + T_system_flag_2 + end; + false -> + case t_is_integer(Flag) of % SHOULD BE: t_is_fixnum + true -> + t_atom('true'); + false -> + T_system_flag_2 + end + end + end); +type(erlang, system_info, 1, Xs) -> + strict(arg_types(erlang, system_info, 1), Xs, + fun ([Type]) -> + case t_is_atom(Type) of + true -> + case t_atom_vals(Type) of + ['allocated_areas'] -> + t_list(t_sup([t_tuple([t_atom(),t_non_neg_integer()]), + t_tuple([t_atom(), + t_non_neg_integer(), + t_non_neg_integer()])])); + ['allocator'] -> + t_tuple([t_sup([t_atom('undefined'), + t_atom('elib_malloc'), + t_atom('glibc')]), + t_list(t_integer()), + t_list(t_atom()), + t_list(t_tuple([t_atom(), + t_list(t_tuple([t_atom(), + t_any()]))]))]); + ['break_ignored'] -> + t_boolean(); + ['cpu_topology'] -> + t_system_cpu_topology(); + ['compat_rel'] -> + t_non_neg_fixnum(); + ['creation'] -> + t_fixnum(); + ['debug_compiled'] -> + t_boolean(); + ['dist'] -> + t_binary(); + ['dist_ctrl'] -> + t_list(t_tuple([t_atom(), t_sup([t_pid(), t_port])])); + ['elib_malloc'] -> + t_sup([t_atom('false'), + t_list(t_tuple([t_atom(), t_any()]))]); + ['endian'] -> + t_sup([t_atom('big'), t_atom('little')]); + ['fullsweep_after'] -> + t_tuple([t_atom('fullsweep_after'), t_non_neg_integer()]); + ['garbage_collection'] -> + t_list(); + ['global_heaps_size'] -> + t_non_neg_integer(); + ['heap_sizes'] -> + t_list(t_integer()); + ['heap_type'] -> + t_sup([t_atom('private'), t_atom('hybrid')]); + ['hipe_architecture'] -> + t_sup([t_atom('amd64'), t_atom('arm'), + t_atom('powerpc'), t_atom('undefined'), + t_atom('ultrasparc'), t_atom('x86')]); + ['info'] -> + t_binary(); + ['internal_cpu_topology'] -> %% Undocumented internal feature + t_internal_cpu_topology(); + ['loaded'] -> + t_binary(); + ['logical_processors'] -> + t_non_neg_fixnum(); + ['machine'] -> + t_string(); + ['multi_scheduling'] -> + t_system_multi_scheduling(); + ['multi_scheduling_blockers'] -> + t_list(t_pid()); + ['os_type'] -> + t_tuple([t_sup([t_atom('ose'), % XXX: undocumented + t_atom('unix'), + t_atom('vxworks'), + t_atom('win32')]), + t_atom()]); + ['os_version'] -> + t_sup(t_tuple([t_non_neg_fixnum(), + t_non_neg_fixnum(), + t_non_neg_fixnum()]), + t_string()); + ['process_count'] -> + t_non_neg_fixnum(); + ['process_limit'] -> + t_non_neg_fixnum(); + ['procs'] -> + t_binary(); + ['scheduler_bindings'] -> + t_tuple(); + ['scheduler_bind_type'] -> + t_scheduler_bind_type_results(); + ['schedulers'] -> + t_pos_fixnum(); + ['schedulers_online'] -> + t_pos_fixnum(); + ['sequential_tracer'] -> + t_tuple([t_atom('sequential_tracer'), + t_sequential_tracer()]); + ['smp_support'] -> + t_boolean(); + ['system_architecture'] -> + t_string(); + ['system_version'] -> + t_string(); + ['threads'] -> + t_boolean(); + ['thread_pool_size'] -> + t_non_neg_fixnum(); + ['trace_control_word'] -> + t_integer(); + ['version'] -> + t_string(); + ['wordsize'] -> + t_integers([4,8]); + List when is_list(List) -> + t_any(); %% gross overapproximation + unknown -> + t_any() + end; + false -> %% This currently handles only {allocator, Alloc} + t_any() %% overapproximation as the return value might change + end + end); +type(erlang, system_monitor, 0, Xs) -> + strict(arg_types(erlang, system_monitor, 0), Xs, + fun (_) -> t_system_monitor_settings() end); +type(erlang, system_monitor, 1, Xs) -> + strict(arg_types(erlang, system_monitor, 1), Xs, + fun (_) -> t_system_monitor_settings() end); +type(erlang, system_monitor, 2, Xs) -> + strict(arg_types(erlang, system_monitor, 2), Xs, + fun (_) -> t_system_monitor_settings() end); +type(erlang, system_profile, 0, _) -> + t_system_profile_return(); +type(erlang, system_profile, 2, Xs) -> + strict(arg_types(erlang, system_profile, 2), Xs, + fun (_) -> t_system_profile_return() end); +type(erlang, term_to_binary, 1, Xs) -> + strict(arg_types(erlang, term_to_binary, 1), Xs, fun (_) -> t_binary() end); +type(erlang, term_to_binary, 2, Xs) -> + strict(arg_types(erlang, term_to_binary, 2), Xs, fun (_) -> t_binary() end); +type(erlang, time, 0, _) -> + t_tuple([t_non_neg_integer(), t_non_neg_integer(), t_non_neg_integer()]); +type(erlang, tl, 1, Xs) -> + strict(arg_types(erlang, tl, 1), Xs, fun ([X]) -> t_cons_tl(X) end); +type(erlang, trace, 3, Xs) -> + strict(arg_types(erlang, trace, 3), Xs, fun (_) -> t_integer() end); +type(erlang, trace_delivered, 1, Xs) -> + strict(arg_types(erlang, trace_delivered, 1), Xs, + fun (_) -> t_reference() end); +type(erlang, trace_info, 2, Xs) -> + strict(arg_types(erlang, trace_info, 2), Xs, + fun (_) -> + t_tuple([t_atom(), + t_sup([%% the following is info about a PID + t_list(t_atom()), t_pid(), t_port(), + %% the following is info about a func + t_atom('global'), t_atom('local'), + t_atom('false'), t_atom('true'), + t_list(), t_pid(), t_port(), + t_integer(), + t_list(t_tuple([t_atom(), t_any()])), + %% and this is the 'not found' value + t_atom('undefined')])]) + end); +type(erlang, trace_pattern, 2, Xs) -> + strict(arg_types(erlang, trace_pattern, 2), Xs, + fun (_) -> t_non_neg_fixnum() end); %% num of MFAs that match pattern +type(erlang, trace_pattern, 3, Xs) -> + strict(arg_types(erlang, trace_pattern, 3), Xs, + fun (_) -> t_non_neg_fixnum() end); %% num of MFAs that match pattern +type(erlang, trunc, 1, Xs) -> + strict(arg_types(erlang, trunc, 1), Xs, fun (_) -> t_integer() end); +type(erlang, tuple_size, 1, Xs) -> + strict(arg_types(erlang, tuple_size, 1), Xs, fun (_) -> t_non_neg_integer() end); +type(erlang, tuple_to_list, 1, Xs) -> + strict(arg_types(erlang, tuple_to_list, 1), Xs, + fun ([X]) -> + case t_tuple_subtypes(X) of + unknown -> t_list(); + SubTypes -> + Args = lists:flatten([t_tuple_args(ST) || ST <- SubTypes]), + %% Can be nil if the tuple can be {} + case lists:any(fun (T) -> + t_tuple_size(T) =:= 0 + end, SubTypes) of + true -> + %% Be careful here. If we had only {} we need to + %% keep the nil. + t_sup(t_nonempty_list(t_sup(Args)), t_nil()); + false -> + t_nonempty_list(t_sup(Args)) + end + end + end); +type(erlang, universaltime, 0, _) -> + t_tuple([t_date(), t_time()]); +type(erlang, universaltime_to_localtime, 1, Xs) -> + strict(arg_types(erlang, universaltime_to_localtime, 1), Xs, + fun ([T]) -> T end); +type(erlang, unlink, 1, Xs) -> + strict(arg_types(erlang, unlink, 1), Xs, fun (_) -> t_atom('true') end); +type(erlang, unregister, 1, Xs) -> + strict(arg_types(erlang, unregister, 1), Xs, fun (_) -> t_atom('true') end); +type(erlang, whereis, 1, Xs) -> + strict(arg_types(erlang, whereis, 1), Xs, + fun (_) -> t_sup([t_pid(), t_port(), t_atom('undefined')]) end); +type(erlang, yield, 0, _) -> t_atom('true'); +%%-- erl_prim_loader ---------------------------------------------------------- +type(erl_prim_loader, get_file, 1, Xs) -> + strict(arg_types(erl_prim_loader, get_file, 1), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_binary(), t_string()]), + t_atom('error')) + end); +type(erl_prim_loader, get_path, 0, _) -> + t_tuple([t_atom('ok'), t_list(t_string())]); +type(erl_prim_loader, set_path, 1, Xs) -> + strict(arg_types(erl_prim_loader, set_path, 1), Xs, + fun (_) -> t_atom('ok') end); +%%-- error_logger ------------------------------------------------------------- +type(error_logger, warning_map, 0, _) -> + t_sup([t_atom('info'), t_atom('warning'), t_atom('error')]); +%%-- erts_debug --------------------------------------------------------------- +type(erts_debug, breakpoint, 2, Xs) -> + strict(arg_types(erts_debug, breakpoint, 2), Xs, fun (_) -> t_fixnum() end); +type(erts_debug, disassemble, 1, Xs) -> + strict(arg_types(erts_debug, disassemble, 1), Xs, + fun (_) -> t_sup([t_atom('false'), + t_atom('undef'), + t_tuple([t_integer(), t_binary(), t_mfa()])]) end); +type(erts_debug, flat_size, 1, Xs) -> + strict(arg_types(erts_debug, flat_size, 1), Xs, fun (_) -> t_integer() end); +type(erts_debug, same, 2, Xs) -> + strict(arg_types(erts_debug, same, 2), Xs, fun (_) -> t_boolean() end); +%%-- ets ---------------------------------------------------------------------- +type(ets, all, 0, _) -> + t_list(t_tab()); +type(ets, delete, 1, Xs) -> + strict(arg_types(ets, delete, 1), Xs, fun (_) -> t_atom('true') end); +type(ets, delete, 2, Xs) -> + strict(arg_types(ets, delete, 2), Xs, fun (_) -> t_atom('true') end); +type(ets, delete_all_objects, 1, Xs) -> + strict(arg_types(ets, delete_all_objects, 1), Xs, + fun (_) -> t_atom('true') end); +type(ets, delete_object, 2, Xs) -> + strict(arg_types(ets, delete_object, 2), Xs, fun (_) -> t_atom('true') end); +type(ets, first, 1, Xs) -> + strict(arg_types(ets, first, 1), Xs, fun (_) -> t_any() end); +type(ets, give_away, 3, Xs) -> + strict(arg_types(ets, give_away, 3), Xs, fun (_) -> t_atom('true') end); +type(ets, info, 1, Xs) -> + strict(arg_types(ets, info, 1), Xs, + fun (_) -> + t_sup(t_list(t_tuple([t_ets_info_items(), t_any()])), + t_atom('undefined')) + end); +type(ets, info, 2, Xs) -> + strict(arg_types(ets, info, 2), Xs, fun (_) -> t_any() end); +type(ets, insert, 2, Xs) -> + strict(arg_types(ets, insert, 2), Xs, fun (_) -> t_atom('true') end); +type(ets, insert_new, 2, Xs) -> + strict(arg_types(ets, insert_new, 2), Xs, fun (_) -> t_boolean() end); +type(ets, is_compiled_ms, 1, Xs) -> + strict(arg_types(ets, is_compiled_ms, 1), Xs, fun (_) -> t_boolean() end); +type(ets, last, 1, Xs) -> + type(ets, first, 1, Xs); +type(ets, lookup, 2, Xs) -> + strict(arg_types(ets, lookup, 2), Xs, fun (_) -> t_list(t_tuple()) end); +type(ets, lookup_element, 3, Xs) -> + strict(arg_types(ets, lookup_element, 3), Xs, fun (_) -> t_any() end); +type(ets, match, 1, Xs) -> + strict(arg_types(ets, match, 1), Xs, fun (_) -> t_matchres() end); +type(ets, match, 2, Xs) -> + strict(arg_types(ets, match, 2), Xs, fun (_) -> t_list() end); +type(ets, match, 3, Xs) -> + strict(arg_types(ets, match, 3), Xs, fun (_) -> t_matchres() end); +type(ets, match_object, 1, Xs) -> type(ets, match, 1, Xs); +type(ets, match_object, 2, Xs) -> type(ets, match, 2, Xs); +type(ets, match_object, 3, Xs) -> type(ets, match, 3, Xs); +type(ets, match_spec_compile, 1, Xs) -> + strict(arg_types(ets, match_spec_compile, 1), Xs, fun (_) -> t_any() end); +type(ets, match_spec_run_r, 3, Xs) -> + strict(arg_types(ets, match_spec_run_r, 3), Xs, fun (_) -> t_list() end); +type(ets, member, 2, Xs) -> + strict(arg_types(ets, member, 2), Xs, fun (_) -> t_boolean() end); +type(ets, new, 2, Xs) -> + strict(arg_types(ets, new, 2), Xs, fun (_) -> t_tab() end); +type(ets, next, 2, Xs) -> + strict(arg_types(ets, next, 2), Xs, + %% t_any below stands for: term() | '$end_of_table' + fun (_) -> t_any() end); +type(ets, prev, 2, Xs) -> type(ets, next, 2, Xs); +type(ets, rename, 2, Xs) -> + strict(arg_types(ets, rename, 2), Xs, fun ([_, Name]) -> Name end); +type(ets, safe_fixtable, 2, Xs) -> + strict(arg_types(ets, safe_fixtable, 2), Xs, fun (_) -> t_atom('true') end); +type(ets, select, 1, Xs) -> + strict(arg_types(ets, select, 1), Xs, fun (_) -> t_matchres() end); +type(ets, select, 2, Xs) -> + strict(arg_types(ets, select, 2), Xs, fun (_) -> t_list() end); +type(ets, select, 3, Xs) -> + strict(arg_types(ets, select, 3), Xs, fun (_) -> t_matchres() end); +type(ets, select_count, 2, Xs) -> + strict(arg_types(ets, select_count, 2), Xs, + fun (_) -> t_non_neg_fixnum() end); +type(ets, select_delete, 2, Xs) -> + strict(arg_types(ets, select_delete, 2), Xs, + fun (_) -> t_non_neg_fixnum() end); +type(ets, select_reverse, 1, Xs) -> type(ets, select, 1, Xs); +type(ets, select_reverse, 2, Xs) -> type(ets, select, 2, Xs); +type(ets, select_reverse, 3, Xs) -> type(ets, select, 3, Xs); +type(ets, setopts, 2, Xs) -> + strict(arg_types(ets, setopts, 2), Xs, fun (_) -> t_atom('true') end); +type(ets, slot, 2, Xs) -> + strict(arg_types(ets, slot, 2), Xs, + fun (_) -> t_sup(t_list(t_tuple()), t_atom('$end_of_table')) end); +type(ets, update_counter, 3, Xs) -> + strict(arg_types(ets, update_counter, 3), Xs, fun (_) -> t_integer() end); +type(ets, update_element, 3, Xs) -> + strict(arg_types(ets, update_element, 3), Xs, fun (_) -> t_boolean() end); +%%-- file --------------------------------------------------------------------- +type(file, close, 1, Xs) -> + strict(arg_types(file, close, 1), Xs, fun (_) -> t_file_return() end); +type(file, delete, 1, Xs) -> + strict(arg_types(file, delete, 1), Xs, fun (_) -> t_file_return() end); +type(file, get_cwd, 0, _) -> + t_sup(t_tuple([t_atom('ok'), t_string()]), + t_tuple([t_atom('error'), t_file_posix_error()])); +type(file, make_dir, 1, Xs) -> + strict(arg_types(file, make_dir, 1), Xs, fun (_) -> t_file_return() end); +type(file, open, 2, Xs) -> + strict(arg_types(file, open, 2), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('ok'), t_file_io_device()]), + t_tuple([t_atom('error'), t_file_posix_error()])]) + end); +type(file, read_file, 1, Xs) -> + strict(arg_types(file, read_file, 1), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('ok'), t_binary()]), + t_tuple([t_atom('error'), t_file_posix_error()])]) + end); +type(file, set_cwd, 1, Xs) -> + strict(arg_types(file, set_cwd, 1), Xs, + fun (_) -> t_sup(t_atom('ok'), + t_tuple([t_atom('error'), t_file_posix_error()])) + end); +type(file, write_file, 2, Xs) -> + strict(arg_types(file, write_file, 2), Xs, fun (_) -> t_file_return() end); +%%-- gen_tcp ------------------------------------------------------------------ +%% NOTE: All type information for this module added to avoid loss of precision +type(gen_tcp, accept, 1, Xs) -> + strict(arg_types(gen_tcp, accept, 1), Xs, fun (_) -> t_gen_tcp_accept() end); +type(gen_tcp, accept, 2, Xs) -> + strict(arg_types(gen_tcp, accept, 2), Xs, fun (_) -> t_gen_tcp_accept() end); +type(gen_tcp, connect, 3, Xs) -> + strict(arg_types(gen_tcp, connect, 3), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_socket()]), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +type(gen_tcp, connect, 4, Xs) -> + strict(arg_types(gen_tcp, connect, 4), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_socket()]), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +type(gen_tcp, listen, 2, Xs) -> + strict(arg_types(gen_tcp, listen, 2), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_socket()]), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +type(gen_tcp, recv, 2, Xs) -> + strict(arg_types(gen_tcp, recv, 2), Xs, fun (_) -> t_gen_tcp_recv() end); +type(gen_tcp, recv, 3, Xs) -> + strict(arg_types(gen_tcp, recv, 3), Xs, fun (_) -> t_gen_tcp_recv() end); +type(gen_tcp, send, 2, Xs) -> + strict(arg_types(gen_tcp, send, 2), Xs, + fun (_) -> + t_sup(t_atom('ok'), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +type(gen_tcp, shutdown, 2, Xs) -> + strict(arg_types(gen_tcp, shutdown, 2), Xs, + fun (_) -> + t_sup(t_atom('ok'), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +%%-- gen_udp ------------------------------------------------------------------ +%% NOTE: All type information for this module added to avoid loss of precision +type(gen_udp, open, 1, Xs) -> + strict(arg_types(gen_udp, open, 1), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_socket()]), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +type(gen_udp, open, 2, Xs) -> + strict(arg_types(gen_udp, open, 2), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_socket()]), + t_tuple([t_atom('error'), t_inet_posix_error()])) + end); +type(gen_udp, recv, 2, Xs) -> + strict(arg_types(gen_udp, recv, 2), Xs, fun (_) -> t_gen_udp_recv() end); +type(gen_udp, recv, 3, Xs) -> + strict(arg_types(gen_udp, recv, 3), Xs, fun (_) -> t_gen_udp_recv() end); +type(gen_udp, send, 4, Xs) -> + strict(arg_types(gen_udp, send, 4), Xs, + fun (_) -> + t_sup(t_atom('ok'), + t_tuple([t_atom('error'), t_sup(t_atom('not_owner'), + t_inet_posix_error())])) + end); +%%-- hipe_bifs ---------------------------------------------------------------- +type(hipe_bifs, add_ref, 2, Xs) -> + strict(arg_types(hipe_bifs, add_ref, 2), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, alloc_data, 2, Xs) -> + strict(arg_types(hipe_bifs, alloc_data, 2), Xs, + fun (_) -> t_integer() end); % address +type(hipe_bifs, array, 2, Xs) -> + strict(arg_types(hipe_bifs, array, 2), Xs, fun (_) -> t_immarray() end); +type(hipe_bifs, array_length, 1, Xs) -> + strict(arg_types(hipe_bifs, array_length, 1), Xs, + fun (_) -> t_non_neg_fixnum() end); +type(hipe_bifs, array_sub, 2, Xs) -> + strict(arg_types(hipe_bifs, array_sub, 2), Xs, fun (_) -> t_immediate() end); +type(hipe_bifs, array_update, 3, Xs) -> + strict(arg_types(hipe_bifs, array_update, 3), Xs, + fun (_) -> t_immarray() end); +type(hipe_bifs, atom_to_word, 1, Xs) -> + strict(arg_types(hipe_bifs, atom_to_word, 1), Xs, + fun (_) -> t_integer() end); +type(hipe_bifs, bif_address, 3, Xs) -> + strict(arg_types(hipe_bifs, bif_address, 3), Xs, + fun (_) -> t_sup(t_integer(), t_atom('false')) end); +type(hipe_bifs, bitarray, 2, Xs) -> + strict(arg_types(hipe_bifs, bitarray, 2), Xs, fun (_) -> t_bitarray() end); +type(hipe_bifs, bitarray_sub, 2, Xs) -> + strict(arg_types(hipe_bifs, bitarray_sub, 2), Xs, fun (_) -> t_boolean() end); +type(hipe_bifs, bitarray_update, 3, Xs) -> + strict(arg_types(hipe_bifs, bitarray_update, 3), Xs, + fun (_) -> t_bitarray() end); +type(hipe_bifs, bytearray, 2, Xs) -> + strict(arg_types(hipe_bifs, bytearray, 2), Xs, fun (_) -> t_bytearray() end); +type(hipe_bifs, bytearray_sub, 2, Xs) -> + strict(arg_types(hipe_bifs, bytearray_sub, 2), Xs, fun (_) -> t_byte() end); +type(hipe_bifs, bytearray_update, 3, Xs) -> + strict(arg_types(hipe_bifs, bytearray_update, 3), Xs, + fun (_) -> t_bytearray() end); +type(hipe_bifs, call_count_clear, 1, Xs) -> + strict(arg_types(hipe_bifs, call_count_clear, 1), Xs, + fun (_) -> t_sup(t_non_neg_integer(), t_atom('false')) end); +type(hipe_bifs, call_count_get, 1, Xs) -> + strict(arg_types(hipe_bifs, call_count_get, 1), Xs, + fun (_) -> t_sup(t_non_neg_integer(), t_atom('false')) end); +type(hipe_bifs, call_count_off, 1, Xs) -> + strict(arg_types(hipe_bifs, call_count_off, 1), Xs, + fun (_) -> t_sup(t_non_neg_integer(), t_atom('false')) end); +type(hipe_bifs, call_count_on, 1, Xs) -> + strict(arg_types(hipe_bifs, call_count_on, 1), Xs, + fun (_) -> t_sup(t_atom('true'), t_nil()) end); +type(hipe_bifs, check_crc, 1, Xs) -> + strict(arg_types(hipe_bifs, check_crc, 1), Xs, fun (_) -> t_boolean() end); +type(hipe_bifs, enter_code, 2, Xs) -> + strict(arg_types(hipe_bifs, enter_code, 2), Xs, + fun (_) -> t_tuple([t_integer(), + %% XXX: The tuple below contains integers and + %% is of size same as the length of the MFA list + t_sup(t_nil(), t_binary())]) end); +type(hipe_bifs, enter_sdesc, 1, Xs) -> + strict(arg_types(hipe_bifs, enter_sdesc, 1), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, find_na_or_make_stub, 2, Xs) -> + strict(arg_types(hipe_bifs, find_na_or_make_stub, 2), Xs, + fun (_) -> t_integer() end); % address +type(hipe_bifs, fun_to_address, 1, Xs) -> + strict(arg_types(hipe_bifs, fun_to_address, 1), Xs, + fun (_) -> t_integer() end); +%% type(hipe_bifs, get_emu_address, 1, Xs) -> +%% strict(arg_types(hipe_bifs, get_emu_address, 1), Xs, +%% fun (_) -> t_integer() end); % address +type(hipe_bifs, get_rts_param, 1, Xs) -> + strict(arg_types(hipe_bifs, get_rts_param, 1), Xs, + fun (_) -> t_sup(t_integer(), t_nil()) end); +type(hipe_bifs, invalidate_funinfo_native_addresses, 1, Xs) -> + strict(arg_types(hipe_bifs, invalidate_funinfo_native_addresses, 1), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, make_fe, 3, Xs) -> + strict(arg_types(hipe_bifs, make_fe, 3), Xs, fun (_) -> t_integer() end); +%% type(hipe_bifs, make_native_stub, 2, Xs) -> +%% strict(arg_types(hipe_bifs, make_native_stub, 2), Xs, +%% fun (_) -> t_integer() end); % address +type(hipe_bifs, mark_referred_from, 1, Xs) -> + strict(arg_types(hipe_bifs, mark_referred_from, 1), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, merge_term, 1, Xs) -> + strict(arg_types(hipe_bifs, merge_term, 1), Xs, fun ([X]) -> X end); +type(hipe_bifs, patch_call, 3, Xs) -> + strict(arg_types(hipe_bifs, patch_call, 3), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, patch_insn, 3, Xs) -> + strict(arg_types(hipe_bifs, patch_insn, 3), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, primop_address, 1, Xs) -> + strict(arg_types(hipe_bifs, primop_address, 1), Xs, + fun (_) -> t_sup(t_integer(), t_atom('false')) end); +type(hipe_bifs, redirect_referred_from, 1, Xs) -> + strict(arg_types(hipe_bifs, redirect_referred_from, 1), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, ref, 1, Xs) -> + strict(arg_types(hipe_bifs, ref, 1), Xs, fun (_) -> t_immarray() end); +type(hipe_bifs, ref_get, 1, Xs) -> + strict(arg_types(hipe_bifs, ref_get, 1), Xs, fun (_) -> t_immediate() end); +type(hipe_bifs, ref_set, 2, Xs) -> + strict(arg_types(hipe_bifs, ref_set, 2), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, remove_refs_from, 1, Xs) -> + strict(arg_types(hipe_bifs, remove_refs_from, 1), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, set_funinfo_native_address, 3, Xs) -> + strict(arg_types(hipe_bifs, set_funinfo_native_address, 3), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, set_native_address, 3, Xs) -> + strict(arg_types(hipe_bifs, set_native_address, 3), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, system_crc, 1, Xs) -> + strict(arg_types(hipe_bifs, system_crc, 1), Xs, fun (_) -> t_integer() end); +type(hipe_bifs, term_to_word, 1, Xs) -> + strict(arg_types(hipe_bifs, term_to_word, 1), Xs, + fun (_) -> t_integer() end); +type(hipe_bifs, update_code_size, 3, Xs) -> + strict(arg_types(hipe_bifs, update_code_size, 3), Xs, + fun (_) -> t_nil() end); +type(hipe_bifs, write_u8, 2, Xs) -> + strict(arg_types(hipe_bifs, write_u8, 2), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, write_u32, 2, Xs) -> + strict(arg_types(hipe_bifs, write_u32, 2), Xs, fun (_) -> t_nil() end); +type(hipe_bifs, write_u64, 2, Xs) -> + strict(arg_types(hipe_bifs, write_u64, 2), Xs, fun (_) -> t_nil() end); +%%-- io ----------------------------------------------------------------------- +type(io, format, 1, Xs) -> + strict(arg_types(io, format, 1), Xs, fun (_) -> t_atom('ok') end); +type(io, format, 2, Xs) -> + strict(arg_types(io, format, 2), Xs, fun (_) -> t_atom('ok') end); +type(io, format, 3, Xs) -> + strict(arg_types(io, format, 3), Xs, fun (_) -> t_atom('ok') end); +type(io, fwrite, 1, Xs) -> type(io, format, 1, Xs); % same +type(io, fwrite, 2, Xs) -> type(io, format, 2, Xs); % same +type(io, fwrite, 3, Xs) -> type(io, format, 3, Xs); % same +type(io, put_chars, 1, Xs) -> + strict(arg_types(io, put_chars, 1), Xs, fun (_) -> t_atom('ok') end); +type(io, put_chars, 2, Xs) -> + strict(arg_types(io, put_chars, 2), Xs, fun (_) -> t_atom('ok') end); +%%-- io_lib ------------------------------------------------------------------- +type(io_lib, format, 2, Xs) -> + strict(arg_types(io_lib, format, 2), Xs, + %% t_list() because the character list might be arbitrarily nested + fun (_) -> t_list(t_sup(t_char(), t_list())) end); +type(io_lib, fwrite, 2, Xs) -> type(io_lib, format, 2, Xs); % same +%%-- lists -------------------------------------------------------------------- +type(lists, all, 2, Xs) -> + strict(arg_types(lists, all, 2), Xs, + fun ([F, L]) -> + case t_is_nil(L) of + true -> t_atom('true'); + false -> + El = t_list_elements(L), + case check_fun_application(F, [El]) of + ok -> + case t_is_cons(L) of + true -> t_fun_range(F); + false -> + %% The list can be empty. + t_sup(t_atom('true'), t_fun_range(F)) + end; + error -> + case t_is_cons(L) of + true -> t_none(); + false -> t_fun_range(F) + end + end + end + end); +type(lists, any, 2, Xs) -> + strict(arg_types(lists, any, 2), Xs, + fun ([F, L]) -> + case t_is_nil(L) of + true -> t_atom('false'); + false -> + El = t_list_elements(L), + case check_fun_application(F, [El]) of + ok -> + case t_is_cons(L) of + true -> t_fun_range(F); + false -> + %% The list can be empty + t_sup(t_atom('false'), t_fun_range(F)) + end; + error -> + case t_is_cons(L) of + true -> t_none(); + false -> t_fun_range(F) + end + end + end + end); +type(lists, append, 2, Xs) -> type(erlang, '++', 2, Xs); % alias +type(lists, delete, 2, Xs) -> + strict(arg_types(lists, delete, 2), Xs, + fun ([_, List]) -> + case t_is_cons(List) of + true -> t_cons_tl(List); + false -> List + end + end); +type(lists, dropwhile, 2, Xs) -> + strict(arg_types(lists, dropwhile, 2), Xs, + fun ([F, X]) -> + case t_is_nil(X) of + true -> t_nil(); + false -> + X1 = t_list_elements(X), + case check_fun_application(F, [X1]) of + ok -> + case t_atom_vals(t_fun_range(F)) of + ['true'] -> + case t_is_none(t_inf(t_list(), X)) of + true -> t_none(); + false -> t_nil() + end; + ['false'] -> + case t_is_none(t_inf(t_list(), X)) of + true -> t_none(); + false -> X + end; + _ -> + t_inf(t_cons_tl(t_inf(X, t_cons())), + t_maybe_improper_list()) + end; + error -> + case t_is_cons(X) of + true -> t_none(); + false -> t_nil() + end + end + end + end); +type(lists, filter, 2, Xs) -> + strict(arg_types(lists, filter, 2), Xs, + fun ([F, L]) -> + case t_is_nil(L) of + true -> t_nil(); + false -> + T = t_list_elements(L), + case check_fun_application(F, [T]) of + ok -> + case t_atom_vals(t_fun_range(F)) =:= ['false'] of + true -> t_nil(); + false -> + case t_atom_vals(t_fun_range(F)) =:= ['true'] of + true -> L; + false -> t_list(T) + end + end; + error -> + case t_is_cons(L) of + true -> t_none(); + false -> t_nil() + end + end + end + end); +type(lists, flatten, 1, Xs) -> + strict(arg_types(lists, flatten, 1), Xs, + fun ([L]) -> + case t_is_nil(L) of + true -> L; % (nil has undefined elements) + false -> + %% Avoiding infinite recursion is tricky + X1 = t_list_elements(L), + case t_is_any(X1) of + true -> + t_list(); + false -> + X2 = type(lists, flatten, 1, [t_inf(X1, t_list())]), + t_sup(t_list(t_subtract(X1, t_list())), + X2) + end + end + end); +type(lists, flatmap, 2, Xs) -> + strict(arg_types(lists, flatmap, 2), Xs, + fun ([F, List]) -> + case t_is_nil(List) of + true -> t_nil(); + false -> + case check_fun_application(F, [t_list_elements(List)]) of + ok -> + case t_is_cons(List) of + true -> t_nonempty_list(t_list_elements(t_fun_range(F))); + false -> t_list(t_list_elements(t_fun_range(F))) + end; + error -> + case t_is_cons(List) of + true -> t_none(); + false -> t_nil() + end + end + end + end); +type(lists, foreach, 2, Xs) -> + strict(arg_types(lists, foreach, 2), Xs, + fun ([F, List]) -> + case t_is_cons(List) of + true -> + case check_fun_application(F, [t_list_elements(List)]) of + ok -> t_atom('ok'); + error -> t_none() + end; + false -> + t_atom('ok') + end + end); +type(lists, foldl, 3, Xs) -> + strict(arg_types(lists, foldl, 3), Xs, + fun ([F, Acc, List]) -> + case t_is_nil(List) of + true -> Acc; + false -> + case check_fun_application(F, [t_list_elements(List), Acc]) of + ok -> + case t_is_cons(List) of + true -> t_fun_range(F); + false -> t_sup(t_fun_range(F), Acc) + end; + error -> + case t_is_cons(List) of + true -> t_none(); + false -> Acc + end + end + end + end); +type(lists, foldr, 3, Xs) -> type(lists, foldl, 3, Xs); % same +type(lists, keydelete, 3, Xs) -> + strict(arg_types(lists, keydelete, 3), Xs, + fun ([_, _, L]) -> + Term = t_list_termination(L), + t_sup(Term, erl_types:lift_list_to_pos_empty(L)) + end); +type(lists, keyfind, 3, Xs) -> + strict(arg_types(lists, keyfind, 3), Xs, + fun ([X, Y, Z]) -> + ListEs = t_list_elements(Z), + Tuple = t_inf(t_tuple(), ListEs), + case t_is_none(Tuple) of + true -> t_atom('false'); + false -> + %% this BIF, contrary to lists:keysearch/3 does not + %% wrap its result in a 'value'-tagged tuple + Ret = t_sup(Tuple, t_atom('false')), + case t_is_any(X) of + true -> Ret; + false -> + case t_tuple_subtypes(Tuple) of + unknown -> Ret; + List -> + Keys = [type(erlang, element, 2, [Y, S]) + || S <- List], + Infs = [t_inf(Key, X) || Key <- Keys], + case all_is_none(Infs) of + true -> t_atom('false'); + false -> Ret + end + end + end + end + end); +type(lists, keymap, 3, Xs) -> + strict(arg_types(lists, keymap, 3), Xs, + fun ([F, _I, L]) -> + case t_is_nil(L) of + true -> L; + false -> t_list(t_sup(t_fun_range(F), t_list_elements(L))) + end + end); +type(lists, keymember, 3, Xs) -> + strict(arg_types(lists, keymember, 3), Xs, + fun ([X, Y, Z]) -> + ListEs = t_list_elements(Z), + Tuple = t_inf(t_tuple(), ListEs), + case t_is_none(Tuple) of + true -> t_atom('false'); + false -> + case t_is_any(X) of + true -> t_boolean(); + false -> + case t_tuple_subtypes(Tuple) of + unknown -> t_boolean(); + List -> + Keys = [type(erlang, element, 2, [Y,S]) || S <- List], + Infs = [t_inf(Key, X) || Key <- Keys], + case all_is_none(Infs) of + true -> t_atom('false'); + false -> t_boolean() + end + end + end + end + end); +type(lists, keymerge, 3, Xs) -> + strict(arg_types(lists, keymerge, 3), Xs, + fun ([_I, L1, L2]) -> type(lists, merge, 2, [L1, L2]) end); +type(lists, keyreplace, 4, Xs) -> + strict(arg_types(lists, keyreplace, 4), Xs, + fun ([_K, _I, L, T]) -> t_list(t_sup(t_list_elements(L), T)) end); +type(lists, keysearch, 3, Xs) -> + strict(arg_types(lists, keysearch, 3), Xs, + fun ([X, Y, Z]) -> + ListEs = t_list_elements(Z), + Tuple = t_inf(t_tuple(), ListEs), + case t_is_none(Tuple) of + true -> t_atom('false'); + false -> + Ret = t_sup(t_tuple([t_atom('value'), Tuple]), + t_atom('false')), + case t_is_any(X) of + true -> Ret; + false -> + case t_tuple_subtypes(Tuple) of + unknown -> Ret; + List -> + Keys = [type(erlang, element, 2, [Y, S]) + || S <- List], + Infs = [t_inf(Key, X) || Key <- Keys], + case all_is_none(Infs) of + true -> t_atom('false'); + false -> Ret + end + end + end + end + end); +type(lists, keysort, 2, Xs) -> + strict(arg_types(lists, keysort, 2), Xs, fun ([_, L]) -> L end); +type(lists, last, 1, Xs) -> + strict(arg_types(lists, last, 1), Xs, fun ([L]) -> t_list_elements(L) end); +type(lists, map, 2, Xs) -> + strict(arg_types(lists, map, 2), Xs, + fun ([F, L]) -> + case t_is_nil(L) of + true -> L; + false -> + El = t_list_elements(L), + case t_is_cons(L) of + true -> + case check_fun_application(F, [El]) of + ok -> t_nonempty_list(t_fun_range(F)); + error -> t_none() + end; + false -> + case check_fun_application(F, [El]) of + ok -> t_list(t_fun_range(F)); + error -> t_nil() + end + end + end + end); +type(lists, mapfoldl, 3, Xs) -> + strict(arg_types(lists, mapfoldl, 3), Xs, + fun ([F, Acc, List]) -> + case t_is_nil(List) of + true -> t_tuple([List, Acc]); + false -> + El = t_list_elements(List), + R = t_fun_range(F), + case t_is_cons(List) of + true -> + case check_fun_application(F, [El, Acc]) of + ok -> + Fun = fun (RangeTuple) -> + [T1, T2] = t_tuple_args(RangeTuple), + t_tuple([t_nonempty_list(T1), T2]) + end, + t_sup([Fun(ST) || ST <- t_tuple_subtypes(R)]); + error -> + t_none() + end; + false -> + case check_fun_application(F, [El, Acc]) of + ok -> + Fun = fun (RangeTuple) -> + [T1, T2] = t_tuple_args(RangeTuple), + t_tuple([t_list(T1), t_sup(Acc, T2)]) + end, + t_sup([Fun(ST) || ST <- t_tuple_subtypes(R)]); + error -> + t_tuple([t_nil(), Acc]) + end + end + end + end); +type(lists, mapfoldr, 3, Xs) -> type(lists, mapfoldl, 3, Xs); % same +type(lists, max, 1, Xs) -> + strict(arg_types(lists, max, 1), Xs, fun ([L]) -> t_list_elements(L) end); +type(lists, member, 2, Xs) -> + strict(arg_types(lists, member, 2), Xs, + fun ([X, Y]) -> + Y1 = t_list_elements(Y), + case t_is_none(t_inf(Y1, X)) of + true -> t_atom('false'); + false -> t_boolean() + end + end); +%% type(lists, merge, 1, Xs) -> +type(lists, merge, 2, Xs) -> + strict(arg_types(lists, merge, 2), Xs, + fun ([L1, L2]) -> + case t_is_none(L1) of + true -> L2; + false -> + case t_is_none(L2) of + true -> L1; + false -> t_sup(L1, L2) + end + end + end); +%% type(lists, merge, 3, Xs) -> +%% type(lists, merge3, 3, Xs) -> +type(lists, min, 1, Xs) -> + strict(arg_types(lists, min, 1), Xs, fun ([L]) -> t_list_elements(L) end); +type(lists, nth, 2, Xs) -> + strict(arg_types(lists, nth, 2), Xs, + fun ([_, Y]) -> t_list_elements(Y) end); +type(lists, nthtail, 2, Xs) -> + strict(arg_types(lists, nthtail, 2), Xs, + fun ([_, Y]) -> t_sup(Y, t_list()) end); +type(lists, partition, 2, Xs) -> + strict(arg_types(lists, partition, 2), Xs, + fun ([F, L]) -> + case t_is_nil(L) of + true -> t_tuple([L,L]); + false -> + El = t_list_elements(L), + case check_fun_application(F, [El]) of + error -> + case t_is_cons(L) of + true -> t_none(); + false -> t_tuple([t_nil(), t_nil()]) + end; + ok -> + case t_atom_vals(t_fun_range(F)) of + ['true'] -> t_tuple([L, t_nil()]); + ['false'] -> t_tuple([t_nil(), L]); + [_, _] -> + L2 = t_list(El), + t_tuple([L2, L2]) + end + end + end + end); +type(lists, reverse, 1, Xs) -> + strict(arg_types(lists, reverse, 1), Xs, fun ([X]) -> X end); +type(lists, reverse, 2, Xs) -> + type(erlang, '++', 2, Xs); % reverse-onto is just like append +type(lists, seq, 2, Xs) -> + strict(arg_types(lists, seq, 2), Xs, fun (_) -> t_list(t_integer()) end); +type(lists, seq, 3, Xs) -> + strict(arg_types(lists, seq, 3), Xs, fun (_) -> t_list(t_integer()) end); +type(lists, sort, 1, Xs) -> + strict(arg_types(lists, sort, 1), Xs, fun ([X]) -> X end); +type(lists, sort, 2, Xs) -> + strict(arg_types(lists, sort, 2), Xs, + fun ([F, L]) -> + R = t_fun_range(F), + case t_is_boolean(R) of + true -> L; + false -> + case t_is_nil(L) of + true -> t_nil(); + false -> t_none() + end + end + end); +type(lists, split, 2, Xs) -> + strict(arg_types(lists, split, 2), Xs, + fun ([_, L]) -> + case t_is_nil(L) of + true -> t_tuple([L, L]); + false -> + T = t_list_elements(L), + t_tuple([t_list(T), t_list(T)]) + end + end); +type(lists, splitwith, 2, Xs) -> + T1 = type(lists, takewhile, 2, Xs), + T2 = type(lists, dropwhile, 2, Xs), + case t_is_none(T1) orelse t_is_none(T2) of + true -> t_none(); + false -> t_tuple([T1, T2]) + end; +type(lists, subtract, 2, Xs) -> type(erlang, '--', 2, Xs); % alias +type(lists, takewhile, 2, Xs) -> + strict(arg_types(lists, takewhile, 2), Xs, + fun([F, L]) -> + case t_is_none(t_inf(t_list(), L)) of + false -> type(lists, filter, 2, Xs); + true -> + %% This works for non-proper lists as well. + El = t_list_elements(L), + type(lists, filter, 2, [F, t_list(El)]) + end + end); +type(lists, usort, 1, Xs) -> type(lists, sort, 1, Xs); % same +type(lists, usort, 2, Xs) -> type(lists, sort, 2, Xs); % same +type(lists, unzip, 1, Xs) -> + strict(arg_types(lists, unzip, 1), Xs, + fun ([Ps]) -> + case t_is_nil(Ps) of + true -> + t_tuple([t_nil(), t_nil()]); + false -> % Ps is a proper list of pairs + TupleTypes = t_tuple_subtypes(t_list_elements(Ps)), + lists:foldl(fun(Tuple, Acc) -> + [A, B] = t_tuple_args(Tuple), + t_sup(t_tuple([t_list(A), t_list(B)]), Acc) + end, t_none(), TupleTypes) + end + end); +type(lists, unzip3, 1, Xs) -> + strict(arg_types(lists, unzip3, 1), Xs, + fun ([Ts]) -> + case t_is_nil(Ts) of + true -> + t_tuple([t_nil(), t_nil(), t_nil()]); + false -> % Ps is a proper list of triples + TupleTypes = t_tuple_subtypes(t_list_elements(Ts)), + lists:foldl(fun(T, Acc) -> + [A, B, C] = t_tuple_args(T), + t_sup(t_tuple([t_list(A), + t_list(B), + t_list(C)]), + Acc) + end, t_none(), TupleTypes) + end + end); +type(lists, zip, 2, Xs) -> + strict(arg_types(lists, zip, 2), Xs, + fun ([As, Bs]) -> + case (t_is_nil(As) orelse t_is_nil(Bs)) of + true -> t_nil(); + false -> + A = t_list_elements(As), + B = t_list_elements(Bs), + t_list(t_tuple([A, B])) + end + end); +type(lists, zip3, 3, Xs) -> + strict(arg_types(lists, zip3, 3), Xs, + fun ([As, Bs, Cs]) -> + case (t_is_nil(As) orelse t_is_nil(Bs) orelse t_is_nil(Cs)) of + true -> t_nil(); + false -> + A = t_list_elements(As), + B = t_list_elements(Bs), + C = t_list_elements(Cs), + t_list(t_tuple([A, B, C])) + end + end); +type(lists, zipwith, 3, Xs) -> + strict(arg_types(lists, zipwith, 3), Xs, + fun ([F, _As, _Bs]) -> t_sup(t_list(t_fun_range(F)), t_nil()) end); +type(lists, zipwith3, 4, Xs) -> + strict(arg_types(lists, zipwith3, 4), Xs, + fun ([F,_As,_Bs,_Cs]) -> t_sup(t_list(t_fun_range(F)), t_nil()) end); +%%-- math --------------------------------------------------------------------- +type(math, acos, 1, Xs) -> + strict(arg_types(math, acos, 1), Xs, fun (_) -> t_float() end); +type(math, acosh, 1, Xs) -> + strict(arg_types(math, acosh, 1), Xs, fun (_) -> t_float() end); +type(math, asin, 1, Xs) -> + strict(arg_types(math, asin, 1), Xs, fun (_) -> t_float() end); +type(math, asinh, 1, Xs) -> + strict(arg_types(math, asinh, 1), Xs, fun (_) -> t_float() end); +type(math, atan, 1, Xs) -> + strict(arg_types(math, atan, 1), Xs, fun (_) -> t_float() end); +type(math, atan2, 2, Xs) -> + strict(arg_types(math, atan2, 2), Xs, fun (_) -> t_float() end); +type(math, atanh, 1, Xs) -> + strict(arg_types(math, atanh, 1), Xs, fun (_) -> t_float() end); +type(math, cos, 1, Xs) -> + strict(arg_types(math, cos, 1), Xs, fun (_) -> t_float() end); +type(math, cosh, 1, Xs) -> + strict(arg_types(math, cosh, 1), Xs, fun (_) -> t_float() end); +type(math, erf, 1, Xs) -> + strict(arg_types(math, erf, 1), Xs, fun (_) -> t_float() end); +type(math, erfc, 1, Xs) -> + strict(arg_types(math, erfc, 1), Xs, fun (_) -> t_float() end); +type(math, exp, 1, Xs) -> + strict(arg_types(math, exp, 1), Xs, fun (_) -> t_float() end); +type(math, log, 1, Xs) -> + strict(arg_types(math, log, 1), Xs, fun (_) -> t_float() end); +type(math, log10, 1, Xs) -> + strict(arg_types(math, log10, 1), Xs, fun (_) -> t_float() end); +type(math, pi, 0, _) -> t_float(); +type(math, pow, 2, Xs) -> + strict(arg_types(math, pow, 2), Xs, fun (_) -> t_float() end); +type(math, sin, 1, Xs) -> + strict(arg_types(math, sin, 1), Xs, fun (_) -> t_float() end); +type(math, sinh, 1, Xs) -> + strict(arg_types(math, sinh, 1), Xs, fun (_) -> t_float() end); +type(math, sqrt, 1, Xs) -> + strict(arg_types(math, sqrt, 1), Xs, fun (_) -> t_float() end); +type(math, tan, 1, Xs) -> + strict(arg_types(math, tan, 1), Xs, fun (_) -> t_float() end); +type(math, tanh, 1, Xs) -> + strict(arg_types(math, tanh, 1), Xs, fun (_) -> t_float() end); +%%-- net_kernel --------------------------------------------------------------- +type(net_kernel, dflag_unicode_io, 1, Xs) -> + strict(arg_types(net_kernel, dflag_unicode_io, 1), Xs, + fun (_) -> t_boolean() end); +%%-- ordsets ------------------------------------------------------------------ +type(ordsets, filter, 2, Xs) -> + type(lists, filter, 2, Xs); +type(ordsets, fold, 3, Xs) -> + type(lists, foldl, 3, Xs); +%%-- os ----------------------------------------------------------------------- +type(os, getenv, 0, _) -> t_list(t_string()); +type(os, getenv, 1, Xs) -> + strict(arg_types(os, getenv, 1), Xs, + fun (_) -> t_sup(t_string(), t_atom('false')) end); +type(os, getpid, 0, _) -> t_string(); +type(os, putenv, 2, Xs) -> + strict(arg_types(os, putenv, 2), Xs, fun (_) -> t_atom('true') end); +%%-- re ----------------------------------------------------------------------- +type(re, compile, 1, Xs) -> + strict(arg_types(re, compile, 1), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_re_MP()]), + t_tuple([t_atom('error'), t_re_ErrorSpec()])) + end); +type(re, compile, 2, Xs) -> + strict(arg_types(re, compile, 2), Xs, + fun (_) -> + t_sup(t_tuple([t_atom('ok'), t_re_MP()]), + t_tuple([t_atom('error'), t_re_ErrorSpec()])) + end); +type(re, run, 2, Xs) -> + strict(arg_types(re, run, 2), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('match'), t_re_Captured()]), + t_atom('nomatch'), + t_tuple([t_atom('error'), t_re_ErrorSpec()])]) + end); +type(re, run, 3, Xs) -> + strict(arg_types(re, run, 3), Xs, + fun (_) -> + t_sup([t_tuple([t_atom('match'), t_re_Captured()]), + t_atom('match'), + t_atom('nomatch'), + t_tuple([t_atom('error'), t_re_ErrorSpec()])]) + end); +%%-- string ------------------------------------------------------------------- +type(string, chars, 2, Xs) -> % NOTE: added to avoid loss of information + strict(arg_types(string, chars, 2), Xs, fun (_) -> t_string() end); +type(string, chars, 3, Xs) -> % NOTE: added to avoid loss of information + strict(arg_types(string, chars, 3), Xs, + fun ([Char, N, Tail]) -> + case t_is_nil(Tail) of + true -> + type(string, chars, 2, [Char, N]); + false -> + case t_is_string(Tail) of + true -> + t_string(); + false -> + t_sup(t_sup(t_string(), Tail), t_cons(Char, Tail)) + end + end + end); +type(string, concat, 2, Xs) -> % NOTE: added to avoid loss of information + strict(arg_types(string, concat, 2), Xs, fun (_) -> t_string() end); +type(string, equal, 2, Xs) -> % NOTE: added to avoid loss of information + strict(arg_types(string, equal, 2), Xs, fun (_) -> t_boolean() end); +type(string, to_float, 1, Xs) -> + strict(arg_types(string, to_float, 1), Xs, + fun (_) -> t_sup(t_tuple([t_float(), t_string()]), + t_tuple([t_atom('error'), + t_sup(t_atom('no_float'), + t_atom('not_a_list'))])) + end); +type(string, to_integer, 1, Xs) -> + strict(arg_types(string, to_integer, 1), Xs, + fun (_) -> t_sup(t_tuple([t_integer(), t_string()]), + t_tuple([t_atom('error'), + t_sup(t_atom('no_integer'), + t_atom('not_a_list'))])) + end); +%%-- unicode ------------------------------------------------------------------ +type(unicode, characters_to_binary, 2, Xs) -> + strict(arg_types(unicode, characters_to_binary, 2), Xs, + fun (_) -> + t_sup([t_binary(), + t_tuple([t_atom('error'), t_binary(), t_ML()]), + t_tuple([t_atom('incomplete'), t_binary(), t_ML()])]) + end); +type(unicode, characters_to_list, 2, Xs) -> + strict(arg_types(unicode, characters_to_list, 2), Xs, + fun (_) -> + t_sup([t_string(), + t_tuple([t_atom('error'), t_string(), t_ML()]), + t_tuple([t_atom('incomplete'), t_string(), t_ML()])]) + end); +type(unicode, bin_is_7bit, 1, Xs) -> + strict(arg_types(unicode, bin_is_7bit, 1), Xs, fun (_) -> t_boolean() end); + +%%----------------------------------------------------------------------------- +type(M, F, A, Xs) when is_atom(M), is_atom(F), + is_integer(A), 0 =< A, A =< 255 -> + strict(Xs, t_any()). % safe approximation for all functions. + + +%%----------------------------------------------------------------------------- +%% Auxiliary functions +%%----------------------------------------------------------------------------- + +strict(Xs, Ts, F) -> + %% io:format("inf lists arg~n1:~p~n2:~p ~n", [Xs, Ts]), + Xs1 = inf_lists(Xs, Ts), + %% io:format("inf lists return ~p ~n", [Xs1]), + case any_is_none_or_unit(Xs1) of + true -> t_none(); + false -> F(Xs1) + end. + +strict(Xs, X) -> + case any_is_none_or_unit(Xs) of + true -> t_none(); + false -> X + end. + +inf_lists([X | Xs], [T | Ts]) -> + [t_inf(X, T) | inf_lists(Xs, Ts)]; +inf_lists([], []) -> + []. + +any_list(N) -> any_list(N, t_any()). + +any_list(N, A) when N > 0 -> + [A | any_list(N - 1, A)]; +any_list(0, _) -> + []. + +list_replace(N, E, [X | Xs]) when N > 1 -> + [X | list_replace(N - 1, E, Xs)]; +list_replace(1, E, [_X | Xs]) -> + [E | Xs]. + +any_is_none_or_unit(Ts) -> + lists:any(fun erl_types:t_is_none_or_unit/1, Ts). + +all_is_none(Ts) -> + lists:all(fun erl_types:t_is_none/1, Ts). + +check_guard([X], Test, Type) -> + check_guard_single(X, Test, Type). + +check_guard_single(X, Test, Type) -> + case Test(X) of + true -> t_atom('true'); + false -> + case erl_types:t_is_opaque(X) of + true -> t_none(); + false -> + case t_is_none(t_inf(Type, X)) of + true -> t_atom('false'); + false -> t_boolean() + end + end + end. + +%%----------------------------------------------------------------------------- +%% Functions for range analysis +%%----------------------------------------------------------------------------- + +infinity_max([]) -> empty; +infinity_max([H|T]) -> + if H =:= empty -> + infinity_max(T); + true -> + lists:foldl( + fun (Elem, Max) -> + Geq = infinity_geq(Elem, Max), + if not Geq orelse (Elem =:= empty) -> + Max; + true -> + Elem + end + end, + H, + T) + end. + +infinity_min([]) -> empty; +infinity_min([H|T]) -> + if H =:= empty -> + infinity_min(T); + true -> + lists:foldl(fun (Elem, Min) -> + Geq = infinity_geq(Elem, Min), + if Geq orelse (Elem =:= empty) -> + Min; + true -> + Elem + end + end, + H, + T) + end. + +-type inf_integer() :: 'neg_inf' | 'pos_inf' | integer(). + +-spec infinity_abs('pos_inf' | 'neg_inf') -> 'pos_inf' + ; (integer()) -> non_neg_integer(). + +infinity_abs(pos_inf) -> pos_inf; +infinity_abs(neg_inf) -> pos_inf; +infinity_abs(Number) when is_integer(Number) -> abs(Number). + +%% span_zero(Range) -> +%% infinity_geq(0, number_min(Range)) and infinity_geq(number_max(Range), 0). + +infinity_inv(pos_inf) -> neg_inf; +infinity_inv(neg_inf) -> pos_inf; +infinity_inv(Number) when is_integer(Number) -> -Number. + +infinity_band(neg_inf, Type2) -> Type2; +%% infinity_band(Type1, neg_inf) -> Type1; +infinity_band(pos_inf, Type2) -> Type2; +%% infinity_band(Type1, pos_inf) -> Type1; +infinity_band(Type1, Type2) when is_integer(Type1), is_integer(Type2) -> + Type1 band Type2. + +infinity_bor(neg_inf, _Type2) -> neg_inf; +%% infinity_bor(_Type1, neg_inf) -> neg_inf; +infinity_bor(pos_inf, _Type2) -> pos_inf; +%% infinity_bor(_Type1, pos_inf) -> pos_inf; +infinity_bor(Type1, Type2) when is_integer(Type1), is_integer(Type2) -> + Type1 bor Type2. + +infinity_div(pos_inf, pos_inf) -> [0, pos_inf]; +infinity_div(pos_inf, neg_inf) -> [neg_inf, 0]; +infinity_div(neg_inf, neg_inf) -> [0, pos_inf]; +infinity_div(neg_inf, pos_inf) -> [neg_inf, 0]; +infinity_div(pos_inf, Number) when is_integer(Number), Number > 0 -> pos_inf; +infinity_div(pos_inf, Number) when is_integer(Number), Number < 0 -> neg_inf; +infinity_div(neg_inf, Number) when is_integer(Number), Number > 0 -> neg_inf; +infinity_div(neg_inf, Number) when is_integer(Number), Number < 0 -> pos_inf; +infinity_div(Number, pos_inf) when is_integer(Number), Number >= 0 -> pos_inf; +infinity_div(Number, pos_inf) when is_integer(Number), Number < 0 -> neg_inf; +infinity_div(Number, neg_inf) when is_integer(Number), Number >= 0 -> neg_inf; +infinity_div(Number, neg_inf) when is_integer(Number), Number < 0 -> pos_inf; +infinity_div(Number1, Number2) when is_integer(Number1), is_integer(Number2) -> + Number1 div Number2. + +infinity_bsl(pos_inf, _) -> pos_inf; +infinity_bsl(neg_inf, _) -> neg_inf; +infinity_bsl(Number, pos_inf) when is_integer(Number), Number >= 0 -> pos_inf; +infinity_bsl(Number, pos_inf) when is_integer(Number) -> neg_inf; +infinity_bsl(Number, neg_inf) when is_integer(Number), Number >= 0 -> 0; +infinity_bsl(Number, neg_inf) when is_integer(Number) -> -1; +infinity_bsl(Number1, Number2) when is_integer(Number1), is_integer(Number2) -> + Bits = ?BITS, + if Number2 > (Bits * 2) -> infinity_bsl(Number1, pos_inf); + Number2 < (-Bits * 2) -> infinity_bsl(Number1, neg_inf); + true -> Number1 bsl Number2 + end. + +infinity_geq(pos_inf, _) -> true; +infinity_geq(_, pos_inf) -> false; +infinity_geq(_, neg_inf) -> true; +infinity_geq(neg_inf, _) -> false; +infinity_geq(A, B) when is_integer(A), is_integer(B) -> A >= B. + +-spec infinity_add(inf_integer(), inf_integer()) -> inf_integer(). + +infinity_add(pos_inf, _Number) -> pos_inf; +infinity_add(neg_inf, _Number) -> neg_inf; +infinity_add(_Number, pos_inf) -> pos_inf; +infinity_add(_Number, neg_inf) -> neg_inf; +infinity_add(Number1, Number2) when is_integer(Number1), is_integer(Number2) -> + Number1 + Number2. + +infinity_mult(neg_inf, Number) -> + Greater = infinity_geq(Number, 0), + if Greater -> neg_inf; + true -> pos_inf + end; +infinity_mult(pos_inf, Number) -> infinity_inv(infinity_mult(neg_inf, Number)); +infinity_mult(Number, pos_inf) -> infinity_inv(infinity_mult(neg_inf, Number)); +infinity_mult(Number, neg_inf) -> infinity_mult(neg_inf, Number); +infinity_mult(Number1, Number2) when is_integer(Number1), is_integer(Number2)-> + Number1 * Number2. + +width({Min, Max}) -> infinity_max([width(Min), width(Max)]); +width(pos_inf) -> pos_inf; +width(neg_inf) -> pos_inf; +width(X) when is_integer(X), X >= 0 -> poswidth(X, 0); +width(X) when is_integer(X), X < 0 -> negwidth(X, 0). + +poswidth(X, N) -> + case X < (1 bsl N) of + true -> N; + false -> poswidth(X, N+1) + end. + +negwidth(X, N) -> + case X >= (-1 bsl N) of + true -> N; + false -> negwidth(X, N+1) + end. + +arith('bnot', X1) -> + case t_is_integer(X1) of + false -> error; + true -> + Min1 = number_min(X1), + Max1 = number_max(X1), + {ok, t_from_range(infinity_add(infinity_inv(Max1), -1), + infinity_add(infinity_inv(Min1), -1))} + end. + +arith_mult(Min1, Max1, Min2, Max2) -> + Tmp_list = [infinity_mult(Min1, Min2), infinity_mult(Min1, Max2), + infinity_mult(Max1, Min2), infinity_mult(Max1, Max2)], + {infinity_min(Tmp_list), infinity_max(Tmp_list)}. + +arith_div(_Min1, _Max1, 0, 0) -> + %% Signal failure. + {pos_inf, neg_inf}; +arith_div(Min1, Max1, Min2, Max2) -> + %% 0 is not an accepted divisor. + NewMin2 = if Min2 =:= 0 -> 1; + true -> Min2 + end, + NewMax2 = if Max2 =:= 0 -> -1; + true -> Max2 + end, + Tmp_list = lists:flatten([infinity_div(Min1, NewMin2), + infinity_div(Min1, NewMax2), + infinity_div(Max1, NewMin2), + infinity_div(Max1, NewMax2)]), + {infinity_min(Tmp_list), infinity_max(Tmp_list)}. + +arith_rem(Min1, Max1, Min2, Max2) -> + Min1_geq_zero = infinity_geq(Min1, 0), + Max1_leq_zero = infinity_geq(0, Max1), + Max_range2 = infinity_max([infinity_abs(Min2), infinity_abs(Max2)]), + Max_range2_leq_zero = infinity_geq(0, Max_range2), + New_min = + if Min1_geq_zero -> 0; + Max_range2 =:= 0 -> 0; + Max_range2_leq_zero -> infinity_add(Max_range2, 1); + true -> infinity_add(infinity_inv(Max_range2), 1) + end, + New_max = + if Max1_leq_zero -> 0; + Max_range2 =:= 0 -> 0; + Max_range2_leq_zero -> infinity_add(infinity_inv(Max_range2), -1); + true -> infinity_add(Max_range2, -1) + end, + {New_min, New_max}. + +arith_bsl(Min1, Max1, Min2, Max2) -> + case infinity_geq(Min1, 0) of + true -> {infinity_bsl(Min1, Min2), infinity_bsl(Max1, Max2)}; + false -> + case infinity_geq(Max1, 0) of + true -> {infinity_bsl(Min1, Max2), infinity_bsl(Max1, Max2)}; + false -> {infinity_bsl(Min1, Max2), infinity_bsl(Max2, Min2)} + end + end. + +arith_band_range_set({Min, Max}, [Int|IntList]) -> + SafeAnd = lists:foldl( + fun (IntFromSet, SafeAndAcc) -> + IntFromSet bor SafeAndAcc + end, + Int, + IntList), + {infinity_band(Min, SafeAnd), infinity_band(Max, SafeAnd)}. + +arith_bor_range_set({Min, Max}, [Int|IntList]) -> + SafeAnd = lists:foldl( + fun (IntFromSet, SafeAndAcc) -> + IntFromSet band SafeAndAcc + end, + Int, + IntList), + {infinity_bor(Min, SafeAnd), infinity_bor(Max, SafeAnd)}. + +arith_band(X1, X2) -> + L1 = t_number_vals(X1), + L2 = t_number_vals(X2), + Min1 = number_min(X1), + Max1 = number_max(X1), + Min2 = number_min(X2), + Max2 = number_max(X2), + case {L1 =:= unknown, L2 =:= unknown} of + {true, false} -> + arith_band_range_set(arith_band_ranges(Min1, Max1, Min2, Max2), L2); + {false, true} -> + arith_band_range_set(arith_band_ranges(Min1, Max1, Min2, Max2), L1); + {true, true} -> + arith_band_ranges(Min1, Max1, Min2, Max2) + end. + +arith_bor(X1, X2) -> + L1 = t_number_vals(X1), + L2 = t_number_vals(X2), + Min1 = number_min(X1), + Max1 = number_max(X1), + Min2 = number_min(X2), + Max2 = number_max(X2), + case {L1 =:= unknown, L2 =:= unknown} of + {true, false} -> + arith_bor_range_set(arith_bor_ranges(Min1, Max1, Min2, Max2), L2); + {false, true} -> + arith_bor_range_set(arith_bor_ranges(Min1, Max1, Min2, Max2), L1); + {true, true} -> + arith_bor_ranges(Min1, Max1, Min2, Max2) + end. + +arith_band_ranges(Min1, Max1, Min2, Max2) -> + Width = infinity_min([width({Min1, Max1}), width({Min2, Max2})]), + Min = + case infinity_geq(Min1, 0) orelse infinity_geq(Min2, 0) of + true -> 0; + false -> infinity_bsl(-1, Width) + end, + Max = + case infinity_geq(Max1, 0) orelse infinity_geq(Max2, 0) of + true -> infinity_add(infinity_bsl(1, Width), -1); + false -> 0 + end, + {Min, Max}. + +arith_bor_ranges(Min1, Max1, Min2, Max2) -> + Width = infinity_max([width({Min1, Max1}), width({Min2, Max2})]), + Min = + case infinity_geq(Min1, 0) andalso infinity_geq(Min2, 0) of + true -> 0; + false -> infinity_bsl(-1, Width) + end, + Max = + case infinity_geq(Max1, 0) andalso infinity_geq(Max2, 0) of + true -> infinity_add(infinity_bsl(1, Width), -1); + false -> -1 + end, + {Min, Max}. + +arith(Op, X1, X2) -> + %% io:format("arith ~p ~p ~p~n", [Op, X1, X2]), + case t_is_integer(X1) andalso t_is_integer(X2) of + false -> error; + true -> + L1 = t_number_vals(X1), + L2 = t_number_vals(X2), + case (L1 =:= unknown) orelse (L2 =:= unknown) of + true -> + Min1 = number_min(X1), + Max1 = number_max(X1), + Min2 = number_min(X2), + Max2 = number_max(X2), + {NewMin, NewMax} = + case Op of + '+' -> {infinity_add(Min1, Min2), infinity_add(Max1, Max2)}; + '-' -> {infinity_add(Min1, infinity_inv(Max2)), + infinity_add(Max1, infinity_inv(Min2))}; + '*' -> arith_mult(Min1, Max1, Min2, Max2); + 'div' -> arith_div(Min1, Max1, Min2, Max2); + 'rem' -> arith_rem(Min1, Max1, Min2, Max2); + 'bsl' -> arith_bsl(Min1, Max1, Min2, Max2); + 'bsr' -> NewMin2 = infinity_inv(Max2), + NewMax2 = infinity_inv(Min2), + arith_bsl(Min1, Max1, NewMin2, NewMax2); + 'band' -> arith_band(X1, X2); + 'bor' -> arith_bor(X1, X2); + 'bxor' -> arith_bor_ranges(Min1, Max1, Min2, Max2) %% overaprox. + end, + %% io:format("done arith ~p = ~p~n", [Op, {NewMin, NewMax}]), + {ok, t_from_range(NewMin, NewMax)}; + false -> + AllVals = + case Op of + '+' -> [X + Y || X <- L1, Y <- L2]; + '-' -> [X - Y || X <- L1, Y <- L2]; + '*' -> [X * Y || X <- L1, Y <- L2]; + 'div' -> [X div Y || X <- L1, Y <- L2,Y =/= 0]; + 'rem' -> [X rem Y || X <- L1, Y <- L2,Y =/= 0]; + 'bsl' -> [X bsl Y || X <- L1, Y <- L2]; + 'bsr' -> [X bsr Y || X <- L1, Y <- L2]; + 'band' -> [X band Y || X <- L1, Y <- L2]; + 'bor' -> [X bor Y || X <- L1, Y <- L2]; + 'bxor' -> [X bxor Y || X <- L1, Y <- L2] + end, + {ok, t_integers(ordsets:from_list(AllVals))} + end + end. + +%%============================================================================= + +-spec arg_types(atom(), atom(), arity()) -> [erl_types:erl_type()] | 'unknown'. + +%%------- code ---------------------------------------------------------------- +arg_types(code, add_path, 1) -> + [t_string()]; +arg_types(code, add_patha, 1) -> + arg_types(code, add_path, 1); +arg_types(code, add_paths, 1) -> + [t_list(t_string())]; +arg_types(code, add_pathsa, 1) -> + arg_types(code, add_paths, 1); +arg_types(code, add_pathsz, 1) -> + arg_types(code, add_paths, 1); +arg_types(code, add_pathz, 1) -> + arg_types(code, add_path, 1); +arg_types(code, all_loaded, 0) -> + []; +arg_types(code, compiler_dir, 0) -> + []; +arg_types(code, del_path, 1) -> + [t_sup(t_string(), t_atom())]; % OBS: doc differs from add_path/1 - why? +arg_types(code, delete, 1) -> + [t_atom()]; +arg_types(code, ensure_loaded, 1) -> + arg_types(code, load_file, 1); +arg_types(code, get_chunk, 2) -> + [t_binary(), t_string()]; +arg_types(code, get_object_code, 1) -> + [t_atom()]; +arg_types(code, get_path, 0) -> + []; +arg_types(code, is_loaded, 1) -> + [t_atom()]; +arg_types(code, is_sticky, 1) -> + [t_atom()]; +arg_types(code, is_module_native, 1) -> + [t_atom()]; +arg_types(code, lib_dir, 0) -> + []; +arg_types(code, lib_dir, 1) -> + [t_atom()]; +arg_types(code, load_abs, 1) -> + [t_string()]; +arg_types(code, load_abs, 2) -> + [t_code_loaded_fname_or_status(), t_atom()]; +arg_types(code, load_binary, 3) -> + [t_atom(), t_code_loaded_fname_or_status(), t_binary()]; +arg_types(code, load_file, 1) -> + [t_atom()]; +arg_types(code, load_native_partial, 2) -> + [t_atom(), t_binary()]; +arg_types(code, load_native_sticky, 3) -> + [t_atom(), t_binary(), t_sup(t_binary(), t_atom('false'))]; +arg_types(code, module_md5, 1) -> + [t_binary()]; +arg_types(code, make_stub_module, 3) -> + [t_atom(), t_binary(), t_tuple([t_list(), t_list()])]; +arg_types(code, priv_dir, 1) -> + [t_atom()]; +arg_types(code, purge, 1) -> + arg_types(code, delete, 1); +arg_types(code, rehash, 0) -> + []; +arg_types(code, replace_path, 2) -> + [t_atom(), t_string()]; +arg_types(code, root_dir, 0) -> + []; +arg_types(code, set_path, 1) -> + [t_string()]; +arg_types(code, soft_purge, 1) -> + arg_types(code, delete, 1); +arg_types(code, stick_mod, 1) -> + [t_atom()]; +arg_types(code, unstick_mod, 1) -> + arg_types(code, stick_mod, 1); +arg_types(code, which, 1) -> + [t_atom()]; +%%------- erl_ddll ------------------------------------------------------------ +arg_types(erl_ddll, demonitor, 1) -> + arg_types(erlang, demonitor, 1); +arg_types(erl_ddll, format_error_int, 1) -> + [t_sup([t_atom('inconsistent'), + t_atom('linked_in_driver'), + t_atom('permanent'), + t_atom('not_loaded'), + t_atom('not_loaded_by_this_process'), + t_atom('not_pending'), + t_atom('already_loaded'), + t_atom('unloading')])]; +arg_types(erl_ddll, info, 2) -> + [t_sup([t_atom(), t_string()]), + t_sup([t_atom('awaiting_load'), + t_atom('awaiting_unload'), + t_atom('driver_options'), + t_atom('linked_in_driver'), + t_atom('permanent'), + t_atom('port_count'), + t_atom('processes')])]; +arg_types(erl_ddll, loaded_drivers, 0) -> + []; +arg_types(erl_ddll, monitor, 2) -> + [t_atom('driver'), + t_tuple([t_atom(), t_sup([t_atom('loaded'), t_atom('unloaded')])])]; +arg_types(erl_ddll, try_load, 3) -> + [t_sup([t_atom(), t_string(), t_nonempty_list(t_sup([t_atom(), t_string()]))]), + t_sup([t_atom(), t_string()]), + t_list(t_sup([t_tuple([t_atom('driver_options'), + t_list(t_atom('kill_ports'))]), + t_tuple([t_atom('monitor'), + t_sup([t_atom('pending_driver'), + t_atom('pending')])]), + t_tuple([t_atom('reload'), + t_sup([t_atom('pending_driver'), + t_atom('pending')])])]))]; +arg_types(erl_ddll, try_unload, 2) -> + [t_sup([t_atom(), t_string(), t_nonempty_list(t_sup([t_atom(), t_string()]))]), + t_list(t_sup([t_atom('kill_ports'), + t_tuple([t_atom('monitor'), + t_sup([t_atom('pending_driver'), + t_atom('pending')])])]))]; +%%------- erlang -------------------------------------------------------------- +arg_types(erlang, '!', 2) -> + Pid = t_sup([t_pid(), t_port(), t_atom(), + t_tuple([t_atom(), t_node()])]), + [Pid, t_any()]; +arg_types(erlang, '==', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '/=', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '=:=', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '=/=', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '>', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '>=', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '<', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '=<', 2) -> + [t_any(), t_any()]; +arg_types(erlang, '+', 1) -> + [t_number()]; +arg_types(erlang, '+', 2) -> + [t_number(), t_number()]; +arg_types(erlang, '++', 2) -> + [t_list(), t_any()]; +arg_types(erlang, '-', 1) -> + [t_number()]; +arg_types(erlang, '-', 2) -> + [t_number(), t_number()]; +arg_types(erlang, '--', 2) -> + [t_list(), t_list()]; +arg_types(erlang, '*', 2) -> + [t_number(), t_number()]; +arg_types(erlang, '/', 2) -> + [t_number(), t_number()]; +arg_types(erlang, 'div', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'rem', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'and', 2) -> + [t_boolean(), t_boolean()]; +arg_types(erlang, 'or', 2) -> + [t_boolean(), t_boolean()]; +arg_types(erlang, 'xor', 2) -> + [t_boolean(), t_boolean()]; +arg_types(erlang, 'not', 1) -> + [t_boolean()]; +arg_types(erlang, 'band', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'bor', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'bxor', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'bsr', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'bsl', 2) -> + [t_integer(), t_integer()]; +arg_types(erlang, 'bnot', 1) -> + [t_integer()]; +arg_types(erlang, abs, 1) -> + [t_number()]; +arg_types(erlang, append_element, 2) -> + [t_tuple(), t_any()]; +arg_types(erlang, apply, 2) -> + [t_sup(t_tuple([t_sup(t_atom(), % module name + t_tuple()), % parameterized module + t_atom()]), + t_fun()), + t_list()]; +arg_types(erlang, apply, 3) -> + [t_sup(t_atom(), t_tuple()), t_atom(), t_list()]; +arg_types(erlang, atom_to_binary, 2) -> + [t_atom(), t_encoding_a2b()]; +arg_types(erlang, atom_to_list, 1) -> + [t_atom()]; +arg_types(erlang, binary_to_atom, 2) -> + [t_binary(), t_encoding_a2b()]; +arg_types(erlang, binary_to_existing_atom, 2) -> + arg_types(erlang, binary_to_atom, 2); +arg_types(erlang, binary_to_list, 1) -> + [t_binary()]; +arg_types(erlang, binary_to_list, 3) -> + [t_binary(), t_pos_integer(), t_pos_integer()]; % I want fixnum, but cannot +arg_types(erlang, binary_to_term, 1) -> + [t_binary()]; +arg_types(erlang, bitsize, 1) -> % XXX: TAKE OUT + arg_types(erlang, bit_size, 1); +arg_types(erlang, bit_size, 1) -> + [t_bitstr()]; +arg_types(erlang, bitstr_to_list, 1) -> % XXX: TAKE OUT + arg_types(erlang, bitstring_to_list, 1); +arg_types(erlang, bitstring_to_list, 1) -> + [t_bitstr()]; +arg_types(erlang, bump_reductions, 1) -> + [t_pos_fixnum()]; +arg_types(erlang, byte_size, 1) -> + [t_binary()]; +arg_types(erlang, cancel_timer, 1) -> + [t_reference()]; +arg_types(erlang, check_process_code, 2) -> + [t_pid(), t_atom()]; +arg_types(erlang, concat_binary, 1) -> + [t_list(t_binary())]; +arg_types(erlang, crc32, 1) -> + [t_iodata()]; +arg_types(erlang, crc32, 2) -> + [t_integer(), t_iodata()]; +arg_types(erlang, crc32_combine, 3) -> + [t_integer(), t_integer(), t_integer()]; +arg_types(erlang, date, 0) -> + []; +arg_types(erlang, decode_packet, 3) -> + [t_decode_packet_type(), t_binary(), t_list(t_decode_packet_option())]; +arg_types(erlang, delete_module, 1) -> + [t_atom()]; +arg_types(erlang, demonitor, 1) -> + [t_reference()]; +arg_types(erlang, demonitor, 2) -> + [t_reference(), t_list(t_atoms(['flush', 'info']))]; +arg_types(erlang, disconnect_node, 1) -> + [t_node()]; +arg_types(erlang, display, 1) -> + [t_any()]; +arg_types(erlang, dist_exit, 3) -> + [t_pid(), t_dist_exit(), t_sup(t_pid(), t_port())]; +arg_types(erlang, element, 2) -> + [t_pos_fixnum(), t_tuple()]; +arg_types(erlang, erase, 0) -> + []; +arg_types(erlang, erase, 1) -> + [t_any()]; +arg_types(erlang, error, 1) -> + [t_any()]; +arg_types(erlang, error, 2) -> + [t_any(), t_list()]; +arg_types(erlang, exit, 1) -> + [t_any()]; +arg_types(erlang, exit, 2) -> + [t_sup(t_pid(), t_port()), t_any()]; +arg_types(erlang, external_size, 1) -> + [t_any()]; % takes any term as input +arg_types(erlang, float, 1) -> + [t_number()]; +arg_types(erlang, float_to_list, 1) -> + [t_float()]; +arg_types(erlang, function_exported, 3) -> + [t_atom(), t_atom(), t_arity()]; +arg_types(erlang, fun_info, 1) -> + [t_fun()]; +arg_types(erlang, fun_info, 2) -> + [t_fun(), t_atom()]; +arg_types(erlang, fun_to_list, 1) -> + [t_fun()]; +arg_types(erlang, garbage_collect, 0) -> + []; +arg_types(erlang, garbage_collect, 1) -> + [t_pid()]; +arg_types(erlang, get, 0) -> + []; +arg_types(erlang, get, 1) -> + [t_any()]; +arg_types(erlang, get_cookie, 0) -> + []; +arg_types(erlang, get_keys, 1) -> + [t_any()]; +arg_types(erlang, get_stacktrace, 0) -> + []; +arg_types(erlang, get_module_info, 1) -> + [t_atom()]; +arg_types(erlang, get_module_info, 2) -> + [t_atom(), t_module_info_2()]; +arg_types(erlang, group_leader, 0) -> + []; +arg_types(erlang, group_leader, 2) -> + [t_pid(), t_pid()]; +arg_types(erlang, halt, 0) -> + []; +arg_types(erlang, halt, 1) -> + [t_sup(t_non_neg_fixnum(), t_string())]; +arg_types(erlang, hash, 2) -> + [t_any(), t_integer()]; +arg_types(erlang, hd, 1) -> + [t_cons()]; +arg_types(erlang, hibernate, 3) -> + [t_atom(), t_atom(), t_list()]; +arg_types(erlang, info, 1) -> + arg_types(erlang, system_info, 1); % alias +arg_types(erlang, iolist_to_binary, 1) -> + [t_sup(t_iolist(), t_binary())]; +arg_types(erlang, iolist_size, 1) -> + [t_sup(t_iolist(), t_binary())]; +arg_types(erlang, integer_to_list, 1) -> + [t_integer()]; +arg_types(erlang, is_alive, 0) -> + []; +arg_types(erlang, is_atom, 1) -> + [t_any()]; +arg_types(erlang, is_binary, 1) -> + [t_any()]; +arg_types(erlang, is_bitstr, 1) -> % XXX: TAKE OUT + arg_types(erlang, is_bitstring, 1); +arg_types(erlang, is_bitstring, 1) -> + [t_any()]; +arg_types(erlang, is_boolean, 1) -> + [t_any()]; +arg_types(erlang, is_builtin, 3) -> + [t_atom(), t_atom(), t_arity()]; +arg_types(erlang, is_constant, 1) -> + [t_any()]; +arg_types(erlang, is_float, 1) -> + [t_any()]; +arg_types(erlang, is_function, 1) -> + [t_any()]; +arg_types(erlang, is_function, 2) -> + [t_any(), t_arity()]; +arg_types(erlang, is_integer, 1) -> + [t_any()]; +arg_types(erlang, is_list, 1) -> + [t_any()]; +arg_types(erlang, is_number, 1) -> + [t_any()]; +arg_types(erlang, is_pid, 1) -> + [t_any()]; +arg_types(erlang, is_port, 1) -> + [t_any()]; +arg_types(erlang, is_process_alive, 1) -> + [t_pid()]; +arg_types(erlang, is_record, 2) -> + [t_any(), t_atom()]; +arg_types(erlang, is_record, 3) -> + [t_any(), t_atom(), t_pos_fixnum()]; +arg_types(erlang, is_reference, 1) -> + [t_any()]; +arg_types(erlang, is_tuple, 1) -> + [t_any()]; +arg_types(erlang, length, 1) -> + [t_list()]; +arg_types(erlang, link, 1) -> + [t_sup(t_pid(), t_port())]; +arg_types(erlang, list_to_atom, 1) -> + [t_string()]; +arg_types(erlang, list_to_binary, 1) -> + [t_iolist()]; +arg_types(erlang, list_to_bitstr, 1) -> % XXX: TAKE OUT + arg_types(erlang, list_to_bitstring, 1); +arg_types(erlang, list_to_bitstring, 1) -> + [t_iolist()]; +arg_types(erlang, list_to_existing_atom, 1) -> + [t_string()]; +arg_types(erlang, list_to_float, 1) -> + [t_list(t_byte())]; +arg_types(erlang, list_to_integer, 1) -> + [t_list(t_byte())]; +arg_types(erlang, list_to_pid, 1) -> + [t_string()]; +arg_types(erlang, list_to_tuple, 1) -> + [t_list()]; +arg_types(erlang, loaded, 0) -> + []; +arg_types(erlang, load_module, 2) -> + [t_atom(), t_binary()]; +arg_types(erlang, localtime, 0) -> + []; +arg_types(erlang, localtime_to_universaltime, 1) -> + [t_tuple([t_date(), t_time()])]; +arg_types(erlang, localtime_to_universaltime, 2) -> + arg_types(erlang, localtime_to_universaltime, 1) ++ + [t_sup(t_boolean(), t_atom('undefined'))]; +arg_types(erlang, make_fun, 3) -> + [t_atom(), t_atom(), t_arity()]; +arg_types(erlang, make_ref, 0) -> + []; +arg_types(erlang, make_tuple, 2) -> + [t_non_neg_fixnum(), t_any()]; % the value 0 is OK as first argument +arg_types(erlang, make_tuple, 3) -> + [t_non_neg_fixnum(), t_any(), t_list(t_tuple([t_pos_integer(), t_any()]))]; +arg_types(erlang, match_spec_test, 3) -> + [t_sup(t_list(), t_tuple()), + t_any(), + t_sup(t_atom('table'), t_atom('trace'))]; +arg_types(erlang, md5, 1) -> + [t_sup(t_iolist(), t_binary())]; +arg_types(erlang, md5_final, 1) -> + [t_binary()]; +arg_types(erlang, md5_init, 0) -> + []; +arg_types(erlang, md5_update, 2) -> + [t_binary(), t_sup(t_iolist(), t_binary())]; +arg_types(erlang, memory, 0) -> + []; +arg_types(erlang, memory, 1) -> + Arg = t_atoms(['total', 'processes', 'processes_used', 'system', + 'atom', 'atom_used', 'binary', 'code', 'ets', + 'maximum']), + [t_sup(Arg, t_list(Arg))]; +arg_types(erlang, module_loaded, 1) -> + [t_atom()]; +arg_types(erlang, monitor, 2) -> + [t_atom(), t_sup([t_pid(), t_atom(), t_tuple([t_atom(), t_node()])])]; +arg_types(erlang, monitor_node, 2) -> + [t_node(), t_boolean()]; +arg_types(erlang, monitor_node, 3) -> + [t_node(), t_boolean(), t_list(t_atom('allow_passive_connect'))]; +arg_types(erlang, node, 0) -> + []; +arg_types(erlang, node, 1) -> + [t_identifier()]; +arg_types(erlang, nodes, 0) -> + []; +arg_types(erlang, nodes, 1) -> + NodesArg = t_atoms(['visible', 'hidden', 'connected', 'this', 'known']), + [t_sup(NodesArg, t_list(NodesArg))]; +arg_types(erlang, now, 0) -> + []; +arg_types(erlang, open_port, 2) -> + [t_sup(t_atom(), t_sup([t_tuple([t_atom('spawn'), t_string()]), + t_tuple([t_atom('spawn_driver'), t_string()]), + t_tuple([t_atom('spawn_executable'), t_string()]), + t_tuple([t_atom('fd'), t_integer(), t_integer()])])), + t_list(t_sup(t_sup([t_atom('stream'), + t_atom('exit_status'), + t_atom('use_stdio'), + t_atom('nouse_stdio'), + t_atom('stderr_to_stdout'), + t_atom('in'), + t_atom('out'), + t_atom('binary'), + t_atom('eof'), + t_atom('hide')]), + t_sup([t_tuple([t_atom('packet'), t_integer()]), + t_tuple([t_atom('line'), t_integer()]), + t_tuple([t_atom('cd'), t_string()]), + t_tuple([t_atom('env'), t_list(t_tuple(2))]), % XXX: More + t_tuple([t_atom('args'), t_list(t_string())]), + t_tuple([t_atom('arg0'), t_string()])])))]; +arg_types(erlang, phash, 2) -> + [t_any(), t_pos_integer()]; +arg_types(erlang, phash2, 1) -> + [t_any()]; +arg_types(erlang, phash2, 2) -> + [t_any(), t_pos_integer()]; +arg_types(erlang, pid_to_list, 1) -> + [t_pid()]; +arg_types(erlang, port_call, 3) -> + [t_sup(t_port(), t_atom()), t_integer(), t_any()]; +arg_types(erlang, port_close, 1) -> + [t_sup(t_port(), t_atom())]; +arg_types(erlang, port_command, 2) -> + [t_sup(t_port(), t_atom()), t_sup(t_iolist(), t_binary())]; +arg_types(erlang, port_command, 3) -> + [t_sup(t_port(), t_atom()), + t_sup(t_iolist(), t_binary()), + t_list(t_atoms(['force', 'nosuspend']))]; +arg_types(erlang, port_connect, 2) -> + [t_sup(t_port(), t_atom()), t_pid()]; +arg_types(erlang, port_control, 3) -> + [t_sup(t_port(), t_atom()), t_integer(), t_sup(t_iolist(), t_binary())]; +arg_types(erlang, port_get_data, 1) -> + [t_sup(t_port(), t_atom())]; +arg_types(erlang, port_info, 1) -> + [t_sup(t_port(), t_atom())]; +arg_types(erlang, port_info, 2) -> + [t_sup(t_port(), t_atom()), + t_atoms(['registered_name', 'id', 'connected', + 'links', 'name', 'input', 'output'])]; +arg_types(erlang, port_to_list, 1) -> + [t_port()]; +arg_types(erlang, ports, 0) -> + []; +arg_types(erlang, port_set_data, 2) -> + [t_sup(t_port(), t_atom()), t_any()]; +arg_types(erlang, pre_loaded, 0) -> + []; +arg_types(erlang, process_display, 2) -> + [t_pid(), t_atom('backtrace')]; +arg_types(erlang, process_flag, 2) -> + [t_sup([t_atom('trap_exit'), t_atom('error_handler'), + t_atom('min_heap_size'), t_atom('priority'), t_atom('save_calls'), + t_atom('monitor_nodes'), % undocumented + t_tuple([t_atom('monitor_nodes'), t_list()])]), % undocumented + t_sup([t_boolean(), t_atom(), t_non_neg_integer()])]; +arg_types(erlang, process_flag, 3) -> + [t_pid(), t_atom('save_calls'), t_non_neg_integer()]; +arg_types(erlang, process_info, 1) -> + [t_pid()]; +arg_types(erlang, process_info, 2) -> + [t_pid(), t_pinfo()]; +arg_types(erlang, processes, 0) -> + []; +arg_types(erlang, purge_module, 1) -> + [t_atom()]; +arg_types(erlang, put, 2) -> + [t_any(), t_any()]; +arg_types(erlang, raise, 3) -> + [t_raise_errorclass(), t_any(), type(erlang, get_stacktrace, 0, [])]; +arg_types(erlang, read_timer, 1) -> + [t_reference()]; +arg_types(erlang, ref_to_list, 1) -> + [t_reference()]; +arg_types(erlang, register, 2) -> + [t_atom(), t_sup(t_port(), t_pid())]; +arg_types(erlang, registered, 0) -> + []; +arg_types(erlang, resume_process, 1) -> + [t_pid()]; % intended for debugging only +arg_types(erlang, round, 1) -> + [t_number()]; +arg_types(erlang, self, 0) -> + []; +arg_types(erlang, send, 2) -> + arg_types(erlang, '!', 2); % alias +arg_types(erlang, send, 3) -> + arg_types(erlang, send, 2) ++ [t_list(t_sendoptions())]; +arg_types(erlang, send_after, 3) -> + [t_non_neg_integer(), t_sup(t_pid(), t_atom()), t_any()]; +arg_types(erlang, seq_trace, 2) -> + [t_atom(), t_sup([t_boolean(), t_tuple([t_fixnum(), t_fixnum()]), t_nil()])]; +arg_types(erlang, seq_trace_info, 1) -> + [t_seq_trace_info()]; +arg_types(erlang, seq_trace_print, 1) -> + [t_any()]; +arg_types(erlang, seq_trace_print, 2) -> + [t_sup(t_atom(), t_fixnum()), t_any()]; +arg_types(erlang, set_cookie, 2) -> + [t_node(), t_atom()]; +arg_types(erlang, setelement, 3) -> + [t_pos_integer(), t_tuple(), t_any()]; +arg_types(erlang, setnode, 2) -> + [t_atom(), t_integer()]; +arg_types(erlang, setnode, 3) -> + [t_atom(), t_port(), t_tuple(4)]; +arg_types(erlang, size, 1) -> + [t_sup(t_tuple(), t_binary())]; +arg_types(erlang, spawn, 1) -> %% TODO: Tuple? + [t_fun()]; +arg_types(erlang, spawn, 2) -> %% TODO: Tuple? + [t_node(), t_fun()]; +arg_types(erlang, spawn, 3) -> %% TODO: Tuple? + [t_atom(), t_atom(), t_list()]; +arg_types(erlang, spawn, 4) -> %% TODO: Tuple? + [t_node(), t_atom(), t_atom(), t_list()]; +arg_types(erlang, spawn_link, 1) -> + arg_types(erlang, spawn, 1); % same +arg_types(erlang, spawn_link, 2) -> + arg_types(erlang, spawn, 2); % same +arg_types(erlang, spawn_link, 3) -> + arg_types(erlang, spawn, 3); % same +arg_types(erlang, spawn_link, 4) -> + arg_types(erlang, spawn, 4); % same +arg_types(erlang, spawn_opt, 1) -> + [t_tuple([t_atom(), t_atom(), t_list(), t_list(t_spawn_options())])]; +arg_types(erlang, spawn_opt, 2) -> + [t_fun(), t_list(t_spawn_options())]; +arg_types(erlang, spawn_opt, 3) -> + [t_atom(), t_fun(), t_list(t_spawn_options())]; +arg_types(erlang, spawn_opt, 4) -> + [t_node(), t_atom(), t_list(), t_list(t_spawn_options())]; +arg_types(erlang, split_binary, 2) -> + [t_binary(), t_non_neg_integer()]; +arg_types(erlang, start_timer, 3) -> + [t_non_neg_integer(), t_sup(t_pid(), t_atom()), t_any()]; +arg_types(erlang, statistics, 1) -> + [t_sup([t_atom('context_switches'), + t_atom('exact_reductions'), + t_atom('garbage_collection'), + t_atom('io'), + t_atom('reductions'), + t_atom('run_queue'), + t_atom('runtime'), + t_atom('wall_clock')])]; +arg_types(erlang, suspend_process, 1) -> + [t_pid()]; +arg_types(erlang, suspend_process, 2) -> + [t_pid(), t_list(t_sup([t_atom('unless_suspending'), + t_atom('asynchronous')]))]; +arg_types(erlang, system_flag, 2) -> + [t_sup([t_atom('backtrace_depth'), + t_atom('cpu_topology'), + t_atom('debug_flags'), % undocumented + t_atom('display_items'), % undocumented + t_atom('fullsweep_after'), + t_atom('min_heap_size'), + t_atom('multi_scheduling'), + t_atom('schedulers_online'), + t_atom('scheduler_bind_type'), + %% Undocumented; used to implement (the documented) seq_trace module. + t_atom('sequential_tracer'), + t_atom('trace_control_word'), + %% 'internal_cpu_topology' is an undocumented internal feature. + t_atom('internal_cpu_topology'), + t_integer()]), + t_sup([t_integer(), + %% 'cpu_topology' + t_system_cpu_topology(), + %% 'scheduler_bind_type' + t_scheduler_bind_type_args(), + %% Undocumented: the following is for 'debug_flags' that + %% takes any erlang term as flags and currently ignores it. + %% t_any(), % commented out since it destroys the type signature + %% + %% Again undocumented; the following are for 'sequential_tracer' + t_sequential_tracer(), + %% The following two are for 'multi_scheduling' + t_atom('block'), + t_atom('unblock'), + %% The following is for 'internal_cpu_topology' + t_internal_cpu_topology()])]; +arg_types(erlang, system_info, 1) -> + [t_sup([t_atom(), % documented + t_tuple([t_atom(), t_any()]), % documented + t_tuple([t_atom(), t_atom(), t_any()])])]; +arg_types(erlang, system_monitor, 0) -> + []; +arg_types(erlang, system_monitor, 1) -> + [t_system_monitor_settings()]; +arg_types(erlang, system_monitor, 2) -> + [t_pid(), t_system_monitor_options()]; +arg_types(erlang, system_profile, 0) -> + []; +arg_types(erlang, system_profile, 2) -> + [t_sup([t_pid(), t_port(), t_atom('undefined')]), + t_system_profile_options()]; +arg_types(erlang, term_to_binary, 1) -> + [t_any()]; +arg_types(erlang, term_to_binary, 2) -> + [t_any(), t_list(t_sup([t_atom('compressed'), + t_tuple([t_atom('compressed'), t_from_range(0, 9)]), + t_tuple([t_atom('minor_version'), t_integers([0, 1])])]))]; +arg_types(erlang, throw, 1) -> + [t_any()]; +arg_types(erlang, time, 0) -> + []; +arg_types(erlang, tl, 1) -> + [t_cons()]; +arg_types(erlang, trace, 3) -> + [t_sup(t_pid(), t_sup([t_atom('existing'), t_atom('new'), t_atom('all')])), + t_boolean(), + t_list(t_sup(t_atom(), t_tuple(2)))]; +arg_types(erlang, trace_delivered, 1) -> + [t_sup(t_pid(), t_atom('all'))]; +arg_types(erlang, trace_info, 2) -> + [t_sup([%% the following two get info about a PID + t_pid(), t_atom('new'), + %% while the following two get info about a func + t_mfa(), t_atom('on_load')]), + t_sup([%% the following are items about a PID + t_atom('flags'), t_atom('tracer'), + %% while the following are items about a func + t_atom('traced'), t_atom('match_spec'), t_atom('meta'), + t_atom('meta_match_spec'), t_atom('call_count'), t_atom('all')])]; +arg_types(erlang, trace_pattern, 2) -> + [t_sup(t_tuple([t_atom(), t_atom(), t_sup(t_arity(), t_atom('_'))]), + t_atom('on_load')), + t_sup([t_boolean(), t_list(), t_atom('restart'), t_atom('pause')])]; +arg_types(erlang, trace_pattern, 3) -> + arg_types(erlang, trace_pattern, 2) ++ + [t_list(t_sup([t_atom('global'), t_atom('local'), + t_atom('meta'), t_tuple([t_atom('meta'), t_pid()]), + t_atom('call_count')]))]; +arg_types(erlang, trunc, 1) -> + [t_number()]; +arg_types(erlang, tuple_size, 1) -> + [t_tuple()]; +arg_types(erlang, tuple_to_list, 1) -> + [t_tuple()]; +arg_types(erlang, universaltime, 0) -> + []; +arg_types(erlang, universaltime_to_localtime, 1) -> + [t_tuple([t_date(), t_time()])]; +arg_types(erlang, unlink, 1) -> + [t_sup(t_pid(), t_port())]; +arg_types(erlang, unregister, 1) -> + [t_atom()]; +arg_types(erlang, whereis, 1) -> + [t_atom()]; +arg_types(erlang, yield, 0) -> + []; +%%------- erl_prim_loader ----------------------------------------------------- +arg_types(erl_prim_loader, get_file, 1) -> + [t_sup(t_atom(), t_string())]; +arg_types(erl_prim_loader, get_path, 0) -> + []; +arg_types(erl_prim_loader, set_path, 1) -> + [t_list(t_string())]; +%%------- error_logger -------------------------------------------------------- +arg_types(error_logger, warning_map, 0) -> + []; +%%------- erts_debug ---------------------------------------------------------- +arg_types(erts_debug, breakpoint, 2) -> + [t_tuple([t_atom(), t_atom(), t_sup(t_integer(), t_atom('_'))]), t_boolean()]; +arg_types(erts_debug, disassemble, 1) -> + [t_sup(t_mfa(), t_integer())]; +arg_types(erts_debug, flat_size, 1) -> + [t_any()]; +arg_types(erts_debug, same, 2) -> + [t_any(), t_any()]; +%%------- ets ----------------------------------------------------------------- +arg_types(ets, all, 0) -> + []; +arg_types(ets, delete, 1) -> + [t_tab()]; +arg_types(ets, delete, 2) -> + [t_tab(), t_any()]; +arg_types(ets, delete_all_objects, 1) -> + [t_tab()]; +arg_types(ets, delete_object, 2) -> + [t_tab(), t_tuple()]; +arg_types(ets, first, 1) -> + [t_tab()]; +arg_types(ets, give_away, 3) -> + [t_tab(), t_pid(), t_any()]; +arg_types(ets, info, 1) -> + [t_tab()]; +arg_types(ets, info, 2) -> + [t_tab(), t_ets_info_items()]; +arg_types(ets, insert, 2) -> + [t_tab(), t_sup(t_tuple(), t_list(t_tuple()))]; +arg_types(ets, insert_new, 2) -> + [t_tab(), t_sup(t_tuple(), t_list(t_tuple()))]; +arg_types(ets, is_compiled_ms, 1) -> + [t_any()]; +arg_types(ets, last, 1) -> + arg_types(ets, first, 1); +arg_types(ets, lookup, 2) -> + [t_tab(), t_any()]; +arg_types(ets, lookup_element, 3) -> + [t_tab(), t_any(), t_pos_fixnum()]; +arg_types(ets, match, 1) -> + [t_any()]; +arg_types(ets, match, 2) -> + [t_tab(), t_match_pattern()]; +arg_types(ets, match, 3) -> + [t_tab(), t_match_pattern(), t_pos_fixnum()]; +arg_types(ets, match_object, 1) -> + arg_types(ets, match, 1); +arg_types(ets, match_object, 2) -> + arg_types(ets, match, 2); +arg_types(ets, match_object, 3) -> + arg_types(ets, match, 3); +arg_types(ets, match_spec_compile, 1) -> + [t_matchspecs()]; +arg_types(ets, match_spec_run_r, 3) -> + [t_matchspecs(), t_any(), t_list()]; +arg_types(ets, member, 2) -> + [t_tab(), t_any()]; +arg_types(ets, new, 2) -> + [t_atom(), t_ets_new_options()]; +arg_types(ets, next, 2) -> + [t_tab(), t_any()]; +arg_types(ets, prev, 2) -> + [t_tab(), t_any()]; +arg_types(ets, rename, 2) -> + [t_atom(), t_atom()]; +arg_types(ets, safe_fixtable, 2) -> + [t_tab(), t_boolean()]; +arg_types(ets, select, 1) -> + [t_any()]; +arg_types(ets, select, 2) -> + [t_tab(), t_matchspecs()]; +arg_types(ets, select, 3) -> + [t_tab(), t_matchspecs(), t_pos_fixnum()]; +arg_types(ets, select_count, 2) -> + [t_tab(), t_matchspecs()]; +arg_types(ets, select_delete, 2) -> + [t_tab(), t_matchspecs()]; +arg_types(ets, select_reverse, 1) -> + arg_types(ets, select, 1); +arg_types(ets, select_reverse, 2) -> + arg_types(ets, select, 2); +arg_types(ets, select_reverse, 3) -> + arg_types(ets, select, 3); +arg_types(ets, slot, 2) -> + [t_tab(), t_non_neg_fixnum()]; % 2nd arg can be 0 +arg_types(ets, setopts, 2) -> + Opt = t_sup(t_tuple([t_atom('heir'), t_pid(), t_any()]), + t_tuple([t_atom('heir'), t_atom('none')])), + [t_tab(), t_sup(Opt, t_list(Opt))]; +arg_types(ets, update_counter, 3) -> + [t_tab(), t_any(), t_sup(t_integer(), + t_sup(t_tuple([t_integer(), t_integer()]), + t_tuple([t_integer(), t_integer(), + t_integer(), t_integer()])))]; +arg_types(ets, update_element, 3) -> + PosValue = t_tuple([t_integer(), t_any()]), + [t_tab(), t_any(), t_sup(PosValue, t_list(PosValue))]; +%%------- file ---------------------------------------------------------------- +arg_types(file, close, 1) -> + [t_file_io_device()]; +arg_types(file, delete, 1) -> + [t_file_name()]; +arg_types(file, get_cwd, 0) -> + []; +arg_types(file, make_dir, 1) -> + [t_file_name()]; +arg_types(file, open, 2) -> + [t_file_name(), t_list(t_file_open_option())]; +arg_types(file, read_file, 1) -> + [t_file_name()]; +arg_types(file, set_cwd, 1) -> + [t_file_name()]; +arg_types(file, write, 2) -> + [t_file_io_device(), t_iodata()]; +arg_types(file, write_file, 2) -> + [t_file_name(), t_sup(t_binary(), t_list())]; +%%------- gen_tcp ------------------------------------------------------------- +arg_types(gen_tcp, accept, 1) -> + [t_socket()]; +arg_types(gen_tcp, accept, 2) -> + [t_socket(), t_timeout()]; +arg_types(gen_tcp, connect, 3) -> + [t_gen_tcp_address(), t_gen_tcp_port(), t_list(t_gen_tcp_connect_option())]; +arg_types(gen_tcp, connect, 4) -> + arg_types(gen_tcp, connect, 3) ++ [t_timeout()]; +arg_types(gen_tcp, listen, 2) -> + [t_gen_tcp_port(), t_list(t_gen_tcp_listen_option())]; +arg_types(gen_tcp, recv, 2) -> + [t_socket(), t_non_neg_integer()]; +arg_types(gen_tcp, recv, 3) -> + arg_types(gen_tcp, recv, 2) ++ [t_timeout()]; +arg_types(gen_tcp, send, 2) -> + [t_socket(), t_packet()]; +arg_types(gen_tcp, shutdown, 2) -> + [t_socket(), t_sup([t_atom('read'), t_atom('write'), t_atom('read_write')])]; +%%------- gen_udp ------------------------------------------------------------- +arg_types(gen_udp, open, 1) -> + [t_gen_tcp_port()]; +arg_types(gen_udp, open, 2) -> + [t_gen_tcp_port(), t_list(t_gen_udp_connect_option())]; +arg_types(gen_udp, recv, 2) -> + arg_types(gen_tcp, recv, 2); +arg_types(gen_udp, recv, 3) -> + arg_types(gen_tcp, recv, 3); +arg_types(gen_udp, send, 4) -> + [t_socket(), t_gen_tcp_address(), t_gen_tcp_port(), t_packet()]; +%%------- hipe_bifs ----------------------------------------------------------- +arg_types(hipe_bifs, add_ref, 2) -> + [t_mfa(), t_tuple([t_mfa(), + t_integer(), + t_sup(t_atom('call'), t_atom('load_mfa')), + t_trampoline(), + t_sup(t_atom('remote'), t_atom('local'))])]; +arg_types(hipe_bifs, alloc_data, 2) -> + [t_integer(), t_integer()]; +arg_types(hipe_bifs, array, 2) -> + [t_non_neg_fixnum(), t_immediate()]; +arg_types(hipe_bifs, array_length, 1) -> + [t_immarray()]; +arg_types(hipe_bifs, array_sub, 2) -> + [t_immarray(), t_non_neg_fixnum()]; +arg_types(hipe_bifs, array_update, 3) -> + [t_immarray(), t_non_neg_fixnum(), t_immediate()]; +arg_types(hipe_bifs, atom_to_word, 1) -> + [t_atom()]; +arg_types(hipe_bifs, bif_address, 3) -> + [t_atom(), t_atom(), t_arity()]; +arg_types(hipe_bifs, bitarray, 2) -> + [t_non_neg_fixnum(), t_boolean()]; +arg_types(hipe_bifs, bitarray_sub, 2) -> + [t_bitarray(), t_non_neg_fixnum()]; +arg_types(hipe_bifs, bitarray_update, 3) -> + [t_bytearray(), t_non_neg_fixnum(), t_boolean()]; +arg_types(hipe_bifs, bytearray, 2) -> + [t_non_neg_fixnum(), t_byte()]; +arg_types(hipe_bifs, bytearray_sub, 2) -> + [t_bytearray(), t_non_neg_fixnum()]; +arg_types(hipe_bifs, bytearray_update, 3) -> + [t_bytearray(), t_non_neg_fixnum(), t_byte()]; +arg_types(hipe_bifs, call_count_clear, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, call_count_get, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, call_count_off, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, call_count_on, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, check_crc, 1) -> + [t_integer()]; +arg_types(hipe_bifs, enter_code, 2) -> + [t_binary(), t_sup(t_nil(), t_tuple())]; +arg_types(hipe_bifs, enter_sdesc, 1) -> + [t_tuple([t_integer(), t_integer(), t_integer(), t_integer(), t_integer()])]; +arg_types(hipe_bifs, find_na_or_make_stub, 2) -> + [t_mfa(), t_boolean()]; +arg_types(hipe_bifs, fun_to_address, 1) -> + [t_mfa()]; +%% arg_types(hipe_bifs, get_emu_address, 1) -> +%% [t_mfa()]; +arg_types(hipe_bifs, get_rts_param, 1) -> + [t_fixnum()]; +arg_types(hipe_bifs, invalidate_funinfo_native_addresses, 1) -> + [t_list(t_mfa())]; +arg_types(hipe_bifs, make_fe, 3) -> + [t_integer(), t_atom(), t_tuple([t_integer(), t_integer(), t_integer()])]; +%% arg_types(hipe_bifs, make_native_stub, 2) -> +%% [t_integer(), t_arity()]; +arg_types(hipe_bifs, mark_referred_from, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, merge_term, 1) -> + [t_any()]; +arg_types(hipe_bifs, patch_call, 3) -> + [t_integer(), t_integer(), t_trampoline()]; +arg_types(hipe_bifs, patch_insn, 3) -> + [t_integer(), t_integer(), t_insn_type()]; +arg_types(hipe_bifs, primop_address, 1) -> + [t_atom()]; +arg_types(hipe_bifs, redirect_referred_from, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, ref, 1) -> + [t_immediate()]; +arg_types(hipe_bifs, ref_get, 1) -> + [t_hiperef()]; +arg_types(hipe_bifs, ref_set, 2) -> + [t_hiperef(), t_immediate()]; +arg_types(hipe_bifs, remove_refs_from, 1) -> + [t_mfa()]; +arg_types(hipe_bifs, set_funinfo_native_address, 3) -> + arg_types(hipe_bifs, set_native_address, 3); +arg_types(hipe_bifs, set_native_address, 3) -> + [t_mfa(), t_integer(), t_boolean()]; +arg_types(hipe_bifs, system_crc, 1) -> + [t_integer()]; +arg_types(hipe_bifs, term_to_word, 1) -> + [t_any()]; +arg_types(hipe_bifs, update_code_size, 3) -> + [t_atom(), t_sup(t_nil(), t_binary()), t_integer()]; +arg_types(hipe_bifs, write_u8, 2) -> + [t_integer(), t_byte()]; +arg_types(hipe_bifs, write_u32, 2) -> + [t_integer(), t_integer()]; +arg_types(hipe_bifs, write_u64, 2) -> + [t_integer(), t_integer()]; +%%------- io ------------------------------------------------------------------ +arg_types(io, format, 1) -> + [t_io_format_string()]; +arg_types(io, format, 2) -> + [t_io_format_string(), t_list()]; +arg_types(io, format, 3) -> + [t_io_device(), t_io_format_string(), t_list()]; +arg_types(io, fwrite, 1) -> + arg_types(io, format, 1); +arg_types(io, fwrite, 2) -> + arg_types(io, format, 2); +arg_types(io, fwrite, 3) -> + arg_types(io, format, 3); +arg_types(io, put_chars, 1) -> + [t_iodata()]; +arg_types(io, put_chars, 2) -> + [t_io_device(), t_iodata()]; +%%------- io_lib -------------------------------------------------------------- +arg_types(io_lib, format, 2) -> + arg_types(io, format, 2); +arg_types(io_lib, fwrite, 2) -> + arg_types(io_lib, format, 2); +%%------- lists --------------------------------------------------------------- +arg_types(lists, all, 2) -> + [t_fun([t_any()], t_boolean()), t_list()]; +arg_types(lists, any, 2) -> + [t_fun([t_any()], t_boolean()), t_list()]; +arg_types(lists, append, 2) -> + arg_types(erlang, '++', 2); % alias +arg_types(lists, delete, 2) -> + [t_any(), t_maybe_improper_list()]; +arg_types(lists, dropwhile, 2) -> + [t_fun([t_any()], t_boolean()), t_maybe_improper_list()]; +arg_types(lists, filter, 2) -> + [t_fun([t_any()], t_boolean()), t_list()]; +arg_types(lists, flatten, 1) -> + [t_list()]; +arg_types(lists, flatmap, 2) -> + [t_fun([t_any()], t_list()), t_list()]; +arg_types(lists, foreach, 2) -> + [t_fun([t_any()], t_any()), t_list()]; +arg_types(lists, foldl, 3) -> + [t_fun([t_any(), t_any()], t_any()), t_any(), t_list()]; +arg_types(lists, foldr, 3) -> + arg_types(lists, foldl, 3); % same +arg_types(lists, keydelete, 3) -> + [t_any(), t_pos_fixnum(), t_maybe_improper_list()]; % t_list(t_tuple())]; +arg_types(lists, keyfind, 3) -> + arg_types(lists, keysearch, 3); +arg_types(lists, keymap, 3) -> + [t_fun([t_any()], t_any()), t_pos_fixnum(), t_list(t_tuple())]; +arg_types(lists, keymember, 3) -> + [t_any(), t_pos_fixnum(), t_maybe_improper_list()]; % t_list(t_tuple()); +arg_types(lists, keymerge, 3) -> + [t_pos_fixnum(), t_list(t_tuple()), t_list(t_tuple())]; +arg_types(lists, keyreplace, 4) -> + [t_any(), t_pos_fixnum(), t_maybe_improper_list(), t_tuple()]; % t_list(t_tuple())]; +arg_types(lists, keysearch, 3) -> + [t_any(), t_pos_fixnum(), t_maybe_improper_list()]; % t_list(t_tuple())]; +arg_types(lists, keysort, 2) -> + [t_pos_fixnum(), t_list(t_tuple())]; +arg_types(lists, last, 1) -> + [t_nonempty_list()]; +arg_types(lists, map, 2) -> + [t_fun([t_any()], t_any()), t_list()]; +arg_types(lists, mapfoldl, 3) -> + [t_fun([t_any(), t_any()], t_tuple([t_any(), t_any()])), t_any(), t_list()]; +arg_types(lists, mapfoldr, 3) -> + arg_types(lists, mapfoldl, 3); % same +arg_types(lists, max, 1) -> + [t_nonempty_list()]; +arg_types(lists, member, 2) -> + [t_any(), t_list()]; +%% arg_types(lists, merge, 1) -> +%% [t_list(t_list())]; +arg_types(lists, merge, 2) -> + [t_list(), t_list()]; +%% arg_types(lists, merge, 3) -> +%% [t_fun([t_any(), t_any()], t_boolean()), t_list(), t_list()]; +%% arg_types(lists, merge3, 3) -> +%% [t_list(), t_list(), t_list()]; +arg_types(lists, min, 1) -> + [t_nonempty_list()]; +arg_types(lists, nth, 2) -> + [t_pos_fixnum(), t_nonempty_list()]; +arg_types(lists, nthtail, 2) -> + [t_non_neg_fixnum(), t_nonempty_list()]; +arg_types(lists, partition, 2) -> + arg_types(lists, filter, 2); % same +arg_types(lists, reverse, 1) -> + [t_list()]; +arg_types(lists, reverse, 2) -> + [t_list(), t_any()]; +arg_types(lists, seq, 2) -> + [t_integer(), t_integer()]; +arg_types(lists, seq, 3) -> + [t_integer(), t_integer(), t_integer()]; +arg_types(lists, sort, 1) -> + [t_list()]; +arg_types(lists, sort, 2) -> + [t_fun([t_any(), t_any()], t_boolean()), t_list()]; +arg_types(lists, split, 2) -> + [t_non_neg_fixnum(), t_maybe_improper_list()]; % do not lie in 2nd arg +arg_types(lists, splitwith, 2) -> + [t_fun([t_any()], t_boolean()), t_maybe_improper_list()]; +arg_types(lists, subtract, 2) -> + arg_types(erlang, '--', 2); % alias +arg_types(lists, takewhile, 2) -> + [t_fun([t_any()], t_boolean()), t_maybe_improper_list()]; +arg_types(lists, usort, 1) -> + arg_types(lists, sort, 1); % same +arg_types(lists, usort, 2) -> + arg_types(lists, sort, 2); +arg_types(lists, unzip, 1) -> + [t_list(t_tuple(2))]; +arg_types(lists, unzip3, 1) -> + [t_list(t_tuple(3))]; +arg_types(lists, zip, 2) -> + [t_list(), t_list()]; +arg_types(lists, zip3, 3) -> + [t_list(), t_list(), t_list()]; +arg_types(lists, zipwith, 3) -> + [t_fun([t_any(), t_any()], t_any()), t_list(), t_list()]; +arg_types(lists, zipwith3, 4) -> + [t_fun([t_any(), t_any(), t_any()], t_any()), t_list(), t_list(), t_list()]; +%%------- math ---------------------------------------------------------------- +arg_types(math, acos, 1) -> + [t_number()]; +arg_types(math, acosh, 1) -> + [t_number()]; +arg_types(math, asin, 1) -> + [t_number()]; +arg_types(math, asinh, 1) -> + [t_number()]; +arg_types(math, atan, 1) -> + [t_number()]; +arg_types(math, atan2, 2) -> + [t_number(), t_number()]; +arg_types(math, atanh, 1) -> + [t_number()]; +arg_types(math, cos, 1) -> + [t_number()]; +arg_types(math, cosh, 1) -> + [t_number()]; +arg_types(math, erf, 1) -> + [t_number()]; +arg_types(math, erfc, 1) -> + [t_number()]; +arg_types(math, exp, 1) -> + [t_number()]; +arg_types(math, log, 1) -> + [t_number()]; +arg_types(math, log10, 1) -> + [t_number()]; +arg_types(math, pi, 0) -> + []; +arg_types(math, pow, 2) -> + [t_number(), t_number()]; +arg_types(math, sin, 1) -> + [t_number()]; +arg_types(math, sinh, 1) -> + [t_number()]; +arg_types(math, sqrt, 1) -> + [t_number()]; +arg_types(math, tan, 1) -> + [t_number()]; +arg_types(math, tanh, 1) -> + [t_number()]; +%%-- net_kernel --------------------------------------------------------------- +arg_types(net_kernel, dflag_unicode_io, 1) -> + [t_pid()]; +%%------- ordsets ------------------------------------------------------------- +arg_types(ordsets, filter, 2) -> + arg_types(lists, filter, 2); +arg_types(ordsets, fold, 3) -> + arg_types(lists, foldl, 3); +%%------- os ------------------------------------------------------------------ +arg_types(os, getenv, 0) -> + []; +arg_types(os, getenv, 1) -> + [t_string()]; +arg_types(os, getpid, 0) -> + []; +arg_types(os, putenv, 2) -> + [t_string(), t_string()]; +%%-- re ----------------------------------------------------------------------- +arg_types(re, compile, 1) -> + [t_iodata()]; +arg_types(re, compile, 2) -> + [t_iodata(), t_list(t_re_compile_option())]; +arg_types(re, run, 2) -> + [t_iodata(), t_re_RE()]; +arg_types(re, run, 3) -> + [t_iodata(), t_re_RE(), t_list(t_re_run_option())]; +%%------- string -------------------------------------------------------------- +arg_types(string, chars, 2) -> + [t_char(), t_non_neg_integer()]; +arg_types(string, chars, 3) -> + [t_char(), t_non_neg_integer(), t_any()]; +arg_types(string, concat, 2) -> + [t_string(), t_string()]; +arg_types(string, equal, 2) -> + [t_string(), t_string()]; +arg_types(string, to_float, 1) -> + [t_string()]; +arg_types(string, to_integer, 1) -> + [t_string()]; +%%------- unicode ------------------------------------------------------------- +arg_types(unicode, characters_to_binary, 2) -> + [t_ML(), t_encoding()]; +arg_types(unicode, characters_to_list, 2) -> + [t_ML(), t_encoding()]; +arg_types(unicode, bin_is_7bit, 1) -> + [t_binary()]; +%%----------------------------------------------------------------------------- +arg_types(M, F, A) when is_atom(M), is_atom(F), + is_integer(A), 0 =< A, A =< 255 -> + unknown. % safe approximation for all functions. + + +-spec is_known(atom(), atom(), arity()) -> boolean(). + +is_known(M, F, A) -> + arg_types(M, F, A) =/= unknown. + + +-spec structure_inspecting_args(atom(), atom(), arity()) -> [1..255]. + +structure_inspecting_args(erlang, element, 2) -> [2]; +structure_inspecting_args(erlang, is_atom, 1) -> [1]; +structure_inspecting_args(erlang, is_boolean, 1) -> [1]; +structure_inspecting_args(erlang, is_binary, 1) -> [1]; +structure_inspecting_args(erlang, is_bitstring, 1) -> [1]; +structure_inspecting_args(erlang, is_float, 1) -> [1]; +structure_inspecting_args(erlang, is_function, 1) -> [1]; +structure_inspecting_args(erlang, is_integer, 1) -> [1]; +structure_inspecting_args(erlang, is_list, 1) -> [1]; +structure_inspecting_args(erlang, is_number, 1) -> [1]; +structure_inspecting_args(erlang, is_pid, 1) -> [1]; +structure_inspecting_args(erlang, is_port, 1) -> [1]; +structure_inspecting_args(erlang, is_reference, 1) -> [1]; +structure_inspecting_args(erlang, is_tuple, 1) -> [1]; +%%structure_inspecting_args(erlang, setelement, 3) -> [2]. +structure_inspecting_args(_, _, _) -> []. % XXX: assume no arg needs inspection + + +check_fun_application(Fun, Args) -> + case t_is_fun(Fun) of + true -> + case t_fun_args(Fun) of + unknown -> + case t_is_none_or_unit(t_fun_range(Fun)) of + true -> error; + false -> ok + end; + FunDom when length(FunDom) =:= length(Args) -> + case any_is_none_or_unit(inf_lists(FunDom, Args)) of + true -> error; + false -> + case t_is_none_or_unit(t_fun_range(Fun)) of + true -> error; + false -> ok + end + end; + _ -> error + end; + false -> + error + end. + + +%% ===================================================================== +%% These are basic types that should probably be moved to erl_types +%% ===================================================================== + +t_socket() -> t_port(). % alias + +t_ip_address() -> + T_int16 = t_from_range(0, 16#FFFF), + t_sup(t_tuple([t_byte(), t_byte(), t_byte(), t_byte()]), + t_tuple([T_int16, T_int16, T_int16, T_int16, + T_int16, T_int16, T_int16, T_int16])). + +%% ===================================================================== +%% Some basic types used in various parts of the system +%% ===================================================================== + +t_date() -> + t_tuple([t_pos_fixnum(), t_pos_fixnum(), t_pos_fixnum()]). + +t_time() -> + t_tuple([t_non_neg_fixnum(), t_non_neg_fixnum(), t_non_neg_fixnum()]). + +t_packet() -> + t_sup([t_binary(), t_iolist(), t_httppacket()]). + +t_httppacket() -> + t_sup([t_HttpRequest(), t_HttpResponse(), + t_HttpHeader(), t_atom('http_eoh'), t_HttpError()]). + +%% ===================================================================== +%% HTTP types documented in R12B-4 +%% ===================================================================== + +t_HttpRequest() -> + t_tuple([t_atom('http_request'), t_HttpMethod(), t_HttpUri(), t_HttpVersion()]). + +t_HttpResponse() -> + t_tuple([t_atom('http_response'), t_HttpVersion(), t_integer(), t_string()]). + +t_HttpHeader() -> + t_tuple([t_atom('http_header'), t_integer(), t_HttpField(), t_any(), t_string()]). + +t_HttpError() -> + t_tuple([t_atom('http_error'), t_string()]). + +t_HttpMethod() -> + t_sup(t_HttpMethodAtom(), t_string()). + +t_HttpMethodAtom() -> + t_atoms(['OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE']). + +t_HttpUri() -> + t_sup([t_atom('*'), + t_tuple([t_atom('absoluteURI'), + t_sup(t_atom('http'), t_atom('https')), + t_string(), + t_sup(t_non_neg_integer(), t_atom('undefined')), + t_string()]), + t_tuple([t_atom('scheme'), t_string(), t_string()]), + t_tuple([t_atom('abs_path'), t_string()]), + t_string()]). + +t_HttpVersion() -> + t_tuple([t_non_neg_integer(), t_non_neg_integer()]). + +t_HttpField() -> + t_sup(t_HttpFieldAtom(), t_string()). + +t_HttpFieldAtom() -> + t_atoms(['Cache-Control', 'Connection', 'Date', 'Pragma', 'Transfer-Encoding', + 'Upgrade', 'Via', 'Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Authorization', 'From', 'Host', + 'If-Modified-Since', 'If-Match', 'If-None-Match', 'If-Range', + 'If-Unmodified-Since', 'Max-Forwards', 'Proxy-Authorization', + 'Range', 'Referer', 'User-Agent', 'Age', 'Location', + 'Proxy-Authenticate', 'Public', 'Retry-After', 'Server', 'Vary', + 'Warning', 'Www-Authenticate', 'Allow', 'Content-Base', + 'Content-Encoding', 'Content-Language', 'Content-Length', + 'Content-Location', 'Content-Md5', 'Content-Range', 'Content-Type', + 'Etag', 'Expires', 'Last-Modified', 'Accept-Ranges', + 'Set-Cookie', 'Set-Cookie2', 'X-Forwarded-For', 'Cookie', + 'Keep-Alive', 'Proxy-Connection']). + +%% ===================================================================== +%% These are used for the built-in functions of 'code' +%% ===================================================================== + +t_code_load_return(Mod) -> + t_sup(t_tuple([t_atom('module'), case t_is_atom(Mod) of + true -> Mod; + false -> t_atom() + end]), + t_tuple([t_atom('error'), t_code_load_error_rsn()])). + +t_code_load_error_rsn() -> % also used in erlang:load_module/2 + t_sup([t_atom('badfile'), + t_atom('nofile'), + t_atom('not_purged'), + t_atom('native_code'), + t_atom('sticky_directory')]). % only for the 'code' functions + +t_code_loaded_fname_or_status() -> + t_sup([t_string(), % filename + t_atom('preloaded'), + t_atom('cover_compiled')]). + +%% ===================================================================== +%% These are used for the built-in functions of 'erlang' +%% ===================================================================== + +t_decode_packet_option() -> + t_sup([t_tuple([t_atom('packet_size'), t_non_neg_integer()]), + t_tuple([t_atom('line_length'), t_non_neg_integer()])]). + +t_decode_packet_type() -> + t_sup(t_inet_setoption_packettype(), t_atom('httph')). + +t_dist_exit() -> + t_sup([t_atom('kill'), t_atom('noconnection'), t_atom('normal')]). + +t_match_spec_test_errors() -> + t_list(t_sup(t_tuple([t_atom('error'), t_string()]), + t_tuple([t_atom('warning'), t_string()]))). + +t_module_info_2() -> + t_sup([t_atom('module'), + t_atom('imports'), + t_atom('exports'), + t_atom('functions'), + t_atom('attributes'), + t_atom('compile'), + t_atom('native_addresses')]). + +t_pinfo() -> + t_sup([t_pinfo_item(), t_list(t_pinfo_item())]). + +t_pinfo_item() -> + t_sup([t_atom('backtrace'), + t_atom('current_function'), + t_atom('dictionary'), + t_atom('error_handler'), + t_atom('garbage_collection'), + t_atom('group_leader'), + t_atom('heap_size'), + t_atom('initial_call'), + t_atom('last_calls'), + t_atom('links'), + t_atom('memory'), + t_atom('message_binary'), % for hybrid heap only + t_atom('message_queue_len'), + t_atom('messages'), + t_atom('monitored_by'), + t_atom('monitors'), + t_atom('priority'), + t_atom('reductions'), + t_atom('registered_name'), + t_atom('sequential_trace_token'), + t_atom('stack_size'), + t_atom('status'), + t_atom('suspending'), + t_atom('total_heap_size'), + t_atom('trap_exit')]). + +t_process_priority_level() -> + t_sup([t_atom('max'), t_atom('high'), t_atom('normal'), t_atom('low')]). + +t_process_status() -> + t_sup([t_atom('runnable'), t_atom('running'), + t_atom('suspended'), t_atom('waiting')]). + +t_raise_errorclass() -> + t_sup([t_atom('error'), t_atom('exit'), t_atom('throw')]). + +t_sendoptions() -> + t_sup(t_atom('noconnect'), t_atom('nosuspend')). + +t_seq_trace_info() -> + t_sup([t_atom('send'), + t_atom('receive'), + t_atom('print'), + t_atom('timestamp'), + t_atom('label'), + t_atom('serial')]). + +%% XXX: Better if we also maintain correspondencies between infos and values +t_seq_trace_info_returns() -> + Values = t_sup([t_non_neg_integer(), t_boolean(), + t_tuple([t_non_neg_integer(), t_non_neg_integer()])]), + t_sup(t_tuple([t_seq_trace_info(), Values]), t_nil()). + +t_sequential_tracer() -> + t_sup([t_atom('false'), t_pid(), t_port()]). + +t_spawn_options() -> + t_sup([t_atom('link'), + t_atom('monitor'), + t_tuple([t_atom('priority'), t_process_priority_level()]), + t_tuple([t_atom('min_heap_size'), t_fixnum()]), + t_tuple([t_atom('fullsweep_after'), t_fixnum()])]). + +t_spawn_opt_return(List) -> + case t_is_none(t_inf(t_list(t_atom('monitor')), List)) of + true -> t_pid(); + false -> t_sup(t_pid(), t_tuple([t_pid(), t_reference()])) + end. + +t_system_cpu_topology() -> + t_sup(t_atom('undefined'), t_system_cpu_topology_level_entry_list()). + +t_system_cpu_topology_level_entry_list() -> + t_list(t_system_cpu_topology_level_entry()). + +t_system_cpu_topology_level_entry() -> + t_sup(t_tuple([t_system_cpu_topology_level_tag(), + t_system_cpu_topology_sublevel_entry()]), + t_tuple([t_system_cpu_topology_level_tag(), + t_system_cpu_topology_info_list(), + t_system_cpu_topology_sublevel_entry()])). + +t_system_cpu_topology_sublevel_entry() -> + t_sup(t_system_cpu_topology_logical_cpu_id(), + t_list(t_tuple())). % approximation + +t_system_cpu_topology_level_tag() -> + t_atoms(['core', 'node', 'processor', 'thread']). + +t_system_cpu_topology_logical_cpu_id() -> + t_tuple([t_atom('logical'), t_non_neg_fixnum()]). + +t_system_cpu_topology_info_list() -> + t_nil(). % it may be extended in the future + +t_internal_cpu_topology() -> %% Internal undocumented type + t_sup(t_list(t_tuple([t_atom('cpu'), + t_non_neg_fixnum(), + t_non_neg_fixnum(), + t_non_neg_fixnum(), + t_non_neg_fixnum(), + t_non_neg_fixnum(), + t_non_neg_fixnum()])), + t_atom('undefined')). + +t_scheduler_bind_type_args() -> + t_sup([t_atom('default_bind'), + t_atom('no_node_processor_spread'), + t_atom('no_node_thread_spread'), + t_atom('no_spread'), + t_atom('processor_spread'), + t_atom('spread'), + t_atom('thread_spread'), + t_atom('thread_no_node_processor_spread'), + t_atom('unbound')]). + +t_scheduler_bind_type_results() -> + t_sup([t_atom('no_node_processor_spread'), + t_atom('no_node_thread_spread'), + t_atom('no_spread'), + t_atom('processor_spread'), + t_atom('spread'), + t_atom('thread_spread'), + t_atom('thread_no_node_processor_spread'), + t_atom('unbound')]). + + +t_system_monitor_settings() -> + t_sup([t_atom('undefined'), + t_tuple([t_pid(), t_system_monitor_options()])]). + +t_system_monitor_options() -> + t_list(t_sup([t_atom('busy_port'), + t_atom('busy_dist_port'), + t_tuple([t_atom('long_gc'), t_integer()]), + t_tuple([t_atom('large_heap'), t_integer()])])). + +t_system_multi_scheduling() -> + t_sup([t_atom('blocked'), t_atom('disabled'), t_atom('enabled')]). + +t_system_profile_options() -> + t_list(t_sup([t_atom('exclusive'), + t_atom('runnable_ports'), + t_atom('runnable_procs'), + t_atom('scheduler')])). + +t_system_profile_return() -> + t_sup(t_atom('undefined'), + t_tuple([t_sup(t_pid(), t_port()), t_system_profile_options()])). + +%% ===================================================================== +%% These are used for the built-in functions of 'ets' +%% ===================================================================== + +t_tab() -> + t_sup(t_tid(), t_atom()). + +t_match_pattern() -> + t_sup(t_atom(), t_tuple()). + +t_matchspecs() -> + t_list(t_tuple([t_match_pattern(), t_list(), t_list()])). + +t_matchres() -> + t_sup(t_tuple([t_list(), t_any()]), t_atom('$end_of_table')). + +%% From the 'ets' documentation +%%----------------------------- +%% Option = Type | Access | named_table | {keypos,Pos} +%% | {heir,pid(),HeirData} | {heir,none} +%% | {write_concurrency,boolean()} +%% Type = set | ordered_set | bag | duplicate_bag +%% Access = public | protected | private +%% Pos = integer() +%% HeirData = term() +t_ets_new_options() -> + t_list(t_sup([t_atom('set'), + t_atom('ordered_set'), + t_atom('bag'), + t_atom('duplicate_bag'), + t_atom('public'), + t_atom('protected'), + t_atom('private'), + t_atom('named_table'), + t_tuple([t_atom('heir'), t_pid(), t_any()]), + t_tuple([t_atom('heir'), t_atom('none')]), + t_tuple([t_atom('keypos'), t_integer()]), + t_tuple([t_atom('write_concurrency'), t_boolean()])])). + +t_ets_info_items() -> + t_sup([t_atom('fixed'), + t_atom('safe_fixed'), + t_atom('keypos'), + t_atom('memory'), + t_atom('name'), + t_atom('named_table'), + t_atom('node'), + t_atom('owner'), + t_atom('protection'), + t_atom('size'), + t_atom('type')]). + +%% ===================================================================== +%% These are used for the built-in functions of 'file' +%% ===================================================================== + +t_file_io_device() -> + t_sup(t_pid(), t_tuple([t_atom('file_descriptor'), t_atom(), t_any()])). + +t_file_name() -> + t_sup([t_atom(), + t_string(), + %% DeepList = [char() | atom() | DeepList] -- approximation below + t_list(t_sup([t_atom(), t_string(), t_list()]))]). + +t_file_open_option() -> + t_sup([t_atom('read'), + t_atom('write'), + t_atom('append'), + t_atom('raw'), + t_atom('binary'), + t_atom('delayed_write'), + t_atom('read_ahead'), + t_atom('compressed'), + t_tuple([t_atom('delayed_write'), + t_pos_integer(), t_non_neg_integer()]), + t_tuple([t_atom('read_ahead'), t_pos_integer()])]). + +%% This lists all Posix errors that can occur in file:*/* functions +t_file_posix_error() -> + t_sup([t_atom('eacces'), + t_atom('eagain'), + t_atom('ebadf'), + t_atom('ebusy'), + t_atom('edquot'), + t_atom('eexist'), + t_atom('efault'), + t_atom('efbig'), + t_atom('eintr'), + t_atom('einval'), + t_atom('eio'), + t_atom('eisdir'), + t_atom('eloop'), + t_atom('emfile'), + t_atom('emlink'), + t_atom('enametoolong'), + t_atom('enfile'), + t_atom('enodev'), + t_atom('enoent'), + t_atom('enomem'), + t_atom('enospc'), + t_atom('enotblk'), + t_atom('enotdir'), + t_atom('enotsup'), + t_atom('enxio'), + t_atom('eperm'), + t_atom('epipe'), + t_atom('erofs'), + t_atom('espipe'), + t_atom('esrch'), + t_atom('estale'), + t_atom('exdev')]). + +t_file_return() -> + t_sup(t_atom('ok'), t_tuple([t_atom('error'), t_file_posix_error()])). + +%% ===================================================================== +%% These are used for the built-in functions of 'gen_tcp' +%% ===================================================================== + +t_gen_tcp_accept() -> + t_sup(t_tuple([t_atom('ok'), t_socket()]), + t_tuple([t_atom('error'), t_sup([t_atom('closed'), + t_atom('timeout'), + t_inet_posix_error()])])). + +t_gen_tcp_address() -> + t_sup([t_string(), t_atom(), t_ip_address()]). + +t_gen_tcp_port() -> + t_from_range(0, 16#FFFF). + +t_gen_tcp_connect_option() -> + t_sup([t_atom('list'), + t_atom('binary'), + t_tuple([t_atom('ip'), t_ip_address()]), + t_tuple([t_atom('port'), t_gen_tcp_port()]), + t_tuple([t_atom('fd'), t_integer()]), + t_atom('inet6'), + t_atom('inet'), + t_inet_setoption()]). + +t_gen_tcp_listen_option() -> + t_sup([t_atom('list'), + t_atom('binary'), + t_tuple([t_atom('backlog'), t_non_neg_integer()]), + t_tuple([t_atom('ip'), t_ip_address()]), + t_tuple([t_atom('fd'), t_integer()]), + t_atom('inet6'), + t_atom('inet'), + t_inet_setoption()]). + +t_gen_tcp_recv() -> + t_sup(t_tuple([t_atom('ok'), t_packet()]), + t_tuple([t_atom('error'), t_sup([t_atom('closed'), + t_inet_posix_error()])])). + +%% ===================================================================== +%% These are used for the built-in functions of 'gen_udp' +%% ===================================================================== + +t_gen_udp_connect_option() -> + t_sup([t_atom('list'), + t_atom('binary'), + t_tuple([t_atom('ip'), t_ip_address()]), + t_tuple([t_atom('fd'), t_integer()]), + t_atom('inet6'), + t_atom('inet'), + t_inet_setoption()]). + +t_gen_udp_recv() -> + t_sup(t_tuple([t_atom('ok'), + t_tuple([t_ip_address(), + t_gen_tcp_port(), + t_packet()])]), + t_tuple([t_atom('error'), + t_sup(t_atom('not_owner'), t_inet_posix_error())])). + +%% ===================================================================== +%% These are used for the built-in functions of 'hipe_bifs' +%% ===================================================================== + +t_trampoline() -> + t_sup(t_nil(), t_integer()). + +t_immediate() -> + t_sup([t_nil(), t_atom(), t_fixnum()]). + +t_immarray() -> + t_integer(). %% abstract data type + +t_hiperef() -> + t_immarray(). + +t_bitarray() -> + t_bitstr(). + +t_bytearray() -> + t_binary(). + +t_insn_type() -> + t_sup([% t_atom('call'), + t_atom('load_mfa'), + t_atom('x86_abs_pcrel'), + t_atom('atom'), + t_atom('constant'), + t_atom('c_const'), + t_atom('closure')]). + +%% ===================================================================== +%% These are used for the built-in functions of 'inet' +%% ===================================================================== + +t_inet_setoption() -> + t_sup([%% first the 2-tuple options + t_tuple([t_atom('active'), t_sup(t_boolean(), t_atom('once'))]), + t_tuple([t_atom('broadcast'), t_boolean()]), + t_tuple([t_atom('delay_send'), t_boolean()]), + t_tuple([t_atom('dontroute'), t_boolean()]), + t_tuple([t_atom('exit_on_close'), t_boolean()]), + t_tuple([t_atom('header'), t_non_neg_integer()]), + t_tuple([t_atom('keepalive'), t_boolean()]), + t_tuple([t_atom('nodelay'), t_boolean()]), + t_tuple([t_atom('packet'), t_inet_setoption_packettype()]), + t_tuple([t_atom('packet_size'), t_non_neg_integer()]), + t_tuple([t_atom('read_packets'), t_non_neg_integer()]), + t_tuple([t_atom('recbuf'), t_non_neg_integer()]), + t_tuple([t_atom('reuseaddr'), t_boolean()]), + t_tuple([t_atom('send_timeout'), t_non_neg_integer()]), + t_tuple([t_atom('sndbuf'), t_non_neg_integer()]), + t_tuple([t_atom('priority'), t_non_neg_integer()]), + t_tuple([t_atom('tos'), t_non_neg_integer()]), + %% and a 4-tuple option + t_tuple([t_atom('raw'), + t_non_neg_integer(), % protocol level + t_non_neg_integer(), % option number + t_binary()])]). % actual option value + +t_inet_setoption_packettype() -> + t_sup([t_atom('raw'), + t_integers([0,1,2,4]), + t_atom('asn1'), t_atom('cdr'), t_atom('sunrm'), + t_atom('fcgi'), t_atom('tpkt'), t_atom('line'), + t_atom('http')]). %% but t_atom('httph') is not needed + +t_inet_posix_error() -> + t_atom(). %% XXX: Very underspecified + +%% ===================================================================== +%% These are used for the built-in functions of 'io' +%% ===================================================================== + +t_io_device() -> + t_sup(t_atom(), t_pid()). + +%% The documentation in R11B-4 reads +%% Format ::= atom() | string() | binary() +%% but the Format can also be a (deep) list, hence the type below +t_io_format_string() -> + t_sup([t_atom(), t_list(), t_binary()]). + +%% ===================================================================== +%% These are used for the built-in functions of 're'; the functions +%% whose last name component starts with a capital letter are types +%% ===================================================================== + +t_re_MP() -> %% it's supposed to be an opaque data type + t_tuple([t_atom('re_pattern'), t_integer(), t_integer(), t_binary()]). + +t_re_RE() -> + t_sup(t_re_MP(), t_iodata()). + +t_re_compile_option() -> + t_sup([t_atoms(['anchored', 'caseless', 'dollar_endonly', 'dotall', + 'extended', 'firstline', 'multiline', 'no_auto_capture', + 'dupnames', 'ungreedy']), + t_tuple([t_atom('newline'), t_re_NLSpec()])]). + +t_re_run_option() -> + t_sup([t_atoms(['anchored', 'global', 'notbol', 'noteol', 'notempty']), + t_tuple([t_atom('offset'), t_integer()]), + t_tuple([t_atom('newline'), t_re_NLSpec()]), + t_tuple([t_atom('capture'), t_re_ValueSpec()]), + t_tuple([t_atom('capture'), t_re_ValueSpec(), t_re_Type()]), + t_re_compile_option()]). + +t_re_ErrorSpec() -> + t_tuple([t_string(), t_non_neg_integer()]). + +t_re_Type() -> + t_atoms(['index', 'list', 'binary']). + +t_re_NLSpec() -> + t_atoms(['cr', 'crlf', 'lf', 'anycrlf']). + +t_re_ValueSpec() -> + t_sup(t_atoms(['all', 'all_but_first', 'first', 'none']), t_re_ValueList()). + +t_re_ValueList() -> + t_list(t_sup([t_integer(), t_string(), t_atom()])). + +t_re_Captured() -> + t_list(t_sup(t_re_CapturedData(), t_list(t_re_CapturedData()))). + +t_re_CapturedData() -> + t_sup([t_tuple([t_integer(), t_integer()]), t_string(), t_binary()]). + +%% ===================================================================== +%% These are used for the built-in functions of 'unicode' +%% ===================================================================== + +t_ML() -> % a binary or a possibly deep list of integers or binaries + t_sup(t_list(t_sup([t_integer(), t_binary(), t_list()])), t_binary()). + +t_encoding() -> + t_atoms(['latin1', 'unicode', 'utf8', 'utf16', 'utf32']). + +t_encoding_a2b() -> % for the 2nd arg of atom_to_binary/2 and binary_to_atom/2 + t_atoms(['latin1', 'unicode', 'utf8']). + +%% ===================================================================== +%% Some testing code for ranges below +%% ===================================================================== + +-ifdef(DO_ERL_BIF_TYPES_TEST). + +test() -> + put(hipe_target_arch, amd64), + + Bsl1 = type(erlang, 'bsl', 2, [t_from_range(1, 299), t_from_range(-4, 22)]), + Bsl2 = type(erlang, 'bsl', 2), + Bsl3 = type(erlang, 'bsl', 2, [t_from_range(1, 299), t_atom('pelle')]), + io:format("Bsl ~p ~p ~p~n", [Bsl1, Bsl2, Bsl3]), + + Add1 = type(erlang, '+', 2, [t_from_range(1, 299), t_from_range(-4, 22)]), + Add2 = type(erlang, '+', 2), + Add3 = type(erlang, '+', 2, [t_from_range(1, 299), t_atom('pelle')]), + io:format("Add ~p ~p ~p~n", [Add1, Add2, Add3]), + + Band1 = type(erlang, 'band', 2, [t_from_range(1, 29), t_from_range(34, 36)]), + Band2 = type(erlang, 'band', 2), + Band3 = type(erlang, 'band', 2, [t_from_range(1, 299), t_atom('pelle')]), + io:format("band ~p ~p ~p~n", [Band1, Band2, Band3]), + + Bor1 = type(erlang, 'bor', 2, [t_from_range(1, 29), t_from_range(8, 11)]), + Bor2 = type(erlang, 'bor', 2), + Bor3 = type(erlang, 'bor', 2, [t_from_range(1, 299), t_atom('pelle')]), + io:format("bor ~p ~p ~p~n", [Bor1, Bor2, Bor3]), + + io:format("inf_?"), + pos_inf = infinity_max([1, 4, 51, pos_inf]), + -12 = infinity_min([1, 142, -4, -12]), + neg_inf = infinity_max([neg_inf]), + + io:format("width"), + 4 = width({7, 9}), + pos_inf = width({neg_inf, 100}), + pos_inf = width({1, pos_inf}), + 3 = width({-8, 7}), + 0 = width({-1, 0}), + + io:format("arith * "), + Mult1 = t_from_range(0, 12), + Mult2 = t_from_range(-21, 7), + Mult1 = type(erlang, '*', 2, [t_from_range(2,3), t_from_range(0,4)]), + Mult2 = type(erlang, '*', 2, [t_from_range(-7,-1), t_from_range(-1,3)]), + ok. + +-endif. diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl new file mode 100644 index 0000000000..fac308d0c6 --- /dev/null +++ b/lib/hipe/cerl/erl_types.erl @@ -0,0 +1,3847 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ====================================================================== +%% Copyright (C) 2000-2003 Richard Carlsson +%% +%% ====================================================================== +%% Provides a representation of Erlang types. +%% +%% The initial author of this file is Richard Carlsson (2000-2004). +%% In July 2006, the type representation was totally re-designed by +%% Tobias Lindahl. This is the representation which is used currently. +%% In late 2008, Manouk Manoukian and Kostis Sagonas added support for +%% opaque types to the structure-based representation of types. +%% During February and March 2009, Kostis Sagonas significantly +%% cleaned up the type representation added spec declarations. +%% +%% Author contact: richardc@it.uu.se, tobiasl@it.uu.se, kostis@cs.ntua.gr +%% ====================================================================== + +-module(erl_types). + +-export([any_none/1, + any_none_or_unit/1, + lookup_record/3, + max/2, + module_builtin_opaques/1, + min/2, + number_max/1, + number_min/1, + t_abstract_records/2, + t_any/0, + t_arity/0, + t_atom/0, + t_atom/1, + t_atoms/1, + t_atom_vals/1, + t_binary/0, + t_bitstr/0, + t_bitstr/2, + t_bitstr_base/1, + t_bitstr_concat/1, + t_bitstr_concat/2, + t_bitstr_match/2, + t_bitstr_unit/1, + t_boolean/0, + t_byte/0, + t_char/0, + t_collect_vars/1, + t_cons/0, + t_cons/2, + t_cons_hd/1, + t_cons_tl/1, + t_constant/0, + t_contains_opaque/1, + t_elements/1, + t_find_opaque_mismatch/2, + t_fixnum/0, + t_map/2, + t_non_neg_fixnum/0, + t_pos_fixnum/0, + t_float/0, + t_form_to_string/1, + t_from_form/1, + t_from_form/2, + t_from_form/3, + t_from_range/2, + t_from_range_unsafe/2, + t_from_term/1, + t_fun/0, + t_fun/1, + t_fun/2, + t_fun_args/1, + t_fun_arity/1, + t_fun_range/1, + t_has_opaque_subtype/1, + t_has_var/1, + t_identifier/0, + %% t_improper_list/2, + t_inf/2, + t_inf/3, + t_inf_lists/2, + t_inf_lists/3, + t_integer/0, + t_integer/1, + t_non_neg_integer/0, + t_pos_integer/0, + t_integers/1, + t_iodata/0, + t_iolist/0, + t_is_any/1, + t_is_atom/1, + t_is_atom/2, + t_is_binary/1, + t_is_bitstr/1, + t_is_bitwidth/1, + t_is_boolean/1, + %% t_is_byte/1, + %% t_is_char/1, + t_is_cons/1, + t_is_constant/1, + t_is_equal/2, + t_is_fixnum/1, + t_is_float/1, + t_is_fun/1, + t_is_instance/2, + t_is_integer/1, + t_is_list/1, + t_is_matchstate/1, + t_is_nil/1, + t_is_non_neg_integer/1, + t_is_none/1, + t_is_none_or_unit/1, + t_is_number/1, + t_is_opaque/1, + t_is_pid/1, + t_is_port/1, + t_is_maybe_improper_list/1, + t_is_reference/1, + t_is_remote/1, + t_is_string/1, + t_is_subtype/2, + t_is_tuple/1, + t_is_unit/1, + t_is_var/1, + t_limit/2, + t_list/0, + t_list/1, + t_list_elements/1, + t_list_termination/1, + t_matchstate/0, + t_matchstate/2, + t_matchstate_present/1, + t_matchstate_slot/2, + t_matchstate_slots/1, + t_matchstate_update_present/2, + t_matchstate_update_slot/3, + t_mfa/0, + t_module/0, + t_nil/0, + t_node/0, + t_none/0, + t_nonempty_list/0, + t_nonempty_list/1, + t_nonempty_string/0, + t_number/0, + t_number/1, + t_number_vals/1, + t_opaque_from_records/1, + t_opaque_match_atom/2, + t_opaque_match_record/2, + t_opaque_matching_structure/2, + t_opaque_structure/1, + t_pid/0, + t_port/0, + t_maybe_improper_list/0, + %% t_maybe_improper_list/2, + t_product/1, + t_reference/0, + t_remote/3, + t_string/0, + t_struct_from_opaque/2, + t_solve_remote/2, + t_subst/2, + t_subtract/2, + t_subtract_list/2, + t_sup/1, + t_sup/2, + t_tid/0, + t_timeout/0, + t_to_string/1, + t_to_string/2, + t_to_tlist/1, + t_tuple/0, + t_tuple/1, + t_tuple_args/1, + t_tuple_size/1, + t_tuple_sizes/1, + t_tuple_subtypes/1, + t_unify/2, + t_unit/0, + t_unopaque/1, + t_unopaque/2, + t_var/1, + t_var_name/1, + %% t_assign_variables_to_subtype/2, + type_is_defined/3, + subst_all_vars_to_any/1, + lift_list_to_pos_empty/1 + ]). + +%%-define(DO_ERL_TYPES_TEST, true). + +-ifdef(DO_ERL_TYPES_TEST). +-export([test/0]). +-else. +-define(NO_UNUSED, true). +-endif. + +-ifndef(NO_UNUSED). +-export([t_is_identifier/1]). +-endif. + +%%============================================================================= +%% +%% Definition of the type structure +%% +%%============================================================================= + +%%----------------------------------------------------------------------------- +%% Limits +%% + +-define(TUPLE_TAG_LIMIT, 5). +-define(TUPLE_ARITY_LIMIT, 10). +-define(SET_LIMIT, 13). +-define(MAX_BYTE, 255). +-define(MAX_CHAR, 16#10ffff). + +-define(WIDENING_LIMIT, 7). +-define(UNIT_MULTIPLIER, 8). + +-define(TAG_IMMED1_SIZE, 4). +-define(BITS, (erlang:system_info(wordsize) * 8) - ?TAG_IMMED1_SIZE). + +%%----------------------------------------------------------------------------- +%% Type tags and qualifiers +%% + +-define(atom_tag, atom). +-define(binary_tag, binary). +-define(function_tag, function). +-define(identifier_tag, identifier). +-define(list_tag, list). +-define(matchstate_tag, matchstate). +-define(nil_tag, nil). +-define(number_tag, number). +-define(opaque_tag, opaque). +-define(product_tag, product). +-define(remote_tag, remote). +-define(tuple_set_tag, tuple_set). +-define(tuple_tag, tuple). +-define(union_tag, union). +-define(var_tag, var). + +-type tag() :: ?atom_tag | ?binary_tag | ?function_tag | ?identifier_tag + | ?list_tag | ?matchstate_tag | ?nil_tag | ?number_tag + | ?opaque_tag | ?product_tag | ?tuple_tag | ?tuple_set_tag + | ?union_tag | ?var_tag. + +-define(float_qual, float). +-define(integer_qual, integer). +-define(nonempty_qual, nonempty). +-define(pid_qual, pid). +-define(port_qual, port). +-define(reference_qual, reference). +-define(unknown_qual, unknown). + +-type qual() :: ?float_qual | ?integer_qual | ?nonempty_qual | ?pid_qual + | ?port_qual | ?reference_qual | ?unknown_qual | {_, _}. + +%%----------------------------------------------------------------------------- +%% The type representation +%% + +-define(any, any). +-define(none, none). +-define(unit, unit). +%% Generic constructor - elements can be many things depending on the tag. +-record(c, {tag :: tag(), + elements = [] :: term(), + qualifier = ?unknown_qual :: qual()}). + +-opaque erl_type() :: ?any | ?none | ?unit | #c{}. + +%%----------------------------------------------------------------------------- +%% Auxiliary types and convenient macros +%% + +-type parse_form() :: {atom(), _, _} | {atom(), _, _, _}. %% XXX: Temporarily +-type rng_elem() :: 'pos_inf' | 'neg_inf' | integer(). + +-record(int_set, {set :: [integer()]}). +-record(int_rng, {from :: rng_elem(), to :: rng_elem()}). +-record(opaque, {mod :: module(), name :: atom(), + args = [] :: [erl_type()], struct :: erl_type()}). +-record(remote, {mod:: module(), name :: atom(), args = [] :: [erl_type()]}). + +-define(atom(Set), #c{tag=?atom_tag, elements=Set}). +-define(bitstr(Unit, Base), #c{tag=?binary_tag, elements=[Unit,Base]}). +-define(float, ?number(?any, ?float_qual)). +-define(function(Domain, Range), #c{tag=?function_tag, + elements=[Domain, Range]}). +-define(identifier(Types), #c{tag=?identifier_tag, elements=Types}). +-define(integer(Types), ?number(Types, ?integer_qual)). +-define(int_range(From, To), ?integer(#int_rng{from=From, to=To})). +-define(int_set(Set), ?integer(#int_set{set=Set})). +-define(list(Types, Term, Size), #c{tag=?list_tag, elements=[Types,Term], + qualifier=Size}). +-define(nil, #c{tag=?nil_tag}). +-define(nonempty_list(Types, Term),?list(Types, Term, ?nonempty_qual)). +-define(number(Set, Qualifier), #c{tag=?number_tag, elements=Set, + qualifier=Qualifier}. +-define(opaque(Optypes), #c{tag=?opaque_tag, elements=Optypes}). +-define(product(Types), #c{tag=?product_tag, elements=Types}). +-define(remote(RemTypes), #c{tag=?remote_tag, elements=RemTypes}). +-define(tuple(Types, Arity, Qual), #c{tag=?tuple_tag, elements=Types, + qualifier={Arity, Qual}}). +-define(tuple_set(Tuples), #c{tag=?tuple_set_tag, elements=Tuples}). +-define(var(Id), #c{tag=?var_tag, elements=Id}). + +-define(matchstate(P, Slots), #c{tag=?matchstate_tag, elements=[P,Slots]}). +-define(any_matchstate, ?matchstate(t_bitstr(), ?any)). + +-define(byte, ?int_range(0, ?MAX_BYTE)). +-define(char, ?int_range(0, ?MAX_CHAR)). +-define(integer_pos, ?int_range(1, pos_inf)). +-define(integer_non_neg, ?int_range(0, pos_inf)). +-define(integer_neg, ?int_range(neg_inf, -1)). + +%%----------------------------------------------------------------------------- +%% Unions +%% + +-define(union(List), #c{tag=?union_tag, elements=[_,_,_,_,_,_,_,_,_,_]=List}). + +-define(atom_union(T), ?union([T,?none,?none,?none,?none,?none,?none,?none,?none,?none])). +-define(bitstr_union(T), ?union([?none,T,?none,?none,?none,?none,?none,?none,?none,?none])). +-define(function_union(T), ?union([?none,?none,T,?none,?none,?none,?none,?none,?none,?none])). +-define(identifier_union(T), ?union([?none,?none,?none,T,?none,?none,?none,?none,?none,?none])). +-define(list_union(T), ?union([?none,?none,?none,?none,T,?none,?none,?none,?none,?none])). +-define(number_union(T), ?union([?none,?none,?none,?none,?none,T,?none,?none,?none,?none])). +-define(tuple_union(T), ?union([?none,?none,?none,?none,?none,?none,T,?none,?none,?none])). +-define(matchstate_union(T), ?union([?none,?none,?none,?none,?none,?none,?none,T,?none,?none])). +-define(opaque_union(T), ?union([?none,?none,?none,?none,?none,?none,?none,?none,T,?none])). +-define(remote_union(T), ?union([?none,?none,?none,?none,?none,?none,?none,?none,?none,T])). +-define(integer_union(T), ?number_union(T)). +-define(float_union(T), ?number_union(T)). +-define(nil_union(T), ?list_union(T)). + + +%%============================================================================= +%% +%% Primitive operations such as type construction and type tests +%% +%%============================================================================= + +%%----------------------------------------------------------------------------- +%% Top and bottom +%% + +-spec t_any() -> erl_type(). + +t_any() -> + ?any. + +-spec t_is_any(erl_type()) -> boolean(). + +t_is_any(?any) -> true; +t_is_any(_) -> false. + +-spec t_none() -> erl_type(). + +t_none() -> + ?none. + +-spec t_is_none(erl_type()) -> boolean(). + +t_is_none(?none) -> true; +t_is_none(_) -> false. + +%%----------------------------------------------------------------------------- +%% Opaque types +%% + +-spec t_opaque(module(), atom(), [_], erl_type()) -> erl_type(). + +t_opaque(Mod, Name, Args, Struct) -> + ?opaque(set_singleton(#opaque{mod=Mod, name=Name, args=Args, struct=Struct})). + +-spec t_is_opaque(erl_type()) -> boolean(). + +t_is_opaque(?opaque(_)) -> true; +t_is_opaque(_) -> false. + +-spec t_has_opaque_subtype(erl_type()) -> boolean(). + +t_has_opaque_subtype(?union(Ts)) -> + lists:any(fun t_is_opaque/1, Ts); +t_has_opaque_subtype(T) -> + t_is_opaque(T). + +-spec t_opaque_structure(erl_type()) -> erl_type(). + +t_opaque_structure(?opaque(Elements)) -> + case ordsets:size(Elements) of + 1 -> + [#opaque{struct = Struct}] = ordsets:to_list(Elements), + Struct; + _ -> throw({error, "Unexpected multiple opaque types"}) + end. + +-spec t_opaque_module(erl_type()) -> module(). + +t_opaque_module(?opaque(Elements)) -> + case ordsets:size(Elements) of + 1 -> + [#opaque{mod=Module}] = ordsets:to_list(Elements), + Module; + _ -> throw({error, "Unexpected multiple opaque types"}) + end. + +%% This only makes sense if we know that Type matches Opaque +-spec t_opaque_matching_structure(erl_type(), erl_type()) -> erl_type(). + +t_opaque_matching_structure(Type, Opaque) -> + OpaqueStruct = t_opaque_structure(Opaque), + case OpaqueStruct of + ?union(L1) -> + case Type of + ?union(_L2) -> OpaqueStruct; + _OtherType -> t_opaque_matching_structure_list(Type, L1) + end; + ?tuple_set(_Set1) = TupleSet -> + case Type of + ?tuple_set(_Set2) -> OpaqueStruct; + _ -> t_opaque_matching_structure_list(Type, t_tuple_subtypes(TupleSet)) + end; + _Other -> OpaqueStruct + end. + +t_opaque_matching_structure_list(Type, List) -> + NewList = [t_inf(Element, Type) || Element <- List], + Results = [NotNone || NotNone <- NewList, NotNone =/= ?none], + case Results of + [] -> ?none; + [First|_] -> First + end. + +-spec t_contains_opaque(erl_type()) -> boolean(). + +t_contains_opaque(?any) -> false; +t_contains_opaque(?none) -> false; +t_contains_opaque(?unit) -> false; +t_contains_opaque(?atom(_Set)) -> false; +t_contains_opaque(?bitstr(_Unit, _Base)) -> false; +t_contains_opaque(?float) -> false; +t_contains_opaque(?function(Domain, Range)) -> + t_contains_opaque(Domain) orelse t_contains_opaque(Range); +t_contains_opaque(?identifier(_Types)) -> false; +t_contains_opaque(?integer(_Types)) -> false; +t_contains_opaque(?int_range(_From, _To)) -> false; +t_contains_opaque(?int_set(_Set)) -> false; +t_contains_opaque(?list(Type, _, _)) -> t_contains_opaque(Type); +t_contains_opaque(?matchstate(_P, _Slots)) -> false; +t_contains_opaque(?nil) -> false; +t_contains_opaque(?number(_Set, _Tag)) -> false; +t_contains_opaque(?opaque(_)) -> true; +t_contains_opaque(?product(Types)) -> list_contains_opaque(Types); +t_contains_opaque(?tuple(?any, _, _)) -> false; +t_contains_opaque(?tuple(Types, _, _)) -> list_contains_opaque(Types); +t_contains_opaque(?tuple_set(_Set) = T) -> + list_contains_opaque(t_tuple_subtypes(T)); +t_contains_opaque(?union(List)) -> list_contains_opaque(List); +t_contains_opaque(?var(_Id)) -> false. + +-spec list_contains_opaque([erl_type()]) -> boolean(). + +list_contains_opaque(List) -> + lists:any(fun t_contains_opaque/1, List). + +%% t_find_opaque_mismatch/2 of two types should only be used if their +%% t_inf is t_none() due to some opaque type violation. +%% +%% The first argument of the function is the pattern and its second +%% argument the type we are matching against the pattern. + +-spec t_find_opaque_mismatch(erl_type(), erl_type()) -> 'error' | {'ok', erl_type(), erl_type()}. + +t_find_opaque_mismatch(T1, T2) -> + t_find_opaque_mismatch(T1, T2, T2). + +t_find_opaque_mismatch(?any, _Type, _TopType) -> error; +t_find_opaque_mismatch(?none, _Type, _TopType) -> error; +t_find_opaque_mismatch(?list(T1, _, _), ?list(T2, _, _), TopType) -> + t_find_opaque_mismatch(T1, T2, TopType); +t_find_opaque_mismatch(_T1, ?opaque(_) = T2, TopType) -> {ok, TopType, T2}; +t_find_opaque_mismatch(?product(T1), ?product(T2), TopType) -> + t_find_opaque_mismatch_ordlists(T1, T2, TopType); +t_find_opaque_mismatch(?tuple(T1, Arity, _), ?tuple(T2, Arity, _), TopType) -> + t_find_opaque_mismatch_ordlists(T1, T2, TopType); +t_find_opaque_mismatch(?tuple(_, _, _) = T1, ?tuple_set(_) = T2, TopType) -> + Tuples1 = t_tuple_subtypes(T1), + Tuples2 = t_tuple_subtypes(T2), + t_find_opaque_mismatch_lists(Tuples1, Tuples2, TopType); +t_find_opaque_mismatch(T1, ?union(U2), TopType) -> + t_find_opaque_mismatch_lists([T1], U2, TopType); +t_find_opaque_mismatch(_T1, _T2, _TopType) -> error. + +t_find_opaque_mismatch_ordlists(L1, L2, TopType) -> + List = lists:zipwith(fun(T1, T2) -> + t_find_opaque_mismatch(T1, T2, TopType) + end, L1, L2), + t_find_opaque_mismatch_list(List). + +t_find_opaque_mismatch_lists(L1, L2, _TopType) -> + List = [t_find_opaque_mismatch(T1, T2, T2) || T1 <- L1, T2 <- L2], + t_find_opaque_mismatch_list(List). + +t_find_opaque_mismatch_list([]) -> error; +t_find_opaque_mismatch_list([H|T]) -> + case H of + {ok, _T1, _T2} -> H; + error -> t_find_opaque_mismatch_list(T) + end. + +-spec t_opaque_from_records(dict()) -> [erl_type()]. + +t_opaque_from_records(RecDict) -> + OpaqueRecDict = + dict:filter(fun(Key, _Value) -> + case Key of + {opaque, _Name} -> true; + _ -> false + end + end, RecDict), + OpaqueTypeDict = + dict:map(fun({opaque, Name}, {Module, Type, ArgNames}) -> + case ArgNames of + [] -> + t_opaque(Module, Name, [], t_from_form(Type, RecDict)); + _ -> + throw({error,"Polymorphic opaque types not supported yet"}) + end + end, OpaqueRecDict), + [OpaqueType || {_Key, OpaqueType} <- dict:to_list(OpaqueTypeDict)]. + +-spec t_opaque_match_atom(erl_type(), [erl_type()]) -> [erl_type()]. + +t_opaque_match_atom(?atom(_) = Atom, Opaques) -> + case t_atom_vals(Atom) of + unknown -> []; + _ -> [O || O <- Opaques, t_inf(Atom, O, opaque) =/= ?none, + t_opaque_atom_vals(t_opaque_structure(O)) =/= unknown] + end; +t_opaque_match_atom(_, _) -> []. + +-spec t_opaque_atom_vals(erl_type()) -> 'unknown' | [atom(),...]. + +t_opaque_atom_vals(OpaqueStruct) -> + case OpaqueStruct of + ?atom(_) -> t_atom_vals(OpaqueStruct); + ?union([Atom,_,_,_,_,_,_,_,_,_]) -> t_atom_vals(Atom); + _ -> unknown + end. + +-spec t_opaque_match_record(erl_type(), [erl_type()]) -> [erl_type()]. + +t_opaque_match_record(?tuple([?atom(_) = Tag|_Fields], _, _) = Rec, Opaques) -> + [O || O <- Opaques, t_inf(Rec, O, opaque) =/= ?none, + lists:member(Tag, t_opaque_tuple_tags(t_opaque_structure(O)))]; +t_opaque_match_record(_, _) -> []. + +-spec t_opaque_tuple_tags(erl_type()) -> [erl_type()]. + +t_opaque_tuple_tags(OpaqueStruct) -> + case OpaqueStruct of + ?tuple([?atom(_) = Tag|_Fields], _, _) -> [Tag]; + ?tuple_set(_) = TupleSet -> + Tuples = t_tuple_subtypes(TupleSet), + lists:flatten([t_opaque_tuple_tags(T) || T <- Tuples]); + ?union([_,_,_,_,_,_,Tuples,_,_,_]) -> t_opaque_tuple_tags(Tuples); + _ -> [] + end. + +%% Decompose opaque instances of type arg2 to structured types, in arg1 +-spec t_struct_from_opaque(erl_type(), erl_type()) -> erl_type(). + +t_struct_from_opaque(?function(Domain, Range), Opaque) -> + ?function(t_struct_from_opaque(Domain, Opaque), + t_struct_from_opaque(Range, Opaque)); +t_struct_from_opaque(?list(Types, Term, Size), Opaque) -> + ?list(t_struct_from_opaque(Types, Opaque), Term, Size); +t_struct_from_opaque(?opaque(_) = T, Opaque) -> + case T =:= Opaque of + true -> t_opaque_structure(T); + false -> T + end; +t_struct_from_opaque(?product(Types), Opaque) -> + ?product(list_struct_from_opaque(Types, Opaque)); +t_struct_from_opaque(?tuple(?any, _, _) = T, _Opaque) -> T; +t_struct_from_opaque(?tuple(Types, Arity, Tag), Opaque) -> + ?tuple(list_struct_from_opaque(Types, Opaque), Arity, Tag); +t_struct_from_opaque(?tuple_set(Set), Opaque) -> + NewSet = [{Sz, [t_struct_from_opaque(T, Opaque) || T <- Tuples]} + || {Sz, Tuples} <- Set], + ?tuple_set(NewSet); +t_struct_from_opaque(?union(List), Opaque) -> + t_sup(list_struct_from_opaque(List, Opaque)); +t_struct_from_opaque(Type, _Opaque) -> Type. + +list_struct_from_opaque(Types, Opaque) -> + [t_struct_from_opaque(Type, Opaque) || Type <- Types]. + +-spec module_builtin_opaques(module()) -> [erl_type()]. + +module_builtin_opaques(Module) -> + [O || O <- all_opaque_builtins(), t_opaque_module(O) =:= Module]. + +%%----------------------------------------------------------------------------- +%% Remote types +%% These types are used for preprocessing they should never reach the analysis stage + +-spec t_remote(module(), atom(), [_]) -> erl_type(). + +t_remote(Mod, Name, Args) -> + ?remote(set_singleton(#remote{mod=Mod, name=Name, args=Args})). + +-spec t_is_remote(erl_type()) -> boolean(). + +t_is_remote(?remote(_)) -> true; +t_is_remote(_) -> false. + +-spec t_solve_remote(erl_type(), dict()) -> erl_type(). + +t_solve_remote(Type , Records) -> + t_solve_remote(Type, Records, ordsets:new()). + +t_solve_remote(?function(Domain, Range), R, C) -> + ?function(t_solve_remote(Domain, R, C), t_solve_remote(Range, R, C)); +t_solve_remote(?list(Types, Term, Size), R, C) -> + ?list(t_solve_remote(Types, R, C), Term, Size); +t_solve_remote(?product(Types), R, C) -> + ?product(list_solve_remote(Types, R, C)); +t_solve_remote(?opaque(Set), R, C) -> + List = ordsets:to_list(Set), + NewList = [Remote#opaque{struct = t_solve_remote(Struct, R, C)} + || Remote = #opaque{struct = Struct} <- List], + ?opaque(ordsets:from_list(NewList)); +t_solve_remote(?tuple(?any, _, _) = T, _R, _C) -> T; +t_solve_remote(?tuple(Types, Arity, Tag), R, C) -> + ?tuple(list_solve_remote(Types, R, C), Arity, Tag); +t_solve_remote(?tuple_set(Set), R, C) -> + NewSet = [{Sz, [t_solve_remote(T, R, C) || T <- Tuples]} || {Sz, Tuples} <- Set], + ?tuple_set(NewSet); +t_solve_remote(?remote(Set), R, C) -> + Cycle = ordsets:intersection(Set, C), + case ordsets:size(Cycle) of + 0 -> ok; + _ -> + CycleMsg = "Cycle detected while processing remote types: " ++ + t_to_string(?remote(C), dict:new()), + throw({error, CycleMsg}) + end, + NewCycle = ordsets:union(C, Set), + TypeFun = + fun(#remote{mod = RemoteModule, name = Name, args = Args}) -> + case dict:find(RemoteModule, R) of + error -> + Msg = io_lib:format("Cannot locate module ~w to " + "resolve the remote type: ~w:~w()~n", + [RemoteModule, RemoteModule, Name]), + throw({error, Msg}); + {ok, RemoteDict} -> + case lookup_type(Name, RemoteDict) of + {type, {_TypeMod, Type, ArgNames}} when length(Args) =:= length(ArgNames) -> + List = lists:zip(ArgNames, Args), + TmpVardict = dict:from_list(List), + NewType = t_from_form(Type, RemoteDict, TmpVardict), + t_solve_remote(NewType, R, NewCycle); + {opaque, {OpModule, Type, ArgNames}} when length(Args) =:= length(ArgNames) -> + List = lists:zip(ArgNames, Args), + TmpVardict = dict:from_list(List), + Rep = t_from_form(Type, RemoteDict, TmpVardict), + NewRep = t_solve_remote(Rep, R, NewCycle), + t_from_form({opaque, -1, Name, {OpModule, Args, NewRep}}, + RemoteDict, TmpVardict); + {type, _} -> + Msg = io_lib:format("Unknown remote type ~w\n", [Name]), + throw({error, Msg}); + {opaque, _} -> + Msg = io_lib:format("Unknown remote opaque type ~w\n", [Name]), + throw({error, Msg}); + error -> + Msg = io_lib:format("Unable to find remote type ~w:~w()\n", + [RemoteModule, Name]), + throw({error, Msg}) + end + end + end, + RemoteList = ordsets:to_list(Set), + t_sup([TypeFun(RemoteType) || RemoteType <- RemoteList]); +t_solve_remote(?union(List), R, C) -> + t_sup(list_solve_remote(List, R, C)); +t_solve_remote(T, _R, _C) -> T. + +list_solve_remote(Types, R, C) -> + [t_solve_remote(Type, R, C) || Type <- Types]. + +%%----------------------------------------------------------------------------- +%% Unit type. Signals non termination. +%% + +-spec t_unit() -> erl_type(). + +t_unit() -> + ?unit. + +-spec t_is_unit(erl_type()) -> boolean(). + +t_is_unit(?unit) -> true; +t_is_unit(_) -> false. + +-spec t_is_none_or_unit(erl_type()) -> boolean(). + +t_is_none_or_unit(?none) -> true; +t_is_none_or_unit(?unit) -> true; +t_is_none_or_unit(_) -> false. + +%%----------------------------------------------------------------------------- +%% Atoms and the derived type bool +%% + +-spec t_atom() -> erl_type(). + +t_atom() -> + ?atom(?any). + +-spec t_atom(atom()) -> erl_type(). + +t_atom(A) when is_atom(A) -> + ?atom(set_singleton(A)). + +-spec t_atoms([atom()]) -> erl_type(). + +t_atoms(List) when is_list(List) -> + t_sup([t_atom(A) || A <- List]). + +-spec t_atom_vals(erl_type()) -> 'unknown' | [atom(),...]. + +t_atom_vals(?atom(?any)) -> unknown; +t_atom_vals(?atom(Set)) -> set_to_list(Set); +t_atom_vals(Other) -> + ?atom(_) = Atm = t_inf(t_atom(), Other), + t_atom_vals(Atm). + +-spec t_is_atom(erl_type()) -> boolean(). + +t_is_atom(?atom(_)) -> true; +t_is_atom(_) -> false. + +-spec t_is_atom(atom(), erl_type()) -> boolean(). + +t_is_atom(Atom, ?atom(?any)) when is_atom(Atom) -> false; +t_is_atom(Atom, ?atom(Set)) when is_atom(Atom) -> set_is_singleton(Atom, Set); +t_is_atom(Atom, _) when is_atom(Atom) -> false. + +%%------------------------------------ + +-spec t_boolean() -> erl_type(). + +t_boolean() -> + ?atom(set_from_list([false, true])). + +-spec t_is_boolean(erl_type()) -> boolean(). + +t_is_boolean(?atom(?any)) -> false; +t_is_boolean(?atom(Set)) -> + case set_size(Set) of + 1 -> set_is_element(true, Set) orelse set_is_element(false, Set); + 2 -> set_is_element(true, Set) andalso set_is_element(false, Set); + N when is_integer(N), N > 2 -> false + end; +t_is_boolean(_) -> false. + +%%----------------------------------------------------------------------------- +%% Binaries +%% + +-spec t_binary() -> erl_type(). + +t_binary() -> + ?bitstr(8, 0). + +-spec t_is_binary(erl_type()) -> boolean(). + +t_is_binary(?bitstr(U, B)) -> + ((U rem 8) =:= 0) andalso ((B rem 8) =:= 0); +t_is_binary(_) -> false. + +%%----------------------------------------------------------------------------- +%% Bitstrings +%% + +-spec t_bitstr() -> erl_type(). + +t_bitstr() -> + ?bitstr(1, 0). + +-spec t_bitstr(non_neg_integer(), non_neg_integer()) -> erl_type(). + +t_bitstr(U, B) -> + NewB = + if + U =:= 0 -> B; + B >= (U * (?UNIT_MULTIPLIER + 1)) -> + (B rem U) + U * ?UNIT_MULTIPLIER; + true -> + B + end, + ?bitstr(U, NewB). + +-spec t_bitstr_unit(erl_type()) -> non_neg_integer(). + +t_bitstr_unit(?bitstr(U, _)) -> U. + +-spec t_bitstr_base(erl_type()) -> non_neg_integer(). + +t_bitstr_base(?bitstr(_, B)) -> B. + +-spec t_bitstr_concat([erl_type()]) -> erl_type(). + +t_bitstr_concat(List) -> + t_bitstr_concat_1(List, t_bitstr(0, 0)). + +t_bitstr_concat_1([T|Left], Acc) -> + t_bitstr_concat_1(Left, t_bitstr_concat(Acc, T)); +t_bitstr_concat_1([], Acc) -> + Acc. + +-spec t_bitstr_concat(erl_type(), erl_type()) -> erl_type(). + +t_bitstr_concat(T1, T2) -> + T1p = t_inf(t_bitstr(), T1), + T2p = t_inf(t_bitstr(), T2), + bitstr_concat(T1p, T2p). + +-spec t_bitstr_match(erl_type(), erl_type()) -> erl_type(). + +t_bitstr_match(T1, T2) -> + T1p = t_inf(t_bitstr(), T1), + T2p = t_inf(t_bitstr(), T2), + bitstr_match(T1p, T2p). + +-spec t_is_bitstr(erl_type()) -> boolean(). + +t_is_bitstr(?bitstr(_, _)) -> true; +t_is_bitstr(_) -> false. + +%%----------------------------------------------------------------------------- +%% Matchstates +%% + +-spec t_matchstate() -> erl_type(). + +t_matchstate() -> + ?any_matchstate. + +-spec t_matchstate(erl_type(), non_neg_integer()) -> erl_type(). + +t_matchstate(Init, 0) -> + ?matchstate(Init, Init); +t_matchstate(Init, Max) when is_integer(Max) -> + Slots = [Init|[?none || _ <- lists:seq(1, Max)]], + ?matchstate(Init, t_product(Slots)). + +-spec t_is_matchstate(erl_type()) -> boolean(). + +t_is_matchstate(?matchstate(_, _)) -> true; +t_is_matchstate(_) -> false. + +-spec t_matchstate_present(erl_type()) -> erl_type(). + +t_matchstate_present(Type) -> + case t_inf(t_matchstate(), Type) of + ?matchstate(P, _) -> P; + _ -> ?none + end. + +-spec t_matchstate_slot(erl_type(), non_neg_integer()) -> erl_type(). + +t_matchstate_slot(Type, Slot) -> + RealSlot = Slot + 1, + case t_inf(t_matchstate(), Type) of + ?matchstate(_, ?any) -> ?any; + ?matchstate(_, ?product(Vals)) when length(Vals) >= RealSlot -> + lists:nth(RealSlot, Vals); + ?matchstate(_, ?product(_)) -> + ?none; + ?matchstate(_, SlotType) when RealSlot =:= 1 -> + SlotType; + _ -> + ?none + end. + +-spec t_matchstate_slots(erl_type()) -> erl_type(). + +t_matchstate_slots(?matchstate(_, Slots)) -> + Slots. + +-spec t_matchstate_update_present(erl_type(), erl_type()) -> erl_type(). + +t_matchstate_update_present(New, Type) -> + case t_inf(t_matchstate(), Type) of + ?matchstate(_, Slots) -> + ?matchstate(New, Slots); + _ -> ?none + end. + +-spec t_matchstate_update_slot(erl_type(), erl_type(), non_neg_integer()) -> erl_type(). + +t_matchstate_update_slot(New, Type, Slot) -> + RealSlot = Slot + 1, + case t_inf(t_matchstate(), Type) of + ?matchstate(Pres, Slots) -> + NewSlots = + case Slots of + ?any -> + ?any; + ?product(Vals) when length(Vals) >= RealSlot -> + NewTuple = setelement(RealSlot, list_to_tuple(Vals), New), + NewVals = tuple_to_list(NewTuple), + ?product(NewVals); + ?product(_) -> + ?none; + _ when RealSlot =:= 1 -> + New; + _ -> + ?none + end, + ?matchstate(Pres, NewSlots); + _ -> + ?none + end. + +%%----------------------------------------------------------------------------- +%% Functions +%% + +-spec t_fun() -> erl_type(). + +t_fun() -> + ?function(?any, ?any). + +-spec t_fun(erl_type()) -> erl_type(). + +t_fun(Range) -> + ?function(?any, Range). + +-spec t_fun([erl_type()] | arity(), erl_type()) -> erl_type(). + +t_fun(Domain, Range) when is_list(Domain) -> + ?function(?product(Domain), Range); +t_fun(Arity, Range) when is_integer(Arity), 0 =< Arity, Arity =< 255 -> + ?function(?product(lists:duplicate(Arity, ?any)), Range). + +-spec t_fun_args(erl_type()) -> 'unknown' | [erl_type()]. + +t_fun_args(?function(?any, _)) -> + unknown; +t_fun_args(?function(?product(Domain), _)) when is_list(Domain) -> + Domain. + +-spec t_fun_arity(erl_type()) -> 'unknown' | non_neg_integer(). + +t_fun_arity(?function(?any, _)) -> + unknown; +t_fun_arity(?function(?product(Domain), _)) -> + length(Domain). + +-spec t_fun_range(erl_type()) -> erl_type(). + +t_fun_range(?function(_, Range)) -> + Range. + +-spec t_is_fun(erl_type()) -> boolean(). + +t_is_fun(?function(_, _)) -> true; +t_is_fun(_) -> false. + +%%----------------------------------------------------------------------------- +%% Identifiers. Includes ports, pids and refs. +%% + +-spec t_identifier() -> erl_type(). + +t_identifier() -> + ?identifier(?any). + +-ifdef(DO_ERL_TYPES_TEST). +-spec t_is_identifier(erl_type()) -> erl_type(). + +t_is_identifier(?identifier(_)) -> true; +t_is_identifier(_) -> false. +-endif. + +%%------------------------------------ + +-spec t_port() -> erl_type(). + +t_port() -> + ?identifier(set_singleton(?port_qual)). + +-spec t_is_port(erl_type()) -> boolean(). + +t_is_port(?identifier(?any)) -> false; +t_is_port(?identifier(Set)) -> set_is_singleton(?port_qual, Set); +t_is_port(_) -> false. + +%%------------------------------------ + +-spec t_pid() -> erl_type(). + +t_pid() -> + ?identifier(set_singleton(?pid_qual)). + +-spec t_is_pid(erl_type()) -> boolean(). + +t_is_pid(?identifier(?any)) -> false; +t_is_pid(?identifier(Set)) -> set_is_singleton(?pid_qual, Set); +t_is_pid(_) -> false. + +%%------------------------------------ + +-spec t_reference() -> erl_type(). + +t_reference() -> + ?identifier(set_singleton(?reference_qual)). + +-spec t_is_reference(erl_type()) -> boolean(). + +t_is_reference(?identifier(?any)) -> false; +t_is_reference(?identifier(Set)) -> set_is_singleton(?reference_qual, Set); +t_is_reference(_) -> false. + +%%----------------------------------------------------------------------------- +%% Numbers are divided into floats, integers, chars and bytes. +%% + +-spec t_number() -> erl_type(). + +t_number() -> + ?number(?any, ?unknown_qual). + +-spec t_number(integer()) -> erl_type(). + +t_number(X) when is_integer(X) -> + t_integer(X). + +-spec t_is_number(erl_type()) -> boolean(). + +t_is_number(?number(_, _)) -> true; +t_is_number(_) -> false. + +%% Currently, the type system collapses all floats to ?float and does +%% not keep any information about their values. As a result, the list +%% that this function returns contains only integers. +-spec t_number_vals(erl_type()) -> 'unknown' | [integer(),...]. + +t_number_vals(?int_set(?any)) -> unknown; +t_number_vals(?int_set(Set)) -> set_to_list(Set); +t_number_vals(?number(_, _)) -> unknown; +t_number_vals(Other) -> + Inf = t_inf(Other, t_number()), + false = t_is_none(Inf), % sanity check + t_number_vals(Inf). + +%%------------------------------------ + +-spec t_float() -> erl_type(). + +t_float() -> + ?float. + +-spec t_is_float(erl_type()) -> boolean(). + +t_is_float(?float) -> true; +t_is_float(_) -> false. + +%%------------------------------------ + +-spec t_integer() -> erl_type(). + +t_integer() -> + ?integer(?any). + +-spec t_integer(integer()) -> erl_type(). + +t_integer(I) when is_integer(I) -> + ?int_set(set_singleton(I)). + +-spec t_integers([integer()]) -> erl_type(). + +t_integers(List) when is_list(List) -> + t_sup([t_integer(I) || I <- List]). + +-spec t_is_integer(erl_type()) -> boolean(). + +t_is_integer(?integer(_)) -> true; +t_is_integer(_) -> false. + +%%------------------------------------ + +-spec t_byte() -> erl_type(). + +t_byte() -> + ?byte. + +-ifdef(DO_ERL_TYPES_TEST). +-spec t_is_byte(erl_type()) -> boolean(). + +t_is_byte(?int_range(neg_inf, _)) -> false; +t_is_byte(?int_range(_, pos_inf)) -> false; +t_is_byte(?int_range(From, To)) + when is_integer(From), From >= 0, is_integer(To), To =< ?MAX_BYTE -> true; +t_is_byte(?int_set(Set)) -> + (set_min(Set) >= 0) andalso (set_max(Set) =< ?MAX_BYTE); +t_is_byte(_) -> false. +-endif. + +%%------------------------------------ + +-spec t_char() -> erl_type(). + +t_char() -> + ?char. + +-spec t_is_char(erl_type()) -> boolean(). + +t_is_char(?int_range(neg_inf, _)) -> false; +t_is_char(?int_range(_, pos_inf)) -> false; +t_is_char(?int_range(From, To)) + when is_integer(From), From >= 0, is_integer(To), To =< ?MAX_CHAR -> true; +t_is_char(?int_set(Set)) -> + (set_min(Set) >= 0) andalso (set_max(Set) =< ?MAX_CHAR); +t_is_char(_) -> false. + +%%----------------------------------------------------------------------------- +%% Lists +%% + +-spec t_cons() -> erl_type(). + +t_cons() -> + ?nonempty_list(?any, ?any). + +%% Note that if the tail argument can be a list, we must collapse the +%% content of the list to include both the content of the tail list +%% and the head of the cons. If for example the tail argument is any() +%% then there can be any list in the tail and the content of the +%% returned list must be any(). + +-spec t_cons(erl_type(), erl_type()) -> erl_type(). + +t_cons(?none, _) -> ?none; +t_cons(_, ?none) -> ?none; +t_cons(?unit, _) -> ?none; +t_cons(_, ?unit) -> ?none; +t_cons(Hd, ?nil) -> + ?nonempty_list(Hd, ?nil); +t_cons(Hd, ?list(Contents, Termination, _)) -> + ?nonempty_list(t_sup(Contents, Hd), Termination); +t_cons(Hd, Tail) -> + case t_inf(Tail, t_maybe_improper_list()) of + ?list(Contents, Termination, _Size) -> + %% Collapse the list part of the termination but keep the + %% non-list part intact. + NewTermination = t_sup(t_subtract(Tail, t_maybe_improper_list()), + Termination), + ?nonempty_list(t_sup(Hd, Contents), NewTermination); + ?nil -> ?nonempty_list(Hd, Tail); + ?none -> ?nonempty_list(Hd, Tail); + ?unit -> ?none + end. + +-spec t_is_cons(erl_type()) -> boolean(). + +t_is_cons(?nonempty_list(_, _)) -> true; +t_is_cons(_) -> false. + +-spec t_cons_hd(erl_type()) -> erl_type(). + +t_cons_hd(?nonempty_list(Contents, _Termination)) -> Contents. + +-spec t_cons_tl(erl_type()) -> erl_type(). + +t_cons_tl(?nonempty_list(_Contents, Termination) = T) -> + t_sup(Termination, T). + +-spec t_nil() -> erl_type(). + +t_nil() -> + ?nil. + +-spec t_is_nil(erl_type()) -> boolean(). + +t_is_nil(?nil) -> true; +t_is_nil(_) -> false. + +-spec t_list() -> erl_type(). + +t_list() -> + ?list(?any, ?nil, ?unknown_qual). + +-spec t_list(erl_type()) -> erl_type(). + +t_list(?none) -> ?none; +t_list(?unit) -> ?none; +t_list(Contents) -> + ?list(Contents, ?nil, ?unknown_qual). + +-spec t_list_elements(erl_type()) -> erl_type(). + +t_list_elements(?list(Contents, _, _)) -> Contents; +t_list_elements(?nil) -> ?none. + +-spec t_list_termination(erl_type()) -> erl_type(). + +t_list_termination(?nil) -> ?nil; +t_list_termination(?list(_, Term, _)) -> Term. + +-spec t_is_list(erl_type()) -> boolean(). + +t_is_list(?list(_Contents, ?nil, _)) -> true; +t_is_list(?nil) -> true; +t_is_list(_) -> false. + +-spec t_nonempty_list() -> erl_type(). + +t_nonempty_list() -> + t_cons(?any, ?nil). + +-spec t_nonempty_list(erl_type()) -> erl_type(). + +t_nonempty_list(Type) -> + t_cons(Type, ?nil). + +-spec t_nonempty_string() -> erl_type(). + +t_nonempty_string() -> + t_nonempty_list(t_char()). + +-spec t_string() -> erl_type(). + +t_string() -> + t_list(t_char()). + +-spec t_is_string(erl_type()) -> boolean(). + +t_is_string(X) -> + t_is_list(X) andalso t_is_char(t_list_elements(X)). + +-spec t_maybe_improper_list() -> erl_type(). + +t_maybe_improper_list() -> + ?list(?any, ?any, ?unknown_qual). + +%% Should only be used if you know what you are doing. See t_cons/2 +-spec t_maybe_improper_list(erl_type(), erl_type()) -> erl_type(). + +t_maybe_improper_list(_Content, ?unit) -> ?none; +t_maybe_improper_list(?unit, _Termination) -> ?none; +t_maybe_improper_list(Content, Termination) -> + %% Safety check + true = t_is_subtype(t_nil(), Termination), + ?list(Content, Termination, ?unknown_qual). + +-spec t_is_maybe_improper_list(erl_type()) -> boolean(). + +t_is_maybe_improper_list(?list(_, _, _)) -> true; +t_is_maybe_improper_list(?nil) -> true; +t_is_maybe_improper_list(_) -> false. + +%% %% Should only be used if you know what you are doing. See t_cons/2 +%% -spec t_improper_list(erl_type(), erl_type()) -> erl_type(). +%% +%% t_improper_list(?unit, _Termination) -> ?none; +%% t_improper_list(_Content, ?unit) -> ?none; +%% t_improper_list(Content, Termination) -> +%% %% Safety check +%% false = t_is_subtype(t_nil(), Termination), +%% ?list(Content, Termination, ?any). + +-spec lift_list_to_pos_empty(erl_type()) -> erl_type(). + +lift_list_to_pos_empty(?nil) -> ?nil; +lift_list_to_pos_empty(?list(Content, Termination, _)) -> + ?list(Content, Termination, ?unknown_qual). + +%%----------------------------------------------------------------------------- +%% Tuples +%% + +-spec t_tuple() -> erl_type(). + +t_tuple() -> + ?tuple(?any, ?any, ?any). + +-spec t_tuple(non_neg_integer() | [erl_type()]) -> erl_type(). + +t_tuple(N) when is_integer(N) -> + ?tuple(lists:duplicate(N, ?any), N, ?any); +t_tuple(List) -> + case any_none_or_unit(List) of + true -> t_none(); + false -> + Arity = length(List), + case get_tuple_tags(List) of + [Tag] -> ?tuple(List, Arity, Tag); %% Tag can also be ?any here + TagList -> + SortedTagList = lists:sort(TagList), + Tuples = [?tuple([T|tl(List)], Arity, T) || T <- SortedTagList], + ?tuple_set([{Arity, Tuples}]) + end + end. + +-spec get_tuple_tags([erl_type()]) -> [erl_type(),...]. + +get_tuple_tags([?atom(?any)|_]) -> [?any]; +get_tuple_tags([?atom(Set)|_]) -> + case set_size(Set) > ?TUPLE_TAG_LIMIT of + true -> [?any]; + false -> [t_atom(A) || A <- set_to_list(Set)] + end; +get_tuple_tags(_) -> [?any]. + +%% to be used for a tuple with known types for its arguments (not ?any) +-spec t_tuple_args(erl_type()) -> [erl_type()]. + +t_tuple_args(?tuple(Args, _, _)) when is_list(Args) -> Args. + +%% to be used for a tuple with a known size (not ?any) +-spec t_tuple_size(erl_type()) -> non_neg_integer(). + +t_tuple_size(?tuple(_, Size, _)) when is_integer(Size) -> Size. + +-spec t_tuple_sizes(erl_type()) -> 'unknown' | [non_neg_integer(),...]. + +t_tuple_sizes(?tuple(?any, ?any, ?any)) -> unknown; +t_tuple_sizes(?tuple(_, Size, _)) when is_integer(Size) -> [Size]; +t_tuple_sizes(?tuple_set(List)) -> [Size || {Size, _} <- List]. + +-spec t_tuple_subtypes(erl_type()) -> 'unknown' | [erl_type(),...]. + +t_tuple_subtypes(?tuple(?any, ?any, ?any)) -> unknown; +t_tuple_subtypes(?tuple(_, _, _) = T) -> [T]; +t_tuple_subtypes(?tuple_set(List)) -> + lists:append([Tuples || {_Size, Tuples} <- List]). + +-spec t_is_tuple(erl_type()) -> boolean(). + +t_is_tuple(?tuple(_, _, _)) -> true; +t_is_tuple(?tuple_set(_)) -> true; +t_is_tuple(_) -> false. + +%%----------------------------------------------------------------------------- +%% Non-primitive types, including some handy syntactic sugar types +%% + +-spec t_constant() -> erl_type(). + +t_constant() -> + t_sup([t_number(), t_identifier(), t_atom(), t_fun(), t_binary()]). + +-spec t_is_constant(erl_type()) -> boolean(). + +t_is_constant(X) -> + t_is_subtype(X, t_constant()). + +-spec t_arity() -> erl_type(). + +t_arity() -> + t_from_range(0, 255). % was t_byte(). + +-spec t_pos_integer() -> erl_type(). + +t_pos_integer() -> + t_from_range(1, pos_inf). + +-spec t_non_neg_integer() -> erl_type(). + +t_non_neg_integer() -> + t_from_range(0, pos_inf). + +-spec t_is_non_neg_integer(erl_type()) -> boolean(). + +t_is_non_neg_integer(?integer(_) = T) -> + t_is_subtype(T, t_non_neg_integer()); +t_is_non_neg_integer(_) -> false. + +-spec t_neg_integer() -> erl_type(). + +t_neg_integer() -> + t_from_range(neg_inf, -1). + +-spec t_fixnum() -> erl_type(). + +t_fixnum() -> + t_integer(). % Gross over-approximation + +-spec t_pos_fixnum() -> erl_type(). + +t_pos_fixnum() -> + t_pos_integer(). % Gross over-approximation + +-spec t_non_neg_fixnum() -> erl_type(). + +t_non_neg_fixnum() -> + t_non_neg_integer(). % Gross over-approximation + +-spec t_mfa() -> erl_type(). + +t_mfa() -> + t_tuple([t_atom(), t_atom(), t_arity()]). + +-spec t_module() -> erl_type(). + +t_module() -> + t_atom(). + +-spec t_node() -> erl_type(). + +t_node() -> + t_atom(). + +-spec t_iodata() -> erl_type(). + +t_iodata() -> + t_sup(t_iolist(), t_binary()). + +-spec t_iolist() -> erl_type(). + +t_iolist() -> + t_iolist(1). + +-spec t_iolist(non_neg_integer()) -> erl_type(). + +t_iolist(N) when N > 0 -> + t_maybe_improper_list(t_sup([t_iolist(N-1), t_binary(), t_byte()]), + t_sup(t_binary(), t_nil())); +t_iolist(0) -> + t_maybe_improper_list(t_any(), t_sup(t_binary(), t_nil())). + +-spec t_timeout() -> erl_type(). + +t_timeout() -> + t_sup(t_non_neg_integer(), t_atom('infinity')). + +%%----------------------------------------------------------------------------- +%% Some built-in opaque types +%% + +-spec t_array() -> erl_type(). + +t_array() -> + t_opaque(array, array, [], + t_tuple([t_atom('array'), + t_non_neg_integer(), t_non_neg_integer(), + t_any(), t_any()])). + +-spec t_dict() -> erl_type(). + +t_dict() -> + t_opaque(dict, dict, [], + t_tuple([t_atom('dict'), + t_non_neg_integer(), t_non_neg_integer(), + t_non_neg_integer(), t_non_neg_integer(), + t_non_neg_integer(), t_non_neg_integer(), + t_tuple(), t_tuple()])). + +-spec t_digraph() -> erl_type(). + +t_digraph() -> + t_opaque(digraph, digraph, [], + t_tuple([t_atom('digraph'), + t_sup(t_atom(), t_tid()), + t_sup(t_atom(), t_tid()), + t_sup(t_atom(), t_tid()), + t_boolean()])). + +-spec t_gb_set() -> erl_type(). + +t_gb_set() -> + t_opaque(gb_sets, gb_set, [], + t_tuple([t_non_neg_integer(), t_sup(t_atom('nil'), t_tuple(3))])). + +-spec t_gb_tree() -> erl_type(). + +t_gb_tree() -> + t_opaque(gb_trees, gb_tree, [], + t_tuple([t_non_neg_integer(), t_sup(t_atom('nil'), t_tuple(4))])). + +-spec t_queue() -> erl_type(). + +t_queue() -> + t_opaque(queue, queue, [], t_tuple([t_list(), t_list()])). + +-spec t_set() -> erl_type(). + +t_set() -> + t_opaque(sets, set, [], + t_tuple([t_atom('set'), t_non_neg_integer(), t_non_neg_integer(), + t_pos_integer(), t_non_neg_integer(), t_non_neg_integer(), + t_non_neg_integer(), t_tuple(), t_tuple()])). + +-spec t_tid() -> erl_type(). + +t_tid() -> + t_opaque(ets, tid, [], t_integer()). + +-spec all_opaque_builtins() -> [erl_type()]. + +all_opaque_builtins() -> + [t_array(), t_dict(), t_digraph(), t_gb_set(), + t_gb_tree(), t_queue(), t_set(), t_tid()]. + +-spec is_opaque_builtin(atom(), atom()) -> boolean(). + +is_opaque_builtin(array, array) -> true; +is_opaque_builtin(dict, dict) -> true; +is_opaque_builtin(digraph, digraph) -> true; +is_opaque_builtin(gb_sets, gb_set) -> true; +is_opaque_builtin(gb_trees, gb_tree) -> true; +is_opaque_builtin(queue, queue) -> true; +is_opaque_builtin(sets, set) -> true; +is_opaque_builtin(ets, tid) -> true; +is_opaque_builtin(_, _) -> false. + +%%------------------------------------ + +%% ?none is allowed in products. A product of size 1 is not a product. + +-spec t_product([erl_type()]) -> erl_type(). + +t_product([T]) -> T; +t_product(Types) when is_list(Types) -> + ?product(Types). + +%% This function is intended to be the inverse of the one above. +%% It should NOT be used with ?any, ?none or ?unit as input argument. + +-spec t_to_tlist(erl_type()) -> [erl_type()]. + +t_to_tlist(?product(Types)) -> Types; +t_to_tlist(T) when T =/= ?any orelse T =/= ?none orelse T =/= ?unit -> [T]. + +%%------------------------------------ + +-spec t_var(atom() | integer()) -> erl_type(). + +t_var(Atom) when is_atom(Atom) -> ?var(Atom); +t_var(Int) when is_integer(Int) -> ?var(Int). + +-spec t_is_var(erl_type()) -> boolean(). + +t_is_var(?var(_)) -> true; +t_is_var(_) -> false. + +-spec t_var_name(erl_type()) -> atom() | integer(). + +t_var_name(?var(Id)) -> Id. + +-spec t_has_var(erl_type()) -> boolean(). + +t_has_var(?var(_)) -> true; +t_has_var(?function(Domain, Range)) -> + t_has_var(Domain) orelse t_has_var(Range); +t_has_var(?list(Contents, Termination, _)) -> + t_has_var(Contents) orelse t_has_var(Termination); +t_has_var(?product(Types)) -> t_has_var_list(Types); +t_has_var(?tuple(?any, ?any, ?any)) -> false; +t_has_var(?tuple(Elements, _, _)) -> + t_has_var_list(Elements); +t_has_var(?tuple_set(_) = T) -> + t_has_var_list(t_tuple_subtypes(T)); +%% t_has_var(?union(_) = U) -> +%% exit(lists:flatten(io_lib:format("Union happens in t_has_var/1 ~p\n",[U]))); +t_has_var(_) -> false. + +-spec t_has_var_list([erl_type()]) -> boolean(). + +t_has_var_list([T|Ts]) -> + t_has_var(T) orelse t_has_var_list(Ts); +t_has_var_list([]) -> false. + +-spec t_collect_vars(erl_type()) -> [erl_type()]. + +t_collect_vars(T) -> + t_collect_vars(T, []). + +-spec t_collect_vars(erl_type(), [erl_type()]) -> [erl_type()]. + +t_collect_vars(?var(_) = Var, Acc) -> + ordsets:add_element(Var, Acc); +t_collect_vars(?function(Domain, Range), Acc) -> + ordsets:union(t_collect_vars(Domain, Acc), t_collect_vars(Range, [])); +t_collect_vars(?list(Contents, Termination, _), Acc) -> + ordsets:union(t_collect_vars(Contents, Acc), t_collect_vars(Termination, [])); +t_collect_vars(?product(Types), Acc) -> + lists:foldl(fun(T, TmpAcc) -> t_collect_vars(T, TmpAcc) end, Acc, Types); +t_collect_vars(?tuple(?any, ?any, ?any), Acc) -> + Acc; +t_collect_vars(?tuple(Types, _, _), Acc) -> + lists:foldl(fun(T, TmpAcc) -> t_collect_vars(T, TmpAcc) end, Acc, Types); +t_collect_vars(?tuple_set(_) = TS, Acc) -> + lists:foldl(fun(T, TmpAcc) -> t_collect_vars(T, TmpAcc) end, Acc, + t_tuple_subtypes(TS)); +t_collect_vars(_, Acc) -> + Acc. + + +%%============================================================================= +%% +%% Type construction from Erlang terms. +%% +%%============================================================================= + +%%----------------------------------------------------------------------------- +%% Make a type from a term. No type depth is enforced. +%% + +-spec t_from_term(term()) -> erl_type(). + +t_from_term([H|T]) -> t_cons(t_from_term(H), t_from_term(T)); +t_from_term([]) -> t_nil(); +t_from_term(T) when is_atom(T) -> t_atom(T); +t_from_term(T) when is_bitstring(T) -> t_bitstr(0, erlang:bit_size(T)); +t_from_term(T) when is_float(T) -> t_float(); +t_from_term(T) when is_function(T) -> + {arity, Arity} = erlang:fun_info(T, arity), + t_fun(Arity, t_any()); +t_from_term(T) when is_integer(T) -> t_integer(T); +t_from_term(T) when is_pid(T) -> t_pid(); +t_from_term(T) when is_port(T) -> t_port(); +t_from_term(T) when is_reference(T) -> t_reference(); +t_from_term(T) when is_tuple(T) -> + t_tuple([t_from_term(E) || E <- tuple_to_list(T)]). + +%%----------------------------------------------------------------------------- +%% Integer types from a range. +%%----------------------------------------------------------------------------- + +%%-define(USE_UNSAFE_RANGES, true). + +-spec t_from_range(rng_elem(), rng_elem()) -> erl_type(). + +-ifdef(USE_UNSAFE_RANGES). + +t_from_range(X, Y) -> + t_from_range_unsafe(X, Y). + +-else. + +t_from_range(neg_inf, pos_inf) -> t_integer(); +t_from_range(neg_inf, Y) when is_integer(Y), Y < 0 -> ?integer_neg; +t_from_range(neg_inf, Y) when is_integer(Y), Y >= 0 -> t_integer(); +t_from_range(X, pos_inf) when is_integer(X), X >= 1 -> ?integer_pos; +t_from_range(X, pos_inf) when is_integer(X), X >= 0 -> ?integer_non_neg; +t_from_range(X, pos_inf) when is_integer(X), X < 0 -> t_integer(); +t_from_range(X, Y) when is_integer(X), is_integer(Y), X > Y -> t_none(); +t_from_range(X, Y) when is_integer(X), is_integer(Y) -> + case ((Y - X) < ?SET_LIMIT) of + true -> t_integers(lists:seq(X, Y)); + false -> + case X >= 0 of + false -> + if Y < 0 -> ?integer_neg; + true -> t_integer() + end; + true -> + if Y =< ?MAX_BYTE, X >= 1 -> ?int_range(1, ?MAX_BYTE); + Y =< ?MAX_BYTE -> t_byte(); + Y =< ?MAX_CHAR, X >= 1 -> ?int_range(1, ?MAX_CHAR); + Y =< ?MAX_CHAR -> t_char(); + X >= 1 -> ?integer_pos; + X >= 0 -> ?integer_non_neg + end + end + end; +t_from_range(pos_inf, neg_inf) -> t_none(). + +-endif. + +-spec t_from_range_unsafe(rng_elem(), rng_elem()) -> erl_type(). + +t_from_range_unsafe(neg_inf, pos_inf) -> t_integer(); +t_from_range_unsafe(neg_inf, Y) -> ?int_range(neg_inf, Y); +t_from_range_unsafe(X, pos_inf) -> ?int_range(X, pos_inf); +t_from_range_unsafe(X, Y) when is_integer(X), is_integer(Y), X =< Y -> + if (Y - X) < ?SET_LIMIT -> t_integers(lists:seq(X, Y)); + true -> ?int_range(X, Y) + end; +t_from_range_unsafe(X, Y) when is_integer(X), is_integer(Y) -> t_none(); +t_from_range_unsafe(pos_inf, neg_inf) -> t_none(). + +-spec t_is_fixnum(erl_type()) -> boolean(). + +t_is_fixnum(?int_range(neg_inf, _)) -> false; +t_is_fixnum(?int_range(_, pos_inf)) -> false; +t_is_fixnum(?int_range(From, To)) -> + is_fixnum(From) andalso is_fixnum(To); +t_is_fixnum(?int_set(Set)) -> + is_fixnum(set_min(Set)) andalso is_fixnum(set_max(Set)); +t_is_fixnum(_) -> false. + +-spec is_fixnum(integer()) -> boolean(). + +is_fixnum(N) when is_integer(N) -> + Bits = ?BITS, + (N =< ((1 bsl (Bits - 1)) - 1)) andalso (N >= -(1 bsl (Bits - 1))). + +infinity_geq(pos_inf, _) -> true; +infinity_geq(_, pos_inf) -> false; +infinity_geq(_, neg_inf) -> true; +infinity_geq(neg_inf, _) -> false; +infinity_geq(A, B) -> A >= B. + +-spec t_is_bitwidth(erl_type()) -> boolean(). + +t_is_bitwidth(?int_range(neg_inf, _)) -> false; +t_is_bitwidth(?int_range(_, pos_inf)) -> false; +t_is_bitwidth(?int_range(From, To)) -> + infinity_geq(From, 0) andalso infinity_geq(?BITS, To); +t_is_bitwidth(?int_set(Set)) -> + infinity_geq(set_min(Set), 0) andalso infinity_geq(?BITS, set_max(Set)); +t_is_bitwidth(_) -> false. + +-spec number_min(erl_type()) -> rng_elem(). + +number_min(?int_range(From, _)) -> From; +number_min(?int_set(Set)) -> set_min(Set); +number_min(?number(?any, _Tag)) -> neg_inf. + +-spec number_max(erl_type()) -> rng_elem(). + +number_max(?int_range(_, To)) -> To; +number_max(?int_set(Set)) -> set_max(Set); +number_max(?number(?any, _Tag)) -> pos_inf. + +%% -spec int_range(rgn_elem(), rng_elem()) -> erl_type(). +%% +%% int_range(neg_inf, pos_inf) -> t_integer(); +%% int_range(neg_inf, To) -> ?int_range(neg_inf, To); +%% int_range(From, pos_inf) -> ?int_range(From, pos_inf); +%% int_range(From, To) when From =< To -> t_from_range(From, To); +%% int_range(From, To) when To < From -> ?none. + +in_range(_, ?int_range(neg_inf, pos_inf)) -> true; +in_range(X, ?int_range(From, pos_inf)) -> X >= From; +in_range(X, ?int_range(neg_inf, To)) -> X =< To; +in_range(X, ?int_range(From, To)) -> (X >= From) andalso (X =< To). + +-spec min(rng_elem(), rng_elem()) -> rng_elem(). + +min(neg_inf, _) -> neg_inf; +min(_, neg_inf) -> neg_inf; +min(pos_inf, Y) -> Y; +min(X, pos_inf) -> X; +min(X, Y) when X =< Y -> X; +min(_, Y) -> Y. + +-spec max(rng_elem(), rng_elem()) -> rng_elem(). + +max(neg_inf, Y) -> Y; +max(X, neg_inf) -> X; +max(pos_inf, _) -> pos_inf; +max(_, pos_inf) -> pos_inf; +max(X, Y) when X =< Y -> Y; +max(X, _) -> X. + +expand_range_from_set(Range = ?int_range(From, To), Set) -> + Min = min(set_min(Set), From), + Max = max(set_max(Set), To), + if From =:= Min, To =:= Max -> Range; + true -> t_from_range(Min, Max) + end. + +%%============================================================================= +%% +%% Lattice operations +%% +%%============================================================================= + +%%----------------------------------------------------------------------------- +%% Supremum +%% + +-spec t_sup([erl_type()]) -> erl_type(). + +t_sup([?any|_]) -> + ?any; +t_sup([H1, H2|T]) -> + t_sup([t_sup(H1, H2)|T]); +t_sup([H]) -> + subst_all_vars_to_any(H); +t_sup([]) -> + ?none. + +-spec t_sup(erl_type(), erl_type()) -> erl_type(). + +t_sup(?any, _) -> ?any; +t_sup(_, ?any) -> ?any; +t_sup(?none, T) -> T; +t_sup(T, ?none) -> T; +t_sup(?unit, T) -> T; +t_sup(T, ?unit) -> T; +t_sup(T, T) -> subst_all_vars_to_any(T); +t_sup(?var(_), _) -> ?any; +t_sup(_, ?var(_)) -> ?any; +t_sup(?atom(Set1), ?atom(Set2)) -> + ?atom(set_union(Set1, Set2)); +t_sup(?bitstr(U1, B1), ?bitstr(U2, B2)) -> + t_bitstr(gcd(gcd(U1, U2), abs(B1-B2)), lists:min([B1, B2])); +t_sup(?function(Domain1, Range1), ?function(Domain2, Range2)) -> + %% The domain is either a product or any. + ?function(t_sup(Domain1, Domain2), t_sup(Range1, Range2)); +t_sup(?identifier(Set1), ?identifier(Set2)) -> + ?identifier(set_union(Set1, Set2)); +t_sup(?opaque(Set1), ?opaque(Set2)) -> + ?opaque(set_union_no_limit(Set1, Set2)); +%%Disallow unions with opaque types +%%t_sup(T1=?opaque(_,_,_), T2) -> +%% io:format("Debug: t_sup executed with args ~w and ~w~n",[T1, T2]), ?none; +%%t_sup(T1, T2=?opaque(_,_,_)) -> +%% io:format("Debug: t_sup executed with args ~w and ~w~n",[T1, T2]), ?none; +t_sup(?remote(Set1), ?remote(Set2)) -> + ?remote(set_union_no_limit(Set1, Set2)); +t_sup(?matchstate(Pres1, Slots1), ?matchstate(Pres2, Slots2)) -> + ?matchstate(t_sup(Pres1, Pres2), t_sup(Slots1, Slots2)); +t_sup(?nil, ?nil) -> ?nil; +t_sup(?nil, ?list(Contents, Termination, _)) -> + ?list(Contents, t_sup(?nil, Termination), ?unknown_qual); +t_sup(?list(Contents, Termination, _), ?nil) -> + ?list(Contents, t_sup(?nil, Termination), ?unknown_qual); +t_sup(?list(Contents1, Termination1, Size1), + ?list(Contents2, Termination2, Size2)) -> + NewSize = + case {Size1, Size2} of + {?unknown_qual, ?unknown_qual} -> ?unknown_qual; + {?unknown_qual, ?nonempty_qual} -> ?unknown_qual; + {?nonempty_qual, ?unknown_qual} -> ?unknown_qual; + {?nonempty_qual, ?nonempty_qual} -> ?nonempty_qual + end, + NewContents = t_sup(Contents1, Contents2), + NewTermination = t_sup(Termination1, Termination2), + TmpList = t_cons(NewContents, NewTermination), + case NewSize of + ?nonempty_qual -> TmpList; + ?unknown_qual -> + ?list(FinalContents, FinalTermination, _) = TmpList, + ?list(FinalContents, FinalTermination, ?unknown_qual) + end; +t_sup(?number(_, _), ?number(?any, ?unknown_qual) = T) -> T; +t_sup(?number(?any, ?unknown_qual) = T, ?number(_, _)) -> T; +t_sup(?float, ?float) -> ?float; +t_sup(?float, ?integer(_)) -> t_number(); +t_sup(?integer(_), ?float) -> t_number(); +t_sup(?integer(?any) = T, ?integer(_)) -> T; +t_sup(?integer(_), ?integer(?any) = T) -> T; +t_sup(?int_set(Set1), ?int_set(Set2)) -> + case set_union(Set1, Set2) of + ?any -> + t_from_range(min(set_min(Set1), set_min(Set2)), + max(set_max(Set1), set_max(Set2))); + Set -> ?int_set(Set) + end; +t_sup(?int_range(From1, To1), ?int_range(From2, To2)) -> + t_from_range(min(From1, From2), max(To1, To2)); +t_sup(Range = ?int_range(_, _), ?int_set(Set)) -> + expand_range_from_set(Range, Set); +t_sup(?int_set(Set), Range = ?int_range(_, _)) -> + expand_range_from_set(Range, Set); +t_sup(?product(Types1), ?product(Types2)) -> + L1 = length(Types1), + L2 = length(Types2), + if L1 =:= L2 -> ?product(t_sup_lists(Types1, Types2)); + true -> ?any + end; +t_sup(?product(_), _) -> + ?any; +t_sup(_, ?product(_)) -> + ?any; +t_sup(?tuple(?any, ?any, ?any) = T, ?tuple(_, _, _)) -> T; +t_sup(?tuple(_, _, _), ?tuple(?any, ?any, ?any) = T) -> T; +t_sup(?tuple(?any, ?any, ?any) = T, ?tuple_set(_)) -> T; +t_sup(?tuple_set(_), ?tuple(?any, ?any, ?any) = T) -> T; +t_sup(?tuple(Elements1, Arity, Tag1) = T1, + ?tuple(Elements2, Arity, Tag2) = T2) -> + if Tag1 =:= Tag2 -> t_tuple(t_sup_lists(Elements1, Elements2)); + Tag1 =:= ?any -> t_tuple(t_sup_lists(Elements1, Elements2)); + Tag2 =:= ?any -> t_tuple(t_sup_lists(Elements1, Elements2)); + Tag1 < Tag2 -> ?tuple_set([{Arity, [T1, T2]}]); + Tag1 > Tag2 -> ?tuple_set([{Arity, [T2, T1]}]) + end; +t_sup(?tuple(_, Arity1, _) = T1, ?tuple(_, Arity2, _) = T2) -> + sup_tuple_sets([{Arity1, [T1]}], [{Arity2, [T2]}]); +t_sup(?tuple_set(List1), ?tuple_set(List2)) -> + sup_tuple_sets(List1, List2); +t_sup(?tuple_set(List1), T2 = ?tuple(_, Arity, _)) -> + sup_tuple_sets(List1, [{Arity, [T2]}]); +t_sup(?tuple(_, Arity, _) = T1, ?tuple_set(List2)) -> + sup_tuple_sets([{Arity, [T1]}], List2); +t_sup(T1, T2) -> + ?union(U1) = force_union(T1), + ?union(U2) = force_union(T2), + sup_union(U1, U2). + +-spec t_sup_lists([erl_type()], [erl_type()]) -> [erl_type()]. + +t_sup_lists([T1|Left1], [T2|Left2]) -> + [t_sup(T1, T2)|t_sup_lists(Left1, Left2)]; +t_sup_lists([], []) -> + []. + +sup_tuple_sets(L1, L2) -> + TotalArities = ordsets:union([Arity || {Arity, _} <- L1], + [Arity || {Arity, _} <- L2]), + if length(TotalArities) > ?TUPLE_ARITY_LIMIT -> t_tuple(); + true -> + case sup_tuple_sets(L1, L2, []) of + [{_Arity, [OneTuple = ?tuple(_, _, _)]}] -> OneTuple; + List -> ?tuple_set(List) + end + end. + +sup_tuple_sets([{Arity, Tuples1}|Left1], [{Arity, Tuples2}|Left2], Acc) -> + NewAcc = [{Arity, sup_tuples_in_set(Tuples1, Tuples2)}|Acc], + sup_tuple_sets(Left1, Left2, NewAcc); +sup_tuple_sets([{Arity1, _} = T1|Left1] = L1, + [{Arity2, _} = T2|Left2] = L2, Acc) -> + if Arity1 < Arity2 -> sup_tuple_sets(Left1, L2, [T1|Acc]); + Arity1 > Arity2 -> sup_tuple_sets(L1, Left2, [T2|Acc]) + end; +sup_tuple_sets([], L2, Acc) -> lists:reverse(Acc, L2); +sup_tuple_sets(L1, [], Acc) -> lists:reverse(Acc, L1). + +sup_tuples_in_set([?tuple(_, _, ?any) = T], L) -> + [t_tuple(sup_tuple_elements([T|L]))]; +sup_tuples_in_set(L, [?tuple(_, _, ?any) = T]) -> + [t_tuple(sup_tuple_elements([T|L]))]; +sup_tuples_in_set(L1, L2) -> + FoldFun = fun(?tuple(_, _, Tag), AccTag) -> t_sup(Tag, AccTag) end, + TotalTag0 = lists:foldl(FoldFun, ?none, L1), + TotalTag = lists:foldl(FoldFun, TotalTag0, L2), + case TotalTag of + ?atom(?any) -> + %% We will reach the set limit. Widen now. + [t_tuple(sup_tuple_elements(L1 ++ L2))]; + ?atom(Set) -> + case set_size(Set) > ?TUPLE_TAG_LIMIT of + true -> + %% We will reach the set limit. Widen now. + [t_tuple(sup_tuple_elements(L1 ++ L2))]; + false -> + %% We can go on and build the tuple set. + sup_tuples_in_set(L1, L2, []) + end + end. + +sup_tuple_elements([?tuple(Elements, _, _)|L]) -> + lists:foldl(fun (?tuple(Es, _, _), Acc) -> t_sup_lists(Es, Acc) end, + Elements, L). + +sup_tuples_in_set([?tuple(Elements1, Arity, Tag1) = T1|Left1] = L1, + [?tuple(Elements2, Arity, Tag2) = T2|Left2] = L2, Acc) -> + if + Tag1 < Tag2 -> sup_tuples_in_set(Left1, L2, [T1|Acc]); + Tag1 > Tag2 -> sup_tuples_in_set(L1, Left2, [T2|Acc]); + Tag2 =:= Tag2 -> NewElements = t_sup_lists(Elements1, Elements2), + NewAcc = [?tuple(NewElements, Arity, Tag1)|Acc], + sup_tuples_in_set(Left1, Left2, NewAcc) + end; +sup_tuples_in_set([], L2, Acc) -> lists:reverse(Acc, L2); +sup_tuples_in_set(L1, [], Acc) -> lists:reverse(Acc, L1). + +sup_union(U1, U2) -> + sup_union(U1, U2, 0, []). + +sup_union([?none|Left1], [?none|Left2], N, Acc) -> + sup_union(Left1, Left2, N, [?none|Acc]); +sup_union([T1|Left1], [T2|Left2], N, Acc) -> + sup_union(Left1, Left2, N+1, [t_sup(T1, T2)|Acc]); +sup_union([], [], N, Acc) -> + if N =:= 0 -> ?none; + N =:= 1 -> + [Type] = [T || T <- Acc, T =/= ?none], + Type; + N =:= length(Acc) -> ?any; + true -> ?union(lists:reverse(Acc)) + end. + +force_union(T = ?atom(_)) -> ?atom_union(T); +force_union(T = ?bitstr(_, _)) -> ?bitstr_union(T); +force_union(T = ?function(_, _)) -> ?function_union(T); +force_union(T = ?identifier(_)) -> ?identifier_union(T); +force_union(T = ?list(_, _, _)) -> ?list_union(T); +force_union(T = ?nil) -> ?list_union(T); +force_union(T = ?number(_,_)) -> ?number_union(T); +force_union(T = ?opaque(_)) -> ?opaque_union(T); +force_union(T = ?remote(_)) -> ?remote_union(T); +force_union(T = ?tuple(_, _, _)) -> ?tuple_union(T); +force_union(T = ?tuple_set(_)) -> ?tuple_union(T); +force_union(T = ?matchstate(_, _)) -> ?matchstate_union(T); +force_union(T = ?union(_)) -> T. + +%%----------------------------------------------------------------------------- +%% An attempt to write the inverse operation of t_sup/1 -- XXX: INCOMPLETE !! +%% + +-spec t_elements(erl_type()) -> [erl_type()]. + +t_elements(?none) -> []; +t_elements(?unit) -> []; +t_elements(?any = T) -> [T]; +t_elements(?nil = T) -> [T]; +t_elements(?atom(?any) = T) -> [T]; +t_elements(?atom(Atoms)) -> + [t_atom(A) || A <- Atoms]; +t_elements(?bitstr(_, _) = T) -> [T]; +t_elements(?function(_, _) = T) -> [T]; +t_elements(?identifier(?any) = T) -> [T]; +t_elements(?identifier(IDs)) -> + [?identifier([T]) || T <- IDs]; +t_elements(?list(_, _, _) = T) -> [T]; +t_elements(?number(_, _) = T) -> + case T of + ?number(?any, ?unknown_qual) -> [T]; + ?float -> [T]; + ?integer(?any) -> [T]; + ?int_range(_, _) -> [T]; + ?int_set(Set) -> + [t_integer(I) || I <- Set] + end; +t_elements(?opaque(_) = T) -> [T]; +t_elements(?tuple(_, _, _) = T) -> [T]; +t_elements(?tuple_set(_) = TS) -> + case t_tuple_subtypes(TS) of + unknown -> []; + Elems -> Elems + end; +t_elements(?union(List)) -> + lists:append([t_elements(T) || T <- List]); +t_elements(?var(_)) -> [?any]. %% yes, vars exist -- what else to do here? +%% t_elements(T) -> +%% io:format("T_ELEMENTS => ~p\n", [T]). + +%%----------------------------------------------------------------------------- +%% Infimum +%% + +-spec t_inf([erl_type()]) -> erl_type(). + +t_inf([H1, H2|T]) -> + case t_inf(H1, H2) of + ?none -> ?none; + NewH -> t_inf([NewH|T]) + end; +t_inf([H]) -> H; +t_inf([]) -> ?none. + +-spec t_inf(erl_type(), erl_type()) -> erl_type(). + +t_inf(T1, T2) -> + t_inf(T1, T2, structured). + +-type t_inf_mode() :: 'opaque' | 'structured'. +-spec t_inf(erl_type(), erl_type(), t_inf_mode()) -> erl_type(). + +t_inf(?var(_), ?var(_), _Mode) -> ?any; +t_inf(?var(_), T, _Mode) -> subst_all_vars_to_any(T); +t_inf(T, ?var(_), _Mode) -> subst_all_vars_to_any(T); +t_inf(?any, T, _Mode) -> subst_all_vars_to_any(T); +t_inf(T, ?any, _Mode) -> subst_all_vars_to_any(T); +t_inf(?unit, _, _Mode) -> ?unit; +t_inf(_, ?unit, _Mode) -> ?unit; +t_inf(?none, _, _Mode) -> ?none; +t_inf(_, ?none, _Mode) -> ?none; +t_inf(T, T, _Mode) -> subst_all_vars_to_any(T); +t_inf(?atom(Set1), ?atom(Set2), _) -> + case set_intersection(Set1, Set2) of + ?none -> ?none; + NewSet -> ?atom(NewSet) + end; +t_inf(?bitstr(U1, B1), ?bitstr(0, B2), _Mode) -> + if B2 >= B1 andalso (B2-B1) rem U1 =:= 0 -> t_bitstr(0, B2); + true -> ?none + end; +t_inf(?bitstr(0, B1), ?bitstr(U2, B2), _Mode) -> + if B1 >= B2 andalso (B1-B2) rem U2 =:= 0 -> t_bitstr(0, B1); + true -> ?none + end; +t_inf(?bitstr(U1, B1), ?bitstr(U1, B1), _Mode) -> + t_bitstr(U1, B1); +t_inf(?bitstr(U1, B1), ?bitstr(U2, B2), _Mode) when U2 > U1 -> + inf_bitstr(U2, B2, U1, B1); +t_inf(?bitstr(U1, B1), ?bitstr(U2, B2), _Mode) -> + inf_bitstr(U1, B1, U2, B2); +t_inf(?function(Domain1, Range1), ?function(Domain2, Range2), Mode) -> + case t_inf(Domain1, Domain2, Mode) of + ?none -> ?none; + Domain -> ?function(Domain, t_inf(Range1, Range2, Mode)) + end; +t_inf(?identifier(Set1), ?identifier(Set2), _Mode) -> + case set_intersection(Set1, Set2) of + ?none -> ?none; + Set -> ?identifier(Set) + end; +t_inf(?matchstate(Pres1, Slots1), ?matchstate(Pres2, Slots2), _Mode) -> + ?matchstate(t_inf(Pres1, Pres2), t_inf(Slots1, Slots2)); +t_inf(?nil, ?nil, _Mode) -> ?nil; +t_inf(?nil, ?nonempty_list(_, _), _Mode) -> + ?none; +t_inf(?nonempty_list(_, _), ?nil, _Mode) -> + ?none; +t_inf(?nil, ?list(_Contents, Termination, _), Mode) -> + t_inf(?nil, Termination, Mode); +t_inf(?list(_Contents, Termination, _), ?nil, Mode) -> + t_inf(?nil, Termination, Mode); +t_inf(?list(Contents1, Termination1, Size1), + ?list(Contents2, Termination2, Size2), Mode) -> + case t_inf(Termination1, Termination2, Mode) of + ?none -> ?none; + Termination -> + case t_inf(Contents1, Contents2, Mode) of + ?none -> + %% If none of the lists are nonempty, then the infimum is nil. + case (Size1 =:= ?unknown_qual) andalso (Size2 =:= ?unknown_qual) of + true -> t_nil(); + false -> ?none + end; + Contents -> + Size = + case {Size1, Size2} of + {?unknown_qual, ?unknown_qual} -> ?unknown_qual; + {?unknown_qual, ?nonempty_qual} -> ?nonempty_qual; + {?nonempty_qual, ?unknown_qual} -> ?nonempty_qual; + {?nonempty_qual, ?nonempty_qual} -> ?nonempty_qual + end, + ?list(Contents, Termination, Size) + end + end; +t_inf(?number(_, _) = T1, ?number(_, _) = T2, _Mode) -> + case {T1, T2} of + {T, T} -> T; + {_, ?number(?any, ?unknown_qual)} -> T1; + {?number(?any, ?unknown_qual), _} -> T2; + {?float, ?integer(_)} -> ?none; + {?integer(_), ?float} -> ?none; + {?integer(?any), ?integer(_)} -> T2; + {?integer(_), ?integer(?any)} -> T1; + {?int_set(Set1), ?int_set(Set2)} -> + case set_intersection(Set1, Set2) of + ?none -> ?none; + Set -> ?int_set(Set) + end; + {?int_range(From1, To1), ?int_range(From2, To2)} -> + t_from_range(max(From1, From2), min(To1, To2)); + {Range = ?int_range(_, _), ?int_set(Set)} -> + %% io:format("t_inf range, set args ~p ~p ~n", [T1, T2]), + Ans2 = + case set_filter(fun(X) -> in_range(X, Range) end, Set) of + ?none -> ?none; + NewSet -> ?int_set(NewSet) + end, + %% io:format("Ans2 ~p ~n", [Ans2]), + Ans2; + {?int_set(Set), ?int_range(_, _) = Range} -> + case set_filter(fun(X) -> in_range(X, Range) end, Set) of + ?none -> ?none; + NewSet -> ?int_set(NewSet) + end + end; +t_inf(?product(Types1), ?product(Types2), Mode) -> + L1 = length(Types1), + L2 = length(Types2), + if L1 =:= L2 -> ?product(t_inf_lists(Types1, Types2, Mode)); + true -> ?none + end; +t_inf(?product(_), _, _Mode) -> + ?none; +t_inf(_, ?product(_), _Mode) -> + ?none; +t_inf(?tuple(?any, ?any, ?any), ?tuple(_, _, _) = T, _Mode) -> T; +t_inf(?tuple(_, _, _) = T, ?tuple(?any, ?any, ?any), _Mode) -> T; +t_inf(?tuple(?any, ?any, ?any), ?tuple_set(_) = T, _Mode) -> T; +t_inf(?tuple_set(_) = T, ?tuple(?any, ?any, ?any), _Mode) -> T; +t_inf(?tuple(Elements1, Arity, _Tag1), ?tuple(Elements2, Arity, _Tag2), Mode) -> + case t_inf_lists_strict(Elements1, Elements2, Mode) of + bottom -> ?none; + NewElements -> t_tuple(NewElements) + end; +t_inf(?tuple_set(List1), ?tuple_set(List2), Mode) -> + inf_tuple_sets(List1, List2, Mode); +t_inf(?tuple_set(List), ?tuple(_, Arity, _) = T, Mode) -> + inf_tuple_sets(List, [{Arity, [T]}], Mode); +t_inf(?tuple(_, Arity, _) = T, ?tuple_set(List), Mode) -> + inf_tuple_sets(List, [{Arity, [T]}], Mode); +%% be careful: here and in the next clause T can be ?opaque +t_inf(?union(U1), T, Mode) -> + ?union(U2) = force_union(T), + inf_union(U1, U2, Mode); +t_inf(T, ?union(U2), Mode) -> + ?union(U1) = force_union(T), + inf_union(U1, U2, Mode); +%% and as a result, the cases for ?opaque should appear *after* ?union +t_inf(?opaque(Set1), ?opaque(Set2), _Mode) -> + case set_intersection(Set1, Set2) of + ?none -> ?none; + NewSet -> ?opaque(NewSet) + end; +t_inf(?opaque(_) = T1, T2, opaque) -> + case t_inf(t_opaque_structure(T1), T2, structured) of + ?none -> ?none; + _Type -> T1 + end; +t_inf(T1, ?opaque(_) = T2, opaque) -> + case t_inf(T1, t_opaque_structure(T2), structured) of + ?none -> ?none; + _Type -> T2 + end; +t_inf(#c{}, #c{}, _) -> + ?none. + +-spec t_inf_lists([erl_type()], [erl_type()]) -> [erl_type()]. + +t_inf_lists(L1, L2) -> + t_inf_lists(L1, L2, structured). + +-spec t_inf_lists([erl_type()], [erl_type()], t_inf_mode()) -> [erl_type()]. + +t_inf_lists(L1, L2, Mode) -> + t_inf_lists(L1, L2, [], Mode). + +-spec t_inf_lists([erl_type()], [erl_type()], [erl_type()], t_inf_mode()) -> [erl_type()]. + +t_inf_lists([T1|Left1], [T2|Left2], Acc, Mode) -> + t_inf_lists(Left1, Left2, [t_inf(T1, T2, Mode)|Acc], Mode); +t_inf_lists([], [], Acc, _Mode) -> + lists:reverse(Acc). + +%% Infimum of lists with strictness. +%% If any element is the ?none type, the value 'bottom' is returned. + +-spec t_inf_lists_strict([erl_type()], [erl_type()], t_inf_mode()) -> 'bottom' | [erl_type()]. + +t_inf_lists_strict(L1, L2, Mode) -> + t_inf_lists_strict(L1, L2, [], Mode). + +-spec t_inf_lists_strict([erl_type()], [erl_type()], [erl_type()], t_inf_mode()) -> 'bottom' | [erl_type()]. + +t_inf_lists_strict([T1|Left1], [T2|Left2], Acc, Mode) -> + case t_inf(T1, T2, Mode) of + ?none -> bottom; + T -> t_inf_lists_strict(Left1, Left2, [T|Acc], Mode) + end; +t_inf_lists_strict([], [], Acc, _Mode) -> + lists:reverse(Acc). + +inf_tuple_sets(L1, L2, Mode) -> + case inf_tuple_sets(L1, L2, [], Mode) of + [] -> ?none; + [{_Arity, [?tuple(_, _, _) = OneTuple]}] -> OneTuple; + List -> ?tuple_set(List) + end. + +inf_tuple_sets([{Arity, Tuples1}|Left1], [{Arity, Tuples2}|Left2], Acc, Mode) -> + case inf_tuples_in_sets(Tuples1, Tuples2, Mode) of + [] -> inf_tuple_sets(Left1, Left2, Acc, Mode); + NewTuples -> inf_tuple_sets(Left1, Left2, [{Arity, NewTuples}|Acc], Mode) + end; +inf_tuple_sets(L1 = [{Arity1, _}|Left1], L2 = [{Arity2, _}|Left2], Acc, Mode) -> + if Arity1 < Arity2 -> inf_tuple_sets(Left1, L2, Acc, Mode); + Arity1 > Arity2 -> inf_tuple_sets(L1, Left2, Acc, Mode) + end; +inf_tuple_sets([], _, Acc, _Mode) -> lists:reverse(Acc); +inf_tuple_sets(_, [], Acc, _Mode) -> lists:reverse(Acc). + +inf_tuples_in_sets([?tuple(Elements1, _, ?any)], L2, Mode) -> + NewList = [t_inf_lists_strict(Elements1, Elements2, Mode) + || ?tuple(Elements2, _, _) <- L2], + [t_tuple(Es) || Es <- NewList, Es =/= bottom]; +inf_tuples_in_sets(L1, [?tuple(Elements2, _, ?any)], Mode) -> + NewList = [t_inf_lists_strict(Elements1, Elements2, Mode) + || ?tuple(Elements1, _, _) <- L1], + [t_tuple(Es) || Es <- NewList, Es =/= bottom]; +inf_tuples_in_sets(L1, L2, Mode) -> + inf_tuples_in_sets(L1, L2, [], Mode). + +inf_tuples_in_sets([?tuple(Elements1, Arity, Tag)|Left1], + [?tuple(Elements2, Arity, Tag)|Left2], Acc, Mode) -> + case t_inf_lists_strict(Elements1, Elements2, Mode) of + bottom -> inf_tuples_in_sets(Left1, Left2, Acc, Mode); + NewElements -> + inf_tuples_in_sets(Left1, Left2, [?tuple(NewElements, Arity, Tag)|Acc], Mode) + end; +inf_tuples_in_sets([?tuple(_, _, Tag1)|Left1] = L1, + [?tuple(_, _, Tag2)|Left2] = L2, Acc, Mode) -> + if Tag1 < Tag2 -> inf_tuples_in_sets(Left1, L2, Acc, Mode); + Tag1 > Tag2 -> inf_tuples_in_sets(L1, Left2, Acc, Mode) + end; +inf_tuples_in_sets([], _, Acc, _Mode) -> lists:reverse(Acc); +inf_tuples_in_sets(_, [], Acc, _Mode) -> lists:reverse(Acc). + +inf_union(U1, U2, opaque) -> +%%--------------------------------------------------------------------- +%% Under Testing +%%---------------------------------------------------------------------- +%% OpaqueFun = +%% fun(Union1, Union2) -> +%% [_,_,_,_,_,_,_,_,Opaque,_] = Union1, +%% [A,B,F,I,L,N,T,M,_,_R] = Union2, +%% List = [A,B,F,I,L,N,T,M], +%% case [T || T <- List, t_inf(T, Opaque, opaque) =/= ?none] of +%% [] -> ?none; +%% _ -> Opaque +%% end +%% end, +%% O1 = OpaqueFun(U1, U2), +%% O2 = OpaqueFun(U2, U1), +%% Union = inf_union(U1, U2, 0, [], opaque), +%% t_sup([O1, O2, Union]); + inf_union(U1, U2, 0, [], opaque); +inf_union(U1, U2, OtherMode) -> + inf_union(U1, U2, 0, [], OtherMode). + +inf_union([?none|Left1], [?none|Left2], N, Acc, Mode) -> + inf_union(Left1, Left2, N, [?none|Acc], Mode); +inf_union([T1|Left1], [T2|Left2], N, Acc, Mode) -> + case t_inf(T1, T2, Mode) of + ?none -> inf_union(Left1, Left2, N, [?none|Acc], Mode); + T -> inf_union(Left1, Left2, N+1, [T|Acc], Mode) + end; +inf_union([], [], N, Acc, _Mode) -> + if N =:= 0 -> ?none; + N =:= 1 -> + [Type] = [T || T <- Acc, T =/= ?none], + Type; + N >= 2 -> ?union(lists:reverse(Acc)) + end. + +inf_bitstr(U1, B1, U2, B2) -> + GCD = gcd(U1, U2), + case (B2-B1) rem GCD of + 0 -> + U = (U1*U2) div GCD, + B = findfirst(0, 0, U1, B1, U2, B2), + t_bitstr(U, B); + _ -> + ?none + end. + +findfirst(N1, N2, U1, B1, U2, B2) -> + Val1 = U1*N1+B1, + Val2 = U2*N2+B2, + if Val1 =:= Val2 -> + Val1; + Val1 > Val2 -> + findfirst(N1, N2+1, U1, B1, U2, B2); + Val1 < Val2 -> + findfirst(N1+1, N2, U1, B1, U2, B2) + end. + +%%----------------------------------------------------------------------------- +%% Substitution of variables +%% + +-spec t_subst(erl_type(), dict()) -> erl_type(). + +t_subst(T, Dict) -> + case t_has_var(T) of + true -> t_subst(T, Dict, fun(X) -> X end); + false -> T + end. + +-spec subst_all_vars_to_any(erl_type()) -> erl_type(). + +subst_all_vars_to_any(T) -> + case t_has_var(T) of + true -> t_subst(T, dict:new(), fun(_) -> ?any end); + false -> T + end. + +t_subst(?var(Id) = V, Dict, Fun) -> + case dict:find(Id, Dict) of + error -> Fun(V); + {ok, Type} -> Type + end; +t_subst(?list(Contents, Termination, Size), Dict, Fun) -> + case t_subst(Contents, Dict, Fun) of + ?none -> ?none; + NewContents -> + %% Be careful here to make the termination collapse if necessary. + case t_subst(Termination, Dict, Fun) of + ?nil -> ?list(NewContents, ?nil, Size); + ?any -> ?list(NewContents, ?any, Size); + Other -> + ?list(NewContents, NewTermination, _) = t_cons(NewContents, Other), + ?list(NewContents, NewTermination, Size) + end + end; +t_subst(?function(Domain, Range), Dict, Fun) -> + ?function(t_subst(Domain, Dict, Fun), t_subst(Range, Dict, Fun)); +t_subst(?product(Types), Dict, Fun) -> + ?product([t_subst(T, Dict, Fun) || T <- Types]); +t_subst(?tuple(?any, ?any, ?any) = T, _Dict, _Fun) -> + T; +t_subst(?tuple(Elements, _Arity, _Tag), Dict, Fun) -> + t_tuple([t_subst(E, Dict, Fun) || E <- Elements]); +t_subst(?tuple_set(_) = TS, Dict, Fun) -> + t_sup([t_subst(T, Dict, Fun) || T <- t_tuple_subtypes(TS)]); +t_subst(T, _Dict, _Fun) -> + T. + +%%----------------------------------------------------------------------------- +%% Unification +%% + +-spec t_unify(erl_type(), erl_type()) -> {erl_type(), [{_, erl_type()}]}. + +t_unify(T1, T2) -> + {T, Dict} = t_unify(T1, T2, dict:new()), + {t_subst(T, Dict), lists:keysort(1, dict:to_list(Dict))}. + +t_unify(?var(Id) = T, ?var(Id), Dict) -> + {T, Dict}; +t_unify(?var(Id1) = T, ?var(Id2), Dict) -> + case dict:find(Id1, Dict) of + error -> + case dict:find(Id2, Dict) of + error -> {T, dict:store(Id2, T, Dict)}; + {ok, Type} -> {Type, t_unify(T, Type, Dict)} + end; + {ok, Type1} -> + case dict:find(Id2, Dict) of + error -> {Type1, dict:store(Id2, T, Dict)}; + {ok, Type2} -> t_unify(Type1, Type2, Dict) + end + end; +t_unify(?var(Id), Type, Dict) -> + case dict:find(Id, Dict) of + error -> {Type, dict:store(Id, Type, Dict)}; + {ok, VarType} -> t_unify(VarType, Type, Dict) + end; +t_unify(Type, ?var(Id), Dict) -> + case dict:find(Id, Dict) of + error -> {Type, dict:store(Id, Type, Dict)}; + {ok, VarType} -> t_unify(VarType, Type, Dict) + end; +t_unify(?function(Domain1, Range1), ?function(Domain2, Range2), Dict) -> + {Domain, Dict1} = t_unify(Domain1, Domain2, Dict), + {Range, Dict2} = t_unify(Range1, Range2, Dict1), + {?function(Domain, Range), Dict2}; +t_unify(?list(Contents1, Termination1, Size), + ?list(Contents2, Termination2, Size), Dict) -> + {Contents, Dict1} = t_unify(Contents1, Contents2, Dict), + {Termination, Dict2} = t_unify(Termination1, Termination2, Dict1), + {?list(Contents, Termination, Size), Dict2}; +t_unify(?product(Types1), ?product(Types2), Dict) -> + {Types, Dict1} = unify_lists(Types1, Types2, Dict), + {?product(Types), Dict1}; +t_unify(?tuple(?any, ?any, ?any) = T, ?tuple(?any, ?any, ?any), Dict) -> + {T, Dict}; +t_unify(?tuple(Elements1, Arity, _), + ?tuple(Elements2, Arity, _), Dict) when Arity =/= ?any -> + {NewElements, Dict1} = unify_lists(Elements1, Elements2, Dict), + {t_tuple(NewElements), Dict1}; +t_unify(?tuple_set([{Arity, _}]) = T1, + ?tuple(_, Arity, _) = T2, Dict) when Arity =/= ?any -> + unify_tuple_set_and_tuple(T1, T2, Dict); +t_unify(?tuple(_, Arity, _) = T1, + ?tuple_set([{Arity, _}]) = T2, Dict) when Arity =/= ?any -> + unify_tuple_set_and_tuple(T2, T1, Dict); +t_unify(?tuple_set(List1), ?tuple_set(List2), Dict) -> + {Tuples, NewDict} = + unify_lists(lists:append([T || {_Arity, T} <- List1]), + lists:append([T || {_Arity, T} <- List2]), Dict), + {t_sup(Tuples), NewDict}; +t_unify(T, T, Dict) -> + {T, Dict}; +t_unify(T1, T2, _) -> + throw({mismatch, T1, T2}). + +unify_tuple_set_and_tuple(?tuple_set([{Arity, List}]), + ?tuple(Elements2, Arity, _), Dict) -> + %% Can only work if the single tuple has variables at correct places. + %% Collapse the tuple set. + {NewElements, Dict1} = unify_lists(sup_tuple_elements(List), Elements2, Dict), + {t_tuple(NewElements), Dict1}. + +unify_lists(L1, L2, Dict) -> + unify_lists(L1, L2, Dict, []). + +unify_lists([T1|Left1], [T2|Left2], Dict, Acc) -> + {NewT, NewDict} = t_unify(T1, T2, Dict), + unify_lists(Left1, Left2, NewDict, [NewT|Acc]); +unify_lists([], [], Dict, Acc) -> + {lists:reverse(Acc), Dict}. + +%%t_assign_variables_to_subtype(T1, T2) -> +%% try +%% Dict = assign_vars(T1, T2, dict:new()), +%% {ok, dict:map(fun(_Param, List) -> t_sup(List) end, Dict)} +%% catch +%% throw:error -> error +%% end. + +%%assign_vars(_, ?var(_), _Dict) -> +%% erlang:error("Variable in right hand side of assignment"); +%%assign_vars(?any, _, Dict) -> +%% Dict; +%%assign_vars(?var(_) = Var, Type, Dict) -> +%% store_var(Var, Type, Dict); +%%assign_vars(?function(Domain1, Range1), ?function(Domain2, Range2), Dict) -> +%% DomainList = +%% case Domain2 of +%% ?any -> []; +%% ?product(List) -> List +%% end, +%% case any_none([Range2|DomainList]) of +%% true -> throw(error); +%% false -> +%% Dict1 = assign_vars(Domain1, Domain2, Dict), +%% assign_vars(Range1, Range2, Dict1) +%% end; +%%assign_vars(?list(_Contents, _Termination, ?any), ?nil, Dict) -> +%% Dict; +%%assign_vars(?list(Contents1, Termination1, Size1), +%% ?list(Contents2, Termination2, Size2), Dict) -> +%% Dict1 = assign_vars(Contents1, Contents2, Dict), +%% Dict2 = assign_vars(Termination1, Termination2, Dict1), +%% case {Size1, Size2} of +%% {S, S} -> Dict2; +%% {?any, ?nonempty_qual} -> Dict2; +%% {_, _} -> throw(error) +%% end; +%%assign_vars(?product(Types1), ?product(Types2), Dict) -> +%% case length(Types1) =:= length(Types2) of +%% true -> assign_vars_lists(Types1, Types2, Dict); +%% false -> throw(error) +%% end; +%%assign_vars(?tuple(?any, ?any, ?any), ?tuple(?any, ?any, ?any), Dict) -> +%% Dict; +%%assign_vars(?tuple(?any, ?any, ?any), ?tuple(_, _, _), Dict) -> +%% Dict; +%%assign_vars(?tuple(Elements1, Arity, _), +%% ?tuple(Elements2, Arity, _), Dict) when Arity =/= ?any -> +%% assign_vars_lists(Elements1, Elements2, Dict); +%%assign_vars(?tuple_set(_) = T, ?tuple_set(List2), Dict) -> +%% %% All Rhs tuples must already be subtypes of Lhs, so we can take +%% %% each one separatly. +%% assign_vars_lists([T || _ <- List2], List2, Dict); +%%assign_vars(?tuple(?any, ?any, ?any), ?tuple_set(_), Dict) -> +%% Dict; +%%assign_vars(?tuple(_, Arity, _) = T1, ?tuple_set(List), Dict) -> +%% case reduce_tuple_tags(List) of +%% [Tuple = ?tuple(_, Arity, _)] -> assign_vars(T1, Tuple, Dict); +%% _ -> throw(error) +%% end; +%%assign_vars(?tuple_set(List), ?tuple(_, Arity, Tag) = T2, Dict) -> +%% case [T || ?tuple(_, Arity1, Tag1) = T <- List, +%% Arity1 =:= Arity, Tag1 =:= Tag] of +%% [] -> throw(error); +%% [T1] -> assign_vars(T1, T2, Dict) +%% end; +%%assign_vars(?union(U1), T2, Dict) -> +%% ?union(U2) = force_union(T2), +%% assign_vars_lists(U1, U2, Dict); +%%assign_vars(T, T, Dict) -> +%% Dict; +%%assign_vars(T1, T2, Dict) -> +%% case t_is_subtype(T2, T1) of +%% false -> throw(error); +%% true -> Dict +%% end. + +%%assign_vars_lists([T1|Left1], [T2|Left2], Dict) -> +%% assign_vars_lists(Left1, Left2, assign_vars(T1, T2, Dict)); +%%assign_vars_lists([], [], Dict) -> +%% Dict. + +%%store_var(?var(Id), Type, Dict) -> +%% case dict:find(Id, Dict) of +%% error -> dict:store(Id, [Type], Dict); +%% {ok, _VarType0} -> dict:update(Id, fun(X) -> [Type|X] end, Dict) +%% end. + +%%----------------------------------------------------------------------------- +%% Subtraction. +%% +%% Note that the subtraction is an approximation since we do not have +%% negative types. Also, tuples and products should be handled using +%% the cartesian product of the elements, but this is not feasible to +%% do. +%% +%% Example: {a|b,c|d}\{a,d} = {a,c}|{a,d}|{b,c}|{b,d} \ {a,d} = +%% = {a,c}|{b,c}|{b,d} = {a|b,c|d} +%% +%% Instead, we can subtract if all elements but one becomes none after +%% subtracting element-wise. +%% +%% Example: {a|b,c|d}\{a|b,d} = {a,c}|{a,d}|{b,c}|{b,d} \ {a,d}|{b,d} = +%% = {a,c}|{b,c} = {a|b,c} + +-spec t_subtract_list(erl_type(), [erl_type()]) -> erl_type(). + +t_subtract_list(T1, [T2|Left]) -> + t_subtract_list(t_subtract(T1, T2), Left); +t_subtract_list(T, []) -> + T. + +-spec t_subtract(erl_type(), erl_type()) -> erl_type(). + +t_subtract(_, ?any) -> ?none; +t_subtract(?any, _) -> ?any; +t_subtract(T, ?unit) -> T; +t_subtract(?unit, _) -> ?unit; +t_subtract(?none, _) -> ?none; +t_subtract(T, ?none) -> T; +t_subtract(?atom(Set1), ?atom(Set2)) -> + case set_subtract(Set1, Set2) of + ?none -> ?none; + Set -> ?atom(Set) + end; +t_subtract(?bitstr(U1, B1), ?bitstr(U2, B2)) -> + subtract_bin(t_bitstr(U1, B1), t_inf(t_bitstr(U1, B1), t_bitstr(U2, B2))); +t_subtract(?function(_, _) = T1, ?function(_, _) = T2) -> + case t_is_subtype(T1, T2) of + true -> ?none; + false -> T1 + end; +t_subtract(?identifier(Set1), ?identifier(Set2)) -> + case set_subtract(Set1, Set2) of + ?none -> ?none; + Set -> ?identifier(Set) + end; +t_subtract(?opaque(Set1), ?opaque(Set2)) -> + case set_subtract(Set1, Set2) of + ?none -> ?none; + Set -> ?opaque(Set) + end; +t_subtract(?matchstate(Pres1, Slots1), ?matchstate(Pres2, _Slots2)) -> + Pres = t_subtract(Pres1,Pres2), + case t_is_none(Pres) of + true -> ?none; + false -> ?matchstate(Pres,Slots1) + end; +t_subtract(?matchstate(Present,Slots),_) -> + ?matchstate(Present,Slots); +t_subtract(?nil, ?nil) -> + ?none; +t_subtract(?nil, ?nonempty_list(_, _)) -> + ?nil; +t_subtract(?nil, ?list(_, _, _)) -> + ?none; +t_subtract(?list(Contents, Termination, _Size) = T, ?nil) -> + case Termination =:= ?nil of + true -> ?nonempty_list(Contents, Termination); + false -> T + end; +t_subtract(?list(Contents1, Termination1, Size1) = T, + ?list(Contents2, Termination2, Size2)) -> + case t_is_subtype(Contents1, Contents2) of + true -> + case t_is_subtype(Termination1, Termination2) of + true -> + case {Size1, Size2} of + {?nonempty_qual, ?unknown_qual} -> ?none; + {?unknown_qual, ?nonempty_qual} -> Termination1; + {S, S} -> ?none + end; + false -> + %% If the termination is not covered by the subtracted type + %% we cannot really say anything about the result. + T + end; + false -> + %% All contents must be covered if there is going to be any + %% change to the list. + T + end; +t_subtract(?float, ?float) -> ?none; +t_subtract(?number(_, _) = T1, ?float) -> t_inf(T1, t_integer()); +t_subtract(?float, ?number(_Set, Tag)) -> + case Tag of + ?unknown_qual -> ?none; + _ -> ?float + end; +t_subtract(?number(_, _), ?number(?any, ?unknown_qual)) -> ?none; +t_subtract(?number(_, _) = T1, ?integer(?any)) -> t_inf(?float, T1); +t_subtract(?int_set(Set1), ?int_set(Set2)) -> + case set_subtract(Set1, Set2) of + ?none -> ?none; + Set -> ?int_set(Set) + end; +t_subtract(?int_range(From1, To1) = T1, ?int_range(_, _) = T2) -> + case t_inf(T1, T2) of + ?none -> T1; + ?int_range(From1, To1) -> ?none; + ?int_range(neg_inf, To) -> t_from_range(To + 1, To1); + ?int_range(From, pos_inf) -> t_from_range(From1, From - 1); + ?int_range(From, To) -> t_sup(t_from_range(From1, From - 1), + t_from_range(To + 1, To)) + end; +t_subtract(?int_range(From, To) = T1, ?int_set(Set)) -> + NewFrom = case set_is_element(From, Set) of + true -> From + 1; + false -> From + end, + NewTo = case set_is_element(To, Set) of + true -> To - 1; + false -> To + end, + if (NewFrom =:= From) and (NewTo =:= To) -> T1; + true -> t_from_range(NewFrom, NewTo) + end; +t_subtract(?int_set(Set), ?int_range(From, To)) -> + case set_filter(fun(X) -> not ((X =< From) orelse (X >= To)) end, Set) of + ?none -> ?none; + NewSet -> ?int_set(NewSet) + end; +t_subtract(?integer(?any) = T1, ?integer(_)) -> T1; +t_subtract(?number(_, _) = T1, ?number(_, _)) -> T1; +t_subtract(?tuple(_, _, _), ?tuple(?any, ?any, ?any)) -> ?none; +t_subtract(?tuple_set(_), ?tuple(?any, ?any, ?any)) -> ?none; +t_subtract(?tuple(?any, ?any, ?any) = T1, ?tuple_set(_)) -> T1; +t_subtract(?tuple(Elements1, Arity1, _Tag1) = T1, + ?tuple(Elements2, Arity2, _Tag2)) -> + if Arity1 =/= Arity2 -> T1; + Arity1 =:= Arity2 -> + NewElements = t_subtract_lists(Elements1, Elements2), + case [E || E <- NewElements, E =/= ?none] of + [] -> ?none; + [_] -> t_tuple(replace_nontrivial_element(Elements1, NewElements)); + _ -> T1 + end + end; +t_subtract(?tuple_set(List1) = T1, ?tuple(_, Arity, _) = T2) -> + case orddict:find(Arity, List1) of + error -> T1; + {ok, List2} -> + TuplesLeft0 = [Tuple || {_Arity, Tuple} <- orddict:erase(Arity, List1)], + TuplesLeft1 = lists:append(TuplesLeft0), + t_sup([t_subtract(L, T2) || L <- List2] ++ TuplesLeft1) + end; +t_subtract(?tuple(_, Arity, _) = T1, ?tuple_set(List1)) -> + case orddict:find(Arity, List1) of + error -> T1; + {ok, List2} -> t_inf([t_subtract(T1, L) || L <- List2]) + end; +t_subtract(?tuple_set(_) = T1, ?tuple_set(_) = T2) -> + t_sup([t_subtract(T, T2) || T <- t_tuple_subtypes(T1)]); +t_subtract(?product(Elements1) = T1, ?product(Elements2)) -> + Arity1 = length(Elements1), + Arity2 = length(Elements2), + if Arity1 =/= Arity2 -> T1; + Arity1 =:= Arity2 -> + NewElements = t_subtract_lists(Elements1, Elements2), + case [E || E <- NewElements, E =/= ?none] of + [] -> ?none; + [_] -> t_product(replace_nontrivial_element(Elements1, NewElements)); + _ -> T1 + end + end; +t_subtract(?product(P1), _) -> + ?product(P1); +t_subtract(T, ?product(_)) -> + T; +t_subtract(?union(U1), ?union(U2)) -> + subtract_union(U1, U2); +t_subtract(T1, T2) -> + ?union(U1) = force_union(T1), + ?union(U2) = force_union(T2), + subtract_union(U1, U2). + +-spec t_subtract_lists([erl_type()], [erl_type()]) -> [erl_type()]. + +t_subtract_lists(L1, L2) -> + t_subtract_lists(L1, L2, []). + +-spec t_subtract_lists([erl_type()], [erl_type()], [erl_type()]) -> [erl_type()]. + +t_subtract_lists([T1|Left1], [T2|Left2], Acc) -> + t_subtract_lists(Left1, Left2, [t_subtract(T1, T2)|Acc]); +t_subtract_lists([], [], Acc) -> + lists:reverse(Acc). + +-spec subtract_union([erl_type(),...], [erl_type(),...]) -> erl_type(). + +subtract_union(U1, U2) -> + subtract_union(U1, U2, 0, []). + +-spec subtract_union([erl_type()], [erl_type()], non_neg_integer(), [erl_type()]) -> erl_type(). + +subtract_union([T1|Left1], [T2|Left2], N, Acc) -> + case t_subtract(T1, T2) of + ?none -> subtract_union(Left1, Left2, N, [?none|Acc]); + T -> subtract_union(Left1, Left2, N+1, [T|Acc]) + end; +subtract_union([], [], 0, _Acc) -> + ?none; +subtract_union([], [], 1, Acc) -> + [T] = [X || X <- Acc, X =/= ?none], + T; +subtract_union([], [], N, Acc) when is_integer(N), N > 1 -> + ?union(lists:reverse(Acc)). + +replace_nontrivial_element(El1, El2) -> + replace_nontrivial_element(El1, El2, []). + +replace_nontrivial_element([T1|Left1], [?none|Left2], Acc) -> + replace_nontrivial_element(Left1, Left2, [T1|Acc]); +replace_nontrivial_element([_|Left1], [T2|_], Acc) -> + lists:reverse(Acc) ++ [T2|Left1]. + +subtract_bin(?bitstr(U1, B1), ?bitstr(U1, B1)) -> + ?none; +subtract_bin(?bitstr(U1, B1), ?none) -> + t_bitstr(U1, B1); +subtract_bin(?bitstr(U1, B1), ?bitstr(0, B1)) -> + t_bitstr(U1, B1+U1); +subtract_bin(?bitstr(U1, B1), ?bitstr(U1, B2)) -> + if (B1+U1) =/= B2 -> t_bitstr(0, B1); + true -> t_bitstr(U1, B1) + end; +subtract_bin(?bitstr(U1, B1), ?bitstr(U2, B2)) -> + if (2 * U1) =:= U2 -> + if B1 =:= B2 -> + t_bitstr(U2, B1+U1); + (B1 + U1) =:= B2 -> + t_bitstr(U2, B1); + true -> + t_bitstr(U1, B1) + end; + true -> + t_bitstr(U1, B1) + end. + +%%----------------------------------------------------------------------------- +%% Relations +%% + +-spec t_is_equal(erl_type(), erl_type()) -> boolean(). + +t_is_equal(T, T) -> true; +t_is_equal(_, _) -> false. + +-spec t_is_subtype(erl_type(), erl_type()) -> boolean(). + +t_is_subtype(T1, T2) -> + Inf = t_inf(T1, T2), + t_is_equal(T1, Inf). + +-spec t_is_instance(erl_type(), erl_type()) -> boolean(). + +t_is_instance(ConcreteType, Type) -> + t_is_subtype(ConcreteType, t_unopaque(Type)). + +-spec t_unopaque(erl_type()) -> erl_type(). + +t_unopaque(T) -> + t_unopaque(T, 'universe'). + +-spec t_unopaque(erl_type(), 'universe' | [erl_type()]) -> erl_type(). + +t_unopaque(?opaque(_) = T, Opaques) -> + case Opaques =:= universe orelse lists:member(T, Opaques) of + true -> t_unopaque(t_opaque_structure(T), Opaques); + false -> T % XXX: needs revision for parametric opaque data types + end; +t_unopaque(?list(ElemT, Termination, Sz), Opaques) -> + ?list(t_unopaque(ElemT, Opaques), Termination, Sz); +t_unopaque(?tuple(?any, _, _) = T, _) -> T; +t_unopaque(?tuple(ArgTs, Sz, Tag), Opaques) when is_list(ArgTs) -> + NewArgTs = [t_unopaque(A, Opaques) || A <- ArgTs], + ?tuple(NewArgTs, Sz, Tag); +t_unopaque(?tuple_set(Set), Opaques) -> + NewSet = [{Sz, [t_unopaque(T, Opaques) || T <- Tuples]} + || {Sz, Tuples} <- Set], + ?tuple_set(NewSet); +t_unopaque(?union([A,B,F,I,L,N,T,M,O,R]), Opaques) -> + UL = t_unopaque(L, Opaques), + UT = t_unopaque(T, Opaques), + UO = case O of + ?none -> []; + ?opaque(Os) -> [t_unopaque(S, Opaques) || #opaque{struct = S} <- Os] + end, + t_sup([?union([A,B,F,I,UL,N,UT,M,?none,R])|UO]); +t_unopaque(T, _) -> + T. + +%%----------------------------------------------------------------------------- +%% K-depth abstraction. +%% +%% t_limit/2 is the exported function, which checks the type of the +%% second argument and calls the module local t_limit_k/2 function. +%% + +-spec t_limit(erl_type(), integer()) -> erl_type(). + +t_limit(Term, K) when is_integer(K) -> + t_limit_k(Term, K). + +t_limit_k(_, K) when K =< 0 -> ?any; +t_limit_k(?tuple(?any, ?any, ?any) = T, _K) -> T; +t_limit_k(?tuple(Elements, Arity, _), K) -> + if K =:= 1 -> t_tuple(Arity); + true -> t_tuple([t_limit_k(E, K-1) || E <- Elements]) + end; +t_limit_k(?tuple_set(_) = T, K) -> + t_sup([t_limit_k(Tuple, K) || Tuple <- t_tuple_subtypes(T)]); +t_limit_k(?list(Elements, Termination, Size), K) -> + NewTermination = + if K =:= 1 -> + %% We do not want to lose the termination information. + t_limit_k(Termination, K); + true -> t_limit_k(Termination, K - 1) + end, + NewElements = t_limit_k(Elements, K - 1), + TmpList = t_cons(NewElements, NewTermination), + case Size of + ?nonempty_qual -> TmpList; + ?unknown_qual -> + ?list(NewElements1, NewTermination1, _) = TmpList, + ?list(NewElements1, NewTermination1, ?unknown_qual) + end; +t_limit_k(?function(Domain, Range), K) -> + %% The domain is either a product or any() so we do not decrease the K. + ?function(t_limit_k(Domain, K), t_limit_k(Range, K-1)); +t_limit_k(?product(Elements), K) -> + ?product([t_limit_k(X, K - 1) || X <- Elements]); +t_limit_k(?union(Elements), K) -> + ?union([t_limit_k(X, K) || X <- Elements]); +t_limit_k(T, _K) -> T. + +%%============================================================================ +%% +%% Abstract records. Used for comparing contracts. +%% +%%============================================================================ + +-spec t_abstract_records(erl_type(), dict()) -> erl_type(). + +t_abstract_records(?list(Contents, Termination, Size), RecDict) -> + case t_abstract_records(Contents, RecDict) of + ?none -> ?none; + NewContents -> + %% Be careful here to make the termination collapse if necessary. + case t_abstract_records(Termination, RecDict) of + ?nil -> ?list(NewContents, ?nil, Size); + ?any -> ?list(NewContents, ?any, Size); + Other -> + ?list(NewContents, NewTermination, _) = t_cons(NewContents, Other), + ?list(NewContents, NewTermination, Size) + end + end; +t_abstract_records(?function(Domain, Range), RecDict) -> + ?function(t_abstract_records(Domain, RecDict), + t_abstract_records(Range, RecDict)); +t_abstract_records(?product(Types), RecDict) -> + ?product([t_abstract_records(T, RecDict) || T <- Types]); +t_abstract_records(?union(Types), RecDict) -> + t_sup([t_abstract_records(T, RecDict) || T <- Types]); +t_abstract_records(?tuple(?any, ?any, ?any) = T, _RecDict) -> + T; +t_abstract_records(?tuple(Elements, Arity, ?atom(_) = Tag), RecDict) -> + [TagAtom] = t_atom_vals(Tag), + case lookup_record(TagAtom, Arity - 1, RecDict) of + error -> t_tuple([t_abstract_records(E, RecDict) || E <- Elements]); + {ok, Fields} -> t_tuple([Tag|[T || {_Name, T} <- Fields]]) + end; +t_abstract_records(?tuple(Elements, _Arity, _Tag), RecDict) -> + t_tuple([t_abstract_records(E, RecDict) || E <- Elements]); +t_abstract_records(?tuple_set(_) = Tuples, RecDict) -> + t_sup([t_abstract_records(T, RecDict) || T <- t_tuple_subtypes(Tuples)]); +t_abstract_records(T, _RecDict) -> + T. + +%% Map over types. Depth first. Used by the contract checker. ?list is +%% not fully implemented so take care when changing the type in Termination. + +-spec t_map(fun((erl_type()) -> erl_type()), erl_type()) -> erl_type(). + +t_map(Fun, ?list(Contents, Termination, Size)) -> + Fun(?list(t_map(Fun, Contents), t_map(Fun, Termination), Size)); +t_map(Fun, ?function(Domain, Range)) -> + Fun(?function(t_map(Fun, Domain), t_map(Fun, Range))); +t_map(Fun, ?product(Types)) -> + Fun(?product([t_map(Fun, T) || T <- Types])); +t_map(Fun, ?union(Types)) -> + Fun(t_sup([t_map(Fun, T) || T <- Types])); +t_map(Fun, ?tuple(?any, ?any, ?any) = T) -> + Fun(T); +t_map(Fun, ?tuple(Elements, _Arity, _Tag)) -> + Fun(t_tuple([t_map(Fun, E) || E <- Elements])); +t_map(Fun, ?tuple_set(_) = Tuples) -> + Fun(t_sup([t_map(Fun, T) || T <- t_tuple_subtypes(Tuples)])); +t_map(Fun, T) -> + Fun(T). + +%%============================================================================= +%% +%% Prettyprinter +%% +%%============================================================================= + +-spec t_to_string(erl_type()) -> string(). + +t_to_string(T) -> + t_to_string(T, dict:new()). + +-spec t_to_string(erl_type(), dict()) -> string(). + +t_to_string(?any, _RecDict) -> + "any()"; +t_to_string(?none, _RecDict) -> + "none()"; +t_to_string(?unit, _RecDict) -> + "no_return()"; +t_to_string(?atom(?any), _RecDict) -> + "atom()"; +t_to_string(?atom(Set), _RecDict) -> + case set_size(Set) of + 2 -> + case set_is_element(true, Set) andalso set_is_element(false, Set) of + true -> "boolean()"; + false -> set_to_string(Set) + end; + _ -> + set_to_string(Set) + end; +t_to_string(?bitstr(8, 0), _RecDict) -> + "binary()"; +t_to_string(?bitstr(0, 0), _RecDict) -> + "<<>>"; +t_to_string(?bitstr(0, B), _RecDict) -> + io_lib:format("<<_:~w>>", [B]); +t_to_string(?bitstr(U, 0), _RecDict) -> + io_lib:format("<<_:_*~w>>", [U]); +t_to_string(?bitstr(U, B), _RecDict) -> + io_lib:format("<<_:~w,_:_*~w>>", [B, U]); +t_to_string(?function(?any, ?any), _RecDict) -> + "fun()"; +t_to_string(?function(?any, Range), RecDict) -> + "fun((...) -> " ++ t_to_string(Range, RecDict) ++ ")"; +t_to_string(?function(?product(ArgList), Range), RecDict) -> + "fun((" ++ comma_sequence(ArgList, RecDict) ++ ") -> " + ++ t_to_string(Range, RecDict) ++ ")"; +t_to_string(?identifier(Set), _RecDict) -> + if Set =:= ?any -> "identifier()"; + true -> sequence([io_lib:format("~w()", [T]) + || T <- set_to_list(Set)], [], " | ") + end; +t_to_string(?opaque(Set), _RecDict) -> + sequence([case is_opaque_builtin(Mod, Name) of + true -> io_lib:format("~w()", [Name]); + false -> io_lib:format("~w:~w()", [Mod, Name]) + end + || #opaque{mod = Mod, name = Name} <- set_to_list(Set)], [], " | "); +t_to_string(?matchstate(Pres, Slots), RecDict) -> + io_lib:format("ms(~s,~s)", [t_to_string(Pres, RecDict), + t_to_string(Slots,RecDict)]); +t_to_string(?nil, _RecDict) -> + "[]"; +t_to_string(?nonempty_list(Contents, Termination), RecDict) -> + ContentString = t_to_string(Contents, RecDict), + case Termination of + ?nil -> + case Contents of + ?char -> "nonempty_string()"; + _ -> "["++ContentString++",...]" + end; + ?any -> + %% Just a safety check. + case Contents =:= ?any of + true -> ok; + false -> + erlang:error({illegal_list, ?nonempty_list(Contents, Termination)}) + end, + "nonempty_maybe_improper_list()"; + _ -> + case t_is_subtype(t_nil(), Termination) of + true -> + "nonempty_maybe_improper_list("++ContentString++"," + ++t_to_string(Termination, RecDict)++")"; + false -> + "nonempty_improper_list("++ContentString++"," + ++t_to_string(Termination, RecDict)++")" + end + end; +t_to_string(?list(Contents, Termination, ?unknown_qual), RecDict) -> + ContentString = t_to_string(Contents, RecDict), + case Termination of + ?nil -> + case Contents of + ?char -> "string()"; + _ -> "["++ContentString++"]" + end; + ?any -> + %% Just a safety check. + case Contents =:= ?any of + true -> ok; + false -> + L = ?list(Contents, Termination, ?unknown_qual), + erlang:error({illegal_list, L}) + end, + "maybe_improper_list()"; + _ -> + case t_is_subtype(t_nil(), Termination) of + true -> + "maybe_improper_list("++ContentString++"," + ++t_to_string(Termination, RecDict)++")"; + false -> + "improper_list("++ContentString++"," + ++t_to_string(Termination, RecDict)++")" + end + end; +t_to_string(?int_set(Set), _RecDict) -> + set_to_string(Set); +t_to_string(?byte, _RecDict) -> "byte()"; +t_to_string(?char, _RecDict) -> "char()"; +t_to_string(?integer_pos, _RecDict) -> "pos_integer()"; +t_to_string(?integer_non_neg, _RecDict) -> "non_neg_integer()"; +t_to_string(?integer_neg, _RecDict) -> "neg_integer()"; +t_to_string(?int_range(From, To), _RecDict) -> + lists:flatten(io_lib:format("~w..~w", [From, To])); +t_to_string(?integer(?any), _RecDict) -> "integer()"; +t_to_string(?float, _RecDict) -> "float()"; +t_to_string(?number(?any, ?unknown_qual), _RecDict) -> "number()"; +t_to_string(?product(List), RecDict) -> + "<" ++ comma_sequence(List, RecDict) ++ ">"; +t_to_string(?remote(Set), RecDict) -> + sequence([case Args =:= [] of + true -> io_lib:format("~w:~w()", [Mod, Name]); + false -> + ArgString = comma_sequence(Args, RecDict), + io_lib:format("~w:~w(~s)", [Mod, Name, ArgString]) + end + || #remote{mod = Mod, name = Name, args = Args} <- set_to_list(Set)], + [], " | "); +t_to_string(?tuple(?any, ?any, ?any), _RecDict) -> "tuple()"; +t_to_string(?tuple(Elements, _Arity, ?any), RecDict) -> + "{" ++ comma_sequence(Elements, RecDict) ++ "}"; +t_to_string(?tuple(Elements, Arity, Tag), RecDict) -> + [TagAtom] = t_atom_vals(Tag), + case lookup_record(TagAtom, Arity-1, RecDict) of + error -> "{" ++ comma_sequence(Elements, RecDict) ++ "}"; + {ok, FieldNames} -> + record_to_string(TagAtom, Elements, FieldNames, RecDict) + end; +t_to_string(?tuple_set(_) = T, RecDict) -> + union_sequence(t_tuple_subtypes(T), RecDict); +t_to_string(?union(Types), RecDict) -> + union_sequence([T || T <- Types, T =/= ?none], RecDict); +t_to_string(?var(Id), _RecDict) when is_atom(Id) -> + io_lib:format("~s", [atom_to_list(Id)]); +t_to_string(?var(Id), _RecDict) when is_integer(Id) -> + io_lib:format("var(~w)", [Id]). + +record_to_string(Tag, [_|Fields], FieldNames, RecDict) -> + FieldStrings = record_fields_to_string(Fields, FieldNames, RecDict, []), + "#" ++ atom_to_list(Tag) ++ "{" ++ sequence(FieldStrings, [], ",") ++ "}". + +record_fields_to_string([Field|Left1], [{FieldName, DeclaredType}|Left2], + RecDict, Acc) -> + PrintType = + case t_is_equal(Field, DeclaredType) of + true -> false; + false -> + case t_is_any(DeclaredType) andalso t_is_atom(undefined, Field) of + true -> false; + false -> + TmpType = t_subtract(DeclaredType, t_atom(undefined)), + not t_is_equal(Field, TmpType) + end + end, + case PrintType of + false -> record_fields_to_string(Left1, Left2, RecDict, Acc); + true -> + String = atom_to_list(FieldName) ++ "::" ++ t_to_string(Field, RecDict), + record_fields_to_string(Left1, Left2, RecDict, [String|Acc]) + end; +record_fields_to_string([], [], _RecDict, Acc) -> + lists:reverse(Acc). + +comma_sequence(Types, RecDict) -> + List = [case T =:= ?any of + true -> "_"; + false -> t_to_string(T, RecDict) + end || T <- Types], + sequence(List, ","). + +union_sequence(Types, RecDict) -> + List = [t_to_string(T, RecDict) || T <- Types], + sequence(List, " | "). + +sequence(List, Delimiter) -> + sequence(List, [], Delimiter). + +sequence([], [], _Delimiter) -> + []; +sequence([T], Acc, _Delimiter) -> + lists:flatten(lists:reverse([T|Acc])); +sequence([T|Left], Acc, Delimiter) -> + sequence(Left, [T ++ Delimiter|Acc], Delimiter). + +%%============================================================================= +%% +%% Build a type from parse forms. +%% +%%============================================================================= + +-spec t_from_form(parse_form()) -> erl_type(). + +t_from_form(Form) -> + t_from_form(Form, dict:new()). + +-spec t_from_form(parse_form(), dict()) -> erl_type(). + +t_from_form(Form, RecDict) -> + t_from_form(Form, RecDict, dict:new()). + +-spec t_from_form(parse_form(), dict(), dict()) -> erl_type(). + +t_from_form({var, _L, '_'}, _RecDict, _VarDict) -> t_any(); +t_from_form({var, _L, Name}, _RecDict, VarDict) -> + case dict:find(Name, VarDict) of + error -> t_var(Name); + {ok, Val} -> Val + end; +t_from_form({ann_type, _L, [_Var, Type]}, RecDict, VarDict) -> + t_from_form(Type, RecDict, VarDict); +t_from_form({paren_type, _L, [Type]}, RecDict, VarDict) -> + t_from_form(Type, RecDict, VarDict); +t_from_form({remote_type, _L, [{atom, _, Module}, {atom, _, Type}, Args]}, + RecDict, VarDict) -> + t_remote(Module, Type, [t_from_form(A, RecDict, VarDict) || A <- Args]); +t_from_form({atom, _L, Atom}, _RecDict, _VarDict) -> t_atom(Atom); +t_from_form({integer, _L, Int}, _RecDict, _VarDict) -> t_integer(Int); +t_from_form({type, _L, any, []}, _RecDict, _VarDict) -> t_any(); +t_from_form({type, _L, arity, []}, _RecDict, _VarDict) -> t_arity(); +t_from_form({type, _L, array, []}, _RecDict, _VarDict) -> t_array(); +t_from_form({type, _L, atom, []}, _RecDict, _VarDict) -> t_atom(); +t_from_form({type, _L, binary, []}, _RecDict, _VarDict) -> t_binary(); +t_from_form({type, _L, binary, [{integer, _, Base}, {integer, _, Unit}]}, + _RecDict, _VarDict) -> + t_bitstr(Unit, Base); +t_from_form({type, _L, bitstring, []}, _RecDict, _VarDict) -> t_bitstr(); +t_from_form({type, _L, bool, []}, _RecDict, _VarDict) -> t_boolean(); % XXX: Temporarily +t_from_form({type, _L, boolean, []}, _RecDict, _VarDict) -> t_boolean(); +t_from_form({type, _L, byte, []}, _RecDict, _VarDict) -> t_byte(); +t_from_form({type, _L, char, []}, _RecDict, _VarDict) -> t_char(); +t_from_form({type, _L, dict, []}, _RecDict, _VarDict) -> t_dict(); +t_from_form({type, _L, digraph, []}, _RecDict, _VarDict) -> t_digraph(); +t_from_form({type, _L, float, []}, _RecDict, _VarDict) -> t_float(); +t_from_form({type, _L, function, []}, _RecDict, _VarDict) -> t_fun(); +t_from_form({type, _L, 'fun', []}, _RecDict, _VarDict) -> t_fun(); +t_from_form({type, _L, 'fun', [{type, _, any, []}, Range]}, RecDict, VarDict) -> + t_fun(t_from_form(Range, RecDict, VarDict)); +t_from_form({type, _L, 'fun', [{type, _, product, Domain}, Range]}, + RecDict, VarDict) -> + t_fun([t_from_form(D, RecDict, VarDict) || D <- Domain], + t_from_form(Range, RecDict, VarDict)); +t_from_form({type, _L, gb_set, []}, _RecDict, _VarDict) -> t_gb_set(); +t_from_form({type, _L, gb_tree, []}, _RecDict, _VarDict) -> t_gb_tree(); +t_from_form({type, _L, identifier, []}, _RecDict, _VarDict) -> t_identifier(); +t_from_form({type, _L, integer, []}, _RecDict, _VarDict) -> t_integer(); +t_from_form({type, _L, iodata, []}, _RecDict, _VarDict) -> t_iodata(); +t_from_form({type, _L, iolist, []}, _RecDict, _VarDict) -> t_iolist(); +t_from_form({type, _L, list, []}, _RecDict, _VarDict) -> t_list(); +t_from_form({type, _L, list, [Type]}, RecDict, VarDict) -> + t_list(t_from_form(Type, RecDict, VarDict)); +t_from_form({type, _L, mfa, []}, _RecDict, _VarDict) -> t_mfa(); +t_from_form({type, _L, module, []}, _RecDict, _VarDict) -> t_module(); +t_from_form({type, _L, nil, []}, _RecDict, _VarDict) -> t_nil(); +t_from_form({type, _L, neg_integer, []}, _RecDict, _VarDict) -> t_neg_integer(); +t_from_form({type, _L, non_neg_integer, []}, _RecDict, _VarDict) -> + t_non_neg_integer(); +t_from_form({type, _L, no_return, []}, _RecDict, _VarDict) -> t_unit(); +t_from_form({type, _L, node, []}, _RecDict, _VarDict) -> t_node(); +t_from_form({type, _L, none, []}, _RecDict, _VarDict) -> t_none(); +t_from_form({type, _L, nonempty_list, []}, _RecDict, _VarDict) -> + t_nonempty_list(); +t_from_form({type, _L, nonempty_list, [Type]}, RecDict, VarDict) -> + t_nonempty_list(t_from_form(Type, RecDict, VarDict)); +t_from_form({type, _L, nonempty_improper_list, [Cont, Term]}, + RecDict, VarDict) -> + t_cons(t_from_form(Cont, RecDict, VarDict), + t_from_form(Term, RecDict, VarDict)); +t_from_form({type, _L, nonempty_maybe_improper_list, []}, _RecDict, _VarDict) -> + t_cons(?any, ?any); +t_from_form({type, _L, nonempty_maybe_improper_list, [Cont, Term]}, + RecDict, VarDict) -> + t_cons(t_from_form(Cont, RecDict, VarDict), + t_from_form(Term, RecDict, VarDict)); +t_from_form({type, _L, nonempty_string, []}, _RecDict, _VarDict) -> + t_nonempty_string(); +t_from_form({type, _L, number, []}, _RecDict, _VarDict) -> t_number(); +t_from_form({type, _L, pid, []}, _RecDict, _VarDict) -> t_pid(); +t_from_form({type, _L, port, []}, _RecDict, _VarDict) -> t_port(); +t_from_form({type, _L, pos_integer, []}, _RecDict, _VarDict) -> t_pos_integer(); +t_from_form({type, _L, maybe_improper_list, []}, _RecDict, _VarDict) -> + t_maybe_improper_list(); +t_from_form({type, _L, maybe_improper_list, [Content, Termination]}, + RecDict, VarDict) -> + t_maybe_improper_list(t_from_form(Content, RecDict, VarDict), + t_from_form(Termination, RecDict, VarDict)); +t_from_form({type, _L, product, Elements}, RecDict, VarDict) -> + t_product([t_from_form(E, RecDict, VarDict) || E <- Elements]); +t_from_form({type, _L, queue, []}, _RecDict, _VarDict) -> t_queue(); +t_from_form({type, _L, range, [{integer, _, From}, {integer, _, To}]}, + _RecDict, _VarDict) -> + t_from_range(From, To); +t_from_form({type, _L, record, [Name|Fields]}, RecDict, VarDict) -> + record_from_form(Name, Fields, RecDict, VarDict); +t_from_form({type, _L, reference, []}, _RecDict, _VarDict) -> t_reference(); +t_from_form({type, _L, set, []}, _RecDict, _VarDict) -> t_set(); +t_from_form({type, _L, string, []}, _RecDict, _VarDict) -> t_string(); +t_from_form({type, _L, term, []}, _RecDict, _VarDict) -> t_any(); +t_from_form({type, _L, tid, []}, _RecDict, _VarDict) -> t_tid(); +t_from_form({type, _L, timeout, []}, _RecDict, _VarDict) -> t_timeout(); +t_from_form({type, _L, tuple, any}, _RecDict, _VarDict) -> t_tuple(); +t_from_form({type, _L, tuple, Args}, RecDict, VarDict) -> + t_tuple([t_from_form(A, RecDict, VarDict) || A <- Args]); +t_from_form({type, _L, union, Args}, RecDict, VarDict) -> + t_sup([t_from_form(A, RecDict, VarDict) || A <- Args]); +t_from_form({type, _L, Name, Args}, RecDict, VarDict) -> + case lookup_type(Name, RecDict) of + {type, {_Module, Type, ArgNames}} when length(Args) =:= length(ArgNames) -> + List = lists:zipwith(fun(ArgName, ArgType) -> + {ArgName, t_from_form(ArgType, RecDict, VarDict)} + end, ArgNames, Args), + TmpVardict = dict:from_list(List), + t_from_form(Type, RecDict, TmpVardict); + {opaque, {Module, Type, ArgNames}} when length(Args) =:= length(ArgNames) -> + List = lists:zipwith(fun(ArgName, ArgType) -> + {ArgName, t_from_form(ArgType, RecDict, VarDict)} + end, ArgNames, Args), + TmpVardict = dict:from_list(List), + Rep = t_from_form(Type, RecDict, TmpVardict), + t_from_form({opaque, -1, Name, {Module, Args, Rep}}, RecDict, VarDict); + {type, _} -> + throw({error, io_lib:format("Unknown type ~w\n", [Name])}); + {opaque, _} -> + throw({error, io_lib:format("Unknown opaque type ~w\n", [Name])}); + error -> + throw({error, io_lib:format("Unable to find type ~w\n", [Name])}) + end; +t_from_form({opaque, _L, Name, {Mod, Args, Rep}}, _RecDict, _VarDict) -> + case Args of + [] -> t_opaque(Mod, Name, Args, Rep); + _ -> throw({error, "Polymorphic opaque types not supported yet"}) + end. + +record_from_form({atom, _, Name}, ModFields, RecDict, VarDict) -> + case lookup_record(Name, RecDict) of + {ok, DeclFields} -> + case get_mod_record(ModFields, DeclFields, RecDict, VarDict) of + {error, FieldName} -> + throw({error, io_lib:format("Illegal declaration of ~w#{~w}\n", + [Name, FieldName])}); + {ok, NewFields} -> + t_tuple([t_atom(Name)|[Type || {_FieldName, Type} <- NewFields]]) + end; + error -> + throw({error, + erlang:error(io_lib:format("Unknown record #~w{}\n", [Name]))}) + end. + +get_mod_record([], DeclFields, _RecDict, _VarDict) -> + {ok, DeclFields}; +get_mod_record(ModFields, DeclFields, RecDict, VarDict) -> + DeclFieldsDict = orddict:from_list(DeclFields), + ModFieldsDict = build_field_dict(ModFields, RecDict, VarDict), + case get_mod_record(DeclFieldsDict, ModFieldsDict, []) of + {error, _FieldName} = Error -> Error; + {ok, FinalOrdDict} -> + {ok, [{FieldName, orddict:fetch(FieldName, FinalOrdDict)} + || {FieldName, _} <- DeclFields]} + end. + +build_field_dict(FieldTypes, RecDict, VarDict) -> + build_field_dict(FieldTypes, RecDict, VarDict, []). + +build_field_dict([{type, _, field_type, [{atom, _, Name}, Type]}|Left], + RecDict, VarDict, Acc) -> + NewAcc = [{Name, t_from_form(Type, RecDict, VarDict)}|Acc], + build_field_dict(Left, RecDict, VarDict, NewAcc); +build_field_dict([], _RecDict, _VarDict, Acc) -> + orddict:from_list(Acc). + +get_mod_record([{FieldName, DeclType}|Left1], + [{FieldName, ModType}|Left2], Acc) -> + case t_is_var(ModType) orelse t_is_subtype(ModType, DeclType) of + false -> {error, FieldName}; + true -> get_mod_record(Left1, Left2, [{FieldName, ModType}|Acc]) + end; +get_mod_record([{FieldName1, _DeclType} = DT|Left1], + [{FieldName2, _ModType}|_] = List2, + Acc) when FieldName1 < FieldName2 -> + get_mod_record(Left1, List2, [DT|Acc]); +get_mod_record(DeclFields, [], Acc) -> + {ok, orddict:from_list(Acc ++ DeclFields)}; +get_mod_record(_, [{FieldName2, _ModType}|_], _Acc) -> + {error, FieldName2}. + +-spec t_form_to_string(parse_form()) -> string(). + +t_form_to_string({var, _L, '_'}) -> "_"; +t_form_to_string({var, _L, Name}) -> atom_to_list(Name); +t_form_to_string({atom, _L, Atom}) -> + io_lib:write_string(atom_to_list(Atom), $'); % To quote or not to quote... ' +t_form_to_string({integer, _L, Int}) -> integer_to_list(Int); +t_form_to_string({ann_type, _L, [Var, Type]}) -> + t_form_to_string(Var) ++ "::" ++ t_form_to_string(Type); +t_form_to_string({paren_type, _L, [Type]}) -> + io_lib:format("(~s)", [t_form_to_string(Type)]); +t_form_to_string({remote_type, _L, [{atom, _, Mod}, {atom, _, Name}, Args]}) -> + ArgString = "(" ++ sequence(t_form_to_string_list(Args), ",") ++ ")", + io_lib:format("~w:~w", [Mod, Name]) ++ ArgString; +t_form_to_string({type, _L, arity, []}) -> "arity()"; +t_form_to_string({type, _L, 'fun', []}) -> "fun()"; +t_form_to_string({type, _L, 'fun', [{type, _, any, []}, Range]}) -> + "fun(...) -> " ++ t_form_to_string(Range); +t_form_to_string({type, _L, 'fun', [{type, _, product, Domain}, Range]}) -> + "fun((" ++ sequence(t_form_to_string_list(Domain), ",") ++ ") -> " + ++ t_form_to_string(Range) ++ ")"; +t_form_to_string({type, _L, iodata, []}) -> "iodata()"; +t_form_to_string({type, _L, iolist, []}) -> "iolist()"; +t_form_to_string({type, _L, list, [Type]}) -> + "[" ++ t_form_to_string(Type) ++ "]"; +t_form_to_string({type, _L, mfa, []}) -> "mfa()"; +t_form_to_string({type, _L, module, []}) -> "module()"; +t_form_to_string({type, _L, node, []}) -> "node()"; +t_form_to_string({type, _L, nonempty_list, [Type]}) -> + "[" ++ t_form_to_string(Type) ++ ",...]"; +t_form_to_string({type, _L, nonempty_string, []}) -> "nonempty_string()"; +t_form_to_string({type, _L, product, Elements}) -> + "<" ++ sequence(t_form_to_string_list(Elements), ",") ++ ">"; +t_form_to_string({type, _L, range, [{integer, _, From}, {integer, _, To}]}) -> + io_lib:format("~w..~w", [From, To]); +t_form_to_string({type, _L, record, [{atom, _, Name}]}) -> + io_lib:format("#~w{}", [Name]); +t_form_to_string({type, _L, record, [{atom, _, Name}|Fields]}) -> + FieldString = sequence(t_form_to_string_list(Fields), ","), + io_lib:format("#~w{~s}", [Name, FieldString]); +t_form_to_string({type, _L, field_type, [{atom, _, Name}, Type]}) -> + io_lib:format("~w::~s", [Name, t_form_to_string(Type)]); +t_form_to_string({type, _L, term, []}) -> "term()"; +t_form_to_string({type, _L, timeout, []}) -> "timeout()"; +t_form_to_string({type, _L, tuple, any}) -> "tuple()"; +t_form_to_string({type, _L, tuple, Args}) -> + "{" ++ sequence(t_form_to_string_list(Args), ",") ++ "}"; +t_form_to_string({type, _L, union, Args}) -> + sequence(t_form_to_string_list(Args), " | "); +t_form_to_string({type, _L, Name, []} = T) -> + try t_to_string(t_from_form(T)) + catch throw:{error, _} -> atom_to_list(Name) ++ "()" + end; +t_form_to_string({type, _L, binary, [{integer, _, X}, {integer, _, Y}]}) -> + case Y of + 0 -> + case X of + 0 -> "<<>>"; + _ -> io_lib:format("<<_:~w>>", [X]) + end + end; +t_form_to_string({type, _L, Name, List}) -> + io_lib:format("~w(~s)", [Name, sequence(t_form_to_string_list(List), ",")]). + +t_form_to_string_list(List) -> + t_form_to_string_list(List, []). + +t_form_to_string_list([H|T], Acc) -> + t_form_to_string_list(T, [t_form_to_string(H)|Acc]); +t_form_to_string_list([], Acc) -> + lists:reverse(Acc). + +%%============================================================================= +%% +%% Utilities +%% +%%============================================================================= + +-spec any_none([erl_type()]) -> boolean(). + +any_none([?none|_Left]) -> true; +any_none([_|Left]) -> any_none(Left); +any_none([]) -> false. + +-spec any_none_or_unit([erl_type()]) -> boolean(). + +any_none_or_unit([?none|_]) -> true; +any_none_or_unit([?unit|_]) -> true; +any_none_or_unit([_|Left]) -> any_none_or_unit(Left); +any_none_or_unit([]) -> false. + +-spec lookup_record(atom(), dict()) -> 'error' | {'ok', [{atom(), erl_type()}]}. + +lookup_record(Tag, RecDict) when is_atom(Tag) -> + case dict:find({record, Tag}, RecDict) of + {ok, [{_Arity, Fields}]} -> {ok, Fields}; + {ok, List} when is_list(List) -> + %% This will have to do, since we do not know which record we + %% are looking for. + error; + error -> + error + end. + +-spec lookup_record(atom(), arity(), dict()) -> 'error' | {'ok', [{atom(), erl_type()}]}. + +lookup_record(Tag, Arity, RecDict) when is_atom(Tag) -> + case dict:find({record, Tag}, RecDict) of + {ok, [{Arity, Fields}]} -> {ok, Fields}; + {ok, OrdDict} -> orddict:find(Arity, OrdDict); + error -> error + end. + +lookup_type(Name, RecDict) -> + case dict:find({type, Name}, RecDict) of + error -> + case dict:find({opaque, Name}, RecDict) of + error -> error; + {ok, Found} -> {opaque, Found} + end; + {ok, Found} -> {type, Found} + end. + +-spec type_is_defined('type' | 'opaque', atom(), dict()) -> boolean(). + +type_is_defined(TypeOrOpaque, Name, RecDict) -> + dict:is_key({TypeOrOpaque, Name}, RecDict). + +%% ----------------------------------- +%% Set +%% + +set_singleton(Element) -> + ordsets:from_list([Element]). + +set_is_singleton(Element, Set) -> + set_singleton(Element) =:= Set. + +set_is_element(Element, Set) -> + ordsets:is_element(Element, Set). + +set_union(?any, _) -> ?any; +set_union(_, ?any) -> ?any; +set_union(S1, S2) -> + case ordsets:union(S1, S2) of + S when length(S) =< ?SET_LIMIT -> S; + _ -> ?any + end. + +set_union_no_limit(?any, _) -> ?any; +set_union_no_limit(_, ?any) -> ?any; +set_union_no_limit(S1, S2) -> ordsets:union(S1, S2). + +%% The intersection and subtraction can return ?none. +%% This should always be handled right away since ?none is not a valid set. +%% However, ?any is considered a valid set. + +set_intersection(?any, S) -> S; +set_intersection(S, ?any) -> S; +set_intersection(S1, S2) -> + case ordsets:intersection(S1, S2) of + [] -> ?none; + S -> S + end. + +set_subtract(_, ?any) -> ?none; +set_subtract(?any, _) -> ?any; +set_subtract(S1, S2) -> + case ordsets:subtract(S1, S2) of + [] -> ?none; + S -> S + end. + +set_from_list(List) -> + case length(List) of + L when L =< ?SET_LIMIT -> ordsets:from_list(List); + L when L > ?SET_LIMIT -> ?any + end. + +set_to_list(Set) -> + ordsets:to_list(Set). + +set_filter(Fun, Set) -> + case ordsets:filter(Fun, Set) of + [] -> ?none; + NewSet -> NewSet + end. + +set_size(Set) -> + ordsets:size(Set). + +set_to_string(Set) -> + L = [case is_atom(X) of + true -> io_lib:write_string(atom_to_list(X), $'); % stupid emacs ' + false -> io_lib:format("~w", [X]) + end || X <- set_to_list(Set)], + sequence(L, [], " | "). + +set_min([H|_]) -> H. + +set_max(Set) -> + hd(lists:reverse(Set)). + +%%============================================================================= +%% +%% Utilities for the binary type +%% +%%============================================================================= + +-spec gcd(integer(), integer()) -> integer(). + +gcd(A, B) when B > A -> + gcd1(B, A); +gcd(A, B) -> + gcd1(A, B). + +-spec gcd1(integer(), integer()) -> integer(). + +gcd1(A, 0) -> A; +gcd1(A, B) -> + case A rem B of + 0 -> B; + X -> gcd1(B, X) + end. + +-spec bitstr_concat(erl_type(), erl_type()) -> erl_type(). + +bitstr_concat(?none, _) -> ?none; +bitstr_concat(_, ?none) -> ?none; +bitstr_concat(?bitstr(U1, B1), ?bitstr(U2, B2)) -> + t_bitstr(gcd(U1, U2), B1+B2). + +-spec bitstr_match(erl_type(), erl_type()) -> erl_type(). + +bitstr_match(?none, _) -> ?none; +bitstr_match(_, ?none) -> ?none; +bitstr_match(?bitstr(0, B1), ?bitstr(0, B2)) when B1 =< B2 -> + t_bitstr(0, B2-B1); +bitstr_match(?bitstr(0, _B1), ?bitstr(0, _B2)) -> + ?none; +bitstr_match(?bitstr(0, B1), ?bitstr(U2, B2)) when B1 =< B2 -> + t_bitstr(U2, B2-B1); +bitstr_match(?bitstr(0, B1), ?bitstr(U2, B2)) -> + t_bitstr(U2, handle_base(U2, B2-B1)); +bitstr_match(?bitstr(_, B1), ?bitstr(0, B2)) when B1 > B2 -> + ?none; +bitstr_match(?bitstr(U1, B1), ?bitstr(U2, B2)) -> + GCD = gcd(U1, U2), + t_bitstr(GCD, handle_base(GCD, B2-B1)). + +-spec handle_base(integer(), integer()) -> integer(). + +handle_base(Unit, Pos) when Pos >= 0 -> + Pos rem Unit; +handle_base(Unit, Neg) -> + (Unit+(Neg rem Unit)) rem Unit. + +%%============================================================================= +%% Consistency-testing function(s) below +%%============================================================================= + +-ifdef(DO_ERL_TYPES_TEST). + +test() -> + Atom1 = t_atom(), + Atom2 = t_atom(foo), + Atom3 = t_atom(bar), + true = t_is_atom(Atom2), + + True = t_atom(true), + False = t_atom(false), + Bool = t_boolean(), + true = t_is_boolean(True), + true = t_is_boolean(Bool), + false = t_is_boolean(Atom1), + + Binary = t_binary(), + true = t_is_binary(Binary), + + Bitstr = t_bitstr(), + true = t_is_bitstr(Bitstr), + + Bitstr1 = t_bitstr(7, 3), + true = t_is_bitstr(Bitstr1), + false = t_is_binary(Bitstr1), + + Bitstr2 = t_bitstr(16, 8), + true = t_is_bitstr(Bitstr2), + true = t_is_binary(Bitstr2), + + ?bitstr(8, 16) = t_subtract(t_bitstr(4, 12), t_bitstr(8, 12)), + ?bitstr(8, 16) = t_subtract(t_bitstr(4, 12), t_bitstr(8, 12)), + + Int1 = t_integer(), + Int2 = t_integer(1), + Int3 = t_integer(16#ffffffff), + true = t_is_integer(Int2), + true = t_is_byte(Int2), + false = t_is_byte(Int3), + false = t_is_byte(t_from_range(-1, 1)), + true = t_is_byte(t_from_range(1, ?MAX_BYTE)), + + Tuple1 = t_tuple(), + Tuple2 = t_tuple(3), + Tuple3 = t_tuple([Atom1, Int1]), + Tuple4 = t_tuple([Tuple1, Tuple2]), + Tuple5 = t_tuple([Tuple3, Tuple4]), + Tuple6 = t_limit(Tuple5, 2), + Tuple7 = t_limit(Tuple5, 3), + true = t_is_tuple(Tuple1), + + Port = t_port(), + Pid = t_pid(), + Ref = t_reference(), + Identifier = t_identifier(), + false = t_is_reference(Port), + true = t_is_identifier(Port), + + Function1 = t_fun(), + Function2 = t_fun(Pid), + Function3 = t_fun([], Pid), + Function4 = t_fun([Port, Pid], Pid), + Function5 = t_fun([Pid, Atom1], Int2), + true = t_is_fun(Function3), + + List1 = t_list(), + List2 = t_list(t_boolean()), + List3 = t_cons(t_boolean(), List2), + List4 = t_cons(t_boolean(), t_atom()), + List5 = t_cons(t_boolean(), t_nil()), + List6 = t_cons_tl(List5), + List7 = t_sup(List4, List5), + List8 = t_inf(List7, t_list()), + List9 = t_cons(), + List10 = t_cons_tl(List9), + true = t_is_boolean(t_cons_hd(List5)), + true = t_is_list(List5), + false = t_is_list(List4), + + Product1 = t_product([Atom1, Atom2]), + Product2 = t_product([Atom3, Atom1]), + Product3 = t_product([Atom3, Atom2]), + + Union1 = t_sup(Atom2, Atom3), + Union2 = t_sup(Tuple2, Tuple3), + Union3 = t_sup(Int2, Atom3), + Union4 = t_sup(Port, Pid), + Union5 = t_sup(Union4, Int1), + Union6 = t_sup(Function1, Function2), + Union7 = t_sup(Function4, Function5), + Union8 = t_sup(True, False), + true = t_is_boolean(Union8), + Union9 = t_sup(Int2, t_integer(2)), + true = t_is_byte(Union9), + Union10 = t_sup(t_tuple([t_atom(true), ?any]), + t_tuple([t_atom(false), ?any])), + + ?any = t_sup(Product3, Function5), + + Atom3 = t_inf(Union3, Atom1), + Union2 = t_inf(Union2, Tuple1), + Int2 = t_inf(Int1, Union3), + Union4 = t_inf(Union4, Identifier), + Port = t_inf(Union5, Port), + Function4 = t_inf(Union7, Function4), + ?none = t_inf(Product2, Atom1), + Product3 = t_inf(Product1, Product2), + Function5 = t_inf(Union7, Function5), + true = t_is_byte(t_inf(Union9, t_number())), + true = t_is_char(t_inf(Union9, t_number())), + + io:format("3? ~p ~n", [?int_set([3])]), + + RecDict = dict:store({foo, 2}, [bar, baz], dict:new()), + Record1 = t_from_term({foo, [1,2], {1,2,3}}), + + Types = [ + Atom1, + Atom2, + Atom3, + Binary, + Int1, + Int2, + Tuple1, + Tuple2, + Tuple3, + Tuple4, + Tuple5, + Tuple6, + Tuple7, + Ref, + Port, + Pid, + Identifier, + List1, + List2, + List3, + List4, + List5, + List6, + List7, + List8, + List9, + List10, + Function1, + Function2, + Function3, + Function4, + Function5, + Product1, + Product2, + Record1, + Union1, + Union2, + Union3, + Union4, + Union5, + Union6, + Union7, + Union8, + Union10, + t_inf(Union10, t_tuple([t_atom(true), t_integer()])) + ], + io:format("~p\n", [[t_to_string(X, RecDict) || X <- Types]]). + +-endif. diff --git a/lib/hipe/doc/Makefile b/lib/hipe/doc/Makefile new file mode 100644 index 0000000000..340f909aa6 --- /dev/null +++ b/lib/hipe/doc/Makefile @@ -0,0 +1,29 @@ +# ``The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved via the world wide web at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +SHELL=/bin/sh + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +clean: + -rm -f *.html edoc-info stylesheet.css + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk diff --git a/lib/hipe/doc/html/.gitignore b/lib/hipe/doc/html/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/hipe/doc/overview.edoc b/lib/hipe/doc/overview.edoc new file mode 100644 index 0000000000..0016478a8a --- /dev/null +++ b/lib/hipe/doc/overview.edoc @@ -0,0 +1,9 @@ + + HiPE overview page + +@title The HiPE Compiler + +@author The HiPE group [http://www.it.uu.se/research/group/hipe/] + +@doc This is the online documentation for the HiPE native code compiler. +The user interface is provided by the module {@link hipe}. diff --git a/lib/hipe/doc/pdf/.gitignore b/lib/hipe/doc/pdf/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/hipe/doc/src/Makefile b/lib/hipe/doc/src/Makefile new file mode 100644 index 0000000000..3b63e57549 --- /dev/null +++ b/lib/hipe/doc/src/Makefile @@ -0,0 +1,113 @@ +# ``The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved via the world wide web at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# The Initial Developer of the Original Code is Ericsson Utvecklings AB. +# Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +# AB. All Rights Reserved.'' +# +# $Id$ +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../../vsn.mk +VSN=$(HIPE_VSN) +APPLICATION=hipe + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/$(APPLICATION)-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +XML_APPLICATION_FILES = +XML_REF3_FILES = + +XML_PART_FILES = part_notes.xml +XML_CHAPTER_FILES = notes.xml + +BOOK_FILES = book.xml + +XML_FILES = \ + $(BOOK_FILES) $(XML_CHAPTER_FILES) \ + $(XML_PART_FILES) $(XML_REF3_FILES) $(XML_APPLICATION_FILES) + +GIF_FILES = + +# ---------------------------------------------------- + +HTML_FILES = $(XML_APPLICATION_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_PART_FILES:%.xml=$(HTMLDIR)/%.html) + +INFO_FILE = ../../info +EXTRA_FILES = \ + $(DEFAULT_GIF_FILES) \ + $(DEFAULT_HTML_FILES) \ + $(XML_REF3_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_CHAPTER_FILES:%.xml=$(HTMLDIR)/%.html) + +MAN3_FILES = $(XML_REF3_FILES:%.xml=$(MAN3DIR)/%.3) + +HTML_REF_MAN_FILE = $(HTMLDIR)/index.html + +TOP_PDF_FILE = $(PDFDIR)/$(APPLICATION)-$(VSN).pdf + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +XML_FLAGS += + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- +$(HTMLDIR)/%.gif: %.gif + $(INSTALL_DATA) $< $@ + +docs: pdf html man + +$(TOP_PDF_FILE): $(XML_FILES) + +pdf: $(TOP_PDF_FILE) + +html: gifs $(HTML_REF_MAN_FILE) + +man: $(MAN3_FILES) + +gifs: $(GIF_FILES:%=$(HTMLDIR)/%) + +debug opt: + +clean clean_docs: + rm -rf $(HTMLDIR)/* + rm -f $(MAN3DIR)/* + rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) + rm -f errs core *~ + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_docs_spec: docs + $(INSTALL_DIR) $(RELSYSDIR)/doc/pdf + $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELSYSDIR)/doc/pdf + $(INSTALL_DIR) $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(HTMLDIR)/* \ + $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR) + + +release_spec: diff --git a/lib/hipe/doc/src/book.xml b/lib/hipe/doc/src/book.xml new file mode 100644 index 0000000000..236dfc69a1 --- /dev/null +++ b/lib/hipe/doc/src/book.xml @@ -0,0 +1,38 @@ + + + + +
+ + 20062009 + Ericsson AB. All Rights Reserved. + + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + + + HiPE + + + + +
+ + + + HiPE + + + +
+ diff --git a/lib/hipe/doc/src/fascicules.xml b/lib/hipe/doc/src/fascicules.xml new file mode 100644 index 0000000000..28acc14624 --- /dev/null +++ b/lib/hipe/doc/src/fascicules.xml @@ -0,0 +1,12 @@ + + + + + + Release Notes + + + Off-Print + + + diff --git a/lib/hipe/doc/src/make.dep b/lib/hipe/doc/src/make.dep new file mode 100644 index 0000000000..d5f5844c21 --- /dev/null +++ b/lib/hipe/doc/src/make.dep @@ -0,0 +1,13 @@ +# ---------------------------------------------------- +# >>>> Do not edit this file <<<< +# This file was automaticly generated by +# /home/otp/bin/docdepend +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# TeX files that the DVI file depend on +# ---------------------------------------------------- + +book.dvi: book.tex + diff --git a/lib/hipe/doc/src/notes.xml b/lib/hipe/doc/src/notes.xml new file mode 100644 index 0000000000..8bb9320756 --- /dev/null +++ b/lib/hipe/doc/src/notes.xml @@ -0,0 +1,350 @@ + + + + +
+ + 20062009 + Ericsson AB. All Rights Reserved. + + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + + + HiPE Release Notes + otp_appnotes + nil + nil + nil + notes.xml +
+

This document describes the changes made to HiPE.

+ +
Hipe 3.7.4 + +
Improvements and New Features + + +

+ The documentation is now built with open source tools + (xsltproc and fop) that exists on most platforms. One + visible change is that the frames are removed.

+

+ Own Id: OTP-8201

+
+ +

+ Misc updates.

+

+ Own Id: OTP-8301

+
+
+
+ +
+ +
Hipe 3.7.3 + +
Improvements and New Features + + +

+ Various small bugs (one involving the handling of large + binaries) were corrected and some additions to its + functionality and/or code cleanups were done.

+

+ Own Id: OTP-8189

+
+
+
+ +
+ +
Hipe 3.7.2 + +
Improvements and New Features + + +

+ Miscellanous updates.

+

+ Own Id: OTP-8038

+
+
+
+ +
+ +
Hipe 3.7.1 + +
Improvements and New Features + + +

+ Minor updates and bug fixes.

+

+ Own Id: OTP-7958

+
+
+
+ +
+ + +
Hipe 3.7 + +
Improvements and New Features + + +

+ Miscellaneous updates.

+

+ Own Id: OTP-7877

+
+
+
+ +
+ +
Hipe 3.6.9 + +
Improvements and New Features + + +

The --disable-hipe option for the + configure will now completely disable the hipe + run-time in the emulator, as is the expected + behaviour.

+

+ Own Id: OTP-7631

+
+
+
+ +
+ +
Hipe 3.6.8 + +
Improvements and New Features + + +

+ Minor updates.

+

+ Own Id: OTP-7522

+
+
+
+ +
+ + +
Hipe 3.6.7 + +
Improvements and New Features + + +

+ Minor changes.

+

+ Own Id: OTP-7388

+
+
+
+ +
+ +
Hipe 3.6.6 + +
Fixed Bugs and Malfunctions + + +

A fix for an #include problem which caused the FP + exception test to fail unnecessarily on + debian/glibc-2.7/x86 systems.

+

Added SIGFPE loop detection to the FP exception test. + This prevents the test from looping indefinitely, which + could happen when the CPU is supported (so we can enable + FP exceptions on it) but the OS isn't (so we can't write + a proper SIGFPE handler). x86 on an unsupported OS is + known to have had this problem.

+

+ Own Id: OTP-7254

+
+
+
+ + +
Improvements and New Features + + +

+ HiPE now also supports little-endian ARM processors.

+

+ Own Id: OTP-7255

+
+
+
+ +
+ +
Hipe 3.6.5 + +
Fixed Bugs and Malfunctions + + +

+ HIPE: Corrected the choice of interface to the send/3 and + setnode/3 BIFs for native-compiled code. Using the + incorrect interface could, in unusual circumstances, lead + to random runtime errors.

+

+ Own Id: OTP-7067

+
+
+
+ + +
Improvements and New Features + + +

+ The HiPE compiler's SPARC backend has been rewritten, + improving its correctness and long-term maintainability.

+

+ Own Id: OTP-7133

+
+
+
+ +
+ +
+ Hipe 3.6.3 + +
+ Improvements and New Features + + +

Minor Makefile changes.

+

Own Id: OTP-6689

+
+ +

Miscellanous updates.

+

Own Id: OTP-6738

+
+
+
+
+ +
+ Hipe 3.6.2 + +
+ Improvements and New Features + + +

Miscellanous improvements.

+

Own Id: OTP-6577

+
+
+
+
+ +
+ Hipe 3.6.1.1 + +
+ Fixed Bugs and Malfunctions + + +

Dialyzer could fail to analyze certain beam files that + used try/catch.

+

Own Id: OTP-6449 Aux Id: seq10563

+
+
+
+
+ +
+ Hipe 3.6.1 + +
+ Improvements and New Features + + +

HiPE runtime system:

+

* added notes about supported systems to README

+

* support 32-bit x86 on FreeBSD

+

* autoenable HiPE on FreeBSD (32-bit x86) and Solaris + (64-bit x86)

+

* updated x86 runtime system to support glibc-2.5

+

* work around probable gcc-4.1.1 bug affecting the x86 + runtime system

+

HiPE compiler:

+

* improved performance of integer multiplications on + all platforms

+

* corrected a code optimisation error in R11B-2 that + broke some bsl/bsr operations on all platforms

+

* corrected a type error in the ARM backend which + could cause the compiler to crash

+

* corrected an error in the SPARC backend's naive + register allocator which could throw the compiler into an + infinite loop

+

Own Id: OTP-6423

+
+
+
+
+ +
+ Hipe 3.6.0 + +
+ Improvements and New Features + + +

Support for native code on Solaris 10/AMD64.

+

Support for native code on FreeBSD/AMD64.

+

Native code now handles external funs (). Native code can now also apply so-called + tuple-funs (). (Tuple funs are NOT + recommended for new code; they are deprecated and will be + removed in some future release.)

+

Own Id: OTP-6305

+
+
+
+
+ +
+ Hipe 3.5.6 + +
+ Improvements and New Features + + +

Improved compilation of receives for the SMP runtime + system.

+

Improved code quality in HiPE compiler on ARM.

+

Fix bug in handling of re-raised exceptions in + try-catch.

+

(HiPE loader) When native code is incompatible with + the current runtime system, fall back to loading the BEAM + code.

+

Own Id: OTP-6127

+
+
+
+
+
+ diff --git a/lib/hipe/doc/src/part_notes.xml b/lib/hipe/doc/src/part_notes.xml new file mode 100644 index 0000000000..8a3e82027b --- /dev/null +++ b/lib/hipe/doc/src/part_notes.xml @@ -0,0 +1,35 @@ + + + + +
+ + 20062009 + Ericsson AB. All Rights Reserved. + + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + + + HiPE Release Notes + + + + +
+ +

HiPE - High Performance Erlang.

+
+ +
+ diff --git a/lib/hipe/ebin/.gitignore b/lib/hipe/ebin/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/hipe/flow/Makefile b/lib/hipe/flow/Makefile new file mode 100644 index 0000000000..5b9d0b7582 --- /dev/null +++ b/lib/hipe/flow/Makefile @@ -0,0 +1,105 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = hipe_bb hipe_dominators hipe_gen_cfg + + +HRL_FILES= +INC_FILES= cfg.inc ebb.inc liveness.inc +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/flow + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(INC_FILES) $(RELSYSDIR)/flow + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +$(EBIN)/hipe_bb.beam: hipe_bb.hrl +$(EBIN)/hipe_gen_cfg.beam: cfg.hrl cfg.inc ../main/hipe.hrl diff --git a/lib/hipe/flow/cfg.hrl b/lib/hipe/flow/cfg.hrl new file mode 100644 index 0000000000..62f47a707a --- /dev/null +++ b/lib/hipe/flow/cfg.hrl @@ -0,0 +1,53 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%============================================================================ +%% File : cfg.hrl +%% Author : Kostis Sagonas +%% Purpose : Contains typed record declarations for the CFG data structures +%% +%% $Id$ +%%============================================================================ + +-type cfg_lbl() :: non_neg_integer(). + +%% +%% This is supposed to be local but appears here for the time being +%% just so that it is used below +%% +-record(cfg_info, {'fun' :: mfa(), + start_label :: cfg_lbl(), + is_closure :: boolean(), + closure_arity :: arity(), + is_leaf :: boolean(), + params, % :: list() + info = []}). %% this field seems not needed; take out?? + +%% +%% Data is a triple with a dict of constants, a list of labels and an integer +%% +-type cfg_data() :: {dict(), [cfg_lbl()], non_neg_integer()}. + +%% +%% The following is to be used by other modules +%% +-record(cfg, {table = gb_trees:empty() :: gb_tree(), + info :: #cfg_info{}, + data :: cfg_data()}). +-type cfg() :: #cfg{}. diff --git a/lib/hipe/flow/cfg.inc b/lib/hipe/flow/cfg.inc new file mode 100644 index 0000000000..62f399a81c --- /dev/null +++ b/lib/hipe/flow/cfg.inc @@ -0,0 +1,949 @@ +%% -*- Erlang -*- +%% -*- erlang-indent-level: 2 -*- +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% CONTROL FLOW GRAPHS +%% +%% Construct and manipulate the control flow graph of a function (program?). +%% +%% Exports: +%% ~~~~~~~~ +%% init(Code) - makes a CFG out of code. +%% bb(CFG, Label) - returns the basic block named 'Label' from the CFG. +%% bb_add(CFG, Label, NewBB) - makes NewBB the basic block associated +%% with Label. +%% succ(Map, Label) - returns a list of successors of basic block 'Label'. +%% pred(Map, Label) - returns the predecessors of basic block 'Label'. +%% fallthrough(CFG, Label) - returns fall-through successor of basic +%% block 'Label' (or 'none'). +%% conditional(CFG, Label) - returns conditional successor (or 'none') +%% start_label(CFG) - returns the label of the entry basic block. +%% params(CFG) - returns the list of parameters to the CFG. +%% labels(CFG) - returns a list of labels of all basic blocks in the CFG. +%% postorder(CFG) - returns a list of labels in postorder. +%% reverse_postorder(CFG) - returns a list of labels in reverse postorder. +%% cfg_to_linear(CFG) - converts CFG to linearized code. +%% remove_trivial_bbs(CFG) - removes empty BBs or BBs with only a goto. +%% remove_unreachable_code(CFG) - removes unreachable BBs. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% TODO: +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%===================================================================== +%% The following are ugly as hell, but what else can I do??? +%%===================================================================== + +-ifdef(GEN_CFG). +-define(PRED_NEEDED,true). +-endif. + +-ifdef(ICODE_CFG). +-define(CLOSURE_ARITY_NEEDED,true). +-define(INFO_NEEDED,true). +-define(PARAMS_NEEDED,true). +-define(PARAMS_UPDATE_NEEDED,true). +-define(PRED_NEEDED,true). +-define(REMOVE_TRIVIAL_BBS_NEEDED,true). +-define(REMOVE_UNREACHABLE_CODE,true). +-define(START_LABEL_UPDATE_NEEDED,true). +-define(CFG_CAN_HAVE_PHI_NODES,true). +-endif. + +-ifdef(RTL_CFG). +-define(PREORDER,true). +-define(FIND_NEW_LABEL_NEEDED,true). +-define(INFO_NEEDED,true). +-define(PARAMS_NEEDED,true). +-define(PARAMS_UPDATE_NEEDED,true). +-define(PRED_NEEDED,true). +-define(REMOVE_TRIVIAL_BBS_NEEDED,true). +-define(REMOVE_UNREACHABLE_CODE,true). +-define(START_LABEL_UPDATE_NEEDED,true). +-define(CFG_CAN_HAVE_PHI_NODES,true). +-endif. + +-ifdef(SPARC_CFG). +-define(BREADTH_ORDER,true). % for linear scan +-define(PARAMS_NEEDED,true). +-define(START_LABEL_UPDATE_NEEDED,true). +-endif. + +%%===================================================================== + +-ifdef(START_LABEL_UPDATE_NEEDED). +-export([start_label_update/2]). +-endif. + +-ifdef(BREADTH_ORDER). +-export([breadthorder/1]). +-endif. + +-compile(inline). + +%%===================================================================== +%% +%% Interface functions that MUST be implemented in the including file: +%% +%% linear_to_cfg(LinearCode) -> CFG, constructs the cfg. +%% is_label(Instr) -> boolean(), true if instruction is a label. +%% label_name(Instr) -> term(), the name of a label. +%% branch_successors(Instr) -> [term()], the successors of a branch. +%% fails_to(Instr) -> [term()], the fail-successors of an instruction. +%% is_branch(Instr) -> boolean(), true if instruction is a branch. +%% is_comment(Instr) -> boolean(), +%% true if instruction is a comment, used by remove dead code. +%% is_goto(Instr) -> boolean(), +%% true if instruction is a pure goto, used by remove dead code. +%% redirect_jmp(Jmp, ToOld, ToNew) -> NewJmp, +%% redirect_ops(Labels, CFG, Map) -> CFG. +%% Rewrite instructions with labels in operands to use +%% the new label as given by map. +%% Use find_new_label(OldLab,Map) to get the new label. +%% (See hipe_sparc_cfg for example) +%% pp(CFG) -> ok, do some nifty output. +%% cfg_to_linear(CFG) -> LinearCode, linearizes the code of CFG +%% mk_goto(Label) -> instruction +%% is_phi(Instr) -> boolean(), true if the instruction is a phi-instruction. +%% phi_remove_pred(PhiInstr, Pred) -> NewPhi, +%% Removes the predecessor Pred from the phi instruction. +%% highest_var(Code) -> term(), +%% Returns the highest variable used or defined in the code. +%% +%%===================================================================== + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Primitives (not all of these are exported) +%% + +-spec start_label(cfg()) -> cfg_lbl(). +start_label(CFG) -> (CFG#cfg.info)#cfg_info.start_label. + +-ifndef(GEN_CFG). +-spec start_label_update(cfg(), cfg_lbl()) -> cfg(). +start_label_update(CFG, NewStartLabel) -> + Info = CFG#cfg.info, + CFG#cfg{info = Info#cfg_info{start_label = NewStartLabel}}. + +-spec function(cfg()) -> mfa(). +function(CFG) -> (CFG#cfg.info)#cfg_info.'fun'. + +-spec is_closure(cfg()) -> boolean(). +is_closure(CFG) -> (CFG#cfg.info)#cfg_info.is_closure. + +-spec is_leaf(cfg()) -> boolean(). +is_leaf(CFG) -> (CFG#cfg.info)#cfg_info.is_leaf. + +mk_empty_cfg(Fun, StartLbl, Data, Closure, Leaf, Params) -> + Info = #cfg_info{'fun' = Fun, + start_label = StartLbl, + is_closure = Closure, + is_leaf = Leaf, + params = Params}, + #cfg{table = gb_trees:empty(), data = Data, info = Info}. + +data(CFG) -> CFG#cfg.data. +-endif. % GEN_CFG + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). +-spec update_data(cfg(), cfg_data()) -> cfg(). +update_data(CFG, D) -> + CFG#cfg{data = D}. +-endif. + +-ifdef(PARAMS_NEEDED). +params(CFG) -> (CFG#cfg.info)#cfg_info.params. +-endif. + +-ifdef(PARAMS_UPDATE_NEEDED). +params_update(CFG, NewParams) -> + Info = CFG#cfg.info, + CFG#cfg{info = Info#cfg_info{params = NewParams}}. +-endif. + +-ifdef(CLOSURE_ARITY_NEEDED). +-spec closure_arity(cfg()) -> arity(). +closure_arity(CFG) -> + Info = CFG#cfg.info, + Info#cfg_info.closure_arity. + +-spec closure_arity_update(cfg(), arity()) -> cfg(). +closure_arity_update(CFG, Arity) -> + Info = CFG#cfg.info, + CFG#cfg{info = Info#cfg_info{closure_arity = Arity}}. +-endif. + +%% %% Don't forget to do a start_label_update if necessary. +%% update_code(CFG, NewCode) -> +%% take_bbs(NewCode, CFG). + +-ifdef(INFO_NEEDED). +info(CFG) -> (CFG#cfg.info)#cfg_info.info. +%% info_add(CFG, A) -> +%% As = info(CFG), +%% Info = CFG#cfg.info, +%% CFG#cfg{info = Info#cfg_info{info = [A|As]}}. +info_update(CFG, I) -> + Info = CFG#cfg.info, + CFG#cfg{info = Info#cfg_info{info = I}}. +-endif. + +%%===================================================================== +-ifndef(GEN_CFG). + +-spec other_entrypoints(cfg()) -> [cfg_lbl()]. +%% @doc Returns a list of labels that are refered to from the data section. + +other_entrypoints(CFG) -> + hipe_consttab:referred_labels(data(CFG)). + +%% is_entry(Lbl, CFG) -> +%% Lbl =:= start_label(CFG) orelse +%% lists:member(Lbl, other_entrypoints(CFG)). + +%% @spec bb(CFG::cfg(), Label::cfg_lbl()) -> basic_block() +%% @doc Returns the basic block of the CFG which begins at the Label. +bb(CFG, Label) -> + HT = CFG#cfg.table, + case gb_trees:lookup(Label, HT) of + {value, {Block,_Succ,_Pred}} -> + Block; + none -> + not_found + end. + +%% Remove duplicates from a list. The first instance (in left-to-right +%% order) of an element is kept, remaining instances are removed. +-spec remove_duplicates([cfg_lbl()]) -> [cfg_lbl()]. +remove_duplicates(List) -> + remove_duplicates(List, []). + +-spec remove_duplicates([cfg_lbl()], [cfg_lbl()]) -> [cfg_lbl()]. +remove_duplicates([H|T], Acc) -> + NewAcc = + case lists:member(H, Acc) of + false -> [H|Acc]; + true -> Acc + end, + remove_duplicates(T, NewAcc); +remove_duplicates([], Acc) -> + lists:reverse(Acc). + + +-ifdef(RTL_CFG). %% this could be CFG_CAN_HAVE_PHI_NODES + %% if Icode also starts using this function + +%% @spec bb_insert_between(CFG::cfg(), +%% Label::cfg_lbl(), NewBB::basic_block(), +%% Pred::cfg_lbl(), Succ::cfg_lbl()) -> cfg() +%% +%% @doc Insert the new basic block with label Label in the edge from +%% Pred to Succ + +bb_insert_between(CFG, Label, NewBB, Pred, Succ) -> + Last = hipe_bb:last(NewBB), + %% Asserts that NewBB ends in a label + true = is_branch(Last), + %% Asserts that the only Successor of NewBB is Succ + [Succ] = remove_duplicates(branch_successors(Last)), + HT = CFG#cfg.table, + %% Asserts that Label does not exist in the CFG + none = gb_trees:lookup(Label, HT), + %% Updates the predecessor of NewBB + {value, {PBB, PSucc, PPred}} = gb_trees:lookup(Pred, HT), + NewPSucc = [Label|lists:delete(Succ, PSucc)], + PLast = hipe_bb:last(PBB), + PButLast = hipe_bb:butlast(PBB), + NewPBB = hipe_bb:code_update(PBB, PButLast++[redirect_jmp(PLast, Succ, Label)]), + HT1 = gb_trees:update(Pred, {NewPBB,NewPSucc,PPred}, HT), + %% Updates the successor of NewBB + {value, {SBB, SSucc, SPred}} = gb_trees:lookup(Succ, HT1), + NewSPred = [Label|lists:delete(Pred, SPred)], + SCode = hipe_bb:code(SBB), + NewSCode = redirect_phis(SCode, Pred, Label, []), + NewSBB = hipe_bb:code_update(SBB, NewSCode), + HT2 = gb_trees:update(Succ, {NewSBB,SSucc,NewSPred}, HT1), + %% Enters NewBB into the CFG + HT3 = gb_trees:insert(Label, {NewBB,[Succ],[Pred]}, HT2), + CFG#cfg{table = HT3}. + +redirect_phis([], _OldPred, _NewPred, Acc) -> + lists:reverse(Acc); +redirect_phis([I|Rest], OldPred, NewPred, Acc) -> + case is_phi(I) of + true -> + Phi = phi_redirect_pred(I, OldPred, NewPred), + redirect_phis(Rest, OldPred, NewPred, [Phi|Acc]); + false -> + redirect_phis(Rest, OldPred, NewPred, [I|Acc]) + end. + +-endif. + +%% @spec bb_add(CFG::cfg(), Label::cfg_lbl(), NewBB::basic_block()) -> cfg() +%% @doc Adds a new basic block to a CFG (or updates an existing block). +bb_add(CFG, Label, NewBB) -> + %% Asserting that the NewBB is a legal basic block + Last = hipe_bb:last(NewBB), + case is_branch(Last) of + true -> ok; + false -> throw({?MODULE, {"Basic block ends without branch", Last}}) + end, + %% The order of the elements from branch_successors/1 is + %% significant. It determines the basic block order when the CFG is + %% converted to linear form. That order may have been tuned for + %% branch prediction purposes. + Succ = remove_duplicates(branch_successors(Last)), + HT = CFG#cfg.table, + {OldSucc, OldPred} = case gb_trees:lookup(Label, HT) of + {value, {_Block, OSucc, OPred}} -> + {OSucc, OPred}; + none -> + {[], []} + end, + %% Change this block to contain new BB and new successors, but keep + %% the old predecessors which will be updated in the following steps + HT1 = gb_trees:enter(Label, {NewBB, Succ, OldPred}, HT), + %% Add this block as predecessor to its new successors + HT2 = lists:foldl(fun (P, HTAcc) -> + add_pred(HTAcc, P, Label) + end, + HT1, Succ -- OldSucc), + %% Remove this block as predecessor of its former successors + HT3 = lists:foldl(fun (S, HTAcc) -> + remove_pred(HTAcc, S, Label) + end, + HT2, OldSucc -- Succ), + CFG#cfg{table = HT3}. + +remove_pred(HT, FromL, PredL) -> + case gb_trees:lookup(FromL, HT) of + {value, {Block, Succ, Preds}} -> + Code = hipe_bb:code(Block), + NewCode = remove_pred_from_phis(Code, PredL, []), + NewBlock = hipe_bb:code_update(Block, NewCode), + gb_trees:update(FromL, {NewBlock,Succ,lists:delete(PredL,Preds)}, HT); + none -> + HT + end. + +add_pred(HT, ToL, PredL) -> + case gb_trees:lookup(ToL, HT) of + {value,{Block,Succ,Preds}} -> + gb_trees:update(ToL, {Block,Succ,[PredL|lists:delete(PredL,Preds)]}, HT); + none -> + gb_trees:insert(ToL, {[],[],[PredL]}, HT) + end. + +%% find_highest_label(CFG) -> +%% Labels = labels(CFG), +%% lists:foldl(fun(X, Acc) -> erlang:max(X, Acc) end, 0, Labels). +%% +%% find_highest_var(CFG) -> +%% Labels = labels(CFG), +%% Fun = fun(X, Max) -> +%% Code = hipe_bb:code(bb(CFG, X)), +%% NewMax = highest_var(Code), +%% erlang:max(Max, NewMax) +%% end, +%% lists:foldl(Fun, 0, Labels). + +-ifdef(CFG_CAN_HAVE_PHI_NODES). +%% phi-instructions in a removed block's successors must be aware of +%% the change. +remove_pred_from_phis(List = [I|Left], Label, Acc) -> + case is_phi(I) of + true -> + NewAcc = [phi_remove_pred(I, Label)|Acc], + remove_pred_from_phis(Left, Label, NewAcc); + false -> + lists:reverse(Acc) ++ List + end; +remove_pred_from_phis([], _Label, Acc) -> + lists:reverse(Acc). +-else. +%% this is used for code representations like those of back-ends which +%% do not have phi-nodes. +remove_pred_from_phis(Code, _Label, _Acc) -> + Code. +-endif. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Constructs a CFG from a list of instructions. +%% + +take_bbs([], CFG) -> + CFG; +take_bbs(Xs, CFG) -> + Lbl = hd(Xs), + case is_label(Lbl) of + true -> + case take_bb(tl(Xs), []) of + {Code, Rest} -> + NewCFG = bb_add(CFG, label_name(Lbl), hipe_bb:mk_bb(Code)), + take_bbs(Rest, NewCFG) + end; + false -> + erlang:error({?MODULE,"basic block doesn't start with a label",Xs}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Take_bb returns: +%% - {Code, Rest}. +%% * Code is a list of all the instructions. +%% * Rest is the remainder of the instructions + +take_bb([], Code) -> + {lists:reverse(Code), []}; +take_bb([X, Y|Xs], Code) -> + case is_label(X) of + true -> %% Empty block fallthrough + {[mk_goto(label_name(X))], [X,Y|Xs]}; + false -> + case is_branch(X) of + true -> + case is_label(Y) of + true -> + {lists:reverse([X|Code]), [Y|Xs]}; + false -> + %% This should not happen... + %% move the problem to the next BB. + {lists:reverse([X|Code]), [Y|Xs]} + end; + false -> %% X not branch + case is_label(Y) of + true -> + {lists:reverse([mk_goto(label_name(Y)),X|Code]), [Y|Xs]}; + false -> + take_bb([Y|Xs], [X|Code]) + end + end + end; +take_bb([X], []) -> + case is_label(X) of + true -> + %% We don't want the CFG to just end with a label... + %% We loop forever instead... + {[X,mk_goto(label_name(X))],[]}; + false -> + {[X],[]} + end; +take_bb([X], Code) -> + case is_label(X) of + true -> + %% We don't want the CFG to just end with a label... + %% We loop for ever instead... + {lists:reverse(Code),[X,mk_goto(label_name(X))]}; + false -> + {lists:reverse([X|Code]),[]} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Functions for extracting the names of the basic blocks in various +%% orders. +%% + +labels(CFG) -> + HT = CFG#cfg.table, + gb_trees:keys(HT). + +postorder(CFG) -> + lists:reverse(reverse_postorder(CFG)). + +reverse_postorder(CFG) -> + Start = start_label(CFG), + {Ordering, _Visited} = + depth_search([Start|other_entrypoints(CFG)], none_visited(), CFG, []), + Ordering. + +depth_search([N|Ns], Visited, CFG, Acc) -> + case is_visited(N, Visited) of + true -> + depth_search(Ns, Visited, CFG, Acc); + false -> + {Order, Vis} = depth_search(succ(CFG, N), visit(N, Visited), CFG, Acc), + depth_search(Ns, Vis, CFG, [N|Order]) + end; +depth_search([], Visited, _, Ordering) -> + {Ordering, Visited}. + +-ifdef(PREORDER). +preorder(CFG) -> + Start = start_label(CFG), + {Ordering, _Visited} = + preorder_search([Start|other_entrypoints(CFG)], none_visited(), CFG, []), + lists:reverse(Ordering). + +preorder_search([N|Ns], Visited, CFG, Acc) -> + case is_visited(N, Visited) of + true -> + preorder_search(Ns, Visited, CFG, Acc); + false -> + {Order, Vis} = + preorder_search(succ(CFG, N), visit(N, Visited), CFG, [N|Acc]), + preorder_search(Ns, Vis, CFG, Order) + end; +preorder_search([], Visited, _, Ordering) -> + {Ordering,Visited}. +-endif. % PREORDER + +-ifdef(BREADTH_ORDER). +breadthorder(CFG) -> + lists:reverse(reverse_breadthorder(CFG)). + +reverse_breadthorder(CFG) -> + Start = start_label(CFG), + {Vis, RBO1} = breadth_list([Start], none_visited(), CFG, []), + {_Vis1, RBO2} = breadth_list(other_entrypoints(CFG), Vis, CFG, RBO1), + RBO2. + +breadth_list([X|Xs], Vis, CFG, BO) -> + case is_visited(X, Vis) of + true -> + breadth_list(Xs, Vis, CFG, BO); + false -> + breadth_list(Xs ++ succ(CFG, X), visit(X, Vis), CFG, [X|BO]) + end; +breadth_list([], Vis, _CFG, BO) -> + {Vis, BO}. +-endif. + +-spec none_visited() -> gb_set(). +none_visited() -> + gb_sets:empty(). + +visit(X, Vis) -> + gb_sets:add(X, Vis). + +is_visited(X, Vis) -> + gb_sets:is_member(X, Vis). + +-endif. % GEN_CFG + +%%--------------------------------------------------------------------- + +succ(SuccMap, Label) -> + HT = SuccMap#cfg.table, + case gb_trees:lookup(Label, HT) of + {value, {_Block,Succ,_Pred}} -> + Succ; + none -> + erlang:error({"successor not found", Label, SuccMap}) + end. + +-ifdef(PRED_NEEDED). +pred(Map, Label) -> + HT = Map#cfg.table, + case gb_trees:lookup(Label, HT) of + {value, {_Block,_Succ,Pred}} -> + Pred; + none -> + erlang:error({"predecessor not found", Label, Map}) + end. +-endif. % PRED_NEEDED + +-ifndef(GEN_CFG). +fallthrough(CFG, Label) -> + HT = CFG#cfg.table, + case gb_trees:lookup(Label, HT) of + {value, {_Block, Succ, _}} -> + case Succ of + [X|_] -> X; + _ -> none + end; + none -> + erlang:error({"fallthrough label not found", Label}) + end. + +conditional(CFG, Label) -> + HT = CFG#cfg.table, + {value,{_Block,Succ,_}} = gb_trees:lookup(Label, HT), + case Succ of + [] -> none; + [_] -> none; + [_|Labels] -> Labels + end. +-endif. % GEN_CFG + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Linearize the code in a CFG. Returns a list of instructions. +%% + +-ifdef(GEN_CFG). +-else. +linearize_cfg(CFG) -> + Start = start_label(CFG), + Vis = none_visited(), + {Vis0, NestedCode} = lin_succ(Start, CFG, Vis), + BlocksInData = hipe_consttab:referred_labels(data(CFG)), + AllCode = lin_other_entries(NestedCode, CFG, BlocksInData, Vis0), + lists:flatten(AllCode). + +lin_succ(none, _CFG, Vis) -> + {Vis, []}; +lin_succ([Label|Labels], CFG, Vis) -> + {Vis1, Code1} = lin_succ(Label, CFG, Vis), + {Vis2, Code2} = lin_succ(Labels, CFG, Vis1), + {Vis2, [Code1,Code2]}; +lin_succ([], _CFG, Vis) -> + {Vis, []}; +lin_succ(Label, CFG, Vis) -> + case is_visited(Label, Vis) of + true -> + {Vis, []}; % already visited + false -> + Vis0 = visit(Label, Vis), + case bb(CFG, Label) of + not_found -> + erlang:error({?MODULE, "No basic block with label", Label}); + BB -> + Fallthrough = fallthrough(CFG, Label), + Cond = conditional(CFG, Label), + LblInstr = mk_label(Label), + {Vis1, Code1} = lin_succ(Fallthrough, CFG, Vis0), + {Vis2, Code2} = lin_succ(Cond, CFG, Vis1), + {Vis2, [[LblInstr|hipe_bb:code(BB)], Code1, Code2]} + end + end. + +lin_other_entries(Code, _CFG, [], _Vis) -> + Code; +lin_other_entries(Code, CFG, [E|Es], Vis) -> + {Vis0, MoreCode} = lin_succ(E, CFG, Vis), + lin_other_entries([Code, MoreCode], CFG, Es, Vis0). +-endif. + +-ifdef(FIND_NEW_LABEL_NEEDED). +find_new_label(Old, Map) -> + forward(Old, Map). +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Remove empty BBs. +%% +%% Removes basic blocks containing only a goto to another BB. +%% Branches to removed blocks are updated to the successor of the +%% removed block. +%% Loads (or other operations) on the label of the BB are also +%% updated. So are any references from the data section. +%% + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). + +-spec remove_trivial_bbs(cfg()) -> cfg(). +remove_trivial_bbs(CFG) -> + ?opt_start_timer("Merge BBs"), + CFG0 = merge_bbs(rewrite_trivial_branches(CFG)), + ?opt_stop_timer("Merge BBs"), + %% pp(CFG0), + ?opt_start_timer("FindDead"), + {NewMap, CFG1} = remap(labels(CFG0), rd_map_new(), CFG0), + ?opt_stop_timer("FindDead"), + ?opt_start_timer("Labels"), + Labels = labels(CFG1), + ?opt_stop_timer("Labels"), + ?opt_start_timer("RedirectBranches"), + CFG2 = redirect_branches(NewMap, CFG1), + ?opt_stop_timer("RedirectBranches"), + ?opt_start_timer("RedirectOps"), + CFG3 = redirect_ops(Labels, CFG2, NewMap), + ?opt_stop_timer("RedirectOps"), + ?opt_start_timer("RedirectData"), + CFG4 = redirect_data(CFG3, NewMap), + ?opt_stop_timer("RedirectData"), + ?opt_start_timer("RedirectStart"), + CFG5 = redirect_start(CFG4, NewMap), + ?opt_stop_timer("RedirectStart"), + %% pp(CFG5), + CFG5. + +redirect_start(CFG, Map) -> + Start = start_label(CFG), + case forward(Start, Map) of + Start -> CFG; + NewStart -> + start_label_update(CFG, NewStart) + end. + +redirect_data(CFG, Map) -> + Data = data(CFG), + NewData = hipe_consttab:update_referred_labels(Data, rd_succs(Map)), + update_data(CFG, NewData). + +redirect_branches(Map, CFG) -> + lists:foldl(fun ({From,{newsuccs,Redirects}}, CFGAcc) -> + lists:foldl( + fun({ToOld,ToNew}, CFG1) -> + case bb(CFG1, From) of + not_found -> + CFG1; + _ -> + To = forward(ToNew, Map), + redirect(CFG1, From, ToOld, To) + end + end, + CFGAcc, + Redirects); + (_, CFGAcc) -> CFGAcc + end, + CFG, + gb_trees:to_list(Map)). + +redirect(CFG, From, ToOld, ToNew) -> + BB = bb(CFG, From), + LastInstr = hipe_bb:last(BB), + NewLastInstr = redirect_jmp(LastInstr, ToOld, ToNew), + NewBB = hipe_bb:mk_bb(hipe_bb:butlast(BB) ++ [NewLastInstr]), + bb_add(CFG, From, NewBB). + +bb_remove(CFG, Label) -> + HT = CFG#cfg.table, + case gb_trees:lookup(Label, HT) of + {value, {_Block, Succ, _Preds}} -> + %% Remove this block as a pred from all successors. + HT1 = lists:foldl(fun (S,HTAcc) -> + remove_pred(HTAcc, S, Label) + end, + HT, Succ), + CFG#cfg{table = gb_trees:delete(Label, HT1)}; + none -> + CFG + end. + +remap([L|Rest], Map, CFG) -> + case is_empty(bb(CFG, L)) of + true -> + case succ(CFG, L) of + [L] -> %% This is an empty (infinite) self loop. Leave it. + remap(Rest, Map, CFG); + [SuccL] -> + CFG1 = bb_remove(CFG, L), + NewMap = remap_to_succ(L, SuccL, Map, CFG), + remap(Rest, NewMap, CFG1) + end; + false -> + remap(Rest, Map, CFG) + end; +remap([], Map, CFG) -> + {Map, CFG}. + +remap_to_succ(L, SuccL, Map, PredMap) -> + insert_remap(L, forward(SuccL,Map), pred(PredMap,L), Map). + +%% Find the proxy for a BB +forward(L, Map) -> + case gb_trees:lookup(L, Map) of + {value, {dead, To}} -> + forward(To, Map); %% Hope this terminates. + _ -> L + end. + +%% A redirection map contains mappings from labels to +%% none -> this BB is not affected by the remapping. +%% {dead,To} -> this BB is dead, To is the new proxy. +%% {newsuccs,[{X,Y}|...]} -> The successor X is redirected to Y. + +rd_map_new() -> gb_trees:empty(). + +rd_succs(M) -> + lists:foldl(fun ({From,{dead,To}}, Acc) -> [{From,forward(To,M)}|Acc]; + (_, Acc) -> Acc + end, + [], + gb_trees:to_list(M)). + +add_redirectedto(L, From, To, Map) -> + case gb_trees:lookup(L, Map) of + {value, {newsuccs, NS}} -> + gb_trees:update(L,{newsuccs,[{From,To}|lists:keydelete(From,1,NS)]},Map); + {value, {dead, _}} -> Map; + none -> + gb_trees:insert(L, {newsuccs, [{From, To}]}, Map) + end. + +insert_remap(L, ToL, Preds, Map) -> + Map2 = gb_trees:enter(L, {dead, ToL}, Map), + lists:foldl(fun (Pred, AccMap) -> + add_redirectedto(Pred, L, ToL, AccMap) + end, + Map2, + Preds). + +is_empty(BB) -> + is_empty_bb(hipe_bb:code(BB)). + +is_empty_bb([I]) -> + is_goto(I); %% A BB with just a 'goto' is empty. +is_empty_bb([I|Is]) -> + case is_comment(I) of + true -> + is_empty_bb(Is); + false -> + false + end; +is_empty_bb([]) -> + true. + + +%% Rewrite all pure branches with one successor to goto:s + +-spec rewrite_trivial_branches(cfg()) -> cfg(). +rewrite_trivial_branches(CFG) -> + rewrite_trivial_branches(postorder(CFG), CFG). + +rewrite_trivial_branches([L|Left], CFG) -> + BB = bb(CFG, L), + Last = hipe_bb:last(BB), + case is_goto(Last) of + true -> + rewrite_trivial_branches(Left, CFG); + false -> + case is_pure_branch(Last) of + false -> + rewrite_trivial_branches(Left, CFG); + true -> + case succ(CFG, L) of + [Successor] -> + Head = hipe_bb:butlast(BB), + NewBB = hipe_bb:mk_bb(Head ++ [mk_goto(Successor)]), + NewCFG = bb_add(CFG, L, NewBB), + rewrite_trivial_branches(Left, NewCFG); + _ -> + rewrite_trivial_branches(Left, CFG) + end + end + end; +rewrite_trivial_branches([], CFG) -> + CFG. + + +%% Go through the CFG and find pairs of BBs that can be merged to one BB. +%% They are of the form: +%% +%% L +%% | +%% Successor +%% +%% That is, the block L has only one successor (Successor) and that +%% successor has no other predecessors than L. +%% +%% Note: calls might end a basic block + +merge_bbs(CFG) -> + lists:foldl(fun merge_successor/2, CFG, postorder(CFG)). + +%% If L fulfills the requirements, merge it with its successor. +merge_successor(L, CFG) -> + %% Get the BB L (If it still exists). + case bb(CFG, L) of + not_found -> CFG; + BB -> + StartLabel = start_label(CFG), + Last = hipe_bb:last(BB), + %% Note: Cannot use succ/2 since the instruction can have more than + %% one successor that are the same label. + case {branch_successors(Last), fails_to(Last)} of + {[Successor],[Successor]} -> + %% The single successor is the fail-label; don't merge. + CFG; + {[Successor],_} when Successor =/= StartLabel -> + %% Make sure the succesor only have this block as predecessor. + case [L] =:= pred(CFG, Successor) of + true -> + %% Remove the goto or remap fall-through in BB and merge the BBs + NewCode = merge(BB, bb(CFG, Successor), Successor), + NewBB = hipe_bb:mk_bb(NewCode), + bb_add(bb_remove(CFG, Successor), L, NewBB); + false -> + CFG + end; + _ -> + %% Not exactly one successor or tried to merge with the + %% entry point + CFG + end + end. + +%% Merge BB and BB2 +merge(BB, BB2, BB2_Label) -> + Head = hipe_bb:butlast(BB), + Last = hipe_bb:last(BB), + Tail = hipe_bb:code(BB2), + case is_goto(Last) of + true -> + %% Just ignore the goto. + Head ++ Tail; + false -> + %% The last instr is not a goto, + %% e.g. a call with only fall-through + %% Remove the fall-through with the []-label. + Head ++ [redirect_jmp(Last, BB2_Label, [])|Tail] + end. + +-endif. % REMOVE_TRIVIAL_BBS_NEEDED + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Remove unreachable BBs. +%% +%% A BB is unreachable if it cannot be reached by any path from the +%% start label of the function. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-ifdef(REMOVE_UNREACHABLE_CODE). + +-spec remove_unreachable_code(cfg()) -> cfg(). + +remove_unreachable_code(CFG) -> + Start = start_label(CFG), + Reachable = find_reachable([Start], CFG, gb_sets:from_list([Start])), + %% Reachable is an ordset: it comes from gb_sets:to_list/1. + %% So use ordset:subtract instead of '--' below. + Labels = ordsets:from_list(labels(CFG)), + case ordsets:subtract(Labels, Reachable) of + [] -> + CFG; + Remove -> + NewCFG = lists:foldl(fun(X, Acc) -> bb_remove(Acc, X) end, CFG, Remove), + remove_unreachable_code(NewCFG) + end. + +find_reachable([Label|Left], CFG, Acc) -> + NewAcc = gb_sets:add(Label, Acc), + Succ = succ(CFG, Label), + find_reachable([X || X <- Succ, not gb_sets:is_member(X, Acc)] ++ Left, + CFG, NewAcc); +find_reachable([], _CFG, Acc) -> + gb_sets:to_list(Acc). + +-endif. diff --git a/lib/hipe/flow/ebb.inc b/lib/hipe/flow/ebb.inc new file mode 100644 index 0000000000..42d7ff3793 --- /dev/null +++ b/lib/hipe/flow/ebb.inc @@ -0,0 +1,247 @@ +%% -*- Erlang -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% IDENTIFIES THE EXTENDED BASIC BLOCKS OF A CFG +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-export([cfg/1, + %% dag/2, + type/1, + node_label/1, + node_successors/1 + ]). +-ifdef(DEBUG_EBB). +-export([pp/1]). +-endif. + +-define(cfg, ?CFG). + +%%-------------------------------------------------------------------- +%% The extended basic block datatype +%% +%% An EBB is identified with the label of the root node. +%% It's a tree +%% +%% EBB := {ebb_node, Label, [EBB]} +%% | {ebb_leaf, SuccesorLabel} +%%-------------------------------------------------------------------- + +%% XXX: Cheating big time! no recursive types +-type ebb() :: {ebb_node, icode_lbl(), _} + | {ebb_leaf, icode_lbl()}. + +-record(ebb_node, {label :: icode_lbl(), successors :: [ebb()]}). +-record(ebb_leaf, {successor :: icode_lbl()}). + +%%-------------------------------------------------------------------- +%% Returns a list of extended basic blocks. +%%-------------------------------------------------------------------- + +-spec cfg(cfg()) -> [ebb()]. + +cfg(CFG) -> + Start = ?cfg:start_label(CFG), + Labels = ?cfg:reverse_postorder(CFG), + Roots = [Start], + Blocks = Labels -- Roots, + Visited = new_visited(), + build_all_ebb(Roots, Blocks, Visited, CFG). + +new_visited() -> + gb_sets:empty(). +visited(L, Visited) -> + gb_sets:is_member(L, Visited). +visit(L, Visited) -> + gb_sets:add(L, Visited). + +build_all_ebb(Roots, Blocks, Visited, CFG) -> + build_all_ebb(Roots, Blocks, Visited, CFG, []). + +build_all_ebb([], [], _, _CFG, Ebbs) -> + lists:reverse(Ebbs); +build_all_ebb([], [BlockLeft|BlocksLeft], Visited, CFG, Ebbs) -> + case visited(BlockLeft, Visited) of + true -> + build_all_ebb([], BlocksLeft, Visited, CFG, Ebbs); + false -> + build_all_ebb([BlockLeft], BlocksLeft, Visited, CFG, Ebbs) + end; +build_all_ebb([Root|Roots], Blocks, Visited, CFG, Ebbs) -> + {Ebb, NewVisited} = build_ebb(Root, Visited, CFG), + build_all_ebb(Roots, Blocks, NewVisited, CFG, [Ebb|Ebbs]). + +%% +%% Build the extended basic block with Lbl as its root. +%% + +build_ebb(Lbl, Visited, CFG) -> + build_ebb(Lbl, Visited, + fun (NodeL, NewVisited) -> {NodeL, NewVisited} end, + [], CFG). + +build_ebb(Lbl, Visited, MkFun, EBBs, CFG) -> + Succ = ?cfg:succ(CFG, Lbl), + add_succ(Succ, visit(Lbl, Visited), Lbl, MkFun, EBBs, CFG). + +add_succ([], Visited, Node, MkFun, EBBs, _CFG) -> + MkFun(mk_node(Node, lists:reverse(EBBs)), Visited); +add_succ([Lbl|Lbls], Visited, Node, MkFun, EBBs, CFG) -> + case [visited(Lbl, Visited)|?cfg:pred(CFG, Lbl)] of + [false,_] -> + build_ebb(Lbl, Visited, + fun (NewEbb, Visited0) -> + add_succ(Lbls, Visited0, Node, MkFun, [NewEbb|EBBs], CFG) + end, [], CFG); + _ -> + add_succ(Lbls, Visited, Node, MkFun, [mk_leaf(Lbl)|EBBs], CFG) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Generate a list of dags. +%% + +%% dag(EBBs, CFG) -> +%% Start = ?cfg:start_label(CFG), +%% Roots = [Start], +%% Edges = all_adges(EBBs, Roots), +%% start_dag(Roots, Edges, []). +%% +%% start_dag([], _Edges, _Visit) -> +%% []; +%% start_dag([Root|Roots], Edges, Visit) -> +%% case lists:member(Root, Visit) of +%% true -> +%% start_dag(Roots, Edges, Visit); +%% false -> +%% {Dag, Roots0, Visit0} = +%% fill_dag(Root, [Root], Edges, Roots, [Root|Visit]), +%% [lists:reverse(Dag) | start_dag(Roots0, Edges, Visit0)] +%% end. +%% +%% fill_dag(Lbl, Dag, Edges, Roots, Visit) -> +%% Succ = find_succ(Lbl, Edges), +%% add_dag_succ(Succ, Dag, Edges, Roots, Visit). +%% +%% add_dag_succ([], Dag, _Edges, Roots, Visit) -> +%% {Dag, Roots, Visit}; +%% add_dag_succ([S|Ss], Dag, Edges, Roots, Visit) -> +%% {Dag0, Roots0, Visit0} = add_dag_succ(Ss, Dag, Edges, Roots, Visit), +%% Pred = find_pred(S, Edges), +%% case all_in(Pred, Dag0) of +%% true -> +%% fill_dag(S, [S|Dag0], Edges, Roots0, [S|Visit0]); +%% false -> +%% {Dag0, [S|Roots], Visit0} +%% end. +%% +%% find_succ(_Lbl, []) -> +%% []; +%% find_succ(Lbl, [{Lbl, Succ}|Edges]) -> +%% [Succ | find_succ(Lbl, Edges)]; +%% find_succ(Lbl, [_|Edges]) -> +%% find_succ(Lbl, Edges). +%% +%% find_pred(_Lbl, []) -> +%% []; +%% find_pred(Lbl, [{Pred, Lbl}|Edges]) -> +%% [Pred | find_pred(Lbl, Edges)]; +%% find_pred(Lbl, [_|Edges]) -> +%% find_pred(Lbl, Edges). +%% +%% all_edges([], _Roots) -> +%% []; +%% all_edges([EBB|EBBs], Roots) -> +%% succ_edges(node_label(EBB), ebb_successors(EBB), EBBs, Roots). +%% +%% succ_edges(Lbl, [], EBBs, Roots) -> +%% case lists:member(Lbl, Roots) of +%% true -> +%% [{start, Lbl} | all_edges(EBBs, Roots)]; +%% false -> +%% all_edges(EBBs, Roots) +%% end; +%% succ_edges(Lbl, [S|Ss], EBBs, Roots) -> +%% [{Lbl, S} | succ_edges(Lbl, Ss, EBBs, Roots)]. +%% +%% all_in([], _List) -> +%% true; +%% all_in([X|Xs], List) -> +%% lists:member(X, List) andalso all_in(Xs, List). +%% +%% find_ebb(Lbl, [EBB|EBBs]) -> +%% case node_label(EBB) of +%% Lbl -> +%% EBB; +%% _ -> +%% find_ebb(Lbl, EBBs) +%% end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec mk_node(icode_lbl(), [ebb()]) -> #ebb_node{}. +mk_node(Label, Successors) -> #ebb_node{label=Label, successors=Successors}. + +-spec node_label(#ebb_node{}) -> icode_lbl(). +node_label(#ebb_node{label=Label}) -> Label. + +-spec node_successors(#ebb_node{}) -> [ebb()]. +node_successors(#ebb_node{successors=Successors}) -> Successors. + +-spec mk_leaf(icode_lbl()) -> #ebb_leaf{}. +mk_leaf(NextEbb) -> #ebb_leaf{successor=NextEbb}. +%% leaf_next(Leaf) -> Leaf#ebb_leaf.successor. + +-spec type(#ebb_node{}) -> 'node' ; (#ebb_leaf{}) -> 'leaf'. +type(#ebb_node{}) -> node; +type(#ebb_leaf{}) -> leaf. + +%% ebb_successors(EBB) -> +%% ordsets:from_list(ebb_successors0(EBB)). +%% +%% ebb_successors0(#ebb_leaf{successor=NextEBB}) -> +%% [NextEBB]; +%% ebb_successors0(#ebb_node{successors=SuccessorNodes}) -> +%% lists:append(lists:map(fun ebb_successors0/1, SuccessorNodes)). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Prettyprint a list of extended basic blocks +%% + +-ifdef(DEBUG_EBB). + +pp(EBBs) -> + lists:map(fun(E) -> pp(E, 0) end, EBBs). + +pp(EBB, Indent) -> + io:format([$~]++integer_to_list(Indent)++[$c],[$ ]), + case type(EBB) of + node -> + io:format("~w~n", [node_label(EBB)]), + lists:map(fun(E) -> pp(E, Indent+3) end, node_successors(EBB)); + leaf -> + io:format("* -> ~w~n", [leaf_next(EBB)]) + end. + +-endif. diff --git a/lib/hipe/flow/hipe_bb.erl b/lib/hipe/flow/hipe_bb.erl new file mode 100644 index 0000000000..16730f1dce --- /dev/null +++ b/lib/hipe/flow/hipe_bb.erl @@ -0,0 +1,81 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Basic Block Module +%% +%% Exports: +%% ~~~~~~~~ +%% mk_bb(Code) - construct a basic block. +%% code(BB) - returns the code. +%% code_update(BB, NewCode) - replace the code in a basic block. +%% last(BB) - returns the last instruction. +%% butlast(BB) - returns the code with the last instruction removed. +%% + +-module(hipe_bb). + +-export([mk_bb/1, + code/1, + code_update/2, + is_bb/1, + last/1, + butlast/1]). + +-include("hipe_bb.hrl"). + +%% +%% Constructs a basic block. +%% Returns a basic block: {bb, Code} +%% * Code is a list of instructions + +-spec mk_bb([_]) -> bb(). + +mk_bb(Code) -> + #bb{code=Code}. + +-spec is_bb(_) -> boolean(). + +is_bb(#bb{}) -> true; +is_bb(_) -> false. + +-spec code_update(bb(), [_]) -> bb(). + +code_update(BB, Code) -> + BB#bb{code = Code}. + +-spec code(bb()) -> [_]. + +code(#bb{code = Code}) -> + Code. + +-spec last(bb()) -> _. + +last(#bb{code = Code}) -> + lists:last(Code). + +-spec butlast(bb()) -> [_]. + +butlast(#bb{code = Code}) -> + butlast_1(Code). + +butlast_1([X|Xs]) -> butlast_1(Xs,X). + +butlast_1([X|Xs],Y) -> [Y|butlast_1(Xs,X)]; +butlast_1([],_) -> []. diff --git a/lib/hipe/flow/hipe_bb.hrl b/lib/hipe/flow/hipe_bb.hrl new file mode 100644 index 0000000000..f4d426dad1 --- /dev/null +++ b/lib/hipe/flow/hipe_bb.hrl @@ -0,0 +1,30 @@ +%%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%------------------------------------------------------------------- +%%% File : bb.hrl +%%% Author : Per Gustafsson +%%% Description : Typed record declaration for basic blocks +%%% +%%% Created : 20 Dec 2007 by Per Gustafsson +%%%------------------------------------------------------------------- + +-record(bb, {code=[] :: [_]}). + +-type bb() :: #bb{}. diff --git a/lib/hipe/flow/hipe_dominators.erl b/lib/hipe/flow/hipe_dominators.erl new file mode 100644 index 0000000000..3bfa6d43c4 --- /dev/null +++ b/lib/hipe/flow/hipe_dominators.erl @@ -0,0 +1,715 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%------------------------------------------------------------------------ +%% File : hipe_dominators.erl +%% Author : Christoffer Vikström +%% Daniel Deogun +%% Jesper Bengtsson +%% Created : 18 Mar 2002 +%% +%% @doc +%% Contains utilities for creating and manipulating dominator trees +%% and dominance frontiers from a CFG. +%% @end +%%------------------------------------------------------------------------ +-module(hipe_dominators). + +-export([domTree_create/1, + domTree_getChildren/2, + domTree_dominates/3, + domFrontier_create/2, + domFrontier_get/2]). + +-include("cfg.hrl"). + +%%======================================================================== +%% +%% CODE FOR CREATING AND MANIPULATING DOMINATOR TREES. +%% +%%======================================================================== + +-record(workDataCell, {dfnum = 0 :: non_neg_integer(), + dfparent = none :: 'none' | cfg_lbl(), + semi = none :: 'none' | cfg_lbl(), + ancestor = none :: 'none' | cfg_lbl(), + best = none :: 'none' | cfg_lbl(), + samedom = none :: 'none' | cfg_lbl(), + bucket = [] :: [cfg_lbl()]}). + +-record(domTree, {root :: cfg_lbl(), + size = 0 :: non_neg_integer(), + nodes = gb_trees:empty() :: gb_tree()}). +-type domTree() :: #domTree{}. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_create/1 +%% Purpose : Creates a complete dominator tree given a CFG. +%% Arguments : CFG - a Control Flow Graph representation +%% Returns : A dominator tree +%%>----------------------------------------------------------------------< + +-spec domTree_create(cfg()) -> domTree(). + +domTree_create(CFG) -> + {WorkData, DFS, N} = dfs(CFG), + DomTree = domTree_empty(hipe_gen_cfg:start_label(CFG)), + {DomData, WorkData2} = getIdoms(CFG, DomTree, WorkData, N, DFS), + finalize(WorkData2, DomData, 1, N, DFS). + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_empty/0 +%% Purpose : Creates an empty dominator tree. +%% Arguments : The root node +%% Returns : A dominator tree +%%>----------------------------------------------------------------------< + +domTree_empty(Node) -> + #domTree{root = Node}. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_createNode/2 +%% Purpose : Creates a new node and inserts it into the dominator tree. +%% Arguments : Node - The new node +%% DomTree - The target dominator tree +%% Returns : A dominator tree +%%>----------------------------------------------------------------------< + +domTree_createNode(Node, DomTree) -> + DomTree2 = domTree_setNodes(DomTree, + gb_trees:enter(Node, {none,[]}, + domTree_getNodes(DomTree))), + domTree_incSize(DomTree2). + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_getNode/2 +%% Purpose : Returns a specific node in the dominator tree. +%% Arguments : Node - The new node +%% DomTree - The target dominator tree +%% Returns : Node +%%>----------------------------------------------------------------------< + +domTree_getNode(Node, DomTree) -> + gb_trees:lookup(Node, domTree_getNodes(DomTree)). + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_getNodes/1 +%% Purpose : Retrieves the nodes from a dominator tree. +%% Arguments : DomTree - The target dominator tree +%% Returns : A map containing the nodes of the dominator tree. +%%>----------------------------------------------------------------------< + +domTree_getNodes(#domTree{nodes=Nodes}) -> Nodes. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_setNodes/2 +%% Purpose : Replaces the set of nodes in a dominator tree with a +%% new set of nodes. +%% Arguments : Nodes - The new set of nodes +%% DomTree - The target dominator tree +%% Returns : DomTree +%%>----------------------------------------------------------------------< + +domTree_setNodes(DomTree, Nodes) -> DomTree#domTree{nodes = Nodes}. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_setSize/2 +%% Purpose : Sets the size of the dominator tree, i.e. the number of +%% nodes in it. +%% Arguments : Size - The new size of the target dominator tree +%% DomTree - The target dominator tree +%% Returns : A dominator tree +%%>----------------------------------------------------------------------< + +domTree_setSize(DomTree, Size) -> DomTree#domTree{size = Size}. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_incSize/1 +%% Purpose : Increases the size of the dominator tree with one. +%% Arguments : DomTree - The target dominator tree +%% Returns : DomTree +%%>----------------------------------------------------------------------< + +domTree_incSize(DomTree) -> + Size = domTree_getSize(DomTree), + domTree_setSize(DomTree, Size + 1). + +%%>----------------------------------------------------------------------< +%% Procedure : get IDom/2 +%% Purpose : Retrieves the immediate dominators of a node in the +%% dominator tree. +%% Arguments : Node - The new node +%% DomTree - The target dominator tree +%% Returns : The immediate dominator +%%>----------------------------------------------------------------------< + +domTree_getIDom(Node, DomTree) -> + case domTree_getNode(Node, DomTree) of + {value, {IDom, _}} -> + IDom; + none -> + [] + end. + +%%>----------------------------------------------------------------------< +%% Procedure : getChildren/2 +%% Purpose : Retrieves the children of a node in the dominator tree. +%% Arguments : Node - The new node +%% DomTree - The target dominator tree +%% Returns : [children] +%%>----------------------------------------------------------------------< + +-spec domTree_getChildren(cfg_lbl(), domTree()) -> [cfg_lbl()]. + +domTree_getChildren(Node, DomTree) -> + case domTree_getNode(Node, DomTree) of + {value, {_, Children}} -> + Children; + none -> + [] + end. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_getSize/1 +%% Purpose : Retrieves the size of a dominator tree. +%% Arguments : DomTree - The target dominator tree +%% Returns : A number denoting the size of the dominator tree +%%>----------------------------------------------------------------------< + +domTree_getSize(#domTree{size=Size}) -> Size. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_getRoot/2 +%% Purpose : Retrieves the number of the root node in the dominator tree. +%% Arguments : DomTree - The target dominator tree +%% Returns : Number +%%>----------------------------------------------------------------------< + +domTree_getRoot(#domTree{root=Root}) -> Root. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_addChild/3 +%% Purpose : Inserts a new node as a child to another node in the +%% dominator tree. +%% Arguments : Node - The old node that should get a new child +%% Child - The new child node +%% DomTree - The target dominator tree +%% Returns : DomTree +%%>----------------------------------------------------------------------< + +domTree_addChild(Node, Child, DomTree) -> + {IDom, Children} = case domTree_getNode(Node, DomTree) of + {value, Tuple} -> + Tuple; + none -> + {none, []} + end, + Nodes = case lists:member(Child, Children) of + true -> + domTree_getNodes(DomTree); + false -> + gb_trees:enter(Node, {IDom, [Child|Children]}, + domTree_getNodes(DomTree)) + end, + domTree_setNodes(DomTree, Nodes). + +%%>----------------------------------------------------------------------< +%% Procedure : setIDom/3 +%% Purpose : Sets the immediate domminator of a node in the domminator tree. +%% Arguments : Node - The node whose immediate domminator we are seting +%% IDom - The immediate domminator +%% DomTree - The target dominator tree +%% Returns : DomTree +%% Notes : Is used to build the dominator tree. +%%>----------------------------------------------------------------------< + +setIDom(Node, IDom, DomTree) -> + DomTree1 = case domTree_getNode(Node, DomTree) of + none -> + domTree_createNode(Node, DomTree); + _ -> + DomTree + end, + DomTree2 = domTree_addChild(IDom, Node, DomTree1), + {value, {_, Children}} = domTree_getNode(Node, DomTree2), + domTree_setNodes(DomTree2, + gb_trees:enter(Node, {IDom, Children}, + domTree_getNodes(DomTree2))). + +%%>----------------------------------------------------------------------< +%% Procedure : lookup +%% Purpose : This function is used as a wrapper for the lookup function. +%% The function retrieves a particular element (defined by +%% Field) stored in a workDataCell in the table (defined by +%% Table). +%% Arguments : Field - Value defined in the workDataCell record +%% Key - Value used as a key in the table +%% Table - Table storing workDataCells +%% Returns : A value defined in the workDataCell record +%%>----------------------------------------------------------------------< + +lookup({Field, Key}, Table) when is_integer(Key) -> + WD = lookup_table(Key, Table), + case Field of + ancestor -> WD#workDataCell.ancestor; + best -> WD#workDataCell.best; + bucket -> WD#workDataCell.bucket; + dfnum -> WD#workDataCell.dfnum; + dfparent -> WD#workDataCell.dfparent; + samedom -> WD#workDataCell.samedom; + semi -> WD#workDataCell.semi + end. + +lookup_table(Key, Table) when is_integer(Key) -> + case gb_trees:lookup(Key, Table) of + {value, Data} -> + Data; + none -> + #workDataCell{} + end. + +%%>----------------------------------------------------------------------< +%% Procedure : update +%% Purpose : This function is used as a wrapper for the update function +%% The main purpose of the update function is therefore +%% change a particular cell in the table (Table) to the +%% value given as an argument (Value). +%% Arguments : Key - Value used as a key in the table +%% Field - Value defined in the workDataCell record. +%% Value - The new value that should replace the old in the table +%% Table - Table storing workDataCells +%% Returns : NewTable +%%>----------------------------------------------------------------------< + +update(Key, {Field, Value}, Table) -> + gb_trees:enter(Key, updateCell(Value, Field, lookup_table(Key, Table)), Table); +update(Key, List, Table) -> + gb_trees:enter(Key, update(List, lookup_table(Key, Table)), Table). + +update([{Field, Value} | T], WD) -> + update(T, updateCell(Value, Field, WD)); +update([], WD) -> WD. + +updateCell(Value, Field, WD) -> + case Field of + dfnum -> WD#workDataCell{dfnum = Value}; + dfparent -> WD#workDataCell{dfparent= Value}; + semi -> WD#workDataCell{semi = Value}; + ancestor -> WD#workDataCell{ancestor= Value}; + best -> WD#workDataCell{best = Value}; + samedom -> WD#workDataCell{samedom = Value}; + bucket -> WD#workDataCell{bucket = Value} + end. + +%%>----------------------------------------------------------------------< +%% Procedure : dfs/1 +%% Purpose : The main purpose of this function is to traverse the CFG in +%% a depth first order. It is aslo used to initialize certain +%% elements defined in a workDataCell. +%% Arguments : CFG - a Control Flow Graph representation +%% Returns : A table (WorkData) and the total number of elements in +%% the CFG. +%%>----------------------------------------------------------------------< + +dfs(CFG) -> + {WorkData, DFS, N} = dfs(CFG, hipe_gen_cfg:start_label(CFG), + none, 1, gb_trees:empty(), gb_trees:empty()), + {WorkData, DFS, N-1}. + +dfs(CFG, Node, Parent, N, WorkData, DFS) -> + case lookup({dfnum, Node}, WorkData) of + 0 -> + WorkData2 = update(Node, [{dfnum, N}, {dfparent, Parent}, + {semi, Node}, {best, Node}], WorkData), + DFS2 = gb_trees:enter(N, Node, DFS), + dfsTraverse(hipe_gen_cfg:succ(CFG, Node), CFG, Node, + N + 1, WorkData2, DFS2); + _ -> {WorkData, DFS, N} + end. + +%%>----------------------------------------------------------------------< +%% Procedure : dfsTraverse/6 +%% Purpose : This function acts as a help function for the dfs algorithm +%% in the sence that it traverses a list of nodes given by the +%% CFG. +%% Arguments : Node - The first element in the node list +%% SuccLst - The remainder of the node list +%% CFG - Control Flow Graph representation +%% Parent - Node representing the parent of the Node defined +%% above. +%% N - The total number of processed nodes. +%% WorkData - Table consisting of workDataCells +%% Returns : An updated version of the table (WorkData) and the +%% total number of nodes processed. +%%>----------------------------------------------------------------------< + +dfsTraverse([Node|T], CFG, Parent, N, WorkData, DFS) -> + {WorkData2, DFS2, N2} = dfs(CFG, Node, Parent, N, WorkData, DFS), + dfsTraverse(T, CFG, Parent, N2, WorkData2, DFS2); +dfsTraverse([], _, _, N, WorkData, DFS) -> {WorkData, DFS, N}. + +%%>----------------------------------------------------------------------< +%% Procedure : getIdoms/6 +%% Purpose : The purpose of this function is to compute the immediate +%% dominators. This is accomplished by traversing the CFG nodes +%% by their depth first number in a bottom up manner. That is, +%% the nodes are processed in a backward order (highest to +%% lowest number). +%% Arguments : CFG - Control Flow Graph representation +%% DomData - Table consisting of domTree cells +%% WorkData - Table consisting of workDataCells +%% Index - The index used for retrieving the node to be +%% processed +%% Returns : An updated version of the tables DomData and WorkData +%%>----------------------------------------------------------------------< + +getIdoms(CFG, DomData, WorkData, Index, DFS) + when is_integer(Index), Index > 1 -> + Node = lookup_table(Index, DFS), + PredLst = hipe_gen_cfg:pred(CFG, Node), + Par = lookup({dfparent, Node}, WorkData), + DfNumN = lookup({dfnum, Node}, WorkData), + {S, WorkData2} = getSemiDominator(PredLst, DfNumN, Par, WorkData), + WorkData3 = update(Node, {semi, S}, WorkData2), + OldBucket = lookup({bucket, S}, WorkData3), + WorkData4 = update(S, {bucket, [Node | OldBucket]}, WorkData3), + WorkData5 = linkTrees(Par, Node, WorkData4), + {WorkData6, DomData2} = filterBucket(lookup({bucket, Par}, WorkData5), + Par, WorkData5, DomData), + WorkData7 = update(Par, {bucket, []}, WorkData6), + getIdoms(CFG, DomData2, WorkData7, Index - 1, DFS); +getIdoms(_, DomData, WorkData, 1, _) -> + {DomData, WorkData}. + +%%>----------------------------------------------------------------------< +%% Procedure : getSemiDominator/4 +%% Purpose : The main purpose of this algorithm is to compute the semi +%% dominator of the node Node based on the Semidominator Theorem +%% Arguments : Preds - The list of predecessors of the node Node +%% Node - Node in the CFG +%% S - Parent of node Node (depth first parent) +%% WorkData - Table consisting of workDataCells +%% Returns : A tuple containing the semidominator and an updated version +%% of the table WorkData. +%%>----------------------------------------------------------------------< + +getSemiDominator([Pred|Preds], DfNumChild, S, WorkData) -> + {Sp, WorkData3} = + case lookup({dfnum, Pred}, WorkData) =< DfNumChild of + true -> + {Pred, WorkData}; + false -> + {AncLowSemi, WorkData2} = getAncestorWithLowestSemi(Pred, WorkData), + {lookup({semi, AncLowSemi}, WorkData2), WorkData2} + end, + S2 = case lookup({dfnum, Sp}, WorkData3) < lookup({dfnum, S}, WorkData3) of + true -> Sp; + false -> S + end, + getSemiDominator(Preds, DfNumChild, S2, WorkData3); +getSemiDominator([], _, S, WorkData) -> + {S, WorkData}. + +%%>----------------------------------------------------------------------< +%% Procedure : getAncestorWithLowestSemi/2 +%% Purpose : The main purpose of this function is to retrieve the ancestor +%% of a node with the lowest depth first number (semi). The +%% function is also using path compression, i.e. it remembers the +%% best node (the one with the lowest semi number) and hence the +%% algorithm is only processing the minimal number of nodes. +%% Arguments : Node - Node in the tree +%% WorkData - Table consisting of workDataCells +%% Returns : A node (the one with the lowest semi) and an updated version +%% of the table WorkData. +%%>----------------------------------------------------------------------< + +getAncestorWithLowestSemi(Node, WorkData) -> + Best = lookup({best, Node}, WorkData), + case lookup({ancestor, Node}, WorkData) of + none -> {Best, WorkData}; + A -> + case lookup({ancestor, A}, WorkData) of + none -> + {Best, WorkData}; + _ -> + {B, WorkData2} = getAncestorWithLowestSemi(A, WorkData), + AncA = lookup({ancestor, A}, WorkData2), + WorkData3 = update(Node, {ancestor, AncA}, WorkData2), + DfSemiB = lookup({dfnum, lookup({semi, B}, WorkData3)}, WorkData3), + BestN = lookup({best, Node}, WorkData3), + SemiB = lookup({semi, BestN}, WorkData3), + DfSemiBestN = lookup({dfnum, SemiB}, WorkData3), + case DfSemiB < DfSemiBestN of + true -> + {B, update(Node, {best, B}, WorkData3)}; + false -> + {BestN, WorkData3} + end + end + end. + +%%>----------------------------------------------------------------------< +%% Procedure : linkTrees/3 +%% Purpose : The main purpose of this function is to combine two trees +%% into one (accomplished by setting the ancestor for node +%% Node to Parent). The algorithm is also updating the best field +%% in the workDataCell for node Node to the value of itself. +%% Arguments : Parent - The parent of the node Node. +%% Node - The node to process +%% WorkData - Table consisting of workDataCells +%% Returns : An updated version of table WorkData +%%>----------------------------------------------------------------------< + +linkTrees(Parent, Node, WorkData) -> + update(Node, [{ancestor, Parent}, {best, Node}], WorkData). + +%%>----------------------------------------------------------------------< +%% Procedure : filterBucket/4 +%% Purpose : The purpose of this algorith is to compute the dominator of +%% the node Node by utilizing the first clause of the Dominator +%% Theorem. If the first clause of the theorem doesn't apply +%% then the computation of that particular node is deferred to +%% a later stage (see finalize). +%% Arguments : Nodes - The list of CFG nodes that need to be computed. +%% Parent - The parent of the nodes in the list Nodes +%% WorkData - Table consisting of workDataCells +%% DomData - Table consisting of domTree cells. +%% Returns : An updated version of the tables WorkData and DomData +%%>----------------------------------------------------------------------< + +filterBucket([Node|Nodes], Parent, WorkData, DomData) -> + {Y, WorkData2} = getAncestorWithLowestSemi(Node, WorkData), + {WorkData3, DomData2} = + case lookup({semi, Y}, WorkData2) =:= lookup({semi, Node}, WorkData2) of + true -> {WorkData2, setIDom(Node, Parent, DomData)}; + false -> {update(Node, {samedom, Y}, WorkData2), DomData} + end, + filterBucket(Nodes, Parent, WorkData3, DomData2); +filterBucket([], _, WorkData, DomData) -> + {WorkData, DomData}. + +%%>----------------------------------------------------------------------< +%% Procedure : finalize/5 +%% Purpose : This algorithm finishes up the second clause of the Dominator +%% Theorem. Hence, the main purpose of this function is therefore +%% to update the dominator tree with the nodes that were deferred +%% in the filterBucket algorithm. +%% Arguments : WorkData - Table consisting of workDataCells +%% DomData - Table consisting of domTree cells +%% N - The index used for retrieving the node to be +%% processed +%% Max - Maximum node index +%% Returns : An updated version of the table DomData +%%>----------------------------------------------------------------------< + +finalize(WorkData, DomData, N, Max, DFS) when N =< Max -> + Node = lookup_table(N, DFS), + case lookup({samedom, Node}, WorkData) of + none -> + finalize(WorkData, DomData, N + 1, Max, DFS); + SameDomN -> + case domTree_getIDom(SameDomN, DomData) of + IdomSameDomN when is_integer(IdomSameDomN) -> + DomData2 = setIDom(Node, IdomSameDomN, DomData), + finalize(WorkData, DomData2, N + 1, Max, DFS) + end + end; +finalize(_, DomData, _, _, _) -> + DomData. + +%%>----------------------------------------------------------------------< +%% Procedure : domTree_dominates/3 +%% Purpose : checks wheter Node1 dominates Node2 with respect to the +%% dominator tree DomTree +%% Arguments : Node1 the possible dominator, Node2 which might be dominated +%% and DomTree - the target dominator tree. +%% Notes : Relies on lists:any to return false when the a list is empty +%%>----------------------------------------------------------------------< + +-spec domTree_dominates(cfg_lbl(), cfg_lbl(), domTree()) -> boolean(). + +domTree_dominates(Node1, Node1, _DomTree) -> + true; +domTree_dominates(Node1, Node2, DomTree) -> + Children = domTree_getChildren(Node1, DomTree), + lists:any(fun(X) -> domTree_dominates(X, Node2, DomTree) end, Children). + +%%>----------------------------------------------------------------------< +%% Procedure : pp/1 +%% Purpose : Pretty Printing a dominator tree. +%% Arguments : DomTree - the target dominator tree. +%% Notes : Uses pp/2 and pp_children to perform its task. +%%>----------------------------------------------------------------------< + +-ifdef(DEBUG). + +domTree_pp(DomTree) -> + io:format("Domtree:\nRoot: ~w\nSize: ~w\n", [domTree_getRoot(DomTree), + domTree_getSize(DomTree)]), + domTree_pp(domTree_getRoot(DomTree), DomTree). + +domTree_pp(N, DomTree) -> + case domTree_getNode(N, DomTree) of + {value, {IDom, Children}} -> + io:format("Node: ~w\n\tIDom: ~w\n\tChildren: ~w\n\n", + [N, IDom, Children]), + domTree_pp_children(Children, DomTree); + none -> + failed + end. + +domTree_pp_children([Child|T], DomTree) -> + domTree_pp(Child, DomTree), + domTree_pp_children(T, DomTree); +domTree_pp_children([], _) -> + ok. + +-endif. %% DEBUG + +%%======================================================================== +%% +%% CODE FOR CREATING AND MANIPULATING DOMINANCE FRONTIERS. +%% +%%======================================================================== + +-type domFrontier() :: gb_tree(). + +%%>----------------------------------------------------------------------< +%% Procedure : domFrontier_create +%% Purpose : This function calculates the Dominance Frontiers given +%% a CFG and a Dominator Tree. +%% Arguments : SuccMap - The successor map of the CFG we are working with. +%% DomTree - The dominance tree of the CFG. +%% Notes : DomTree must actually be the dominance tree of the CFG. +%%>----------------------------------------------------------------------< + +-spec domFrontier_create(cfg(), domTree()) -> domFrontier(). + +domFrontier_create(SuccMap, DomTree) -> + df_create(domTree_getRoot(DomTree), SuccMap, DomTree, df__empty()). + +df_create(Node, SuccMap, DomTree, DF) -> + Children = domTree_getChildren(Node, DomTree), + Succ = hipe_gen_cfg:succ(SuccMap, Node), + DF1 = checkIDomList(Succ, Node, DomTree, DF), + makeDFChildren(Children, Node, SuccMap, DomTree, DF1). + +%%>----------------------------------------------------------------------< +%% Procedure : domFrontier_get +%% Purpose : This function returns the Dominance Frontier for Node. +%% Arguments : Node - The node whose Dominance Frontier we request +%% DF - The Dominance Frontier structure +%% Returns : +%%>----------------------------------------------------------------------< + +-spec domFrontier_get(cfg_lbl(), domFrontier()) -> [cfg_lbl()]. + +domFrontier_get(Node, DF) -> + case gb_trees:lookup(Node, DF) of + {value, List} -> List; + none -> [] + end. + +%%>----------------------------------------------------------------------< +%% Procedure : df__empty +%% Purpose : This function creates an empty instance of the Dominance +%% Frontiers (DF) structure. +%%>----------------------------------------------------------------------< + +df__empty() -> + gb_trees:empty(). + +%%>----------------------------------------------------------------------< +%% Procedure : df__add +%% Purpose : This function adds Node to N in DF. +%% Arguments : N - The value being inserted +%% Node - The node getting the value +%% DF - The Dominance Frontiers +%% Returns : DF +%% Notes : If Node already exists at position N, it is not added again. +%%>----------------------------------------------------------------------< + +df__add_to_node(N, Node, DF) -> + case gb_trees:lookup(N, DF) of + {value, DFList} -> + case lists:member(Node, DFList) of + true -> + DF; + false -> + gb_trees:update(N, [Node|DFList], DF) + end; + none -> + gb_trees:insert(N, [Node], DF) + end. + +%%>----------------------------------------------------------------------< +%% Procedure : makeDFChildren +%% Purpose : This function calculates the dominance frontiers of the +%% children of the parent and adds the nodes in these +%% dominance frontiers who are not immediate dominantors of +%% the parent to parents dominance frontier. +%% Arguments : ChildList - The list of children that the function traverses +%% Parent - The parent of the children +%% SuccMap - The successor map of the CFG +%% DomTree - The dominantor tree of the CFG +%% DF - The dominance frontiers so far +%%>----------------------------------------------------------------------< + +makeDFChildren([Child|T], Parent, SuccMap, DomTree, DF) -> + DF1 = df_create(Child, SuccMap, DomTree, DF), + DF2 = checkIDomList(domFrontier_get(Child, DF1), Parent, DomTree, DF1), + makeDFChildren(T, Parent, SuccMap, DomTree, DF2); +makeDFChildren([], _, _, _, DF) -> + DF. + +%%>----------------------------------------------------------------------< +%% Procedure : checIDomList +%% Purpose : Adds all the nodes in the list to the parents dominance +%% frontier who do not have parent as immediate dominator. +%% Arguments : NodeList - The list of nodes that the function traverses +%% Parent - The parent of the nodes +%% DomTree - Our dominator tree +%% DF - The dominance frontiers so far +%%>----------------------------------------------------------------------< + +checkIDomList([Node|T], Parent, DomTree, DF) -> + DF1 = checkIDom(Node, Parent, DomTree, DF), + checkIDomList(T, Parent, DomTree, DF1); +checkIDomList([], _, _, DF) -> + DF. + +%%>----------------------------------------------------------------------< +%% Procedure : checkIdom +%% Purpose : Adds Node1 to Node2's dominance frontier if Node2 is not +%% Node1's immediate dominator. +%% Arguments : Node1 - a node +%% Node2 - another node +%% DomTree - the dominator tree +%% DF - the dominance frontier so far +%%>----------------------------------------------------------------------< + +checkIDom(Node1, Node2, DomTree, DF) -> + case domTree_getIDom(Node1, DomTree) of + Node2 -> + DF; + none -> + DF; + _ -> + df__add_to_node(Node2, Node1, DF) + end. diff --git a/lib/hipe/flow/hipe_gen_cfg.erl b/lib/hipe/flow/hipe_gen_cfg.erl new file mode 100644 index 0000000000..f9fb1f70c8 --- /dev/null +++ b/lib/hipe/flow/hipe_gen_cfg.erl @@ -0,0 +1,37 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_gen_cfg). + +-export([start_label/1, + succ/2, + pred/2 + ]). + +%%-define(DO_ASSERT, true). +-define(GEN_CFG, true). % needed for cfg.inc + +-include("../main/hipe.hrl"). +-include("cfg.hrl"). + +-spec succ(cfg(), cfg_lbl()) -> [cfg_lbl()]. +-spec pred(cfg(), cfg_lbl()) -> [cfg_lbl()]. + +-include("cfg.inc"). + diff --git a/lib/hipe/flow/liveness.inc b/lib/hipe/flow/liveness.inc new file mode 100644 index 0000000000..9c5eaf3e68 --- /dev/null +++ b/lib/hipe/flow/liveness.inc @@ -0,0 +1,332 @@ +%% -*- Erlang -*- +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% LIVENESS ANALYSIS +%% +%% Exports: +%% ~~~~~~~ +%% analyze(CFG) - returns a liveness analysis of CFG. +%% liveout(Liveness, Label) - returns a set of variables that are live at +%% exit from basic block named Label. +%% livein(Liveness, Label) - returns a set of variables that are live at +%% entry to the basic block named Label. +%% livein_from_liveout(Instructions, LiveOut) - Given a list of instructions +%% and a liveout-set, returns a set of variables live at the +%% first instruction. +%% + +-export([analyze/1, + livein/2]). +-ifdef(LIVEOUT_NEEDED). +-export([liveout/2]). +-endif. +-ifdef(PRETTY_PRINT). +-export([pp/1]). +-endif. +%%-export([livein_from_liveout/2]). +-ifdef(DEBUG_LIVENESS). +-export([annotate_liveness/2]). +-endif. + +-include("../flow/cfg.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface functions that MUST be implemented in the including file +%% +%% cfg_bb(CFG, L) -> BasicBlock, extract a basic block from a cfg. +%% cfg_postorder(CFG) -> [Labels], the labels of the cfg in postorder +%% cfg_succ(CFG, L) -> [Labels], +%% uses(Instr) -> +%% defines(Instr) -> +%% +%% Plus the following, if basic block annotations are needed +%% +%% cfg_labels(CFG) -> +%% cfg_bb_add(CFG, L, NewBB) -> +%% mk_comment(Text) -> + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The generic liveness analysis +%% + +-spec analyze(cfg()) -> gb_tree(). + +-ifdef(HIPE_LIVENESS_CALC_LARGEST_LIVESET). +analyze(CFG) -> + PO = cfg_postorder(CFG), + InitLiveness = liveness_init(init(cfg_labels(CFG), CFG)), + _Max = case get(hipe_largest_liveset) of + undefined -> + put(hipe_largest_liveset, 0), + 0; + LL -> LL + end, + Res = merry_go_around(PO, InitLiveness,0), + case get(hipe_largest_liveset) > _Max of + true -> + io:format("Largest liveset: ~w \n", [get(hipe_largest_liveset)]); + _ -> ok + end, + Res. + +-else. + +analyze(CFG) -> + PO = cfg_postorder(CFG), + InitLiveness = liveness_init(init(PO, CFG)), + Res = merry_go_around(PO, InitLiveness, 0), + Res. +-endif. + +%% +%% The fixpoint iteration +%% + +merry_go_around(Labels, Liveness, Count) -> + case doit_once(Labels, Liveness, 0) of + {NewLiveness, 0} -> + %% io:format("Iterations ~w~n", [Count]), + NewLiveness; + {NewLiveness, _Changed} -> + merry_go_around(Labels, NewLiveness, Count+1) + end. + +%% +%% One iteration +%% + +-ifdef(HIPE_LIVENESS_CALC_LARGEST_LIVESET). +doit_once([], Liveness, Changed) -> + {Liveness, Changed}; +doit_once([L|Ls], Liveness, Changed) -> + LiveOut = liveout(Liveness, L), + Kill = ordsets:subtract(LiveOut, kill(L, Liveness)), + LiveIn = ordsets:union(Kill, gen(L,Liveness)), + {NewLiveness, ChangedP} = update_livein(L, LiveIn, Liveness), + Le = length(LiveIn), + Max = get(hipe_largest_liveset), + if Le > Max -> put(hipe_largest_liveset, Le); + true -> true + end, + doit_once(Ls, NewLiveness, Changed+ChangedP). + +-else. + +doit_once([], Liveness, Changed) -> + {Liveness, Changed}; +doit_once([L|Ls], Liveness, Changed) -> + LiveOut = liveout(Liveness, L), + Kill = ordsets:subtract(LiveOut, kill(L, Liveness)), + LiveIn = ordsets:union(Kill, gen(L,Liveness)), + {NewLiveness, ChangedP} = update_livein(L, LiveIn, Liveness), + doit_once(Ls, NewLiveness, Changed+ChangedP). +-endif. + +%% %% +%% %% Given a list of instructions and liveout, calculates livein +%% %% +%% livein_from_liveout(List, LiveOut) when is_list(List) -> +%% livein_from_liveout_1(lists:reverse(List), gb_sets:from_list(LiveOut)); +%% livein_from_liveout(Instr, LiveOut) -> +%% livein_from_liveout_1([Instr], gb_sets:from_list(LiveOut)). +%% +%% livein_from_liveout_1([], LiveOut) -> +%% gb_sets:to_list(LiveOut); +%% livein_from_liveout_1([I|Is], LiveOut) -> +%% Def = defines(I), +%% Use = uses(I), +%% DefSet = gb_sets:from_list(Def), +%% UseSet = gb_sets:from_list(Use), +%% LiveIn = gb_sets:union(gb_sets:difference(LiveOut, DefSet), UseSet), +%% Le = gb_sets:size(LiveIn), +%% Max = get(hipe_largest_liveset), +%% if Le > Max -> put(hipe_largest_liveset, Le); +%% true -> true +%% end, +%% livein_from_liveout_1(Is, LiveIn). + +%% +%% updates liveness for a basic block +%% - returns: {NewLiveness, ChangedP} +%% - ChangedP is 0 if the new LiveIn is equal to the old one +%% otherwise it's 1. +%% + +update_livein(Label, NewLiveIn, Liveness) -> + {GK, LiveIn, Successors} = liveness_lookup(Label, Liveness), + NewLiveness = liveness_update(Label, {GK, NewLiveIn, Successors}, Liveness), + if LiveIn =:= NewLiveIn -> + {NewLiveness, 0}; + true -> + {NewLiveness, 1} + end. + + +%% +%% LiveOut for a block is the union of the successors LiveIn +%% + +liveout(Liveness, L) -> + Succ = successors(L, Liveness), + case Succ of + [] -> % special case if no successors + liveout_no_succ(); + _ -> + liveout1(Succ, Liveness) + end. + +liveout1(Labels, Liveness) -> + liveout1(Labels, Liveness, ordsets:new()). + +liveout1([], _Liveness, Live) -> + Live; +liveout1([L|Ls], Liveness,Live) -> + liveout1(Ls, Liveness, ordsets:union(livein(Liveness, L), Live)). + +successors(L, Liveness) -> + {_GK, _LiveIn, Successors} = liveness_lookup(L, Liveness), + Successors. + +-spec livein(gb_tree(), _) -> [_]. + +livein(Liveness, L) -> + {_GK, LiveIn, _Successors} = liveness_lookup(L, Liveness), + LiveIn. + +kill(L, Liveness) -> + {{_Gen, Kill}, _LiveIn, _Successors} = liveness_lookup(L, Liveness), + Kill. + +gen(L, Liveness) -> + {{Gen, _Kill}, _LiveIn, _Successors} = liveness_lookup(L, Liveness), + Gen. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% init returns a list of: {Label, {{Gen, Kill}, LiveIn, Successors}} +%% - Label is the name of the basic block. +%% - Gen is the set of varables that are used by this block. +%% - Kill is the set of varables that are defined by this block. +%% - LiveIn is the set of variables that are alive at entry to the +%% block (initially empty). +%% - Successors is a list of the successors to the block. + +init([], _) -> + []; +init([L|Ls], CFG) -> + BB = cfg_bb(CFG, L), + Code = hipe_bb:code(BB), + Succ = cfg_succ(CFG, L), + Transfer = make_bb_transfer(Code, Succ), + [{L, {Transfer, ordsets:new(), Succ}} | init(Ls, CFG)]. + + +make_bb_transfer([], _Succ) -> + {ordsets:new(), ordsets:new()}; % {Gen, Kill} +make_bb_transfer([I|Is], Succ) -> + {Gen, Kill} = make_bb_transfer(Is, Succ), + InstrGen = ordsets:from_list(uses(I)), + InstrKill = ordsets:from_list(defines(I)), + Gen1 = ordsets:subtract(Gen, InstrKill), + Gen2 = ordsets:union(Gen1, InstrGen), + Kill1 = ordsets:union(Kill, InstrKill), + Kill2 = ordsets:subtract(Kill1, InstrGen), + {Gen2, Kill2}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Annotate each basic block with liveness info +%% + +-ifdef(DEBUG_LIVENESS). + +annotate_liveness(CFG, Liveness) -> + Labels = cfg_labels(CFG), + annotate_liveness_bb(Labels, CFG, Liveness). + +annotate_liveness_bb([], CFG, _Liveness) -> + CFG; +annotate_liveness_bb([L|Ls], CFG, Liveness) -> + BB = cfg_bb(CFG, L), + Code0 = hipe_bb:code(BB), + LiveIn = strip(livein(Liveness, L)), + LiveOut = strip(liveout(Liveness, L)), + Code = [mk_comment({live_in, LiveIn}), + mk_comment({live_out, LiveOut}) + | Code0], + NewBB = hipe_bb:code_update(BB, Code), + NewCFG = cfg_bb_add(CFG, L, NewBB), + annotate_liveness_bb(Ls, NewCFG, Liveness). + +strip([]) -> + []; +strip([{_,Y}|Xs]) -> + [Y|strip(Xs)]. + +-endif. % DEBUG_LIVENESS + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +liveness_init(List) -> + liveness_init(List, gb_trees:empty()). + +liveness_init([{Lbl, Data}|Left], Acc) -> + liveness_init(Left, gb_trees:insert(Lbl, Data, Acc)); +liveness_init([], Acc) -> + Acc. + +liveness_lookup(Label, Liveness) -> + gb_trees:get(Label, Liveness). +liveness_update(Label, Val, Liveness) -> + gb_trees:update(Label, Val, Liveness). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% pp/1 pretty prints liveness information for a CFG +%% + +-ifdef(PRETTY_PRINT). + +-spec pp(cfg()) -> 'ok'. +pp(Cfg) -> + Liveness = analyze(Cfg), + Labels = cfg_labels(Cfg), + ok = print_blocks(Labels, Liveness, Cfg). + +print_blocks([Lbl|Rest], Liveness, Cfg) -> + io:format("~nLivein:", []), + pp_liveness_info(livein(Liveness, Lbl)), + io:format("Label ~w:~n" , [Lbl]), + pp_block(Lbl, Cfg), + io:format("Liveout:", []), + pp_liveness_info(liveout(Liveness, Lbl)), + print_blocks(Rest, Liveness, Cfg); +print_blocks([], _Liveness, _Cfg) -> + ok. + +-endif. % PRETTY_PRINT diff --git a/lib/hipe/icode/Makefile b/lib/hipe/icode/Makefile new file mode 100644 index 0000000000..de37c4e4c4 --- /dev/null +++ b/lib/hipe/icode/Makefile @@ -0,0 +1,144 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +ifdef HIPE_ENABLED +HIPE_MODULES = hipe_icode_heap_test +else +HIPE_MODULES = +endif + +DOC_MODULES = hipe_beam_to_icode \ + hipe_icode hipe_icode_bincomp \ + hipe_icode_callgraph hipe_icode_cfg hipe_icode_coordinator \ + hipe_icode_fp \ + hipe_icode_exceptions \ + hipe_icode_inline_bifs hipe_icode_instruction_counter \ + hipe_icode_liveness \ + hipe_icode_pp hipe_icode_primops \ + hipe_icode_range \ + hipe_icode_split_arith \ + hipe_icode_ssa hipe_icode_ssa_const_prop \ + hipe_icode_ssa_copy_prop hipe_icode_ssa_struct_reuse \ + hipe_icode_type $(HIPE_MODULES) + +MODULES = $(DOC_MODULES) hipe_icode_ebb hipe_icode_mulret + +HRL_FILES=hipe_icode.hrl hipe_icode_primops.hrl hipe_icode_type.hrl +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(DOC_MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_unused_import +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/icode + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/icode + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +$(EBIN)/hipe_beam_to_icode.beam: hipe_icode_primops.hrl ../main/hipe.hrl ../../compiler/src/beam_disasm.hrl +$(EBIN)/hipe_icode.beam: ../main/hipe.hrl +$(EBIN)/hipe_icode_bincomp.beam: ../flow/cfg.hrl +$(EBIN)/hipe_icode_callgraph.beam: hipe_icode_primops.hrl +$(EBIN)/hipe_icode_cfg.beam: ../flow/hipe_bb.hrl ../flow/cfg.hrl ../flow/cfg.inc ../main/hipe.hrl +$(EBIN)/hipe_icode_ebb.beam: ../flow/cfg.hrl ../flow/ebb.inc +$(EBIN)/hipe_icode_exceptions.beam: ../flow/cfg.hrl +$(EBIN)/hipe_icode_fp.beam: ../flow/cfg.hrl +$(EBIN)/hipe_icode_heap_test.beam: ../main/hipe.hrl hipe_icode_primops.hrl ../flow/cfg.hrl ../rtl/hipe_literals.hrl +$(EBIN)/hipe_icode_inline_bifs.beam: ../flow/cfg.hrl +$(EBIN)/hipe_icode_instruction_counter.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_icode_liveness.beam: ../flow/cfg.hrl ../flow/liveness.inc +$(EBIN)/hipe_icode_mulret.beam: ../main/hipe.hrl hipe_icode_primops.hrl +$(EBIN)/hipe_icode_primops.beam: hipe_icode_primops.hrl +$(EBIN)/hipe_icode_range.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_icode_primops.hrl +$(EBIN)/hipe_icode_split_arith.beam: ../main/hipe.hrl hipe_icode.hrl ../flow/cfg.hrl +$(EBIN)/hipe_icode_ssa.beam: ../main/hipe.hrl ../ssa/hipe_ssa.inc ../ssa/hipe_ssa_liveness.inc +$(EBIN)/hipe_icode_ssa_const_prop.beam: ../main/hipe.hrl hipe_icode_primops.hrl ../flow/cfg.hrl ../ssa/hipe_ssa_const_prop.inc +$(EBIN)/hipe_icode_ssa_copy_prop.beam: ../flow/cfg.hrl ../ssa/hipe_ssa_copy_prop.inc +$(EBIN)/hipe_icode_type.beam: hipe_icode_primops.hrl ../flow/cfg.hrl hipe_icode_type.hrl +$(EBIN)/hipe_icode_ssa_struct_reuse.beam: ../main/hipe.hrl hipe_icode_primops.hrl ../flow/cfg.hrl + +$(TARGET_FILES): hipe_icode.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl new file mode 100644 index 0000000000..3923e98673 --- /dev/null +++ b/lib/hipe/icode/hipe_beam_to_icode.erl @@ -0,0 +1,2326 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%======================================================================= +%% File : hipe_beam_to_icode.erl +%% Author : Kostis Sagonas +%% Description : Translates symbolic BEAM code to Icode +%%======================================================================= +%% $Id$ +%%======================================================================= +%% @doc +%% This file translates symbolic BEAM code to Icode which is HiPE's +%% intermediate code representation. Either the code of an entire +%% module, or the code of a specified function can be translated. +%% @end +%%======================================================================= + +-module(hipe_beam_to_icode). + +-export([module/2, mfa/3]). + +%%----------------------------------------------------------------------- + +%% Uncomment the following lines to turn on debugging for this module +%% or comment them to it turn off. Debug-level 6 inserts a print in +%% each compiled function. +%% +%%-ifndef(DEBUG). +%%-define(DEBUG,6). +%%-endif. + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). +-include("../../compiler/src/beam_disasm.hrl"). + +-define(no_debug_msg(Str,Xs),ok). +%%-define(no_debug_msg(Str,Xs),msg(Str,Xs)). + +-define(mk_debugcode(MFA, Env, Code), + case MFA of + {io,_,_} -> + %% We do not want to loop infinitely if we are compiling + %% the module io. + {Code,Env}; + {M,F,A} -> + MFAVar = mk_var(new), + StringVar = mk_var(new), + Ignore = mk_var(new), + MkMfa = hipe_icode:mk_move(MFAVar,hipe_icode:mk_const([MFA])), + MkString = hipe_icode:mk_move(StringVar, + hipe_icode:mk_const( + atom_to_list(M) ++ ":" ++ atom_to_list(F) ++"/"++ integer_to_list(A) ++ + " Native enter fun ~w\n")), + Call = + hipe_icode:mk_call([Ignore],io,format,[StringVar,MFAVar],remote), + {[MkMfa,MkString,Call | Code], Env} + end). + +%%----------------------------------------------------------------------- +%% Exported types +%%----------------------------------------------------------------------- + +-type hipe_beam_to_icode_ret() :: [{mfa(),#icode{}}]. + + +%%----------------------------------------------------------------------- +%% Internal data structures +%%----------------------------------------------------------------------- + +-record(beam_const, {value :: simple_const()}). % defined in hipe_icode.hrl + +-record(closure_info, {mfa :: mfa(), arity :: arity(), fv_arity :: arity()}). + +-record(environment, {mfa :: mfa(), entry :: non_neg_integer()}). + + +%%----------------------------------------------------------------------- +%% @doc +%% Translates the code of a whole module into Icode. +%% Returns a tuple whose first argument is a list of {{M,F,A}, ICode} +%% pairs, and its second argument is the list of HiPE compiler options. +%% @end +%%----------------------------------------------------------------------- + +-spec module([#function{}], comp_options()) -> hipe_beam_to_icode_ret(). + +module(BeamFuns, Options) -> + BeamCode0 = [beam_disasm:function__code(F) || F <- BeamFuns], + {ModCode, ClosureInfo} = preprocess_code(BeamCode0), + pp_beam(ModCode, Options), + [trans_beam_function_chunk(FunCode, ClosureInfo) || FunCode <- ModCode]. + +trans_beam_function_chunk(FunBeamCode, ClosureInfo) -> + {M,F,A} = MFA = find_mfa(FunBeamCode), + Icode = trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo), + {MFA,Icode}. + +%%----------------------------------------------------------------------- +%% @doc +%% Translates the BEAM code of a single function into Icode. +%% Returns a tuple whose first argument is list of {{M,F,A}, ICode} +%% pairs, where the first entry is that of the given MFA, and the +%% following (in undefined order) are those of the funs that are +%% defined in the function, and recursively, in the funs. The +%% second argument of the tuple is the HiPE compiler options +%% contained in the file. +%% @end +%%----------------------------------------------------------------------- + +-spec mfa(list(), mfa(), comp_options()) -> hipe_beam_to_icode_ret(). + +mfa(BeamFuns, {M,F,A} = MFA, Options) + when is_atom(M), is_atom(F), is_integer(A) -> + BeamCode0 = [beam_disasm:function__code(Fn) || Fn <- BeamFuns], + {ModCode, ClosureInfo} = preprocess_code(BeamCode0), + mfa_loop([MFA], [], sets:new(), ModCode, ClosureInfo, Options). + +mfa_loop([{M,F,A} = MFA | MFAs], Acc, Seen, ModCode, ClosureInfo, + Options) when is_atom(M), is_atom(F), is_integer(A) -> + case sets:is_element(MFA, Seen) of + true -> + mfa_loop(MFAs, Acc, Seen, ModCode, ClosureInfo, Options); + false -> + {Icode, FunMFAs} = mfa_get(M, F, A, ModCode, ClosureInfo, Options), + mfa_loop(FunMFAs ++ MFAs, [{MFA, Icode} | Acc], + sets:add_element(MFA, Seen), + ModCode, ClosureInfo, Options) + end; +mfa_loop([], Acc, _, _, _, _) -> + lists:reverse(Acc). + +mfa_get(M, F, A, ModCode, ClosureInfo, Options) -> + BeamCode = get_fun(ModCode, M,F,A), + pp_beam([BeamCode], Options), % cheat by using a list + Icode = trans_mfa_code(M,F,A, BeamCode, ClosureInfo), + FunMFAs = get_fun_mfas(BeamCode), + {Icode, FunMFAs}. + +get_fun_mfas([{patched_make_fun,{M,F,A} = MFA,_,_,_}|BeamCode]) + when is_atom(M), is_atom(F), is_integer(A) -> + [MFA|get_fun_mfas(BeamCode)]; +get_fun_mfas([_|BeamCode]) -> + get_fun_mfas(BeamCode); +get_fun_mfas([]) -> + []. + +%%----------------------------------------------------------------------- +%% The main translation function. +%%----------------------------------------------------------------------- + +trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> + ?no_debug_msg("disassembling: {~p,~p,~p} ...", [M,F,A]), + hipe_gensym:init(icode), + %% Extract the function arguments + FunArgs = extract_fun_args(A), + %% Record the function arguments + FunLbl = mk_label(new), + Env1 = env__mk_env(M, F, A, hipe_icode:label_name(FunLbl)), + Code1 = lists:flatten(trans_fun(FunBeamCode,Env1)), + Code2 = fix_fallthroughs(fix_catches(Code1)), + MFA = {M,F,A}, + %% Debug code + ?IF_DEBUG_LEVEL(5, + {Code3,_Env3} = ?mk_debugcode(MFA, Env2, Code2), + {Code3,_Env3} = {Code2,Env1}), + %% For stack optimization + Leafness = leafness(Code3), + IsLeaf = is_leaf_code(Leafness), + Code4 = + [FunLbl | + case needs_redtest(Leafness) of + false -> Code3; + true -> [mk_redtest()|Code3] + end], + IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure, + Code5 = hipe_icode:mk_icode(MFA, FunArgs, IsClosure, IsLeaf, + remove_dead_code(Code4), + hipe_gensym:var_range(icode), + hipe_gensym:label_range(icode)), + Icode = %% If this function is the code for a closure ... + case get_closure_info(MFA, ClosureInfo) of + not_a_closure -> Code5; + CI -> %% ... then patch the code to + %% get the free_vars from the closure + patch_closure_entry(Code5, CI) + end, + ?no_debug_msg("ok~n", []), + Icode. + +mk_redtest() -> hipe_icode:mk_primop([], redtest, []). + +leafness(Is) -> % -> true, selfrec, or false + leafness(Is, true). + +leafness([], Leafness) -> + Leafness; +leafness([I|Is], Leafness) -> + case I of + #icode_comment{} -> + %% BEAM self-tailcalls become gotos, but they leave + %% a trace behind in comments. Check those to ensure + %% that the computed leafness is correct. Needed to + %% prevent redtest elimination in those cases. + NewLeafness = + case hipe_icode:comment_text(I) of + 'tail_recursive' -> selfrec; % call_last to selfrec + 'self_tail_recursive' -> selfrec; % call_only to selfrec + _ -> Leafness + end, + leafness(Is, NewLeafness); + #icode_call{} -> + case hipe_icode:call_type(I) of + 'primop' -> + case hipe_icode:call_fun(I) of + call_fun -> false; % Calls closure + enter_fun -> false; % Calls closure + #apply_N{} -> false; + _ -> leafness(Is, Leafness) % Other primop calls are ok + end; + T when T =:= 'local' orelse T =:= 'remote' -> + {M,F,A} = hipe_icode:call_fun(I), + case erlang:is_builtin(M, F, A) of + true -> leafness(Is, Leafness); + false -> false + end + end; + #icode_enter{} -> + case hipe_icode:enter_type(I) of + 'primop' -> + case hipe_icode:enter_fun(I) of + enter_fun -> false; + #apply_N{} -> false; + _ -> + %% All primops should be ok except those excluded above, + %% except we don't actually tailcall them... + io:format("leafness: unexpected enter to primop ~w\n", [I]), + true + end; + T when T =:= 'local' orelse T =:= 'remote' -> + {M,F,A} = hipe_icode:enter_fun(I), + case erlang:is_builtin(M, F, A) of + true -> leafness(Is, Leafness); + _ -> false + end + end; + _ -> leafness(Is, Leafness) + end. + +%% XXX: this old stuff is passed around but essentially unused +is_leaf_code(Leafness) -> + case Leafness of + true -> true; + selfrec -> true; + false -> false + end. + +needs_redtest(Leafness) -> + case Leafness of + true -> false; + selfrec -> true; + false -> true + end. + +%%----------------------------------------------------------------------- +%% The main translation switch. +%%----------------------------------------------------------------------- + +%%--- label & func_info combo --- +trans_fun([{label,B},{label,_}, + {func_info,M,F,A},{label,L}|Instructions], Env) -> + trans_fun([{label,B},{func_info,M,F,A},{label,L}|Instructions], Env); +trans_fun([{label,B}, + {func_info,{atom,_M},{atom,_F},_A}, + {label,L}|Instructions], Env) -> + %% Emit code to handle function_clause errors. The BEAM test instructions + %% branch to this label if they fail during function clause selection. + %% Obviously, we must goto past this error point on normal entry. + Begin = mk_label(B), + V = mk_var(new), + EntryPt = mk_label(L), + Goto = hipe_icode:mk_goto(hipe_icode:label_name(EntryPt)), + Mov = hipe_icode:mk_move(V, hipe_icode:mk_const(function_clause)), + Fail = hipe_icode:mk_fail([V],error), + [Goto, Begin, Mov, Fail, EntryPt | trans_fun(Instructions, Env)]; +%%--- label --- +trans_fun([{label,L1},{label,L2}|Instructions], Env) -> + %% Old BEAM code can have two consecutive labels. + Lab1 = mk_label(L1), + Lab2 = mk_label(L2), + Goto = hipe_icode:mk_goto(map_label(L2)), + [Lab1, Goto, Lab2 | trans_fun(Instructions, Env)]; +trans_fun([{label,L}|Instructions], Env) -> + [mk_label(L) | trans_fun(Instructions, Env)]; +%%--- int_code_end --- SHOULD NEVER OCCUR HERE +%%--- call --- +trans_fun([{call,_N,{_M,_F,A}=MFA}|Instructions], Env) -> + Args = extract_fun_args(A), + Dst = [mk_var({r,0})], + I = trans_call(MFA, Dst, Args, local), + [I | trans_fun(Instructions, Env)]; +%%--- call_last --- +%% Differs from call_only in that it deallocates the environment +trans_fun([{call_last,_N,{_M,_F,A}=MFA,_}|Instructions], Env) -> + %% IS IT OK TO IGNORE LAST ARG ?? + ?no_debug_msg(" translating call_last: ~p ...~n", [Env]), + case env__get_mfa(Env) of + MFA -> + %% Does this case really happen, or is it covered by call_only? + Entry = env__get_entry(Env), + [hipe_icode:mk_comment('tail_recursive'), % needed by leafness/2 + hipe_icode:mk_goto(Entry) | trans_fun(Instructions,Env)]; + _ -> + Args = extract_fun_args(A), + I = trans_enter(MFA, Args, local), + [I | trans_fun(Instructions, Env)] + end; +%%--- call_only --- +%% Used when the body contains only one call in which case +%% an environment is not needed/created. +trans_fun([{call_only,_N,{_M,_F,A}=MFA}|Instructions], Env) -> + ?no_debug_msg(" translating call_only: ~p ...~n", [Env]), + case env__get_mfa(Env) of + MFA -> + Entry = env__get_entry(Env), + [hipe_icode:mk_comment('self_tail_recursive'), % needed by leafness/2 + hipe_icode:mk_goto(Entry) | trans_fun(Instructions,Env)]; + _ -> + Args = extract_fun_args(A), + I = trans_enter(MFA,Args,local), + [I | trans_fun(Instructions,Env)] + end; +%%--- call_ext --- +trans_fun([{call_ext,_N,{extfunc,M,F,A}}|Instructions], Env) -> + Args = extract_fun_args(A), + Dst = [mk_var({r,0})], + I = trans_call({M,F,A},Dst,Args,remote), + [hipe_icode:mk_comment('call_ext'),I | trans_fun(Instructions,Env)]; +%%--- call_ext_last --- +trans_fun([{call_ext_last,_N,{extfunc,M,F,A},_}|Instructions], Env) -> + %% IS IT OK TO IGNORE LAST ARG ?? + Args = extract_fun_args(A), + %% Dst = [mk_var({r,0})], + I = trans_enter({M,F,A},Args,remote), + [hipe_icode:mk_comment('call_ext_last'), I | trans_fun(Instructions,Env)]; +%%--- bif0 --- +trans_fun([{bif,BifName,nofail,[],Reg}|Instructions], Env) -> + BifInst = trans_bif0(BifName,Reg), + [hipe_icode:mk_comment({bif0,BifName}),BifInst|trans_fun(Instructions,Env)]; +%%--- bif1 --- +trans_fun([{bif,BifName,{f,Lbl},[_] = Args,Reg}|Instructions], Env) -> + {BifInsts,Env1} = trans_bif(1,BifName,Lbl,Args,Reg,Env), + [hipe_icode:mk_comment({bif1,BifName})|BifInsts] ++ trans_fun(Instructions,Env1); +%%--- bif2 --- +trans_fun([{bif,BifName,{f,Lbl},[_,_] = Args,Reg}|Instructions], Env) -> + {BifInsts,Env1} = trans_bif(2,BifName,Lbl,Args,Reg,Env), + [hipe_icode:mk_comment({bif2,BifName})|BifInsts] ++ trans_fun(Instructions,Env1); +%%--- allocate +trans_fun([{allocate,StackSlots,_}|Instructions], Env) -> + trans_allocate(StackSlots) ++ trans_fun(Instructions,Env); +%%--- allocate_heap +trans_fun([{allocate_heap,StackSlots,_,_}|Instructions], Env) -> + trans_allocate(StackSlots) ++ trans_fun(Instructions,Env); +%%--- allocate_zero +trans_fun([{allocate_zero,StackSlots,_}|Instructions], Env) -> + trans_allocate(StackSlots) ++ trans_fun(Instructions,Env); +%%--- allocate_heap_zero +trans_fun([{allocate_heap_zero,StackSlots,_,_}|Instructions], Env) -> + trans_allocate(StackSlots) ++ trans_fun(Instructions,Env); +%%--- test_heap --- IGNORED ON PURPOSE +trans_fun([{test_heap,_,_}|Instructions], Env) -> + trans_fun(Instructions,Env); +%%--- init --- IGNORED - CORRECT?? +trans_fun([{init,_}|Instructions], Env) -> + trans_fun(Instructions,Env); +%%--- deallocate --- IGNORED ON PURPOSE +trans_fun([{deallocate,_}|Instructions], Env) -> + trans_fun(Instructions,Env); +%%--- return --- +trans_fun([return|Instructions], Env) -> + [hipe_icode:mk_return([mk_var({r,0})]) | trans_fun(Instructions,Env)]; +%%--- send --- +trans_fun([send|Instructions], Env) -> + I = hipe_icode:mk_call([mk_var({r,0})], erlang, send, + [mk_var({x,0}),mk_var({x,1})], remote), + [I | trans_fun(Instructions,Env)]; +%%--- remove_message --- +trans_fun([remove_message|Instructions], Env) -> + [hipe_icode:mk_primop([],select_msg,[]) | trans_fun(Instructions,Env)]; +%%--- timeout --- +trans_fun([timeout|Instructions], Env) -> + [hipe_icode:mk_primop([],clear_timeout,[]) | trans_fun(Instructions,Env)]; +%%--- loop_rec --- +trans_fun([{loop_rec,{_,Lbl},Reg}|Instructions], Env) -> + {Movs,[Temp],Env1} = get_constants_in_temps([Reg],Env), + GotitLbl = mk_label(new), + ChkGetMsg = hipe_icode:mk_primop([Temp],check_get_msg,[], + hipe_icode:label_name(GotitLbl), + map_label(Lbl)), + Movs ++ [ChkGetMsg, GotitLbl | trans_fun(Instructions,Env1)]; +%%--- loop_rec_end --- +trans_fun([{loop_rec_end,{_,Lbl}}|Instructions], Env) -> + Loop = hipe_icode:mk_goto(map_label(Lbl)), + [hipe_icode:mk_primop([],next_msg,[]), Loop | trans_fun(Instructions,Env)]; +%%--- wait --- +trans_fun([{wait,{_,Lbl}}|Instructions], Env) -> + Susp = hipe_icode:mk_primop([],suspend_msg,[]), + Loop = hipe_icode:mk_goto(map_label(Lbl)), + [Susp, Loop | trans_fun(Instructions,Env)]; +%%--- wait_timeout --- +trans_fun([{wait_timeout,{_,Lbl},Reg}|Instructions], Env) -> + {Movs,[_]=Temps,Env1} = get_constants_in_temps([Reg],Env), + SetTmout = hipe_icode:mk_primop([],set_timeout,Temps), + DoneLbl = mk_label(new), + SuspTmout = hipe_icode:mk_if(suspend_msg_timeout,[], + map_label(Lbl),hipe_icode:label_name(DoneLbl)), + Movs ++ [SetTmout, SuspTmout, DoneLbl | trans_fun(Instructions,Env1)]; +%%-------------------------------------------------------------------- +%%--- Translation of arithmetics {bif,ArithOp, ...} --- +%%-------------------------------------------------------------------- +trans_fun([{arithbif,ArithOp,{f,L},SrcRs,DstR}|Instructions], Env) -> + {ICode,NewEnv} = trans_arith(ArithOp,SrcRs,DstR,L,Env), + ICode ++ trans_fun(Instructions,NewEnv); +%%-------------------------------------------------------------------- +%%--- Translation of arithmetic tests {test,is_ARITHTEST, ...} --- +%%-------------------------------------------------------------------- +%%--- is_lt --- +trans_fun([{test,is_lt,{f,Lbl},[Arg1,Arg2]}|Instructions], Env) -> + {ICode,Env1} = trans_test_guard('<',Lbl,Arg1,Arg2,Env), + ICode ++ trans_fun(Instructions,Env1); +%%--- is_ge --- +trans_fun([{test,is_ge,{f,Lbl},[Arg1,Arg2]}|Instructions], Env) -> + {ICode,Env1} = trans_test_guard('>=',Lbl,Arg1,Arg2,Env), + ICode ++ trans_fun(Instructions,Env1); +%%--- is_eq --- +trans_fun([{test,is_eq,{f,Lbl},[Arg1,Arg2]}|Instructions], Env) -> + {ICode,Env1} = trans_is_eq(Lbl,Arg1,Arg2,Env), + ICode ++ trans_fun(Instructions,Env1); +%%--- is_ne --- +trans_fun([{test,is_ne,{f,Lbl},[Arg1,Arg2]}|Instructions], Env) -> + {ICode,Env1} = trans_is_ne(Lbl,Arg1,Arg2,Env), + ICode ++ trans_fun(Instructions,Env1); +%%--- is_eq_exact --- +trans_fun([{test,is_eq_exact,{f,Lbl},[Arg1,Arg2]}|Instructions], Env) -> + {ICode,Env1} = trans_is_eq_exact(Lbl,Arg1,Arg2,Env), + ICode ++ trans_fun(Instructions,Env1); +%%--- is_ne_exact --- +trans_fun([{test,is_ne_exact,{f,Lbl},[Arg1,Arg2]}|Instructions], Env) -> + {ICode,Env1} = trans_is_ne_exact(Lbl,Arg1,Arg2,Env), + ICode ++ trans_fun(Instructions,Env1); +%%-------------------------------------------------------------------- +%%--- Translation of type tests {test,is_TYPE, ...} --- +%%-------------------------------------------------------------------- +%%--- is_integer --- +trans_fun([{test,is_integer,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(integer,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_float --- +trans_fun([{test,is_float,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(float,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_number --- +trans_fun([{test,is_number,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(number,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_atom --- +trans_fun([{test,is_atom,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(atom,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_pid --- +trans_fun([{test,is_pid,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(pid,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_ref --- +trans_fun([{test,is_reference,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(reference,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_port --- +trans_fun([{test,is_port,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(port,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_nil --- +trans_fun([{test,is_nil,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(nil,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_binary --- +trans_fun([{test,is_binary,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(binary,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_constant --- +trans_fun([{test,is_constant,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(constant,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_list --- +trans_fun([{test,is_list,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(list,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_nonempty_list --- +trans_fun([{test,is_nonempty_list,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(cons,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- is_tuple --- +trans_fun([{test,is_tuple,{f,_Lbl}=FLbl,[Xreg]}, + {test,test_arity,FLbl,[Xreg,_]=Args}|Instructions], Env) -> + trans_fun([{test,test_arity,FLbl,Args}|Instructions],Env); +trans_fun([{test,is_tuple,{_,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(tuple,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- test_arity --- +trans_fun([{test,test_arity,{f,Lbl},[Reg,N]}|Instructions], Env) -> + True = mk_label(new), + I = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N}, + hipe_icode:label_name(True),map_label(Lbl)), + [I,True | trans_fun(Instructions,Env)]; +%%-------------------------------------------------------------------- +%%--- select_val --- +trans_fun([{select_val,Reg,{f,Lbl},{list,Cases}}|Instructions], Env) -> + {SwVar,CasePairs} = trans_select_stuff(Reg,Cases), + Len = length(CasePairs), + I = hipe_icode:mk_switch_val(SwVar,map_label(Lbl),Len,CasePairs), + ?no_debug_msg("switch_val instr is ~p~n",[I]), + [I | trans_fun(Instructions,Env)]; +%%--- select_tuple_arity --- +trans_fun([{select_tuple_arity,Reg,{f,Lbl},{list,Cases}}|Instructions],Env) -> + {SwVar,CasePairs} = trans_select_stuff(Reg,Cases), + Len = length(CasePairs), + I = hipe_icode:mk_switch_tuple_arity(SwVar,map_label(Lbl),Len,CasePairs), + ?no_debug_msg("switch_tuple_arity instr is ~p~n",[I]), + [I | trans_fun(Instructions,Env)]; +%%--- jump --- +trans_fun([{jump,{_,L}}|Instructions], Env) -> + Label = mk_label(L), + I = hipe_icode:mk_goto(hipe_icode:label_name(Label)), + [I | trans_fun(Instructions,Env)]; +%%--- move --- +trans_fun([{move,Src,Dst}|Instructions], Env) -> + Dst1 = mk_var(Dst), + Src1 = trans_arg(Src), + [hipe_icode:mk_move(Dst1,Src1) | trans_fun(Instructions,Env)]; +%%--- catch --- ITS PROCESSING IS POSTPONED +trans_fun([{'catch',N,{_,EndLabel}}|Instructions], Env) -> + NewContLbl = mk_label(new), + [{'catch',N,EndLabel},NewContLbl | trans_fun(Instructions,Env)]; +%%--- catch_end --- ITS PROCESSING IS POSTPONED +trans_fun([{catch_end,_N}=I|Instructions], Env) -> + [I | trans_fun(Instructions,Env)]; +%%--- try --- ITS PROCESSING IS POSTPONED +trans_fun([{'try',N,{_,EndLabel}}|Instructions], Env) -> + NewContLbl = mk_label(new), + [{'try',N,EndLabel},NewContLbl | trans_fun(Instructions,Env)]; +%%--- try_end --- +trans_fun([{try_end,_N}|Instructions], Env) -> + [hipe_icode:mk_end_try() | trans_fun(Instructions,Env)]; +%%--- try_case --- ITS PROCESSING IS POSTPONED +trans_fun([{try_case,_N}=I|Instructions], Env) -> + [I | trans_fun(Instructions,Env)]; +%%--- try_case_end --- +trans_fun([{try_case_end,Arg}|Instructions], Env) -> + BadArg = trans_arg(Arg), + ErrVar = mk_var(new), + Vs = [mk_var(new)], + Atom = hipe_icode:mk_move(ErrVar,hipe_icode:mk_const(try_clause)), + Tuple = hipe_icode:mk_primop(Vs,mktuple,[ErrVar,BadArg]), + Fail = hipe_icode:mk_fail(Vs,error), + [Atom,Tuple,Fail | trans_fun(Instructions,Env)]; +%%--- raise --- +trans_fun([{raise,{f,0},[Reg1,Reg2],{x,0}}|Instructions], Env) -> + V1 = trans_arg(Reg1), + V2 = trans_arg(Reg2), + Fail = hipe_icode:mk_fail([V1,V2],rethrow), + [Fail | trans_fun(Instructions,Env)]; +%%--- get_list --- +trans_fun([{get_list,List,Head,Tail}|Instructions], Env) -> + TransList = [trans_arg(List)], + I1 = hipe_icode:mk_primop([mk_var(Head)],unsafe_hd,TransList), + I2 = hipe_icode:mk_primop([mk_var(Tail)],unsafe_tl,TransList), + %% Handle the cases where the dest overwrites the src!! + if + Head =/= List -> + [I1, I2 | trans_fun(Instructions,Env)]; + Tail =/= List -> + [I2, I1 | trans_fun(Instructions,Env)]; + true -> + %% XXX: We should take care of this case!!!!! + ?error_msg("hd and tl regs identical in get_list~n",[]), + erlang:error(not_handled) + end; +%%--- get_tuple_element --- +trans_fun([{get_tuple_element,Xreg,Index,Dst}|Instructions], Env) -> + I = hipe_icode:mk_primop([mk_var(Dst)], + #unsafe_element{index=Index+1}, + [trans_arg(Xreg)]), + [I | trans_fun(Instructions,Env)]; +%%--- set_tuple_element --- +trans_fun([{set_tuple_element,Elem,Tuple,Index}|Instructions], Env) -> + Elem1 = trans_arg(Elem), + I = hipe_icode:mk_primop([mk_var(Tuple)], + #unsafe_update_element{index=Index+1}, + [mk_var(Tuple),Elem1]), + [I | trans_fun(Instructions,Env)]; +%%--- put_string --- +trans_fun([{put_string,_Len,String,Dst}|Instructions], Env) -> + Mov = hipe_icode:mk_move(mk_var(Dst),trans_const(String)), + [Mov | trans_fun(Instructions,Env)]; +%%--- put_list --- +trans_fun([{put_list,Car,Cdr,Dest}|Instructions], Env) -> + {M1,V1,Env2} = mk_move_and_var(Car,Env), + {M2,V2,Env3} = mk_move_and_var(Cdr,Env2), + D = mk_var(Dest), + M1 ++ M2 ++ [hipe_icode:mk_primop([D],cons,[V1,V2]) + | trans_fun(Instructions,Env3)]; +%%--- put_tuple --- +trans_fun([{put_tuple,_Size,Reg}|Instructions], Env) -> + {Moves,Instructions2,Vars,Env2} = trans_puts(Instructions,Env), + Dest = [mk_var(Reg)], + Src = lists:reverse(Vars), + Primop = hipe_icode:mk_primop(Dest,mktuple,Src), + Moves ++ [Primop | trans_fun(Instructions2,Env2)]; +%%--- put --- SHOULD NOT REALLY EXIST HERE; put INSTRUCTIONS ARE HANDLED ABOVE. +%%--- badmatch --- +trans_fun([{badmatch,Arg}|Instructions], Env) -> + BadVar = trans_arg(Arg), + ErrVar = mk_var(new), + Vs = [mk_var(new)], + Atom = hipe_icode:mk_move(ErrVar,hipe_icode:mk_const(badmatch)), + Tuple = hipe_icode:mk_primop(Vs,mktuple,[ErrVar,BadVar]), + Fail = hipe_icode:mk_fail(Vs,error), + [Atom,Tuple,Fail | trans_fun(Instructions,Env)]; +%%--- if_end --- +trans_fun([if_end|Instructions], Env) -> + V = mk_var(new), + Mov = hipe_icode:mk_move(V,hipe_icode:mk_const(if_clause)), + Fail = hipe_icode:mk_fail([V],error), + [Mov,Fail | trans_fun(Instructions, Env)]; +%%--- case_end --- +trans_fun([{case_end,Arg}|Instructions], Env) -> + BadArg = trans_arg(Arg), + ErrVar = mk_var(new), + Vs = [mk_var(new)], + Atom = hipe_icode:mk_move(ErrVar,hipe_icode:mk_const(case_clause)), + Tuple = hipe_icode:mk_primop(Vs,mktuple,[ErrVar,BadArg]), + Fail = hipe_icode:mk_fail(Vs,error), + [Atom,Tuple,Fail | trans_fun(Instructions,Env)]; +%%--- enter_fun --- +trans_fun([{call_fun,N},{deallocate,_},return|Instructions], Env) -> + Args = extract_fun_args(N+1), %% +1 is for the fun itself + [hipe_icode:mk_comment('enter_fun'), + hipe_icode:mk_enter_primop(enter_fun,Args) | trans_fun(Instructions,Env)]; +%%--- call_fun --- +trans_fun([{call_fun,N}|Instructions], Env) -> + Args = extract_fun_args(N+1), %% +1 is for the fun itself + Dst = [mk_var({r,0})], + [hipe_icode:mk_comment('call_fun'), + hipe_icode:mk_primop(Dst,call_fun,Args) | trans_fun(Instructions,Env)]; +%%--- patched_make_fun --- make_fun/make_fun2 after fixes +trans_fun([{patched_make_fun,MFA,Magic,FreeVarNum,Index}|Instructions], Env) -> + Args = extract_fun_args(FreeVarNum), + Dst = [mk_var({r,0})], + Fun = hipe_icode:mk_primop(Dst, + #mkfun{mfa=MFA,magic_num=Magic,index=Index}, + Args), + ?no_debug_msg("mkfun translates to: ~p~n",[Fun]), + [Fun | trans_fun(Instructions,Env)]; +%%--- is_function --- +trans_fun([{test,is_function,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(function,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%--- call_ext_only --- +trans_fun([{call_ext_only,_N,{extfunc,M,F,A}}|Instructions], Env) -> + Args = extract_fun_args(A), + I = trans_enter({M,F,A}, Args, remote), + [hipe_icode:mk_comment('call_ext_only'), I | trans_fun(Instructions,Env)]; +%%-------------------------------------------------------------------- +%%--- Translation of binary instructions --- +%%-------------------------------------------------------------------- +%% This code uses a somewhat unorthodox translation: +%% Since we do not want non-erlang values as arguments to Icode +%% instructions some compile time constants are coded into the +%% name of the function (or rather the primop). +%% TODO: Make sure all cases of argument types are covered. +%%-------------------------------------------------------------------- +trans_fun([{test,bs_start_match2,{f,Lbl},[X,_Live,Max,Ms]}|Instructions], Env) -> + Bin = trans_arg(X), + MsVar = mk_var(Ms), + trans_op_call({hipe_bs_primop, {bs_start_match, Max}}, Lbl, [Bin], + [MsVar], Env, Instructions); +trans_fun([{test,bs_get_float2,{f,Lbl},[Ms,_Live,Size,Unit,{field_flags,Flags0},X]}| + Instructions], Env) -> + Dst = mk_var(X), + MsVar = mk_var(Ms), + Flags = resolve_native_endianess(Flags0), + {Name, Args} = + case Size of + {integer, NoBits} when is_integer(NoBits), NoBits >= 0 -> + {{bs_get_float,NoBits*Unit,Flags}, [MsVar]}; + {integer, NoBits} when is_integer(NoBits), NoBits < 0 -> + ?EXIT({bad_bs_size_constant,Size}); + BitReg -> + Bits = mk_var(BitReg), + {{bs_get_float,Unit,Flags}, [Bits,MsVar]} + end, + trans_op_call({hipe_bs_primop,Name}, Lbl, Args, [Dst,MsVar], Env, Instructions); +trans_fun([{test,bs_get_integer2,{f,Lbl},[Ms,_Live,Size,Unit,{field_flags,Flags0},X]}| + Instructions], Env) -> + Dst = mk_var(X), + MsVar = mk_var(Ms), + Flags = resolve_native_endianess(Flags0), + {Name, Args} = + case Size of + {integer,NoBits} when is_integer(NoBits), NoBits >= 0 -> + {{bs_get_integer,NoBits*Unit,Flags}, [MsVar]}; + {integer,NoBits} when is_integer(NoBits), NoBits < 0 -> + ?EXIT({bad_bs_size_constant,Size}); + BitReg -> + Bits = mk_var(BitReg), + {{bs_get_integer,Unit,Flags}, [MsVar,Bits]} + end, + trans_op_call({hipe_bs_primop,Name}, Lbl, Args, [Dst,MsVar], Env, Instructions); +trans_fun([{test,bs_get_binary2,{f,Lbl},[Ms,_Live,Size,Unit,{field_flags,Flags},X]}| + Instructions], Env) -> + MsVar = mk_var(Ms), + {Name, Args, Dsts} = + case Size of + {atom, all} -> %% put all bits + if Ms =:= X -> + {{bs_get_binary_all,Unit,Flags},[MsVar],[mk_var(X)]}; + true -> + {{bs_get_binary_all_2,Unit,Flags},[MsVar],[mk_var(X),MsVar]} + end; + {integer, NoBits} when is_integer(NoBits), NoBits >= 0 -> + {{bs_get_binary,NoBits*Unit,Flags}, [MsVar], [mk_var(X),MsVar]};%% Create a N*Unit bits subbinary + {integer, NoBits} when is_integer(NoBits), NoBits < 0 -> + ?EXIT({bad_bs_size_constant,Size}); + BitReg -> % Use a number of bits only known at runtime. + Bits = mk_var(BitReg), + {{bs_get_binary,Unit,Flags}, [MsVar,Bits], [mk_var(X),MsVar]} + end, + trans_op_call({hipe_bs_primop,Name}, Lbl, Args, Dsts, Env, Instructions); +trans_fun([{test,bs_skip_bits2,{f,Lbl},[Ms,Size,NumBits,{field_flags,Flags}]}| + Instructions], Env) -> + %% the current match buffer + MsVar = mk_var(Ms), + {Name, Args} = + case Size of + {atom, all} -> %% Skip all bits + {{bs_skip_bits_all,NumBits,Flags},[MsVar]}; + {integer, BitSize} when is_integer(BitSize), BitSize >= 0-> %% Skip N bits + {{bs_skip_bits,BitSize*NumBits}, [MsVar]}; + {integer, BitSize} when is_integer(BitSize), BitSize < 0 -> + ?EXIT({bad_bs_size_constant,Size}); + X -> % Skip a number of bits only known at runtime. + Src = mk_var(X), + {{bs_skip_bits,NumBits},[MsVar,Src]} + end, + trans_op_call({hipe_bs_primop,Name}, Lbl, Args, [MsVar], Env, Instructions); +trans_fun([{test,bs_test_unit,{f,Lbl},[Ms,Unit]}| + Instructions], Env) -> + %% the current match buffer + MsVar = mk_var(Ms), + trans_op_call({hipe_bs_primop,{bs_test_unit,Unit}}, Lbl, + [MsVar], [], Env, Instructions); +trans_fun([{test,bs_match_string,{f,Lbl},[Ms,BitSize,Bin]}| + Instructions], Env) -> + True = mk_label(new), + FalseLabName = map_label(Lbl), + TrueLabName = hipe_icode:label_name(True), + MsVar = mk_var(Ms), + TmpVar = mk_var(new), + ByteSize = BitSize div 8, + ExtraBits = BitSize rem 8, + WordSize = hipe_rtl_arch:word_size(), + if ExtraBits =:= 0 -> + trans_op_call({hipe_bs_primop,{bs_match_string,Bin,ByteSize}}, Lbl, + [MsVar], [MsVar], Env, Instructions); + BitSize =< ((WordSize * 8) - 5) -> + <> = Bin, + {I1,Env1} = trans_one_op_call({hipe_bs_primop,{bs_get_integer,BitSize,0}}, Lbl, + [MsVar], [TmpVar, MsVar], Env), + I2 = hipe_icode:mk_type([TmpVar], {integer,Int}, TrueLabName, FalseLabName), + I1 ++ [I2,True] ++ trans_fun(Instructions, Env1); + true -> + <> = Bin, + {I1,Env1} = trans_one_op_call({hipe_bs_primop,{bs_match_string,RealBin,ByteSize}}, Lbl, + [MsVar], [MsVar], Env), + {I2,Env2} = trans_one_op_call({hipe_bs_primop,{bs_get_integer,ExtraBits,0}}, Lbl, + [MsVar], [TmpVar, MsVar], Env1), + I3 = hipe_icode:mk_type([TmpVar], {integer,Int}, TrueLabName, FalseLabName), + I1 ++ I2 ++ [I3,True] ++ trans_fun(Instructions, Env2) + end; +trans_fun([{bs_context_to_binary,Var}|Instructions], Env) -> + %% the current match buffer + IVars = [trans_arg(Var)], + [hipe_icode:mk_primop(IVars,{hipe_bs_primop,bs_context_to_binary},IVars)| + trans_fun(Instructions, Env)]; +trans_fun([{bs_append,{f,Lbl},Size,W,R,U,Binary,{field_flags,F},Dst}| + Instructions], Env) -> + %% the current match buffer + SizeArg = trans_arg(Size), + BinArg = trans_arg(Binary), + IcodeDst = mk_var(Dst), + Offset = mk_var(reg), + Base = mk_var(reg), + trans_bin_call({hipe_bs_primop,{bs_append,W,R,U,F}},Lbl,[SizeArg,BinArg], + [IcodeDst,Base,Offset], + Base, Offset, Env, Instructions); +trans_fun([{bs_private_append,{f,Lbl},Size,U,Binary,{field_flags,F},Dst}| + Instructions], Env) -> + %% the current match buffer + SizeArg = trans_arg(Size), + BinArg = trans_arg(Binary), + IcodeDst = mk_var(Dst), + Offset = mk_var(reg), + Base = mk_var(reg), + trans_bin_call({hipe_bs_primop,{bs_private_append,U,F}}, + Lbl,[SizeArg,BinArg], + [IcodeDst,Base,Offset], + Base, Offset, Env, Instructions); +trans_fun([bs_init_writable|Instructions], Env) -> + Vars = [mk_var({x,0})], %{x,0} is implict arg and dst + [hipe_icode:mk_primop(Vars,{hipe_bs_primop,bs_init_writable},Vars), + trans_fun(Instructions, Env)]; +trans_fun([{bs_save2,Ms,IndexName}|Instructions], Env) -> + Index = + case IndexName of + {atom, start} -> 0; + _ -> IndexName+1 + end, + MsVars = [mk_var(Ms)], + [hipe_icode:mk_primop(MsVars,{hipe_bs_primop,{bs_save,Index}},MsVars) | + trans_fun(Instructions, Env)]; +trans_fun([{bs_restore2,Ms,IndexName}|Instructions], Env) -> + Index = + case IndexName of + {atom, start} -> 0; + _ -> IndexName+1 + end, + MsVars = [mk_var(Ms)], + [hipe_icode:mk_primop(MsVars,{hipe_bs_primop,{bs_restore,Index}},MsVars) | + trans_fun(Instructions, Env)]; +trans_fun([{test,bs_test_tail2,{f,Lbl},[Ms,Numbits]}| Instructions], Env) -> + MsVar = mk_var(Ms), + trans_op_call({hipe_bs_primop,{bs_test_tail,Numbits}}, + Lbl, [MsVar], [], Env, Instructions); +%%-------------------------------------------------------------------- +%% New bit syntax instructions added in February 2004 (R10B). +%%-------------------------------------------------------------------- +trans_fun([{bs_init2,{f,Lbl},Size,_Words,_LiveRegs,{field_flags,Flags0},X}| + Instructions], Env) -> + Dst = mk_var(X), + Flags = resolve_native_endianess(Flags0), + Offset = mk_var(reg), + Base = mk_var(reg), + {Name, Args} = + case Size of + NoBytes when is_integer(NoBytes) -> + {{bs_init, Size, Flags}, []}; + BitReg -> + Bits = mk_var(BitReg), + {{bs_init, Flags}, [Bits]} + end, + trans_bin_call({hipe_bs_primop,Name}, Lbl, Args, [Dst, Base, Offset], + Base, Offset, Env, Instructions); +trans_fun([{bs_init_bits,{f,Lbl},Size,_Words,_LiveRegs,{field_flags,Flags0},X}| + Instructions], Env) -> + Dst = mk_var(X), + Flags = resolve_native_endianess(Flags0), + Offset = mk_var(reg), + Base = mk_var(reg), + {Name, Args} = + case Size of + NoBits when is_integer(NoBits) -> + {{bs_init_bits, NoBits, Flags}, []}; + BitReg -> + Bits = mk_var(BitReg), + {{bs_init_bits, Flags}, [Bits]} + end, + trans_bin_call({hipe_bs_primop,Name}, Lbl, Args, [Dst, Base, Offset], + Base, Offset, Env, Instructions); +trans_fun([{bs_bits_to_bytes2, Bits, Bytes}|Instructions], Env) -> + Src = trans_arg(Bits), + Dst = mk_var(Bytes), + [hipe_icode:mk_primop([Dst], 'bsl', [Src, hipe_icode:mk_const(3)])| + trans_fun(Instructions,Env)]; +trans_fun([{bs_add, {f,Lbl}, [Old,New,Unit], Res}|Instructions], Env) -> + Dst = mk_var(Res), + Temp = mk_var(new), + MultIs = + case {New,Unit} of + {{integer, NewInt}, _} -> + [hipe_icode:mk_move(Temp, hipe_icode:mk_const(NewInt*Unit))]; + {_, 1} -> + NewVar = mk_var(New), + [hipe_icode:mk_move(Temp, NewVar)]; + _ -> + NewVar = mk_var(New), + if Lbl =:= 0 -> + [hipe_icode:mk_primop([Temp], '*', + [NewVar, hipe_icode:mk_const(Unit)])]; + true -> + Succ = mk_label(new), + [hipe_icode:mk_primop([Temp], '*', + [NewVar, hipe_icode:mk_const(Unit)], + hipe_icode:label_name(Succ), Lbl), + Succ] + end + end, + Succ2 = mk_label(new), + {FailLblName, FailCode} = + if Lbl =:= 0 -> + FailLbl = mk_label(new), + {hipe_icode:label_name(FailLbl), + [FailLbl, + hipe_icode:mk_fail([hipe_icode:mk_const(badarg)], error)]}; + true -> + {Lbl, []} + end, + IsPos = + [hipe_icode:mk_if('>=', [Temp, hipe_icode:mk_const(0)], + hipe_icode:label_name(Succ2), FailLblName)] ++ + FailCode ++ [Succ2], + AddI = + case Old of + {integer,OldInt} -> + hipe_icode:mk_primop([Dst], '+', [Temp, hipe_icode:mk_const(OldInt)]); + _ -> + OldVar = mk_var(Old), + hipe_icode:mk_primop([Dst], '+', [Temp, OldVar]) + end, + MultIs ++ IsPos ++ [AddI|trans_fun(Instructions, Env)]; +%%-------------------------------------------------------------------- +%% Bit syntax instructions added in R12B-5 (Fall 2008) +%%-------------------------------------------------------------------- +trans_fun([{bs_utf8_size,{f,Lbl},A2,A3}|Instructions], Env) -> + Bin = trans_arg(A2), + Dst = mk_var(A3), + trans_op_call({hipe_bs_primop, bs_utf8_size}, Lbl, [Bin], [Dst], Env, Instructions); +trans_fun([{test,bs_get_utf8,{f,Lbl},[Ms,_Live,{field_flags,_Flags},X]} | + Instructions], Env) -> + trans_bs_get_or_skip_utf8(Lbl, Ms, X, Instructions, Env); +trans_fun([{test,bs_skip_utf8,{f,Lbl},[Ms,_Live,{field_flags,_Flags}]} | + Instructions], Env) -> + trans_bs_get_or_skip_utf8(Lbl, Ms, 'new', Instructions, Env); +trans_fun([{bs_utf16_size,{f,Lbl},A2,A3}|Instructions], Env) -> + Bin = trans_arg(A2), + Dst = mk_var(A3), + trans_op_call({hipe_bs_primop, bs_utf16_size}, Lbl, [Bin], [Dst], Env, Instructions); +trans_fun([{test,bs_get_utf16,{f,Lbl},[Ms,_Live,{field_flags,Flags0},X]} | + Instructions], Env) -> + trans_bs_get_or_skip_utf16(Lbl, Ms, Flags0, X, Instructions, Env); +trans_fun([{test,bs_skip_utf16,{f,Lbl},[Ms,_Live,{field_flags,Flags0}]} | + Instructions], Env) -> + trans_bs_get_or_skip_utf16(Lbl, Ms, Flags0, 'new', Instructions, Env); +trans_fun([{test,bs_get_utf32,{f,Lbl},[Ms,_Live,{field_flags,Flags0},X]} | Instructions], Env) -> + trans_bs_get_or_skip_utf32(Lbl, Ms, Flags0, X, Instructions, Env); +trans_fun([{test,bs_skip_utf32,{f,Lbl},[Ms,_Live,{field_flags,Flags0}]} | Instructions], Env) -> + trans_bs_get_or_skip_utf32(Lbl, Ms, Flags0, 'new', Instructions, Env); +%%-------------------------------------------------------------------- +%%--- Translation of floating point instructions --- +%%-------------------------------------------------------------------- +%%--- fclearerror --- +trans_fun([fclearerror|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + [hipe_icode:mk_primop([], fclearerror, []) | + trans_fun(Instructions,Env)]; + _ -> + trans_fun(Instructions,Env) + end; +%%--- fcheckerror --- +trans_fun([{fcheckerror,{_,Fail}}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + ContLbl = mk_label(new), + case Fail of + 0 -> + [hipe_icode:mk_primop([], fcheckerror, [], + hipe_icode:label_name(ContLbl), []), + ContLbl | trans_fun(Instructions,Env)]; + _ -> %% Can this happen? + {Guard,Env1} = + make_guard([], fcheckerror, [], + hipe_icode:label_name(ContLbl), map_label(Fail), Env), + [Guard, ContLbl | trans_fun(Instructions,Env1)] + end; + _ -> + trans_fun(Instructions, Env) + end; +%%--- fmove --- +trans_fun([{fmove,Src,Dst}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + Dst1 = mk_var(Dst), + Src1 = trans_arg(Src), + case{hipe_icode:is_fvar(Dst1), + hipe_icode:is_fvar(Src1)} of + {true, true} -> %% fvar := fvar + [hipe_icode:mk_move(Dst1,Src1) | trans_fun(Instructions,Env)]; + {false, true} -> %% var := fvar + [hipe_icode:mk_primop([Dst1], unsafe_tag_float, [Src1]) | + trans_fun(Instructions,Env)]; + {true, false} -> %% fvar := var or fvar := constant + [hipe_icode:mk_primop([Dst1], unsafe_untag_float, [Src1]) | + trans_fun(Instructions,Env)] + end; + _ -> + trans_fun([{move,Src,Dst}|Instructions], Env) + end; +%%--- fconv --- +trans_fun([{fconv,Eterm,FReg}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + Src = trans_arg(Eterm), + ContLbl = mk_label(new), + Dst = mk_var(FReg), + [hipe_icode:mk_primop([Dst], conv_to_float, [Src], + hipe_icode:label_name(ContLbl), []), + ContLbl| trans_fun(Instructions, Env)]; + _ -> + trans_fun([{fmove,Eterm,FReg}|Instructions], Env) + end; +%%--- fadd --- +trans_fun([{arithfbif,fadd,Lab,SrcRs,DstR}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + trans_fun([{arithbif,fp_add,Lab,SrcRs,DstR}|Instructions], Env); + _ -> + trans_fun([{arithbif,'+',Lab,SrcRs,DstR}|Instructions], Env) + end; +%%--- fsub --- +trans_fun([{arithfbif,fsub,Lab,SrcRs,DstR}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + trans_fun([{arithbif,fp_sub,Lab,SrcRs,DstR}|Instructions], Env); + _ -> + trans_fun([{arithbif,'-',Lab,SrcRs,DstR}|Instructions], Env) + end; +%%--- fmult --- +trans_fun([{arithfbif,fmul,Lab,SrcRs,DstR}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + trans_fun([{arithbif,fp_mul,Lab,SrcRs,DstR}|Instructions], Env); + _ -> + trans_fun([{arithbif,'*',Lab,SrcRs,DstR}|Instructions], Env) + end; +%%--- fdiv --- +trans_fun([{arithfbif,fdiv,Lab,SrcRs,DstR}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + trans_fun([{arithbif,fp_div,Lab,SrcRs,DstR}|Instructions], Env); + _ -> + trans_fun([{arithbif,'/',Lab,SrcRs,DstR}|Instructions], Env) + end; +%%--- fnegate --- +trans_fun([{arithfbif,fnegate,Lab,[SrcR],DestR}|Instructions], Env) -> + case get(hipe_inline_fp) of + true -> + Src = trans_arg(SrcR), + Dst = mk_var(DestR), + [hipe_icode:mk_primop([Dst], fnegate, [Src])| + trans_fun(Instructions,Env)]; + _ -> + trans_fun([{arithbif,'-',Lab,[{float,0.0},SrcR],DestR}|Instructions], Env) + end; +%%-------------------------------------------------------------------- +%% New apply instructions added in April 2004 (R10B). +%%-------------------------------------------------------------------- +trans_fun([{apply,Arity}|Instructions], Env) -> + BeamArgs = extract_fun_args(Arity+2), %% +2 is for M and F + {Args,[M,F]} = lists:split(Arity,BeamArgs), + Dst = [mk_var({r,0})], + [hipe_icode:mk_comment('apply'), + hipe_icode:mk_primop(Dst, #apply_N{arity=Arity}, [M,F|Args]) + | trans_fun(Instructions,Env)]; +trans_fun([{apply_last,Arity,_N}|Instructions], Env) -> % N is StackAdjustment? + BeamArgs = extract_fun_args(Arity+2), %% +2 is for M and F + {Args,[M,F]} = lists:split(Arity,BeamArgs), + [hipe_icode:mk_comment('apply_last'), + hipe_icode:mk_enter_primop(#apply_N{arity=Arity}, [M,F|Args]) + | trans_fun(Instructions,Env)]; +%%-------------------------------------------------------------------- +%% New test instruction added in April 2004 (R10B). +%%-------------------------------------------------------------------- +%%--- is_boolean --- +trans_fun([{test,is_boolean,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(boolean,Lbl,Arg,Env), + [Code | trans_fun(Instructions,Env1)]; +%%-------------------------------------------------------------------- +%% New test instruction added in June 2005 for R11 +%%-------------------------------------------------------------------- +%%--- is_function2 --- +trans_fun([{test,is_function2,{f,Lbl},[Arg,Arity]}|Instructions], Env) -> + {Code,Env1} = trans_type_test2(function2,Lbl,Arg,Arity,Env), + [Code | trans_fun(Instructions,Env1)]; +%%-------------------------------------------------------------------- +%% New garbage-collecting BIFs added in January 2006 for R11B. +%%-------------------------------------------------------------------- +trans_fun([{gc_bif,'-',Fail,_Live,[SrcR],DstR}|Instructions], Env) -> + %% Unary minus. Change this to binary minus. + trans_fun([{arithbif,'-',Fail,[{integer,0},SrcR],DstR}|Instructions], Env); +trans_fun([{gc_bif,'+',Fail,_Live,[SrcR],DstR}|Instructions], Env) -> + %% Unary plus. Change this to a bif call. + trans_fun([{bif,'+',Fail,[SrcR],DstR}|Instructions], Env); +trans_fun([{gc_bif,Name,Fail,_Live,SrcRs,DstR}|Instructions], Env) -> + case erl_internal:guard_bif(Name, length(SrcRs)) of + false -> + %% Arithmetic instruction. + trans_fun([{arithbif,Name,Fail,SrcRs,DstR}|Instructions], Env); + true -> + %% A guard BIF. + trans_fun([{bif,Name,Fail,SrcRs,DstR}|Instructions], Env) + end; +%%-------------------------------------------------------------------- +%% Instruction for constant pool added in February 2007 for R11B-4. +%%-------------------------------------------------------------------- +trans_fun([{put_literal,{literal,Literal},DstR}|Instructions], Env) -> + DstV = mk_var(DstR), + Move = hipe_icode:mk_move(DstV, hipe_icode:mk_const(Literal)), + [Move | trans_fun(Instructions, Env)]; +%%-------------------------------------------------------------------- +%% New test instruction added in July 2007 for R12. +%%-------------------------------------------------------------------- +%%--- is_bitstr --- +trans_fun([{test,is_bitstr,{f,Lbl},[Arg]}|Instructions], Env) -> + {Code,Env1} = trans_type_test(bitstr, Lbl, Arg, Env), + [Code | trans_fun(Instructions, Env1)]; +%%-------------------------------------------------------------------- +%% New stack triming instruction added in October 2007 for R12. +%%-------------------------------------------------------------------- +trans_fun([{trim,N,NY}|Instructions], Env) -> + %% trim away N registers leaving NY registers + Moves = trans_trim(N, NY), + Moves ++ trans_fun(Instructions, Env); +%%-------------------------------------------------------------------- +%%--- ERROR HANDLING --- +%%-------------------------------------------------------------------- +trans_fun([X|_], _) -> + ?EXIT({'trans_fun/2',X}); +trans_fun([], _) -> + []. + +%%-------------------------------------------------------------------- +%% trans_call and trans_enter generate correct Icode calls/tail-calls, +%% recognizing explicit fails. +%%-------------------------------------------------------------------- + +trans_call(MFA={M,F,_A}, Dst, Args, Type) -> + handle_fail(MFA, Args, fun () -> hipe_icode:mk_call(Dst,M,F,Args,Type) end). + +trans_enter(MFA={M,F,_A}, Args, Type) -> + handle_fail(MFA, Args, fun () -> hipe_icode:mk_enter(M,F,Args,Type) end). + +handle_fail(MFA, Args, F) -> + case MFA of + {erlang,exit,1} -> + hipe_icode:mk_fail(Args,exit); + {erlang,throw,1} -> + hipe_icode:mk_fail(Args,throw); + {erlang,fault,1} -> + hipe_icode:mk_fail(Args,error); + {erlang,fault,2} -> + hipe_icode:mk_fail(Args,error); + {erlang,error,1} -> + hipe_icode:mk_fail(Args,error); + {erlang,error,2} -> + hipe_icode:mk_fail(Args,error); + _ -> + F() + end. + +%%----------------------------------------------------------------------- +%% trans_bif0(BifName, DestReg) +%% trans_bif(Arity, BifName, FailLab, Args, DestReg, Environment) +%%----------------------------------------------------------------------- + +trans_bif0(BifName, DestReg) -> + ?no_debug_msg(" found BIF0: ~p() ...~n", [BifName]), + BifRes = mk_var(DestReg), + hipe_icode:mk_call([BifRes],erlang,BifName,[],remote). + +trans_bif(Arity, BifName, Lbl, Args, DestReg, Env) -> + ?no_debug_msg(" found BIF: ~p(~p) ...~n", [BifName,Args]), + BifRes = mk_var(DestReg), + {Movs, SrcVars, Env1} = get_constants_in_temps(Args,Env), + case Lbl of + 0 -> % Bif is not in a guard + I = hipe_icode:mk_call([BifRes],erlang,BifName,SrcVars,remote), + {Movs ++ [I], Env1}; + _ -> % Bif occurs in a guard - fail silently to Lbl + {GuardI,Env2} = + make_fallthrough_guard([BifRes],{erlang,BifName,Arity},SrcVars, + map_label(Lbl),Env1), + {[Movs,GuardI], Env2} + end. + +trans_op_call(Name, Lbl, Args, Dests, Env, Instructions) -> + {Code, Env1} = trans_one_op_call(Name, Lbl, Args, Dests, Env), + [Code|trans_fun(Instructions, Env1)]. + +trans_one_op_call(Name, Lbl, Args, Dests, Env) -> + case Lbl of + 0 -> % Op is not in a guard + I = hipe_icode:mk_primop(Dests, Name, Args), + {[I], Env}; + _ -> % op occurs in a guard - fail silently to Lbl + make_fallthrough_guard(Dests, Name, Args, map_label(Lbl), Env) + end. + +%%----------------------------------------------------------------------- +%% trans_bin_call +%%----------------------------------------------------------------------- + +trans_bin_call(Name, Lbl, Args, Dests, Base, Offset, Env, Instructions) -> + {Code, Env1} = + case Lbl of + 0 -> % Op is not in a guard + I = hipe_icode:mk_primop(Dests, Name, Args), + {[I], Env}; + _ -> % op occurs in a guard - fail silently to Lbl + make_fallthrough_guard(Dests, Name, Args, map_label(Lbl), Env) + end, + [Code|trans_bin(Instructions, Base, Offset, Env1)]. + +%% Translate instructions for building binaries separately to give +%% them an appropriate state + +trans_bin([{bs_put_float,{f,Lbl},Size,Unit,{field_flags,Flags0},Source}| + Instructions], Base, Offset, Env) -> + Flags = resolve_native_endianess(Flags0), + %% Get source + {Src,SourceInstrs,ConstInfo} = + case is_var(Source) of + true -> + {mk_var(Source),[], var}; + false -> + case Source of + {float, X} when is_float(X) -> + C = trans_const(Source), + SrcVar = mk_var(new), + I = hipe_icode:mk_move(SrcVar, C), + {SrcVar,[I],pass}; + _ -> + C = trans_const(Source), + SrcVar = mk_var(new), + I = hipe_icode:mk_move(SrcVar, C), + {SrcVar,[I],fail} + end + end, + %% Get type of put_float + {Name,Args,Env2} = + case Size of + {integer,NoBits} when is_integer(NoBits), NoBits >= 0 -> + %% Create a N*Unit bits float + {{bs_put_float, NoBits*Unit, Flags, ConstInfo}, [Src, Base, Offset], Env}; + {integer,NoBits} when is_integer(NoBits), NoBits < 0 -> + ?EXIT({bad_bs_size_constant,Size}); + BitReg -> % Use a number of bits only known at runtime. + Bits = mk_var(BitReg), + {{bs_put_float, Unit, Flags, ConstInfo}, [Src,Bits,Base,Offset], Env} + end, + %% Generate code for calling the bs-op. + SourceInstrs ++ + trans_bin_call({hipe_bs_primop,Name}, Lbl, Args, [Offset], Base, Offset, Env2, Instructions); +trans_bin([{bs_put_binary,{f,Lbl},Size,Unit,{field_flags,Flags},Source}| + Instructions], Base, Offset, Env) -> + %% Get the source of the binary. + Src = trans_arg(Source), + %% Get type of put_binary + {Name, Args, Env2} = + case Size of + {atom,all} -> %% put all bits + {{bs_put_binary_all, Flags}, [Src,Base,Offset], Env}; + {integer,NoBits} when is_integer(NoBits), NoBits >= 0 -> + %% Create a N*Unit bits subbinary + {{bs_put_binary, NoBits*Unit, Flags}, [Src,Base,Offset], Env}; + {integer,NoBits} when is_integer(NoBits), NoBits < 0 -> + ?EXIT({bad_bs_size_constant,Size}); + BitReg -> % Use a number of bits only known at runtime. + Bits = mk_var(BitReg), + {{bs_put_binary, Unit, Flags}, [Src, Bits,Base,Offset], Env} + end, + %% Generate code for calling the bs-op. + trans_bin_call({hipe_bs_primop, Name}, + Lbl, Args, [Offset], + Base, Offset, Env2, Instructions); +%%--- bs_put_string --- +trans_bin([{bs_put_string,SizeInBytes,{string,String}}|Instructions], Base, + Offset, Env) -> + [hipe_icode:mk_primop([Offset], + {hipe_bs_primop,{bs_put_string, String, SizeInBytes}}, + [Base, Offset]) | + trans_bin(Instructions, Base, Offset, Env)]; +trans_bin([{bs_put_integer,{f,Lbl},Size,Unit,{field_flags,Flags0},Source}| + Instructions], Base, Offset, Env) -> + Flags = resolve_native_endianess(Flags0), + %% Get size-type + + %% Get the source of the binary. + {Src, SrcInstrs, ConstInfo} = + case is_var(Source) of + true -> + {mk_var(Source), [], var}; + false -> + case Source of + {integer, X} when is_integer(X) -> + C = trans_const(Source), + SrcVar = mk_var(new), + I = hipe_icode:mk_move(SrcVar, C), + {SrcVar,[I], pass}; + _ -> + C = trans_const(Source), + SrcVar = mk_var(new), + I = hipe_icode:mk_move(SrcVar, C), + {SrcVar,[I], fail} + + end + end, + {Name, Args, Env2} = + case is_var(Size) of + true -> + SVar = mk_var(Size), + {{bs_put_integer,Unit,Flags,ConstInfo}, [SVar, Base, Offset], Env}; + false -> + case Size of + {integer, NoBits} when NoBits >= 0 -> + {{bs_put_integer,NoBits*Unit,Flags,ConstInfo}, [Base, Offset], Env}; + _ -> + ?EXIT({bad_bs_size_constant,Size}) + end + end, + SrcInstrs ++ trans_bin_call({hipe_bs_primop, Name}, + Lbl, [Src|Args], [Offset], Base, Offset, Env2, Instructions); +%%---------------------------------------------------------------- +%% New binary construction instructions added in R12B-5 (Fall 2008). +%%---------------------------------------------------------------- +trans_bin([{bs_put_utf8,{f,Lbl},_FF,A3}|Instructions], Base, Offset, Env) -> + Src = trans_arg(A3), + Args = [Src, Base, Offset], + trans_bin_call({hipe_bs_primop, bs_put_utf8}, Lbl, Args, [Offset], Base, Offset, Env, Instructions); +trans_bin([{bs_put_utf16,{f,Lbl},{field_flags,Flags0},A3}|Instructions], Base, Offset, Env) -> + Src = trans_arg(A3), + Args = [Src, Base, Offset], + Flags = resolve_native_endianess(Flags0), + Name = {bs_put_utf16, Flags}, + trans_bin_call({hipe_bs_primop, Name}, Lbl, Args, [Offset], Base, Offset, Env, Instructions); +trans_bin([{bs_put_utf32,F={f,Lbl},FF={field_flags,_Flags0},A3}|Instructions], Base, Offset, Env) -> + Src = trans_arg(A3), + trans_bin_call({hipe_bs_primop,bs_validate_unicode}, Lbl, [Src], [], Base, Offset, Env, + [{bs_put_integer,F,{integer,32},1,FF,A3} | Instructions]); +%%---------------------------------------------------------------- +%% Base cases for the end of a binary construction sequence. +%%---------------------------------------------------------------- +trans_bin([{bs_final2,Src,Dst}|Instructions], _Base, Offset, Env) -> + [hipe_icode:mk_primop([mk_var(Dst)], {hipe_bs_primop, bs_final}, + [trans_arg(Src),Offset]) + |trans_fun(Instructions, Env)]; +trans_bin(Instructions, _Base, _Offset, Env) -> + trans_fun(Instructions, Env). + +%% this translates bs_get_utf8 and bs_skip_utf8 (get with new unused dst) +trans_bs_get_or_skip_utf8(Lbl, Ms, X, Instructions, Env) -> + Dst = mk_var(X), + MsVar = mk_var(Ms), + trans_op_call({hipe_bs_primop,bs_get_utf8}, Lbl, [MsVar], [Dst,MsVar], Env, Instructions). + +%% this translates bs_get_utf16 and bs_skip_utf16 (get with new unused dst) +trans_bs_get_or_skip_utf16(Lbl, Ms, Flags0, X, Instructions, Env) -> + Dst = mk_var(X), + MsVar = mk_var(Ms), + Flags = resolve_native_endianess(Flags0), + Name = {bs_get_utf16,Flags}, + trans_op_call({hipe_bs_primop,Name}, Lbl, [MsVar], [Dst,MsVar], Env, Instructions). + +%% this translates bs_get_utf32 and bs_skip_utf32 (get with new unused dst) +trans_bs_get_or_skip_utf32(Lbl, Ms, Flags0, X, Instructions, Env) -> + Dst = mk_var(X), + MsVar = mk_var(Ms), + Flags = resolve_native_endianess(Flags0), + {I1,Env1} = trans_one_op_call({hipe_bs_primop,{bs_get_integer,32,Flags}}, + Lbl, [MsVar], [Dst,MsVar], Env), + I1 ++ trans_op_call({hipe_bs_primop,bs_validate_unicode_retract}, + Lbl, [Dst,MsVar], [MsVar], Env1, Instructions). + +%%----------------------------------------------------------------------- +%% trans_arith(Op, SrcVars, Des, Lab, Env) -> { Icode, NewEnv } +%% A failure label of type {f,0} means in a body. +%% A failure label of type {f,L} where L>0 means in a guard. +%% Within a guard a failure should branch to the next guard and +%% not trigger an exception!! +%% Handles body arithmetic with Icode primops! +%% Handles guard arithmetic with Icode guardops! +%%----------------------------------------------------------------------- + +trans_arith(Op, SrcRs, DstR, Lbl, Env) -> + {Movs,SrcVars,Env1} = get_constants_in_temps(SrcRs,Env), + DstVar = mk_var(DstR), + %%io:format("~w:trans_arith()\n ~w := ~w ~w\n", + %% [?MODULE,DstVar,SrcVars,Op]), + case Lbl of + 0 -> % Body arithmetic + Primop = hipe_icode:mk_primop([DstVar], arith_op_name(Op), SrcVars), + {Movs++[Primop], Env1}; + _ -> % Guard arithmetic + {Guard,Env2} = + make_fallthrough_guard([DstVar], arith_op_name(Op), SrcVars, + map_label(Lbl), Env1), + {[Movs,Guard], Env2} + end. + +%% Prevent arbitrary names from leaking into Icode from BEAM. +arith_op_name('+') -> '+'; +arith_op_name('-') -> '-'; +arith_op_name('*') -> '*'; +arith_op_name('/') -> '/'; +arith_op_name('div') -> 'div'; +arith_op_name('fp_add') -> 'fp_add'; +arith_op_name('fp_sub') -> 'fp_sub'; +arith_op_name('fp_mul') -> 'fp_mul'; +arith_op_name('fp_div') -> 'fp_div'; +arith_op_name('rem') -> 'rem'; +arith_op_name('bsl') -> 'bsl'; +arith_op_name('bsr') -> 'bsr'; +arith_op_name('band') -> 'band'; +arith_op_name('bor') -> 'bor'; +arith_op_name('bxor') -> 'bxor'; +arith_op_name('bnot') -> 'bnot'. + +%%----------------------------------------------------------------------- +%%----------------------------------------------------------------------- + +trans_test_guard(TestOp,F,Arg1,Arg2,Env) -> + {Movs,Vars,Env1} = get_constants_in_temps([Arg1,Arg2],Env), + True = mk_label(new), + I = hipe_icode:mk_if(TestOp,Vars,hipe_icode:label_name(True),map_label(F)), + {[Movs,I,True], Env1}. + +%%----------------------------------------------------------------------- +%%----------------------------------------------------------------------- + +make_fallthrough_guard(DstVar,GuardOp,Args,FailLName,Env) -> + ContL = mk_label(new), + ContLName = hipe_icode:label_name(ContL), + {Instrs, NewDsts} = clone_dsts(DstVar), + {Guard,Env1} = make_guard(NewDsts,GuardOp,Args,ContLName,FailLName,Env), + {[Guard,ContL]++Instrs,Env1}. + +%% Make sure DstVar gets initialised to a dummy value after a fail: +%make_guard(Dests,{hipe_bs_primop,Primop},Args,ContLName,FailLName,Env) -> +% {[hipe_icode:mk_guardop(Dests,{hipe_bs_primop,Primop},Args,ContLName,FailLName)], +% Env}; +make_guard(Dests=[_|_],GuardOp,Args,ContLName,FailLName,Env) -> + TmpFailL = mk_label(new), + TmpFailLName = hipe_icode:label_name(TmpFailL), + GuardOpIns = hipe_icode:mk_guardop(Dests,GuardOp,Args, + ContLName,TmpFailLName), + FailCode = [TmpFailL, + nillify_all(Dests), + hipe_icode:mk_goto(FailLName)], + {[GuardOpIns|FailCode], Env}; +%% A guard that does not return anything: +make_guard([],GuardOp,Args,ContLName,FailLName,Env) -> + {[hipe_icode:mk_guardop([],GuardOp,Args,ContLName,FailLName)], + Env}. + +nillify_all([Var|Vars]) -> + [hipe_icode:mk_move(Var,hipe_icode:mk_const([]))|nillify_all(Vars)]; +nillify_all([]) -> []. + +clone_dsts(Dests) -> + clone_dsts(Dests, [],[]). + +clone_dsts([Dest|Dests], Instrs, NewDests) -> + {I,ND} = clone_dst(Dest), + clone_dsts(Dests, [I|Instrs], [ND|NewDests]); +clone_dsts([], Instrs, NewDests) -> + {lists:reverse(Instrs), lists:reverse(NewDests)}. + +clone_dst(Dest) -> + New = + case hipe_icode:is_reg(Dest) of + true -> + mk_var(reg); + false -> + true = hipe_icode:is_var(Dest), + mk_var(new) + end, + {hipe_icode:mk_move(Dest, New), New}. + + +%%----------------------------------------------------------------------- +%% trans_type_test(Test, Lbl, Arg, Env) -> { Icode, NewEnv } +%% Handles all unary type tests like is_integer etc. +%%----------------------------------------------------------------------- + +trans_type_test(Test, Lbl, Arg, Env) -> + True = mk_label(new), + {Move,Var,Env1} = mk_move_and_var(Arg,Env), + I = hipe_icode:mk_type([Var], Test, + hipe_icode:label_name(True), map_label(Lbl)), + {[Move,I,True],Env1}. + +%% +%% This handles binary type tests. Currently, the only such is the new +%% is_function/2 BIF. +%% +trans_type_test2(function2, Lbl, Arg, Arity, Env) -> + True = mk_label(new), + {Move1,Var1,Env1} = mk_move_and_var(Arg, Env), + {Move2,Var2,Env2} = mk_move_and_var(Arity, Env1), + I = hipe_icode:mk_type([Var1,Var2], function2, + hipe_icode:label_name(True), map_label(Lbl)), + {[Move1,Move2,I,True],Env2}. + +%%----------------------------------------------------------------------- +%% trans_puts(Code, Environment) -> +%% { Movs, Code, Vars, NewEnv } +%%----------------------------------------------------------------------- + +trans_puts(Code, Env) -> + trans_puts(Code, [], [], Env). + +trans_puts([{put,X}|Code], Vars, Moves, Env) -> + case type(X) of + var -> + Var = mk_var(X), + trans_puts(Code, [Var|Vars], Moves, Env); + #beam_const{value=C} -> + Var = mk_var(new), + Move = hipe_icode:mk_move(Var, hipe_icode:mk_const(C)), + trans_puts(Code, [Var|Vars], [Move|Moves], Env) + end; +trans_puts(Code, Vars, Moves, Env) -> %% No more put operations + {Moves, Code, Vars, Env}. + +%%----------------------------------------------------------------------- +%% The code for this instruction is a bit large because we are treating +%% different cases differently. We want to use the icode `type' +%% instruction when it is applicable to take care of match expressions. +%%----------------------------------------------------------------------- + +trans_is_eq_exact(Lbl, Arg1, Arg2, Env) -> + case {is_var(Arg1),is_var(Arg2)} of + {true,true} -> + True = mk_label(new), + I = hipe_icode:mk_if('=:=', + [mk_var(Arg1),mk_var(Arg2)], + hipe_icode:label_name(True), map_label(Lbl)), + {[I,True], Env}; + {true,false} -> %% right argument is a constant -- use type()! + trans_is_eq_exact_var_const(Lbl, Arg1, Arg2, Env); + {false,true} -> %% mirror of the case above; swap args + trans_is_eq_exact_var_const(Lbl, Arg2, Arg1, Env); + {false,false} -> %% both arguments are constants !!! + case Arg1 =:= Arg2 of + true -> + {[], Env}; + false -> + Never = mk_label(new), + I = hipe_icode:mk_goto(map_label(Lbl)), + {[I,Never], Env} + end + end. + +trans_is_eq_exact_var_const(Lbl, Arg1, Arg2, Env) -> % var =:= const + True = mk_label(new), + NewArg1 = mk_var(Arg1), + TrueLabName = hipe_icode:label_name(True), + FalseLabName = map_label(Lbl), + I = case Arg2 of + {float,Float} -> + hipe_icode:mk_if('=:=', + [NewArg1, hipe_icode:mk_const(Float)], + TrueLabName, FalseLabName); + {literal,Literal} -> + hipe_icode:mk_if('=:=', + [NewArg1, hipe_icode:mk_const(Literal)], + TrueLabName, FalseLabName); + _ -> + hipe_icode:mk_type([NewArg1], Arg2, TrueLabName, FalseLabName) + end, + {[I,True], Env}. + +%%----------------------------------------------------------------------- +%% ... and this is analogous to the above +%%----------------------------------------------------------------------- + +trans_is_ne_exact(Lbl, Arg1, Arg2, Env) -> + case {is_var(Arg1),is_var(Arg2)} of + {true,true} -> + True = mk_label(new), + I = hipe_icode:mk_if('=/=', + [mk_var(Arg1),mk_var(Arg2)], + hipe_icode:label_name(True), map_label(Lbl)), + {[I,True], Env}; + {true,false} -> %% right argument is a constant -- use type()! + trans_is_ne_exact_var_const(Lbl, Arg1, Arg2, Env); + {false,true} -> %% mirror of the case above; swap args + trans_is_ne_exact_var_const(Lbl, Arg2, Arg1, Env); + {false,false} -> %% both arguments are constants !!! + case Arg1 =/= Arg2 of + true -> + {[], Env}; + false -> + Never = mk_label(new), + I = hipe_icode:mk_goto(map_label(Lbl)), + {[I,Never], Env} + end + end. + +trans_is_ne_exact_var_const(Lbl, Arg1, Arg2, Env) -> % var =/= const + True = mk_label(new), + NewArg1 = mk_var(Arg1), + TrueLabName = hipe_icode:label_name(True), + FalseLabName = map_label(Lbl), + I = case Arg2 of + {float,Float} -> + hipe_icode:mk_if('=/=', + [NewArg1, hipe_icode:mk_const(Float)], + TrueLabName, FalseLabName); + {literal,Literal} -> + hipe_icode:mk_if('=/=', + [NewArg1, hipe_icode:mk_const(Literal)], + TrueLabName, FalseLabName); + _ -> + hipe_icode:mk_type([NewArg1], Arg2, FalseLabName, TrueLabName) + end, + {[I,True], Env}. + +%%----------------------------------------------------------------------- +%% Try to do a relatively straightforward optimization: if equality with +%% an atom is used, then convert this test to use of exact equality test +%% with the same atom (which in turn will be translated to a `type' test +%% instruction by the code of trans_is_eq_exact_var_const/4 above). +%%----------------------------------------------------------------------- + +trans_is_eq(Lbl, Arg1, Arg2, Env) -> + case {is_var(Arg1),is_var(Arg2)} of + {true,true} -> %% not much can be done in this case + trans_test_guard('==', Lbl, Arg1, Arg2, Env); + {true,false} -> %% optimize this case, if possible + case Arg2 of + {atom,_SomeAtom} -> + trans_is_eq_exact_var_const(Lbl, Arg1, Arg2, Env); + _ -> + trans_test_guard('==', Lbl, Arg1, Arg2, Env) + end; + {false,true} -> %% probably happens rarely; hence the recursive call + trans_is_eq(Lbl, Arg2, Arg1, Env); + {false,false} -> %% both arguments are constants !!! + case Arg1 == Arg2 of + true -> + {[], Env}; + false -> + Never = mk_label(new), + I = hipe_icode:mk_goto(map_label(Lbl)), + {[I,Never], Env} + end + end. + +%%----------------------------------------------------------------------- +%% ... and this is analogous to the above +%%----------------------------------------------------------------------- + +trans_is_ne(Lbl, Arg1, Arg2, Env) -> + case {is_var(Arg1),is_var(Arg2)} of + {true,true} -> %% not much can be done in this case + trans_test_guard('/=', Lbl, Arg1, Arg2, Env); + {true,false} -> %% optimize this case, if possible + case Arg2 of + {atom,_SomeAtom} -> + trans_is_ne_exact_var_const(Lbl, Arg1, Arg2, Env); + _ -> + trans_test_guard('/=', Lbl, Arg1, Arg2, Env) + end; + {false,true} -> %% probably happens rarely; hence the recursive call + trans_is_ne(Lbl, Arg2, Arg1, Env); + {false,false} -> %% both arguments are constants !!! + case Arg1 /= Arg2 of + true -> + {[], Env}; + false -> + Never = mk_label(new), + I = hipe_icode:mk_goto(map_label(Lbl)), + {[I,Never], Env} + end + end. + + +%%----------------------------------------------------------------------- +%% Translates an allocate instruction into a sequence of initializations +%%----------------------------------------------------------------------- + +trans_allocate(N) -> + trans_allocate(N, []). + +trans_allocate(0, Acc) -> + Acc; +trans_allocate(N, Acc) -> + Move = hipe_icode:mk_move(mk_var({y,N-1}), + hipe_icode:mk_const('dummy_value')), + trans_allocate(N-1, [Move|Acc]). + +%%----------------------------------------------------------------------- +%% Translates a trim instruction into a sequence of moves +%%----------------------------------------------------------------------- + +trans_trim(N, NY) -> + lists:reverse(trans_trim(N, NY, 0, [])). + +trans_trim(_, 0, _, Acc) -> + Acc; +trans_trim(N, NY, Y, Acc) -> + Move = hipe_icode:mk_move(mk_var({y,Y}), mk_var({y,N})), + trans_trim(N+1, NY-1, Y+1, [Move|Acc]). + +%%----------------------------------------------------------------------- +%%----------------------------------------------------------------------- + +mk_move_and_var(Var, Env) -> + case type(Var) of + var -> + V = mk_var(Var), + {[], V, Env}; + #beam_const{value=C} -> + V = mk_var(new), + {[hipe_icode:mk_move(V,hipe_icode:mk_const(C))], V, Env} + end. + +%%----------------------------------------------------------------------- +%% Find names of closures and number of free vars. +%%----------------------------------------------------------------------- + +closure_info_mfa(#closure_info{mfa=MFA}) -> MFA. +closure_info_arity(#closure_info{arity=Arity}) -> Arity. +%% closure_info_fv_arity(#closure_info{fv_arity=Arity}) -> Arity. + +find_closure_info(Code) -> mod_find_closure_info(Code, []). + +mod_find_closure_info([FunCode|Fs], CI) -> + mod_find_closure_info(Fs, find_closure_info(FunCode, CI)); +mod_find_closure_info([], CI) -> + CI. + +find_closure_info([{patched_make_fun,MFA={_M,_F,A},_Magic,FreeVarNum,_Index}|BeamCode], + ClosureInfo) -> + NewClosure = %% A-FreeVarNum+1 (The real arity + 1 for the closure) + #closure_info{mfa=MFA, arity=A-FreeVarNum+1, fv_arity=FreeVarNum}, + find_closure_info(BeamCode, [NewClosure|ClosureInfo]); +find_closure_info([_Inst|BeamCode], ClosureInfo) -> + find_closure_info(BeamCode, ClosureInfo); +find_closure_info([], ClosureInfo) -> + ClosureInfo. + +%%----------------------------------------------------------------------- +%% Is closure +%%----------------------------------------------------------------------- + +get_closure_info(MFA, [CI|Rest]) -> + case closure_info_mfa(CI) of + MFA -> CI; + _ -> get_closure_info(MFA, Rest) + end; +get_closure_info(_, []) -> + not_a_closure. + +%%----------------------------------------------------------------------- +%% Patch closure entry. +%%----------------------------------------------------------------------- + +%% NOTE: this changes the number of parameters in the ICode function, +%% but does *not* change the arity in the function name. Thus, all +%% closure-functions have the exact same names in Beam and in native +%% code, although they have different calling conventions. + +patch_closure_entry(Icode, ClosureInfo)-> + Arity = closure_info_arity(ClosureInfo), + %% ?msg("Arity ~w\n",[Arity]), + {Args, Closure, FreeVars} = + split_params(Arity, hipe_icode:icode_params(Icode), []), + [Start|_] = hipe_icode:icode_code(Icode), + {_LMin, LMax} = hipe_icode:icode_label_range(Icode), + hipe_gensym:set_label(icode,LMax+1), + {_VMin, VMax} = hipe_icode:icode_var_range(Icode), + hipe_gensym:set_var(icode,VMax+1), + MoveCode = gen_get_free_vars(FreeVars, Closure, + hipe_icode:label_name(Start)), + Icode1 = hipe_icode:icode_code_update(Icode, MoveCode ++ + hipe_icode:icode_code(Icode)), + Icode2 = hipe_icode:icode_params_update(Icode1, Args), + %% Arity - 1 since the original arity did not have the closure argument. + Icode3 = hipe_icode:icode_closure_arity_update(Icode2, Arity-1), + Icode3. + +%%----------------------------------------------------------------------- + +gen_get_free_vars(Vars, Closure, StartName) -> + [hipe_icode:mk_new_label()] ++ + get_free_vars(Vars, Closure, 1, []) ++ [hipe_icode:mk_goto(StartName)]. + +get_free_vars([V|Vs], Closure, No, MoveCode) -> + %% TempV = hipe_icode:mk_new_var(), + get_free_vars(Vs, Closure, No+1, + [%% hipe_icode:mk_move(TempV,hipe_icode:mk_const(No)), + hipe_icode:mk_primop([V], #closure_element{n=No}, [Closure]) + |MoveCode]); +get_free_vars([],_,_,MoveCode) -> + MoveCode. + +%%----------------------------------------------------------------------- + +split_params(1, [Closure|_OrgArgs] = Params, Args) -> + {lists:reverse([Closure|Args]), Closure, Params}; +split_params(1, [], Args) -> + Closure = hipe_icode:mk_new_var(), + {lists:reverse([Closure|Args]), Closure, []}; +split_params(N, [ArgN|OrgArgs], Args) -> + split_params(N-1, OrgArgs, [ArgN|Args]). + +%%----------------------------------------------------------------------- + +preprocess_code(ModuleCode) -> + PatchedCode = patch_R7_funs(ModuleCode), + ClosureInfo = find_closure_info(PatchedCode), + {PatchedCode, ClosureInfo}. + +%%----------------------------------------------------------------------- +%% Patches the "make_fun" BEAM instructions of R7 so that they also +%% contain the index that the BEAM loader generates for funs. +%% +%% The index starts from 0 and is incremented by 1 for each make_fun +%% instruction encountered. +%% +%% Retained only for compatibility with BEAM code prior to R8. +%% +%% Temporarily, it also rewrites R8-PRE-RELEASE "make_fun2" +%% instructions, since their embedded indices don't work. +%%----------------------------------------------------------------------- + +patch_R7_funs(ModuleCode) -> + patch_make_funs(ModuleCode, 0). + +patch_make_funs([FunCode0|Fs], FunIndex0) -> + {PatchedFunCode,FunIndex} = patch_make_funs(FunCode0, FunIndex0, []), + [PatchedFunCode|patch_make_funs(Fs, FunIndex)]; +patch_make_funs([], _) -> []. + +patch_make_funs([{make_fun,MFA,Magic,FreeVarNum}|Is], FunIndex, Acc) -> + Patched = {patched_make_fun,MFA,Magic,FreeVarNum,FunIndex}, + patch_make_funs(Is, FunIndex+1, [Patched|Acc]); +patch_make_funs([{make_fun2,MFA,_BogusIndex,Magic,FreeVarNum}|Is], FunIndex, Acc) -> + Patched = {patched_make_fun,MFA,Magic,FreeVarNum,FunIndex}, + patch_make_funs(Is, FunIndex+1, [Patched|Acc]); +patch_make_funs([I|Is], FunIndex, Acc) -> + patch_make_funs(Is, FunIndex, [I|Acc]); +patch_make_funs([], FunIndex, Acc) -> + {lists:reverse(Acc),FunIndex}. + +%%----------------------------------------------------------------------- + +find_mfa([{label,_}|Code]) -> + find_mfa(Code); +find_mfa([{func_info,{atom,M},{atom,F},A}|_]) + when is_atom(M), is_atom(F), is_integer(A), 0 =< A, A =< 255 -> + {M, F, A}. + +%%----------------------------------------------------------------------- + +%% Localize a particular function in a module +get_fun([[L, {func_info,{atom,M},{atom,F},A} | Is] | _], M,F,A) -> + [L, {func_info,{atom,M},{atom,F},A} | Is]; +get_fun([[_L1,_L2, {func_info,{atom,M},{atom,F},A} = MFA| _Is] | _], M,F,A) -> + ?WARNING_MSG("Consecutive labels found; please re-create the .beam file~n", []), + [_L1,_L2, MFA | _Is]; +get_fun([_|Rest], M,F,A) -> + get_fun(Rest, M,F,A). + +%%----------------------------------------------------------------------- +%% Takes a list of arguments and returns the constants of them into +%% fresh temporaries. Return a triple consisting of a list of move +%% instructions, a list of proper icode arguments and the new environment. +%%----------------------------------------------------------------------- + +get_constants_in_temps(Args, Env) -> + get_constants_in_temps(Args, [], [], Env). + +get_constants_in_temps([Arg|Args], Instrs, Temps, Env) -> + case get_constant_in_temp(Arg, Env) of + {none,ArgVar,Env1} -> + get_constants_in_temps(Args, Instrs, [ArgVar|Temps], Env1); + {Instr,Temp,Env1} -> + get_constants_in_temps(Args, [Instr|Instrs], [Temp|Temps], Env1) + end; +get_constants_in_temps([], Instrs, Temps, Env) -> + {lists:reverse(Instrs), lists:reverse(Temps), Env}. + +%% If Arg is a constant then put Arg in a fresh temp! +get_constant_in_temp(Arg, Env) -> + case is_var(Arg) of + true -> % Convert into Icode variable format before return + {none, mk_var(Arg), Env}; + false -> % Create a new temp and move the constant into it + Temp = mk_var(new), + Const = trans_const(Arg), + {hipe_icode:mk_move(Temp, Const), Temp, Env} + end. + +%%----------------------------------------------------------------------- +%% Makes a list of function arguments. +%%----------------------------------------------------------------------- + +extract_fun_args(A) -> + lists:reverse(extract_fun_args1(A)). + +extract_fun_args1(0) -> + []; +extract_fun_args1(1) -> + [mk_var({r,0})]; +extract_fun_args1(N) -> + [mk_var({x,N-1}) | extract_fun_args1(N-1)]. + +%%----------------------------------------------------------------------- +%% Auxiliary translation for arguments of select_val & select_tuple_arity +%%----------------------------------------------------------------------- + +trans_select_stuff(Reg, CaseList) -> + SwVar = case is_var(Reg) of + true -> + mk_var(Reg); + false -> + trans_const(Reg) + end, + CasePairs = trans_case_list(CaseList), + {SwVar,CasePairs}. + +trans_case_list([Symbol,{f,Lbl}|L]) -> + [{trans_const(Symbol),map_label(Lbl)} | trans_case_list(L)]; +trans_case_list([]) -> + []. + +%%----------------------------------------------------------------------- +%% Makes an Icode argument from a BEAM argument. +%%----------------------------------------------------------------------- + +trans_arg(Arg) -> + case is_var(Arg) of + true -> + mk_var(Arg); + false -> + trans_const(Arg) + end. + +%%----------------------------------------------------------------------- +%% Makes an Icode constant from a BEAM constant. +%%----------------------------------------------------------------------- + +trans_const(Const) -> + case Const of + {atom,Atom} when is_atom(Atom) -> + hipe_icode:mk_const(Atom); + {integer,N} when is_integer(N) -> + hipe_icode:mk_const(N); + {float,Float} when is_float(Float) -> + hipe_icode:mk_const(Float); + {string,String} -> + hipe_icode:mk_const(String); + {literal,Literal} -> + hipe_icode:mk_const(Literal); + nil -> + hipe_icode:mk_const([]); + Int when is_integer(Int) -> + hipe_icode:mk_const(Int) + end. + +%%----------------------------------------------------------------------- +%% Make an icode variable of proper type +%% (Variables mod 5) =:= 0 are X regs +%% (Variables mod 5) =:= 1 are Y regs +%% (Variables mod 5) =:= 2 are FR regs +%% (Variables mod 5) =:= 3 are new temporaries +%% (Variables mod 5) =:= 4 are new register temporaries +%% Tell hipe_gensym to update its state for each new thing created!! +%%----------------------------------------------------------------------- + +mk_var({r,0}) -> + hipe_icode:mk_var(0); +mk_var({x,R}) when is_integer(R) -> + V = 5*R, + hipe_gensym:update_vrange(icode,V), + hipe_icode:mk_var(V); +mk_var({y,R}) when is_integer(R) -> + V = (5*R)+1, + hipe_gensym:update_vrange(icode,V), + hipe_icode:mk_var(V); +mk_var({fr,R}) when is_integer(R) -> + V = (5*R)+2, + hipe_gensym:update_vrange(icode,V), + case get(hipe_inline_fp) of + true -> + hipe_icode:mk_fvar(V); + _ -> + hipe_icode:mk_var(V) + end; +mk_var(new) -> + T = hipe_gensym:new_var(icode), + V = (5*T)+3, + hipe_gensym:update_vrange(icode,V), + hipe_icode:mk_var(V); +mk_var(reg) -> + T = hipe_gensym:new_var(icode), + V = (5*T)+4, + hipe_gensym:update_vrange(icode,V), + hipe_icode:mk_reg(V). + +%%----------------------------------------------------------------------- +%% Make an icode label of proper type +%% (Labels mod 2) =:= 0 are actually occuring in the BEAM code +%% (Labels mod 2) =:= 1 are new labels generated by the translation +%%----------------------------------------------------------------------- + +mk_label(L) when is_integer(L) -> + LL = 2 * L, + hipe_gensym:update_lblrange(icode, LL), + hipe_icode:mk_label(LL); +mk_label(new) -> + L = hipe_gensym:new_label(icode), + LL = (2 * L) + 1, + hipe_gensym:update_lblrange(icode, LL), + hipe_icode:mk_label(LL). + +%% Maps from the BEAM's labelling scheme to our labelling scheme. +%% See mk_label to understand how it works. + +map_label(L) -> + L bsl 1. % faster and more type-friendly version of 2 * L + +%%----------------------------------------------------------------------- +%% Returns the type of the given variables. +%%----------------------------------------------------------------------- + +type({x,_}) -> + var; +type({y,_}) -> + var; +type({fr,_}) -> + var; +type({atom,A}) when is_atom(A) -> + #beam_const{value=A}; +type(nil) -> + #beam_const{value=[]}; +type({integer,X}) when is_integer(X) -> + #beam_const{value=X}; +type({float,X}) when is_float(X) -> + #beam_const{value=X}; +type({literal,X}) -> + #beam_const{value=X}. + +%%----------------------------------------------------------------------- +%% Returns true iff the argument is a variable. +%%----------------------------------------------------------------------- + +is_var({x,_}) -> + true; +is_var({y,_}) -> + true; +is_var({fr,_}) -> + true; +is_var({atom,A}) when is_atom(A) -> + false; +is_var(nil) -> + false; +is_var({integer,N}) when is_integer(N) -> + false; +is_var({float,F}) when is_float(F) -> + false; +is_var({literal,_Literal}) -> + false. + +%%----------------------------------------------------------------------- +%% Fixes the code for catches by adding some code. +%%----------------------------------------------------------------------- + +fix_catches(Code) -> + fix_catches(Code, gb_trees:empty()). + +%% We need to handle merged catch blocks, that is multiple 'catch' with +%% only one 'catch_end', or multiple 'try' with one 'try_case'. (Catch +%% and try can never be merged.) All occurrences of 'catch' or 'try' +%% with a particular fail-to label are assumed to only occur before the +%% corresponding 'catch_end'/'try_end' in the Beam code. + +fix_catches([{'catch',N,Lbl},ContLbl|Code], HandledCatchLbls) -> + fix_catch('catch',Lbl,ContLbl,Code,HandledCatchLbls,{catch_end,N}); +fix_catches([{'try',N,Lbl},ContLbl|Code], HandledCatchLbls) -> + fix_catch('try',Lbl,ContLbl,Code,HandledCatchLbls,{try_case,N}); +fix_catches([Instr|Code], HandledCatchLbls) -> + [Instr|fix_catches(Code, HandledCatchLbls)]; +fix_catches([], _HandledCatchLbls) -> + []. + +fix_catch(Type, Lbl, ContLbl, Code, HandledCatchLbls, Instr) -> + TLbl = {Type, Lbl}, + case gb_trees:lookup(TLbl, HandledCatchLbls) of + {value, Catch} when is_integer(Catch) -> + NewCode = fix_catches(Code, HandledCatchLbls), + Cont = hipe_icode:label_name(ContLbl), + [hipe_icode:mk_begin_try(Catch,Cont),ContLbl | NewCode]; + none -> + OldCatch = map_label(Lbl), + OldCatchLbl = hipe_icode:mk_label(OldCatch), + {CodeToCatch,RestOfCode} = split_code(Code,OldCatchLbl,Instr), + NewCatchLbl = mk_label(new), + NewCatch = hipe_icode:label_name(NewCatchLbl), + %% The rest of the code cannot contain catches with the same label. + RestOfCode1 = fix_catches(RestOfCode, HandledCatchLbls), + %% The catched code *can* contain more catches with the same label. + NewHandledCatchLbls = gb_trees:insert(TLbl, NewCatch, HandledCatchLbls), + CatchedCode = fix_catches(CodeToCatch, NewHandledCatchLbls), + %% The variables which will get the tag, value, and trace. + Vars = [mk_var({r,0}), mk_var({x,1}), mk_var({x,2})], + Cont = hipe_icode:label_name(ContLbl), + [hipe_icode:mk_begin_try(NewCatch,Cont), ContLbl] + ++ CatchedCode + ++ [mk_label(new), % dummy label before the goto + hipe_icode:mk_goto(OldCatch), % normal execution path + NewCatchLbl, % exception handing enters here + hipe_icode:mk_begin_handler(Vars)] + ++ catch_handler(Type, Vars, OldCatchLbl) + ++ RestOfCode1 % back to normal execution + end. + +catch_handler('try', _Vars, OldCatchLbl) -> + %% A try just falls through to the old fail-to label which marked the + %% start of the try_case block. All variables are set up as expected. + [OldCatchLbl]; +catch_handler('catch', [TagVar,ValueVar,TraceVar], OldCatchLbl) -> + %% This basically implements a catch as a try-expression. We must jump + %% to the given end label afterwards so we don't pass through both the + %% begin_handler and the end_try. + ContLbl = mk_label(new), + Cont = hipe_icode:label_name(ContLbl), + ThrowLbl = mk_label(new), + NoThrowLbl = mk_label(new), + ExitLbl = mk_label(new), + ErrorLbl = mk_label(new), + Dst = mk_var({r,0}), + [hipe_icode:mk_if('=:=', [TagVar, hipe_icode:mk_const('throw')], + hipe_icode:label_name(ThrowLbl), + hipe_icode:label_name(NoThrowLbl)), + ThrowLbl, + hipe_icode:mk_move(Dst, ValueVar), + hipe_icode:mk_goto(Cont), + NoThrowLbl, + hipe_icode:mk_if('=:=', [TagVar, hipe_icode:mk_const('exit')], + hipe_icode:label_name(ExitLbl), + hipe_icode:label_name(ErrorLbl)), + ExitLbl, + hipe_icode:mk_primop([Dst],mktuple,[hipe_icode:mk_const('EXIT'), + ValueVar]), + hipe_icode:mk_goto(Cont), + ErrorLbl, + %% We use the trace variable to hold the symbolic trace. Its previous + %% value is just that in p->ftrace, so get_stacktrace() works fine. + hipe_icode:mk_call([TraceVar],erlang,get_stacktrace,[],remote), + hipe_icode:mk_primop([ValueVar],mktuple, [ValueVar, TraceVar]), + hipe_icode:mk_goto(hipe_icode:label_name(ExitLbl)), + OldCatchLbl, % normal execution paths must go through end_try + hipe_icode:mk_end_try(), + hipe_icode:mk_goto(Cont), + ContLbl]. + +%% Note that it is the fail-to label that is the important thing, but +%% for 'catch' we want to make sure that the label is followed by the +%% 'catch_end' instruction - if it is not, we might have a real problem. +%% Checking that a 'try' label is followed by 'try_case' is not as +%% important, but we get that as a bonus. + +split_code([First|Code], Label, Instr) -> + split_code(Code, Label, Instr, First, []). + +split_code([Instr|Code], Label, Instr, Prev, As) when Prev =:= Label -> + split_code_final(Code, As); % drop both label and instruction +split_code([Other|_Code], Label, Instr, Prev, _As) when Prev =:= Label -> + ?EXIT({missing_instr_after_label, Label, Instr, [Other, Prev | _As]}); +split_code([Other|Code], Label, Instr, Prev, As) -> + split_code(Code, Label, Instr, Other, [Prev|As]); +split_code([], _Label, _Instr, Prev, As) -> + split_code_final([], [Prev|As]). + +split_code_final(Code, As) -> + {lists:reverse(As), Code}. + +%%----------------------------------------------------------------------- +%% Fixes fallthroughs +%%----------------------------------------------------------------------- + +fix_fallthroughs([]) -> + []; +fix_fallthroughs([I|Is]) -> + fix_fallthroughs(Is, I, []). + +fix_fallthroughs([I1|Is], I0, Acc) -> + case hipe_icode:is_label(I1) of + false -> + fix_fallthroughs(Is, I1, [I0 | Acc]); + true -> + case hipe_icode:is_branch(I0) of + true -> + fix_fallthroughs(Is, I1, [I0 | Acc]); + false -> + %% non-branch before label - insert a goto + Goto = hipe_icode:mk_goto(hipe_icode:label_name(I1)), + fix_fallthroughs(Is, I1, [Goto, I0 | Acc]) + end + end; +fix_fallthroughs([], I, Acc) -> + lists:reverse([I | Acc]). + +%%----------------------------------------------------------------------- +%% Removes the code between a fail instruction and the closest following +%% label. +%%----------------------------------------------------------------------- + +-spec remove_dead_code(icode_instrs()) -> icode_instrs(). +remove_dead_code([I|Is]) -> + case I of + #icode_fail{} -> + [I|remove_dead_code(skip_to_label(Is))]; + _ -> + [I|remove_dead_code(Is)] + end; +remove_dead_code([]) -> + []. + +%% returns the instructions from the closest label +-spec skip_to_label(icode_instrs()) -> icode_instrs(). +skip_to_label([I|Is] = Instrs) -> + case I of + #icode_label{} -> Instrs; + _ -> skip_to_label(Is) + end; +skip_to_label([]) -> + []. + +%%----------------------------------------------------------------------- +%% This needs to be extended in case new architectures are added. +%%----------------------------------------------------------------------- + +resolve_native_endianess(Flags) -> + case {Flags band 16#10, hipe_rtl_arch:endianess()} of + {16#10, big} -> + Flags band 5; + {16#10, little} -> + (Flags bor 2) band 7; + _ -> + Flags band 7 + end. + +%%----------------------------------------------------------------------- +%% Potentially useful for debugging. +%%----------------------------------------------------------------------- + +pp_beam(BeamCode, Options) -> + case proplists:get_value(pp_beam, Options) of + true -> + pp(BeamCode); + {file,FileName} -> + {ok,File} = file:open(FileName, [write]), + pp(File, BeamCode); + _ -> %% includes "false" case + ok + end. + +pp(Code) -> + pp(standard_io, Code). + +pp(Stream, []) -> + case Stream of %% I am not sure whether this is necessary + standard_io -> ok; + _ -> ok = file:close(Stream) + end; +pp(Stream, [FunCode|FunCodes]) -> + pp_mfa(Stream, FunCode), + put_nl(Stream), + pp(Stream, FunCodes). + +pp_mfa(Stream, FunCode) -> + lists:foreach(fun(Instr) -> print_instr(Stream, Instr) end, FunCode). + +print_instr(Stream, {label,Lbl}) -> + io:format(Stream, " label ~p:\n", [Lbl]); +print_instr(Stream, Op) -> + io:format(Stream, " ~p\n", [Op]). + +put_nl(Stream) -> + io:format(Stream, "\n", []). + +%%----------------------------------------------------------------------- +%% Handling of environments -- used to process local tail calls. +%%----------------------------------------------------------------------- + +%% Construct an environment +env__mk_env(M, F, A, Entry) -> + #environment{mfa={M,F,A}, entry=Entry}. + +%% Get current MFA +env__get_mfa(#environment{mfa=MFA}) -> MFA. + +%% Get entry point of the current function +env__get_entry(#environment{entry=EP}) -> EP. + +%%----------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode.erl b/lib/hipe/icode/hipe_icode.erl new file mode 100644 index 0000000000..a4614d7237 --- /dev/null +++ b/lib/hipe/icode/hipe_icode.erl @@ -0,0 +1,1820 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% HiPE Intermediate Code +%% ==================================================================== +%% Filename : hipe_icode.erl +%% Module : hipe_icode +%% Purpose : Provide primops for the Icode data structure. +%% History : 1997-? Erik Johansson (happi@it.uu.se): Created. +%% 2001-01-30 EJ (happi@it.uu.se): +%% Apply, primop, guardop removed +%% 2003-03-15 ES (happi@acm.org): +%% Started commenting in Edoc. +%% Moved pretty printer to separate file. +%% +%% $Id$ +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% This module implements "Linear Icode" and Icode instructions. +%% +%%

Icode is a simple (in that it has few instructions) imperative +%% language, used as the first Intermediate Code in the HiPE compiler. +%% Icode is closely related to Erlang, and Icode instructions operate +%% on Erlang terms.

+%% +%%

Icode

+%% +%%

Linear Icode for a function consists of: +%%

    +%%
  • the function's name (`{M,F,A}'),
  • +%%
  • a list of parameters,
  • +%%
  • a list of instructions,
  • +%%
  • data,
  • +%%
  • information about whether the function is a leaf function,
  • +%%
  • information about whether the function is a closure, and
  • +%%
  • the range for labels and variables in the code.
  • +%%
+%%

+%% +%%

Icode Instructions (and +%% their components)

+%% +%% Control flow: +%%
+%%
'if' +%% {Cond::cond(), +%% Args::[arg()], +%% TrueLabel::label_name(), +%% FalseLabel::label_name() +%% } :: +%% icode_instr()
+%%
+%% The if instruction compares the arguments (Args) with +%% condition (Cond) and jumps to either TrueLabel or +%% FalseLabel. (At the moment...) There are only binary +%% conditions so the number of arguments should be two. +%%

+%% An if instructions ends a basic block and should be followed +%% by a label (or be the last instruction of the code). +%%

+%% +%%
switch_val +%% {Term::var(), +%% FailLabel::label_name(), +%% Length::integer(), +%% Cases::[{symbol(),label_name()}] +%% }:: +%% icode_instr()
+%%
+%% The switch_val instruction compares the argument Term to the +%% symbols in the lists Cases, control is transfered to the label +%% that corresponds to the first symbol that matches. If no +%% symbol matches control is transfered to FailLabel. (NOTE: The +%% length argument is not currently in use.) +%%

+%% The switch_val instruction can be assumed to be implemented as +%% efficiently as possible given the symbols in the case +%% list. (Jump-table, bianry-serach, or nested ifs) +%%

+%% A switch_val instructions ends a basic block and should be +%% followed by a label (or be the last instruction of the code). +%%

+%% +%%
switch_tuple_arity +%% {Term::var(), +%% FailLabel::label_name(), +%% Length::integer(), +%% Cases::[{integer(),label_name()}] +%% }:: +%% icode_instr()
+%%
+%% The switch_tuple_arity instruction compares the size of the +%% tuple in the argument Term to the integers in the lists Cases, +%% control is transfered to the label that corresponds to the +%% first integer that matches. If no integer matches control is +%% transfered to FailLabel. (NOTE: The length argument is not +%% currently in use.) +%%

+%% The switch_tuple_arity instruction can be assumed to be +%% implemented as efficently as possible given the symbols in the +%% case list. (Jump-table, bianry-serach, or nested ifs) +%%

+%% A switch_tuple_arity instructions ends a basic block and +%% should be followed by a label (or be the last instruction of +%% the code). +%%

+%% +%%
`type {typ_expr, arg, true_label, false_label}}'
+%%
`goto {label}'
+%%
`label {name}'
+%%
+%% +%% Moves: +%%
+%%
`move {dst, src}'
+%%
`phi {dst, arglist}'
+%%
+%% +%% Function application: +%%
+%%
`call {[dst], fun, [arg], type, continuation, fail, +%% in_guard}'
+%%
+%% Where `type' is one of {`local', `remote', `primop'} +%% and `in_guard' is either `true' or `false'.
+%%
`enter {fun, [arg], type}'
+%%
+%% Where `type' is one of {`local', `remote', `primop'} +%% and `in_guard' is either `true' or `false'.
+%%
`return {[var]}'
+%%
+%% WARNING: Multiple return values are yet not +%% fully implemented and tested. +%%
+%%
+%% +%% Error handling: +%%
+%%
`begin_try {label, successor}'
+%%
`end_try'
+%%
`begin_handler {dstlist}'
+%%
`fail {Args, Class}'
+%%
Where `Class' is one of +%% {`exit', `throw', `error', `rethrow'}. For `error/2', `[args]' +%% is `[Reason,Trace]'. For `rethrow', `Args' is +%% `[Exception,Reason]' - this only occurs in autogenerated code. +%%
+%%
+%% +%% Comments: +%%
+%%
`comment{Text::string()}'
+%%
+%% +%%

Notes

+%% +%%

A constant can only show up on the RHS of a `move' instruction +%% and in `if' and `switch_*'

+%%

+%% Classification of primops should be like this: +%%

    +%%
  • `erlang:exit/1, erlang:throw/1, erlang:error/1, +%% erlang:error/2, erlang:fault/1', +%% and `erlang:fault/2' should use the +%% {@link fail(). fail-instruction} in Icode.
  • +%%
  • Calls or tail-recursive calls to BIFs, operators, or internal +%% functions should be implemented with `call' or `enter' +%% respectively, with the primop flag set.
  • +%%
  • All other Erlang functions should be implemented with `call' +%% or `enter' respectively, without the primop flag set.
  • +%%
+%%

+%% +%%

Primops

+%% +%%
+%%  Constructors:
+%%    cons                       - [Car, Cdr]
+%%    mktuple                    - [Element1, Element2, ..., ElementN]
+%%    call_fun                   - [BoundArg1, ..., BoundArgN, Fun]
+%%    enter_fun                  - [BoundArg1, ..., BoundArgN, Fun]
+%%    #mkfun{}                   - [FreeVar1, FreeVar2, ..., FreeVarN]
+%%
+%%  Binaries:
+%%    bs_init
+%%    {bs_put_string, Bytes, Size}
+%%    bs_final
+%%
+%%  Selectors:
+%%    element                    - [Index, Tuple]
+%%    unsafe_hd                  - [List]
+%%    unsafe_tl                  - [List]
+%%    #unsafe_element{}          - [Tuple]
+%%    #unsafe_update_element{}   - [Tuple, Val]
+%%    #closure_element{}         - [Fun]
+%%
+%%  Arithmetic:       [Arg1, Arg2]
+%%    '+', '-', '*', '/', 'div', 'rem',
+%%    'band', 'bor', 'bxor', 'bnot', 'bsl', 'bsr'
+%%
+%%  Receive:         
+%%    check_get_msg - []
+%%    next_msg      - []
+%%    select_msg    - []
+%%    set_timeout   - [Timeout]
+%%    clear_timeout - []
+%%    suspend_msg   - []
+%%
+%% 
+%% +%%

Guardops: (primops that can be used in guards and can fail)

+%%
+%%  Selectors:
+%%    unsafe_hd         - [List]
+%%    unsafe_tl         - [List]
+%%    #element{}        - [Index, Tuple]
+%%    #unsafe_element{} - [Tuple]
+%%
+%%  Arithmetic:       [Arg1, Arg2]
+%%    '+', '-', '*', '/', 'div', 'rem',
+%%   'band', 'bor', 'bxor', 'bnot', 'bsl', 'bsr',
+%%    fix_add, fix_sub               %% Do these exist?
+%%
+%%  Concurrency:
+%%    {erlang,self,0}          - []
+%% 
+%% +%% +%%

Relational Operations (Cond in if instruction)

+%%
+%%    gt, lt, geq, leq,
+%%    eqeq, neq, exact_eqeq, exact_neq
+%% 
+%% +%%

Type tests

+%%
+%%    list
+%%    nil
+%%    cons
+%%    tuple
+%%    {tuple, N}
+%%    atom
+%%    {atom, Atom}
+%%    constant
+%%    number
+%%    integer
+%%    {integer, N}
+%%    fixnum
+%%    bignum
+%%    float
+%%    pid
+%%    port
+%%    {record, Atom, Size}
+%%    reference
+%%    binary
+%%    function
+%% 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%===================================================================== + +-module(hipe_icode). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). + +%% @type icode(Fun, Params, IsClosure, IsLeaf, Code, Data, VarRange,LabelRange) +%% Fun = mfa() +%% Params = [var()] +%% IsClosure = boolean() +%% IsLeaf = boolean() +%% Code = [icode_instr()] +%% Data = data() +%% VarRange = {integer(),integer()} +%% LabelRange = {integer(),integer()} +%% +%% @type icode_instr(I) +%% I = if() | switch_val() | switch_tuple_arity() | type() | goto() +%% | label() | move() | phi() | call() | enter() | return() +%% | begin_try() | end_try() | begin_handler() | fail() | comment() +%% +%% @type if(Cond, Args, TrueLabel, FalseLabel) +%% Cond = cond() +%% Args = [arg()] +%% TrueLabel = label_name() +%% FalseLabel = label_name() +%% +%% @type switch_val(Term, FailLabel, Length, Cases) +%% Term = var() +%% FailLabel = label_name() +%% Length = integer() +%% Cases = [{symbol(),label_name()}] +%% +%% @type switch_tuple_arity(Arg, FailLabel, Length, Cases) +%% Term = var() +%% FailLabel = label_name() +%% Length = integer() +%% Cases = [{symbol(),label_name()}] +%% +%% @type type(TypeTest, Arg, True_label, False_label) +%% TypeTest = type_test() +%% Args = [arg()] +%% TrueLabel = label_name() +%% FalseLabel = label_name() +%% +%% @type goto(Label) Label = label_name() +%% +%% @type label(Name) Name = label_name() +%% +%% @type move(Dst, Src) Dst = var() Src = arg() +%% +%% @type phi(Dst, Id, Arglist) +%% Dst = var() | fvar() +%% Id = var() | fvar() +%% Arglist = [{Pred, Src}] +%% Pred = label_name() +%% Src = var() | fvar() +%% +%% @type call(Dst, Fun, Arg, Type, Continuation, FailLabel, InGuard) +%% Dst = [var()] +%% Fun = mfa() | primop() | closure() +%% Arg = [var()] +%% Type = call_type() +%% Continuation = [] | label_name() +%% FailLabel = [] | label_name() +%% InGuard = boolean() +%% +%% @type enter(Fun, Arg, Type) +%% Fun = mfa() | primop() | closure() +%% Arg = [var()] +%% Type = call_type() +%% +%% @type return (Vars) Vars = [var()] +%% +%% @type begin_try(FailLabel, Successor) +%% FailLabel = label_name() +%% Successor = label_name() +%% +%% @type end_try() +%% +%% @type begin_handler(Dst) +%% Dst = [var()] +%% +%% @type fail(Class, Args, Label) +%% Class = exit_class() +%% Args = [var()] +%% Label = label_name() +%% +%% @type comment(Text) Text = string() + +%% @type call_type() = 'local' | 'remote' | 'primop' +%% @type exit_class() = 'exit' | 'throw' | 'error' | 'rethrow' +%% @type cond() = gt | lt | geq | leq | eqeq | neq | exact_eqeq | exact_neq +%% @type type_test() = +%% list +%% | nil +%% | cons +%% | tuple +%% | {tuple, integer()} +%% | atom +%% | {atom, atom()} +%% | constant +%% | number +%% | integer +%% | {integer, integer()} +%% | fixnum +%% | bignum +%% | float +%% | pid +%% | port +%% | {record, atom(), integer()} +%% | reference +%% | binary +%% | function +%% +%% @type mfa(Mod,Fun,Arity) = {atom(),atom(),arity()} + +%% @type arg() = var() | const() +%% @type farg() = fvar() | float() +%% @type var(Name) Name = integer() +%% @type fvar(Name) Name = integer() +%% @type label_name(Name) Name = integer() +%% @type symbol(S) = atom() | number() +%% @type const(C) C = immediate() +%% @type immediate(I) = I +%% I = term() +%% @end + + +%% ____________________________________________________________________ +%% +%% Exports +%% +-export([mk_icode/7, %% mk_icode(Fun, Params, IsClosure, IsLeaf, + %% Code, VarRange, LabelRange) + mk_icode/8, %% mk_icode(Fun, Params, IsClosure, IsLeaf, + %% Code, Data, VarRange, LabelRange) + icode_fun/1, + icode_params/1, + icode_params_update/2, + icode_is_closure/1, + icode_closure_arity/1, + icode_closure_arity_update/2, + icode_is_leaf/1, + icode_code/1, + icode_code_update/2, + icode_data/1, + %% icode_data_update/2, + icode_var_range/1, + icode_label_range/1, + icode_info/1, + icode_info_update/2]). + +-export([mk_if/4, %% mk_if(Op, Args, TrueLbl, FalseLbl) + %% mk_if/5, %% mk_if(Op, Args, TrueLbl, FalseLbl, Prob) + if_op/1, + if_op_update/2, + if_true_label/1, + if_false_label/1, + if_args/1, + if_pred/1, + %% is_if/1, + + mk_switch_val/4, + %% mk_switch_val/5, + switch_val_term/1, + switch_val_fail_label/1, + %% switch_val_length/1, + switch_val_cases/1, + switch_val_cases_update/2, + %% is_switch_val/1, + + mk_switch_tuple_arity/4, + %% mk_switch_tuple_arityl/5, + switch_tuple_arity_term/1, + switch_tuple_arity_fail_label/1, + switch_tuple_arity_fail_label_update/2, + %% switch_tuple_arity_length/1, + switch_tuple_arity_cases/1, + switch_tuple_arity_cases_update/2, + %% is_switch_tuple_arity/1, + + mk_type/4, %% mk_type(Args, Type, TrueLbl, FalseLbl) + mk_type/5, %% mk_type(Args, Type, TrueLbl, FalseLbl, P) + type_args/1, + %% type_args_update/2, + type_test/1, + type_true_label/1, + type_false_label/1, + type_pred/1, + is_type/1, + + mk_guardop/5, %% mk_guardop(Dst, Fun, Args, Continuation, Fail) + mk_primop/3, %% mk_primop(Dst, Fun, Args) + mk_primop/5, %% mk_primop(Dst, Fun, Args, Cont, Fail) + mk_call/5, %% mk_call(Dst, Mod, Fun, Args, Type) + %% mk_call/7, %% mk_call(Dst, Mod, Fun, Args, Type, + %% Continuation, Fail) + mk_call/8, %% mk_call(Dst, Mod, Fun, Args, Type, + %% Continuation, Fail, Guard) + call_dstlist/1, + call_dstlist_update/2, + %% call_dst_type/1, + call_args/1, + call_args_update/2, + call_fun/1, + call_fun_update/2, + call_type/1, + call_continuation/1, + call_fail_label/1, + call_set_fail_label/2, + call_set_continuation/2, + is_call/1, + call_in_guard/1, + + mk_goto/1, %% mk_goto(Lbl) + goto_label/1, + + mk_enter/4, %% mk_enter(Mod, Fun, Args, Type) + mk_enter_primop/2, %% mk_enter_primop(Op, Type) + enter_fun/1, + enter_fun_update/2, + enter_args/1, + enter_args_update/2, + enter_type/1, + is_enter/1, + + + mk_return/1, %% mk_return(Vars) + %% mk_fail/1, %% mk_fail(Args) class = exit + mk_fail/2, %% mk_fail(Args, Class) + %% mk_fail/3, %% mk_fail(Args, Class, Label) + mk_move/2, %% mk_move(Dst, Src) + %% mk_moves/2, %% mk_moves(DstList, SrcList) + mk_begin_try/2, %% mk_begin_try(Label, Successor) + mk_begin_handler/1, %% mk_begin_handler(ReasonDst) + mk_end_try/0, %% mk_end_try() + %% mk_elements/2, %% mk_elements(Tuple, Vars) + mk_label/1, %% mk_label(Name) + mk_new_label/0, %% mk_new_label() + mk_comment/1, %% mk_comment(Text) + mk_const/1, %% mk_const(Const) + mk_var/1, %% mk_var(Id) + annotate_variable/2, %% annotate_var_or_reg(VarOrReg, Type) + unannotate_variable/1,%% unannotate_var_or_reg(VarOrReg) + mk_reg/1, %% mk_reg(Id) + mk_fvar/1, %% mk_fvar(Id) + mk_new_var/0, %% mk_new_var() + mk_new_fvar/0, %% mk_new_fvar() + mk_new_reg/0, %% mk_new_reg() + mk_phi/1, %% mk_phi(Id) + mk_phi/2 %% mk_phi(Id, ArgList) + ]). + +%% +%% Identifiers +%% + +-export([%% is_fail/1, + is_return/1, + is_move/1, + %% is_begin_try/1, + %% is_begin_handler/1, + %% is_end_try/1, + is_goto/1, + is_label/1, + is_comment/1, + is_const/1, + is_var/1, + is_fvar/1, + is_reg/1, + is_variable/1, + is_annotated_variable/1, + %% is_uncond/1, + is_phi/1]). + +%% +%% Selectors +%% + +-export([phi_dst/1, + phi_id/1, + %% phi_args/1, + phi_arg/2, + phi_arglist/1, + phi_enter_pred/3, + phi_remove_pred/2, + phi_redirect_pred/3, + move_dst/1, + move_src/1, + move_src_update/2, + begin_try_label/1, + begin_try_successor/1, + begin_handler_dstlist/1, + label_name/1, + comment_text/1, + return_vars/1, + fail_args/1, + fail_class/1, + fail_label/1, + fail_set_label/2, + var_name/1, + variable_annotation/1, + fvar_name/1, + reg_name/1, + reg_is_gcsafe/1, + const_value/1 + ]). + +%% +%% Misc +%% + +-export([args/1, + uses/1, + defines/1, + is_safe/1, + strip_comments/1, + subst/2, + subst_uses/2, + subst_defines/2, + redirect_jmp/3, + successors/1, + fails_to/1, + is_branch/1 + ]). + +-export([highest_var/1, highest_label/1]). + +%%--------------------------------------------------------------------- +%% +%% Icode +%% +%%--------------------------------------------------------------------- + +-spec mk_icode(mfa(), [icode_var()], boolean(), boolean(), [icode_instr()], + {non_neg_integer(),non_neg_integer()}, + {icode_lbl(),icode_lbl()}) -> #icode{}. +mk_icode(Fun, Params, IsClosure, IsLeaf, Code, VarRange, LabelRange) -> + #icode{'fun'=Fun, params=Params, code=Code, + is_closure=IsClosure, + is_leaf=IsLeaf, + data=hipe_consttab:new(), + var_range=VarRange, + label_range=LabelRange}. + +-spec mk_icode(mfa(), [icode_var()], boolean(), boolean(), [icode_instr()], + hipe_consttab(), {non_neg_integer(),non_neg_integer()}, + {icode_lbl(),icode_lbl()}) -> #icode{}. +mk_icode(Fun, Params, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> + #icode{'fun'=Fun, params=Params, code=Code, + data=Data, is_closure=IsClosure, is_leaf=IsLeaf, + var_range=VarRange, label_range=LabelRange}. + +-spec icode_fun(#icode{}) -> mfa(). +icode_fun(#icode{'fun' = MFA}) -> MFA. + +-spec icode_params(#icode{}) -> [icode_var()]. +icode_params(#icode{params = Params}) -> Params. + +-spec icode_params_update(#icode{}, [icode_var()]) -> #icode{}. +icode_params_update(Icode, Params) -> + Icode#icode{params = Params}. + +-spec icode_is_closure(#icode{}) -> boolean(). +icode_is_closure(#icode{is_closure = Closure}) -> Closure. + +-spec icode_is_leaf(#icode{}) -> boolean(). +icode_is_leaf(#icode{is_leaf = Leaf}) -> Leaf. + +-spec icode_code(#icode{}) -> icode_instrs(). +icode_code(#icode{code = Code}) -> Code. + +-spec icode_code_update(#icode{}, icode_instrs()) -> #icode{}. +icode_code_update(Icode, NewCode) -> + Vmax = highest_var(NewCode), + Lmax = highest_label(NewCode), + Icode#icode{code = NewCode, var_range = {0,Vmax}, label_range = {0,Lmax}}. + +-spec icode_data(#icode{}) -> hipe_consttab(). +icode_data(#icode{data=Data}) -> Data. + +%% %% -spec icode_data_update(#icode{}, hipe_consttab()) -> #icode{}. +%% icode_data_update(Icode, NewData) -> Icode#icode{data=NewData}. + +-spec icode_var_range(#icode{}) -> {non_neg_integer(), non_neg_integer()}. +icode_var_range(#icode{var_range = VarRange}) -> VarRange. + +-spec icode_label_range(#icode{}) -> {non_neg_integer(), non_neg_integer()}. +icode_label_range(#icode{label_range = LabelRange}) -> LabelRange. + +-spec icode_info(#icode{}) -> icode_info(). +icode_info(#icode{info = Info}) -> Info. + +-spec icode_info_update(#icode{}, icode_info()) -> #icode{}. +icode_info_update(Icode, Info) -> Icode#icode{info = Info}. + +-spec icode_closure_arity(#icode{}) -> arity(). +icode_closure_arity(#icode{closure_arity = Arity}) -> Arity. + +-spec icode_closure_arity_update(#icode{}, arity()) -> #icode{}. +icode_closure_arity_update(Icode, Arity) -> Icode#icode{closure_arity = Arity}. + + +%%---------------------------------------------------------------------------- +%% Instructions +%%---------------------------------------------------------------------------- + +%%---- +%% if +%%---- + +-spec mk_if(icode_if_op(), [icode_term_arg()], + icode_lbl(), icode_lbl()) -> #icode_if{}. +mk_if(Op, Args, TrueLbl, FalseLbl) -> + #icode_if{op=Op, args=Args, true_label=TrueLbl, false_label=FalseLbl, p=0.5}. +%% mk_if(Op, Args, TrueLbl, FalseLbl, P) -> +%% #icode_if{op=Op, args=Args, true_label=TrueLbl, false_label=FalseLbl, p=P}. + +-spec if_op(#icode_if{}) -> icode_if_op(). +if_op(#icode_if{op=Op}) -> Op. + +-spec if_op_update(#icode_if{}, icode_if_op()) -> #icode_if{}. +if_op_update(IF, NewOp) -> IF#icode_if{op=NewOp}. + +-spec if_args(#icode_if{}) -> [icode_term_arg()]. +if_args(#icode_if{args=Args}) -> Args. + +-spec if_true_label(#icode_if{}) -> icode_lbl(). +if_true_label(#icode_if{true_label=TrueLbl}) -> TrueLbl. + +-spec if_true_label_update(#icode_if{}, icode_lbl()) -> #icode_if{}. +if_true_label_update(IF, TrueLbl) -> IF#icode_if{true_label=TrueLbl}. + +-spec if_false_label(#icode_if{}) -> icode_lbl(). +if_false_label(#icode_if{false_label=FalseLbl}) -> FalseLbl. + +-spec if_false_label_update(#icode_if{}, icode_lbl()) -> #icode_if{}. +if_false_label_update(IF, FalseLbl) -> IF#icode_if{false_label=FalseLbl}. + +-spec if_pred(#icode_if{}) -> float(). +if_pred(#icode_if{p=P}) -> P. + +%%------------ +%% switch_val +%%------------ + +-spec mk_switch_val(icode_var(), icode_lbl(), + non_neg_integer(), [icode_switch_case()]) -> + #icode_switch_val{}. +mk_switch_val(Term = #icode_variable{kind='var'}, FailLbl, Length, Cases) -> + #icode_switch_val{term=Term, fail_label=FailLbl, length=Length, cases=Cases}. + +-spec switch_val_term(#icode_switch_val{}) -> icode_var(). +switch_val_term(#icode_switch_val{term=Term}) -> Term. + +-spec switch_val_fail_label(#icode_switch_val{}) -> icode_lbl(). +switch_val_fail_label(#icode_switch_val{fail_label=FailLbl}) -> FailLbl. + +-spec switch_val_fail_label_update(#icode_switch_val{}, icode_lbl()) -> + #icode_switch_val{}. +switch_val_fail_label_update(SV, FailLbl) -> + SV#icode_switch_val{fail_label=FailLbl}. + +%% switch_val_length(#icode_switch_val{length=Length}) -> Length. + +-spec switch_val_cases(#icode_switch_val{}) -> [icode_switch_case()]. +switch_val_cases(#icode_switch_val{cases=Cases}) -> Cases. + +-spec switch_val_cases_update(#icode_switch_val{}, [icode_switch_case()]) -> + #icode_switch_val{}. +switch_val_cases_update(SV, NewCases) -> + SV#icode_switch_val{cases = NewCases}. + +%%-------------------- +%% switch_tuple_arity +%%-------------------- + +-spec mk_switch_tuple_arity(icode_var(), icode_lbl(), + non_neg_integer(), [icode_switch_case()]) -> + #icode_switch_tuple_arity{}. +mk_switch_tuple_arity(Term = #icode_variable{kind='var'}, FailLbl, Length, Cases) -> + #icode_switch_tuple_arity{term=Term, fail_label=FailLbl, + length=Length, cases=Cases}. + +-spec switch_tuple_arity_term(#icode_switch_tuple_arity{}) -> icode_var(). +switch_tuple_arity_term(#icode_switch_tuple_arity{term=Term}) -> Term. + +-spec switch_tuple_arity_fail_label(#icode_switch_tuple_arity{}) -> icode_lbl(). +switch_tuple_arity_fail_label(#icode_switch_tuple_arity{fail_label=FailLbl}) -> + FailLbl. + +-spec switch_tuple_arity_fail_label_update(#icode_switch_tuple_arity{}, icode_lbl()) -> + #icode_switch_tuple_arity{}. +switch_tuple_arity_fail_label_update(S, FailLbl) -> + S#icode_switch_tuple_arity{fail_label=FailLbl}. + +%% switch_tuple_arity_length(#icode_switch_tuple_arity{length=Length}) -> Length. + +-spec switch_tuple_arity_cases(#icode_switch_tuple_arity{}) -> [icode_switch_case()]. +switch_tuple_arity_cases(#icode_switch_tuple_arity{cases=Cases}) -> Cases. + +-spec switch_tuple_arity_cases_update(#icode_switch_tuple_arity{}, + [icode_switch_case()]) -> + #icode_switch_tuple_arity{}. +switch_tuple_arity_cases_update(Cond, NewCases) -> + Cond#icode_switch_tuple_arity{cases = NewCases}. + +%%------ +%% type +%%------ + +-spec mk_type([icode_term_arg()], icode_type_test(), icode_lbl(), icode_lbl()) -> + #icode_type{}. +mk_type(Args, Test, TrueLbl, FalseLbl) -> + mk_type(Args, Test, TrueLbl, FalseLbl, 0.5). + +-spec mk_type([icode_term_arg()], icode_type_test(), + icode_lbl(), icode_lbl(), float()) -> #icode_type{}. +mk_type(Args, Test, TrueLbl, FalseLbl, P) -> + #icode_type{test=Test, args=Args, + true_label=TrueLbl, false_label=FalseLbl, p=P}. + +-spec type_test(#icode_type{}) -> icode_type_test(). +type_test(#icode_type{test=Test}) -> Test. + +-spec type_args(#icode_type{}) -> [icode_term_arg()]. +type_args(#icode_type{args=Args}) -> Args. + +%% type_args_update(T, Args) -> T#icode_type{args=Args}. + +-spec type_true_label(#icode_type{}) -> icode_lbl(). +type_true_label(#icode_type{true_label=TrueLbl}) -> TrueLbl. + +-spec type_false_label(#icode_type{}) -> icode_lbl(). +type_false_label(#icode_type{false_label=FalseLbl}) -> FalseLbl. + +-spec type_pred(#icode_type{}) -> float(). +type_pred(#icode_type{p=P}) -> P. + +-spec is_type(icode_instr()) -> boolean(). +is_type(#icode_type{}) -> true; +is_type(_) -> false. + +%%------ +%% goto +%%------ + +-spec mk_goto(icode_lbl()) -> #icode_goto{}. +mk_goto(Lbl) -> #icode_goto{label=Lbl}. + +-spec goto_label(#icode_goto{}) -> icode_lbl(). +goto_label(#icode_goto{label=Lbl}) -> Lbl. + +-spec is_goto(icode_instr()) -> boolean(). +is_goto(#icode_goto{}) -> true; +is_goto(_) -> false. + +%%-------- +%% return +%%-------- + +-spec mk_return([icode_var()]) -> #icode_return{}. +mk_return(Vars) -> #icode_return{vars=Vars}. + +-spec return_vars(#icode_return{}) -> [icode_var()]. +return_vars(#icode_return{vars=Vars}) -> Vars. + +-spec is_return(icode_instr()) -> boolean(). +is_return(#icode_return{}) -> true; +is_return(_) -> false. + +%%------ +%% fail +%%------ + +%% mk_fail(Args) when is_list(Args) -> mk_fail(Args, error). + +-spec mk_fail([icode_term_arg()], icode_exit_class()) -> #icode_fail{}. +mk_fail(Args, Class) when is_list(Args) -> + case Class of + error -> ok; + exit -> ok; + rethrow -> ok; + throw -> ok + end, + #icode_fail{class=Class, args=Args}. + +%% mk_fail(Args, Class, Label) when is_list(Args) -> +%% #icode_fail{class=Class, args=Args, fail_label=Label}. + +-spec fail_class(#icode_fail{}) -> icode_exit_class(). +fail_class(#icode_fail{class=Class}) -> Class. + +-spec fail_args(#icode_fail{}) -> [icode_term_arg()]. +fail_args(#icode_fail{args=Args}) -> Args. + +-spec fail_label(#icode_fail{}) -> [] | icode_lbl(). +fail_label(#icode_fail{fail_label=Label}) -> Label. + +-spec fail_set_label(#icode_fail{}, [] | icode_lbl()) -> #icode_fail{}. +fail_set_label(I=#icode_fail{}, Label) -> + I#icode_fail{fail_label = Label}. + +%%------ +%% move +%%------ + +-spec mk_move(#icode_variable{}, #icode_variable{} | #icode_const{}) -> + #icode_move{}. +mk_move(Dst, Src) -> + case Src of + #icode_variable{} -> ok; + #icode_const{} -> ok + end, + #icode_move{dst=Dst, src=Src}. + +-spec move_dst(#icode_move{}) -> #icode_variable{}. +move_dst(#icode_move{dst=Dst}) -> Dst. + +-spec move_src(#icode_move{}) -> #icode_variable{} | #icode_const{}. +move_src(#icode_move{src=Src}) -> Src. + +-spec move_src_update(#icode_move{}, #icode_variable{} | #icode_const{}) -> + #icode_move{}. +move_src_update(M, NewSrc) -> M#icode_move{src=NewSrc}. + +-spec is_move(icode_instr()) -> boolean(). +is_move(#icode_move{}) -> true; +is_move(_) -> false. + +%%----- +%% phi +%%----- + +%% The id field is not entirely redundant. It is used in mappings +%% in the SSA pass since the dst field can change. +-spec mk_phi(#icode_variable{}) -> #icode_phi{}. +mk_phi(Var) -> #icode_phi{dst=Var, id=Var, arglist=[]}. + +-spec mk_phi(#icode_variable{}, [{icode_lbl(), #icode_variable{}}]) -> + #icode_phi{}. +mk_phi(Var, ArgList) -> #icode_phi{dst=Var, id=Var, arglist=ArgList}. + +-spec phi_dst(#icode_phi{}) -> #icode_variable{}. +phi_dst(#icode_phi{dst=Dst}) -> Dst. + +-spec phi_id(#icode_phi{}) -> #icode_variable{}. +phi_id(#icode_phi{id=Id}) -> Id. + +-spec phi_arglist(#icode_phi{}) -> [{icode_lbl(), #icode_variable{}}]. +phi_arglist(#icode_phi{arglist=ArgList}) -> ArgList. + +-spec phi_args(#icode_phi{}) -> [#icode_variable{}]. +phi_args(P) -> [Var || {_, Var} <- phi_arglist(P)]. + +-spec phi_arg(#icode_phi{}, icode_lbl()) -> #icode_variable{}. +phi_arg(P, Pred) -> + case lists:keyfind(Pred, 1, phi_arglist(P)) of + {_, Var} -> Var; + false -> exit({'No such predecessor to phi', {Pred, P}}) + end. + +-spec is_phi(icode_instr()) -> boolean(). +is_phi(#icode_phi{}) -> true; +is_phi(_) -> false. + +-spec phi_enter_pred(#icode_phi{}, icode_lbl(), #icode_variable{}) -> + #icode_phi{}. +phi_enter_pred(Phi, Pred, Var) -> + NewArg = {Pred, Var}, + Phi#icode_phi{arglist=[NewArg|lists:keydelete(Pred, 1, phi_arglist(Phi))]}. + +-spec phi_remove_pred(#icode_phi{}, icode_lbl()) -> #icode_move{} | #icode_phi{}. +phi_remove_pred(Phi, Pred) -> + NewArgList = lists:keydelete(Pred, 1, phi_arglist(Phi)), + case NewArgList of + [Arg] -> %% the Phi should be turned into an appropriate move instruction + {_Label, Var = #icode_variable{}} = Arg, + mk_move(phi_dst(Phi), Var); + [_|_] -> + Phi#icode_phi{arglist=NewArgList} + end. + +phi_argvar_subst(P, Subst) -> + NewArgList = [{Pred, subst1(Subst, Var)} || {Pred,Var} <- phi_arglist(P)], + P#icode_phi{arglist=NewArgList}. + +-spec phi_redirect_pred(#icode_phi{}, icode_lbl(), icode_lbl()) -> #icode_phi{}. +phi_redirect_pred(P, OldPred, NewPred) -> + Subst = [{OldPred, NewPred}], + NewArgList = [{subst1(Subst, Pred), Var} || {Pred,Var} <- phi_arglist(P)], + P#icode_phi{arglist=NewArgList}. + +%% +%% primop and guardop +%% +%% Whether a function is a "primop" - i.e., an internal thing - or not, +%% is really only shown by its name. An {M,F,A} always represents a +%% function in some Erlang module (although it might be a BIF, and +%% could possibly be inline expanded). It is convenient to let the +%% constructor functions check the name and set the type automatically, +%% especially for guardops - some guardops are primitives and some are +%% MFA:s, and this way we won't have to rewrite all calls to mk_guardop +%% to flag whether they are primops or not. + +-spec mk_primop([#icode_variable{}], icode_funcall(), + [icode_argument()]) -> #icode_call{}. +mk_primop(DstList, Fun, ArgList) -> + mk_primop(DstList, Fun, ArgList, [], []). + +-spec mk_primop([#icode_variable{}], icode_funcall(), + [icode_argument()], [] | icode_lbl(), [] | icode_lbl()) -> + #icode_call{}. +mk_primop(DstList, Fun, ArgList, Continuation, Fail) -> + Type = op_type(Fun), + make_call(DstList, Fun, ArgList, Type, Continuation, Fail, false). + +%% Note that a 'guardop' is just a call that occurred in a guard. In +%% this case, we should always have continuation labels True and False. + +-spec mk_guardop([#icode_variable{}], icode_funcall(), + [icode_argument()], icode_lbl(), icode_lbl()) -> #icode_call{}. +mk_guardop(DstList, Fun, ArgList, True, False) -> + Type = op_type(Fun), + make_call(DstList, Fun, ArgList, Type, True, False, true). + +op_type(Fun) -> + case is_mfa(Fun) of + true -> remote; + false -> primop + end. + +is_mfa({M,F,A}) when is_atom(M), is_atom(F), + is_integer(A), 0 =< A, A =< 255 -> true; +is_mfa(_) -> false. + +%%------ +%% call +%%------ + +-spec mk_call([#icode_variable{}], atom(), atom(), + [icode_argument()], 'local' | 'remote') -> #icode_call{}. +mk_call(DstList, M, F, ArgList, Type) -> + mk_call(DstList, M, F, ArgList, Type, [], [], false). + +%% mk_call(DstList, M, F, ArgList, Type, Continuation, Fail) -> +%% mk_call(DstList, M, F, ArgList, Type, Continuation, Fail, false). + +-spec mk_call([#icode_variable{}], atom(), atom(), [icode_argument()], + 'local' | 'remote', [] | icode_lbl(), [] | icode_lbl(), boolean()) -> + #icode_call{}. +mk_call(DstList, M, F, ArgList, Type, Continuation, Fail, InGuard) + when is_atom(M), is_atom(F) -> + case Type of + local -> ok; + remote -> ok + end, + Fun = {M,F,length(ArgList)}, + make_call(DstList, Fun, ArgList, Type, Continuation, Fail, InGuard). + +%% The common constructor for all calls (for internal use only) +%% +%% Note: If the "guard" flag is `true', it means that if the call fails, +%% we can simply jump to the Fail label (if it exists) without +%% generating any additional exception information - it isn't needed. +-spec make_call([#icode_variable{}], icode_funcall(), [icode_argument()], + icode_call_type(), [] | icode_lbl(), [] | icode_lbl(), boolean()) -> + #icode_call{}. +make_call(DstList, Fun, ArgList, Type, Continuation, Fail, InGuard) -> + #icode_call{dstlist=DstList, 'fun'=Fun, args=ArgList, type=Type, + continuation=Continuation, fail_label=Fail, in_guard=InGuard}. + +-spec call_dstlist(#icode_call{}) -> [#icode_variable{}]. +call_dstlist(#icode_call{dstlist=DstList}) -> DstList. + +-spec call_dstlist_update(#icode_call{}, [#icode_variable{}]) -> #icode_call{}. +call_dstlist_update(C, Dest) -> C#icode_call{dstlist=Dest}. + +-spec call_type(#icode_call{}) -> icode_call_type(). +call_type(#icode_call{type=Type}) -> Type. + +%% -spec call_dst_type(#icode_call{}) -> erl_type(). +%% call_dst_type(#icode_call{dst_type=DstType}) -> DstType. + +-spec call_args(#icode_call{}) -> [icode_argument()]. +call_args(#icode_call{args=Args}) -> Args. + +-spec call_args_update(#icode_call{}, [icode_argument()]) -> #icode_call{}. +call_args_update(C, Args) -> C#icode_call{args=Args}. + +-spec call_fun(#icode_call{}) -> icode_funcall(). +call_fun(#icode_call{'fun'=Fun}) -> Fun. + +%% Note that updating the name field requires recomputing the call type, +%% in case it changes from a remote/local call to a primop call. +-spec call_fun_update(#icode_call{}, icode_funcall()) -> #icode_call{}. +call_fun_update(C, Fun) -> + Type = case is_mfa(Fun) of + true -> call_type(C); + false -> primop + end, + C#icode_call{'fun'=Fun, type=Type}. + +-spec call_continuation(#icode_call{}) -> [] | icode_lbl(). +call_continuation(#icode_call{continuation=Continuation}) -> Continuation. + +-spec call_fail_label(#icode_call{}) -> [] | icode_lbl(). +call_fail_label(#icode_call{fail_label=Fail}) -> Fail. + +-spec call_set_continuation(#icode_call{}, [] | icode_lbl()) -> #icode_call{}. +call_set_continuation(I, Continuation) -> + I#icode_call{continuation = Continuation}. + +-spec call_set_fail_label(#icode_call{}, [] | icode_lbl()) -> #icode_call{}. +call_set_fail_label(I=#icode_call{}, Fail) -> + case Fail of + [] -> + I#icode_call{fail_label=Fail, in_guard=false}; + _ -> + I#icode_call{fail_label=Fail} + end. + +-spec is_call(icode_instr()) -> boolean(). +is_call(#icode_call{}) -> true; +is_call(_) -> false. + +-spec call_in_guard(#icode_call{}) -> boolean(). +call_in_guard(#icode_call{in_guard=InGuard}) -> InGuard. + +%%------- +%% enter +%%------- + +-spec mk_enter(atom(), atom(), [icode_term_arg()], 'local' | 'remote') -> + #icode_enter{}. +mk_enter(M, F, Args, Type) when is_atom(M), is_atom(F) -> + case Type of + local -> ok; + remote -> ok + end, + #icode_enter{'fun'={M,F,length(Args)}, args=Args, type=Type}. + +-spec enter_fun(#icode_enter{}) -> icode_funcall(). +enter_fun(#icode_enter{'fun'=Fun}) -> Fun. + +-spec enter_fun_update(#icode_enter{}, icode_funcall()) -> + #icode_enter{}. +enter_fun_update(E, Fun) -> + Type = case is_mfa(Fun) of + true -> enter_type(E); + false -> primop + end, + E#icode_enter{'fun'=Fun, type=Type}. + +-spec enter_args(#icode_enter{}) -> [icode_term_arg()]. +enter_args(#icode_enter{args=Args}) -> Args. + +-spec enter_args_update(#icode_enter{}, [icode_term_arg()]) -> #icode_enter{}. +enter_args_update(E, Args) -> E#icode_enter{args=Args}. + +-spec enter_type(#icode_enter{}) -> icode_call_type(). +enter_type(#icode_enter{type=Type}) -> Type. + +-spec is_enter(icode_instr()) -> boolean(). +is_enter(#icode_enter{}) -> true; +is_enter(_) -> false. + +-spec mk_enter_primop(icode_primop(), [icode_term_arg()]) -> + #icode_enter{type::'primop'}. +mk_enter_primop(Op, Args) -> + #icode_enter{'fun'=Op, args=Args, type=primop}. + +%%----------- +%% begin_try +%%----------- + +%% The reason that begin_try is a branch instruction is just so that it +%% keeps the fail-to block linked into the CFG, until the exception +%% handling instructions are eliminated. + +-spec mk_begin_try(icode_lbl(), icode_lbl()) -> #icode_begin_try{}. +mk_begin_try(Label, Successor) -> + #icode_begin_try{label=Label, successor=Successor}. + +-spec begin_try_label(#icode_begin_try{}) -> icode_lbl(). +begin_try_label(#icode_begin_try{label=Label}) -> Label. + +-spec begin_try_successor(#icode_begin_try{}) -> icode_lbl(). +begin_try_successor(#icode_begin_try{successor=Successor}) -> Successor. + +%%--------- +%% end_try +%%--------- + +-spec mk_end_try() -> #icode_end_try{}. +mk_end_try() -> #icode_end_try{}. + +%%--------------- +%% begin_handler +%%--------------- + +-spec mk_begin_handler([icode_var()]) -> #icode_begin_handler{}. +mk_begin_handler(Dstlist) -> + #icode_begin_handler{dstlist=Dstlist}. + +-spec begin_handler_dstlist(#icode_begin_handler{}) -> [icode_var()]. +begin_handler_dstlist(#icode_begin_handler{dstlist=Dstlist}) -> Dstlist. + +%% -spec is_begin_handler(icode_instr()) -> boolean(). +%% is_begin_handler(#icode_begin_handler{}) -> true; +%% is_begin_handler(_) -> false. + +%%------- +%% label +%%------- + +-spec mk_label(icode_lbl()) -> #icode_label{}. +mk_label(Name) when is_integer(Name), Name >= 0 -> #icode_label{name=Name}. + +-spec label_name(#icode_label{}) -> icode_lbl(). +label_name(#icode_label{name=Name}) -> Name. + +-spec is_label(icode_instr()) -> boolean(). +is_label(#icode_label{}) -> true; +is_label(_) -> false. + +%%--------- +%% comment +%%--------- + +-spec mk_comment(icode_comment_text()) -> #icode_comment{}. +%% @doc If `Txt' is a list of characters (possibly deep), it will be +%% printed as a string; otherwise, `Txt' will be printed as a term. +mk_comment(Txt) -> #icode_comment{text=Txt}. + +-spec comment_text(#icode_comment{}) -> icode_comment_text(). +comment_text(#icode_comment{text=Txt}) -> Txt. + +-spec is_comment(icode_instr()) -> boolean(). +is_comment(#icode_comment{}) -> true; +is_comment(_) -> false. + + +%%--------------------------------------------------------------------- +%% Arguments (variables and constants) +%%--------------------------------------------------------------------- + +%%------- +%% const +%%------- + +-spec mk_const(simple_const() | structured_const() | binary()) -> #icode_const{}. +mk_const(C) -> #icode_const{value=#flat{value=C}}. + +-spec const_value(#icode_const{}) -> simple_const() | structured_const() | binary(). +const_value(#icode_const{value=#flat{value=X}}) -> X. + +-spec is_const(icode_argument()) -> boolean(). +is_const(#icode_const{}) -> true; +is_const(_) -> false. + +%%----- +%% var +%%----- + +-spec mk_var(non_neg_integer()) -> #icode_variable{kind::'var'}. +mk_var(V) -> #icode_variable{name=V, kind=var}. + +-spec var_name(#icode_variable{kind::'var'}) -> non_neg_integer(). +var_name(#icode_variable{name=Name, kind=var}) -> Name. + +-spec is_var(icode_argument()) -> boolean(). +is_var(#icode_variable{kind=var}) -> true; +is_var(_) -> false. + +-spec mk_reg(non_neg_integer()) -> #icode_variable{kind::'reg'}. +mk_reg(V) -> #icode_variable{name=V, kind=reg}. + +-spec reg_name(#icode_variable{kind::'reg'}) -> non_neg_integer(). +reg_name(#icode_variable{name=Name, kind=reg}) -> Name. + +-spec reg_is_gcsafe(#icode_variable{kind::'reg'}) -> 'false'. +reg_is_gcsafe(#icode_variable{kind=reg}) -> false. % for now + +-spec is_reg(icode_argument()) -> boolean(). +is_reg(#icode_variable{kind=reg}) -> true; +is_reg(_) -> false. + +-spec mk_fvar(non_neg_integer()) -> #icode_variable{kind::'fvar'}. +mk_fvar(V) -> #icode_variable{name=V, kind=fvar}. + +-spec fvar_name(#icode_variable{kind::'fvar'}) -> non_neg_integer(). +fvar_name(#icode_variable{name=Name, kind=fvar}) -> Name. + +-spec is_fvar(icode_argument()) -> boolean(). +is_fvar(#icode_variable{kind=fvar}) -> true; +is_fvar(_) -> false. + +-spec is_variable(icode_argument()) -> boolean(). +is_variable(#icode_variable{}) -> true; +is_variable(_) -> false. + +-spec annotate_variable(#icode_variable{}, variable_annotation()) -> + #icode_variable{}. +annotate_variable(X, Anno) -> + X#icode_variable{annotation = Anno}. + +-spec is_annotated_variable(icode_argument()) -> boolean(). +is_annotated_variable(#icode_variable{annotation=[]}) -> + false; +is_annotated_variable(#icode_variable{}) -> + true; +is_annotated_variable(_) -> + false. + +-spec unannotate_variable(#icode_variable{}) -> #icode_variable{}. +unannotate_variable(X) -> + X#icode_variable{annotation=[]}. + +-spec variable_annotation(#icode_variable{}) -> variable_annotation(). +variable_annotation(#icode_variable{annotation=Anno}) -> + Anno. + +%% +%% Floating point Icode instructions. +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Liveness info +%% + +-spec uses(icode_instr()) -> [#icode_variable{}]. +uses(Instr) -> + remove_constants(args(Instr)). + +-spec args(icode_instr()) -> [icode_argument()]. +args(I) -> + case I of + #icode_if{} -> if_args(I); + #icode_switch_val{} -> [switch_val_term(I)]; + #icode_switch_tuple_arity{} -> [switch_tuple_arity_term(I)]; + #icode_type{} -> type_args(I); + #icode_move{} -> [move_src(I)]; + #icode_fail{} -> fail_args(I); + #icode_call{} -> call_args(I); + #icode_enter{} -> enter_args(I); + #icode_return{} -> return_vars(I); + #icode_phi{} -> phi_args(I); + #icode_goto{} -> []; + #icode_begin_try{} -> []; + #icode_begin_handler{} -> []; + #icode_end_try{} -> []; + #icode_comment{} -> []; + #icode_label{} -> [] + end. + +-spec defines(icode_instr()) -> [#icode_variable{}]. +defines(I) -> + case I of + #icode_move{} -> remove_constants([move_dst(I)]); + #icode_call{} -> remove_constants(call_dstlist(I)); + #icode_begin_handler{} -> remove_constants(begin_handler_dstlist(I)); + #icode_phi{} -> remove_constants([phi_dst(I)]); + #icode_if{} -> []; + #icode_switch_val{} -> []; + #icode_switch_tuple_arity{} -> []; + #icode_type{} -> []; + #icode_goto{} -> []; + #icode_fail{} -> []; + #icode_enter{} -> []; + #icode_return{} -> []; + #icode_begin_try{} -> []; + #icode_end_try{} -> []; + #icode_comment{} -> []; + #icode_label{} -> [] + end. + +-spec remove_constants([icode_argument()]) -> [#icode_variable{}]. +remove_constants(L) -> + [V || V <- L, (not is_const(V))]. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Utilities +%% + +%% +%% Substitution: replace occurrences of X by Y if {X,Y} is in the +%% Subst_list. + +-spec subst([{_,_}], I) -> I when is_subtype(I, icode_instr()). + +subst(Subst, I) -> + subst_defines(Subst, subst_uses(Subst, I)). + +-spec subst_uses([{_,_}], I) -> I when is_subtype(I, icode_instr()). + +subst_uses(Subst, I) -> + case I of + #icode_if{} -> I#icode_if{args = subst_list(Subst, if_args(I))}; + #icode_switch_val{} -> + I#icode_switch_val{term = subst1(Subst, switch_val_term(I))}; + #icode_switch_tuple_arity{} -> + I#icode_switch_tuple_arity{term = subst1(Subst, switch_tuple_arity_term(I))}; + #icode_type{} -> I#icode_type{args = subst_list(Subst, type_args(I))}; + #icode_move{} -> I#icode_move{src = subst1(Subst, move_src(I))}; + #icode_fail{} -> I#icode_fail{args = subst_list(Subst, fail_args(I))}; + #icode_call{} -> I#icode_call{args = subst_list(Subst, call_args(I))}; + #icode_enter{} -> I#icode_enter{args = subst_list(Subst, enter_args(I))}; + #icode_return{} -> I#icode_return{vars = subst_list(Subst, return_vars(I))}; + #icode_phi{} -> phi_argvar_subst(I, Subst); + #icode_goto{} -> I; + #icode_begin_try{} -> I; + #icode_begin_handler{} -> I; + #icode_end_try{} -> I; + #icode_comment{} -> I; + #icode_label{} -> I + end. + +-spec subst_defines([{_,_}], I) -> I when is_subtype(I, icode_instr()). + +subst_defines(Subst, I) -> + case I of + #icode_move{} -> I#icode_move{dst = subst1(Subst, move_dst(I))}; + #icode_call{} -> + I#icode_call{dstlist = subst_list(Subst, call_dstlist(I))}; + #icode_begin_handler{} -> + I#icode_begin_handler{dstlist = subst_list(Subst, + begin_handler_dstlist(I))}; + #icode_phi{} -> I#icode_phi{dst = subst1(Subst, phi_dst(I))}; + #icode_if{} -> I; + #icode_switch_val{} -> I; + #icode_switch_tuple_arity{} -> I; + #icode_type{} -> I; + #icode_goto{} -> I; + #icode_fail{} -> I; + #icode_enter{} -> I; + #icode_return{} -> I; + #icode_begin_try{} -> I; + #icode_end_try{} -> I; + #icode_comment{} -> I; + #icode_label{} -> I + end. + +subst_list(S, Is) -> + [subst1(S, I) || I <- Is]. + +subst1([], I) -> I; +subst1([{I,Y}|_], I) -> Y; +subst1([_|Pairs], I) -> subst1(Pairs, I). + +%% +%% @doc Returns the successors of an Icode instruction. +%% In CFG form only branch instructions have successors, +%% but in linear form other instructions like e.g. moves and +%% others might be the last instruction of some basic block. +%% + +-spec successors(icode_instr()) -> [icode_lbl()]. + +successors(I) -> + case I of + #icode_if{} -> + [if_true_label(I), if_false_label(I)]; + #icode_goto{} -> + [goto_label(I)]; + #icode_switch_val{} -> + CaseLabels = [L || {_,L} <- switch_val_cases(I)], + [switch_val_fail_label(I) | CaseLabels]; + #icode_switch_tuple_arity{} -> + CaseLabels = [L || {_,L} <- switch_tuple_arity_cases(I)], + [switch_tuple_arity_fail_label(I) | CaseLabels]; + #icode_type{} -> + [type_true_label(I), type_false_label(I)]; + #icode_call{} -> + case call_continuation(I) of [] -> []; L when is_integer(L) -> [L] end + ++ + case call_fail_label(I) of [] -> []; L when is_integer(L) -> [L] end; + #icode_begin_try{} -> + [begin_try_successor(I), begin_try_label(I)]; + #icode_fail{} -> + case fail_label(I) of [] -> []; L when is_integer(L) -> [L] end; + #icode_enter{} -> []; + #icode_return{} -> []; + %% the following are included here for handling linear code + #icode_move{} -> []; + #icode_begin_handler{} -> [] + end. + +%% +%% @doc Returns the fail labels of an Icode instruction. +%% + +-spec fails_to(icode_instr()) -> [icode_lbl()]. + +fails_to(I) -> + case I of + #icode_switch_val{} -> [switch_val_fail_label(I)]; + #icode_switch_tuple_arity{} -> [switch_tuple_arity_fail_label(I)]; + #icode_call{} -> + case call_fail_label(I) of [] -> []; L when is_integer(L) -> [L] end; + #icode_begin_try{} -> [begin_try_label(I)]; % just for safety + #icode_fail{} -> + case fail_label(I) of [] -> []; L when is_integer(L) -> [L] end; + #icode_if{} -> []; % XXX: Correct? + #icode_enter{} -> []; % XXX: Correct? + #icode_goto{} -> []; + #icode_type{} -> []; % XXX: Correct? + #icode_return{} -> [] + end. + +%% +%% @doc Redirects jumps from label Old to label New. +%% If the instruction does not jump to Old, it remains unchanged. +%% The New label can be the special [] label used for calls with +%% fall-throughs. +%% + +-spec redirect_jmp(icode_instr(), icode_lbl(), [] | icode_lbl()) -> icode_instr(). + +redirect_jmp(Jmp, ToOld, ToOld) -> + Jmp; % no need to do anything +redirect_jmp(Jmp, ToOld, ToNew) -> + NewI = + case Jmp of + #icode_if{} -> + NewJmp = case if_true_label(Jmp) of + ToOld -> if_true_label_update(Jmp, ToNew); + _ -> Jmp + end, + case if_false_label(NewJmp) of + ToOld -> if_false_label_update(NewJmp, ToNew); + _ -> NewJmp + end; + #icode_goto{} -> + case goto_label(Jmp) of + ToOld -> Jmp#icode_goto{label=ToNew}; + _ -> Jmp + end; + #icode_switch_val{} -> + NewJmp = case switch_val_fail_label(Jmp) of + ToOld -> switch_val_fail_label_update(Jmp, ToNew); + _ -> Jmp + end, + Cases = [case Pair of + {Val,ToOld} -> {Val,ToNew}; + Unchanged -> Unchanged + end || Pair <- switch_val_cases(NewJmp)], + NewJmp#icode_switch_val{cases = Cases}; + #icode_switch_tuple_arity{} -> + NewJmp = case switch_tuple_arity_fail_label(Jmp) of + ToOld -> + Jmp#icode_switch_tuple_arity{fail_label=ToNew}; + _ -> Jmp + end, + Cases = [case Pair of + {Val,ToOld} -> {Val,ToNew}; + Unchanged -> Unchanged + end || Pair <- switch_tuple_arity_cases(NewJmp)], + NewJmp#icode_switch_tuple_arity{cases = Cases}; + #icode_type{} -> + NewJmp = case type_true_label(Jmp) of + ToOld -> Jmp#icode_type{true_label=ToNew}; + _ -> Jmp + end, + case type_false_label(NewJmp) of + ToOld -> NewJmp#icode_type{false_label=ToNew}; + _ -> NewJmp + end; + #icode_call{} -> + NewCont = case call_continuation(Jmp) of + ToOld -> ToNew; + OldCont -> OldCont + end, + NewFail = case call_fail_label(Jmp) of + ToOld -> ToNew; + OldFail -> OldFail + end, + Jmp#icode_call{continuation = NewCont, + fail_label = NewFail}; + #icode_begin_try{} -> + NewLabl = case begin_try_label(Jmp) of + ToOld -> ToNew; + OldLab -> OldLab + end, + NewSucc = case begin_try_successor(Jmp) of + ToOld -> ToNew; + OldSucc -> OldSucc + end, + Jmp#icode_begin_try{label=NewLabl, successor=NewSucc}; + #icode_fail{} -> + case fail_label(Jmp) of + ToOld -> Jmp#icode_fail{fail_label=ToNew}; + _ -> Jmp + end + end, + simplify_branch(NewI). + +%% +%% @doc Turns a branch into a goto if it has only one successor and it +%% is safe to do so. +%% + +-spec simplify_branch(icode_instr()) -> icode_instr(). + +simplify_branch(I) -> + case ordsets:from_list(successors(I)) of + [Label] -> + Goto = mk_goto(Label), + case I of + #icode_type{} -> Goto; + #icode_if{} -> Goto; + #icode_switch_tuple_arity{} -> Goto; + #icode_switch_val{} -> Goto; + _ -> I + end; + _ -> I + end. + +%% +%% Is this an unconditional jump (causes a basic block not to have a +%% fallthrough successor). +%% + +%% is_uncond(I) -> +%% case I of +%% #icode_goto{} -> true; +%% #icode_fail{} -> true; +%% #icode_enter{} -> true; +%% #icode_return{} -> true; +%% #icode_call{} -> +%% case call_fail_label(I) of +%% [] -> +%% case call_continuation(I) of +%% [] -> false; +%% _ -> true +%% end; +%% _ -> true +%% end; +%% _ -> false +%% end. + +%% @spec is_branch(icode_instr()) -> boolean() +%% +%% @doc Succeeds if the Icode instruction is a branch. I.e. a +%% (possibly conditional) discontinuation of linear control flow. +%% @end + +-spec is_branch(icode_instr()) -> boolean(). +is_branch(Instr) -> + case Instr of + #icode_if{} -> true; + #icode_switch_val{} -> true; + #icode_switch_tuple_arity{} -> true; + #icode_type{} -> true; + #icode_goto{} -> true; + #icode_fail{} -> true; + #icode_call{} -> + case call_fail_label(Instr) of + [] -> call_continuation(Instr) =/= []; + _ -> true + end; + #icode_enter{} -> true; + #icode_return{} -> true; + #icode_begin_try{} -> true; + %% false cases below + #icode_move{} -> false; + #icode_begin_handler{} -> false; + #icode_end_try{} -> false; + #icode_comment{} -> false; + #icode_label{} -> false; + #icode_phi{} -> false + end. + +%% +%% @doc Makes a new variable. +%% + +-spec mk_new_var() -> icode_var(). +mk_new_var() -> + mk_var(hipe_gensym:get_next_var(icode)). + +%% +%% @doc Makes a new fp variable. +%% + +-spec mk_new_fvar() -> icode_fvar(). +mk_new_fvar() -> + mk_fvar(hipe_gensym:get_next_var(icode)). + +%% +%% @doc Makes a new register. +%% + +-spec mk_new_reg() -> icode_reg(). +mk_new_reg() -> + mk_reg(hipe_gensym:get_next_var(icode)). + +%% +%% @doc Makes a new label. +%% + +-spec mk_new_label() -> #icode_label{}. +mk_new_label() -> + mk_label(hipe_gensym:get_next_label(icode)). + +%% %% +%% %% @doc Makes a bunch of move operations. +%% %% +%% +%% -spec mk_moves([_], [_]) -> [#icode_move{}]. +%% mk_moves([], []) -> +%% []; +%% mk_moves([X|Xs], [Y|Ys]) -> +%% [mk_move(X, Y) | mk_moves(Xs, Ys)]. + +%% +%% Makes a series of element operations. +%% + +%% mk_elements(_, []) -> +%% []; +%% mk_elements(Tuple, [X|Xs]) -> +%% [mk_primop([X], #unsafe_element{index=length(Xs)+1}, [Tuple]) | +%% mk_elements(Tuple, Xs)]. + +%% +%% @doc Removes comments from Icode. +%% + +-spec strip_comments(#icode{}) -> #icode{}. +strip_comments(ICode) -> + icode_code_update(ICode, no_comments(icode_code(ICode))). + +%% The following spec is underspecified: the resulting list does not +%% contain any #comment{} instructions +-spec no_comments(icode_instrs()) -> icode_instrs(). +no_comments([]) -> + []; +no_comments([I|Xs]) -> + case is_comment(I) of + true -> no_comments(Xs); + false -> [I|no_comments(Xs)] + end. + +%%----------------------------------------------------------------------- + +%% @doc True if an Icode instruction is safe (can be removed if the +%% result is not used). Note that pure control flow instructions +%% cannot be regarded as safe, as they are not defining anything. + +-spec is_safe(icode_instr()) -> boolean(). + +is_safe(Instr) -> + case Instr of + %% Instructions that are safe, or might be safe to remove. + #icode_move{} -> true; + #icode_phi{} -> true; + #icode_begin_handler{} -> true; + #icode_call{} -> + case call_fun(Instr) of + {M,F,A} -> + erl_bifs:is_safe(M,F,A); + Op -> + hipe_icode_primops:is_safe(Op) + end; + %% Control flow instructions. + #icode_if{} -> false; + #icode_switch_val{} -> false; + #icode_switch_tuple_arity{} -> false; + #icode_type{} -> false; + #icode_goto{} -> false; + #icode_label{} -> false; + %% Returning instructions without defines. + #icode_return{} -> false; + #icode_fail{} -> false; + #icode_enter{} -> false; + %% Internal auxiliary instructions that should not be removed + %% unless you really know what you are doing. + #icode_comment{} -> false; + #icode_begin_try{} -> false; + #icode_end_try{} -> false + end. + +%%----------------------------------------------------------------------- + +-spec highest_var(icode_instrs()) -> non_neg_integer(). +highest_var(Instrs) -> + highest_var(Instrs, 0). + +-spec highest_var(icode_instrs(), non_neg_integer()) -> non_neg_integer(). +highest_var([I|Is], Max) -> + Defs = defines(I), + Uses = uses(I), + highest_var(Is, new_max(Defs++Uses, Max)); +highest_var([], Max) -> + Max. + +-spec new_max([#icode_variable{}], non_neg_integer()) -> non_neg_integer(). +new_max([V|Vs], Max) -> + VName = + case is_var(V) of + true -> + var_name(V); + false -> + case is_fvar(V) of + true -> + fvar_name(V); + _ -> + reg_name(V) + end + end, + new_max(Vs, erlang:max(VName, Max)); +new_max([], Max) when is_integer(Max) -> + Max. + +%%----------------------------------------------------------------------- + +-spec highest_label(icode_instrs()) -> icode_lbl(). +highest_label(Instrs) -> + highest_label(Instrs, 0). + +-spec highest_label(icode_instrs(), icode_lbl()) -> icode_lbl(). +highest_label([I|Is], Max) -> + case is_label(I) of + true -> + L = label_name(I), + NewMax = erlang:max(L, Max), + highest_label(Is, NewMax); + false -> + highest_label(Is, Max) + end; +highest_label([], Max) when is_integer(Max) -> + Max. + +%%----------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode.hrl b/lib/hipe/icode/hipe_icode.hrl new file mode 100644 index 0000000000..65deaf6d7c --- /dev/null +++ b/lib/hipe/icode/hipe_icode.hrl @@ -0,0 +1,188 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%===================================================================== +%% +%% Contains type and record definitions for all Icode data structures. +%% +%%===================================================================== + +%%--------------------------------------------------------------------- +%% THIS DOES NOT REALLY BELONG HERE -- PLEASE REMOVE ASAP! +%%--------------------------------------------------------------------- + +-type ordset(T) :: [T]. + +%%--------------------------------------------------------------------- +%% Include files needed for the compilation of this header file +%%--------------------------------------------------------------------- + +-include("../misc/hipe_consttab.hrl"). + +%%--------------------------------------------------------------------- +%% Icode argument types +%%--------------------------------------------------------------------- + +-type simple_const() :: atom() | [] | integer() | float(). +-type structured_const() :: list() | tuple(). + +-type icode_lbl() :: non_neg_integer(). + +%%--------------------------------------------------------------------- +%% Icode records +%%--------------------------------------------------------------------- + +-record(flat, {value :: simple_const() | structured_const() | binary()}). + +-record(icode_const, {value :: #flat{}}). + +-type variable_annotation() :: {atom(), any(), fun((any()) -> string())}. + +-record(icode_variable, {name :: non_neg_integer(), + kind :: 'var' | 'reg' | 'fvar', + annotation = [] :: [] | variable_annotation()}). + +%%--------------------------------------------------------------------- +%% Type declarations for Icode instructions +%%--------------------------------------------------------------------- + +-type icode_if_op() :: '>' | '<' | '>=' | '=<' | '=:=' | '=/=' | '==' | '/=' + | 'fixnum_eq' | 'fixnum_neq' | 'fixnum_lt' + | 'fixnum_le' | 'fixnum_ge' | 'fixnum_gt' + | 'suspend_msg_timeout'. + +-type icode_type_test() :: 'atom' | 'bignum' | 'binary' | 'bitrst' | 'boolean' + | 'cons' | 'constant' | 'fixnum' | 'float' + | 'function' | 'function2' | 'integer' | 'list' | 'nil' + | 'number' | 'pid' | 'port' | 'reference' | 'tuple' + | {'atom', atom()} | {'integer', integer()} + | {'record', atom(), non_neg_integer()} + | {'tuple', non_neg_integer()}. + +-type icode_primop() :: atom() | tuple(). % XXX: temporarily, I hope +-type icode_funcall() :: mfa() | icode_primop(). + +-type icode_var() :: #icode_variable{kind::'var'}. +-type icode_reg() :: #icode_variable{kind::'reg'}. +-type icode_fvar() :: #icode_variable{kind::'fvar'}. +-type icode_argument() :: #icode_const{} | #icode_variable{}. +-type icode_term_arg() :: icode_var() | #icode_const{}. + +-type icode_switch_case() :: {#icode_const{}, icode_lbl()}. + +-type icode_call_type() :: 'local' | 'primop' | 'remote'. +-type icode_exit_class() :: 'error' | 'exit' | 'rethrow' | 'throw'. + +-type icode_comment_text() :: atom() | string() | {atom(), term()}. + +-type icode_info() :: [{'arg_types', [erl_types:erl_type()]}]. + +%%--------------------------------------------------------------------- +%% Icode instructions +%%--------------------------------------------------------------------- + +-record(icode_label, {name :: icode_lbl()}). + +-record(icode_if, {op :: icode_if_op(), + args :: [icode_term_arg()], + true_label :: icode_lbl(), + false_label :: icode_lbl(), + p :: float()}). + +-record(icode_switch_val, {term :: icode_var(), + fail_label :: icode_lbl(), + length :: non_neg_integer(), + cases :: [icode_switch_case()]}). + +-record(icode_switch_tuple_arity, {term :: icode_var(), + fail_label :: icode_lbl(), + length :: non_neg_integer(), + cases :: [icode_switch_case()]}). + + +-record(icode_type, {test :: icode_type_test(), + args :: [icode_term_arg()], + true_label :: icode_lbl(), + false_label :: icode_lbl(), + p :: float()}). + +-record(icode_goto, {label :: icode_lbl()}). + +-record(icode_move, {dst :: #icode_variable{}, + src :: #icode_variable{} | #icode_const{}}). + +-record(icode_phi, {dst :: #icode_variable{}, + id :: #icode_variable{}, + arglist :: [{icode_lbl(), #icode_variable{}}]}). + +-record(icode_call, {dstlist :: [#icode_variable{}], + 'fun' :: icode_funcall(), + args :: [icode_argument()], + type :: icode_call_type(), + continuation :: [] | icode_lbl(), + fail_label = [] :: [] | icode_lbl(), + in_guard = false :: boolean()}). + +-record(icode_enter, {'fun' :: icode_funcall(), + args :: [icode_term_arg()], + type :: icode_call_type()}). + +-record(icode_return, {vars :: [icode_var()]}). + +-record(icode_begin_try, {label :: icode_lbl(), successor :: icode_lbl()}). + +-record(icode_end_try, {}). + +-record(icode_begin_handler, {dstlist :: [icode_var()]}). + +%% TODO: Remove [] from fail_label +-record(icode_fail, {class :: icode_exit_class(), + args :: [icode_term_arg()], + fail_label = [] :: [] | icode_lbl()}). + +-record(icode_comment, {text :: icode_comment_text()}). + +%%--------------------------------------------------------------------- +%% Icode instructions +%%--------------------------------------------------------------------- + +-type icode_instr() :: #icode_begin_handler{} | #icode_begin_try{} + | #icode_call{} | #icode_comment{} | #icode_end_try{} + | #icode_enter{} | #icode_fail{} + | #icode_goto{} | #icode_if{} | #icode_label{} + | #icode_move{} | #icode_phi{} | #icode_return{} + | #icode_switch_tuple_arity{} | #icode_switch_val{} + | #icode_type{}. +-type icode_instrs() :: [icode_instr()]. + +%%--------------------------------------------------------------------- +%% The Icode data structure +%%--------------------------------------------------------------------- + +-record(icode, {'fun' :: mfa(), + params :: [icode_var()], + is_closure :: boolean(), + closure_arity :: arity(), + is_leaf :: boolean(), + code = [] :: icode_instrs(), + data :: hipe_consttab(), + var_range :: {non_neg_integer(), non_neg_integer()}, + label_range :: {icode_lbl(), icode_lbl()}, + info = [] :: icode_info()}). + +%%--------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode_bincomp.erl b/lib/hipe/icode/hipe_icode_bincomp.erl new file mode 100644 index 0000000000..6f694f2bce --- /dev/null +++ b/lib/hipe/icode/hipe_icode_bincomp.erl @@ -0,0 +1,178 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_icode_bincomp.erl +%%% Author : Per Gustafsson +%%% Description : +%%% +%%% Created : 12 Sep 2005 by Per Gustafsson +%%%------------------------------------------------------------------- + +-module(hipe_icode_bincomp). + +-export([cfg/1]). + +%%-------------------------------------------------------------------- + +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). + +%%-------------------------------------------------------------------- + +-spec cfg(cfg()) -> cfg(). + +cfg(Cfg1) -> + StartLbls = ordsets:from_list([hipe_icode_cfg:start_label(Cfg1)]), + find_bs_get_integer(StartLbls, Cfg1, StartLbls). + +find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> + BB = hipe_icode_cfg:bb(Cfg, Lbl), + Last = hipe_bb:last(BB), + NewCfg = + case ok(Last, Cfg) of + {ok,{Type, FakeFail, RealFail, SuccLbl, MsIn, MsOut}} -> + {Cont, Info, OldLbl, LastMsOut} = + collect_info(SuccLbl, Cfg, [Type], Lbl, RealFail, MsOut), + update_code(Lbl, OldLbl, Cfg, Info, Cont, FakeFail, MsIn, LastMsOut); + not_ok -> + Cfg + end, + Succs = ordsets:from_list(hipe_icode_cfg:succ(NewCfg, Lbl)), + NewSuccs = ordsets:subtract(Succs, Visited), + NewLbls = ordsets:union(NewSuccs, Rest), + NewVisited = ordsets:union(NewSuccs, Visited), + find_bs_get_integer(NewLbls, NewCfg, NewVisited); +find_bs_get_integer([], Cfg, _) -> + Cfg. + +ok(I, Cfg) -> + case hipe_icode:is_call(I) of + true -> + case hipe_icode:call_fun(I) of + {hipe_bs_primop, {bs_get_integer, Size, Flags}} when (Flags band 6) =:= 0 -> + case {hipe_icode:call_dstlist(I), hipe_icode:call_args(I)} of + {[Dst, MsOut] = DstList, [MsIn]} -> + Cont = hipe_icode:call_continuation(I), + FirstFail = hipe_icode:call_fail_label(I), + FirstFailBB = hipe_icode_cfg:bb(Cfg, FirstFail), + case check_for_restore_block(FirstFailBB, DstList) of + {restore_block, RealFail} -> + {ok, {{Dst, Size}, FirstFail, RealFail, Cont, MsIn, MsOut}}; + not_restore_block -> + not_ok + end; + _ -> + not_ok + end; + _ -> + not_ok + end; + false -> + not_ok + end. + +check_for_restore_block(FirstFailBB, DefVars) -> + Moves = hipe_bb:butlast(FirstFailBB), + case [Instr || Instr <- Moves, is_badinstr(Instr, DefVars)] of + [] -> + Last = hipe_bb:last(FirstFailBB), + case hipe_icode:is_goto(Last) of + true -> + {restore_block, hipe_icode:goto_label(Last)}; + false -> + not_restore_block + end; + [_|_] -> + not_restore_block + end. + +is_badinstr(Instr, DefVars) -> + not(hipe_icode:is_move(Instr) andalso + lists:member(hipe_icode:move_dst(Instr), DefVars)). + +collect_info(Lbl, Cfg, Acc, OldLbl, FailLbl, MsOut) -> + case do_collect_info(Lbl, Cfg, Acc, FailLbl, MsOut) of + done -> + {Lbl, Acc, OldLbl, MsOut}; + {cont, NewAcc, NewLbl, NewMsOut} -> + collect_info(NewLbl, Cfg, NewAcc, Lbl, FailLbl, NewMsOut) + end. + +do_collect_info(Lbl, Cfg, Acc, FailLbl, MsOut) -> + BB = hipe_icode_cfg:bb(Cfg,Lbl), + case hipe_bb:code(BB) of + [I] -> + case hipe_icode_cfg:pred(Cfg,Lbl) of + [_] -> + case ok(I, Cfg) of + {ok, {Type,_FakeFail,FailLbl,SuccLbl,MsOut,NewMsOut}} -> + NewAcc = [Type|Acc], + MaxSize = hipe_rtl_arch:word_size() * 8 - 5, + case calc_size(NewAcc) of + Size when Size =< MaxSize -> + {cont,NewAcc,SuccLbl,NewMsOut}; + _ -> + done + end; + _ -> + done + end; + _ -> + done + end; + _ -> + done + end. + +calc_size([{_,Size}|Rest]) when is_integer(Size) -> + Size + calc_size(Rest); +calc_size([]) -> 0. + +update_code(_Lbl, _, Cfg, [_Info], _Cont, _LastFail, _MsIn, _MsOut) -> + Cfg; +update_code(Lbl, OldLbl, Cfg, Info, Cont, LastFail, MsIn, MsOut) -> + BB = hipe_icode_cfg:bb(Cfg, Lbl), + ButLast = hipe_bb:butlast(BB), + NewVar = hipe_icode:mk_new_var(), + Size = calc_size(Info), + NewLast = + hipe_icode:mk_primop([NewVar,MsOut], + {hipe_bs_primop, {bs_get_integer,Size,0}}, + [MsIn], + OldLbl, + LastFail), + NewBB = hipe_bb:mk_bb(ButLast++[NewLast]), + NewCfg = hipe_icode_cfg:bb_add(Cfg, Lbl, NewBB), + fix_rest(Info, NewVar, OldLbl, Cont, NewCfg). + +fix_rest(Info, Var, Lbl, Cont, Cfg) -> + ButLast = make_butlast(Info, Var), + Last = hipe_icode:mk_goto(Cont), + NewBB = hipe_bb:mk_bb(ButLast++[Last]), + hipe_icode_cfg:bb_add(Cfg, Lbl, NewBB). + +make_butlast([{Res,_Size}], Var) -> + [hipe_icode:mk_move(Res, Var)]; +make_butlast([{Res, Size}|Rest], Var) -> + NewVar = hipe_icode:mk_new_var(), + [hipe_icode:mk_primop([Res], 'band', + [Var, hipe_icode:mk_const((1 bsl Size)-1)]), + hipe_icode:mk_primop([NewVar], 'bsr', [Var, hipe_icode:mk_const(Size)]) + |make_butlast(Rest, NewVar)]. diff --git a/lib/hipe/icode/hipe_icode_callgraph.erl b/lib/hipe/icode/hipe_icode_callgraph.erl new file mode 100644 index 0000000000..95182fc002 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_callgraph.erl @@ -0,0 +1,217 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%----------------------------------------------------------------------- +%% File : hipe_icode_callgraph.erl +%% Author : Tobias Lindahl +%% Purpose : Creates a call graph to find out in what order functions +%% in a module have to be compiled to gain best information +%% in hipe_icode_type.erl. +%% +%% Created : 7 Jun 2004 by Tobias Lindahl +%% +%% $Id$ +%%----------------------------------------------------------------------- +-module(hipe_icode_callgraph). + +-export([construct/1, + get_called_modules/1, + to_list/1, + construct_callgraph/1]). + +-define(NO_UNUSED, true). + +-ifndef(NO_UNUSED). +-export([is_empty/1, take_first/1, pp/1]). +-endif. + +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). + +%%------------------------------------------------------------------------ + +-type mfa_icode() :: {mfa(), #icode{}}. + +-record(icode_callgraph, {codedict :: dict(), ordered_sccs :: [[atom()]]}). + +%%------------------------------------------------------------------------ +%% Exported functions +%%------------------------------------------------------------------------ + +-spec construct([mfa_icode()]) -> #icode_callgraph{}. + +construct(List) -> + Calls = get_local_calls(List), + %% io:format("Calls: ~p\n", [lists:keysort(1, Calls)]), + Edges = get_edges(Calls), + %% io:format("Edges: ~p\n", [Edges]), + DiGraph = hipe_digraph:from_list(Edges), + Nodes = ordsets:from_list([MFA || {MFA, _} <- List]), + DiGraph1 = hipe_digraph:add_node_list(Nodes, DiGraph), + SCCs = hipe_digraph:reverse_preorder_sccs(DiGraph1), + #icode_callgraph{codedict = dict:from_list(List), ordered_sccs = SCCs}. + +-spec construct_callgraph([mfa_icode()]) -> hipe_digraph:hdg(). + +construct_callgraph(List) -> + Calls = get_local_calls2(List), + Edges = get_edges(Calls), + hipe_digraph:from_list(Edges). + +-spec to_list(#icode_callgraph{}) -> [mfa_icode()]. + +to_list(#icode_callgraph{codedict = Dict, ordered_sccs = SCCs}) -> + FlatList = lists:flatten(SCCs), + [{Mod, dict:fetch(Mod, Dict)} || Mod <- FlatList]. + +%%------------------------------------------------------------------------ + +-ifndef(NO_UNUSED). + +-spec is_empty(#icode_callgraph{}) -> boolean(). + +is_empty(#icode_callgraph{ordered_sccs = SCCs}) -> + SCCs =:= []. + +-spec take_first(#icode_callgraph{}) -> {[mfa_icode()], #icode_callgraph{}}. + +take_first(#icode_callgraph{codedict = Dict, ordered_sccs = [H|T]} = CG) -> + SCCCode = [{Mod, dict:fetch(Mod, Dict)} || Mod <- H], + {SCCCode, CG#icode_callgraph{ordered_sccs = T}}. + +-spec pp(#icode_callgraph{}) -> 'ok'. + +pp(#icode_callgraph{ordered_sccs = SCCs}) -> + io:format("Callgraph ~p\n", [SCCs]). +-endif. + +%%------------------------------------------------------------------------ +%% Get the modules called from this module + +-spec get_called_modules([mfa_icode()]) -> ordset(atom()). + +get_called_modules(List) -> + get_remote_calls(List, []). + +get_remote_calls([{_MFA, Icode}|Left], Acc) -> + CallSet = get_remote_calls_1(hipe_icode:icode_code(Icode), Acc), + get_remote_calls(Left, ordsets:union(Acc, CallSet)); +get_remote_calls([], Acc) -> + Acc. + +get_remote_calls_1([I|Left], Set) -> + NewSet = + case I of + #icode_call{} -> + case hipe_icode:call_type(I) of + remote -> + {M, _F, _A} = hipe_icode:call_fun(I), + ordsets:add_element(M, Set); + _ -> + Set + end; + #icode_enter{} -> + case hipe_icode:enter_type(I) of + remote -> + {M, _F, _A} = hipe_icode:enter_fun(I), + ordsets:add_element(M, Set); + _ -> + Set + end; + _ -> + Set + end, + get_remote_calls_1(Left, NewSet); +get_remote_calls_1([], Set) -> + Set. + +%%------------------------------------------------------------------------ +%% Find functions called (or entered) by each function. + +get_local_calls(List) -> + RemoveFun = fun ordsets:del_element/2, + get_local_calls(List, RemoveFun, []). + +get_local_calls2(List) -> + RemoveFun = fun(_,Set) -> Set end, + get_local_calls(List, RemoveFun, []). + +get_local_calls([{{_M, _F, _A} = MFA, Icode}|Left], RemoveFun, Acc) -> + CallSet = get_local_calls_1(hipe_icode:icode_code(Icode)), + %% Exclude recursive calls. + CallSet1 = RemoveFun(MFA, CallSet), + get_local_calls(Left, RemoveFun, [{MFA, CallSet1}|Acc]); +get_local_calls([], _RemoveFun, Acc) -> + Acc. + +get_local_calls_1(Icode) -> + get_local_calls_1(Icode, []). + +get_local_calls_1([I|Left], Set) -> + NewSet = + case I of + #icode_call{} -> + case hipe_icode:call_type(I) of + local -> + Fun = hipe_icode:call_fun(I), + ordsets:add_element(Fun, Set); + primop -> + case hipe_icode:call_fun(I) of + #mkfun{mfa = Fun} -> + ordsets:add_element(Fun, Set); + _ -> + Set + end; + remote -> + Set + end; + #icode_enter{} -> + case hipe_icode:enter_type(I) of + local -> + Fun = hipe_icode:enter_fun(I), + ordsets:add_element(Fun, Set); + primop -> + case hipe_icode:enter_fun(I) of + #mkfun{mfa = Fun} -> + ordsets:add_element(Fun, Set); + _ -> + Set + end; + remote -> + Set + end; + _ -> + Set + end, + get_local_calls_1(Left, NewSet); +get_local_calls_1([], Set) -> + Set. + +%%------------------------------------------------------------------------ +%% Find the edges in the callgraph. + +get_edges(Calls) -> + get_edges(Calls, []). + +get_edges([{MFA, Set}|Left], Edges) -> + EdgeList = [{MFA, X} || X <- Set], + EdgeSet = ordsets:from_list(EdgeList), + get_edges(Left, ordsets:union(EdgeSet, Edges)); +get_edges([], Edges) -> + Edges. diff --git a/lib/hipe/icode/hipe_icode_cfg.erl b/lib/hipe/icode/hipe_icode_cfg.erl new file mode 100644 index 0000000000..9b4a10e273 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_cfg.erl @@ -0,0 +1,203 @@ +%% -*- erlang-indent-level: 2 -*- +%%====================================================================== +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_icode_cfg). + +-export([bb/2, bb_add/3, + cfg_to_linear/1, + is_closure/1, + closure_arity/1, + linear_to_cfg/1, + labels/1, start_label/1, + pp/1, pp/2, + params/1, params_update/2, + pred/2, + redirect/4, + remove_trivial_bbs/1, remove_unreachable_code/1, + succ/2, + visit/2, is_visited/2, none_visited/0 + ]). +-export([postorder/1, reverse_postorder/1]). + +-define(ICODE_CFG, true). % needed by cfg.inc +%%-define(DO_ASSERT, true). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("../flow/hipe_bb.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +%%---------------------------------------------------------------------- +%% Prototypes for exported functions which are Icode specific +%%---------------------------------------------------------------------- + +-spec labels(cfg()) -> [icode_lbl()]. +-spec postorder(cfg()) -> [icode_lbl()]. +-spec reverse_postorder(cfg()) -> [icode_lbl()]. + +-spec is_visited(icode_lbl(), gb_set()) -> boolean(). +-spec visit(icode_lbl(), gb_set()) -> gb_set(). + +-spec bb(cfg(), icode_lbl()) -> 'not_found' | bb(). +-spec bb_add(cfg(), icode_lbl(), bb()) -> cfg(). +-spec pred(cfg(), icode_lbl()) -> [icode_lbl()]. +-spec succ(cfg(), icode_lbl()) -> [icode_lbl()]. +-spec redirect(cfg(), icode_lbl(), icode_lbl(), icode_lbl()) -> cfg(). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to Icode +%% + +-spec linear_to_cfg(#icode{}) -> cfg(). + +linear_to_cfg(LinearIcode) -> + %% hipe_icode_pp:pp(Icode), + Code = hipe_icode:icode_code(LinearIcode), + IsClosure = hipe_icode:icode_is_closure(LinearIcode), + StartLabel = hipe_icode:label_name(hd(Code)), + CFG0 = mk_empty_cfg(hipe_icode:icode_fun(LinearIcode), + StartLabel, + hipe_icode:icode_data(LinearIcode), + IsClosure, + hipe_icode:icode_is_leaf(LinearIcode), + hipe_icode:icode_params(LinearIcode)), + CFG1 = info_update(CFG0, hipe_icode:icode_info(LinearIcode)), + CFG2 = case IsClosure of + true -> + closure_arity_update(CFG1, + hipe_icode:icode_closure_arity(LinearIcode)); + false -> + CFG1 + end, + ?opt_start_timer("Get BBs icode"), + FullCFG = take_bbs(Code, CFG2), + ?opt_stop_timer("Get BBs icode"), + FullCFG. + +%% remove_blocks(CFG, []) -> +%% CFG; +%% remove_blocks(CFG, [Lbl|Lbls]) -> +%% remove_blocks(bb_remove(CFG, Lbl), Lbls). + +-spec is_label(icode_instr()) -> boolean(). +is_label(Instr) -> + hipe_icode:is_label(Instr). + +label_name(Instr) -> + hipe_icode:label_name(Instr). + +mk_label(Name) -> + hipe_icode:mk_label(Name). + +mk_goto(Name) -> + hipe_icode:mk_goto(Name). + +branch_successors(Instr) -> + hipe_icode:successors(Instr). + +fails_to(Instr) -> + hipe_icode:fails_to(Instr). + +%% True if instr has no effect. +-spec is_comment(icode_instr()) -> boolean(). +is_comment(Instr) -> + hipe_icode:is_comment(Instr). + +%% True if instr is just a jump (no side-effects). +-spec is_goto(icode_instr()) -> boolean(). +is_goto(Instr) -> + hipe_icode:is_goto(Instr). + +-spec is_branch(icode_instr()) -> boolean(). +is_branch(Instr) -> + hipe_icode:is_branch(Instr). + +-spec is_pure_branch(icode_instr()) -> boolean(). +is_pure_branch(Instr) -> + case Instr of + #icode_if{} -> true; + #icode_goto{} -> true; + #icode_switch_val{} -> true; + #icode_switch_tuple_arity{} -> true; + #icode_type{} -> true; + %% false cases below -- XXX: are they correct? + #icode_label{} -> false; + #icode_move{} -> false; + #icode_phi{} -> false; + #icode_call{} -> false; + #icode_enter{} -> false; + #icode_return{} -> false; + #icode_begin_try{} -> false; + #icode_end_try{} -> false; + #icode_begin_handler{} -> false; + #icode_fail{} -> false; + #icode_comment{} -> false + end. + +-spec is_phi(icode_instr()) -> boolean(). +is_phi(I) -> + hipe_icode:is_phi(I). + +phi_remove_pred(I, Pred) -> + hipe_icode:phi_remove_pred(I, Pred). + +%% phi_redirect_pred(I, OldPred, NewPred) -> +%% hipe_icode:phi_redirect_pred(I, OldPred, NewPred). + +redirect_jmp(Jmp, ToOld, ToNew) -> + hipe_icode:redirect_jmp(Jmp, ToOld, ToNew). + +redirect_ops(_, CFG, _) -> %% We do not refer to labels in Icode ops. + CFG. + +%%---------------------------------------------------------------------------- + +-spec pp(cfg()) -> 'ok'. + +pp(CFG) -> + hipe_icode_pp:pp(cfg_to_linear(CFG)). + +-spec pp(io:device(), cfg()) -> 'ok'. + +pp(Dev, CFG) -> + hipe_icode_pp:pp(Dev, cfg_to_linear(CFG)). + +%%---------------------------------------------------------------------------- + +-spec cfg_to_linear(cfg()) -> #icode{}. +cfg_to_linear(CFG) -> + Code = linearize_cfg(CFG), + IsClosure = is_closure(CFG), + Icode = hipe_icode:mk_icode(function(CFG), + params(CFG), + IsClosure, + is_leaf(CFG), + Code, + data(CFG), + hipe_gensym:var_range(icode), + hipe_gensym:label_range(icode)), + Icode1 = hipe_icode:icode_info_update(Icode, info(CFG)), + case IsClosure of + true -> hipe_icode:icode_closure_arity_update(Icode1, closure_arity(CFG)); + false -> Icode1 + end. diff --git a/lib/hipe/icode/hipe_icode_coordinator.erl b/lib/hipe/icode/hipe_icode_coordinator.erl new file mode 100644 index 0000000000..a71e143192 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_coordinator.erl @@ -0,0 +1,274 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%-------------------------------------------------------------------- +%% File : hipe_icode_coordinator.erl +%% Author : Per Gustafsson +%% Description : This module coordinates an Icode pass. +%% Created : 20 Feb 2007 by Per Gustafsson +%%--------------------------------------------------------------------- + +-module(hipe_icode_coordinator). + +-export([coordinate/4]). + +-include("hipe_icode.hrl"). + +%%--------------------------------------------------------------------- + +-define(MAX_CONCURRENT, erlang:system_info(schedulers)). + +%%--------------------------------------------------------------------- + +-spec coordinate(hipe_digraph:hdg(), [{mfa(),boolean()}], [mfa()], module()) -> + no_return(). + +coordinate(CG, Escaping, NonEscaping, Mod) -> + ServerPid = initialize_server(Escaping, Mod), + Clean = [MFA || {MFA, _} <- Escaping], + All = NonEscaping ++ Clean, + Restart = + fun (MFALists, PM) -> restart_funs(MFALists, PM, All, ServerPid) end, + LastAction = + fun (PM) -> last_action(PM, ServerPid, Mod, All) end, + coordinate({Clean,All}, CG, gb_trees:empty(), Restart, LastAction, ServerPid). + +coordinate(MFALists, CG, PM, Restart, LastAction, ServerPid) -> + case MFALists of + {[], []} -> + LastAction(PM), + ServerPid ! stop, + receive + {stop, Ans2Pid} -> + Ans2Pid ! {done, self()}, + exit(normal) + end; + _ -> ok + end, + receive + {stop, AnsPid} -> + ServerPid ! stop, + AnsPid ! {done, self()}, + exit(normal); + Message -> + {NewPM, NewMFALists} = + case Message of + {restart_call, MFA} -> + {PM, handle_restart_call(MFA, MFALists)}; + {ready, {MFA, Pid}} -> + handle_ready(MFA, Pid, MFALists, PM); + {restart_done, MFA} -> + {PM, handle_restart_done(MFA, MFALists, CG)}; + {no_change_done, MFA} -> + {PM, handle_no_change_done(MFA, MFALists)} + end, + coordinate(Restart(NewMFALists, NewPM), CG, NewPM, Restart, + LastAction, ServerPid) + end. + +handle_restart_call(MFA, {Queue, Busy} = QB) -> + case lists:member(MFA, Queue) of + true -> + QB; + false -> + {[MFA|Queue], Busy} + end. + +handle_ready(MFA, Pid, {Queue, Busy}, PM) -> + {gb_trees:insert(MFA, Pid, PM), {Queue, Busy -- [MFA]}}. + +handle_restart_done(MFA, {Queue, Busy}, CG) -> + Restarts = hipe_digraph:get_parents(MFA, CG), + {ordsets:from_list(Restarts ++ Queue), Busy -- [MFA]}. + +handle_no_change_done(MFA, {Queue, Busy}) -> + {Queue, Busy -- [MFA]}. + +last_action(PM, ServerPid, Mod, All) -> + lists:foreach(fun (MFA) -> + gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, + receive + {done_rewrite, MFA} -> ok + end + end, All), + ok. + +restart_funs({Queue, Busy} = QB, PM, All, ServerPid) -> + case ?MAX_CONCURRENT - length(Busy) of + X when is_integer(X), X > 0 -> + Possible = [Pos || Pos <- Queue, (not lists:member(Pos, Busy))], + Restarts = lists:sublist(Possible, X), + lists:foreach(fun (MFA) -> + restart_fun(MFA, PM, All, ServerPid) + end, Restarts), + {Queue -- Restarts, Busy ++ Restarts}; + X when is_integer(X) -> + QB + end. + +initialize_server(Escaping, Mod) -> + Pid = spawn_link(fun () -> info_server(Mod) end), + lists:foreach(fun ({MFA, _}) -> Pid ! {set_escaping, MFA} end, Escaping), + Pid. + +safe_get_args(MFA, Cfg, Pid, Mod) -> + Mod:replace_nones(get_args(MFA, Cfg, Pid)). + +get_args(MFA, Cfg, Pid) -> + Ref = make_ref(), + Pid ! {get_call, MFA, Cfg, self(), Ref}, + receive + {Ref, Types} -> + Types + end. + +safe_get_res(MFA, Pid, Mod) -> + Mod:replace_nones(get_res(MFA, Pid)). + +get_res(MFA, Pid) -> + Ref = make_ref(), + Pid ! {get_return, MFA, self(), Ref}, + receive + {Ref, Types} -> + Types + end. + +update_return_type(MFA, NewType, Pid) -> + Ref = make_ref(), + Pid ! {update_return, MFA, NewType, self(), Ref}, + receive + {Ref, Ans} -> + Ans + end. + +update_call_type(MFA, NewTypes, Pid) -> + Ref = make_ref(), + Pid ! {update_call, MFA, NewTypes, self(), Ref}, + receive + {Ref, Ans} -> + Ans + end. + +restart_fun(MFA, PM, All, ServerPid) -> + gb_trees:get(MFA, PM) ! {analyse, analysis_funs(All, ServerPid)}, + ok. + +analysis_funs(All, Pid) -> + Self = self(), + ArgsFun = fun (MFA, Cfg) -> get_args(MFA, Cfg, Pid) end, + GetResFun = fun (MFA, Args) -> + case lists:member(MFA, All) of + true -> + case update_call_type(MFA, Args, Pid) of + do_restart -> + Self ! {restart_call, MFA}, + ok; + no_change -> + ok + end; + false -> + ok + end, + [Ans] = get_res(MFA, Pid), + Ans + end, + FinalFun = fun (MFA, RetTypes) -> + case update_return_type(MFA, RetTypes, Pid) of + do_restart -> + Self ! {restart_done, MFA}, + ok; + no_change -> + Self ! {no_change_done, MFA}, + ok + end + end, + {ArgsFun, GetResFun, FinalFun}. + +final_funs(Pid,Mod) -> + ArgsFun = fun (MFA, Cfg) -> safe_get_args(MFA, Cfg, Pid, Mod) end, + GetResFun = fun (MFA, _) -> + [Ans] = safe_get_res(MFA, Pid, Mod), + Ans + end, + FinalFun = fun (_, _) -> ok end, + {ArgsFun, GetResFun, FinalFun}. + +info_server(Mod) -> + info_server_loop(gb_trees:empty(), gb_trees:empty(), Mod). + +info_server_loop(CallInfo, ReturnInfo, Mod) -> + receive + {update_return, MFA, NewInfo, Pid, Ref} -> + NewReturnInfo = handle_update(MFA, ReturnInfo, NewInfo, Pid, Ref, Mod), + info_server_loop(CallInfo, NewReturnInfo, Mod); + {update_call, MFA, NewInfo, Pid, Ref} -> + NewCallInfo = handle_update(MFA, CallInfo, NewInfo, Pid, Ref, Mod), + info_server_loop(NewCallInfo, ReturnInfo, Mod); + {get_return, MFA, Pid, Ref} -> + Ans = + case gb_trees:lookup(MFA, ReturnInfo) of + none -> + Mod:return_none(); + {value, TypesComp} -> + Mod:return__info((TypesComp)) + end, + Pid ! {Ref, Ans}, + info_server_loop(CallInfo, ReturnInfo, Mod); + {get_call, MFA, Cfg, Pid, Ref} -> + Ans = + case gb_trees:lookup(MFA, CallInfo) of + none -> + Mod:return_none_args(Cfg, MFA); + {value, escaping} -> + Mod:return_any_args(Cfg, MFA); + {value, TypesComp} -> + Mod:return__info(TypesComp) + end, + Pid ! {Ref, Ans}, + info_server_loop(CallInfo, ReturnInfo, Mod); + {set_escaping, MFA} -> + NewCallInfo = gb_trees:enter(MFA, escaping, CallInfo), + info_server_loop(NewCallInfo, ReturnInfo, Mod); + stop -> + ok + end. + +handle_update(MFA, Tree, NewInfo, Pid, Ref, Mod) -> + ResType = + case gb_trees:lookup(MFA, Tree) of + none -> + %% io:format("First Type: ~w ~w~n", [NewType, MFA]), + Pid ! {Ref, do_restart}, + Mod:new__info(NewInfo); + {value, escaping} -> + Pid ! {Ref, no_change}, + escaping; + {value, OldInfo} -> + %% io:format("New Type: ~w ~w~n", [NewType, MFA]), + %% io:format("Old Type: ~w ~w~n", [OldType, MFA]), + case Mod:update__info(NewInfo, OldInfo) of + {true, Type} -> + Pid ! {Ref, no_change}, + Type; + {false, Type} -> + Pid ! {Ref, do_restart}, + Type + end + end, + gb_trees:enter(MFA, ResType, Tree). diff --git a/lib/hipe/icode/hipe_icode_ebb.erl b/lib/hipe/icode/hipe_icode_ebb.erl new file mode 100644 index 0000000000..966c4d7564 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_ebb.erl @@ -0,0 +1,30 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Icode version of extended basic blocks. +%% + +-module(hipe_icode_ebb). + +-define(CFG, hipe_icode_cfg). + +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/ebb.inc"). diff --git a/lib/hipe/icode/hipe_icode_exceptions.erl b/lib/hipe/icode/hipe_icode_exceptions.erl new file mode 100644 index 0000000000..787fb05415 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_exceptions.erl @@ -0,0 +1,474 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ==================================================================== +%% Filename : hipe_icode_exceptions.erl +%% Module : hipe_icode_exceptions +%% Purpose : Rewrite calls in intermediate code to use Continuation +%% and Fail-To labels. +%% +%% Catch-instructions work as follows: +%% - A begin_try(FailLabel) starts a catch-region which +%% is ended by a corresponding end_try(FailLabel). +%% - The handler begins with a begin_handler(FailLabel). +%% +%% However, the begin/end instructions do not always appear +%% as parentheses around the section that they protect (in +%% linear Beam/Icode). Also, different begin_catch +%% instructions can reach the same basic blocks (which may +%% raise exceptions), due to code compation optimizations +%% in the Beam compiler, even though they have different +%% handlers. Because of this, a data flow analysis is +%% necessary to find out which catches may reach which +%% basic blocks. After that, we clone basic blocks as +%% needed to ensure that each block belongs to at most one +%% unique begin_catch. The Beam does not have this problem, +%% since it will find the correct catch-handler frame +%% pushed on the stack. (Note that since there can be no +%% tail-calls within a catch region, our dataflow analysis +%% for finding all catch-stacks is sure to terminate.) +%% +%% Finally, we can remove all special catch instructions +%% and rewrite calls within catch regions to use explicit +%% fail-to labels, which is the main point of all this. +%% Fail labels that were set before this pass are kept. +%% (Note that calls that have only a continuation label do +%% not always end their basic blocks. Adding a fail label +%% to such a call can thus force us to split the block.) +%% +%% Notes : As of November 2003, primops that do not fail in the +%% normal sense are allowed to have a fail-label even +%% before this pass. (Used for the mbox-empty + get_msg +%% primitive in receives.) +%% +%% Native floating point operations cannot fail in the +%% normal sense. Instead they throw a hardware exception +%% which will be caught by a special fp check error +%% instruction. These primops do not need a fail label even +%% in a catch. This pass checks for this with +%% hipe_icode_primops:fails/1. If a call cannot fail, no +%% fail label is added. +%% +%% Explicit fails (exit, error and throw) inside +%% a catch have to be handled. They have to build their +%% exit value and jump directly to the catch handler. An +%% alternative solution would be to have a new type of +%% fail instruction that takes a fail-to label... +%% +%% CVS: +%% $Id$ +%% ==================================================================== + +-module(hipe_icode_exceptions). + +-export([fix_catches/1]). + +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). + +%%---------------------------------------------------------------------------- + +-spec fix_catches(#cfg{}) -> #cfg{}. + +fix_catches(CFG) -> + {Map, State} = build_mapping(find_catches(init_state(CFG))), + hipe_icode_cfg:remove_unreachable_code(get_cfg(rewrite(State, Map))). + +%% This finds the set of possible catch-stacks for each basic block + +find_catches(State) -> + find_catches(get_start_labels(State), + clear_visited(clear_changed(State))). + +find_catches([L|Ls], State0) -> + case is_visited(L, State0) of + true -> + find_catches(Ls, State0); + false -> + State1 = set_visited(L, State0), + Code = get_bb_code(L, State1), + Cs = get_new_catches_in(L, State1), + State2 = set_catches_in(L, Cs, State1), % memorize + Cs1 = catches_out(Code, Cs), + Ls1 = get_succ(L, State2) ++ Ls, + Cs0 = get_catches_out(L, State2), + if Cs1 =:= Cs0 -> + find_catches(Ls1, State2); + true -> + State3 = set_catches_out(L, Cs1, State2), + find_catches(Ls1, set_changed(State3)) + end + end; +find_catches([], State) -> + case is_changed(State) of + true -> + find_catches(State); + false -> + State + end. + +catches_out([I|Is], Cs) -> + catches_out(Is, catches_out_instr(I, Cs)); +catches_out([], Cs) -> + Cs. + +catches_out_instr(I, Cs) -> + case I of + #icode_begin_try{} -> + Id = hipe_icode:begin_try_label(I), + push_catch(Id, Cs); + #icode_end_try{} -> + pop_catch(Cs); + #icode_begin_handler{} -> + pop_catch(Cs); + _ -> + Cs + end. + + +%% This builds the mapping used for cloning + +build_mapping(State) -> + build_mapping(get_start_labels(State), clear_visited(State), + new_mapping()). + +build_mapping([L|Ls], State0, Map) -> + case is_visited(L, State0) of + true -> + build_mapping(Ls, State0, Map); + false -> + State1 = set_visited(L, State0), + Cs = list_of_catches(get_catches_in(L, State1)), % get memorized + {Map1, State2} = map_bb(L, Cs, State1, Map), + Ls1 = get_succ(L, State2) ++ Ls, + build_mapping(Ls1, State2, Map1) + end; +build_mapping([], State, Map) -> + {Map, State}. + +map_bb(_L, [_C], State, Map) -> + {Map, State}; +map_bb(L, [C | Cs], State, Map) -> + %% This block will be cloned - we need to create N-1 new labels. + %% The identity mapping will be used for the first element. + Map1 = new_catch_labels(Cs, L, Map), + State1 = set_catches_in(L, single_catch(C), State), % update catches in + Code = get_bb_code(L, State1), + State2 = clone(Cs, L, Code, State1, Map1), + {Map1, State2}. + +clone([C | Cs], L, Code, State, Map) -> + Ren = get_renaming(C, Map), + L1 = Ren(L), + State1 = set_bb_code(L1, Code, State), + State2 = set_catches_in(L1, single_catch(C), State1), % set catches in + clone(Cs, L, Code, State2, Map); +clone([], _L, _Code, State, _Map) -> + State. + +new_catch_labels([C | Cs], L, Map) -> + L1 = hipe_icode:label_name(hipe_icode:mk_new_label()), + Map1 = set_mapping(C, L, L1, Map), + new_catch_labels(Cs, L, Map1); +new_catch_labels([], _L, Map) -> + Map. + + +%% This does all the actual rewriting and cloning. + +rewrite(State, Map) -> + rewrite(get_start_labels(State), clear_visited(State), Map). + +rewrite([L|Ls], State0, Map) -> + case is_visited(L, State0) of + true -> + rewrite(Ls, State0, Map); + false -> + State1 = set_visited(L, State0), + Code = get_bb_code(L, State1), + Cs = list_of_catches(get_catches_in(L, State1)), % get memorized + State2 = rewrite_bb(L, Cs, Code, State1, Map), + Ls1 = get_succ(L, State2) ++ Ls, + rewrite(Ls1, State2, Map) + end; +rewrite([], State, _Map) -> + State. + +rewrite_bb(L, [C], Code, State, Map) -> + {Code1, State1} = rewrite_code(Code, C, State, Map), + set_bb_code(L, Code1, State1). + +rewrite_code(Is, C, State, Map) -> + rewrite_code(Is, C, State, Map, []). + +rewrite_code([I|Is], C, State, Map, As) -> + [C1] = list_of_catches(catches_out_instr(I, single_catch(C))), + case I of + #icode_begin_try{} -> + {I1, Is1, State1} = update_begin_try(I, Is, C, State, Map), + I2 = redirect_instr(I1, C, Map), + rewrite_code(Is1, C1, State1, Map, [I2 | As]); + #icode_end_try{} -> + rewrite_code(Is, C1, State, Map, As); + #icode_call{} -> + {I1, Is1, State1} = update_call(I, Is, C, State, Map), + I2 = redirect_instr(I1, C, Map), + rewrite_code(Is1, C1, State1, Map, [I2 | As]); + #icode_fail{} -> + {I1, Is1, State1} = update_fail(I, Is, C, State, Map), + I2 = redirect_instr(I1, C, Map), + rewrite_code(Is1, C1, State1, Map, [I2 | As]); + _ -> + I1 = redirect_instr(I, C, Map), + rewrite_code(Is, C1, State, Map, [I1 | As]) + end; +rewrite_code([], _C, State, _Map, As) -> + {lists:reverse(As), State}. + +redirect_instr(I, C, Map) -> + redirect_instr_1(I, hipe_icode:successors(I), get_renaming(C, Map)). + +redirect_instr_1(I, [L0 | Ls], Ren) -> + I1 = hipe_icode:redirect_jmp(I, L0, Ren(L0)), + redirect_instr_1(I1, Ls, Ren); +redirect_instr_1(I, [], _Ren) -> + I. + +update_begin_try(I, Is, _C, State0, _Map) -> + L = hipe_icode:begin_try_successor(I), + I1 = hipe_icode:mk_goto(L), + {I1, Is, State0}. + +update_call(I, Is, C, State0, Map) -> + case top_of_stack(C) of + [] -> + %% No active catch. Assume cont./fail labels are correct as is. + {I, Is, State0}; + L -> + %% Only update the fail label if the call *can* fail. + case hipe_icode_primops:fails(hipe_icode:call_fun(I)) of + true -> + %% We only update the fail label if it is not already set. + case hipe_icode:call_fail_label(I) of + [] -> + I1 = hipe_icode:call_set_fail_label(I, L), + %% Now the call will end the block, so we must put the rest of + %% the code (if nonempty) in a new block! + if Is =:= [] -> + {I1, Is, State0}; + true -> + L1 = hipe_icode:label_name(hipe_icode:mk_new_label()), + I2 = hipe_icode:call_set_continuation(I1, L1), + State1 = set_bb_code(L1, Is, State0), + State2 = set_catches_in(L1, single_catch(C), State1), + State3 = rewrite_bb(L1, [C], Is, State2, Map), + {I2, [], State3} + end; + _ when Is =:= [] -> + %% Something is very wrong if Is is not empty here. A call + %% with a fail label should have ended its basic block. + {I, Is, State0} + end; + false -> + %% Make sure that the fail label is not set. + I1 = hipe_icode:call_set_fail_label(I, []), + {I1, Is, State0} + end + end. + +update_fail(I, Is, C, State, _Map) -> + case hipe_icode:fail_label(I) of + [] -> + {hipe_icode:fail_set_label(I, top_of_stack(C)), Is, State}; + _ -> + {I, Is, State} + end. + + +%%--------------------------------------------------------------------- +%% Abstraction for sets of catch stacks. + +%% This is the bottom element +no_catches() -> []. + +%% A singleton set +single_catch(C) -> [C]. + +%% A single, empty stack +empty_stack() -> []. + +%% Getting the label to fail to +top_of_stack([C|_]) -> C; +top_of_stack([]) -> []. % nil is used in Icode for "no label" + +join_catches(Cs1, Cs2) -> + ordsets:union(Cs1, Cs2). + +list_of_catches(Cs) -> Cs. + +%% Note that prepending an element to all elements in the list will +%% preserve the ordering of the list, and will never make two existing +%% elements become identical, so the list is still an ordset. + +push_catch(L, []) -> + [[L]]; +push_catch(L, Cs) -> + push_catch_1(L, Cs). + +push_catch_1(L, [C|Cs]) -> + [[L|C] | push_catch_1(L, Cs)]; +push_catch_1(_L, []) -> + []. + +%% However, after discarding the head of all elements, the list +%% is no longer an ordset, and must be processed. + +pop_catch(Cs) -> + ordsets:from_list(pop_catch_1(Cs)). + +pop_catch_1([[_|C] | Cs]) -> + [C | pop_catch_1(Cs)]; +pop_catch_1([]) -> + []. + + +%%--------------------------------------------------------------------- +%% Mapping from catch-stacks to renamings on labels. + +new_mapping() -> + gb_trees:empty(). + +set_mapping(C, L0, L1, Map) -> + Dict = case gb_trees:lookup(C, Map) of + {value, Dict0} -> + gb_trees:enter(L0, L1, Dict0); + none -> + gb_trees:insert(L0, L1, gb_trees:empty()) + end, + gb_trees:enter(C, Dict, Map). + +%% Return a label renaming function for a particular catch-stack + +get_renaming(C, Map) -> + case gb_trees:lookup(C, Map) of + {value, Dict} -> + fun (L0) -> + case gb_trees:lookup(L0, Dict) of + {value, L1} -> L1; + none -> L0 + end + end; + none -> + fun (L0) -> L0 end + end. + + +%%--------------------------------------------------------------------- +%% State abstraction + +-record(state, {cfg :: #cfg{}, + changed = false :: boolean(), + succ :: #cfg{}, + pred :: #cfg{}, + start_labels :: [icode_lbl(),...], + visited = hipe_icode_cfg:none_visited() :: gb_set(), + out = gb_trees:empty() :: gb_tree(), + in = gb_trees:empty() :: gb_tree() + }). + +init_state(CFG) -> + State = #state{cfg = CFG}, + refresh_state_cache(State). + +refresh_state_cache(State) -> + CFG = State#state.cfg, + SLs = [hipe_icode_cfg:start_label(CFG)], + State#state{succ = CFG, pred = CFG, start_labels = SLs}. + +get_cfg(State) -> + State#state.cfg. + +get_start_labels(State) -> + State#state.start_labels. + +get_pred(L, State) -> + hipe_icode_cfg:pred(State#state.pred, L). + +get_succ(L, State) -> + hipe_icode_cfg:succ(State#state.succ, L). + +set_changed(State) -> + State#state{changed = true}. + +is_changed(State) -> + State#state.changed. + +clear_changed(State) -> + State#state{changed = false}. + +set_catches_out(L, Cs, State) -> + State#state{out = gb_trees:enter(L, Cs, State#state.out)}. + +get_catches_out(L, State) -> + case gb_trees:lookup(L, State#state.out) of + {value, Cs} -> Cs; + none -> no_catches() + end. + +set_catches_in(L, Cs, State) -> + State#state{in = gb_trees:enter(L, Cs, State#state.in)}. + +get_catches_in(L, State) -> + case gb_trees:lookup(L, State#state.in) of + {value, Cs} -> Cs; + none -> no_catches() + end. + +set_visited(L, State) -> + State#state{visited = hipe_icode_cfg:visit(L, State#state.visited)}. + +is_visited(L, State) -> + hipe_icode_cfg:is_visited(L, State#state.visited). + +clear_visited(State) -> + State#state{visited = hipe_icode_cfg:none_visited()}. + +get_bb_code(L, State) -> + hipe_bb:code(hipe_icode_cfg:bb(State#state.cfg, L)). + +set_bb_code(L, Code, State) -> + CFG = State#state.cfg, + CFG1 = hipe_icode_cfg:bb_add(CFG, L, hipe_bb:mk_bb(Code)), + refresh_state_cache(State#state{cfg = CFG1}). + +get_new_catches_in(L, State) -> + Ps = get_pred(L, State), + Cs = case lists:member(L, get_start_labels(State)) of + true -> single_catch(empty_stack()); + false -> no_catches() + end, + get_new_catches_in(Ps, Cs, State). + +get_new_catches_in([P | Ps], Cs, State) -> + Cs1 = join_catches(Cs, get_catches_out(P, State)), + get_new_catches_in(Ps, Cs1, State); +get_new_catches_in([], Cs, _) -> + Cs. + +%%--------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode_fp.erl b/lib/hipe/icode/hipe_icode_fp.erl new file mode 100644 index 0000000000..a2ca6132d1 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_fp.erl @@ -0,0 +1,1043 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%-------------------------------------------------------------------- +%% File : hipe_icode_fp.erl +%% Author : Tobias Lindahl +%% Description : One pass analysis to find floating point values. +%% Mapping to FP variables and creation of FP EBBs. +%% +%% Created : 23 Apr 2003 by Tobias Lindahl +%%-------------------------------------------------------------------- + +-module(hipe_icode_fp). + +-export([cfg/1]). + +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). + +-record(state, {edge_map = gb_trees:empty() :: gb_tree(), + fp_ebb_map = gb_trees:empty() :: gb_tree(), + cfg :: #cfg{}}). + +%%-------------------------------------------------------------------- + +-spec cfg(#cfg{}) -> #cfg{}. + +cfg(Cfg) -> + %%hipe_icode_cfg:pp(Cfg), + NewCfg = annotate_fclearerror(Cfg), + State = new_state(NewCfg), + NewState = place_fp_blocks(State), + %% hipe_icode_cfg:pp(state__cfg(NewState)), + NewState2 = finalize(NewState), + NewCfg1 = state__cfg(NewState2), + %% hipe_icode_cfg:pp(NewCfg1), + NewCfg2 = unannotate_fclearerror(NewCfg1), + NewCfg2. + +%%-------------------------------------------------------------------- +%% Annotate fclearerror with information of the fail label of the +%% corresponding fcheckerror. +%%-------------------------------------------------------------------- + +annotate_fclearerror(Cfg) -> + Labels = hipe_icode_cfg:reverse_postorder(Cfg), + annotate_fclearerror(Labels, Cfg). + +annotate_fclearerror([Label|Left], Cfg) -> + BB = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + NewCode = annotate_fclearerror1(Code, Label, Cfg, []), + NewBB = hipe_bb:code_update(BB, NewCode), + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, NewBB), + annotate_fclearerror(Left, NewCfg); +annotate_fclearerror([], Cfg) -> + Cfg. + +annotate_fclearerror1([I|Left], Label, Cfg, Acc) -> + case I of + #icode_call{} -> + case hipe_icode:call_fun(I) of + fclearerror -> + Fail = lookahead_for_fcheckerror(Left, Label, Cfg), + NewI = hipe_icode:call_fun_update(I, {fclearerror, Fail}), + annotate_fclearerror1(Left, Label, Cfg, [NewI|Acc]); + _ -> + annotate_fclearerror1(Left, Label, Cfg, [I|Acc]) + end; + _ -> + annotate_fclearerror1(Left, Label, Cfg, [I|Acc]) + end; +annotate_fclearerror1([], _Label, _Cfg, Acc) -> + lists:reverse(Acc). + +lookahead_for_fcheckerror([I|Left], Label, Cfg) -> + case I of + #icode_call{} -> + case hipe_icode:call_fun(I) of + fcheckerror -> + hipe_icode:call_fail_label(I); + _ -> + lookahead_for_fcheckerror(Left, Label, Cfg) + end; + _ -> + lookahead_for_fcheckerror(Left, Label, Cfg) + end; +lookahead_for_fcheckerror([], Label, Cfg) -> + case hipe_icode_cfg:succ(Cfg, Label) of + [] -> exit("Unterminated fp ebb"); + SuccList -> + Succ = hd(SuccList), + Code = hipe_bb:code(hipe_icode_cfg:bb(Cfg, Label)), + lookahead_for_fcheckerror(Code, Succ, Cfg) + end. + +unannotate_fclearerror(Cfg) -> + Labels = hipe_icode_cfg:reverse_postorder(Cfg), + unannotate_fclearerror(Labels, Cfg). + +unannotate_fclearerror([Label|Left], Cfg) -> + BB = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + NewCode = unannotate_fclearerror1(Code, []), + NewBB = hipe_bb:code_update(BB, NewCode), + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, NewBB), + unannotate_fclearerror(Left, NewCfg); +unannotate_fclearerror([], Cfg) -> + Cfg. + +unannotate_fclearerror1([I|Left], Acc) -> + case I of + #icode_call{} -> + case hipe_icode:call_fun(I) of + {fclearerror, _Fail} -> + NewI = hipe_icode:call_fun_update(I, fclearerror), + unannotate_fclearerror1(Left, [NewI|Acc]); + _ -> + unannotate_fclearerror1(Left, [I|Acc]) + end; + _ -> + unannotate_fclearerror1(Left, [I|Acc]) + end; +unannotate_fclearerror1([], Acc) -> + lists:reverse(Acc). + +%%-------------------------------------------------------------------- +%% Make float EBBs +%%-------------------------------------------------------------------- + +place_fp_blocks(State) -> + WorkList = new_worklist(State), + transform_block(WorkList, State). + +transform_block(WorkList, State) -> + case get_work(WorkList) of + none -> + State; + {Label, NewWorkList} -> + %%io:format("Handling ~w \n", [Label]), + BB = state__bb(State, Label), + Code1 = hipe_bb:butlast(BB), + Last = hipe_bb:last(BB), + NofPreds = length(state__pred(State, Label)), + Map = state__map(State, Label), + FilteredMap = filter_map(Map, NofPreds), + {Prelude, NewFilteredMap} = do_prelude(FilteredMap), + + %% Take care to have a map without any new bindings from the + %% last instruction if it can fail. + {FailMap, NewCode1} = transform_instrs(Code1, Map, NewFilteredMap, []), + {NewMap, NewCode2} = transform_instrs([Last], Map, FailMap, []), + SuccSet0 = ordsets:from_list(hipe_icode:successors(Last)), + FailSet = ordsets:from_list(hipe_icode:fails_to(Last)), + SuccSet = ordsets:subtract(SuccSet0, FailSet), + NewCode = NewCode1 ++ NewCode2, + NewBB = hipe_bb:code_update(BB, Prelude++NewCode), + NewState = state__bb_add(State, Label, NewBB), + case update_maps(NewState, Label, SuccSet, NewMap, FailSet, FailMap) of + fixpoint -> + transform_block(NewWorkList, NewState); + {NewState1, AddBlocks} -> + NewWorkList1 = add_work(NewWorkList, AddBlocks), + transform_block(NewWorkList1, NewState1) + end + end. + +update_maps(State, Label, SuccSet, SuccMap, FailSet, FailMap) -> + {NewState, Add1} = update_maps(State, Label, SuccSet, SuccMap, []), + case update_maps(NewState, Label, FailSet, FailMap, Add1) of + {_NewState1, []} -> fixpoint; + {_NewState1, _Add} = Ret -> Ret + end. + +update_maps(State, From, [To|Left], Map, Acc) -> + case state__map_update(State, From, To, Map) of + fixpoint -> + update_maps(State, From, Left, Map, Acc); + NewState -> + update_maps(NewState, From, Left, Map, [To|Acc]) + end; +update_maps(State, _From, [], _Map, Acc) -> + {State, Acc}. + +transform_instrs([I|Left], PhiMap, Map, Acc) -> + Defines = hipe_icode:defines(I), + NewMap = delete_all(Defines, Map), + NewPhiMap = delete_all(Defines, PhiMap), + case I of + #icode_phi{} -> + Uses = hipe_icode:uses(I), + case [X || X <- Uses, lookup(X, PhiMap) =/= none] of + [] -> + %% No ordinary variables from the argument have been untagged. + transform_instrs(Left, NewPhiMap, NewMap, [I|Acc]); + Uses -> + %% All arguments are untagged. Let's untag the destination. + Dst = hipe_icode:phi_dst(I), + NewDst = hipe_icode:mk_new_fvar(), + NewMap1 = gb_trees:enter(Dst, NewDst, NewMap), + NewI = subst_phi_uncond(I, NewDst, PhiMap), + transform_instrs(Left, NewPhiMap, NewMap1, [NewI|Acc]); + _ -> + %% Some arguments are untagged. Keep the destination. + Dst = hipe_icode:phi_dst(I), + NewI = subst_phi(I, Dst, PhiMap), + transform_instrs(Left, NewPhiMap, NewMap, [NewI|Acc]) + end; + #icode_call{} -> + case hipe_icode:call_fun(I) of + X when X =:= unsafe_untag_float orelse X =:= conv_to_float -> + [Dst] = hipe_icode:defines(I), + case hipe_icode:uses(I) of + [] -> %% Constant + transform_instrs(Left, NewPhiMap, NewMap, [I|Acc]); + [Src] -> + case lookup(Src, Map) of + none -> + NewMap1 = gb_trees:enter(Src, {assigned, Dst}, NewMap), + transform_instrs(Left, NewPhiMap, NewMap1, [I|Acc]); + Dst -> + %% This is the instruction that untagged the variable. + %% Use old maps. + transform_instrs(Left, NewPhiMap, Map, [I|Acc]); + FVar -> + %% The variable was already untagged. + %% This instruction can be changed to a move. + NewI = hipe_icode:mk_move(Dst, FVar), + case hipe_icode:call_continuation(I) of + [] -> + transform_instrs(Left,NewPhiMap,NewMap,[NewI|Acc]); + ContLbl -> + Goto = hipe_icode:mk_goto(ContLbl), + transform_instrs(Left, NewPhiMap, NewMap, + [Goto, NewI|Acc]) + end + end + end; + unsafe_tag_float -> + [Dst] = hipe_icode:defines(I), + [Src] = hipe_icode:uses(I), + NewMap1 = gb_trees:enter(Dst, {assigned, Src}, NewMap), + transform_instrs(Left, NewPhiMap, NewMap1,[I|Acc]); + _ -> + {NewMap1, NewAcc} = check_for_fop_candidates(I, NewMap, Acc), + transform_instrs(Left, NewPhiMap, NewMap1, NewAcc) + end; + _ -> + NewIns = handle_untagged_arguments(I, NewMap), + transform_instrs(Left, NewPhiMap, NewMap, NewIns ++ Acc) + end; +transform_instrs([], _PhiMap, Map, Acc) -> + {Map, lists:reverse(Acc)}. + +check_for_fop_candidates(I, Map, Acc) -> + case is_fop_cand(I) of + false -> + NewIs = handle_untagged_arguments(I, Map), + {Map, NewIs ++ Acc}; + true -> + Fail = hipe_icode:call_fail_label(I), + Cont = hipe_icode:call_continuation(I), + Op = fun_to_fop(hipe_icode:call_fun(I)), + case Fail of + [] -> + Args = hipe_icode:args(I), + ConstArgs = [X || X <- Args, hipe_icode:is_const(X)], + try lists:foreach(fun(X) -> float(hipe_icode:const_value(X)) end, + ConstArgs) of + ok -> + %%io:format("Changing ~w to ~w\n", [hipe_icode:call_fun(I), Op]), + Uses = hipe_icode:uses(I), + Defines = hipe_icode:defines(I), + Convs = [X||X <- remove_duplicates(Uses), lookup(X,Map) =:= none], + NewMap0 = add_new_bindings_assigned(Convs, Map), + NewMap = add_new_bindings_unassigned(Defines, NewMap0), + ConvIns = get_conv_instrs(Convs, NewMap), + NewI = hipe_icode:mk_primop(lookup_list(Defines, NewMap), Op, + lookup_list_keep_consts(Args,NewMap), + Cont, Fail), + NewI2 = conv_consts(ConstArgs, NewI), + {NewMap, [NewI2|ConvIns]++Acc} + catch + error:badarg -> + %% This instruction will fail at runtime. The warning + %% should already have happened in hipe_icode_type. + NewIs = handle_untagged_arguments(I, Map), + {Map, NewIs ++ Acc} + end; + _ -> %% Bailing out! Can't handle instructions in catches (yet). + NewIs = handle_untagged_arguments(I, Map), + {Map, NewIs ++ Acc} + end + end. + + +%% If this is an instruction that needs to operate on tagged values, +%% which currently are untagged, we must tag the values and perhaps +%% end the fp ebb. + +handle_untagged_arguments(I, Map) -> + case [X || X <- hipe_icode:uses(I), must_be_tagged(X, Map)] of + [] -> + [I]; + Tag -> + TagIntrs = + [hipe_icode:mk_primop([Dst], unsafe_tag_float, + [gb_trees:get(Dst, Map)]) || Dst <- Tag], + [I|TagIntrs] + end. + +%% Add phi nodes for untagged fp values. + +do_prelude(Map) -> + case gb_trees:lookup(phi, Map) of + none -> + {[], Map}; + {value, List} -> + %%io:format("Adding phi: ~w\n", [List]), + Fun = fun ({FVar, Bindings}, Acc) -> + [hipe_icode:mk_phi(FVar, Bindings)|Acc] + end, + {lists:foldl(Fun, [], List), gb_trees:delete(phi, Map)} + end. + +split_code(Code) -> + split_code(Code, []). + +split_code([I], Acc) -> + {lists:reverse(Acc), I}; +split_code([I|Left], Acc) -> + split_code(Left, [I|Acc]). + + +%% When all code is mapped to fp instructions we must make sure that +%% the fp ebb information going into each block is the same as the +%% information coming out of each predecessor. Otherwise, we must add +%% a block in between. + +finalize(State) -> + Worklist = new_worklist(State), + NewState = place_error_handling(Worklist, State), + Edges = needs_fcheckerror(NewState), + finalize(Edges, NewState). + +finalize([{From, To}|Left], State) -> + NewState = add_fp_ebb_fixup(From, To, State), + finalize(Left, NewState); +finalize([], State) -> + State. + +needs_fcheckerror(State) -> + Cfg = state__cfg(State), + Labels = hipe_icode_cfg:labels(Cfg), + needs_fcheckerror(Labels, State, []). + +needs_fcheckerror([Label|Left], State, Acc) -> + case state__get_in_block_in(State, Label) of + {true, _} -> + needs_fcheckerror(Left, State, Acc); + false -> + Pred = state__pred(State, Label), + case [X || X <- Pred, state__get_in_block_out(State, X) =/= false] of + [] -> + needs_fcheckerror(Left, State, Acc); + NeedsFcheck -> + case length(Pred) =:= length(NeedsFcheck) of + true -> + %% All edges need fcheckerror. Add this to the + %% beginning of the block instead. + needs_fcheckerror(Left, State, [{none, Label}|Acc]); + false -> + Edges = [{X, Label} || X <- NeedsFcheck], + needs_fcheckerror(Left, State, Edges ++ Acc) + end + end + end; +needs_fcheckerror([], _State, Acc) -> + Acc. + +add_fp_ebb_fixup('none', To, State) -> + %% Add the fcheckerror to the start of the block. + BB = state__bb(State, To), + Code = hipe_bb:code(BB), + Phis = lists:takewhile(fun(X) -> hipe_icode:is_phi(X) end, Code), + TailCode = lists:dropwhile(fun(X) -> hipe_icode:is_phi(X) end, Code), + FC = hipe_icode:mk_primop([], fcheckerror, []), + NewCode = Phis ++ [FC|TailCode], + state__bb_add(State, To, hipe_bb:code_update(BB, NewCode)); +add_fp_ebb_fixup(From, To, State) -> + FCCode = [hipe_icode:mk_primop([], fcheckerror, [], To, [])], + FCBB = hipe_bb:mk_bb(FCCode), + FCLabel = hipe_icode:label_name(hipe_icode:mk_new_label()), + NewState = state__bb_add(State, FCLabel, FCBB), + NewState1 = state__redirect(NewState, From, To, FCLabel), + ToBB = state__bb(NewState, To), + ToCode = hipe_bb:code(ToBB), + NewToCode = redirect_phis(ToCode, From, FCLabel), + NewToBB = hipe_bb:code_update(ToBB, NewToCode), + state__bb_add(NewState1, To, NewToBB). + +redirect_phis(Code, OldFrom, NewFrom) -> + redirect_phis(Code, OldFrom, NewFrom, []). + +redirect_phis([I|Is] = Code, OldFrom, NewFrom, Acc) -> + case I of + #icode_phi{} -> + NewI = hipe_icode:phi_redirect_pred(I, OldFrom, NewFrom), + redirect_phis(Is, OldFrom, NewFrom, [NewI|Acc]); + _ -> + lists:reverse(Acc) ++ Code + end; +redirect_phis([], _OldFrom, _NewFrom, Acc) -> + lists:reverse(Acc). + +subst_phi(I, Dst, Map) -> + ArgList = subst_phi_uses0(hipe_icode:phi_arglist(I), Map, []), + hipe_icode:mk_phi(Dst, ArgList). + +subst_phi_uses0([{Pred, Var}|Left], Map, Acc) -> + case gb_trees:lookup(Var, Map) of + {value, List} -> + case lists:keyfind(Pred, 1, List) of + {Pred, {assigned, _NewVar}} -> + %% The variable is untagged, but it has been assigned. Keep it! + subst_phi_uses0(Left, Map, [{Pred, Var} | Acc]); + {Pred, _NewVar} = PredNV -> + %% The variable is untagged and it has never been assigned as tagged. + subst_phi_uses0(Left, Map, [PredNV | Acc]); + false -> + %% The variable is not untagged. + subst_phi_uses0(Left, Map, [{Pred, Var} | Acc]) + end; + none -> + %% The variable is not untagged. + subst_phi_uses0(Left, Map, [{Pred, Var} | Acc]) + end; +subst_phi_uses0([], _Map, Acc) -> + Acc. + +subst_phi_uncond(I, Dst, Map) -> + ArgList = subst_phi_uses_uncond0(hipe_icode:phi_arglist(I), Map, []), + hipe_icode:mk_phi(Dst, ArgList). + +subst_phi_uses_uncond0([{Pred, Var}|Left], Map, Acc) -> + case gb_trees:lookup(Var, Map) of + {value, List} -> + case lists:keyfind(Pred, 1, List) of + {Pred, {assigned, NewVar}} -> + %% The variable is untagged! + subst_phi_uses_uncond0(Left, Map, [{Pred, NewVar} | Acc]); + {Pred, _NewVar} = PredNV -> + %% The variable is untagged! + subst_phi_uses_uncond0(Left, Map, [PredNV | Acc]); + false -> + %% The variable is not untagged. + subst_phi_uses_uncond0(Left, Map, [{Pred, Var} | Acc]) + end; + none -> + %% The variable is not untagged. + subst_phi_uses_uncond0(Left, Map, [{Pred, Var} | Acc]) + end; +subst_phi_uses_uncond0([], _Map, Acc) -> + Acc. + +place_error_handling(WorkList, State) -> + case get_work(WorkList) of + none -> + State; + {Label, NewWorkList} -> + BB = state__bb(State, Label), + Code = hipe_bb:code(BB), + case state__join_in_block(State, Label) of + fixpoint -> + place_error_handling(NewWorkList, State); + {NewState, NewInBlock} -> + {NewCode1, InBlockOut} = place_error(Code, NewInBlock, []), + Succ = state__succ(NewState, Label), + NewCode2 = handle_unchecked_end(Succ, NewCode1, InBlockOut), + NewBB = hipe_bb:code_update(BB, NewCode2), + NewState1 = state__bb_add(NewState, Label, NewBB), + NewState2 = state__in_block_out_update(NewState1, Label, InBlockOut), + NewWorkList1 = add_work(NewWorkList, Succ), + place_error_handling(NewWorkList1, NewState2) + end + end. + +place_error([I|Left], InBlock, Acc) -> + case I of + #icode_call{} -> + case hipe_icode:call_fun(I) of + X when X =:= fp_add; X =:= fp_sub; + X =:= fp_mul; X =:= fp_div; X =:= fnegate -> + case InBlock of + false -> + Clear = hipe_icode:mk_primop([], {fclearerror, []}, []), + place_error(Left, {true, []}, [I, Clear|Acc]); + {true, _} -> + place_error(Left, InBlock, [I|Acc]) + end; + unsafe_tag_float -> + case InBlock of + {true, Fail} -> + Check = hipe_icode:mk_primop([], fcheckerror, [], [], Fail), + place_error(Left, false, [I, Check|Acc]); + false -> + place_error(Left, InBlock, [I|Acc]) + end; + {fclearerror, Fail} -> + case InBlock of + {true, Fail} -> + %% We can remove this fclearerror! + case hipe_icode:call_continuation(I) of + [] -> + place_error(Left, InBlock, Acc); + Cont -> + place_error(Left, InBlock, [hipe_icode:mk_goto(Cont)|Acc]) + end; + {true, _OtherFail} -> + %% TODO: This can be handled but it requires breaking up + %% the BB in two. Currently this should not happen. + exit("Starting fp ebb with different fail label"); + false -> + place_error(Left, {true, Fail}, [I|Acc]) + end; + fcheckerror -> + case {true, hipe_icode:call_fail_label(I)} of + InBlock -> + %% No problem + place_error(Left, false, [I|Acc]); + NewInblock -> + exit({"Fcheckerror has the wrong fail label", + InBlock, NewInblock}) + end; + X when X =:= conv_to_float; X =:= unsafe_untag_float -> + place_error(Left, InBlock, [I|Acc]); + _Other -> + case hipe_icode_primops:fails(hipe_icode:call_fun(I)) of + false -> + place_error(Left, InBlock, [I|Acc]); + true -> + case InBlock of + {true, Fail} -> + Check = hipe_icode:mk_primop([], fcheckerror, [], [], Fail), + place_error(Left, false, [I, Check|Acc]); + false -> + place_error(Left, InBlock, [I|Acc]) + end + end + end; + #icode_fail{} -> + place_error_1(I, Left, InBlock, Acc); + #icode_return{} -> + place_error_1(I, Left, InBlock, Acc); + #icode_enter{} -> + place_error_1(I, Left, InBlock, Acc); + Other -> + case instr_allowed_in_fp_ebb(Other) of + true -> + place_error(Left, InBlock, [I|Acc]); + false -> + case InBlock of + {true, []} -> + Check = hipe_icode:mk_primop([], fcheckerror, []), + place_error(Left, false, [I, Check|Acc]); + {true, _} -> + exit({"Illegal instruction in caught fp ebb", I}); + false -> + place_error(Left, InBlock, [I|Acc]) + end + end + end; +place_error([], InBlock, Acc) -> + {lists:reverse(Acc), InBlock}. + +place_error_1(I, Left, InBlock, Acc) -> + case InBlock of + {true, []} -> + Check = hipe_icode:mk_primop([], fcheckerror, []), + place_error(Left, false, [I, Check|Acc]); + {true, _} -> + exit({"End of control flow in caught fp ebb", I}); + false -> + place_error(Left, InBlock, [I|Acc]) + end. + +%% If the block has no successors and we still are in a fp ebb we must +%% end it to make sure we don't have any unchecked fp exceptions. + +handle_unchecked_end(Succ, Code, InBlock) -> + case Succ of + [] -> + case InBlock of + {true, []} -> + {TopCode, Last} = split_code(Code), + NewI = hipe_icode:mk_primop([], fcheckerror, []), + TopCode ++ [NewI, Last]; + false -> + Code + end; + _ -> + Code + end. + +instr_allowed_in_fp_ebb(Instr) -> + case Instr of + #icode_comment{} -> true; + #icode_goto{} -> true; + #icode_if{} -> true; + #icode_move{} -> true; + #icode_phi{} -> true; + #icode_begin_handler{} -> true; + #icode_switch_tuple_arity{} -> true; + #icode_switch_val{} -> true; + #icode_type{} -> true; + _ -> false + end. + +%%============================================================= +%% Help functions +%%============================================================= + +%% ------------------------------------------------------------ +%% Handling the gb_tree + +delete_all([Key|Left], Tree) -> + delete_all(Left, gb_trees:delete_any(Key, Tree)); +delete_all([], Tree) -> + Tree. + +lookup_list(List, Info) -> + lookup_list(List, fun lookup/2, Info, []). + +lookup_list([H|T], Fun, Info, Acc) -> + lookup_list(T, Fun, Info, [Fun(H, Info)|Acc]); +lookup_list([], _, _, Acc) -> + lists:reverse(Acc). + +lookup(Key, Tree) -> + case hipe_icode:is_const(Key) of + %% This can be true if the same constant has been + %% untagged more than once + true -> none; + false -> + case gb_trees:lookup(Key, Tree) of + none -> none; + {value, {assigned, Val}} -> Val; + {value, Val} -> Val + end + end. + +lookup_list_keep_consts(List, Info) -> + lookup_list(List, fun lookup_keep_consts/2, Info, []). + +lookup_keep_consts(Key, Tree) -> + case hipe_icode:is_const(Key) of + true -> Key; + false -> + case gb_trees:lookup(Key, Tree) of + none -> none; + {value, {assigned, Val}} -> Val; + {value, Val} -> Val + end + end. + +get_type(Var) -> + case hipe_icode:is_const(Var) of + true -> erl_types:t_from_term(hipe_icode:const_value(Var)); + false -> + case hipe_icode:is_annotated_variable(Var) of + true -> + {type_anno, Type, _} = hipe_icode:variable_annotation(Var), + Type +%%% false -> erl_types:t_any() + end + end. + +%% ------------------------------------------------------------ +%% Handling the map from variables to fp-variables + +join_maps(Edges, EdgeMap) -> + join_maps(Edges, EdgeMap, gb_trees:empty()). + +join_maps([Edge = {Pred, _}|Left], EdgeMap, Map) -> + case gb_trees:lookup(Edge, EdgeMap) of + none -> + %% All predecessors have not been handled. Use empty map. + gb_trees:empty(); + {value, OldMap} -> + NewMap = join_maps0(gb_trees:to_list(OldMap), Pred, Map), + join_maps(Left, EdgeMap, NewMap) + end; +join_maps([], _, Map) -> + Map. + +join_maps0([{phi, _}|Tail], Pred, Map) -> + join_maps0(Tail, Pred, Map); +join_maps0([{Var, FVar}|Tail], Pred, Map) -> + case gb_trees:lookup(Var, Map) of + none -> + join_maps0(Tail, Pred, gb_trees:enter(Var, [{Pred, FVar}], Map)); + {value, List} -> + case lists:keyfind(Pred, 1, List) of + false -> + join_maps0(Tail, Pred, gb_trees:update(Var, [{Pred, FVar}|List], Map)); + {Pred, FVar} -> + %% No problem. + join_maps0(Tail, Pred, Map); + _ -> + exit('New binding to same variable') + end + end; +join_maps0([], _, Map) -> + Map. + +filter_map(Map, NofPreds) -> + filter_map(gb_trees:to_list(Map), NofPreds, Map). + +filter_map([{Var, Bindings}|Left], NofPreds, Map) -> + case length(Bindings) =:= NofPreds of + true -> + case all_args_equal(Bindings) of + true -> + {_, FVar} = hd(Bindings), + filter_map(Left, NofPreds, gb_trees:update(Var, FVar, Map)); + false -> + PhiDst = hipe_icode:mk_new_fvar(), + PhiArgs = strip_of_assigned(Bindings), + NewMap = + case gb_trees:lookup(phi, Map) of + none -> + gb_trees:insert(phi, [{PhiDst, PhiArgs}], Map); + {value, Val} -> + gb_trees:update(phi, [{PhiDst, PhiArgs}|Val], Map) + end, + NewBinding = + case bindings_are_assigned(Bindings) of + true -> {assigned, PhiDst}; + false -> PhiDst + end, + filter_map(Left, NofPreds, gb_trees:update(Var, NewBinding, NewMap)) + end; + false -> + filter_map(Left, NofPreds, gb_trees:delete(Var, Map)) + end; +filter_map([], _NofPreds, Map) -> + Map. + +bindings_are_assigned([{_, {assigned, _}}|Left]) -> + assert_assigned(Left), + true; +bindings_are_assigned(Bindings) -> + assert_not_assigned(Bindings), + false. + +assert_assigned([{_, {assigned, _}}|Left]) -> + assert_assigned(Left); +assert_assigned([]) -> + ok. + +assert_not_assigned([{_, FVar}|Left]) -> + true = hipe_icode:is_fvar(FVar), + assert_not_assigned(Left); +assert_not_assigned([]) -> + ok. + +%% all_args_equal returns true if the mapping for a variable is the +%% same from all predecessors, i.e., we do not need a phi-node. + +all_args_equal([{_, FVar}|Left]) -> + all_args_equal(Left, FVar). + +all_args_equal([{_, FVar1}|Left], FVar1) -> + all_args_equal(Left, FVar1); +all_args_equal([], _) -> + true; +all_args_equal(_, _) -> + false. + + +%% We differentiate between values that have been assigned as +%% tagged variables and those that got a 'virtual' binding. + +add_new_bindings_unassigned([Var|Left], Map) -> + FVar = hipe_icode:mk_new_fvar(), + add_new_bindings_unassigned(Left, gb_trees:insert(Var, FVar, Map)); +add_new_bindings_unassigned([], Map) -> + Map. + +add_new_bindings_assigned([Var|Left], Map) -> + case lookup(Var, Map) of + none -> + FVar = hipe_icode:mk_new_fvar(), + NewMap = gb_trees:insert(Var, {assigned, FVar}, Map), + add_new_bindings_assigned(Left, NewMap); + _ -> + add_new_bindings_assigned(Left, Map) + end; +add_new_bindings_assigned([], Map) -> + Map. + +strip_of_assigned(List) -> + strip_of_assigned(List, []). + +strip_of_assigned([{Pred, {assigned, Val}}|Left], Acc) -> + strip_of_assigned(Left, [{Pred, Val}|Acc]); +strip_of_assigned([Tuple|Left], Acc) -> + strip_of_assigned(Left, [Tuple|Acc]); +strip_of_assigned([], Acc) -> + Acc. + +%% ------------------------------------------------------------ +%% Help functions for the transformation from ordinary instruction to +%% fp-instruction + +is_fop_cand(I) -> + case hipe_icode:call_fun(I) of + '/' -> true; + Fun -> + case fun_to_fop(Fun) of + false -> false; + _ -> any_is_float(hipe_icode:args(I)) + end + end. + +any_is_float(Vars) -> + lists:any(fun (V) -> erl_types:t_is_float(get_type(V)) end, Vars). + +remove_duplicates(List) -> + remove_duplicates(List, []). + +remove_duplicates([X|Left], Acc) -> + case lists:member(X, Acc) of + true -> + remove_duplicates(Left, Acc); + false -> + remove_duplicates(Left, [X|Acc]) + end; +remove_duplicates([], Acc) -> + Acc. + +fun_to_fop(Fun) -> + case Fun of + '+' -> fp_add; + '-' -> fp_sub; + '*' -> fp_mul; + '/' -> fp_div; + _ -> false + end. + + +%% If there is a tagged version of this variable available we don't +%% have to tag the untagged version. + +must_be_tagged(Var, Map) -> + case gb_trees:lookup(Var, Map) of + none -> false; + {value, {assigned, _}} -> false; + {value, Val} -> hipe_icode:is_fvar(Val) + end. + + +%% Converting to floating point variables + +get_conv_instrs(Vars, Map) -> + get_conv_instrs(Vars, Map, []). + +get_conv_instrs([Var|Left], Map, Acc) -> + {_, Dst} = gb_trees:get(Var, Map), + NewI = + case erl_types:t_is_float(get_type(Var)) of + true -> + [hipe_icode:mk_primop([Dst], unsafe_untag_float, [Var])]; + false -> + [hipe_icode:mk_primop([Dst], conv_to_float, [Var])] + end, + get_conv_instrs(Left, Map, NewI++Acc); +get_conv_instrs([], _, Acc) -> + Acc. + + +conv_consts(ConstArgs, I) -> + conv_consts(ConstArgs, I, []). + +conv_consts([Const|Left], I, Subst) -> + NewConst = hipe_icode:mk_const(float(hipe_icode:const_value(Const))), + conv_consts(Left, I, [{Const, NewConst}|Subst]); +conv_consts([], I, Subst) -> + hipe_icode:subst_uses(Subst, I). + + +%% _________________________________________________________________ +%% +%% Handling the state +%% + +new_state(Cfg) -> + #state{cfg = Cfg}. + +state__cfg(#state{cfg = Cfg}) -> + Cfg. + +state__succ(#state{cfg = Cfg}, Label) -> + hipe_icode_cfg:succ(Cfg, Label). + +state__pred(#state{cfg = Cfg}, Label) -> + hipe_icode_cfg:pred(Cfg, Label). + +state__redirect(S = #state{cfg = Cfg}, From, ToOld, ToNew) -> + NewCfg = hipe_icode_cfg:redirect(Cfg, From, ToOld, ToNew), + S#state{cfg=NewCfg}. + +state__bb(#state{cfg = Cfg}, Label) -> + hipe_icode_cfg:bb(Cfg, Label). + +state__bb_add(S = #state{cfg = Cfg}, Label, BB) -> + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, BB), + S#state{cfg = NewCfg}. + +state__map(S = #state{edge_map = EM}, To) -> + join_maps([{From, To} || From <- state__pred(S, To)], EM). + +state__map_update(S = #state{edge_map = EM}, From, To, Map) -> + FromTo = {From, To}, + MapChanged = + case gb_trees:lookup(FromTo, EM) of + {value, Map1} -> not match(Map1, Map); + none -> true + end, + case MapChanged of + true -> + NewEM = gb_trees:enter(FromTo, Map, EM), + S#state{edge_map = NewEM}; + false -> + fixpoint + end. + +state__join_in_block(S = #state{fp_ebb_map = Map}, Label) -> + Pred = state__pred(S, Label), + Edges = [{X, Label} || X <- Pred], + NewInBlock = join_in_block([gb_trees:lookup(X, Map) || X <- Edges]), + InBlockLabel = {inblock_in, Label}, + case gb_trees:lookup(InBlockLabel, Map) of + none -> + NewMap = gb_trees:insert(InBlockLabel, NewInBlock, Map), + {S#state{fp_ebb_map = NewMap}, NewInBlock}; + {value, NewInBlock} -> + fixpoint; + _Other -> + NewMap = gb_trees:update(InBlockLabel, NewInBlock, Map), + {S#state{fp_ebb_map = NewMap}, NewInBlock} + end. + +state__in_block_out_update(S = #state{fp_ebb_map = Map}, Label, NewInBlock) -> + Succ = state__succ(S, Label), + Edges = [{Label, X} || X <- Succ], + NewMap = update_edges(Edges, NewInBlock, Map), + NewMap1 = gb_trees:enter({inblock_out, Label}, NewInBlock, NewMap), + S#state{fp_ebb_map = NewMap1}. + +update_edges([Edge|Left], NewInBlock, Map) -> + NewMap = gb_trees:enter(Edge, NewInBlock, Map), + update_edges(Left, NewInBlock, NewMap); +update_edges([], _NewInBlock, NewMap) -> + NewMap. + +join_in_block([]) -> + false; +join_in_block([none|_]) -> + false; +join_in_block([{value, InBlock}|Left]) -> + join_in_block(Left, InBlock). + +join_in_block([none|_], _Current) -> + false; +join_in_block([{value, InBlock}|Left], Current) -> + if Current =:= InBlock -> join_in_block(Left, Current); + Current =:= false -> false; + InBlock =:= false -> false; + true -> exit("Basic block is in two different fp ebb:s") + end; +join_in_block([], Current) -> + Current. + + +state__get_in_block_in(#state{fp_ebb_map = Map}, Label) -> + gb_trees:get({inblock_in, Label}, Map). + +state__get_in_block_out(#state{fp_ebb_map = Map}, Label) -> + gb_trees:get({inblock_out, Label}, Map). + + +new_worklist(#state{cfg = Cfg}) -> + Start = hipe_icode_cfg:start_label(Cfg), + {[Start], [], gb_sets:insert(Start, gb_sets:empty())}. + +get_work({[Label|Left], List, Set}) -> + {Label, {Left, List, gb_sets:delete(Label, Set)}}; +get_work({[], [], _Set}) -> + none; +get_work({[], List, Set}) -> + get_work({lists:reverse(List), [], Set}). + +add_work({List1, List2, Set} = Work, [Label|Left]) -> + case gb_sets:is_member(Label, Set) of + true -> + add_work(Work, Left); + false -> + %% io:format("Added work: ~w\n", [Label]), + NewSet = gb_sets:insert(Label, Set), + add_work({List1, [Label|List2], NewSet}, Left) + end; +add_work(WorkList, []) -> + WorkList. + +match(Tree1, Tree2) -> + match_1(gb_trees:to_list(Tree1), Tree2) andalso + match_1(gb_trees:to_list(Tree2), Tree1). + +match_1([{Key, Val}|Left], Tree2) -> + case gb_trees:lookup(Key, Tree2) of + {value, Val} -> + match_1(Left, Tree2); + _ -> false + end; +match_1([], _) -> + true. diff --git a/lib/hipe/icode/hipe_icode_heap_test.erl b/lib/hipe/icode/hipe_icode_heap_test.erl new file mode 100644 index 0000000000..92d5f023fa --- /dev/null +++ b/lib/hipe/icode/hipe_icode_heap_test.erl @@ -0,0 +1,200 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2000 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_icode_heap_test.erl +%% Module : hipe_icode_heap_test +%% Purpose : +%% Notes : +%% History : * 2000-11-07 Erik Johansson (happi@it.uu.se): +%% Created. +%% +%% $Id$ +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_icode_heap_test). + +-export([cfg/1]). + +-define(DO_ASSERT,true). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). +-include("../flow/cfg.hrl"). +-include("../rtl/hipe_literals.hrl"). + +%------------------------------------------------------------------------- + +-spec cfg(#cfg{}) -> #cfg{}. + +cfg(CFG) -> + Icode = hipe_icode_cfg:cfg_to_linear(CFG), + Code = hipe_icode:icode_code(Icode), + ActualVmax = hipe_icode:highest_var(Code), + ActualLmax = hipe_icode:highest_label(Code), + hipe_gensym:set_label(icode, ActualLmax+1), + hipe_gensym:set_var(icode, ActualVmax+1), + EBBs = hipe_icode_ebb:cfg(CFG), + {EBBcode,_Visited} = ebbs(EBBs, [], CFG), + NewCode = add_gc_tests(EBBcode), + NewIcode = hipe_icode:icode_code_update(Icode, NewCode), + NewCFG = hipe_icode_cfg:linear_to_cfg(NewIcode), + %% hipe_icode_cfg:pp(NewCFG), + NewCFG. + +ebbs([EBB|EBBs], Visited, CFG) -> + case hipe_icode_ebb:type(EBB) of + node -> + L = hipe_icode_ebb:node_label(EBB), + case visited(L, Visited) of + true -> + ebbs(EBBs, Visited, CFG); + false -> + EBBCode = hipe_bb:code(hipe_icode_cfg:bb(CFG, L)), + case hipe_icode_ebb:node_successors(EBB) of + [Succ|Succs] -> + {[SuccCode|More], Visited1} = + ebbs([Succ], [L|Visited], CFG), + {[OtherCode|MoreOther], Visited2} = + ebbs(Succs ++ EBBs, Visited1, CFG), + {[[hipe_icode:mk_label(L)|EBBCode] ++ SuccCode| + More] ++ [OtherCode|MoreOther], + Visited2}; + [] -> + {OtherCode, Visited1} = ebbs(EBBs, [L|Visited], CFG), + {[[hipe_icode:mk_label(L)|EBBCode] | OtherCode], Visited1} + end + end; + leaf -> + ebbs(EBBs, Visited, CFG) + end; +ebbs([], Visited,_) -> + {[[]], Visited}. + +visited(L, Visited) -> + lists:member(L, Visited). + +add_gc_tests([[]|EBBCodes]) -> add_gc_tests(EBBCodes); +add_gc_tests([EBBCode|EBBCodes]) -> + case need(EBBCode, 0, []) of + {Need, RestCode, [Lbl|Code]} -> + if Need > 0 -> + [Lbl] ++ gc_test(Need) ++ Code ++ add_gc_tests([RestCode|EBBCodes]); + true -> + [Lbl|Code] ++ add_gc_tests([RestCode|EBBCodes]) + end; + {0, RestCode, []} -> + add_gc_tests([RestCode|EBBCodes]) + end; +add_gc_tests([]) -> []. + +need([I|Is] , Need, Code) -> + case split(I) of + true -> + case I of + #icode_call{} -> + case hipe_icode:call_continuation(I) of + [] -> %% Was fallthrough. + NewLab = hipe_icode:mk_new_label(), + LabName = hipe_icode:label_name(NewLab), + NewCall = hipe_icode:call_set_continuation(I,LabName), + {Need + need(I), [NewLab|Is], lists:reverse([NewCall|Code])}; + _ -> + {Need + need(I), Is, lists:reverse([I|Code])} + end; + _ -> + {Need + need(I), Is, lists:reverse([I|Code])} + end; + false -> + need(Is, Need + need(I), [I|Code]) + end; +need([], Need, Code) -> + {Need, [], lists:reverse(Code)}. + +need(I) -> + case I of + #icode_call{} -> + primop_need(hipe_icode:call_fun(I), hipe_icode:call_args(I)); + #icode_enter{} -> + primop_need(hipe_icode:enter_fun(I), hipe_icode:enter_args(I)); + _ -> + 0 + end. + +primop_need(Op, As) -> + case Op of + cons -> + 2; + mktuple -> + length(As) + 1; + #mkfun{} -> + NumFree = length(As), + ?ERL_FUN_SIZE + NumFree; + unsafe_tag_float -> + 3; + _ -> + 0 + end. + +gc_test(Need) -> + L = hipe_icode:mk_new_label(), + [hipe_icode:mk_primop([], #gc_test{need=Need}, [], + hipe_icode:label_name(L), + hipe_icode:label_name(L)), + L]. + +split(I) -> + case I of + #icode_call{} -> not known_heap_need(hipe_icode:call_fun(I)); + #icode_enter{} -> not known_heap_need(hipe_icode:enter_fun(I)); + _ -> false + end. + +known_heap_need(Name) -> + case Name of + %% Primops + cons -> true; + fcheckerror -> true; + fclearerror -> true; + fnegate -> true; + fp_add -> true; + fp_div -> true; + fp_mul -> true; + fp_sub -> true; + mktuple -> true; + unsafe_hd -> true; + unsafe_tag_float -> true; + unsafe_tl -> true; + unsafe_untag_float -> true; + #element{} -> true; + #unsafe_element{} -> true; + #unsafe_update_element{} -> true; + + %% MFAs + {erlang, element, 2} -> true; + {erlang, length, 1} -> true; + {erlang, self, 0} -> true; + {erlang, size, 1} -> true; + + _ -> false + end. diff --git a/lib/hipe/icode/hipe_icode_inline_bifs.erl b/lib/hipe/icode/hipe_icode_inline_bifs.erl new file mode 100644 index 0000000000..27296dcad5 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_inline_bifs.erl @@ -0,0 +1,240 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%-------------------------------------------------------------------- +%% File : hipe_icode_inline_bifs.erl +%% Author : Per Gustafsson +%% Purpose : Inlines BIFs which can be expressed easily in ICode. +%% This allows for optimizations in later ICode passes +%% and makes the code faster. +%% +%% Created : 14 May 2007 by Per Gustafsson +%%-------------------------------------------------------------------- + +%% Currently inlined BIFs: +%% and, or, xor, not, <, >, >=, =<, ==, /=, =/=, =:= +%% is_atom, is_boolean, is_binary, is_constant, is_float, is_function, +%% is_integer, is_list, is_pid, is_port, is_reference, is_tuple + +-module(hipe_icode_inline_bifs). + +-export([cfg/1]). + +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). + +%%-------------------------------------------------------------------- + +-spec cfg(#cfg{}) -> #cfg{}. + +cfg(Cfg) -> + Linear = hipe_icode_cfg:cfg_to_linear(Cfg), + #icode{code = StraightCode} = Linear, + FinalCode = lists:flatten([inline_bif(I) || I <- StraightCode]), + Cfg1 = hipe_icode_cfg:linear_to_cfg(Linear#icode{code = FinalCode}), + hipe_icode_cfg:remove_unreachable_code(Cfg1). + +inline_bif(I = #icode_call{}) -> + try_conditional(I); +inline_bif(I) -> + I. + +try_conditional(I = #icode_call{dstlist = [Dst], 'fun' = {erlang, Name, 2}, + args = [Arg1, Arg2], + continuation = Cont}) -> + case is_conditional(Name) of + true -> + inline_conditional(Dst, Name, Arg1, Arg2, Cont); + false -> + try_bool(I) + end; +try_conditional(I) -> + try_bool(I). + +is_conditional(Name) -> + case Name of + '=:=' -> true; + '=/=' -> true; + '==' -> true; + '/=' -> true; + '>' -> true; + '<' -> true; + '>=' -> true; + '=<' -> true; + _ -> false + end. + +try_bool(I = #icode_call{dstlist = [Dst], 'fun' = Name, + args = [Arg1, Arg2], + continuation = Cont, fail_label = Fail}) -> + case is_binary_bool(Name) of + {true, Results, ResLbls} -> + inline_binary_bool(Dst, Results, ResLbls, Arg1, Arg2, Cont, Fail, I); + false -> + try_type_tests(I) + end; +try_bool(I = #icode_call{dstlist = [Dst], 'fun' = {erlang, 'not', 1}, + args = [Arg1], + continuation = Cont, + fail_label = Fail}) -> + inline_unary_bool(Dst, {false, true}, Arg1, Cont, Fail, I); +try_bool(I) -> try_type_tests(I). + +is_binary_bool({erlang, Name, 2}) -> + ResTLbl = hipe_icode:mk_new_label(), + ResFLbl = hipe_icode:mk_new_label(), + ResTL = hipe_icode:label_name(ResTLbl), + ResFL = hipe_icode:label_name(ResFLbl), + case Name of + 'and' -> {true, {ResTL, ResFL, ResFL}, {ResTLbl, ResFLbl}}; + 'or' -> {true, {ResTL, ResTL, ResFL}, {ResTLbl, ResFLbl}}; + 'xor' -> {true, {ResFL, ResTL, ResFL}, {ResTLbl, ResFLbl}}; + _ -> false + end; +is_binary_bool(_) -> false. + +try_type_tests(I = #icode_call{dstlist=[Dst], 'fun' = {erlang, Name, 1}, + args = Args, continuation = Cont}) -> + case is_type_test(Name) of + {true, Type} -> + inline_type_test(Dst, Type, Args, Cont); + false -> + I + end; +try_type_tests(I) -> I. + +is_type_test(Name) -> + case Name of + is_integer -> {true, integer}; + is_float -> {true, float}; + is_tuple -> {true, tuple}; + is_binary -> {true, binary}; + is_list -> {true, list}; + is_pid -> {true, pid}; + is_atom -> {true, atom}; + is_boolean -> {true, boolean}; + is_function -> {true, function}; + is_reference -> {true, reference}; + is_constant -> {true, constant}; + is_port -> {true, port}; + _ -> false + end. + +inline_type_test(BifRes, Type, Src, Cont) -> + {NewCont, NewEnd} = get_cont_lbl(Cont), + TLbl = hipe_icode:mk_new_label(), + FLbl = hipe_icode:mk_new_label(), + TL = hipe_icode:label_name(TLbl), + FL = hipe_icode:label_name(FLbl), + [hipe_icode:mk_type(Src, Type, TL, FL), + TLbl, + hipe_icode:mk_move(BifRes, hipe_icode:mk_const(true)), + hipe_icode:mk_goto(NewCont), + FLbl, + hipe_icode:mk_move(BifRes, hipe_icode:mk_const(false)), + hipe_icode:mk_goto(NewCont)| + NewEnd]. + +inline_conditional(BifRes, Op, Src1, Src2, Cont) -> + {NewCont, NewEnd} = get_cont_lbl(Cont), + TLbl = hipe_icode:mk_new_label(), + FLbl = hipe_icode:mk_new_label(), + TL = hipe_icode:label_name(TLbl), + FL = hipe_icode:label_name(FLbl), + [hipe_icode:mk_if(Op, [Src1, Src2], TL, FL), + TLbl, + hipe_icode:mk_move(BifRes, hipe_icode:mk_const(true)), + hipe_icode:mk_goto(NewCont), + FLbl, + hipe_icode:mk_move(BifRes, hipe_icode:mk_const(false)), + hipe_icode:mk_goto(NewCont)| + NewEnd]. + +%% +%% The TTL TFL FFL labelnames points to either ResTLbl or ResFLbl +%% Depending on what boolean expression we are inlining +%% + +inline_binary_bool(Dst, {TTL, TFL, FFL}, {ResTLbl, ResFLbl}, + Arg1, Arg2, Cont, Fail, I) -> + {NewCont, NewEnd} = get_cont_lbl(Cont), + {NewFail, FailCode} = get_fail_lbl(Fail, I), + EndCode = FailCode++NewEnd, + TLbl = hipe_icode:mk_new_label(), + FLbl = hipe_icode:mk_new_label(), + NotTLbl = hipe_icode:mk_new_label(), + NotTTLbl = hipe_icode:mk_new_label(), + NotTFLbl = hipe_icode:mk_new_label(), + TL = hipe_icode:label_name(TLbl), + FL = hipe_icode:label_name(FLbl), + NotTL = hipe_icode:label_name(NotTLbl), + NotTTL = hipe_icode:label_name(NotTTLbl), + NotTFL = hipe_icode:label_name(NotTFLbl), + [hipe_icode:mk_type([Arg1], {atom, true}, TL, NotTL, 0.5), + NotTLbl, + hipe_icode:mk_type([Arg1], {atom, false}, FL, NewFail, 0.99), + TLbl, + hipe_icode:mk_type([Arg2], {atom, true}, TTL, NotTTL, 0.5), + NotTTLbl, + hipe_icode:mk_type([Arg2], {atom, false}, TFL, NewFail, 0.99), + FLbl, + hipe_icode:mk_type([Arg2], {atom, true}, TFL, NotTFL, 0.5), + NotTFLbl, + hipe_icode:mk_type([Arg2], {atom, false}, FFL, NewFail, 0.99), + ResTLbl, + hipe_icode:mk_move(Dst, hipe_icode:mk_const(true)), + hipe_icode:mk_goto(NewCont), + ResFLbl, + hipe_icode:mk_move(Dst, hipe_icode:mk_const(false)), + hipe_icode:mk_goto(NewCont)| + EndCode]. + +inline_unary_bool(Dst, {T,F}, Arg1, Cont, Fail, I) -> + TLbl = hipe_icode:mk_new_label(), + NotTLbl = hipe_icode:mk_new_label(), + FLbl = hipe_icode:mk_new_label(), + TL = hipe_icode:label_name(TLbl), + NotTL = hipe_icode:label_name(NotTLbl), + FL = hipe_icode:label_name(FLbl), + {NewCont, NewEnd} = get_cont_lbl(Cont), + {NewFail, FailCode} = get_fail_lbl(Fail, I), + EndCode = FailCode ++ NewEnd, + Arg1L = [Arg1], + [hipe_icode:mk_type(Arg1L, {atom, true}, TL, NotTL, 0.5), + NotTLbl, + hipe_icode:mk_type(Arg1L, {atom, false}, FL, NewFail, 0.99), + TLbl, + hipe_icode:mk_move(Dst, hipe_icode:mk_const(T)), + hipe_icode:mk_goto(NewCont), + FLbl, + hipe_icode:mk_move(Dst, hipe_icode:mk_const(F)), + hipe_icode:mk_goto(NewCont)| + EndCode]. + +get_cont_lbl([]) -> + NL = hipe_icode:mk_new_label(), + {hipe_icode:label_name(NL), [NL]}; +get_cont_lbl(Cont) -> + {Cont, []}. + +get_fail_lbl([], I) -> + NL = hipe_icode:mk_new_label(), + {hipe_icode:label_name(NL), [NL, I]}; +get_fail_lbl(Fail, _) -> + {Fail, []}. diff --git a/lib/hipe/icode/hipe_icode_instruction_counter.erl b/lib/hipe/icode/hipe_icode_instruction_counter.erl new file mode 100644 index 0000000000..92658d294a --- /dev/null +++ b/lib/hipe/icode/hipe_icode_instruction_counter.erl @@ -0,0 +1,135 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%------------------------------------------------------------------- +%% File : icode_instruction_counter.erl +%% Author : Andreas Hasselberg +%% Purpose : This module counts the number of different instructions +%% in a function. It is useful when you want to know if +%% your Icode analysis or specialization is good, bad or +%% simply unlucky :) +%% +%% Created : 2 Oct 2006 by Andreas Hasselberg +%%------------------------------------------------------------------- + +-module(hipe_icode_instruction_counter). + +-export([cfg/3, compare/3]). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). + +%%------------------------------------------------------------------- +%% A general CFG instruction walktrough +%%------------------------------------------------------------------- + +-spec cfg(#cfg{}, mfa(), comp_options()) -> [_]. + +cfg(Cfg, _IcodeFun, _Options) -> + Labels = hipe_icode_cfg:labels(Cfg), + %% Your Info init function goes here + InitInfo = counter__init_info(), + Info = lists:foldl(fun (Label, InfoAcc) -> + BB = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + walktrough_bb(Code, InfoAcc) + end, InitInfo, Labels), + %% counter__output_info(IcodeFun, Info), + Info. + +walktrough_bb(BB, Info) -> + lists:foldl(fun (Insn, InfoAcc) -> + %% Your analysis function here + counter__analys_insn(Insn, InfoAcc) + end, Info, BB). + +%%------------------------------------------------------------------- +%% The counter specific functions +%%------------------------------------------------------------------- + +-spec compare(gb_tree(), gb_tree(), gb_tree()) -> gb_tree(). + +compare(Name, Old, New) -> + NewList = gb_trees:to_list(New), + OldList = gb_trees:to_list(Old), + TempTree = compare_one_way(NewList, Old, added, gb_trees:empty()), + DiffTree = compare_one_way(OldList, New, removed, TempTree), + DiffList = gb_trees:to_list(DiffTree), + if DiffList =:= [] -> + ok; + true -> + io:format("~p: ~p ~n", [Name, DiffList]) + end, + DiffTree. + +compare_one_way(List, Tree, Key, Fold_tree) -> + lists:foldl(fun({Insn, ListCount}, DiffAcc) when is_integer(ListCount) -> + DiffCount = + case gb_trees:lookup(Insn, Tree) of + {value, TreeCount} when is_integer(TreeCount) -> + ListCount - TreeCount; + none -> + ListCount + end, + if DiffCount > 0 -> + gb_trees:insert({Key, Insn}, DiffCount, DiffAcc); + true -> + DiffAcc + end + end, + Fold_tree, + List). + +counter__init_info() -> + gb_trees:empty(). + +counter__analys_insn(Insn, Info) -> + Key = counter__insn_get_key(Insn), + counter__increase_key(Key, Info). + +counter__insn_get_key(If = #icode_if{}) -> {'if', hipe_icode:if_op(If)}; +counter__insn_get_key(Call = #icode_call{}) -> {call, hipe_icode:call_fun(Call)}; +counter__insn_get_key(#icode_enter{}) -> enter; +counter__insn_get_key(#icode_return{}) -> return; +counter__insn_get_key(#icode_type{}) -> type; +counter__insn_get_key(#icode_switch_val{}) -> switch_val; +counter__insn_get_key(#icode_switch_tuple_arity{}) -> switch_tuple_arity; +counter__insn_get_key(#icode_goto{}) -> goto; +counter__insn_get_key(#icode_move{}) -> move; +counter__insn_get_key(#icode_phi{}) -> phi; +counter__insn_get_key(#icode_begin_try{}) -> begin_try; +counter__insn_get_key(#icode_end_try{}) -> end_try; +counter__insn_get_key(#icode_begin_handler{}) -> begin_handler; +counter__insn_get_key(#icode_fail{}) -> fail; +counter__insn_get_key(#icode_comment{}) -> comment. + +counter__increase_key(Key, Info) -> + NewCounter = + case gb_trees:lookup(Key, Info) of + {value, Counter} when is_integer(Counter) -> + Counter + 1; + none -> + 1 + end, + gb_trees:enter(Key, NewCounter, Info). + +%%counter__output_info(IcodeFun, Info) -> +%% InfoList = gb_trees:to_list(Info), +%% io:format("~p instructions : ~p ~n", [IcodeFun, InfoList]). diff --git a/lib/hipe/icode/hipe_icode_liveness.erl b/lib/hipe/icode/hipe_icode_liveness.erl new file mode 100644 index 0000000000..5816e59032 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_liveness.erl @@ -0,0 +1,101 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ICODE LIVENESS ANALYSIS +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_icode_liveness). + +-define(PRETTY_PRINT, true). + +-include("hipe_icode.hrl"). +-include("../flow/liveness.inc"). + +%%-------------------------------------------------------------------- +%% Interface to CFG and icode. +%%-------------------------------------------------------------------- + +cfg_bb(CFG, L) -> + hipe_icode_cfg:bb(CFG, L). + +cfg_postorder(CFG) -> + hipe_icode_cfg:postorder(CFG). + +cfg_succ(CFG, L) -> + hipe_icode_cfg:succ(CFG, L). + +uses(Instr) -> + hipe_icode:uses(Instr). + +defines(Instr) -> + hipe_icode:defines(Instr). + +%% +%% This is the list of registers that are live at exit from a function +%% +cfg_labels(CFG) -> + hipe_icode_cfg:labels(CFG). + +liveout_no_succ() -> + ordsets:new(). + +pp_liveness_info(LiveList) -> + print_live_list(LiveList). + +print_live_list([]) -> + io:format(" none~n", []); +print_live_list([Last]) -> + io:format(" ", []), + print_var(Last), + io:format("~n", []); +print_live_list([Var|Rest]) -> + io:format(" ", []), + print_var(Var), + io:format(",", []), + print_live_list(Rest). + +pp_block(Label, CFG) -> + BB = hipe_icode_cfg:bb(CFG, Label), + Code = hipe_bb:code(BB), + hipe_icode_pp:pp_block(Code). + +print_var(#icode_variable{name=V, kind=Kind, annotation=T}) -> + case Kind of + var -> io:format("v~p", [V]); + reg -> io:format("r~p", [V]); + fvar -> io:format("fv~p", [V]) + end, + case T of + [] -> ok; + {_,X,F} -> io:format(" (~s)", F(X)) + end. + +%% +%% The following are used only if annotation of the code is requested. +%% +-ifdef(DEBUG_LIVENESS). +cfg_bb_add(CFG, L, NewBB) -> + hipe_icode_cfg:bb_add(CFG, L, NewBB). + +mk_comment(Text) -> + hipe_icode:mk_comment(Text). +-endif. diff --git a/lib/hipe/icode/hipe_icode_mulret.erl b/lib/hipe/icode/hipe_icode_mulret.erl new file mode 100644 index 0000000000..a6529c8519 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_mulret.erl @@ -0,0 +1,1323 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_icode_mulret.erl +%% Author : Christoffer Vikström +%% Purpose : +%% Created : 23 Jun 2004 by Christoffer Vikström +%%---------------------------------------------------------------------- + +-module(hipe_icode_mulret). +-export([mult_ret/4]). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). + +%%>----------------------------------------------------------------------< +%% Procedure : mult_ret/4 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< + +-spec mult_ret([_], atom(), comp_options(), _) -> [_]. + +mult_ret(List, Mod, Opts, Exports) -> + case length(List) > 1 of + true -> + Table = analyse(List, Mod, Exports), + %% printTable(Mod, Exports, Table), + optimize(List, Mod, Opts, Table); + false -> + List + end. + +%%>-----------------------< Analysis Steps >-----------------------------< + +%%>----------------------------------------------------------------------< +%% Procedure : analyse/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +analyse(List, _Mod, Exports) -> + MaxRets = hipe_rtl_arch:nr_of_return_regs(), + Table = mkTable(List), + %% printTable(Mod, Exports, Table), + Table2 = filterTable(Table, MaxRets, Exports), + %% printTable(Mod, Exports, Table2), + Table2. + +%%>----------------------------------------------------------------------< +%% Procedure : mkTable/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +mkTable(List) -> + mkTable(List, {[], []}). + +mkTable([{MFA, Icode} | List], Table) -> + %% New Icode + {_LMin,LMax} = hipe_icode:icode_label_range(Icode), + hipe_gensym:set_label(icode, LMax+1), + {_VMin,VMax} = hipe_icode:icode_var_range(Icode), + hipe_gensym:set_var(icode, VMax+1), + case isFunDef(MFA) of + true -> + mkTable(List, Table); + false -> + CallList = mkCallList(MFA, Icode), + Optimizable = isOptimizable(Icode), + NewTable = addToTable(MFA, Optimizable, CallList, Table), + mkTable(List, NewTable) + end; +mkTable([_|List], Table) -> mkTable(List, Table); +mkTable([], Table) -> Table. + +%%>----------------------------------------------------------------------< +%% Procedure : isFunDef/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +isFunDef({_, F, _}) -> + hd(atom_to_list(F)) =:= 45. %% 45 is the character '-' + +%%>----------------------------------------------------------------------< +%% Procedure : mkCallList/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +mkCallList(MFA, Icode) -> + Code = hipe_icode:icode_code(Icode), + mkCallList(Code, MFA, []). + +mkCallList([#icode_call{'fun'=F, dstlist=Vars, type=local}|Code], MFA, Res) -> + {Size, DstList} = lookForDef(Code, Vars), + mkCallList(Code, MFA, [{callPair,MFA,{F,{matchSize,Size,DstList}}}|Res]); +mkCallList([_|Code], MFA, Res) -> mkCallList(Code, MFA, Res); +mkCallList([], _, Res) -> Res. + +%%>----------------------------------------------------------------------< +%% Procedure : lookForDef/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +lookForDef([#icode_type{test={tuple,Size}, true_label=L}|Code], Vars) -> + Code2 = skipToLabel(Code, L), + DstLst = lookForUnElems(Code2, Vars), + case DstLst of + [] -> {1, Vars}; + _ -> + DstLst2 = fixDstLst(DstLst, Size), + {Size, DstLst2} + end; +lookForDef([#icode_move{src=Var, dst=NewVar}|Code], [Var]) -> + lookForDef(Code, [NewVar]); +lookForDef([#icode_label{}|_], Vars) -> + {1, Vars}; +lookForDef([I|Code], [Var] = Vars) -> + Defs = hipe_icode:defines(I), + case lists:member(Var, Defs) of + true -> + {1, Vars}; + false -> + lookForDef(Code, Vars) + end; +lookForDef([], Vars) -> {1, Vars}. + +%%>----------------------------------------------------------------------< +%% Procedure : skipToLabel/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +skipToLabel(Code, L) -> + case skipToLabel2(Code, L) of + noLabel -> + Code; + NewCode -> + NewCode + end. + +skipToLabel2([#icode_label{name = L}|Code],L) -> Code; +skipToLabel2([_|Code], L) -> skipToLabel2(Code, L); +skipToLabel2([], _) -> noLabel. + +%%>----------------------------------------------------------------------< +%% Procedure : lookForUnElems/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +lookForUnElems(Code, Var) -> + lookForUnElems(Code, Var, []). + +lookForUnElems([#icode_call{'fun'=#unsafe_element{index=Nr}, args=Var, + dstlist=[Ret]}|Code], Var, Res) -> + lookForUnElems(Code, Var, [{Nr, Ret}|Res]); +lookForUnElems([#icode_move{dst=Var}|_], [Var], Res) -> + lists:flatten(Res); +lookForUnElems([#icode_call{dstlist=VarList}|_], VarList, Res) -> + lists:flatten(Res); +lookForUnElems([_|Code], Var, Res) -> + lookForUnElems(Code, Var, Res); +lookForUnElems([], _, Res) -> lists:flatten(Res). + +%%>----------------------------------------------------------------------< +%% Procedure : fixDstLst/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +fixDstLst(DstLst, Size) when is_integer(Size) -> + fixDstLst(DstLst, Size, 1, []). + +fixDstLst(DstLst, Size, Cnt, Res) when Cnt =< Size -> + case isInLst(Cnt, DstLst) of + {true, Var} -> + fixDstLst(DstLst, Size, Cnt+1, [Var|Res]); + false -> + Var = hipe_icode:mk_var(hipe_gensym:new_var(icode)), + fixDstLst(DstLst, Size, Cnt+1, [Var|Res]) + end; +fixDstLst(_, Size, Cnt, Res) when Cnt > Size -> lists:reverse(Res). + +%%>----------------------------------------------------------------------< +%% Procedure : isInLst/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +isInLst(Nr, [{Nr,Var}|_]) -> {true, Var}; +isInLst(Cnt, [_|DstLst]) -> isInLst(Cnt, DstLst); +isInLst(_, []) -> false. + +%%>----------------------------------------------------------------------< +%% Procedure : isOptimizable/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +isOptimizable(Icode) -> + %% Icode2 = hipe_icode:fixup_fallthroughs(Icode), + Icode2 = hipe_icode:strip_comments(Icode), + Cfg = hipe_icode_cfg:linear_to_cfg(Icode2), + %% hipe_icode_cfg:pp(Cfg), + case findReturnBlocks(Cfg) of + noReturn -> + {false, -1}; + BlockList -> + processReturnBlocks(BlockList, Cfg) + end. + +%%>----------------------------------------------------------------------< +%% Procedure : findReturnBlocks/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +findReturnBlocks(IcodeCfg) -> + Labels = hipe_icode_cfg:labels(IcodeCfg), + case searchBlocks(Labels, IcodeCfg) of + [] -> + noReturn; + BlockList-> + BlockList + end. + +%%>----------------------------------------------------------------------< +%% Procedure : searchBlocks/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +searchBlocks(Labels, IcodeCfg) -> + searchBlocks(Labels, IcodeCfg, []). + +searchBlocks([Label|Labels], IcodeCfg, Res) -> + Block = hipe_icode_cfg:bb(IcodeCfg, Label), + Code = hipe_bb:code(Block), + case searchBlockCode(Code) of + {hasReturn, RetVar} -> + searchBlocks(Labels, IcodeCfg, [{Label, RetVar}|Res]); + noReturn -> + searchBlocks(Labels, IcodeCfg, Res) + end; +searchBlocks([], _, Res) -> Res. + +%%>----------------------------------------------------------------------< +%% Procedure : searchBlockCode/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +searchBlockCode([#icode_return{vars=Vars}|_]) -> + {hasReturn, Vars}; +searchBlockCode([_|Icode]) -> + searchBlockCode(Icode); +searchBlockCode([]) -> noReturn. + +%%>----------------------------------------------------------------------< +%% Procedure : processReturnBlock/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +processReturnBlocks(Blocks, Cfg) -> + processReturnBlocks(Blocks, Cfg, {true, -1}, []). + +processReturnBlocks([{Label, Var}|BlockList], Cfg, {Opts, Size}, TypeLst) -> + {Opt, Type, Size2} = traverseCode(Label, Var, Cfg), + case (Size =:= -1) orelse (Size =:= Size2) of + true -> + processReturnBlocks(BlockList, Cfg, + {Opt andalso Opts, Size2}, [Type|TypeLst]); + false -> + {false, -1} + end; +processReturnBlocks([], _, Res, TypeLst) -> + case lists:member(icode_var, TypeLst) of + true -> + {_, Size} = Res, + case Size > 1 of + true -> + Res; + false -> + {false, -1} + end; + false -> + {false, -1} + end. + +%%>----------------------------------------------------------------------< +%% Procedure : traverseCode/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +traverseCode(Label, Var, Cfg) -> + traverseCode(Label, Var, Cfg, []). + +traverseCode(Label, Var, Cfg, LabLst) -> + Preds = hipe_icode_cfg:pred(Cfg, Label), + Block = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(Block), + case findDefine(lists:reverse(Code), Var) of + {found, Type, NumRets} -> + {true, Type, NumRets}; + {notFound, SrcVar} -> + case Preds of + [] -> + {false, none, -1}; + [Pred] -> + case lists:member(Label, LabLst) of + false -> + traverseCode(Pred, SrcVar, Cfg, [Label|LabLst]); + true -> + {false, none, -1} + end; + _ -> + {false, none, -1} + end + end. + +%%>----------------------------------------------------------------------< +%% Procedure : findDefine/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +findDefine([#icode_call{dstlist=Vars,'fun'=mktuple,args=Vs}|_], Vars) -> + case length(Vs) of + 1 -> + [{Type, _}] = Vs, + {found, Type, 1}; + Len -> + case lists:any(fun hipe_icode:is_var/1, Vs) of + true -> + {found, icode_var, Len}; + false -> + {found, icode_const, Len} + end + end; +findDefine([#icode_move{dst=Var, src=Src}|Code], [Var]) -> + case hipe_icode:is_var(Src) of + true -> + findDefine(Code, [Src]); + false -> + case Src of + #icode_const{value={flat, Value}} -> + case is_tuple(Value) of + true -> + {found, icode_const, tuple_size(Value)}; + false -> + {found, icode_const, 1} + end; + _ -> + findDefine(Code, [Var]) + end + end; +findDefine([_|Code], Var) -> + findDefine(Code, Var); +findDefine([], Var) -> + {notFound, Var}. + +%%>----------------------------------------------------------------------< +%% Procedure : addToTable/4 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +addToTable(MFA, Optimizable, CallList, {FunLst, CallLst}) -> + NewFunLst = [{MFA, Optimizable}|FunLst], + {NewFunLst, CallList ++ CallLst}. + +%%>----------------------------------------------------------------------< +%% Procedure : filterTable/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +filterTable({FunLst, CallLst}, MaxRets, Exports) -> + filterTable(FunLst, CallLst, MaxRets, Exports, {[],[]}). + +filterTable([Fun|FunLst], CallLst, MaxRets, Exports, {Funs, Calls} = FCs) -> + {MFA, {ReturnOpt, Rets}} = Fun, + {CallOpt, CallsToKeep} = checkCalls(CallLst, MFA, Rets), + CallsToKeep2 = removeDuplicateCalls(CallsToKeep), + NotExported = checkExported(MFA, Exports), + case CallOpt andalso ReturnOpt andalso (Rets =< MaxRets) andalso + NotExported andalso (not containRecursiveCalls(CallsToKeep2, MFA)) of + true -> + filterTable(FunLst, CallLst, MaxRets, Exports, + {[Fun|Funs], CallsToKeep2 ++ Calls}); + false -> + filterTable(FunLst, CallLst, MaxRets, Exports, FCs) + end; +filterTable([], _, _, _, Res) -> Res. + +removeDuplicateCalls(Calls) -> + removeDuplicateCalls(Calls, []). + +removeDuplicateCalls([Call|CallsToKeep], Res) -> + case lists:member(Call, CallsToKeep) of + true -> + removeDuplicateCalls(CallsToKeep, Res); + false -> + removeDuplicateCalls(CallsToKeep, [Call|Res]) + end; +removeDuplicateCalls([], Res) -> lists:reverse(Res). + +containRecursiveCalls([Call|Calls], Fun) -> + {callPair, Caller, {Callee, _}} = Call, + case (Callee =:= Fun) andalso (Caller =:= Fun) of + true -> + true; + false-> + containRecursiveCalls(Calls, Fun) + end; +containRecursiveCalls([], _) -> false. + +%%>----------------------------------------------------------------------< +%% Procedure : checkCalls/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +checkCalls(CallLst, MFA, Rets) -> + checkCalls(CallLst, MFA, Rets, [], []). + +checkCalls([C = {callPair, _, {MFA, {matchSize, Rets, _}}}|CallLst], + MFA, Rets, Res, Opt) -> + checkCalls(CallLst, MFA, Rets, [C|Res], [true|Opt]); +checkCalls([{callPair, _, {MFA, {matchSize, _, _}}}|CallLst], + MFA, Rets, Res, Opt) -> + checkCalls(CallLst, MFA, Rets, Res, [false|Opt]); +checkCalls([_|CallLst], MFA, Rets, Res, Opt) -> + checkCalls(CallLst, MFA, Rets, Res, Opt); +checkCalls([], _, _, Res, Opt) -> {combineOpts(Opt), Res}. + +%%>----------------------------------------------------------------------< +%% Procedure : combineOpts/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +combineOpts([]) -> false; +combineOpts([Opt]) -> Opt; +combineOpts([Opt|Opts]) -> Opt andalso combineOpts(Opts). + +%%>----------------------------------------------------------------------< +%% Procedure : checkCalls/2 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +checkExported({_,F,A}, [{F,A}|_]) -> false; +checkExported(MFA, [_|Exports]) -> checkExported(MFA, Exports); +checkExported(_, []) -> true. + +%%>----------------------< Optimization Steps >--------------------------< + +%%>----------------------------------------------------------------------< +%% Procedure : optimize/4 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +optimize(List, _Mod, Opts, Table) -> + {FunLst, CallLst} = Table, + List2 = optimizeFuns(FunLst, Opts, List), + optimizeCalls(CallLst, Opts, List2). + +%%>----------------------------------------------------------------------< +%% Procedure : optimizeFuns/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +optimizeFuns([{Fun, _}|FunList], Opts, List) -> + NewList = findFun(List, Fun), + optimizeFuns(FunList, Opts, NewList); +optimizeFuns([],_,List) -> List. + +findFun(List, Fun) -> findFun(List, Fun, []). +findFun([{Fun, Icode}|List], Fun, Res) -> + NewIcode = optimizeFun(Icode), + findFun(List, Fun, [{Fun, NewIcode}|Res]); +findFun([I|List], Fun, Res) -> findFun(List, Fun, [I|Res]); +findFun([], _, Res) -> lists:reverse(Res). + + +optimizeFun(Icode) -> + {_LMin,LMax} = hipe_icode:icode_label_range(Icode), + hipe_gensym:set_label(icode, LMax+1), + {_VMin,VMax} = hipe_icode:icode_var_range(Icode), + hipe_gensym:set_var(icode, VMax+1), + %% Icode2 = hipe_icode:fixup_fallthroughs(Icode), + Icode2 = hipe_icode:strip_comments(Icode), + Cfg = hipe_icode_cfg:linear_to_cfg(Icode2), + case findReturnBlocks(Cfg) of + noReturn -> + false; + BlockList -> + NewCfg = optimizeReturnBlocks(BlockList, Cfg), + hipe_icode_cfg:cfg_to_linear(NewCfg) + end. + +optimizeReturnBlocks([Block|BlockList], Cfg) -> + {NewCfg, Vars} = optimizeReturnBlock(Block, Cfg), + NewCfg2 = case Vars of + [_] -> + Cfg; + _ -> + {Label, _} = Block, + updateReturnBlock(Label, Vars, NewCfg) + end, + optimizeReturnBlocks(BlockList, NewCfg2); +optimizeReturnBlocks([], Cfg) -> Cfg. + +optimizeReturnBlock(Block, Cfg) -> + optimizeReturnBlock(Block, Cfg, []). + +optimizeReturnBlock({Label,Var}, Cfg, UpdateMap) -> + Preds = hipe_icode_cfg:pred(Cfg, Label), + Block = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(Block), + case optimizeDefine(Code, Var) of + {found, NewBlockCode, Vars} -> + NewBlock = hipe_bb:code_update(Block, NewBlockCode), + NewCfg = resolveUpdateMap(UpdateMap, Cfg), + {hipe_icode_cfg:bb_add(NewCfg, Label, NewBlock), Vars}; + {none, NewBlockCode, NewVar} -> + case Preds of + [Pred] -> + NewBlock = hipe_bb:code_update(Block, NewBlockCode), + optimizeReturnBlock({Pred,NewVar}, Cfg, + [{Label, NewBlock}|UpdateMap]); + [_|_] -> + {Cfg, Var} + end; + {none, noOpt} -> + {Cfg, Var} + end. + +optimizeDefine(Code, Dst) -> + optimizeDefine(lists:reverse(Code), Dst, [], []). + +optimizeDefine([I|Code], Dsts, DstLst, Res) -> + [Ds] = Dsts, + case isCallPrimop(I, mktuple) andalso DstLst =:= [] of + true -> + case (hipe_icode:call_dstlist(I) =:= Dsts) of + true -> + case (hipe_icode:call_args(I) > 1) of + true -> + optimizeDefine(Code, Dsts, hipe_icode:call_args(I), Res); + false -> + {none, noOpt} + end; + false -> + optimizeDefine(Code, Dsts, DstLst, [I|Res]) + end; + false -> + case hipe_icode:is_move(I) andalso DstLst =:= [] of + true -> + case hipe_icode:move_dst(I) =:= Ds of + true -> + Src = hipe_icode:move_src(I), + case hipe_icode:is_var(Src) of + true -> + NewDst = hipe_icode:move_src(I), + optimizeDefine(Code, [NewDst], DstLst, Res); + false -> + case Src of + #icode_const{value={flat, T}} when is_tuple(T) -> + NewLst = tuple_to_list(T), + optimizeDefine(Code, Dsts, NewLst, Res); + _ -> + {none, noOpt} + end + end; + false -> + optimizeDefine(Code, Dsts, DstLst, [I|Res]) + end; + false -> + case lists:member(Ds, hipe_icode:defines(I)) andalso DstLst =:= [] of + true -> + {none, noOpt}; + false -> + optimizeDefine(Code, Dsts, DstLst, [I|Res]) + end + end + end; +optimizeDefine([], Dsts, DstLst, Res) -> + case DstLst of + [] -> + {none, Res, Dsts}; + _ -> + {found, Res, DstLst} + end. + +resolveUpdateMap([{Label, Block}|UpdateMap], Cfg) -> + resolveUpdateMap(UpdateMap, hipe_icode_cfg:bb_add(Cfg, Label, Block)); +resolveUpdateMap([], Cfg) -> Cfg. + +%%>----------------------------------------------------------------------< +%% Procedure : updateReturnBlock/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +updateReturnBlock(Label, Vars, IcodeCfg) -> + Block = hipe_icode_cfg:bb(IcodeCfg, Label), + Code = hipe_bb:code(Block), + NewCode = updateReturnCode(Code, Vars), + NewBlock = hipe_bb:code_update(Block, NewCode), + hipe_icode_cfg:bb_add(IcodeCfg, Label, NewBlock). + +updateReturnCode(Code, DstLst) -> + updateReturnCode(Code, DstLst, []). + +updateReturnCode([I| Code], DstLst, Res) -> + case hipe_icode:is_return(I) of + true -> + updateReturnCode(Code, DstLst, [hipe_icode:mk_return(DstLst)|Res]); + false -> + updateReturnCode(Code, DstLst, [I|Res]) + end; +updateReturnCode([], _, Res) -> lists:reverse(Res). + +%%>----------------------------------------------------------------------< +%% Procedure : optimizeCalls/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +optimizeCalls([Call|CallLst], _Opts, List) -> + {callPair, Caller, {Callee, {matchSize, _, DstLst}}} = Call, + NewList = optimizeCall(List, Caller, Callee, DstLst), + optimizeCalls(CallLst, _Opts, NewList); +optimizeCalls([], _Opts, List) -> List. + +%%>----------------------------------------------------------------------< +%% Procedure : optimizeCall/4 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +optimizeCall(List, Caller, Callee, DstLst) -> + optimizeCall(List, Caller, Callee, DstLst, []). + +optimizeCall([{MFA, Icode}|List], MFA, Callee, DstLst, Res) -> + {_LMin,LMax} = hipe_icode:icode_label_range(Icode), + hipe_gensym:set_label(icode, LMax+1), + {_VMin,VMax} = hipe_icode:icode_var_range(Icode), + hipe_gensym:set_var(icode, VMax+1), + %% Icode2 = hipe_icode:fixup_fallthroughs(Icode), + Icode2 = hipe_icode:strip_comments(Icode), + Cfg = hipe_icode_cfg:linear_to_cfg(Icode2), + NewIcode = findAndUpdateCalls(Cfg, Callee, DstLst), + optimizeCall(List, MFA, Callee, DstLst, [{MFA, NewIcode}|Res]); +optimizeCall([I|List], Caller, Callee, DstLst, Res) -> + optimizeCall(List, Caller, Callee, DstLst, [I|Res]); +optimizeCall([], _, _, _, Res) -> lists:reverse(Res). + +%%>----------------------------------------------------------------------< +%% Procedure : findAndUpdateCall/3 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +findAndUpdateCalls(Cfg, Callee, DstLst) -> + Labels = hipe_icode_cfg:labels(Cfg), + Cfg2 = findAndUpdateCalls(Cfg, Labels, Callee, DstLst, []), + hipe_icode_cfg:cfg_to_linear(Cfg2). +findAndUpdateCalls(Cfg, [L|Labels], Callee, DstLst, Visited) -> + %% Block = hipe_icode_cfg:bb(Cfg, L), + %% Code = hipe_bb:code(Block), + case containCorrectCall(Cfg, L, Callee, DstLst) of + true -> + Block = hipe_icode_cfg:bb(Cfg,L), + Code = hipe_bb:code(Block), + {NewCode, OldVar} = updateCode(Code, Callee, DstLst), + NewBlock = hipe_bb:code_update(Block, NewCode), + Cfg2 = hipe_icode_cfg:bb_add(Cfg, L, NewBlock), + Cfg3 = cleanUpAffectedCode(Cfg2, OldVar, Callee, L, Visited), + findAndUpdateCalls(Cfg3, Labels, Callee, DstLst, [L|Visited]); + false -> + findAndUpdateCalls(Cfg, Labels, Callee, DstLst, [L|Visited]) + end; +findAndUpdateCalls(Cfg,[], _, _, _) -> Cfg. + +containCorrectCall(Cfg, Label, Callee, DstLst) -> + Block = hipe_icode_cfg:bb(Cfg,Label), + Code = hipe_bb:code(Block), + case containCallee(Code, Callee) of + {true, OldVar} -> + Succs = hipe_icode_cfg:succ(Cfg, Label), + checkForUnElems(Succs, OldVar, DstLst, Cfg); + false -> + false + end. + +checkForUnElems([], _, _, _) -> false; +checkForUnElems([Succ|Succs], OldVar, DstLst, Cfg) -> + Block = hipe_icode_cfg:bb(Cfg,Succ), + Code = hipe_bb:code(Block), + case checkForUnElems2(Code, OldVar, DstLst, []) of + true -> + true; + false -> + checkForUnElems(Succs, OldVar, DstLst, Cfg) + end. + +checkForUnElems2([I|Code], OldVar, DstLst, DstRes) -> + case isCallPrimop(I, unsafe_element) of + true -> + case (hipe_icode:call_args(I) =:= OldVar) of + true -> + [Dst] = hipe_icode:call_dstlist(I), + case lists:member(Dst, DstLst) of + true -> + checkForUnElems2(Code, OldVar, DstLst, [Dst|DstRes]); + false -> + checkForUnElems2(Code, OldVar, DstLst, DstRes) + end; + false -> + checkForUnElems2(Code, OldVar, DstLst, DstRes) + end; + false -> + checkForUnElems2(Code, OldVar, DstLst, DstRes) + end; +checkForUnElems2([], _, DstLst, DstRes) -> DstLst =:= lists:reverse(DstRes). + + +containCallee([I|Code], Callee) -> + case isCallLocal(I, Callee) of + true -> + {true, hipe_icode:call_dstlist(I)}; + false -> + containCallee(Code, Callee) + end; +containCallee([], _) -> false. + + +updateCode(Code, Callee, DstLst) -> + updateCode(Code, Callee, DstLst, [], []). + +updateCode([I|Code], Callee, DstLst, Res, OldVars) -> + case isCallLocal(I, Callee) of + true -> + Vars = hipe_icode:call_dstlist(I), + I2 = hipe_icode:call_dstlist_update(I, DstLst), + updateCode(Code, Callee, DstLst, [I2|Res], Vars); + false -> + updateCode(Code, Callee, DstLst, [I|Res], OldVars) + end; +updateCode([], _, _, Res, OldVars) -> {lists:reverse(Res), OldVars}. + + +cleanUpAffectedCode(Cfg, OldVar, Callee, Label, Visited) -> + Block = hipe_icode_cfg:bb(Cfg,Label), + Code = hipe_bb:code(Block), + {CodeBefore, CodeAfter, DstLst} = divideAtCall(Code, Callee), + {NewCodeAfter, ContLab, FailLab} = findType(CodeAfter, OldVar), + ContBlock = hipe_icode_cfg:bb(Cfg, ContLab), + Succs = hipe_icode_cfg:succ(Cfg, ContLab), + ContCode = hipe_bb:code(ContBlock), + {NewContCode, NewFailLab} = removeUnElems(ContCode, OldVar, DstLst), + NewBlock = hipe_bb:code_update(Block, + CodeBefore ++ NewCodeAfter ++ NewContCode), + Cfg2 = hipe_icode_cfg:bb_add(Cfg, Label, NewBlock), + Cfg3 = resolveSuccBlocks(Succs, OldVar, DstLst, [Label|Visited], + NewFailLab, Cfg2), + insertMiddleFailBlock(Cfg3, NewFailLab, FailLab, OldVar, DstLst). + +divideAtCall(Code, Caller) -> + divideAtCall(Code, Caller, []). + +divideAtCall([I|Code], Caller, Tail) -> + case isCallLocal(I, Caller) of + true -> + {lists:reverse([I|Tail]), Code, hipe_icode:call_dstlist(I)}; + false -> + divideAtCall(Code, Caller, [I|Tail]) + end; +divideAtCall([], _, Tail) -> {Tail, []}. + +findType(CodeAfter, OldVar) -> + findType(CodeAfter, OldVar, [], {none, none}). + +findType([I|Code], OldVar, Rest, Succs) -> + case hipe_icode:is_type(I) of + true -> + case hipe_icode:type_args(I) =:= OldVar of + true -> + TrueLab = hipe_icode:type_true_label(I), + FalseLab = hipe_icode:type_false_label(I), + findType(Code, OldVar, Rest, {TrueLab, FalseLab}); + false -> + findType(Code, OldVar, [I|Rest], Succs) + end; + false -> + case hipe_icode:is_move(I) of + true -> + case [hipe_icode:move_src(I)] =:= OldVar of + true -> + findType(Code, hipe_icode:move_dst(I), [I|Rest], Succs); + false -> + findType(Code, OldVar, [I|Rest], Succs) + end; + false -> + findType(Code, OldVar, [I|Rest], Succs) + end + end; +findType([],_,Rest, {TrueLab, FalseLab}) -> + {lists:reverse(Rest), TrueLab, FalseLab}. + +%% Nesting hell... check for redundancies. +%% --------------------------------------- +removeUnElems(Code, OldVars, DstLst) -> + removeUnElems(Code, OldVars, DstLst, [], false, none). + +removeUnElems([I|Code], [OldVar] = OldVars, DstLst, Res, Def, Lab) -> + case isCallPrimop(I, unsafe_element) of + true -> + case (hipe_icode:call_args(I) =:= OldVars) of + true -> + removeUnElems(Code, OldVars, DstLst, Res, Def, Lab); + false -> + case lists:member(OldVar, hipe_icode:call_args(I)) of + true -> + %% XXX: the following test seems redundant, + %% hence commented out -- KOSTIS + %% case Def of + %% true -> + removeUnElems(Code, OldVars, DstLst, [I|Res], Def, Lab); + %% false -> + %% removeUnElems(Code, OldVars, DstLst, + %% [I|Res], Def, Lab) + %% end; + false -> + io:format("Borde aldrig kunna hamna här!", []), + removeUnElems(Code, OldVars, DstLst, [I|Res], Def, Lab) + end + end; + false -> + case hipe_icode:is_move(I) of + true -> + case hipe_icode:move_src(I) =:= OldVar of + true -> + NewVar = hipe_icode:move_dst(I), + removeUnElems(Code, [NewVar], DstLst, [I|Res], Def, Lab); + false -> + removeUnElems(Code, OldVars, DstLst, [I|Res], Def, Lab) + end; + false -> + case hipe_icode:is_type(I) andalso not Def of + true -> + NewFalseLab = case Lab =:= none of + true -> + hipe_gensym:get_next_label(icode); + false -> + Lab + end, + _I2 = updateTypeFalseLabel(I, NewFalseLab), + removeUnElems(Code, OldVars, DstLst, [I|Res], Def, NewFalseLab); + false -> + case lists:member(OldVar, hipe_icode:uses(I)) andalso Def of + true -> + removeUnElems(Code, OldVars, DstLst, [I|Res], Def, Lab); + false -> + case lists:member(OldVar, hipe_icode:defines(I)) of + true -> + removeUnElems(Code, OldVars, DstLst, [I|Res], true, Lab); + false -> + removeUnElems(Code, OldVars, DstLst, [I|Res], Def, Lab) + end + end + end + end + end; +removeUnElems([], _, _, Res,_, Lab) -> {lists:reverse(Res), Lab}. + + +updateTypeFalseLabel(Instr, NewFalseLabel) -> + TrueLabel = hipe_icode:type_true_label(Instr), + Args = hipe_icode:type_args(Instr), + Type = hipe_icode:type_test(Instr), + hipe_icode:mk_type(Args, Type, TrueLabel, NewFalseLabel). + + +resolveSuccBlocks(Succs, OldVar, DstLst, Visited, FailLab, Cfg) -> + NewSuccs = [X || X <- Succs, not lists:member(X, Visited)], + resolveSuccBlocks2(NewSuccs, OldVar, DstLst, Visited, FailLab, Cfg). + +resolveSuccBlocks2([Succ|Succs], OldVar, DstLst, Vis, FailLab, Cfg) -> + Block = hipe_icode_cfg:bb(Cfg,Succ), + Code = hipe_bb:code(Block), + {NewCode, ReDefined} = checkUsesDefs(Code, OldVar, DstLst, FailLab), + NewBlock = hipe_bb:code_update(Block, NewCode), + Cfg2 = hipe_icode_cfg:bb_add(Cfg, Succ, NewBlock), + case ReDefined of + true -> + resolveSuccBlocks2(Succs, OldVar, DstLst, [Succ|Vis], FailLab, Cfg2); + false -> + NewSuccs = hipe_icode_cfg:succ(Cfg, Succ), + NewSuccs2 = [X || X <- NewSuccs, not lists:member(X, Vis++Succs)], + resolveSuccBlocks2(NewSuccs2++Succs, OldVar, DstLst, + [Succ|Vis], FailLab, Cfg2) + end; +resolveSuccBlocks2([], _, _, _, _, Cfg) -> Cfg. + + +checkUsesDefs(Code, OldVar, DstLst, FailLab) -> + checkUsesDefs(Code, OldVar, DstLst, FailLab, [], false). + +checkUsesDefs([I|Code], OldVar, DstLst, FailLab, Res, Defined) -> + [OVar] = OldVar, + case hipe_icode:is_move(I) of + true -> + case hipe_icode:move_src(I) =:= OVar of + true -> + NewVar = hipe_icode:move_dst(I), + checkUsesDefs(Code, NewVar, DstLst, FailLab, [I|Res], true); + false -> + case lists:member(OVar, hipe_icode:defines(I)) of + true -> + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I|Res], true); + false -> + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I|Res], Defined) + end + end; + false -> + case hipe_icode:is_type(I) andalso not Defined of + true -> + case FailLab =/= none of + true -> + _I2 = updateTypeFalseLabel(I, FailLab), + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I|Res], Defined); + false -> + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I|Res], Defined) + end; + false -> + case (lists:member(OVar, hipe_icode:uses(I))) andalso + (not Defined) andalso (FailLab =/= none) of + true -> + Tpl = hipe_icode:mk_primop(OldVar, mktuple, DstLst), + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I,Tpl|Res], true); + false -> + case lists:member(OVar, hipe_icode:defines(I)) of + true -> + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I|Res], true); + false -> + checkUsesDefs(Code, OldVar, DstLst, FailLab, [I|Res],Defined) + end + end + end + end; +checkUsesDefs([], _, _, _, Res, Defined) -> {lists:reverse(Res), Defined}. + + +insertMiddleFailBlock(Cfg, NewFailLabel, OldFailLabel, OldVar, DstLst) -> + case NewFailLabel =:= none of + true -> + Cfg; + false -> + NewCode = [hipe_icode:mk_primop(OldVar, mktuple, DstLst), + hipe_icode:mk_goto(OldFailLabel)], + NewBlock = hipe_bb:mk_bb(NewCode), + hipe_icode_cfg:bb_add(Cfg, NewFailLabel, NewBlock) + end. + + +isCallLocal(Instr, Fun) -> + hipe_icode:is_call(Instr) andalso (hipe_icode:call_type(Instr) =:= local) + andalso (hipe_icode:call_fun(Instr) =:= Fun). + +isCallPrimop(Instr, Fun) -> + case hipe_icode:is_call(Instr) of + true -> + case is_tuple(hipe_icode:call_fun(Instr)) of + true -> + ((hipe_icode:call_type(Instr) =:= primop) andalso + (element(1,hipe_icode:call_fun(Instr)) =:= Fun)); + false -> + ((hipe_icode:call_type(Instr) =:= primop) andalso + (hipe_icode:call_fun(Instr) =:= Fun)) + end; + false -> + false + end. + + +%% >-------------------------< Debug code >------------------------------< + +-ifdef(DEBUG_MULRET). + +%%>----------------------------------------------------------------------< +%% Procedure : printTable/1 +%% Purpose : +%% Arguments : +%% Return : +%% Notes : +%%>----------------------------------------------------------------------< +printTable(Mod, Exports, {FunLst, CallLst}) -> + {Y,Mo,D} = date(), + {H,Mi,S} = time(), + io:format("Module: ~w - (~w/~w-~w, ~w:~w:~w)~n=======~n", + [Mod,D,Mo,Y,H,Mi,S]), + io:format("Exports: ~w~n", [Exports]), + io:format("FunList: ~n"), + printFunList(FunLst), + io:format("CallList: ~n"), + printCallList(CallLst). + +printFunList([Fun|FunLst]) -> + io:format(" ~w~n", [Fun]), + printFunList(FunLst); +printFunList([]) -> io:format("~n"). + +printCallList([Call|CallLst]) -> + io:format(" ~w~n", [Call]), + printCallList(CallLst); +printCallList([]) -> io:format("~n"). + +-endif. + +%% >----------------------------< Old code >--------------------------------< + +%% %%>----------------------------------------------------------------------< +%% % Procedure : findCallCode/3 +%% % Purpose : +%% % Arguments : +%% % Return : +%% % Notes : +%% %%>----------------------------------------------------------------------< +%% findCallCode(List, Callee, DstLst) -> findCallCode(List, Callee, DstLst, []). +%% findCallCode([I=#icode_call{'fun'=Callee, dstlist=Var, type=local}, I2, I3|List], +%% Callee, DstLst, Res) -> +%% NewList = removeUnElems(List, Var), +%% %% _Uses = checkForUses(NewList, Var, DstLst), +%% Size = length(DstLst), +%% case I2 of +%% #icode_type{test={tuple, Size}, args=Var, true_label=Label} -> +%% case I3 of +%% #icode_label{name=Label} -> +%% findCallCode(NewList, Callee, DstLst, +%% [I#icode_call{dstlist=DstLst}|Res]); +%% _ -> +%% findCallCode(NewList, Callee, DstLst, +%% [#goto{label=Label}, +%% I#icode_call{dstlist=DstLst}|Res]) +%% end; +%% _ -> +%% findCallCode(NewList, Callee, DstLst, +%% [I2,I#icode_call{dstlist=DstLst}|Res]) +%% end; +%% findCallCode([I|List], Callee, DstLst, Res) -> +%% findCallCode(List, Callee, DstLst, [I|Res]); +%% findCallCode([], _, _, Res) -> lists:reverse(Res). + + +%% %%>----------------------------------------------------------------------< +%% % Procedure : checkForUses +%% % Purpose : +%% % Arguments : +%% % Return : +%% % Notes : +%% %%>----------------------------------------------------------------------< +%% checkForUses(List, Var, Dsts) -> checkForUses(List, Var, Dsts, [], List). +%% checkForUses([I|List], Var, Dsts, Rest, Code) -> +%% Defs = hipe_icode:defines(I), +%% Uses = hipe_icode:uses(I), +%% case lists:member(Var, Uses) of +%% true -> +%% true; +%% false -> +%% case lists:member(Var, Defs) of +%% true -> +%% false; +%% false -> +%% case hipe_icode:is_branch(I) of +%% true -> +%% Succs = hipe_icode:successors(I), +%% checkSuccsForUses(Succs, Var, Dsts, Rest, Code); +%% false -> +%% checkForUses(List, Var, Dsts, [I|Rest], Code) +%% end +%% end +%% end; +%% checkForUses([], _, _, _, _) -> false. + +%% checkSuccsForUses(Succs, Var, Dsts, Rest, Code) -> +%% checkSuccsForUses(Succs, Var, Dsts, Rest, Code, false). +%% checkSuccsForUses([S|Succs], Var, Dsts, Rest, Code, Res) -> +%% List = gotoLabel(S, Code), +%% Used = checkForUses(List, Var, Dsts, Rest, Code), +%% checkSuccsForUses(Succs, Var, Code, Dsts, Used andalso Res); +%% checkSuccsForUses([], _, _, _, _, Res) -> Res. + +%% gotoLabel(L, [L|List]) -> List; +%% gotoLabel(L, [_|List]) -> gotoLabel(L, List); +%% gotoLabel(_, []) -> []. + + +%% %%>----------------------------------------------------------------------< +%% % Procedure : removeUnElems/2 +%% % Purpose : +%% % Arguments : +%% % Return : +%% % Notes : Fixa så att funktionen använder defines(I) istället och +%% % selektorer istället för att matcha på #call{}. Lätt gjort. +%% %%>----------------------------------------------------------------------< +%% removeUnElems(List, Var) -> removeUnElems(List, Var, []). +%% removeUnElems([#icode_call{'fun'={unsafe_element,_}, args=Var}|List], Var, Res) -> +%% removeUnElems(List, Var, Res); +%% removeUnElems([I=#icode_move{dst=Var}|List], [Var], Res) -> +%% lists:reverse(Res) ++ [I|List]; +%% removeUnElems([I=#icode_call{dstlist=Var}|List], Var, Res) -> +%% lists:reverse(Res) ++ [I|List]; +%% removeUnElems([I|List], Var, Res) -> +%% removeUnElems(List, Var, [I|Res]); +%% removeUnElems([], _, Res) -> lists:reverse(Res). + +%% removeUnElems(List, Var) -> removeUnElems(List, Var, []). +%% removeUnElems([I|List], Var, Res) -> +%% Defs = hipe_icode:defines(I), +%% case hipe_icode:is_call(I) of +%% true -> +%% Fn = hipe_icode:call_fun(I), +%% case (hipe_icode:call_args(I) =:= Var) andalso is_tuple(Fn) of +%% true -> +%% case element(1,Fn) =:= unsafe_element of +%% true -> +%% removeUnElems(List, Var, Res); +%% false -> +%% case lists:member(Var, Defs) of +%% true -> +%% lists:reverse(Res) ++ [I|List]; +%% false -> +%% removeUnElems(List, Var, [I|Res]) +%% end +%% end; +%% false -> +%% case lists:member(Var, Defs) of +%% true -> +%% lists:reverse(Res) ++ [I|List]; +%% false -> +%% removeUnElems(List, Var, [I|Res]) +%% end +%% end; +%% false -> +%% case lists:member(Var, Defs) of +%% true -> +%% lists:reverse(Res) ++ [I|List]; +%% false -> +%% removeUnElems(List, Var, [I|Res]) +%% end +%% end; +%% removeUnElems([], _, Res) -> lists:reverse(Res). + + +%% Old findDefine that also could update it. +%% ----------------------------------------- +%% findDefine(Code, Var) -> findDefine(Code, Var, [], []). +%% findDefine([#icode_call{dstlist=Var,'fun'=mktuple,args=Vs}|Code],Var,NewCode,_)-> +%% findDefine(Code, Var, NewCode, Vs); +%% findDefine([I=#icode_move{dst=Var, src=Src}|Code], [Var], NewCode, _) -> +%% case Src of +%% #icode_var{} -> +%% findDefine(Code, [Src], [I|NewCode], [Src]); +%% #icode_const{value={flat, Tuple}} -> +%% findDefine(Code, [Var], [I|NewCode], []) %% Check this case! [Var] +%% end; +%% findDefine([I|Code], Var, NewCode, Vars) -> +%% findDefine(Code, Var, [I|NewCode], Vars); +%% findDefine([], _, NewCode, Vars) -> +%% case Vars of +%% [] -> +%% notFound; +%% [_] -> +%% {notFound, Vars}; +%% _ -> +%% {found, lists:reverse(NewCode), Vars} +%% end. + +%% modifyCode(Code, Var) -> +%% [#icode_return{vars=Var}|Code2] = lists:reverse(Code), +%% case (length(Var) =< hipe_rtl_arch:nr_of_return_regs()) of +%% true -> +%% {Arity, Code3} = modifyCode(Code2, Var, []), +%% {Arity, Code3}; +%% false -> +%% {1, Code} +%% end. + +%% modifyCode([I|Code], Var, Res) -> +%% case scanInstr(I, Var) of +%% {move, Arity, VarLst} -> +%% Code2 = [#icode_return{vars=VarLst}, I |lists:reverse(Res) ++ Code], +%% {Arity, lists:reverse(Code2)}; +%% {mktuple, Arity, VarLst} -> +%% Code2 = [#icode_return{vars=VarLst}|lists:reverse(Res) ++ Code], +%% {Arity, lists:reverse(Code2)}; +%% other -> +%% modifyCode(Code, Var, [I|Res]) +%% end; +%% modifyCode([], Var, Res) -> +%% {1, lists:reverse(Res) ++ [#icode_return{vars=Var}]}. + +%% scanInstr(#icode_call{dstlist=Var, 'fun'=mktuple, args=Lst}, Var) -> +%% {mktuple, length(Lst), Lst}; +%% scanInstr(_, _) -> other. + +%% printCode(Cfg) -> +%% Labels = hipe_icode_cfg:labels(Cfg), +%% {_,_,{_,F,_,_,_,_,_,_},_} = Cfg, +%% io:format("~nFunction: ~w~n", [F]), +%% Print = fun(Label) -> +%% Block = hipe_icode_cfg:bb(Cfg, Label), +%% Code = hipe_bb:code(Block), +%% io:format("Label: ~w~n", [Label]), +%% lists:foreach(fun(I) -> io:format("~w~n", [I]) end, Code), +%% io:format("~n") +%% end, +%% lists:foreach(Print, Labels). + +%% printList(File, [{MFA, #icode{code=Code, params=Parms}}|List]) -> +%% io:format(File, "MFA: ~w - Params: ~w~n", [MFA, Parms]), +%% printList2(File, Code), +%% printList(File, List); +%% printList(_, []) -> ok. + +%% printList2(File, []) -> io:format(File, "~n~n", []); +%% printList2(File, IList) when is_list(IList) -> +%% [I|List] = IList, +%% io:format(File, "~w~n", [I]), +%% printList2(File, List); +%% printList2(File, SomethingElse) -> +%% io:format(File, "Got: ~w~n", [SomethingElse]). + +%% optimizeDefine([#icode_call{dstlist=Var,'fun'=mktuple,args=Vs}|Code], +%% Var, _, Res) -> +%% case Vs of +%% [_] -> +%% {none, noOpt}; +%% _ -> +%% optimizeDefine(Code, Var, Vs, Res) +%% end; +%% optimizeDefine([I=#icode_move{dst=Var, src=Src}|Code], [Var], Rets, Res) -> +%% case hipe_icode:is_var(Src) of +%% true -> +%% optimizeDefine(Code, [Src], Rets, Res); +%% false -> +%% case Src of +%% #icode_const{value={flat, Tuple}} when is_tuple(Tuple) -> +%% optimizeDefine(Code, [Var], tuple_to_list(Tuple), [I|Res]); +%% #icode_const{value={flat, _}} -> +%% {none, noOpt}; +%% _ -> +%% optimizeDefine(Code, [Var], Rets, [I|Res]) +%% end +%% end; +%% optimizeDefine([I|Code], Var, Rets, Res) -> +%% optimizeDefine(Code, Var, Rets, [I|Res]); +%% optimizeDefine([], Var, Rets, Res) -> +%% case Rets of +%% [] -> +%% {none, Res, Var}; +%% _ -> +%% {found, Res, Rets} +%% end. diff --git a/lib/hipe/icode/hipe_icode_pp.erl b/lib/hipe/icode/hipe_icode_pp.erl new file mode 100755 index 0000000000..575bbfe43d --- /dev/null +++ b/lib/hipe/icode/hipe_icode_pp.erl @@ -0,0 +1,303 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2003 by Erik Stenman. +%% ==================================================================== +%% Filename : hipe_icode_pp.erl +%% Module : hipe_icode_pp +%% Purpose : Pretty-printer for Icode. +%% Notes : +%% History : * 2003-04-16 (stenman@epfl.ch): Created. +%% CVS : +%% $Author$ +%% $Date$ +%% $Revision$ +%% ==================================================================== +%% +%% @doc +%% Icode Pretty-Printer. +%% @end +%% +%% ==================================================================== + +-module(hipe_icode_pp). + +-export([pp/1, pp/2, pp_block/1]). + +-ifdef(DEBUG_ICODE). +-export([pp_instrs/2]). +-endif. + +-include("hipe_icode.hrl"). + +%%--------------------------------------------------------------------- + +-spec pp(#icode{}) -> 'ok'. +%% @doc Prettyprints linear Icode on stdout. +%%

Badly formed or unknown instructions are printed surrounded +%% by three stars "***".

+pp(Icode) -> + pp(standard_io, Icode). + +-spec pp(io:device(), #icode{}) -> 'ok'. +%% @doc Prettyprints linear Icode on IoDevice. +%%

Badly formed or unknown instructions are printed surrounded by +%% three stars "***".

+pp(Dev, Icode) -> + {Mod, Fun, Arity} = hipe_icode:icode_fun(Icode), + Args = hipe_icode:icode_params(Icode), + io:format(Dev, "~w:~w/~w(", [Mod, Fun, Arity]), + pp_args(Dev, Args), + io:format(Dev, ") ->~n", []), + io:format(Dev, "%% Info:~p\n", + [[case hipe_icode:icode_is_closure(Icode) of + true -> 'Closure'; + false -> 'Not a closure' + end, + case hipe_icode:icode_is_leaf(Icode) of + true -> 'Leaf function'; + false -> 'Not a leaf function' + end | + hipe_icode:icode_info(Icode)]]), + pp_instrs(Dev, hipe_icode:icode_code(Icode)), + io:format(Dev, "%% Data:\n", []), + hipe_data_pp:pp(Dev, hipe_icode:icode_data(Icode), icode, ""). + +-spec pp_block(icode_instrs()) -> 'ok'. +pp_block(Code) -> + pp_instrs(standard_io, Code). + +-spec pp_instrs(io:device(), icode_instrs()) -> 'ok'. +%% @doc Prettyprints a list of Icode instructions. +pp_instrs(Dev, Is) -> + lists:foreach(fun (I) -> pp_instr(Dev, I) end, Is). + +%%--------------------------------------------------------------------- + +pp_instr(Dev, I) -> + case I of + #icode_label{} -> + io:format(Dev, "~p:~n", [hipe_icode:label_name(I)]); + #icode_comment{} -> + Txt = hipe_icode:comment_text(I), + Str = case io_lib:deep_char_list(Txt) of + true -> Txt; + false -> io_lib:format("~p", [Txt]) + end, + io:format(Dev, " % ~s~n", [Str]); + #icode_phi{} -> + io:format(Dev, " ", []), + pp_arg(Dev, hipe_icode:phi_dst(I)), + io:format(Dev, " := phi(", []), + pp_phi_args(Dev, hipe_icode:phi_arglist(I)), + io:format(Dev, ")~n", []); + #icode_move{} -> + io:format(Dev, " ", []), + pp_arg(Dev, hipe_icode:move_dst(I)), + io:format(Dev, " := ", []), + pp_arg(Dev, hipe_icode:move_src(I)), + io:format(Dev, "~n", []); + #icode_call{} -> + io:format(Dev, " ", []), + case hipe_icode:call_dstlist(I) of + [] -> %% result is unused -- e.g. taken out by dead code elimination + io:format(Dev, "_ := ", []); + DstList -> + pp_args(Dev, DstList), + io:format(Dev, " := ", []) + end, + pp_fun(Dev, hipe_icode:call_fun(I), + hipe_icode:call_args(I), + hipe_icode:call_type(I), + hipe_icode:call_in_guard(I)), + case hipe_icode:call_continuation(I) of + [] -> + ok; + CC -> + io:format(Dev, " -> ~w", [CC]) + end, + case hipe_icode:call_fail_label(I) of + [] -> io:format(Dev, "~n", []); + Fail -> io:format(Dev, ", #fail ~w~n", [Fail]) + end; + #icode_enter{} -> + io:format(Dev, " ", []), + pp_fun(Dev, hipe_icode:enter_fun(I), + hipe_icode:enter_args(I), + hipe_icode:enter_type(I)), + io:format(Dev, "~n", []); + #icode_return{} -> + io:format(Dev, " return(", []), + pp_args(Dev, hipe_icode:return_vars(I)), + io:format(Dev, ")~n", []); + #icode_begin_try{} -> + io:format(Dev, " begin_try -> ~w cont ~w~n", + [hipe_icode:begin_try_label(I), + hipe_icode:begin_try_successor(I)]); + #icode_begin_handler{} -> + io:format(Dev, " ", []), + pp_args(Dev, hipe_icode:begin_handler_dstlist(I)), + io:format(Dev, " := begin_handler()~n",[]); + #icode_end_try{} -> + io:format(Dev, " end_try~n", []); + #icode_fail{} -> + Type = hipe_icode:fail_class(I), + io:format(Dev, " fail(~w, [", [Type]), + pp_args(Dev, hipe_icode:fail_args(I)), + case hipe_icode:fail_label(I) of + [] -> io:put_chars(Dev, "])\n"); + Fail -> io:format(Dev, "]) -> ~w\n", [Fail]) + end; + #icode_if{} -> + io:format(Dev, " if ~w(", [hipe_icode:if_op(I)]), + pp_args(Dev, hipe_icode:if_args(I)), + io:format(Dev, ") then ~p (~.2f) else ~p~n", + [hipe_icode:if_true_label(I), hipe_icode:if_pred(I), + hipe_icode:if_false_label(I)]); + #icode_switch_val{} -> + io:format(Dev, " switch_val ",[]), + pp_arg(Dev, hipe_icode:switch_val_term(I)), + pp_switch_cases(Dev, hipe_icode:switch_val_cases(I)), + io:format(Dev, " fail -> ~w\n", + [hipe_icode:switch_val_fail_label(I)]); + #icode_switch_tuple_arity{} -> + io:format(Dev, " switch_tuple_arity ",[]), + pp_arg(Dev, hipe_icode:switch_tuple_arity_term(I)), + pp_switch_cases(Dev,hipe_icode:switch_tuple_arity_cases(I)), + io:format(Dev, " fail -> ~w\n", + [hipe_icode:switch_tuple_arity_fail_label(I)]); + #icode_type{} -> + io:format(Dev, " if is_", []), + pp_type(Dev, hipe_icode:type_test(I)), + io:format(Dev, "(", []), + pp_args(Dev, hipe_icode:type_args(I)), + io:format(Dev, ") then ~p (~.2f) else ~p~n", + [hipe_icode:type_true_label(I), hipe_icode:type_pred(I), + hipe_icode:type_false_label(I)]); + #icode_goto{} -> + io:format(Dev, " goto ~p~n", [hipe_icode:goto_label(I)]) + end. + +pp_fun(Dev, Fun, Args, Type) -> + pp_fun(Dev, Fun, Args, Type, false). + +pp_fun(Dev, Fun, Args, Type, Guard) -> + case Type of + primop -> + hipe_icode_primops:pp(Dev, Fun); + local -> + {_,F,A} = Fun, + io:format(Dev, "~w/~w", [F, A]); + remote -> + {M,F,A} = Fun, + io:format(Dev, "~w:~w/~w", [M, F, A]) + end, + io:format(Dev, "(", []), + pp_args(Dev, Args), + case Guard of + true -> + case Type of + primop -> + io:format(Dev, ") (primop,guard)", []); + _ -> + io:format(Dev, ") (guard)", []) + end; + false -> + case Type of + primop -> + io:format(Dev, ") (primop)", []); + _ -> + io:format(Dev, ")", []) + end + end. + +pp_arg(Dev, Arg) -> + case hipe_icode:is_variable(Arg) of + true -> + case hipe_icode:is_var(Arg) of + true -> + N = hipe_icode:var_name(Arg), + io:format(Dev, "v~p", [N]); + false -> + case hipe_icode:is_reg(Arg) of + true -> + N = hipe_icode:reg_name(Arg), + io:format(Dev, "r~p", [N]); + false -> + N = hipe_icode:fvar_name(Arg), + io:format(Dev, "fv~p", [N]) + end + end, + case hipe_icode:is_annotated_variable(Arg) of + true -> + {_,Val,Fun} = hipe_icode:variable_annotation(Arg), + io:format(Dev, " (~s)", [Fun(Val)]); + false -> + ok + end; + false -> + Const = hipe_icode:const_value(Arg), + io:format(Dev, "~p", [Const]) % ~p because it also prints "" + end. + +pp_args(_Dev, []) -> ok; +pp_args(Dev, [A]) -> + pp_arg(Dev, A); +pp_args(Dev, [A|Args]) -> + pp_arg(Dev, A), + io:format(Dev, ", ", []), + pp_args(Dev, Args). + +pp_phi_args(_Dev, []) -> ok; +pp_phi_args(Dev, [{Pred,A}]) -> + io:format(Dev, "{~w, ", [Pred]), + pp_arg(Dev, A), + io:format(Dev, "}", []); +pp_phi_args(Dev, [{Pred,A}|Args]) -> + io:format(Dev, "{~w, ", [Pred]), + pp_arg(Dev, A), + io:format(Dev, "}, ", []), + pp_phi_args(Dev, Args). + +pp_type(Dev, T) -> + io:format(Dev, "~w", [T]). + +pp_switch_cases(Dev, Cases) -> + io:format(Dev, " of\n",[]), + pp_switch_cases(Dev, Cases,1), + io:format(Dev, "",[]). + +pp_switch_cases(Dev, [{Val,L}], _Pos) -> + io:format(Dev, " ",[]), + pp_arg(Dev, Val), + io:format(Dev, " -> ~w\n", [L]); +pp_switch_cases(Dev, [{Val, L}|Ls], Pos) -> + io:format(Dev, " ",[]), + pp_arg(Dev, Val), + io:format(Dev, " -> ~w;\n", [L]), + NewPos = Pos, + %% case Pos of + %% 5 -> io:format(Dev, "\n ",[]), + %% 0; + %% N -> N + 1 + %% end, + pp_switch_cases(Dev, Ls, NewPos); +pp_switch_cases(_Dev, [], _) -> ok. + diff --git a/lib/hipe/icode/hipe_icode_primops.erl b/lib/hipe/icode/hipe_icode_primops.erl new file mode 100644 index 0000000000..b0fe7eb708 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_primops.erl @@ -0,0 +1,963 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_icode_primops.erl +%% Module : hipe_icode_primops +%% Purpose : +%% Notes : +%% History : * 2001-06-13 Erik Johansson (happi@it.uu.se): +%% Created. +%% +%% $Id$ +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_icode_primops). + +-export([is_safe/1, fails/1, pp/2, type/1, type/2, arg_types/1]). + +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). + +%%--------------------------------------------------------------------- + +%% Note that 'unsafe_...' operations are generally "safe", i.e., it is +%% typically unsafe to use them unless you have extra information about +%% the call (e.g., if the types are known). However, if they have been +%% correctly introduced in the code, most of them are also OK to remove +%% if the result is not used. + +-spec is_safe(icode_primop()) -> boolean(). + +is_safe('+') -> false; +is_safe('/') -> false; +is_safe('*') -> false; +is_safe('-') -> false; +is_safe('bsr') -> false; +is_safe('bsl') -> false; +is_safe('band') -> false; +is_safe('bor') -> false; +is_safe('bxor') -> false; +is_safe('bnot') -> false; +is_safe('div') -> false; +is_safe('rem') -> false; +is_safe(call_fun) -> false; +is_safe(check_get_msg) -> false; +is_safe(clear_timeout) -> false; +is_safe(cons) -> true; +%% is_safe(conv_to_float) -> false; +is_safe(extra_unsafe_add) -> true; +is_safe(extra_unsafe_sub) -> true; +is_safe(fcheckerror) -> false; +is_safe(fclearerror) -> false; +is_safe(fp_add) -> false; +is_safe(fp_div) -> false; +is_safe(fp_mul) -> false; +is_safe(fp_sub) -> false; +is_safe(mktuple) -> true; +is_safe(next_msg) -> false; +is_safe(redtest) -> false; +is_safe(select_msg) -> false; +is_safe(self) -> true; +is_safe(set_timeout) -> false; +is_safe(suspend_msg) -> false; +is_safe(unsafe_add) -> true; +is_safe(unsafe_band) -> true; +is_safe(unsafe_bnot) -> true; +is_safe(unsafe_bor) -> true; +is_safe(unsafe_bsl) -> true; +is_safe(unsafe_bsr) -> true; +is_safe(unsafe_bxor) -> true; +is_safe(unsafe_hd) -> true; +is_safe(unsafe_sub) -> true; +is_safe(unsafe_tag_float) -> true; +is_safe(unsafe_tl) -> true; +is_safe(unsafe_untag_float) -> true; +is_safe(#apply_N{}) -> false; +is_safe(#closure_element{}) -> true; +is_safe(#element{}) -> false; +%% is_safe(#gc_test{}) -> ??? +is_safe({hipe_bs_primop, {bs_start_match, _}}) -> false; +is_safe({hipe_bs_primop, {{bs_start_match, bitstr}, _}}) -> true; +is_safe({hipe_bs_primop, {{bs_start_match, ok_matchstate}, _}}) -> false; +is_safe({hipe_bs_primop, {bs_get_binary, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_get_binary_all, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_get_binary_all_2, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_get_integer, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_get_float, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_skip_bits, _}}) -> false; +is_safe({hipe_bs_primop, {bs_skip_bits_all, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_test_tail, _}}) -> false; +is_safe({hipe_bs_primop, {bs_restore, _}}) -> true; +is_safe({hipe_bs_primop, {bs_save, _}}) -> true; +is_safe({hipe_bs_primop, {bs_add, _}}) -> false; +is_safe({hipe_bs_primop, {bs_add, _, _}}) -> false; +is_safe({hipe_bs_primop, bs_bits_to_bytes}) -> false; +is_safe({hipe_bs_primop, bs_bits_to_bytes2}) -> false; +is_safe({hipe_bs_primop, {bs_init, _}}) -> false; +is_safe({hipe_bs_primop, {bs_init, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_init_bits, _}}) -> false; +is_safe({hipe_bs_primop, {bs_init_bits, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_put_binary, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_put_binary_all, _}}) -> false; +is_safe({hipe_bs_primop, {bs_put_float, _, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_put_integer, _, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_put_string, _, _}}) -> false; +is_safe({hipe_bs_primop, bs_put_utf8}) -> false; +is_safe({hipe_bs_primop, bs_utf8_size}) -> true; +is_safe({hipe_bs_primop, bs_get_utf8}) -> false; +is_safe({hipe_bs_primop, bs_utf16_size}) -> true; +is_safe({hipe_bs_primop, {bs_put_utf16, _}}) -> false; +is_safe({hipe_bs_primop, {bs_get_utf16, _}}) -> false; +is_safe({hipe_bs_primop, bs_validate_unicode}) -> false; +is_safe({hipe_bs_primop, bs_validate_unicode_retract}) -> false; +is_safe({hipe_bs_primop, {unsafe_bs_put_integer, _, _, _}}) -> false; +is_safe({hipe_bs_primop, bs_final}) -> true; +is_safe({hipe_bs_primop, bs_context_to_binary}) -> true; +is_safe({hipe_bs_primop, {bs_test_unit, _}}) -> false; +is_safe({hipe_bs_primop, {bs_match_string, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_append, _, _, _, _}}) -> false; +is_safe({hipe_bs_primop, {bs_private_append, _, _}}) -> false; +is_safe({hipe_bs_primop, bs_init_writable}) -> true; +is_safe(#mkfun{}) -> true; +is_safe(#unsafe_element{}) -> true; +is_safe(#unsafe_update_element{}) -> true. + + +-spec fails(icode_funcall()) -> boolean(). + +fails('+') -> true; +fails('-') -> true; +fails('*') -> true; +fails('/') -> true; +fails('bnot') -> true; +fails('band') -> true; +fails('bor') -> true; +fails('bsl') -> true; +fails('bsr') -> true; +fails('bxor') -> true; +fails('div') -> true; +fails('rem') -> true; +fails(call_fun) -> true; +fails(check_get_msg) -> true; +fails(clear_timeout) -> false; +fails(cons) -> false; +fails(conv_to_float) -> true; +fails(extra_unsafe_add) -> false; +fails(extra_unsafe_sub) -> false; +fails(fcheckerror) -> true; +fails(fclearerror) -> false; +fails(fp_add) -> false; +fails(fp_div) -> false; +fails(fp_mul) -> false; +fails(fp_sub) -> false; +fails(mktuple) -> false; +fails(next_msg) -> false; +fails(redtest) -> false; +fails(select_msg) -> false; +fails(self) -> false; +fails(set_timeout) -> true; +fails(suspend_msg) -> false; +fails(unsafe_untag_float) -> false; +fails(unsafe_tag_float) -> false; +fails(unsafe_add) -> false; +fails(unsafe_band) -> false; +fails(unsafe_bnot) -> false; +fails(unsafe_bor) -> false; +fails(unsafe_bsl) -> false; +fails(unsafe_bsr) -> false; +fails(unsafe_bxor) -> false; +fails(unsafe_hd) -> false; +fails(unsafe_sub) -> false; +%% fails(unsafe_tag_float) -> false; +fails(unsafe_tl) -> false; +%% fails(unsafe_untag_float) -> false; +fails(#apply_N{}) -> true; +fails(#closure_element{}) -> false; +fails(#element{}) -> true; +%% fails(#gc_test{}) -> ??? +fails({hipe_bs_primop, {bs_start_match, _}}) -> true; +fails({hipe_bs_primop, {{bs_start_match, bitstr}, _}}) -> true; +fails({hipe_bs_primop, {{bs_start_match, ok_matchstate}, _}}) -> false; +fails({hipe_bs_primop, {bs_get_binary, _, _}}) -> true; +fails({hipe_bs_primop, {bs_get_binary_all, _, _}}) -> true; +fails({hipe_bs_primop, {bs_get_binary_all_2, _, _}}) -> true; +fails({hipe_bs_primop, {bs_get_integer, _, _}}) -> true; +fails({hipe_bs_primop, {bs_get_float, _, _}}) -> true; +fails({hipe_bs_primop, {bs_skip_bits, _}}) -> true; +fails({hipe_bs_primop, {bs_skip_bits_all, _, _}}) -> true; +fails({hipe_bs_primop, {bs_test_tail, _}}) -> true; +fails({hipe_bs_primop, {bs_restore, _}}) -> false; +fails({hipe_bs_primop, {bs_save, _}}) -> false; +fails({hipe_bs_primop, bs_context_to_binary}) -> false; +fails({hipe_bs_primop, {bs_test_unit, _}}) -> true; +fails({hipe_bs_primop, {bs_match_string, _, _}}) -> true; +fails({hipe_bs_primop, {bs_add, _}}) -> true; +fails({hipe_bs_primop, {bs_add, _, _}}) -> true; +fails({hipe_bs_primop, bs_bits_to_bytes}) -> true; +fails({hipe_bs_primop, bs_bits_to_bytes2}) -> true; +fails({hipe_bs_primop, {bs_init, _}}) -> true; +fails({hipe_bs_primop, {bs_init, _, _}}) -> true; +fails({hipe_bs_primop, {bs_init_bits, _}}) -> true; +fails({hipe_bs_primop, {bs_init_bits, _, _}}) -> true; +fails({hipe_bs_primop, {bs_put_binary, _, _}}) -> true; +fails({hipe_bs_primop, {bs_put_binary_all, _}}) -> true; +fails({hipe_bs_primop, {bs_put_float, _, _, _}}) -> true; +fails({hipe_bs_primop, {bs_put_integer, _, _, _}}) -> true; +fails({hipe_bs_primop, {bs_put_string, _, _}}) -> true; +fails({hipe_bs_primop, bs_put_utf8}) -> true; +fails({hipe_bs_primop, bs_utf8_size}) -> false; +fails({hipe_bs_primop, bs_get_utf8}) -> true; +fails({hipe_bs_primop, bs_utf16_size}) -> false; +fails({hipe_bs_primop, {bs_put_utf16, _}}) -> true; +fails({hipe_bs_primop, {bs_get_utf16, _}}) -> true; +fails({hipe_bs_primop, bs_validate_unicode}) -> true; +fails({hipe_bs_primop, bs_validate_unicode_retract}) -> true; +fails({hipe_bs_primop, {unsafe_bs_put_integer, _, _, _}}) -> true; +fails({hipe_bs_primop, bs_final}) -> false; +fails({hipe_bs_primop, {bs_append, _, _, _, _}}) -> true; +fails({hipe_bs_primop, {bs_private_append, _, _}}) -> true; +fails({hipe_bs_primop, bs_init_writable}) -> true; +fails(#mkfun{}) -> false; +fails(#unsafe_element{}) -> false; +fails(#unsafe_update_element{}) -> false; +%% Apparently, we are calling fails/1 for all MFAs which are compiled. +%% This is weird and we should restructure the compiler to avoid +%% calling fails/1 for things that are not primops. +fails({M, F, A}) when is_atom(M), is_atom(F), is_integer(A), 0 =< A, A =< 255 -> + %% Yes, we should move this. + not erl_bifs:is_safe(M, F, A). + +%%===================================================================== +%% Pretty printing +%%===================================================================== + +-spec pp(io:device(), icode_primop()) -> 'ok'. + +pp(Dev, Op) -> + case Op of + #apply_N{arity = N} -> + io:format(Dev, "apply_N<~w>/", [N]); + #closure_element{n = N} -> + io:format(Dev, "closure_element<~w>", [N]); + #element{} -> + io:format(Dev, "element", []); + #gc_test{need = N} -> + io:format(Dev, "gc_test<~w>", [N]); + {hipe_bs_primop, BsOp} -> + case BsOp of + {bs_put_binary_all, Flags} -> + io:format(Dev, "bs_put_binary_all<~w>", [Flags]); + {bs_put_binary, Size} -> + io:format(Dev, "bs_put_binary<~w>", [Size]); + {bs_put_binary, Flags, Size} -> + io:format(Dev, "bs_put_binary<~w, ~w>", [Flags, Size]); + {bs_put_float, Flags, Size, _ConstInfo} -> + io:format(Dev, "bs_put_float<~w, ~w>", [Flags, Size]); + {bs_put_string, String, SizeInBytes} -> + io:format(Dev, "bs_put_string<~w, ~w>", [String, SizeInBytes]); + {bs_put_integer, Bits, Flags, _ConstInfo} -> + io:format(Dev, "bs_put_integer<~w, ~w>", [Bits, Flags]); + {unsafe_bs_put_integer, Bits, Flags, _ConstInfo} -> + io:format(Dev, "unsafe_bs_put_integer<~w, ~w>", [Bits, Flags]); + {bs_skip_bits_all, Unit, Flags} -> + io:format(Dev, "bs_skip_bits_all<~w,~w>", [Unit, Flags]); + {bs_skip_bits, Unit} -> + io:format(Dev, "bs_skip_bits<~w>", [Unit]); + {bs_start_match, Max} -> + io:format(Dev, "bs_start_match<~w>", [Max]); + {{bs_start_match, Type}, Max} -> + io:format(Dev, "bs_start_match<~w,~w>", [Type,Max]); + {bs_match_string, String, SizeInBytes} -> + io:format(Dev, "bs_match_string<~w, ~w>", [String, SizeInBytes]); + {bs_get_integer, Size, Flags} -> + io:format(Dev, "bs_get_integer<~w, ~w>", [Size, Flags]); + {bs_get_float, Size, Flags} -> + io:format(Dev, "bs_get_float<~w, ~w>", [Size, Flags]); + {bs_get_binary, Size, Flags} -> + io:format(Dev, "bs_get_binary<~w, ~w>", [Size, Flags]); + {bs_get_binary_all, Unit, Flags} -> + io:format(Dev, "bs_get_binary_all<~w,~w>", [Unit, Flags]); + {bs_get_binary_all_2, Unit, Flags} -> + io:format(Dev, "bs_get_binary_all<~w,~w>", [Unit, Flags]); + {bs_test_tail, NumBits} -> + io:format(Dev, "bs_test_tail<~w>", [NumBits]); + {bs_test_unit, Unit} -> + io:format(Dev, "bs_test_unit<~w>", [Unit]); + bs_context_to_binary -> + io:format(Dev, "bs_context_to_binary", []); + {bs_restore, Index} -> + io:format(Dev, "bs_restore<~w>", [Index]); + {bs_save, Index} -> + io:format(Dev, "bs_save<~w>", [Index]); + {bs_init, Size, Flags} -> + io:format(Dev, "bs_init<~w, ~w>", [Size, Flags]); + {bs_init,Flags} -> + io:format(Dev, "bs_init<~w>", [Flags]); + {bs_init_bits, Size, Flags} -> + io:format(Dev, "bs_init_bits<~w, ~w>", [Size, Flags]); + {bs_init_bits, Flags} -> + io:format(Dev, "bs_init_bits<~w>", [Flags]); + {bs_add, Unit} -> + io:format(Dev, "bs_add<~w>", [Unit]); + {bs_add, Const, Unit} -> + io:format(Dev, "bs_add<~w, ~w>", [Const, Unit]); + {bs_append, X, Y, Z, W} -> + io:format(Dev, "bs_append<~w, ~w, ~w, ~w>", [X, Y, Z, W]); + {bs_private_append, U, Flags} -> + io:format(Dev, "bs_private_append<~w, ~w>", [U, Flags]); + bs_bits_to_bytes -> + io:format(Dev, "bs_bits_to_bytes", []); + bs_bits_to_bytes2 -> + io:format(Dev, "bs_bits_to_bytes2", []); + bs_utf8_size -> + io:format(Dev, "bs_utf8_size", []); + bs_put_utf8 -> + io:format(Dev, "bs_put_utf8", []); + bs_get_utf8 -> + io:format(Dev, "bs_get_utf8", []); + bs_utf16_size -> + io:format(Dev, "bs_utf16_size", []); + {bs_put_utf16, Flags} -> + io:format(Dev, "bs_put_utf16<~w>", [Flags]); + {bs_get_utf16, Flags} -> + io:format(Dev, "bs_get_utf16<~w>", [Flags]); + bs_validate_unicode -> + io:format(Dev, "bs_validate_unicode", []); + bs_validate_unicode_retract -> + io:format(Dev, "bs_validate_unicode_retract", []); + bs_final -> + io:format(Dev, "bs_final", []); + bs_final2 -> + io:format(Dev, "bs_final2", []); + bs_init_writable -> + io:format(Dev, "bs_init_writable", []) + end; + #mkfun{mfa = {Mod, Fun, Arity}, magic_num = Unique, index = I} -> + io:format(Dev, "mkfun<~w,~w,~w,~w,~w>", [Mod, Fun, Arity, Unique, I]); + #unsafe_element{index = N} -> + io:format(Dev, "unsafe_element<~w>", [N]); + #unsafe_update_element{index = N} -> + io:format(Dev, "unsafe_update_element<~w>", [N]); + Fun when is_atom(Fun) -> + io:format(Dev, "~w", [Fun]) + end. + +%%===================================================================== +%% Type handling +%%===================================================================== + +-spec type(icode_funcall(), [erl_types:erl_type()]) -> erl_types:erl_type(). + +type(Primop, Args) -> + case Primop of +%%% ----------------------------------------------------- +%%% Arithops + '+' -> + erl_bif_types:type(erlang, '+', 2, Args); + '-' -> + erl_bif_types:type(erlang, '-', 2, Args); + '*' -> + erl_bif_types:type(erlang, '*', 2, Args); + '/' -> + erl_bif_types:type(erlang, '/', 2, Args); + 'band' -> + erl_bif_types:type(erlang, 'band', 2, Args); + 'bnot' -> + erl_bif_types:type(erlang, 'bnot', 1, Args); + 'bor' -> + erl_bif_types:type(erlang, 'bor', 2, Args); + 'bxor' -> + erl_bif_types:type(erlang, 'bxor', 2, Args); + 'bsl' -> + erl_bif_types:type(erlang, 'bsl', 2, Args); + 'bsr' -> + erl_bif_types:type(erlang, 'bsr', 2, Args); + 'div' -> + erl_bif_types:type(erlang, 'div', 2, Args); + 'rem' -> + erl_bif_types:type(erlang, 'rem', 2, Args); + extra_unsafe_add -> + erl_bif_types:type(erlang, '+', 2, Args); + unsafe_add -> + erl_bif_types:type(erlang, '+', 2, Args); + unsafe_bnot -> + erl_bif_types:type(erlang, 'bnot', 1, Args); + unsafe_bor -> + erl_bif_types:type(erlang, 'bor', 2, Args); + unsafe_band -> + erl_bif_types:type(erlang, 'band', 2, Args); + unsafe_bxor -> + erl_bif_types:type(erlang, 'bxor', 2, Args); + unsafe_sub -> + erl_bif_types:type(erlang, '-', 2, Args); +%%% ----------------------------------------------------- +%%% Lists + cons -> + [HeadType, TailType] = Args, + erl_types:t_cons(HeadType, TailType); + unsafe_hd -> + [Type] = Args, + case erl_types:t_is_cons(Type) of + true -> erl_types:t_cons_hd(Type); + false -> erl_types:t_none() + end; + unsafe_tl -> + [Type] = Args, + case erl_types:t_is_cons(Type) of + true -> erl_types:t_cons_tl(Type); + false -> erl_types:t_none() + end; +%%% ----------------------------------------------------- +%%% Tuples + mktuple -> + erl_types:t_tuple(Args); + #element{} -> + erl_bif_types:type(erlang, element, 2, Args); + #unsafe_element{index = N} -> + [Type] = Args, + case erl_types:t_is_tuple(Type) of + false -> + erl_types:t_none(); + true -> + Index = erl_types:t_from_term(N), + erl_bif_types:type(erlang, element, 2, [Index|Args]) + end; + #unsafe_update_element{index = N} -> + %% Same, same + erl_bif_types:type(erlang, setelement, 3, [erl_types:t_integer(N)|Args]); +%%% ----------------------------------------------------- +%%% Floats + fclearerror -> + erl_types:t_any(); + fcheckerror -> + erl_types:t_any(); + unsafe_tag_float -> + erl_types:t_float(); + %% These might look surprising, but the return is an untagged + %% float and we have no type for untagged values. + conv_to_float -> + erl_types:t_any(); + unsafe_untag_float -> + erl_types:t_any(); + fp_add -> + erl_types:t_any(); + fp_sub -> + erl_types:t_any(); + fp_mul -> + erl_types:t_any(); + fp_div -> + erl_types:t_any(); + fnegate -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% + {hipe_bs_primop, {bs_start_match, Max}} -> + [Type] = Args, + Init = + erl_types:t_sup( + erl_types:t_matchstate_present(Type), + erl_types:t_inf(erl_types:t_bitstr(1, 0), Type)), + case erl_types:t_is_none(Init) of + true -> + erl_types:t_none(); + false -> + erl_types:t_matchstate(Init, Max) + end; + {hipe_bs_primop, {{bs_start_match, _}, Max}} -> + [Type] = Args, + Init = + erl_types:t_sup( + erl_types:t_matchstate_present(Type), + erl_types:t_inf(erl_types:t_bitstr(1, 0), Type)), + case erl_types:t_is_none(Init) of + true -> + erl_types:t_none(); + false -> + erl_types:t_matchstate(Init, Max) + end; + {hipe_bs_primop, {bs_get_integer, Size, Flags}} -> + Signed = Flags band 4, + [MatchState|RestArgs] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + case RestArgs of + [] -> + NewBinType = match_bin(erl_types:t_bitstr(0, Size), BinType), + NewMatchState = + erl_types:t_matchstate_update_present(NewBinType, MatchState), + if Signed =:= 0 -> + erl_types:t_product([erl_types:t_from_range(0, 1 bsl Size - 1), + NewMatchState]); + Signed =:= 4 -> + erl_types:t_product([erl_types:t_from_range(- (1 bsl (Size-1)), + (1 bsl (Size-1)) - 1), + NewMatchState]) + end; + [_Arg] -> + NewBinType = match_bin(erl_types:t_bitstr(Size, 0), BinType), + NewMatchState = + erl_types:t_matchstate_update_present(NewBinType, MatchState), + erl_types:t_product([erl_types:t_integer(), NewMatchState]) + end; + {hipe_bs_primop, {bs_get_float, Size, _Flags}} -> + [MatchState|RestArgs] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + NewBinType = + case RestArgs of + [] -> + match_bin(erl_types:t_bitstr(0,Size),BinType); + [_Arg] -> + erl_types:t_sup(match_bin(erl_types:t_bitstr(0, 32), BinType), + match_bin(erl_types:t_bitstr(0, 64), BinType)) + end, + NewMatchState = erl_types:t_matchstate_update_present(NewBinType, MatchState), + erl_types:t_product([erl_types:t_float(), NewMatchState]); + {hipe_bs_primop, {bs_get_binary, Size, _Flags}} -> + [MatchState|RestArgs] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + case RestArgs of + [] -> + NewBinType = match_bin(erl_types:t_bitstr(0, Size), BinType), + NewMatchState = erl_types:t_matchstate_update_present(NewBinType, MatchState), + erl_types:t_product([erl_types:t_bitstr(0,Size), NewMatchState]); + [ArgType] -> + Posint = erl_types:t_inf(erl_types:t_non_neg_integer(), ArgType), + case erl_types:t_is_none(Posint) of + true -> + erl_types:t_product([erl_types:t_none(), + erl_types:t_matchstate_update_present( + erl_types:t_none(), + MatchState)]); + false -> + OutBinType = + erl_types:t_bitstr(Size,erl_types:number_min(Posint)*Size), + NewBinType = match_bin(OutBinType,BinType), + NewMatchState = erl_types:t_matchstate_update_present(NewBinType, MatchState), + erl_types:t_product([OutBinType, NewMatchState]) + end + end; + {hipe_bs_primop, {bs_get_binary_all, Unit, _Flags}} -> + [MatchState] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + erl_types:t_inf(BinType, erl_types:t_bitstr(Unit, 0)); + {hipe_bs_primop, {bs_get_binary_all_2, Unit, _Flags}} -> + [MatchState] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + erl_types:t_product( + [erl_types:t_inf(BinType,erl_types:t_bitstr(Unit, 0)), + erl_types:t_matchstate_update_present( + erl_types:t_bitstr(0, 0), MatchState)]); + {hipe_bs_primop, {bs_skip_bits_all, _Unit, _Flags}} -> + [MatchState] = Args, + erl_types:t_matchstate_update_present(erl_types:t_bitstr(0,0),MatchState); + {hipe_bs_primop, {bs_skip_bits, Size}} -> + [MatchState|RestArgs] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + NewBinType = + case RestArgs of + [] -> + match_bin(erl_types:t_bitstr(0, Size), BinType); + [_Arg] -> + match_bin(erl_types:t_bitstr(Size, 0), BinType) + end, + erl_types:t_matchstate_update_present(NewBinType, MatchState); + {hipe_bs_primop, {bs_save, Slot}} -> + [MatchState] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + erl_types:t_matchstate_update_slot(BinType, MatchState, Slot); + {hipe_bs_primop, {bs_restore, Slot}} -> + [MatchState] = Args, + BinType = erl_types:t_matchstate_slot(MatchState, Slot), + erl_types:t_matchstate_update_present(BinType, MatchState); + {hipe_bs_primop, bs_context_to_binary} -> + [Type] = Args, + erl_types:t_sup( + erl_types:t_subtract(Type, erl_types:t_matchstate()), + erl_types:t_matchstate_slot( + erl_types:t_inf(Type, erl_types:t_matchstate()), 0)); + {hipe_bs_primop, {bs_match_string,_,Bytes}} -> + [MatchState] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + NewBinType = match_bin(erl_types:t_bitstr(0, Bytes*8), BinType), + erl_types:t_matchstate_update_present(NewBinType, MatchState); + {hipe_bs_primop, {bs_test_unit,Unit}} -> + [MatchState] = Args, + BinType = erl_types:t_matchstate_present(MatchState), + NewBinType = erl_types:t_inf(erl_types:t_bitstr(Unit, 0), BinType), + erl_types:t_matchstate_update_present(NewBinType, MatchState); + {hipe_bs_primop, {bs_add, _, _}} -> + erl_types:t_integer(); + {hipe_bs_primop, {bs_add, _}} -> + erl_types:t_integer(); + {hipe_bs_primop, bs_bits_to_bytes} -> + erl_types:t_integer(); + {hipe_bs_primop, bs_bits_to_bytes2} -> + erl_types:t_integer(); + {hipe_bs_primop, {Name, Size, _Flags, _ConstInfo}} + when Name =:= bs_put_integer; + Name =:= bs_put_float -> + case Args of + [_SrcType, _Base, Type] -> + erl_types:t_bitstr_concat(Type, erl_types:t_bitstr(0, Size)); + [_SrcType,_BitsType, _Base, Type] -> + erl_types:t_bitstr_concat(Type, erl_types:t_bitstr(Size, 0)) + end; + {hipe_bs_primop, {bs_put_binary, Size, _Flags}} -> + case Args of + [_SrcType, _Base, Type] -> + erl_types:t_bitstr_concat(Type, erl_types:t_bitstr(0, Size)); + [_SrcType, _BitsType, _Base, Type] -> + erl_types:t_bitstr_concat(Type, erl_types:t_bitstr(Size, 0)) + end; + {hipe_bs_primop, {bs_put_binary_all, _Flags}} -> + [SrcType, _Base, Type] = Args, + erl_types:t_bitstr_concat(SrcType,Type); + {hipe_bs_primop, {bs_put_string, _, Size}} -> + [_Base, Type] = Args, + erl_types:t_bitstr_concat(Type, erl_types:t_bitstr(0, 8*Size)); + {hipe_bs_primop, bs_utf8_size} -> + [_Arg] = Args, + erl_types:t_from_range(1, 4); + {hipe_bs_primop, bs_utf16_size} -> + [_Arg] = Args, + erl_types:t_from_range(2, 4); % XXX: really 2 | 4 + {hipe_bs_primop, bs_final} -> + [_Base, Type] = Args, + Type; + {hipe_bs_primop, {bs_init, Size, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(0, Size*8), + erl_types:t_any(), + erl_types:t_bitstr(0, 0)]); + {hipe_bs_primop, {bs_init, _Flags}} -> + erl_types:t_product( + [erl_types:t_binary(), + erl_types:t_any(), + erl_types:t_bitstr(0, 0)]); + {hipe_bs_primop, {bs_init_bits, Size, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(0, Size), + erl_types:t_any(), + erl_types:t_bitstr(0, 0)]); + {hipe_bs_primop, {bs_init_bits, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(), + erl_types:t_any(), + erl_types:t_bitstr(0, 0)]); + {hipe_bs_primop, {bs_private_append, _U, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(), + erl_types:t_any(), + erl_types:t_bitstr()]); + {hipe_bs_primop, {bs_append, _W, _R, _U, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(), + erl_types:t_any(), + erl_types:t_bitstr()]); + {hipe_bs_primop, bs_init_writable} -> + erl_types:t_bitstr(0, 0); + {hipe_bs_primop, _BsOp} -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% Funs + #mkfun{mfa = {_M, _F, A}} -> + %% Note that the arity includes the bound variables in args + erl_types:t_fun(A - length(Args), erl_types:t_any()); + #apply_N{} -> + erl_types:t_any(); + Op when Op =:= call_fun orelse Op =:= enter_fun -> + [Fun0|TailArgs0] = lists:reverse(Args), + TailArgs = lists:reverse(TailArgs0), + Fun = erl_types:t_inf(erl_types:t_fun(), Fun0), + case erl_types:t_is_fun(Fun) of + true -> + case erl_types:t_fun_args(Fun) of + unknown -> + erl_types:t_any(); + FunArgs -> + case check_fun_args(FunArgs, TailArgs) of + ok -> + erl_types:t_fun_range(Fun); + error -> + erl_types:t_none() + end + end; + false -> + erl_types:t_none() + end; +%%% ----------------------------------------------------- +%%% Communication + check_get_msg -> + erl_types:t_any(); + clear_timeout -> + erl_types:t_any(); + next_msg -> + erl_types:t_any(); + select_msg -> + erl_types:t_any(); + set_timeout -> + erl_types:t_any(); + suspend_msg -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% Other + #closure_element{} -> + erl_types:t_any(); + redtest -> + erl_types:t_any(); + {M, F, A} -> + erl_bif_types:type(M, F, A, Args) + end. + + +-spec type(icode_funcall()) -> erl_types:erl_type(). + +type(Primop) -> + case Primop of +%%% ----------------------------------------------------- +%%% Arithops + 'bnot' -> + erl_bif_types:type(erlang, 'bnot', 1); + '+' -> + erl_bif_types:type(erlang, '+', 2); + '-' -> + erl_bif_types:type(erlang, '-', 2); + '*' -> + erl_bif_types:type(erlang, '*', 2); + '/' -> + erl_bif_types:type(erlang, '/', 2); + 'div' -> + erl_bif_types:type(erlang, 'div', 2); + 'rem' -> + erl_bif_types:type(erlang, 'rem', 2); + 'band' -> + erl_bif_types:type(erlang, 'band', 2); + 'bor' -> + erl_bif_types:type(erlang, 'bor', 2); + 'bxor' -> + erl_bif_types:type(erlang, 'bxor', 2); + 'bsr' -> + erl_bif_types:type(erlang, 'bsr', 2); + 'bsl' -> + erl_bif_types:type(erlang, 'bsl', 2); + unsafe_add -> + erl_bif_types:type(erlang, '+', 2); + extra_unsafe_add -> + erl_bif_types:type(erlang, '+', 2); + unsafe_sub -> + erl_bif_types:type(erlang, '-', 2); + unsafe_bor -> + erl_bif_types:type(erlang, 'bor', 2); + unsafe_band -> + erl_bif_types:type(erlang, 'band', 2); + unsafe_bxor -> + erl_bif_types:type(erlang, 'bxor', 2); +%%% ----------------------------------------------------- +%%% Lists + cons -> + erl_types:t_cons(); + unsafe_hd -> + erl_bif_types:type(erlang, hd, 1); + unsafe_tl -> + erl_bif_types:type(erlang, tl, 1); +%%% ----------------------------------------------------- +%%% Tuples + mktuple -> + erl_types:t_tuple(); + #element{} -> + erl_bif_types:type(erlang, element, 2); + #unsafe_element{} -> + erl_bif_types:type(erlang, element, 2); + #unsafe_update_element{} -> + erl_bif_types:type(erlang, setelement, 3); +%%% ----------------------------------------------------- +%%% Floats + fclearerror -> + erl_types:t_any(); + fcheckerror -> + erl_types:t_any(); + unsafe_tag_float -> + erl_types:t_float(); + %% These might look surprising, but the return is an untagged + %% float and we have no type for untagged values. + conv_to_float -> + erl_types:t_any(); + unsafe_untag_float -> + erl_types:t_any(); + fp_add -> + erl_types:t_any(); + fp_sub -> + erl_types:t_any(); + fp_mul -> + erl_types:t_any(); + fp_div -> + erl_types:t_any(); + fnegate -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% Binaries + {hipe_bs_primop, bs_get_utf8} -> + erl_types:t_product([erl_types:t_integer(), erl_types:t_matchstate()]); + {hipe_bs_primop, {bs_get_utf16, _Flags}} -> + erl_types:t_product([erl_types:t_integer(), erl_types:t_matchstate()]); + {hipe_bs_primop, {bs_get_integer, _Size, _Flags}} -> + erl_types:t_product([erl_types:t_integer(), erl_types:t_matchstate()]); + {hipe_bs_primop, {bs_get_float, _, _}} -> + erl_types:t_product([erl_types:t_float(), erl_types:t_matchstate()]); + {hipe_bs_primop, {bs_get_binary, _, _}} -> + erl_types:t_product([erl_types:t_bitstr(), erl_types:t_matchstate()]); + {hipe_bs_primop, {bs_get_binary_all, _, _}} -> + erl_types:t_bitstr(); + {hipe_bs_primop, {bs_get_binary_all_2, _, _}} -> + erl_types:t_product([erl_types:t_bitstr(), erl_types:t_matchstate()]); + {hipe_bs_primop, bs_final} -> + erl_types:t_bitstr(); + {hipe_bs_primop, {bs_init, _, _}} -> + erl_types:t_product([erl_types:t_binary(), erl_types:t_bitstr(), + erl_types:t_bitstr()]); + {hipe_bs_primop, {bs_init, _}} -> + erl_types:t_product([erl_types:t_binary(), erl_types:t_bitstr(), + erl_types:t_bitstr()]); + {hipe_bs_primop, {bs_init_bits, Size, _}} -> + erl_types:t_product([erl_types:t_bitstr(0, Size), erl_types:t_bitstr(), + erl_types:t_bitstr()]); + {hipe_bs_primop, {bs_init_bits, _}} -> + erl_types:t_product([erl_types:t_bitstr(), erl_types:t_bitstr(), + erl_types:t_bitstr()]); + {hipe_bs_primop, {bs_add, _, _}} -> + erl_types:t_integer(); + {hipe_bs_primop, {bs_add, _}} -> + erl_types:t_integer(); + {hipe_bs_primop, bs_bits_to_bytes} -> + erl_types:t_integer(); + {hipe_bs_primop, bs_bits_to_bytes2} -> + erl_types:t_integer(); + {hipe_bs_primop, {bs_private_append, _U, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(), + erl_types:t_any(), + erl_types:t_bitstr()]); + {hipe_bs_primop, {bs_append, _W, _R, _U, _Flags}} -> + erl_types:t_product( + [erl_types:t_bitstr(), + erl_types:t_any(), + erl_types:t_bitstr()]); + {hipe_bs_primop, bs_init_writable} -> + erl_types:t_bitstr(); + {hipe_bs_primop, _BsOp} -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% Funs + #mkfun{} -> + %% Note that the arity includes the bound variables in args + erl_types:t_fun(); + #apply_N{} -> + erl_types:t_any(); + call_fun -> + erl_types:t_any(); + enter_fun -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% Communication + check_get_msg -> + erl_types:t_any(); + clear_timeout -> + erl_types:t_any(); + next_msg -> + erl_types:t_any(); + select_msg -> + erl_types:t_any(); + set_timeout -> + erl_types:t_any(); + suspend_msg -> + erl_types:t_any(); +%%% ----------------------------------------------------- +%%% Other + #closure_element{} -> + erl_types:t_any(); + redtest -> + erl_types:t_any(); + {M, F, A} -> + erl_bif_types:type(M, F, A) + end. + + +%% ===================================================================== +%% @doc +%% function arg_types returns a list of the demanded argument types for +%% a bif to succeed. + +-spec arg_types(icode_funcall()) -> [erl_types:erl_type()] | 'unknown'. + +arg_types(Primop) -> + case Primop of + {M, F, A} -> + erl_bif_types:arg_types(M, F, A); + #element{} -> + [erl_types:t_pos_fixnum(), erl_types:t_tuple()]; + '+' -> + erl_bif_types:arg_types(erlang, '+', 2); + '-' -> + erl_bif_types:arg_types(erlang, '-', 2); + '*' -> + erl_bif_types:arg_types(erlang, '*', 2); + '/' -> + erl_bif_types:arg_types(erlang, '/', 2); + 'band' -> + erl_bif_types:arg_types(erlang, 'band', 2); + 'bnot' -> + erl_bif_types:arg_types(erlang, 'bnot', 1); + 'bor' -> + erl_bif_types:arg_types(erlang, 'bor', 2); + 'bxor' -> + erl_bif_types:arg_types(erlang, 'bxor', 2); + 'bsl' -> + erl_bif_types:arg_types(erlang, 'bsl', 2); + 'bsr' -> + erl_bif_types:arg_types(erlang, 'bsr', 2); + 'div' -> + erl_bif_types:arg_types(erlang, 'div', 2); + 'rem' -> + erl_bif_types:arg_types(erlang, 'rem', 2); + _ -> + unknown % safe approximation for all primops. + end. + +%%===================================================================== +%% Auxiliary functions +%%===================================================================== + +check_fun_args([T1|Left1], [T2|Left2]) -> + Inf = erl_types:t_inf(T1, T2), + case erl_types:t_inf(Inf, T2) of + Inf -> + check_fun_args(Left1, Left2); + _ -> + error + end; +check_fun_args([], []) -> + ok; +check_fun_args(_, _) -> + error. + +match_bin(Pattern, Match) -> + erl_types:t_bitstr_match(Pattern, Match). diff --git a/lib/hipe/icode/hipe_icode_primops.hrl b/lib/hipe/icode/hipe_icode_primops.hrl new file mode 100644 index 0000000000..8a65c5ece0 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_primops.hrl @@ -0,0 +1,40 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%======================================================================= +%% File : hipe_icode_primops.hrl +%% Author : Kostis Sagonas +%% Description : Contains definitions for HiPE's primitive operations. +%%======================================================================= +%% $Id$ +%%======================================================================= + +-record(apply_N, {arity :: arity()}). + +-record(closure_element, {n :: arity()}). + +-record(element, {typeinfo :: list()}). %% XXX: refine? + +-record(gc_test, {need :: non_neg_integer()}). + +-record(mkfun, {mfa :: mfa(), magic_num :: integer(), index :: integer()}). + +-record(unsafe_element, {index :: non_neg_integer()}). + +-record(unsafe_update_element, {index :: non_neg_integer()}). diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl new file mode 100644 index 0000000000..bcc857acf4 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_range.erl @@ -0,0 +1,1966 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%------------------------------------------------------------------- +%%% File : hipe_icode_range.erl +%%% Author : Per Gustafsson +%%% Description : +%%% +%%% Created : 12 Mar 2007 by Per Gustafsson +%%%------------------------------------------------------------------- +-module(hipe_icode_range). + +-export([cfg/4]). + +%%===================================================================== +%% Icode Coordinator Behaviour Callbacks +%%===================================================================== + +-export([replace_nones/1, + update__info/2, new__info/1, return__info/1, + return_none/0, return_none_args/2, return_any_args/2]). + +%%===================================================================== + +-import(erl_types, [t_any/0, + t_from_range_unsafe/2, + t_inf/2, t_integer/0, + t_to_string/1, t_to_tlist/1, + t_limit/2, t_none/0, + number_min/1, number_max/1]). + +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/hipe_bb.hrl"). +-include("hipe_icode_type.hrl"). + +-type range_tuple() :: {'neg_inf' | integer(), 'pos_inf' | integer()}. +-type range_rep() :: range_tuple() | 'empty'. +-type fun_name() :: atom() | tuple(). +-type inf_integer() :: 'neg_inf' | 'pos_inf' | integer(). + +-record(range, {range :: range_rep(), + other :: boolean()}). + +-record(ann, {range :: #range{}, + type :: erl_types:erl_type(), + count :: integer()}). + +-type range_anno() :: {range_anno, #ann{}, fun((#ann{}) -> string())}. +-type args_fun() :: fun((mfa(),cfg()) -> [#range{}]). +-type call_fun() :: fun((mfa(),[#range{}]) -> #range{}). +-type final_fun() :: fun((mfa(),[#range{}]) -> ok). +-type data() :: {mfa(), args_fun(), call_fun(), final_fun()}. +-type label() :: non_neg_integer(). +-type info() :: gb_tree(). +-type work_list() :: {[label()], [label()], set()}. +-type variable() :: #icode_variable{}. +-type annotated_variable() :: #icode_variable{}. +-type argument() :: #icode_const{} | variable(). +-type three_range_fun() :: fun((#range{},#range{},#range{}) -> #range{}). +-type instr_split_info() :: {icode_instr(), [{label(),info()}]}. +-type last_instr_return() :: {instr_split_info(), #range{}}. + +-record(state, {info_map = gb_trees:empty() :: info(), + counter = dict:new() :: dict(), + cfg :: cfg(), + liveness = gb_trees:empty() :: gb_tree(), + ret_type :: #range{}, + lookup_fun :: call_fun(), + result_action :: final_fun()}). + +-define(WIDEN, 1). + +-define(TAG_IMMED1_SIZE, 4). + +-define(BITS, 64). + +%%--------------------------------------------------------------------- + +-spec cfg(cfg(), mfa(), comp_options(), #comp_servers{}) -> cfg(). + +cfg(Cfg, MFA, Options, Servers) -> + case proplists:get_bool(concurrent_comp, Options) of + true -> + concurrent_cfg(Cfg, MFA, Servers#comp_servers.range); + false -> + ordinary_cfg(Cfg, MFA) + end. + +-spec concurrent_cfg(cfg(), mfa(), pid()) -> cfg(). + +concurrent_cfg(Cfg, MFA, CompServer) -> + CompServer ! {ready, {MFA,self()}}, + {ArgsFun,CallFun,FinalFun} = do_analysis(Cfg, MFA), + Ans = do_rewrite(Cfg, MFA, ArgsFun, CallFun, FinalFun), + CompServer ! {done_rewrite, MFA}, + Ans. + +-spec do_analysis(cfg(), mfa()) -> {args_fun(), call_fun(), final_fun()}. + +do_analysis(Cfg, MFA) -> + receive + {analyse, {ArgsFun, CallFun, FinalFun}} -> + analyse(Cfg, {MFA, ArgsFun, CallFun, FinalFun}), + do_analysis(Cfg, MFA); + {done, {_NewArgsFun, _NewCallFun, _NewFinalFun} = T} -> + T + end. + +-spec do_rewrite(cfg(), mfa(), args_fun(), call_fun(), final_fun()) -> cfg(). + +do_rewrite(Cfg, MFA, ArgsFun, CallFun, FinalFun) -> + common_rewrite(Cfg, {MFA, ArgsFun, CallFun, FinalFun}). + +-spec ordinary_cfg(cfg(), mfa()) -> cfg(). + +ordinary_cfg(Cfg, MFA) -> + Data = make_data(Cfg,MFA), + common_rewrite(Cfg, Data). + +-spec common_rewrite(cfg(), data()) -> cfg(). + +common_rewrite(Cfg, Data) -> + State = safe_analyse(Cfg, Data), + State2 = rewrite_blocks(State), + Cfg1 = state__cfg(State2), + Cfg2 = hipe_icode_cfg:remove_unreachable_code(Cfg1), + Cfg3 = convert_cfg_to_types(Cfg2), + hipe_icode_type:specialize(Cfg3). + +-spec make_data(cfg(), mfa()) -> data(). + +make_data(Cfg, {_M,_F,A}=MFA) -> + NoArgs = + case hipe_icode_cfg:is_closure(Cfg) of + true -> hipe_icode_cfg:closure_arity(Cfg)+1; + false -> A + end, + Args = lists:duplicate(NoArgs, any_type()), + ArgsFun = fun(_,_) -> Args end, + CallFun = fun(_,_) -> any_type() end, + FinalFun = fun(_,_) -> ok end, + {MFA, ArgsFun, CallFun, FinalFun}. + +-spec analyse(cfg(), data()) -> 'ok'. + +analyse(Cfg, Data) -> + try + #state{} = safe_analyse(Cfg, Data), + ok + catch throw:no_input -> ok + end. + +-spec safe_analyse(cfg(), data()) -> #state{}. + +safe_analyse(CFG, Data={MFA,_,_,_}) -> + State = state__init(CFG, Data), + Work = init_work(State), + NewState = analyse_blocks(State, Work), + (state__result_action(NewState))(MFA, [state__ret_type(NewState)]), + NewState. + +-spec rewrite_blocks(#state{}) -> #state{}. + +rewrite_blocks(State) -> + CFG = state__cfg(State), + Start = hipe_icode_cfg:start_label(CFG), + rewrite_blocks([Start], State, [Start]). + +-spec rewrite_blocks([label()], #state{}, [label()]) -> #state{}. + +rewrite_blocks([Next|Rest], State, Visited) -> + Info = state__info_in(State, Next), + {NewState, NewLabels} = analyse_block(Next, Info, State, true), + NewLabelsSet = ordsets:from_list(NewLabels), + RealNew = ordsets:subtract(NewLabelsSet, Visited), + NewVisited = ordsets:union([RealNew, Visited, [Next]]), + NewWork = ordsets:union([RealNew, Rest]), + rewrite_blocks(NewWork, NewState, NewVisited); +rewrite_blocks([], State, _) -> + State. + +-spec analyse_blocks(#state{}, work_list()) -> #state{}. + +analyse_blocks(State, Work) -> + case get_work(Work) of + fixpoint -> + State; + {Label, NewWork} -> + Info = state__info_in(State, Label), + {NewState, NewLabels} = + try analyse_block(Label, Info, State, false) + catch throw:none_range -> + {State, []} + end, + NewWork2 = add_work(NewWork, NewLabels), + analyse_blocks(NewState, NewWork2) + end. + +-spec analyse_block(label(), info(), #state{}, boolean()) -> {#state{}, [label()]}. + +analyse_block(Label, Info, State, Rewrite) -> + BB = state__bb(State, Label), + Code = hipe_bb:code(BB), + {NewCode, InfoList, RetType} = + analyse_BB(Code, Info, [], Rewrite, state__lookup_fun(State)), + State1 = state__bb_add(State, Label, hipe_bb:mk_bb(NewCode)), + State2 = state__ret_type_update(State1, RetType), + state__update_info(State2, InfoList, Rewrite). + +-spec analyse_BB([icode_instr()], info(), [icode_instr()], boolean(), call_fun()) -> + {[icode_instr()], [{label(),info()}], #range{}}. + +analyse_BB([Last], Info, Code, Rewrite, LookupFun) -> + {{NewI, LabelInfoList}, RetType} = + analyse_last_insn(Last, Info, Rewrite, LookupFun), + {lists:reverse([NewI|Code]), LabelInfoList, RetType}; +analyse_BB([Insn|InsnList], Info, Code, Rewrite, LookupFun) -> + {NewInfo, NewI} = analyse_insn(Insn, Info, LookupFun), + analyse_BB(InsnList, NewInfo, [NewI|Code], Rewrite, LookupFun). + +-spec analyse_insn(icode_instr(), info(), call_fun()) -> {info(), icode_instr()}. + +analyse_insn(I, Info, LookupFun) -> + %% io:format("~w Info: ~p~n", [I, Info]), + NewI = handle_args(I,Info), + FinalI = + case NewI of + #icode_call{} -> analyse_call(NewI, LookupFun); + #icode_move{} -> analyse_move(NewI); + #icode_phi{} -> analyse_phi(NewI); + #icode_begin_handler{} -> analyse_begin_handler(NewI); + #icode_comment{} -> NewI + end, + {enter_vals(FinalI, Info), FinalI}. + +-spec handle_args(icode_instr(), info()) -> icode_instr(). + +handle_args(I, Info) -> + WidenFun = fun update_three/3, + handle_args(I, Info, WidenFun). + +-spec handle_args(icode_instr(), info(), three_range_fun()) -> icode_instr(). + +handle_args(I, Info, WidenFun) -> + Uses = hipe_icode:uses(I), + PresentRanges = [lookup(V, Info) || V <- Uses], + %% io:format("Uses: ~p~nRanges: ~p~n", [Uses, PresentRanges]), + JoinFun = fun(Var, Range) -> update_info(Var, Range, WidenFun) end, + NewUses = lists:zipwith(JoinFun, Uses, PresentRanges), + hipe_icode:subst_uses(lists:zip(Uses, NewUses),I). + +-spec join_info(#ann{}, #range{}, three_range_fun()) -> #ann{}. + +join_info(Ann = #ann{range = R1, type = Type, count = ?WIDEN}, R2, Fun) -> + Ann#ann{range = Fun(R1, R2, range_from_simple_type(Type))}; +join_info(Ann = #ann{range = R1, type = Type, count = C}, R2, _Fun) when C < ?WIDEN -> + case join_three(R1, R2, range_from_simple_type(Type)) of + R1 -> Ann; + NewR -> Ann#ann{range = NewR, count = C+1} + end. + +-spec join_three(#range{}, #range{}, #range{}) -> #range{}. + +join_three(R1, R2, R3) -> + inf(sup(R1, R2), R3). + +-spec update_info(variable(), #range{}) -> annotated_variable(). + +update_info(Var, Range) -> + update_info(Var, Range, fun update_three/3). + +-spec update_info(variable(), #range{}, three_range_fun()) -> annotated_variable(). + +update_info(Arg, R, Fun) -> + case hipe_icode:is_annotated_variable(Arg) of + true -> + Ann = hipe_icode:variable_annotation(Arg), + hipe_icode:annotate_variable(Arg, update_info1(Ann, R, Fun)); + false -> + Arg + end. + +-spec update_info1(any(), #range{}, three_range_fun()) -> range_anno(). + +update_info1({range_anno, Ann, _}, R2, Fun) -> + make_range_anno(update_ann(Ann,R2,Fun)); +update_info1({type_anno, Type, _}, R2, Fun) -> + make_range_anno(update_ann(type_to_ann(Type), R2, Fun)). + +update_ann(Ann = #ann{range = R1, type = Type, count = ?WIDEN}, R2, Fun) -> + Ann#ann{range = Fun(R1,R2,range_from_simple_type(Type))}; +update_ann(Ann = #ann{range = R1, type = Type, count = C}, R2, _Fun) -> + case update_three(R1, R2, range_from_simple_type(Type)) of + R1 -> Ann; + NewR -> Ann#ann{range = NewR, count = C+1} + end. + +-spec type_to_ann(erl_types:erl_type()) -> #ann{}. + +type_to_ann(Type) -> + #ann{range = range_from_simple_type(Type), type = t_limit(Type,1), count=1}. + +-spec make_range_anno(#ann{}) -> range_anno(). + +make_range_anno(Ann) -> + {range_anno, Ann, fun pp_ann/1}. + +-spec update_three(#range{}, #range{}, #range{}) -> #range{}. + +update_three(_R1, R2, R3) -> + inf(R2, R3). + +-spec safe_widen(#range{}, #range{}, #range{}) -> #range{}. + +safe_widen(#range{range=Old}, #range{range=New}, T = #range{range=Wide}) -> + ResRange = + case {Old,New,Wide} of + {{Min,Max1},{Min,Max2},{_,Max}} -> + case inf_geq(OMax = next_up_limit(inf_max([Max1,Max2])),Max) of + true -> {Min,Max}; + false -> {Min,OMax} + end; + {{Min1,Max},{Min2,Max},{Min,_}} -> + case inf_geq(Min, OMin = next_down_limit(inf_min([Min1,Min2]))) of + true -> {Min,Max}; + false -> {OMin,Max} + end; + {{Min1,Max1},{Min2,Max2},{Min,Max}} -> + RealMax = + case inf_geq(OMax = next_up_limit(inf_max([Max1,Max2])),Max) of + true -> Max; + false -> OMax + end, + RealMin = + case inf_geq(Min, OMin = next_down_limit(inf_min([Min1,Min2]))) of + true -> Min; + false -> OMin + end, + {RealMin,RealMax}; + _ -> + Wide + end, + T#range{range=ResRange}. + +-spec widen(#range{}, #range{}, #range{}) -> #range{}. + +widen(#range{range=Old}, #range{range=New}, T = #range{range=Wide}) -> + ResRange = + case {Old,New,Wide} of + {{Min,_},{Min,Max2},{_,Max}} -> + case inf_geq(OMax = next_up_limit(Max2),Max) of + true -> {Min,Max}; + false -> {Min,OMax} + end; + {{_,Max},{Min2,Max},{Min,_}} -> + case inf_geq(Min, OMin = next_down_limit(Min2)) of + true -> {Min,Max}; + false -> {OMin,Max} + end; + {_,{Min2,Max2},{Min,Max}} -> + RealMax = + case inf_geq(OMax = next_up_limit(Max2),Max) of + true -> Max; + false -> OMax + end, + RealMin = + case inf_geq(Min, OMin = next_down_limit(Min2)) of + true -> Min; + false -> OMin + end, + {RealMin,RealMax}; + _ -> + Wide + end, + T#range{range=ResRange}. + +-spec analyse_call(#icode_call{}, call_fun()) -> #icode_call{}. + +analyse_call(Call, LookupFun) -> + case hipe_icode:call_dstlist(Call) of + [] -> + Call; + Dsts -> + Args = hipe_icode:args(Call), + Fun = hipe_icode:call_fun(Call), + Type = hipe_icode:call_type(Call), + DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun), + NewDefs = [update_info(Var, R) || {Var,R} <- lists:zip(Dsts, DstRanges)], + hipe_icode:subst_defines(lists:zip(Dsts, NewDefs), Call) + end. + +-spec analyse_move(#icode_move{}) -> #icode_move{}. + +analyse_move(Move) -> + Src = hipe_icode:move_src(Move), + Dst = hipe_icode:move_dst(Move), + Range = get_range_from_arg(Src), + NewDst = update_info(Dst, Range), + hipe_icode:subst_defines([{Dst,NewDst}], Move). + +-spec analyse_begin_handler(#icode_begin_handler{}) -> #icode_begin_handler{}. + +analyse_begin_handler(Handler) -> + SubstList = + [{Dst,update_info(Dst,any_type())} || + Dst <- hipe_icode:begin_handler_dstlist(Handler)], + hipe_icode:subst_defines(SubstList, Handler). + +-spec analyse_phi(#icode_phi{}) -> #icode_phi{}. + +analyse_phi(Phi) -> + {_, Args} = lists:unzip(hipe_icode:phi_arglist(Phi)), + Dst = hipe_icode:phi_dst(Phi), + ArgRanges = get_range_from_args(Args), + %% io:format("Phi-Arg_ranges: ~p ~n", [Arg_ranges]), + DstRange = sup(ArgRanges), + NewDst = update_info(Dst, DstRange, fun widen/3), + hipe_icode:subst_defines([{Dst, NewDst}], Phi). + +-spec analyse_last_insn(icode_instr(), info(), boolean(), call_fun()) -> + last_instr_return(). + +analyse_last_insn(I, Info, Rewrite, LookupFun) -> + %% io:format("~w Info: ~p~n",[I,Info]), + NewI = handle_args(I, Info), + %% io:format("~w -> ~w~n",[NewI,I]), + case NewI of + #icode_return{} -> analyse_return(NewI, Info); + #icode_enter{} -> analyse_enter(NewI, Info, LookupFun); + #icode_switch_val{} -> + {analyse_switch_val(NewI, Info, Rewrite), none_type()}; + #icode_if{} -> {analyse_if(NewI, Info, Rewrite), none_type()}; + #icode_goto{} -> {analyse_goto(NewI, Info), none_type()}; + #icode_type{} -> {analyse_type(NewI, Info, Rewrite), none_type()}; + #icode_fail{} -> {analyse_fail(NewI, Info), none_type()}; + #icode_call{} -> {analyse_last_call(NewI, Info, LookupFun), none_type()}; + #icode_switch_tuple_arity{} -> + {analyse_switch_tuple_arity(NewI, Info), none_type()}; + #icode_begin_try{} -> {analyse_begin_try(NewI, Info), none_type()} + end. + +-spec analyse_return(#icode_return{}, info()) -> last_instr_return(). + +analyse_return(Insn, _Info) -> + [RetRange] = get_range_from_args(hipe_icode:return_vars(Insn)), + {{Insn,[]}, RetRange}. + +-spec analyse_enter(#icode_enter{}, info(), call_fun()) -> last_instr_return(). + +analyse_enter(Insn, _Info, LookupFun) -> + Args = hipe_icode:args(Insn), + Fun = hipe_icode:enter_fun(Insn), + CallType = hipe_icode:enter_type(Insn), + [RetRange] = analyse_call_or_enter_fun(Fun, Args, CallType, LookupFun), + {{Insn,[]}, RetRange}. + +-spec analyse_switch_val(#icode_switch_val{}, info(), boolean()) -> instr_split_info(). + +analyse_switch_val(Switch, Info, Rewrite) -> + Var = hipe_icode:switch_val_term(Switch), + SwitchRange = get_range_from_arg(Var), + Cases = hipe_icode:switch_val_cases(Switch), + {FailRange, LabelRangeList} = get_range_label_list(Cases, SwitchRange, []), + case range__is_none(FailRange) of + true -> + InfoList = update_infos(Var, Info, LabelRangeList), + if Rewrite -> {update_switch(Switch, LabelRangeList, false), InfoList}; + true -> {Switch, InfoList} + end; + false -> + FailLabel = hipe_icode:switch_val_fail_label(Switch), + InfoList = update_infos(Var, Info, [{FailRange, FailLabel}|LabelRangeList]), + if Rewrite -> {update_switch(Switch, LabelRangeList, true), InfoList}; + true -> {Switch, InfoList} + end + end. + +-spec update_infos(argument(), info(), [{#range{},label()}]) -> [{label(),info()}]. + +update_infos(Arg, Info, [{Range, Label}|Rest]) -> + [{Label,enter_define({Arg,Range},Info)} | update_infos(Arg,Info,Rest)]; +update_infos(_, _, []) -> []. + +-spec get_range_label_list([{argument(),label()}], #range{}, [{#range{},label()}]) -> + {#range{},[{#range{},label()}]}. + +get_range_label_list([{Val,Label}|Cases], SRange, Acc) -> + VRange = get_range_from_arg(Val), + None = none_type(), + case inf(SRange, VRange) of + None -> + get_range_label_list(Cases, SRange, Acc); + ResRange -> + get_range_label_list(Cases, SRange, [{ResRange,Label}|Acc]) + end; +get_range_label_list([], SRange, Acc) -> + {PointTypes, _} = lists:unzip(Acc), + {remove_point_types(SRange, PointTypes), Acc}. + +-spec update_switch(#icode_switch_val{}, [{#range{},label()}], boolean()) -> + #icode_switch_val{}. + +update_switch(Switch, LabelRangeList, KeepFail) -> + S2 = + case label_range_list_to_cases(LabelRangeList, []) of + no_update -> + Switch; + Cases -> + hipe_icode:switch_val_cases_update(Switch, Cases) + end, + if KeepFail -> S2; + true -> S2 + end. + +-spec label_range_list_to_cases([{#range{},label()}], [{#icode_const{},label()}]) -> + 'no_update' | [{#icode_const{},label()}]. + +label_range_list_to_cases([{#range{range={C,C},other=false},Label}|Rest], + Acc) when is_integer(C) -> + label_range_list_to_cases(Rest, [{hipe_icode:mk_const(C),Label}|Acc]); +label_range_list_to_cases([{_NotAConstantRange,_Label}|_Rest], _Acc) -> + no_update; +label_range_list_to_cases([], Acc) -> + lists:reverse(Acc). + +-spec analyse_switch_tuple_arity(#icode_switch_tuple_arity{}, info()) -> + {#icode_switch_tuple_arity{}, [{label(),info()}]}. + +analyse_switch_tuple_arity(Switch, Info) -> + Var = hipe_icode:switch_tuple_arity_term(Switch), + NewInfo = enter_define({Var, get_range_from_arg(Var)}, Info), + Cases = hipe_icode:switch_tuple_arity_cases(Switch), + Fail = hipe_icode:switch_tuple_arity_fail_label(Switch), + {_, Case_labels} = lists:unzip(Cases), + Labels = [Fail|Case_labels], + {Switch, [{Label,NewInfo} || Label <- Labels]}. + +-spec analyse_goto(#icode_goto{}, info()) -> {#icode_goto{}, [{label(),info()},...]}. + +analyse_goto(Insn, Info) -> + GotoLabel = hipe_icode:goto_label(Insn), + {Insn, [{GotoLabel,Info}]}. + +-spec analyse_fail(#icode_fail{}, info()) -> {#icode_fail{}, [{label(),info()}]}. + +analyse_fail(Fail, Info) -> + case hipe_icode:fail_label(Fail) of + [] -> {Fail, []}; + Label -> {Fail, [{Label,Info}]} + end. + +-spec analyse_begin_try(#icode_begin_try{}, info()) -> + {#icode_begin_try{}, [{label(),info()},...]}. + +analyse_begin_try(Insn, Info) -> + Label = hipe_icode:begin_try_label(Insn), + Successor = hipe_icode:begin_try_successor(Insn), + {Insn, [{Label,Info},{Successor,Info}]}. + +-spec analyse_last_call(#icode_call{}, info(), call_fun()) -> + {#icode_call{}, [{label(),info()},...]}. + +analyse_last_call(Call, Info, LookupFun) -> + %% hipe_icode_pp:pp_block([Insn]), + NewI = analyse_call(Call, LookupFun), + Continuation = hipe_icode:call_continuation(Call), + NewInfo = enter_vals(NewI, Info), + case hipe_icode:call_fail_label(Call) of + [] -> + {NewI, [{Continuation,NewInfo}]}; + Fail -> + {NewI, [{Continuation,NewInfo}, {Fail,Info}]} + end. + +-spec analyse_if(#icode_if{}, info(), boolean()) -> + {#icode_goto{} | #icode_if{}, [{label(),info()}]}. + +analyse_if(If, Info, Rewrite) -> + case hipe_icode:if_args(If) of + Args = [_,_] -> + analyse_sane_if(If, Info, Args, get_range_from_args(Args), Rewrite); + _ -> + TrueLabel = hipe_icode:if_true_label(If), + FalseLabel = hipe_icode:if_false_label(If), + {If, [{TrueLabel,Info},{FalseLabel,Info}]} + end. + +-spec analyse_sane_if(#icode_if{}, info(), [argument(),...], + [#range{},...], boolean()) -> + {#icode_goto{} | #icode_if{}, [{label(), info()}]}. + +analyse_sane_if(If, Info, [Arg1, Arg2], [Range1, Range2], Rewrite) -> + case normalize_name(hipe_icode:if_op(If)) of + '>' -> + {TrueRange2, TrueRange1, FalseRange2, FalseRange1} = + range_inequality_propagation(Range2, Range1); + '==' -> + {TempTrueRange1, TempTrueRange2, FalseRange1, FalseRange2}= + range_equality_propagation(Range1, Range2), + TrueRange1 = set_other(TempTrueRange1,other(Range1)), + TrueRange2 = set_other(TempTrueRange2,other(Range2)); + '<' -> + {TrueRange1, TrueRange2, FalseRange1, FalseRange2} = + range_inequality_propagation(Range1, Range2); + '>=' -> + {FalseRange1, FalseRange2, TrueRange1, TrueRange2} = + range_inequality_propagation(Range1, Range2); + '=<' -> + {FalseRange2, FalseRange1, TrueRange2, TrueRange1} = + range_inequality_propagation(Range2, Range1); + '=:=' -> + {TrueRange1, TrueRange2, FalseRange1, FalseRange2}= + range_equality_propagation(Range1, Range2); + '=/=' -> + {FalseRange1, FalseRange2, TrueRange1, TrueRange2} = + range_equality_propagation(Range1, Range2); + '/=' -> + {TempFalseRange1, TempFalseRange2, TrueRange1, TrueRange2}= + range_equality_propagation(Range1, Range2), + FalseRange1 = set_other(TempFalseRange1,other(Range1)), + FalseRange2 = set_other(TempFalseRange2,other(Range2)) + end, + TrueLabel = hipe_icode:if_true_label(If), + FalseLabel = hipe_icode:if_false_label(If), + TrueInfo = + enter_defines([{Arg1,TrueRange1}, {Arg2,TrueRange2}],Info), + FalseInfo = + enter_defines([{Arg1,FalseRange1}, {Arg2,FalseRange2}],Info), + True = + case lists:any(fun range__is_none/1,[TrueRange1,TrueRange2]) of + true -> []; + false -> [{TrueLabel,TrueInfo}] + end, + False = + case lists:any(fun range__is_none/1, [FalseRange1,FalseRange2]) of + true -> []; + false -> [{FalseLabel,FalseInfo}] + end, + UpdateInfo = True++False, + NewIF = + if Rewrite -> + %%io:format("~w~n~w~n", [{Arg1,FalseRange1},{Arg2,FalseRange2}]), + %%io:format("Any none: ~w~n", [lists:any(fun range__is_none/1,[FalseRange1,FalseRange2])]), + case UpdateInfo of + [] -> %%This is weird + If; + [{Label,_Info}] -> + hipe_icode:mk_goto(Label); + [_,_] -> + If + end; + true -> + If + end, + {NewIF, UpdateInfo}. + +-spec normalize_name(atom()) -> atom(). + +normalize_name(Name) -> + case Name of + 'fixnum_eq' -> '=:='; + 'fixnum_neq' -> '=/='; + 'fixnum_gt' -> '>'; + 'fixnum_lt' -> '<'; + 'fixnum_ge' -> '>='; + 'fixnum_le' -> '=<'; + Name -> Name + end. + +-spec range_equality_propagation(#range{}, #range{}) -> + {#range{}, #range{}, #range{}, #range{}}. + +range_equality_propagation(Range_1, Range_2) -> + True_range = inf(Range_1, Range_2), + case {range(Range_1), range(Range_2)} of + {{N,N},{ N,N}} -> + False_range_1 = none_range(), + False_range_2 = none_range(); + {{N1,N1}, {N2,N2}} -> + False_range_1 = Range_1, + False_range_2 = Range_2; + {{N,N}, _} -> + False_range_1 = Range_1, + {_,False_range_2} = compare_with_integer(N, Range_2); + {_, {N,N}} -> + False_range_2 = Range_2, + {_,False_range_1} = compare_with_integer(N, Range_1); + {_, _} -> + False_range_1 = Range_1, + False_range_2 = Range_2 + end, + {True_range, True_range, False_range_1, False_range_2}. + +-spec range_inequality_propagation(#range{}, #range{}) -> + {#range{}, #range{}, #range{}, #range{}}. + +%% Range1 < Range2 +range_inequality_propagation(Range1, Range2) -> + R1_other = other(Range1), + R2_other = other(Range2), + {R1_true_range, R1_false_range, R2_true_range, R2_false_range} = + case {range(Range1), range(Range2)} of + {{N1,N1}, {N2,N2}} -> + case inf_geq(N2,inf_add(N1,1)) of + true -> + {{N1,N1},empty,{N2,N2},empty}; + false -> + {empty,{N1,N1},empty,{N2,N2}} + end; + {{N1,N1}, {Min2,Max2}} -> + case inf_geq(Min2,inf_add(N1,1)) of + true -> + {{N1,N1},empty,{inf_add(N1,1),Max2},empty}; + false -> + case inf_geq(N1,Max2) of + true -> + {empty,{N1,N1},empty,{Min2,N1}}; + false -> + {{N1,N1},{N1,N1},{inf_add(N1,1),Max2},{Min2,N1}} + end + end; + {{Min1,Max1}, {N2,N2}} -> + case inf_geq(N2,inf_add(Max1,1)) of + true -> + {{Min1,inf_add(N2,-1)},empty,{N2,N2},empty}; + false -> + case inf_geq(Min1,N2) of + true -> + {empty,{N2,Max1},empty,{N2,N2}}; + false -> + {{Min1,inf_add(N2,-1)},{N2,Max1},{N2,N2},{N2,N2}} + end + end; + {empty, {Min2,Max2}} -> + {empty,empty,{Min2,Max2},{Min2,Max2}}; + {{Min1,Max1}, empty} -> + {{Min1,Max1},{Min1,Max1},empty,empty}; + {empty, empty} -> + {empty,empty,empty,empty}; + {{Min1,Max1}, {Min2,Max2}} -> + {{Min1,inf_min([Max1,inf_add(Max2,-1)])}, + {inf_max([Min1,Min2]),Max1}, + {inf_max([inf_add(Min1,1),Min2]),Max2}, + {Min2,inf_min([Max1,Max2])}} + end, + {range_init(R1_true_range, R1_other), + range_init(R2_true_range, R2_other), + range_init(R1_false_range, R1_other), + range_init(R2_false_range, R2_other)}. + +-spec analyse_type(#icode_type{}, info(), boolean()) -> + {#icode_goto{} | #icode_type{}, [{label(),info()}]}. + +analyse_type(Type, Info, Rewrite) -> + TypeTest = hipe_icode:type_test(Type), + [Arg|_] = hipe_icode:type_args(Type), + OldVarRange = get_range_from_arg(Arg), + case TypeTest of + {integer, N} -> + {TrueRange,FalseRange} = compare_with_integer(N,OldVarRange); + integer -> + TrueRange = inf(any_range(), OldVarRange), + FalseRange = inf(none_range(), OldVarRange); + _ -> + TrueRange = inf(none_range(),OldVarRange), + FalseRange = OldVarRange + end, + TrueLabel = hipe_icode:type_true_label(Type), + FalseLabel = hipe_icode:type_false_label(Type), + TrueInfo = + enter_define({Arg,TrueRange},Info), + FalseInfo = + enter_define({Arg,FalseRange},Info), + True = + case range__is_none(TrueRange) of + true -> []; + false -> [{TrueLabel,TrueInfo}] + end, + False = + case range__is_none(FalseRange) of + true -> []; + false -> [{FalseLabel,FalseInfo}] + end, + UpdateInfo = True++False, + NewType = + if Rewrite -> + case UpdateInfo of + [] -> %% This is weird + Type; + [{Label,_Info}] -> + hipe_icode:mk_goto(Label); + [_,_] -> + Type + end; + true -> + Type + end, + {NewType,True ++ False}. + +-spec compare_with_integer(integer(), #range{}) -> {#range{}, #range{}}. + +compare_with_integer(N, OldVarRange) -> + TestRange = range_init({N, N}, false), + TrueRange = inf(TestRange, OldVarRange), + %% False range + TempFalseRange = range__remove_constant(OldVarRange, TestRange), + BetterRange = + case range(TempFalseRange) of + {Min, Max} = MM -> + New_small = inf_geq(Min, N), + New_large = inf_geq(N, Max), + if New_small and not New_large -> + {N + 1, Max}; + New_large and not New_small -> + {Min, N - 1}; + true -> + MM + end; + Not_tuple -> + Not_tuple + end, + FalseRange = range_init(BetterRange, other(TempFalseRange)), + {TrueRange, FalseRange}. + +%%== Ranges ================================================================== + +-spec pp_ann(#ann{} | erl_types:erl_type()) -> [string()]. + +pp_ann(#ann{range=#range{range=R, other=false}}) -> + pp_range(R); +pp_ann(#ann{range=#range{range=empty, other=true}, type=Type}) -> + t_to_string(Type); +pp_ann(#ann{range=#range{range=R, other=true}, type=Type}) -> + pp_range(R) ++ " | " ++ t_to_string(Type); +pp_ann(Type) -> + t_to_string(Type). + +-spec pp_range(range_rep()) -> nonempty_string(). + +pp_range(empty) -> + "none"; +pp_range({Min, Max}) -> + val_to_string(Min) ++ ".." ++ val_to_string(Max). + +-spec val_to_string('pos_inf' | 'neg_inf' | integer()) -> string(). + +val_to_string(pos_inf) -> "inf"; +val_to_string(neg_inf) -> "-inf"; +val_to_string(X) when is_integer(X) -> integer_to_list(X). + +-spec range_from_type(erl_types:erl_type()) -> [#range{}]. + +range_from_type(Type) -> + [range_from_simple_type(T) || T <- t_to_tlist(Type)]. + +-spec range_from_simple_type(erl_types:erl_type()) -> #range{}. + +range_from_simple_type(Type) -> + None = t_none(), + case t_inf(t_integer(), Type) of + None -> + #range{range = empty, other = true}; + Type -> + Range = {number_min(Type), number_max(Type)}, + #range{range = Range, other = false}; + NewType -> + Range = {number_min(NewType), number_max(NewType)}, + #range{range = Range, other = true} + end. + +-spec range_init(range_rep(), boolean()) -> #range{}. + +range_init({Min, Max} = Range, Other) -> + case inf_geq(Max, Min) of + true -> + #range{range = Range, other = Other}; + false -> + #range{range = empty, other = Other} + end; +range_init(empty, Other) -> + #range{range = empty, other = Other}. + +-spec range(#range{}) -> range_rep(). + +range(#range{range = R}) -> R. + +-spec other(#range{}) -> boolean(). + +other(#range{other = O}) -> O. + +-spec set_other(#range{}, boolean()) -> #range{}. + +set_other(R, O) -> R#range{other = O}. + +-spec range__min(#range{}) -> 'empty' | 'neg_inf' | integer(). + +range__min(#range{range=empty}) -> empty; +range__min(#range{range={Min,_}}) -> Min. + +-spec range__max(#range{}) -> 'empty' | 'pos_inf' | integer(). + +range__max(#range{range=empty}) -> empty; +range__max(#range{range={_,Max}}) -> Max. + +-spec range__is_none(#range{}) -> boolean(). + +range__is_none(#range{range=empty, other=false}) -> true; +range__is_none(#range{}) -> false. + +-spec range__is_empty(#range{}) -> boolean(). + +range__is_empty(#range{range=empty}) -> true; +range__is_empty(#range{range={_,_}}) -> false. + +-spec remove_point_types(#range{}, [#range{}]) -> #range{}. + +remove_point_types(Range, Ranges) -> + Sorted = lists:sort(Ranges), + FoldFun = fun (R, Acc) -> range__remove_constant(Acc,R) end, + Range1 = lists:foldl(FoldFun, Range, Sorted), + lists:foldl(FoldFun, Range1, lists:reverse(Sorted)). + +-spec range__remove_constant(#range{}, #range{}) -> #range{}. + +range__remove_constant(R = #range{range={C,C}}, #range{range={C,C}}) -> + R#range{range=empty}; +range__remove_constant(R = #range{range={C,H}}, #range{range={C,C}}) -> + R#range{range={C+1,H}}; +range__remove_constant(R = #range{range={L,C}}, #range{range={C,C}}) -> + R#range{range={L,C-1}}; +range__remove_constant(R = #range{}, #range{range={C,C}}) -> + R; +range__remove_constant(R = #range{}, _) -> + R. + +-spec any_type() -> #range{}. + +any_type() -> + #range{range=any_r(), other=true}. + +-spec any_range() -> #range{}. + +any_range() -> + #range{range=any_r(), other=false}. + +-spec none_range() -> #range{}. + +none_range() -> + #range{range=empty, other=true}. + +-spec none_type() -> #range{}. + +none_type() -> + #range{range = empty, other = false}. + +-spec any_r() -> {'neg_inf','pos_inf'}. + +any_r() -> {neg_inf, pos_inf}. + +-spec get_range_from_args([argument()]) -> [#range{}]. + +get_range_from_args(Args) -> + [get_range_from_arg(Arg) || Arg <- Args]. + +-spec get_range_from_arg(argument()) -> #range{}. + +get_range_from_arg(Arg) -> + case hipe_icode:is_const(Arg) of + true -> + Value = hipe_icode:const_value(Arg), + case is_integer(Value) of + true -> + #range{range={Value,Value}, other=false}; + false -> + #range{range=empty, other=true} + end; + false -> + case hipe_icode:is_annotated_variable(Arg) of + true -> + case hipe_icode:variable_annotation(Arg) of + {range_anno, #ann{range=Range}, _} -> + Range; + {type_anno, Type, _} -> + range_from_simple_type(Type) + end; + false -> + any_type() + end + end. + +%% inf([R]) -> +%% R; +%% inf([R1,R2|Rest]) -> +%% inf([inf(R1,R2)|Rest]). + +-spec inf(#range{}, #range{}) -> #range{}. + +inf(#range{range=R1, other=O1}, #range{range=R2, other=O2}) -> + #range{range=range_inf(R1,R2), other=other_inf(O1,O2)}. + +-spec range_inf(range_rep(), range_rep()) -> range_rep(). + +range_inf(empty, _) -> empty; +range_inf(_, empty) -> empty; +range_inf({Min1,Max1}, {Min2,Max2}) -> + NewMin = inf_max([Min1,Min2]), + NewMax = inf_min([Max1,Max2]), + case inf_geq(NewMax, NewMin) of + true -> + {NewMin, NewMax}; + false -> + empty + end. + +-spec other_inf(boolean(), boolean()) -> boolean(). + +other_inf(O1, O2) -> O1 and O2. + +-spec sup([#range{},...]) -> #range{}. + +sup([R]) -> + R; +sup([R1,R2|Rest]) -> + sup([sup(R1, R2)|Rest]). + +-spec sup(#range{}, #range{}) -> #range{}. + +sup(#range{range=R1,other=O1}, #range{range=R2,other=O2}) -> + #range{range=range_sup(R1,R2), other=other_sup(O1,O2)}. + +-spec range_sup(range_rep(), range_rep()) -> range_rep(). + +range_sup(empty, R) -> R; +range_sup(R, empty) -> R; +range_sup({Min1,Max1}, {Min2,Max2}) -> + NewMin = inf_min([Min1,Min2]), + NewMax = inf_max([Max1,Max2]), + {NewMin,NewMax}. + +-spec other_sup(boolean(), boolean()) -> boolean(). + +other_sup(O1, O2) -> O1 or O2. + +%%== Call Support ============================================================= + +-spec analyse_call_or_enter_fun(fun_name(), [argument()], + icode_call_type(), call_fun()) -> [#range{}]. + +analyse_call_or_enter_fun(Fun, Args, CallType, LookupFun) -> + %%io:format("Fun: ~p~n Args: ~p~n CT: ~p~n LF: ~p~n", [Fun, Args, CallType, LookupFun]), + case basic_type(Fun) of + {bin, Operation} -> + [Arg_range1,Arg_range2] = get_range_from_args(Args), + A1_is_empty = range__is_empty(Arg_range1), + A2_is_empty = range__is_empty(Arg_range2), + case A1_is_empty orelse A2_is_empty of + true -> + [none_type()]; + false -> + [Operation(Arg_range1, Arg_range2)] + end; + {unary, Operation} -> + [Arg_range] = get_range_from_args(Args), + case range__is_empty(Arg_range) of + true -> + [none_type()]; + false -> + [Operation(Arg_range)] + end; + {fcall, MFA} -> + case CallType of + local -> + Range = LookupFun(MFA, get_range_from_args(Args)), + case range__is_none(Range) of + true -> + throw(none_range); + false -> + [Range] + end; + remote -> + [any_type()] + end; + not_int -> + [any_type()]; + not_analysed -> + [any_type()]; + {hipe_bs_primop, {bs_get_integer, Size, Flags}} -> + {Min, Max} = analyse_bs_get_integer(Size, Flags, length(Args) =:= 1), + [#range{range={Min, Max}, other=false}, any_type()]; + {hipe_bs_primop, _} = Primop -> + Type = hipe_icode_primops:type(Primop), + range_from_type(Type) + end. + +-type bin_operation() :: fun((#range{},#range{}) -> #range{}). +-type unary_operation() :: fun((#range{}) -> #range{}). + +-spec basic_type(fun_name()) -> 'not_int' | 'not_analysed' + | {bin, bin_operation()} + | {unary, unary_operation()} + | {fcall, mfa()} | {hipe_bs_primop, _}. + +%% Arithmetic operations +basic_type('+') -> {bin, fun(R1, R2) -> range_add(R1, R2) end}; +basic_type('-') -> {bin, fun(R1, R2) -> range_sub(R1, R2) end}; +basic_type('*') -> {bin, fun(R1, R2) -> range_mult(R1, R2) end}; +basic_type('/') -> not_int; +basic_type('div') -> {bin, fun(R1, R2) -> range_div(R1, R2) end}; +basic_type('rem') -> {bin, fun(R1, R2) -> range_rem(R1, R2) end}; +basic_type('bor') -> {bin, fun(R1, R2) -> range_bor(R1, R2) end}; +basic_type('band') -> {bin, fun(R1, R2) -> range_band(R1, R2) end}; +basic_type('bxor') -> {bin, fun(R1, R2) -> range_bxor(R1, R2) end}; +basic_type('bnot') -> {unary, fun(R1) -> range_bnot(R1) end}; +basic_type('bsl') -> {bin, fun(R1, R2) -> range_bsl(R1, R2) end}; +basic_type('bsr') -> {bin, fun(R1, R2) -> range_bsr(R1, R2) end}; +%% unsafe_* +basic_type('unsafe_bor') -> + {bin, fun(R1, R2) -> range_bor(R1, R2) end}; +basic_type('unsafe_band') -> + {bin, fun(R1, R2) -> range_band(R1, R2) end}; +basic_type('unsafe_bxor') -> + {bin, fun(R1, R2) -> range_bxor(R1, R2) end}; +basic_type('unsafe_bnot') -> + {unary, fun(R1) -> range_bnot(R1) end}; +basic_type('unsafe_bsl') -> + {bin, fun(R1, R2) -> range_bsl(R1, R2) end}; +basic_type('unsafe_bsr') -> + {bin, fun(R1, R2) -> range_bsr(R1, R2) end}; +basic_type('unsafe_add') -> + {bin, fun(R1, R2) -> range_add(R1, R2) end}; +basic_type('unsafe_sub') -> + {bin, fun(R1, R2) -> range_sub(R1, R2) end}; +basic_type('extra_unsafe_add') -> + {bin, fun(R1, R2) -> range_add(R1, R2) end}; +basic_type('extra_unsafe_sub') -> + {bin, fun(R1, R2) -> range_sub(R1, R2) end}; +%% Binaries +basic_type({hipe_bs_primop, _} = Primop) -> Primop; +%% Unknown, other +basic_type(call_fun) -> not_analysed; +basic_type(clear_timeout) -> not_analysed; +basic_type(redtest) -> not_analysed; +basic_type(set_timeout) -> not_analysed; +basic_type(#apply_N{}) -> not_analysed; +basic_type(#closure_element{}) -> not_analysed; +basic_type(#gc_test{}) -> not_analysed; +%% Message handling +basic_type(check_get_msg) -> not_analysed; +basic_type(next_msg) -> not_analysed; +basic_type(select_msg) -> not_analysed; +basic_type(suspend_msg) -> not_analysed; +%% Functions +basic_type(enter_fun) -> not_analysed; +basic_type(#mkfun{}) -> not_int; +basic_type({_M,_F,_A} = MFA) -> {fcall, MFA}; +%% Floats +basic_type(conv_to_float) -> not_int; +basic_type(fclearerror) -> not_analysed; +basic_type(fcheckerror) -> not_analysed; +basic_type(fnegate) -> not_int; +basic_type(fp_add) -> not_int; +basic_type(fp_div) -> not_int; +basic_type(fp_mul) -> not_int; +basic_type(fp_sub) -> not_int; +basic_type(unsafe_tag_float) -> not_int; +basic_type(unsafe_untag_float) -> not_int; +%% Lists, tuples, records +basic_type(cons) -> not_int; +basic_type(mktuple) -> not_int; +basic_type(unsafe_hd) -> not_analysed; +basic_type(unsafe_tl) -> not_int; +basic_type(#element{}) -> not_analysed; +basic_type(#unsafe_element{}) -> not_analysed; +basic_type(#unsafe_update_element{}) -> not_analysed. + +-spec analyse_bs_get_integer(integer(), integer(), boolean()) -> range_tuple(). + +analyse_bs_get_integer(Size, Flags, true) -> + Signed = Flags band 4, + if Signed =:= 0 -> + Max = 1 bsl Size - 1, + Min = 0; + true -> + Max = 1 bsl (Size-1) - 1, + Min = -(1 bsl (Size-1)) + end, + {Min, Max}; +analyse_bs_get_integer(Size, Flags, false) when is_integer(Size), + is_integer(Flags) -> + any_r(). + +%%--------------------------------------------------------------------------- +%% Range operations +%%--------------------------------------------------------------------------- + +%% Arithmetic + +-spec range_add(#range{}, #range{}) -> #range{}. + +range_add(Range1, Range2) -> + NewMin = inf_add(range__min(Range1), range__min(Range2)), + NewMax = inf_add(range__max(Range1), range__max(Range2)), + Other = other(Range1) orelse other(Range2), + range_init({NewMin, NewMax}, Other). + +-spec range_sub(#range{}, #range{}) -> #range{}. + +range_sub(Range1, Range2) -> + Min_sub = inf_min([inf_inv(range__max(Range2)), + inf_inv(range__min(Range2))]), + Max_sub = inf_max([inf_inv(range__max(Range2)), + inf_inv(range__min(Range2))]), + NewMin = inf_add(range__min(Range1), Min_sub), + NewMax = inf_add(range__max(Range1), Max_sub), + Other = other(Range1) orelse other(Range2), + range_init({NewMin, NewMax}, Other). + +-spec range_mult(#range{}, #range{}) -> #range{}. + +range_mult(#range{range=empty, other=true}, _Range2) -> + range_init(empty, true); +range_mult(_Range1, #range{range=empty, other=true}) -> + range_init(empty, true); +range_mult(Range1, Range2) -> + Min1 = range__min(Range1), + Min2 = range__min(Range2), + Max1 = range__max(Range1), + Max2 = range__max(Range2), + GreaterMin1 = inf_greater_zero(Min1), + GreaterMin2 = inf_greater_zero(Min2), + GreaterMax1 = inf_greater_zero(Max1), + GreaterMax2 = inf_greater_zero(Max2), + Range = + if GreaterMin1 -> + if GreaterMin2 -> {inf_mult(Min1, Min2), inf_mult(Max1, Max2)}; + GreaterMax2 -> {inf_mult(Min2, Max1), inf_mult(Max2, Max1)}; + true -> {inf_mult(Min2, Max1), inf_mult(Max2, Min1)} + end; + %% Column 1 or 2 + GreaterMin2 -> % Column 1 or 2 row 3 + range(range_mult(Range2, Range1)); + GreaterMax1 -> % Column 2 Row 1 or 2 + if GreaterMax2 -> % Column 2 Row 2 + NewMin = inf_min([inf_mult(Min2, Max1), inf_mult(Max2, Min1)]), + NewMax = inf_max([inf_mult(Min2, Min1), inf_mult(Max2, Max1)]), + {NewMin, NewMax}; + true -> % Column 2 Row 1 + {inf_mult(Min2, Max1), inf_mult(Min2, Min1)} + end; + GreaterMax2 -> % Column 1 Row 2 + range(range_mult(Range2, Range1)); + true -> % Column 1 Row 1 + {inf_mult(Max1, Max2), inf_mult(Min2, Min1)} + end, + Other = other(Range1) orelse other(Range2), + range_init(Range, Other). + +-spec extreme_divisors(#range{}) -> range_tuple(). + +extreme_divisors(#range{range={0,0}}) -> {0,0}; +extreme_divisors(#range{range={0,Max}}) -> {1,Max}; +extreme_divisors(#range{range={Min,0}}) -> {Min,-1}; +extreme_divisors(#range{range={Min,Max}}) -> + case inf_geq(Min, 0) of + true -> {Min, Max}; + false -> % Min < 0 + case inf_geq(0, Max) of + true -> {Min,Max}; % Max < 0 + false -> {-1,1} % Max > 0 + end + end. + +-spec range_div(#range{}, #range{}) -> #range{}. + +%% this is div, not /. +range_div(_, #range{range={0,0}}) -> + range_init(empty, false); +range_div(#range{range=empty}, _) -> + range_init(empty, false); +range_div(_, #range{range=empty}) -> + range_init(empty, false); +range_div(Range1, Den) -> + Min1 = range__min(Range1), + Max1 = range__max(Range1), + {Min2, Max2} = extreme_divisors(Den), + Min_max_list = [inf_div(Min1, Min2), inf_div(Min1, Max2), + inf_div(Max1, Min2), inf_div(Max1, Max2)], + range_init({inf_min(Min_max_list), inf_max(Min_max_list)}, false). + +-spec range_rem(#range{}, #range{}) -> #range{}. + +range_rem(Range1, Range2) -> + %% Range1 desides the sign of the answer. + Min1 = range__min(Range1), + Max1 = range__max(Range1), + Min2 = range__min(Range2), + Max2 = range__max(Range2), + Min1_geq_zero = inf_geq(Min1, 0), + Max1_leq_zero = inf_geq(0, Max1), + Max_range2 = inf_max([inf_abs(Min2), inf_abs(Max2)]), + Max_range2_leq_zero = inf_geq(0, Max_range2), + New_min = + if Min1_geq_zero -> 0; + Max_range2_leq_zero -> Max_range2; + true -> inf_inv(Max_range2) + end, + New_max = + if Max1_leq_zero -> 0; + Max_range2_leq_zero -> inf_inv(Max_range2); + true -> Max_range2 + end, + range_init({New_min, New_max}, false). + +%%--- Bit operations ---------------------------- + +-spec range_bsr(#range{}, #range{}) -> #range{}. + +range_bsr(Range1, Range2=#range{range={Min, Max}}) -> + New_Range2 = range_init({inf_inv(Max), inf_inv(Min)}, other(Range2)), + Ans = range_bsl(Range1, New_Range2), + %% io:format("bsr res:~w~nInput:= ~w~n", [Ans, {Range1,Range2}]), + Ans. + +-spec range_bsl(#range{}, #range{}) -> #range{}. + +range_bsl(Range1, Range2) -> + Min1 = range__min(Range1), + Min2 = range__min(Range2), + Max1 = range__max(Range1), + Max2 = range__max(Range2), + Min1Geq0 = inf_geq(Min1, 0), + Max1Less0 = not inf_geq(Max1, 0), + MinMax = + if Min1Geq0 -> + {inf_bsl(Min1, Min2), inf_bsl(Max1, Max2)}; + true -> + if Max1Less0 -> {inf_bsl(Min1, Max2), inf_bsl(Max1, Min2)}; + true -> {inf_bsl(Min1, Max2), inf_bsl(Max1, Max2)} + end + end, + range_init(MinMax, false). + +-spec range_bnot(#range{}) -> #range{}. + +range_bnot(Range) -> + Minus_one = range_init({-1,-1}, false), + range_add(range_mult(Range, Minus_one), Minus_one). + +-spec width(range_rep() | integer()) -> 'pos_inf' | non_neg_integer(). + +width({Min, Max}) -> inf_max([width(Min), width(Max)]); +width(pos_inf) -> pos_inf; +width(neg_inf) -> pos_inf; +width(X) when is_integer(X), X >= 0 -> poswidth(X, 0); +width(X) when is_integer(X), X < 0 -> negwidth(X, 0). + +-spec poswidth(non_neg_integer(), non_neg_integer()) -> non_neg_integer(). + +poswidth(X, N) -> + case X < (1 bsl N) of + true -> N; + false -> poswidth(X, N+1) + end. + +-spec negwidth(neg_integer(), non_neg_integer()) -> non_neg_integer(). + +negwidth(X, N) -> + case X > (-1 bsl N) of + true -> N; + false -> negwidth(X, N+1) + end. + +-spec range_band(#range{}, #range{}) -> #range{}. + +range_band(R1, R2) -> + {_Min1, Max1} = MM1 = range(R1), + {_Min2, Max2} = MM2 = range(R2), + Width1 = width(MM1), + Width2 = width(MM2), + Range = + case {classify_range(R1), classify_range(R2)} of + {minus_minus, minus_minus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), -1}; + {minus_minus, minus_plus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), Max2}; + {minus_minus, plus_plus} -> + {0, Max2}; + {minus_plus, minus_minus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), Max1}; + {minus_plus, minus_plus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), inf_max([Max1, Max2])}; + {minus_plus, plus_plus} -> + {0, Max2}; + {plus_plus, minus_minus} -> + {0, Max1}; + {plus_plus, minus_plus} -> + {0, Max1}; + {plus_plus, plus_plus} -> + {0, inf_min([Max1, Max2])} + end, + range_init(Range, false). + +-spec range_bor(#range{}, #range{}) -> #range{}. + +range_bor(R1, R2) -> + {Min1, _Max1} = MM1 = range(R1), + {Min2, _Max2} = MM2 = range(R2), + Width1 = width(MM1), + Width2 = width(MM2), + Range = + case {classify_range(R1), classify_range(R2)} of + {minus_minus, minus_minus} -> + {inf_max([Min1, Min2]), -1}; + {minus_minus, minus_plus} -> + {Min1, -1}; + {minus_minus, plus_plus} -> + {Min1, -1}; + {minus_plus, minus_minus} -> + {Min2, -1}; + {minus_plus, minus_plus} -> + Width = inf_max([Width1, Width2]), + {inf_min([Min1, Min2]), inf_add(-1, inf_bsl(1, Width))}; + {minus_plus, plus_plus} -> + Width = inf_max([Width1, Width2]), + {Min1, inf_add(-1, inf_bsl(1, Width))}; + {plus_plus, minus_minus} -> + {Min2, -1}; + {plus_plus, minus_plus} -> + Width = inf_max([Width1, Width2]), + {Min2, inf_add(-1, inf_bsl(1, Width))}; + {plus_plus, plus_plus} -> + Width = inf_max([Width1, Width2]), + {0, inf_add(-1, inf_bsl(1, Width))} + end, + range_init(Range, false). + +-spec classify_range(#range{}) -> 'minus_minus' | 'minus_plus' | 'plus_plus'. + +classify_range(Range) -> + case range(Range) of + {neg_inf, Number} when is_integer(Number), Number < 0 -> minus_minus; + {neg_inf, Number} when is_integer(Number), Number >= 0 -> minus_plus; + {Number, pos_inf} when is_integer(Number), Number < 0 -> minus_plus; + {Number, pos_inf} when is_integer(Number), Number >= 0 -> plus_plus; + {neg_inf, pos_inf} -> minus_plus; + {Number1,Number2} when is_integer(Number1), is_integer(Number2) -> + classify_int_range(Number1, Number2) + end. + +-spec classify_int_range(integer(), integer()) -> + 'minus_minus' | 'minus_plus' | 'plus_plus'. + +classify_int_range(Number1, _Number2) when Number1 >= 0 -> + plus_plus; +classify_int_range(_Number1, Number2) when Number2 < 0 -> + minus_minus; +classify_int_range(_Number1, _Number2) -> + minus_plus. + +-spec range_bxor(#range{}, #range{}) -> #range{}. + +range_bxor(R1, R2) -> + {Min1, Max1} = MM1 = range(R1), + {Min2, Max2} = MM2 = range(R2), + Width1 = width(MM1), + Width2 = width(MM2), + Range = + case {classify_range(R1), classify_range(R2)} of + {minus_minus, minus_minus} -> + Width = inf_max([Width1, Width2]), + {0, inf_add(-1, inf_bsl(1, Width))}; + {minus_minus, minus_plus} -> + MinWidth = inf_max([Width1, width({0,Max2})]), + MaxWidth = inf_max([Width1, width({Min2,-1})]), + {inf_bsl(-1, MinWidth), inf_add(-1, inf_bsl(1, MaxWidth))}; + {minus_minus, plus_plus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), -1}; + {minus_plus, minus_minus} -> + MinWidth = inf_max([Width2,width({0,Max1})]), + MaxWidth = inf_max([Width2,width({Min1,-1})]), + {inf_bsl(-1, MinWidth), inf_add(-1, inf_bsl(1, MaxWidth))}; + {minus_plus, minus_plus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), inf_add(-1, inf_bsl(1, Width))}; + {minus_plus, plus_plus} -> + MinWidth = inf_max([Width2,width({Min1,-1})]), + MaxWidth = inf_max([Width2,width({0,Max1})]), + {inf_bsl(-1, MinWidth), inf_add(-1, inf_bsl(1, MaxWidth))}; + {plus_plus, minus_minus} -> + Width = inf_max([Width1, Width2]), + {inf_bsl(-1, Width), -1}; + {plus_plus, minus_plus} -> + MinWidth = inf_max([Width1,width({Min2,-1})]), + MaxWidth = inf_max([Width1,width({0,Max2})]), + {inf_bsl(-1, MinWidth), inf_add(-1, inf_bsl(1, MaxWidth))}; + {plus_plus, plus_plus} -> + Width = inf_max([Width1, Width2]), + {0, inf_add(-1, inf_bsl(1, Width))} + end, + range_init(Range, false). + +%%--------------------------------------------------------------------------- +%% Inf operations +%%--------------------------------------------------------------------------- + +-spec inf_max([inf_integer(),...]) -> inf_integer(). + +inf_max([H|T]) -> + lists:foldl(fun (Elem, Max) -> + case inf_geq(Elem, Max) of + false -> Max; + true -> Elem + end + end, H, T). + +-spec inf_min([inf_integer(),...]) -> inf_integer(). + +inf_min([H|T]) -> + lists:foldl(fun (Elem, Min) -> + case inf_geq(Elem, Min) of + true -> Min; + false -> Elem + end + end, H, T). + +-spec inf_abs(inf_integer()) -> 'pos_inf' | integer(). + +inf_abs(pos_inf) -> pos_inf; +inf_abs(neg_inf) -> pos_inf; +inf_abs(Number) when is_integer(Number), (Number < 0) -> - Number; +inf_abs(Number) when is_integer(Number) -> Number. + +-spec inf_add(inf_integer(), inf_integer()) -> inf_integer(). + +inf_add(pos_inf, _Number) -> pos_inf; +inf_add(neg_inf, _Number) -> neg_inf; +inf_add(_Number, pos_inf) -> pos_inf; +inf_add(_Number, neg_inf) -> neg_inf; +inf_add(Number1, Number2) when is_integer(Number1), is_integer(Number2) -> + Number1 + Number2. + +-spec inf_inv(inf_integer()) -> inf_integer(). + +inf_inv(pos_inf) -> neg_inf; +inf_inv(neg_inf) -> pos_inf; +inf_inv(Number) -> -Number. + +-spec inf_geq(inf_integer(), inf_integer()) -> boolean(). + +inf_geq(pos_inf, _) -> true; +inf_geq(_, pos_inf) -> false; +inf_geq(_, neg_inf) -> true; +inf_geq(neg_inf, _) -> false; +inf_geq(A, B) -> A >= B. + +-spec inf_greater_zero(inf_integer()) -> boolean(). + +inf_greater_zero(pos_inf) -> true; +inf_greater_zero(neg_inf) -> false; +inf_greater_zero(Number) when is_integer(Number), Number >= 0 -> true; +inf_greater_zero(Number) when is_integer(Number), Number < 0 -> false. + +-spec inf_div(inf_integer(), inf_integer()) -> inf_integer(). + +inf_div(Number, 0) -> + Greater = inf_greater_zero(Number), + if Greater -> pos_inf; + true -> neg_inf + end; +inf_div(pos_inf, Number) -> + Greater = inf_greater_zero(Number), + if Greater -> pos_inf; + true -> neg_inf + end; +inf_div(neg_inf, Number) -> + Greater = inf_greater_zero(Number), + if Greater -> neg_inf; + true -> pos_inf + end; +inf_div(Number, pos_inf) -> + Greater = inf_greater_zero(Number), + if Greater -> pos_inf; + true -> neg_inf + end; +inf_div(Number, neg_inf) -> + Greater = inf_greater_zero(Number), + if Greater -> neg_inf; + true -> pos_inf + end; +inf_div(Number1, Number2) -> Number1 div Number2. + +-spec inf_mult(inf_integer(), inf_integer()) -> inf_integer(). + +inf_mult(neg_inf, Number) -> + Greater = inf_greater_zero(Number), + if Greater -> neg_inf; + true -> pos_inf + end; +inf_mult(pos_inf, Number) -> + Greater = inf_greater_zero(Number), + if Greater -> pos_inf; + true -> neg_inf + end; +inf_mult(Number, pos_inf) -> inf_mult(pos_inf, Number); +inf_mult(Number, neg_inf) -> inf_mult(neg_inf, Number); +inf_mult(Number1, Number2) -> Number1 * Number2. + +-spec inf_bsl(inf_integer(), inf_integer()) -> inf_integer(). + +inf_bsl(pos_inf, _) -> pos_inf; +inf_bsl(neg_inf, _) -> neg_inf; +inf_bsl(Number, pos_inf) when is_integer(Number), Number >= 0 -> pos_inf; +inf_bsl(_, pos_inf) -> neg_inf; +inf_bsl(Number, neg_inf) when is_integer(Number), Number >= 0 -> 0; +inf_bsl(_Number, neg_inf) -> -1; +inf_bsl(Number1, Number2) when is_integer(Number1), is_integer(Number2) -> + %% We can not shift left with a number which is not a fixnum. We + %% don't have enough memory. + Bits = ?BITS, + if Number2 > (Bits bsl 1) -> inf_bsl(Number1, pos_inf); + Number2 < (-Bits bsl 1) -> inf_bsl(Number1, neg_inf); + true -> Number1 bsl Number2 + end. + +%% State + +-spec state__init(cfg(), data()) -> #state{}. + +state__init(Cfg, {MFA, ArgsFun, CallFun, FinalFun}) -> + Start = hipe_icode_cfg:start_label(Cfg), + Params = hipe_icode_cfg:params(Cfg), + Ranges = ArgsFun(MFA, Cfg), + %% io:format("MFA: ~w~nRanges: ~w~n", [MFA, Ranges]), + Liveness = + hipe_icode_ssa:ssa_liveness__analyze(hipe_icode_type:unannotate_cfg(Cfg)), + case lists:any(fun range__is_none/1, Ranges) of + true -> + FinalFun(MFA, [none_type()]), + throw(no_input); + false -> + NewParams = lists:zipwith(fun update_info/2, Params, Ranges), + NewCfg = hipe_icode_cfg:params_update(Cfg, NewParams), + Info = enter_defines(NewParams, gb_trees:empty()), + InfoMap = gb_trees:insert({Start, in}, Info, gb_trees:empty()), + #state{info_map=InfoMap, cfg=NewCfg, liveness=Liveness, + ret_type=none_type(), + lookup_fun=CallFun, result_action=FinalFun} + end. + +-spec state__cfg(#state{}) -> cfg(). + +state__cfg(#state{cfg=Cfg}) -> + Cfg. + +-spec state__bb(#state{}, label()) -> bb(). + +state__bb(#state{cfg=Cfg}, Label) -> + BB = hipe_icode_cfg:bb(Cfg, Label), + true = hipe_bb:is_bb(BB), % Just an assert + BB. + +-spec state__bb_add(#state{}, label(), bb()) -> #state{}. + +state__bb_add(S=#state{cfg=Cfg}, Label, BB) -> + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, BB), + S#state{cfg=NewCfg}. + +state__lookup_fun(#state{lookup_fun=LF}) -> LF. + +state__result_action(#state{result_action=RA}) -> RA. + +state__ret_type(#state{ret_type=RT}) -> RT. + +state__ret_type_update(#state{ret_type=RT} = State, NewType) -> + TotType = sup(RT, NewType), + State#state{ret_type=TotType}. + +state__info_in(S, Label) -> + state__info(S, {Label, in}). + +state__info(#state{info_map=IM}, Key) -> + gb_trees:get(Key, IM). + +state__update_info(State, LabelInfo, Rewrite) -> + update_info(LabelInfo, State, [], Rewrite). + +update_info([{Label,InfoIn}|Rest], State, LabelAcc, Rewrite) -> + case state__info_in_update(State, Label, InfoIn) of + fixpoint -> + if Rewrite -> + update_info(Rest, State, [Label|LabelAcc], Rewrite); + true -> + update_info(Rest, State, LabelAcc, Rewrite) + end; + NewState -> + update_info(Rest, NewState, [Label|LabelAcc], Rewrite) + end; +update_info([], State, LabelAcc, _Rewrite) -> + {State, LabelAcc}. + +state__info_in_update(S=#state{info_map=IM,liveness=Liveness}, Label, Info) -> + LabelIn = {Label, in}, + case gb_trees:lookup(LabelIn, IM) of + none -> + LiveIn = hipe_icode_ssa:ssa_liveness__livein(Liveness, Label), + NamesLiveIn = [hipe_icode:var_name(Var) || Var <- LiveIn, + hipe_icode:is_var(Var)], + OldInfo = gb_trees:empty(), + case join_info_in(NamesLiveIn, OldInfo, Info) of + fixpoint -> + S#state{info_map=gb_trees:insert(LabelIn, OldInfo, IM)}; + NewInfo -> + S#state{info_map=gb_trees:enter(LabelIn, NewInfo, IM)} + end; + {value, OldInfo} -> + OldVars = gb_trees:keys(OldInfo), + case join_info_in(OldVars, OldInfo, Info) of + fixpoint -> + fixpoint; + NewInfo -> + S#state{info_map=gb_trees:update(LabelIn, NewInfo, IM)} + end + end. + +join_info_in(Vars, OldInfo, NewInfo) -> + case join_info_in(Vars, OldInfo, NewInfo, gb_trees:empty(), false) of + {Res, true} -> Res; + {_, false} -> fixpoint + end. + +join_info_in([Var|Left], Info1, Info2, Acc, Changed) -> + Type1 = gb_trees:lookup(Var, Info1), + Type2 = gb_trees:lookup(Var, Info2), + case {Type1, Type2} of + {none, none} -> + NewTree = gb_trees:insert(Var, none_type(), Acc), + join_info_in(Left, Info1, Info2, NewTree, true); + {none, {value, Val}} -> + NewTree = gb_trees:insert(Var, Val, Acc), + join_info_in(Left, Info1, Info2, NewTree, true); + {{value, Val}, none} -> + NewTree = gb_trees:insert(Var, Val, Acc), + join_info_in(Left, Info1, Info2, NewTree, Changed); + {{value, Val}, {value, Val}} -> + NewTree = gb_trees:insert(Var, Val, Acc), + join_info_in(Left, Info1, Info2, NewTree, Changed); + {{value, Val1}, {value, Val2}} -> + NewVal = + case sup(Val1, Val2) of + Val1 -> + NewChanged = Changed, + Val1; + Val -> + NewChanged = true, + Val + end, + NewTree = gb_trees:insert(Var, NewVal, Acc), + join_info_in(Left, Info1, Info2, NewTree, NewChanged) + end; +join_info_in([], _Info1, _Info2, Acc, NewChanged) -> + {Acc, NewChanged}. + +enter_defines([Def|Rest], Info) -> + enter_defines(Rest, enter_define(Def, Info)); +enter_defines([], Info) -> Info. + +enter_define({PossibleVar, Range = #range{}}, Info) -> + case hipe_icode:is_var(PossibleVar) of + true -> + gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + false -> + Info + end; +enter_define(PossibleVar, Info) -> + case hipe_icode:is_var(PossibleVar) of + true -> + case hipe_icode:variable_annotation(PossibleVar) of + {range_anno, #ann{range=Range}, _} -> + gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + _ -> + Info + end; + false -> + Info + end. + +enter_vals(Ins, Info) -> + NewInfo = enter_defines(hipe_icode:args(Ins), Info), + enter_defines(hipe_icode:defines(Ins), NewInfo). + +lookup(PossibleVar, Info) -> + case hipe_icode:is_var(PossibleVar) of + true -> + case gb_trees:lookup(hipe_icode:var_name(PossibleVar), Info) of + none -> + none_type(); + {value, Val} -> + Val + end; + false -> + none_type() + end. + +%% _________________________________________________________________ +%% +%% The worklist. +%% + +init_work(State) -> + %% Labels = hipe_icode_cfg:reverse_postorder(state__cfg(State)), + Labels = [hipe_icode_cfg:start_label(state__cfg(State))], + {Labels, [], sets:from_list(Labels)}. + +get_work({[Label|Left], List, Set}) -> + NewWork = {Left, List, sets:del_element(Label, Set)}, + {Label, NewWork}; +get_work({[], [], _Set}) -> + fixpoint; +get_work({[], List, Set}) -> + get_work({lists:reverse(List), [], Set}). + +add_work(Work = {List1, List2, Set}, [Label|Left]) -> + case sets:is_element(Label, Set) of + true -> + add_work(Work, Left); + false -> + %% io:format("Adding work: ~w\n", [Label]), + add_work({List1, [Label|List2], sets:add_element(Label, Set)}, Left) + end; +add_work(Work, []) -> + Work. + +convert_cfg_to_types(Cfg) -> + Lbls = hipe_icode_cfg:reverse_postorder(Cfg), + lists:foldl(fun convert_lbl_to_type/2, Cfg, Lbls). + +convert_lbl_to_type(Lbl, Cfg) -> + BB = hipe_icode_cfg:bb(Cfg, Lbl), + Code = hipe_bb:code(BB), + NewCode = [convert_instr_to_type(I) || I <- Code], + hipe_icode_cfg:bb_add(Cfg, Lbl, hipe_bb:mk_bb(NewCode)). + +convert_instr_to_type(I) -> + Uses = hipe_icode:uses(I), + UseSubstList = [{Use, convert_to_types(Use)} || + Use <- Uses, hipe_icode:is_annotated_variable(Use)], + NewI = hipe_icode:subst_uses(UseSubstList, I), + Defs = hipe_icode:defines(NewI), + DefSubstList = [{Def, convert_to_types(Def)} || + Def <- Defs, hipe_icode:is_annotated_variable(Def)], + hipe_icode:subst_defines(DefSubstList, NewI). + +convert_to_types(VarOrReg) -> + Annotation = + case hipe_icode:variable_annotation(VarOrReg) of + {range_anno, Ann, _} -> + {type_anno, convert_ann_to_types(Ann), fun erl_types:t_to_string/1}; + {type_anno, _, _} = TypeAnn -> + TypeAnn + end, + hipe_icode:annotate_variable(VarOrReg, Annotation). + +convert_ann_to_types(#ann{range=#range{range={Min,Max}, other=false}}) -> + t_from_range_unsafe(Min, Max); +convert_ann_to_types(#ann{range=#range{range=empty, other=false}}) -> + t_none(); +convert_ann_to_types(#ann{range=#range{other=true}, type=Type}) -> + Type. + +%%===================================================================== +%% Icode Coordinator Callbacks +%%===================================================================== + +-spec replace_nones([#range{}]) -> [#range{}]. +replace_nones(Args) -> + [replace_none(Arg) || Arg <- Args]. + +replace_none(Arg) -> + case range__is_none(Arg) of + true -> any_type(); + false -> Arg + end. + +-spec update__info([#range{}], [#range{}]) -> {boolean(), [#ann{}]}. +update__info(NewRanges, OldRanges) -> + SupFun = fun (Ann, Range) -> + join_info(Ann, Range, fun safe_widen/3) + end, + EqFun = fun (X, Y) -> X =:= Y end, + ResRanges = lists:zipwith(SupFun, OldRanges, NewRanges), + Change = lists:zipwith(EqFun, ResRanges, OldRanges), + {lists:all(fun (X) -> X end, Change), ResRanges}. + +-spec new__info/1 :: ([#range{}]) -> [#ann{}]. +new__info(NewRanges) -> + [#ann{range=Range,count=1,type=t_any()} || Range <- NewRanges]. + +-spec return__info/1 :: ([#ann{}]) -> [#range{}]. +return__info(Ranges) -> + [Range || #ann{range=Range} <- Ranges]. + +-spec return_none/0 :: () -> [#range{},...]. +return_none() -> + [none_type()]. + +-spec return_none_args/2 :: (#cfg{}, mfa()) -> [#range{}]. +return_none_args(Cfg, {_M,_F,A}) -> + NoArgs = + case hipe_icode_cfg:is_closure(Cfg) of + true -> hipe_icode_cfg:closure_arity(Cfg) + 1; + false -> A + end, + lists:duplicate(NoArgs, none_type()). + +-spec return_any_args/2 :: (#cfg{}, mfa()) -> [#range{}]. +return_any_args(Cfg, {_M,_F,A}) -> + NoArgs = + case hipe_icode_cfg:is_closure(Cfg) of + true -> hipe_icode_cfg:closure_arity(Cfg) + 1; + false -> A + end, + lists:duplicate(NoArgs, any_type()). + +%%===================================================================== + +next_up_limit(X) when is_integer(X), X < 0 -> 0; +next_up_limit(X) when is_integer(X), X < 255 -> 255; +next_up_limit(X) when is_integer(X), X < 16#10ffff -> 16#10ffff; +next_up_limit(X) when is_integer(X), X < 16#7ffffff -> 16#7ffffff; +next_up_limit(X) when is_integer(X), X < 16#7fffffff -> 16#7fffffff; +next_up_limit(X) when is_integer(X), X < 16#ffffffff -> 16#ffffffff; +next_up_limit(X) when is_integer(X), X < 16#fffffffffff -> 16#fffffffffff; +next_up_limit(X) when is_integer(X), X < 16#7fffffffffffffff -> 16#7fffffffffffffff; +next_up_limit(_X) -> pos_inf. + +next_down_limit(X) when is_integer(X), X > 0 -> 0; +next_down_limit(X) when is_integer(X), X > -256 -> -256; +next_down_limit(X) when is_integer(X), X > -16#10ffff -> -16#10ffff; +next_down_limit(X) when is_integer(X), X > -16#8000000 -> -16#8000000; +next_down_limit(X) when is_integer(X), X > -16#80000000 -> -16#80000000; +next_down_limit(X) when is_integer(X), X > -16#800000000000000 -> -16#800000000000000; +next_down_limit(_X) -> neg_inf. diff --git a/lib/hipe/icode/hipe_icode_split_arith.erl b/lib/hipe/icode/hipe_icode_split_arith.erl new file mode 100644 index 0000000000..d59f9247fa --- /dev/null +++ b/lib/hipe/icode/hipe_icode_split_arith.erl @@ -0,0 +1,553 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%------------------------------------------------------------------- +%% File : hipe_icode_split_arith.erl +%% Author : Tobias Lindahl +%% Description : +%% +%% Created : 12 Nov 2003 by Tobias Lindahl +%%------------------------------------------------------------------- +-module(hipe_icode_split_arith). + +-export([cfg/3]). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). + +-define(MIN_RATIO, 0.005). + +%%------------------------------------------------------------------- + +-spec cfg(#cfg{}, mfa(), comp_options()) -> #cfg{}. + +cfg(Cfg, _MFA, Options) -> + Icode = hipe_icode_cfg:cfg_to_linear(Cfg), + case proplists:get_bool(split_arith_unsafe, Options) of + true -> make_split_unsafe(Icode); + _ -> + case preprocess(Icode) of + {do_not_split, _Ratio} -> + Cfg; + {split, _Ratio, Icode1} -> + NewCfg = split(Icode1), + %% hipe_icode_cfg:pp(NewCfg), + NewCfg + end + end. + +check_nofix_const([Arg1|Arg2]) -> + case hipe_icode:is_const(Arg1) of + true -> + Val1 = hipe_tagscheme:fixnum_val(hipe_icode:const_value(Arg1)), + case hipe_tagscheme:is_fixnum(Val1) of + true -> + check_nofix_const(Arg2); + false -> {no} + end; + false -> + check_nofix_const(Arg2) + end; +check_nofix_const([]) -> true. + +check_const([I|Left]) -> + case I of + #icode_call{} -> + case is_arith(I) of + true -> + Args = hipe_icode:call_args(I), + case check_nofix_const(Args) of + {no} -> {do_not_split}; + _ -> check_const(Left) + end; + _ -> check_const(Left) + end; + _ -> check_const(Left) + end; +check_const([]) -> {yes}. + +make_split_unsafe(Icode) -> + LinearCode = hipe_icode:icode_code(Icode), + NewLinearCode = change_unsafe(LinearCode), + NewIcode = hipe_icode:icode_code_update(Icode, NewLinearCode), + hipe_icode_cfg:linear_to_cfg(NewIcode). + +change_unsafe([I|Is]) -> + case I of + #icode_call{} -> + case is_arith_extra_unsafe(I) of + true -> + NewOp = arithop_to_extra_unsafe(hipe_icode:call_fun(I)), + NewI1 = hipe_icode:call_fun_update(I, NewOp), + [NewI1|change_unsafe(Is)]; + false -> + [I|change_unsafe(Is)] + end; + _ -> + [I|change_unsafe(Is)] + end; +change_unsafe([]) -> []. + +preprocess(Icode) -> + LinearCode = hipe_icode:icode_code(Icode), + case check_const(LinearCode) of + {do_not_split} -> %%io:format("NO FIXNUM....."), + {do_not_split, 1.9849}; % Ratio val is ignored + _ -> + {NofArith, NofIns, NewLinearCode} = preprocess_code(LinearCode), + case NofArith / NofIns of + X when X >= ?MIN_RATIO -> + NewIcode = hipe_icode:icode_code_update(Icode, NewLinearCode), + {split, X, NewIcode}; + Y -> + {do_not_split, Y} + end + end. + +preprocess_code([H|Code]) -> + preprocess_code(Code, 0, 0, [H]). + +preprocess_code([I|Left], NofArith, NofIns, CodeAcc = [PrevI|_]) -> + case I of + #icode_call{} -> + case is_arith(I) of + true -> + %% Note that we need to put these instructions in a separate + %% basic block since we need the ability to fail to these + %% instructions, but also fail from them. The basic block + %% merger will take care of unnecessary splits. + + %% If call is an arithmetic operation replace the operation + %% with the specified replacement operator. + NewOp = arithop_to_split(hipe_icode:call_fun(I)), + NewI = hipe_icode:call_fun_update(I, NewOp), + case hipe_icode:is_label(PrevI) of + true -> + case (Left =:= []) orelse hipe_icode:is_label(hd(Left)) of + true -> + preprocess_code(Left, NofArith+1, NofIns+1, [NewI|CodeAcc]); + false -> + NewLabel = hipe_icode:mk_new_label(), + NewLabelName = hipe_icode:label_name(NewLabel), + NewI1 = hipe_icode:call_set_continuation(NewI, NewLabelName), + preprocess_code(Left, NofArith+1, NofIns+1, + [NewLabel, NewI1|CodeAcc]) + end; + false -> + RevPreCode = + case hipe_icode:is_branch(PrevI) of + true -> + [hipe_icode:mk_new_label()]; + false -> + NewLabel1 = hipe_icode:mk_new_label(), + NewLabelName1 = hipe_icode:label_name(NewLabel1), + [NewLabel1, hipe_icode:mk_goto(NewLabelName1)] + end, + case (Left =:= []) orelse hipe_icode:is_label(hd(Left)) of + true -> + preprocess_code(Left, NofArith+1, NofIns+1, + [NewI|RevPreCode] ++ CodeAcc); + false -> + NewLabel2 = hipe_icode:mk_new_label(), + NewLabelName2 = hipe_icode:label_name(NewLabel2), + NewI1 = hipe_icode:call_set_continuation(NewI, NewLabelName2), + preprocess_code(Left, NofArith+1, NofIns+1, + [NewLabel2, NewI1|RevPreCode] ++ CodeAcc) + end + end; + false -> + preprocess_code(Left, NofArith, NofIns + 1, [I|CodeAcc]) + end; + #icode_label{} -> + %% Don't count labels as instructions. + preprocess_code(Left, NofArith, NofIns, [I|CodeAcc]); + _ -> + preprocess_code(Left, NofArith, NofIns+1, [I|CodeAcc]) + end; +preprocess_code([], NofArith, NofIns, CodeAcc) -> + {NofArith, NofIns, lists:reverse(CodeAcc)}. + +split(Icode) -> + LinearCode = hipe_icode:icode_code(Icode), + %% create a new icode label for each existing icode label + %% create mappings, NewToOld and OldToNew. + AllLabels = lists:foldl(fun(I, Acc) -> + case hipe_icode:is_label(I) of + true -> [hipe_icode:label_name(I)|Acc]; + false -> Acc + end + end, [], LinearCode), + {OldToNewMap, NewToOldMap} = new_label_maps(AllLabels), + + %% the call below doubles the number of basic blocks with the new + %% labels instead of the old. + + NewLinearCode = map_code(LinearCode, OldToNewMap), + NewIcode = hipe_icode:icode_code_update(Icode, NewLinearCode), + NewCfg = hipe_icode_cfg:linear_to_cfg(NewIcode), + NewCfg2 = + insert_tests(NewCfg, [gb_trees:get(X, OldToNewMap) || X<-AllLabels], + NewToOldMap, OldToNewMap), + %% io:format("split(Cfg): Inserting testsL Done\n", []), + NewCfg2. + +map_code(OldCode, LabelMap) -> + AddedCode = map_code(OldCode, none, LabelMap, []), + OldCode ++ AddedCode. + +map_code([I|Left], ArithFail, LabelMap, Acc) -> + case I of + #icode_call{} -> + case is_arith(I) of + true -> + case hipe_icode:defines(I) of + []-> + map_code(Left, ArithFail, LabelMap, [redirect(I, LabelMap)|Acc]); + _ -> + NewOp = split_to_unsafe(I), + NewI1 = hipe_icode:call_fun_update(I, NewOp), + NewI2 = redirect(NewI1, LabelMap), + NewI3 = hipe_icode:call_set_fail_label(NewI2, ArithFail), + map_code(Left, ArithFail, LabelMap, [NewI3|Acc]) + end; + false -> + map_code(Left, ArithFail, LabelMap, [redirect(I, LabelMap)|Acc]) + end; + #icode_label{} -> + LabelName = hipe_icode:label_name(I), + NewLabel = hipe_icode:mk_label(gb_trees:get(LabelName, LabelMap)), + map_code(Left, LabelName, LabelMap, [NewLabel|Acc]); + _ -> + map_code(Left, ArithFail, LabelMap, [redirect(I, LabelMap)|Acc]) + end; +map_code([], _ArithFail, _LabelMap, Acc) -> + lists:reverse(Acc). + +insert_tests(Cfg, Labels,NewToOldMap, OldToNewMap) -> + InfoMap = infomap_init(Labels), + %%io:format("insert_tests/3: Finding testpoints ...\n", []), + NewInfoMap = find_testpoints(Cfg, Labels, InfoMap), + %%io:format("insert_tests/3: Finding testpoints: Done\n", []), + %%io:format("insert_tests/3: Infomap: ~w\n", [gb_trees:to_list(NewInfoMap)]), + make_tests(Cfg, NewInfoMap, NewToOldMap, OldToNewMap). + +find_testpoints(Cfg, Labels, InfoMap) -> + case find_testpoints(Labels, InfoMap, Cfg, false) of + {dirty, NewInfoMap} -> + %%io:format("find_testpoints/3: Looping\n", []), + find_testpoints(Cfg, Labels, NewInfoMap); + fixpoint -> + InfoMap + end. + +find_testpoints([Lbl|Left], InfoMap, Cfg, Dirty) -> + Code = hipe_bb:code(hipe_icode_cfg:bb(Cfg, Lbl)), + InfoOut = join_info(hipe_icode_cfg:succ(Cfg, Lbl), InfoMap), + OldInfoIn = infomap_get_all(Lbl, InfoMap), + NewInfoIn = traverse_code(lists:reverse(Code), InfoOut), + case (gb_sets:is_subset(OldInfoIn, NewInfoIn) andalso + gb_sets:is_subset(NewInfoIn, OldInfoIn)) of + true -> + find_testpoints(Left, InfoMap, Cfg, Dirty); + false -> + %%io:format("find_testpoints/4: Label: ~w: OldMap ~w\nNewMap: ~w\n", + %% [Lbl, gb_sets:to_list(OldInfoIn), gb_sets:to_list(NewInfoIn)]), + NewInfoMap = gb_trees:update(Lbl, NewInfoIn, InfoMap), + find_testpoints(Left, NewInfoMap, Cfg, true) + end; +find_testpoints([], InfoMap, _Cfg, Dirty) -> + if Dirty -> {dirty, InfoMap}; + true -> fixpoint + end. + +traverse_code([I|Left], Info) -> + NewInfo = kill_defines(I, Info), + case I of + #icode_call{} -> + case is_unsafe_arith(I) of + true -> + %% The dst is sure to be a fixnum. Remove the 'killed' mark. + Dst = hd(hipe_icode:call_dstlist(I)), + NewInfo1 = gb_sets:delete_any({killed, Dst}, NewInfo), + NewInfo2 = + gb_sets:union(NewInfo1, gb_sets:from_list(hipe_icode:uses(I))), + traverse_code(Left, NewInfo2); + false -> + traverse_code(Left, NewInfo) + end; + #icode_move{} -> + Dst = hipe_icode:move_dst(I), + case gb_sets:is_member(Dst, Info) of + true -> + %% The dst is an argument to an arith op. Transfer the test + %% to the src and remove the 'killed' mark from the dst. + NewInfo1 = gb_sets:delete({killed, Dst}, NewInfo), + Src = hipe_icode:move_src(I), + case hipe_icode:is_const(Src) of + true -> + traverse_code(Left, NewInfo1); + false -> + NewInfo2 = gb_sets:add(Src, NewInfo1), + traverse_code(Left, NewInfo2) + end; + false -> + traverse_code(Left, NewInfo) + end; + _ -> + traverse_code(Left, NewInfo) + end; +traverse_code([], Info) -> + Info. + +kill_defines(I, Info) -> + Defines = hipe_icode:defines(I), + case [X || X<-Defines, gb_sets:is_member(X, Info)] of + [] -> + Info; + List -> + TmpInfo = gb_sets:difference(Info, gb_sets:from_list(List)), + gb_sets:union(gb_sets:from_list([{killed, X} || X <- List]), TmpInfo) + end. + +make_tests(Cfg, InfoMap, NewToOldMap, OldToNewMap) -> + %%io:format("make_tests 0:\n",[]), + WorkList = make_worklist(gb_trees:keys(NewToOldMap), InfoMap, + NewToOldMap, Cfg, []), + %%io:format("make_tests 1:Worklist: ~w\n",[WorkList]), + NewCfg = make_tests(WorkList, Cfg), + %%io:format("make_tests 2\n",[]), + %% If the arguments to this function are used in unsafe arith + %% they should be marked as killed by a new start block. + Args = hipe_icode_cfg:params(NewCfg), + Start = hipe_icode_cfg:start_label(NewCfg), + AltStart = gb_trees:get(Start, OldToNewMap), + UnsafeIn = gb_sets:to_list(infomap_get(AltStart, InfoMap)), + case [X || X <- UnsafeIn, Y <- Args, X =:= Y] of + [] -> + hipe_icode_cfg:start_label_update(NewCfg, AltStart); + KilledArgs -> + NewStart = hipe_icode:label_name(hipe_icode:mk_new_label()), + NewCfg1 = insert_test_block(NewStart, AltStart, Start, + KilledArgs, NewCfg), + hipe_icode_cfg:start_label_update(NewCfg1, NewStart) + end. + +make_worklist([Lbl|Left], InfoMap, LabelMap, Cfg, Acc) -> + Vars = infomap_get_killed(Lbl, InfoMap), + case gb_sets:is_empty(Vars) of + true -> make_worklist(Left, InfoMap, LabelMap, Cfg, Acc); + false -> + %% io:format("make_worklist 1 ~w\n", [Vars]), + NewAcc0 = + [{Lbl, Succ, gb_trees:get(Succ, LabelMap), + gb_sets:intersection(infomap_get(Succ, InfoMap), Vars)} + || Succ <- hipe_icode_cfg:succ(Cfg, Lbl)], + NewAcc = [{Label, Succ, FailLbl, gb_sets:to_list(PrunedVars)} + || {Label, Succ, FailLbl, PrunedVars} <- NewAcc0, + gb_sets:is_empty(PrunedVars) =:= false] ++ Acc, + %% io:format("make_worklist 2\n", []), + make_worklist(Left, InfoMap, LabelMap, Cfg, NewAcc) + end; +make_worklist([], _InfoMap, _LabelMap, _Cfg, Acc) -> + Acc. + +make_tests([{FromLbl, ToLbl, FailLbl, Vars}|Left], Cfg) -> + NewLbl = hipe_icode:label_name(hipe_icode:mk_new_label()), + TmpCfg = insert_test_block(NewLbl, ToLbl, FailLbl, Vars, Cfg), + NewCfg = hipe_icode_cfg:redirect(TmpCfg, FromLbl, ToLbl, NewLbl), + make_tests(Left, NewCfg); +make_tests([], Cfg) -> + Cfg. + +insert_test_block(NewLbl, Succ, FailLbl, Vars, Cfg) -> + Code = [hipe_icode:mk_type(Vars, fixnum, Succ, FailLbl, 0.99)], + BB = hipe_bb:mk_bb(Code), + hipe_icode_cfg:bb_add(Cfg, NewLbl, BB). + +infomap_init(Labels) -> + infomap_init(Labels, gb_trees:empty()). + +infomap_init([Lbl|Left], Map) -> + infomap_init(Left, gb_trees:insert(Lbl, gb_sets:empty(), Map)); +infomap_init([], Map) -> + Map. + +join_info(Labels, Map) -> + join_info(Labels, Map, gb_sets:empty()). + +join_info([Lbl|Left], Map, Set) -> + join_info(Left, Map, gb_sets:union(Set, infomap_get(Lbl, Map))); +join_info([], _Map, Set) -> + Set. + +infomap_get(Lbl, Map) -> + case gb_trees:lookup(Lbl, Map) of + none -> gb_sets:empty(); + {value, Val} -> + gb_sets:filter(fun(X) -> case X of + {killed, _} -> false; + _ -> true + end + end, + Val) + end. + +infomap_get_all(Lbl, Map) -> + case gb_trees:lookup(Lbl, Map) of + none -> gb_sets:empty(); + {value, Val} -> Val + end. + +infomap_get_killed(Lbl, Map) -> + case gb_trees:lookup(Lbl, Map) of + none -> gb_sets:empty(); + {value, Val} -> + Fun = fun(X, Acc) -> + case X of + {killed, Var} -> [Var|Acc]; + _ -> Acc + end + end, + gb_sets:from_list(lists:foldl(Fun, [], gb_sets:to_list(Val))) + end. + +%%%------------------------------------------------------------------- +%%% General replace of '+'/'-' to super safe version + +arithop_to_split(Op) -> + case Op of + '+' -> gen_add; + '-' -> gen_sub; + _ -> Op + end. + +%%%------------------------------------------------------------------- +%%% Check if it's an arith op that needs to be split + +is_arith(I) -> + case hipe_icode:call_fun(I) of + '+' -> true; + '-' -> true; + gen_add -> true; + gen_sub -> true; + 'bor' -> true; + 'bxor' -> true; + 'bsr' -> + %% Need to check that the second argument is a non-negative + %% fixnum. We only allow for constants to simplify things. + [_, Arg2] = hipe_icode:args(I), + hipe_icode:is_const(Arg2) andalso (hipe_icode:const_value(Arg2) >= 0); + 'bsl' -> + %% There are major issues with bsl since it doesn't flag + %% overflow. We cannot allow for this in this optimization pass. + false; + 'bnot' -> true; + 'band' -> true; + _ -> false + end. + +%%%------------------------------------------------------------------- + +is_unsafe_arith(I) -> + case hipe_icode:call_fun(I) of + unsafe_add -> true; + unsafe_sub -> true; + unsafe_bor -> true; + unsafe_bxor -> true; + unsafe_bsr -> true; + unsafe_bsl -> true; + unsafe_bnot -> true; + unsafe_band -> true; + _ -> false + end. + +split_to_unsafe(I) -> + case hipe_icode:call_fun(I) of + gen_add -> unsafe_add; + gen_sub -> unsafe_sub; + 'bor' -> unsafe_bor; + 'bxor' -> unsafe_bxor; + 'bsr' -> + case is_arith(I) of + true -> unsafe_bsr; + false -> 'bsr' + end; + 'bsl' -> + %% There are major issues with bsl since it doesn't flag + %% overflow. We cannot allow for this in this optimization pass. + 'bsl'; + 'bnot' -> unsafe_bnot; + 'band' -> unsafe_band; + Op -> Op + end. + +%%%------------------------------------------------------------------- +%%% FLAG = split_arith_unsafe + +is_arith_extra_unsafe(I) -> + case hipe_icode:call_fun(I) of + '+' -> true; + '-' -> true; + 'bor' -> true; + 'bxor' -> true; + 'bsr' -> is_arith(I); + 'bsl' -> false; %% See comment in is_arith/1 + 'bnot' -> true; + 'band' -> true; + _ -> false + end. + +arithop_to_extra_unsafe(Op) -> + case Op of + '+' -> extra_unsafe_add; + '-' -> extra_unsafe_sub; + 'bor' -> unsafe_bor; + 'bxor' -> unsafe_bxor; + 'bsr' -> unsafe_bsr; + 'bsl' -> 'bsl'; %% See comment in split_to_unsafe/1 + 'bnot' -> unsafe_bnot; + 'band' -> unsafe_band + end. + +%%%------------------------------------------------------------------- + +redirect(I, LabelMap) -> + case hipe_icode:successors(I) of + [] -> I; + Successors -> + RedirectMap = [{X, gb_trees:get(X, LabelMap)} || X <- Successors], + redirect_1(RedirectMap, I) + end. + +redirect_1([{From, To}|Left], I) -> + redirect_1(Left, hipe_icode:redirect_jmp(I, From, To)); +redirect_1([], I) -> + I. + +new_label_maps(Labels) -> + new_label_maps(Labels, gb_trees:empty(), gb_trees:empty()). + +new_label_maps([Lbl|Left], Map1, Map2) -> + NewLabel = hipe_icode:label_name(hipe_icode:mk_new_label()), + NewMap1 = gb_trees:insert(Lbl, NewLabel, Map1), + NewMap2 = gb_trees:insert(NewLabel, Lbl, Map2), + new_label_maps(Left, NewMap1, NewMap2); +new_label_maps([], Map1, Map2) -> + {Map1, Map2}. diff --git a/lib/hipe/icode/hipe_icode_ssa.erl b/lib/hipe/icode/hipe_icode_ssa.erl new file mode 100755 index 0000000000..719d5d8f45 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_ssa.erl @@ -0,0 +1,98 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_icode_ssa.erl +%% Author : +%% Created : +%% Purpose : Provides interface functions for converting Icode into +%% SSA form and back using the generic SSA converter. +%%---------------------------------------------------------------------- + +-module(hipe_icode_ssa). + +%% The following defines are needed by the included file below +-define(CODE, hipe_icode). +-define(CFG, hipe_icode_cfg). +-define(LIVENESS, hipe_icode_liveness). +-define(LIVENESS_NEEDED, true). + +-include("hipe_icode.hrl"). +-include("../ssa/hipe_ssa.inc"). + +%% Declarations for exported functions which are Icode-specific. +-spec ssa_liveness__analyze(#cfg{}) -> gb_tree(). +-spec ssa_liveness__livein(_, icode_lbl()) -> [#icode_variable{}]. +%% -spec ssa_liveness__livein(_, icode_lbl(), _) -> [#icode_var{}]. + +%%---------------------------------------------------------------------- +%% Auxiliary operations which seriously differ between Icode and RTL. +%%---------------------------------------------------------------------- + +defs_to_rename(Statement) -> + hipe_icode:defines(Statement). + +uses_to_rename(Statement) -> + hipe_icode:uses(Statement). + +liveout_no_succ() -> + []. + +%%---------------------------------------------------------------------- + +reset_var_indx() -> + hipe_gensym:set_var(icode, 0). + +%%---------------------------------------------------------------------- + +is_fp_temp(Temp) -> + hipe_icode:is_fvar(Temp). + +mk_new_fp_temp() -> + hipe_icode:mk_new_fvar(). + +%%---------------------------------------------------------------------- +%% Procedure : makePhiMove +%% Purpose : Create an ICode-specific version of a move instruction +%% depending on the type of the arguments. +%% Arguments : Dst, Src - the arguments of a Phi instruction that is +%% to be moved up the predecessor block as part +%% of the SSA unconvert phase. +%% Returns : Code +%%---------------------------------------------------------------------- + +makePhiMove(Dst, Src) -> + case hipe_icode:is_fvar(Dst) of + false -> + case hipe_icode:is_fvar(Src) of + false -> + hipe_icode:mk_move(Dst, Src); + true -> + hipe_icode:mk_primop([Dst], unsafe_tag_float, [Src]) + end; + true -> + case hipe_icode:is_fvar(Src) of + true -> + hipe_icode:mk_move(Dst, Src); + false -> + hipe_icode:mk_primop([Dst], conv_to_float, [Src]) + end + end. + +%%---------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl new file mode 100644 index 0000000000..f1640b1cee --- /dev/null +++ b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl @@ -0,0 +1,728 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ============================================================================ +%% Filename : hipe_icode_ssa_const_prop.erl +%% Authors : Daniel Luna, Erik Andersson +%% Purpose : Perform sparse conditional constant propagation on Icode. +%% Notes : Works on the control-flow graph. +%% +%% History : * 2003-03-05: Created. +%% * 2003-08-11: Passed simple testsuite. +%% * 2003-10-01: Passed compiler testsuite. +%% ============================================================================ +%% +%% Exports: propagate/1. +%% +%% ============================================================================ +%% +%% TODO: +%% +%% Take care of failures in call and replace operation with appropriate +%% failure. +%% +%% Handle ifs with non-binary operators +%% +%% We want multisets for easier (and faster) creation of env->ssa_edges +%% +%% Maybe do things with begin_handler, begin_try if possible +%% +%% Propagation of constant arguments when some of the arguments are bottom +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_icode_ssa_const_prop). +-export([propagate/1]). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). +-include("hipe_icode_primops.hrl"). + +-define(CONST_PROP_MSG(Str,L), ok). +%%-define(CONST_PROP_MSG(Str,L), io:format(Str,L)). + +%%-define(DEBUG, 1). + +%%----------------------------------------------------------------------------- +%% Include stuff shared between SCCP on Icode and RTL. +%% NOTE: Needs to appear after DEBUG is possibly defined. +%%----------------------------------------------------------------------------- + +-define(CODE, hipe_icode). +-define(CFG, hipe_icode_cfg). + +-include("../ssa/hipe_ssa_const_prop.inc"). + +%%----------------------------------------------------------------------------- + +visit_expression(Instruction, Environment) -> + EvaluatedArguments = [lookup_lattice_value(Argument, Environment) + || Argument <- hipe_icode:args(Instruction)], + case Instruction of + #icode_move{} -> + visit_move (Instruction, EvaluatedArguments, Environment); + #icode_if{} -> + visit_if (Instruction, EvaluatedArguments, Environment); + #icode_goto{} -> + visit_goto (Instruction, EvaluatedArguments, Environment); + #icode_type{} -> + visit_type (Instruction, EvaluatedArguments, Environment); + #icode_call{} -> + visit_call (Instruction, EvaluatedArguments, Environment); + #icode_switch_val{} -> + visit_switch_val (Instruction, EvaluatedArguments, Environment); + #icode_switch_tuple_arity{} -> + visit_switch_tuple_arity(Instruction, EvaluatedArguments, Environment); + #icode_begin_handler{} -> + visit_begin_handler (Instruction, EvaluatedArguments, Environment); + #icode_begin_try{} -> + visit_begin_try (Instruction, EvaluatedArguments, Environment); + #icode_fail{} -> + visit_fail (Instruction, EvaluatedArguments, Environment); + _ -> + %% label, end_try, comment, return, + {[], [], Environment} + end. + +%%----------------------------------------------------------------------------- + +visit_begin_try(Instruction, [], Environment) -> + Label = hipe_icode:begin_try_label(Instruction), + Successor = hipe_icode:begin_try_successor(Instruction), + {[Label, Successor], [], Environment}. + +%%----------------------------------------------------------------------------- + +visit_begin_handler(Instruction, _Arguments, Environment) -> + Destinations = hipe_icode:begin_handler_dstlist(Instruction), + {Environment1, SSAWork} = + lists:foldl(fun (Dst, {Env0,Work0}) -> + {Env, Work} = update_lattice_value({Dst, bottom}, Env0), + {Env, Work ++ Work0} + end, + {Environment, []}, + Destinations), + {[], SSAWork, Environment1}. + +%%----------------------------------------------------------------------------- + +visit_switch_val(Instruction, [Argument], Environment) -> + Cases = hipe_icode:switch_val_cases(Instruction), + FailLabel = hipe_icode:switch_val_fail_label(Instruction), + case Argument of + bottom -> + FlowWork = [Label || {_Value, Label} <- Cases], + FlowWork1 = [FailLabel | FlowWork], + {FlowWork1, [], Environment}; + _ -> + Target = get_switch_target(Cases, Argument, FailLabel), + {[Target], [], Environment} + end. + +%%----------------------------------------------------------------------------- + +visit_switch_tuple_arity(Instruction, [Argument], Environment) -> + Cases = hipe_icode:switch_tuple_arity_cases(Instruction), + FailLabel = hipe_icode:switch_tuple_arity_fail_label(Instruction), + case Argument of + bottom -> + FlowWork = [Label || {_Value, Label} <- Cases], + FlowWork1 = [FailLabel | FlowWork], + {FlowWork1, [], Environment}; + Constant -> + UnTagged = hipe_icode:const_value(Constant), + case is_tuple(UnTagged) of + true -> + Target = get_switch_target(Cases, tuple_size(UnTagged), FailLabel), + {[Target], [], Environment}; + false -> + {[FailLabel], [], Environment} + end + end. + +%%----------------------------------------------------------------------------- + +get_switch_target([], _Argument, FailLabel) -> + FailLabel; +get_switch_target([{CaseValue, Target} | CaseList], Argument, FailLabel) -> + case CaseValue =:= Argument of + true -> + Target; + false -> + get_switch_target(CaseList, Argument, FailLabel) + end. + +%%----------------------------------------------------------------------------- + +visit_move(Instruction, [SourceValue], Environment) -> + Destination = hipe_icode:move_dst(Instruction), + {Environment1, SSAWork} = update_lattice_value({Destination, SourceValue}, + Environment), + {[], SSAWork, Environment1}. + +%%----------------------------------------------------------------------------- + +visit_if(Instruction, Arguments, Environment) -> + FlowWork = + case evaluate_if(hipe_icode:if_op(Instruction), Arguments) of + true -> + TrueLabel = hipe_icode:if_true_label(Instruction), + [TrueLabel]; + false -> + FalseLabel = hipe_icode:if_false_label(Instruction), + [FalseLabel]; + bottom -> + TrueLabel = hipe_icode:if_true_label(Instruction), + FalseLabel = hipe_icode:if_false_label(Instruction), + [TrueLabel, FalseLabel] + end, + {FlowWork, [], Environment}. + +%%----------------------------------------------------------------------------- + +visit_goto(Instruction, _Arguments, Environment) -> + GotoLabel = hipe_icode:goto_label(Instruction), + FlowWork = [GotoLabel], + {FlowWork, [], Environment}. + +%%----------------------------------------------------------------------------- + +visit_fail(Instruction, _Arguments, Environment) -> + FlowWork = hipe_icode:successors(Instruction), + {FlowWork, [], Environment}. + +%%----------------------------------------------------------------------------- + +visit_type(Instruction, Values, Environment) -> + FlowWork = + case evaluate_type(hipe_icode:type_test(Instruction), Values) of + true -> + TrueLabel = hipe_icode:type_true_label(Instruction), + [TrueLabel]; + false -> + FalseLabel = hipe_icode:type_false_label(Instruction), + [FalseLabel]; + bottom -> + TrueLabel = hipe_icode:type_true_label(Instruction), + FalseLabel = hipe_icode:type_false_label(Instruction), + [TrueLabel, FalseLabel] + end, + {FlowWork, [], Environment}. + +%%----------------------------------------------------------------------------- + +visit_call(Ins, Args, Environment) -> + Dsts = hipe_icode:call_dstlist(Ins), + Fun = hipe_icode:call_fun(Ins), + Fail = call_fail_labels(Ins), + Cont = call_continuation_labels(Ins), + visit_call(Dsts, Args, Fun, Cont, Fail, Environment). + +visit_call(Dst, Args, Fun, Cont, Fail, Environment) -> + {FlowWork, {Environment1, SSAWork}} = + case lists:any(fun(X) -> (X =:= bottom) end, Args) of + true -> + {Fail ++ Cont, update_lattice_value({Dst, bottom}, Environment)}; + false -> + ConstArgs = [hipe_icode:const_value(Argument) || Argument <- Args], + try evaluate_call_or_enter(ConstArgs, Fun) of + bottom -> + {Fail ++ Cont, update_lattice_value({Dst, bottom}, Environment)}; + Constant -> + {Cont, update_lattice_value({Dst, Constant}, Environment)} + catch + _:_ -> + {Fail, update_lattice_value({Dst, bottom}, Environment)} + end + end, + {FlowWork, SSAWork, Environment1}. + +%%----------------------------------------------------------------------------- + +call_fail_labels(I) -> + case hipe_icode:call_fail_label(I) of + [] -> []; + Label -> [Label] + end. + +call_continuation_labels(I) -> + case hipe_icode:call_continuation(I) of + [] -> []; + Label -> [Label] + end. + +%%----------------------------------------------------------------------------- + +%% Unary calls +evaluate_call_or_enter([Argument], Fun) -> + case Fun of + mktuple -> + hipe_icode:mk_const(list_to_tuple([Argument])); + unsafe_untag_float -> + hipe_icode:mk_const(float(Argument)); + conv_to_float -> + hipe_icode:mk_const(float(Argument)); + fnegate -> + hipe_icode:mk_const(0.0 - Argument); + 'bnot' -> + hipe_icode:mk_const(Argument); + #unsafe_element{index=N} -> + hipe_icode:mk_const(element(N, Argument)); + {erlang, hd, 1} -> + hipe_icode:mk_const(hd(Argument)); + {erlang, tl, 1} -> + hipe_icode:mk_const(tl(Argument)); + {erlang, atom_to_list, 1} -> + hipe_icode:mk_const(atom_to_list(Argument)); + {erlang, list_to_atom, 1} -> + hipe_icode:mk_const(list_to_atom(Argument)); + {erlang, tuple_to_list, 1} -> + hipe_icode:mk_const(tuple_to_list(Argument)); + {erlang, list_to_tuple, 1} -> + hipe_icode:mk_const(list_to_tuple(Argument)); + {erlang, length, 1} -> + hipe_icode:mk_const(length(Argument)); + {erlang, size, 1} -> + hipe_icode:mk_const(size(Argument)); + {erlang, bit_size, 1} -> + hipe_icode:mk_const(bit_size(Argument)); + {erlang, byte_size, 1} -> + hipe_icode:mk_const(byte_size(Argument)); + {erlang, tuple_size, 1} -> + hipe_icode:mk_const(tuple_size(Argument)); + {erlang, abs, 1} -> + hipe_icode:mk_const(abs(Argument)); + {erlang, round, 1} -> + hipe_icode:mk_const(round(Argument)); + {erlang, trunc, 1} -> + hipe_icode:mk_const(trunc(Argument)); + _ -> + bottom + end; +%% Binary calls +evaluate_call_or_enter([Argument1,Argument2], Fun) -> + case Fun of + '+' -> + hipe_icode:mk_const(Argument1 + Argument2); + '-' -> + hipe_icode:mk_const(Argument1 - Argument2); + '*' -> + hipe_icode:mk_const(Argument1 * Argument2); + '/' -> + hipe_icode:mk_const(Argument1 / Argument2); + 'band' -> + hipe_icode:mk_const(Argument1 band Argument2); + 'bor' -> + hipe_icode:mk_const(Argument1 bor Argument2); + 'bsl' -> + hipe_icode:mk_const(Argument1 bsl Argument2); + 'bsr' -> + hipe_icode:mk_const(Argument1 bsr Argument2); + 'bxor' -> + hipe_icode:mk_const(Argument1 bxor Argument2); + fp_add -> + hipe_icode:mk_const(float(Argument1 + Argument2)); + fp_sub -> + hipe_icode:mk_const(float(Argument1 - Argument2)); + fp_mul -> + hipe_icode:mk_const(float(Argument1 * Argument2)); + fp_div -> + hipe_icode:mk_const(Argument1 / Argument2); + cons -> + hipe_icode:mk_const([Argument1 | Argument2]); + mktuple -> + hipe_icode:mk_const(list_to_tuple([Argument1,Argument2])); + #unsafe_update_element{index=N} -> + hipe_icode:mk_const(setelement(N, Argument1, Argument2)); + {erlang, '++', 2} -> + hipe_icode:mk_const(Argument1 ++ Argument2); + {erlang, '--', 2} -> + hipe_icode:mk_const(Argument1 -- Argument2); + {erlang, 'div', 2} -> + hipe_icode:mk_const(Argument1 div Argument2); + {erlang, 'rem', 2} -> + hipe_icode:mk_const(Argument1 rem Argument2); + {erlang, append_element, 2} -> + hipe_icode:mk_const(erlang:append_element(Argument1, Argument2)); + {erlang, element, 2} -> + hipe_icode:mk_const(element(Argument1, Argument2)); + _Other -> + %% io:format("In ~w(~w,~w)~n", [_Other,Argument1,Argument2]), + bottom + end; + +%% The rest of the calls +evaluate_call_or_enter(Arguments, Fun) -> + case Fun of + mktuple -> + hipe_icode:mk_const(list_to_tuple(Arguments)); + {erlang, setelement, 3} -> + [Argument1, Argument2, Argument3] = Arguments, + hipe_icode:mk_const(setelement(Argument1, Argument2, Argument3)); + _ -> + bottom + end. + +%%----------------------------------------------------------------------------- + +evaluate_if(Conditional, [Argument1, Argument2]) -> + case ((Argument1 =:= bottom) or (Argument2 =:= bottom)) of + true -> bottom; + false -> evaluate_if_const(Conditional, Argument1, Argument2) + end; +evaluate_if(_Conditional, _Arguments) -> + bottom. + +%%----------------------------------------------------------------------------- + +evaluate_if_const(Conditional, Argument1, Argument2) -> + case Conditional of + '=:=' -> Argument1 =:= Argument2; + '==' -> Argument1 == Argument2; + '=/=' -> Argument1 =/= Argument2; + '/=' -> Argument1 /= Argument2; + '<' -> Argument1 < Argument2; + '>=' -> Argument1 >= Argument2; + '=<' -> Argument1 =< Argument2; + '>' -> Argument1 > Argument2; + _ -> bottom + end. + +%%----------------------------------------------------------------------------- + +evaluate_type(Type, Vals) -> + case [X || X <- Vals, X =:= bottom] of + [] -> evaluate_type_const(Type, Vals); + _ -> bottom + end. + +%%----------------------------------------------------------------------------- + +evaluate_type_const(Type, [Arg|Left]) -> + Test = + case {Type, hipe_icode:const_value(Arg)} of + {nil, [] } -> true; + {nil, _ } -> false; + {cons, [_|_]} -> true; + {cons, _ } -> false; + {{tuple, N}, T} when tuple_size(T) =:= N -> true; + {atom, A} when is_atom(A) -> true; + {{atom, A}, A} when is_atom(A) -> true; + {{record, A, S}, R} when tuple_size(R) =:= S, + element(1, R) =:= A -> true; + {{record, _, _}, _} -> false; + _ -> bottom + end, + case Test of + bottom -> bottom; + false -> false; + true -> evaluate_type_const(Type, Left) + end; +evaluate_type_const(_Type, []) -> + true. + +%%----------------------------------------------------------------------------- +%% Icode-specific code below +%%----------------------------------------------------------------------------- + +update_instruction(Instruction, Environment) -> + case Instruction of + #icode_call{} -> + update_call(Instruction, Environment); + #icode_enter{} -> + update_enter(Instruction, Environment); + #icode_if{} -> + update_if(Instruction, Environment); + #icode_move{} -> + update_move(Instruction, Environment); + #icode_phi{} -> + update_phi(Instruction, Environment); + #icode_switch_val{} -> + update_switch_val(Instruction, Environment); + #icode_type{} -> + update_type(Instruction, Environment); + #icode_switch_tuple_arity{} -> + update_switch_tuple_arity(Instruction, Environment); + _ -> + %% goto, comment, label, return, begin_handler, end_try, + %% begin_try, fail + %% We could but don't handle: catch?, fail? + [Instruction] + end. + +%%----------------------------------------------------------------------------- + +update_call(Instruction, Environment) -> + DestList = hipe_icode:call_dstlist(Instruction), + case DestList of + [Destination] -> + case lookup_lattice_value(Destination, Environment) of + bottom -> + NewArguments = update_arguments( + hipe_icode:call_args(Instruction), + Environment), + [hipe_icode:call_args_update(Instruction, NewArguments)]; + X -> + NewInstructions = + case is_call_to_fp_op(Instruction) of + true -> + TmpIns = + hipe_icode:call_fun_update(Instruction, unsafe_untag_float), + [hipe_icode:call_args_update(TmpIns, [X])]; + false -> + case hipe_icode:call_continuation(Instruction) of + [] -> + [hipe_icode:mk_move(Destination, X)]; + ContinuationLabel -> + [hipe_icode:mk_move(Destination, X), + hipe_icode:mk_goto(ContinuationLabel)] + end + end, + ?CONST_PROP_MSG("call: ~w ---> ~w\n", + [Instruction, NewInstructions]), + NewInstructions + end; +%% %% [] -> %% No destination; we don't touch this +%% [] -> +%% NewArguments = update_arguments(hipe_icode:call_args(Instruction), +%% Environment), +%% [hipe_icode:call_args_update(Instruction, NewArguments)]; + %% List-> %% Means register allocation; not implemented at this point + _ -> + [Instruction] + end. + +%%----------------------------------------------------------------------------- + +is_call_to_fp_op(Instruction) -> + case hipe_icode:call_fun(Instruction) of + fp_add -> true; + fp_sub -> true; + fp_mul -> true; + fp_div -> true; + fnegate -> true; + conv_to_float -> true; + unsafe_untag_float -> true; + _ -> false + end. + +%%----------------------------------------------------------------------------- + +update_enter(Instruction, Environment) -> + Args = hipe_icode:enter_args(Instruction), + EvalArgs = [lookup_lattice_value(X, Environment) || X <- Args], + Fun = hipe_icode:enter_fun(Instruction), + case lists:any(fun(X) -> (X =:= bottom) end, EvalArgs) of + true -> + update_enter_arguments(Instruction, Environment); + false -> + ConstVals = [hipe_icode:const_value(X) || X <- EvalArgs], + try evaluate_call_or_enter(ConstVals, Fun) of + bottom -> + update_enter_arguments(Instruction, Environment); + Const -> + Dst = hipe_icode:mk_new_var(), + [hipe_icode:mk_move(Dst, Const), + hipe_icode:mk_return([Dst])] + catch + _:_ -> + update_enter_arguments(Instruction, Environment) + end + end. + +update_enter_arguments(Instruction, Env) -> + NewArguments = update_arguments(hipe_icode:enter_args(Instruction), Env), + [hipe_icode:enter_args_update(Instruction, NewArguments)]. + +%%----------------------------------------------------------------------------- + +update_if(Instruction, Environment) -> + Args = hipe_icode:if_args(Instruction), + EvaluatedArguments = [lookup_lattice_value(Argument, Environment) + || Argument <- Args], + Op = hipe_icode:if_op(Instruction), + case evaluate_if(Op, EvaluatedArguments) of + true -> + TrueLabel = hipe_icode:if_true_label(Instruction), + ?CONST_PROP_MSG("ifT: ~w ---> goto ~w\n", [Instruction, TrueLabel]), + [hipe_icode:mk_goto(TrueLabel)]; + false -> + FalseLabel = hipe_icode:if_false_label(Instruction), + ?CONST_PROP_MSG("ifF: ~w ---> goto ~w\n", [Instruction, FalseLabel]), + [hipe_icode:mk_goto(FalseLabel)]; + bottom -> + %% Convert the if-test to a type test if possible. + Op = hipe_icode:if_op(Instruction), + case Op =:= '=:=' orelse Op =:= '=/=' of + false -> [Instruction]; + true -> + [Arg1, Arg2] = Args, + case EvaluatedArguments of + [bottom, bottom] -> + [Instruction]; + [bottom, X] -> + conv_if_to_type(Instruction, hipe_icode:const_value(X), Arg1); + [X, bottom] -> + conv_if_to_type(Instruction, hipe_icode:const_value(X), Arg2) + end + end + end. + +conv_if_to_type(I, Const, Arg) when is_atom(Const); + is_integer(Const); + Const =:= [] -> + Test = + if is_atom(Const) -> {atom, Const}; + is_integer(Const) -> {integer, Const}; + true -> nil + end, + {T, F} = + case hipe_icode:if_op(I) of + '=:=' -> {hipe_icode:if_true_label(I),hipe_icode:if_false_label(I)}; + '=/=' -> {hipe_icode:if_false_label(I),hipe_icode:if_true_label(I)} + end, + NewI = hipe_icode:mk_type([Arg], Test, T, F), + ?CONST_PROP_MSG("if: ~w ---> type ~w\n", [I, NewI]), + [NewI]; +conv_if_to_type(I, _, _) -> + [I]. + +%%----------------------------------------------------------------------------- + +update_move(Instruction, Environment) -> + Destination = hipe_icode:move_dst(Instruction), + case lookup_lattice_value(Destination, Environment) of + bottom -> + [Instruction]; + X -> + case hipe_icode:move_src(Instruction) of + X -> + [Instruction]; + _ -> + ?CONST_PROP_MSG("move: ~w ---> ~w\n", [Instruction, X]), + [hipe_icode:move_src_update(Instruction, X)] + end + %% == [hipe_icode:mk_move(Destination, X)] + end. + +%%----------------------------------------------------------------------------- + +update_phi(Instruction, Environment) -> + Destination = hipe_icode:phi_dst(Instruction), + case lookup_lattice_value(Destination, Environment) of + bottom -> + [Instruction]; + X -> + ?CONST_PROP_MSG("phi: ~w ---> ~w\n", [Instruction, X]), + [hipe_icode:mk_move(Destination, X)] + end. + +%%----------------------------------------------------------------------------- + +update_type(Instruction, Environment) -> + EvaluatedArguments = [lookup_lattice_value(Argument, Environment) || + Argument <- hipe_icode:type_args(Instruction)], + case evaluate_type(hipe_icode:type_test(Instruction), EvaluatedArguments) of + true -> + TrueLabel = hipe_icode:type_true_label(Instruction), + ?CONST_PROP_MSG("typeT: ~w ---> goto ~w\n", [Instruction, TrueLabel]), + [hipe_icode:mk_goto(TrueLabel)]; + false -> + FalseLabel = hipe_icode:type_false_label(Instruction), + ?CONST_PROP_MSG("typeF: ~w ---> goto ~w\n", [Instruction, FalseLabel]), + [hipe_icode:mk_goto(FalseLabel)]; + bottom -> + [Instruction] + end. + +%%----------------------------------------------------------------------------- + +update_switch_val(Instruction, Environment) -> + Argument = hipe_icode:switch_val_term(Instruction), + Value = lookup_lattice_value(Argument, Environment), + case Value of + bottom -> + [Instruction]; + _ -> + Cases = hipe_icode:switch_val_cases(Instruction), + FailLabel = hipe_icode:switch_val_fail_label(Instruction), + Target = get_switch_target(Cases, Value, FailLabel), + ?CONST_PROP_MSG("sv: ~w ---> goto ~w\n", [Instruction, Target]), + [hipe_icode:mk_goto(Target)] + end. + +%%----------------------------------------------------------------------------- + +update_switch_tuple_arity(Instruction, Environment) -> + Argument = hipe_icode:switch_tuple_arity_term(Instruction), + Value = lookup_lattice_value(Argument, Environment), + case Value of + bottom -> + [Instruction]; + Constant -> + UnTagged = hipe_icode:const_value(Constant), + case is_tuple(UnTagged) of + true -> + Cases = hipe_icode:switch_tuple_arity_cases(Instruction), + FailLabel = hipe_icode:switch_tuple_arity_fail_label(Instruction), + Target = get_switch_target(Cases, tuple_size(UnTagged), FailLabel), + ?CONST_PROP_MSG("sta: ~w ---> goto ~w\n", [Instruction, Target]), + [hipe_icode:mk_goto(Target)]; + false -> + [Instruction] + %% TODO: Can the above be replaced with below??? Perhaps + %% together with some sort of "generate failure". + %% [hipe_icode:mk_goto(FailLabel)] + end + end. + +%%----------------------------------------------------------------------------- + +lookup_lattice_value(X, Environment) -> + LatticeValues = env__lattice_values(Environment), + case hipe_icode:is_const(X) of + true -> + X; + false -> + case gb_trees:lookup(X, LatticeValues) of + none -> + ?WARNING_MSG("Earlier compiler steps generated erroneous " + "code for X = ~w. We are ignoring this.\n",[X]), + bottom; + {value, top} -> + ?EXIT({"lookup_lattice_value, top", X}); + {value, Y} -> + Y + end + end. + +%%----------------------------------------------------------------------------- + +update_arguments(ArgumentList, Environment) -> + [case lookup_lattice_value(X, Environment) of + bottom -> + X; + Constant -> + Constant + end || X <- ArgumentList]. + +%%----------------------------- End of file ----------------------------------- diff --git a/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl b/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl new file mode 100644 index 0000000000..1899c09715 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl @@ -0,0 +1,41 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%------------------------------------------------------------------- +%% File : hipe_icode_ssa_copy_prop.erl +%% Author : Tobias Lindahl +%% Description : Performs copy propagation on SSA form. +%% +%% Created : 4 Apr 2003 by Tobias Lindahl +%%------------------------------------------------------------------- + +-module(hipe_icode_ssa_copy_prop). + +%% +%% modules given as parameters +%% +-define(code, hipe_icode). +-define(cfg, hipe_icode_cfg). + +%% +%% appropriate include files +%% +-include("hipe_icode.hrl"). +-include("../flow/cfg.hrl"). +-include("../ssa/hipe_ssa_copy_prop.inc"). diff --git a/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl b/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl new file mode 100644 index 0000000000..675c8c1ad8 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl @@ -0,0 +1,1444 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%======================================================================= +%% File : hipe_icode_ssa_struct_reuse.erl +%% Author : Ragnar Osterlund +%% student at the compiler techniques 2 course at UU 2007 +%% Description : HiPE module that removes redundant or partially redundant +%% structure creations from Icode. +%% It does so by inserting redundant expressions as late +%% as possible in the CFG, with the exception of loops where +%% expressions are moved to just before the loop head. +%% Current Icode instructions that can be moved are mktuple() +%% and cons() primop calls. It also handles cases like +%% f({Z}) -> {Z}. It does so by looking at the structure of +%% the match, and recognizes tuples and conses. +%%======================================================================= + +-module(hipe_icode_ssa_struct_reuse). + +-export([struct_reuse/1]). + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). +-include("../flow/cfg.hrl"). + +-define(SET, ordset). +-define(SETS, ordsets). +%%-define(DEBUG, true). + +-define(MKTUPLE, mktuple). +-define(CONS, cons). +-define(SR_INSTR_TYPE, sr_instr_type). +-define(SR_STRUCT_INSTR_TYPE, sr_struct_instr_type). + +-type struct_type() :: {?CONS | ?MKTUPLE, icode_term_arg(), any()}. +-type struct_elems() :: {icode_var(), non_neg_integer(), icode_term_arg()}. + +%% DATATYPE AREA + +%%----------------------------------------------------------------------------- +%% maps +%% The maps are used to identify variables and expressions. +%% The maps are: +%% +%% expr - a map that contains value numbered structure expressions, ie +%% mktuple and cons expression. The key is the value number and the value +%% is an expr record. +%% +%% instr - maps the semantic instruction to an expression value number, +%% that is, a key in the expr map above. +%% +%% var - maps variables to expression value numbers. These variables are +%% defined or used by the structure expressions. + +-record(maps, {var = gb_trees:empty() :: gb_tree(), + instr = gb_trees:empty() :: gb_tree(), + expr = gb_trees:empty() :: gb_tree()}). + +maps_var(#maps{var = Out}) -> Out. +maps_instr(#maps{instr = Out}) -> Out. +maps_expr(#maps{expr = Out}) -> Out. + +maps_expr_keys(Maps) -> gb_trees:keys(maps_expr(Maps)). +maps_expr_values(Maps) -> gb_trees:values(maps_expr(Maps)). + +maps_instr_lookup(Instr, Maps) -> gb_trees:lookup(Instr, maps_instr(Maps)). +maps_instr_enter(Instr, ExprId, Maps) -> + NewInstr = gb_trees:enter(Instr, ExprId, maps_instr(Maps)), + Maps#maps{instr = NewInstr}. + +maps_expr_get(Id, Maps) -> gb_trees:get(Id, maps_expr(Maps)). +maps_expr_enter(Expr, Maps) -> + NewExprMap = gb_trees:enter(expr_id(Expr), Expr, maps_expr(Maps)), + Maps#maps{expr = NewExprMap}. + +maps_var_get(Var, Maps) -> gb_trees:get(Var, maps_var(Maps)). +maps_var_lookup(Var, #maps{var = VarMap}) -> gb_trees:lookup(Var, VarMap). +maps_var_enter(Var, Info, Maps = #maps{var = VarMap}) -> + NewMap = gb_trees:enter(Var, Info, VarMap), + Maps#maps{var = NewMap}. +maps_var_insert(Var, Info, Maps = #maps{var = VarMap}) -> + NewMap = gb_trees:insert(Var, Info, VarMap), + Maps#maps{var = NewMap}. + +maps_balance(Maps) -> + Maps#maps{instr = gb_trees:balance(maps_instr(Maps)), + expr = gb_trees:balance(maps_expr(Maps)), + var = gb_trees:balance(maps_var(Maps))}. + +maps_expr_key_enter(Expr, Maps) -> + NewMaps = maps_instr_enter(expr_key(Expr), expr_id(Expr), Maps), + maps_expr_enter(Expr, NewMaps). + +%%----------------------------------------------------------------------------- +%% expr +%% An expression record. Contains information about a structure expression. +%% The fields are: +%% +%% id - the value number of the expression +%% key - the semantic instruction, as defined in icode, with destination +%% removed and arguments rewritten. +%% defs - destination variable to hold the value of the expression. +%% direct_replace - indicates whether the expression shall be replaced wherever +%% it occurs, although it might not have been inserted. This is used for +%% the expressions that are detected by the icode type constructs. +%% inserts - a list of node labels that will insert this expression +%% use - a list of expression value numbers that use the value of this +%% expression + +-record(expr, {id = none :: 'none' | non_neg_integer(), + key = none :: 'none' | tuple(), % illegal_icode_instr() + defs = none :: 'none' | [icode_var()], + direct_replace = false :: boolean(), + inserts = ?SETS:new() :: ?SET(_), + use = ?SETS:new() :: ?SET(_)}). + +expr_id(#expr{id = Out}) -> Out. +expr_defs(#expr{defs = Out}) -> Out. +expr_key(#expr{key = Out}) -> Out. +expr_inserts(#expr{inserts = Out}) -> Out. +expr_use(#expr{use = Out}) -> Out. +expr_direct_replace(#expr{direct_replace = Out}) -> Out. + +expr_use_add(Expr = #expr{use = UseSet}, Use) -> + Expr#expr{use = ?SETS:add_element(Use, UseSet)}. + +%% expr_key_set(Expr, In) -> Expr#expr{key = In}. +expr_direct_replace_set(Expr, In) -> Expr#expr{direct_replace = In}. +expr_inserts_set(Expr, In) -> Expr#expr{inserts = In}. + +expr_create(Key, Defs) -> + NewExprId = new_expr_id(), + #expr{id = NewExprId, key = Key, defs = Defs}. + +%%----------------------------------------------------------------------------- +%% varinfo +%% A variable mapping info. Contains info about variable references. +%% The fields are: +%% +%% use - a set of expression value numbers that use this variable +%% ref - the variable which value this variable will be assigned +%% when expression is replaced. This is encoded as {N, M} where +%% N is the expression value number and M is the nth destination +%% variable defined by the expression N. +%% elem - indicates that this variable has been detected to be a part of +%% a tuple. The field contains a {V, N} tuple where V is the variable +%% that refers to the structure that this variable is an element in +%% and N is the position that the element occurs on in the tuple. Eg. +%% {{var, 3}, 2} means that the variable {var, 3} refers to a tuple +%% in which this variable is on second place. +%% exprid - a expression value number which is the expression that +%% the variable is defined by. + +-record(varinfo, {use = ?SETS:new() :: ?SET(_), + ref = none :: 'none' | {non_neg_integer(), non_neg_integer()}, + elem = none :: 'none' | {icode_var(), non_neg_integer()}, + exprid = none :: 'none' | non_neg_integer()}). + +varinfo_exprid(#varinfo{exprid = Out}) -> Out. + +varinfo_use_add(#varinfo{use = UseSet} = I, Use) -> + I#varinfo{use = ?SETS:add_element(Use, UseSet)}. + +%%----------------------------------------------------------------------------- +%% node - a node in the temp CFG. +%% +%% label - the label of the node in the original CFG +%% pred - a list of predecessors to this node +%% succ - a list of successors to this node +%% code - code from CFG filtered to only contain structure instructions +%% non_struct_defs - a list of variable definitions that are not defined +%% by structures +%% up_expr - upwards exposed expression value numbers +%% killed_expr - killed expressions value numbers +%% sub_inserts - a set of labels of nodes that defines one or more +%% expressions and that are in a subtree of this node +%% inserts - a set of expression value numbers to be inserted into the node +%% antic_in - a set of expression value numbers that are anticipated into +%% the node +%% antic_out - a set of expression value numbers that are anticipated out of +%% the node +%% phi - a tree of node labels which is defined in phi functions in the node +%% varmap - a list of variable tuples {V1, V2} that maps a variable that are +%% the output of phi functions in sub blocks, V1, into a variable +%% flowing from the block of this node, V2. +%% struct_type - a list of {V, N} tuples that indicates that V is a tuple +%% with N elements. These are added from the icode primop type(). +%% struct_elems - a list of {VD, N, VS} tuples where VD is a variable in the N'th position +%% in VS. These are added from the icode primop unsafe_element() + +-record(node, { + label = none :: 'none' | icode_lbl(), + pred = none :: 'none' | [icode_lbl()], + succ = none :: 'none' | [icode_lbl()], + code = [] :: [tuple()], % [illegal_icode_instr()] + phi = gb_trees:empty() :: gb_tree(), + varmap = [] :: [{icode_var(), icode_var()}], + pre_loop = false :: boolean(), + non_struct_defs = gb_sets:new() :: gb_set(), + up_expr = none :: 'none' | ?SET(_), + killed_expr = none :: 'none' | ?SET(_), + sub_inserts = ?SETS:new() :: ?SET(_), + inserts = ?SETS:new() :: ?SET(_), + antic_in = none :: 'none' | ?SET(_), + antic_out = none :: 'none' | ?SET(_), + struct_type = [] :: [struct_type()], + struct_elems = [] :: [struct_elems()]}). + +node_sub_inserts(#node{sub_inserts = Out}) -> Out. +node_inserts(#node{inserts = Out}) -> Out. +node_antic_out(#node{antic_out = Out}) -> Out. +node_antic_in(#node{antic_in = Out}) -> Out. +node_killed_expr(#node{killed_expr = Out}) -> Out. +node_pred(#node{pred = Out}) -> Out. +node_succ(#node{succ = Out}) -> Out. +node_label(#node{label = Out}) -> Out. +node_code(#node{code = Out}) -> Out. +node_non_struct_defs(#node{non_struct_defs = Out}) -> Out. +node_up_expr(#node{up_expr = Out}) -> Out. +node_pre_loop(#node{pre_loop = Out}) -> Out. +node_struct_type(#node{struct_type = Out}) -> Out. +%% node_atom_type(#node{atom_type = Out}) -> Out. +node_struct_elems(#node{struct_elems = Out}) -> Out. + +node_pre_loop_set(Node) -> Node#node{pre_loop = true}. + +node_phi_add(Node = #node{phi = Phi}, Pred, Value) -> + NewList = + case gb_trees:lookup(Pred, Phi) of + {value, List} -> [Value | List]; + none -> [Value] + end, + Node#node{phi = gb_trees:enter(Pred, NewList, Phi)}. + +node_phi_get(#node{phi = Phi}, Pred) -> + case gb_trees:lookup(Pred, Phi) of + {value, List} -> List; + none -> [] + end. + +node_code_add(Node = #node{code = Code}, Instr) -> + Node#node{code = [Instr | Code]}. + +node_code_rev(Node = #node{code = Code}) -> + Node#node{code = lists:reverse(Code)}. + +node_struct_type_add(Node = #node{struct_type = T}, Value) -> + Node#node{struct_type = [Value | T]}. + +%% node_atom_type_add(Node = #node{atom_type = T}, Value) -> +%% Node#node{atom_type = [Value | T]}. + +node_struct_elems_add(Node = #node{struct_elems = T}, Value) -> + Node#node{struct_elems = [Value | T]}. + +node_non_struct_defs_list(Node) -> + gb_sets:to_list(node_non_struct_defs(Node)). + +node_non_struct_instr_add(Node, Instr) -> + DefList = hipe_icode:defines(Instr), + Tmp = gb_sets:union(node_non_struct_defs(Node), gb_sets:from_list(DefList)), + Node#node{non_struct_defs = Tmp}. + +node_set_sub_inserts(Node, In) -> Node#node{sub_inserts = In}. + +node_add_insert(Node, In) -> + NewIns = ?SETS:add_element(In, node_inserts(Node)), + Node#node{inserts = NewIns}. + +node_union_sub_inserts(Node, SubIns) -> + NewSubIns = ?SETS:union(SubIns, node_sub_inserts(Node)), + node_set_sub_inserts(Node, NewSubIns). + +node_varmap_set(Node, Vars) -> + Node#node{varmap = Vars}. + +node_varmap_lookup(#node{varmap = Varmap}, Var) -> + case lists:keyfind(Var, 1, Varmap) of + {_, NewVar} -> NewVar; + false -> Var + end. + +node_create(Label, Pred, Succ) -> + #node{label = Label, pred = Pred, succ = Succ}. + +%%----------------------------------------------------------------------------- +%% nodes - describes the new temporary CFG +%% +%% domtree - the dominator tree of the original CFG +%% labels - the labels of the original CFG, filtered to only contain non fail trace paths +%% postorder - the postorder walk of labels of the original CFG, filtered to only contain non fail trace paths +%% rev_postorder - reverse of postorder. +%% start_label - the start basic block label. +%% all_expr - all expression value numbers that the CFG defines +%% tree - the tree of nodes, with labels as keys and node records as values + +-record(nodes, { + domtree :: hipe_dominators:domTree(), + labels = none :: 'none' | [icode_lbl()], + postorder = none :: 'none' | [icode_lbl()], + start_label = none :: 'none' | icode_lbl(), + rev_postorder = none :: 'none' | [icode_lbl()], + all_expr = none :: 'none' | [non_neg_integer()], + tree = gb_trees:empty() :: gb_tree()}). + +nodes_postorder(#nodes{postorder = Out}) -> Out. +nodes_rev_postorder(#nodes{rev_postorder = Out}) -> Out. +nodes_tree(#nodes{tree = Out}) -> Out. +nodes_domtree(#nodes{domtree = Out}) -> Out. +nodes_start_label(#nodes{start_label = Out}) -> Out. + +nodes_tree_is_empty(#nodes{tree = Tree}) -> + gb_trees:is_empty(Tree). + +nodes_tree_set(Tree, Nodes) -> Nodes#nodes{tree = Tree}. +nodes_all_expr_set(AllExpr, Nodes) -> Nodes#nodes{all_expr = AllExpr}. + +nodes_tree_values(Nodes) -> + gb_trees:values(nodes_tree(Nodes)). + +get_node(Label, Nodes) -> + gb_trees:get(Label, nodes_tree(Nodes)). + +enter_node(Node, Nodes) -> + nodes_tree_set(gb_trees:enter(node_label(Node), Node, nodes_tree(Nodes)), Nodes). + +remove_node(Node, Nodes) -> + nodes_tree_set(gb_trees:delete(node_label(Node), nodes_tree(Nodes)), Nodes). + +nodes_create() -> #nodes{}. + +%%----------------------------------------------------------------------------- +%% update +%% record used when updating the CFG, keeping track of which expressions +%% have been inserted and their mappings to variable names. +%% +%% inserted - maps an expression to a list of variables +%% del_red_test - flag that is set to true when the reduction test +%% has been inserted is used to move the reduction test. + +-record(update, {inserted = gb_trees:empty() :: gb_tree(), + del_red_test = false :: boolean()}). + +update_inserted_lookup(#update{inserted = Inserted}, ExprId) -> + gb_trees:lookup(ExprId, Inserted). + +update_inserted_add_new(Update = #update{inserted = Inserted}, ExprId, Defs) -> + VarList = [case hipe_icode:is_var(Def) of + true -> hipe_icode:mk_new_var(); + false -> + case hipe_icode:is_reg(Def) of + true -> hipe_icode:mk_new_reg(); + false -> + true = hipe_icode:is_fvar(Def), + hipe_icode:mk_new_fvar() + end + end || Def <- Defs], + NewInserted = gb_trees:enter(ExprId, VarList, Inserted), + {Update#update{inserted = NewInserted}, VarList}. + +update_inserted_add(Update = #update{inserted = Inserted}, ExprId, Defs) -> + Update#update{inserted = gb_trees:enter(ExprId, Defs, Inserted)}. + +update_del_red_test(#update{del_red_test = DelRed}) -> DelRed. +update_del_red_test_set(Update) -> + Update#update{del_red_test = true}. + +%%----------------------------------------------------------------------------- +%% CODE AREA + +%%----------------------------------------------------------------------------- +%% Main function called from the hipe_main module + +-spec struct_reuse(#cfg{}) -> #cfg{}. + +struct_reuse(CFG) -> + %% debug_init_case_count(?SR_INSTR_TYPE), + %% debug_init_case_count(?SR_STRUCT_INSTR_TYPE), + + %% debug_function({wings_ask,ask_unzip,3}, CFG), + %% debug_function(nil, CFG), + %% set_debug_flag(true), + %% debug_struct("CFG In: ", CFG), + %% debug_cfg_pp(CFG), + + init_expr_id(), + + Nodes = construct_nodes(CFG), + + case nodes_tree_is_empty(Nodes) of + false -> + Maps = create_maps(Nodes), + + Nodes3 = init_nodes(Nodes, Maps), + Nodes4 = calc_anticipated(Nodes3), + + {Nodes5, Maps3} = calc_inserts(Nodes4, Maps), + + Nodes6 = update_nodes_inserts(Nodes5, Maps3), + + %% debug_list("ExprMap: ", gb_trees:to_list(Maps3#maps.expr)), + %% debug_list("VarMap: ", gb_trees:to_list(maps_var(Maps3))), + %% debug_nodes(Nodes6), + + %% update the cfg + CFG1 = rewrite_cfg(CFG, Nodes6, Maps3), + CFG2 = hipe_icode_ssa:remove_dead_code(CFG1), + CFGOut = hipe_icode_ssa_copy_prop:cfg(CFG2), + %% CFGOut = CFG1, + + %% print_struct("CFG: ", CFG), + %% debug_cfg_pp(CFG), + %% debug_cfg_pp(CFGOut), + + %% debug_print_case_count(?SR_STRUCT_INSTR_TYPE), + %% debug_print_case_count(?SR_INSTR_TYPE), + %% debug("Done~n"), + %% debug_struct("CFG Out: ", CFGOut), + CFGOut; + true -> + CFG + end. + +%%----------------------------------------------------------------------------- +%% Calculate simplified CFG with all fail paths removed + +construct_nodes(CFG) -> + %% all important dominator tree + DomTree = hipe_dominators:domTree_create(CFG), + + %% construct initial nodes + {Nodes, NonFailSet} = nodes_from_cfg(CFG, DomTree), + + %% remove nodes on fail paths + NewNodes = prune_nodes(Nodes, NonFailSet), + + %% fill in misc node tree info + Postorder = [Label || Label <- hipe_icode_cfg:postorder(CFG), + gb_sets:is_member(Label, NonFailSet)], + + %% check postorder is valid + PostOrderTmp = hipe_icode_cfg:postorder(CFG), + LabelsTmp = hipe_icode_cfg:labels(CFG), + case length(PostOrderTmp) =/= length(LabelsTmp) of + true -> + print("Warning, Postorder and Labels differ!~n"), + print_struct("Postorder: ", PostOrderTmp), + print_struct("Labels: ", LabelsTmp); + false -> + done + end, + + RevPostorder = lists:reverse(Postorder), + + StartLabel = hipe_icode_cfg:start_label(CFG), + NewTree = gb_trees:balance(nodes_tree(NewNodes)), + + NewNodes#nodes{postorder = Postorder, + rev_postorder = RevPostorder, + start_label = StartLabel, + tree = NewTree, + domtree = DomTree}. + +%%----------------------------------------------------------------------------- +%% Constructs a tree of nodes, one node for each basic block in CFG + +nodes_from_cfg(CFG, DomTree) -> + lists:foldl(fun(Label, {NodesAcc, NonFailAcc}) -> + Code = hipe_bb:code(hipe_icode_cfg:bb(CFG, Label)), + Pred = hipe_icode_cfg:pred(CFG, Label), + Succ = hipe_icode_cfg:succ(CFG, Label), + %% debug_struct("Label: ", Label), + %% debug_struct("Code: ", Code), + + %% Find all structures and phi functions. + %% Find all defines in this bb that are not from structures + %% and add them to NonStructDefs, later to be used for calculating upwards + %% exposed expressions, and killed expressions. + %% Also find all non fail blocks, ie backtrace from return blocks, + %% and add them to NewNonFailAcc + + Node = node_create(Label, Pred, Succ), + + {NewNode, NewNonFailAcc, PreLoopPreds} = + lists:foldl(fun(Instr, {NodeAcc, NFAcc, PLPAcc}) -> + case instr_type(Instr) of + struct -> + {node_code_add(NodeAcc, Instr), NFAcc, PLPAcc}; + return -> + {NodeAcc, get_back_trace_rec(CFG, Label, NFAcc), PLPAcc}; + {struct_elems, NumElem, DstVar, SrcVar} -> + NewNodeAcc = node_struct_elems_add(NodeAcc, {DstVar, NumElem, SrcVar}), + {node_non_struct_instr_add(NewNodeAcc, Instr), NFAcc, PLPAcc}; + {struct_type, NumElems, Var, Type} -> + {node_struct_type_add(NodeAcc, {Type, Var, NumElems}), NFAcc, PLPAcc}; + {tuple_arity, Var, Cases} -> + NewNodeAcc = + lists:foldl(fun(Case, NAcc) -> + case Case of + {{const, {flat, Arity}}, _} -> + Tuple = {?MKTUPLE, Var, Arity}, + node_struct_type_add(NAcc, Tuple); + _ -> NAcc + end + end, NodeAcc, Cases), + {NewNodeAcc, NFAcc, PLPAcc}; + %% {atom_type, Atom, Var} -> + %% {node_atom_type_add(NodeAcc, {Var, Atom}), NFAcc, PLPAcc}; + phi -> + Def = hipe_icode:phi_dst(Instr), + Part = lists:foldl(fun(P = {Pr, PredVar}, {IsDef, NotDom}) -> + case hipe_dominators:domTree_dominates(Label, Pr, DomTree) of + false -> + {IsDef, [P | NotDom]}; + true -> + {IsDef andalso PredVar =:= Def, NotDom} + end + end, {true, []}, hipe_icode:phi_arglist(Instr)), + + case Part of + {true, [{P, V}]} -> + %% This is the only case recognized so far. All phi + %% sub block references a static variable that is + %% assigned the same value again in the phi function. + {node_phi_add(NodeAcc, P, {Def, V}), + NFAcc, ?SETS:add_element(P, PLPAcc)}; + + {false, [{P, _}]} -> + {node_non_struct_instr_add(NodeAcc, Instr), + NFAcc, ?SETS:add_element(P, PLPAcc)}; + + _ -> + {node_non_struct_instr_add(NodeAcc, Instr), NFAcc, PLPAcc} + end; + _ -> + {node_non_struct_instr_add(NodeAcc, Instr), NFAcc, PLPAcc} + end + end, {Node, NonFailAcc, ?SETS:new()}, Code), + + %% insert the new node + NewNodesAcc = enter_node(node_code_rev(NewNode), NodesAcc), + + %% Set the pre loop flag of all nodes that are predecessor to this node + %% and that are the first nodes prior to a loop. + NewNodesAcc2 = + lists:foldl(fun(Lbl, NsAcc) -> + PredNode = get_node(Lbl, NsAcc), + NewPredNode = node_pre_loop_set(PredNode), + NewPredNode2 = node_varmap_set(NewPredNode, node_phi_get(NewNode, Lbl)), + + enter_node(NewPredNode2, NsAcc) + end, NewNodesAcc, PreLoopPreds), + + {NewNodesAcc2, NewNonFailAcc} + end, {nodes_create(), gb_sets:new()}, hipe_icode_cfg:reverse_postorder(CFG)). + +%%----------------------------------------------------------------------------- +%% Get all labels from Label to root of CFG, ie backtraces from Label. + +get_back_trace_rec(CFG, Label, LabelSet) -> + %% debug_struct("Label :", Label), + %% debug_struct("Set :", gb_sets:to_list(LabelSet)), + case gb_sets:is_member(Label, LabelSet) of + false -> + Preds = hipe_icode_cfg:pred(CFG, Label), + lists:foldl(fun(Lbl, SetAcc) -> + get_back_trace_rec(CFG, Lbl, SetAcc) + end, gb_sets:add(Label, LabelSet), Preds); + true -> LabelSet + end. + +%%----------------------------------------------------------------------------- +%% Remove all fail block paths and successors and predecessors +%% That are on fail paths + +prune_nodes(Nodes, NonFailSet) -> + lists:foldl(fun(Node, NodesAcc) -> + case gb_sets:is_member(node_label(Node), NonFailSet) of + true -> + NewSucc = [L || L <- node_succ(Node), gb_sets:is_member(L, NonFailSet)], + NewPred = [L || L <- node_pred(Node), gb_sets:is_member(L, NonFailSet)], + enter_node(Node#node{succ = NewSucc, pred = NewPred}, NodesAcc); + false -> + remove_node(Node, NodesAcc) + end + end, Nodes, nodes_tree_values(Nodes)). + +%%----------------------------------------------------------------------------- +%% Map calculations. + +%%----------------------------------------------------------------------------- +%% Create a maps structure from the Nodes record + +create_maps(Nodes) -> + Maps = lists:foldl(fun(Label, MapsAcc) -> + Node = get_node(Label, Nodes), + NewMapsAcc = maps_from_node_struct_type(MapsAcc, Node), + NewMapsAcc2 = maps_from_node_struct_elems(NewMapsAcc, Node), + %% NewMapsAcc3 = maps_from_node_atom_type(NewMapsAcc2, Node), + maps_from_node_code(NewMapsAcc2, Node) + end, #maps{}, nodes_rev_postorder(Nodes)), + maps_balance(Maps). + +%%----------------------------------------------------------------------------- +%% Add all elements in the struct_type list of Node to Maps as expressions + +maps_from_node_struct_type(Maps, Node) -> + %% debug_struct("Node Label: ", node_label(Node)), + %% debug_struct("Node Tuple Type: ", node_struct_type(Node)), + lists:foldl(fun({Type, Var, Size}, MapsAcc) -> + Key = create_elem_expr_key(Size, Var, []), + InstrKey = hipe_icode:mk_primop([], Type, Key), + NewExpr2 = expr_create(InstrKey, [Var]), + NewExpr3 = expr_direct_replace_set(NewExpr2, true), + maps_expr_key_enter(NewExpr3, MapsAcc) + end, Maps, node_struct_type(Node)). + +create_elem_expr_key(0, _, Key) -> Key; +create_elem_expr_key(N, Var, Key) -> + create_elem_expr_key(N - 1, Var, [{Var, N} | Key]). + +%%----------------------------------------------------------------------------- +%%maps_from_node_atom_type(Maps, Node) -> +%% lists:foldl(fun({Var, Atom}, MapsAcc) -> +%% case maps_var_lookup(Var, MapsAcc) of +%% none -> +%% MapsAcc; +%% {value, #varinfo{elem = none}} -> +%% MapsAcc; +%% {value, #varinfo{elem = {Src, Num, ExprId}}} -> +%% Expr = maps_expr_get(ExprId, MapsAcc), +%% Key = expr_key(Expr), +%% +%% Filter = fun(Arg) -> +%% case Arg of +%% {Src, Num, ExprId} -> +%% hipe_icode:mk_const(Atom); +%% _ -> +%% Arg +%% end end, +%% +%% NewKey = replace_call_variables(Filter, Key), +%% NewExpr = expr_create(NewKey, expr_defs(Expr)), +%% maps_expr_key_enter(NewExpr, MapsAcc) +%% end +%% end, Maps, node_atom_type(Node)). + +%%----------------------------------------------------------------------------- +%% Add all struct_elemns in Node to Maps as variables + +maps_from_node_struct_elems(Maps, Node) -> + lists:foldl(fun({Dst, Num, Src}, MapsAcc) -> + VarInfo = #varinfo{elem = {Src, Num}}, + maps_var_insert(Dst, VarInfo, MapsAcc) + end, Maps, node_struct_elems(Node)). + +%%----------------------------------------------------------------------------- +%% Get all expressions defined by the Node and insert them into Maps. +%% Also insert information about all affected variables into Maps. + +maps_from_node_code(Maps, Node) -> + %% debug_struct("Node Label: ", Label), + %% debug_struct("Node Code: ", Code), + %% Label = node_label(Node), + lists:foldl(fun(Instr, MapsAcc) -> + %% create two keys that are used to reference this structure creation + %% instruction, so that we can lookup its expression value number + %% later. + InstrKey = hipe_icode:call_dstlist_update(Instr, []), + + %% Fetch the two keys from the instruction + {HasElems, RefKey, ElemKey} = + replace_call_vars_elems(MapsAcc, InstrKey), + + %% create a new expr record or lookup an existing one. + case HasElems of + true -> + %% The instruction contains uses of variables that are + %% part of another structure. + case maps_instr_lookup(ElemKey, MapsAcc) of + {value, ExprId} -> + %% The instruction is equal to a structure that has + %% already been created. This is the f({Z}) -> {Z} + %% optimization. I.e. there is no need to create {Z} again. + %% Also lookup if ExprId is defining a variable that is + %% already an element in another structure. If so, + %% use that element. This takes care of nested structures + %% such as f({X, {Y, Z}}) -> {X, {Y, Z}}. + + #expr{defs = [Var]} = maps_expr_get(ExprId, MapsAcc), + StructElem = + case maps_var_lookup(Var, MapsAcc) of + {value, #varinfo{elem = Elem, exprid = none}} when Elem =/= none -> + Elem; + _ -> none + end, + Defines = hipe_icode:defines(Instr), + maps_varinfos_create(Defines, ExprId, StructElem, MapsAcc); + none -> + %% create a new expression + maps_expr_varinfos_create(Instr, RefKey, MapsAcc) + end; + false -> + %% create a new expression + maps_expr_varinfos_create(Instr, RefKey, MapsAcc) + end + end, Maps, node_code(Node)). + +%%----------------------------------------------------------------------------- +%% Creates varinfo structures with exprid set to ExprId for all +%% variables contained in Defines. These are put into MapsIn. + +maps_varinfos_create(Defines, ExprId, Elem, MapsIn) -> + VarInfo = #varinfo{exprid = ExprId, elem = Elem}, + {MapsOut, _} = + lists:foldl(fun (Def, {Maps, NumAcc}) -> + NewVarInfo = VarInfo#varinfo{ref = {ExprId, NumAcc}}, + {maps_var_insert(Def, NewVarInfo, Maps), NumAcc + 1} + end, {MapsIn, 1}, Defines), + MapsOut. + +%%----------------------------------------------------------------------------- +%% Creates a new expression from RefKey if RefKey is not already reffering +%% to an expression. Also creates varinfo structures for all variables defined +%% and used by Instr. Result is put in Maps. + +maps_expr_varinfos_create(Instr, RefKey, Maps) -> + Defines = hipe_icode:defines(Instr), + {ExprId, Maps2} = + case maps_instr_lookup(RefKey, Maps) of + {value, EId} -> + {EId, Maps}; + none -> + NewExpr = expr_create(RefKey, Defines), + {expr_id(NewExpr), maps_expr_key_enter(NewExpr, Maps)} + end, + Maps3 = maps_varinfos_create(Defines, ExprId, none, Maps2), + update_maps_var_use(Instr, ExprId, Maps3). + +%%----------------------------------------------------------------------------- +%% A variable replacement function that returns a tuple of three elements +%% {T, K1, K2}, where T indicates if Instr contained variables that where +%% elements of other structures, K1 is the Instr with all variables that +%% references another structure replaced, and K2 is K1 but also with all +%% variables that are elements of other structures replaced. + +replace_call_vars_elems(Maps, Instr) -> + VarMap = maps_var(Maps), + {HasElems, Vars, Elems} = + lists:foldr(fun(Arg, {HasElems, Vars, Elems}) -> + case hipe_icode:is_const(Arg) of + false -> + case gb_trees:lookup(Arg, VarMap) of + none -> + {HasElems, [Arg | Vars], [Arg | Elems]}; + {value, #varinfo{ref = none, elem = none}} -> + {HasElems, [Arg | Vars], [Arg | Elems]}; + {value, #varinfo{ref = Ref, elem = none}} -> + {HasElems, [Ref | Vars], [Ref | Elems]}; + {value, #varinfo{ref = none, elem = Elem}} -> + {true, [Arg | Vars], [Elem | Elems]}; + {value, #varinfo{ref = Ref, elem = Elem}} -> + {true, [Ref | Vars], [Elem | Elems]} + end; + true -> + {HasElems, [Arg | Vars], [Arg | Elems]} + end end, {false, [], []}, hipe_icode:args(Instr)), + {HasElems, hipe_icode:call_args_update(Instr, Vars), + hipe_icode:call_args_update(Instr, Elems)}. + +%%----------------------------------------------------------------------------- +%% Updates the usage information of all variables used by Instr to also +%% contain Id and updates Maps to contain the new variable information. +%% Also updates the expressions where the updated variables are used to +%% contain the use information. + +update_maps_var_use(Instr, Id, Maps) -> + lists:foldl(fun(Use, MapsAcc) -> + VarInfo = get_varinfo(Use, MapsAcc), + NewVarInfo = varinfo_use_add(VarInfo, Id), + MapsAcc2 = maps_var_enter(Use, NewVarInfo, MapsAcc), + case varinfo_exprid(VarInfo) of + none -> + MapsAcc2; + VarExprId -> + Expr = maps_expr_get(VarExprId, MapsAcc2), + NewExpr = expr_use_add(Expr, Id), + maps_expr_enter(NewExpr, MapsAcc2) + end + end, Maps, hipe_icode:uses(Instr)). + +%%----------------------------------------------------------------------------- +%% Looks up an old variable info or creates a new one if none is found. + +get_varinfo(Var, Maps) -> + case maps_var_lookup(Var, Maps) of + {value, Info} -> + Info; + none -> + #varinfo{} + end. + +%%----------------------------------------------------------------------------- +%% filters all arguments to a function call Instr that are not constants +%% through the Filter function, and replaces the arguments in Instr with +%% the result. + +replace_call_variables(Filter, Instr) -> + NewArgs = [case hipe_icode:is_const(Arg) of + false -> Filter(Arg); + true -> Arg + end || Arg <- hipe_icode:args(Instr)], + hipe_icode:call_args_update(Instr, NewArgs). + +%%----------------------------------------------------------------------------- +%% Init nodes from node local expression information + +init_nodes(Nodes, Maps) -> + AllExpr = maps_expr_keys(Maps), + lists:foldl(fun(Node, NodesAcc) -> + UEExpr = calc_up_exposed_expr(maps_var(Maps), Node), + %% print_list("Up ExprSet: ", ?SETS:to_list(UEExpr)), + + KilledExpr = calc_killed_expr(Node, Maps), + %% print_list("Killed: ", ?SETS:to_list(KilledExpr)), + + %% End nodes have no anticipated out + AnticOut = + case node_succ(Node) of + [] -> + ?SETS:new(); + _ -> + AllExpr + end, + enter_node(Node#node{up_expr = UEExpr, + killed_expr = KilledExpr, + antic_out = AnticOut}, NodesAcc) + end, nodes_all_expr_set(AllExpr, Nodes), nodes_tree_values(Nodes)). + +%%----------------------------------------------------------------------------- +%% Calculate the upwards exposed expressions for a node. + +calc_up_exposed_expr(VarMap, Node) -> + %% debug_struct("UpExpr label: ", node_label(Node)), + NonStructDefs = node_non_struct_defs(Node), + {_, ExprIdSet} = + lists:foldl(fun(Instr, {NotToUseAcc, ExprIdAcc}) -> + Defs = hipe_icode:defines(Instr), + Uses = hipe_icode:uses(Instr), + IsNotToUse = + lists:any(fun(Use) -> gb_sets:is_member(Use, NotToUseAcc) end, Uses), + case IsNotToUse of + false -> + NewExprIdAcc = + lists:foldl(fun(Def, Acc) -> + #varinfo{exprid = Id} = gb_trees:get(Def, VarMap), + ?SETS:add_element(Id, Acc) end, ExprIdAcc, Defs), + {NotToUseAcc, NewExprIdAcc}; + true -> + NewNotToUse = + gb_sets:union(gb_sets:from_list(Defs), NotToUseAcc), + {NewNotToUse, ExprIdAcc} + end + end, {NonStructDefs, ?SETS:new()}, node_code(Node)), + ExprIdSet. + +%%----------------------------------------------------------------------------- +%% Calculate killed expression for node + +calc_killed_expr(Node, Maps) -> + calc_killed_expr_defs(node_non_struct_defs_list(Node), ?SETS:new(), Maps). + +calc_killed_expr_defs(Defs, UseSet, Maps) -> + lists:foldl(fun(Def, Acc) -> + case maps_var_lookup(Def, Maps) of + none -> + Acc; + {value, #varinfo{use = Use}} -> + ?SETS:union(Acc, calc_killed_expr_use(Use, Maps)) + end + end, UseSet, Defs). + +calc_killed_expr_use(ExprIds, Maps) -> + ?SETS:fold(fun(Id, Acc) -> + Expr = maps_expr_get(Id, Maps), + ?SETS:union(Acc, calc_killed_expr_use(expr_use(Expr), Maps)) + end, ExprIds, ExprIds). + +%%----------------------------------------------------------------------------- +%% Calculate the anticipated in and anticipated out sets for each node + +calc_anticipated(NodesIn) -> + calc_anticipated_rec(NodesIn, nodes_postorder(NodesIn)). + +calc_anticipated_rec(NodesIn, []) -> NodesIn; +calc_anticipated_rec(NodesIn, WorkIn) -> + {NodesOut, WorkOut} = + lists:foldl(fun(Label, {NodesAcc, WorkAcc}) -> + Node = get_node(Label, NodesAcc), + + %debug_struct("~nNode Label: ", Label), + + AnticIn = ?SETS:union(node_up_expr(Node), + ?SETS:subtract(node_antic_out(Node), node_killed_expr(Node))), + + %debug_struct("AnticIn: ", AnticIn), + case (node_antic_in(Node) =:= AnticIn) of + false -> + NewNodes1 = enter_node(Node#node{antic_in = AnticIn}, NodesAcc), + Preds = node_pred(Node), + %debug_struct("Preds: ", Preds), + + NewNodes2 = + lists:foldl(fun(Label2, NodesAcc2) -> + PredNode = get_node(Label2, NodesAcc2), + AnticOut = ?SETS:intersection(AnticIn, node_antic_out(PredNode)), + %debug_struct("Pred Node Label: ", Label2), + %debug_struct("Pred AnticOut: ", AnticOut), + + enter_node(PredNode#node{antic_out = AnticOut}, NodesAcc2) + end, NewNodes1, Preds), + + NewWork = add_work_list(Preds, WorkAcc), + %debug_struct("New Work: ", NewWork), + + {NewNodes2, NewWork}; + true -> + {NodesAcc, WorkAcc} + end + end, {NodesIn, new_work()}, WorkIn), + + calc_anticipated_rec(NodesOut, get_work_list(WorkOut)). + +%%----------------------------------------------------------------------------- +%% Function that adds inserts to expressions from nodes which either +%% have an upwards exposed expression or dominate more than one node +%% that inserts the same expression or the node is a prior to loop +%% node. The inserted info is stored in the #expr records in the expr +%% map of the #maps structure. + +calc_inserts(NodesIn, MapsIn) -> + DomTree = nodes_domtree(NodesIn), + + lists:foldl(fun(Label, {NodesAcc, MapsAcc}) -> + Node = get_node(Label, NodesAcc), + + %% get some basic properties. + UpExpr = node_up_expr(Node), + AnticOut = node_antic_out(Node), + SubIns = node_sub_inserts(Node), + + %% debug_struct("Label: ", Label), + + {HasIns, NewMapsAcc} = + ?SETS:fold(fun(ExprId, {HasInsAcc, MapsAcc2}) -> + Expr = maps_expr_get(ExprId, MapsAcc2), + + ExprIns = expr_inserts(Expr), + ExprSubIns = ?SETS:intersection(ExprIns, SubIns), + + %% There are three cases when to insert an expression + %% 1. The expression is defined at least twice in the subtree of this + %% node, that is length(ExprSubIns) > 1. + %% 2. It is defined in the node and is upwards exposed. + %% 3. The node is a block just above a loop, so we should move + %% all anticipated expressions to the node. + + case length(ExprSubIns) > 1 orelse ?SETS:is_element(ExprId, UpExpr) + orelse node_pre_loop(Node) of + true -> + %% get labels of all sub blocks that inserts the expression and + %% that are dominated by the current node. + Dominates = + ?SETS:filter(fun(SubLabel) -> + hipe_dominators:domTree_dominates(Label, SubLabel, DomTree) + end, ExprSubIns), + + %% remove inserts labels from insert labelset. + NewIns = ?SETS:subtract(ExprIns, Dominates), + NewIns2 = ?SETS:add_element(Label, NewIns), + + %% update the node. + NewMaps = + maps_expr_enter(expr_inserts_set(Expr, NewIns2), MapsAcc2), + {true, NewMaps}; + false -> + {HasInsAcc, MapsAcc2} + end + end, {false, MapsAcc}, ?SETS:union(AnticOut, UpExpr)), + + %% Check if there was an insert into this node, + %% and if so add to the sub inserts set. + NewSubIns = + case HasIns of + true -> + ?SETS:add_element(Label, SubIns); + false -> + SubIns + end, + + %% update sub inserts for all predecessors to the node. + NewNodes2 = + lists:foldl(fun(PredLabel, NodesAcc2) -> + PredNode = get_node(PredLabel, NodesAcc2), + enter_node(node_union_sub_inserts(PredNode, NewSubIns), NodesAcc2) + end, NodesAcc, node_pred(Node)), + + {NewNodes2, NewMapsAcc} + + end, {NodesIn, MapsIn}, nodes_postorder(NodesIn)). + +%%----------------------------------------------------------------------------- +%% Update the insert sets of each node in the node tree. +%% That is, move the insert information from the expressions to +%% the actual nodes that perform the inserts. + +update_nodes_inserts(Nodes, Maps) -> + lists:foldl(fun(Expr, NodesAcc) -> + ExprId = expr_id(Expr), + ?SETS:fold(fun(Label, NsAcc) -> + Nd = get_node(Label, NsAcc), + enter_node(node_add_insert(Nd, ExprId), NsAcc) + end, NodesAcc, expr_inserts(Expr)) + end, Nodes, maps_expr_values(Maps)). + +%%----------------------------------------------------------------------------- +%% Rewrite CFG functions + +%%----------------------------------------------------------------------------- +%% Do the code updating from the info in the nodes and maps structures. This +%% is a proxy function for rewrite_cfg/6 +rewrite_cfg(CFG, Nodes, Maps) -> + {NewCFG, _Visited} = + rewrite_cfg(CFG, ?SETS:new(), #update{}, Nodes, Maps, [nodes_start_label(Nodes)]), + %% debug_struct("Visited: ", _Visited), + NewCFG. + +%%----------------------------------------------------------------------------- +%% rewrite_cfg +%% traverse the CFG in reverse postorder and rewrite each basic block before +%% rewriteing its children. Pass along to each BB update the mappings of +%% inserted expressions in the Update record. + +rewrite_cfg(CFG, Visited, Update, Nodes, Maps, Labels) -> + lists:foldl(fun(Label, {CFGAcc, VisitedAcc}) -> + case ?SETS:is_element(Label, VisitedAcc) of + false -> + %% debug_struct("Visit: ", Label), + Node = get_node(Label, Nodes), + NewVisitedAcc = ?SETS:add_element(Label, VisitedAcc), + {NewCFGAcc, NewUpdate} = rewrite_bb(CFGAcc, Update, Maps, Node), + %% debug_struct("Update inserted: ", update_inserted_list(NewUpdate)), + rewrite_cfg(NewCFGAcc, NewVisitedAcc, NewUpdate, Nodes, Maps, node_succ(Node)); + true -> + {CFGAcc, VisitedAcc} + end + end, {CFG, Visited}, Labels). + +%%----------------------------------------------------------------------------- +%% rewrite one single basic block in the CFG as described by the properties +%% in the Node for that block. Uses the Maps and Update info to lookup +%% the instructions and expressions to insert or delete. + +rewrite_bb(CFG, Update, Maps, Node) -> + #node{pre_loop = PreLoop, label = Label, up_expr = UpExpr, inserts = Inserts} = Node, + + Code = hipe_bb:code(hipe_icode_cfg:bb(CFG, Label)), + + %debug_struct("RW Label: ", Label), + %debug_struct("Inserts", Inserts), + + DelRed = update_del_red_test(Update), + Delete = ?SETS:subtract(UpExpr, Inserts), + + %% local function that gets the instruction and defines list of an + %% expression id in the current node and and returns them. + GetInstrFunc = fun(Expr) -> + Instr = expr_key(Expr), + Defs = expr_defs(Expr), + NewInstr = + if + PreLoop -> + replace_call_variables(fun(Var) -> + node_varmap_lookup(Node, + Var) + end, + Instr); + true -> + Instr + end, + {NewInstr, Defs} + end, + + %% go through all expressions defined by the node and replace + %% or remove them as indicated by the delete set. Also perform + %% reduction test replacement if neccessary. + {[CodeLast | CodeRest], NewUpdate, LocalAcc} = + lists:foldl(fun(Instr, {CodeAcc, UpdateAcc, LocalAcc}) -> + case struct_instr_type(Instr) of + struct -> + Defs = hipe_icode:defines(Instr), + + #varinfo{exprid = ExprId} = maps_var_get(hd(Defs), Maps), + + Expr = maps_expr_get(ExprId, Maps), + DirectReplace = expr_direct_replace(Expr), + + %% Creates move intstructions from Vars to Defs + RemoveFuncVars = fun(Vars) -> + CodeAcc2 = mk_defs_moves(CodeAcc, Defs, Vars), + {CodeAcc2, UpdateAcc, LocalAcc} end, + + %% Looks up an already inserted ExprId and makes moves + %% of variables from that expression to this expression. + RemoveFunc = fun() -> + {value, Vars} = update_inserted_lookup(UpdateAcc, ExprId), + RemoveFuncVars(Vars) end, + + %% Is ExprId already inserted? + IsLocal = ?SETS:is_element(ExprId, LocalAcc), + + case DirectReplace of + true -> + %% The Instr is reffering to an expression that is + %% defined as an identical already present instruction, + %% and can be removed directly. + RemoveFuncVars(expr_defs(Expr)); + false when IsLocal -> + %% The instruction has already been inserted. + RemoveFunc(); + _ -> + case ?SETS:is_element(ExprId, Delete) of + true -> + %% should not be inserted + RemoveFunc(); + _ -> + %% Should remain + UpdateAcc2 = update_inserted_add(UpdateAcc, ExprId, Defs), + LocalAcc2 = ?SETS:add_element(ExprId, LocalAcc), + {[Instr | CodeAcc], UpdateAcc2, LocalAcc2} + end + end; + redtest when DelRed -> + %% delete reduction test + {CodeAcc, UpdateAcc, LocalAcc}; + _ -> + {[Instr | CodeAcc], UpdateAcc, LocalAcc} + end + end, {[], Update, ?SETS:new()}, Code), + + %debug_struct("RW Label 2: ", Label), + + %% calculate the inserts that are new to this node, that is + %% the expressions that are in Inserts but not in UpExpr, + %% and that have not been added already, + %% that is not present in LocalAcc + NewInserts = ?SETS:subtract(?SETS:subtract(Inserts, UpExpr), LocalAcc), + + {NewCodeRest, NewUpdate2} = + ?SETS:fold(fun(ExprId, {CodeAcc, UpdateAcc}) -> + Expr = maps_expr_get(ExprId, Maps), + {ExprInstr, Defs} = GetInstrFunc(Expr), + {UpdateAcc2, NewDefs} = update_inserted_add_new(UpdateAcc, ExprId, Defs), + + %% check if there exists an identical expression, so that + %% this expression can be replaced directly. + CodeAcc2 = + case expr_direct_replace(Expr) of + false -> + NewInstr = rewrite_expr(UpdateAcc2, ExprInstr, NewDefs), + [NewInstr | CodeAcc]; + true -> + mk_defs_moves(CodeAcc, NewDefs, Defs) + end, + {CodeAcc2, UpdateAcc2} + end, {CodeRest, NewUpdate}, NewInserts), + + NewCode = lists:reverse([CodeLast | NewCodeRest]), + + %% Check if we are to insert new reduction test here... + {NewCode2, NewUpdate3} = + case PreLoop andalso ?SETS:size(Inserts) > 0 andalso not DelRed of + true -> + {[hipe_icode:mk_primop([], redtest, []) | NewCode], update_del_red_test_set(NewUpdate2)}; + false -> + {NewCode, NewUpdate2} + end, + + NewBB = hipe_bb:mk_bb(NewCode2), + NewCFG = hipe_icode_cfg:bb_add(CFG, Label, NewBB), + + {NewCFG, NewUpdate3}. + +%%----------------------------------------------------------------------------- +%% Create a new structure instruction from Instr with destination Defs +%% from the insert mapping in Update. + +rewrite_expr(Update, Instr, Defs) -> + NewInstr = + replace_call_variables(fun(Ref) -> + case Ref of + {ExprId, Num} when is_integer(ExprId) -> + {value, DefList} = update_inserted_lookup(Update, ExprId), + lists:nth(Num, DefList); + _ -> Ref + end end, Instr), + hipe_icode:call_dstlist_update(NewInstr, Defs). + +%%----------------------------------------------------------------------------- +%% Make move instructions from Defs list to all variables in +%% the Refs list and insert into Code. + +mk_defs_moves(Code, [], []) -> Code; +mk_defs_moves(Code, [Ref | Refs], [Def | Defs]) -> + mk_defs_moves([hipe_icode:mk_move(Ref, Def) | Code], Refs, Defs). + +%%----------------------------------------------------------------------------- +%% Utilities + +new_work() -> + {[], gb_sets:new()}. + +add_work_list(List, Work) -> + lists:foldl(fun(Label, WorkAcc) -> + add_work_label(Label, WorkAcc) end, Work, List). + +add_work_label(Label, {List, Set}) -> + case gb_sets:is_member(Label, Set) of + false -> + {[Label | List], gb_sets:add(Label, Set)}; + true -> + {List, Set} + end. + +get_work_list({List, _}) -> + lists:reverse(List). + +%%----------------------------------------------------------------------------- +%% instr_type +%% gets a tag for the type of instruction that is passed in I + +struct_instr_type(I) -> + case I of + #icode_call{type = primop, 'fun' = mktuple} -> + %%debug_count_case(?SR_STRUCT_INSTR_TYPE, #call{type = primop, 'fun' = mktuple}), + struct; + #icode_call{type = primop, 'fun' = cons} -> + %%debug_count_case(?SR_STRUCT_INSTR_TYPE, #call{type = primop, 'fun' = cons}), + struct; + #icode_call{type = primop, 'fun' = redtest} -> + %%debug_count_case(?SR_STRUCT_INSTR_TYPE, #call{type = primop, 'fun' = redtest}), + redtest; + _ -> + %%debug_count_case(?SR_STRUCT_INSTR_TYPE, other), + other + end. + +instr_type(I) -> + case I of + %#call{type = primop, dstlist = List} when length(List) >= 1 -> struct; + #icode_call{type = primop, 'fun' = {unsafe_element, Elem}, dstlist = [DstVar], args = [SrcVar]} -> + %%debug_count_case(?SR_INSTR_TYPE, #call{type = primop, 'fun' = {unsafe_element, num}}), + {struct_elems, Elem, DstVar, SrcVar}; + #icode_phi{} -> + %%debug_count_case(?SR_INSTR_TYPE,#phi{}), + phi; + #icode_enter{} -> + %%debug_count_case(?SR_INSTR_TYPE,#enter{}), + return; + #icode_return{} -> + %%debug_count_case(?SR_INSTR_TYPE,#return{}), + return; + #icode_call{type = primop, 'fun' = mktuple} -> + %%debug_count_case(?SR_INSTR_TYPE, #call{type = primop, 'fun' = mktuple}), + struct; + #icode_call{type = primop, 'fun' = cons} -> + %%debug_count_case(?SR_INSTR_TYPE, #call{type = primop, 'fun' = cons}), + struct; + #icode_call{type = primop, 'fun' = redtest} -> + %%debug_count_case(?SR_INSTR_TYPE, #call{type = primop, 'fun' = redtest}), + redtest; + #icode_type{test = {tuple, Size}, args = [Var]} -> + %%debug_count_case(?SR_INSTR_TYPE, #type{type = {tuple, size}}), + {struct_type, Size, Var, ?MKTUPLE}; + #icode_type{test = cons, args = [Var]} -> + %%debug_count_case(?SR_INSTR_TYPE,#type{type = cons}), + {struct_type, 2, Var, ?CONS}; + %#type{type = {atom, Atom}, args = [Var]} -> {atom_type, Atom, Var}; + #icode_call{type = primop, 'fun' = unsafe_hd, + dstlist = [DstVar], args = [SrcVar]} -> + %%debug_count_case(?SR_INSTR_TYPE,#call{type = primop, 'fun' = unsafe_hd}), + {struct_elems, 1, DstVar, SrcVar}; + #icode_call{type = primop, 'fun' = unsafe_tl, + dstlist = [DstVar], args = [SrcVar]} -> + %%debug_count_case(?SR_INSTR_TYPE, #call{type = primop, 'fun' = unsafe_tl}), + {struct_elems, 2, DstVar, SrcVar}; + #icode_switch_tuple_arity{term = Var, cases = Cases} -> + %%debug_count_case(?SR_INSTR_TYPE,#switch_tuple_arity{}), + {tuple_arity, Var, Cases}; + _ -> other + end. + +%%----------------------------------------------------------------------------- +%% Expression ID counter + +init_expr_id() -> + put({struct_reuse, expr_id_count}, 0). + +-spec new_expr_id() -> non_neg_integer(). +new_expr_id() -> + V = get({struct_reuse, expr_id_count}), + put({struct_reuse, expr_id_count}, V+1), + V. + +%%----------------------------------------------------------------------------- +%% Debug and print functions + +print_struct(String, Struct) -> + io:format(String), + erlang:display(Struct). + +print(String) -> + io:format(String). + +-ifdef(DEBUG). + +debug_count_case(Type, Case) -> + Cases = get(Type), + NewCases = + case gb_trees:lookup(Case, Cases) of + {value, Value} -> gb_trees:enter(Case, Value + 1, Cases); + none -> gb_trees:insert(Case, 1, Cases) + end, + put(Type, NewCases). + +debug_init_case_count(Type) -> + case get(Type) of + undefined -> put(Type, gb_trees:empty()); + _ -> ok + end. + +debug_print_case_count(Type) -> + Cases = get(Type), + debug_struct("Case type: ", Type), + debug_list("Cases: ", gb_trees:to_list(Cases)). + +set_debug_flag(Value) -> + put({struct_reuse, debug}, Value). + +get_debug_flag() -> get({struct_reuse, debug}). + +debug_function(FuncName, CFG) -> + Linear = hipe_icode_cfg:cfg_to_linear(CFG), + Func = hipe_icode:icode_fun(Linear), + case Func =:= FuncName orelse FuncName =:= nil of + true -> + set_debug_flag(true), + %% debug_struct("Code: ", hipe_icode_cfg:bb(CFG, 15)), + debug_struct("~nFunction name :", Func); + false -> + set_debug_flag(undefined) + end. + +debug_cfg_pp(CFG) -> + case get_debug_flag() of + true -> hipe_icode_cfg:pp(CFG); + _ -> none + end. + +debug_struct(String, Struct) -> + case get_debug_flag() of + true -> + io:format(String), + erlang:display(Struct); + _ -> none + end. + +debug(String) -> + case get_debug_flag() of + true -> io:format(String); + _ -> none + end. + +debug_list(String, List) -> + case get_debug_flag() of + true -> print_list(String, List); + _ -> none + end. + +print_list(String, List) -> + io:format(String), + io:format("~n"), + print_list_rec(List), + io:format("~n"). + +print_list_rec([]) -> ok; +print_list_rec([Struct | List]) -> + erlang:display(Struct), + print_list_rec(List). + +debug_nodes(Nodes) -> + lists:foreach(fun(Node) -> debug_node(Node) end, nodes_tree_values(Nodes)). + +debug_node(Node) -> + case get_debug_flag() of + true -> + print_struct("Node Label: ", Node#node.label), + print_struct("Code: ", Node#node.code), + print_struct("Phi: ", Node#node.phi), + print_struct("PreLoop: ", Node#node.pre_loop), + print_struct("Preds: ", Node#node.pred), + print_struct("Succ: ", Node#node.succ), + print_struct("Up Expr: ", Node#node.up_expr), + print_struct("Kill : ", Node#node.killed_expr), + print_struct("AnticIn: ", Node#node.antic_in), + print_struct("AnticOut: ", Node#node.antic_out), + print_struct("SubInserts: ", Node#node.sub_inserts), + print_struct("Inserts: ", Node#node.inserts), + print_struct("NonStructDefs: ", Node#node.non_struct_defs), + print_struct("Params: ", Node#node.struct_type), + print_struct("Elems: ", Node#node.struct_elems), + io:format("~n"); + _ -> none + end. + +-endif. diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl new file mode 100644 index 0000000000..28198467f7 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_type.erl @@ -0,0 +1,2266 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%-------------------------------------------------------------------- +%%% File : hipe_icode_type.erl +%%% Author : Tobias Lindahl +%%% Description : Propagate type information. +%%% +%%% Created : 25 Feb 2003 by Tobias Lindahl +%%% +%%% $Id$ +%%%-------------------------------------------------------------------- + +-module(hipe_icode_type). + +-export([cfg/4, unannotate_cfg/1, specialize/1]). + +%%===================================================================== +%% Icode Coordinator Callbacks +%%===================================================================== + +-export([replace_nones/1, + update__info/2, new__info/1, return__info/1, + return_none/0, return_none_args/2, return_any_args/2]). + +%%===================================================================== + +-include("../main/hipe.hrl"). +-include("hipe_icode.hrl"). +-include("hipe_icode_primops.hrl"). +-include("hipe_icode_type.hrl"). +-include("../flow/cfg.hrl"). + +-type args_fun() :: fun((mfa(), cfg()) -> [erl_types:erl_type()]). +-type call_fun() :: fun((mfa(), [_]) -> erl_types:erl_type()). +-type final_fun() :: fun((mfa(), [_]) -> 'ok'). +-type data() :: {mfa(), args_fun(), call_fun(), final_fun()}. + +%-define(DO_HIPE_ICODE_TYPE_TEST, false). + +-ifdef(DO_HIPE_ICODE_TYPE_TEST). +-export([test/0]). +-endif. + +-define(MFA_debug, fun(_, _, _) -> ok end). + +%-define(debug, fun(X, Y) -> io:format("~s ~p~n", [X, Y]) end). +-define(debug, fun(_, _) -> ok end). + +%-define(flow_debug, fun(X, Y) -> io:format("flow: ~s ~p~n", [X, Y]) end). +-define(flow_debug, fun(_, _) -> ok end). + +%-define(widening_debug, fun(X, Y) -> io:format("wid: ~s ~p~n", [X, Y]) end). +-define(widening_debug, fun(_, _) -> ok end). + +%-define(call_debug, fun(X, Y) -> io:format("call: ~s ~p~n", [X, Y]) end). +-define(call_debug, fun(_, _) -> ok end). + +%-define(ineq_debug, fun(X, Y) -> io:format("ineq: ~s ~p~n", [X, Y]) end). +-define(ineq_debug, fun(_, _) -> ok end). + +%-define(server_debug, fun(X, Y) -> io:format("~p server: ~s ~p~n", [self(), X, Y]) end). +-define(server_debug, fun(_, _) -> ok end). + +-import(erl_types, [min/2, max/2, number_min/1, number_max/1, + t_any/0, t_atom/1, t_atom/0, t_atom_vals/1, + t_binary/0, t_bitstr/0, t_bitstr_base/1, t_bitstr_unit/1, + t_boolean/0, t_cons/0, t_constant/0, + t_float/0, t_from_term/1, t_from_range/2, + t_fun/0, t_fun/1, t_fun/2, t_fun_args/1, t_fun_arity/1, + t_inf/2, t_inf_lists/2, t_integer/0, + t_integer/1, t_is_atom/1, t_is_any/1, + t_is_binary/1, t_is_bitstr/1, t_is_bitwidth/1, t_is_boolean/1, + t_is_fixnum/1, t_is_cons/1, t_is_constant/1, + t_is_maybe_improper_list/1, t_is_equal/2, t_is_float/1, + t_is_fun/1, t_is_integer/1, t_is_non_neg_integer/1, + t_is_number/1, t_is_matchstate/1, + t_is_nil/1, t_is_none/1, t_is_port/1, t_is_pid/1, + t_is_reference/1, t_is_subtype/2, t_is_tuple/1, + t_limit/2, t_matchstate_present/1, t_matchstate/0, + t_matchstate_slots/1, t_maybe_improper_list/0, + t_nil/0, t_none/0, t_number/0, t_number/1, t_number_vals/1, + t_pid/0, t_port/0, t_reference/0, t_subtract/2, t_sup/2, + t_to_tlist/1, t_tuple/0, t_tuple/1, t_tuple_sizes/1]). + +-record(state, {info_map = gb_trees:empty() :: gb_tree(), + cfg :: cfg(), + liveness = gb_trees:empty() :: gb_tree(), + arg_types :: [erl_types:erl_type()], + ret_type = [t_none()] :: [erl_types:erl_type()], + lookupfun :: call_fun(), + resultaction :: final_fun()}). + +%%----------------------------------------------------------------------- +%% The main exported function +%%----------------------------------------------------------------------- + +-spec cfg(cfg(), mfa(), comp_options(), #comp_servers{}) -> cfg(). + +cfg(Cfg, MFA, Options, Servers) -> + case proplists:get_bool(concurrent_comp, Options) of + true -> + concurrent_cfg(Cfg, MFA, Servers#comp_servers.type); + false -> + ordinary_cfg(Cfg, MFA) + end. + +concurrent_cfg(Cfg, MFA, CompServer) -> + CompServer ! {ready, {MFA, self()}}, + {ArgsFun, CallFun, FinalFun} = do_analysis(Cfg, MFA), + Ans = do_rewrite(Cfg, MFA, ArgsFun, CallFun, FinalFun), + CompServer ! {done_rewrite, MFA}, + Ans. + +do_analysis(Cfg, MFA) -> + receive + {analyse, {ArgsFun,CallFun,FinalFun}} -> + analyse(Cfg, {MFA,ArgsFun,CallFun,FinalFun}), + do_analysis(Cfg, MFA); + {done, {_NewArgsFun,_NewCallFun,_NewFinalFun} = Done} -> + Done + end. + +do_rewrite(Cfg, MFA, ArgsFun, CallFun, FinalFun) -> + common_rewrite(Cfg, {MFA,ArgsFun,CallFun,FinalFun}). + +ordinary_cfg(Cfg, MFA) -> + Data = make_data(Cfg,MFA), + common_rewrite(Cfg, Data). + +common_rewrite(Cfg, Data) -> + State = safe_analyse(Cfg, Data), + NewState = simplify_controlflow(State), + NewCfg = state__cfg(annotate_cfg(NewState)), + hipe_icode_cfg:remove_unreachable_code(specialize(NewCfg)). + +make_data(Cfg, {_M,_F,A}=MFA) -> + NoArgs = + case hipe_icode_cfg:is_closure(Cfg) of + true -> hipe_icode_cfg:closure_arity(Cfg); + false -> A + end, + Args = lists:duplicate(NoArgs, t_any()), + ArgsFun = fun(_,_) -> Args end, + CallFun = fun(_,_) -> t_any() end, + FinalFun = fun(_,_) -> ok end, + {MFA,ArgsFun,CallFun,FinalFun}. + +%%debug_make_data(Cfg, {_M,_F,A}=MFA) -> +%% NoArgs = +%% case hipe_icode_cfg:is_closure(Cfg) of +%% true -> hipe_icode_cfg:closure_arity(Cfg); +%% false -> A +%% end, +%% Args = lists:duplicate(NoArgs, t_any()), +%% ArgsFun = fun(MFA,_Cfg) -> io:format("Start:~p~n",[MFA]),Args end, +%% CallFun = fun(MFA,Types) -> io:format("Call With:~p~nTo:~p~n",[Types,MFA]), t_any() end, +%% FinalFun = fun(MFA,Type) -> io:format("ResType:~p~nFor:~p~n",[Type,MFA]),ok end, +%% {MFA,ArgsFun,CallFun,FinalFun}. + + +%%------------------------------------------------------------------- +%% Global type analysis on the whole function. Demands that the code +%% is in SSA form. When we encounter a phi node, the types of the +%% arguments are joined. At the end of a block the information out is +%% joined with the current information in for all _valid_ successors, +%% that is, of all successors that actually can be reached. If the +%% join produces new information in for the successor, this +%% information is added to the worklist. +%%------------------------------------------------------------------- + +-spec analyse(cfg(), data()) -> 'ok'. + +analyse(Cfg, Data) -> + try + #state{} = safe_analyse(Cfg, Data), + ok + catch throw:no_input -> ok % No need to do anything since we have no input + end. + +-spec safe_analyse(cfg(), data()) -> #state{}. + +safe_analyse(Cfg, {MFA,_,_,_}=Data) -> + State = new_state(Cfg, Data), + NewState = analyse_blocks(State,MFA), + (state__resultaction(NewState))(MFA,state__ret_type(NewState)), + NewState. + +analyse_blocks(State, MFA) -> + Work = init_work(State), + analyse_blocks(Work, State, MFA). + +analyse_blocks(Work, State, MFA) -> + case get_work(Work) of + fixpoint -> + State; + {Label, NewWork} -> + Info = state__info_in(State, Label), + {NewState, NewLabels} = + try analyse_block(Label, Info, State) + catch throw:none_type -> + %% io:format("received none type at label: ~p~n",[Label]), + {State,[]} + end, + NewWork2 = add_work(NewWork, NewLabels), + analyse_blocks(NewWork2, NewState, MFA) + end. + +analyse_block(Label, InfoIn, State) -> + BB = state__bb(State, Label), + Code = hipe_bb:butlast(BB), + Last = hipe_bb:last(BB), + InfoOut = analyse_insns(Code, InfoIn, state__lookupfun(State)), + NewState = state__info_out_update(State, Label, InfoOut), + case Last of + #icode_if{} -> + UpdateInfo = do_if(Last, InfoOut), + do_updates(NewState, UpdateInfo); + #icode_type{} -> + UpdateInfo = do_type(Last, InfoOut), + do_updates(NewState, UpdateInfo); + #icode_switch_tuple_arity{} -> + UpdateInfo = do_switch_tuple_arity(Last, InfoOut), + do_updates(NewState, UpdateInfo); + #icode_switch_val{} -> + UpdateInfo = do_switch_val(Last, InfoOut), + do_updates(NewState, UpdateInfo); + #icode_enter{} -> + NewState1 = do_enter(Last, InfoOut, NewState, state__lookupfun(NewState)), + do_updates(NewState1,[]); + #icode_call{} -> + {NewState1,UpdateInfo} = do_last_call(Last, InfoOut, NewState, Label), + do_updates(NewState1, UpdateInfo); + #icode_return{} -> + NewState1 = do_return(Last, InfoOut, NewState), + do_updates(NewState1,[]); + _ -> + UpdateInfo = [{X, InfoOut} || X <- state__succ(NewState, Label)], + do_updates(NewState, UpdateInfo) + end. + +analyse_insns([I|Insns], Info, LookupFun) -> + NewInfo = analyse_insn(I, Info, LookupFun), + analyse_insns(Insns, NewInfo, LookupFun); +analyse_insns([], Info, _) -> + Info. + +analyse_insn(I, Info, LookupFun) -> + case I of + #icode_move{} -> + do_move(I, Info); + #icode_call{} -> + NewInfo = do_call(I, Info, LookupFun), + %%io:format("Analysing Call: ~w~n~w~n", [I,NewInfo]), + update_call_arguments(I, NewInfo); + #icode_phi{} -> + Type = t_limit(join_list(hipe_icode:args(I), Info), ?TYPE_DEPTH), + enter_defines(I, Type, Info); + #icode_begin_handler{} -> + enter_defines(I, t_any(), Info); + _ -> + %% Just an assert + case defines(I) of + [] -> Info; + _ -> exit({"Instruction with destination not analysed", I}) + end + end. + +do_move(I, Info) -> + %% Can't use uses/1 since we must keep constants. + [Src] = hipe_icode:args(I), + enter_defines(I, lookup(Src, Info), Info). + +do_basic_call(I, Info, LookupFun) -> + case hipe_icode:call_type(I) of + primop -> + Fun = hipe_icode:call_fun(I), + ArgTypes = lookup_list(hipe_icode:args(I), Info), + primop_type(Fun, ArgTypes); + remote -> + {M, F, A} = hipe_icode:call_fun(I), + ArgTypes = lookup_list(hipe_icode:args(I), Info), + None = t_none(), + case erl_bif_types:type(M, F, A, ArgTypes) of + None -> + NewArgTypes = add_funs_to_arg_types(ArgTypes), + erl_bif_types:type(M, F, A, NewArgTypes); + Other -> + Other + end; + local -> + MFA = hipe_icode:call_fun(I), + ArgTypes = lookup_list(hipe_icode:args(I), Info), + %% io:format("Call:~p~nTypes: ~p~n",[I,ArgTypes]), + LookupFun(MFA,ArgTypes) + end. + +do_call(I, Info, LookupFun) -> + RetType = do_basic_call(I, Info, LookupFun), + IsNone = t_is_none(RetType), + %% io:format("RetType ~p~nIsNone ~p~n~p~n",[RetType,IsNone,I]), + if IsNone -> throw(none_type); + true -> enter_defines(I, RetType, Info) + end. + +do_safe_call(I, Info, LookupFun) -> + RetType = do_basic_call(I, Info, LookupFun), + enter_defines(I, RetType, Info). + +do_last_call(Last, InfoOut, State, Label) -> + try + NewInfoOut = do_call(Last, InfoOut, state__lookupfun(State)), + NewState = state__info_out_update(State, Label, NewInfoOut), + ContInfo = update_call_arguments(Last, NewInfoOut), + Cont = hipe_icode:call_continuation(Last), + Fail = hipe_icode:call_fail_label(Last), + ?call_debug("Continfo, NewInfoOut", {ContInfo, NewInfoOut}), + UpdateInfo = + case Fail of + [] -> + [{Cont, ContInfo}]; + _ -> + case call_always_fails(Last, InfoOut) of + true -> + [{Fail, NewInfoOut}]; + false -> + Fun = hipe_icode:call_fun(Last), + case hipe_icode_primops:fails(Fun) of + true -> + [{Cont, ContInfo}, {Fail, NewInfoOut}]; + false -> + [{Cont, ContInfo}] + end + end + end, + {NewState,UpdateInfo} + catch throw:none_type -> + State2 = state__info_out_update(State, Label, InfoOut), + case hipe_icode:call_fail_label(Last) of + [] -> throw(none_type); + FailLbl -> + {State2,[{FailLbl, InfoOut}]} + end + end. + +call_always_fails(#icode_call{} = I, Info) -> + case hipe_icode:call_fun(I) of + %% These can actually be calls too. + {erlang, halt, 0} -> false; + {erlang, halt, 1} -> false; + {erlang, exit, 1} -> false; + {erlang, error, 1} -> false; + {erlang, error, 2} -> false; + {erlang, throw, 1} -> false; + {erlang, hibernate, 3} -> false; + Fun -> + case hipe_icode:call_type(I) of + primop -> + Args = safe_lookup_list(hipe_icode:call_args(I), Info), + ReturnType = primop_type(Fun, Args), + t_is_none(ReturnType); + _ -> false + end + end. + +do_enter(I, Info, State, LookupFun) -> + %% io:format("Enter:~p~n",[I]), + ArgTypes = lookup_list(hipe_icode:args(I), Info), + RetTypes = + case hipe_icode:enter_type(I) of + local -> + MFA = hipe_icode:enter_fun(I), + LookupFun(MFA,ArgTypes); + remote -> + {M, F, A} = hipe_icode:enter_fun(I), + None = t_none(), + case erl_bif_types:type(M, F, A, ArgTypes) of + None -> + NewArgTypes = add_funs_to_arg_types(ArgTypes), + erl_bif_types:type(M, F, A, NewArgTypes); + Other -> + Other + end; + primop -> + Fun = hipe_icode:enter_fun(I), + primop_type(Fun, ArgTypes) + end, + state__ret_type_update(State, RetTypes). + +do_return(I, Info, State) -> + RetTypes = lookup_list(hipe_icode:args(I), Info), + state__ret_type_update(State, RetTypes). + +do_if(I, Info) -> + %% XXX: Could probably do better than this. + TrueLab = hipe_icode:if_true_label(I), + FalseLab = hipe_icode:if_false_label(I), + case hipe_icode:if_args(I) of + [Arg1, Arg2] = Args -> + [Type1, Type2] = lookup_list(Args, Info), + case t_is_none(Type1) orelse t_is_none(Type2) of + true -> + [{TrueLab, Info}, {FalseLab, Info}]; + false -> + Inf = t_inf(Type1, Type2), + case hipe_icode:if_op(I) of + '=:='-> + case t_is_none(Inf) of + true -> + [{FalseLab, Info}]; + false -> + [{TrueLab, enter(Arg1, Inf, enter(Arg2, Inf, Info))}, + {FalseLab, Info}] + end; + '=/=' -> + case t_is_none(Inf) of + true -> + [{TrueLab, Info}]; + false -> + [{FalseLab, enter(Arg1, Inf, enter(Arg2, Inf, Info))}, + {TrueLab, Info}] + end; + '==' -> + [{TrueLab, Info}, {FalseLab, Info}]; + '/=' -> + [{TrueLab, Info}, {FalseLab, Info}]; + Op -> + integer_range_inequality_propagation(Op, Arg1, Arg2, + TrueLab, FalseLab, Info) + %%_ -> + %% [{TrueLab, Info}, {FalseLab, Info}] + end + end; + _ -> + %% Only care for binary if:s + [{TrueLab, Info}, {FalseLab, Info}] + end. + +integer_range_inequality_propagation(Op, A1, A2, TrueLab, FalseLab, Info) -> + Arg1 = lookup(A1, Info), + Arg2 = lookup(A2, Info), + ?ineq_debug("args", [Arg1,Arg2]), + IntArg1 = t_inf(Arg1, t_integer()), + IntArg2 = t_inf(Arg2, t_integer()), + NonIntArg1 = t_subtract(Arg1, t_integer()), + NonIntArg2 = t_subtract(Arg2, t_integer()), + ?ineq_debug("nonintargs", [NonIntArg1,NonIntArg2]), + case t_is_none(IntArg1) or t_is_none(IntArg2) of + true -> + ?ineq_debug("one is none", [IntArg1,IntArg2]), + [{TrueLab, Info}, {FalseLab, Info}]; + false -> + case Op of + '>=' -> + {FalseArg1, FalseArg2, TrueArg1, TrueArg2} = + integer_range_less_then_propagator(IntArg1, IntArg2); + '>' -> + {TrueArg2, TrueArg1, FalseArg2, FalseArg1} = + integer_range_less_then_propagator(IntArg2, IntArg1); + '<' -> + {TrueArg1, TrueArg2, FalseArg1, FalseArg2} = + integer_range_less_then_propagator(IntArg1, IntArg2); + '=<' -> + {FalseArg2, FalseArg1, TrueArg2, TrueArg1} = + integer_range_less_then_propagator(IntArg2, IntArg1) + end, + ?ineq_debug("int res", [TrueArg1, TrueArg2, FalseArg1, FalseArg2]), + False = {FalseLab, enter(A1, t_sup(FalseArg1, NonIntArg1), + enter(A2, t_sup(FalseArg2, NonIntArg2), Info))}, + True = {TrueLab, enter(A1, t_sup(TrueArg1, NonIntArg1), + enter(A2, t_sup(TrueArg2, NonIntArg2), Info))}, + [True, False] + end. + +integer_range_less_then_propagator(IntArg1, IntArg2) -> + Min1 = number_min(IntArg1), + Max1 = number_max(IntArg1), + Min2 = number_min(IntArg2), + Max2 = number_max(IntArg2), + %% is this the same as erl_types:t_subtract?? no ... ?? + TrueMax1 = min(Max1, erl_bif_types:infinity_add(Max2, -1)), + TrueMin2 = max(erl_bif_types:infinity_add(Min1, 1), Min2), + FalseMin1 = max(Min1, Min2), + FalseMax2 = min(Max1, Max2), + {t_from_range(Min1, TrueMax1), + t_from_range(TrueMin2, Max2), + t_from_range(FalseMin1, Max1), + t_from_range(Min2, FalseMax2)}. + +do_type(I, Info) -> + case hipe_icode:args(I) of + [Var] -> do_type(I, Info, Var); + [Var1,Var2] -> do_type2(I, Info, Var1, Var2) + end. + +do_type2(I, Info, FunVar, ArityVar) -> % function2(Fun,Arity) + %% Just for sanity. + function2 = hipe_icode:type_test(I), + FunType = lookup(FunVar, Info), + ArityType = lookup(ArityVar, Info), + TrueLab = hipe_icode:type_true_label(I), + FalseLab = hipe_icode:type_false_label(I), + SuccType1 = t_inf(t_fun(), FunType), + case combine_test(test_type(function, FunType), + test_type(integer, ArityType)) of + true -> + case t_number_vals(ArityType) of + [Arity] -> + case t_fun_arity(SuccType1) of + unknown -> + SuccType = t_inf(t_fun(Arity,t_any()),FunType), + [{TrueLab, enter(FunVar, SuccType, Info)}, + {FalseLab, Info}]; + Arity when is_integer(Arity) -> + FalseType = t_subtract(FunType, t_fun(Arity, t_any())), + [{TrueLab, enter(FunVar, SuccType1, Info)}, + {FalseLab, enter(FunVar, FalseType, Info)}] + end; + _ -> + case t_fun_arity(SuccType1) of + unknown -> + [{TrueLab, enter(FunVar,SuccType1,Info)}, + {FalseLab, Info}]; + Arity when is_integer(Arity) -> + T = t_from_term(Arity), + NewInfo = enter(ArityVar, T, Info), + [{TrueLab, enter(FunVar, SuccType1, NewInfo)}, + {FalseLab, enter(ArityVar, t_subtract(T, ArityType), Info)}] + end + end; + false -> + [{FalseLab, Info}]; + maybe -> + GenTrueArity = t_inf(t_integer(), ArityType), + GenTrueFun = t_inf(t_fun(), FunType), + case {t_number_vals(GenTrueArity), t_fun_arity(GenTrueFun)} of + {unknown, unknown} -> + TrueInfo = enter_list([FunVar, ArityVar], + [GenTrueFun, GenTrueArity], Info), + [{TrueLab, TrueInfo}, {FalseLab, Info}]; + {unknown, Arity} when is_integer(Arity) -> + TrueInfo = enter_list([FunVar, ArityVar], + [GenTrueFun, t_integer(Arity)], Info), + [{TrueLab, TrueInfo}, {FalseLab, Info}]; + {[Val], unknown} when is_integer(Val) -> + TrueInfo = enter_list([FunVar, ArityVar], + [t_inf(GenTrueFun, t_fun(Val, t_any())), + GenTrueArity], Info), + [{TrueLab, TrueInfo}, {FalseLab, Info}]; + {Vals, unknown} when is_list(Vals) -> + %% The function type gets widened when we have more than one arity. + TrueInfo = enter_list([FunVar, ArityVar], + [GenTrueFun, GenTrueArity], Info), + [{TrueLab, TrueInfo}, {FalseLab, Info}]; + {Vals, Arity} when is_list(Vals), is_integer(Arity) -> + case lists:member(Arity, Vals) of + false -> + [{FalseLab, Info}]; + true -> + TrueInfo = enter_list([FunVar, ArityVar], + [GenTrueFun, t_integer(Arity)], Info), + [{TrueLab, TrueInfo}, {FalseLab, Info}] + end + end + end. + +combine_test(true, true) -> true; +combine_test(false, _) -> false; +combine_test(_, false) -> false; +combine_test(_, _) -> maybe. + +do_type(I, Info, Var) -> + TrueLab = hipe_icode:type_true_label(I), + FalseLab = hipe_icode:type_false_label(I), + None = t_none(), + + case lookup(Var, Info) of + None -> + [{TrueLab, Info}, {FalseLab, Info}]; + VarInfo -> + case hipe_icode:type_test(I) of + cons -> + test_cons_or_nil(t_cons(), Var, VarInfo, TrueLab, FalseLab, Info); + nil -> + test_cons_or_nil(t_nil(), Var, VarInfo, TrueLab, FalseLab, Info); + {atom, A} = Test -> + test_number_or_atom(fun(X) -> t_atom(X) end, + A, Var, VarInfo, Test, TrueLab, FalseLab, Info); + {integer, N} = Test -> + test_number_or_atom(fun(X) -> t_number(X) end, + N, Var, VarInfo, Test, TrueLab, FalseLab, Info); + {record, Atom, Size} -> + test_record(Atom, Size, Var, VarInfo, TrueLab, FalseLab, Info); + Other -> + case t_is_any(VarInfo) of + true -> + TrueType = t_inf(true_branch_info(Other), VarInfo), + TrueInfo = enter(Var, TrueType, Info), + [{TrueLab, TrueInfo}, {FalseLab, Info}]; + false -> + case test_type(Other, VarInfo) of + true -> + [{TrueLab, Info}]; + false -> + [{FalseLab, Info}]; + maybe -> + TrueType = t_inf(true_branch_info(Other), VarInfo), + TrueInfo = enter(Var, TrueType, Info), + FalseType = t_subtract(VarInfo, TrueType), + FalseInfo = enter(Var, FalseType, Info), + [{TrueLab, TrueInfo}, {FalseLab, FalseInfo}] + end + end + end + end. + +do_switch_tuple_arity(I, Info) -> + Var = hipe_icode:switch_tuple_arity_term(I), + VarType = lookup(Var, Info), + Cases = hipe_icode:switch_tuple_arity_cases(I), + FailLabel = hipe_icode:switch_tuple_arity_fail_label(I), + case legal_switch_tuple_arity_cases(Cases, VarType) of + [] -> + [{FailLabel, Info}]; + LegalCases -> + {Fail, UpdateInfo} = + switch_tuple_arity_update_info(LegalCases, Var, VarType, + FailLabel, VarType, Info, []), + case switch_tuple_arity_can_fail(LegalCases, VarType) of + true -> [Fail|UpdateInfo]; + false -> UpdateInfo + end + end. + +legal_switch_tuple_arity_cases(Cases, Type) -> + case t_is_tuple(Type) of + false -> + Inf = t_inf(t_tuple(), Type), + case t_is_tuple(Inf) of + true -> legal_switch_tuple_arity_cases_1(Cases, Inf); + false -> [] + end; + true -> + legal_switch_tuple_arity_cases_1(Cases, Type) + end. + +legal_switch_tuple_arity_cases_1(Cases, Type) -> + case t_tuple_sizes(Type) of + unknown -> + Cases; + TupleSizes -> + [Case || {Size, _Label} = Case <- Cases, + lists:member(hipe_icode:const_value(Size), TupleSizes)] + end. + +switch_tuple_arity_can_fail(LegalCases, ArgType) -> + case t_is_tuple(ArgType) of + false -> true; + true -> + case t_tuple_sizes(ArgType) of + unknown -> true; + Sizes1 -> + Sizes2 = [hipe_icode:const_value(X) || {X, _} <- LegalCases], + Set1 = sets:from_list(Sizes1), + Set2 = sets:from_list(Sizes2), + not sets:is_subset(Set1, Set2) + end + end. + +switch_tuple_arity_update_info([{Arity, Label}|Left], Var, TupleType, + FailLabel, FailType, Info, Acc) -> + Inf = t_inf(TupleType, t_tuple(hipe_icode:const_value(Arity))), + NewInfo = enter(Var, Inf, Info), + NewFailType = t_subtract(FailType, Inf), + switch_tuple_arity_update_info(Left, Var, TupleType, FailLabel, NewFailType, + Info, [{Label, NewInfo}|Acc]); +switch_tuple_arity_update_info([], Var, _TupleType, + FailLabel, FailType, Info, Acc) -> + {{FailLabel, enter(Var, FailType, Info)}, Acc}. + +do_switch_val(I, Info) -> + Var = hipe_icode:switch_val_term(I), + VarType = lookup(Var, Info), + Cases = hipe_icode:switch_val_cases(I), + FailLabel = hipe_icode:switch_val_fail_label(I), + case legal_switch_val_cases(Cases, VarType) of + [] -> + [{FailLabel, Info}]; + LegalCases -> + switch_val_update_info(LegalCases, Var, VarType, + FailLabel, VarType, Info, []) + end. + +legal_switch_val_cases(Cases, Type) -> + legal_switch_val_cases(Cases, Type, []). + +legal_switch_val_cases([{Val, _Label} = VL|Left], Type, Acc) -> + ConstType = t_from_term(hipe_icode:const_value(Val)), + case t_is_subtype(ConstType, Type) of + true -> + legal_switch_val_cases(Left, Type, [VL|Acc]); + false -> + legal_switch_val_cases(Left, Type, Acc) + end; +legal_switch_val_cases([], _Type, Acc) -> + lists:reverse(Acc). + +switch_val_update_info([{Const, Label}|Left], Arg, ArgType, + FailLabel, FailType, Info, Acc) -> + TrueType = t_from_term(hipe_icode:const_value(Const)), + NewInfo = enter(Arg, TrueType, Info), + NewFailType = t_subtract(FailType, TrueType), + switch_val_update_info(Left, Arg, ArgType, FailLabel, NewFailType, + Info, [{Label, NewInfo}|Acc]); +switch_val_update_info([], Arg, _ArgType, FailLabel, FailType,Info, Acc) -> + [{FailLabel, enter(Arg, FailType, Info)}|Acc]. + +test_cons_or_nil(Type, Var, VarInfo, TrueLab, FalseLab, Info) -> + case t_is_any(VarInfo) of + true -> + [{TrueLab, enter(Var, Type, Info)}, + {FalseLab, Info}]; + false -> + TrueType = t_inf(VarInfo, Type), + FalseType = t_subtract(VarInfo, TrueType), + case t_is_none(FalseType) of + true -> + [{TrueLab, Info}]; + false -> + case t_is_none(TrueType) of + true -> + [{FalseLab, Info}]; + false -> + [{TrueLab, enter(Var, TrueType, Info)}, + {FalseLab, enter(Var, FalseType, Info)}] + end + end + end. + +test_number_or_atom(Fun, X, Var, VarInfo, TypeTest, + TrueLab, FalseLab, Info) -> + case t_is_any(VarInfo) of + true -> + [{TrueLab, enter(Var, Fun(X), Info)}, + {FalseLab, Info}]; + false -> + case test_type(TypeTest, VarInfo) of + false -> + [{FalseLab, Info}]; + true -> + [{TrueLab, Info}]; + maybe -> + FalseType = t_subtract(VarInfo, Fun(X)), + [{TrueLab, enter(Var, Fun(X), Info)}, + {FalseLab, enter(Var, FalseType, Info)}] + end + end. + +test_record(Atom, Size, Var, VarInfo, TrueLab, FalseLab, Info) -> + AnyList = lists:duplicate(Size - 1, t_any()), + RecordType = t_tuple([t_atom(Atom)|AnyList]), + Inf = t_inf(RecordType, VarInfo), + case t_is_none(Inf) of + true -> + [{FalseLab, Info}]; + false -> + Sub = t_subtract(VarInfo, Inf), + case t_is_none(Sub) of + true -> + [{TrueLab, enter(Var, Inf, Info)}]; + false -> + [{TrueLab, enter(Var, Inf, Info)}, + {FalseLab, enter(Var, Sub, Info)}] + end + end. + +test_type(Test, Type) -> + %%io:format("Test is: ~w\n", [Test]), + %%io:format("Type is: ~s\n", [format_type(Type)]), + Ans = + case t_is_any(Type) of + true -> maybe; + false -> + TrueTest = true_branch_info(Test), + Inf = t_inf(TrueTest, Type), + %%io:format("TrueTest is: ~s\n", [format_type(TrueTest)]), + %%io:format("Inf is: ~s\n", [format_type(Inf)]), + case t_is_equal(Type, Inf) of + true -> + not t_is_none(Type); + false -> + case t_is_equal(TrueTest, Inf) of + true -> + case test_type0(Test, Type) of + false -> + maybe; + true -> + true; + maybe -> + maybe + end; + false -> + case test_type0(Test, Inf) of + true -> + maybe; + false -> + false; + maybe -> + maybe + end + end + end + end, + %% io:format("Result is: ~s\n\n", [Ans]), + Ans. + +test_type0(integer, T) -> + t_is_integer(T); +test_type0({integer, N}, T) -> + case t_is_integer(T) of + true -> + case t_number_vals(T) of + unknown -> maybe; + [N] -> true; + List when is_list(List) -> + case lists:member(N, List) of + true -> maybe; + false -> false + end + end; + false -> false + end; +test_type0(float, T) -> + t_is_float(T); +test_type0(number, T) -> + t_is_number(T); +test_type0(atom, T) -> + t_is_atom(T); +test_type0({atom, A}, T) -> + case t_is_atom(T) of + true -> + case t_atom_vals(T) of + unknown -> maybe; + [A] -> true; + List when is_list(List) -> + case lists:member(A, List) of + true -> maybe; + false -> false + end + end; + false -> false + end; +test_type0(tuple, T) -> + t_is_tuple(T); +test_type0({tuple, N}, T) -> + case t_is_tuple(T) of + true -> + case t_tuple_sizes(T) of + unknown -> maybe; + [X] when is_integer(X) -> X =:= N; + List when is_list(List) -> + case lists:member(N, List) of + true -> maybe; + false -> false + end + end; + false -> false + end; +test_type0(pid, T) -> + t_is_pid(T); +test_type0(port, T) -> + t_is_port(T); +test_type0(binary, T) -> + t_is_binary(T); +test_type0(bitstr, T) -> + t_is_bitstr(T); +test_type0(reference, T) -> + t_is_reference(T); +test_type0(function, T) -> + t_is_fun(T); +test_type0(boolean, T) -> + t_is_boolean(T); +test_type0(list, T) -> + t_is_maybe_improper_list(T); +test_type0(cons, T) -> + t_is_cons(T); +test_type0(nil, T) -> + t_is_nil(T); +test_type0(constant, T) -> + t_is_constant(T). + + +true_branch_info(integer) -> + t_integer(); +true_branch_info({integer, N}) -> + t_integer(N); +true_branch_info(float) -> + t_float(); +true_branch_info(number) -> + t_number(); +true_branch_info(atom) -> + t_atom(); +true_branch_info({atom, A}) -> + t_atom(A); +true_branch_info(list) -> + t_maybe_improper_list(); +true_branch_info(tuple) -> + t_tuple(); +true_branch_info({tuple, N}) -> + t_tuple(N); +true_branch_info(pid) -> + t_pid(); +true_branch_info(port) -> + t_port(); +true_branch_info(binary) -> + t_binary(); +true_branch_info(bitstr) -> + t_bitstr(); +true_branch_info(reference) -> + t_reference(); +true_branch_info(function) -> + t_fun(); +true_branch_info(cons) -> + t_cons(); +true_branch_info(nil) -> + t_nil(); +true_branch_info(boolean) -> + t_boolean(); +true_branch_info(constant) -> + t_constant(); +true_branch_info(T) -> + exit({?MODULE,unknown_typetest,T}). + + +%% _________________________________________________________________ +%% +%% Remove the redundant type tests. If a test is removed, the trace +%% that isn't taken is explicitly removed from the CFG to simpilify +%% the handling of Phi nodes. If a Phi node is left and at least one +%% branch into it has disappeared, the SSA propagation pass can't +%% handle it. +%% +%% If the CFG has changed at the end of this pass, the analysis is +%% done again since we might be able to find more information because +%% of the simplification of the CFG. +%% + +simplify_controlflow(State) -> + Cfg = state__cfg(State), + simplify_controlflow(hipe_icode_cfg:reverse_postorder(Cfg), State). + +simplify_controlflow([Label|Left], State) -> + Info = state__info_out(State, Label), + NewState = + case state__bb(State, Label) of + not_found -> State; + BB -> + I = hipe_bb:last(BB), + case I of + #icode_if{} -> + rewrite_if(State,I,BB,Info,Label); + #icode_type{} -> + rewrite_type(State,I,BB,Info,Label); + #icode_switch_tuple_arity{} -> + rewrite_switch_tuple_arity(State,I,BB,Info,Label); + #icode_switch_val{} -> + rewrite_switch_val(State,I,BB,Info,Label); + #icode_call{} -> + rewrite_call(State,I,BB,Info,Label); + _ -> + State + end + end, + simplify_controlflow(Left, NewState); +simplify_controlflow([], State) -> + State. + +rewrite_if(State, I, BB, Info, Label) -> + case do_if(I, Info) of + [{Lab, _}] -> + mk_goto(State, BB, Label, Lab); + [_,_] -> + State + end. + +rewrite_type(State, I, BB, Info, Label) -> + FalseLab = hipe_icode:type_false_label(I), + case hipe_icode:type_true_label(I) of + FalseLab -> + %% true label = false label, this can occur! + mk_goto(State, BB, Label, FalseLab); + TrueLab -> + case do_type(I, Info) of + [{TrueLab, _}] -> + mk_goto(State, BB, Label, TrueLab); + [{FalseLab, _}] -> + mk_goto(State, BB, Label, FalseLab); + [_,_] -> %% Maybe + State + end + end. + +rewrite_switch_tuple_arity(State, I, BB, Info, Label) -> + Cases = hipe_icode:switch_tuple_arity_cases(I), + Var = hipe_icode:switch_tuple_arity_term(I), + Type = safe_lookup(Var, Info), + case legal_switch_tuple_arity_cases(Cases, Type) of + [] -> + Fail = hipe_icode:switch_tuple_arity_fail_label(I), + mk_goto(State, BB, Label, Fail); + Cases -> + %% Nothing changed. + case switch_tuple_arity_can_fail(Cases, Type) of + true -> State; + false -> + NewCases = butlast(Cases), + {_Arity, NewFail} = lists:last(Cases), + TmpI = + hipe_icode:switch_tuple_arity_fail_label_update(I, NewFail), + NewI = + hipe_icode:switch_tuple_arity_cases_update(TmpI, NewCases), + NewBB = hipe_bb:code_update(BB, hipe_bb:butlast(BB) ++ [NewI]), + state__bb_add(State, Label, NewBB) + end; + LegalCases -> + NewI = + case switch_tuple_arity_can_fail(LegalCases, Type) of + true -> + hipe_icode:switch_tuple_arity_cases_update(I, LegalCases); + false -> + NewCases = butlast(LegalCases), + {_Arity, NewFail} = lists:last(LegalCases), + TmpI = + hipe_icode:switch_tuple_arity_cases_update(I, NewCases), + hipe_icode:switch_tuple_arity_fail_label_update(TmpI, NewFail) + end, + NewBB = hipe_bb:code_update(BB, hipe_bb:butlast(BB) ++ [NewI]), + state__bb_add(State, Label, NewBB) + end. + +rewrite_switch_val(State, I, BB, Info, Label) -> + Cases = hipe_icode:switch_val_cases(I), + Var = hipe_icode:switch_val_term(I), + VarType = safe_lookup(Var, Info), + case legal_switch_val_cases(Cases, VarType) of + [] -> + Fail = hipe_icode:switch_val_fail_label(I), + mk_goto(State, BB, Label, Fail); + Cases -> + State; + %% TODO: Find out whether switch_val can fail + %% just as switch_tuple_arity + LegalCases -> + NewI = hipe_icode:switch_val_cases_update(I, LegalCases), + NewBB = hipe_bb:code_update(BB, hipe_bb:butlast(BB) ++ [NewI]), + state__bb_add(State, Label, NewBB) + end. + +rewrite_call(State,I,BB,Info,Label) -> + case call_always_fails(I, Info) of + false -> + Fun = hipe_icode:call_fun(I), + case hipe_icode_primops:fails(Fun) of + false -> + case hipe_icode:call_fail_label(I) of + [] -> State; + _ -> unset_fail(State, BB, Label, I) + end; + true -> State + end; + true -> + case hipe_icode:call_in_guard(I) of + false -> State; + true -> + FailLabel = hipe_icode:call_fail_label(I), + mk_goto(State, BB, Label, FailLabel) + end + end. + +mk_goto(State, BB, Label, Succ) -> + NewI = hipe_icode:mk_goto(Succ), + NewBB = hipe_bb:code_update(BB, hipe_bb:butlast(BB) ++ [NewI]), + state__bb_add(State, Label, NewBB). + +unset_fail(State, BB, Label, I) -> + %%io:format("Setting a guard that cannot fail\n", []), + NewI = hipe_icode:call_set_fail_label(I, []), + NewBB = hipe_bb:code_update(BB, hipe_bb:butlast(BB) ++ [NewI]), + state__bb_add(State, Label, NewBB). + +%% _________________________________________________________________ +%% +%% Make transformations (specialisations) based on the type knowledge. +%% +%% Annotate the variables with the local information. Since we have +%% the code in SSA form and the type information can only depend on +%% assignments or branches (type tests), we can use the information +%% out of the block to annotate all variables in it. +%% + +-spec specialize(cfg()) -> cfg(). + +specialize(Cfg) -> + Labels = hipe_icode_cfg:reverse_postorder(Cfg), + transform_bbs(Labels, Cfg). + +transform_bbs([Label|Left], Cfg) -> + BB = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + NewCode = make_transformations(Code), + NewBB = hipe_bb:code_update(BB, NewCode), + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, NewBB), + transform_bbs(Left, NewCfg); +transform_bbs([], Cfg) -> + Cfg. + +make_transformations(Is) -> + lists:flatten([transform_insn(I) || I <- Is]). + +transform_insn(I) -> + case I of + #icode_call{} -> + handle_call_and_enter(I); + #icode_enter{} -> + handle_call_and_enter(I); + #icode_if{} -> + CurrentIfOp = hipe_icode:if_op(I), + UsesFixnums = all_fixnums([get_type(A) || A <- hipe_icode:args(I)]), + AnyImmediate = any_immediate([get_type(A) || A <- hipe_icode:args(I)]), + ExactComp = is_exact_comp(CurrentIfOp), + if UsesFixnums -> + hipe_icode:if_op_update(I, fixnum_ifop(CurrentIfOp)); + AnyImmediate andalso ExactComp -> + hipe_icode:if_op_update(I, fixnum_ifop(CurrentIfOp)); + true -> + I + end; + _ -> + I + end. + +handle_call_and_enter(I) -> + case call_or_enter_fun(I) of + #element{} -> + transform_insn(update_call_or_enter(I, {erlang, element, 2})); + {erlang, element, 2} -> + NewI1 = transform_element2(I), + case is_record(I, icode_call) andalso hipe_icode:call_in_guard(I) of + true -> + case hipe_icode:call_fun(NewI1) of + #unsafe_element{} -> NewI1; + _ -> I + end; + false -> + NewI1 + end; + {erlang, hd, 1} -> transform_hd_or_tl(I, unsafe_hd); + {erlang, tl, 1} -> transform_hd_or_tl(I, unsafe_tl); + {hipe_bs_primop, BsOP} -> + NewBsOp = + bit_opts(BsOP, get_type_list(hipe_icode:args(I))), + update_call_or_enter(I, {hipe_bs_primop, NewBsOp}); + conv_to_float -> + [Src] = hipe_icode:args(I), + case t_is_float(get_type(Src)) of + true -> + update_call_or_enter(I, unsafe_untag_float); + false -> + I + end; + FunName -> + case is_arith_function(FunName) of + true -> + case strength_reduce(I, FunName) of + NewIs when is_list(NewIs) -> + [pos_transform_arith(NewI) || NewI <- NewIs]; + NewI -> + pos_transform_arith(NewI) + end; + false -> + I + end + end. + +pos_transform_arith(I) -> + case hipe_icode:is_enter(I) orelse hipe_icode:is_call(I) of + true -> + FunName = call_or_enter_fun(I), + transform_arith(I, FunName); + false -> + I + end. + +is_arith_function(Name) -> + case Name of + 'band' -> true; + 'bor' -> true; + 'bxor' -> true; + 'bnot' -> true; + 'bsl' -> true; + 'bsr' -> true; + '+' -> true; + '-' -> true; + '*' -> true; + 'div' -> true; + 'rem' -> true; + _ -> false + end. + +%%--------------------------------------------------------------------- +%% Perform a limited form of strength reduction for multiplication and +%% division of an integer with constants which are multiples of 2. +%%--------------------------------------------------------------------- + +strength_reduce(I, Op) -> + case Op of + '*' -> + [Arg1, Arg2] = mult_args_const_second(I), + ArgT1 = get_type(Arg1), + case t_is_integer(ArgT1) of + true -> + case hipe_icode:is_const(Arg2) of + true -> + case hipe_icode:const_value(Arg2) of + 0 -> case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [Dst] -> create_strength_reduce_move(I, Dst, Arg2) + end; + 1 -> case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [Dst] -> create_strength_reduce_move(I, Dst, Arg1) + end; + 2 -> strength_reduce_imult(I, Arg1, 1); + 4 -> strength_reduce_imult(I, Arg1, 2); + 8 -> strength_reduce_imult(I, Arg1, 3); + 16 -> strength_reduce_imult(I, Arg1, 4); + 32 -> strength_reduce_imult(I, Arg1, 5); + 64 -> strength_reduce_imult(I, Arg1, 6); + 128 -> strength_reduce_imult(I, Arg1, 7); + 256 -> strength_reduce_imult(I, Arg1, 8); + ___ -> I + end; + false -> I + end; + false -> I + end; + 'div' -> + [Arg1, Arg2] = hipe_icode:args(I), + ArgT1 = get_type(Arg1), + case t_is_non_neg_integer(ArgT1) of + true -> %% the optimization is NOT valid for negative integers + case hipe_icode:is_const(Arg2) of + true -> + case hipe_icode:const_value(Arg2) of + 0 -> io:fwrite("Integer division by 0 detected!\n"), I; + 1 -> case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [Dst] -> create_strength_reduce_move(I, Dst, Arg1) + end; + 2 -> strength_reduce_div(I, Arg1, 1); + 4 -> strength_reduce_div(I, Arg1, 2); + 8 -> strength_reduce_div(I, Arg1, 3); + 16 -> strength_reduce_div(I, Arg1, 4); + 32 -> strength_reduce_div(I, Arg1, 5); + 64 -> strength_reduce_div(I, Arg1, 6); + 128 -> strength_reduce_div(I, Arg1, 7); + 256 -> strength_reduce_div(I, Arg1, 8); + ___ -> I + end; + false -> I + end; + false -> I + end; + 'rem' -> + [Arg1, Arg2] = hipe_icode:args(I), + ArgT1 = get_type(Arg1), + case t_is_non_neg_integer(ArgT1) of + true -> %% the optimization is NOT valid for negative integers + case hipe_icode:is_const(Arg2) of + true -> + case hipe_icode:const_value(Arg2) of + 0 -> io:fwrite("Remainder with 0 detected!\n"), I; + 1 -> case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [Dst] -> create_strength_reduce_move( + I, Dst, hipe_icode:mk_const(0)) + end; + 2 -> strength_reduce_rem(I, Arg1, 1); + 4 -> strength_reduce_rem(I, Arg1, 3); + 8 -> strength_reduce_rem(I, Arg1, 7); + 16 -> strength_reduce_rem(I, Arg1, 15); + 32 -> strength_reduce_rem(I, Arg1, 31); + 64 -> strength_reduce_rem(I, Arg1, 63); + 128 -> strength_reduce_rem(I, Arg1, 127); + 256 -> strength_reduce_rem(I, Arg1, 255); + ___ -> I + end; + false -> I + end; + false -> I + end; + _ -> I + end. + +remove_useless_arithmetic_instruction(_) -> + []. + +create_strength_reduce_move(I, Dst, Val) -> + case hipe_icode:call_continuation(I) of + [] -> + hipe_icode:mk_move(Dst, Val); + Lbl -> + [hipe_icode:mk_move(Dst, Val), + hipe_icode:mk_goto(Lbl)] + end. + +%% Puts the args of a multiplication in a form where the constant +%% (if present) is always the second argument. +mult_args_const_second(I) -> + [Arg1, Arg2] = Args = hipe_icode:args(I), + case hipe_icode:is_const(Arg1) of + true -> [Arg2, Arg1]; + false -> Args + end. + +%% In all three functions below: +%% - Arg1 is a variable of integer type +%% - N is a small positive integer that will be used in a bit shift operation +strength_reduce_imult(I, Arg1, N) -> + case t_number_vals(get_type(Arg1)) of + [X] when is_integer(X) -> + %% io:format("Multiplication with constant arguments:\n ~w\n", [I]), + case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [D] -> create_strength_reduce_move(I, D, hipe_icode:mk_const(X bsl N)) + end; + _ -> + update_call_or_enter(I, 'bsl', [Arg1, hipe_icode:mk_const(N)]) + end. + +strength_reduce_div(I, Arg1, N) -> + case t_number_vals(get_type(Arg1)) of + [X] when is_integer(X) -> + %% io:format("Division with constant arguments:\n ~w\n", [I]), + case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [D] -> create_strength_reduce_move(I, D, hipe_icode:mk_const(X bsr N)) + end; + _ -> + update_call_or_enter(I, 'bsr', [Arg1, hipe_icode:mk_const(N)]) + end. + +strength_reduce_rem(I, Arg1, N) -> + case t_number_vals(get_type(Arg1)) of + [X] when is_integer(X) -> + %% io:format("Remainder with constant arguments:\n ~w\n", [I]), + case call_dstlist(I) of + [] -> remove_useless_arithmetic_instruction(I); + [D] -> create_strength_reduce_move(I, D, hipe_icode:mk_const(X band N)) + end; + _ -> + update_call_or_enter(I, 'band', [Arg1, hipe_icode:mk_const(N)]) + end. + +%%--------------------------------------------------------------------- + +call_or_enter_fun(I) -> + case hipe_icode:is_call(I) of + true -> hipe_icode:call_fun(I); + false -> hipe_icode:enter_fun(I) + end. + +update_call_or_enter(I, NewFun) -> + case hipe_icode:is_call(I) of + true -> + case hipe_icode_primops:fails(NewFun) of + false -> + NewI = hipe_icode:call_fun_update(I, NewFun), + hipe_icode:call_set_fail_label(NewI, []); + true -> + hipe_icode:call_fun_update(I, NewFun) + end; + false -> hipe_icode:enter_fun_update(I, NewFun) + end. + +update_call_or_enter(I, NewFun, NewArgs) -> + case hipe_icode:is_call(I) of + true -> + I1 = hipe_icode:call_args_update(I, NewArgs), + hipe_icode:call_fun_update(I1, NewFun); + false -> + I1 = hipe_icode:enter_args_update(I, NewArgs), + hipe_icode:enter_fun_update(I1, NewFun) + end. + +transform_element2(I) -> + [Index, Tuple] = hipe_icode:args(I), + IndexType = get_type(Index), + TupleType = get_type(Tuple), + ?debug("Tuple", TupleType), + NewIndex = + case test_type(integer, IndexType) of + true -> + case t_number_vals(IndexType) of + unknown -> unknown; + [_|_] = Vals -> {number, Vals} + end; + _ -> unknown + end, + MinSize = + case test_type(tuple, TupleType) of + true -> + ?debug("is tuple", TupleType), + case t_tuple_sizes(TupleType) of + unknown -> unknown; + Sizes -> {tuple, lists:min(Sizes)} + end; + _ -> unknown + end, + case {NewIndex, MinSize} of + {{number, [_|_] = Ns}, {tuple, A}} when is_integer(A) -> + case lists:all(fun(X) -> 0 < X andalso X =< A end, Ns) of + true -> + case Ns of + [Idx] -> + [_, Tuple] = hipe_icode:args(I), + update_call_or_enter(I, #unsafe_element{index = Idx}, [Tuple]); + [_|_] -> + NewFun = {element, [MinSize, valid]}, + update_call_or_enter(I, NewFun) + end; + false -> + case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, Ns) of + true -> + NewFun = {element, [MinSize, fixnums]}, + update_call_or_enter(I, NewFun); + false -> + NewFun = {element, [MinSize, unknown]}, + update_call_or_enter(I, NewFun) + end + end; + _ when (NewIndex =:= unknown) orelse (MinSize =:= unknown) -> + case t_is_fixnum(IndexType) of + true -> + NewFun = {element, [MinSize, fixnums]}, + update_call_or_enter(I, NewFun); + false -> + NewFun = {element, [MinSize, NewIndex]}, + update_call_or_enter(I, NewFun) + end + end. + +transform_hd_or_tl(I, Primop) -> + [Arg] = hipe_icode:args(I), + case t_is_cons(get_type(Arg)) of + true -> update_call_or_enter(I, Primop); + false -> I + end. + +transform_arith(I, Op) -> + ArgTypes = get_type_list(hipe_icode:args(I)), + %% io:format("Op = ~w, Args = ~w\n", [Op, ArgTypes]), + DstTypes = + case hipe_icode:is_call(I) of + true -> get_type_list(call_dstlist(I)); + false -> [erl_bif_types:type(erlang, Op, length(ArgTypes), ArgTypes)] + end, + case valid_unsafe_args(ArgTypes, Op) of + true -> + case all_is_fixnum(DstTypes) of + true -> + update_call_or_enter(I, arithop_to_extra_unsafe(Op)); + false -> + update_call_or_enter(I, arithop_to_unsafe(Op)) + end; + false -> + I + end. + +all_is_fixnum(Types) -> + lists:all(fun erl_types:t_is_fixnum/1, Types). + +valid_unsafe_args(Args, Op) -> + if Op =:= 'bnot' -> + [Arg] = Args, + t_is_fixnum(Arg); + true -> + [LeftArg, RightArg] = Args, + case Op of + 'bsl' -> t_is_fixnum(LeftArg) and t_is_bitwidth(RightArg); + 'bsr' -> t_is_fixnum(LeftArg) and t_is_bitwidth(RightArg); + _ -> t_is_fixnum(LeftArg) and t_is_fixnum(RightArg) + end + end. + +arithop_to_extra_unsafe(Op) -> + case Op of + '+' -> extra_unsafe_add; + '-' -> extra_unsafe_sub; + '*' -> '*'; %% XXX: Revise? + 'div' -> 'div'; %% XXX: Revise? + 'rem' -> 'rem'; %% XXX: Revise? + 'band' -> unsafe_band; + 'bor' -> unsafe_bor; + 'bxor' -> unsafe_bxor; + 'bnot' -> unsafe_bnot; + 'bsl' -> unsafe_bsl; + 'bsr' -> unsafe_bsr + end. + +arithop_to_unsafe(Op) -> + case Op of + '+' -> unsafe_add; + '-' -> unsafe_sub; + _ -> Op + end. + +fixnum_ifop(Op) -> + case Op of + '=:=' -> 'fixnum_eq'; + '=/=' -> 'fixnum_neq'; + '==' -> 'fixnum_eq'; + '/=' -> 'fixnum_neq'; + '>' -> 'fixnum_gt'; + '<' -> 'fixnum_lt'; + '>=' -> 'fixnum_ge'; + '=<' -> 'fixnum_le'; + Op -> Op + end. + +bit_opts({Name, Size, Flags} = I, [MSType]) when Name =:= bs_get_integer; + Name =:= bs_get_float; + Name =:= bs_get_binary -> + Bits = t_matchstate_present(MSType), + case t_is_bitstr(Bits) of + true -> + Base = t_bitstr_base(Bits), + if Base >= Size -> + {Name, Size, Flags bor 16}; + true -> I + end; + false -> I + end; +bit_opts({bs_get_binary_all, Size, Flags} = I, [MSType]) -> + Bits = t_matchstate_present(MSType), + case t_is_bitstr(Bits) of + true -> + Base = t_bitstr_base(Bits), + Unit = t_bitstr_unit(Bits), + if (Base rem Size) =:= 0 andalso (Unit rem Size) =:= 0 -> + {bs_get_binary_all, Size, Flags bor 16}; + true -> I + end; + false -> I + end; +bit_opts({bs_test_unit, Size} = I, [MSType]) -> + Bits = t_matchstate_present(MSType), + case t_is_bitstr(Bits) of + true -> + Base = t_bitstr_base(Bits), + Unit = t_bitstr_unit(Bits), + if (Base rem Size) =:= 0 andalso (Unit rem Size) =:= 0 -> + {bs_test_unit, 1}; + true -> I + end; + false -> I + end; +bit_opts({bs_put_integer, Size, Flags, ConstInfo} = I, [Src|_]) -> + case t_is_fixnum(Src) of + true -> + {unsafe_bs_put_integer, Size, Flags, ConstInfo}; + false -> I + end; +bit_opts({bs_start_match, Max} = I, [Src]) -> + case t_is_bitstr(Src) of + true -> {{bs_start_match, bitstr}, Max}; + false -> + MSorNone = t_inf(t_matchstate(), Src), + case t_is_matchstate(MSorNone) of + true -> + Slots = t_matchstate_slots(MSorNone), + case t_is_any(Slots) orelse (length(t_to_tlist(Slots)) =< Max) of + true -> I; + false -> {{bs_start_match, ok_matchstate}, Max} + end; + false -> I + end + end; +bit_opts(I, _) -> I. + +is_exact_comp(Op) -> + case Op of + '=:=' -> true; + '=/=' -> true; + _Op -> false + end. + +all_fixnums([Type|Types]) -> + t_is_fixnum(Type) andalso all_fixnums(Types); +all_fixnums([]) -> + true. + +any_immediate([Type|Types]) -> + t_is_fixnum(Type) orelse t_is_atom(Type) orelse any_immediate(Types); +any_immediate([]) -> false. + +get_standard_primop(unsafe_bsl) -> 'bsl'; +get_standard_primop(unsafe_bsr) -> 'bsr'; +get_standard_primop(unsafe_add) -> '+'; +get_standard_primop(extra_unsafe_add) -> '+'; +get_standard_primop(unsafe_bnot) -> 'bnot'; +get_standard_primop(unsafe_bxor) -> 'bxor'; +get_standard_primop(unsafe_band) -> 'band'; +get_standard_primop(unsafe_bor) -> 'bor'; +get_standard_primop(unsafe_sub) -> '-'; +get_standard_primop(extra_unsafe_sub) -> '-'; +get_standard_primop(Op) -> Op. + +primop_type(Op, Args) -> + case Op of + #mkfun{mfa = MFA} -> + t_inf(t_fun(), find_signature_mfa(MFA)); + _ -> + None = t_none(), + Primop = get_standard_primop(Op), + RetType = hipe_icode_primops:type(Primop, Args), + case RetType of + None -> + hipe_icode_primops:type(Primop, add_funs_to_arg_types(Args)); + Other -> + Other + end + end. + +%%------------------------------------------------------------------ +%% Various help functions. +%%------------------------------------------------------------------ + +add_arg_types(Args, Types) -> + add_arg_types(Args, Types, gb_trees:empty()). + +add_arg_types([Arg|Args], [Type|Types], Acc) -> + Type1 = + case t_is_none(Type) of + true -> t_any(); + false -> Type + end, + add_arg_types(Args,Types, enter(Arg, Type1, Acc)); +add_arg_types(_, [], Acc) -> + Acc. + +get_type_list(ArgList) -> + [get_type(Arg) || Arg <- ArgList]. + +get_type(Arg) -> + case hipe_icode:is_annotated_variable(Arg) of + true -> + None = t_none(), + case hipe_icode:variable_annotation(Arg) of + {type_anno, None, _} -> t_any(); + {type_anno, Type, _} -> Type + end; + false -> + case hipe_icode:is_const(Arg) of + true -> const_type(Arg); + false -> t_any() + end + end. + +%% Lookup treats anything that is neither in the map or a constant as +%% t_none(). Use this during type propagation! + +lookup(Var, Tree) -> + case gb_trees:lookup(Var, Tree) of + none -> + case hipe_icode:is_const(Var) of + true -> const_type(Var); + false -> t_none() + end; + {value, Type} -> + Type + end. + +lookup_list(List, Info) -> + lookup_list0(List, Info, []). + +lookup_list0([H|T], Info, Acc) -> + lookup_list0(T, Info, [lookup(H, Info)|Acc]); +lookup_list0([], _, Acc) -> + lists:reverse(Acc). + + +%% safe_lookup treats anything that is neither in the map nor a +%% constant as t_any(). Use this during transformations. + +safe_lookup(Var, Tree) -> + case gb_trees:lookup(Var, Tree) of + none -> + case hipe_icode:is_const(Var) of + true -> const_type(Var); + false -> + %% io:format("Expression has undefined type\n",[]), + t_any() + end; + {value, Type} -> + Type + end. + +safe_lookup_list(List, Info) -> + safe_lookup_list0(List, Info, []). + +safe_lookup_list0([H|T], Info, Acc) -> + safe_lookup_list0(T, Info, [safe_lookup(H, Info)|Acc]); +safe_lookup_list0([], _, Acc) -> + lists:reverse(Acc). + +enter_list([Var|VarLeft], [Type|TypeLeft], Info) -> + NewInfo = enter(Var, Type, Info), + enter_list(VarLeft, TypeLeft, NewInfo); +enter_list([], [], Info) -> + Info. + +enter([Key], Value, Tree) -> + enter(Key, Value, Tree); +enter(Key, Value, Tree) -> + case is_var_or_reg(Key) of + true -> + case t_is_none(Value) of + true -> + gb_trees:delete_any(Key, Tree); + false -> + gb_trees:enter(Key, Value, Tree) + end; + false -> + Tree + end. + +join_list(List, Info) -> + join_list(List, Info, t_none()). + +join_list([H|T], Info, Acc) -> + Type = t_sup(lookup(H, Info), Acc), + join_list(T, Info, Type); +join_list([], _, Acc) -> + Acc. + +join_info_in([], _OldInfo, _NewInfo) -> + %% No variables are live in. The information must be at a fixpoint. + fixpoint; +join_info_in(Vars, OldInfo, NewInfo) -> + NewInfo2 = join_info_in(Vars, Vars, OldInfo, NewInfo, gb_trees:empty()), + case info_is_equal(NewInfo2, OldInfo) of + true -> fixpoint; + false -> NewInfo2 + end. + +%% NOTE: Variables can be bound to other variables. Joining these is +%% only possible if the binding is the same from both traces and this +%% variable is still live. + +join_info_in([Var|Left], LiveIn, Info1, Info2, Acc) -> + Type1 = gb_trees:lookup(Var, Info1), + Type2 = gb_trees:lookup(Var, Info2), + case {Type1, Type2} of + {none, none} -> + join_info_in(Left, LiveIn, Info1, Info2, Acc); + {none, {value, Val}} -> + NewTree = gb_trees:insert(Var, Val, Acc), + join_info_in(Left, LiveIn, Info1, Info2, NewTree); + {{value, Val}, none} -> + NewTree = gb_trees:insert(Var, Val, Acc), + join_info_in(Left, LiveIn, Info1, Info2, NewTree); + {{value, Val1}, {value, Val2}} -> + NewTree = gb_trees:insert(Var, t_sup(Val1, Val2), Acc), + join_info_in(Left, LiveIn, Info1, Info2, NewTree) + end; +join_info_in([], _LiveIn, _Info1, _Info2, Acc) -> + Acc. + +info_is_equal(Info1, Info2) -> + compare(gb_trees:to_list(Info1), gb_trees:to_list(Info2)). + +compare([{Var, Type1}|Left1], [{Var, Type2}|Left2]) -> + t_is_equal(Type1, Type2) andalso compare(Left1, Left2); +compare([], []) -> + true; +compare(_, _) -> + false. + +const_type(Const) -> + t_from_term(hipe_icode:const_value(Const)). + +do_updates(State, List) -> + do_updates(State, List, []). + +do_updates(State, [{Label, Info}|Tail], Worklist) -> + case state__info_in_update(State, Label, Info) of + fixpoint -> + %% io:format("Info in for ~w is: fixpoint\n", [Label]), + do_updates(State, Tail, Worklist); + NewState -> + %% io:format("Info in for ~w is:\n", [Label]), + %% [io:format("~w: ~p\n", [X, format_type(Y)]) + %% || {X, Y} <- gb_trees:to_list(state__info_in(NewState, Label))], + do_updates(NewState, Tail, [Label|Worklist]) + end; +do_updates(State, [], Worklist) -> + {State, Worklist}. + +enter_defines(I, Type, Info) -> + case defines(I) of + [] -> Info; + [Def] -> + enter(Def, Type, Info); + Defs -> + Pairs = case t_is_any(Type) of + true -> + [{Def, t_any()} || Def <- Defs]; + false -> + case t_is_none(Type) of + true -> + [{Def, t_none()} || Def <- Defs]; + false -> + lists:zip(Defs, t_to_tlist(Type)) + end + end, + lists:foldl(fun({X, T}, Inf) -> enter(X, T, Inf) end, Info, Pairs) + end. + +defines(I) -> + keep_vars_and_regs(hipe_icode:defines(I)). + +call_dstlist(I) -> + hipe_icode:call_dstlist(I). + +uses(I) -> + keep_vars_and_regs(hipe_icode:uses(I)). + +keep_vars_and_regs(Vars) -> + [V || V <- Vars, is_var_or_reg(V)]. + +butlast([_]) -> + []; +butlast([H|T]) -> + [H|butlast(T)]. + +-spec any_is_none([erl_types:erl_type()]) -> boolean(). + +any_is_none(Types) -> + lists:any(fun (T) -> t_is_none(T) end, Types). + +is_var_or_reg(X) -> + hipe_icode:is_var(X) orelse hipe_icode:is_reg(X). + +%% _________________________________________________________________ +%% +%% Handling the state +%% + +new_state(Cfg, {MFA, GetCallFun, GetResFun, FinalAction}) -> + Start = hipe_icode_cfg:start_label(Cfg), + Params = hipe_icode_cfg:params(Cfg), + ParamTypes = GetCallFun(MFA, Cfg), + case any_is_none(ParamTypes) of + true -> + FinalAction(MFA, [t_none()]), + throw(no_input); + false -> + Info = add_arg_types(Params, ParamTypes), + InfoMap = gb_trees:insert({Start, in}, Info, gb_trees:empty()), + Liveness = hipe_icode_ssa:ssa_liveness__analyze(Cfg), + #state{info_map = InfoMap, cfg = Cfg, liveness = Liveness, + arg_types = ParamTypes, lookupfun = GetResFun, + resultaction = FinalAction} + end. + +state__cfg(#state{cfg = Cfg}) -> + Cfg. + +state__succ(#state{cfg = Cfg}, Label) -> + hipe_icode_cfg:succ(Cfg, Label). + +state__bb(#state{cfg = Cfg}, Label) -> + hipe_icode_cfg:bb(Cfg, Label). + +state__bb_add(S = #state{cfg = Cfg}, Label, BB) -> + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, BB), + S#state{cfg=NewCfg}. + +state__params_update(S = #state{cfg = Cfg}, NewParams) -> + NewCfg = hipe_icode_cfg:params_update(Cfg, NewParams), + S#state{cfg = NewCfg}. + +state__ret_type(#state{ret_type = RT}) -> RT. + +state__lookupfun(#state{lookupfun = LF}) -> LF. + +state__resultaction(#state{resultaction = RA}) -> RA. + +state__info_in(S, Label) -> + state__info(S, {Label, in}). + +state__info_out(S, Label) -> + state__info(S, {Label, out}). + +state__info(#state{info_map = IM}, Label) -> + case gb_trees:lookup(Label, IM) of + {value, Info} -> Info; + none -> gb_trees:empty() + end. + +state__ret_type_update(#state{ret_type = RT} = State, NewType) when + is_list(NewType) -> + TotType = lists:zipwith(fun erl_types:t_sup/2, RT, NewType), + State#state{ret_type = TotType}; +state__ret_type_update(#state{ret_type = RT} = State, NewType) -> + state__ret_type_update(State, [NewType || _ <- RT]). + +state__info_in_update(S=#state{info_map=IM, liveness=Liveness}, Label, Info) -> + LiveIn = hipe_icode_ssa:ssa_liveness__livein(Liveness, Label), + LabelIn = {Label, in}, + case gb_trees:lookup(LabelIn, IM) of + none -> + OldInfo = gb_trees:empty(), + case join_info_in(LiveIn, OldInfo, Info) of + fixpoint -> + %% If the BB has not been handled we ignore the fixpoint. + S#state{info_map = gb_trees:enter(LabelIn, OldInfo, IM)}; + NewInfo -> + S#state{info_map = gb_trees:enter(LabelIn, NewInfo, IM)} + end; + {value, OldInfo} -> + case join_info_in(LiveIn, OldInfo, Info) of + fixpoint -> + fixpoint; + NewInfo -> + S#state{info_map = gb_trees:enter(LabelIn, NewInfo, IM)} + end + end. + +state__info_out_update(#state{info_map = IM} = State, Label, Info) -> + State#state{info_map = gb_trees:enter({Label, out}, Info, IM)}. + +%% _________________________________________________________________ +%% +%% The worklist. +%% + +init_work(State) -> + %% Labels = hipe_icode_cfg:reverse_postorder(state__cfg(State)), + Labels = [hipe_icode_cfg:start_label(state__cfg(State))], + {Labels, [], gb_sets:from_list(Labels)}. + +get_work({[Label|Left], List, Set}) -> + NewWork = {Left, List, gb_sets:delete(Label, Set)}, + {Label, NewWork}; +get_work({[], [], _Set}) -> + fixpoint; +get_work({[], List, Set}) -> + get_work({lists:reverse(List), [], Set}). + +add_work(Work = {List1, List2, Set}, [Label|Left]) -> + case gb_sets:is_member(Label, Set) of + true -> + add_work(Work, Left); + false -> + %% io:format("Adding work: ~w\n", [Label]), + add_work({List1, [Label|List2], gb_sets:insert(Label, Set)}, Left) + end; +add_work(Work, []) -> + Work. + +%% _________________________________________________________________ +%% +%% Annotator +%% + +annotate_cfg(State) -> + Cfg = state__cfg(State), + NewState = annotate_params(hipe_icode_cfg:params(Cfg), State, + hipe_icode_cfg:start_label(Cfg)), + Labels = hipe_icode_cfg:reverse_postorder(Cfg), + annotate_bbs(Labels, NewState). + +annotate_params(Params, State, Start) -> + Info = state__info_in(State, Start), + AnnoFun = fun hipe_icode:annotate_variable/2, + NewParams = + lists:zipwith(AnnoFun, Params, [make_annotation(P,Info) || P <- Params]), + state__params_update(State,NewParams). + +annotate_bbs([Label|Left], State) -> + BB = state__bb(State, Label), + Code = hipe_bb:code(BB), + Info = state__info_in(State, Label), + NewCode = annotate_instr_list(Code, Info, state__lookupfun(State), []), + NewBB = hipe_bb:code_update(BB, NewCode), + NewState = state__bb_add(State, Label, NewBB), + annotate_bbs(Left, NewState); +annotate_bbs([], State) -> + State. + +annotate_instr_list([I], Info, LookupFun, Acc) -> + NewInfo = + case I of + #icode_call{} -> + do_safe_call(I, Info, LookupFun); + _ -> + analyse_insn(I, Info, LookupFun) + end, + NewI = annotate_instr(I, NewInfo, Info), + lists:reverse([NewI|Acc]); +annotate_instr_list([I|Left], Info, LookupFun, Acc) -> + NewInfo = + case I of + #icode_call{} -> + do_safe_call(I, Info, LookupFun); + _ -> + analyse_insn(I, Info, LookupFun) + end, + NewI = annotate_instr(I, NewInfo, Info), + annotate_instr_list(Left, NewInfo, LookupFun, [NewI|Acc]). + +annotate_instr(I, DefInfo, UseInfo) -> + Def = defines(I), + Use = uses(I), + Fun = fun hipe_icode:annotate_variable/2, + DefSubst = [{X, Fun(X, make_annotation(X, DefInfo))} || X <- Def], + UseSubst = [{X, Fun(X, make_annotation(X, UseInfo))} || X <- Use], + case DefSubst ++ UseSubst of + [] -> + I; + Subst -> + hipe_icode:subst(Subst, I) + end. + +make_annotation(X, Info) -> + {type_anno, safe_lookup(X, Info), fun erl_types:t_to_string/1}. + +-spec unannotate_cfg(cfg()) -> cfg(). + +unannotate_cfg(Cfg) -> + NewCfg = unannotate_params(Cfg), + Labels = hipe_icode_cfg:labels(NewCfg), + unannotate_bbs(Labels, NewCfg). + +unannotate_params(Cfg) -> + Params = hipe_icode_cfg:params(Cfg), + NewParams = [hipe_icode:unannotate_variable(X) + || X <- Params, hipe_icode:is_variable(X)], + hipe_icode_cfg:params_update(Cfg, NewParams). + +unannotate_bbs([Label|Left], Cfg) -> + BB = hipe_icode_cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + NewCode = unannotate_instr_list(Code, []), + NewBB = hipe_bb:code_update(BB, NewCode), + NewCfg = hipe_icode_cfg:bb_add(Cfg, Label, NewBB), + unannotate_bbs(Left, NewCfg); +unannotate_bbs([], Cfg) -> + Cfg. + +unannotate_instr_list([I|Left], Acc) -> + NewI = unannotate_instr(I), + unannotate_instr_list(Left, [NewI|Acc]); +unannotate_instr_list([], Acc) -> + lists:reverse(Acc). + +unannotate_instr(I) -> + DefUses = hipe_icode:defines(I) ++ hipe_icode:uses(I), + Subst = [{X, hipe_icode:unannotate_variable(X)} || X <- DefUses, + hipe_icode:is_variable(X)], + if Subst =:= [] -> I; + true -> hipe_icode:subst(Subst, I) + end. + +%% _________________________________________________________________ +%% +%% Find the types of the arguments to a call +%% + +update_call_arguments(I, Info) -> + Args = hipe_icode:call_args(I), + ArgTypes = lookup_list(Args, Info), + Signature = find_signature(hipe_icode:call_fun(I), length(Args)), + case t_fun_args(Signature) of + unknown -> + Info; + PltArgTypes -> + NewArgTypes = t_inf_lists(ArgTypes, PltArgTypes), + enter_list(Args, NewArgTypes, Info) + end. + +%% _________________________________________________________________ +%% +%% PLT info +%% + +find_signature(MFA = {_, _, _}, _) -> find_signature_mfa(MFA); +find_signature(Primop, Arity) -> find_signature_primop(Primop, Arity). + +find_signature_mfa(MFA) -> + case get_mfa_arg_types(MFA) of + any -> + t_fun(get_mfa_type(MFA)); + BifArgs -> + t_fun(BifArgs, get_mfa_type(MFA)) + end. + +find_signature_primop(Primop, Arity) -> + case get_primop_arg_types(Primop) of + any -> + t_fun(Arity, get_primop_type(Primop)); + ArgTypes -> + t_fun(ArgTypes, get_primop_type(Primop)) + end. + +get_primop_arg_types(Primop) -> + case hipe_icode_primops:arg_types(Primop) of + unknown -> any; + ArgTypes -> add_tuple_to_args(ArgTypes) + end. + +get_mfa_arg_types({M, F, A}) -> + case erl_bif_types:arg_types(M, F, A) of + unknown -> + any; + BifArgs -> + add_tuple_to_args(BifArgs) + end. + +get_mfa_type({M, F, A}) -> + erl_bif_types:type(M, F, A). + +get_primop_type(Primop) -> + hipe_icode_primops:type(get_standard_primop(Primop)). + +add_tuple_to_args(Types) -> + [add_tuple_to_type(T) || T <- Types]. + +add_tuple_to_type(T) -> + None = t_none(), + case t_inf(t_fun(), T) of + None -> T; + _Other -> t_sup(T, t_tuple([t_atom(),t_atom()])) + end. + +add_funs_to_arg_types(Types) -> + [add_fun_to_arg_type(T) || T <- Types]. + +add_fun_to_arg_type(T) -> + None = t_none(), + case t_inf(t_tuple([t_atom(),t_atom()]), T) of + None -> T; + _Other -> t_sup(T, t_fun()) + end. + +%%===================================================================== +%% Icode Coordinator Callbacks +%%===================================================================== + +-spec replace_nones([erl_types:erl_type()] | erl_types:erl_type()) -> + [erl_types:erl_type()]. + +replace_nones(Types) when is_list(Types) -> + [replace_none(T) || T <- Types]; +replace_nones(Type) -> + [replace_none(Type)]. + +-spec replace_none(erl_types:erl_type()) -> erl_types:erl_type(). + +replace_none(Type) -> + case erl_types:t_is_none(Type) of + true -> + erl_types:t_any(); + false -> + Type + end. + +-spec update__info([erl_types:erl_type()], [erl_types:erl_type()]) -> + {boolean(), [erl_types:erl_type()]}. + +update__info(NewTypes, OldTypes) -> + SupFun = + fun(T1, T2) -> erl_types:t_limit(erl_types:t_sup(T1,T2), ?TYPE_DEPTH) end, + EqFun = fun erl_types:t_is_equal/2, + ResTypes = lists:zipwith(SupFun, NewTypes, OldTypes), + Change = lists:zipwith(EqFun, ResTypes, OldTypes), + {lists:all(fun(X) -> X end, Change), ResTypes}. + +-spec new__info([erl_types:erl_type()]) -> [erl_types:erl_type()]. + +new__info(NewTypes) -> + [erl_types:t_limit(T, ?TYPE_DEPTH) || T <- NewTypes]. + +-spec return__info(erl_types:erl_type()) -> erl_types:erl_type(). + +return__info(Types) -> + Types. + +-spec return_none() -> [erl_types:erl_type(),...]. + +return_none() -> + [erl_types:t_none()]. + +-spec return_none_args(cfg(), mfa()) -> [erl_types:erl_type()]. + +return_none_args(Cfg, {_M,_F,A}) -> + NoArgs = + case hipe_icode_cfg:is_closure(Cfg) of + true -> hipe_icode_cfg:closure_arity(Cfg) - 1; + false -> A + end, + lists:duplicate(NoArgs, erl_types:t_none()). + +-spec return_any_args(cfg(), mfa()) -> [erl_types:erl_type()]. + +return_any_args(Cfg, {_M,_F,A}) -> + NoArgs = + case hipe_icode_cfg:is_closure(Cfg) of + true -> hipe_icode_cfg:closure_arity(Cfg); + false -> A + end, + lists:duplicate(NoArgs, erl_types:t_any()). + +%%===================================================================== +%% Testing function below +%%===================================================================== + +-ifdef(DO_HIPE_ICODE_TYPE_TEST). + +test() -> + Range1 = t_from_range(1, pos_inf), + Range2 = t_from_range(0, 5), + Var1 = hipe_icode:mk_var(1), + Var2 = hipe_icode:mk_var(2), + + Info = enter(Var1, Range1, enter(Var2, Range2, gb_trees:empty())), + io:format("A1 ~p~n", [Info]), + A = integer_range_inequality_propagation('<', Var1, Var2, 1, 2, Info), + B = integer_range_inequality_propagation('>=', Var1, Var2, 1, 2, Info), + C = integer_range_inequality_propagation('=<', Var1, Var2, 1, 2, Info), + D = integer_range_inequality_propagation('>', Var1, Var2, 1, 2, Info), + + io:format("< ~p~n", [A]), + io:format(">= ~p~n", [B]), + io:format("<= ~p~n", [C]), + io:format("> ~p~n", [D]). + +-endif. diff --git a/lib/hipe/icode/hipe_icode_type.hrl b/lib/hipe/icode/hipe_icode_type.hrl new file mode 100644 index 0000000000..dd69c1e8b2 --- /dev/null +++ b/lib/hipe/icode/hipe_icode_type.hrl @@ -0,0 +1,25 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_icode_type.hrl +%%% Author : Tobias Lindahl +%%% Created : 2 Sep 2004 by Tobias Lindahl +%%%------------------------------------------------------------------- + +-define(TYPE_DEPTH, 3). diff --git a/lib/hipe/info b/lib/hipe/info new file mode 100644 index 0000000000..51b5dfb979 --- /dev/null +++ b/lib/hipe/info @@ -0,0 +1,2 @@ +group: basic +short: High Performance Erlang \ No newline at end of file diff --git a/lib/hipe/main/Makefile b/lib/hipe/main/Makefile new file mode 100644 index 0000000000..0ac522b1b2 --- /dev/null +++ b/lib/hipe/main/Makefile @@ -0,0 +1,117 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +ifdef HIPE_ENABLED +HIPE_MODULES = hipe +else +HIPE_MODULES = +endif +MODULES = hipe_main $(HIPE_MODULES) + +## hipe.hrl is automatically generated from hipe.hrl.src -- see below +HRL_FILES= hipe.hrl +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +APP_FILE= hipe.app +APP_SRC= $(APP_FILE).src +APP_TARGET= $(EBIN)/$(APP_FILE) + +APPUP_FILE= hipe.appup +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +nowarn_shadow_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +hipe.hrl: ../vsn.mk hipe.hrl.src + sed -e "s;%VSN%;$(HIPE_VSN);" ../../hipe/main/hipe.hrl.src > ../../hipe/main/hipe.hrl + +$(EBIN)/hipe.beam: hipe.hrl ../../compiler/src/beam_disasm.hrl +$(EBIN)/hipe_main.beam: hipe.hrl ../icode/hipe_icode.hrl #../rtl/hipe_rtl.hrl + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) $(DOC_FILES) $(HRL_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DATA) ../vsn.mk $(RELSYSDIR) + $(INSTALL_DIR) $(RELSYSDIR)/main + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/main + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src new file mode 100644 index 0000000000..e5d6d1e540 --- /dev/null +++ b/lib/hipe/main/hipe.app.src @@ -0,0 +1,222 @@ +%% This is an -*- erlang -*- file. +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +{application, hipe, + [{description, "HiPE Native Code Compiler, version %VSN%"}, + {vsn, "%VSN%"}, + {modules, [cerl_cconv, + cerl_closurean, + cerl_hipeify, + cerl_hybrid_transform, + cerl_lib, + cerl_messagean, + cerl_pmatch, + cerl_prettypr, + cerl_to_icode, + cerl_typean, + erl_bif_types, + erl_types, + hipe, + hipe_adj_list, + hipe_amd64_assemble, + hipe_amd64_defuse, + hipe_amd64_encode, + hipe_amd64_frame, + hipe_amd64_liveness, + hipe_amd64_main, + hipe_amd64_pp, + hipe_amd64_ra, + hipe_amd64_ra_finalise, + hipe_amd64_ra_ls, + hipe_amd64_ra_naive, + hipe_amd64_ra_postconditions, + hipe_amd64_ra_sse2_postconditions, + hipe_amd64_ra_x87_ls, + hipe_amd64_registers, + hipe_amd64_specific, + hipe_amd64_specific_sse2, + hipe_amd64_specific_x87, + hipe_amd64_spill_restore, + hipe_amd64_x87, + hipe_arm, + hipe_arm_assemble, + hipe_arm_cfg, + hipe_arm_defuse, + hipe_arm_encode, + hipe_arm_finalise, + hipe_arm_frame, + hipe_arm_liveness_gpr, + hipe_arm_main, + hipe_arm_pp, + hipe_arm_ra, + hipe_arm_ra_finalise, + hipe_arm_ra_ls, + hipe_arm_ra_naive, + hipe_arm_ra_postconditions, + hipe_arm_registers, + hipe_arm_specific, + hipe_bb, + hipe_beam_to_icode, + hipe_ceach, + hipe_coalescing_regalloc, + hipe_consttab, + hipe_data_pp, + hipe_digraph, + hipe_dominators, + hipe_dot, + hipe_gen_cfg, + hipe_gensym, + hipe_graph_coloring_regalloc, + hipe_icode, + hipe_icode2rtl, + hipe_icode_bincomp, + hipe_icode_callgraph, + hipe_icode_cfg, + hipe_icode_coordinator, + hipe_icode_ebb, + hipe_icode_exceptions, + hipe_icode_fp, + hipe_icode_heap_test, + hipe_icode_inline_bifs, + hipe_icode_instruction_counter, + hipe_icode_liveness, + hipe_icode_mulret, + hipe_icode_pp, + hipe_icode_primops, + hipe_icode_range, + hipe_icode_ssa, + hipe_icode_ssa_const_prop, + hipe_icode_ssa_copy_prop, + hipe_icode_ssa_struct_reuse, + hipe_icode_split_arith, + hipe_icode_type, + hipe_ig, + hipe_ig_moves, + hipe_jit, + hipe_ls_regalloc, + hipe_main, + hipe_moves, + hipe_node_sets, + hipe_optimistic_regalloc, + hipe_pack_constants, + hipe_ppc, + hipe_ppc_assemble, + hipe_ppc_cfg, + hipe_ppc_defuse, + hipe_ppc_encode, + hipe_ppc_finalise, + hipe_ppc_frame, + hipe_ppc_liveness_all, + hipe_ppc_liveness_fpr, + hipe_ppc_liveness_gpr, + hipe_ppc_main, + hipe_ppc_pp, + hipe_ppc_ra, + hipe_ppc_ra_finalise, + hipe_ppc_ra_ls, + hipe_ppc_ra_naive, + hipe_ppc_ra_postconditions, + hipe_ppc_ra_postconditions_fp, + hipe_ppc_registers, + hipe_ppc_specific, + hipe_ppc_specific_fp, + hipe_profile, + hipe_reg_worklists, + hipe_regalloc_loop, + hipe_rtl, + hipe_rtl_arch, + hipe_rtl_arith_32, + hipe_rtl_arith_64, + hipe_rtl_binary, + hipe_rtl_binary_match, + hipe_rtl_binary_construct, + hipe_rtl_cfg, + hipe_rtl_cleanup_const, + hipe_rtl_exceptions, + hipe_rtl_lcm, + hipe_rtl_liveness, + hipe_rtl_mk_switch, + hipe_rtl_primops, + hipe_rtl_ssa, + hipe_rtl_ssa_const_prop, + hipe_rtl_ssa_avail_expr, + hipe_rtl_ssapre, + hipe_rtl_symbolic, + hipe_rtl_to_amd64, + hipe_rtl_to_arm, + hipe_rtl_to_ppc, + hipe_rtl_to_sparc, + hipe_rtl_to_x86, + hipe_rtl_varmap, + hipe_sdi, + hipe_sparc, + hipe_sparc_assemble, + hipe_sparc_cfg, + hipe_sparc_defuse, + hipe_sparc_encode, + hipe_sparc_finalise, + hipe_sparc_frame, + hipe_sparc_liveness_all, + hipe_sparc_liveness_fpr, + hipe_sparc_liveness_gpr, + hipe_sparc_main, + hipe_sparc_pp, + hipe_sparc_ra, + hipe_sparc_ra_finalise, + hipe_sparc_ra_ls, + hipe_sparc_ra_naive, + hipe_sparc_ra_postconditions, + hipe_sparc_ra_postconditions_fp, + hipe_sparc_registers, + hipe_sparc_specific, + hipe_sparc_specific_fp, + hipe_spillcost, + hipe_spillmin, + hipe_spillmin_color, + hipe_spillmin_scan, + hipe_tagscheme, + hipe_temp_map, + hipe_timing, + hipe_tool, + hipe_vectors, + hipe_x86, + hipe_x86_assemble, + hipe_x86_cfg, + hipe_x86_defuse, + hipe_x86_encode, + hipe_x86_frame, + hipe_x86_liveness, + hipe_x86_main, + hipe_x86_postpass, + hipe_x86_pp, + hipe_x86_ra, + hipe_x86_ra_finalise, + hipe_x86_ra_ls, + hipe_x86_ra_naive, + hipe_x86_ra_postconditions, + hipe_x86_ra_x87_ls, + hipe_x86_registers, + hipe_x86_specific, + hipe_x86_specific_x87, + hipe_x86_spill_restore, + hipe_x86_x87]}, + {registered,[]}, + {applications, [kernel,stdlib]}, + {env, []}]}. diff --git a/lib/hipe/main/hipe.appup.src b/lib/hipe/main/hipe.appup.src new file mode 100644 index 0000000000..1d5a0d93f5 --- /dev/null +++ b/lib/hipe/main/hipe.appup.src @@ -0,0 +1,19 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +{"%VSN%",[],[]}. diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl new file mode 100644 index 0000000000..ed722fecba --- /dev/null +++ b/lib/hipe/main/hipe.erl @@ -0,0 +1,1555 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ==================================================================== +%% Copyright (c) 1998 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe.erl +%% Module : hipe +%% Purpose : +%% Notes : +%% History : * 1998-01-28 Erik Johansson (happi@it.uu.se): Created. +%% CVS : $Id$ +%% ==================================================================== +%% @doc This is the direct interface to the HiPE compiler. +%% +%%

Normal use

+%% +%%

The normal way to native-compile an Erlang module using HiPE is to +%% include the atom native in the Erlang compiler options, +%% as in: +%% +%%

    1> c(my_module, [native]).

+%% +%%

Options to the HiPE compiler are then passed as follows: +%% +%%

    1> c(my_module, [native,{hipe,Options}]).

+%% +%%

For on-line help in the Erlang shell, call hipe:help(). Details on HiPE compiler +%% options are given by hipe:help_options().

+%% +%%

Using the direct interface - for advanced users only

+%% +%% To compile a module or a specific function to native code and +%% automatically load the code into memory, call hipe:c(Module) or hipe:c(Module, Options). Note that all +%% options are specific to the HiPE compiler. See the function index for other compiler functions. +%% +%%

Main Options

+%% +%% Options are processed in the order they appear in the list; an +%% early option will shadow a later one. +%%
+%%
o0, 'O0', o1, 'O1', o2, 'O2', o3, 'O3'
+%%
Set optimization level (default 2).
+%% +%%
load
+%%
Automatically load the code into memory after compiling.
+%% +%%
time
+%%
Reports the compilation times for the different stages +%% of the compiler. Call hipe:help_option(time) for +%% details.
+%% +%%
{timeout, Time}
+%%
Sets the time the compiler is allowed to use for the +%% compilation. Time is time in ms or the atom +%% infinity (the default).
+%% +%%
verbose
+%%
Make the HiPE compiler output information about what it is +%% being done.
+%%
+%% +%%

Advanced Options

+%% +%% Note: You can also specify {Option, false} to turn a +%% particular option off, or {Option, true} to force it on. +%% Boolean-valued (true/false) options also +%% have negative-form aliases, e.g. no_load = {load, +%% false}. +%% +%%

+%%
debug
+%%
Outputs internal debugging information during +%% compilation.
+%% +%%
icode_ssa_copy_prop
+%%
Performs copy propagation on the SSA form on the Icode +%% level.
+%% +%%
icode_ssa_const_prop
+%%
Performs sparse conditional constant propagation on the SSA +%% form on the Icode level.
+%% +%%
icode_ssa_struct_reuse
+%%
Tries to factor out identical tuple and list constructions +%% on the Icode level.
+%% +%%
icode_type
+%%
Simplifies the code by employing type analysis and propagation +%% on the Icode level.
+%% +%%
icode_range
+%%
Performs integer range analysis on the Icode level.
+%% +%%
pp_all
+%%
Equivalent to [pp_beam, pp_icode, pp_rtl, +%% pp_native].
+%% +%%
pp_asm
+%%
Prints the assembly listing with addresses and bytecode. +%% Currently available for x86 only.
+%% +%%
pp_beam, {pp_beam, {file, File}}
+%%
Display the input Beam code to stdout or file.
+%% +%%
pp_icode, {pp_icode, {file, File}}, +%% {pp_icode, {only, Functions}}
+%%
Pretty-print Icode intermediate code to stdout or file.
+%% +%%
pp_native, {pp_native, {file, File}}, +%% {pp_native, {only, Functions}}
+%%
Pretty-print native code to stdout or file.
+%% +%%
pp_opt_icode, {pp_opt_icode, {file, File}}, +%% {pp_opt_icode, {only, Functions}}
+%%
Pretty-print optimized Icode to stdout or file.
+%% +%%
pp_rtl, {pp_rtl, {file, File}}, +%% {pp_rtl, {only, Functions}}
+%%
Pretty-print RTL intermediate code to stdout or file.
+%% +%%
regalloc
+%%
Select register allocation algorithm. Used as +%% {regalloc, Method}. +%% +%%

Method is one of the following: +%%

    +%%
  • naive: spills everything (for debugging and +%% testing only).
  • +%%
  • linear_scan: fast compilation; not so good if +%% only few registers available.
  • +%%
  • graph_color: slower, but gives better +%% performance.
  • +%%
  • coalescing: tries hard to use registers; can be +%% very slow, but typically results in code with best performance.
  • +%%

+%% +%%
remove_comments
+%%
Remove comments from intermediate code.
+%% +%%
rtl_ssa_const_prop
+%%
Performs sparse conditional constant propagation on the SSA +%% form on the RTL level.
+%% +%%
rtl_lcm
+%%
Lazy Code Motion on RTL.
+%% +%%
rtl_ssapre
+%%
Lazy Partial Redundancy Elimination on RTL (SSA level).
+%% +%%
use_indexing
+%%
Use indexing for multiple-choice branch selection.
+%% +%%
use_callgraph
+%%
Use a static call graph for determining the order in which +%% the functions of a module should be compiled (in reversed +%% topological sort order).
+%%

+%% +%%

Debugging Options

+%% (May require that some modules have been +%% compiled with the DEBUG flag.) +%%
+%%
rtl_show_translation
+%%
Prints each step in the translation from Icode to RTL
+%%
+%% +%% @end +%% ==================================================================== + +-module(hipe). + +-export([c/1, + c/2, + f/1, + f/2, + compile/1, + compile/2, + compile/4, + compile_core/4, + file/1, + file/2, + load/1, + help/0, + help_hiper/0, + help_options/0, + help_option/1, + help_debug_options/0, + version/0]). + +-ifndef(DEBUG). +-define(DEBUG,true). +-endif. + +-include("hipe.hrl"). +-include("../../compiler/src/beam_disasm.hrl"). + +%%------------------------------------------------------------------- +%% Basic type declaration for exported functions of the 'hipe' module +%%------------------------------------------------------------------- + +-type mod() :: atom(). +-type c_unit() :: mod() | mfa(). +-type f_unit() :: mod() | binary(). +-type ret_rtl() :: [_]. +-type c_ret() :: {'ok', c_unit()} | {'error', term()} | + {'ok', c_unit(), ret_rtl()}. %% The last for debugging only +-type compile_file() :: atom() | string() | binary(). +-type compile_ret() :: {hipe_architecture(), binary()} | list(). + +%%------------------------------------------------------------------- + +-define(COMPILE_DEFAULTS, [o2]). +-define(DEFAULT_TIMEOUT, infinity). + +%%------------------------------------------------------------------- + +%% @spec load(Mod) -> {module, Mod} | {error, Reason} +%% Mod = mod() +%% Reason = term() +%% +%% @doc Like load/2, but tries to locate a BEAM file automatically. +%% +%% @see load/2 + +-spec load(Mod) -> {'module', Mod} | {'error', term()} + when is_subtype(Mod, mod()). + +load(Mod) -> + load(Mod, beam_file(Mod)). + +%% @spec load(Mod, BeamFileName) -> {module, Mod} | {error, Reason} +%% Mod = mod() +%% Reason = term() +%% BeamFileName = string() +%% filename() = term() +%% +%% @type mod() = atom(). A module name. +%% +%% @doc User interface for loading code into memory. The code can be +%% given as a native code binary or as the file name of a BEAM file +%% which should contain a native-code chunk. If only the module name is +%% given (see load/1), the BEAM file is located +%% automatically. +%% +%% @see load/1 + +-spec load(Mod, string()) -> {'module', Mod} | {'error', term()} + when is_subtype(Mod, mod()). + +load(Mod, BeamFileName) when is_list(BeamFileName) -> + Architecture = erlang:system_info(hipe_architecture), + ChunkName = hipe_unified_loader:chunk_name(Architecture), + case beam_lib:chunks(BeamFileName, [ChunkName]) of + {ok,{_,[{_,Bin}]}} when is_binary(Bin) -> do_load(Mod, Bin, Bin); + Error -> {error, Error} + end. + +%% @spec c(Name) -> {ok, Name} | {error, Reason} +%% Name = mod() | mfa() +%% Reason = term() +%% +%% @equiv c(Name, []) + +-spec c(c_unit()) -> c_ret(). + +c(Name) -> + c(Name, []). + +%% @spec c(Name, options()) -> {ok, Name} | {error, Reason} +%% Name = mod() | mfa() +%% options() = [option()] +%% option() = term() +%% Reason = term() +%% +%% @type mfa() = {M::mod(),F::fun(),A::arity()}. +%% A fully qualified function name. +%% +%% @type fun() = atom(). A function identifier. +%% +%% @type arity() = integer(). A function arity; always nonnegative. +%% +%% @doc User-friendly native code compiler interface. Reads BEAM code +%% from the corresponding "Module.beam" file in the system +%% path, and compiles either a single function or the whole module to +%% native code. By default, the compiled code is loaded directly. See +%% above for documentation of options. +%% +%% @see c/1 +%% @see c/3 +%% @see f/2 +%% @see compile/2 + +-spec c(c_unit(), comp_options()) -> c_ret(). + +c(Name, Options) -> + c(Name, beam_file(Name), Options). + +%% @spec c(Name, File, options()) -> {ok, Name} | {error, Reason} +%% Name = mod() | mfa() +%% File = filename() | binary() +%% Reason = term() +%% +%% @doc Like c/2, but reads BEAM code from the specified +%% File. +%% +%% @see c/2 +%% @see f/2 + +c(Name, File, Opts) -> + %% No server if only one function is compiled + Opts1 = user_compile_opts(Opts), + case compile(Name, File, Opts1) of + {ok, Res} -> + case proplists:get_bool(to_rtl, Opts1) of + true -> {ok, Name, Res}; + false -> {ok, Name} + end; + Other -> + Other + end. + +%% @spec f(File) -> {ok, Name} | {error, Reason} +%% File = filename() | binary() +%% Name = mod() +%% Reason = term() +%% +%% @equiv f(File, []) + +-spec f(f_unit()) -> {'ok', mod()} | {'error', term()}. + +f(File) -> + f(File, []). + +%% @spec f(File, options()) -> {ok, Name} | {error, Reason} +%% File = filename() | binary() +%% Name = mod() +%% Reason = term() +%% +%% @doc Like c/3, but takes the module name from the +%% specified File. This always compiles the whole module; +%% there is no possibility to compile just a single function. +%% +%% @see c/3 + +-spec f(f_unit(), comp_options()) -> {'ok', mod()} | {'error', term()}. + +f(File, Opts) -> + case file(File, user_compile_opts(Opts)) of + {ok, Name, _} -> + {ok, Name}; + Other -> + Other + end. + +-define(USER_DEFAULTS, [load]). + +user_compile_opts(Opts) -> + Opts ++ ?USER_DEFAULTS. + + +%% @spec compile(Name) -> {ok, {Target,Binary}} | {error, Reason} +%% Name = mod() | mfa() +%% Binary = binary() +%% Reason = term() +%% +%% @equiv compile(Name, []) + +-spec compile(c_unit()) -> {'ok', compile_ret()} | {'error', term()}. + +compile(Name) -> + compile(Name, []). + +%% @spec compile(Name, options()) -> {ok, {Target,Binary}} | {error, Reason} +%% Name = mod() | mfa() +%% Binary = binary() +%% Reason = term() +%% +%% @doc Direct compiler interface, for advanced use. This just compiles +%% the named function or module, reading BEAM code from the +%% corresponding "Module.beam" file in the system path. +%% Returns {ok, Binary} if successful, or {error, +%% Reason} otherwise. By default, it does not load the +%% binary to memory (the load option can be used to +%% activate automatic loading). File can be either a file +%% name or a binary containing the BEAM code for the module. +%% +%% @see c/2 +%% @see compile/1 +%% @see compile/3 +%% @see file/2 +%% @see load/2 + +-spec compile(c_unit(), comp_options()) -> {'ok', compile_ret()} | {'error', _}. + +compile(Name, Options) -> + compile(Name, beam_file(Name), Options). + +-spec beam_file(mod() | mfa()) -> string(). + +beam_file({M,F,A}) when is_atom(M), is_atom(F), is_integer(A), A >= 0 -> + beam_file(M); +beam_file(Module) when is_atom(Module) -> + case code:which(Module) of + non_existing -> + ?error_msg("Cannot find ~w.beam file.",[Module]), + ?EXIT({cant_find_beam_file,Module}); + File -> % string() + File + end. + +%% @spec compile(Name, File, options()) -> +%% {ok, {Target, Binary}} | {error, Reason} +%% Name = mod() | mfa() +%% File = filename() | binary() +%% Binary = binary() +%% Reason = term() +%% +%% @doc Like compile/2, but reads BEAM code from the +%% specified File. +%% +%% @see compile/2 + +-spec compile(c_unit(), compile_file(), comp_options()) -> + {'ok', compile_ret()} | {'error', term()}. + +compile(Name, File, Opts0) -> + Opts1 = expand_kt2(Opts0), + Opts = + case Name of + {_Mod, _Fun, _Arity} -> + [no_concurrent_comp|Opts1]; + _ -> + Opts1 + end, + case proplists:get_value(core, Opts) of + true when is_binary(File) -> + ?error_msg("Cannot get Core Erlang code from BEAM binary.",[]), + ?EXIT({cant_compile_core_from_binary}); + true -> + case filename:find_src(filename:rootname(File, ".beam")) of + {error, _} -> + ?error_msg("Cannot find source code for ~p.",[File]), + ?EXIT({cant_find_source_code}); + {Source, CompOpts} -> + CoreOpts = [X || X = {core_transform, _} <- Opts], + %%io:format("Using: ~w\n", [CoreOpts]), + case compile:file(Source, CoreOpts ++ [to_core, binary|CompOpts]) of + {ok, _, Core} -> + compile_core(Name, Core, File, Opts); + Error -> + ?error_msg("Error compiling ~p:\n~p.",[File, Error]), + ?EXIT({cant_compile_source_code}) + end + end; + {src_file, Source} -> + CoreOpts1 = [X || X = {core_transform, _} <- Opts], + CoreOpts2 = [report_errors, to_core, binary, {i,"../include"}|CoreOpts1], + %% io:format("Using: ~w\n", [CoreOpts2]), + case compile:file(Source, CoreOpts2) of + {ok, _, Core} -> + compile_core(Name, Core, File, Opts); + Error -> + ?error_msg("Error compiling ~p:\n~p\n",[Source, Error]), + ?EXIT({cant_compile_source_code, Error}) + end; + Other when Other =:= false; Other =:= undefined -> + NewOpts = + case proplists:get_value(use_callgraph, Opts) of + No when No =:= false; No =:= undefined -> Opts; + _ -> + case Name of + {_M,_F,_A} -> + %% There is no point in using the callgraph or concurrent_comp + %% when analyzing just one function. + [no_use_callgraph, no_concurrent_comp|Opts]; + _ -> Opts + end + end, + DisasmFun = fun (_) -> disasm(File) end, + IcodeFun = fun (Code, Opts_) -> + get_beam_icode(Name, Code, File, Opts_) + end, + run_compiler(Name, DisasmFun, IcodeFun, NewOpts) + end. + +-spec compile_core(mod(), _, compile_file(), comp_options()) -> + {'ok', compile_ret()} | {'error', term()}. + +compile_core(Name, Core0, File, Opts) -> + Core = cerl:from_records(Core0), + Core1 = case (erlang:system_info(heap_type) =:= hybrid) + andalso proplists:get_bool(hybrid, Opts) of + true -> cerl_hybrid_transform:transform(Core, Opts); + false -> Core + end, + compile(Name, Core1, File, Opts). + +%% @spec compile(Name, Core, File, options()) -> +%% {ok, {Target, Binary}} | {error, Reason} +%% Name = mod() +%% Core = coreErlang() | [] +%% File = filename() | binary() +%% Binary = binary() +%% Reason = term() +%% +%% @doc Like compile/3, but unless Core is +%% [], low-level code is generated from the given Core +%% Erlang code instead of from the BEAM code. +%% +%%

Note that only whole modules can be compiled with this +%% function.

+%% +%% @see compile/3 + +-spec compile(mod(), _, compile_file(), comp_options()) -> + {'ok', compile_ret()} | {'error', term()}. + +compile(Name, [], File, Opts) -> + compile(Name, File, Opts); +compile(Name, Core, File, Opts) when is_atom(Name) -> + DisasmFun = fun (_) -> {false, []} end, + IcodeFun = fun (_, Opts) -> + get_core_icode(Name, Core, File, Opts) + end, + run_compiler(Name, DisasmFun, IcodeFun, Opts). + +%% @spec file(File) -> {ok, Name, {Target, Binary}} | {error, Reason} +%% File = filename() | binary() +%% Name = mod() | mfa() +%% Binary = binary() +%% Reason = term() +%% +%% @equiv file(File, []) + +-spec file(Mod) -> {'ok', Mod, compile_ret()} | {'error', term()} + when is_subtype(Mod, mod()). + +file(File) -> + file(File, []). + +%% @spec file(File, options()) -> {ok, Name, {Target,Binary}} | {error, Reason} +%% File = filename() +%% Name = mod() | mfa() +%% Binary = binary() +%% Reason = term() +%% +%% @doc Like compile/2, but takes the module name from the +%% specified File. Returns both the name and the final +%% binary if successful. +%% +%% @see file/1 +%% @see compile/2 + +-spec file(Mod, comp_options()) -> {'ok', Mod, compile_ret()} + | {'error', term()} + when is_subtype(Mod, mod()). +file(File, Options) when is_atom(File) -> + case beam_lib:info(File) of + L when is_list(L) -> + {module, Mod} = lists:keyfind(module, 1, L), + case compile(Mod, File, Options) of + {ok, CompRet} -> + {ok, Mod, CompRet}; + Other -> + Other + end; + Error -> + Error + end. + + +%%----------------------------------------------------------------------- +%% The rest are internal functions: +%%----------------------------------------------------------------------- + +%% @doc +%% Get BEAM code from `.beam' files or directly from binaries. +%% File is either a file name or a binary containing the BEAM code. + +disasm(File) -> + case beam_disasm:file(File) of + #beam_file{labeled_exports = LabeledExports, + compile_info = CompInfo, + code = BeamCode} -> + {options, CompOpts} = lists:keyfind(options, 1, CompInfo), + HCompOpts = case lists:keyfind(hipe, 1, CompOpts) of + {hipe, L} when is_list(L) -> L; + {hipe, X} -> [X]; + _ -> [] + end, + Exports = fix_beam_exports(LabeledExports), + {{BeamCode, Exports}, HCompOpts}; + {error, _Mod, Error} -> + io:format("~s\n", [beam_lib:format_error(Error)]), + ?EXIT(no_beam_code) + end. + +fix_beam_exports(BeamExports) -> + fix_beam_exports(BeamExports, []). + +fix_beam_exports([{F,A,_}|BeamExports], Exports) -> + fix_beam_exports(BeamExports, [{F,A} | Exports]); +fix_beam_exports([], Exports) -> + Exports. + +get_beam_icode({M,_F,_A} = MFA, {BeamCode, Exports}, _File, Options) -> + ?option_time({ok, Icode} = + (catch {ok, hipe_beam_to_icode:mfa(BeamCode, MFA, Options)}), + "BEAM-to-Icode", Options), + {{M, Exports, Icode}, false}; +get_beam_icode(Mod, {BeamCode, Exports}, File, Options) -> + ?option_time({ok, Icode} = + (catch {ok, hipe_beam_to_icode:module(BeamCode, Options)}), + "BEAM-to-Icode", Options), + BeamBin = get_beam_code(File), + {{Mod, Exports, Icode}, BeamBin}. + +get_core_icode(Mod, Core, File, Options) -> + ?option_time({ok, Icode} = + (catch {ok, cerl_to_icode:module(Core, Options)}), + "BEAM-to-Icode", Options), + NeedBeamCode = not proplists:get_bool(load, Options), + BeamBin = + case NeedBeamCode of + true -> []; + false -> get_beam_code(File) + end, + Exports = [cerl:var_name(V) || V <- cerl:module_exports(Core)], + {{Mod, Exports, Icode}, BeamBin}. + +get_beam_code(Bin) when is_binary(Bin) -> Bin; +get_beam_code(FileName) -> + case erl_prim_loader:get_file(FileName) of + {ok,Bin,_} -> + Bin; + error -> + ?EXIT(no_beam_file) + end. + + +%% --------------------------------------------------------------------- +%% All compilations go through this function. Note that it receives only +%% "basic" options. Name is just used for verbosity. The DisasmFun and +%% IcodeFun only collect the Icode; most of the real work is done in the +%% 'finalize' function. + +run_compiler(Name, DisasmFun, IcodeFun, Opts0) -> + Opts = expand_basic_options(Opts0 ++ ?COMPILE_DEFAULTS), + ?when_option(verbose, Opts, ?debug_msg("Compiling: ~p\n",[Name])), + ?option_start_time("Compile", Opts), + Res = run_compiler_1(DisasmFun, IcodeFun, Opts), + ?option_stop_time("Compile", Opts), + Res. + +run_compiler_1(DisasmFun, IcodeFun, Options) -> + Parent = self(), + {trap_exit,TrapExit} = process_info(Parent, trap_exit), + %% Spawn a compilation process CompProc. In case this process gets + %% killed, the trap_exit flag is restored to that of the Parent process. + process_flag(trap_exit, true), + CompProc = spawn_link(fun () -> + %% Compiler process + set_architecture(Options), + pre_init(Options), + %% The full option expansion is not done + %% until the DisasmFun returns. + {Code, CompOpts} = DisasmFun(Options), + Opts = expand_options(Options ++ CompOpts), + check_options(Opts), + ?when_option(verbose, Options, + ?debug_msg("Options: ~p.\n",[Opts])), + init(Opts), + {Icode, WholeModule} = IcodeFun(Code, Opts), + CompRes = compile_finish(Icode, WholeModule, Opts), + compiler_return(CompRes, Parent) + end), + Timeout = case proplists:get_value(timeout, Options) of + N when is_integer(N), N >= 0 -> N; + undefined -> ?DEFAULT_TIMEOUT; + infinity -> infinity; + Other -> + ?WARNING_MSG("Bad timeout value: ~P\n" + "Using default timeout limit.\n", + [Other, 5]), + ?DEFAULT_TIMEOUT + end, + receive + {'EXIT', CompProc, normal} -> ok; + {'EXIT', CompProc, Reason} -> exit(Reason) + after Timeout -> + %% Kill the compilation process + exit(CompProc, kill), + receive {'EXIT', CompProc, _} -> ok end, + flush(), + ?error_msg("ERROR: Compilation timed out.\n",[]), + exit(timed_out) + end, + Result = receive {CompProc, Res} -> Res end, + process_flag(trap_exit, TrapExit), + Result. + +flush() -> + receive + _ -> flush() + after 0 -> + ok + end. + +compiler_return(Res, Client) -> + Client ! {self(), Res}. + +compile_finish({Mod, Exports, Icode}, WholeModule, Options) -> + Res = finalize(Icode, Mod, Exports, WholeModule, Options), + post(Res, Icode, Options). + + +%% ------------------------------------------------------------------------- +%% finalize/5 +%% compiles, assembles, and optionally loads a list of `{MFA, Icode}' pairs, +%% and returns `{ok, {TargetArch, Binary}}' or `{error, Reason, Stack}'. + +finalize(OrigList, Mod, Exports, WholeModule, Opts) -> + List = icode_multret(OrigList, Mod, Opts, Exports), + {T1Compile,_} = erlang:statistics(runtime), + CompiledCode = + case proplists:get_value(use_callgraph, Opts) of + true -> + %% Compiling the functions bottom-up by using a call graph + CallGraph = hipe_icode_callgraph:construct(List), + OrdList = hipe_icode_callgraph:to_list(CallGraph), + finalize_fun(OrdList, Exports, Opts); + _ -> + %% Compiling the functions bottom-up by reversing the list + OrdList = lists:reverse(List), + finalize_fun(OrdList, Exports, Opts) + end, + {T2Compile,_} = erlang:statistics(runtime), + ?when_option(verbose, Opts, + ?debug_msg("Compiled ~p in ~.2f s\n", + [Mod,(T2Compile-T1Compile)/1000])), + case proplists:get_bool(to_rtl, Opts) of + true -> + {ok, CompiledCode}; + false -> + Closures = + [MFA || {MFA, Icode} <- List, + hipe_icode:icode_is_closure(Icode)], + {T1,_} = erlang:statistics(runtime), + ?when_option(verbose, Opts, ?debug_msg("Assembling ~w",[Mod])), + try assemble(CompiledCode, Closures, Exports, Opts) of + Bin -> + {T2,_} = erlang:statistics(runtime), + ?when_option(verbose, Opts, + ?debug_untagged_msg(" in ~.2f s\n", + [(T2-T1)/1000])), + {module,Mod} = maybe_load(Mod, Bin, WholeModule, Opts), + TargetArch = get(hipe_target_arch), + {ok, {TargetArch,Bin}} + catch + error:Error -> + {error,Error,erlang:get_stacktrace()} + end + end. + +finalize_fun(MfaIcodeList, Exports, Opts) -> + case proplists:get_value(concurrent_comp, Opts) of + FalseVal when (FalseVal =:= undefined) orelse (FalseVal =:= false) -> + [finalize_fun_sequential(MFAIcode, Opts, #comp_servers{}) + || {_MFA, _Icode} = MFAIcode <- MfaIcodeList]; + TrueVal when (TrueVal =:= true) or (TrueVal =:= debug) -> + finalize_fun_concurrent(MfaIcodeList, Exports, Opts) + end. + +finalize_fun_concurrent(MfaIcodeList, Exports, Opts) -> + Self = self(), + case MfaIcodeList of + [{{M,_,_},_}|_] -> + CallGraph = hipe_icode_callgraph:construct_callgraph(MfaIcodeList), + Closures = [{MFA, true} || {MFA, Icode} <- MfaIcodeList, + hipe_icode:icode_is_closure(Icode)], + Exported = [{{M, F, A}, false} || {F, A} <- Exports], + NonEscaping = [MFA || {{_M, F, A} = MFA, Icode} <- MfaIcodeList, + not lists:member({F, A}, Exports), + not hipe_icode:icode_is_closure(Icode)], + Escaping = Closures ++ Exported, + TypeServerFun = + fun() -> + hipe_icode_coordinator:coordinate(CallGraph, Escaping, + NonEscaping, hipe_icode_type) + end, + TypeServer = spawn_link(TypeServerFun), + PPServerFun = + fun() -> + pp_server_start(Opts) + end, + PPServer = spawn_link(PPServerFun), + RangeServerFun = + fun() -> + hipe_icode_coordinator:coordinate(CallGraph, Escaping, + NonEscaping, hipe_icode_range) + end, + RangeServer = spawn_link(RangeServerFun), + Servers = #comp_servers{pp_server = PPServer, + range = RangeServer, + type = TypeServer}, + CompFuns = + [fun() -> + set_architecture(Opts), + pre_init(Opts), + init(Opts), + Self ! finalize_fun_sequential(IcodeFun, Opts, Servers) + end || IcodeFun <- MfaIcodeList], + lists:foreach(fun (F) -> spawn_link(F) end, CompFuns), + Final = [receive Res when element(1, Res) =:= MFA -> Res end + || {MFA, _} <- MfaIcodeList], + lists:foreach(fun (Pid) -> stop_and_wait(Pid) end, + [PPServer, TypeServer, RangeServer]), + Final; + [] -> + [] + end. + +stop_and_wait(Pid) -> + Pid ! {stop, self()}, + receive + _ -> ok + end. + +finalize_fun_sequential({MFA, Icode}, Opts, Servers) -> + {T1, _} = erlang:statistics(runtime), + ?when_option(verbose, Opts, ?debug_msg("Compiling ~w~n", [MFA])), + try hipe_main:compile_icode(MFA, Icode, Opts, Servers) of + {native, _Platform, {unprofiled, Code}} -> + {T2, _} = erlang:statistics(runtime), + ?when_option(verbose, Opts, + ?debug_msg("Compiled ~w in ~.2f s\n", [MFA,(T2-T1)/1000])), + {MFA, Code}; + {rtl, LinearRtl} -> + {MFA, LinearRtl} + catch + error:Error -> + ?when_option(verbose, Opts, ?debug_untagged_msg("\n", [])), + ErrorInfo = {Error, erlang:get_stacktrace()}, + ?error_msg("ERROR: ~p~n", [ErrorInfo]), + ?EXIT(ErrorInfo) + end. + +pp_server_start(Opts) -> + set_architecture(Opts), + garbage_collect(), + pp_server(). + +pp_server() -> + receive + {print, Fun} -> + Fun(), pp_server(); + {stop, Pid} -> + Pid ! {done, self()}; + _ -> + pp_server() + end. + +icode_multret(List, Mod, Opts, Exports) -> + case proplists:get_bool(icode_multret, Opts) of + true -> + hipe_icode_mulret:mult_ret(List, Mod, Opts, Exports); + false -> + List + end. + +maybe_load(Mod, Bin, WholeModule, Opts) -> + case proplists:get_bool(load, Opts) of + false -> + {module, Mod}; + true -> + ?when_option(verbose, Opts, ?debug_msg("Loading/linking\n", [])), + do_load(Mod, Bin, WholeModule) + end. + +do_load(Mod, Bin, WholeModule) -> + HostArch = get(hipe_host_arch), + TargetArch = get(hipe_target_arch), + %% Make sure we can do the load. + if HostArch =/= TargetArch -> + ?EXIT({host_and_target_arch_differ, HostArch, TargetArch}); + true -> ok + end, + case WholeModule of + false -> + %% In this case, the emulated code for the module must be loaded. + {module, Mod} = code:ensure_loaded(Mod), + code:load_native_partial(Mod, Bin); + BinCode when is_binary(BinCode) -> + case code:is_sticky(Mod) of + true -> + %% We unpack and repack the Beam binary as a workaround to + %% ensure that it is not compressed. + {ok, _, Chunks} = beam_lib:all_chunks(WholeModule), + {ok, Beam} = beam_lib:build_module(Chunks), + %% Don't purge or register sticky mods; just load native. + code:load_native_sticky(Mod, Bin, Beam); + false -> + %% Normal loading of a whole module + Architecture = erlang:system_info(hipe_architecture), + ChunkName = hipe_unified_loader:chunk_name(Architecture), + {ok, _, Chunks0} = beam_lib:all_chunks(WholeModule), + Chunks = [{ChunkName, Bin}|lists:keydelete(ChunkName, 1, Chunks0)], + {ok, BeamPlusNative} = beam_lib:build_module(Chunks), + code:load_binary(Mod, code:which(Mod), BeamPlusNative) + end + end. + +assemble(CompiledCode, Closures, Exports, Options) -> + case get(hipe_target_arch) of + ultrasparc -> + hipe_sparc_assemble:assemble(CompiledCode, Closures, Exports, Options); + powerpc -> + hipe_ppc_assemble:assemble(CompiledCode, Closures, Exports, Options); + arm -> + hipe_arm_assemble:assemble(CompiledCode, Closures, Exports, Options); + x86 -> + hipe_x86_assemble:assemble(CompiledCode, Closures, Exports, Options); + amd64 -> + hipe_amd64_assemble:assemble(CompiledCode, Closures, Exports, Options); + Arch -> + ?EXIT({executing_on_an_unsupported_architecture, Arch}) + end. + +%% -------------------------------------------------------------------- + +%% Initialise host and target architectures. Target defaults to host, +%% but can be overridden by passing an option {target, Target}. + +set_architecture(Options) -> + put(hipe_host_arch, erlang:system_info(hipe_architecture)), + put(hipe_target_arch, + proplists:get_value(target, Options, get(hipe_host_arch))), + ok. + +%% This sets up some globally accessed stuff that are needed by the +%% compiler process before it even gets the full list of options. +%% Therefore, this expands the current set of options for local use. + +pre_init(Opts) -> + Options = expand_options(Opts), + %% Initialise some counters used for measurements and benchmarking. If + %% the option 'measure_regalloc' is given the compilation will return + %% a keylist with the counter values. + put(hipe_time, + case proplists:get_value(time, Options, false) of + true -> [hipe, hipe_main]; + OptTime -> OptTime + end), + lists:foreach(fun (T) -> ?set_hipe_timer_val(T, 0) end, hipe_timers()), + lists:foreach(fun (Counter) -> + case Counter of + {CounterName, InitVal} -> put(CounterName, InitVal); + CounterName -> put(CounterName, 0) + end + end, + proplists:get_value(counters, Options, [])), + put(hipe_debug, proplists:get_bool(debug, Options)), + put(hipe_inline_fp, proplists:get_bool(inline_fp, Options)), + ok. + +%% Prepare the compiler process by setting up variables which are +%% accessed globally. Options have been fully expanded at ths point. + +init(_Options) -> + put(callersavetime, 0), + put(totalspill, {0,0}), + put(spilledtemps, 0), + put(pre_ra_instrs, 0), + put(post_ra_instrs, 0), + put(pre_ra_temps, 0), + put(post_ra_temps, 0), + put(noregs, 0), + put(bbs, 0), + ok. + +%% -------------------------------------------------------------------- + +post(Res, Icode, Options) -> + TimerVals = + case proplists:get_value(timers, Options) of + Timers when is_list(Timers) -> + [{Timer, ?get_hipe_timer_val(Timer)} || Timer <- Timers]; + _ -> [] + end, + CounterVals = + case proplists:get_value(counters, Options) of + Counters when is_list(Counters) -> + [case Counter of + {CounterName, _InitVal} -> {CounterName, get(CounterName)}; + CounterName -> {CounterName, get(CounterName)} + end + || Counter <- Counters]; + _ -> [] + end, + Measures = + case proplists:get_bool(measure_regalloc, Options) of + true -> + get(); % return whole process dictionary list (simplest way...) + false -> [] + end, + Info = TimerVals ++ CounterVals ++ Measures, + case proplists:get_bool(get_called_modules, Options) of + true -> + CalledMods = hipe_icode_callgraph:get_called_modules(Icode), + case Info of + [] -> + {Res, {called_modules, CalledMods}}; + _ -> + {Res, {info, Info}, {called_modules, CalledMods}} + end; + false -> + case Info of + [] -> + Res; + _ -> + {Res, {info, Info}} + end + end. + +%% -------------------------------------------------------------------- + +%% @doc Returns the current HiPE version as a string(). +-spec version() -> string(). + +version() -> + ?VERSION_STRING(). + +%% -------------------------------------------------------------------- +%% D O C U M E N T A T I O N - H E L P +%% -------------------------------------------------------------------- + +%% @doc Prints on-line documentation to the standard output. +-spec help() -> 'ok'. + +help() -> + M = + "The HiPE Compiler (Version " ++ ?VERSION_STRING() ++ ")\n" ++ + "\n" ++ + " The normal way to native-compile Erlang code using HiPE is to\n" ++ + " include `native' in the Erlang compiler options, as in:\n" ++ + " 1> c(my_module, [native]).\n" ++ + " Options to the HiPE compiler must then be passed as follows:\n" ++ + " 1> c(my_module, [native,{hipe,Options}]).\n" ++ + " Use `help_options()' for details.\n" ++ + "\n" ++ + " Utility functions:\n" ++ + " help()\n" ++ + " Prints this message.\n" ++ + " help_options()\n" ++ + " Prints a description of options recognized by the\n" ++ + " HiPE compiler.\n" ++ + " help_option(Option)\n" ++ + " Prints a description of that option.\n" ++ + " help_debug_options()\n" ++ + " Prints a description of debug options.\n" ++ + " version() ->\n" ++ + " Returns the HiPE version as a string'.\n" ++ + "\n" ++ + " For HiPE developers only:\n" ++ + " Use `help_hiper()' for information about HiPE's low-level interface\n", + io:put_chars(M), + ok. + +-spec help_hiper() -> 'ok'. + +help_hiper() -> + M = + " This interface is supposed to be used by HiPE-developers only!\n" ++ + " Note that all options are specific to the HiPE compiler.\n" ++ + " c(Name,Options)\n" ++ + " Compiles the module or function Name and loads it\n" ++ + " to memory. Name is an atom or a tuple {M,F,A}.\n" ++ + " c(Name)\n" ++ + " As above, but using only default options.\n" ++ + " f(File,Options)\n" ++ + " As c(Name,File,Options), but taking the module name\n" ++ + " from File.\n" ++ + " f(File)\n" ++ + " As above, but using only default options.\n" ++ + " compile(Name,Options)\n" ++ + " Compiles the module or function Name to a binary.\n" ++ + " By default, this does not load to memory.\n" ++ + " compile(Name)\n" ++ + " As above, but using only default options.\n" ++ + " file(File,Options)\n" ++ + " As compile(Name,File,Options), but taking the\n" ++ + " module name from File.\n" ++ + " file(File)\n" ++ + " As above, but using only default options.\n" ++ + " load(Module)\n" ++ + " Loads the named module into memory.\n", + io:put_chars(M), + ok. + +%% TODO: it should be possible to specify the target somehow when asking +%% for available options. Right now, you only see host machine options. + +%% @doc Prints documentation about options to the standard output. +-spec help_options() -> 'ok'. + +help_options() -> + set_architecture([]), %% needed for target-specific option expansion + O1 = expand_options([o1]), + O2 = expand_options([o2]), + O3 = expand_options([o3]), + io:format("HiPE Compiler Options\n" ++ + " Boolean-valued options generally have corresponding " ++ + "aliases `no_...',\n" ++ + " and can also be specified as `{Option, true}' " ++ + "or `{Option, false}.\n\n" ++ + " General boolean options:\n" ++ + " ~p.\n\n" ++ + " Non-boolean options:\n" ++ + " o#, where 0 =< # =< 3:\n" ++ + " Select optimization level (the default is 2).\n\n" ++ + " Further options can be found below; " ++ + "use `hipe:help_option(Name)' for details.\n\n" ++ + " Aliases:\n" ++ + " pp_all = ~p,\n" ++ + " pp_sparc = pp_native,\n" ++ + " pp_x86 = pp_native,\n" ++ + " pp_amd64 = pp_native,\n" ++ + " pp_ppc = pp_native,\n" ++ + " o0,\n" ++ + " o1 = ~p,\n" ++ + " o2 = ~p ++ o1,\n" ++ + " o3 = ~p ++ o2.\n", + [ordsets:from_list([verbose, debug, time, load, pp_beam, + pp_icode, pp_rtl, pp_native, pp_asm, + timeout]), + expand_options([pp_all]), + O1 -- [o1], + (O2 -- O1) -- [o2], + (O3 -- O2) -- [o3]]), + ok. + +%% Documentation of the individual options. +%% If you add an option, please add help-text here. + +-spec option_text(atom()) -> string(). + +option_text('O') -> + "Specify optimization level. Used as o1, o2, o3.\n" ++ + " At the moment levels 0 - 3 are implemented.\n" ++ + " Aliases: 'O1', 'O2', O3'."; +option_text(caller_save_spill_restore) -> + "Activates caller save register spills and restores"; +option_text(debug) -> + "Outputs internal debugging information during compilation"; +option_text(icode_range) -> + "Performs integer range analysis on the Icode level"; +option_text(icode_ssa_check) -> + "Checks whether Icode is on SSA form or not\n"; +option_text(icode_ssa_copy_prop) -> + "Performs copy propagation on Icode SSA"; +option_text(icode_ssa_const_prop) -> + "Performs sparse conditional constant propagation on Icode SSA"; +option_text(icode_ssa_struct_reuse) -> + "Factors out common tuple and list constructions on Icode SSA"; +option_text(icode_type) -> + "Performs type analysis on the Icode level" ++ + "and then simplifies the code based on the results of this analysis"; +option_text(load) -> + "Automatically load the produced native code into memory"; +option_text(peephole) -> + "Enables peephole optimizations"; +option_text(pmatch) -> + "Enables pattern matching compilation when compiling from Core; " ++ + "has no effect when compiling from BEAM bytecode"; +option_text(pp_asm) -> + "Displays assembly listing with addresses and bytecode\n" ++ + "Currently available for x86 only"; +option_text(pp_beam) -> + "Display the input BEAM code"; +option_text(pp_icode) -> + "Display the intermediate HiPE-ICode"; +option_text(pp_rtl) -> + "Display the intermediate HiPE-RTL code"; +option_text(pp_rtl_lcm) -> + "Display the intermediate HiPE-RTL lazy code motion sets"; +option_text(pp_rtl_ssapre) -> + "Display the intermediate HiPE-RTL A-SSAPRE sets"; +option_text(pp_native) -> + "Display the generated (back-end specific) native code"; +option_text(regalloc) -> + "Select register allocation algorithm. Used as {regalloc, METHOD}.\n" ++ + " Currently available methods:\n" ++ + " naive - spills everything (for debugging and testing)\n" ++ + " linear_scan - fast; not so good if few registers available\n" ++ + " graph_color - slow, but gives OK performance\n" ++ + " coalescing - slower, tries hard to use registers\n" ++ + " optimistic - another variant of a coalescing allocator"; +option_text(remove_comments) -> + "Strip comments from intermediate code"; +option_text(rtl_ssa) -> + "Perform SSA conversion on the RTL level -- default starting at O2"; +option_text(rtl_ssa_const_prop) -> + "Performs sparse conditional constant propagation on RTL SSA"; +option_text(rtl_lcm) -> + "Perform Lazy Code Motion on RTL"; +option_text(rtl_ssapre) -> + "Perform A-SSAPRE on RTL"; +option_text(time) -> + "Reports the compilation times for the different stages\n" ++ + "of the compiler.\n" ++ + " {time, Module} reports timings for the module Module.\n" ++ + " {time, [M1, M2, M3]} reports timings for the specified modules.\n" ++ + " {time, all} reports timings all modules.\n" ++ + " time reports timings for the main module.\n"; +option_text(timeout) -> + "Specify compilation time limit in ms. Used as {timeout, LIMIT}.\n" ++ + " The limit must be a non-negative integer or the atom 'infinity'.\n" ++ + " The current default limit is 15 minutes (900000 ms)."; +option_text(use_indexing) -> + "Use indexing for multiple-choice branch selection."; +option_text(use_callgraph) -> + "Compile the functions in a module according to a reversed topological " ++ + "sorted order to gain more information when using a persistent lookup " ++ + "table for storing intra-modular type information."; +option_text(verbose) -> + "Output information about what is being done"; +option_text(Opt) when is_atom(Opt) -> + "". + +%% @doc Prints documentation about a specific option to the standard output. +-spec help_option(comp_option()) -> 'ok'. + +help_option(Opt) -> + set_architecture([]), %% needed for target-specific option expansion + case expand_options([Opt]) of + [Opt] -> + Name = if is_atom(Opt) -> Opt; + tuple_size(Opt) =:= 2 -> element(1, Opt) + end, + case option_text(Name) of + "" -> + case lists:member(Name, opt_keys()) of + true -> + io:format("~w - Sorry, this option is not documented yet.\n", + [Name]); + _ -> + io:format("Unknown option ~p.\n", [Name]) + end; + Txt -> + io:fwrite("~w - ~s\n", [Name, Txt]) + end; + Opts -> + io:fwrite("This is an alias for: ~p.\n", [Opts]) + end, + ok. + +%% @doc Prints documentation about debugging options to the standard +%% output. +-spec help_debug_options() -> 'ok'. + +help_debug_options() -> + io:format("HiPE compiler debug options:\n" ++ + " Might require that some modules have been compiled " ++ + "with the debug flag.\n" ++ + " rtl_show_translation - Prints each step in the\n" ++ + " translation from Icode to RTL\n", + []), + ok. + +hipe_timers() -> + [time_ra]. + +%% ____________________________________________________________________ +%% +%% Option expansion + +%% These are currently in use, but not documented: +%% +%% count_instrs: +%% icode_type: +%% icode_range: +%% {ls_order, Order}: +%% {regalloc, Algorithm}: +%% remove_comments +%% timeregalloc: +%% timers +%% use_indexing + +%% Valid option keys. (Don't list aliases or negations - the check is +%% done after the options have been expanded to normal form.) + +opt_keys() -> + [ + binary_opt, + bitlevel_binaries, + caller_save_spill_restore, + concurrent_comp, + core, + core_transform, + counters, + count_instrs, + count_spills, + count_temps, + debug, + get_called_modules, + split_arith, + split_arith_unsafe, + icode_inline_bifs, + icode_ssa_check, + icode_ssa_copy_prop, + icode_ssa_const_prop, + icode_ssa_struct_reuse, + icode_type, + icode_range, + icode_multret, + inline_fp, + ls_order, + load, + measure_regalloc, + peephole, + pmatch, + pp_asm, + pp_beam, + pp_icode, + pp_icode_ssa, + pp_icode_split_arith, + pp_opt_icode, + pp_range_icode, + pp_typed_icode, + pp_icode_liveness, + pp_native, + pp_rtl, + pp_rtl_liveness, + pp_rtl_ssa, + pp_rtl_lcm, + pp_rtl_ssapre, + pp_rtl_linear, + regalloc, + remove_comments, + rtl_ssa, + rtl_ssa_const_prop, + rtl_lcm, + rtl_ssapre, + rtl_show_translation, + spillmin_color, + target, + time, + timeout, + timeregalloc, + timers, + to_rtl, + use_indexing, + use_inline_atom_search, + use_callgraph, + use_clusters, + use_jumptable, + verbose, + %% verbose_spills, + x87]. + +%% Definitions: + +o1_opts() -> + Common = [inline_fp, pmatch, peephole], + case get(hipe_target_arch) of + ultrasparc -> + Common; + powerpc -> + Common; + arm -> + Common -- [inline_fp]; % Pointless optimising for absent hardware + x86 -> + [x87 | Common]; % XXX: Temporary until x86 has sse2 + amd64 -> + Common; + Arch -> + ?EXIT({executing_on_an_unsupported_architecture,Arch}) + end. + +o2_opts() -> + Common = [icode_ssa_const_prop, icode_ssa_copy_prop, % icode_ssa_struct_reuse, + icode_type, icode_inline_bifs, rtl_lcm, + rtl_ssa, rtl_ssa_const_prop, + spillmin_color, use_indexing, remove_comments, + concurrent_comp, binary_opt | o1_opts()], + case get(hipe_target_arch) of + ultrasparc -> + Common; + powerpc -> + Common; + arm -> + Common; + x86 -> + Common; + % [rtl_ssapre | Common]; + amd64 -> + [icode_range | Common]; % range analysis is effective on 64 bits + Arch -> + ?EXIT({executing_on_an_unsupported_architecture,Arch}) + end. + +o3_opts() -> + Common = [icode_range, {regalloc,coalescing} | o2_opts()], + case get(hipe_target_arch) of + ultrasparc -> + Common; + powerpc -> + Common; + arm -> + Common; + x86 -> + Common; + amd64 -> + Common; + Arch -> + ?EXIT({executing_on_an_unsupported_architecture,Arch}) + end. + +%% Note that in general, the normal form for options should be positive. +%% This is a good programming convention, so that tests in the code say +%% "if 'x' ..." instead of "if not 'no_x' ...". + +opt_negations() -> + [{no_binary_opt, binary_opt}, + {no_bitlevel_binaries, bitlevel_binaries}, + {no_core, core}, + {no_debug, debug}, + {no_get_called_modules, get_called_modules}, + {no_split_arith, split_arith}, + {no_concurrent_comp, concurrent_comp}, + {no_icode_inline_bifs, icode_inline_bifs}, + {no_icode_range, icode_range}, + {no_icode_split_arith, icode_split_arith}, + {no_icode_ssa_check, icode_ssa_check}, + {no_icode_ssa_copy_prop, icode_ssa_copy_prop}, + {no_icode_ssa_const_prop, icode_ssa_const_prop}, + {no_icode_ssa_struct_reuse, icode_ssa_struct_reuse}, + {no_icode_type, icode_type}, + {no_inline_fp, inline_fp}, + {no_load, load}, + {no_peephole, peephole}, + {no_pmatch, pmatch}, + {no_pp_beam, pp_beam}, + {no_pp_icode, pp_icode}, + {no_pp_icode_ssa, pp_icode_ssa}, + {no_pp_opt_icode, pp_opt_icode}, + {no_pp_typed_icode, pp_typed_icode}, + {no_pp_rtl, pp_rtl}, + {no_pp_native, pp_native}, + {no_pp_rtl_lcm, pp_rtl_lcm}, + {no_pp_rtl_ssapre, pp_rtl_ssapre}, + {no_remove_comments, remove_comments}, + {no_rtl_ssa, rtl_ssa}, + {no_rtl_ssa_const_prop, rtl_ssa_const_prop}, + {no_rtl_lcm, rtl_lcm}, + {no_rtl_ssapre, rtl_ssapre}, + {no_rtl_show_translation, rtl_show_translation}, + {no_time, time}, + {no_use_callgraph, use_callgraph}, + {no_use_clusters, use_clusters}, + {no_use_inline_atom_search, use_inline_atom_search}, + {no_use_indexing, use_indexing}]. + +%% Don't use negative forms in right-hand sides of aliases and expansions! +%% We only expand negations once, before the other expansions are done. + +opt_aliases() -> + [{'O0', o0}, + {'O1', o1}, + {'O2', o2}, + {'O3', o3}, + {pp_sparc, pp_native}, + {pp_x86, pp_native}, + {pp_amd64, pp_native}, + {pp_ppc, pp_native}]. + +opt_basic_expansions() -> + [{pp_all, [pp_beam, pp_icode, pp_rtl, pp_native]}]. + +opt_expansions() -> + [{o1, o1_opts()}, + {o2, o2_opts()}, + {o3, o3_opts()}, + {x87, [x87, inline_fp]}, + {inline_fp, case get(hipe_target_arch) of %% XXX: Temporary until x86 + x86 -> [x87, inline_fp]; %% has sse2 + _ -> [inline_fp] end}]. + +%% This expands "basic" options, which may be tested early and cannot be +%% in conflict with options found in the source code. + +-spec expand_basic_options(comp_options()) -> comp_options(). + +expand_basic_options(Opts) -> + proplists:normalize(Opts, [{negations, opt_negations()}, + {aliases, opt_aliases()}, + {expand, opt_basic_expansions()}]). + +-spec expand_kt2(comp_options()) -> comp_options(). + +expand_kt2(Opts) -> + proplists:normalize(Opts, [{expand, [{kt2_type, + [{use_callgraph, fixpoint}, core, + {core_transform, cerl_typean}]}]}]). + +%% Note that set_architecture/1 must be called first, and that the given +%% list should contain the total set of options, since things like 'o2' +%% are expanded here. Basic expansions are processed here also, since +%% this function is called from the help functions. + +-spec expand_options(comp_options()) -> comp_options(). + +expand_options(Opts) -> + proplists:normalize(Opts, [{negations, opt_negations()}, + {aliases, opt_aliases()}, + {expand, opt_basic_expansions()}, + {expand, opt_expansions()}]). + +-spec check_options(comp_options()) -> 'ok'. + +check_options(Opts) -> + Keys = ordsets:from_list(opt_keys()), + Used = ordsets:from_list(proplists:get_keys(Opts)), + case ordsets:subtract(Used, Keys) of + [] -> + ok; + L -> + ?WARNING_MSG("Unknown options: ~p.\n", [L]), + ok + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/main/hipe.hrl.src b/lib/hipe/main/hipe.hrl.src new file mode 100644 index 0000000000..a1fbeda9cf --- /dev/null +++ b/lib/hipe/main/hipe.hrl.src @@ -0,0 +1,322 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Filename : hipe.hrl (automatically generated by hipe.hrl.src) +%% Purpose : Defines some useful macros for debugging and error +%% reporting. +%% +%% History : * 2000-11-03 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% +%% Defines: +%% msg/2 - Works like io:format but prepends +%% ?MSGTAG to the message. +%% If LOGGING is defined then error_logger is used, +%% or rather its substitute in code_server. +%% untagged_msg/2 - Like msg/2 but without the tag. +%% WARNING_MSG/2 - Prints a tagged warning. +%% error_msg/2 - Logs a tagged error. +%% debug_msg/2 - Prints a tagged msg if DEBUG is defined. +%% IF_DEBUG(A,B) - Executes A if DEBUG is defined B otherwise. +%% IF_DEBUG(Lvl,A,B) - Executes A if DEBUG is defined to a value >= Lvl +%% otherwise B is executed. +%% EXIT - Exits with added module and line info. +%% ASSERT - Exits if the expresion does not evaluate to true. +%% VERBOSE_ASSSERT - A message is printed even when an asertion is true. +%% TIME_STMNT(Stmnt, String, FreeVar) +%% - Times the statemnet Stmnt if TIMING is on. +%% The execution time is bound to FreeVar. +%% String is printed after the execution +%% followed by the execution time in seconds and +%% a newline. +%% +%% Flags: +%% DEBUG - Turns on debugging. (Can be defined to a integer +%% value to determine the level of debugging) +%% VERBOSE - More info is printed... +%% HIPE_LOGGING - Turn on logging of messages with erl_logger. +%% DO_ASSERT - Turn on Assertions. +%% TIMING - Turn on timing. +%% HIPE_INSTRUMENT_COMPILER - Turn on instrumentation of the compiler. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-define(VERSION_STRING(),"%VSN%"). +-define(MSGTAG, " "). + +%% +%% Define the message macros with or without logging, +%% depending on the value of the HIPE_LOGGING flag. +%% + +-ifdef(HIPE_LOGGING). +-define(msg(Msg, Args), + code_server:info_msg(?MSGTAG ++ Msg, Args)). +-define(untagged_msg(Msg, Args), + code_server:info_msg(Msg, Args)). +-else. +-define(msg(Msg, Args), + io:format(?MSGTAG ++ Msg, Args)). +-define(untagged_msg(Msg, Args), + io:format(Msg, Args)). +-endif. + +%% +%% Define error and warning messages. +%% +-define(error_msg(Msg, Args), + code_server:error_msg(?MSGTAG ++ + "Error: [~s:~w]: " ++ Msg, + [?MODULE,?LINE|Args])). +-define(WARNING_MSG(Msg, Args), + ?msg("Warning: [~s:~w]: " ++ Msg, [?MODULE,?LINE|Args])). + +%% +%% Define the macros that are dependent on the debug flag. +%% + +-ifdef(DEBUG). +-define(debug_msg(Msg,Data), ?msg(Msg,Data)). +-define(debug_untagged_msg(Msg,Data), ?untagged_msg(Msg,Data)). +-define(IF_DEBUG(DebugAction,NoDebugAction), DebugAction). +-define(IF_DEBUG_LEVEL(Level,DebugAction,NoDebugAction), + if (Level =< ?DEBUG) -> DebugAction; true -> NoDebugAction end). +-else. +-define(debug_msg(Msg,Data), no_debug). +-define(debug_untagged_msg(Msg,Data), no_debug). +-define(IF_DEBUG(DebugAction,NoDebugAction), NoDebugAction). +-define(IF_DEBUG_LEVEL(Level,DebugAction,NoDebugAction), NoDebugAction). +-endif. + +%% +%% Define the exit macro +%% +-ifdef(VERBOSE). +-define(EXIT(Reason), erlang:error({?MODULE,?LINE,Reason})). +-else. +-define(EXIT(Reason), + ?msg("EXITED with reason ~w @~w:~w\n", [Reason,?MODULE,?LINE]), + erlang:error({?MODULE,?LINE,Reason})). +-endif. + +%% +%% Assertions. +%% +-ifdef(DO_ASSERT). +-define(VERBOSE_ASSERT(X), + case X of + true -> + io:format("Assertion ok ~w ~w\n",[?MODULE,?LINE]), + true; + __ASSVAL_R -> + io:format("Assertion failed ~w ~w: ~p\n", + [?MODULE,?LINE, __ASSVAL_R]), + ?EXIT(assertion_failed) + end). +-define(ASSERT(X), + case X of + true -> true; + _ -> ?EXIT(assertion_failed) + end). +-else. +-define(ASSERT(X),true). +-define(VERBOSE_ASSERT(X),true). +-endif. + + +%% Use this to display info, save stuff and so on. +%% Vars cannot be exported from __Action +-define(when_option(__Opt,__Opts,__Action), + case proplists:get_bool(__Opt,__Opts) of + true -> __Action; + false -> ok + end). + +%% Timing macros + +-ifdef(TIMING). +-define(TIME_STMNT(STMNT,Msg,Timer), + Timer = hipe_timing:start_timer(), + STMNT, + ?untagged_msg(Msg ++ "~.2f s\n",[hipe_timing:stop_timer(Timer)/1000])). +-else. +-define(TIME_STMNT(STMNT,Msg,Timer),STMNT). +-endif. + +-define(start_timer(Text), hipe_timing:start(Text, ?MODULE)). +-define(stop_timer(Text), hipe_timing:stop(Text, ?MODULE)). +-define(start_hipe_timer(Timer), hipe_timing:start_hipe_timer(Timer)). +-define(stop_hipe_timer(Timer), hipe_timing:stop_hipe_timer(Timer)). +-define(get_hipe_timer_val(Timer), get(Timer)). +-define(set_hipe_timer_val(Timer, Val), put(Timer, Val)). +-define(option_time(Stmnt, Text, Options), + if true -> ?when_option(time, Options, ?start_timer(Text)), + fun(R) -> + ?when_option(time, Options, ?stop_timer(Text)), + R + end(Stmnt)end). + +-define(option_start_time(Text,Options), + ?when_option(time, Options, ?start_timer(Text))). + +-define(option_stop_time(Text,Options), + ?when_option(time, Options, ?stop_timer(Text))). + +-define(opt_start_timer(Text), + hipe_timing:start_optional_timer(Text,?MODULE)). +-define(opt_stop_timer(Text), + hipe_timing:stop_optional_timer(Text,?MODULE)). + +%% +%% Turn on instrumentation of the compiler. +%% +-ifdef(HIPE_INSTRUMENT_COMPILER). + +-define(count_pre_ra_instructions(Options, NoInstrs), + ?when_option(count_instrs, Options, + put(pre_ra_instrs, + get(pre_ra_instrs)+ NoInstrs))). +-define(count_post_ra_instructions(Options, NoInstrs), + ?when_option(count_instrs, Options, + put(post_ra_instrs, + get(post_ra_instrs)+ NoInstrs))). + +-define(start_time_regalloc(Options), + ?when_option(timeregalloc, Options, + put(regalloctime1,erlang:statistics(runtime)))). +-define(stop_time_regalloc(Options), + ?when_option(timeregalloc, Options, + put(regalloctime, + get(regalloctime) + + (element(1,erlang:statistics(runtime)) + -element(1,get(regalloctime1)))))). +-define(start_time_caller_saves(Options), + ?when_option(timeregalloc, Options, + put(callersavetime1,erlang:statistics(runtime)))). +-define(stop_time_caller_saves(Options), + ?when_option(timeregalloc, Options, + put(callersavetime, + get(callersavetime) + + (element(1,erlang:statistics(runtime)) + -element(1,get(callersavetime1)))))). + +-define(count_pre_ra_temps(Options, NoTemps), + ?when_option(count_temps, Options, + put(pre_ra_temps, + get(pre_ra_temps)+ NoTemps))). +-define(count_post_ra_temps(Options, NoTemps), + ?when_option(count_temps, Options, + put(post_ra_temps, + get(post_ra_temps)+ NoTemps))). + +-define(inc_counter(Counter, Val), + case get(Counter) of + undefined -> true; + _ -> put(Counter, Val + get(Counter)) + end). + +-define(cons_counter(Counter, Val), + case get(Counter) of + undefined -> true; + _ -> put(Counter, [Val|get(Counter)]) + end). + +-define(update_counter(Counter, Val, Op), + case get(Counter) of + undefined -> true; + _ -> put(Counter, get(Counter) Op Val) + end). + +-define(start_ra_instrumentation(Options, NoInstrs, NoTemps), + begin + ?count_pre_ra_instructions(Options, NoInstrs), + ?count_pre_ra_temps(Options, NoTemps), + case get(counter_mem_temps) of + undefined -> true; + _ -> put(counter_mfa_mem_temps,[]) + end, + ?start_time_regalloc(Options) + end). +-define(stop_ra_instrumentation(Options, NoInstrs, NoTemps), + begin + ?stop_time_regalloc(Options), + ?count_post_ra_instructions(Options, NoInstrs), + ?cons_counter(counter_mem_temps, get(counter_mfa_mem_temps)), + ?cons_counter(ra_all_iterations_counter, get(ra_iteration_counter)), + put(ra_iteration_counter,0), + ?count_post_ra_temps(Options, NoTemps) + end). + +-define(add_spills(Options, NoSpills), + ?when_option(count_spills, Options, + put(spilledtemps, get(spilledtemps) + NoSpills))). + +-define(optional_start_timer(Timer, Options), + case lists:member(Timer, proplists:get_value(timers,Options++[{timers,[]}])) of + true -> ?start_hipe_timer(Timer); + false -> true + end). +-define(optional_stop_timer(Timer, Options), + case lists:member(Timer, proplists:get_value(timers,Options++[{timers,[]}])) of + true -> ?stop_hipe_timer(Timer); + false -> true + end). + +-else. %% HIPE_INSTRUMENT_COMPILER + +-define(count_pre_ra_instructions(Options, NoInstrs), no_instrumentation). +-define(count_post_ra_instructions(Options, NoInstrs),no_instrumentation). +-define(start_time_regalloc(Options), no_instrumentation). +-define(stop_time_regalloc(Options), no_instrumentation). +-define(start_time_caller_saves(Options), no_instrumentation). +-define(stop_time_caller_saves(Options), no_instrumentation). +-define(count_pre_ra_temps(Options, NoTemps), no_instrumentation). +-define(count_post_ra_temps(Options, NoTemps), no_instrumentation). +-define(start_ra_instrumentation(Options, NoInstrs, NoTemps),no_instrumentation). +-define(stop_ra_instrumentation(Options, NoInstrs, NoTemps),no_instrumentation). +-define(add_spills(Options, NoSpills), no_instrumentation). +-define(optional_start_timer(Options, Timer), no_instrumentation). +-define(optional_stop_timer(Options, Timer), no_instrumentation). +-define(inc_counter(Counter, Val), no_instrumentation). +-define(update_counter(Counter, Val, Op), no_instrumentation). +-define(cons_counter(Counter, Val), no_instrumentation). + +-endif. %% HIPE_INSTRUMENT_COMPILER + +%%---------------------------------------------------------------------------- +%% Records defined in the hipe module used in other parts of the compiler +%%---------------------------------------------------------------------------- + +-record(comp_servers, {pp_server :: pid(), range :: pid(), type :: pid()}). + +%%---------------------------------------------------------------------------- +%% Basic types of the 'hipe' application used in other parts of the system +%%---------------------------------------------------------------------------- + +-type comp_option() :: atom() | {atom(), atom()}. +-type comp_options() :: [comp_option()]. + +-type hipe_architecture() :: + 'amd64' | 'arm' | 'powerpc' | 'ppc64' | 'ultrasparc' | 'x86'. + +-type hipe_map() :: [{non_neg_integer(), + 'unknown' | {'reg' | 'fp_reg' | 'spill', + non_neg_integer()}}]. +-type hipe_temp_map() :: tuple(). +-type hipe_spill_map() :: [{non_neg_integer(), {'spill',non_neg_integer()}}]. diff --git a/lib/hipe/main/hipe_main.erl b/lib/hipe/main/hipe_main.erl new file mode 100644 index 0000000000..fe9bc83fd2 --- /dev/null +++ b/lib/hipe/main/hipe_main.erl @@ -0,0 +1,549 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @doc This is the HiPE compiler's main "loop". +%% +%%

Purpose

+%% +%%

This module provides code which compiles a single Erlang +%% function, represented as linear ICode all the way down to a linear +%% native code representation (which depends on the 'hipe_target_arch' +%% global variable).

+%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%===================================================================== + +-module(hipe_main). +-export([compile_icode/4]). + +%%===================================================================== + +-ifndef(DEBUG). +-define(DEBUG,1). +-endif. + +-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. + +-include("hipe.hrl"). +-include("../icode/hipe_icode.hrl"). +%%-include("../rtl/hipe_rtl.hrl"). + +%%===================================================================== + +-type comp_icode_ret() :: {'native',hipe_architecture(),{'unprofiled',_}} + | {'rtl',tuple()}. + +%%===================================================================== + +%% @spec compile_icode(MFA::mfa(), +%% LinearIcode::#icode{}, +%% CompilerOptions::comp_options(), +%% CompServers::#comp_servers()) -> +%% {native,Platform,{unprofiled,NativeCode}} | {rtl,RTLCode} +%% +%% @doc Compiles the Icode (in linear form) of a single MFA down to +%% native code for the platform of the target architecture. +%% CompilerOptions influence the steps of this compilation process. +%% +%%

In particular, the compiler option 'to_rtl' stops +%% compilation after translation to RTL (in which case RTL code is +%% generated). The compiler options must have already been expanded +%% (cf. `hipe:expand_options').

+ +-spec compile_icode(mfa(), #icode{}, comp_options(), #comp_servers{}) -> + comp_icode_ret(). + +compile_icode(MFA, LinearIcode, Options, Servers) -> + compile_icode(MFA, LinearIcode, Options, Servers, get(hipe_debug)). + +%%-------------------------------------------------------------------- +%% +%% The following constraints apply to the passes on Icode: +%% +%% 1. The no_comment pass must be done on linear form; +%% +%% 2. linear_to_cfg, which turns linear form into a CFG, must be +%% performed before any of the passes on CFG form; +%% +%% 3. handle_exceptions must be performed before icode_ssa; +%% +%% 4. split_arith should be performed after icode_ssa for +%% effectiveness reasons (and perhaps to work at all); +%% +%% 5. remove_trivial_bbs should be performed last to tidy up the CFG. +%% +%%--------------------------------------------------------------------- + +compile_icode(MFA, LinearIcode0, Options, Servers, DebugState) -> + %% Set up gensym with the right ranges for this function. + {LMin,LMax} = hipe_icode:icode_label_range(LinearIcode0), + hipe_gensym:set_label_range(icode, LMin, LMax+1), + {VMin,VMax} = hipe_icode:icode_var_range(LinearIcode0), + hipe_gensym:set_var_range(icode, VMin, VMax+1), + %%hipe_icode_pp:pp(LinearIcode0), + ?opt_start_timer("Icode"), + LinearIcode1 = icode_no_comment(LinearIcode0, Options), + IcodeCfg0 = icode_linear_to_cfg(LinearIcode1, Options), + %%hipe_icode_cfg:pp(IcodeCfg1), + IcodeCfg1 = icode_handle_exceptions(IcodeCfg0, MFA, Options), + IcodeCfg3 = icode_inline_bifs(IcodeCfg1, Options), + pp(IcodeCfg3, MFA, icode, pp_icode, Options, Servers), + IcodeCfg4 = icode_ssa(IcodeCfg3, MFA, Options, Servers), + IcodeCfg5 = icode_split_arith(IcodeCfg4, MFA, Options), + pp(IcodeCfg5, MFA, icode, pp_icode_split_arith, Options, Servers), + IcodeCfg6 = icode_heap_test(IcodeCfg5, Options), + IcodeCfg7 = icode_remove_trivial_bbs(IcodeCfg6, Options), + pp(IcodeCfg7, MFA, icode, pp_opt_icode, Options, Servers), + pp(IcodeCfg7, MFA, icode_liveness, pp_icode_liveness, Options, Servers), + FinalIcode = hipe_icode_cfg:cfg_to_linear(IcodeCfg7), + ?opt_stop_timer("Icode"), + LinearRTL = ?option_time(icode_to_rtl(MFA,FinalIcode,Options, Servers), + "RTL", Options), + case proplists:get_bool(to_rtl, Options) of + false -> + rtl_to_native(MFA, LinearRTL, Options, DebugState); + true -> + put(hipe_debug, DebugState), + {rtl, LinearRTL} + end. + +%%---------------------------------------------------------------- +%% +%% Icode passes +%% +%%---------------------------------------------------------------- + +icode_no_comment(LinearIcode, Options) -> + case proplists:get_bool(remove_comments, Options) of + true -> + ?option_time(hipe_icode:strip_comments(LinearIcode), + "Icode remove comments", Options); + _ -> + LinearIcode + end. + +icode_linear_to_cfg(LinearIcode, Options) -> + ?option_time(hipe_icode_cfg:linear_to_cfg(LinearIcode), + "transform linear Icode to CFG", Options). + +icode_ssa_binary_pass(IcodeSSA, Options) -> + case proplists:get_bool(binary_opt, Options) of + true -> + ?option_time(hipe_icode_bincomp:cfg(IcodeSSA), + "Icode binary pass", Options); + false -> + IcodeSSA + end. + +icode_handle_exceptions(IcodeCfg, MFA, Options) -> + debug("Icode fix catches: ~w~n", [MFA], Options), + ?option_time(hipe_icode_exceptions:fix_catches(IcodeCfg), + "Icode fix catches", Options). + +icode_inline_bifs(IcodeCfg, Options) -> + case proplists:get_bool(icode_inline_bifs, Options) of + true -> + ?option_time(hipe_icode_inline_bifs:cfg(IcodeCfg), + "Icode inline bifs", Options); + false -> + IcodeCfg + end. + +%%--------------------------------------------------------------------- + +icode_split_arith(IcodeCfg, MFA, Options) -> + case proplists:get_bool(split_arith, Options) orelse + proplists:get_bool(split_arith_unsafe, Options) of + true -> + ?option_time(hipe_icode_split_arith:cfg(IcodeCfg, MFA, Options), + "Icode split arith", Options); + false -> + IcodeCfg + end. + +icode_heap_test(IcodeCfg, Options) -> + ?option_time(hipe_icode_heap_test:cfg(IcodeCfg), + "Icode heap_test", Options). + +icode_remove_trivial_bbs(IcodeCfg, Options) -> + ?option_time(hipe_icode_cfg:remove_trivial_bbs(IcodeCfg), + "Icode trivial BB removal", Options). + +pp(Cfg, MFA, Level, PrintOption, Options, Servers) -> + perform_io(pp_fun(Cfg, MFA, get_pp_module(Level), + proplists:get_value(PrintOption, Options)), + Servers#comp_servers.pp_server). + +pp_fun(Cfg, MFA, PP, PrintOptionValue) -> + case PrintOptionValue of + true -> + fun() -> PP:pp(Cfg) end; + {only, Lst} when is_list(Lst) -> + case lists:member(MFA, Lst) of + true -> + fun() -> PP:pp(Cfg) end; + false -> + no_fun + end; + {only, MFA} -> + fun() -> PP:pp(Cfg) end; + {file, FileName} -> + fun() -> + {ok, File} = file:open(FileName, [write,append]), + PP:pp(File, Cfg), + file:close(File) + end; + _ -> + no_fun + end. + +get_pp_module(icode) -> hipe_icode_cfg; +get_pp_module(rtl) -> hipe_rtl_cfg; +get_pp_module(rtl_linear) -> hipe_rtl; +get_pp_module(icode_liveness) -> hipe_icode_liveness; +get_pp_module(rtl_liveness) -> hipe_rtl_liveness. + +perform_io(no_fun, _) -> ok; +perform_io(Fun,PPServer) when is_pid(PPServer) -> + PPServer ! {print,Fun}; +perform_io(Fun, undefined) -> + Fun(). + + +%%-------------------------------------------------------------------- +%% +%% Icode passes on SSA form. The following constraints are applicable: +%% +%% 1. ssa_convert must be first and ssa_unconvert last +%% +%% 2. ssa_dead_code must be run after the other passes +%% +%% 3. The present order was chosen to maximize effectiveness as +%% ssa_const_prop might make ssa_type_info more effective +%% +%% 4. ssa_check could be put in between all passes to make sure that +%% they preserve SSA-ness +%% +%%--------------------------------------------------------------------- + +icode_ssa(IcodeCfg0, MFA, Options, Servers) -> + ?opt_start_timer("Icode SSA passes"), + IcodeSSA0 = icode_ssa_convert(IcodeCfg0, Options), + pp(IcodeSSA0, MFA, icode, pp_icode_ssa, Options, Servers), + IcodeSSA1 = icode_ssa_const_prop(IcodeSSA0, Options), + IcodeSSA2 = icode_ssa_dead_code_elimination(IcodeSSA1, Options), + IcodeSSA3 = icode_ssa_copy_prop(IcodeSSA2, Options), + IcodeSSA3a = icode_ssa_binary_pass(IcodeSSA3, Options), + IcodeSSA4 = icode_ssa_type(IcodeSSA3a, MFA, Options, Servers), + IcodeSSA5 = icode_ssa_dead_code_elimination(IcodeSSA4, Options), + IcodeSSA6 = icode_ssa_struct_reuse(IcodeSSA5, Options), + icode_ssa_check(IcodeSSA6, Options), %% just for sanity + pp(IcodeSSA6, MFA, icode, pp_icode_ssa, Options, Servers), + IcodeCfg = icode_ssa_unconvert(IcodeSSA6, Options), + ?opt_stop_timer("Icode SSA passes"), + IcodeCfg. + +icode_ssa_type(IcodeSSA, MFA, Options, Servers) -> + case proplists:get_value(icode_type, Options) of + false -> IcodeSSA; + undefined -> IcodeSSA; + true -> + AnnIcode1 = icode_ssa_type_info(IcodeSSA, MFA, Options, Servers), + pp(AnnIcode1, MFA, icode, pp_typed_icode, Options, Servers), + AnnIcode2 = + case proplists:get_bool(inline_fp, Options) of + true -> hipe_icode_fp:cfg(AnnIcode1); + false -> AnnIcode1 + end, + AnnIcode3 = icode_range_analysis(AnnIcode2, MFA, Options, Servers), + pp(AnnIcode3, MFA, icode, pp_range_icode, Options, Servers), + hipe_icode_type:unannotate_cfg(AnnIcode3) + end. + +icode_ssa_convert(IcodeCfg, Options) -> + ?option_time(hipe_icode_ssa:convert(IcodeCfg), + "Icode SSA conversion", Options). + +icode_ssa_const_prop(IcodeSSA, Options) -> + case proplists:get_bool(icode_ssa_const_prop, Options) of + true -> + ?option_time(Tmp=hipe_icode_ssa_const_prop:propagate(IcodeSSA), + "Icode SSA sparse conditional constant propagation", Options), + ?option_time(hipe_icode_ssa:remove_dead_code(Tmp), + "Icode SSA dead code elimination pass 1", Options); + false -> + IcodeSSA + end. + +icode_ssa_copy_prop(IcodeSSA, Options) -> + case proplists:get_bool(icode_ssa_copy_prop, Options) of + true -> + ?option_time(hipe_icode_ssa_copy_prop:cfg(IcodeSSA), + "Icode SSA copy propagation", Options); + false -> + IcodeSSA + end. + +icode_ssa_struct_reuse(IcodeSSA, Options) -> + case proplists:get_value(icode_ssa_struct_reuse, Options) of + true -> + ?option_time(hipe_icode_ssa_struct_reuse:struct_reuse(IcodeSSA), + "Icode SSA structure reuse", Options); + _ -> + IcodeSSA + end. + +icode_ssa_type_info(IcodeSSA, MFA, Options, Servers) -> + ?option_time(hipe_icode_type:cfg(IcodeSSA, MFA, Options, Servers), + "Icode SSA type info", Options). + +icode_range_analysis(IcodeSSA, MFA, Options, Servers) -> + case proplists:get_bool(icode_range, Options) of + true -> + ?option_time(hipe_icode_range:cfg(IcodeSSA, MFA, Options, Servers), + "Icode SSA integer range analysis", Options); + false -> + IcodeSSA + end. + +icode_ssa_dead_code_elimination(IcodeSSA, Options) -> + IcodeSSA1 = ?option_time(hipe_icode_ssa:remove_dead_code(IcodeSSA), + "Icode SSA dead code elimination pass 2", + Options), + hipe_icode_cfg:remove_unreachable_code(IcodeSSA1). + +icode_ssa_check(IcodeSSA, Options) -> + ?when_option(icode_ssa_check, Options, + ?option_time(hipe_icode_ssa:check(IcodeSSA), + "Icode check for SSA-ness", Options)). + +icode_ssa_unconvert(IcodeSSA, Options) -> + ?option_time(hipe_icode_ssa:unconvert(IcodeSSA), + "Icode SSA unconversion", Options). + + +%%===================================================================== +%% +%% @spec icode_to_rtl(MFA::mfa(), Icode, options()) -> Linear_RTL_code +%% @end +%%===================================================================== + +%%--------------------------------------------------------------------- +%% +%% The passes on RTL are as follows: +%% +%% 1. The translation to RTL, in particular the way exceptions are +%% currently handled in RTL, introduces some unreachable code. +%% Therefore, unreachable code is removed early on followed by a +%% pass that removes trivial basic blocks so as to have smaller +%% code to play with. +%% +%% 2. Code is then converted to SSA so as to perform as many +%% optimizations as possible in this pass. +%% Currently, the following optimizations are performed on SSA: +%% - sparse conditional constant propagation (controlled by an option) +%% - dead code elimination +%% - detection of available exceptions +%% - partial redundancy elimination (controlled by an option) +%% Finally, code is converted back to non-SSA form. +%% +%% 3. rtl_symbolic expands some symbolic instructions. +%% +%% 4. rtl_lcm performs a lazy code motion on RTL. +%% +%%---------------------------------------------------------------------- + +icode_to_rtl(MFA, Icode, Options, Servers) -> + debug("ICODE -> RTL: ~w, ~w~n", [MFA, hash(Icode)], Options), + LinearRTL = translate_to_rtl(Icode, Options), + pp(LinearRTL, MFA, rtl_linear, pp_rtl_linear, Options, Servers), + RtlCfg = initialize_rtl_cfg(LinearRTL, Options), + %% hipe_rtl_cfg:pp(RtlCfg), + RtlCfg0 = hipe_rtl_cfg:remove_unreachable_code(RtlCfg), + RtlCfg1 = hipe_rtl_cfg:remove_trivial_bbs(RtlCfg0), + %% hipe_rtl_cfg:pp(RtlCfg1), + RtlCfg2 = rtl_ssa(RtlCfg1, Options), + RtlCfg3 = rtl_symbolic(RtlCfg2, Options), + %% hipe_rtl_cfg:pp(RtlCfg3), + pp(RtlCfg3, MFA, rtl_liveness, pp_rtl_liveness, Options, Servers), + RtlCfg4 = rtl_lcm(RtlCfg3, Options), + pp(RtlCfg4, MFA, rtl, pp_rtl, Options, Servers), + LinearRTL1 = hipe_rtl_cfg:linearize(RtlCfg4), + LinearRTL2 = hipe_rtl_cleanup_const:cleanup(LinearRTL1), + %% hipe_rtl:pp(standard_io, LinearRTL2), + LinearRTL2. + +translate_to_rtl(Icode, Options) -> + %% GC tests should have been added in the conversion to Icode. + ?option_time(hipe_icode2rtl:translate(Icode, Options), + "translate", Options). + +initialize_rtl_cfg(LinearRTL, Options) -> + ?option_time(hipe_rtl_cfg:init(LinearRTL), "to cfg", Options). + +rtl_symbolic(RtlCfg, Options) -> + ?option_time(hipe_rtl_symbolic:expand(RtlCfg), + "Expansion of symbolic instructions", Options). + +%%---------------------------------------------------------------------- +%% +%% RTL passes on SSA form. The following constraints are applicable: +%% +%% 1. ssa_convert must be first and ssa_unconvert last. +%% +%% 2. dead_code_elimination should be performed after conditional +%% constant propagation in order to cleanup dead code that might +%% be created by that pass. +%% +%% 3. avail_expr ... (PER ADD THIS) +%% +%% 4. rtl_ssapre performs A-SSAPRE and has to be done after all other +%% optimizations. +%% +%% 5. ssa_check could be put in between all passes to make sure that +%% they preserve SSA-ness. +%% +%%---------------------------------------------------------------------- + +rtl_ssa(RtlCfg0, Options) -> + case proplists:get_bool(rtl_ssa, Options) of + true -> + ?opt_start_timer("RTL SSA passes"), + RtlSSA0 = rtl_ssa_convert(RtlCfg0, Options), + RtlSSA1 = rtl_ssa_const_prop(RtlSSA0, Options), + %% RtlSSA1a = rtl_ssa_copy_prop(RtlSSA1, Options), + RtlSSA2 = rtl_ssa_dead_code_elimination(RtlSSA1, Options), + RtlSSA3 = rtl_ssa_avail_expr(RtlSSA2, Options), + RtlSSA4 = rtl_ssapre(RtlSSA3, Options), + %% rtl_ssa_check(RtlSSA4, Options), %% just for sanity + RtlCfg = rtl_ssa_unconvert(RtlSSA4, Options), + case proplists:get_bool(pp_rtl_ssa, Options) of + true -> + io:format("%%------------- After SSA un-conversion -----------\n"), + hipe_rtl_cfg:pp(RtlCfg); + false -> + ok + end, + ?opt_stop_timer("RTL SSA passes"), + RtlCfg; + false -> + RtlCfg0 + end. + +rtl_ssa_convert(RtlCfg, Options) -> + case proplists:get_bool(pp_rtl_ssa, Options) of + true -> + io:format("%%------------- Before SSA conversion --------------\n"), + hipe_rtl_cfg:pp(RtlCfg), + io:format("%%------------- After SSA conversion --------------\n"), + RtlCfgSSA = hipe_rtl_ssa:convert(RtlCfg), + hipe_rtl_cfg:pp(RtlCfgSSA), + io:format("%%------------- SSA check warnings below -----------\n"), + hipe_rtl_ssa:check(RtlCfgSSA), + RtlCfgSSA; + false -> + ?option_time(hipe_rtl_ssa:convert(RtlCfg), + "RTL SSA conversion", Options) + end. + +rtl_ssa_const_prop(RtlCfgSSA, Options) -> + case proplists:get_bool(rtl_ssa_const_prop, Options) of + true -> + ?option_time(hipe_rtl_ssa_const_prop:propagate(RtlCfgSSA), + "RTL SSA sparse conditional constant propagation", Options); + false -> + RtlCfgSSA + end. + +rtl_ssa_dead_code_elimination(RtlCfgSSA, Options) -> + ?option_time(hipe_rtl_ssa:remove_dead_code(RtlCfgSSA), + "RTL SSA dead code elimination", Options). + +rtl_ssa_avail_expr(RtlCfgSSA, Options) -> + ?option_time(hipe_rtl_ssa_avail_expr:cfg(RtlCfgSSA), + "RTL SSA heap optimizations", Options). + +%%--------------------------------------------------------------------- + +rtl_ssapre(RtlCfg, Options) -> + case proplists:get_bool(rtl_ssapre, Options) of + true -> + ?opt_start_timer("Partial Redundancy Elimination (A-SSAPRE)"), + NewRtlCfg = hipe_rtl_ssapre:rtl_ssapre(RtlCfg, Options), + ?opt_stop_timer("Partial Redundancy Elimination (A-SSAPRE)"), + NewRtlCfg; + false -> + RtlCfg + end. + +%%--------------------------------------------------------------------- + +rtl_ssa_unconvert(RtlCfgSSA, Options) -> + ?option_time(hipe_rtl_ssa:unconvert(RtlCfgSSA), + "RTL SSA un-convert", Options). + +%%--------------------------------------------------------------------- + +rtl_lcm(RtlCfg, Options) -> + case proplists:get_bool(rtl_lcm, Options) of + true -> + ?opt_start_timer("RTL lazy code motion"), + %% ?option_time(hipe_rtl_lcm:rtl_lcm(RtlCfg, Options), + %% "RTL lazy code motion", Options); + RtlCfg1 = hipe_rtl_lcm:rtl_lcm(RtlCfg, Options), + ?opt_stop_timer("RTL lazy code motion"), + RtlCfg1; + false -> + RtlCfg + end. + +%%===================================================================== +%% Translation to native code takes place in the corresponding back-end +%%===================================================================== + +rtl_to_native(MFA, LinearRTL, Options, DebugState) -> + ?opt_start_timer("Native code"), + LinearNativeCode = + case get(hipe_target_arch) of + ultrasparc -> + hipe_sparc_main:rtl_to_sparc(MFA, LinearRTL, Options); + powerpc -> + hipe_ppc_main:rtl_to_ppc(MFA, LinearRTL, Options); + arm -> + hipe_arm_main:rtl_to_arm(MFA, LinearRTL, Options); + x86 -> + hipe_x86_main:rtl_to_x86(MFA, LinearRTL, Options); + amd64 -> + hipe_amd64_main:rtl_to_amd64(MFA, LinearRTL, Options) + end, + ?opt_stop_timer("Native code"), + put(hipe_debug, DebugState), + LinearNativeCode. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Debugging stuff ... +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +debug(Text, Args, Options) -> + ?when_option(debug, Options, ?msg(Text,Args)). + +hash(X) -> + erlang:phash(X, 16#7f3f5f1). diff --git a/lib/hipe/misc/Makefile b/lib/hipe/misc/Makefile new file mode 100644 index 0000000000..d5c395855a --- /dev/null +++ b/lib/hipe/misc/Makefile @@ -0,0 +1,113 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +ifdef HIPE_ENABLED +HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi +else +HIPE_MODULES = +endif +MODULES = hipe_consttab hipe_gensym $(HIPE_MODULES) + +HRL_FILES= hipe_sdi.hrl +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/misc + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/misc + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +distclean: clean +realclean: clean + +$(EBIN)/hipe_consttab.beam: hipe_consttab.hrl +$(EBIN)/hipe_data_pp.beam: hipe_consttab.hrl +$(EBIN)/hipe_pack_constants.beam: hipe_consttab.hrl ../../kernel/src/hipe_ext_format.hrl +$(EBIN)/hipe_sdi.beam: hipe_sdi.hrl diff --git a/lib/hipe/misc/hipe_consttab.erl b/lib/hipe/misc/hipe_consttab.erl new file mode 100644 index 0000000000..c381e6a057 --- /dev/null +++ b/lib/hipe/misc/hipe_consttab.erl @@ -0,0 +1,503 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% CONSTTAB - maps labels to constants. +%%

+%% Note: 'constant' is a misnomer throughout this code. +%%

+%%

+%% There are two different types of constants that can be stored: +%%

    +%%
  • Erlang terms
  • +%%
  • Blocks of binary data
  • +%%
+%%

+%%

+%% Erlang terms are just what you would expect, you can store any +%% Erlang term in the constant table. +%% The term is assumed to be loaded to the place in memory denoted by the +%% label returned by the insertion function. +%%

+%%

+%% Blocks of binary data comes in some different shapes, you can +%% either insert a block of integers (of byte, word (4 bytes), or +%% word (8 bytes) size) or a list of references to code. +%% These references will then be threated as word sized addresses +%% and can be used for jumptables. +%% The list of references can have an optional ordering, so that +%% you can create a jumptable that will be sorted on the load-time +%% representation of e.g. atoms. +%%

+%% @type ctdata() = #ctdata{}. See {@link mk_ctdata/4}. +%% @type ct_type() = term | block | sorted_block | ref +%% @type data() = term() | [term()] | [byte()] | internal(). +%% This type is dependent on ct_type +%%
    +%%
  • If ct_type() = term -- data() = term()
  • +%%
  • If ct_type() = block -- data() = [byte()]
  • +%%
  • If ct_type() = sorted_block -- data() = [term()]
  • +%%
  • If ct_type() = ref -- data() = internal()
  • +%%
+%% @type ct_alignment(). +%% Alignment is always a power of two equal to the number of bytes +%% in the machine word. +%% @end +%% @type byte(). B is an integer between 0 and 255. +%% @type hipe_consttab(). +%% An abstract datatype for storing data. +%% @end +%% Internal note: +%% A hipe_consttab is a tuple {Data, ReferedLabels, NextConstLabel} +%% @type hipe_constlbl(). +%% An abstract datatype for referring to data. +%% @type element_type() = byte | word | ctab_array() +%% @type ctab_array() = {ctab_array, Type::element_type(), +%% NoElements::pos_integer()} +%% @type block() = [integer() | label_ref()] +%% @type label_ref() = {label, Label::code_label()} +%% @type code_label() = hipe_sparc:label_name() | hipe_x86:label_name() +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-module(hipe_consttab). + +-export([new/0, % new() -> ConstTab + insert_term/2, % insert_term(ConstTab, Term) -> {NewTab, Lbl} + %% insert_fun/2, % insert_term(ConstTab, Fun) -> {NewTab, Lbl} + %% insert_word/2, % insert_word(ConstTab, Value) -> {NewTab, Lbl} + insert_sorted_block/2, % insert_word(ConstTab, ValueList) -> + % {NewTab, Lbl} + insert_sorted_block/4, + insert_block/3, + %% insert_global_word/2, + %% insert_global_block/4, + %% update_word/3, % update_word(ConstTab, Value) -> {NewTab, Lbl} + %% update_block/5, + %% update_global_word/3, + %% update_global_block/5, + lookup/2, % lookup(Key, ConstTab) -> [Term|Block] + labels/1, % labels(ConstTab) -> LabelList + referred_labels/1, % referred_labels(ConstTab) -> LabelList + update_referred_labels/2, + decompose/1, + size_of/1, + const_type/1, + const_align/1, + const_exported/1, + const_data/1, + const_size/1 + %% block_size/1 % size of a block in bytes + ]). + +%%----------------------------------------------------------------------------- + +-include("hipe_consttab.hrl"). + +-type code_label() :: term(). % XXX: FIXME +-type label_ref() :: {'label', code_label()}. +-type block() :: [hipe_constlbl() | label_ref()]. + +-type ctab_array() :: {'ctab_array', 'byte' | 'word', pos_integer()}. +-type element_type() :: 'byte' | 'word' | ctab_array(). + +-type sort_order() :: term(). % XXX: FIXME + +%%----------------------------------------------------------------------------- + +%% @doc Create a new constant table. +-spec new() -> hipe_consttab(). +new() -> {tree_empty(), [], 0}. + + +%% @spec insert_term(ConstTab::hipe_consttab(), Term::term()) -> {NewTab, Lbl} +%% NewTab = hipe_consttab() +%% Lbl = hipe_constlbl() +%% @doc Inserts an erlang term into the const table if the term was not +%% present before, otherwise do nothing. +-spec insert_term(hipe_consttab(), term()) -> {hipe_consttab(),hipe_constlbl()}. +insert_term(ConstTab, Term) -> + case lookup_const(ConstTab, term, word_size(), false, Term) of + {value, Label} -> + {ConstTab, Label}; + none -> + insert_const(ConstTab, term, word_size(), false, Term) + end. + + +%% %% @spec insert_fun(ConstTab::hipe_consttab(), Term::term()) -> {NewTab, Lbl} +%% %% NewTab = hipe_consttab() +%% %% Lbl = hipe_constlbl() +%% %% @doc Inserts a Fun into the const table. +%% %% Don't ask me what this is for... +%% -spec insert_fun(hipe_consttab(), term()) -> {hipe_consttab(), hipe_constlbl()}. +%% insert_fun(ConstTab, Fun) -> +%% insert_const(ConstTab, term, word_size(), false, Fun). + + +%% @spec (ConstTab::hipe_consttab(), TermList::[term()]) -> {NewTab, Lbl} +%% NewTab = hipe_consttab() +%% Lbl = hipe_constlbl() +%% @doc Inserts a list of terms into the const table. +-spec insert_sorted_block(hipe_consttab(), [term()]) -> {hipe_consttab(), hipe_constlbl()}. +insert_sorted_block(CTab, TermList) -> + insert_const(CTab, sorted_block, word_size(), false, TermList). + +%% %% @spec (ConstTab::hipe_consttab(), InitVal::integer()) -> {NewTab, Lbl} +%% %% NewTab = hipe_consttab() +%% %% Lbl = hipe_constlbl() +%% %% @doc Inserts a word into the const table. +%% %% Shorthand for inserting a word. +%% insert_word(ConstTab, InitVal) -> +%% insert_block(ConstTab, word, [InitVal]). + +%% %% @spec (ConstTab::hipe_consttab(), InitVal::integer()) -> {NewTab, Lbl} +%% %% NewTab = hipe_consttab() +%% %% Lbl = hipe_constlbl() +%% %% @doc Inserts a word into the const table. +%% %% This constant should be exported from the function... +%% %% Note Global constants are +%% %% not supported in current version of HiPE. +%% insert_global_word(ConstTab, InitVal) -> +%% insert_global_block(ConstTab, word_size(), word, [InitVal]). + + +%% @spec (ConstTab::hipe_consttab(), +%% ElementType::element_type(), +%% InitList::block()) -> {hipe_consttab(), hipe_constlbl()} +%% @doc Inserts a block into the const table. +%% The block can consist of references to labels in the code. +%% This is used for jump tables. These references should be tracked +%% and the corresponding BBs should not be considered dead. +-spec insert_block(hipe_consttab(), element_type(), block()) -> + {hipe_consttab(), hipe_constlbl()}. +insert_block({ConstTab, RefToLabels, NextLabel}, ElementType, InitList) -> + ReferredLabels = get_labels(InitList, []), + NewRefTo = ReferredLabels ++ RefToLabels, + {NewTa, Id} = insert_const({ConstTab, NewRefTo, NextLabel}, + block, word_size(), false, + {ElementType,InitList}), + {insert_backrefs(NewTa, Id, ReferredLabels), Id}. + + +%% @spec (ConstTab::hipe_consttab(), ElementType::element_type(), +%% InitList::block(), SortOrder) -> {hipe_consttab(), hipe_constlbl()} +%% @doc Inserts a block into the const table. +%% The block can consist of references to labels in the code. +%% This is used for jump tables. These references should be tracked +%% and the corresponding BBs should not be considered dead. +%% At load-time the block will be sorted according to SortOrder. +%% This is used to make jump tables on atom indices. +-spec insert_sorted_block(hipe_consttab(), element_type(), block(), sort_order()) -> + {hipe_consttab(), hipe_constlbl()}. +insert_sorted_block({ConstTab, RefToLabels, NextLabel}, + ElementType, InitList, SortOrder) -> + ReferredLabels = get_labels(InitList, []), + NewRefTo = ReferredLabels ++ RefToLabels, + {NewTa, Id} = insert_const({ConstTab, NewRefTo, NextLabel}, + block, word_size(), false, + {ElementType, InitList, SortOrder}), + {insert_backrefs(NewTa, Id, ReferredLabels), Id}. + +insert_backrefs(Tbl, From, ToLabels) -> + lists:foldl(fun(To, Tab) -> + insert_ref(Tab, From, To) + end, Tbl, ToLabels). + +insert_ref({Table, RefToLabels, NextLblNr}, From, To) -> + Ref = {To, ref}, + case tree_lookup(Ref, Table) of + none -> + {tree_insert(Ref, [From], Table), RefToLabels, NextLblNr}; + {value, RefList} -> + {tree_update(Ref, [From|RefList], Table), RefToLabels, NextLblNr} + end. + +find_refs(To, {Table,_,_}) -> + %% returns 'none' or {value, V} + tree_lookup({To, ref}, Table). + +delete_ref(To, {ConstTab, RefToLabels, NextLabel}) -> + {tree_delete({To, ref}, ConstTab), RefToLabels, NextLabel}. + +%% TODO: handle refs to labels. +%% insert_global_block(ConstTab, Align, ElementType, InitList) -> +%% ByteList = decompose(size_of(ElementType), InitList), +%% insert_const(ConstTab, block, Align, true, {byte,ByteList}). + +get_labels([{label, L}|Rest], Acc) -> + get_labels(Rest, [L|Acc]); +get_labels([I|Rest], Acc) when is_integer(I) -> + get_labels(Rest, Acc); +get_labels([], Acc) -> + Acc. + +%% @spec size_of(element_type()) -> pos_integer() +%% @doc Returns the size in bytes of an element_type. +%% The is_atom/1 guard in the clause handling arrays +%% constraints the argument to 'byte' | 'word' +-spec size_of(element_type()) -> pos_integer(). +size_of(byte) -> 1; +size_of(word) -> word_size(); +size_of({ctab_array,S,N}) when is_atom(S), is_integer(N), N > 0 -> + N * size_of(S). + +%% @spec decompose({element_type(), block()}) -> [byte()] +%% @doc Turns a block into a list of bytes. +%% Note: Be careful with the byte order here. +-spec decompose({element_type(), block()}) -> [byte()]. +decompose({ElementType, Data}) -> + decompose(size_of(ElementType), Data). + +decompose(_Bytes, []) -> + []; +decompose(Bytes, [X|Xs]) -> + number_to_bytes(Bytes, X, decompose(Bytes, Xs)). + +number_to_bytes(0, X, Bytes) when is_integer(X) -> + Bytes; +number_to_bytes(N, X, Bytes) -> + Byte = X band 255, + number_to_bytes(N-1, X bsr 8, [Byte|Bytes]). + +%% @spec block_size({element_type(), block()}) -> non_neg_integer() +%% @doc Returns the size in bytes of a block. +block_size({ElementType, Block}) -> + length(Block) * size_of(ElementType); +block_size({ElementType, Block, _SortOrder}) -> + length(Block) * size_of(ElementType). + + +%%-------------------- +%% ctdata and friends +%%-------------------- + +-type ct_type() :: 'block' | 'ref' | 'sorted_block' | 'term'. + +-record(ctdata, {type :: ct_type(), + alignment :: ct_alignment(), + exported :: boolean(), + data :: term()}). +-type ctdata() :: #ctdata{}. + +-spec mk_ctdata(Type::ct_type(), Alignment::ct_alignment(), + Exported::boolean(), Data::term()) -> ctdata(). +mk_ctdata(Type, Alignment, Exported, Data) -> + #ctdata{type = Type, alignment = Alignment, exported = Exported, data = Data}. + +-spec const_type(ctdata()) -> ct_type(). +const_type(#ctdata{type = Type}) -> Type. + +-spec const_align(ctdata()) -> ct_alignment(). +const_align(#ctdata{alignment = Alignment}) -> Alignment. + +-spec const_exported(ctdata()) -> boolean(). +const_exported(#ctdata{exported = Exported}) -> Exported. + +-spec const_data(ctdata()) -> term(). +const_data(#ctdata{data = Data}) -> Data. + +-spec update_const_data(ctdata(), {_,[_]} | {_,[_],_}) -> ctdata(). +update_const_data(CTData, Data) -> + CTData#ctdata{data = Data}. + +%% @doc Returns the size in bytes. +-spec const_size(ctdata()) -> non_neg_integer(). +const_size(Constant) -> + case const_type(Constant) of + %% term: you can't and shouldn't ask for its size + block -> block_size(const_data(Constant)); + sorted_block -> length(const_data(Constant)) * word_size() + end. + +-spec word_size() -> ct_alignment(). +word_size() -> + hipe_rtl_arch:word_size(). + + +%%-------------------- +%% Update a label +%%-------------------- + + +%% TODO: Remove RefsTOfrom overwitten labels... +%% update_word(ConstTab, Label, InitVal) -> +%% update_block(ConstTab, Label, word_size(), word, [InitVal]). +%% +%% update_global_word(ConstTab, Label, InitVal) -> +%% update_global_block(ConstTab, Label, word_size(), word, [InitVal]). + +%% +%% Update info for an existing label +%% +%% Returns NewTable +%% +%% +%% update_block(ConstTab, Label, Align, ElementType, InitList) -> +%% ByteList = decompose(size_of(ElementType), InitList), +%% update_const(ConstTab, Label, block, Align, false, {ElementType,ByteList}). + +update_block_labels(ConstTab, DataLbl, OldLbl, NewLbl) -> + Const = lookup(DataLbl, ConstTab), + Old = {label, OldLbl}, + case const_data(Const) of + {Type, Data} -> + NewData = update_data(Data, Old, NewLbl), + update(ConstTab, DataLbl, update_const_data(Const, {Type,NewData})); + {Type, Data, Order} -> + NewData = update_data(Data, Old, NewLbl), + update(ConstTab, DataLbl, update_const_data(Const, {Type,NewData,Order})) + end. + +update_data(Data, Old, New) -> + [if Lbl =:= Old -> {label, New}; true -> Lbl end || Lbl <- Data]. + +%% update_global_block(ConstTab, Label, Align, ElementType, InitList) -> +%% ByteList = decompose(size_of(ElementType), InitList), +%% update_const(ConstTab, Label, block, Align, true, ByteList). + +%% +%% Insert a constant in the table, returns {NewTable, Label}. +%% + +insert_const({Table, RefToLabels, NextLblNr}, Type, Alignment, Exported, Data) -> + Const = mk_ctdata(Type, Alignment, Exported, Data), + {{tree_insert(NextLblNr, Const, Table), RefToLabels, NextLblNr+1}, + NextLblNr}. + +%% %% Update information for a label, returns NewTable. +%% %% (Removes old info.) +%% +%% update_const({Table, RefToLabels, NextLblNr}, Label, Type, Alignment, Exported, Data) -> +%% Const = mk_ctdata(Type, Alignment, Exported, Data), +%% {tree_update(Label, Const, Table), RefToLabels, NextLblNr}. + +update({Table, RefToLabels, NextLblNr}, Label, NewConst) -> + {tree_update(Label, NewConst, Table), RefToLabels, NextLblNr}. + +%% @spec lookup(hipe_constlbl(), hipe_consttab()) -> ctdata() +%% @doc Lookup a label. +-spec lookup(hipe_constlbl(), hipe_consttab()) -> ctdata(). +lookup(Lbl, {Table, _RefToLabels, _NextLblNr}) -> + tree_get(Lbl, Table). + +%% Find out if a constant term is present in the constant table. +lookup_const({Table, _RefToLabels, _NextLblNr}, + Type, Alignment, Exported, Data) -> + Const = mk_ctdata(Type, Alignment, Exported, Data), + tree_lookup_key_for_value(Const, Table). + +%% @doc Return the labels bound in a table. +-spec labels(hipe_consttab()) -> [hipe_constlbl() | {hipe_constlbl(), 'ref'}]. +labels({Table, _RefToLabels, _NextLblNr}) -> + tree_keys(Table). + +%% @spec referred_labels(hipe_consttab()) -> [hipe_constlbl()] +%% @doc Return the referred labels bound in a table. +-spec referred_labels(hipe_consttab()) -> [hipe_constlbl()]. +referred_labels({_Table, RefToLabels, _NextLblNr}) -> + RefToLabels. + + +%% +%% Change label names in constant blocks (jump_tables). +%% +-spec update_referred_labels(hipe_consttab(), + [{hipe_constlbl(), hipe_constlbl()}]) -> + hipe_consttab(). +update_referred_labels(Table, LabelMap) -> + %% io:format("LabelMap: ~w\nTb:~w\n", [LabelMap, Table]), + {Tb, Refs, Next} = + lists:foldl( + fun({OldLbl, NewLbl}, Tbl) -> + case find_refs(OldLbl, Tbl) of + none -> + Tbl; + {value, DataLbls} -> + %% A label may be referred several times. + UniqueLbls = ordsets:from_list(DataLbls), + lists:foldl(fun(DataLbl, AccTbl) -> + insert_ref( + delete_ref(OldLbl, + update_block_labels(AccTbl, DataLbl, OldLbl, NewLbl)), + DataLbl, NewLbl) + end, + Tbl, + UniqueLbls) + end + end, + Table, + LabelMap), + NewRefs = [case lists:keyfind(Lbl, 1, LabelMap) of + {_, New} -> New; + false -> Lbl + end || Lbl <- Refs], + %% io:format("NewTb:~w\n", [{Tb, NewRefs, Next}]), + {Tb, NewRefs, Next}. + + +%%----------------------------------------------------------------------------- +%% primitives for constants +%%----------------------------------------------------------------------------- + +%% Since using `gb_trees' is not safe because of term ordering, we use +%% the `dict' module instead since it matches with =:= on the keys. + +tree_keys(T) -> + dict:fetch_keys(T). + +-spec tree_to_list(dict()) -> [{_, _}]. +tree_to_list(T) -> + dict:to_list(T). + +tree_get(Key, T) -> + dict:fetch(Key, T). + +tree_update(Key, Val, T) -> + dict:store(Key, Val, T). + +tree_insert(Key, Val, T) -> + dict:store(Key, Val, T). + +tree_delete(Key, T) -> + dict:erase(Key, T). + +tree_lookup(Key, T) -> + case dict:find(Key, T) of + {ok, Val} -> + {value, Val}; + error -> + none + end. + +-spec tree_empty() -> dict(). +tree_empty() -> + dict:new(). + +-spec tree_lookup_key_for_value(ctdata(), dict()) -> 'none' | {'value', _}. +tree_lookup_key_for_value(Val, T) -> + tree_lookup_key_for_value_1(tree_to_list(T), Val). + +-spec tree_lookup_key_for_value_1([{_,_}], ctdata()) -> 'none' | {'value', _}. +tree_lookup_key_for_value_1([{Key, Val}|_], Val) -> + {value, Key}; +tree_lookup_key_for_value_1([_|Left], Val) -> + tree_lookup_key_for_value_1(Left, Val); +tree_lookup_key_for_value_1([], _Val) -> + none. diff --git a/lib/hipe/misc/hipe_consttab.hrl b/lib/hipe/misc/hipe_consttab.hrl new file mode 100644 index 0000000000..39018dac34 --- /dev/null +++ b/lib/hipe/misc/hipe_consttab.hrl @@ -0,0 +1,27 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%----------------------------------------------------------------------------- + +-type ct_alignment() :: 4 | 8. + +-type hipe_constlbl() :: non_neg_integer(). +-type hipe_consttab() :: {dict(), [hipe_constlbl()], hipe_constlbl()}. + +%%----------------------------------------------------------------------------- diff --git a/lib/hipe/misc/hipe_data_pp.erl b/lib/hipe/misc/hipe_data_pp.erl new file mode 100644 index 0000000000..0f206e8ade --- /dev/null +++ b/lib/hipe/misc/hipe_data_pp.erl @@ -0,0 +1,158 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% Time-stamp: <2008-04-20 14:57:08 richard> +%% ==================================================================== +%% Module : hipe_data_pp +%% Purpose : +%% Notes : +%% History : * 2001-02-25 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_data_pp). +-export([pp/4]). + +%%----------------------------------------------------------------------------- + +-include("hipe_consttab.hrl"). + +-type hipe_code_type() :: 'icode' | 'rtl' | 'arm' | 'ppc' | 'sparc' | 'x86'. + +%%----------------------------------------------------------------------------- +%% +%% Pretty print + +-spec pp(io:device(), hipe_consttab(), hipe_code_type(), string()) -> 'ok'. + +pp(Dev, Table, CodeType, Pre) -> + Ls = hipe_consttab:labels(Table), + lists:foreach(fun ({{_, ref}, _}) -> ok; + ({L, E}) -> pp_element(Dev, L, E, CodeType, Pre) + end, + [{L, hipe_consttab:lookup(L, Table)} || L <- Ls]). + +pp_element(Dev, Name, Element, CodeType, Prefix) -> + %% Alignment + case hipe_consttab:const_align(Element) of + 4 -> ok; %% Wordalignment is assumed + Alignment -> + io:format(Dev, " .align~w\n", [Alignment]) + end, + %% Local or exported? + Exported = hipe_consttab:const_exported(Element), + case CodeType of + rtl -> + case Exported of + true -> + io:format(Dev, "DL~w: ", [Name]); + false -> + io:format(Dev, ".DL~w: ", [Name]) + end; + _ -> + io:format(Dev, "~w ", [Name]) + end, + %% Type and data... + case hipe_consttab:const_type(Element) of + term -> + io:format(Dev, "~w\n", [hipe_consttab:const_data(Element)]); + sorted_block -> + Data = hipe_consttab:const_data(Element), + pp_block(Dev, {word, lists:sort(Data)}, CodeType, Prefix); + block -> + pp_block(Dev, hipe_consttab:const_data(Element), CodeType, Prefix) + end. + +pp_block(Dev, {word, Data, SortOrder}, CodeType, Prefix) -> + case CodeType of + rtl -> + io:format(Dev, "\n",[]); + _ -> + ok + end, + pp_wordlist(Dev, Data, CodeType, Prefix), + case CodeType of + rtl -> + io:format(Dev, ";; Sorted by ~w\n",[SortOrder]); + _ -> + ok + end; +pp_block(Dev, {word, Data}, CodeType, Prefix) -> + case CodeType of + rtl -> + io:format(Dev, ".word\n",[]); + _ -> + ok + end, + pp_wordlist(Dev, Data, CodeType, Prefix); +pp_block(Dev, {byte, Data}, CodeType, _Prefix) -> + case CodeType of + rtl -> + io:format(Dev, ".byte\n ",[]); + _ -> + ok + end, + pp_bytelist(Dev, Data, CodeType), + case CodeType of + rtl -> + io:format(Dev, " ;; ~s\n ", [Data]); + _ -> ok + end. + +pp_wordlist(Dev, [{label, L}|Rest], CodeType, Prefix) -> + case CodeType of + rtl -> + io:format(Dev, " &L~w\n", [L]); + _ -> + io:format(Dev, " <~w>\n", [L]) + end, + pp_wordlist(Dev, Rest, CodeType, Prefix); +pp_wordlist(Dev, [D|Rest], CodeType, Prefix) -> + case CodeType of + rtl -> + io:format(Dev, " ~w\n", [D]); + _ -> + io:format(Dev, " ~w\n", [D]) + end, + pp_wordlist(Dev, Rest, CodeType, Prefix); +pp_wordlist(_Dev, [], _CodeType, _Prefix) -> + ok. + +pp_bytelist(Dev, [D], CodeType) -> + case CodeType of + rtl -> + io:format(Dev, "~w\n", [D]); + _ -> + io:format(Dev, "~w\n", [D]) + end, + ok; +pp_bytelist(Dev, [D|Rest], CodeType) -> + case CodeType of + rtl -> + io:format(Dev, "~w,", [D]); + _ -> + io:format(Dev, "~w,", [D]) + end, + pp_bytelist(Dev, Rest, CodeType); +pp_bytelist(Dev, [], _CodeType) -> + io:format(Dev, "\n", []). diff --git a/lib/hipe/misc/hipe_gensym.erl b/lib/hipe/misc/hipe_gensym.erl new file mode 100644 index 0000000000..84fc8fa7e8 --- /dev/null +++ b/lib/hipe/misc/hipe_gensym.erl @@ -0,0 +1,244 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%======================================================================= +%% File : hipe_gensym.erl +%% Author : Eric Johansson and Kostis Sagonas +%% Description : Generates unique symbols and fresh integer counts. +%%======================================================================= +%% $Id$ +%%======================================================================= +%% Notes: Written while we were in Montreal, Canada for PPDP-2000 as an +%% exercise in Principles and Practice of Declarative Programming! +%%======================================================================= + +-module(hipe_gensym). + +-export([%% init/0, new_var/0, new_label/0, + %% update_lblrange/1, update_vrange/1, var_range/0, label_range/0, + set_var/1, get_var/0, get_next_var/0, + set_label/1, get_label/0, get_next_label/0]). +-export([init/1, new_var/1, new_label/1, + update_vrange/2, update_lblrange/2, var_range/1, label_range/1, + set_var_range/3, set_label_range/3, + set_var/2, get_var/1, get_next_var/1, + set_label/2, get_label/1, get_next_label/1]). + +%%----------------------------------------------------------------------- +%% Types of allowable entities to set global variables for +%%----------------------------------------------------------------------- + +-type gvarname() :: 'icode' | 'rtl' | 'arm' | 'ppc' | 'sparc' | 'x86'. + +%%----------------------------------------------------------------------- + +%% init() -> +%% put(var_count, 0), +%% put(label_count, 0), +%% put(var_min, 0), +%% put(var_max, 0), +%% put(lbl_min, 1), +%% put(lbl_max, 1), +%% ok. + +-spec init(gvarname()) -> 'ok'. + +init(What) -> + put({What,var_count}, 0), + put({What,label_count}, 0), + put({What,var_min}, 0), + put({What,var_max}, 0), + put({What,lbl_min}, 1), + put({What,lbl_max}, 1), + ok. + +%% new_var() -> +%% V = get(var_count), +%% put(var_count, V+1), +%% V. + +-spec new_var(gvarname()) -> non_neg_integer(). + +new_var(What) -> + T = {What, var_count}, + V = get(T), + put(T, V+1), + V. + +%% new_label() -> +%% L = get(label_count), +%% put(label_count, L+1), +%% L. + +-spec new_label(gvarname()) -> non_neg_integer(). + +new_label(What) -> + T = {What, label_count}, + L = get(T), + put(T, L+1), + L. + +%% update_vrange(V) -> +%% Vmax = get(var_max), +%% Vmin = get(var_min), +%% put(var_min, erlang:min(V, Vmin)), +%% put(var_max, erlang:max(V, Vmax)), +%% ok. + +-spec update_vrange(gvarname(), non_neg_integer()) -> 'ok'. +update_vrange(What, V) -> + Tmin = {What, var_min}, + Tmax = {What, var_max}, + Vmax = get(Tmax), + Vmin = get(Tmin), + put(Tmin, erlang:min(V, Vmin)), + put(Tmax, erlang:max(V, Vmax)), + ok. + +%% update_lblrange(L) -> +%% Lmax = get(lbl_max), +%% Lmin = get(lbl_min), +%% put(lbl_min, erlang:min(L, Lmin)), +%% put(lbl_max, erlang:max(L, Lmax)), +%% ok. + +-spec update_lblrange(gvarname(), non_neg_integer()) -> 'ok'. + +update_lblrange(What, L) -> + Tmin = {What, lbl_min}, + Tmax = {What, lbl_max}, + Lmax = get(Tmax), + Lmin = get(Tmin), + put(Tmin, erlang:min(L, Lmin)), + put(Tmax, erlang:max(L, Lmax)), + ok. + +%% var_range() -> +%% {get(var_min), get(var_max)}. + +-spec var_range(gvarname()) -> {non_neg_integer(), non_neg_integer()}. + +var_range(What) -> + {get({What,var_min}), get({What,var_max})}. + +-spec set_var_range(gvarname(), non_neg_integer(), non_neg_integer()) -> 'ok'. + +set_var_range(What, Min, Max) -> + put({What,var_min}, Min), + put({What,var_max}, Max), + ok. + +%% label_range() -> +%% {get(lbl_min), get(lbl_max)}. + +-spec label_range(gvarname()) -> {non_neg_integer(), non_neg_integer()}. + +label_range(What) -> + {get({What,lbl_min}), get({What,lbl_max})}. + +-spec set_label_range(gvarname(), non_neg_integer(), non_neg_integer()) -> 'ok'. + +set_label_range(What, Min, Max) -> + put({What,lbl_min}, Min), + put({What,lbl_max}, Max), + ok. + +%%----------------------------------------------------------------------- +%% Variable counter +%%----------------------------------------------------------------------- + +-spec set_var(non_neg_integer()) -> 'ok'. + +set_var(X) -> + put(var_max, X), + ok. + +-spec set_var(gvarname(), non_neg_integer()) -> 'ok'. + +set_var(What, X) -> + put({What,var_max}, X), + ok. + +-spec get_var() -> non_neg_integer(). + +get_var() -> + get(var_max). + +-spec get_var(gvarname()) -> non_neg_integer(). + +get_var(What) -> + get({What,var_max}). + +-spec get_next_var() -> non_neg_integer(). + +get_next_var() -> + C = get(var_max), + put(var_max, C+1), + C+1. + +-spec get_next_var(gvarname()) -> non_neg_integer(). + +get_next_var(What) -> + T = {What, var_max}, + C = get(T), + put(T, C+1), + C+1. + +%%----------------------------------------------------------------------- +%% Label counter +%%----------------------------------------------------------------------- + +-spec set_label(non_neg_integer()) -> 'ok'. + +set_label(X) -> + put(lbl_max, X), + ok. + +-spec set_label(gvarname(), non_neg_integer()) -> 'ok'. + +set_label(What, X) -> + put({What,lbl_max}, X), + ok. + +-spec get_label() -> non_neg_integer(). + +get_label() -> + get(lbl_max). + +-spec get_label(gvarname()) -> non_neg_integer(). + +get_label(What) -> + get({What,lbl_max}). + +-spec get_next_label() -> non_neg_integer(). + +get_next_label() -> + C = get(lbl_max), + put(lbl_max, C+1), + C+1. + +-spec get_next_label(gvarname()) -> non_neg_integer(). + +get_next_label(What) -> + T = {What, lbl_max}, + C = get(T), + put(T, C+1), + C+1. + +%%----------------------------------------------------------------------- diff --git a/lib/hipe/misc/hipe_pack_constants.erl b/lib/hipe/misc/hipe_pack_constants.erl new file mode 100644 index 0000000000..e214d7ebbc --- /dev/null +++ b/lib/hipe/misc/hipe_pack_constants.erl @@ -0,0 +1,211 @@ +%% -*- erlang-indent-level: 2 -*- +%%============================================================================= +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_pack_constants). +-export([pack_constants/2, slim_refs/1, slim_constmap/1]). + +-include("hipe_consttab.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). + +%%----------------------------------------------------------------------------- + +-type raw_data() :: binary() | number() | list() | tuple(). +-type tbl_ref() :: {hipe_constlbl(), non_neg_integer()}. + +-record(pcm_entry, {mfa :: mfa(), + label :: hipe_constlbl(), + const_num :: non_neg_integer(), + start :: non_neg_integer(), + type :: 0 | 1 | 2, + raw_data :: raw_data()}). + +%%----------------------------------------------------------------------------- + +-spec pack_constants([{mfa(),[_],hipe_consttab()}], ct_alignment()) -> + {ct_alignment(), + non_neg_integer(), + [#pcm_entry{}], + [{mfa(),[tbl_ref() | {'sorted',non_neg_integer(),[tbl_ref()]}]}]}. + +pack_constants(Data, Align) -> + pack_constants(Data, 0, Align, 0, [], []). + +pack_constants([{MFA,_,ConstTab}|Rest], Size, Align, ConstNo, Acc, Refs) -> + Labels = hipe_consttab:labels(ConstTab), + %% RefToLabels = hipe_consttab:referred_labels(ConstTab), + {NewSize, NewAlign, Map, NewConstNo, RefToLabels} = + pack_labels(Labels, MFA, ConstTab, Size, Align, ConstNo, Acc, []), + NewRefs = + case RefToLabels of + [] -> Refs; + _ -> [{MFA,RefToLabels}|Refs] + end, + pack_constants(Rest, NewSize, NewAlign, NewConstNo, Map, NewRefs); +pack_constants([], Size, Align, _, Acc, Refs) -> + {Align, Size, Acc, Refs}. + +%% +%% pack_labels converts a ConstTab to a packed ConstMap, which +%% maps {MFA,Label} pairs to information about individual constants, +%% including their ConstNo and start offset in the constants pool. +%% +pack_labels([{_Label,ref}|Labels],MFA,ConstTab,Size,Align,ConstNo,Acc, Refs) -> + pack_labels(Labels, MFA, ConstTab, Size, Align, ConstNo, Acc, Refs); +pack_labels([Label|Labels],MFA,ConstTab,AccSize,OldAlign,ConstNo, Acc, Refs) -> + Const = hipe_consttab:lookup(Label, ConstTab), + Align = hipe_consttab:const_align(Const), + NewAlign = erlang:max(Align, OldAlign), + Start = + case AccSize rem Align of + 0 -> AccSize; + N -> AccSize + (Align - N) + end, + %% io:format("Const ~w\n", [Const]), + RawType = hipe_consttab:const_type(Const), + Type = ?CONST_TYPE2EXT(RawType), + RawData = hipe_consttab:const_data(Const), + case RawType of + term -> + %% If the constant term is already in the constant map we want + %% to use the same constant number so that, in the end, the + %% constant term is not duplicated. + case lists:keyfind(RawData, 7, Acc) of + false -> + NewInfo = #pcm_entry{mfa=MFA, label=Label, const_num=ConstNo, + start=0, type=Type, raw_data=RawData}, + pack_labels(Labels, MFA, ConstTab, AccSize, OldAlign, ConstNo+1, + [NewInfo|Acc], Refs); + #pcm_entry{const_num=OtherConstNo, type=Type, raw_data=RawData} -> + NewInfo = #pcm_entry{mfa=MFA, label=Label, const_num=OtherConstNo, + start=0, type=Type, raw_data=RawData}, + pack_labels(Labels, MFA, ConstTab, AccSize, OldAlign, ConstNo, + [NewInfo|Acc], Refs); + _ -> + NewInfo = #pcm_entry{mfa=MFA, label=Label, const_num=ConstNo, + start=0, type=Type, raw_data=RawData}, + pack_labels(Labels, MFA, ConstTab, AccSize, OldAlign, ConstNo+1, + [NewInfo|Acc], Refs) + end; + sorted_block -> + Need = hipe_consttab:const_size(Const), + NewInfo = #pcm_entry{mfa=MFA, label=Label, const_num=ConstNo, + start=Start, type=Type, raw_data=RawData}, + pack_labels(Labels, MFA, ConstTab, Start+Need, NewAlign, ConstNo+1, + [NewInfo|Acc], Refs); + block -> + Need = hipe_consttab:const_size(Const), + {Data, NewRefs} = + case RawData of + {ElementType, ElementData} -> + decompose_block(ElementType, ElementData, Start); + {ElementType, ElementData, SortOrder} -> + {TblData, TblRefs} = get_sorted_refs(ElementData, SortOrder), + {hipe_consttab:decompose({ElementType, TblData}), + [{sorted,Start,TblRefs}]} + end, + NewInfo = #pcm_entry{mfa=MFA, label=Label, const_num=ConstNo, + start=Start, type=Type, raw_data=Data}, + pack_labels(Labels, MFA, ConstTab, Start+Need, NewAlign, ConstNo+1, + [NewInfo|Acc], NewRefs++Refs) + end; +pack_labels([], _, _, Size, Align, ConstNo, Acc, Refs) -> + {Size, Align, Acc, ConstNo, Refs}. + +decompose_block(ElementType, Data, Addr) -> + ElementSize = hipe_consttab:size_of(ElementType), + {NewData, Refs} = get_refs(Data, Addr, ElementSize), + {hipe_consttab:decompose({ElementType, NewData}), Refs}. + +get_refs([{label,L}|Rest], Pos, ElementSize) -> + {NewData, Refs} = get_refs(Rest, Pos+ElementSize, ElementSize), + {[0|NewData], [{L,Pos}|Refs]}; +get_refs([D|Rest], Pos, ElementSize) -> + {NewData, Refs} = get_refs(Rest, Pos+ElementSize, ElementSize), + {[D|NewData], Refs}; +get_refs([], _, _) -> + {[],[]}. + +get_sorted_refs([{label,L}|Rest], [Ordering|Os]) -> + {NewData, Refs} = get_sorted_refs(Rest, Os), + {[0|NewData], [{L,Ordering}|Refs]}; +get_sorted_refs([D|Rest], [_Ordering|Os]) -> + {NewData, Refs} = get_sorted_refs(Rest, Os), + {[D|NewData], Refs}; +get_sorted_refs([], []) -> + {[], []}. + +-type ref_type() :: 0..4. + +-spec slim_refs([{ref_type(),non_neg_integer(),term()}]) -> + [{ref_type(), [{term(), [non_neg_integer()]}]}]. +slim_refs([]) -> []; +slim_refs(Refs) -> + [Ref|Rest] = lists:keysort(1, Refs), + compact_ref_types(Rest, element(1, Ref), [Ref], []). + +compact_ref_types([Ref|Refs], Type, AccofType, Acc) -> + case element(1, Ref) of + Type -> + compact_ref_types(Refs, Type, [Ref|AccofType], Acc); + NewType -> + compact_ref_types(Refs, NewType, [Ref], + [{Type,lists:sort(compact_dests(AccofType))}|Acc]) + end; +compact_ref_types([], Type, AccofType ,Acc) -> + [{Type,lists:sort(compact_dests(AccofType))}|Acc]. + + +%% compact_dests([]) -> []; % clause is redundant +compact_dests(Refs) -> + [Ref|Rest] = lists:keysort(3, Refs), + compact_dests(Rest, element(3,Ref), [element(2,Ref)], []). + +compact_dests([Ref|Refs], Dest, AccofDest, Acc) -> + case element(3, Ref) of + Dest -> + compact_dests(Refs, Dest, [element(2,Ref)|AccofDest], Acc); + NewDest -> + compact_dests(Refs, NewDest, [element(2,Ref)], [{Dest,AccofDest}|Acc]) + end; +compact_dests([], Dest, AccofDest, Acc) -> + [{Dest,AccofDest}|Acc]. + +%% +%% slim_constmap/1 takes a packed ConstMap, as produced by pack_labels +%% called from hipe_pack_constants:pack_constants/2, and converts it +%% to the slimmed and flattened format ConstMap which is put in object +%% files. +%% +-spec slim_constmap([#pcm_entry{}]) -> [raw_data()]. +slim_constmap(Map) -> + slim_constmap(Map, gb_sets:new(), []). + +-spec slim_constmap([#pcm_entry{}], gb_set(), [raw_data()]) -> [raw_data()]. +slim_constmap([#pcm_entry{const_num=ConstNo, start=Offset, + type=Type, raw_data=Term}|Rest], Inserted, Acc) -> + case gb_sets:is_member(ConstNo, Inserted) of + true -> + slim_constmap(Rest, Inserted, Acc); + false -> + NewInserted = gb_sets:insert(ConstNo, Inserted), + slim_constmap(Rest, NewInserted, [ConstNo, Offset, Type, Term|Acc]) + end; +slim_constmap([], _Inserted, Acc) -> Acc. diff --git a/lib/hipe/misc/hipe_sdi.erl b/lib/hipe/misc/hipe_sdi.erl new file mode 100644 index 0000000000..ef1b5b48c5 --- /dev/null +++ b/lib/hipe/misc/hipe_sdi.erl @@ -0,0 +1,378 @@ +%%% -*- erlang-indent-level: 2 -*- +%%%====================================================================== +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% An implementation of the algorithm described in: +%%% "Assembling Code for Machines with Span-Dependent Instructions", +%%% Thomas G. Szymanski, CACM 21(4), April 1978, pp. 300--308. +%%% +%%% Copyright (C) 2000, 2004, 2007 Mikael Pettersson + +-module(hipe_sdi). +-export([pass1_init/0, + pass1_add_label/3, + pass1_add_sdi/4, + pass2/1]). + +-include("hipe_sdi.hrl"). + +%%------------------------------------------------------------------------ + +-type hipe_array() :: integer(). % declare this in hipe.hrl or builtin? + +-type label() :: non_neg_integer(). +-type address() :: non_neg_integer(). + +%%------------------------------------------------------------------------ + +-record(label_data, {address :: address(), + prevSdi :: integer()}). + +-record(pre_sdi_data, {address :: address(), + label :: label(), + si :: #sdi_info{}}). + +-record(pass1, {prevSdi :: integer(), + preS = [] :: [#pre_sdi_data{}], + labelMap = gb_trees:empty() :: gb_tree()}). + +-record(sdi_data, {address :: address(), + label_address :: address(), + prevSdi :: integer(), %% -1 is the first previous + si :: #sdi_info{}}). + +%%------------------------------------------------------------------------ + +%%% "During the first pass we assign addresses to instructions +%%% and build a symbol table of labels and their addresses +%%% according to the minimum address assignment. We do this by +%%% treating each sdi as having its shorter length. We also +%%% number the sdi's [sic] from 1 to n in order of occurrence +%%% and record in the symbol table entry for each label the +%%% number of sdi's [sic] preceding it in the program. +%%% Simultaneously with pass 1 we build a set +%%% S = {(i,a,l,c) | 1 <= i <= n, a is the minimum address of +%%% the ith sdi, l and c, are the label and constant +%%% components of the operand of the ith sdi respectively}." +%%% +%%% Implementation notes: +%%% - We number the SDIs from 0 to n-1, not from 1 to n. +%%% - SDIs target only labels, so the constant offsets are omitted. +%%% - The set S is represented by a vector S[0..n-1] such that if +%%% (i,a,l) is in the set, then S[i] = (a,l). +%%% - The symbol table maps a label to its minimum address and the +%%% number of the last SDI preceding it (-1 if none). +%%% - To allow this module to make architecture-specific decisions +%%% without using callbacks or making it architecture-specific, +%%% the elements in the set S include a fourth component, SdiInfo, +%%% supplied by the caller of this module. +%%% - At the end of the first pass we finalise the preliminary SDIs +%%% by replacing their symbolic target labels with the corresponding +%%% data from the symbol table. This avoids repeated O(logn) time +%%% lookup costs for the labels. + +-spec pass1_init() -> #pass1{}. +pass1_init() -> + #pass1{prevSdi = -1}. + +-spec pass1_add_label(#pass1{}, non_neg_integer(), label()) -> #pass1{}. +pass1_add_label(Pass1, Address, Label) -> + #pass1{prevSdi=PrevSdi, labelMap=LabelMap} = Pass1, + LabelData = #label_data{address=Address, prevSdi=PrevSdi}, + LabelMap2 = gb_trees:insert(Label, LabelData, LabelMap), + Pass1#pass1{labelMap=LabelMap2}. + +-spec pass1_add_sdi(#pass1{}, non_neg_integer(), label(), #sdi_info{}) -> + #pass1{}. +pass1_add_sdi(Pass1, Address, Label, SdiInfo) -> + #pass1{prevSdi=PrevSdi, preS=PreS} = Pass1, + PreSdiData = #pre_sdi_data{address=Address, label=Label, si=SdiInfo}, + Pass1#pass1{prevSdi=PrevSdi+1, preS=[PreSdiData|PreS]}. + +-spec pass1_finalise(#pass1{}) -> {non_neg_integer(),tuple(),gb_tree()}. +pass1_finalise(#pass1{prevSdi=PrevSdi, preS=PreS, labelMap=LabelMap}) -> + {PrevSdi+1, pass1_finalise_preS(PreS, LabelMap, []), LabelMap}. + +-spec pass1_finalise_preS([#pre_sdi_data{}], gb_tree(), [#sdi_data{}]) -> + tuple(). +pass1_finalise_preS([], _LabelMap, S) -> vector_from_list(S); +pass1_finalise_preS([PreSdiData|PreS], LabelMap, S) -> + #pre_sdi_data{address=Address, label=Label, si=SdiInfo} = PreSdiData, + LabelData = gb_trees:get(Label, LabelMap), + #label_data{address=LabelAddress, prevSdi=PrevSdi} = LabelData, + SdiData = #sdi_data{address=Address, label_address=LabelAddress, + prevSdi=PrevSdi, si=SdiInfo}, + pass1_finalise_preS(PreS, LabelMap, [SdiData|S]). + +%%% Pass2. + +-spec pass2(#pass1{}) -> {gb_tree(), non_neg_integer()}. +pass2(Pass1) -> + {N,SDIS,LabelMap} = pass1_finalise(Pass1), + LONG = mk_long(N), + SPAN = mk_span(N, SDIS), + PARENTS = mk_parents(N, SDIS), + update_long(N, SDIS, SPAN, PARENTS, LONG), + {INCREMENT,CodeSizeIncr} = mk_increment(N, LONG), + {adjust_label_map(LabelMap, INCREMENT), CodeSizeIncr}. + +%%% "Between passes 1 and 2 we will construct an integer table +%%% LONG[1:n] such that LONG[i] is nonzero if and only if the +%%% ith sdi must be given a long form translation. Initially +%%% LONG[i] is zero for all i." +%%% +%%% Implementation notes: +%%% - LONG is an integer array indexed from 0 to N-1. + +-spec mk_long(non_neg_integer()) -> hipe_array(). +mk_long(N) -> + mk_array_of_zeros(N). + +%%% "At the heart of our algorithm is a graphical representation +%%% of the interdependencies of the sdi's [sic] of the program. +%%% For each sdi we construct a node containing the empty span +%%% of that instruction. Nodes of this graph will be referred to +%%% by the number of the sdi to which they correspond. Directed +%%% arcs are now added to the graph so that i->j is an arc if +%%% and only if the span of the ith sdi depends on the size of +%%% the jth sdi, that is, the jth sdi lies between the ith sdi +%%% and the label occurring in its operand. It is easy to see +%%% that the graph we have just described can be constructed from +%%% the information present in the set S and the symbol table. +%%% +%%% The significance if this graph is that sizes can be assigned +%%% to the sdi's [sic] of the program so that the span of the ith +%%% sdi is equal to the number appearing in node i if and only if +%%% all the children of i can be given short translations." +%%% +%%% Implementation notes: +%%% - The nodes are represented by an integer array SPAN[0..n-1] +%%% such that SPAN[i] contains the current span of sdi i. +%%% - Since the graph is traversed from child to parent nodes in +%%% Step 3, the edges are represented by a vector PARENTS[0..n-1] +%%% such that PARENTS[j] = { i | i is a parent of j }. +%%% - An explicit PARENTS graph would have size O(n^2). Instead we +%%% compute PARENTS[j] from the SDI vector when needed. This +%%% reduces memory overheads, and may reduce time overheads too. + +-spec mk_span(non_neg_integer(), tuple()) -> hipe_array(). +mk_span(N, SDIS) -> + initSPAN(0, N, SDIS, mk_array_of_zeros(N)). + +-spec initSPAN(non_neg_integer(), non_neg_integer(), + tuple(), hipe_array()) -> hipe_array(). +initSPAN(SdiNr, N, SDIS, SPAN) -> + if SdiNr >= N -> SPAN; + true -> + SdiData = vector_sub(SDIS, SdiNr), + #sdi_data{address=SdiAddress, label_address=LabelAddress} = SdiData, + SdiSpan = LabelAddress - SdiAddress, + array_update(SPAN, SdiNr, SdiSpan), + initSPAN(SdiNr+1, N, SDIS, SPAN) + end. + +mk_parents(N, SDIS) -> {N,SDIS}. + +%%% "After the structure is built we process it as follows. +%%% For any node i whose listed span exceeds the architectural +%%% limit for a short form instruction, the LONG[i] equal to +%%% the difference between the long and short forms of the ith +%%% sdi. Increment the span of each parent of i by LONG[i] if +%%% the parent precedes the child in the program. Otherwise, +%%% decrement the span of the parent by LONG[i]. Finally, remove +%%% node i from the graph. Clearly this process must terminate. +%%% Any nodes left in the final graph correspond to sdi's [sic] +%%% which can be translated in the short form." +%%% +%%% Implementation notes: +%%% - We use a simple worklist algorithm, operating on a set +%%% of SDIs known to require long form. +%%% - A node is removed from the graph by setting its span to zero. +%%% - The result is the updated LONG array. Afterwards, S, SPAN, +%%% and PARENTS are no longer useful. + +-spec update_long(non_neg_integer(), tuple(), hipe_array(), + {non_neg_integer(),tuple()},hipe_array()) -> 'ok'. +update_long(N, SDIS, SPAN, PARENTS, LONG) -> + WKL = initWKL(N-1, SDIS, SPAN, []), + processWKL(WKL, SDIS, SPAN, PARENTS, LONG). + +-spec initWKL(integer(), tuple(), + hipe_array(), [non_neg_integer()]) -> [non_neg_integer()]. +initWKL(SdiNr, SDIS, SPAN, WKL) -> + if SdiNr < 0 -> WKL; + true -> + SdiSpan = array_sub(SPAN, SdiNr), + WKL2 = updateWKL(SdiNr, SDIS, SdiSpan, WKL), + initWKL(SdiNr-1, SDIS, SPAN, WKL2) + end. + +-spec processWKL([non_neg_integer()], tuple(), hipe_array(), + {non_neg_integer(), tuple()}, hipe_array()) -> 'ok'. +processWKL([], _SDIS, _SPAN, _PARENTS, _LONG) -> ok; +processWKL([Child|WKL], SDIS, SPAN, PARENTS, LONG) -> + WKL2 = updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG), + processWKL(WKL2, SDIS, SPAN, PARENTS, LONG). + +-spec updateChild(non_neg_integer(), [non_neg_integer()], tuple(), hipe_array(), + {non_neg_integer(),tuple()}, hipe_array()) -> [non_neg_integer()]. +updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG) -> + case array_sub(SPAN, Child) of + 0 -> WKL; % removed + _ -> + SdiData = vector_sub(SDIS, Child), + Incr = sdiLongIncr(SdiData), + array_update(LONG, Child, Incr), + array_update(SPAN, Child, 0), % remove child + PS = parentsOfChild(PARENTS, Child), + updateParents(PS, Child, Incr, SDIS, SPAN, WKL) + end. + +-spec parentsOfChild({non_neg_integer(),tuple()}, + non_neg_integer()) -> [non_neg_integer()]. +parentsOfChild({N,SDIS}, Child) -> + parentsOfChild(N-1, SDIS, Child, []). + +-spec parentsOfChild(integer(), tuple(), non_neg_integer(), + [non_neg_integer()]) -> [non_neg_integer()]. +parentsOfChild(-1, _SDIS, _Child, PS) -> PS; +parentsOfChild(SdiNr, SDIS, Child, PS) -> + SdiData = vector_sub(SDIS, SdiNr), + #sdi_data{prevSdi=PrevSdi} = SdiData, + {LO,HI} = % inclusive + if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards + true -> {PrevSdi+1, SdiNr-1} % backwards + end, + NewPS = + if LO =< Child, Child =< HI -> [SdiNr | PS]; + true -> PS + end, + parentsOfChild(SdiNr-1, SDIS, Child, NewPS). + +-spec updateParents([non_neg_integer()], non_neg_integer(), + byte(), tuple(), hipe_array(), + [non_neg_integer()]) -> [non_neg_integer()]. +updateParents([], _Child, _Incr, _SDIS, _SPAN, WKL) -> WKL; +updateParents([P|PS], Child, Incr, SDIS, SPAN, WKL) -> + WKL2 = updateParent(P, Child, Incr, SDIS, SPAN, WKL), + updateParents(PS, Child, Incr, SDIS, SPAN, WKL2). + +-spec updateParent(non_neg_integer(), non_neg_integer(), + byte(), tuple(), hipe_array(), + [non_neg_integer()]) -> [non_neg_integer()]. +updateParent(Parent, Child, Incr, SDIS, SPAN, WKL) -> + case array_sub(SPAN, Parent) of + 0 -> WKL; % removed + OldSpan -> + NewSpan = + if Parent < Child -> OldSpan + Incr; + true -> OldSpan - Incr + end, + array_update(SPAN, Parent, NewSpan), + updateWKL(Parent, SDIS, NewSpan, WKL) + end. + +-spec updateWKL(non_neg_integer(), tuple(), + integer(), [non_neg_integer()]) -> [non_neg_integer()]. +updateWKL(SdiNr, SDIS, SdiSpan, WKL) -> + case sdiSpanIsShort(vector_sub(SDIS, SdiNr), SdiSpan) of + true -> WKL; + false -> [SdiNr|WKL] + end. + +-spec sdiSpanIsShort(#sdi_data{}, integer()) -> boolean(). +sdiSpanIsShort(#sdi_data{si = #sdi_info{lb = LB, ub = UB}}, SdiSpan) -> + SdiSpan >= LB andalso SdiSpan =< UB. + +-spec sdiLongIncr(#sdi_data{}) -> byte(). +sdiLongIncr(#sdi_data{si = #sdi_info{incr = Incr}}) -> Incr. + +%%% "Now construct a table INCREMENT[0:n] by defining +%%% INCREMENT[0] = 0 and INCREMENT[i] = INCREMENT[i-1]+LONG[i] +%%% for 1 <= i <= n. INCREMENT[i] represents the total increase +%%% in size of the first i sdi's [sic] in the program." +%%% +%%% Implementation notes: +%%% - INCREMENT is an integer vector indexed from 0 to n-1. +%%% INCREMENT[i] = SUM(0 <= j <= i)(LONG[j]), for 0 <= i < n. +%%% - Due to the lack of an SML-like Array.extract operation, +%%% INCREMENT is an array, not an immutable vector. + +-spec mk_increment(non_neg_integer(), hipe_array()) -> + {hipe_array(), non_neg_integer()}. +mk_increment(N, LONG) -> + initINCR(0, 0, N, LONG, mk_array_of_zeros(N)). + +-spec initINCR(non_neg_integer(), non_neg_integer(), non_neg_integer(), + hipe_array(), hipe_array()) -> {hipe_array(), non_neg_integer()}. +initINCR(SdiNr, PrevIncr, N, LONG, INCREMENT) -> + if SdiNr >= N -> {INCREMENT, PrevIncr}; + true -> + SdiIncr = PrevIncr + array_sub(LONG, SdiNr), + array_update(INCREMENT, SdiNr, SdiIncr), + initINCR(SdiNr+1, SdiIncr, N, LONG, INCREMENT) + end. + +%%% "At this point we can adjust the addresses of each label L +%%% in the symbol table. If L is preceded by i sdi's [sic] in +%%% the program, then add INCREMENT[i] to the value of L in the +%%% symbol table." +%%% +%%% Implementation notes: +%%% - Due to the 0..n-1 SDI numbering, a label L with address +%%% a and previous sdi i is remapped to a+incr(i), where +%%% incr(i) = if i < 0 then 0 else INCREMENT[i]. + +-spec adjust_label_map(gb_tree(), hipe_array()) -> gb_tree(). +adjust_label_map(LabelMap, INCREMENT) -> + applyIncr(gb_trees:to_list(LabelMap), INCREMENT, gb_trees:empty()). + +-type label_pair() :: {label(), #label_data{}}. + +-spec applyIncr([label_pair()], hipe_array(), gb_tree()) -> gb_tree(). +applyIncr([], _INCREMENT, LabelMap) -> LabelMap; +applyIncr([{Label,LabelData}|List], INCREMENT, LabelMap) -> + #label_data{address=Address, prevSdi=PrevSdi} = LabelData, + Incr = + if PrevSdi < 0 -> 0; + true -> array_sub(INCREMENT, PrevSdi) + end, + applyIncr(List, INCREMENT, gb_trees:insert(Label, Address+Incr, LabelMap)). + +%%% ADT for immutable vectors, indexed from 0 to N-1. +%%% Currently implemented as tuples. +%%% Used for the 'SDIS' and 'PARENTS' vectors. + +-spec vector_from_list([#sdi_data{}]) -> tuple(). +vector_from_list(Values) -> list_to_tuple(Values). + +vector_sub(Vec, I) -> element(I+1, Vec). + +%%% ADT for mutable integer arrays, indexed from 0 to N-1. +%%% Currently implemented as HiPE arrays. +%%% Used for the 'LONG', 'SPAN', and 'INCREMENT' arrays. + +-spec mk_array_of_zeros(non_neg_integer()) -> hipe_array(). +mk_array_of_zeros(N) -> hipe_bifs:array(N, 0). + +-spec array_update(hipe_array(), non_neg_integer(), integer()) -> hipe_array(). +array_update(A, I, V) -> hipe_bifs:array_update(A, I, V). + +-spec array_sub(hipe_array(), non_neg_integer()) -> integer(). +array_sub(A, I) -> hipe_bifs:array_sub(A, I). diff --git a/lib/hipe/misc/hipe_sdi.hrl b/lib/hipe/misc/hipe_sdi.hrl new file mode 100644 index 0000000000..f89cae1529 --- /dev/null +++ b/lib/hipe/misc/hipe_sdi.hrl @@ -0,0 +1,25 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + + +-record(sdi_info, + {lb :: integer(), % span lower bound for short form + ub :: integer(), % span upper bound for short form + incr :: byte()}). % instruction size increase for long form diff --git a/lib/hipe/native.mk b/lib/hipe/native.mk new file mode 100644 index 0000000000..6f4602477b --- /dev/null +++ b/lib/hipe/native.mk @@ -0,0 +1,5 @@ +ifndef SECONDARY_BOOTSTRAP +ifeq ($(NATIVE_LIBS_ENABLED),yes) +ERL_COMPILE_FLAGS += +native +endif +endif diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile new file mode 100644 index 0000000000..972cf63944 --- /dev/null +++ b/lib/hipe/opt/Makefile @@ -0,0 +1,101 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan + +HRL_FILES= +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +$(EBIN)/hipe_spillmin.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_spillmin_color.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_spillmin_scan.beam: ../main/hipe.hrl ../flow/cfg.hrl diff --git a/lib/hipe/opt/hipe_schedule.erl b/lib/hipe/opt/hipe_schedule.erl new file mode 100644 index 0000000000..4925b2927b --- /dev/null +++ b/lib/hipe/opt/hipe_schedule.erl @@ -0,0 +1,1489 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% INSTRUCTION SCHEDULER +%% +%% This is a basic ILP cycle scheduler: +%% * set cycle = 0 +%% * while ready[cycle] nonempty do +%% - take x with greatest priority from ready[cycle] +%% - try to schedule x; +%% * if scheduling x was possible, +%% - reserve resources +%% - add x to schedule and delete x from dag +%% - update earliest-time for all successor nodes +%% as max[earliest[y],cycle+latency[x]] +%% - if some node y now has no predecessors, +%% add y to ready[earliest[y]] +%% * if it was impossible, put x in ready[cycle+1] +%% (= try again) +%% +%% We use the following data structures: +%% 1. all nodes are numbered and indices used as array keys +%% 2. priority per node can be computed statically or dynamically +%% * statically: before scheduling, each node gets a priority value +%% * dynamically: at each cycle, compute priorities for all ready nodes +%% 3. earliest: earliest cycle of issue, starts at 0 +%% and is updated as predecessors issue +%% 4. predecessors: number of predecessors (0 = ready to issue) +%% 5. successors: list of {Latency,NodeID} +%% 6. ready: an array indexed by cycle-time (integer), where +%% ready nodes are kept. +%% 7. resources: a resource representation (ADT) that answers +%% certain queries, e.g., "can x be scheduled this cycle" +%% and "reserve resources for x". +%% 8. schedule: list of scheduled instructions {Instr,Cycle} +%% in the order of issue +%% 9. instructions: maps IDs back to instructions +%% +%% Inputs: +%% - a list of {ID,Node} pairs (where ID is a unique key) +%% - a dependence list {ID0,Latency,ID1}, which is used to +%% build the DAG. +%% +%% Note that there is some leeway in how things are represented +%% from here. +%% +%% MODIFICATIONS: +%% - Some basic blocks are not worth scheduling (e.g., GC save/restore code) +%% yet are pretty voluminous. How do we skip them? +%% - Scheduling should be done at finalization time: when basic block is +%% linearized and is definitely at Sparc assembly level, THEN reorder +%% stuff. + +-module(hipe_schedule). +-export([cfg/1, est_cfg/1, delete_node/5]). + +-include("../sparc/hipe_sparc.hrl"). + +%%-define(debug1,true). + +-define(debug2(Str,Args),ok). +%%-define(debug2(Str,Args),io:format(Str,Args)). + +-define(debug3(Str,Args),ok). +%%-define(debug3(Str,Args),io:format(Str,Args)). + +-define(debug4(Str,Args),ok). +%%-define(debug4(Str,Args),io:format(Str,Args)). + +-define(debug5(Str,Args),ok). +%%-define(debug5(Str,Args),io:format(Str,Args)). + +-define(debug(Str,Args),ok). +%%-define(debug(Str,Args),io:format(Str,Args)). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cfg +%% Argument : CFG - the control flow graph +%% Returns : CFG - A new cfg with scheduled blocks +%% Description : Takes each basic block and schedules them one by one. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cfg(CFG) -> + ?debug3("CFG: ~n~p", [CFG]), + update_all( [ {L, + hipe_bb:mk_bb( + block(L,hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))) )} + || L <- hipe_sparc_cfg:labels(CFG) ], CFG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : update_all +%% Argument : Blocks - [{Label, Block}] , a list with labels and new code +%% used for updating the old CFG. +%% CFG - The old controlflow graph +%% Returns : An updated controlflow graph. +%% Description : Just swappes the basic blocks in the CFG to the scheduled one. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +update_all([],CFG) -> CFG; +update_all([{L,NewB}|Ls],CFG) -> + update_all(Ls,hipe_sparc_cfg:bb_add(CFG,L,NewB)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +est_cfg(CFG) -> + update_all([ {L, hipe_bb:mk_bb(est_block(hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))))} + || L <- hipe_sparc_cfg:labels(CFG) ], CFG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Provides an estimation of how quickly a block will execute. +%% This is done by chaining all instructions in sequential order +%% by 0-cycle dependences (which means they will never be reordered), +%% then scheduling the mess. + +est_block([]) -> []; +est_block([I]) -> [I]; +est_block(Blk) -> + {IxBlk,DAG} = est_deps(Blk), + Sch = bb(IxBlk,DAG), + separate_block(Sch,IxBlk). + +est_deps(Blk) -> + IxBlk = indexed_bb(Blk), + DAG = deps(IxBlk), + {IxBlk, chain_instrs(IxBlk,DAG)}. + +chain_instrs([{N,_}|Xs],DAG) -> + chain_i(N,Xs,DAG). + +chain_i(_,[],DAG) -> DAG; +chain_i(N,[{M,_}|Xs],DAG) -> + NewDAG = dep_arc(N,zero_latency(),M,DAG), + chain_i(M,Xs,NewDAG). + +zero_latency() -> 0. + +lookup_instr([{N,I}|_], N) -> I; +lookup_instr([_|Xs], N) -> lookup_instr(Xs, N). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : block +%% Argument : Instrs - [Instr], list of all the instructions in a basic +%% block. +%% Returns : A new scheduled block +%% Description : Schedule a basic block +%% +%% Note: does not consider delay slots! +%% (another argument for using only annulled delay slots?) +%% * how do we add delay slots? somewhat tricky to +%% reconcile with the sort of scheduling we consider. +%% (as-early-as-possible) +%% => rewrite scheduler into as-late-as-possible? +%% (=> just reverse the dependence arcs??) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Don't fire up the scheduler if there's no work to do. +block(_, []) -> + []; +block(_L, [I]) -> + case hipe_sparc:is_any_branch(I) of + true -> [hipe_sparc:nop_create(), I]; + false -> [I] + end; +block(_L, Blk) -> + IxBlk = indexed_bb(Blk), + case IxBlk of + [{_N, I}] -> % comments and nops may have been removed. + case hipe_sparc:is_any_branch(I) of + true -> [hipe_sparc:nop_create(), I]; + false -> [I] + end; + _ -> + Sch = bb(IxBlk, {DAG, _Preds} = deps(IxBlk)), + {NewSch, NewIxBlk} = fill_delays(Sch, IxBlk, DAG), + X = finalize_block(NewSch, NewIxBlk), + debug1_stuff(Blk, DAG, IxBlk, Sch, X), + X + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_delays +%% Argument : Sch - List of {{cycle, C}, {node, N}} : C = current cycle +%% N = node index +%% IxBlk - Indexed block [{N, Instr}] +%% DAG - Dependence graph +%% Returns : {NewSch, NewIxBlk} - vector with new schedule and vector +%% with {N, Instr} +%% Description : Goes through the schedule from back to front looking for +%% branches/jumps. If one is found fill_del tries to find +%% an instr to fill the delayslot. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_delays(Sch, IxBlk, DAG) -> + NewIxBlk = hipe_vectors:list_to_vector(IxBlk), + %% NewSch = hipe_vectors:list_to_vector(Sch), + NewSch = fill_del(length(Sch), hipe_vectors:list_to_vector(Sch), + NewIxBlk, DAG), + {NewSch, NewIxBlk}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_del +%% Argument : N - current index in the schedule +%% Sch - schedule +%% IxBlk - indexed block +%% DAG - dependence graph +%% Returns : Sch - New schedule with possibly a delay instr in the last +%% position. +%% Description : If a call/jump is found fill_branch_delay/fill_call_delay +%% is called to find a delay-filler. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_del(N, Sch, _IxBlk, _DAG) when N < 1 -> Sch; +fill_del(N, Sch, IxBlk, DAG) -> + Index = get_index(Sch, N), + ?debug2("Index for ~p: ~p~nInstr: ~p~n", + [N, Index, get_instr(IxBlk, Index)]), + NewSch = + case get_instr(IxBlk, Index) of + #call_link{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + #jmp_link{} -> + fill_call_delay(N - 1, N, Sch, IxBlk, DAG); + #jmp{} -> + fill_call_delay(N - 1, N, Sch, IxBlk, DAG); + #b{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + #br{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + #goto{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + _Other -> + Sch + end, + NewSch. + %% fill_del(N - 1, NewSch, IxBlk, DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_call_delay +%% Argument : Cand - index in schedule of delay-candidate +%% Call - index in schedule of call +%% Sch - schedule vector: < {{cycle,Ci},{node,Nj}}, ... > +%% IxBlk - block vector: < {N, Instr1}, {N+1, Instr2} ... > +%% DAG - dependence graph +%% Returns : Sch - new updated schedule. +%% Description : Searches backwards through the schedule trying to find an +%% instr without conflicts with the Call-instr. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_call_delay(Cand, _Call, Sch, _IxBlk, _DAG) when Cand < 1 -> Sch; +fill_call_delay(Cand, Call, Sch, IxBlk, DAG) -> + CandIndex = get_index(Sch, Cand), + CallIndex = get_index(Sch, Call), + CandI = get_instr(IxBlk, CandIndex), + case move_or_alu(CandI) of + true -> + case single_depend(CandIndex, CallIndex, DAG) of + false -> % Other instrs depends on Cand ... + fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG); + + true -> + CallI = get_instr(IxBlk, CallIndex), + + CandDefs = ordsets:from_list(hipe_sparc:defines(CandI)), + %% CandUses = ordsets:from_list(hipe_sparc:uses(CandI)), + %% CallDefs = ordsets:from_list(hipe_sparc:defines(CallI)), + CallUses = ordsets:from_list(hipe_sparc:uses(CallI)), + + Args = case CallI of + #jmp_link{} -> + ordsets:from_list( + hipe_sparc:jmp_link_args(CallI)); + #jmp{} -> + ordsets:from_list(hipe_sparc:jmp_args(CallI)); + #call_link{} -> + ordsets:from_list( + hipe_sparc:call_link_args(CallI)) + end, + CallUses2 = ordsets:subtract(CallUses, Args), + Conflict = ordsets:intersection(CandDefs, CallUses2), + %% io:format("single_depend -> true:~n ~p~n, ~p~n,~p~n",[CandI,CallI,DAG]), + %% io:format("Cand = ~p~nCall = ~p~n",[CandI,CallI]), + %% io:format("CandDefs = ~p~nCallDefs = ~p~n",[CandDefs,CallDefs]), + %% io:format("CandUses = ~p~nCallUses = ~p~n",[CandUses,CallUses]), + %% io:format("Args = ~p~nCallUses2 = ~p~n",[Args,CallUses2]), + %% io:format("Conflict = ~p~n",[Conflict]), + + case Conflict of + [] -> % No conflicts ==> Cand can fill delayslot after Call + update_schedule(Cand, Call, Sch); + _ -> % Conflict: try with preceeding instrs + fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG) + end + end; + false -> + fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_branch_delay +%% Argument : Cand - index in schedule of delay-candidate +%% Branch - index in schedule of branch +%% Sch - schedule +%% IxBlk - indexed block +%% DAG - dependence graph +%% Returns : Sch - new updated schedule. +%% Description : Searches backwards through the schedule trying to find an +%% instr without conflicts with the Branch-instr. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_branch_delay(Cand, _Br, Sch, _IxBlk, _DAG) when Cand < 1 -> Sch; +fill_branch_delay(Cand, Br, Sch, IxBlk, DAG) -> + CandIndex = get_index(Sch, Cand), + BrIndex = get_index(Sch, Br), + CandI = get_instr(IxBlk, CandIndex), + case move_or_alu(CandI) of + true -> + case single_depend(CandIndex, BrIndex, DAG) of + false -> % Other instrs depends on Cand ... + fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG); + + true -> + BrI = get_instr(IxBlk, BrIndex), + CandDefs = ordsets:from_list(hipe_sparc:defines(CandI)), + %% CandUses = ordsets:from_list(hipe_sparc:uses(CandI)), + %% BrDefs = ordsets:from_list(hipe_sparc:defines(BrI)), + BrUses = ordsets:from_list(hipe_sparc:uses(BrI)), + + Conflict = ordsets:intersection(CandDefs, BrUses), + %% io:format("single_depend -> true: ~p~n, ~p~n,~p~n", [CandI, BrI, DAG]), + %% io:format("Cand = ~p~nBr = ~p~n",[CandI,BrI]), + %% io:format("CandDefs = ~p~nBrDefs = ~p~n",[CandDefs,BrDefs]), + %% io:format("CandUses = ~p~nBrUses = ~p~n",[CandUses,BrUses]), + %% io:format("Conflict = ~p~n",[Conflict]); + + case Conflict of + [] -> % No conflicts ==> + % Cand can fill delayslot after Branch + update_schedule(Cand, Br, Sch); + _ -> % Conflict: try with preceeding instrs + fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG) + end + end; + false -> + fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : update_schedule +%% Argument : From - the position from where to switch indexes in Sch +%% To - the position to where to switch indexes in Sch +%% Sch - schedule +%% Returns : Sch - an updated schedule +%% Description : If From is the delay-filler and To is the Call/jump, the +%% schedule is updated so From gets index To, To gets index +%% To - 1, and the nodes between From and To gets old_index - 1. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +update_schedule(To, To, Sch) -> + {{cycle, C}, {node, _N} = Node} = hipe_vectors:get(Sch, To-1), + hipe_vectors:set(Sch, To-1, {{cycle, C+1}, Node}); +update_schedule(From, To, Sch) -> + Temp = hipe_vectors:get(Sch, From-1), + Sch1 = hipe_vectors:set(Sch, From-1, hipe_vectors:get(Sch, From)), + update_schedule(From + 1, To, hipe_vectors:set(Sch1, From, Temp)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : single_depend +%% Argument : N - Index of the delayslot candidate +%% M - Index of the node that N possibly has a single +%% depend to. +%% DAG - The dependence graph +%% Returns : true if no other nodes than N os depending on N +%% Description : Checks that no other nodes than M depends on N and that the +%% latency between them is zero or 1. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +single_depend(N, M, DAG) -> + Deps = hipe_vectors:get(DAG, N-1), + single_depend(M, Deps). + +single_depend(_N, []) -> true; +single_depend(N, [{0, N}]) -> true; +single_depend(N, [{1, N}]) -> true; +single_depend(_N, [{_Lat, _}|_]) -> false. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_index +%% Argument : Sch - schedule +%% N - index in schedule +%% Returns : Index - index of the node +%% Description : Returns the index of the node on position N in the schedule. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +get_index(Sch, N) -> + {{cycle, _C}, {node, Index}} = hipe_vectors:get(Sch,N-1), + Index. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_instr +%% Argument : IxBlk - indexed block +%% N - index in block +%% Returns : Instr +%% Description : Returns the instr on position N in the indexed block. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +get_instr(IxBlk, N) -> + {_, Instr} = hipe_vectors:get(IxBlk, N-1), + Instr. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_instr +%% Argument : Sch - schedule +%% IxBlk - indexed block +%% N - index in schedule +%% Returns : Instr +%% Description : Returns the instr on position N in the schedule. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +get_instr(Sch, IxBlk, N) -> + {{cycle, _C}, {node, Index}} = hipe_vectors:get(Sch, N-1), + {_, Instr} = hipe_vectors:get(IxBlk, Index-1), + Instr. + +separate_block(Sch,IxBlk) -> + sep_comments([{C,lookup_instr(IxBlk,N)} || {{cycle,C},{node,N}} <- Sch]). + +sep_comments([]) -> []; +sep_comments([{C,I}|Xs]) -> + [hipe_sparc:comment_create({cycle,C}), I | sep_comments(Xs,C)]. + +sep_comments([], _) -> []; +sep_comments([{C1,I}|Xs], C0) -> + if + C1 > C0 -> + [hipe_sparc:comment_create({cycle,C1}),I|sep_comments(Xs,C1)]; + true -> + [I|sep_comments(Xs, C0)] + end. + +finalize_block(Sch, IxBlk) -> + ?debug5("Sch: ~p~nIxBlk: ~p~n",[Sch,IxBlk]), + finalize_block(1, hipe_vectors:size(Sch), 1, Sch, IxBlk, []). + +finalize_block(N, End, _C, Sch, IxBlk, _Instrs) when N =:= End - 1 -> + NextLast = get_instr(Sch, IxBlk, N), + Last = get_instr(Sch, IxBlk, End), + ?debug5("NextLast: ~p~nLast: ~p~n",[NextLast,Last]), + case hipe_sparc:is_any_branch(Last) of + true -> % Couldn't fill delayslot ==> add NOP + [NextLast , hipe_sparc:nop_create(), Last]; + false -> % Last is a delayslot-filler ==> change order... + [Last, NextLast] + end; +finalize_block(N, End, C0, Sch, IxBlk, Instrs) -> + {{cycle, _C1}, {node, _M}} = hipe_vectors:get(Sch, N-1), + Instr = get_instr(Sch, IxBlk, N), + ?debug5("Instr: ~p~n~n",[Instr]), + [Instr | finalize_block(N + 1, End, C0, Sch, IxBlk, Instrs)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : bb +%% Argument : IxBlk - indexed block +%% DAG - {Dag, Preds} where Dag is dependence graph and +%% Preds is number of predecessors for each node. +%% Returns : Sch +%% Description : Initializes earliest-list, ready-list, priorities, resources +%% and so on, and calls the cycle_sched which does the scheduling +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +bb(IxBlk,DAG) -> + bb(length(IxBlk), IxBlk, DAG). + +bb(N,IxBlk,{DAG, Preds}) -> + Earliest = init_earliest(N), + BigArray = N*10, % "nothing" is this big :-) + Ready = hipe_schedule_prio:init_ready(BigArray,Preds), + I_res = init_instr_resources(N, IxBlk), + + Prio = hipe_schedule_prio:init_instr_prio(N,DAG), + Rsrc = init_resources(BigArray), + ?debug4("I_res: ~n~p~nPrio: ~n~p~nRsrc: ~n~p~n", [I_res,Prio,Rsrc]), + ?debug('cycle 1~n',[]), + Sch = empty_schedule(), + cycle_sched(1,Ready,DAG,Preds,Earliest,Rsrc,I_res,Prio,Sch,N,IxBlk). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cycle_sched +%% Argument : - C is current cycle, 1 or more. +%% - Ready is an array (Cycle -> [Node]) +%% yielding the collection of nodes ready to be +%% scheduled in a cycle. +%% - DAG is an array (Instr -> [{Latency,Instr}]) +%% represents the dependence DAG. +%% - Preds is an array (Instr -> NumPreds) +%% counts the number of predecessors +%% (0 preds = ready to be scheduled). +%% - Earl is an array (Instr -> EarliestCycle) +%% holds the earliest cycle an instruction can be scheduled. +%% - Rsrc is a 'resource ADT' that handles scheduler resource +%% management checks whether instruction can be scheduled +%% this cycle without a stall. +%% - I_res is an array (Instr -> Required_resources) +%% holds the resources required to schedule an instruction. +%% - Sch is the representation of the schedule current schedule. +%% - N is the number of nodes remaining to be scheduled +%% tells us when to stop the scheduler. +%% - IxBlk is the indexed block with instrs +%% Returns : present schedule +%% Description : Scheduler main loop. +%% Pick next ready node in priority order for cycle C until +%% none remain. +%% * check each node if it can be scheduled w/o stalling +%% * if so, schedule it +%% * otherwise, bump the node to the next cycle +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cycle_sched(C,Ready,DAG,Preds,Earl,Rsrc,I_res,Prio,Sch,N,IxBlk) -> + case hipe_schedule_prio:next_ready(C,Ready,Prio,IxBlk,DAG,Preds,Earl) of +% case hipe_schedule_prio:next_ready(C,Ready,Prio,IxBlk) of + {next,I,Ready1} -> + ?debug('try ~p~n==> ready = ~p~n',[I, Ready1]), + case resources_available(C,I,Rsrc,I_res) of + {yes,NewRsrc} -> + ?debug(' scheduled~n==> Rscrs = ~p~n',[NewRsrc]), + NewSch = add_to_schedule(I,C,Sch), + {ReadyNs,NewDAG,NewPreds,NewEarl} = + delete_node(C,I,DAG,Preds,Earl), + ?debug("NewPreds : ~p~n",[Preds]), + ?debug(' ReadyNs: ~p~n',[ReadyNs]), + NewReady = hipe_schedule_prio:add_ready_nodes(ReadyNs, + Ready1), + ?debug(' New ready: ~p~n',[NewReady]), + cycle_sched(C,NewReady,NewDAG,NewPreds,NewEarl, + NewRsrc,I_res,Prio,NewSch,N-1, IxBlk); + no -> + ?debug(' resource conflict~n',[]), + NewReady = hipe_schedule_prio:insert_node(C+1,I,Ready1), + cycle_sched(C,NewReady,DAG,Preds,Earl,Rsrc, + I_res,Prio,Sch,N,IxBlk) + end; + none -> % schedule next cycle if some node remains + if + N > 0 -> + ?debug('cycle ~p~n',[C+1]), + cycle_sched(C+1,Ready,DAG,Preds,Earl, + advance_cycle(Rsrc), + I_res,Prio,Sch,N, IxBlk); + true -> + present_schedule(Sch) + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : init_earliest +%% Argument : N - number of instrs +%% Returns : +%% Description : +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +init_earliest(N) -> + hipe_vectors:new(N,1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Schedule is kept reversed until the end. + +-define(present_node(I,Cycle),{{cycle,Cycle},{node,I}}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_schedule +%% Description : Returns an empty schedule. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_schedule() -> []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_to_schedule +%% Argument : I - instr +%% Cycle - cycle when I was placed +%% Sch - schedule +%% Description : Adds instr to schedule +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_to_schedule(I,Cycle,Sch) -> + [?present_node(I,Cycle)|Sch]. + +present_schedule(Sch) -> lists:reverse(Sch). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to resource manager: +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : init_resources +%% Description : Yields a 'big enough' array mapping (Cycle -> Resources); +%% this array is called Rsrc below. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +init_resources(S) -> + hipe_target_machine:init_resources(S). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : init_instr_resources +%% Argument : Nodes - a list of the instructions +%% N - is the number of nodes +%% Description : return a vector (NodeID -> Resource_requirements) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +init_instr_resources(N,Nodes) -> + hipe_target_machine:init_instr_resources(N,Nodes). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : resources_available +%% Argument : Cycle - the current cycle +%% I - the current instruction (index = NodeID) +%% Rsrc - a map (Cycle -> Resources) +%% I_res - maps (NodeID -> Resource_requirements) +%% Description : returns {yes,NewResTab} | no +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +resources_available(Cycle,I,Rsrc,I_res) -> + hipe_target_machine:resources_available(Cycle,I,Rsrc,I_res). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : advance_cycle +%% Argument : Rsrc - resources +%% Description : Returns an empty resources-state +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +advance_cycle(Rsrc) -> + hipe_target_machine:advance_cycle(Rsrc). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : delete_node +%% Argument : Cycle - current cycle +%% I - index of instr +%% DAG - dependence dag +%% Preds - array with number of predecessors for nodes +%% Earl - array with earliest-times for nodes +%% Returns : {ReadyNs,NewDAG,NewPreds,NewEarl} +%% Description : Deletes node I and updates earliest times for the rest. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +delete_node(Cycle,I,DAG,Preds,Earl) -> + Succ = hipe_vectors:get(DAG,I-1), + NewDAG = hipe_vectors:set(DAG,I-1,scheduled), % provides debug 'support' + {ReadyNs,NewPreds,NewEarl} = update_earliest(Succ,Cycle,Preds,Earl,[]), + ?debug('earliest after ~p: ~p~n',[I,[{Ix+1,V} || {Ix,V} <- hipe_vectors:list(NewEarl)]]), + {ReadyNs,NewDAG,NewPreds,NewEarl}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : update_earliest +%% Argument : Succ - successor list +%% Cycle - current cycle +%% Preds - predecessors +%% Earl - earliest times for nodes +%% Ready - array with readynodes for cycles +%% Returns : {Ready,Preds,Earl} +%% Description : Updates the earliest times for nodes and updates number of +%% predecessors for nodes +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +update_earliest([],_Cycle,Preds,Earl,Ready) -> + {Ready,Preds,Earl}; +update_earliest([{Lat,N}|Xs],Cycle,Preds,Earl,Ready) -> + Old_earl = hipe_vectors:get(Earl,N-1), + New_earl = erlang:max(Old_earl,Cycle+Lat), + NewEarl = hipe_vectors:set(Earl,N-1,New_earl), + Num_preds = hipe_vectors:get(Preds,N-1), + NewPreds = hipe_vectors:set(Preds,N-1,Num_preds-1), + if + Num_preds =:= 0 -> + ?debug('inconsistent DAG~n',[]), + exit({update_earliest,N}); + Num_preds =:= 1 -> + NewReady = [{New_earl,N}|Ready], + NewPreds2 = hipe_vectors:set(NewPreds,N-1,0), + update_earliest(Xs,Cycle,NewPreds2,NewEarl,NewReady); + is_integer(Num_preds), Num_preds > 1 -> + update_earliest(Xs,Cycle,NewPreds,NewEarl,Ready) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Collect instruction dependences. +%% +%% Three forms: +%% - data/register +%% * insert RAW, WAR, WAW dependences +%% - memory +%% * stores serialize memory references +%% * alias analysis may allow loads to bypass stores +%% - control +%% * unsafe operations are 'trapped' between branches +%% * branches are ordered +%% +%% returns { [{Index,Instr}], DepDAG } +%% DepDAG is defined below. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : deps +%% Argument : BB - Basic block +%% Returns : {IxBB,DAG} - indexed block and dependence graph. DAG consists +%% of both Dag and Preds, where Preds is number +%% of predecessors for nodes. +%% Description : Collect instruction dependences. +%% +%% Three forms: +%% - data/register +%% * insert RAW, WAR, WAW dependences +%% - memory +%% * stores serialize memory references +%% * alias analysis may allow loads to bypass stores +%% - control +%% * unsafe operations are 'trapped' between branches +%% * branches are ordered +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +deps(IxBB) -> + N = length(IxBB), + DAG = empty_dag(N), % The DAG contains both dependence-arcs and + % number of predeccessors... + {_DepTab,DAG1} = dd(IxBB, DAG), + DAG2 = md(IxBB, DAG1), + cd(IxBB, DAG2). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_dag +%% Argument : N - number of nodes +%% Returns : empty DAG +%% Description : DAG consists of dependence graph and predeccessors +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_dag(N) -> + {hipe_vectors:new(N, []), hipe_vectors:new(N, 0)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : indexed_bb +%% Argument : BB - basic block +%% Returns : [{N, Instr}] +%% Description : Puts indexes to all instrs of a block, removes comments. +%% NOP's are also removed because if both sparc_schedule and +%% sparc_post_schedule options are used, the first pass will +%% add nop's before the branch if necessary, and these are +%% removed before scheduling the second pass. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +indexed_bb(BB) -> + indexed_bb(BB,1). + +indexed_bb([],_N) -> []; +indexed_bb([X|Xs],N) -> + case X of + #comment{} -> + indexed_bb(Xs,N); + #nop{} -> + indexed_bb(Xs,N); + _Other -> + [{N,X}|indexed_bb(Xs,N+1)] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : dep_arc +%% Argument : N - Current node +%% Lat - Latency from current node to M +%% M - The dependent node +%% DAG - The dependence graph. Consists of both DAG and +%% predeccessors +%% Returns : A new DAG with the arc added and number of predeccessors for +%% M increased. +%% Description : Adds a new arc to the graph, if an older arc goes from N to M +%% it will be replaced with a new arc {max(OldLat, NewLat), M}. +%% Number of predeccessors for node M is increased. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +dep_arc(N, Lat, M, {Dag,Preds}) -> + OldDeps = hipe_vectors:get(Dag, N-1), + %% io:format("{OldDeps} = {~p}~n",[OldDeps]), + {NewDeps, Status} = add_arc(Lat, M, OldDeps), + %% io:format("{NewDeps, Status} = {~p, ~p}~n",[NewDeps, Status]), + NewDag = hipe_vectors:set(Dag, N-1, NewDeps), + NewPreds = case Status of + added -> % just increase preds if new arc was added + OldPreds = hipe_vectors:get(Preds, M-1), + hipe_vectors:set(Preds, M-1, OldPreds + 1); + non_added -> + Preds + end, + {NewDag, NewPreds}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_arc +%% Argument : Lat - The latency from current node to To. +%% To - The instr-id of the node which the dependence goes to +%% Arcs - The dependecies that are already in the dep-graph +%% Returns : A dependence graph sorted by To. +%% Description : A new arc that is added is sorted in the right place, and if +%% there is already an arc between nodes A and B, the one with +%% the greatest latency is choosen. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_arc(Lat,To, []) -> {[{Lat, To}], added}; +add_arc(Lat1, To, [{Lat2, To} | Arcs]) -> + {[{erlang:max(Lat1, Lat2), To} | Arcs], non_added}; +add_arc(Lat1,To1, [{Lat2, To2} | Arcs]) when To1 < To2 -> + {[{Lat1, To1}, {Lat2, To2} | Arcs], added}; +add_arc(Lat1 ,To1, [{Lat2, To2} | Arcs]) -> + {Arcs1, Status} = add_arc(Lat1, To1, Arcs), + {[{Lat2, To2} | Arcs1], Status}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The register/data dependence DAG of a block is represented +%% as a mapping (Variable -> {NextWriter,NextReaders}) +%% where NextWriter is a pair {Ix,Type} +%% and NextReaders is a list of pairs {Ix,Type}. +%% +%% Type is used to determine latencies of operations; on the UltraSparc, +%% latencies of arcs (n -> m) are determined by both n and m. (E.g., if +%% n is an integer op and m is a store, then latency is 0; if m is an +%% integer op, it's 1.) + +dd([],DAG) -> { empty_deptab(), DAG }; +dd([{N,I}|Is],DAG0) -> + {DepTab,DAG1} = dd(Is,DAG0), + add_deps(N,I,DepTab,DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_deps +%% Argument : N - current node +%% Instr - current instr +%% DepTab - hashtable with {next-writer, next-readers} for reg +%% DAG - dependence graph +%% Returns : {DepTab, BlockInfo, DAG} - with new values +%% Description : Adds dependencies for node N to the graph. The registers that +%% node N defines and uses are used for computing the +%% dependencies to the following nodes. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_deps(N,Instr,DepTab,DAG) -> + {Ds,Us} = def_use(Instr), + Type = dd_type(Instr), + {DepTab1,DAG1} = add_write_deps(Ds,N,Type,DepTab,DAG), + add_read_deps(Us,N,Type,DepTab1,DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Instructions are classified into symbolic categories, +%% which are subsequently used to determine operation latencies +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +dd_type(Instr) -> + case Instr of + #b{} -> branch; + %% #br{} -> branch; + #call_link{} -> branch; + #jmp_link{} -> branch; + #jmp{} -> branch; + #goto{} -> branch; + #load{} -> load; + #store{} -> store; + #alu{} -> alu; + #move{} -> alu; + #multimove{} -> + Src = hipe_sparc:multimove_src(Instr), + Lat = round(length(Src)/2), + {mmove,Lat}; + #sethi{} -> alu; + #alu_cc{} -> alu_cc; + %% #cmov_cc{} -> cmov_cc; + %% #cmov_r{} -> alu; + #load_atom{} -> alu; + #load_address{} -> alu; + #pseudo_enter{} -> pseudo; + #pseudo_pop{} -> pseudo; + #pseudo_return{} -> pseudo; + #pseudo_spill{} -> pseudo; + #pseudo_unspill{} -> pseudo + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_write_deps +%% Argument : Defs - registers that node N defines. +%% N - current node +%% Ty - the type of current instr +%% DepTab - Dependence-table +%% DAG - The dependence graph. +%% Returns : {DepTab,DAG} - with new values +%% Description : Adds dependencies to the graph for nodes that depends on the +%% registers that N defines. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_write_deps([],_N,_Ty,DepTab,DAG) -> {DepTab,DAG}; +add_write_deps([D|Ds],N,Ty,DepTab,DAG) -> + {NewDepTab,NewDAG} = add_write_dep(D,N,Ty,DepTab,DAG), + add_write_deps(Ds,N,Ty,NewDepTab,NewDAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_write_dep +%% Description : Updates the dependence table with N as next writer, and +%% updates the DAG with the dependencies from N to subsequent +%% nodes. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_write_dep(X,N,Ty,DepTab,DAG) -> + {NxtWriter,NxtReaders} = lookup(X,DepTab), + NewDepTab = writer(X,N,Ty,DepTab), + NewDAG = write_deps(N,Ty,NxtWriter,NxtReaders,DAG), + {NewDepTab, NewDAG}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : write_deps +%% Argument : Instr - Current instr +%% Ty - Type of current instr +%% NxtWriter - The node that is the next writer of the ragister +%% that Instr defines. +%% NxtReaders - The nodes that are subsequent readers of the +%% register that N defines. +%% DAG - The dependence graph +%% Returns : Calls raw_deps that finally returns a new DAG with the new +%% dependence arcs added. +%% Description : If a next writer exists a dependence arc for this node is +%% added, and after this raw_deps is called to compute the +%% arcs for read-after-write dependencies. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +write_deps(Instr,Ty,NxtWriter,NxtReaders,DAG) -> + DAG1 = case NxtWriter of + none -> + DAG; + {Instr,_} -> + DAG; + {Wr,WrTy} -> + dep_arc(Instr, + hipe_target_machine:waw_latency(Ty,WrTy), + Wr, DAG) + end, + raw_deps(Instr,Ty,NxtReaders,DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : raw_deps +%% Argument : Instr - current instr +%% Type - type of instr +%% Readers - subsequent readers +%% DAG - dependence graph +%% Returns : DAG - A new DAG with read-after-write dependencies added +%% Description : Updates the DAG with the dependence-arcs from Instr to the +%% subsequent readers, with the appropriate latencies. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +raw_deps(_Instr,_Type,[],DAG) -> DAG; +raw_deps(Instr,Ty,[{Rd,RdTy}|Xs],DAG) -> + raw_deps(Instr,Ty,Xs, + dep_arc(Instr,hipe_target_machine:raw_latency(Ty,RdTy), + Rd,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_read_deps +%% Argument : Uses - The registers that node N uses. +%% N - Index of the current node. +%% Ty - Type of current node. +%% DepTab - Dependence table +%% DAG - Dependence graph +%% Returns : {DepTab, DAG} - with updated values. +%% Description : Adds the read dependencies from node N to subsequent ones, +%% according to the registers that N uses. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_read_deps([],_N,_Ty,DepTab,DAG) -> {DepTab,DAG}; +add_read_deps([U|Us],N,Ty,DepTab,DAG) -> + {NewDepTab,NewDAG} = add_read_dep(U,N,Ty,DepTab,DAG), + add_read_deps(Us,N,Ty,NewDepTab,NewDAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_read_dep +%% Argument : X - Used register +%% N - Index of checked instr +%% Ty - Type of checked instr +%% DepTab - Hashtable with {next-writer, next-readers} +%% DAG - Dependence graph +%% Returns : {DepTab, DAG} - with updated values +%% Description : Looks up what the next-writer/next-readers are, and adjusts +%% the table with current node as new reader. Finally +%% read-dependencies are added to the DAG. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_read_dep(X,N,Ty,DepTab,DAG) -> + {NxtWriter,_NxtReaders} = lookup(X,DepTab), + NewDepTab = reader(X,N,Ty,DepTab), + NewDAG = read_deps(N,Ty,NxtWriter,DAG), + {NewDepTab, NewDAG}. + +% If NxtWriter is 'none', then this var is not written subsequently +% Add WAR from Instr to NxtWriter (if it exists) +% *** UNFINISHED *** +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : read_deps +%% Argument : N - Index of current node +%% Ty - Type of current node +%% Writer - tuple {NextWriter, WrType} where NextWriter is the +%% subsequent instr that writes this register next time, +%% and WrType is the type of that instr. +%% DAG - The dependence graph +%% Returns : DAG +%% Description : Returns a new DAG if a next-writer exists, otherwise the old +%% DAG is returned. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +read_deps(_Instr,_Ty,none,DAG) -> + DAG; +read_deps(_Instr,_Ty,{_Instr,_},DAG) -> + DAG; +read_deps(Instr,Ty,{NxtWr,NxtWrTy},DAG) -> + dep_arc(Instr,hipe_target_machine:war_latency(Ty,NxtWrTy),NxtWr, + DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_deptab +%% Description : Creates an empty dependence table (hash-table) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_deptab() -> + gb_trees:empty(). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : lookup +%% Argument : X - key (register) +%% DepTab - dependence table +%% Returns : {NextWriter, NextReaders} +%% Description : Returns next writer and a list of following readers on +%% register X. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +lookup(X, DepTab) -> + case gb_trees:lookup(X, DepTab) of + none -> + {none, []}; + {value, {W, Rs} = Val} -> + Val + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : writer +%% Argument : X - key (register) +%% N - index of writer +%% Ty - type of writer +%% DepTab - dependence table to be updated +%% Returns : DepTab - new dependence table +%% Description : Sets N tobe next writer on X +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +writer(X, N, Ty, DepTab) -> + gb_trees:enter(X, {{N, Ty}, []}, DepTab). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : reader +%% Argument : X - key (register) +%% N - index of reader +%% Ty - type of reader +%% DepTab - dependence table to be updated +%% Returns : DepTab - new dependence table +%% Description : Adds N to the dependence table as a reader. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +reader(X,N,Ty,DepTab) -> + {W,Rs} = lookup(X,DepTab), + gb_trees:enter(X,{W,[{N,Ty}|Rs]},DepTab). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The following version of md/2 separates heap- and stack operations, +%% which allows for greater reordering. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md +%% Argument : IxBB - indexed block +%% DAG - dependence graph +%% Returns : DAG - new dependence graph +%% Description : Adds arcs for load/store dependencies to the DAG. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md(IxBB, DAG) -> + md(IxBB,empty_md_state(),DAG). + +md([],_,DAG) -> DAG; +md([{N,I}|Is],St,DAG) -> + case md_type(I) of + other -> + md(Is,St,DAG); + {st,T} -> + { WAW_nodes, WAR_nodes, NewSt } = st_overlap(N,T,St), + md(Is,NewSt, + md_war_deps(WAR_nodes,N,md_waw_deps(WAW_nodes,N,DAG))); + {ld,T} -> + { RAW_nodes, NewSt } = ld_overlap(N,T,St), + md(Is,NewSt, + md_raw_deps(RAW_nodes,N,DAG)) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_war_deps +%% Argument : WAR_nodes - write-after-read nodes depending on N +%% N - index of current instr +%% DAG - dependence graph +%% Returns : DAG - updated DAG +%% Description : Adds arcs for write-after-read dependencies for N +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_war_deps([],_,DAG) -> DAG; +md_war_deps([M|Ms],N,DAG) -> + md_war_deps(Ms,N,dep_arc(M,hipe_target_machine:m_war_latency(),N,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_waw_deps +%% Argument : WAW_nodes - write-after-write nodes depending on N +%% N - index of current instr +%% DAG - dependence graph +%% Returns : DAG - updated DAG +%% Description : Adds arcs for write-after-write dependencies for N +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_waw_deps([],_,DAG) -> DAG; +md_waw_deps([M|Ms],N,DAG) -> + md_waw_deps(Ms,N,dep_arc(M,hipe_target_machine:m_waw_latency(),N,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_raw_deps +%% Argument : RAW_nodes - read-after-write nodes depending on N +%% N - index of current instr +%% DAG - dependence graph +%% Returns : DAG - updated DAG +%% Description : Adds arcs for read-after-write dependencies for N +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_raw_deps([],_,DAG) -> DAG; +md_raw_deps([M|Ms],N,DAG) -> + md_raw_deps(Ms,N,dep_arc(M,hipe_target_machine:m_raw_latency(),N,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_md_state +%% Description : Returns an empty memorydependence state, eg. 4 lists +%% representing {StackStores, HeapStores, StackLoads, HeapLoads} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_md_state() -> {[], [], [], []}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_type +%% Argument : I - instr +%% Description : Maps the instr-type to a simplified type, telling if it's +%% store/load resp. heap or stack. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_type(I) -> + case I of + #load{} -> + Sp = hipe_sparc_registers:stack_pointer(), + Src = hipe_sparc:load_src(I), + N = hipe_sparc:reg_nr(Src), + Off = hipe_sparc:load_off(I), + if + N =:= Sp -> % operation on stack + {ld,{sp,Off}}; + true -> + {ld,{hp,Src,Off}} + end; + #store{} -> + Sp = hipe_sparc_registers:stack_pointer(), + Dst = hipe_sparc:store_dest(I), + N = hipe_sparc:reg_nr(Dst), + Off = hipe_sparc:store_off(I), + if + N =:= Sp -> + {st,{sp,Off}}; + true -> + {st,{hp,Dst,Off}} + end; + _ -> + other + end. + +%% Given a memory operation and a 'memory op state', +%% overlap(N,MemOp,State) returns { Preceding_Dependent_Ops, NewState }. +%% which are either a tuple { WAW_deps, WAR_deps } or a list RAW_deps. +%% +%% NOTES: +%% Note that Erlang's semantics ("heap stores never overwrite existing data") +%% means we can be quite free in reordering stores to the heap. +%% Ld/St to the stack are simply handled by their offsets; since we do not +%% rename the stack pointer, this is sufficient. +%% *** We assume all memory ops have uniform size = 4 *** +%% +%% NOTES: +%% The method mentioned above has now been changed because the assumption that +%% "heap stores never overwrite existing data" caused a bug when the +%% process-pointer was treated the same way as the heap. We were also told +%% that the semantics can possibly change in the future, so it would be more +%% safe to treat the heap store/loads as the stack. +%% A future improvement can be to do an alias analysis to give more freedom +%% in reordering stuff... +%% +%% Alias state: +%% { [StackOp], [HeapOp], [StackOp], [HeapOp] } +%% where StackOp = {InstrID, Offset} +%% HeapOp = {InstrID, Reg, Offset} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : st_overlap +%% Argument : N - Index of current node +%% Type - {sp,Off} or {hp,Dst,Off}, store on stack or heap +%% State - { [StackStrs], [HeapStrs], [StackLds], [HeapLds] } +%% where StackStrs/StackLds = {InstrID, Offset} +%% and HeapStrs/HeapLds = {InstrID, Reg, Offset} +%% Returns : { DepStrs, DepLds, State } - +%% where DepStrs/DepLds = [NodeId] +%% and State is the new state +%% Description : Adds dependencies for overlapping stores. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +st_overlap(N, {sp, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + {DepSt, IndepSt_Sp} = st_sp_dep(St_Sp, Off), + {DepLd, IndepLd_Sp} = ld_sp_dep(Ld_Sp, Off), + {DepSt, DepLd, {[{N, Off}|IndepSt_Sp], St_Hp, IndepLd_Sp, Ld_Hp}}; +st_overlap(N, {hp, Dst, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + DstOff = {Dst, Off}, + {DepSt,_IndepSt_Hp} = st_hp_dep(St_Hp, DstOff), + {DepLd, IndepLd_Hp} = ld_hp_dep(Ld_Hp, DstOff), + {DepSt, DepLd, {St_Sp, [{N, Dst, Off}|St_Hp], Ld_Sp, IndepLd_Hp}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : ld_overlap +%% Argument : N - Index of current node +%% Type - {sp,Off} or {hp,Dst,Off}, store on stack or heap +%% State - { [StackStrs], [HeapStrs], [StackLds], [HeapLds] } +%% where StackStrs/StackLds = {InstrID, Offset} +%% and HeapStrs/HeapLds = {InstrID, Reg, Offset} +%% Returns : { DepStrs, State } +%% Description : Adds dependencies for overlapping laods +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +ld_overlap(N, {sp, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + DepSt = sp_dep_only(St_Sp, Off), + {DepSt, {St_Sp, St_Hp, [{N, Off}|Ld_Sp], Ld_Hp}}; +ld_overlap(N, {hp, Src, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + DepSt = hp_dep_only(St_Hp, Src, Off), + {DepSt, {St_Sp, St_Hp, Ld_Sp, [{N, Src, Off}|Ld_Hp]}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : st_sp_dep +%% Description : Adds dependencies that are depending on a stack store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +st_sp_dep(Stores, Off) -> + sp_dep(Stores, Off, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : ld_sp_dep +%% Description : Adds dependencies that are depending on a stack load +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +ld_sp_dep(Loads, Off) -> + sp_dep(Loads, Off, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : st_hp_dep +%% Description : Adds dependencies that are depending on a heap store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +st_hp_dep(Stores, {_Reg, _Off} = RegOff) -> + hp_dep(Stores, RegOff, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : ld_hp_dep +%% Description : Adds dependencies that are depending on a heap load +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +ld_hp_dep(Loads, {_Reg, _Off} = RegOff) -> + hp_dep(Loads, RegOff, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : sp_dep +%% Description : Returns {Dependent, Independent} which are lists of nodes +%% that depends or not on a stack load/store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +sp_dep([], _Off, Dep, Indep) -> {Dep, Indep}; +sp_dep([{N,Off}|Xs], Off, Dep, Indep) -> + sp_dep(Xs, Off, [N|Dep], Indep); +sp_dep([X|Xs], Off, Dep, Indep) -> + sp_dep(Xs, Off, Dep, [X|Indep]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : hp_dep +%% Description : Returns {Dependent, Independent} which are lists of nodes +%% that depends or not on a heap load/store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +hp_dep([], {_Reg,_Off}, Dep, Indep) -> {Dep,Indep}; +hp_dep([{N,Reg,Off1}|Xs], {Reg,Off}, Dep, Indep) when Off1 =/= Off -> + hp_dep(Xs, {Reg,Off}, Dep, [{N,Reg,Off1}|Indep]); +hp_dep([{N,_,_}|Xs], {Reg,Off}, Dep, Indep) -> + hp_dep(Xs, {Reg,Off}, [N|Dep], Indep). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : sp_dep_only +%% Description : Returns a list of nodes that are depending on a stack store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +sp_dep_only(Stores, Off) -> + [N || {N,Off0} <- Stores, Off =:= Off0]. + +%% Dependences from heap stores to heap loads. +%% *** UNFINISHED *** +%% - but works +%% This is somewhat subtle: +%% - a heap load can only bypass a heap store if we KNOW it won't +%% load the stored value +%% - unfortunately, we do not know the relationships between registers +%% at this point, so we can't say that store(p+4) is independent of +%% load(q+0). +%% (OR CAN WE? A bit closer reasoning might show that it's possible?) +%% - We can ONLY say that st(p+c) and ld(p+c') are independent when c /= c' +%% +%% (As said before, it might be possible to lighten this restriction?) + +hp_dep_only([], _Reg, _Off) -> []; +hp_dep_only([{_N,Reg,Off_1}|Xs], Reg, Off) when Off_1 =/= Off -> + hp_dep_only(Xs, Reg, Off); +hp_dep_only([{N,_,_}|Xs], Reg, Off) -> + [N|hp_dep_only(Xs, Reg, Off)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Control dependences: +%% - add dependences so that +%% * branches are performed in order +%% * unsafe operations are 'fenced in' by surrounding branches +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd +%% Argument : IxBB - indexed block +%% DAG - dependence graph +%% Returns : DAG - new dependence graph +%% Description : Adds conditional dependencies to the DAG +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd(IxBB,DAG) -> + cd(IxBB, DAG, none, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd +%% Argument : IxBB - indexed block +%% DAG - dependence graph +%% PrevBr - previous branch +%% PrevUnsafe - previous unsafe instr (mem-op) +%% PrevOthers - previous other instrs, used to "fix" preceeding +%% instrs so they don't bypass a branch. +%% Returns : DAG - new dependence graph +%% Description : Adds conditional dependencies to the graph. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd([], DAG, _PrevBr, _PrevUnsafe, _PrevOthers) -> + DAG; +cd([{N,I}|Xs], DAG, PrevBr, PrevUnsafe, PrevOthers) -> + case cd_type(I) of + {branch,Ty} -> + DAG1 = cd_branch_to_other_deps(N, PrevOthers, DAG), + NewDAG = cd_branch_deps(PrevBr, PrevUnsafe, N, Ty, DAG1), + cd(Xs,NewDAG,{N,Ty},[],[]); + {unsafe,Ty} -> + NewDAG = cd_unsafe_deps(PrevBr,N,Ty,DAG), + cd(Xs, NewDAG, PrevBr, [{N,Ty}|PrevUnsafe], PrevOthers); + {other,_Ty} -> + cd(Xs, DAG, PrevBr, PrevUnsafe, [N|PrevOthers]) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_branch_to_other_deps +%% Argument : N - index of branch +%% Ms - list of indexes of "others" preceeding instrs +%% DAG - dependence graph +%% Returns : DAG - new graph +%% Description : Makes preceeding instrs fixed so they don't bypass a branch +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_branch_to_other_deps(_, [], DAG) -> + DAG; +cd_branch_to_other_deps(N, [M | Ms], DAG) -> + cd_branch_to_other_deps(N, Ms, dep_arc(M, zero_latency(), N, DAG)). + +%% Is the operation a branch, an unspeculable op or something else? + +%% Returns +%% {branch,BranchType} +%% {unsafe,OpType} +%% {other,OpType} + +%% *** UNFINISHED *** +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_type +%% Argument : I - instr +%% Description : Maps instrs to a simpler type. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_type(I) -> + case I of + #goto{} -> + {branch,uncond}; + #br{} -> + {branch,'cond'}; + #b{} -> + {branch,'cond'}; + #call_link{} -> + {branch,call}; + #jmp_link{} -> + {branch,call}; + #jmp{} -> + {branch,call}; + #load{} -> + {unsafe,load}; + #store{} -> + {unsafe,load}; + T -> + {other,T} + end. + +%% add dependences to keep order of branches + unspeculable ops: +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_branch_deps +%% Argument : PrevBr - preceeding branch +%% PrevUnsafe - preceeding unsafe ops, eg, mem-ops +%% N - current id. +%% Ty - type of current instr +%% DAG - dependence graph +%% Returns : DAG - new DAG +%% Description : Adds arcs between branches and calls deps_to_unsafe that adds +%% arcs between branches and unsafe ops. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_branch_deps(PrevBr, PrevUnsafe, N, Ty, DAG) -> + DAG1 = case PrevBr of + none -> + DAG; + {Br,BrTy} -> + dep_arc(Br, + hipe_target_machine:br_br_latency(BrTy,Ty), + N, DAG) + end, + deps_to_unsafe(PrevUnsafe, N, Ty, DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : deps_to_unsafe +%% Description : Adds dependencies between unsafe's and branches +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +deps_to_unsafe([], _, _, DAG) -> DAG; +deps_to_unsafe([{M,UTy}|Us], N, Ty, DAG) -> + deps_to_unsafe(Us,N,Ty, + dep_arc(M, hipe_target_machine:unsafe_to_br_latency(UTy,Ty), + N, DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_unsafe_deps +%% Description : Adds dependencies between branches and unsafe's +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_unsafe_deps(none, _, _, DAG) -> + DAG; +cd_unsafe_deps({Br,BrTy}, N, Ty, DAG) -> + dep_arc(Br, hipe_target_machine:br_to_unsafe_latency(BrTy, Ty), N, DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : def_use +%% Argument : Instr +%% Description : Returns the registers that Instr defines resp. uses as 2 lists +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def_use(Instr) -> + {hipe_sparc:defines(Instr), hipe_sparc:uses(Instr)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : move_or_alu +%% Description : True if the instruction is a move or an alu; false otherwise +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +move_or_alu(#move{}) -> true; +move_or_alu(#alu{}) -> true; +move_or_alu(_) -> false. + +%% Debugging stuff below %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-ifdef(debug1). +debug1_stuff(Blk, DAG, IxBlk, Sch, X) -> + io:format("Blk: ~p~n",[Blk]), + io:format("DAG: ~n~p~n~p",[DAG,IxBlk]), + io:format("~n"), + print_instrs(IxBlk), + print_sch(Sch, IxBlk), + print_instrs2(X). + +print_instrs([]) -> + io:format("~n"); +print_instrs([{N,Instr} | Instrs]) -> + io:format("(~p): ",[N]), + hipe_sparc_pp:pp_instr(Instr), + io:format("~p~n",[element(1,Instr)]), + print_instrs(Instrs). + +print_instrs2([]) -> + io:format("~n"); +print_instrs2([Instr | Instrs]) -> + hipe_sparc_pp:pp_instr(Instr), + print_instrs2(Instrs). + +print_sch([],_) -> io:format("~n"); +print_sch([{{cycle,Cycle},{node,I}} | Rest], IxBlk) -> + io:format("{C~p, N~p} ",[Cycle,I]), + print_node(I, IxBlk), + print_sch(Rest, IxBlk). + +print_node(_, []) -> + io:format("~n"); +print_node(I, [{I, Instr} | _]) -> + hipe_sparc_pp:pp_instr(Instr); +print_node(I, [_ | IxBlk]) -> + print_node(I, IxBlk). +-else. +debug1_stuff(_Blk, _DAG, _IxBlk, _Sch, _X) -> + ok. +-endif. diff --git a/lib/hipe/opt/hipe_schedule_prio.erl b/lib/hipe/opt/hipe_schedule_prio.erl new file mode 100644 index 0000000000..4d078b007d --- /dev/null +++ b/lib/hipe/opt/hipe_schedule_prio.erl @@ -0,0 +1,58 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% PRIORITY HANDLING AND PRIORITY CALCULATION +%% +%% Handling of ready nodes and priorities. +%% - at present, all nodes have the same priority and so on. +%% +%% *** UNFINISHED *** +%% - should compute a static priority estimate +%% - should dynamically modify priorities + possibly insert NOPs +%% (e.g., to separate branches, etc.) +%% - thus, ought to be passed the current schedule and/or resources as well + +-module(hipe_schedule_prio). +-export([init_ready/2, + init_instr_prio/2, + %% initial_ready_set/4, + next_ready/7, + add_ready_nodes/2, + insert_node/3 + ]). + +init_ready(Size,Preds) -> + hipe_ultra_prio:init_ready(Size,Preds). + +init_instr_prio(N,DAG) -> + hipe_ultra_prio:init_instr_prio(N,DAG). + +%% initial_ready_set(M,N,Preds,Ready) -> +%% hipe_ultra_prio:initial_ready_set(M,N,Preds,Ready). + +next_ready(C,Ready,Prio,Nodes,DAG,Preds,Earl) -> + hipe_ultra_prio:next_ready(C,Ready,Prio,Nodes,DAG,Preds,Earl). + +add_ready_nodes(NodeLst,Ready) -> + hipe_ultra_prio:add_ready_nodes(NodeLst,Ready). + +insert_node(C,I,Ready) -> + hipe_ultra_prio:insert_node(C,I,Ready). diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl new file mode 100644 index 0000000000..df885a7dff --- /dev/null +++ b/lib/hipe/opt/hipe_spillmin.erl @@ -0,0 +1,111 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ========================================================================== +%% Module : hipe_spillmin +%% Purpose : Driver module for minimizing the number of stack slots used +%% by a function. This is done using an algorithm for register +%% allocation. The implementation is target-independent and +%% requires a target-specific interface module as argument. +%% +%% $Id$ +%% ========================================================================== +%% Exported functions (short description): +%% +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +%% {Coloring, NumberOfSpills} +%% Takes a CFG and the TempMap from register allocation and returns +%% a coloring of stack slots. +%% StackSlots should be a list of used stack slots, usually empty at +%% first call to function. +%% SpillIndex is the the first position we will spill to, usually 0. +%% TempMap is the TempMap from the register allocation +%% +%% The Coloring will be in the form of the "allocation datastructure" +%% described below, that is, a list of tuples on the form +%% {Name, {spill, SpillIndex}} +%% The NumberOfSpills is either 0 indicating no spill or the +%% SpillIndex of the last spilled register. +%% +%% mapmerge(Map, SpillMap) -> NewMap +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_spillmin). +-export([stackalloc/6, mapmerge/2]). + +%%-define(DEBUG, 1). +-define(HIPE_INSTRUMENT_COMPILER, true). + +%%--------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) +%% Calculates an allocation of stack slots using either a linear scan +%% or a graph coloring allocation algorithm. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec stackalloc(#cfg{}, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> + case proplists:get_bool(spillmin_color, Options) of + false -> + ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex, + Options, Target, TempMap), + "Spill minimize, linear scan", Options); + true -> + ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex, + Options, Target, TempMap), + "Spill minimize, graph coloring", Options) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% mapmerge(Map, SpillMap) +%% +%% stackalloc/6 will only return the subset of the tempmap that contains +%% the spilled temporaries. This function is used to merge the old +%% complete tempmap with the new spill information. +%% Map is the old map (a list of [{R0, C1}, {R1, C2}, ...]). +%% SpillMap is the new "spill" map. +%% !! Warning, the function does not work with the maps in another order !! +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Combines the map with allocated spills with a map from the register +%% allocator + +-spec mapmerge(hipe_map(), hipe_spill_map()) -> hipe_map(). + +mapmerge(TempMap, SpillMap) -> + mapmerge(TempMap, SpillMap, []). + +mapmerge([], _, Ack) -> + lists:reverse(Ack); +mapmerge([{T1, _}|T1s], [{T2, C}|T2s], Ack) when T1 =:= T2 -> + mapmerge(T1s, T2s, [{T1, C}|Ack]); +mapmerge([{_, unknown}|T1s], T2s, Ack) -> + mapmerge(T1s, T2s, Ack); +mapmerge([T1|T1s], T2s, Ack) -> + mapmerge(T1s, T2s, [T1|Ack]). diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl new file mode 100644 index 0000000000..11a281100b --- /dev/null +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -0,0 +1,556 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% =========================================================================== +%%@doc +%% GRAPH COLORING STACK SLOT SPILL MINIMIZER +%% +%% A simple pessimistic graph coloring stack slot spill minimizer +%% +%% - build interference graph +%% - estimate number of stack slots needed +%% - simplify graph (push on stack, abort and retry with more stack slots if spill) +%% - select colors +%% +%% Emits a coloring: a list of {TempName,Location} +%% where Location is {spill,M}. +%% {spill,M} denotes the Mth spilled node +%% +%% This version uses ETS tables +%% +%% Deficiencies: +%% - pessimistic coloring +%% + +-module(hipe_spillmin_color). + +-export([stackalloc/6]). + +%%-ifndef(DO_ASSERT). +%%-define(DO_ASSERT, true). +%%-endif. + +%%-ifndef(DEBUG). +%%-define(DEBUG,0). +%%-endif. + +%%--------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). + +%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. +-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). +-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). +-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). +-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). + +%% Emits a coloring: a list of {TempName,Location} +%% where Location is {spill,M}. +%% {spill,M} denotes the Mth spilled node + +-spec stackalloc(#cfg{}, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) -> + ?report2("building IG~n", []), + {IG, NumNodes} = build_ig(CFG, Target, TempMap), + {Cols, MaxColors} = + color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1), + SortedCols = lists:sort(Cols), + {remap_temp_map(SortedCols, TempMap, SpillIndex), SpillIndex+MaxColors}. + +%% Rounds a floating point value upwards +ceiling(X) -> + T = trunc(X), + case (X - T) of + Neg when Neg < 0.0 -> T; + Pos when Pos > 0.0 -> T + 1; + _ -> T + end. + +%% Emits a coloring: an unsorted list of {Temp,Location} +%% where Location is {spill,M}. +%% {spill,M} denotes the Mth spilled node +%% +%% Notes: +%% - Arguments: +%% IG: The interference graph +%% Min: The lower bound, the minimal number of colors tried. +%% Max: The upper bound, the maximal number of colors tried. +%% Safe: The number of colors that are guaranteed to work. This is +%% needed, because we reuse information from color() about how +%% many colors it used at the last try, but this is not guaranteed to +%% be a feasible solution because color might work differently using +%% more colors although it has successfully colored the graph with +%% fewer colors previously. Example: color(666) colors with 23 colors, +%% but color(23) fails. +%% We use Safe inefficently, because we run color 1 additional +%% time with the same argument if Safe is needed. +%% MaxNodes: The number of nodes in IG. +%% Target: Target specific information. +%% MaxDepth: The maximum recursion depth. +color_heuristic(IG, Min, Max, Safe, MaxNodes, Target, MaxDepth) -> + case MaxDepth of + 0 -> + case color(IG, ordsets:from_list(init_stackslots(Max)), + MaxNodes, Target) of + not_easily_colorable -> + color(IG, ordsets:from_list(init_stackslots(Safe)), + MaxNodes, Target); + Else -> + Else + end; + _ -> + %% This can be increased from 2, and by this the heuristic can be + %% exited earlier, but the same can be achived by decreasing the + %% recursion depth. This should not be decreased below 2. + case (Max - Min) < 2 of + true -> + case color(IG, ordsets:from_list(init_stackslots(Max)), + MaxNodes, Target) of + not_easily_colorable -> + color(IG, ordsets:from_list(init_stackslots(Safe)), + MaxNodes, Target); + Else -> + Else + end; + false -> + NumSlots = ceiling((Max - Min)/2) + Min, + case color(IG, ordsets:from_list(init_stackslots(NumSlots)), + MaxNodes, Target) of + not_easily_colorable -> + color_heuristic(IG, NumSlots, Max, + Safe, MaxNodes, Target, MaxDepth - 1); + {_TmpCols, TmpMaxColors} -> + color_heuristic(IG, Min, TmpMaxColors, + NumSlots, MaxNodes, Target, MaxDepth - 1) + end + end + end. + +%% Returns a new temp map with the spilled temporaries mapped to stack slots, +%% located after SpillIndex, according to Cols. +remap_temp_map(Cols, TempMap, SpillIndex) -> + remap_temp_map0(Cols, hipe_temp_map:to_substlist(TempMap), SpillIndex). + +remap_temp_map0([], _TempMap, _SpillIndex) -> + []; +remap_temp_map0([{_M, {spill, N}}|Xs], [{TempNr, {spill,_}}|Ys], SpillIndex) -> + [{TempNr, {spill, SpillIndex + N-1}}|remap_temp_map0(Xs, Ys, SpillIndex)]; +remap_temp_map0(Cols, [_Y|Ys], SpillIndex) -> + remap_temp_map0(Cols, Ys, SpillIndex). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** BUILD THE INTERFERENCE GRAPH *** +%% +%% Returns {Interference_graph, Number_Of_Nodes} +%% + +build_ig(CFG, Target, TempMap) -> + try build_ig0(CFG, Target, TempMap) + catch error:Rsn -> exit({regalloc, build_ig, Rsn}) + end. + +%% Creates an ETS table consisting of the keys given in List, with the values +%% being an integer which is the position of the key in List. +%% [1,5,7] -> {1,0} {5,1} {7,2} +%% etc. +setup_ets(List) -> + setup_ets0(List, ets:new(tempMappingTable, []), 0). + +setup_ets0([], Table, _N) -> + Table; +setup_ets0([X|Xs], Table, N) -> + ets:insert(Table, {X, N}), + setup_ets0(Xs, Table, N+1). + +build_ig0(CFG, Target, TempMap) -> + Live = Target:analyze(CFG), + TempMapping = map_spilled_temporaries(TempMap), + TempMappingTable = setup_ets(TempMapping), + NumSpilled = length(TempMapping), + IG = build_ig_bbs(Target:labels(CFG), CFG, Live, empty_ig(NumSpilled), + Target, TempMap, TempMappingTable), + ets:delete(TempMappingTable), + {normalize_ig(IG), NumSpilled}. + +build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) -> + IG; +build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) -> + Xs = bb(CFG, L, Target), + LiveOut = [X || X <- liveout(Live, L, Target), + hipe_temp_map:is_spilled(X, TempMap)], + LiveOutList = ordsets:to_list(LiveOut), + LiveOutListMapped = list_map(LiveOutList, TempMapping, []), + LiveOutSetMapped = ordsets:from_list(LiveOutListMapped), + {_, NewIG} = + build_ig_bb(Xs, LiveOutSetMapped, IG, Target, TempMap, TempMapping), + build_ig_bbs(Ls, CFG, Live, NewIG, Target, TempMap, TempMapping). + +build_ig_bb([], LiveOut, IG, _Target, _TempMap, _TempMapping) -> + {LiveOut, IG}; +build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) -> + {Live,NewIG} = + build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping), + build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping). + +build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) -> + {Def, Use} = def_use(X, Target, TempMap), + ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]), + DefListMapped = list_map(Def, TempMapping, []), + UseListMapped = list_map(Use, TempMapping, []), + DefSetMapped = ordsets:from_list(DefListMapped), + UseSetMapped = ordsets:from_list(UseListMapped), + NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG), + NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)), + {NewLive, NewIG}. + +%% Given a list of Keys and an ets-table returns a list of the elements +%% in Mapping corresponding to the Keys and appends Acc to this list. +list_map([], _Mapping, Acc) -> + Acc; +list_map([X|Xs], Mapping, Acc) -> + {_Key, Val} = hd(ets:lookup(Mapping, X)), + list_map(Xs, Mapping, [Val | Acc]). + +%% Returns an ordered list of spilled temporaries in TempMap +map_spilled_temporaries(TempMap) -> + map_spilled_temporaries0(hipe_temp_map:to_substlist(TempMap)). + +map_spilled_temporaries0([]) -> + []; +map_spilled_temporaries0([{N, {spill, _}}|Xs]) -> + [N | map_spilled_temporaries0(Xs)]; +map_spilled_temporaries0([_X|Xs]) -> + map_spilled_temporaries0(Xs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +interference_arcs([], _Live, IG) -> + IG; +interference_arcs([X|Xs], Live, IG) -> + interference_arcs(Xs, Live, i_arcs(X, Live, IG)). + +i_arcs(_X, [], IG) -> + IG; +i_arcs(X, [Y|Ys], IG) -> + i_arcs(X, Ys, add_edge(X, Y, IG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** COLORING *** +%% +%% Coloring is done straightforwardly: +%% - find the low-degree nodes, put them in low +%% - while low non-empty: +%% * remove x from low +%% * push x on stack +%% * decrement degree of neighbors of x +%% * for each neighbor y of low degree, put y on low +%% - when low empty: +%% - if graph empty, return stack +%% - otherwise +%% throw an exception (the caller should retry with more stack slots) + +color(IG, StackSlots, NumNodes, Target) -> + try + color_0(IG, StackSlots, NumNodes, Target) + catch + error:Rsn -> + ?error_msg("Coloring failed with ~p~n", [Rsn]), + ?EXIT(Rsn) + end. + +color_0(IG, StackSlots, NumNodes, Target) -> + ?report("simplification of IG~n", []), + K = ordsets:size(StackSlots), + Nodes = list_ig(IG), + Low = low_degree_nodes(Nodes, K), + ?report(" starting with low degree nodes ~p~n", [Low]), + EmptyStk = [], + case simplify(Low, NumNodes, IG, K, EmptyStk, Target) of + non_simplifiable -> not_easily_colorable; + Stk -> + ?report(" selecting colors~n", []), + select(Stk, IG, StackSlots, NumNodes) + end. + +%%%%%%%%%%%%%%%%%%%% +%% +%% Simplification: push all easily colored nodes on a stack; +%% when the list of easy nodes becomes empty, see if graph is +%% empty as well. If it is not, throw an exception and abort. +%% If it is empty, return the stack. +%% +%% Notes: +%% - Arguments: +%% Low: low-degree nodes (ready to color) +%% NumNodes: number of remaining nodes in graph +%% IG: interference graph +%% K: number of colors +%% Stk: stack of already simplified nodes +%% Target: Machine to compile for + +simplify(Low, NumNodes, IG, K, Stk, Target) -> + Vis = none_visited(NumNodes), + simplify_ig(Low, NumNodes, IG, K, Stk, Vis, Target). + +simplify_ig([], 0, _IG, _K, Stk, _Vis, _Target) -> + Stk; +simplify_ig([], N, _IG, _K, _Stk, _Vis, _Target) when N > 0 -> + ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]), + non_simplifiable; +simplify_ig([X|Xs], N, IG, K, Stk, Vis, Target) -> + ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]), + case is_visited(X, Vis) of + true -> + ?report(" node ~p already visited~n", [X]), + simplify_ig(Xs, N, IG, K, Stk, Vis, Target); + false -> + ?report("Stack ~w\n", [Stk]), + {NewLow, NewIG} = decrement_neighbors(X, Xs, IG, Vis, K), + ?report(" node ~w pushed\n(~w now ready)~n", [X, NewLow]), + NewStk = push_colored(X, Stk), + simplify_ig(NewLow, N-1, NewIG, K, NewStk, visit(X, Vis), Target) + end. + +decrement_neighbors(X, Xs, IG, Vis, K) -> + Ns = unvisited_neighbors(X, Vis, IG), + ?report(" node ~p has neighbors ~w\n(unvisited ~p)~n", + [X, neighbors(X, IG), Ns]), + decrement_each(Ns, Xs, IG, Vis, K). + +%% For each node, decrement its degree and check if it is now +%% a low-degree node. In that case, add it to the 'low list'. +decrement_each([], Low, IG, _Vis, _K) -> + {Low, IG}; +decrement_each([N|Ns], OldLow, IG, Vis, K) -> + {Low, CurrIG} = Res = decrement_each(Ns, OldLow, IG, Vis, K), + case is_visited(N, Vis) of + true -> + Res; + false -> + {D, NewIG} = decrement_degree(N, CurrIG), + if + D =:= K-1 -> + {[N|Low], NewIG}; + true -> + {Low, NewIG} + end + end. + +%%%%%%%%%%%%%%%%%%%% +%% +%% Returns a list of {Name,Location}, where Location is {spill,M} +%% +%% Note: we use pessimistic coloring here. +%% - we could use optimistic coloring: for spilled node, check if there is +%% an unused color among the neighbors and choose that. + +select(Stk, IG, PhysRegs, NumNodes) -> + select_colors(Stk, IG, none_colored(NumNodes), PhysRegs). + +select_colors([], _IG, _Cols, _PhysRegs) -> + ?report("all nodes colored~n", []), + {[], 0}; +select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) -> + ?report("color of ~p\n", [X]), + {Slot,NewCols} = select_color(X, IG, Cols, PhysRegs), + ?report("~p~n", [Slot]), + {Tail, MaxColor} = select_colors(Xs, IG, NewCols, PhysRegs), + NewMaxColor = erlang:max(Slot, MaxColor), + %% Since we are dealing with spills we label all our temporaries accordingly. + {[{X,{spill,Slot}} | Tail], NewMaxColor}. + +select_color(X, IG, Cols, PhysRegs) -> + UsedColors = get_colors(neighbors(X, IG), Cols), + Reg = select_unused_color(UsedColors, PhysRegs), + {Reg, set_color(X, Reg, Cols)}. + +%%%%%%%%%%%%%%%%%%%% + +get_colors([], _Cols) -> []; +get_colors([X|Xs], Cols) -> + case color_of(X, Cols) of + uncolored -> + get_colors(Xs, Cols); + {color, R} -> + [R|get_colors(Xs, Cols)] + end. + +select_unused_color(UsedColors, PhysRegs) -> + Summary = ordsets:from_list(UsedColors), + AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), + hd(AvailRegs). + +push_colored(X, Stk) -> + [{X, colorable} | Stk]. + +low_degree_nodes([], _K) -> []; +low_degree_nodes([{N,Info}|Xs], K) -> + ?report0("node ~p has degree ~p: ~w~n", [N, degree(Info), neighbors(Info)]), + Deg = degree(Info), + if + Deg < K -> + [N|low_degree_nodes(Xs, K)]; + true -> + low_degree_nodes(Xs, K) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +unvisited_neighbors(X, Vis, IG) -> + ordsets:from_list(unvisited(neighbors(X, IG), Vis)). + +unvisited([], _Vis) -> []; +unvisited([X|Xs], Vis) -> + case is_visited(X, Vis) of + true -> + unvisited(Xs, Vis); + false -> + [X|unvisited(Xs, Vis)] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** ABSTRACT DATATYPES *** +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% +%% The stack slot datatype +%% + +init_stackslots(NumSlots) -> + init_stackslots(NumSlots, []). + +init_stackslots(0, Acc) -> + Acc; +init_stackslots(NumSlots, Acc) -> + init_stackslots(NumSlots - 1, [NumSlots|Acc]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The ig datatype: +%% +%% Note: if we know the number of temps used, we can use a VECTOR +%% instead, which will speed up things. +%% +%% Note: later on, we may wish to add 'move-related' support. + +-record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}). + +empty_ig(NumNodes) -> + hipe_vectors:new(NumNodes, #ig_info{}). + +degree(Info) -> + Info#ig_info.degree. + +neighbors(Info) -> + Info#ig_info.neighbors. + +add_edge(X, X, IG) -> IG; +add_edge(X, Y, IG) -> + add_arc(X, Y, add_arc(Y, X, IG)). + +add_arc(X, Y, IG) -> + Info = hipe_vectors:get(IG, X), + Old = neighbors(Info), + New = Info#ig_info{neighbors = [Y|Old]}, + hipe_vectors:set(IG,X,New). + +normalize_ig(IG) -> + Size = hipe_vectors:size(IG), + normalize_ig(Size-1, IG). + +normalize_ig(-1, IG) -> + IG; +normalize_ig(I, IG) -> + Info = hipe_vectors:get(IG, I), + N = ordsets:from_list(neighbors(Info)), + NewInfo = Info#ig_info{neighbors = N, degree = length(N)}, + NewIG = hipe_vectors:set(IG, I, NewInfo), + normalize_ig(I-1, NewIG). + +neighbors(X, IG) -> + Info = hipe_vectors:get(IG, X), + Info#ig_info.neighbors. + +decrement_degree(X, IG) -> + Info = hipe_vectors:get(IG, X), + Degree = degree(Info), + NewDegree = Degree-1, + NewInfo = Info#ig_info{degree = NewDegree}, + {NewDegree, hipe_vectors:set(IG, X, NewInfo)}. + +list_ig(IG) -> + hipe_vectors:list(IG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The coloring datatype: + +none_colored(NumNodes) -> + hipe_vectors:new(NumNodes, uncolored). + +color_of(X, Cols) -> + hipe_vectors:get(Cols, X). + +set_color(X, R, Cols) -> + hipe_vectors:set(Cols, X, {color, R}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Note: there might be a slight gain in separating the two versions +%% of visit/2 and visited/2. (So that {var,X} selects X and calls +%% the integer version. + +none_visited(NumNodes) -> + hipe_vectors:new(NumNodes, false). + +visit(X, Vis) -> + hipe_vectors:set(Vis, X, true). + +is_visited(X, Vis) -> + hipe_vectors:get(Vis, X). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** INTERFACES TO OTHER MODULES *** +%% + +liveout(CFG, L, Target) -> + ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). + +bb(CFG, L, Target) -> + hipe_bb:code(Target:bb(CFG, L)). + +def_use(X, Target, TempMap) -> + Defines = [Y || Y <- reg_names(Target:defines(X), Target), + hipe_temp_map:is_spilled(Y, TempMap)], + Uses = [Z || Z <- reg_names(Target:uses(X), Target), + hipe_temp_map:is_spilled(Z, TempMap)], + {Defines, Uses}. + +reg_names(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl new file mode 100644 index 0000000000..c58906c389 --- /dev/null +++ b/lib/hipe/opt/hipe_spillmin_scan.erl @@ -0,0 +1,559 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% =========================================================================== +%% Copyright (c) 2002 by Niklas Andersson, Andreas Lundin, and Erik Johansson. +%% =========================================================================== +%% Module : hipe_spillmin_scan +%% Purpose : Optimizes the number of stack slots used by using a +%% "linear-scan algorithm" to allocate stack slots. +%% Notes : * This is a simplified implementation of +%% "Linear Scan Register Allocation" by +%% Massimiliano Poletto & Vivek Sarkar described in +%% ACM TOPLAS Vol 21, No 5, September 1999. +%% +%% * This implementation is target-independent and +%% requires a target specific interface module +%% as argument. +%% +%% * Based on the hipe_ls_regalloc module by Erik Johansson +%% +%% History : * 2002-04-01, NA & AL: Created +%% * 2002-10-08, Happi: Cleanup and speedup +%% ============================================================================ +%% Exported functions (short description): +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +%% {Coloring, NumberOfSpills} +%% Takes a CFG and the TempMap from register allocation and returns +%% a coloring of stack slots. +%% StackSlots should be a list of used stack slots, usually empty at +%% first call to function. +%% SpillIndex is the the first position we will spill to, usually 0. +%% TempMap is the TempMap from the register allocation +%% +%% The Coloring will be in the form of the "allocation datastructure" +%% described below, that is, a list of tuples on the form +%% {Name, {spill, SpillIndex}} +%% The NumberOfSpills is either 0 indicating no spill or the +%% SpillIndex of the last spilled register. +%% +%% mapmerge +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_spillmin_scan). + +-export([stackalloc/6]). + +%%-define(DEBUG, 1). +-define(HIPE_INSTRUMENT_COMPILER, true). + +%%---------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) +%% Calculates an allocation of stack slots using a linear_scan algorithm. +%% There are three steps in the algorithm: +%% 1. Calculate live-ranges for all spilled temporaries. +%% 2. Calculate live-intervals for each temporary. +%% The live interval consists of a start position and a end position +%% these are the first definition and last use of the temporary +%% given as instruction numbers in a breadth-first traversal of the +%% control-flow-graph. +%% 3. Do a linear scan allocation over the live intervals. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec stackalloc(#cfg{}, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> + ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), + %% Step 1: Calculate liveness (Call external implementation.) + Liveness = liveness(CFG, Target), + ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), + USIntervals = calculate_intervals(CFG, Liveness, Options, + Target, TempMap), + %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), + Intervals = sort_on_start(USIntervals), + ?debug_msg("sort intervals (done) ~w\n", [erlang:statistics(runtime)]), + ?debug_msg("Intervals ~w\n", [Intervals]), + ?debug_msg("No intervals: ~w\n", [length(Intervals)]), + ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), + Allocation = allocate(Intervals, StackSlots, SpillIndex, Target), + ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), + Allocation. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% Step 2: Calculate live-intervals for each temporary. %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% calculate_intervals(CFG, Liveness, Options, Target, TempMap) +%% CFG: The Control-Flow Graph. +%% Liveness: A map of live-in and live-out sets for each Basic-Block. +%% TempMap: The TempMap from the register allocation +%% +%% This function will only consider the intervals of the temporaries +%% that have been spilled during register allocation, and will ignore +%% all other. +%%- - - - - - - - - - - - - - - - - - - - - - - - +calculate_intervals(CFG, Liveness, _Options, Target, TempMap) -> + Interval = empty_interval(Target:number_of_temporaries(CFG)), + Worklist = Target:reverse_postorder(CFG), + intervals(Worklist, Interval, 1, CFG, Liveness, Target, TempMap). + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% intervals(WorkList, Intervals, InstructionNr, +%% CFG, Liveness, Target, TempMap) +%% WorkList: List of BB-names to handle. +%% Intervals: Intervals seen so far (sorted on register names). +%% InstructionNr: The number of examined instructions. +%% CFG: The Control-Flow Graph. +%% Liveness: A map of live-in and live-out sets for each Basic-Block. +%% Target: The backend for which we generate native code. +%% TempMap: The TempMap from the register allocation +%% +%% This function will only consider the intervals of the temporaries +%% that have been spilled during register allocation, and will ignore +%% all other. +%%- - - - - - - - - - - - - - - - - - - - - - - - +intervals([L|ToDO], Intervals, InstructionNr, CFG, Liveness, Target, + TempMap) -> + ?debug_msg("Block ~w\n", [L]), + %% Add all variables that are live at the entry of this block + %% to the interval data structure. + + %% Only consider spilled temporaries in LiveIn + LiveIn = [X || X <- livein(Liveness, L, Target), + hipe_temp_map:is_spilled(X, TempMap)], + Intervals2 = add_def_point(LiveIn, InstructionNr, Intervals), + + %% Only consider spilled temporaries in LiveOut + LiveOut = [X2 || X2 <- liveout(Liveness, L, Target), + hipe_temp_map:is_spilled(X2, TempMap)], + ?debug_msg("In ~w -> Out ~w\n", [LiveIn, LiveOut]), + + %% Traverse this block instruction by instruction and add all + %% uses and defines to the intervals. + Code = hipe_bb:code(bb(CFG, L, Target)), + {Intervals3, NewINr} = traverse_block(Code, InstructionNr+1, + Intervals2, Target, TempMap), + + %% Add end points for the temporaries that are in the live-out set. + Intervals4 = add_use_point(LiveOut, NewINr+1, Intervals3), + + intervals(ToDO, Intervals4, NewINr+1, CFG, Liveness, Target, TempMap); +intervals([], Intervals, _, _, _, _, _) -> + %% Return the calculated intervals + interval_to_list(Intervals). + %% Intervals. + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% traverse_block(Code, InstructionNo, Intervals, Unchanged) +%% Examine each instruction in the Code: +%% For each temporary T used or defined by instruction number N: +%% extend the interval of T to include N. +%% TempMap: The TempMap from the register allocation +%% +%% This function will only consider the the instruction that have temporaries +%% that have been spilled during register allocation, and will ignore +%% all other. +%%- - - - - - - - - - - - - - - - - - - - - - - - + +traverse_block([Instruction|Is], InstrNo, Intervals, Target, TempMap) -> + %% Get used temps. + %% Only consider spilled temporaries in the Use set. + UsesSet = [X || X <- uses(Instruction, Target), + hipe_temp_map:is_spilled(X, TempMap)], + %% Get defined temps. + %% Only consider spilled temporaries in the Def set. + DefsSet = [X2 || X2 <- defines(Instruction, Target), + hipe_temp_map:is_spilled(X2, TempMap)], + %% Only consider those temps that starts or ends their lifetime + %% within the basic block (that is remove all Unchanged temps). + Intervals1 = add_def_point( DefsSet, InstrNo, Intervals), + %% Extend the intervals for these temporaries to include InstrNo. + Intervals2 = add_use_point(UsesSet, InstrNo, Intervals1), + %% Handle the next instruction. + traverse_block(Is, InstrNo+1, Intervals2, Target, TempMap); +traverse_block([], InstrNo, Intervals, _, _) -> + %% Return the new intervals and the number of the next instruction. + {Intervals,InstrNo}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% Step 3. Do a linear scan allocation over the live intervals. %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% allocate(Intervals, PhysicalRegisters, Target) +%% +%% This function performs the linear scan algorithm. +%% Intervals contains the start and stop position of each spilled temporary, +%% sorted on increasing startpositions +%% StackSlots is a list of available Stack slots to use. If they run out a +%% new stack slot is allocated from an (in theory) infinite domain. +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - +allocate(Intervals, StackSlots, SpillIndex, Target) -> + AllocatedSlots = empty_allocation(), + allocate(Intervals, StackSlots, [], AllocatedSlots, SpillIndex, Target). + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% allocate(Intervals, Free, Active, Allocated, SpillIndex, Target) +%% Iterates on each temporary interval. +%% Intervals: The list of temporary intervals. +%% Free: Currently available stack slots. +%% Active: Currently used stack slots (sorted on increasing +%% interval enpoints) +%% Allocated: The mapping of register names to spill positions. +%% SpillIndex: The number of spilled registers. +%%- - - - - - - - - - - - - - - - - - - - - - - - +allocate([TempInt|TIS], Free, Active, Alloc, SpillIndex, Target) -> + %% Remove from the active list those temporaries whose interval + %% ends before the start of the current interval. + {NewActive, NewFree} = + expire_old_intervals(Active, startpoint(TempInt), Free, Target), + %% Get the name of the temp in the current interval. + Temp = reg(TempInt), + case NewFree of + [] -> + %% There are no free spill slots, so we allocate a new one + NewSpillIndex = SpillIndex+1, + NewAlloc = spillalloc(Temp, SpillIndex, Alloc), + NewActive2 = add_active(endpoint(TempInt), SpillIndex, NewActive), + allocate(TIS, NewFree, NewActive2, NewAlloc, NewSpillIndex, Target); + [FreeSpillslot | Spillslots] -> + %% The spill slot FreeSpillSlot is available, let's use it. + allocate(TIS, Spillslots, + add_active(endpoint(TempInt), FreeSpillslot, NewActive), + spillalloc(Temp, FreeSpillslot, Alloc), + SpillIndex, Target) + end; +allocate([], _, _, Alloc, SpillIndex, _) -> + %% No more register intervals to handle; + %% return the result sorted on regnames. + {lists:sort(Alloc), SpillIndex}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% expire_old_intervals(ActiveTemps, CurrentPos, FreeRegisters) +%% Remove all temporaries that have live-ranges that ends before the +%% current position from the active list and put them into the free +%% list instead. +%% +%% --------------------------------------------------------------------- +expire_old_intervals([Act|Acts] = AllActives, CurrentPos, Free, Target) -> + %% Does the live-range of the first active register end before + %% the current position? + + %% We expand multimove before regalloc, ignore the next 2 lines. + %% %% We don't free registers that end at the current position, + %% %% since a multimove can decide to do the moves in another order... + case active_endpoint(Act) =< CurrentPos of + true -> %% Yes -> Then we can free that register. + Spillslot = active_spillslot(Act), + %% Add the spillslot to the free pool. + NewFree = [Spillslot|Free], + %% Here we could try appending the register to get a more + %% widespread use of registers. + %% Free ++ [active_spillslot(Act)]); + expire_old_intervals(Acts, CurrentPos, NewFree, Target); + false -> + %% No -> Then we cannot free any more temporaries. + %% (Since they are sorted on endpoints...) + {AllActives, Free} + end; +expire_old_intervals([], _, Free, _) -> + {[], Free}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% D A T A S T R U C T U R E S %% +%% & %% +%% A U X I L I A R Y F U N C T I O N S %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The "allocation datastructure" +%% +%% This is an order list of register names paired with their allocations. +%% {Name, Allocation} +%% Since we are only dealing with spills, the allocation will look like: +%% {spill, SpillIndex} +%% +%% --------------------------------------------------------------------- + +empty_allocation() -> []. + +spillalloc(Name, N, Allocation) -> [{Name,{spill,N}}|Allocation]. + +%% spillalloc(Name,N,[{Name,_}|A]) -> +%% ?debug_msg("Spilled ~w\n",[Name]), +%% [{Name,{spill,N}}|A]; +%% spillalloc(Name,N,[{Name2,Binding}|Bindings]) when Name > Name2 -> +%% [{Name2,Binding}|spillalloc(Name,N,Bindings)]; +%% spillalloc(Name,N,Bindings) -> +%% [{Name,{spill,N}}|Bindings]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The active datastructure. +%% Keeps tracks of currently active (allocated) spill slots. +%% It is sorted on end points in the intervals +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +add_active(Endpoint, SpillSlot, [A1={P1,_}|Active]) when P1 < Endpoint -> + [A1|add_active(Endpoint, SpillSlot, Active)]; +add_active(Endpoint, SpillSlot, Active) -> + [{Endpoint, SpillSlot}|Active]. + +active_spillslot({_,SpillSlot}) -> + SpillSlot. + +active_endpoint({EndPoint,_}) -> + EndPoint. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Interval data structure. +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - + +%% mk_interval(Name, Start, End) -> +%% {Name, Start, End}. + +endpoint({_R,_S,Endpoint}) -> + Endpoint. + +startpoint({_R,Startpoint,_E}) -> + Startpoint. + +reg({RegName,_S,_E}) -> + RegName. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Intervals data structure. + +sort_on_start(I) -> + lists:keysort(2, I). + +-ifdef(gb_intervals). +empty_interval(_) -> + gb_trees:empty(). + +interval_to_list(Intervals) -> + lists:flatten( + lists:map( + fun({T, I}) when is_list(I) -> + lists:map( + fun ({none, End}) -> + {T,End,End}; + ({Beg, none}) -> + {T,Beg, Beg} + end, + I); + ({T,{B,E}}) -> {T, B, E} + end, + gb_trees:to_list(Intervals))). + +add_use_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case gb_trees:lookup(Temp, Intervals) of + %% This temp has an old interval... + {value, Value} -> + %% ... extend it. + extend_interval(Pos, Value); + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos} + end, + %% Add or update the extended interval. + Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals), + %% Add the rest of the temporaries. + add_use_point(Temps, Pos, Intervals2); +add_use_point([], _, I) -> + %% No more to add return the interval. + I. + +add_def_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case gb_trees:lookup(Temp, Intervals) of + %% This temp has an old interval... + {value, Value} -> + %% ... extend it. + extend_interval(Pos, Value); + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos} + end, + %% Add or update the extended interval. + Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals), + %% Add the rest of the temporaries. + add_def_point(Temps, Pos, Intervals2); +add_def_point([], _, I) -> + %% No more to add return the interval. + I. + +extend_interval(Pos, {Beginning, End}) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occurs after the end of the interval, then + %% extend the end to this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}. + +extend_def_interval(Pos, {Beginning, End}) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occurs after the end of the interval, then + %% extend the end to this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}; +extend_def_interval(Pos, [{Beginning, none}|More]) -> + [{Pos,none}, {Beginning, none}|More]; +extend_def_interval(Pos, Intervals) -> + {Pos, Pos}. + +-else. %% ifdef gb_intervals + +empty_interval(N) -> + hipe_vectors:new(N, none). + +interval_to_list(Intervals) -> + add_indices(hipe_vectors:vector_to_list(Intervals), 0). + +add_indices([{B, E}|Xs], N) -> + [{N, B, E}|add_indices(Xs, N+1)]; +add_indices([List|Xs], N) when is_list(List) -> + flatten(List, N, Xs); +add_indices([none|Xs], N) -> + add_indices(Xs, N+1); +add_indices([], _N) -> []. + +flatten([{none, End}|Rest], N, More) -> + [{N,End,End} | flatten(Rest, N, More)]; +flatten([{Beg, none}|Rest], N ,More) -> + [{N,Beg,Beg} | flatten(Rest, N, More)]; +flatten([], N, More) -> + add_indices(More, N+1). + +add_use_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case hipe_vectors:get(Intervals, Temp) of + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos}; + %% This temp has an old interval... + Value -> + %% ... extend it. + extend_interval(Pos, Value) + end, + %% Add or update the extended interval. + Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval), + %% Add the rest of the temporaries. + add_use_point(Temps, Pos, Intervals2); +add_use_point([], _, I) -> + %% No more to add return the interval. + I. + +add_def_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case hipe_vectors:get(Intervals, Temp) of + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos}; + %% This temp has an old interval... + Value -> + %% ... extend it. + extend_interval(Pos, Value) + end, + %% Add or update the extended interval. + Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval), + %% Add the rest of the temporaries. + add_def_point(Temps, Pos, Intervals2); +add_def_point([], _, I) -> + %% No more to add return the interval. + I. + +extend_interval(Pos, {Beginning, End}) + when is_integer(Beginning), is_integer(End) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occurs after the end of the interval, then + %% extend the end to this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}. + +-endif. %% gb_intervals + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +liveness(CFG, Target) -> + Target:analyze(CFG). + +bb(CFG, L, Target) -> + Target:bb(CFG, L). + +livein(Liveness, L, Target) -> + regnames(Target:livein(Liveness, L), Target). + +liveout(Liveness, L, Target) -> + regnames(Target:liveout(Liveness, L), Target). + +uses(I, Target) -> + regnames(Target:uses(I), Target). + +defines(I, Target) -> + regnames(Target:defines(I), Target). + +regnames(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. diff --git a/lib/hipe/opt/hipe_target_machine.erl b/lib/hipe/opt/hipe_target_machine.erl new file mode 100644 index 0000000000..be9f095429 --- /dev/null +++ b/lib/hipe/opt/hipe_target_machine.erl @@ -0,0 +1,93 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% INTERFACE TO TARGET MACHINE MODEL +%% +%% Interfaces the instruction scheduler to the (resource) machine model. + +-module(hipe_target_machine). +-export([init_resources/1, + init_instr_resources/2, + resources_available/4, + advance_cycle/1 + ]). +-export([raw_latency/2, + war_latency/2, + waw_latency/2, + %% m_raw_latency/2, + %% m_war_latency/2, + %% m_waw_latency/2, + m_raw_latency/0, + m_war_latency/0, + m_waw_latency/0, + br_to_unsafe_latency/2, + unsafe_to_br_latency/2, + br_br_latency/2 + ]). + +-define(target,hipe_ultra_mod2). + +init_resources(X) -> + ?target:init_resources(X). + +init_instr_resources(X,Y) -> + ?target:init_instr_resources(X,Y). + +resources_available(X,Y,Z,W) -> + ?target:resources_available(X,Y,Z,W). + +advance_cycle(X) -> + ?target:advance_cycle(X). + +raw_latency(From,To) -> + ?target:raw_latency(From,To). + +war_latency(From,To) -> + ?target:war_latency(From,To). + +waw_latency(From,To) -> + ?target:waw_latency(From,To). + +%% m_raw_latency(From,To) -> +%% ?target:m_raw_latency(From,To). + +%% m_war_latency(From,To) -> +%% ?target:m_war_latency(From,To). + +%% m_waw_latency(From,To) -> +%% ?target:m_waw_latency(From,To). + +m_raw_latency() -> + ?target:m_raw_latency(). + +m_war_latency() -> + ?target:m_war_latency(). + +m_waw_latency() -> + ?target:m_waw_latency(). + +br_to_unsafe_latency(Br,U) -> + ?target:br_to_unsafe_latency(Br,U). + +unsafe_to_br_latency(U,Br) -> + ?target:unsafe_to_br_latency(U,Br). + +br_br_latency(Br1,Br2) -> + ?target:br_br_latency(Br1,Br2). diff --git a/lib/hipe/opt/hipe_ultra_mod2.erl b/lib/hipe/opt/hipe_ultra_mod2.erl new file mode 100644 index 0000000000..b039eaee80 --- /dev/null +++ b/lib/hipe/opt/hipe_ultra_mod2.erl @@ -0,0 +1,239 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ULTRASPARC MACHINE MODEL +%% +%% This module is used by the scheduler. +%% The following interface is used: +%% ... +%% +%% NOTES: +%% - the machine model is simple (on the verge of simplistic) +%% * all FUs are pipelined => model only one cycle at a time +%% * instruction latencies are mostly 1 +%% * floating point is left for later (I _think_ it works, but ...) +%% - conservative: instructions that require multiple resources are +%% modelled as 'single'; instead, they could reserve IEU+BR or whatever +%% - possibly inefficient: I think machine state model could be turned into +%% a bitvector. + +-module(hipe_ultra_mod2). +-export([init_resources/1, + init_instr_resources/2, + resources_available/4, + advance_cycle/1 + ]). +-export([raw_latency/2, + war_latency/2, + waw_latency/2, + %% m_raw_latency/2, + %% m_war_latency/2, + %% m_waw_latency/2, + m_raw_latency/0, + m_war_latency/0, + m_waw_latency/0, + br_to_unsafe_latency/2, + unsafe_to_br_latency/2, + br_br_latency/2 + ]). + +-include("../sparc/hipe_sparc.hrl"). + +-define(debug(Str,Args),ok). +%-define(debug(Str,Args),io:format(Str,Args)). + +-define(debug_ultra(Str,Args),ok). +%-define(debug_ultra(Str,Args),io:format(Str,Args)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Straightforward and somewhat simplistic model for UltraSparc: +%% - only one cycle at a time is modelled +%% - resources are simplified: +%% * ieu0, ieu1, ieu, mem, br, single +%% * per-cycle state = done | { I0, I1, NumI, X, Mem, Br } +%% * unoptimized representation (could be bit vector) + +init_resources(_Size) -> + ?debug_ultra('init res ~p~n',[_Size]), + empty_state(). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init_instr_resources(N,Nodes) -> + ultra_instr_rsrcs(Nodes,hipe_vectors:new(N, '')). + +ultra_instr_rsrcs([],I_res) -> I_res; +ultra_instr_rsrcs([N|Ns],I_res) -> + ultra_instr_rsrcs(Ns,ultra_instr_type(N,I_res)). + +ultra_instr_type({N,I},I_res) -> + hipe_vectors:set(I_res,N-1,instr_type(I)). + +instr_type(I) -> + case I of + #move{} -> + ieu; + #multimove{} -> %% TODO: expand multimoves before scheduling + ieu; + #alu{} -> + case hipe_sparc:alu_operator(I) of + '>>' -> ieu0; + '<<' -> ieu0; + _ -> ieu + end; + #alu_cc{} -> + ieu1; + #sethi{} -> + ieu; + #load{} -> + mem; + #store{} -> + mem; + #b{} -> + br; + #br{} -> + br; + #goto{} -> + br; + #jmp_link{} -> % imprecise; should be mem+br? + single; + #jmp{} -> % imprecise + br; + #call_link{} -> % imprecise; should be mem+br? + single; + #cmov_cc{} -> % imprecise + single; + #cmov_r{} -> % imprecise + single; + #load_atom{} -> % should be resolved to sethi/or + single; + #load_address{} -> % should be resolved to sethi/or + single; + #load_word_index{} -> % should be resolved to sethi/or + single; + %% uncommon types: + #label{} -> + none; + #nop{} -> + none; + #comment{} -> + none; + _ -> + exit({ultrasparc_instr_type,{cant_schedule,I}}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +resources_available(_Cycle, I, Rsrc, I_res) -> + res_avail(instruction_resource(I_res, I), Rsrc). + +instruction_resource(I_res, I) -> + hipe_vectors:get(I_res, I-1). + +%% The following function checks resource availability. +%% * all function units are assumed to be fully pipelined, so only +%% one cycle at a time is modelled. +%% * for IEU0 and IEU1, these must precede all generic IEU instructions +%% (handled by X bit) +%% * at most 2 integer instructions can issue in a cycle +%% * mem is straightforward +%% * br closes the cycle (= returns done). +%% * single requires an entirely empty state and closes the cycle + +res_avail(ieu0, { free, I1, NumI, free, Mem, Br }) + when is_integer(NumI), NumI < 2 -> + { yes, { occ, I1, NumI+1, free, Mem, Br }}; +res_avail(ieu1, { _I0, free, NumI, free, Mem, Br }) + when is_integer(NumI), NumI < 2 -> + { yes, { free, occ, NumI+1, free, Mem, Br }}; +res_avail(ieu, { I0, I1, NumI, _X, Mem, Br }) + when is_integer(NumI), NumI < 2 -> + { yes, { I0, I1, NumI+1, occ, Mem, Br }}; +res_avail(mem, { I0, I1, NumI, X, free, Br }) -> + { yes, { I0, I1, NumI, X, occ, Br }}; +res_avail(br, { _I0, _I1, _NumI, _X, _Mem, free }) -> + { yes, done }; +res_avail(single, { free, free, 0, free, free, free }) -> + { yes, done }; +res_avail(_, _) -> + no. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +advance_cycle(_Rsrc) -> + empty_state(). + +empty_state() -> { free, free, 0, free, free, free }. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Latencies are taken from UltraSparc hardware manual +%% +%% *** UNFINISHED *** +%% more precisely, they are taken from my memory of the US-manual +%% at the moment. +%% +%% Note: all ld/st are assumed to hit in the L1 cache (D-cache), +%% which is sort of imprecise. + +raw_latency(alu, store) -> 0; +raw_latency(load, _) -> 2; % only if load is L1 hit +raw_latency(alu_cc, b) -> 0; +raw_latency(_I0, _I1) -> + 1. + +war_latency(_I0, _I1) -> + 0. + +waw_latency(_I0, _I1) -> + 1. + +%% *** UNFINISHED *** +%% At present, all load/stores are assumed to hit in the L1 cache, +%% which isn't really satisfying. + +%% m_raw_latency(_St, _Ld) -> +%% 1. +%% +%% m_war_latency(_Ld, _St) -> +%% 1. +%% +%% m_waw_latency(_St1, _St2) -> +%% 1. + +%% Use these for 'default latencies' = do not permit reordering. + +m_raw_latency() -> + 1. + +m_war_latency() -> + 1. + +m_waw_latency() -> + 1. + +br_to_unsafe_latency(_BrTy, _UTy) -> + 0. + +unsafe_to_br_latency(_UTy, _BrTy) -> + 0. + +br_br_latency(_BrTy1, _BrTy2) -> + 0. diff --git a/lib/hipe/opt/hipe_ultra_prio.erl b/lib/hipe/opt/hipe_ultra_prio.erl new file mode 100644 index 0000000000..9e2c1a0489 --- /dev/null +++ b/lib/hipe/opt/hipe_ultra_prio.erl @@ -0,0 +1,304 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% PRIORITY HANDLING AND PRIORITY CALCULATION +%% +%% Handling of ready nodes and priorities. +%% Priorities are mainly from the critical path. More priorities are added. +%% * One version is adding priorities just depending on the instr, so +%% for example loads get higher priority than stores, and ordered +%% after reg's and offset for better cache performance. +%% * The other version gives higher priority to a node that adds more new +%% nodes to the ready list. This one is maybe not so effectively +%% implemented, but was added too late for smarter solutions. +%% One version is commented away + +-module(hipe_ultra_prio). +-export([init_ready/2, + init_instr_prio/2, + %% initial_ready_set/4, + next_ready/7, + add_ready_nodes/2, + insert_node/3 + ]). + +-include("../sparc/hipe_sparc.hrl"). + +% At first, only nodes with no predecessors are selected. +% - if R is empty, there is an error (unless BB itself is empty) + +%% Arguments : Size - size of ready-array +%% Preds - array with number of predecessors for each node +%% Returns : An array with list of ready-nodes for each cycle. + +init_ready(Size, Preds) -> + P = hipe_vectors:size(Preds), + Ready = hipe_vectors:new(Size, []), + R = initial_ready_set(1, P, Preds, []), + hipe_vectors:set(Ready, 0, R). + +init_instr_prio(N, DAG) -> + critical_path(N, DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : initial_ready_set +%% Argument : M - current node-index +%% N - where to stop +%% Preds - array with number of predecessors for each node +%% Ready - list with ready-nodes +%% Returns : Ready - list with ready-nodes +%% Description : Finds all nodes with no predecessors and adds them to ready. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +initial_ready_set(M, N, Preds, Ready) -> + if + M > N -> + Ready; + true -> + case hipe_vectors:get(Preds, M-1) of + 0 -> + initial_ready_set(M+1, N, Preds, [M|Ready]); + V when is_integer(V), V > 0 -> + initial_ready_set(M+1, N, Preds, Ready) + end + end. + +%% The following handles the nodes ready to schedule: +%% 1. select the ready queue of given cycle +%% 2. if queue empty, return none +%% 3. otherwise, remove entry with highest priority +%% and return {next,Highest_Prio,NewReady} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : next_ready +%% Argument : C - current cycle +%% Ready - array with ready nodes +%% Prio - array with cpath-priorities for all nodes +%% Nodes - indexed list [{N, Instr}] +%% Returns : none / {next,Highest_Prio,NewReady} +%% Description : 1. select the ready queue of given cycle +%% 2. if queue empty, return none +%% 3. otherwise, remove entry with highest priority +%% and return {next,Highest_Prio,NewReady} where Highest_Prio +%% = Id of instr and NewReady = updated ready-array. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +next_ready(C, Ready, Prio, Nodes, DAG, Preds, Earl) -> + Curr = hipe_vectors:get(Ready, C-1), + case Curr of + [] -> + none; + Instrs -> + {BestI,RestIs} = + get_best_instr(Instrs, Prio, Nodes, DAG, Preds, Earl, C), + {next,BestI,hipe_vectors:set(Ready,C-1,RestIs)} + end. + +% next_ready(C,Ready,Prio,Nodes) -> +% Curr = hipe_vectors:get(Ready,C-1), +% case Curr of +% [] -> +% none; +% Instrs -> +% {BestInstr,RestInstrs} = get_best_instr(Instrs, Prio, Nodes), +% {next,BestInstr,hipe_vectors:set(Ready,C-1,RestInstrs)} +% end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_best_instr +%% Argument : Instrs - list of node-id's +%% Prio - array with cpath-priorities for the nodes +%% Nodes - indexed list [{Id, Instr}] +%% Returns : {BestSoFar, Rest} - Id of best instr and the rest of id's +%% Description : Returns the id of the instr that is the best choice. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +get_best_instr([Instr|Instrs], Prio, Nodes, DAG, Preds, Earl, C) -> + get_best_instr(Instrs, [], Instr, Prio, Nodes, DAG, Preds, Earl, C). + +get_best_instr([], Rest, BestSoFar, _Prio, _Nodes, _DAG, _Preds, _Earl, _C) -> + {BestSoFar, Rest}; +get_best_instr([Instr|Instrs], PassedInstrs, BestSoFar, Prio, Nodes, + DAG, Preds, Earl, C) -> + case better(Instr, BestSoFar, Prio, Nodes, DAG, Preds, Earl, C) of + true -> + get_best_instr(Instrs, [BestSoFar|PassedInstrs], + Instr, Prio, Nodes, DAG, Preds, Earl, C); + false -> + get_best_instr(Instrs, [Instr|PassedInstrs], BestSoFar, Prio, + Nodes, DAG, Preds, Earl, C) + end. + +% get_best_instr([Instr|Instrs], Prio, Nodes) -> +% get_best_instr(Instrs, [], Instr, Prio, Nodes). + +% get_best_instr([], Rest, BestSoFar, Prio, Nodes) -> {BestSoFar, Rest}; +% get_best_instr([Instr|Instrs], PassedInstrs, BestSoFar, Prio, Nodes) -> +% case better(Instr, BestSoFar, Prio, Nodes) of +% true -> +% get_best_instr(Instrs, [BestSoFar|PassedInstrs], +% Instr, Prio, Nodes); +% false -> +% get_best_instr(Instrs, [Instr|PassedInstrs],BestSoFar, Prio, Nodes) +% end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : better +%% Argument : Instr1 - Id of instr 1 +%% Instr2 - Id of instr 2 +%% Prio - array with cpath-priorities for the nodes +%% Nodes - indexed list [{Id, Instr}] +%% Returns : true if Instr1 has higher priority than Instr2 +%% Description : Checks if Instr1 is a better choice than Instr2 for scheduling +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +better(Instr1, Instr2, Prio, Nodes, DAG, Preds, Earl, C) -> + better_hlp(priority(Instr1, Prio, Nodes, DAG, Preds, Earl, C), + priority(Instr2, Prio, Nodes, DAG, Preds, Earl, C)). + +better_hlp([], []) -> false; +better_hlp([], [_|_]) -> false; +better_hlp([_|_], []) -> true; +better_hlp([X|Xs], [Y|Ys]) -> (X > Y) or ((X =:= Y) and better_hlp(Xs,Ys)). + +%% +%% Returns the instr corresponding to id +%% +get_instr(InstrId, [{InstrId,Instr}|_]) -> Instr; +get_instr(InstrId, [_|Xs]) -> get_instr(InstrId, Xs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : priority +%% Argument : InstrId - Id +%% Prio - array with cpath-priorities for the nodes +%% Nodes - indexed list [{Id, Instr}] +%% Returns : PrioList - list of priorities [MostSignificant, LessSign, ...] +%% Description : Returns a list of priorities where the first element is the +%% cpath-priority and the rest are added depending on what kind +%% of instr it is. Used to order loads/stores sequentially and +%% there is possibility to add whatever stuff... +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +priority(InstrId, Prio, Nodes, DAG, Preds, Earl, C) -> + {ReadyNodes,_,_,_} = hipe_schedule:delete_node(C,InstrId,DAG,Preds,Earl), + Instr = get_instr(InstrId, Nodes), + Prio1 = hipe_vectors:get(Prio, InstrId-1), + Prio2 = length(ReadyNodes), + PrioRest = + case Instr of + #load_atom{} -> + [3]; + #move{} -> + [3]; + #load{} -> + Src = hipe_sparc:load_src(Instr), + Off = hipe_sparc:load_off(Instr), + case hipe_sparc:is_reg(Off) of + false -> [3, + -(hipe_sparc:reg_nr(Src)), + -(hipe_sparc:imm_value(Off))]; + true -> [1] + end; + #store{} -> + Src = hipe_sparc:store_dest(Instr), + Off = hipe_sparc:store_off(Instr), + case hipe_sparc:is_reg(Off) of + false -> [2, + -(hipe_sparc:reg_nr(Src)), + -(hipe_sparc:imm_value(Off))]; + true -> [1] + end; + _ -> [0] + end, + [Prio1,Prio2|PrioRest]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_ready_nodes +%% Argument : Nodes - list of [{Cycle,Id}] +%% Ready - array of ready nodes for all cycles +%% Returns : NewReady - updated ready-array +%% Description : Gets a list of instrs and adds them to the ready-array +%% to the corresponding cycle. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +add_ready_nodes([], Ready) -> Ready; +add_ready_nodes([{C,I}|Xs], Ready) -> + add_ready_nodes(Xs, insert_node(C, I, Ready)). + +insert_node(C, I, Ready) -> + Old = hipe_vectors:get(Ready, C-1), + hipe_vectors:set(Ready, C-1, [I|Old]). + +%% +%% Computes the latency for the "most expensive" way through the graph +%% for all nodes. Returns an array of priorities for all nodes. +%% +critical_path(N, DAG) -> + critical_path(1, N, DAG, hipe_vectors:new(N, -1)). + +critical_path(M, N, DAG, Prio) -> + if + M > N -> + Prio; + true -> + critical_path(M+1, N, DAG, cpath(M, DAG, Prio)) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cpath +%% Argument : M - current node id +%% DAG - the dependence graph +%% Prio - array of priorities for all nodes +%% Returns : Prio - updated prio array +%% Description : If node has prio -1, it has not been visited +%% - otherwise, compute priority as max of priorities of +%% successors (+ latency) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +cpath(M, DAG, Prio) -> + InitPrio = hipe_vectors:get(Prio, M-1), + if + InitPrio =:= -1 -> + cpath_node(M, DAG, Prio); + true -> + Prio + end. + +cpath_node(N, DAG, Prio) -> + SuccL = dag_succ(DAG, N), + {Max, NewPrio} = cpath_succ(SuccL, DAG, Prio), + hipe_vectors:set(NewPrio, N-1, Max). + +cpath_succ(SuccL, DAG, Prio) -> + cpath_succ(SuccL, DAG, Prio, 0). + +%% performs an unnecessary lookup of priority of Succ, but that might +%% not be such a big deal + +cpath_succ([], _DAG, Prio, NodePrio) -> {NodePrio,Prio}; +cpath_succ([{Lat,Succ}|Xs], DAG, Prio, NodePrio) -> + NewPrio = cpath(Succ, DAG, Prio), + NewNodePrio = erlang:max(hipe_vectors:get(NewPrio, Succ - 1) + Lat, NodePrio), + cpath_succ(Xs, DAG, NewPrio, NewNodePrio). + +dag_succ(DAG, N) when is_integer(N) -> + hipe_vectors:get(DAG, N-1). + diff --git a/lib/hipe/ppc/Makefile b/lib/hipe/ppc/Makefile new file mode 100644 index 0000000000..0857043527 --- /dev/null +++ b/lib/hipe/ppc/Makefile @@ -0,0 +1,120 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. +MODULES=hipe_ppc \ + hipe_ppc_assemble \ + hipe_ppc_cfg \ + hipe_ppc_defuse \ + hipe_ppc_encode \ + hipe_ppc_finalise \ + hipe_ppc_frame \ + hipe_ppc_liveness_all \ + hipe_ppc_liveness_fpr \ + hipe_ppc_liveness_gpr \ + hipe_ppc_main \ + hipe_ppc_pp \ + hipe_ppc_ra \ + hipe_ppc_ra_finalise \ + hipe_ppc_ra_ls \ + hipe_ppc_ra_naive \ + hipe_ppc_ra_postconditions \ + hipe_ppc_ra_postconditions_fp \ + hipe_ppc_registers \ + hipe_rtl_to_ppc + +HRL_FILES=hipe_ppc.hrl +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. +$(EBIN)/hipe_ppc_assemble.beam: ../main/hipe.hrl ../../kernel/src/hipe_ext_format.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl +$(EBIN)/hipe_ppc_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc +$(EBIN)/hipe_ppc_frame.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_ppc_liveness_all.beam: ../flow/liveness.inc +$(EBIN)/hipe_ppc_liveness_fpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_ppc_liveness_gpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_ppc_registers.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_rtl_to_ppc.beam: ../rtl/hipe_rtl.hrl + +$(TARGET_FILES): hipe_ppc.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/ppc/hipe_ppc.erl b/lib/hipe/ppc/hipe_ppc.erl new file mode 100644 index 0000000000..047e86c45b --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc.erl @@ -0,0 +1,415 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + + +-module(hipe_ppc). +-export([ + mk_temp/2, + mk_new_temp/1, + mk_new_nonallocatable_temp/1, + is_temp/1, + temp_reg/1, + temp_type/1, + temp_is_allocatable/1, + temp_is_precoloured/1, + + mk_simm16/1, + mk_uimm16/1, + + mk_mfa/3, + + mk_prim/1, + is_prim/1, + prim_prim/1, + + mk_sdesc/4, + + mk_alu/4, + + mk_b_fun/2, + + mk_b_label/1, + + mk_bc/3, + + mk_bctr/1, + + mk_bctrl/1, + + mk_bl/3, + + mk_blr/0, + + mk_cmp/3, + + mk_comment/1, + + mk_label/1, + is_label/1, + label_label/1, + + mk_li/2, + mk_li/3, + mk_addi/4, + + mk_load/4, + mk_loadx/4, + mk_load/6, + ldop_to_ldxop/1, + + mk_mfspr/2, + + mk_mtcr/1, + + mk_mtspr/2, + + mk_pseudo_bc/4, + negate_bcond/1, + + mk_pseudo_call/4, + pseudo_call_contlab/1, + pseudo_call_func/1, + pseudo_call_sdesc/1, + pseudo_call_linkage/1, + + mk_pseudo_call_prepare/1, + pseudo_call_prepare_nrstkargs/1, + + mk_pseudo_li/2, + + mk_pseudo_move/2, + is_pseudo_move/1, + pseudo_move_dst/1, + pseudo_move_src/1, + + mk_pseudo_tailcall/4, + pseudo_tailcall_func/1, + pseudo_tailcall_stkargs/1, + pseudo_tailcall_linkage/1, + + mk_pseudo_tailcall_prepare/0, + + mk_store/4, + mk_storex/4, + mk_store/6, + stop_to_stxop/1, + + mk_unary/3, + + mk_lfd/3, + mk_lfdx/3, + mk_fload/4, + + %% mk_stfd/3, + mk_stfdx/3, + mk_fstore/4, + + mk_fp_binary/4, + + mk_fp_unary/3, + + mk_pseudo_fmove/2, + is_pseudo_fmove/1, + pseudo_fmove_dst/1, + pseudo_fmove_src/1, + + mk_defun/8, + defun_mfa/1, + defun_formals/1, + defun_is_closure/1, + defun_is_leaf/1, + defun_code/1, + defun_data/1, + defun_var_range/1]). + +-include("hipe_ppc.hrl"). + +mk_temp(Reg, Type, Allocatable) -> + #ppc_temp{reg=Reg, type=Type, allocatable=Allocatable}. +mk_temp(Reg, Type) -> mk_temp(Reg, Type, true). +mk_new_temp(Type, Allocatable) -> + mk_temp(hipe_gensym:get_next_var(ppc), Type, Allocatable). +mk_new_temp(Type) -> mk_new_temp(Type, true). +mk_new_nonallocatable_temp(Type) -> mk_new_temp(Type, false). +is_temp(X) -> case X of #ppc_temp{} -> true; _ -> false end. +temp_reg(#ppc_temp{reg=Reg}) -> Reg. +temp_type(#ppc_temp{type=Type}) -> Type. +temp_is_allocatable(#ppc_temp{allocatable=A}) -> A. +temp_is_precoloured(#ppc_temp{reg=Reg,type=Type}) -> + case Type of + 'double' -> hipe_ppc_registers:is_precoloured_fpr(Reg); + _ -> hipe_ppc_registers:is_precoloured_gpr(Reg) + end. + +mk_simm16(Value) -> #ppc_simm16{value=Value}. +mk_uimm16(Value) -> #ppc_uimm16{value=Value}. + +mk_mfa(M, F, A) -> #ppc_mfa{m=M, f=F, a=A}. + +mk_prim(Prim) -> #ppc_prim{prim=Prim}. +is_prim(X) -> case X of #ppc_prim{} -> true; _ -> false end. +prim_prim(#ppc_prim{prim=Prim}) -> Prim. + +mk_sdesc(ExnLab, FSize, Arity, Live) -> + #ppc_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}. + +mk_alu(AluOp, Dst, Src1, Src2) -> + #alu{aluop=AluOp, dst=Dst, src1=Src1, src2=Src2}. + +mk_b_fun(Fun, Linkage) -> #b_fun{'fun'=Fun, linkage=Linkage}. + +mk_b_label(Label) -> #b_label{label=Label}. + +mk_bc(BCond, Label, Pred) -> #bc{bcond=BCond, label=Label, pred=Pred}. + +mk_bctr(Labels) -> #bctr{labels=Labels}. + +mk_bctrl(SDesc) -> #bctrl{sdesc=SDesc}. + +mk_bl(Fun, SDesc, Linkage) -> #bl{'fun'=Fun, sdesc=SDesc, linkage=Linkage}. + +mk_blr() -> #blr{}. + +mk_cmp(CmpOp, Src1, Src2) -> #cmp{cmpop=CmpOp, src1=Src1, src2=Src2}. + +mk_comment(Term) -> #comment{term=Term}. + +mk_label(Label) -> #label{label=Label}. +is_label(I) -> case I of #label{} -> true; _ -> false end. +label_label(#label{label=Label}) -> Label. + +%%% Load an integer constant into a register. +mk_li(Dst, Value) -> mk_li(Dst, Value, []). + +mk_li(Dst, Value, Tail) -> + R0 = mk_temp(0, 'untagged'), + mk_addi(Dst, R0, Value, Tail). + +mk_addi(Dst, R0, Value, Tail) -> + Low = at_l(Value), + High = at_ha(Value), + case High of + 0 -> + [mk_alu('addi', Dst, R0, mk_simm16(Low)) | + Tail]; + _ -> + case Low of + 0 -> + [mk_alu('addis', Dst, R0, mk_simm16(High)) | + Tail]; + _ -> + [mk_alu('addi', Dst, R0, mk_simm16(Low)), + mk_alu('addis', Dst, Dst, mk_simm16(High)) | + Tail] + end + end. + +at_l(Value) -> + simm16sext(Value band 16#FFFF). + +at_ha(Value) -> + simm16sext(((Value + 16#8000) bsr 16) band 16#FFFF). + +simm16sext(Value) -> + if Value >= 32768 -> (-1 bsl 16) bor Value; + true -> Value + end. + +mk_li_new(Dst, Value, Tail) -> % Dst may be R0 + R0 = mk_temp(0, 'untagged'), + case at_ha(Value) of + 0 -> + %% Value[31:16] are the sign-extension of Value[15]. + %% Use a single addi to load and sign-extend 16 bits. + [mk_alu('addi', Dst, R0, mk_simm16(at_l(Value))) | + Tail]; + _ -> + %% Use addis to load the high 16 bits, followed by an + %% optional ori to load non sign-extended low 16 bits. + High = simm16sext((Value bsr 16) band 16#FFFF), + [mk_alu('addis', Dst, R0, mk_simm16(High)) | + case (Value band 16#FFFF) of + 0 -> Tail; + Low -> + [mk_alu('ori', Dst, Dst, mk_uimm16(Low)) | + Tail] + end] + end. + +mk_load(LDop, Dst, Disp, Base) -> + #load{ldop=LDop, dst=Dst, disp=Disp, base=Base}. + +mk_loadx(LdxOp, Dst, Base1, Base2) -> + #loadx{ldxop=LdxOp, dst=Dst, base1=Base1, base2=Base2}. + +mk_load(LdOp, Dst, Offset, Base, Scratch, Rest) when is_integer(Offset) -> + if Offset >= -32768, Offset =< 32767 -> + [mk_load(LdOp, Dst, Offset, Base) | Rest]; + true -> + LdxOp = ldop_to_ldxop(LdOp), + Index = + begin + DstReg = temp_reg(Dst), + BaseReg = temp_reg(Base), + if DstReg =/= BaseReg -> Dst; + true -> mk_scratch(Scratch) + end + end, + mk_li_new(Index, Offset, + [mk_loadx(LdxOp, Dst, Base, Index) | Rest]) + end. + +ldop_to_ldxop(LdOp) -> + case LdOp of + 'lbz' -> 'lbzx'; + 'lha' -> 'lhax'; + 'lhz' -> 'lhzx'; + 'lwz' -> 'lwzx' + end. + +mk_scratch(Scratch) -> + case Scratch of + 0 -> mk_temp(0, 'untagged'); + 'new' -> mk_new_temp('untagged') + end. + +mk_mfspr(Dst, Spr) -> #mfspr{dst=Dst, spr=Spr}. + +mk_mtcr(Src) -> #mtcr{src=Src}. + +mk_mtspr(Spr, Src) -> #mtspr{spr=Spr, src=Src}. + +mk_pseudo_bc(BCond, TrueLab, FalseLab, Pred) -> + if Pred >= 0.5 -> + mk_pseudo_bc_simple(negate_bcond(BCond), FalseLab, + TrueLab, 1.0-Pred); + true -> + mk_pseudo_bc_simple(BCond, TrueLab, FalseLab, Pred) + end. + +mk_pseudo_bc_simple(BCond, TrueLab, FalseLab, Pred) when Pred =< 0.5 -> + #pseudo_bc{bcond=BCond, true_label=TrueLab, + false_label=FalseLab, pred=Pred}. + +negate_bcond(BCond) -> + case BCond of + 'lt' -> 'ge'; + 'ge' -> 'lt'; + 'gt' -> 'le'; + 'le' -> 'gt'; + 'eq' -> 'ne'; + 'ne' -> 'eq'; + 'so' -> 'ns'; + 'ns' -> 'so' + end. + +mk_pseudo_call(FunC, SDesc, ContLab, Linkage) -> + #pseudo_call{func=FunC, sdesc=SDesc, contlab=ContLab, linkage=Linkage}. +pseudo_call_func(#pseudo_call{func=FunC}) -> FunC. +pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc. +pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab. +pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage. + +mk_pseudo_call_prepare(NrStkArgs) -> + #pseudo_call_prepare{nrstkargs=NrStkArgs}. +pseudo_call_prepare_nrstkargs(#pseudo_call_prepare{nrstkargs=NrStkArgs}) -> + NrStkArgs. + +mk_pseudo_li(Dst, Imm) -> #pseudo_li{dst=Dst, imm=Imm}. + +mk_pseudo_move(Dst, Src) -> #pseudo_move{dst=Dst, src=Src}. +is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. +pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. +pseudo_move_src(#pseudo_move{src=Src}) -> Src. + +mk_pseudo_tailcall(FunC, Arity, StkArgs, Linkage) -> + #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage}. +pseudo_tailcall_func(#pseudo_tailcall{func=FunC}) -> FunC. +pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs. +pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage. + +mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}. + +mk_store(STop, Src, Disp, Base) -> + #store{stop=STop, src=Src, disp=Disp, base=Base}. + +mk_storex(StxOp, Src, Base1, Base2) -> + #storex{stxop=StxOp, src=Src, base1=Base1, base2=Base2}. + +mk_store(StOp, Src, Offset, Base, Scratch, Rest)when is_integer(Offset) -> + if Offset >= -32768, Offset =< 32767 -> + [mk_store(StOp, Src, Offset, Base) | Rest]; + true -> + StxOp = stop_to_stxop(StOp), + Index = mk_scratch(Scratch), + mk_li_new(Index, Offset, + [mk_storex(StxOp, Src, Base, Index) | Rest]) + end. + +stop_to_stxop(StOp) -> + case StOp of + 'stb' -> 'stbx'; + 'sth' -> 'sthx'; + 'stw' -> 'stwx' + end. + +mk_unary(UnOp, Dst, Src) -> #unary{unop=UnOp, dst=Dst, src=Src}. + +mk_lfd(Dst, Disp, Base) -> #lfd{dst=Dst, disp=Disp, base=Base}. +mk_lfdx(Dst, Base1, Base2) -> #lfdx{dst=Dst, base1=Base1, base2=Base2}. +mk_fload(Dst, Offset, Base, Scratch) when is_integer(Offset) -> + if Offset >= -32768, Offset =< 32767 -> + [mk_lfd(Dst, Offset, Base)]; + true -> + Index = mk_scratch(Scratch), + mk_li_new(Index, Offset, [mk_lfdx(Dst, Base, Index)]) + end. + +mk_stfd(Src, Disp, Base) -> #stfd{src=Src, disp=Disp, base=Base}. +mk_stfdx(Src, Base1, Base2) -> #stfdx{src=Src, base1=Base1, base2=Base2}. +mk_fstore(Src, Offset, Base, Scratch) when is_integer(Offset) -> + if Offset >= -32768, Offset =< 32767 -> + [mk_stfd(Src, Offset, Base)]; + true -> + Index = mk_scratch(Scratch), + mk_li_new(Index, Offset, [mk_stfdx(Src, Base, Index)]) + end. + +mk_fp_binary(FpBinOp, Dst, Src1, Src2) -> + #fp_binary{fp_binop=FpBinOp, dst=Dst, src1=Src1, src2=Src2}. + +mk_fp_unary(FpUnOp, Dst, Src) -> #fp_unary{fp_unop=FpUnOp, dst=Dst, src=Src}. + +mk_pseudo_fmove(Dst, Src) -> #pseudo_fmove{dst=Dst, src=Src}. +is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end. +pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst. +pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src. + +mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> + #defun{mfa=MFA, formals=Formals, code=Code, data=Data, + isclosure=IsClosure, isleaf=IsLeaf, + var_range=VarRange, label_range=LabelRange}. +defun_mfa(#defun{mfa=MFA}) -> MFA. +defun_formals(#defun{formals=Formals}) -> Formals. +defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure. +defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf. +defun_code(#defun{code=Code}) -> Code. +defun_data(#defun{data=Data}) -> Data. +defun_var_range(#defun{var_range=VarRange}) -> VarRange. diff --git a/lib/hipe/ppc/hipe_ppc.hrl b/lib/hipe/ppc/hipe_ppc.hrl new file mode 100644 index 0000000000..25e7ae0b5f --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc.hrl @@ -0,0 +1,118 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% + + +%%%-------------------------------------------------------------------- +%%% Basic Values: +%%% +%%% temp ::= {ppc_temp, reg, type, allocatable} +%%% reg ::= +%%% type ::= tagged | untagged +%%% allocatable ::= true | false +%%% +%%% sdesc ::= {ppc_sdesc, exnlab, fsize, arity, live} +%%% exnlab ::= [] | label +%%% fsize ::= int32 (frame size in words) +%%% live ::= (word offsets) +%%% arity ::= uint8 +%%% +%%% mfa ::= {ppc_mfa, atom, atom, arity} +%%% prim ::= {ppc_prim, atom} + +-record(ppc_mfa, {m::atom(), f::atom(), a::arity()}). +-record(ppc_prim, {prim}). +-record(ppc_sdesc, {exnlab, fsize, arity::arity(), live}). +-record(ppc_simm16, {value}). +-record(ppc_temp, {reg, type, allocatable}). +-record(ppc_uimm16, {value}). + +%%% Instruction Operands: +%%% +%%% aluop ::= add | add. | addi | addic. | addis | addo. | subf | subf. | subfo. +%%% | and | and. | andi. | or | or. | ori | xor | xor. | xori +%%% | slw | slw. | slwi | slwi. | srw | srw. | srwi | srwi. +%%% | sraw | sraw. | srawi | srawi. | mulli | mullw | mullw. | mullwo. +%%% bcond ::= eq | ne | gt | ge | lt | le | so | ns +%%% cmpop ::= cmp | cmpi | cmpl | cmpli +%%% ldop ::= lbz | lha | lhz | lwz +%%% ldxop ::= lbzx | lhax | lhzx | lwzx | lhbrx | lwbrx +%%% stop ::= stb | stw (HW has sth, but we don't use it) +%%% stxop ::= stbx | stwx (HW has sthx/sthbrx/stwbrx, but we don't use them) +%%% unop ::= extsb | extsh | {rlwinm,SH,MB,ME} | {rlwinm.,SH,MB,ME} +%%% +%%% immediate ::= int32 | atom | {label, label_type} +%%% label_type ::= constant | closure | c_const +%%% +%%% dst ::= temp +%%% src ::= temp +%%% | simm16 | uimm16 (only in alu.src2, cmp.src2) +%%% base ::= temp +%%% disp ::= sint16 (untagged simm16) +%%% +%%% fun ::= mfa | prim +%%% func ::= mfa | prim | 'ctr' +%%% +%%% spr ::= ctr | lr | xer + +%%% Instructions: + +-record(alu, {aluop, dst, src1, src2}). +-record(b_fun, {'fun', linkage}). % known tailcall +-record(b_label, {label}). % local jump, unconditional +-record(bc, {bcond, label, pred}). % local jump, conditional +-record(bctr, {labels}). % computed tailcall or switch +-record(bctrl, {sdesc}). % computed recursive call +-record(bl, {'fun', sdesc, linkage}). % known recursive call +-record(blr, {}). % unconditional bclr (return) +-record(cmp, {cmpop, src1, src2}). +-record(comment, {term}). +-record(label, {label}). +-record(load, {ldop, dst, disp, base}). % non-indexed, non-update form +-record(loadx, {ldxop, dst, base1, base2}). % indexed, non-update form +-record(mfspr, {dst, spr}). % for reading LR and XER +-record(mtcr, {src}). % for copying XER[CA] to CR0[EQ] via a temp +-record(mtspr, {spr, src}). % for writing LR, CTR, and XER +-record(pseudo_bc, {bcond, true_label, false_label, pred}). +-record(pseudo_call, {func, sdesc, contlab, linkage}). +-record(pseudo_call_prepare, {nrstkargs}). +-record(pseudo_li, {dst, imm}). +-record(pseudo_move, {dst, src}). +-record(pseudo_tailcall, {func, arity, stkargs, linkage}). +-record(pseudo_tailcall_prepare, {}). +-record(store, {stop, src, disp, base}). % non-indexed, non-update form +-record(storex, {stxop, src, base1, base2}).% indexed, non-update form +-record(unary, {unop, dst, src}). +-record(lfd, {dst, disp, base}). +-record(lfdx, {dst, base1, base2}). +-record(stfd, {src, disp, base}). +-record(stfdx, {src, base1, base2}). +-record(fp_binary, {fp_binop, dst, src1, src2}). +-record(fp_unary, {fp_unop, dst, src}). +-record(pseudo_fmove, {dst, src}). + +%%% Function definitions. + +-include("../misc/hipe_consttab.hrl"). + +-record(defun, {mfa :: mfa(), formals, code, + data :: hipe_consttab(), + isclosure :: boolean(), + isleaf :: boolean(), + var_range, label_range}). diff --git a/lib/hipe/ppc/hipe_ppc_assemble.erl b/lib/hipe/ppc/hipe_ppc_assemble.erl new file mode 100644 index 0000000000..6f06f8b841 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_assemble.erl @@ -0,0 +1,603 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + + +-module(hipe_ppc_assemble). +-export([assemble/4]). + +-include("../main/hipe.hrl"). % for VERSION_STRING, when_option +-include("hipe_ppc.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). +-include("../rtl/hipe_literals.hrl"). +-include("../misc/hipe_sdi.hrl"). +-undef(ASSERT). +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +assemble(CompiledCode, Closures, Exports, Options) -> + print("****************** Assembling *******************\n", [], Options), + %% + Code = [{MFA, + hipe_ppc:defun_code(Defun), + hipe_ppc:defun_data(Defun)} + || {MFA, Defun} <- CompiledCode], + %% + {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = + hipe_pack_constants:pack_constants(Code, 4), + %% + {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = + encode(translate(Code, ConstMap), Options), + print("Total num bytes=~w\n", [CodeSize], Options), + %% + SC = hipe_pack_constants:slim_constmap(ConstMap), + DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap), + SSE = slim_sorted_exportmap(ExportMap,Closures,Exports), + SlimRefs = hipe_pack_constants:slim_refs(AccRefs), + Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC}, + ConstAlign, ConstSize, + SC, + DataRelocs, % nee LM, LabelMap + SSE, + CodeSize,CodeBinary,SlimRefs, + 0,[] % ColdCodeSize, SlimColdRefs + ]), + %% + Bin. + +%%% +%%% Assembly Pass 1. +%%% Process initial {MFA,Code,Data} list. +%%% Translate each MFA's body, choosing operand & instruction kinds. +%%% +%%% Assembly Pass 2. +%%% Perform short/long form optimisation for jumps. +%%% +%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. +%%% + +translate(Code, ConstMap) -> + translate_mfas(Code, ConstMap, []). + +translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode) -> + {NewInsns,CodeSize,LabelMap} = + translate_insns(Insns, MFA, ConstMap, hipe_sdi:pass1_init(), 0, []), + translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode]); +translate_mfas([], _ConstMap, NewCode) -> + lists:reverse(NewCode). + +translate_insns([I|Insns], MFA, ConstMap, SdiPass1, Address, NewInsns) -> + NewIs = translate_insn(I, MFA, ConstMap), + add_insns(NewIs, Insns, MFA, ConstMap, SdiPass1, Address, NewInsns); +translate_insns([], _MFA, _ConstMap, SdiPass1, Address, NewInsns) -> + {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1), + {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}. + +add_insns([I|Is], Insns, MFA, ConstMap, SdiPass1, Address, NewInsns) -> + NewSdiPass1 = + case I of + {'.label',L,_} -> + hipe_sdi:pass1_add_label(SdiPass1, Address, L); + {bc_sdi,{_,{label,L},_},_} -> + SdiInfo = #sdi_info{incr=(8-4),lb=-16#2000*4,ub=16#1FFF*4}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + _ -> + SdiPass1 + end, + Address1 = Address + insn_size(I), + add_insns(Is, Insns, MFA, ConstMap, NewSdiPass1, Address1, [I|NewInsns]); +add_insns([], Insns, MFA, ConstMap, SdiPass1, Address, NewInsns) -> + translate_insns(Insns, MFA, ConstMap, SdiPass1, Address, NewInsns). + +insn_size(I) -> + case I of + {'.label',_,_} -> 0; + {'.reloc',_,_} -> 0; + _ -> 4 % bc_sdi included in this case + end. + +translate_insn(I, MFA, ConstMap) -> % -> [{Op,Opnd,OrigI}] + case I of + #alu{} -> do_alu(I); + #b_fun{} -> do_b_fun(I); + #b_label{} -> do_b_label(I); + #bc{} -> do_bc(I); + #bctr{} -> do_bctr(I); + #bctrl{} -> do_bctrl(I); + #bl{} -> do_bl(I); + #blr{} -> do_blr(I); + #comment{} -> []; + #cmp{} -> do_cmp(I); + #label{} -> do_label(I); + #load{} -> do_load(I); + #loadx{} -> do_loadx(I); + #mfspr{} -> do_mfspr(I); + #mtcr{} -> do_mtcr(I); + #mtspr{} -> do_mtspr(I); + %% pseudo_bc: eliminated before assembly + %% pseudo_call: eliminated before assembly + %% pseudo_call_prepare: eliminated before assembly + #pseudo_li{} -> do_pseudo_li(I, MFA, ConstMap); + %% pseudo_move: eliminated before assembly + %% pseudo_tailcall: eliminated before assembly + %% pseudo_tailcall_prepare: eliminated before assembly + #store{} -> do_store(I); + #storex{} -> do_storex(I); + #unary{} -> do_unary(I); + #lfd{} -> do_lfd(I); + #stfd{} -> do_stfd(I); + #fp_binary{} -> do_fp_binary(I); + #fp_unary{} -> do_fp_unary(I); + _ -> exit({?MODULE,translate_insn,I}) + end. + +do_alu(I) -> + #alu{aluop=AluOp,dst=Dst,src1=Src1,src2=Src2} = I, + NewDst = do_reg(Dst), + NewSrc1 = do_reg(Src1), + NewSrc2 = do_reg_or_imm(Src2), + {NewI,NewOpnds} = + case AluOp of + 'slwi' -> {'rlwinm', do_slwi_opnds(NewDst, NewSrc1, NewSrc2)}; + 'slwi.' -> {'rlwinm.', do_slwi_opnds(NewDst, NewSrc1, NewSrc2)}; + 'srwi' -> {'rlwinm', do_srwi_opnds(NewDst, NewSrc1, NewSrc2)}; + 'srwi.' -> {'rlwinm.', do_srwi_opnds(NewDst, NewSrc1, NewSrc2)}; + 'srawi' -> {'srawi', {NewDst,NewSrc1,do_srawi_src2(NewSrc2)}}; + 'srawi.' -> {'srawi.', {NewDst,NewSrc1,do_srawi_src2(NewSrc2)}}; + _ -> {AluOp, {NewDst,NewSrc1,NewSrc2}} + end, + [{NewI, NewOpnds, I}]. + +do_slwi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 32 -> + {Dst, Src1, {sh,N}, {mb,0}, {me,31-N}}. + +do_srwi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 32 -> + {Dst, Src1, {sh,32-N}, {mb,N}, {me,31}}. + +do_srawi_src2({uimm,N}) when is_integer(N), 0 =< N, N < 32 -> {sh,N}. + +do_b_fun(I) -> + #b_fun{'fun'=Fun,linkage=Linkage} = I, + [{'.reloc', {b_fun,Fun,Linkage}, #comment{term='fun'}}, + {b, {{li,0}}, I}]. + +do_b_label(I) -> + #b_label{label=Label} = I, + [{b, do_label_ref(Label), I}]. + +do_bc(I) -> + #bc{bcond=BCond,label=Label,pred=Pred} = I, + [{bc_sdi, {{bcond,BCond},do_label_ref(Label),{pred,Pred}}, I}]. + +do_bctr(I) -> + [{bcctr, {{bo,2#10100},{bi,0}}, I}]. + +do_bctrl(I) -> + #bctrl{sdesc=SDesc} = I, + [{bcctrl, {{bo,2#10100},{bi,0}}, I}, + {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}]. + +do_bl(I) -> + #bl{'fun'=Fun,sdesc=SDesc,linkage=Linkage} = I, + [{'.reloc', {b_fun,Fun,Linkage}, #comment{term='fun'}}, + {bl, {{li,0}}, I}, + {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}]. + +do_blr(I) -> + [{bclr, {{bo,2#10100},{bi,0}}, I}]. + +do_cmp(I) -> + #cmp{cmpop=CmpOp,src1=Src1,src2=Src2} = I, + NewSrc1 = do_reg(Src1), + NewSrc2 = do_reg_or_imm(Src2), + [{CmpOp, {{crf,0},0,NewSrc1,NewSrc2}, I}]. + +do_label(I) -> + #label{label=Label} = I, + [{'.label', Label, I}]. + +do_load(I) -> + #load{ldop=LdOp,dst=Dst,disp=Disp,base=Base} = I, + NewDst = do_reg(Dst), + NewDisp = do_disp(Disp), + NewBase = do_reg(Base), + [{LdOp, {NewDst,NewDisp,NewBase}, I}]. + +do_loadx(I) -> + #loadx{ldxop=LdxOp,dst=Dst,base1=Base1,base2=Base2} = I, + NewDst = do_reg(Dst), + NewBase1 = do_reg(Base1), + NewBase2 = do_reg(Base2), + [{LdxOp, {NewDst,NewBase1,NewBase2}, I}]. + +do_mfspr(I) -> + #mfspr{dst=Dst,spr=SPR} = I, + NewDst = do_reg(Dst), + NewSPR = do_spr(SPR), + [{mfspr, {NewDst,NewSPR}, I}]. + +do_mtcr(I) -> + #mtcr{src=Src} = I, + NewSrc = do_reg(Src), + [{mtcrf, {{crm,16#80},NewSrc}, I}]. + +do_mtspr(I) -> + #mtspr{spr=SPR,src=Src} = I, + NewSPR = do_spr(SPR), + NewSrc = do_reg(Src), + [{mtspr, {NewSPR,NewSrc}, I}]. + +do_pseudo_li(I, MFA, ConstMap) -> + #pseudo_li{dst=Dst,imm=Imm} = I, + RelocData = + case Imm of + Atom when is_atom(Atom) -> + {load_atom, Atom}; +%%% {mfa,MFAorPrim,Linkage} -> +%%% Tag = +%%% case Linkage of +%%% remote -> remote_function; +%%% not_remote -> local_function +%%% end, +%%% {load_address, {Tag,untag_mfa_or_prim(MFAorPrim)}}; + {Label,constant} -> + ConstNo = find_const({MFA,Label}, ConstMap), + {load_address, {constant,ConstNo}}; + {Label,closure} -> + {load_address, {closure,Label}}; + {Label,c_const} -> + {load_address, {c_const,Label}} + end, + NewDst = do_reg(Dst), + Simm0 = {simm,0}, + [{'.reloc', RelocData, #comment{term=reloc}}, + {addi, {NewDst,{r,0},Simm0}, I}, + {addis, {NewDst,NewDst,Simm0}, I}]. + +do_store(I) -> + #store{stop=StOp,src=Src,disp=Disp,base=Base} = I, + NewSrc = do_reg(Src), + NewDisp = do_disp(Disp), + NewBase = do_reg(Base), + [{StOp, {NewSrc,NewDisp,NewBase}, I}]. + +do_storex(I) -> + #storex{stxop=StxOp,src=Src,base1=Base1,base2=Base2} = I, + NewSrc = do_reg(Src), + NewBase1 = do_reg(Base1), + NewBase2 = do_reg(Base2), + [{StxOp, {NewSrc,NewBase1,NewBase2}, I}]. + +do_unary(I) -> + #unary{unop=UnOp,dst=Dst,src=Src} = I, + NewDst = do_reg(Dst), + NewSrc = do_reg(Src), + {NewI,NewOpnds} = + case UnOp of + {RLWINM,SH,MB,ME} -> {RLWINM, {NewDst,NewSrc,{sh,SH},{mb,MB},{me,ME}}}; + _ -> {UnOp, {NewDst,NewSrc}} + end, + [{NewI, NewOpnds, I}]. + +do_lfd(I) -> + #lfd{dst=Dst,disp=Disp,base=Base} = I, + NewDst = do_fpreg(Dst), + NewDisp = do_disp(Disp), + NewBase = do_reg(Base), + [{lfd, {NewDst,NewDisp,NewBase}, I}]. + +do_stfd(I) -> + #stfd{src=Src,disp=Disp,base=Base} = I, + NewSrc = do_fpreg(Src), + NewDisp = do_disp(Disp), + NewBase = do_reg(Base), + [{stfd, {NewSrc,NewDisp,NewBase}, I}]. + +do_fp_binary(I) -> + #fp_binary{fp_binop=FpBinOp,dst=Dst,src1=Src1,src2=Src2} = I, + NewDst = do_fpreg(Dst), + NewSrc1 = do_fpreg(Src1), + NewSrc2 = do_fpreg(Src2), + [{FpBinOp, {NewDst,NewSrc1,NewSrc2}, I}]. + +do_fp_unary(I) -> + #fp_unary{fp_unop=FpUnOp,dst=Dst,src=Src} = I, + NewDst = do_fpreg(Dst), + NewSrc = do_fpreg(Src), + [{FpUnOp, {NewDst,NewSrc}, I}]. + +do_fpreg(#ppc_temp{reg=Reg,type='double'}) when is_integer(Reg), 0 =< Reg, Reg < 32 -> + {fr,Reg}. + +do_reg(#ppc_temp{reg=Reg,type=Type}) + when is_integer(Reg), 0 =< Reg, Reg < 32, Type =/= 'double' -> + {r,Reg}. + +do_label_ref(Label) when is_integer(Label) -> + {label,Label}. % symbolic, since offset is not yet computable + +do_reg_or_imm(Src) -> + case Src of + #ppc_temp{} -> + do_reg(Src); + #ppc_simm16{value=Value} when is_integer(Value), -32768 =< Value, Value =< 32767 -> + {simm, Value band 16#ffff}; + #ppc_uimm16{value=Value} when is_integer(Value), 0 =< Value, Value =< 65535 -> + {uimm, Value} + end. + +do_disp(Disp) when is_integer(Disp), -32768 =< Disp, Disp =< 32767 -> + {d, Disp band 16#ffff}. + +do_spr(SPR) -> + SPR_NR = + case SPR of + 'xer' -> 1; + 'lr' -> 8; + 'ctr' -> 9 + end, + {spr,SPR_NR}. + +%%% +%%% Assembly Pass 3. +%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. +%%% Translate to a single binary code segment. +%%% Collect relocation patches. +%%% Build ExportMap (MFA-to-address mapping). +%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). +%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. +%%% + +encode(Code, Options) -> + CodeSize = compute_code_size(Code, 0), + ExportMap = build_export_map(Code, 0, []), + {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), + CodeBinary = list_to_binary(lists:reverse(AccCode)), + ?ASSERT(CodeSize =:= byte_size(CodeBinary)), + CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), + {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. + +compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> + compute_code_size(Code, Size+CodeSize); +compute_code_size([], Size) -> Size. + +build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> + build_export_map(Code, Address+CodeSize, [{Address,M,F,A}|ExportMap]); +build_export_map([], _Address, ExportMap) -> ExportMap. + +combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> + NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), + combine_label_maps(Code, Address+CodeSize, NewCLM); +combine_label_maps([], _Address, CLM) -> CLM. + +merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> + NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), + merge_label_map(Rest, MFA, Address, NewCLM); +merge_label_map([], _MFA, _Address, CLM) -> CLM. + +encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> + print("Generating code for: ~w\n", [MFA], Options), + print("Offset | Opcode | Instruction\n", [], Options), + {Address1,Relocs1,AccCode1} = + encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), + ExpectedAddress = Address + CodeSize, + ?ASSERT(Address1 =:= ExpectedAddress), + print("Finished.\n", [], Options), + encode_mfas(Code, Address1, AccCode1, Relocs1, Options); +encode_mfas([], _Address, AccCode, Relocs, _Options) -> + {AccCode,Relocs}. + +encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case I of + {'.label',L,_} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ?ASSERT(Address =:= LabelAddress), % sanity check + print_insn(Address, [], I, Options), + encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); + {'.reloc',Data,_} -> + Reloc = encode_reloc(Data, Address, FunAddress, LabelMap), + encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); + {bc_sdi,_,_} -> + encode_insns(fix_bc_sdi(I, Insns, Address, FunAddress, LabelMap), + Address, FunAddress, LabelMap, Relocs, AccCode, Options); + _ -> + {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap), + Word = hipe_ppc_encode:insn_encode(Op, Arg), + print_insn(Address, Word, I, Options), + Segment = <>, + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+4, FunAddress, LabelMap, Relocs, NewAccCode, Options) + end; +encode_insns([], Address, _FunAddress, _LabelMap, Relocs, AccCode, _Options) -> + {Address,Relocs,AccCode}. + +encode_reloc(Data, Address, FunAddress, LabelMap) -> + case Data of + {b_fun,MFAorPrim,Linkage} -> + %% b and bl are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case Linkage of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + {PatchTypeExt, Address, untag_mfa_or_prim(MFAorPrim)}; + {load_atom,Atom} -> + {?LOAD_ATOM, Address, Atom}; + {load_address,X} -> + {?LOAD_ADDRESS, Address, X}; + {sdesc,SDesc} -> + #ppc_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, + ExnRA = + case ExnLab of + [] -> []; % don't cons up a new one + ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress + end, + {?SDESC, Address, + ?STACK_DESC(ExnRA, FSize, Arity, Live)} + end. + +untag_mfa_or_prim(#ppc_mfa{m=M,f=F,a=A}) -> {M,F,A}; +untag_mfa_or_prim(#ppc_prim{prim=Prim}) -> Prim. + +fix_bc_sdi(I, Insns, InsnAddress, FunAddress, LabelMap) -> + {bc_sdi,Opnds,OrigI} = I, + {{bcond,BCond},Label,{pred,Pred}} = Opnds, + {label,L} = Label, + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + BD = (LabelAddress - InsnAddress) div 4, + if BD >= -(16#2000), BD =< 16#1FFF -> + [{bc, Opnds, OrigI} | Insns]; + true -> + NewBCond = hipe_ppc:negate_bcond(BCond), + NewPred = 1.0 - Pred, + [{bc, + {{bcond,NewBCond},'.+8',{pred,NewPred}}, + #bc{bcond=NewBCond,label='.+8',pred=NewPred}}, %% pp will be ugly + {b, Label, #b_label{label=L}} | + Insns] + end. + +fix_jumps(I, InsnAddress, FunAddress, LabelMap) -> + case I of + {b, {label,L}, OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + LI = (LabelAddress - InsnAddress) div 4, + %% ensure LI fits in a 24 bit sign-extended field + ?ASSERT(LI =< 16#7FFFFF), + ?ASSERT(LI >= -(16#800000)), + {b, {{li,LI band 16#FFFFFF}}, OrigI}; + {bc, {{bcond,BCond},Target,{pred,Pred}}, OrigI} -> + LabelAddress = + case Target of + {label,L} -> gb_trees:get(L, LabelMap) + FunAddress; + '.+8' -> InsnAddress + 8 + end, + BD = (LabelAddress - InsnAddress) div 4, + %% ensure BD fits in a 14 bit sign-extended field + ?ASSERT(BD =< 16#1FFF), + ?ASSERT(BD >= -(16#2000)), + {BO1,BI} = split_bcond(BCond), + BO = mk_bo(BO1, Pred, BD), + {bc, {{bo,BO},{bi,BI},{bd,BD band 16#3FFF}}, OrigI}; + _ -> I + end. + +split_bcond(BCond) -> % {BO[1], BI for CR0} + case BCond of + 'lt' -> {1, 2#0000}; + 'ge' -> {0, 2#0000}; % not lt + 'gt' -> {1, 2#0001}; + 'le' -> {0, 2#0001}; % not gt + 'eq' -> {1, 2#0010}; + 'ne' -> {0, 2#0010}; % not eq + 'so' -> {1, 2#0011}; + 'ns' -> {0, 2#0011} % not so + end. + +mk_bo(BO1, Pred, BD) -> + (BO1 bsl 3) bor 2#00100 bor mk_y(Pred, BD). + +mk_y(Pred, BD) -> + if Pred < 0.5 -> % not taken + if BD < 0 -> 1; true -> 0 end; + true -> % taken + if BD < 0 -> 0; true -> 1 end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_data_relocs(RefsFromConsts, LabelMap) -> + lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])). + +mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) -> + Map = [case Label of + {L,Pos} -> + Offset = find({MFA,L}, LabelMap), + {Pos,Offset}; + {sorted,Base,OrderedLabels} -> + {sorted, Base, [begin + Offset = find({MFA,L}, LabelMap), + {Order, Offset} + end + || {L,Order} <- OrderedLabels]} + end + || Label <- Labels], + %% msg("Map: ~w Map\n",[Map]), + mk_data_relocs(Rest, LabelMap, [Map,Acc]); +mk_data_relocs([],_,Acc) -> Acc. + +find({_MFA,_L} = MFAL,LabelMap) -> + gb_trees:get(MFAL, LabelMap). + +slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) -> + IsClosure = lists:member({M,F,A}, Closures), + IsExported = is_exported(F, A, Exports), + [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)]; +slim_sorted_exportmap([],_,_) -> []. + +is_exported(F, A, Exports) -> lists:member({F,A}, Exports). + +%%% +%%% Assembly listing support (pp_asm option). +%%% + +print(String, Arglist, Options) -> + ?when_option(pp_asm, Options, io:format(String, Arglist)). + +print_insn(Address, Word, I, Options) -> + ?when_option(pp_asm, Options, print_insn_2(Address, Word, I)). + +print_insn_2(Address, Word, {_,_,OrigI}) -> + io:format("~8.16.0b | ", [Address]), + print_code_list(word_to_bytes(Word), 0), + hipe_ppc_pp:pp_insn(OrigI). + +word_to_bytes(W) -> + case W of + [] -> []; % label or other pseudo instruction + _ -> [(W bsr 24) band 16#FF, (W bsr 16) band 16#FF, + (W bsr 8) band 16#FF, W band 16#FF] + end. + +print_code_list([Byte|Rest], Len) -> + print_byte(Byte), + print_code_list(Rest, Len+1); +print_code_list([], Len) -> + fill_spaces(8-(Len*2)), + io:format(" | "). + +print_byte(Byte) -> + io:format("~2.16.0b", [Byte band 16#FF]). + +fill_spaces(N) when N > 0 -> + io:format(" "), + fill_spaces(N-1); +fill_spaces(0) -> + []. + +%%% +%%% Lookup a constant in a ConstMap. +%%% + +find_const({MFA,Label}, [{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) -> + ConstNo; +find_const(N, [_|R]) -> + find_const(N, R); +find_const(C, []) -> + ?EXIT({constant_not_found,C}). diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl new file mode 100644 index 0000000000..13a7754831 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_cfg.erl @@ -0,0 +1,131 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_cfg). + +-export([init/1, + labels/1, start_label/1, + succ/2, + bb/2, bb_add/3]). +-export([postorder/1]). +-export([linearise/1, params/1, reverse_postorder/1]). +-export([arity/1]). +%%%-export([redirect_jmp/3, arity/1]). + +%%% these tell cfg.inc what to define (ugly as hell) +-define(BREADTH_ORDER,true). +-define(PARAMS_NEEDED,true). +-define(START_LABEL_UPDATE_NEEDED,true). + +-include("hipe_ppc.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +init(Defun) -> + Code = hipe_ppc:defun_code(Defun), + StartLab = hipe_ppc:label_label(hd(Code)), + Data = hipe_ppc:defun_data(Defun), + IsClosure = hipe_ppc:defun_is_closure(Defun), + Name = hipe_ppc:defun_mfa(Defun), + IsLeaf = hipe_ppc:defun_is_leaf(Defun), + Formals = hipe_ppc:defun_formals(Defun), + CFG0 = mk_empty_cfg(Name, StartLab, Data, IsClosure, IsLeaf, Formals), + take_bbs(Code, CFG0). + +is_branch(I) -> + case I of + #b_fun{} -> true; + #b_label{} -> true; + %% not bc + #bctr{} -> true; + %% not bctrl + %% not bl + #blr{} -> true; + #pseudo_bc{} -> true; + #pseudo_call{} -> true; + #pseudo_tailcall{} -> true; + _ -> false + end. + +branch_successors(Branch) -> + case Branch of + #b_fun{} -> []; + #b_label{label=Label} -> [Label]; + #bctr{labels=Labels} -> Labels; + #blr{} -> []; + #pseudo_bc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; + #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=ExnLab}} -> + case ExnLab of + [] -> [ContLab]; + _ -> [ContLab,ExnLab] + end; + #pseudo_tailcall{} -> [] + end. + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). +fails_to(_Instr) -> []. +-endif. + +-ifdef(notdef). +redirect_jmp(I, Old, New) -> + case I of + #b_label{label=Label} -> + if Old =:= Label -> I#b_label{label=New}; + true -> I + end; + #pseudo_bc{true_label=TrueLab, false_label=FalseLab} -> + I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New}; + true -> I + end, + if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; + true -> I1 + end; + %% handle pseudo_call too? + _ -> I + end. +-endif. + +mk_goto(Label) -> + hipe_ppc:mk_b_label(Label). + +is_label(I) -> + hipe_ppc:is_label(I). + +label_name(Label) -> + hipe_ppc:label_label(Label). + +mk_label(Name) -> + hipe_ppc:mk_label(Name). + +linearise(CFG) -> % -> defun, not insn list + MFA = function(CFG), + Formals = params(CFG), + Code = linearize_cfg(CFG), + Data = data(CFG), + VarRange = hipe_gensym:var_range(ppc), + LabelRange = hipe_gensym:label_range(ppc), + IsClosure = is_closure(CFG), + IsLeaf = is_leaf(CFG), + hipe_ppc:mk_defun(MFA, Formals, IsClosure, IsLeaf, + Code, Data, VarRange, LabelRange). + +arity(CFG) -> + {_M, _F, A} = function(CFG), + A. diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl new file mode 100644 index 0000000000..03a8f82abf --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_defuse.erl @@ -0,0 +1,145 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_defuse). +-export([insn_def_all/1, insn_use_all/1]). +-export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_def_fpr/1, insn_use_fpr/1]). +-include("hipe_ppc.hrl"). + +%%% +%%% Defs and uses for both general-purpose and floating-point registers. +%%% This is needed for the frame module, alas. +%%% +insn_def_all(I) -> + addtemps(insn_def_fpr(I), insn_def_gpr(I)). + +insn_use_all(I) -> + addtemps(insn_use_fpr(I), insn_use_gpr(I)). + +%%% +%%% Defs and uses for general-purpose (integer) registers only. +%%% +insn_def_gpr(I) -> + case I of + #alu{dst=Dst} -> [Dst]; + #load{dst=Dst} -> [Dst]; + #loadx{dst=Dst} -> [Dst]; + #mfspr{dst=Dst} -> [Dst]; + #pseudo_call{} -> call_clobbered_gpr(); + #pseudo_li{dst=Dst} -> [Dst]; + #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); + #unary{dst=Dst} -> [Dst]; + _ -> [] + end. + +call_clobbered_gpr() -> + [hipe_ppc:mk_temp(R, T) + || {R,T} <- hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos()]. + +all_fp_pseudos() -> []. % XXX: for now + +tailcall_clobbered_gpr() -> + [hipe_ppc:mk_temp(R, T) + || {R,T} <- hipe_ppc_registers:tailcall_clobbered() ++ all_fp_pseudos()]. + +insn_use_gpr(I) -> + case I of + #alu{src1=Src1,src2=Src2} -> addsrc(Src2, [Src1]); + #blr{} -> + [hipe_ppc:mk_temp(hipe_ppc_registers:return_value(), 'tagged')]; + #cmp{src1=Src1,src2=Src2} -> addsrc(Src2, [Src1]); + #load{base=Base} -> [Base]; + #loadx{base1=Base1,base2=Base2} -> addtemp(Base1, [Base2]); + #mtcr{src=Src} -> [Src]; + #mtspr{src=Src} -> [Src]; + #pseudo_call{sdesc=#ppc_sdesc{arity=Arity}} -> arity_use_gpr(Arity); + #pseudo_move{src=Src} -> [Src]; + #pseudo_tailcall{arity=Arity,stkargs=StkArgs} -> + addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), arity_use_gpr(Arity))); + #store{src=Src,base=Base} -> addtemp(Src, [Base]); + #storex{src=Src,base1=Base1,base2=Base2} -> + addtemp(Src, addtemp(Base1, [Base2])); + #unary{src=Src} -> [Src]; + #lfd{base=Base} -> [Base]; + #lfdx{base1=Base1,base2=Base2} -> addtemp(Base1, [Base2]); + #stfd{base=Base} -> [Base]; + #stfdx{base1=Base1,base2=Base2} -> addtemp(Base1, [Base2]); + _ -> [] + end. + +arity_use_gpr(Arity) -> + [hipe_ppc:mk_temp(R, 'tagged') + || R <- hipe_ppc_registers:args(Arity)]. + +addsrcs([Arg|Args], Set) -> + addsrcs(Args, addsrc(Arg, Set)); +addsrcs([], Set) -> + Set. + +addsrc(Src, Set) -> + case Src of + #ppc_temp{} -> addtemp(Src, Set); + _ -> Set + end. + +%%% +%%% Defs and uses for floating-point registers only. +%%% +insn_def_fpr(I) -> + case I of + #pseudo_call{} -> call_clobbered_fpr(); + #lfd{dst=Dst} -> [Dst]; + #lfdx{dst=Dst} -> [Dst]; + #fp_binary{dst=Dst} -> [Dst]; + #fp_unary{dst=Dst} -> [Dst]; + #pseudo_fmove{dst=Dst} -> [Dst]; + _ -> [] + end. + +call_clobbered_fpr() -> + [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()]. + +insn_use_fpr(I) -> + case I of + #stfd{src=Src} -> [Src]; + #stfdx{src=Src} -> [Src]; + #fp_binary{src1=Src1,src2=Src2} -> addtemp(Src1, [Src2]); + #fp_unary{src=Src} -> [Src]; + #pseudo_fmove{src=Src} -> [Src]; + _ -> [] + end. + +%%% +%%% Auxiliary operations on sets of temps +%%% These sets are small. No point using gb_trees, right? +%%% + +addtemps([Arg|Args], Set) -> + addtemps(Args, addtemp(Arg, Set)); +addtemps([], Set) -> + Set. + +addtemp(Temp, Set) -> + case lists:member(Temp, Set) of + false -> [Temp|Set]; + _ -> Set + end. diff --git a/lib/hipe/ppc/hipe_ppc_encode.erl b/lib/hipe/ppc/hipe_ppc_encode.erl new file mode 100644 index 0000000000..97cb0bf635 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_encode.erl @@ -0,0 +1,1558 @@ +%%% -*- erlang-indent-level: 4 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Encode symbolic PowerPC instructions to binary form. +%%% Copyright (C) 2003-2005, 2009 Mikael Pettersson +%%% +%%% Notes: +%%% - PowerPC manuals use reversed bit numbering. In a 32-bit word, +%%% the most significant bit has number 0, and the least significant +%%% bit has number 31. +%%% - PowerPC manuals list opcodes in decimal, not hex. +%%% - This module does not support AltiVec instructions. +%%% +%%% Instruction Operands: +%%% +%%% {li,LI} long branch offset/address (24 bits, signed) +%%% {bo,BO} branch control operand (5 bits, restricted) +%%% {bi,BI} branch CR field and bits operand (5 bits) +%%% {bd,BD} branch offset (14 bits, signed) +%%% {to,TO} trap condition (5 bits) +%%% {nb,NB} number of bytes to copy (5 bits) +%%% {sh,SH} shift count (5 bits) +%%% {mb,MB} mask begin bit number (5 bits) +%%% {mb6,MB6} mask begin bit number (6 bits) (64-bit) +%%% {me,ME} mask end bit number (5 bits) +%%% {me6,ME6} mask end bit number (6 bits) (64-bit) +%%% {sr,SR} segment register (4 bits) +%%% {crimm,IMM} FPSCR CR image (4 bits) +%%% {simm,SIMM} immediate operand (16 bits, signed) +%%% {uimm,UIMM} immediate operand (16 bits, unsigned) +%%% {d,Disp} load/store byte displacement (16 bits, signed) +%%% {ds,DS} load/store word displacement (14 bits, signed) (64-bit) +%%% {r,R} integer register (5 bits) +%%% {fr,FR} floating-point register (5 bits) +%%% {crf,CRF} CR field number (3 bits) +%%% {crb,CRB} CR bit number (5 bits) +%%% {tbr,TBR} TBR number (10 bits, 268 or 269) +%%% {spr,SPR} SPR number (10 bits) +%%% {crm,CRM} CR fields set (8 bits) +%%% {fm,FM} FPSCR fields set (8 bits) + +-module(hipe_ppc_encode). + +-export([insn_encode/2]). + +%-define(TESTING,1). +-ifdef(TESTING). +-export([dotest/0, dotest/1]). +-endif. + +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +-define(BF(LB,RB,V), bf(LB,RB,V)). + +bf(LeftBit, RightBit, Value) -> + ?ASSERT(LeftBit >= 0), + ?ASSERT(LeftBit =< RightBit), + ?ASSERT(RightBit < 32), + ?ASSERT(Value >= 0), + ?ASSERT(Value < (1 bsl ((RightBit - LeftBit) + 1))), + Value bsl (31 - RightBit). + +-define(BIT(Pos,Val), ?BF(Pos,Pos,Val)). +-define(BITS(N,Val), ?BF(32-N,31,Val)). + +%%% I-Form Instructions +%%% b, ba, bl, bla + +b_AA_LK({{li,LI}}, AA, LK) -> + ?BF(0,5,10#18) bor ?BF(6,29,LI) bor ?BIT(30,AA) bor ?BIT(31,LK). + +%%% B-Form Instructions +%%% bc, bca, bcl, bcla + +bc_AA_LK({{bo,BO}, {bi,BI}, {bd,BD}}, AA, LK) -> + ?BF(0,5,10#16) bor ?BF(6,10,BO) bor ?BF(11,15,BI) bor ?BF(16,29,BD) bor ?BIT(30,AA) bor ?BIT(31,LK). + +%%% SC-Form Instructions +%%% sc + +sc({}) -> + ?BF(0,5,10#17) bor ?BIT(30,1). + +%%% D-Form Instructions +%%% addi, addic, addic., addis, mulli, subfic +%%% andi., andis., ori, oris, xori, xoris +%%% lbz, lbzu, lha, lhau, lhz, lhzu, lwz, lwzu, lfd, lfdu, lfs, lfsu, lmw +%%% stb, stbu, sth, sthu, stw, stwu, stfd, stfdu, stfs, stfsu, stmw +%%% cmpi, cmpli, twi +%%% tdi (64-bit) + +d_form(OPCD, D, A, IMM) -> + ?BF(0,5,OPCD) bor ?BF(6,10,D) bor ?BF(11,15,A) bor ?BF(16,31,IMM). + +d_form_D_A_SIMM(OPCD, {{r,D}, {r,A}, {simm,SIMM}}) -> + d_form(OPCD, D, A, SIMM). + +addi(Opnds) -> d_form_D_A_SIMM(10#14, Opnds). +addic(Opnds) -> d_form_D_A_SIMM(10#12, Opnds). +addic_dot(Opnds) -> d_form_D_A_SIMM(10#13, Opnds). +addis(Opnds) -> d_form_D_A_SIMM(10#15, Opnds). +mulli(Opnds) -> d_form_D_A_SIMM(10#07, Opnds). +subfic(Opnds) -> d_form_D_A_SIMM(10#08, Opnds). + +d_form_S_A_UIMM(OPCD, {{r,A}, {r,S}, {uimm,UIMM}}) -> + d_form(OPCD, S, A, UIMM). + +andi_dot(Opnds) -> d_form_S_A_UIMM(10#28, Opnds). +andis_dot(Opnds) -> d_form_S_A_UIMM(10#29, Opnds). +ori(Opnds) -> d_form_S_A_UIMM(10#24, Opnds). +oris(Opnds) -> d_form_S_A_UIMM(10#25, Opnds). +xori(Opnds) -> d_form_S_A_UIMM(10#26, Opnds). +xoris(Opnds) -> d_form_S_A_UIMM(10#27, Opnds). + +d_form_D_A_d_simple(OPCD, {{r,D}, {d,Disp}, {r,A}}) -> + d_form(OPCD, D, A, Disp). + +d_form_D_A_d_update(OPCD, {{r,D}, {d,Disp}, {r,A}}) -> + ?ASSERT(A =/= 0), + ?ASSERT(A =/= D), + d_form(OPCD, D, A, Disp). + +lbz(Opnds) -> d_form_D_A_d_simple(10#34, Opnds). +lbzu(Opnds) -> d_form_D_A_d_update(10#35, Opnds). +lha(Opnds) -> d_form_D_A_d_simple(10#42, Opnds). +lhau(Opnds) -> d_form_D_A_d_update(10#43, Opnds). +lhz(Opnds) -> d_form_D_A_d_simple(10#40, Opnds). +lhzu(Opnds) -> d_form_D_A_d_update(10#41, Opnds). +lwz(Opnds) -> d_form_D_A_d_simple(10#32, Opnds). +lwzu(Opnds) -> d_form_D_A_d_update(10#33, Opnds). + +d_form_frD_A_d_simple(OPCD, {{fr,D}, {d,Disp}, {r,A}}) -> + d_form(OPCD, D, A, Disp). + +d_form_frD_A_d_update(OPCD, {{fr,D}, {d,Disp}, {r,A}}) -> + ?ASSERT(A =/= 0), + d_form(OPCD, D, A, Disp). + +lfd(Opnds) -> d_form_frD_A_d_simple(10#50, Opnds). +lfdu(Opnds) -> d_form_frD_A_d_update(10#51, Opnds). +lfs(Opnds) -> d_form_frD_A_d_simple(10#48, Opnds). +lfsu(Opnds) -> d_form_frD_A_d_update(10#49, Opnds). + +lmw({{r,D}, {d,Disp}, {r,A}}) -> + ?ASSERT(A < D), + d_form(10#46, D, A, Disp). + +d_form_S_A_d_simple(OPCD, {{r,S}, {d,Disp}, {r,A}}) -> + d_form(OPCD, S, A, Disp). + +d_form_S_A_d_update(OPCD, {{r,S}, {d,Disp}, {r,A}}) -> + ?ASSERT(A =/= 0), + d_form(OPCD, S, A, Disp). + +stb(Opnds) -> d_form_S_A_d_simple(10#38, Opnds). +stbu(Opnds) -> d_form_S_A_d_update(10#39, Opnds). +sth(Opnds) -> d_form_S_A_d_simple(10#44, Opnds). +sthu(Opnds) -> d_form_S_A_d_update(10#45, Opnds). +stmw(Opnds) -> d_form_S_A_d_simple(10#47, Opnds). +stw(Opnds) -> d_form_S_A_d_simple(10#36, Opnds). +stwu(Opnds) -> d_form_S_A_d_update(10#37, Opnds). + +d_form_frS_A_d_simple(OPCD, {{fr,S}, {d,Disp}, {r,A}}) -> + d_form(OPCD, S, A, Disp). + +d_form_frS_A_d_update(OPCD, {{fr,S}, {d,Disp}, {r,A}}) -> + ?ASSERT(A =/= 0), + d_form(OPCD, S, A, Disp). + +stfd(Opnds) -> d_form_frS_A_d_simple(10#54, Opnds). +stfdu(Opnds) -> d_form_frS_A_d_update(10#55, Opnds). +stfs(Opnds) -> d_form_frS_A_d_simple(10#52, Opnds). +stfsu(Opnds) -> d_form_frS_A_d_update(10#53, Opnds). + +cmpi({{crf,CRFD}, L, {r,A}, {simm,SIMM}}) -> + %% ?ASSERT(L == 0), % L must be zero in 32-bit code + d_form(10#11, (CRFD bsl 2) bor L, A, SIMM). + +cmpli({{crf,CRFD}, L, {r,A}, {uimm,UIMM}}) -> + %% ?ASSERT(L == 0), % L must be zero in 32-bit code + d_form(10#10, (CRFD bsl 2) bor L, A, UIMM). + +d_form_OPCD_TO_A_SIMM(OPCD, {{to,TO}, {r,A}, {simm,SIMM}}) -> + d_form(OPCD, TO, A, SIMM). + +tdi(Opnds) -> d_form_OPCD_TO_A_SIMM(10#02, Opnds). % 64-bit +twi(Opnds) -> d_form_OPCD_TO_A_SIMM(10#03, Opnds). + +%%% DS-Form Instructions +%%% ld, ldu, lwa, std, stdu (64-bit) + +ds_form(OPCD, D, A, DS, XO) -> + ?BF(0,5,OPCD) bor ?BF(6,10,D) bor ?BF(11,15,A) bor ?BF(16,29,DS) bor ?BF(30,31,XO). + +ds_form_D_A_DS_XO_simple(OPCD, {{r,D}, {ds,DS}, {r,A}}, XO) -> + ds_form(OPCD, D, A, DS, XO). + +ds_form_D_A_DS_XO_update(OPCD, {{r,D}, {ds,DS}, {r,A}}, XO) -> + ?ASSERT(A =/= 0), + ?ASSERT(A =/= D), + ds_form(OPCD, D, A, DS, XO). + +ld(Opnds) -> ds_form_D_A_DS_XO_simple(10#58, Opnds, 10#0). % 64-bit +ldu(Opnds) -> ds_form_D_A_DS_XO_update(10#58, Opnds, 10#1). % 64-bit +lwa(Opnds) -> ds_form_D_A_DS_XO_simple(10#58, Opnds, 10#2). % 64-bit +std(Opnds) -> ds_form_D_A_DS_XO_simple(10#62, Opnds, 10#0). % 64-bit +stdu(Opnds) -> ds_form_D_A_DS_XO_update(10#62, Opnds, 10#1). % 64-bit + +%%% X-Form Instructions +%%% ecixw, lbzux, lbzx, lhaux, lhax, lhbrx, lhzux, lhzx, lwarx, lwbrx, lwzux, lwzx, lswx +%%% lwaux, lwax (64-bit) +%%% lfdux, lfdx, lfsux, lfsx +%%% lswi +%%% fabs, fctiw, fctiwz, fmr, fnabs, fneg, frsp +%%% fcfid, fctid, fctidz (64-bit) +%%% mfsrin +%%% mffs +%%% mfcr, mfmsr +%%% mfsr +%%% and, andc, eqv, nand, nor, or, orc, slw, sraw, srw, xor +%%% sld, srad, srd (64-bit) +%%% stwcx. +%%% stdcx. (64-bit) +%%% ecowx, stbx, stbux, sthbrx, sthx, sthux, stswx, stwbrx, stwx, stwux +%%% stdux, stdx (64-bit) +%%% stfdx, stfdux, stfiwx, stfsx, stfsux +%%% stswi +%%% cntlzw, extsb, extsh +%%% cntlzd, extsw (64-bit) +%%% mtmsr +%%% mtmsrd (64-bit) +%%% mtsr, mtsrin +%%% mtsrd, mtsrdin (64-bit) +%%% srawi +%%% sradi (64-bit) +%%% cmp, cmpl +%%% fcmpo, fcmpu +%%% mcrfs +%%% mcrxr (obsolete) +%%% mtfsfi +%%% tw +%%% td (64-bit) +%%% mtfsb0, mtfsb1 +%%% dcba, dcbf, dcbi, dcbst, dcbt, dcbtst, dcbz, icbi +%%% tlbie +%%% eieio, sync, tlbia, tlbsync + +x_form(OPCD, D, A, B, XO, Rc) -> + ?BF(0,5,OPCD) bor ?BF(6,10,D) bor ?BF(11,15,A) bor ?BF(16,20,B) bor ?BF(21,30,XO) bor ?BIT(31,Rc). + +x_form_D_A_B_XO_simple({{r,D}, {r,A}, {r,B}}, XO) -> + x_form(10#31, D, A, B, XO, 0). + +x_form_D_A_B_XO_update({{r,D}, {r,A}, {r,B}}, XO) -> + ?ASSERT(A =/= 0), + ?ASSERT(A =/= D), + x_form(10#31, D, A, B, XO, 0). + +eciwx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#310). % optional +lbzux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#119). +lbzx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#87). +ldarx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#84). % 64-bit +ldux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#53). % 64-bit +ldx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#21). % 64-bit +lhaux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#375). +lhax(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#343). +lhbrx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#790). +lhzux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#311). +lhzx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#279). +lswx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#533). % XXX: incomplete checks +lwarx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#20). +lwaux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#373). % 64-bit +lwax(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#341). % 64-bit +lwbrx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#534). +lwzux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#55). +lwzx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#23). + +x_form_frD_A_B_XO_simple({{fr,D}, {r,A}, {r,B}}, XO) -> + x_form(10#31, D, A, B, XO, 0). + +x_form_frD_A_B_XO_update({{fr,D}, {r,A}, {r,B}}, XO) -> + ?ASSERT(A =/= 0), + x_form(10#31, D, A, B, XO, 0). + +lfdux(Opnds) -> x_form_frD_A_B_XO_update(Opnds, 10#631). +lfdx(Opnds) -> x_form_frD_A_B_XO_simple(Opnds, 10#599). +lfsux(Opnds) -> x_form_frD_A_B_XO_update(Opnds, 10#567). +lfsx(Opnds) -> x_form_frD_A_B_XO_simple(Opnds, 10#535). + +lswi({{r,D}, {r,A}, {nb,NB}}) -> % XXX: incomplete checks + x_form(10#31, D, A, NB, 10#597, 0). + +x_form_D_B_XO_Rc({{fr,D}, {fr,B}}, XO, Rc) -> + x_form(10#63, D, 0, B, XO, Rc). + +fabs_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#264, Rc). +fcfid_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#846, Rc). % 64-bit +fctid_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#814, Rc). % 64-bit +fctidz_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#815, Rc). % 64-bit +fctiw_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#14, Rc). +fctiwz_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#15, Rc). +fmr_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#72, Rc). +fnabs_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#136, Rc). +fneg_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#40, Rc). +frsp_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#12, Rc). + +mfsrin({{r,D}, {r,B}}) -> % supervisor + x_form(10#31, D, 0, B, 10#659, 0). + +mffs_Rc({{fr,D}}, Rc) -> + x_form(10#63, D, 0, 0, 10#583, Rc). + +x_form_D_XO({{r,D}}, XO) -> + x_form(10#31, D, 0, 0, XO, 0). + +mfcr(Opnds) -> x_form_D_XO(Opnds, 10#19). +mfmsr(Opnds) -> x_form_D_XO(Opnds, 10#83). % supervisor + +mfsr({{r,D}, {sr,SR}}) -> % supervisor + x_form(10#31, D, ?BITS(4,SR), 0, 10#595, 0). + +x_form_S_A_B_XO_Rc({{r,A}, {r,S}, {r,B}}, XO, Rc) -> + x_form(10#31, S, A, B, XO, Rc). + +and_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#28, Rc). +andc_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#60, Rc). +eqv_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#284, Rc). +nand_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#476, Rc). +nor_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#124, Rc). +or_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#444, Rc). +orc_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#412, Rc). +sld_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#27, Rc). % 64-bit +slw_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#24, Rc). +srad_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#794, Rc). % 64-bit +sraw_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#792, Rc). +srd_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#539, Rc). % 64-bit +srw_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#536, Rc). +xor_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#316, Rc). + +xform_S_A_B_XO_1({{r,S}, {r,A}, {r,B}}, XO) -> + x_form(10#31, S, A, B, XO, 1). + +stdcx_dot(Opnds) -> xform_S_A_B_XO_1(Opnds, 10#214). % 64-bit +stwcx_dot(Opnds) -> xform_S_A_B_XO_1(Opnds, 10#150). + +x_form_S_A_B_XO_simple({{r,S}, {r,A}, {r,B}}, XO) -> + x_form(10#31, S, A, B, XO, 0). + +x_form_S_A_B_XO_update({{r,S}, {r,A}, {r,B}}, XO) -> + ?ASSERT(A =/= 0), + x_form(10#31, S, A, B, XO, 0). + +ecowx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#438). % optional +stbx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#215). +stbux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#247). +sthbrx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#918). +stdx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#149). % 64-bit +stdux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#181). % 64-bit +sthx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#407). +sthux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#439). +stswx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#661). +stwbrx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#662). +stwx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#151). +stwux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#183). + +x_form_frS_A_B_XO_simple({{fr,S}, {r,A}, {r,B}}, XO) -> + x_form(10#31, S, A, B, XO, 0). + +x_form_frS_A_B_XO_update({{fr,S}, {r,A}, {r,B}}, XO) -> + ?ASSERT(A =/= 0), + x_form(10#31, S, A, B, XO, 0). + +stfdx(Opnds) -> x_form_frS_A_B_XO_simple(Opnds, 10#727). +stfdux(Opnds) -> x_form_frS_A_B_XO_update(Opnds, 10#759). +stfiwx(Opnds) -> x_form_frS_A_B_XO_simple(Opnds, 10#983). % optional +stfsx(Opnds) -> x_form_frS_A_B_XO_simple(Opnds, 10#663). +stfsux(Opnds) -> x_form_frS_A_B_XO_update(Opnds, 10#695). + +stswi({{r,S}, {r,A}, {nb,NB}}) -> + x_form(10#31, S, A, NB, 10#725, 0). + +x_form_S_A_XO_Rc({{r,A}, {r,S}}, XO, Rc) -> + x_form(10#31, S, A, 0, XO, Rc). + +cntlzd_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#58, Rc). % 64-bit +cntlzw_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#26, Rc). +extsb_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#954, Rc). +extsh_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#922, Rc). +extsw_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#986, Rc). % 64-bit + +mtmsr({{r,S}}) -> % supervisor + x_form(10#31, S, 0, 0, 10#146, 0). + +mtmsrd({{r,S}}) -> % supervisor, 64-bit + x_form(10#31, S, 0, 0, 10#178, 0). + +mtsr({{sr,SR}, {r,S}}) -> % supervisor + x_form(10#31, S, ?BITS(4,SR), 0, 10#210, 0). + +mtsrd({{sr,SR}, {r,S}}) -> % supervisor, 64-bit + x_form(10#31, S, ?BITS(4,SR), 0, 10#82, 0). + +mtsrdin({{r,S}, {r,B}}) -> % supervisor, 64-bit + x_form(10#31, S, 0, B, 10#114, 0). + +mtsrin({{r,S}, {r,B}}) -> % supervisor, 32-bit + x_form(10#31, S, 0, B, 10#242, 0). + +slbia({}) -> % supervisor, 64-bit + x_form(10#31, 0, 0, 0, 10#498, 0). + +slbie({{r,B}}) -> % supervisor, 64-bit + x_form(10#31, 0, 0, B, 10#434, 0). + +srawi_Rc({{r,A}, {r,S}, {sh,SH}}, Rc) -> + x_form(10#31, S, A, SH, 10#824, Rc). + +x_form_crfD_L_A_B_XO({{crf,CRFD}, L, {r,A}, {r,B}}, XO) -> + %% ?ASSERT(L == 0), % L should be zero in 32-bit code + x_form(10#31, (CRFD bsl 2) bor L, A, B, XO, 0). + +cmp(Opnds) -> x_form_crfD_L_A_B_XO(Opnds, 0). +cmpl(Opnds) -> x_form_crfD_L_A_B_XO(Opnds, 10#32). + +x_form_crfD_A_B_XO({{crf,CRFD}, {fr,A}, {fr,B}}, XO) -> + x_form(10#63, CRFD bsl 2, A, B, XO, 0). + +fcmpo(Opnds) -> x_form_crfD_A_B_XO(Opnds, 10#32). +fcmpu(Opnds) -> x_form_crfD_A_B_XO(Opnds, 0). + +mcrfs({{crf,CRFD}, {crf,CRFS}}) -> + x_form(10#63, CRFD bsl 2, CRFS bsl 2, 0, 10#64, 0). + +%% mcrxr({{crf,CRFD}}) -> +%% x_form(10#31, CRFD bsl 2, 0, 0, 10#512, 0). + +mtfsfi_Rc({{crf,CRFD}, {crimm,IMM}}, Rc) -> + x_form(10#63, CRFD bsl 2, 0, IMM bsl 1, 10#134, Rc). + +x_form_TO_A_B_XO({{to,TO}, {r,A}, {r,B}}, XO) -> + x_form(10#31, TO, A, B, XO, 0). + +td(Opnds) -> x_form_TO_A_B_XO(Opnds, 10#68). % 64-bit +tw(Opnds) -> x_form_TO_A_B_XO(Opnds, 10#4). + +x_form_crbD_XO_Rc({{crb,CRBD}}, XO, Rc) -> + x_form(10#63, CRBD, 0, 0, XO, Rc). + +mtfsb0_Rc(Opnds, Rc) -> x_form_crbD_XO_Rc(Opnds, 10#70, Rc). +mtfsb1_Rc(Opnds, Rc) -> x_form_crbD_XO_Rc(Opnds, 10#38, Rc). + +x_form_A_B_XO({{r,A}, {r,B}}, XO) -> + x_form(10#31, 0, A, B, XO, 0). + +dcba(Opnds) -> x_form_A_B_XO(Opnds, 10#758). % optional +dcbf(Opnds) -> x_form_A_B_XO(Opnds, 10#86). +dcbi(Opnds) -> x_form_A_B_XO(Opnds, 10#470). % supervisor +dcbst(Opnds) -> x_form_A_B_XO(Opnds, 10#54). +dcbt(Opnds) -> x_form_A_B_XO(Opnds, 10#278). +dcbtst(Opnds) -> x_form_A_B_XO(Opnds, 10#246). +dcbz(Opnds) -> x_form_A_B_XO(Opnds, 10#1014). +icbi(Opnds) -> x_form_A_B_XO(Opnds, 10#982). + +x_form_B_XO({{r,B}}, XO) -> + x_form(10#31, 0, 0, B, XO, 0). + +tlbie(Opnds) -> x_form_B_XO(Opnds, 10#306). % supervisor, optional +tlbld(Opnds) -> x_form_B_XO(Opnds, 10#978). % supervisor, optional +tlbli(Opnds) -> x_form_B_XO(Opnds, 10#1010). % supervisor, optional + +x_form_XO({}, XO) -> + x_form(10#31, 0, 0, 0, XO, 0). + +eieio(Opnds) -> x_form_XO(Opnds, 10#854). +sync(Opnds) -> x_form_XO(Opnds, 10#598). +tlbia(Opnds) -> x_form_XO(Opnds, 10#370). % supervisor, optional +tlbsync(Opnds) -> x_form_XO(Opnds, 10#566). % supervisor, optional + +%%% XL-Form Instructions +%%% bcctr, bclr +%%% crand, crandc, creqv, crnand, crnor, cror, crorc, crxor +%%% mcrf +%%% isync, rfi +%%% rfid (64-bit) + +xl_form(A, B, C, XO, LK) -> + ?BF(0,5,10#19) bor ?BF(6,10,A) bor ?BF(11,15,B) bor ?BF(16,20,C) bor ?BF(21,30,XO) bor ?BIT(31,LK). + +xl_form_BO_BI_XO_LK({{bo,BO}, {bi,BI}}, XO, LK) -> + xl_form(BO, BI, 0, XO, LK). + +bcctr_lk(Opnds, LK) -> xl_form_BO_BI_XO_LK(Opnds, 10#528, LK). +bclr_lk(Opnds, LK) -> xl_form_BO_BI_XO_LK(Opnds, 10#16, LK). + +xl_form_crbD_crbA_crbB_XO({{crb,CRBD}, {crb,CRBA}, {crb,CRBB}}, XO) -> + xl_form(CRBD, CRBA, CRBB, XO, 0). + +crand(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#257). +crandc(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#129). +creqv(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#289). +crnand(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#225). +crnor(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#33). +cror(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#449). +crorc(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#417). +crxor(Opnds) -> xl_form_crbD_crbA_crbB_XO(Opnds, 10#193). + +mcrf({{crf,CRFD}, {crf,CRFS}}) -> + xl_form(CRFD bsl 2, CRFS bsl 2, 0, 0, 0). + +xl_form_XO({}, XO) -> + xl_form(0, 0, 0, XO, 0). + +isync(Opnds) -> xl_form_XO(Opnds, 10#150). +rfi(Opnds) -> xl_form_XO(Opnds, 10#50). % supervisor +rfid(Opnds) -> xl_form_XO(Opnds, 10#18). % supervisor, 64-bit + +%%% XFX-Form Instructions +%%% mfspr, mtspr, mftb, mtcrf + +xfx_form(A, B, XO) -> + ?BF(0,5,10#31) bor ?BF(6,10,A) bor ?BF(11,20,B) bor ?BF(21,30,XO). + +xfx_form_R_SPR_XO(R, SPR, XO) -> + SPR04 = SPR band 16#1F, + SPR59 = (SPR bsr 5) band 16#1F, + xfx_form(R, (SPR04 bsl 5) bor SPR59, XO). + +mfspr({{r,D}, {spr,SPR}}) -> xfx_form_R_SPR_XO(D, SPR, 10#339). +mtspr({{spr,SPR}, {r,S}}) -> xfx_form_R_SPR_XO(S, SPR, 10#467). +mftb({{r,D}, {tbr,TBR}}) -> xfx_form_R_SPR_XO(D, TBR, 10#371). + +mtcrf({{crm,CRM}, {r,S}}) -> xfx_form(S, ?BITS(8,CRM) bsl 1, 10#144). + +%%% XFL-Form Instructions +%%% mtfsf + +xfl_form(FM, B, Rc) -> + ?BF(0,5,10#63) bor ?BF(7,14,FM) bor ?BF(16,20,B) bor ?BF(21,30,10#711) bor ?BIT(31,Rc). + +mtfsf_Rc({{fm,FM}, {fr,B}}, Rc) -> xfl_form(FM, B, Rc). + +%%% XS-Form Instructions +%%% sradi (64-bit) + +xs_form(S, A, SH1, XO, SH2, Rc) -> + ?BF(0,5,10#31) bor ?BF(6,10,S) bor ?BF(11,15,A) bor ?BF(16,20,SH1) bor ?BF(21,29,XO) bor ?BIT(30,SH2) bor ?BIT(31,Rc). + +sradi_Rc({{r,A}, {r,S}, {sh6,SH6}}, Rc) -> % 64-bit + xs_form(S, A, sh6_bits0to4(SH6), 10#413, sh6_bit5(SH6), Rc). + +%%% XO-Form Instructions +%%% add, addc, adde, divw, divwu, mullw, subf, subfc, subfe +%%% divd, divdu, mulld (64-bit) +%%% mulhw, mulhwu +%%% mulhd, mulhdu (64-bit) +%%% addme, addze, neg, subfme, subfze + +xo_form(D, A, B, OE, XO, Rc) -> + ?BF(0,5,10#31) bor ?BF(6,10,D) bor ?BF(11,15,A) bor ?BF(16,20,B) bor ?BIT(21,OE) bor ?BF(22,30,XO) bor ?BIT(31,Rc). + +xo_form_D_A_B_OE_XO_Rc({{r,D}, {r,A}, {r,B}}, OE, XO, Rc) -> + xo_form(D, A, B, OE, XO, Rc). + +add_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#266, Rc). +addc_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#10, Rc). +adde_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#138, Rc). +divd_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#489, Rc). % 64-bit +divdu_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#457, Rc). % 64-bit +divw_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#491, Rc). +divwu_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#459, Rc). +mulld_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#233, Rc). % 64-bit +mullw_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#235, Rc). +subf_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#40, Rc). +subfc_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#8, Rc). +subfe_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, OE, 10#136, Rc). + +mulhd_Rc(Opnds, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, 0, 10#73, Rc). % 64-bit +mulhdu_Rc(Opnds, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, 0, 10#9, Rc). % 64-bit +mulhw_Rc(Opnds, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, 0, 10#75, Rc). +mulhwu_Rc(Opnds, Rc) -> xo_form_D_A_B_OE_XO_Rc(Opnds, 0, 10#11, Rc). + +xo_form_D_A_OE_XO_Rc({{r,D}, {r,A}}, OE, XO, Rc) -> + xo_form(D, A, 0, OE, XO, Rc). + +addme_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Opnds, OE, 10#234, Rc). +addze_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Opnds, OE, 10#202, Rc). +neg_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Opnds, OE, 10#104, Rc). +subfme_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Opnds, OE, 10#232, Rc). +subfze_OE_Rc(Opnds, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Opnds, OE, 10#200, Rc). + +%%% A-Form Instructions +%%% fadd, fadds, fdiv, fdivs, fsub, fsubs +%%% fmadd, fmadds, fmsub, fmsubs, fnmadd, fnmadds, fnmsub, fnmsubs, fsel +%%% fmul, fmuls +%%% fres, fsqrte, fsqrt, fsqrts + +a_form(OPCD, D, A, B, C, XO, Rc) -> + ?BF(0,5,OPCD) bor ?BF(6,10,D) bor ?BF(11,15,A) bor ?BF(16,20,B) bor ?BF(21,25,C) bor ?BF(26,30,XO) bor ?BIT(31,Rc). + +a_form_D_A_B_XO_Rc(OPCD, {{fr,D}, {fr,A}, {fr,B}}, XO, Rc) -> + a_form(OPCD, D, A, B, 0, XO, Rc). + +fadd_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_XO_Rc(OPCD, Opnds, 10#21, Rc). +fadd_Rc(Opnds, Rc) -> fadd_OPCD_Rc(10#63, Opnds, Rc). +fadds_Rc(Opnds, Rc) -> fadd_OPCD_Rc(10#59, Opnds, Rc). + +fdiv_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_XO_Rc(OPCD, Opnds, 10#18, Rc). +fdiv_Rc(Opnds, Rc) -> fdiv_OPCD_Rc(10#63, Opnds, Rc). +fdivs_Rc(Opnds, Rc) -> fdiv_OPCD_Rc(10#59, Opnds, Rc). + +fsub_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_XO_Rc(OPCD, Opnds, 10#20, Rc). +fsub_Rc(Opnds, Rc) -> fsub_OPCD_Rc(10#63, Opnds, Rc). +fsubs_Rc(Opnds, Rc) -> fsub_OPCD_Rc(10#59, Opnds, Rc). + +a_form_D_A_B_C_XO_Rc(OPCD, {{fr,D}, {fr,A}, {fr,C}, {fr,B}}, XO, Rc) -> + a_form(OPCD, D, A, B, C, XO, Rc). + +fmadd_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Opnds, 10#29, Rc). +fmadd_Rc(Opnds, Rc) -> fmadd_OPCD_Rc(10#63, Opnds, Rc). +fmadds_Rc(Opnds, Rc) -> fmadd_OPCD_Rc(10#59, Opnds, Rc). + +fmsub_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Opnds, 10#28, Rc). +fmsub_Rc(Opnds, Rc) -> fmsub_OPCD_Rc(10#63, Opnds, Rc). +fmsubs_Rc(Opnds, Rc) -> fmsub_OPCD_Rc(10#59, Opnds, Rc). + +fnmadd_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Opnds, 10#31, Rc). +fnmadd_Rc(Opnds, Rc) -> fnmadd_OPCD_Rc(10#63, Opnds, Rc). +fnmadds_Rc(Opnds, Rc) -> fnmadd_OPCD_Rc(10#59, Opnds, Rc). + +fnmsub_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Opnds, 10#30, Rc). +fnmsub_Rc(Opnds, Rc) -> fnmsub_OPCD_Rc(10#63, Opnds, Rc). +fnmsubs_Rc(Opnds, Rc) -> fnmsub_OPCD_Rc(10#59, Opnds, Rc). + +fsel_Rc(Opnds, Rc) -> a_form_D_A_B_C_XO_Rc(10#63, Opnds, 10#23, Rc). % optional + +fmul_OPCD_Rc(OPCD, {{fr,D}, {fr,A}, {fr,C}}, Rc) -> + a_form(OPCD, D, A, 0, C, 10#25, Rc). + +fmul_Rc(Opnds, Rc) -> fmul_OPCD_Rc(10#63, Opnds, Rc). +fmuls_Rc(Opnds, Rc) -> fmul_OPCD_Rc(10#59, Opnds, Rc). + +a_form_D_B_XO_Rc(OPCD, {{fr,D}, {fr,B}}, XO, Rc) -> + a_form(OPCD, D, 0, B, 0, XO, Rc). + +fres_Rc(Opnds, Rc) -> a_form_D_B_XO_Rc(10#59, Opnds, 10#24, Rc). % optional +frsqrte_Rc(Opnds, Rc) -> a_form_D_B_XO_Rc(10#63, Opnds, 10#26, Rc). % optional + +fsqrt_OPCD_Rc(OPCD, Opnds, Rc) -> a_form_D_B_XO_Rc(OPCD, Opnds, 10#22, Rc). % optional +fsqrt_Rc(Opnds, Rc) -> fsqrt_OPCD_Rc(10#63, Opnds, Rc). % optional +fsqrts_Rc(Opnds, Rc) -> fsqrt_OPCD_Rc(10#59, Opnds, Rc). % optional + +%%% M-Form Instructions +%%% rlwimi, rlwinm +%%% rlwnm + +m_form(OPCD, S, A, SH, MB, ME, Rc) -> + ?BF(0,5,OPCD) bor ?BF(6,10,S) bor ?BF(11,15,A) bor ?BF(16,20,SH) bor ?BF(21,25,MB) bor ?BF(26,30,ME) bor ?BIT(31,Rc). + +m_form_S_A_SH_MB_ME_Rc(OPCD, {{r,A}, {r,S}, {sh,SH}, {mb,MB}, {me,ME}}, Rc) -> + m_form(OPCD, S, A, SH, MB, ME, Rc). + +rlwimi_Rc(Opnds, Rc) -> m_form_S_A_SH_MB_ME_Rc(10#20, Opnds, Rc). +rlwinm_Rc(Opnds, Rc) -> m_form_S_A_SH_MB_ME_Rc(10#21, Opnds, Rc). + +rlwnm_Rc({{r,A}, {r,S}, {r,B}, {mb,MB}, {me,ME}}, Rc) -> + m_form(10#23, S, A, B, MB, ME, Rc). + +%%% MD-Form Instructions +%%% rldic, rldicl, rldicr, rldimi (64-bit) + +md_form(S, A, SH1, MB, XO, SH2, Rc) -> + ?BF(0,5,10#30) bor ?BF(6,10,S) bor ?BF(11,15,A) bor ?BF(16,20,SH1) bor ?BF(21,26,MB) bor ?BF(27,29,XO) bor ?BIT(30,SH2) bor ?BIT(31,Rc). + +mb6_reformat(MB6) -> + ((MB6 band 16#1F) bsl 1) bor ((MB6 bsr 5) band 1). + +sh6_bits0to4(SH6) -> + SH6 band 16#1F. + +sh6_bit5(SH6) -> + (SH6 bsr 5) band 1. + +md_form_S_A_SH6_MB6_XO_Rc({{r,A}, {r,S}, {sh6,SH6}, {mb6,MB6}}, XO, Rc) -> + md_form(S, A, sh6_bits0to4(SH6), mb6_reformat(MB6), XO, sh6_bit5(SH6), Rc). + +rldic_Rc(Opnds, Rc) -> md_form_S_A_SH6_MB6_XO_Rc(Opnds, 10#2, Rc). % 64-bit +rldicl_Rc(Opnds, Rc) -> md_form_S_A_SH6_MB6_XO_Rc(Opnds, 10#0, Rc). % 64-bit +rldimi_Rc(Opnds, Rc) -> md_form_S_A_SH6_MB6_XO_Rc(Opnds, 10#3, Rc). % 64-bit + +rldicr_Rc({{r,A}, {r,S}, {sh6,SH6}, {me6,ME6}}, Rc) -> % 64-bit + md_form(S, A, sh6_bits0to4(SH6), mb6_reformat(ME6), 10#1, sh6_bit5(SH6), Rc). + +%%% MDS-Form Instructions +%%% rldcl, rldcr (64-bit) + +mds_form(S, A, B, MB, XO, Rc) -> + ?BF(0,5,10#30) bor ?BF(6,10,S) bor ?BF(11,15,A) bor ?BF(16,20,B) bor ?BF(21,26,MB) bor ?BF(27,30,XO) bor ?BIT(31,Rc). + +rldcl({{r,A}, {r,S}, {r,B}, {mb6,MB6}}, Rc) -> % 64-bit + mds_form(S, A, B, mb6_reformat(MB6), 10#8, Rc). + +rldcr({{r,A}, {r,S}, {r,B}, {me6,ME6}}, Rc) -> % 64-bit + mds_form(S, A, B, mb6_reformat(ME6), 10#9, Rc). + +%%% main encode dispatch + +insn_encode(Op, Opnds) -> + case Op of + %% I-Form + 'b' -> b_AA_LK(Opnds, 0, 0); + 'ba' -> b_AA_LK(Opnds, 1, 0); + 'bl' -> b_AA_LK(Opnds, 0, 1); + 'bla' -> b_AA_LK(Opnds, 1, 1); + %% B-Form + 'bc' -> bc_AA_LK(Opnds, 0, 0); + 'bca' -> bc_AA_LK(Opnds, 1, 0); + 'bcl' -> bc_AA_LK(Opnds, 0, 1); + 'bcla' -> bc_AA_LK(Opnds, 1, 1); + %% SC-Form + 'sc' -> sc(Opnds); + %% D-Form + 'addi' -> addi(Opnds); + 'addic' -> addic(Opnds); + 'addic.' -> addic_dot(Opnds); + 'addis' -> addis(Opnds); + 'andi.' -> andi_dot(Opnds); + 'andis.' -> andis_dot(Opnds); + 'cmpi' -> cmpi(Opnds); + 'cmpli' -> cmpli(Opnds); + 'lbz' -> lbz(Opnds); + 'lbzu' -> lbzu(Opnds); + 'lfd' -> lfd(Opnds); + 'lfdu' -> lfdu(Opnds); + 'lfs' -> lfs(Opnds); + 'lfsu' -> lfsu(Opnds); + 'lha' -> lha(Opnds); + 'lhau' -> lhau(Opnds); + 'lhz' -> lhz(Opnds); + 'lhzu' -> lhzu(Opnds); + 'lmw' -> lmw(Opnds); + 'lwz' -> lwz(Opnds); + 'lwzu' -> lwzu(Opnds); + 'mulli' -> mulli(Opnds); + 'ori' -> ori(Opnds); + 'oris' -> oris(Opnds); + 'stb' -> stb(Opnds); + 'stbu' -> stbu(Opnds); + 'stfd' -> stfd(Opnds); + 'stfdu' -> stfdu(Opnds); + 'stfs' -> stfs(Opnds); + 'stfsu' -> stfsu(Opnds); + 'sth' -> sth(Opnds); + 'sthu' -> sthu(Opnds); + 'stmw' -> stmw(Opnds); + 'stw' -> stw(Opnds); + 'stwu' -> stwu(Opnds); + 'subfic' -> subfic(Opnds); + 'tdi' -> tdi(Opnds); + 'twi' -> twi(Opnds); + 'xori' -> xori(Opnds); + 'xoris' -> xoris(Opnds); + %% DS-Form + 'ld' -> ld(Opnds); + 'ldu' -> ldu(Opnds); + 'lwa' -> lwa(Opnds); + 'std' -> std(Opnds); + 'stdu' -> stdu(Opnds); + %% X-Form + 'and' -> and_Rc(Opnds, 0); + 'and.' -> and_Rc(Opnds, 1); + 'andc' -> andc_Rc(Opnds, 0); + 'andc.' -> andc_Rc(Opnds, 1); + 'cmp' -> cmp(Opnds); + 'cmpl' -> cmpl(Opnds); + 'cntlzd' -> cntlzd_Rc(Opnds, 0); + 'cntlzd.' -> cntlzd_Rc(Opnds, 1); + 'cntlzw' -> cntlzw_Rc(Opnds, 0); + 'cntlzw.' -> cntlzw_Rc(Opnds, 1); + 'dcba' -> dcba(Opnds); + 'dcbf' -> dcbf(Opnds); + 'dcbi' -> dcbi(Opnds); + 'dcbst' -> dcbst(Opnds); + 'dcbt' -> dcbt(Opnds); + 'dcbtst' -> dcbtst(Opnds); + 'dcbz' -> dcbz(Opnds); + 'eciwx' -> eciwx(Opnds); + 'ecowx' -> ecowx(Opnds); + 'eieio' -> eieio(Opnds); + 'eqv' -> eqv_Rc(Opnds, 0); + 'eqv.' -> eqv_Rc(Opnds, 1); + 'extsb' -> extsb_Rc(Opnds, 0); + 'extsb.' -> extsb_Rc(Opnds, 1); + 'extsh' -> extsh_Rc(Opnds, 0); + 'extsh.' -> extsh_Rc(Opnds, 1); + 'extsw' -> extsw_Rc(Opnds, 0); + 'extsw.' -> extsw_Rc(Opnds, 1); + 'fabs' -> fabs_Rc(Opnds, 0); + 'fabs.' -> fabs_Rc(Opnds, 1); + 'fcfid' -> fcfid_Rc(Opnds, 0); + 'fcfid.' -> fcfid_Rc(Opnds, 1); + 'fcmpo' -> fcmpo(Opnds); + 'fcmpu' -> fcmpu(Opnds); + 'fctid' -> fctid_Rc(Opnds, 0); + 'fctid.' -> fctid_Rc(Opnds, 1); + 'fctidz' -> fctidz_Rc(Opnds, 0); + 'fctidz.' -> fctidz_Rc(Opnds, 1); + 'fctiw' -> fctiw_Rc(Opnds, 0); + 'fctiw.' -> fctiw_Rc(Opnds, 1); + 'fctiwz' -> fctiwz_Rc(Opnds, 0); + 'fctiwz.' -> fctiwz_Rc(Opnds, 1); + 'fmr' -> fmr_Rc(Opnds, 0); + 'fmr.' -> fmr_Rc(Opnds, 1); + 'fnabs' -> fnabs_Rc(Opnds, 0); + 'fnabs.' -> fnabs_Rc(Opnds, 1); + 'fneg' -> fneg_Rc(Opnds, 0); + 'fneg.' -> fneg_Rc(Opnds, 1); + 'frsp' -> frsp_Rc(Opnds, 0); + 'frsp.' -> frsp_Rc(Opnds, 1); + 'icbi' -> icbi(Opnds); + 'lbzux' -> lbzux(Opnds); + 'lbzx' -> lbzx(Opnds); + 'ldarx' -> ldarx(Opnds); + 'ldux' -> ldux(Opnds); + 'ldx' -> ldx(Opnds); + 'lfdux' -> lfdux(Opnds); + 'lfdx' -> lfdx(Opnds); + 'lfsux' -> lfsux(Opnds); + 'lfsx' -> lfsx(Opnds); + 'lhaux' -> lhaux(Opnds); + 'lhax' -> lhax(Opnds); + 'lhbrx' -> lhbrx(Opnds); + 'lhzux' -> lhzux(Opnds); + 'lhzx' -> lhzx(Opnds); + 'lswi' -> lswi(Opnds); + 'lswx' -> lswx(Opnds); + 'lwarx' -> lwarx(Opnds); + 'lwaux' -> lwaux(Opnds); + 'lwax' -> lwax(Opnds); + 'lwbrx' -> lwbrx(Opnds); + 'lwzux' -> lwzux(Opnds); + 'lwzx' -> lwzx(Opnds); + 'mcrfs' -> mcrfs(Opnds); + %% 'mcrxr' -> mcrxr(Opnds); + 'mfcr' -> mfcr(Opnds); + 'mffs' -> mffs_Rc(Opnds, 0); + 'mffs.' -> mffs_Rc(Opnds, 1); + 'mfmsr' -> mfmsr(Opnds); + 'mfsr' -> mfsr(Opnds); + 'mfsrin' -> mfsrin(Opnds); + 'mtfsb0' -> mtfsb0_Rc(Opnds, 0); + 'mtfsb0.' -> mtfsb0_Rc(Opnds, 1); + 'mtfsb1' -> mtfsb1_Rc(Opnds, 0); + 'mtfsb1.' -> mtfsb1_Rc(Opnds, 1); + 'mtfsfi' -> mtfsfi_Rc(Opnds, 0); + 'mtfsfi.' -> mtfsfi_Rc(Opnds, 1); + 'mtmsr' -> mtmsr(Opnds); + 'mtmsrd' -> mtmsrd(Opnds); + 'mtsr' -> mtsr(Opnds); + 'mtsrd' -> mtsrd(Opnds); + 'mtsrdin' -> mtsrdin(Opnds); + 'mtsrin' -> mtsrin(Opnds); + 'nand' -> nand_Rc(Opnds, 0); + 'nand.' -> nand_Rc(Opnds, 1); + 'nor' -> nor_Rc(Opnds, 0); + 'nor.' -> nor_Rc(Opnds, 1); + 'or' -> or_Rc(Opnds, 0); + 'or.' -> or_Rc(Opnds, 1); + 'orc' -> orc_Rc(Opnds, 0); + 'orc.' -> orc_Rc(Opnds, 1); + 'slbia' -> slbia(Opnds); + 'slbie' -> slbie(Opnds); + 'sld' -> sld_Rc(Opnds, 0); + 'sld.' -> sld_Rc(Opnds, 1); + 'slw' -> slw_Rc(Opnds, 0); + 'slw.' -> slw_Rc(Opnds, 1); + 'srad' -> srad_Rc(Opnds, 0); + 'srad.' -> srad_Rc(Opnds, 1); + 'sraw' -> sraw_Rc(Opnds, 0); + 'sraw.' -> sraw_Rc(Opnds, 1); + 'srawi' -> srawi_Rc(Opnds, 0); + 'srawi.' -> srawi_Rc(Opnds, 1); + 'srd' -> srd_Rc(Opnds, 0); + 'srd.' -> srd_Rc(Opnds, 1); + 'srw' -> srw_Rc(Opnds, 0); + 'srw.' -> srw_Rc(Opnds, 1); + 'stbux' -> stbux(Opnds); + 'stbx' -> stbx(Opnds); + 'stdcx.' -> stdcx_dot(Opnds); + 'stdux' -> stdux(Opnds); + 'stdx' -> stdx(Opnds); + 'stfdux' -> stfdux(Opnds); + 'stfdx' -> stfdx(Opnds); + 'stfiwx' -> stfiwx(Opnds); + 'stfsux' -> stfsux(Opnds); + 'stfsx' -> stfsx(Opnds); + 'sthbrx' -> sthbrx(Opnds); + 'sthux' -> sthux(Opnds); + 'sthx' -> sthx(Opnds); + 'stswi' -> stswi(Opnds); + 'stswx' -> stswx(Opnds); + 'stwbrx' -> stwbrx(Opnds); + 'stwcx.' -> stwcx_dot(Opnds); + 'stwux' -> stwux(Opnds); + 'stwx' -> stwx(Opnds); + 'sync' -> sync(Opnds); + 'td' -> td(Opnds); + 'tlbia' -> tlbia(Opnds); % not implemented in MPC603e or MPC7450 + 'tlbie' -> tlbie(Opnds); + 'tlbld' -> tlbld(Opnds); + 'tlbli' -> tlbli(Opnds); + 'tlbsync' -> tlbsync(Opnds); + 'tw' -> tw(Opnds); + 'xor' -> xor_Rc(Opnds, 0); + 'xor.' -> xor_Rc(Opnds, 1); + %% XL-Form + 'bcctr' -> bcctr_lk(Opnds, 0); + 'bcctrl' -> bcctr_lk(Opnds, 1); + 'bclr' -> bclr_lk(Opnds, 0); + 'bclrl' -> bclr_lk(Opnds, 1); + 'crand' -> crand(Opnds); + 'crandc' -> crandc(Opnds); + 'creqv' -> creqv(Opnds); + 'crnand' -> crnand(Opnds); + 'crnor' -> crnor(Opnds); + 'cror' -> cror(Opnds); + 'crorc' -> crorc(Opnds); + 'crxor' -> crxor(Opnds); + 'isync' -> isync(Opnds); + 'mcrf' -> mcrf(Opnds); + 'rfi' -> rfi(Opnds); + 'rfid' -> rfid(Opnds); + %% XFX-Form + 'mfspr' -> mfspr(Opnds); + 'mftb' -> mftb(Opnds); + 'mtcrf' -> mtcrf(Opnds); + 'mtspr' -> mtspr(Opnds); + %% XFL-Form + 'mtfsf' -> mtfsf_Rc(Opnds, 0); + 'mtfsf.' -> mtfsf_Rc(Opnds, 1); + %% XS-Form + 'sradi' -> sradi_Rc(Opnds, 0); + 'sradi.' -> sradi_Rc(Opnds, 1); + %% XO-Form + 'add' -> add_OE_Rc(Opnds, 0, 0); + 'add.' -> add_OE_Rc(Opnds, 0, 1); + 'addo' -> add_OE_Rc(Opnds, 1, 0); + 'addo.' -> add_OE_Rc(Opnds, 1, 1); + 'addc' -> addc_OE_Rc(Opnds, 0, 0); + 'addc.' -> addc_OE_Rc(Opnds, 0, 1); + 'addco' -> addc_OE_Rc(Opnds, 1, 0); + 'addco.' -> addc_OE_Rc(Opnds, 1, 1); + 'adde' -> adde_OE_Rc(Opnds, 0, 0); + 'adde.' -> adde_OE_Rc(Opnds, 0, 1); + 'addeo' -> adde_OE_Rc(Opnds, 1, 0); + 'addeo.' -> adde_OE_Rc(Opnds, 1, 1); + 'addme' -> addme_OE_Rc(Opnds, 0, 0); + 'addme.' -> addme_OE_Rc(Opnds, 0, 1); + 'addmeo' -> addme_OE_Rc(Opnds, 1, 0); + 'addmeo.' -> addme_OE_Rc(Opnds, 1, 1); + 'addze' -> addze_OE_Rc(Opnds, 0, 0); + 'addze.' -> addze_OE_Rc(Opnds, 0, 1); + 'addzeo' -> addze_OE_Rc(Opnds, 1, 0); + 'addzeo.' -> addze_OE_Rc(Opnds, 1, 1); + 'divd' -> divd_OE_Rc(Opnds, 0, 0); + 'divd.' -> divd_OE_Rc(Opnds, 0, 1); + 'divdo' -> divd_OE_Rc(Opnds, 1, 0); + 'divdo.' -> divd_OE_Rc(Opnds, 1, 1); + 'divdu' -> divdu_OE_Rc(Opnds, 0, 0); + 'divdu.' -> divdu_OE_Rc(Opnds, 0, 1); + 'divduo' -> divdu_OE_Rc(Opnds, 1, 0); + 'divduo.' -> divdu_OE_Rc(Opnds, 1, 1); + 'divw' -> divw_OE_Rc(Opnds, 0, 0); + 'divw.' -> divw_OE_Rc(Opnds, 0, 1); + 'divwo' -> divw_OE_Rc(Opnds, 1, 0); + 'divwo.' -> divw_OE_Rc(Opnds, 1, 1); + 'divwu' -> divwu_OE_Rc(Opnds, 0, 0); + 'divwu.' -> divwu_OE_Rc(Opnds, 0, 1); + 'divwuo' -> divwu_OE_Rc(Opnds, 1, 0); + 'divwuo.' -> divwu_OE_Rc(Opnds, 1, 1); + 'mulhd' -> mulhd_Rc(Opnds, 0); + 'mulhd.' -> mulhd_Rc(Opnds, 1); + 'mulhdu' -> mulhdu_Rc(Opnds, 0); + 'mulhdu.' -> mulhdu_Rc(Opnds, 1); + 'mulhw' -> mulhw_Rc(Opnds, 0); + 'mulhw.' -> mulhw_Rc(Opnds, 1); + 'mulhwu' -> mulhwu_Rc(Opnds, 0); + 'mulhwu.' -> mulhwu_Rc(Opnds, 1); + 'mulld' -> mulld_OE_Rc(Opnds, 0, 0); + 'mulld.' -> mulld_OE_Rc(Opnds, 0, 1); + 'mulldo' -> mulld_OE_Rc(Opnds, 1, 0); + 'mulldo.' -> mulld_OE_Rc(Opnds, 1, 1); + 'mullw' -> mullw_OE_Rc(Opnds, 0, 0); + 'mullw.' -> mullw_OE_Rc(Opnds, 0, 1); + 'mullwo' -> mullw_OE_Rc(Opnds, 1, 0); + 'mullwo.' -> mullw_OE_Rc(Opnds, 1, 1); + 'neg' -> neg_OE_Rc(Opnds, 0, 0); + 'neg.' -> neg_OE_Rc(Opnds, 0, 1); + 'nego' -> neg_OE_Rc(Opnds, 1, 0); + 'nego.' -> neg_OE_Rc(Opnds, 1, 1); + 'subf' -> subf_OE_Rc(Opnds, 0, 0); + 'subf.' -> subf_OE_Rc(Opnds, 0, 1); + 'subfo' -> subf_OE_Rc(Opnds, 1, 0); + 'subfo.' -> subf_OE_Rc(Opnds, 1, 1); + 'subfc' -> subfc_OE_Rc(Opnds, 0, 0); + 'subfc.' -> subfc_OE_Rc(Opnds, 0, 1); + 'subfco' -> subfc_OE_Rc(Opnds, 1, 0); + 'subfco.' -> subfc_OE_Rc(Opnds, 1, 1); + 'subfe' -> subfe_OE_Rc(Opnds, 0, 0); + 'subfe.' -> subfe_OE_Rc(Opnds, 0, 1); + 'subfeo' -> subfe_OE_Rc(Opnds, 1, 0); + 'subfeo.' -> subfe_OE_Rc(Opnds, 1, 1); + 'subfme' -> subfme_OE_Rc(Opnds, 0, 0); + 'subfme.' -> subfme_OE_Rc(Opnds, 0, 1); + 'subfmeo' -> subfme_OE_Rc(Opnds, 1, 0); + 'subfmeo.' -> subfme_OE_Rc(Opnds, 1, 1); + 'subfze' -> subfze_OE_Rc(Opnds, 0, 0); + 'subfze.' -> subfze_OE_Rc(Opnds, 0, 1); + 'subfzeo' -> subfze_OE_Rc(Opnds, 1, 0); + 'subfzeo.' -> subfze_OE_Rc(Opnds, 1, 1); + %% A-Form + 'fadd' -> fadd_Rc(Opnds, 0); + 'fadd.' -> fadd_Rc(Opnds, 1); + 'fadds' -> fadds_Rc(Opnds, 0); + 'fadds.' -> fadds_Rc(Opnds, 1); + 'fdiv' -> fdiv_Rc(Opnds, 0); + 'fdiv.' -> fdiv_Rc(Opnds, 1); + 'fdivs' -> fdivs_Rc(Opnds, 0); + 'fdivs.' -> fdivs_Rc(Opnds, 1); + 'fmadd' -> fmadd_Rc(Opnds, 0); + 'fmadd.' -> fmadd_Rc(Opnds, 1); + 'fmadds' -> fmadds_Rc(Opnds, 0); + 'fmadds.' -> fmadds_Rc(Opnds, 1); + 'fmsub' -> fmsub_Rc(Opnds, 0); + 'fmsub.' -> fmsub_Rc(Opnds, 1); + 'fmsubs' -> fmsubs_Rc(Opnds, 0); + 'fmsubs.' -> fmsubs_Rc(Opnds, 1); + 'fmul' -> fmul_Rc(Opnds, 0); + 'fmul.' -> fmul_Rc(Opnds, 1); + 'fmuls' -> fmuls_Rc(Opnds, 0); + 'fmuls.' -> fmuls_Rc(Opnds, 1); + 'fnmadd' -> fnmadd_Rc(Opnds, 0); + 'fnmadd.' -> fnmadd_Rc(Opnds, 1); + 'fnmadds' -> fnmadds_Rc(Opnds, 0); + 'fnmadds.' -> fnmadds_Rc(Opnds, 1); + 'fnmsub' -> fnmsub_Rc(Opnds, 0); + 'fnmsub.' -> fnmsub_Rc(Opnds, 1); + 'fnmsubs' -> fnmsubs_Rc(Opnds, 0); + 'fnmsubs.' -> fnmsubs_Rc(Opnds, 1); + 'fres' -> fres_Rc(Opnds, 0); + 'fres.' -> fres_Rc(Opnds, 1); + 'frsqrte' -> frsqrte_Rc(Opnds, 0); + 'frsqrte.' -> frsqrte_Rc(Opnds, 1); + 'fsel' -> fsel_Rc(Opnds, 0); + 'fsel.' -> fsel_Rc(Opnds, 1); + 'fsqrt' -> fsqrt_Rc(Opnds, 0); % not implemented in MPC603e or MPC7450 + 'fsqrt.' -> fsqrt_Rc(Opnds, 1); % not implemented in MPC603e or MPC7450 + 'fsqrts' -> fsqrts_Rc(Opnds, 0); % not implemented in MPC603e or MPC7450 + 'fsqrts.' -> fsqrts_Rc(Opnds, 1); % not implemented in MPC603e or MPC7450 + 'fsub' -> fsub_Rc(Opnds, 0); + 'fsub.' -> fsub_Rc(Opnds, 1); + 'fsubs' -> fsubs_Rc(Opnds, 0); + 'fsubs.' -> fsubs_Rc(Opnds, 1); + %% M-Form + 'rlwimi' -> rlwimi_Rc(Opnds, 0); + 'rlwimi.' -> rlwimi_Rc(Opnds, 1); + 'rlwinm' -> rlwinm_Rc(Opnds, 0); + 'rlwinm.' -> rlwinm_Rc(Opnds, 1); + 'rlwnm' -> rlwnm_Rc(Opnds, 0); + 'rlwnm.' -> rlwnm_Rc(Opnds, 1); + %% MD-Form + 'rldic' -> rldic_Rc(Opnds, 0); + 'rldic.' -> rldic_Rc(Opnds, 1); + 'rldicl' -> rldicl_Rc(Opnds, 0); + 'rldicl.' -> rldicl_Rc(Opnds, 1); + 'rldicr' -> rldicr_Rc(Opnds, 0); + 'rldicr.' -> rldicr_Rc(Opnds, 1); + 'rldimi' -> rldimi_Rc(Opnds, 0); + 'rldimi.' -> rldimi_Rc(Opnds, 1); + %% MDS-Form + 'rldcl' -> rldcl(Opnds, 0); + 'rldcl.' -> rldcl(Opnds, 1); + 'rldcr' -> rldcr(Opnds, 0); + 'rldcr.' -> rldcr(Opnds, 1); + _ -> exit({?MODULE,insn_encode,Op}) + end. + +%%% testing interface + +-ifdef(TESTING). + +say(OS, Str) -> + file:write(OS, Str). + +hex_digit(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, [hex_digit(Byte bsr 4)]), + say(OS, [hex_digit(Byte)]). + +say_word(OS, Word) -> + say(OS, "0x"), + say_byte(OS, Word bsr 24), + say_byte(OS, Word bsr 16), + say_byte(OS, Word bsr 8), + say_byte(OS, Word). + +t(OS, Op, Opnds) -> + Word = insn_encode(Op, Opnds), + say(OS, "\t.long "), + say_word(OS, Word), + say(OS, "\n"). + +dotest1(OS) -> + say(OS, "\t.text\n\t.align 4\n"), + %% + R14 = {r,14}, + R10 = {r,10}, + R11 = {r,11}, + F2 = {fr,2}, + F4 = {fr,4}, + F6 = {fr,6}, + F8 = {fr,8}, + DispM3 = {d,16#FFFD}, + DS = {ds,16#FFFD bsr 2}, + SIMM99 = {simm,10#99}, + UIMM4711 = {uimm,10#4711}, + TO_LLE = {to, 2#00110}, % =, dotest1(group_leader()). + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). + +-endif. diff --git a/lib/hipe/ppc/hipe_ppc_finalise.erl b/lib/hipe/ppc/hipe_ppc_finalise.erl new file mode 100644 index 0000000000..c4b9526fec --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_finalise.erl @@ -0,0 +1,65 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_finalise). +-export([finalise/1]). +-include("hipe_ppc.hrl"). + +finalise(Defun) -> + #defun{code=Code0} = Defun, + Code1 = peep(expand(Code0)), + Defun#defun{code=Code1}. + +expand(Insns) -> + expand_list(Insns, []). + +expand_list([I|Insns], Accum) -> + expand_list(Insns, expand_insn(I, Accum)); +expand_list([], Accum) -> + lists:reverse(Accum). + +expand_insn(I, Accum) -> + case I of + #pseudo_bc{bcond=BCond,true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [hipe_ppc:mk_b_label(FalseLab), + hipe_ppc:mk_bc(BCond, TrueLab, Pred) | + Accum]; + #pseudo_call{func=FunC,sdesc=SDesc,contlab=ContLab,linkage=Linkage} -> + [hipe_ppc:mk_b_label(ContLab), + case FunC of + 'ctr' -> hipe_ppc:mk_bctrl(SDesc); + Fun -> hipe_ppc:mk_bl(Fun, SDesc, Linkage) + end | + Accum]; + #pseudo_tailcall_prepare{} -> + Accum; + _ -> + [I|Accum] + end. + +peep(Insns) -> + peep_list(Insns, []). + +peep_list([#b_label{label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> + peep_list(Insns, Accum); +peep_list([I|Insns], Accum) -> + peep_list(Insns, [I|Accum]); +peep_list([], Accum) -> + lists:reverse(Accum). diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl new file mode 100644 index 0000000000..158009872f --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -0,0 +1,657 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_frame). +-export([frame/1]). +-include("hipe_ppc.hrl"). +-include("../rtl/hipe_literals.hrl"). + +frame(Defun) -> + Formals = fix_formals(hipe_ppc:defun_formals(Defun)), + Temps0 = all_temps(hipe_ppc:defun_code(Defun), Formals), + MinFrame = defun_minframe(Defun), + Temps = ensure_minframe(MinFrame, Temps0), + ClobbersLR = clobbers_lr(hipe_ppc:defun_code(Defun)), + CFG0 = hipe_ppc_cfg:init(Defun), + Liveness = hipe_ppc_liveness_all:analyse(CFG0), + CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), + hipe_ppc_cfg:linearise(CFG1). + +fix_formals(Formals) -> + fix_formals(hipe_ppc_registers:nr_args(), Formals). + +fix_formals(0, Rest) -> Rest; +fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest); +fix_formals(_, []) -> []. + +do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> + Context = mk_context(Liveness, Formals, Temps, ClobbersLR), + CFG1 = do_blocks(CFG0, Context), + do_prologue(CFG1, Context). + +do_blocks(CFG, Context) -> + Labels = hipe_ppc_cfg:labels(CFG), + do_blocks(Labels, CFG, Context). + +do_blocks([Label|Labels], CFG, Context) -> + Liveness = context_liveness(Context), + LiveOut = hipe_ppc_liveness_all:liveout(Liveness, Label), + Block = hipe_ppc_cfg:bb(CFG, Label), + Code = hipe_bb:code(Block), + NewCode = do_block(Code, LiveOut, Context), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_ppc_cfg:bb_add(CFG, Label, NewBlock), + do_blocks(Labels, NewCFG, Context); +do_blocks([], CFG, _) -> + CFG. + +do_block(Insns, LiveOut, Context) -> + do_block(Insns, LiveOut, Context, context_framesize(Context), []). + +do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> + {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), + do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); +do_block([], _, Context, FPoff, RevCode) -> + FPoff0 = context_framesize(Context), + if FPoff =:= FPoff0 -> []; + true -> exit({?MODULE,do_block,FPoff}) + end, + lists:reverse(RevCode, []). + +do_insn(I, LiveOut, Context, FPoff) -> + case I of + #blr{} -> + {do_blr(I, Context, FPoff), context_framesize(Context)}; + #pseudo_call{} -> + do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_call_prepare{} -> + do_pseudo_call_prepare(I, FPoff); + #pseudo_move{} -> + {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_tailcall{} -> + {do_pseudo_tailcall(I, Context), context_framesize(Context)}; + #pseudo_fmove{} -> + {do_pseudo_fmove(I, Context, FPoff), FPoff}; + _ -> + {[I], FPoff} + end. + +%%% +%%% Moves, with Dst or Src possibly a pseudo +%%% + +do_pseudo_move(I, Context, FPoff) -> + Dst = hipe_ppc:pseudo_move_dst(I), + Src = hipe_ppc:pseudo_move_src(I), + case temp_is_pseudo(Dst) of + true -> + Offset = pseudo_offset(Dst, FPoff, Context), + mk_store('stw', Src, Offset, mk_sp(), []); + _ -> + case temp_is_pseudo(Src) of + true -> + Offset = pseudo_offset(Src, FPoff, Context), + mk_load('lwz', Dst, Offset, mk_sp(), []); + _ -> + [hipe_ppc:mk_alu('or', Dst, Src, Src)] + end + end. + +do_pseudo_fmove(I, Context, FPoff) -> + Dst = hipe_ppc:pseudo_fmove_dst(I), + Src = hipe_ppc:pseudo_fmove_src(I), + case temp_is_pseudo(Dst) of + true -> + Offset = pseudo_offset(Dst, FPoff, Context), + hipe_ppc:mk_fstore(Src, Offset, mk_sp(), 0); + _ -> + case temp_is_pseudo(Src) of + true -> + Offset = pseudo_offset(Src, FPoff, Context), + hipe_ppc:mk_fload(Dst, Offset, mk_sp(), 0); + _ -> + [hipe_ppc:mk_fp_unary('fmr', Dst, Src)] + end + end. + +pseudo_offset(Temp, FPoff, Context) -> + FPoff + context_offset(Context, Temp). + +%%% +%%% Return - deallocate frame and emit 'ret $N' insn. +%%% + +do_blr(I, Context, FPoff) -> + %% XXX: perhaps use explicit pseudo_move;mtlr, + %% avoiding the need to hard-code Temp1 here + %% XXX: typically only one instruction between + %% the mtlr and the blr, ouch + restore_lr(FPoff, Context, + adjust_sp(FPoff + word_size() * context_arity(Context), + [I])). + +restore_lr(FPoff, Context, Rest) -> + case context_clobbers_lr(Context) of + false -> Rest; + true -> + Temp = mk_temp1(), + mk_load('lwz', Temp, FPoff - word_size(), mk_sp(), + [hipe_ppc:mk_mtspr('lr', Temp) | + Rest]) + end. + +adjust_sp(N, Rest) -> + if N =:= 0 -> + Rest; + true -> + SP = mk_sp(), + hipe_ppc:mk_addi(SP, SP, N, Rest) + end. + +%%% +%%% Recursive calls. +%%% + +do_pseudo_call_prepare(I, FPoff0) -> + %% Create outgoing arguments area on the stack. + NrStkArgs = hipe_ppc:pseudo_call_prepare_nrstkargs(I), + Offset = NrStkArgs * word_size(), + {adjust_sp(-Offset, []), FPoff0 + Offset}. + +do_pseudo_call(I, LiveOut, Context, FPoff0) -> + #ppc_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_ppc:pseudo_call_sdesc(I), + FunC = hipe_ppc:pseudo_call_func(I), + LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)], + SDesc = mk_sdesc(ExnLab, Context, LiveTemps), + ContLab = hipe_ppc:pseudo_call_contlab(I), + Linkage = hipe_ppc:pseudo_call_linkage(I), + CallCode = [hipe_ppc:mk_pseudo_call(FunC, SDesc, ContLab, Linkage)], + StkArity = erlang:max(0, OrigArity - hipe_ppc_registers:nr_args()), + context_need_stack(Context, stack_need(FPoff0, StkArity, FunC)), + ArgsBytes = word_size() * StkArity, + {CallCode, FPoff0 - ArgsBytes}. + +stack_need(FPoff, StkArity, FunC) -> + case FunC of + #ppc_prim{} -> FPoff; + #ppc_mfa{m=M,f=F,a=A} -> + case erlang:is_builtin(M, F, A) of + true -> FPoff; + false -> stack_need_general(FPoff, StkArity) + end; + 'ctr' -> stack_need_general(FPoff, StkArity) + end. + +stack_need_general(FPoff, StkArity) -> + erlang:max(FPoff, FPoff + (?PPC_LEAF_WORDS - StkArity) * word_size()). + +%%% +%%% Create stack descriptors for call sites. +%%% + +mk_sdesc(ExnLab, Context, Temps) -> % for normal calls + Temps0 = only_tagged(Temps), + Live = mk_live(Context, Temps0), + Arity = context_arity(Context), + FSize = context_framesize(Context), + hipe_ppc:mk_sdesc(ExnLab, (FSize div word_size())-1, Arity, + list_to_tuple(Live)). + +only_tagged(Temps)-> + [X || X <- Temps, hipe_ppc:temp_type(X) =:= 'tagged']. + +mk_live(Context, Temps) -> + lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]). + +temp_to_slot(Context, Temp) -> + (context_framesize(Context) + context_offset(Context, Temp)) + div word_size(). + +mk_minimal_sdesc(Context) -> % for inc_stack_0 calls + hipe_ppc:mk_sdesc([], 0, context_arity(Context), {}). + +%%% +%%% Tailcalls. +%%% + +do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context) + Arity = context_arity(Context), + Args = hipe_ppc:pseudo_tailcall_stkargs(I), + FunC = hipe_ppc:pseudo_tailcall_func(I), + Linkage = hipe_ppc:pseudo_tailcall_linkage(I), + {Insns, FPoff1} = do_tailcall_args(Args, Context), + context_need_stack(Context, FPoff1), + StkArity = length(Args), + FPoff2 = FPoff1 + (Arity - StkArity) * word_size(), + context_need_stack(Context, stack_need(FPoff2, StkArity, FunC)), + I2 = + case FunC of + 'ctr' -> + hipe_ppc:mk_bctr([]); + Fun -> + hipe_ppc:mk_b_fun(Fun, Linkage) + end, + %% XXX: break out the LR restore, just like for blr? + restore_lr(context_framesize(Context), Context, + Insns ++ adjust_sp(FPoff2, [I2])). + +do_tailcall_args(Args, Context) -> + FPoff0 = context_framesize(Context), + Arity = context_arity(Context), + FrameTop = word_size()*Arity, + DangerOff = FrameTop - word_size()*length(Args), + %% + Moves = mk_moves(Args, FrameTop, []), + %% + {Stores, Simple, Conflict} = + split_moves(Moves, Context, DangerOff, [], [], []), + %% sanity check (shouldn't trigger any more) + if DangerOff < -FPoff0 -> + exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0}); + true -> [] + end, + FPoff1 = FPoff0, + %% + {Pushes, Pops, FPoff2} = split_conflict(Conflict, FPoff1, [], []), + %% + TempReg = hipe_ppc_registers:temp1(), + %% + {adjust_sp(-(FPoff2 - FPoff1), + simple_moves(Pushes, FPoff2, TempReg, + store_moves(Stores, FPoff2, TempReg, + simple_moves(Simple, FPoff2, TempReg, + simple_moves(Pops, FPoff2, TempReg, + []))))), + FPoff2}. + +mk_moves([Arg|Args], Off, Moves) -> + Off1 = Off - word_size(), + mk_moves(Args, Off1, [{Arg,Off1}|Moves]); +mk_moves([], _, Moves) -> + Moves. + +split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) -> + {Src,DstOff} = Move, + case src_is_pseudo(Src) of + false -> + split_moves(Moves, Context, DangerOff, [Move|Stores], + Simple, Conflict); + true -> + SrcOff = context_offset(Context, Src), + Type = typeof_temp(Src), + if SrcOff =:= DstOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, Conflict); + SrcOff >= DangerOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, [{SrcOff,DstOff,Type}|Conflict]); + true -> + split_moves(Moves, Context, DangerOff, Stores, + [{SrcOff,DstOff,Type}|Simple], Conflict) + end + end; +split_moves([], _, _, Stores, Simple, Conflict) -> + {Stores, Simple, Conflict}. + +split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Pops) -> + FPoff1 = FPoff + word_size(), + Push = {SrcOff,-FPoff1,Type}, + Pop = {-FPoff1,DstOff,Type}, + split_conflict(Conflict, FPoff1, [Push|Pushes], [Pop|Pops]); +split_conflict([], FPoff, Pushes, Pops) -> + {lists:reverse(Pushes), Pops, FPoff}. + +simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) -> + Temp = hipe_ppc:mk_temp(TempReg, Type), + SP = mk_sp(), + LoadOff = FPoff+SrcOff, + StoreOff = FPoff+DstOff, + simple_moves(Moves, FPoff, TempReg, + mk_load('lwz', Temp, LoadOff, SP, + mk_store('stw', Temp, StoreOff, SP, + Rest))); +simple_moves([], _, _, Rest) -> + Rest. + +store_moves([{Src,DstOff}|Moves], FPoff, TempReg, Rest) -> + %%Type = typeof_temp(Src), + SP = mk_sp(), + StoreOff = FPoff+DstOff, + {NewSrc,FixSrc} = + case hipe_ppc:is_temp(Src) of + true -> + {Src, []}; + _ -> + Temp = hipe_ppc:mk_temp(TempReg, 'untagged'), + {Temp, hipe_ppc:mk_li(Temp, Src)} + end, + store_moves(Moves, FPoff, TempReg, + FixSrc ++ mk_store('stw', NewSrc, StoreOff, SP, Rest)); +store_moves([], _, _, Rest) -> + Rest. + +%%% +%%% Contexts +%%% + +-record(context, {liveness, framesize, arity, map, clobbers_lr, ref_maxstack}). + +mk_context(Liveness, Formals, Temps, ClobbersLR) -> + {Map, MinOff} = mk_temp_map(Formals, ClobbersLR, Temps), + FrameSize = (-MinOff), + RefMaxStack = hipe_bifs:ref(FrameSize), + #context{liveness=Liveness, + framesize=FrameSize, arity=length(Formals), + map=Map, clobbers_lr=ClobbersLR, ref_maxstack=RefMaxStack}. + +context_need_stack(#context{ref_maxstack=RM}, N) -> + M = hipe_bifs:ref_get(RM), + if N > M -> hipe_bifs:ref_set(RM, N); + true -> [] + end. + +context_maxstack(#context{ref_maxstack=RM}) -> + hipe_bifs:ref_get(RM). + +context_arity(#context{arity=Arity}) -> + Arity. + +context_framesize(#context{framesize=FrameSize}) -> + FrameSize. + +context_liveness(#context{liveness=Liveness}) -> + Liveness. + +context_offset(#context{map=Map}, Temp) -> + tmap_lookup(Map, Temp). + +context_clobbers_lr(#context{clobbers_lr=ClobbersLR}) -> ClobbersLR. + +mk_temp_map(Formals, ClobbersLR, Temps) -> + {Map, 0} = enter_vars(Formals, word_size() * length(Formals), + tmap_empty()), + TempsList = tset_to_list(Temps), + AllTemps = + case ClobbersLR of + false -> TempsList; + true -> + RA = hipe_ppc:mk_new_temp('untagged'), + [RA|TempsList] + end, + enter_vars(AllTemps, 0, Map). + +enter_vars([V|Vs], PrevOff, Map) -> + Off = + case hipe_ppc:temp_type(V) of + 'double' -> PrevOff - 2*word_size(); + _ -> PrevOff - word_size() + end, + enter_vars(Vs, Off, tmap_bind(Map, V, Off)); +enter_vars([], Off, Map) -> + {Map, Off}. + +tmap_empty() -> + gb_trees:empty(). + +tmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +tmap_lookup(Map, Key) -> + gb_trees:get(Key, Map). + +%%% +%%% do_prologue: prepend stack frame allocation code. +%%% +%%% NewStart: +%%% temp1 = *(P + P_SP_LIMIT) +%%% temp2 = SP - MaxStack +%%% cmp temp2, temp1 +%%% temp1 = LR [if ClobbersLR][hoisted] +%%% if (ltu) goto IncStack else goto AllocFrame +%%% AllocFrame: +%%% SP = temp2 [if FrameSize == MaxStack] +%%% SP -= FrameSize [if FrameSize != MaxStack] +%%% *(SP + FrameSize-WordSize) = temp1 [if ClobbersLR] +%%% goto OldStart +%%% OldStart: +%%% ... +%%% IncStack: +%%% temp1 = LR [if not ClobbersLR] +%%% bl inc_stack +%%% LR = temp1 +%%% goto NewStart + +do_prologue(CFG, Context) -> + MaxStack = context_maxstack(Context), + if MaxStack > 0 -> + FrameSize = context_framesize(Context), + OldStartLab = hipe_ppc_cfg:start_label(CFG), + NewStartLab = hipe_gensym:get_next_label(ppc), + %% + P = hipe_ppc:mk_temp(hipe_ppc_registers:proc_pointer(), 'untagged'), + Temp1 = mk_temp1(), + SP = mk_sp(), + %% + ClobbersLR = context_clobbers_lr(Context), + GotoOldStartCode = [hipe_ppc:mk_b_label(OldStartLab)], + AllocFrameCodeTail = + case ClobbersLR of + false -> GotoOldStartCode; + true -> mk_store('stw', Temp1, FrameSize-word_size(), SP, GotoOldStartCode) + end, + %% + Arity = context_arity(Context), + Guaranteed = erlang:max(0, (?PPC_LEAF_WORDS - Arity) * word_size()), + %% + {CFG1,NewStartCode} = + if MaxStack =< Guaranteed -> + %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameCode = adjust_sp(-FrameSize, AllocFrameCodeTail), + NewStartCode0 = + case ClobbersLR of + false -> AllocFrameCode; + true -> [hipe_ppc:mk_mfspr(Temp1, 'lr') | AllocFrameCode] + end, + {CFG,NewStartCode0}; + true -> + %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameLab = hipe_gensym:get_next_label(ppc), + IncStackLab = hipe_gensym:get_next_label(ppc), + Temp2 = mk_temp2(), + %% + NewStartCodeTail2 = + [hipe_ppc:mk_pseudo_bc('lt', IncStackLab, AllocFrameLab, 0.01)], + NewStartCodeTail1 = + case ClobbersLR of + false -> NewStartCodeTail2; + true -> [hipe_ppc:mk_mfspr(Temp1, 'lr') | NewStartCodeTail2] + end, + NewStartCode0 = + [hipe_ppc:mk_load('lwz', Temp1, ?P_NSP_LIMIT, P) | + hipe_ppc:mk_addi(Temp2, SP, -MaxStack, + [hipe_ppc:mk_cmp('cmpl', Temp2, Temp1) | + NewStartCodeTail1])], + %% + AllocFrameCode = + if MaxStack =:= FrameSize -> + %% io:format("~w: MaxStack =:= FrameSize =:= ~w :-)\n", [?MODULE,MaxStack]), + [hipe_ppc:mk_alu('or', SP, Temp2, Temp2) | + AllocFrameCodeTail]; + true -> + %% io:format("~w: MaxStack ~w =/= FrameSize ~w :-(\n", [?MODULE,MaxStack,FrameSize]), + adjust_sp(-FrameSize, AllocFrameCodeTail) + end, + %% + IncStackCodeTail = + [hipe_ppc:mk_bl(hipe_ppc:mk_prim('inc_stack_0'), + mk_minimal_sdesc(Context), not_remote), + hipe_ppc:mk_mtspr('lr', Temp1), + hipe_ppc:mk_b_label(NewStartLab)], + IncStackCode = + case ClobbersLR of + true -> IncStackCodeTail; + false -> [hipe_ppc:mk_mfspr(Temp1, 'lr') | IncStackCodeTail] + end, + %% + CFG0a = hipe_ppc_cfg:bb_add(CFG, AllocFrameLab, + hipe_bb:mk_bb(AllocFrameCode)), + CFG0b = hipe_ppc_cfg:bb_add(CFG0a, IncStackLab, + hipe_bb:mk_bb(IncStackCode)), + %% + {CFG0b,NewStartCode0} + end, + %% + CFG2 = hipe_ppc_cfg:bb_add(CFG1, NewStartLab, + hipe_bb:mk_bb(NewStartCode)), + hipe_ppc_cfg:start_label_update(CFG2, NewStartLab); + true -> + CFG + end. + +%%% Create a load instruction. +%%% May clobber Dst early for large offsets. In principle we could +%%% clobber R0 if Dst =:= Base, but Dst =/= Base here in frame. + +mk_load(LdOp, Dst, Offset, Base, Rest) -> + hipe_ppc:mk_load(LdOp, Dst, Offset, Base, 'error', Rest). + +%%% Create a store instruction. +%%% May clobber R0 for large offsets. + +mk_store(StOp, Src, Offset, Base, Rest) -> + hipe_ppc:mk_store(StOp, Src, Offset, Base, 0, Rest). + +%%% typeof_temp -- what's temp's type? + +typeof_temp(Temp) -> + hipe_ppc:temp_type(Temp). + +%%% Cons up an 'SP' Temp. + +mk_sp() -> + hipe_ppc:mk_temp(hipe_ppc_registers:stack_pointer(), 'untagged'). + +%%% Cons up a 'TEMP1' Temp. + +mk_temp1() -> + hipe_ppc:mk_temp(hipe_ppc_registers:temp1(), 'untagged'). + +%%% Cons up a 'TEMP2' Temp. + +mk_temp2() -> + hipe_ppc:mk_temp(hipe_ppc_registers:temp2(), 'untagged'). + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + hipe_ppc:is_temp(Src) andalso temp_is_pseudo(Src). + +temp_is_pseudo(Temp) -> + not(hipe_ppc:temp_is_precoloured(Temp)). + +%%% +%%% Detect if a Defun's body clobbers LR. +%%% + +clobbers_lr([I|Insns]) -> + case I of + #pseudo_call{} -> true; + %% mtspr to lr cannot occur yet + _ -> clobbers_lr(Insns) + end; +clobbers_lr([]) -> false. + +%%% +%%% Build the set of all temps used in a Defun's body. +%%% + +all_temps(Code, Formals) -> + S0 = find_temps(Code, tset_empty()), + S1 = tset_del_list(S0, Formals), + tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). + +find_temps([I|Insns], S0) -> + S1 = tset_add_list(S0, hipe_ppc_defuse:insn_def_all(I)), + S2 = tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)), + find_temps(Insns, S2); +find_temps([], S) -> + S. + +tset_empty() -> + gb_sets:new(). + +tset_size(S) -> + gb_sets:size(S). + +tset_insert(S, T) -> + gb_sets:add_element(T, S). + +tset_add_list(S, Ts) -> + gb_sets:union(S, gb_sets:from_list(Ts)). + +tset_del_list(S, Ts) -> + gb_sets:subtract(S, gb_sets:from_list(Ts)). + +tset_filter(S, F) -> + gb_sets:filter(F, S). + +tset_to_list(S) -> + gb_sets:to_list(S). + +%%% +%%% Compute minimum permissible frame size, ignoring spilled temps. +%%% This is done to ensure that we won't have to adjust the frame size +%%% in the middle of a tailcall. +%%% + +defun_minframe(Defun) -> + MaxTailArity = body_mta(hipe_ppc:defun_code(Defun), 0), + MyArity = length(fix_formals(hipe_ppc:defun_formals(Defun))), + erlang:max(MaxTailArity - MyArity, 0). + +body_mta([I|Code], MTA) -> + body_mta(Code, insn_mta(I, MTA)); +body_mta([], MTA) -> + MTA. + +insn_mta(I, MTA) -> + case I of + #pseudo_tailcall{arity=Arity} -> + erlang:max(MTA, Arity - hipe_ppc_registers:nr_args()); + _ -> MTA + end. + +%%% +%%% Ensure that we have enough temps to satisfy the minimum frame size, +%%% if necessary by prepending unused dummy temps. +%%% + +ensure_minframe(MinFrame, Temps) -> + ensure_minframe(MinFrame, tset_size(Temps), Temps). + +ensure_minframe(MinFrame, Frame, Temps) -> + if MinFrame > Frame -> + Temp = hipe_ppc:mk_new_temp('untagged'), + ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp)); + true -> Temps + end. + +word_size() -> + hipe_rtl_arch:word_size(). diff --git a/lib/hipe/ppc/hipe_ppc_liveness_all.erl b/lib/hipe/ppc/hipe_ppc_liveness_all.erl new file mode 100644 index 0000000000..c9234e8100 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_liveness_all.erl @@ -0,0 +1,38 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_liveness_all). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_ppc.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_ppc_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_ppc_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_ppc_cfg:succ(CFG, L). +uses(Insn) -> hipe_ppc_defuse:insn_use_all(Insn). +defines(Insn) -> hipe_ppc_defuse:insn_def_all(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_ppc:mk_temp(Reg, Type) + end, + hipe_ppc_registers:live_at_return())). diff --git a/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl b/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl new file mode 100644 index 0000000000..ff9db21e2b --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl @@ -0,0 +1,34 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_liveness_fpr). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_ppc.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_ppc_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_ppc_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_ppc_cfg:succ(CFG, L). +uses(Insn) -> hipe_ppc_defuse:insn_use_fpr(Insn). +defines(Insn) -> hipe_ppc_defuse:insn_def_fpr(Insn). +liveout_no_succ() -> []. diff --git a/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl b/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl new file mode 100644 index 0000000000..a55052b944 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl @@ -0,0 +1,38 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_liveness_gpr). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_ppc.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_ppc_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_ppc_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_ppc_cfg:succ(CFG, L). +uses(Insn) -> hipe_ppc_defuse:insn_use_gpr(Insn). +defines(Insn) -> hipe_ppc_defuse:insn_def_gpr(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_ppc:mk_temp(Reg, Type) + end, + hipe_ppc_registers:live_at_return())). diff --git a/lib/hipe/ppc/hipe_ppc_main.erl b/lib/hipe/ppc/hipe_ppc_main.erl new file mode 100644 index 0000000000..1d84f6db11 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_main.erl @@ -0,0 +1,51 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_main). +-export([rtl_to_ppc/3]). + +rtl_to_ppc(MFA, RTL, Options) -> + PPC1 = hipe_rtl_to_ppc:translate(RTL), + PPC2 = hipe_ppc_ra:ra(PPC1, Options), + PPC3 = hipe_ppc_frame:frame(PPC2), + PPC4 = hipe_ppc_finalise:finalise(PPC3), + ppc_pp(PPC4, MFA, Options), + {native, powerpc, {unprofiled, PPC4}}. + +ppc_pp(PPC, MFA, Options) -> + case proplists:get_value(pp_native, Options) of + true -> + hipe_ppc_pp:pp(PPC); + {only,Lst} when is_list(Lst) -> + case lists:member(MFA,Lst) of + true -> + hipe_ppc_pp:pp(PPC); + false -> + ok + end; + {only,MFA} -> + hipe_ppc_pp:pp(PPC); + {file,FileName} -> + {ok, File} = file:open(FileName, [write,append]), + hipe_ppc_pp:pp(File, PPC), + ok = file:close(File); + _ -> + ok + end. diff --git a/lib/hipe/ppc/hipe_ppc_pp.erl b/lib/hipe/ppc/hipe_ppc_pp.erl new file mode 100644 index 0000000000..f88e922808 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_pp.erl @@ -0,0 +1,350 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_pp). +-export([pp/1, pp/2, pp_insn/1]). + +-include("hipe_ppc.hrl"). + +pp(Defun) -> + pp(standard_io, Defun). + +pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) -> + Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A), + io:format(Dev, "\t.text\n", []), + io:format(Dev, "\t.align 4\n", []), + io:format(Dev, "\t.global ~s\n", [Fname]), + io:format(Dev, "~s:\n", [Fname]), + pp_insns(Dev, Code, Fname), + io:format(Dev, "\t.rodata\n", []), + io:format(Dev, "\t.align 4\n", []), + hipe_data_pp:pp(Dev, Data, ppc, Fname), + io:format(Dev, "\n", []). + +pp_insns(Dev, [I|Is], Fname) -> + pp_insn(Dev, I, Fname), + pp_insns(Dev, Is, Fname); +pp_insns(_, [], _) -> + []. + +pp_insn(I) -> + pp_insn(standard_io, I, ""). + +pp_insn(Dev, I, Pre) -> + case I of + #alu{aluop=AluOp, dst=Dst, src1=Src1, src2=Src2} -> + io:format(Dev, "\t~s ", [alu_op_name(AluOp)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src1), + io:format(Dev, ", ", []), + pp_src(Dev, Src2), + io:format(Dev, "\n", []); + #b_fun{'fun'=Fun, linkage=Linkage} -> + io:format(Dev, "\tb ", []), + pp_fun(Dev, Fun), + io:format(Dev, " # ~w\n", [Linkage]); + #b_label{label=Label} -> + io:format(Dev, "\tb .~s_~w\n", [Pre, Label]); + #bc{bcond=BCond, label=Label, pred=Pred} -> + io:format(Dev, "\tb~w ~s_~w # ~.2f\n", [bcond_name(BCond), Pre, Label, Pred]); + #bctr{labels=Labels} -> + io:format(Dev, "\tbctr", []), + case Labels of + [] -> []; + _ -> + io:format(Dev, " #", []), + pp_labels(Dev, Labels, Pre) + end, + io:format(Dev, "\n", []); + #bctrl{sdesc=SDesc} -> + io:format(Dev, "\tbctrl #", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, "\n", []); + #bl{'fun'=Fun, sdesc=SDesc, linkage=Linkage} -> + io:format(Dev, "\tbl ", []), + pp_fun(Dev, Fun), + io:format(Dev, " #", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #blr{} -> + io:format(Dev, "\tblr\n", []); + #comment{term=Term} -> + io:format(Dev, "\t# ~p\n", [Term]); + #cmp{cmpop=CmpOp, src1=Src1, src2=Src2} -> + io:format(Dev, "\t~s ", [cmp_op_name(CmpOp)]), + pp_temp(Dev, Src1), + io:format(Dev, ", ", []), + pp_src(Dev, Src2), + io:format(Dev, "\n", []); + #label{label=Label} -> + io:format(Dev, ".~s_~w:~n", [Pre, Label]); + #load{ldop=LdOp, dst=Dst, disp=Disp, base=Base} -> + io:format(Dev, "\t~w ", [ldop_name(LdOp)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ~s(", [to_hex(Disp)]), + pp_temp(Dev, Base), + io:format(Dev, ")\n", []); + #loadx{ldxop=LdxOp, dst=Dst, base1=Base1, base2=Base2} -> + io:format(Dev, "\t~w ", [ldxop_name(LdxOp)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Base1), + io:format(Dev, ", ", []), + pp_temp(Dev, Base2), + io:format(Dev, "\n", []); + #mfspr{dst=Dst, spr=SPR} -> + io:format(Dev, "\tmf~w ", [spr_name(SPR)]), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #mtcr{src=Src} -> + io:format(Dev, "\tmtcrf 0x80, ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #mtspr{spr=SPR, src=Src} -> + io:format(Dev, "\tmt~w ", [spr_name(SPR)]), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #pseudo_bc{bcond=BCond, true_label=TrueLab, false_label=FalseLab, pred=Pred} -> + io:format(Dev, "\tpseudo_bc ~w, .~s_~w # .~s_~w ~.2f\n", + [bcond_name(BCond), Pre, TrueLab, Pre, FalseLab, Pred]); + #pseudo_call{func=FunC, sdesc=SDesc, contlab=ContLab, linkage=Linkage} -> + io:format(Dev, "\tpseudo_call ", []), + pp_func(Dev, FunC), + io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #pseudo_call_prepare{nrstkargs=NrStkArgs} -> + SP = hipe_ppc_registers:reg_name_gpr(hipe_ppc_registers:stack_pointer()), + io:format(Dev, "\taddi ~s, ~s, ~w # pseudo_call_prepare\n", + [SP, SP, -(4*NrStkArgs)]); + #pseudo_li{dst=Dst, imm=Imm} -> + io:format(Dev, "\tpseudo_li ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_imm(Dev, Imm), + io:format(Dev, "\n", []); + #pseudo_move{dst=Dst, src=Src} -> + io:format(Dev, "\tpseudo_move ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> + io:format(Dev, "\tpseudo_tailcall ", []), + pp_func(Dev, FunC), + io:format(Dev, "/~w (", [Arity]), + pp_args(Dev, StkArgs), + io:format(Dev, ") ~w\n", [Linkage]); + #pseudo_tailcall_prepare{} -> + io:format(Dev, "\tpseudo_tailcall_prepare\n", []); + #store{stop=StOp, src=Src, disp=Disp, base=Base} -> + io:format(Dev, "\t~s ", [stop_name(StOp)]), + pp_temp(Dev, Src), + io:format(Dev, ", ~s(", [to_hex(Disp)]), + pp_temp(Dev, Base), + io:format(Dev, ")\n", []); + #storex{stxop=StxOp, src=Src, base1=Base1, base2=Base2} -> + io:format(Dev, "\t~s ", [stxop_name(StxOp)]), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Base1), + io:format(Dev, ", ", []), + pp_temp(Dev, Base2), + io:format(Dev, "\n", []); + #unary{unop=UnOp, dst=Dst, src=Src} -> + io:format(Dev, "\t~w ", [unop_name(UnOp)]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #lfd{dst=Dst, disp=Disp, base=Base} -> + io:format(Dev, "\tlfd ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ~s(", [to_hex(Disp)]), + pp_temp(Dev, Base), + io:format(Dev, ")\n", []); + #lfdx{dst=Dst, base1=Base1, base2=Base2} -> + io:format(Dev, "\tlfdx ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Base1), + io:format(Dev, ", ", []), + pp_temp(Dev, Base2), + io:format(Dev, "\n", []); + #stfd{src=Src, disp=Disp, base=Base} -> + io:format(Dev, "\tstfd ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ~s(", [to_hex(Disp)]), + pp_temp(Dev, Base), + io:format(Dev, ")\n", []); + #stfdx{src=Src, base1=Base1, base2=Base2} -> + io:format(Dev, "\tstfdx ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Base1), + io:format(Dev, ", ", []), + pp_temp(Dev, Base2), + io:format(Dev, "\n", []); + #fp_binary{fp_binop=FpBinOp, dst=Dst, src1=Src1, src2=Src2} -> + io:format(Dev, "\t~s ", [FpBinOp]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src1), + io:format(Dev, ", ", []), + pp_temp(Dev, Src2), + io:format(Dev, "\n", []); + #fp_unary{fp_unop=FpUnOp, dst=Dst, src=Src} -> + io:format(Dev, "\t~s ", [FpUnOp]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + #pseudo_fmove{dst=Dst, src=Src} -> + io:format(Dev, "\tpseudo_fmove ", []), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, "\n", []); + _ -> + exit({?MODULE, pp_insn, I}) + end. + +to_hex(N) -> + io_lib:format("~.16x", [N, "0x"]). + +pp_sdesc(Dev, Pre, #ppc_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) -> + pp_sdesc_exnlab(Dev, Pre, ExnLab), + io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]), + pp_sdesc_live(Dev, Live), + io:format(Dev, "]", []). + +pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []); +pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]). + +pp_sdesc_live(_, {}) -> []; +pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1). + +pp_sdesc_live(Dev, Live, I) -> + io:format(Dev, "~s", [to_hex(element(I, Live))]), + if I < tuple_size(Live) -> + io:format(Dev, ",", []), + pp_sdesc_live(Dev, Live, I+1); + true -> [] + end. + +pp_labels(Dev, [Label|Labels], Pre) -> + io:format(Dev, " .~s_~w", [Pre, Label]), + pp_labels(Dev, Labels, Pre); +pp_labels(_, [], _) -> + []. + +pp_fun(Dev, Fun) -> + case Fun of + #ppc_mfa{m=M, f=F, a=A} -> + io:format(Dev, "~w:~w/~w", [M, F, A]); + #ppc_prim{prim=Prim} -> + io:format(Dev, "~w", [Prim]) + end. + +pp_func(Dev, FunC) -> + case FunC of + 'ctr' -> + io:format(Dev, "ctr", []); + Fun -> + pp_fun(Dev, Fun) + end. + +alu_op_name(Op) -> Op. + +bcond_name(BCond) -> BCond. + +cmp_op_name(Op) -> Op. + +spr_name(SPR) -> SPR. + +ldop_name(LdOp) -> LdOp. + +ldxop_name(LdxOp) -> LdxOp. + +stop_name(StOp) -> StOp. + +stxop_name(StxOp) -> StxOp. + +unop_name(UnOp) -> UnOp. + +pp_temp(Dev, Temp=#ppc_temp{reg=Reg, type=Type}) -> + case hipe_ppc:temp_is_precoloured(Temp) of + true -> + Name = + case Type of + 'double' -> hipe_ppc_registers:reg_name_fpr(Reg); + _ -> hipe_ppc_registers:reg_name_gpr(Reg) + end, + io:format(Dev, "~s", [Name]); + false -> + Tag = + case Type of + double -> "f"; + tagged -> "t"; + untagged -> "u" + end, + io:format(Dev, "~s~w", [Tag, Reg]) + end. + +pp_hex(Dev, Value) -> io:format(Dev, "~s", [to_hex(Value)]). +pp_simm16(Dev, #ppc_simm16{value=Value}) -> pp_hex(Dev, Value). +pp_uimm16(Dev, #ppc_uimm16{value=Value}) -> pp_hex(Dev, Value). + +pp_imm(Dev, Value) -> + if is_integer(Value) -> pp_hex(Dev, Value); + true -> io:format(Dev, "~w", [Value]) + end. + +pp_src(Dev, Src) -> + case Src of + #ppc_temp{} -> + pp_temp(Dev, Src); + #ppc_simm16{} -> + pp_simm16(Dev, Src); + #ppc_uimm16{} -> + pp_uimm16(Dev, Src) + end. + +pp_arg(Dev, Arg) -> + case Arg of + #ppc_temp{} -> + pp_temp(Dev, Arg); + _ -> + pp_hex(Dev, Arg) + end. + +pp_args(Dev, [A|As]) -> + pp_arg(Dev, A), + pp_comma_args(Dev, As); +pp_args(_, []) -> + []. + +pp_comma_args(Dev, [A|As]) -> + io:format(Dev, ", ", []), + pp_arg(Dev, A), + pp_comma_args(Dev, As); +pp_comma_args(_, []) -> + []. diff --git a/lib/hipe/ppc/hipe_ppc_ra.erl b/lib/hipe/ppc/hipe_ppc_ra.erl new file mode 100644 index 0000000000..3de7f48de1 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_ra.erl @@ -0,0 +1,56 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_ra). +-export([ra/2]). + +ra(Defun0, Options) -> + %% hipe_ppc_pp:pp(Defun0), + {Defun1, Coloring_fp, SpillIndex} + = case proplists:get_bool(inline_fp, Options) of + true -> + hipe_regalloc_loop:ra_fp(Defun0, Options, + hipe_coalescing_regalloc, + hipe_ppc_specific_fp); + false -> + {Defun0,[],0} + end, + %% hipe_ppc_pp:pp(Defun1), + {Defun2, Coloring} + = case proplists:get_value(regalloc, Options, coalescing) of + coalescing -> + ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + optimistic -> + ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + graph_color -> + ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + linear_scan -> + hipe_ppc_ra_ls:ra(Defun1, SpillIndex, Options); + naive -> + hipe_ppc_ra_naive:ra(Defun1, Coloring_fp, Options); + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) + end, + %% hipe_ppc_pp:pp(Defun2), + hipe_ppc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + +ra(Defun, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_ppc_specific). diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl new file mode 100644 index 0000000000..53f8b739c2 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl @@ -0,0 +1,271 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_ra_finalise). +-export([finalise/3]). +-include("hipe_ppc.hrl"). + +finalise(Defun, TempMap, FPMap0) -> + Code = hipe_ppc:defun_code(Defun), + {_, SpillLimit} = hipe_ppc:defun_var_range(Defun), + Map = mk_ra_map(TempMap, SpillLimit), + FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), + NewCode = ra_code(Code, Map, FPMap1, []), + Defun#defun{code=NewCode}. + +ra_code([I|Insns], Map, FPMap, Accum) -> + ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); +ra_code([], _Map, _FPMap, Accum) -> + lists:reverse(Accum). + +ra_insn(I, Map, FPMap) -> + case I of + #alu{} -> ra_alu(I, Map); + #cmp{} -> ra_cmp(I, Map); + #load{} -> ra_load(I, Map); + #loadx{} -> ra_loadx(I, Map); + #mfspr{} -> ra_mfspr(I, Map); + #mtcr{} -> ra_mtcr(I, Map); + #mtspr{} -> ra_mtspr(I, Map); + #pseudo_li{} -> ra_pseudo_li(I, Map); + #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); + #store{} -> ra_store(I, Map); + #storex{} -> ra_storex(I, Map); + #unary{} -> ra_unary(I, Map); + #lfd{} -> ra_lfd(I, Map, FPMap); + #lfdx{} -> ra_lfdx(I, Map, FPMap); + #stfd{} -> ra_stfd(I, Map, FPMap); + #stfdx{} -> ra_stfdx(I, Map, FPMap); + #fp_binary{} -> ra_fp_binary(I, FPMap); + #fp_unary{} -> ra_fp_unary(I, FPMap); + #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap); + _ -> I + end. + +ra_alu(I=#alu{dst=Dst,src1=Src1,src2=Src2}, Map) -> + NewDst = ra_temp(Dst, Map), + NewSrc1 = ra_temp(Src1, Map), + NewSrc2 = ra_temp_or_imm(Src2, Map), + I#alu{dst=NewDst,src1=NewSrc1,src2=NewSrc2}. + +ra_cmp(I=#cmp{src1=Src1,src2=Src2}, Map) -> + NewSrc1 = ra_temp(Src1, Map), + NewSrc2 = ra_temp_or_imm(Src2, Map), + I#cmp{src1=NewSrc1,src2=NewSrc2}. + +ra_load(I=#load{dst=Dst,base=Base}, Map) -> + NewDst = ra_temp(Dst, Map), + NewBase = ra_temp(Base, Map), + I#load{dst=NewDst,base=NewBase}. + +ra_loadx(I=#loadx{dst=Dst,base1=Base1,base2=Base2}, Map) -> + NewDst = ra_temp(Dst, Map), + NewBase1 = ra_temp(Base1, Map), + NewBase2 = ra_temp(Base2, Map), + I#loadx{dst=NewDst,base1=NewBase1,base2=NewBase2}. + +ra_mfspr(I=#mfspr{dst=Dst}, Map) -> + NewDst = ra_temp(Dst, Map), + I#mfspr{dst=NewDst}. + +ra_mtcr(I=#mtcr{src=Src}, Map) -> + NewSrc = ra_temp(Src, Map), + I#mtcr{src=NewSrc}. + +ra_mtspr(I=#mtspr{src=Src}, Map) -> + NewSrc = ra_temp(Src, Map), + I#mtspr{src=NewSrc}. + +ra_pseudo_li(I=#pseudo_li{dst=Dst}, Map) -> + NewDst = ra_temp(Dst, Map), + I#pseudo_li{dst=NewDst}. + +ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) -> + NewDst = ra_temp(Dst, Map), + NewSrc = ra_temp(Src, Map), + I#pseudo_move{dst=NewDst,src=NewSrc}. + +ra_pseudo_tailcall(I=#pseudo_tailcall{stkargs=StkArgs}, Map) -> + NewStkArgs = ra_args(StkArgs, Map), + I#pseudo_tailcall{stkargs=NewStkArgs}. + +ra_store(I=#store{src=Src,base=Base}, Map) -> + NewSrc = ra_temp(Src, Map), + NewBase = ra_temp(Base, Map), + I#store{src=NewSrc,base=NewBase}. + +ra_storex(I=#storex{src=Src,base1=Base1,base2=Base2}, Map) -> + NewSrc = ra_temp(Src, Map), + NewBase1 = ra_temp(Base1, Map), + NewBase2 = ra_temp(Base2, Map), + I#storex{src=NewSrc,base1=NewBase1,base2=NewBase2}. + +ra_unary(I=#unary{dst=Dst,src=Src}, Map) -> + NewDst = ra_temp(Dst, Map), + NewSrc = ra_temp(Src, Map), + I#unary{dst=NewDst,src=NewSrc}. + +ra_lfd(I=#lfd{dst=Dst,base=Base}, Map, FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewBase = ra_temp(Base, Map), + I#lfd{dst=NewDst,base=NewBase}. + +ra_lfdx(I=#lfdx{dst=Dst,base1=Base1,base2=Base2}, Map, FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewBase1 = ra_temp(Base1, Map), + NewBase2 = ra_temp(Base2, Map), + I#lfdx{dst=NewDst,base1=NewBase1,base2=NewBase2}. + +ra_stfd(I=#stfd{src=Src,base=Base}, Map, FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewBase = ra_temp(Base, Map), + I#stfd{src=NewSrc,base=NewBase}. + +ra_stfdx(I=#stfdx{src=Src,base1=Base1,base2=Base2}, Map, FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewBase1 = ra_temp(Base1, Map), + NewBase2 = ra_temp(Base2, Map), + I#stfdx{src=NewSrc,base1=NewBase1,base2=NewBase2}. + +ra_fp_binary(I=#fp_binary{dst=Dst,src1=Src1,src2=Src2}, FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewSrc1 = ra_temp_fp(Src1, FPMap), + NewSrc2 = ra_temp_fp(Src2, FPMap), + I#fp_binary{dst=NewDst,src1=NewSrc1,src2=NewSrc2}. + +ra_fp_unary(I=#fp_unary{dst=Dst,src=Src}, FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewSrc = ra_temp_fp(Src, FPMap), + I#fp_unary{dst=NewDst,src=NewSrc}. + +ra_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewSrc = ra_temp_fp(Src, FPMap), + I#pseudo_fmove{dst=NewDst,src=NewSrc}. + +ra_args([Arg|Args], Map) -> + [ra_temp_or_imm(Arg, Map) | ra_args(Args, Map)]; +ra_args([], _) -> + []. + +ra_temp_or_imm(Arg, Map) -> + case hipe_ppc:is_temp(Arg) of + true -> + ra_temp(Arg, Map); + false -> + Arg + end. + +ra_temp_fp(Temp, FPMap) -> + Reg = hipe_ppc:temp_reg(Temp), + case hipe_ppc:temp_type(Temp) of + 'double' -> + case hipe_ppc_registers:is_precoloured_fpr(Reg) of + true -> Temp; + _ -> ra_temp_common(Reg, Temp, FPMap) + end + end. + +ra_temp(Temp, Map) -> + Reg = hipe_ppc:temp_reg(Temp), + case hipe_ppc:temp_type(Temp) of + 'double' -> + exit({?MODULE,ra_temp,Temp}); + _ -> + case hipe_ppc_registers:is_precoloured_gpr(Reg) of + true -> Temp; + _ -> ra_temp_common(Reg, Temp, Map) + end + end. + +ra_temp_common(Reg, Temp, Map) -> + case gb_trees:lookup(Reg, Map) of + {value,NewReg} -> Temp#ppc_temp{reg=NewReg}; + _ -> Temp + end. + +mk_ra_map(TempMap, SpillLimit) -> + %% Build a partial map from pseudo to reg or spill. + %% Spills are represented as pseudos with indices above SpillLimit. + %% (I'd prefer to use negative indices, but that breaks + %% hipe_ppc_registers:is_precoloured/1.) + %% The frame mapping proper is unchanged, since spills look just like + %% ordinary (un-allocated) pseudos. + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, is_precoloured_gpr), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + TempMap). + +conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> + %% From should be a pseudo, or a hard reg mapped to itself. + if is_integer(From), From =< SpillLimit -> + case hipe_ppc_registers:IsPrecoloured(From) of + false -> []; + _ -> + case To of + {reg, From} -> []; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + %% NewReg should be a hard reg, or a pseudo mapped + %% to itself (formals are handled this way). + if is_integer(NewReg) -> + case hipe_ppc_registers:IsPrecoloured(NewReg) of + true -> []; + _ -> if From =:= NewReg -> []; + true -> + exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of NewReg check + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. + if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(ppc), + if MaxTempNum >= ToTempNum -> ok; + true -> hipe_gensym:set_var(ppc, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end. + +mk_ra_map_fp(FPMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_fpr), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FPMap). diff --git a/lib/hipe/ppc/hipe_ppc_ra_ls.erl b/lib/hipe/ppc/hipe_ppc_ra_ls.erl new file mode 100644 index 0000000000..0b5d915ee8 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_ra_ls.erl @@ -0,0 +1,56 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Linear Scan register allocator for PowerPC + +-module(hipe_ppc_ra_ls). +-export([ra/3]). + +ra(Defun, SpillIndex, Options) -> + NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), + CFG = hipe_ppc_cfg:init(NewDefun), + SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG), + alloc(NewDefun, SpillIndex, SpillLimit, Options). + +alloc(Defun, SpillIndex, SpillLimit, Options) -> + CFG = hipe_ppc_cfg:init(Defun), + {Coloring, _NewSpillIndex} = + regalloc( + CFG, + hipe_ppc_registers:allocatable_gpr()-- + [hipe_ppc_registers:temp3(), + hipe_ppc_registers:temp2(), + hipe_ppc_registers:temp1()], + [hipe_ppc_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + hipe_ppc_specific), + {NewDefun, _DidSpill} = + hipe_ppc_ra_postconditions:check_and_rewrite( + Defun, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), + {TempMap2,_NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + hipe_ppc_specific, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + {NewDefun, Coloring2}. + +regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> + hipe_ls_regalloc:regalloc( + CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). diff --git a/lib/hipe/ppc/hipe_ppc_ra_naive.erl b/lib/hipe/ppc/hipe_ppc_ra_naive.erl new file mode 100644 index 0000000000..f0ca41b49e --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_ra_naive.erl @@ -0,0 +1,29 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_ra_naive). +-export([ra/3]). + +-include("hipe_ppc.hrl"). + +ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} + {NewDefun,_DidSpill} = + hipe_ppc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), + {NewDefun, []}. diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl new file mode 100644 index 0000000000..142bce39cc --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl @@ -0,0 +1,243 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_ra_postconditions). + +-export([check_and_rewrite/3, check_and_rewrite2/3]). + +-include("hipe_ppc.hrl"). + +check_and_rewrite(Defun, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), + check_and_rewrite2(Defun, TempMap, Allocator). + +check_and_rewrite2(Defun, TempMap, Allocator) -> + Strategy = strategy(Allocator), + #defun{code=Code0} = Defun, + {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), + VarRange = {0, hipe_gensym:get_var(ppc)}, + {Defun#defun{code=Code1, var_range=VarRange}, + DidSpill}. + +strategy(Allocator) -> + case Allocator of + 'normal' -> 'new'; + 'linearscan' -> 'fixed'; + 'naive' -> 'fixed' + end. + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap, Strategy) -> + case I of + #alu{} -> do_alu(I, TempMap, Strategy); + #cmp{} -> do_cmp(I, TempMap, Strategy); + #load{} -> do_load(I, TempMap, Strategy); + #loadx{} -> do_loadx(I, TempMap, Strategy); + #mfspr{} -> do_mfspr(I, TempMap, Strategy); + #mtcr{} -> do_mtcr(I, TempMap, Strategy); + #mtspr{} -> do_mtspr(I, TempMap, Strategy); + #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy); + #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #store{} -> do_store(I, TempMap, Strategy); + #storex{} -> do_storex(I, TempMap, Strategy); + #unary{} -> do_unary(I, TempMap, Strategy); + #lfd{} -> do_lfd(I, TempMap, Strategy); + #lfdx{} -> do_lfdx(I, TempMap, Strategy); + #stfd{} -> do_stfd(I, TempMap, Strategy); + #stfdx{} -> do_stfdx(I, TempMap, Strategy); + _ -> {[I], false} + end. + +%%% Fix relevant instruction types. + +do_alu(I=#alu{dst=Dst,src1=Src1,src2=Src2}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixSrc1,NewSrc1,DidSpill2} = fix_src1(Src1, TempMap, Strategy), + {FixSrc2,NewSrc2,DidSpill3} = fix_src2_or_imm(Src2, TempMap, Strategy), + NewI = I#alu{dst=NewDst,src1=NewSrc1,src2=NewSrc2}, + {FixSrc1 ++ FixSrc2 ++ [NewI | FixDst], DidSpill1 or DidSpill2 or DidSpill3}. + +do_cmp(I=#cmp{src1=Src1,src2=Src2}, TempMap, Strategy) -> + {FixSrc1,NewSrc1,DidSpill1} = fix_src1(Src1, TempMap, Strategy), + {FixSrc2,NewSrc2,DidSpill2} = fix_src2_or_imm(Src2, TempMap, Strategy), + NewI = I#cmp{src1=NewSrc1,src2=NewSrc2}, + {FixSrc1 ++ FixSrc2 ++ [NewI], DidSpill1 or DidSpill2}. + +do_load(I=#load{dst=Dst,base=Base}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixBase,NewBase,DidSpill2} = fix_src1(Base, TempMap, Strategy), + NewI = I#load{dst=NewDst,base=NewBase}, + {FixBase ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_loadx(I=#loadx{dst=Dst,base1=Base1,base2=Base2}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixBase1,NewBase1,DidSpill2} = fix_src1(Base1, TempMap, Strategy), + {FixBase2,NewBase2,DidSpill3} = fix_src2(Base2, TempMap, Strategy), + NewI = I#loadx{dst=NewDst,base1=NewBase1,base2=NewBase2}, + {FixBase1 ++ FixBase2 ++ [NewI | FixDst], DidSpill1 or DidSpill2 or DidSpill3}. + +do_mfspr(I=#mfspr{dst=Dst}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap, Strategy), + NewI = I#mfspr{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_mtcr(I=#mtcr{src=Src}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), + NewI = I#mtcr{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}. + +do_mtspr(I=#mtspr{src=Src}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), + NewI = I#mtspr{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}. + +do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap, Strategy), + NewI = I#pseudo_li{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) -> + %% Either Dst or Src (but not both) may be a pseudo temp. + %% pseudo_move and pseudo_tailcall are special cases: in + %% all other instructions, all temps must be non-pseudos + %% after register allocation. + case temp_is_spilled(Dst, TempMap) of + true -> % Src must not be a pseudo + {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), + NewI = I#pseudo_move{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}; + _ -> + {[I], false} + end. + +do_store(I=#store{src=Src,base=Base}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), + {FixBase,NewBase,DidSpill2} = fix_src2(Base, TempMap, Strategy), + NewI = I#store{src=NewSrc,base=NewBase}, + {FixSrc ++ FixBase ++ [NewI], DidSpill1 or DidSpill2}. + +do_storex(I=#storex{src=Src,base1=Base1,base2=Base2}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), + {FixBase1,NewBase1,DidSpill2} = fix_src2(Base1, TempMap, Strategy), + {FixBase2,NewBase2,DidSpill3} = fix_src3(Base2, TempMap, Strategy), + NewI = I#storex{src=NewSrc,base1=NewBase1,base2=NewBase2}, + {FixSrc ++ FixBase1 ++ FixBase2 ++ [NewI], DidSpill1 or DidSpill2 or DidSpill3}. + +do_unary(I=#unary{dst=Dst,src=Src}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixSrc,NewSrc,DidSpill2} = fix_src1(Src, TempMap, Strategy), + NewI = I#unary{dst=NewDst,src=NewSrc}, + {FixSrc ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_lfd(I=#lfd{base=Base}, TempMap, Strategy) -> + {FixBase,NewBase,DidSpill} = fix_src1(Base, TempMap, Strategy), + NewI = I#lfd{base=NewBase}, + {FixBase ++ [NewI], DidSpill}. + +do_lfdx(I=#lfdx{base1=Base1,base2=Base2}, TempMap, Strategy) -> + {FixBase1,NewBase1,DidSpill1} = fix_src1(Base1, TempMap, Strategy), + {FixBase2,NewBase2,DidSpill2} = fix_src2(Base2, TempMap, Strategy), + NewI = I#lfdx{base1=NewBase1,base2=NewBase2}, + {FixBase1 ++ FixBase2 ++ [NewI], DidSpill1 or DidSpill2}. + +do_stfd(I=#stfd{base=Base}, TempMap, Strategy) -> + {FixBase,NewBase,DidSpill} = fix_src1(Base, TempMap, Strategy), + NewI = I#stfd{base=NewBase}, + {FixBase ++ [NewI], DidSpill}. + +do_stfdx(I=#stfdx{base1=Base1,base2=Base2}, TempMap, Strategy) -> + {FixBase1,NewBase1,DidSpill1} = fix_src1(Base1, TempMap, Strategy), + {FixBase2,NewBase2,DidSpill2} = fix_src2(Base2, TempMap, Strategy), + NewI = I#stfdx{base1=NewBase1,base2=NewBase2}, + {FixBase1 ++ FixBase2 ++ [NewI], DidSpill1 or DidSpill2}. + +%%% Fix Dst and Src operands. + +fix_src2_or_imm(Src2, TempMap, Strategy) -> + case Src2 of + #ppc_temp{} -> fix_src2(Src2, TempMap, Strategy); + _ -> {[], Src2, false} + end. + +fix_src1(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp1(Strategy)). + +temp1('new') -> []; +temp1('fixed') -> hipe_ppc_registers:temp1(). + +fix_src2(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp2(Strategy)). + +temp2('new') -> []; +temp2('fixed') -> hipe_ppc_registers:temp2(). + +fix_src3(Src, TempMap, Strategy) -> % storex :-( + fix_src(Src, TempMap, temp3(Strategy)). + +temp3('new') -> []; +temp3('fixed') -> hipe_ppc_registers:temp3(). + +fix_src(Src, TempMap, RegOpt) -> + case temp_is_spilled(Src, TempMap) of + true -> + NewSrc = clone(Src, RegOpt), + {[hipe_ppc:mk_pseudo_move(NewSrc, Src)], + NewSrc, + true}; + _ -> + {[], Src, false} + end. + +fix_dst(Dst, TempMap, Strategy) -> + case temp_is_spilled(Dst, TempMap) of + true -> + NewDst = clone(Dst, temp3(Strategy)), + {[hipe_ppc:mk_pseudo_move(Dst, NewDst)], + NewDst, + true}; + _ -> + {[], Dst, false} + end. + +%%% Check if an operand is a pseudo-temp. + +temp_is_spilled(Temp, []) -> % special case for naive regalloc + not(hipe_ppc:temp_is_precoloured(Temp)); +temp_is_spilled(Temp, TempMap) -> + case hipe_ppc:temp_is_allocatable(Temp) of + true -> + Reg = hipe_ppc:temp_reg(Temp), + tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap); + false -> true + end. + +%%% Make a certain reg into a clone of Temp. + +clone(Temp, RegOpt) -> + Type = hipe_ppc:temp_type(Temp), + case RegOpt of + [] -> hipe_ppc:mk_new_temp(Type); + Reg -> hipe_ppc:mk_temp(Reg, Type) + end. diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl new file mode 100644 index 0000000000..889c5681ac --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl @@ -0,0 +1,130 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_ra_postconditions_fp). +-export([check_and_rewrite/2]). +-include("hipe_ppc.hrl"). + +check_and_rewrite(Defun, Coloring) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp), + #defun{code=Code0} = Defun, + {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), + VarRange = {0, hipe_gensym:get_var(ppc)}, + {Defun#defun{code=Code1, var_range=VarRange}, + DidSpill}. + +do_insns([I|Insns], TempMap, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap), + do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap) -> + case I of + #lfd{} -> do_lfd(I, TempMap); + #lfdx{} -> do_lfdx(I, TempMap); + #stfd{} -> do_stfd(I, TempMap); + #stfdx{} -> do_stfdx(I, TempMap); + #fp_binary{} -> do_fp_binary(I, TempMap); + #fp_unary{} -> do_fp_unary(I, TempMap); + #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap); + _ -> {[I], false} + end. + +%%% Fix relevant instruction types. + +do_lfd(I=#lfd{dst=Dst}, TempMap) -> + {FixDst, NewDst, DidSpill} = fix_dst(Dst, TempMap), + NewI = I#lfd{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_lfdx(I=#lfdx{dst=Dst}, TempMap) -> + {FixDst, NewDst, DidSpill} = fix_dst(Dst, TempMap), + NewI = I#lfdx{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_stfd(I=#stfd{src=Src}, TempMap) -> + {FixSrc, NewSrc, DidSpill} = fix_src(Src, TempMap), + NewI = I#stfd{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}. + +do_stfdx(I=#stfdx{src=Src}, TempMap) -> + {FixSrc, NewSrc, DidSpill} = fix_src(Src, TempMap), + NewI = I#stfdx{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}. + +do_fp_binary(I=#fp_binary{dst=Dst,src1=Src1,src2=Src2}, TempMap) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap), + {FixSrc1,NewSrc1,DidSpill2} = fix_src(Src1, TempMap), + {FixSrc2,NewSrc2,DidSpill3} = fix_src(Src2, TempMap), + NewI = I#fp_binary{dst=NewDst,src1=NewSrc1,src2=NewSrc2}, + {FixSrc1 ++ FixSrc2 ++ [NewI | FixDst], DidSpill1 or DidSpill2 or DidSpill3}. + +do_fp_unary(I=#fp_unary{dst=Dst,src=Src}, TempMap) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap), + {FixSrc,NewSrc,DidSpill2} = fix_src(Src, TempMap), + NewI = I#fp_unary{dst=NewDst,src=NewSrc}, + {FixSrc ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, TempMap) -> + case temp_is_spilled(Dst, TempMap) of + true -> + {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), + NewI = I#pseudo_fmove{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}; + _ -> + {[I], false} + end. + +%%% Fix Dst and Src operands. + +fix_src(Src, TempMap) -> + case temp_is_spilled(Src, TempMap) of + true -> + NewSrc = clone(Src), + {[hipe_ppc:mk_pseudo_fmove(NewSrc, Src)], NewSrc, true}; + _ -> + {[], Src, false} + end. + +fix_dst(Dst, TempMap) -> + case temp_is_spilled(Dst, TempMap) of + true -> + NewDst = clone(Dst), + {[hipe_ppc:mk_pseudo_fmove(Dst, NewDst)], NewDst, true}; + _ -> + {[], Dst, false} + end. + +%%% Check if an operand is a pseudo-temp. + +temp_is_spilled(Temp, TempMap) -> + case hipe_ppc:temp_is_allocatable(Temp) of + true -> + Reg = hipe_ppc:temp_reg(Temp), + tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap); + false -> true + end. + +%%% Create a new temp with the same type as an old one. + +clone(Temp) -> + Type = hipe_ppc:temp_type(Temp), % XXX: always double? + hipe_ppc:mk_new_temp(Type). diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl new file mode 100644 index 0000000000..74aeab3df4 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_registers.erl @@ -0,0 +1,246 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_registers). + +-export([reg_name_gpr/1, + reg_name_fpr/1, + first_virtual/0, + is_precoloured_gpr/1, + is_precoloured_fpr/1, + all_precoloured/0, + return_value/0, + temp1/0, + temp2/0, + temp3/0, % for base2 in storeix :-( + heap_pointer/0, + stack_pointer/0, + proc_pointer/0, + %%heap_limit/0, + %%fcalls/0, + allocatable_gpr/0, + allocatable_fpr/0, + is_fixed/1, + nr_args/0, + arg/1, + args/1, + is_arg/1, % for linear scan + call_clobbered/0, + tailcall_clobbered/0, + live_at_return/0 + ]). + +-include("../rtl/hipe_literals.hrl"). + +-define(R0, 0). +-define(R1, 1). +-define(R2, 2). +-define(R3, 3). +-define(R4, 4). +-define(R5, 5). +-define(R6, 6). +-define(R7, 7). +-define(R8, 8). +-define(R9, 9). +-define(R10, 10). +-define(R11, 11). +-define(R12, 12). +-define(R13, 13). +-define(R14, 14). +-define(R15, 15). +-define(R16, 16). +-define(R17, 17). +-define(R18, 18). +-define(R19, 19). +-define(R20, 20). +-define(R21, 21). +-define(R22, 22). +-define(R23, 23). +-define(R24, 24). +-define(R25, 25). +-define(R26, 26). +-define(R27, 27). +-define(R28, 28). +-define(R29, 29). +-define(R30, 30). +-define(R31, 31). +-define(LAST_PRECOLOURED, 31). % must handle both GPR and FPR ranges + +-define(ARG0, ?R4). +-define(ARG1, ?R5). +-define(ARG2, ?R6). +-define(ARG3, ?R7). +-define(ARG4, ?R8). +-define(ARG5, ?R9). +-define(ARG6, ?R10). + +-define(TEMP1, ?R28). +-define(TEMP2, ?R27). +-define(TEMP3, ?R26). % XXX: for base2 in storeix, switch to R0 instead? + +-define(RETURN_VALUE, ?R3). +-define(HEAP_POINTER, ?R29). +-define(STACK_POINTER, ?R30). +-define(PROC_POINTER, ?R31). + +reg_name_gpr(R) -> [$r | integer_to_list(R)]. +reg_name_fpr(R) -> [$f | integer_to_list(R)]. + +%%% Must handle both GPR and FPR ranges. +first_virtual() -> ?LAST_PRECOLOURED + 1. + +%%% These two tests have the same implementation, but that's +%%% not something we should cast in stone in the interface. +is_precoloured_gpr(R) -> R =< ?LAST_PRECOLOURED. +is_precoloured_fpr(R) -> R =< ?LAST_PRECOLOURED. + +all_precoloured() -> + %% XXX: skip R1, R2, and R13. They should never occur anywhere. + [ ?R0, ?R1, ?R2, ?R3, ?R4, ?R5, ?R6, ?R7, + ?R8, ?R9, ?R10, ?R11, ?R12, ?R13, ?R14, ?R15, + ?R16, ?R17, ?R18, ?R19, ?R20, ?R21, ?R22, ?R23, + ?R24, ?R25, ?R26, ?R27, ?R28, ?R29, ?R30, ?R31]. + +return_value() -> ?RETURN_VALUE. + +temp1() -> ?TEMP1. +temp2() -> ?TEMP2. +temp3() -> ?TEMP3. % for base2 in storeix :-( + +heap_pointer() -> ?HEAP_POINTER. + +stack_pointer() -> ?STACK_POINTER. + +proc_pointer() -> ?PROC_POINTER. + +allocatable_gpr() -> + %% r0 is too restricted to be useful for variables + %% r1, r2, and r13 are reserved for C + %% r29, r30, and r31 are fixed global registers + [ ?R3, ?R4, ?R5, ?R6, ?R7, + ?R8, ?R9, ?R10, ?R11, ?R12, ?R14, ?R15, + ?R16, ?R17, ?R18, ?R19, ?R20, ?R21, ?R22, ?R23, + ?R24, ?R25, ?R26, ?R27, ?R28]. + +allocatable_fpr() -> + [ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31]. + +%% Needed for hipe_graph_coloring_regalloc. +%% Presumably true for Reg in AllPrecoloured \ Allocatable. +is_fixed(Reg) -> + case Reg of + ?HEAP_POINTER -> true; + ?STACK_POINTER -> true; + ?PROC_POINTER -> true; + %% The following cases are required for linear scan: + %% it gets confused if it sees a register which is + %% neither allocatable nor global (fixed or one of + %% the scratch registers set aside for linear scan). + ?R0 -> true; + ?R1 -> true; + ?R2 -> true; + ?R13 -> true; + _ -> false + end. + +nr_args() -> ?PPC_NR_ARG_REGS. + +args(Arity) when is_integer(Arity) -> + N = erlang:min(Arity, ?PPC_NR_ARG_REGS), + args(N-1, []). + +args(I, Rest) when is_integer(I), I < 0 -> Rest; +args(I, Rest) -> args(I-1, [arg(I) | Rest]). + +arg(N) -> + if N < ?PPC_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + 5 -> ?ARG5; + 6 -> ?ARG6; + _ -> exit({?MODULE, arg, N}) + end; + true -> + exit({?MODULE, arg, N}) + end. + +is_arg(R) -> + case R of + ?ARG0 -> ?PPC_NR_ARG_REGS > 0; + ?ARG1 -> ?PPC_NR_ARG_REGS > 1; + ?ARG2 -> ?PPC_NR_ARG_REGS > 2; + ?ARG3 -> ?PPC_NR_ARG_REGS > 3; + ?ARG4 -> ?PPC_NR_ARG_REGS > 4; + ?ARG5 -> ?PPC_NR_ARG_REGS > 5; + ?ARG6 -> ?PPC_NR_ARG_REGS > 6; + _ -> false + end. + +call_clobbered() -> % does the RA strip the type or not? + [{?R0,tagged},{?R0,untagged}, + %% R1 is reserved for C + %% R2 is reserved for C + {?R3,tagged},{?R3,untagged}, + {?R4,tagged},{?R4,untagged}, + {?R5,tagged},{?R5,untagged}, + {?R6,tagged},{?R6,untagged}, + {?R7,tagged},{?R7,untagged}, + {?R8,tagged},{?R8,untagged}, + {?R9,tagged},{?R9,untagged}, + {?R10,tagged},{?R10,untagged}, + {?R11,tagged},{?R11,untagged}, + {?R12,tagged},{?R12,untagged}, + %% R13 is reserved for C + {?R14,tagged},{?R14,untagged}, + {?R15,tagged},{?R15,untagged}, + {?R16,tagged},{?R16,untagged}, + {?R17,tagged},{?R17,untagged}, + {?R18,tagged},{?R18,untagged}, + {?R19,tagged},{?R19,untagged}, + {?R20,tagged},{?R20,untagged}, + {?R21,tagged},{?R21,untagged}, + {?R22,tagged},{?R22,untagged}, + {?R23,tagged},{?R23,untagged}, + {?R24,tagged},{?R24,untagged}, + {?R25,tagged},{?R25,untagged}, + {?R26,tagged},{?R26,untagged}, + {?R27,tagged},{?R27,untagged}, + {?R28,tagged},{?R28,untagged} + %% R29 is fixed (HP) + %% R30 is fixed (NSP) + %% R31 is fixed (P) + ]. + +tailcall_clobbered() -> % tailcall crapola needs one temp + [{?TEMP1,tagged},{?TEMP1,untagged}]. + +live_at_return() -> + [%%{?LR,untagged}, + {?HEAP_POINTER,untagged}, + {?STACK_POINTER,untagged}, + {?PROC_POINTER,untagged} + ]. diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl new file mode 100644 index 0000000000..458af250de --- /dev/null +++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl @@ -0,0 +1,1249 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% The PowerPC instruction set is quite irregular. +%%% The following quirks must be handled by the translation: +%%% +%%% - The instruction names are different for reg/reg and reg/imm +%%% source operands. For some operations, completely different +%%% instructions handle the reg/reg and reg/imm cases. +%%% - The name of an arithmetic instruction depends on whether any +%%% condition codes are to be set or not. Overflow is treated +%%% separately from other conditions. +%%% - Some combinations or RTL ALU operations, source operand shapes, +%%% and requested conditions have no direct correspondence in the +%%% PowerPC instruction set. +%%% - The tagging of immediate operands as simm16 or uimm16 depends +%%% on the actual instruction. +%%% - Conditional branches have no unsigned conditions. Instead there +%%% are signed and unsigned versions of the compare instruction. +%%% - The arithmetic overflow flag XER[SO] is sticky: once set it +%%% remains set until explicitly cleared. + +-module(hipe_rtl_to_ppc). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl"). + +translate(RTL) -> + hipe_gensym:init(ppc), + hipe_gensym:set_var(ppc, hipe_ppc_registers:first_virtual()), + hipe_gensym:set_label(ppc, hipe_gensym:get_label(rtl)), + Map0 = vmap_empty(), + {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0), + OldData = hipe_rtl:rtl_data(RTL), + {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData), + {RegFormals, _} = split_args(Formals), + Code = + case RegFormals of + [] -> Code0; + _ -> [hipe_ppc:mk_label(hipe_gensym:get_next_label(ppc)) | + move_formals(RegFormals, Code0)] + end, + IsClosure = hipe_rtl:rtl_is_closure(RTL), + IsLeaf = hipe_rtl:rtl_is_leaf(RTL), + hipe_ppc:mk_defun(hipe_rtl:rtl_fun(RTL), + Formals, + IsClosure, + IsLeaf, + Code, + NewData, + [], + []). + +conv_insn_list([H|T], Map, Data) -> + {NewH, NewMap, NewData1} = conv_insn(H, Map, Data), + %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]), + {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1), + {NewH ++ NewT, NewData2}; +conv_insn_list([], _, Data) -> + {[], Data}. + +conv_insn(I, Map, Data) -> + case I of + #alu{} -> conv_alu(I, Map, Data); + #alub{} -> conv_alub(I, Map, Data); + #branch{} -> conv_branch(I, Map, Data); + #call{} -> conv_call(I, Map, Data); + #comment{} -> conv_comment(I, Map, Data); + #enter{} -> conv_enter(I, Map, Data); + #goto{} -> conv_goto(I, Map, Data); + #label{} -> conv_label(I, Map, Data); + #load{} -> conv_load(I, Map, Data); + #load_address{} -> conv_load_address(I, Map, Data); + #load_atom{} -> conv_load_atom(I, Map, Data); + #move{} -> conv_move(I, Map, Data); + #return{} -> conv_return(I, Map, Data); + #store{} -> conv_store(I, Map, Data); + #switch{} -> conv_switch(I, Map, Data); + #fconv{} -> conv_fconv(I, Map, Data); + #fmove{} -> conv_fmove(I, Map, Data); + #fload{} -> conv_fload(I, Map, Data); + #fstore{} -> conv_fstore(I, Map, Data); + #fp{} -> conv_fp_binary(I, Map, Data); + #fp_unop{} -> conv_fp_unary(I, Map, Data); + _ -> exit({?MODULE,conv_insn,I}) + end. + +conv_fconv(I, Map, Data) -> + %% Dst := (double)Src, where Dst is FP reg and Src is int reg + {Dst, Map0} = conv_fpreg(hipe_rtl:fconv_dst(I), Map), + {Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0), % exclude imm src + I2 = mk_fconv(Dst, Src), + {I2, Map1, Data}. + +mk_fconv(Dst, Src) -> + CSP = hipe_ppc:mk_temp(1, 'untagged'), + R0 = hipe_ppc:mk_temp(0, 'untagged'), + RTmp1 = hipe_ppc:mk_new_temp('untagged'), + RTmp2 = hipe_ppc:mk_new_temp('untagged'), + RTmp3 = hipe_ppc:mk_new_temp('untagged'), + FTmp1 = hipe_ppc:mk_new_temp('double'), + FTmp2 = hipe_ppc:mk_new_temp('double'), + [hipe_ppc:mk_pseudo_li(RTmp1, {fconv_constant,c_const}), + hipe_ppc:mk_lfd(FTmp1, 0, RTmp1), + hipe_ppc:mk_alu('xoris', RTmp2, Src, hipe_ppc:mk_uimm16(16#8000)), + hipe_ppc:mk_store('stw', RTmp2, 28, CSP), + hipe_ppc:mk_alu('addis', RTmp3, R0, hipe_ppc:mk_simm16(16#4330)), + hipe_ppc:mk_store('stw', RTmp3, 24, CSP), + hipe_ppc:mk_lfd(FTmp2, 24, CSP), + hipe_ppc:mk_fp_binary('fsub', Dst, FTmp2, FTmp1)]. + +conv_fmove(I, Map, Data) -> + %% Dst := Src, where both Dst and Src are FP regs + {Dst, Map0} = conv_fpreg(hipe_rtl:fmove_dst(I), Map), + {Src, Map1} = conv_fpreg(hipe_rtl:fmove_src(I), Map0), + I2 = mk_fmove(Dst, Src), + {I2, Map1, Data}. + +mk_fmove(Dst, Src) -> + [hipe_ppc:mk_pseudo_fmove(Dst, Src)]. + +conv_fload(I, Map, Data) -> + %% Dst := MEM[Base+Off], where Dst is FP reg + {Dst, Map0} = conv_fpreg(hipe_rtl:fload_dst(I), Map), + {Base1, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), + I2 = mk_fload(Dst, Base1, Base2), + {I2, Map2, Data}. + +mk_fload(Dst, Base1, Base2) -> + case hipe_ppc:is_temp(Base1) of + true -> + case hipe_ppc:is_temp(Base2) of + true -> + mk_fload_rr(Dst, Base1, Base2); + _ -> + mk_fload_ri(Dst, Base1, Base2) + end; + _ -> + case hipe_ppc:is_temp(Base2) of + true -> + mk_fload_ri(Dst, Base2, Base1); + _ -> + mk_fload_ii(Dst, Base1, Base2) + end + end. + +mk_fload_ii(Dst, Base1, Base2) -> + io:format("~w: RTL fload with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Base1, + mk_fload_ri(Dst, Tmp, Base2)). + +mk_fload_ri(Dst, Base, Disp) -> + hipe_ppc:mk_fload(Dst, Disp, Base, 'new'). + +mk_fload_rr(Dst, Base1, Base2) -> + [hipe_ppc:mk_lfdx(Dst, Base1, Base2)]. + +conv_fstore(I, Map, Data) -> + %% MEM[Base+Off] := Src, where Src is FP reg + {Base1, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map), + {Src, Map1} = conv_fpreg(hipe_rtl:fstore_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1), + I2 = mk_fstore(Src, Base1, Base2), + {I2, Map2, Data}. + +mk_fstore(Src, Base1, Base2) -> + case hipe_ppc:is_temp(Base2) of + true -> + mk_fstore_rr(Src, Base1, Base2); + _ -> + mk_fstore_ri(Src, Base1, Base2) + end. + +mk_fstore_ri(Src, Base, Disp) -> + hipe_ppc:mk_fstore(Src, Disp, Base, 'new'). + +mk_fstore_rr(Src, Base1, Base2) -> + [hipe_ppc:mk_stfdx(Src, Base1, Base2)]. + +conv_fp_binary(I, Map, Data) -> + {Dst, Map0} = conv_fpreg(hipe_rtl:fp_dst(I), Map), + {Src1, Map1} = conv_fpreg(hipe_rtl:fp_src1(I), Map0), + {Src2, Map2} = conv_fpreg(hipe_rtl:fp_src2(I), Map1), + RtlFpOp = hipe_rtl:fp_op(I), + I2 = mk_fp_binary(Dst, Src1, RtlFpOp, Src2), + {I2, Map2, Data}. + +mk_fp_binary(Dst, Src1, RtlFpOp, Src2) -> + FpBinOp = + case RtlFpOp of + 'fadd' -> 'fadd'; + 'fdiv' -> 'fdiv'; + 'fmul' -> 'fmul'; + 'fsub' -> 'fsub' + end, + [hipe_ppc:mk_fp_binary(FpBinOp, Dst, Src1, Src2)]. + +conv_fp_unary(I, Map, Data) -> + {Dst, Map0} = conv_fpreg(hipe_rtl:fp_unop_dst(I), Map), + {Src, Map1} = conv_fpreg(hipe_rtl:fp_unop_src(I), Map0), + RtlFpUnOp = hipe_rtl:fp_unop_op(I), + I2 = mk_fp_unary(Dst, Src, RtlFpUnOp), + {I2, Map1, Data}. + +mk_fp_unary(Dst, Src, RtlFpUnOp) -> + FpUnOp = + case RtlFpUnOp of + 'fchs' -> 'fneg' + end, + [hipe_ppc:mk_fp_unary(FpUnOp, Dst, Src)]. + +conv_alu(I, Map, Data) -> + %% dst = src1 aluop src2 + {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map), + {Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0), + {Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1), + RtlAluOp = hipe_rtl:alu_op(I), + I2 = mk_alu(Dst, Src1, RtlAluOp, Src2), + {I2, Map2, Data}. + +mk_alu(Dst, Src1, RtlAluOp, Src2) -> + case hipe_ppc:is_temp(Src1) of + true -> + case hipe_ppc:is_temp(Src2) of + true -> + mk_alu_rr(Dst, Src1, RtlAluOp, Src2); + _ -> + mk_alu_ri(Dst, Src1, RtlAluOp, Src2) + end; + _ -> + case hipe_ppc:is_temp(Src2) of + true -> + mk_alu_ir(Dst, Src1, RtlAluOp, Src2); + _ -> + mk_alu_ii(Dst, Src1, RtlAluOp, Src2) + end + end. + +mk_alu_ii(Dst, Src1, RtlAluOp, Src2) -> + io:format("~w: RTL alu with two immediates (~w ~w ~w)\n", + [?MODULE, Src1, RtlAluOp, Src2]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_alu_ri(Dst, Tmp, RtlAluOp, Src2)). + +mk_alu_ir(Dst, Src1, RtlAluOp, Src2) -> + case rtl_aluop_commutes(RtlAluOp) of + true -> + mk_alu_ri(Dst, Src2, RtlAluOp, Src1); + _ -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_alu_rr(Dst, Tmp, RtlAluOp, Src2)) + end. + +mk_alu_ri(Dst, Src1, RtlAluOp, Src2) -> + case RtlAluOp of + 'sub' -> % there is no 'subi' + mk_alu_ri_addi(Dst, Src1, -Src2); + 'add' -> % 'addi' has a 16-bit simm operand + mk_alu_ri_addi(Dst, Src1, Src2); + 'mul' -> % 'mulli' has a 16-bit simm operand + mk_alu_ri_simm16(Dst, Src1, RtlAluOp, 'mulli', Src2); + 'and' -> % 'andi.' has a 16-bit uimm operand + case rlwinm_mask(Src2) of + {MB,ME} -> + [hipe_ppc:mk_unary({'rlwinm',0,MB,ME}, Dst, Src1)]; + _ -> + mk_alu_ri_bitop(Dst, Src1, RtlAluOp, 'andi.', Src2) + end; + 'or' -> % 'ori' has a 16-bit uimm operand + mk_alu_ri_bitop(Dst, Src1, RtlAluOp, 'ori', Src2); + 'xor' -> % 'xori' has a 16-bit uimm operand + mk_alu_ri_bitop(Dst, Src1, RtlAluOp, 'xori', Src2); + _ -> % shift ops have 5-bit uimm operands + mk_alu_ri_shift(Dst, Src1, RtlAluOp, Src2) + end. + +rlwinm_mask(Imm) -> + Res1 = rlwinm_mask2(Imm), + case Res1 of + {_MB,_ME} -> Res1; + [] -> + case rlwinm_mask2(bnot Imm) of + {MB,ME} -> {ME+1,MB-1}; + [] -> [] + end + end. + +rlwinm_mask2(Imm) -> + case Imm band 16#ffffffff of + 0 -> []; + Word -> + MB = lsb_log2(Word), % first 1 bit + case bnot(Word bsr MB) band 16#ffffffff of + 0 -> []; % Imm was all-bits-one XXX: we should handle this + Word1 -> + ME1 = lsb_log2(Word1),% first 0 bit after the 1s + case Word bsr (MB+ME1) of + 0 -> + ME = MB+ME1-1, % last 1 bit + {31-ME, 31-MB}; % convert to PPC sick and twisted bit numbers + _ -> + [] + end + end + end. + +lsb_log2(Word) -> % PRE: Word =/= 0 + bitN_log2(Word band -Word, 0). + +bitN_log2(BitN, ShiftN) -> + if BitN > 16#ffff -> + bitN_log2(BitN bsr 16, ShiftN + 16); + true -> + ShiftN + hweight16(BitN - 1) + end. + +hweight16(Word) -> % PRE: 0 <= Word <= 16#ffff + Res1 = (Word band 16#5555) + ((Word bsr 1) band 16#5555), + Res2 = (Res1 band 16#3333) + ((Res1 bsr 2) band 16#3333), + Res3 = (Res2 band 16#0F0F) + ((Res2 bsr 4) band 16#0F0F), + (Res3 band 16#00FF) + ((Res3 bsr 8) band 16#00FF). + +mk_alu_ri_addi(Dst, Src1, Src2) -> + mk_alu_ri_simm16(Dst, Src1, 'add', 'addi', Src2). + +mk_alu_ri_simm16(Dst, Src1, RtlAluOp, AluOp, Src2) -> + if is_integer(Src2), -32768 =< Src2, Src2 < 32768 -> + [hipe_ppc:mk_alu(AluOp, Dst, Src1, + hipe_ppc:mk_simm16(Src2))]; + true -> + mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2) + end. + +mk_alu_ri_bitop(Dst, Src1, RtlAluOp, AluOp, Src2) -> + if is_integer(Src2), 0 =< Src2, Src2 < 65536 -> + [hipe_ppc:mk_alu(AluOp, Dst, Src1, + hipe_ppc:mk_uimm16(Src2))]; + true -> + mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2) + end. + +mk_alu_ri_shift(Dst, Src1, RtlAluOp, Src2) -> + if Src2 < 32, Src2 >= 0 -> + AluOp = + case RtlAluOp of + 'sll' -> 'slwi'; % alias for rlwinm + 'srl' -> 'srwi'; % alias for rlwinm + 'sra' -> 'srawi' + end, + [hipe_ppc:mk_alu(AluOp, Dst, Src1, + hipe_ppc:mk_uimm16(Src2))]; + true -> + mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2) + end. + +mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src2, + mk_alu_rr(Dst, Src1, RtlAluOp, Tmp)). + +mk_alu_rr(Dst, Src1, RtlAluOp, Src2) -> + case RtlAluOp of + 'sub' -> % PPC weirdness + [hipe_ppc:mk_alu('subf', Dst, Src2, Src1)]; + _ -> + AluOp = + case RtlAluOp of + 'add' -> 'add'; + 'mul' -> 'mullw'; + 'or' -> 'or'; + 'and' -> 'and'; + 'xor' -> 'xor'; + 'sll' -> 'slw'; + 'srl' -> 'srw'; + 'sra' -> 'sraw' + end, + [hipe_ppc:mk_alu(AluOp, Dst, Src1, Src2)] + end. + +conv_alub(I, Map, Data) -> + %% dst = src1 aluop src2; if COND goto label + {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), + {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + {AluOp, BCond} = + case {hipe_rtl:alub_op(I), hipe_rtl:alub_cond(I)} of + {'add', 'ltu'} -> + {'addc', 'eq'}; + {RtlAlubOp, RtlAlubCond} -> + {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)} + end, + BC = mk_pseudo_bc(BCond, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + I2 = + case {AluOp, BCond} of + {'addc', 'eq'} -> % copy XER[CA] to CR0[EQ] before the BC + TmpR = new_untagged_temp(), + [hipe_ppc:mk_mfspr(TmpR, 'xer'), + hipe_ppc:mk_mtcr(TmpR) | + BC]; + _ -> BC + end, + {NewSrc1, NewSrc2} = + case AluOp of + 'subf' -> {Src2, Src1}; + _ -> {Src1, Src2} + end, + I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond), + {I1 ++ I2, Map2, Data}. + +conv_alub_op(RtlAluOp) -> + case RtlAluOp of + 'add' -> 'add'; + 'sub' -> 'subf'; % XXX: must swap operands + 'mul' -> 'mullw'; + 'or' -> 'or'; + 'and' -> 'and'; + 'xor' -> 'xor'; + 'sll' -> 'slw'; + 'srl' -> 'srw'; + 'sra' -> 'sraw' + end. + +aluop_commutes(AluOp) -> + case AluOp of + 'add' -> true; + 'addc' -> true; + 'subf' -> false; + 'mullw' -> true; + 'or' -> true; + 'and' -> true; + 'xor' -> true; + 'slw' -> false; + 'srw' -> false; + 'sraw' -> false + end. + +conv_alub_cond(Cond) -> % only signed + case Cond of + eq -> 'eq'; + ne -> 'ne'; + gt -> 'gt'; + ge -> 'ge'; + lt -> 'lt'; + le -> 'le'; + overflow -> 'so'; + not_overflow -> 'ns'; + _ -> exit({?MODULE,conv_alub_cond,Cond}) + end. + +mk_alub(Dst, Src1, AluOp, Src2, BCond) -> + case hipe_ppc:is_temp(Src1) of + true -> + case hipe_ppc:is_temp(Src2) of + true -> + mk_alub_rr(Dst, Src1, AluOp, Src2, BCond); + _ -> + mk_alub_ri(Dst, Src1, AluOp, Src2, BCond) + end; + _ -> + case hipe_ppc:is_temp(Src2) of + true -> + mk_alub_ir(Dst, Src1, AluOp, Src2, BCond); + _ -> + mk_alub_ii(Dst, Src1, AluOp, Src2, BCond) + end + end. + +mk_alub_ii(Dst, Src1, AluOp, Src2, BCond) -> + io:format("~w: RTL alub with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_alub_ri(Dst, Tmp, AluOp, Src2, BCond)). + +mk_alub_ir(Dst, Src1, AluOp, Src2, BCond) -> + case aluop_commutes(AluOp) of + true -> + mk_alub_ri(Dst, Src2, AluOp, Src1, BCond); + _ -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_alub_rr(Dst, Tmp, AluOp, Src2, BCond)) + end. + +mk_alub_ri(Dst, Src1, AluOp, Src2, BCond) -> + true = is_integer(Src2), + case BCond of + 'so' -> mk_alub_ri_OE(Dst, Src1, AluOp, Src2); + 'ns' -> mk_alub_ri_OE(Dst, Src1, AluOp, Src2); + _ -> mk_alub_ri_Rc(Dst, Src1, AluOp, Src2) + end. + +mk_alub_ri_OE(Dst, Src1, AluOp, Src2) -> + %% Only 'add', 'subf', and 'mullw' apply here, and 'subf' becomes 'add'. + %% 'add' and 'mullw' have no immediate+Rc+OE forms. + %% Rewrite to reg/reg form. Sigh. + Tmp = new_untagged_temp(), + mk_li(Tmp, Src2, + mk_alub_rr_OE(Dst, Src1, AluOp, Tmp)). + +mk_alub_ri_Rc(Dst, Src1, AluOp, Src2) -> + case AluOp of + 'subf' -> % there is no 'subfi.', use 'addic.' or 'add.' + mk_alub_ri_Rc_addi(Dst, Src1, -Src2, 'addic.', 'add.'); + 'add' -> % 'addic.' has a 16-bit simm operand + mk_alub_ri_Rc_addi(Dst, Src1, Src2, 'addic.', 'add.'); + 'addc' -> % 'addic' has a 16-bit simm operand + mk_alub_ri_Rc_addi(Dst, Src1, Src2, 'addic', 'addc'); + 'mullw' -> % there is no 'mulli.' + mk_alub_ri_Rc_rr(Dst, Src1, 'mullw.', Src2); + 'or' -> % there is no 'ori.' + mk_alub_ri_Rc_rr(Dst, Src1, 'or.', Src2); + 'xor' -> % there is no 'xori.' + mk_alub_ri_Rc_rr(Dst, Src1, 'xor.', Src2); + 'and' -> % 'andi.' has a 16-bit uimm operand + case rlwinm_mask(Src2) of + {MB,ME} -> + [hipe_ppc:mk_unary({'rlwinm.',0,MB,ME}, Dst, Src1)]; + _ -> + mk_alub_ri_Rc_andi(Dst, Src1, Src2) + end; + _ -> % shift ops have 5-bit uimm operands + mk_alub_ri_Rc_shift(Dst, Src1, AluOp, Src2) + end. + +mk_alub_ri_Rc_addi(Dst, Src1, Src2, AddImmOp, AddRegOp) -> + if is_integer(Src2), -32768 =< Src2, Src2 < 32768 -> + [hipe_ppc:mk_alu(AddImmOp, Dst, Src1, + hipe_ppc:mk_simm16(Src2))]; + true -> + mk_alub_ri_Rc_rr(Dst, Src1, AddRegOp, Src2) + end. + +mk_alub_ri_Rc_andi(Dst, Src1, Src2) -> + if Src2 < 65536, Src2 >= 0 -> + [hipe_ppc:mk_alu('andi.', Dst, Src1, + hipe_ppc:mk_uimm16(Src2))]; + true -> + mk_alub_ri_Rc_rr(Dst, Src1, 'and.', Src2) + end. + +mk_alub_ri_Rc_shift(Dst, Src1, AluOp, Src2) -> + if Src2 < 32, Src2 >= 0 -> + AluOpIDot = + case AluOp of + 'slw' -> 'slwi.'; % alias for rlwinm. + 'srw' -> 'srwi.'; % alias for rlwinm. + 'sraw' -> 'srawi.' + end, + [hipe_ppc:mk_alu(AluOpIDot, Dst, Src1, + hipe_ppc:mk_uimm16(Src2))]; + true -> + AluOpDot = + case AluOp of + 'slw' -> 'slw.'; + 'srw' -> 'srw.'; + 'sraw' -> 'sraw.' + end, + mk_alub_ri_Rc_rr(Dst, Src1, AluOpDot, Src2) + end. + +mk_alub_ri_Rc_rr(Dst, Src1, AluOp, Src2) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src2, + [hipe_ppc:mk_alu(AluOp, Dst, Src1, Tmp)]). + +mk_alub_rr(Dst, Src1, AluOp, Src2, BCond) -> + case BCond of + 'so' -> mk_alub_rr_OE(Dst, Src1, AluOp, Src2); + 'ns' -> mk_alub_rr_OE(Dst, Src1, AluOp, Src2); + _ -> mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) + end. + +mk_alub_rr_OE(Dst, Src1, AluOp, Src2) -> + AluOpODot = + case AluOp of + 'subf' -> 'subfo.'; + 'add' -> 'addo.'; + 'mullw' -> 'mullwo.' + %% fail for addc, or, and, xor, slw, srw, sraw + end, + [hipe_ppc:mk_alu(AluOpODot, Dst, Src1, Src2)]. + +mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) -> + AluOpDot = + case AluOp of + 'subf' -> 'subf.'; + 'add' -> 'add.'; + 'addc' -> 'addc'; % only interested in CA, no Rc needed + 'mullw' -> 'mullw.'; + 'or' -> 'or.'; + 'and' -> 'and.'; + 'xor' -> 'xor.'; + 'slw' -> 'slw.'; + 'srw' -> 'srw.'; + 'sraw' -> 'sraw.' + end, + [hipe_ppc:mk_alu(AluOpDot, Dst, Src1, Src2)]. + +conv_branch(I, Map, Data) -> + %% = src1 - src2; if COND goto label + {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), + {BCond,Sign} = conv_branch_cond(hipe_rtl:branch_cond(I)), + I2 = mk_branch(Src1, BCond, Sign, Src2, + hipe_rtl:branch_true_label(I), + hipe_rtl:branch_false_label(I), + hipe_rtl:branch_pred(I)), + {I2, Map1, Data}. + +conv_branch_cond(Cond) -> % may be unsigned + case Cond of + gtu -> {'gt', 'unsigned'}; + geu -> {'ge', 'unsigned'}; + ltu -> {'lt', 'unsigned'}; + leu -> {'le', 'unsigned'}; + _ -> {conv_alub_cond(Cond), 'signed'} + end. + +mk_branch(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) -> + case hipe_ppc:is_temp(Src1) of + true -> + case hipe_ppc:is_temp(Src2) of + true -> + mk_branch_rr(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred); + _ -> + mk_branch_ri(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) + end; + _ -> + case hipe_ppc:is_temp(Src2) of + true -> + NewBCond = commute_bcond(BCond), + mk_branch_ri(Src2, NewBCond, Sign, Src1, TrueLab, FalseLab, Pred); + _ -> + mk_branch_ii(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) + end + end. + +commute_bcond(BCond) -> % if x BCond y, then y commute_bcond(BCond) x + case BCond of + 'eq' -> 'eq'; % ==, == + 'ne' -> 'ne'; % !=, != + 'gt' -> 'lt'; % >, < + 'ge' -> 'le'; % >=, <= + 'lt' -> 'gt'; % <, > + 'le' -> 'ge'; % <=, >= + %% so/ns: n/a + _ -> exit({?MODULE,commute_bcond,BCond}) + end. + +mk_branch_ii(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) -> + io:format("~w: RTL branch with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Src1, + mk_branch_ri(Tmp, BCond, Sign, Src2, + TrueLab, FalseLab, Pred)). + +mk_branch_ri(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) -> + {FixSrc2,NewSrc2,CmpOp} = + case Sign of + 'signed' -> + if is_integer(Src2), -32768 =< Src2, Src2 < 32768 -> + {[], hipe_ppc:mk_simm16(Src2), 'cmpi'}; + true -> + Tmp = new_untagged_temp(), + {mk_li(Tmp, Src2), Tmp, 'cmp'} + end; + 'unsigned' -> + if is_integer(Src2), 0 =< Src2, Src2 < 65536 -> + {[], hipe_ppc:mk_uimm16(Src2), 'cmpli'}; + true -> + Tmp = new_untagged_temp(), + {mk_li(Tmp, Src2), Tmp, 'cmpl'} + end + end, + FixSrc2 ++ + mk_cmp_bc(CmpOp, Src1, NewSrc2, BCond, TrueLab, FalseLab, Pred). + +mk_branch_rr(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) -> + CmpOp = + case Sign of + 'signed' -> 'cmp'; + 'unsigned' -> 'cmpl' + end, + mk_cmp_bc(CmpOp, Src1, Src2, BCond, TrueLab, FalseLab, Pred). + +mk_cmp_bc(CmpOp, Src1, Src2, BCond, TrueLab, FalseLab, Pred) -> + [hipe_ppc:mk_cmp(CmpOp, Src1, Src2) | + mk_pseudo_bc(BCond, TrueLab, FalseLab, Pred)]. + +conv_call(I, Map, Data) -> + {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), + {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), + {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1), + ContLab = hipe_rtl:call_continuation(I), + ExnLab = hipe_rtl:call_fail(I), + Linkage = hipe_rtl:call_type(I), + I2 = mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage), + {I2, Map2, Data}. + +mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + case hipe_ppc:is_prim(Fun) of + true -> + mk_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage); + false -> + mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) + end. + +mk_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) -> + case hipe_ppc:prim_prim(Prim) of + 'extsh' -> + mk_extsh_call(Dsts, Args, ContLab, ExnLab, Linkage); + 'lhbrx' -> + mk_lhbrx_call(Dsts, Args, ContLab, ExnLab, Linkage); + 'lwbrx' -> + mk_lwbrx_call(Dsts, Args, ContLab, ExnLab, Linkage); + _ -> + mk_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) + end. + +mk_extsh_call([Dst], [Src], [], [], not_remote) -> + true = hipe_ppc:is_temp(Src), + [hipe_ppc:mk_unary('extsh', Dst, Src)]. + +mk_lhbrx_call(Dsts, [Base,Offset], [], [], not_remote) -> + case Dsts of + [Dst] -> mk_loadx('lhbrx', Dst, Base, Offset); + [] -> [] % result unused, cancel the operation + end. + +mk_lwbrx_call([Dst], [Base,Offset], [], [], not_remote) -> + mk_loadx('lwbrx', Dst, Base, Offset). + +mk_loadx(LdxOp, Dst, Base, Offset) -> + true = hipe_ppc:is_temp(Base), + {FixOff,NewOff} = + case hipe_ppc:is_temp(Offset) of + true -> {[], Offset}; + false -> + Tmp = new_untagged_temp(), + {mk_li(Tmp, Offset), Tmp} + end, + FixOff ++ [hipe_ppc:mk_loadx(LdxOp, Dst, Base, NewOff)]. + +mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + %% The backend does not support pseudo_calls without a + %% continuation label, so we make sure each call has one. + {RealContLab, Tail} = + case mk_call_results(Dsts) of + [] -> + %% Avoid consing up a dummy basic block if the moves list + %% is empty, as is typical for calls to suspend/0. + %% This should be subsumed by a general "optimise the CFG" + %% module, and could probably be removed. + case ContLab of + [] -> + NewContLab = hipe_gensym:get_next_label(ppc), + {NewContLab, [hipe_ppc:mk_label(NewContLab)]}; + _ -> + {ContLab, []} + end; + Moves -> + %% Change the call to continue at a new basic block. + %% In this block move the result registers to the Dsts, + %% then continue at the call's original continuation. + NewContLab = hipe_gensym:get_next_label(ppc), + case ContLab of + [] -> + %% This is just a fallthrough + %% No jump back after the moves. + {NewContLab, + [hipe_ppc:mk_label(NewContLab) | + Moves]}; + _ -> + %% The call has a continuation. Jump to it. + {NewContLab, + [hipe_ppc:mk_label(NewContLab) | + Moves ++ + [hipe_ppc:mk_b_label(ContLab)]]} + end + end, + SDesc = hipe_ppc:mk_sdesc(ExnLab, 0, length(Args), {}), + {FixFunC,FunC} = fix_func(Fun), + CallInsn = hipe_ppc:mk_pseudo_call(FunC, SDesc, RealContLab, Linkage), + {RegArgs,StkArgs} = split_args(Args), + FixFunC ++ + mk_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])). + +mk_call_results([]) -> + []; +mk_call_results([Dst]) -> + RV = hipe_ppc:mk_temp(hipe_ppc_registers:return_value(), 'tagged'), + [hipe_ppc:mk_pseudo_move(Dst, RV)]; +mk_call_results(Dsts) -> + exit({?MODULE,mk_call_results,Dsts}). + +fix_func(Fun) -> + case hipe_ppc:is_temp(Fun) of + true -> {[hipe_ppc:mk_mtspr('ctr', Fun)], 'ctr'}; + _ -> {[], Fun} + end. + +mk_push_args(StkArgs, Tail) -> + case length(StkArgs) of + 0 -> + Tail; + NrStkArgs -> + [hipe_ppc:mk_pseudo_call_prepare(NrStkArgs) | + mk_store_args(StkArgs, NrStkArgs * word_size(), Tail)] + end. + +mk_store_args([Arg|Args], PrevOffset, Tail) -> + Offset = PrevOffset - word_size(), + {Src,FixSrc} = + case hipe_ppc:is_temp(Arg) of + true -> + {Arg, []}; + _ -> + Tmp = new_tagged_temp(), + {Tmp, mk_li(Tmp, Arg)} + end, + Store = hipe_ppc:mk_store('stw', Src, Offset, mk_sp()), + mk_store_args(Args, Offset, FixSrc ++ [Store | Tail]); +mk_store_args([], _, Tail) -> + Tail. + +conv_comment(I, Map, Data) -> + I2 = [hipe_ppc:mk_comment(hipe_rtl:comment_text(I))], + {I2, Map, Data}. + +conv_enter(I, Map, Data) -> + {Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map), + {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0), + I2 = mk_enter(Fun, Args, hipe_rtl:enter_type(I)), + {I2, Map1, Data}. + +mk_enter(Fun, Args, Linkage) -> + {FixFunC,FunC} = fix_func(Fun), + Arity = length(Args), + {RegArgs,StkArgs} = split_args(Args), + FixFunC ++ + move_actuals(RegArgs, + [hipe_ppc:mk_pseudo_tailcall_prepare(), + hipe_ppc:mk_pseudo_tailcall(FunC, Arity, StkArgs, Linkage)]). + +conv_goto(I, Map, Data) -> + I2 = [hipe_ppc:mk_b_label(hipe_rtl:goto_label(I))], + {I2, Map, Data}. + +conv_label(I, Map, Data) -> + I2 = [hipe_ppc:mk_label(hipe_rtl:label_name(I))], + {I2, Map, Data}. + +conv_load(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), + {Base1, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), + LoadSize = hipe_rtl:load_size(I), + LoadSign = hipe_rtl:load_sign(I), + I2 = mk_load(Dst, Base1, Base2, LoadSize, LoadSign), + {I2, Map2, Data}. + +mk_load(Dst, Base1, Base2, LoadSize, LoadSign) -> + Rest = + case LoadSize of + byte -> + case LoadSign of + signed -> [hipe_ppc:mk_unary('extsb', Dst, Dst)]; + _ -> [] + end; + _ -> [] + end, + LdOp = + case LoadSize of + byte -> 'lbz'; + int32 -> 'lwz'; + word -> 'lwz'; + int16 -> + case LoadSign of + signed -> 'lha'; + unsigned -> 'lhz' + end + end, + case hipe_ppc:is_temp(Base1) of + true -> + case hipe_ppc:is_temp(Base2) of + true -> + mk_load_rr(Dst, Base1, Base2, LdOp, Rest); + _ -> + mk_load_ri(Dst, Base1, Base2, LdOp, Rest) + end; + _ -> + case hipe_ppc:is_temp(Base2) of + true -> + mk_load_ri(Dst, Base2, Base1, LdOp, Rest); + _ -> + mk_load_ii(Dst, Base1, Base2, LdOp, Rest) + end + end. + +mk_load_ii(Dst, Base1, Base2, LdOp, Rest) -> + io:format("~w: RTL load with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_li(Tmp, Base1, + mk_load_ri(Dst, Tmp, Base2, LdOp, Rest)). + +mk_load_ri(Dst, Base, Disp, LdOp, Rest) -> + hipe_ppc:mk_load(LdOp, Dst, Disp, Base, 'new', Rest). + +mk_load_rr(Dst, Base1, Base2, LdOp, Rest) -> + LdxOp = hipe_ppc:ldop_to_ldxop(LdOp), + [hipe_ppc:mk_loadx(LdxOp, Dst, Base1, Base2) | Rest]. + +conv_load_address(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map), + Addr = hipe_rtl:load_address_addr(I), + Type = hipe_rtl:load_address_type(I), + Src = {Addr,Type}, + I2 = [hipe_ppc:mk_pseudo_li(Dst, Src)], + {I2, Map0, Data}. + +conv_load_atom(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map), + Src = hipe_rtl:load_atom_atom(I), + I2 = [hipe_ppc:mk_pseudo_li(Dst, Src)], + {I2, Map0, Data}. + +conv_move(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), + {Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), + I2 = mk_move(Dst, Src, []), + {I2, Map1, Data}. + +mk_move(Dst, Src, Tail) -> + case hipe_ppc:is_temp(Src) of + true -> [hipe_ppc:mk_pseudo_move(Dst, Src) | Tail]; + _ -> mk_li(Dst, Src, Tail) + end. + +conv_return(I, Map, Data) -> + %% TODO: multiple-value returns + {[Arg], Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map), + I2 = mk_move(mk_rv(), Arg, + [hipe_ppc:mk_blr()]), + {I2, Map0, Data}. + +conv_store(I, Map, Data) -> + {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), + StoreSize = hipe_rtl:store_size(I), + I2 = mk_store(Src, Base1, Base2, StoreSize), + {I2, Map2, Data}. + +mk_store(Src, Base1, Base2, StoreSize) -> + StOp = + case StoreSize of + byte -> 'stb'; + int16 -> 'sth'; + int32 -> 'stw'; + word -> 'stw' + end, + case hipe_ppc:is_temp(Src) of + true -> + mk_store2(Src, Base1, Base2, StOp); + _ -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Src, + mk_store2(Tmp, Base1, Base2, StOp)) + end. + +mk_store2(Src, Base1, Base2, StOp) -> + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_rr(Src, Base1, Base2, StOp); + _ -> + mk_store_ri(Src, Base1, Base2, StOp) + end. + +mk_store_ri(Src, Base, Disp, StOp) -> + hipe_ppc:mk_store(StOp, Src, Disp, Base, 'new', []). + +mk_store_rr(Src, Base1, Base2, StOp) -> + StxOp = hipe_ppc:stop_to_stxop(StOp), + [hipe_ppc:mk_storex(StxOp, Src, Base1, Base2)]. + +conv_switch(I, Map, Data) -> + Labels = hipe_rtl:switch_labels(I), + LMap = [{label,L} || L <- Labels], + {NewData, JTabLab} = + case hipe_rtl:switch_sort_order(I) of + [] -> + hipe_consttab:insert_block(Data, word, LMap); + SortOrder -> + hipe_consttab:insert_sorted_block( + Data, word, LMap, SortOrder) + end, + %% no immediates allowed here + {IndexR, Map1} = conv_dst(hipe_rtl:switch_src(I), Map), + JTabR = new_untagged_temp(), + OffsetR = new_untagged_temp(), + DestR = new_untagged_temp(), + I2 = + [hipe_ppc:mk_pseudo_li(JTabR, {JTabLab,constant}), + hipe_ppc:mk_alu('slwi', OffsetR, IndexR, hipe_ppc:mk_uimm16(2)), + hipe_ppc:mk_loadx('lwzx', DestR, JTabR, OffsetR), + hipe_ppc:mk_mtspr('ctr', DestR), + hipe_ppc:mk_bctr(Labels)], + {I2, Map1, NewData}. + +%%% Create a conditional branch. +%%% If the condition tests CR0[SO], rewrite the path +%%% corresponding to SO being set to clear XER[SO]. + +mk_pseudo_bc(BCond, TrueLabel, FalseLabel, Pred) -> + case BCond of + 'so' -> + NewTrueLabel = hipe_gensym:get_next_label(ppc), + ZeroR = new_untagged_temp(), + [hipe_ppc:mk_pseudo_bc(BCond, NewTrueLabel, FalseLabel, Pred), + hipe_ppc:mk_label(NewTrueLabel) | + mk_li(ZeroR, 0, + [hipe_ppc:mk_mtspr('xer', ZeroR), + hipe_ppc:mk_b_label(TrueLabel)])]; + 'ns' -> + NewFalseLabel = hipe_gensym:get_next_label(ppc), + ZeroR = new_untagged_temp(), + [hipe_ppc:mk_pseudo_bc(BCond, TrueLabel, NewFalseLabel, Pred), + hipe_ppc:mk_label(NewFalseLabel) | + mk_li(ZeroR, 0, + [hipe_ppc:mk_mtspr('xer', ZeroR), + hipe_ppc:mk_b_label(FalseLabel)])]; + _ -> + [hipe_ppc:mk_pseudo_bc(BCond, TrueLabel, FalseLabel, Pred)] + end. + +%%% Load an integer constant into a register. + +mk_li(Dst, Value) -> mk_li(Dst, Value, []). + +mk_li(Dst, Value, Tail) -> + hipe_ppc:mk_li(Dst, Value, Tail). + +%%% Check if an RTL ALU or ALUB operator commutes. + +rtl_aluop_commutes(RtlAluOp) -> + case RtlAluOp of + 'add' -> true; + 'mul' -> true; + 'or' -> true; + 'and' -> true; + 'xor' -> true; + _ -> false + end. + +%%% Split a list of formal or actual parameters into the +%%% part passed in registers and the part passed on the stack. +%%% The parameters passed in registers are also tagged with +%%% the corresponding registers. + +split_args(Args) -> + split_args(0, hipe_ppc_registers:nr_args(), Args, []). + +split_args(I, N, [Arg|Args], RegArgs) when I < N -> + Reg = hipe_ppc_registers:arg(I), + Temp = hipe_ppc:mk_temp(Reg, 'tagged'), + split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]); +split_args(_, _, StkArgs, RegArgs) -> + {RegArgs, StkArgs}. + +%%% Convert a list of actual parameters passed in +%%% registers (from split_args/1) to a list of moves. + +move_actuals([{Src,Dst}|Actuals], Rest) -> + move_actuals(Actuals, mk_move(Dst, Src, Rest)); +move_actuals([], Rest) -> + Rest. + +%%% Convert a list of formal parameters passed in +%%% registers (from split_args/1) to a list of moves. + +move_formals([{Dst,Src}|Formals], Rest) -> + move_formals(Formals, [hipe_ppc:mk_pseudo_move(Dst, Src) | Rest]); +move_formals([], Rest) -> + Rest. + +%%% Convert a 'fun' operand (MFA, prim, or temp) + +conv_fun(Fun, Map) -> + case hipe_rtl:is_var(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case hipe_rtl:is_reg(Fun) of + true -> + conv_dst(Fun, Map); + false -> + if is_atom(Fun) -> + {hipe_ppc:mk_prim(Fun), Map}; + true -> + {conv_mfa(Fun), Map} + end + end + end. + +%%% Convert an MFA operand. + +conv_mfa({M,F,A}) when is_atom(M), is_atom(F), is_integer(A) -> + hipe_ppc:mk_mfa(M, F, A). + +%%% Convert an RTL source operand (imm/var/reg). +%%% Returns a temp or a naked integer. + +conv_src(Opnd, Map) -> + case hipe_rtl:is_imm(Opnd) of + true -> + Value = hipe_rtl:imm_value(Opnd), + if is_integer(Value) -> + {Value, Map} + end; + false -> + conv_dst(Opnd, Map) + end. + +conv_src_list([O|Os], Map) -> + {V, Map1} = conv_src(O, Map), + {Vs, Map2} = conv_src_list(Os, Map1), + {[V|Vs], Map2}; +conv_src_list([], Map) -> + {[], Map}. + +%%% Convert an RTL destination operand (var/reg). + +conv_fpreg(Opnd, Map) -> + case hipe_rtl:is_fpreg(Opnd) of + true -> conv_dst(Opnd, Map) + end. + +conv_dst(Opnd, Map) -> + {Name, Type} = + case hipe_rtl:is_var(Opnd) of + true -> + {hipe_rtl:var_index(Opnd), 'tagged'}; + false -> + case hipe_rtl:is_fpreg(Opnd) of + true -> + {hipe_rtl:fpreg_index(Opnd), 'double'}; + false -> + {hipe_rtl:reg_index(Opnd), 'untagged'} + end + end, + IsPrecoloured = + case Type of + 'double' -> hipe_ppc_registers:is_precoloured_fpr(Name); + _ -> hipe_ppc_registers:is_precoloured_gpr(Name) + end, + case IsPrecoloured of + true -> + {hipe_ppc:mk_temp(Name, Type), Map}; + false -> + case vmap_lookup(Map, Opnd) of + {value, NewTemp} -> + {NewTemp, Map}; + _ -> + NewTemp = hipe_ppc:mk_new_temp(Type), + {NewTemp, vmap_bind(Map, Opnd, NewTemp)} + end + end. + +conv_dst_list([O|Os], Map) -> + {Dst, Map1} = conv_dst(O, Map), + {Dsts, Map2} = conv_dst_list(Os, Map1), + {[Dst|Dsts], Map2}; +conv_dst_list([], Map) -> + {[], Map}. + +conv_formals(Os, Map) -> + conv_formals(hipe_ppc_registers:nr_args(), Os, Map, []). + +conv_formals(N, [O|Os], Map, Res) -> + Type = + case hipe_rtl:is_var(O) of + true -> 'tagged'; + _ -> 'untagged' + end, + Dst = + if N > 0 -> hipe_ppc:mk_new_temp(Type); % allocatable + true -> hipe_ppc:mk_new_nonallocatable_temp(Type) + end, + Map1 = vmap_bind(Map, O, Dst), + conv_formals(N-1, Os, Map1, [Dst|Res]); +conv_formals(_, [], Map, Res) -> + {lists:reverse(Res), Map}. + +%%% Create a temp representing the stack pointer register. + +mk_sp() -> + hipe_ppc:mk_temp(hipe_ppc_registers:stack_pointer(), 'untagged'). + +%%% Create a temp representing the return value register. + +mk_rv() -> + hipe_ppc:mk_temp(hipe_ppc_registers:return_value(), 'tagged'). + +%%% new_untagged_temp -- conjure up an untagged scratch reg + +new_untagged_temp() -> + hipe_ppc:mk_new_temp('untagged'). + +%%% new_tagged_temp -- conjure up a tagged scratch reg + +new_tagged_temp() -> + hipe_ppc:mk_new_temp('tagged'). + +%%% Map from RTL var/reg operands to temps. + +vmap_empty() -> + gb_trees:empty(). + +vmap_lookup(Map, Key) -> + gb_trees:lookup(Key, Map). + +vmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +word_size() -> + hipe_rtl_arch:word_size(). diff --git a/lib/hipe/prebuild.skip b/lib/hipe/prebuild.skip new file mode 100644 index 0000000000..9c558e357c --- /dev/null +++ b/lib/hipe/prebuild.skip @@ -0,0 +1 @@ +. diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile new file mode 100644 index 0000000000..5ab70d1837 --- /dev/null +++ b/lib/hipe/regalloc/Makefile @@ -0,0 +1,123 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = hipe_ig hipe_ig_moves hipe_moves \ + hipe_node_sets hipe_spillcost hipe_reg_worklists \ + hipe_adj_list \ + hipe_temp_map \ + hipe_optimistic_regalloc \ + hipe_coalescing_regalloc \ + hipe_graph_coloring_regalloc \ + hipe_regalloc_loop \ + hipe_ls_regalloc \ + hipe_ppc_specific hipe_ppc_specific_fp \ + hipe_sparc_specific hipe_sparc_specific_fp \ + hipe_arm_specific \ + hipe_x86_specific hipe_x86_specific_x87 \ + hipe_amd64_specific hipe_amd64_specific_sse2 hipe_amd64_specific_x87 + +HRL_FILES= +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars# +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +$(EBIN)/hipe_amd64_specific.beam: hipe_x86_specific.erl +$(EBIN)/hipe_amd64_specific_x87.beam: hipe_x86_specific_x87.erl +$(EBIN)/hipe_coalescing_regalloc.beam: ../main/hipe.hrl +$(EBIN)/hipe_graph_coloring_regalloc.beam: ../main/hipe.hrl +$(EBIN)/hipe_ig.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_spillcost.hrl +$(EBIN)/hipe_ig_moves.beam: ../util/hipe_vectors.hrl +$(EBIN)/hipe_ls_regalloc.beam: ../main/hipe.hrl +$(EBIN)/hipe_optimistic_regalloc.beam: ../main/hipe.hrl +$(EBIN)/hipe_regalloc_loop.beam: ../main/hipe.hrl +$(EBIN)/hipe_spillcost.beam: hipe_spillcost.hrl +$(EBIN)/hipe_temp_map.beam: ../main/hipe.hrl diff --git a/lib/hipe/regalloc/hipe_adj_list.erl b/lib/hipe/regalloc/hipe_adj_list.erl new file mode 100644 index 0000000000..b55b22cb22 --- /dev/null +++ b/lib/hipe/regalloc/hipe_adj_list.erl @@ -0,0 +1,143 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_adj_list.erl +%% Author : Andreas Wallin +%% Purpose : Keeps track of adjacency lists for the inference graph. +%% Created : 18 Mar 2000 by Andreas Wallin +%%---------------------------------------------------------------------- + +-module(hipe_adj_list). +-author("Andreas Wallin"). +-export([new/1, + add_edge/3, + %% add_edges/3, + remove_edge/3, + %% remove_edges/3, + edges/2]). + +%%---------------------------------------------------------------------- +%% Function: new +%% +%% Description: Creates an empty structure for adjacency lists +%% +%% Parameters: +%% Max_nodes -- Limit for node numbers +%% +%% Returns: +%% Empty adj_list structure +%% +%%---------------------------------------------------------------------- + +new(Max_nodes) -> + hipe_vectors:new(Max_nodes, []). + +%%---------------------------------------------------------------------- +%% Function: add_edges +%% +%% Description: Adds edges from a node to other nodes +%% +%% Parameters: +%% U -- A node +%% Vs -- Nodes to add edges to +%% Adj_list -- Old adjacency lists +%% +%% Returns: +%% An updated adj_list data-structure +%% +%%---------------------------------------------------------------------- + +%%add_edges(_, [], Adj_list) -> Adj_list; +%%add_edges(U, Vs, Adj_list) when is_list(Vs), is_integer(U) -> +%% hipe_vectors:set(Adj_list, U, ordsets:union(Vs, hipe_vectors:get(Adj_list, U))). + +%%---------------------------------------------------------------------- +%% Function: add_edge +%% +%% Description: Creates an edge between two nodes +%% +%% Parameters: +%% U -- A node +%% V -- Another node +%% Adj_list -- Old adjacency lists +%% +%% Returns: +%% New adj_list data-structure with (U and V connected) +%% +%%---------------------------------------------------------------------- + +add_edge(U, V, Adj_list) -> % PRE: U =/= V, not V \in adjList[U] + hipe_vectors:set(Adj_list, U, + [V | hipe_vectors:get(Adj_list, U)]). + +%%---------------------------------------------------------------------- +%% Function: remove_edges +%% +%% Description: Removes edges from a node to other nodes +%% +%% Parameters: +%% U -- A node +%% Vs -- Nodes to remove edges to +%% Adj_list -- Old adjacency lists +%% +%% Returns: +%% An updated adj_list data-structure +%% +%%---------------------------------------------------------------------- + +%% remove_edges(_, [], Adj_list) -> Adj_list; +remove_edges(U, Vs, Adj_list) when is_list(Vs), is_integer(U) -> + hipe_vectors:set(Adj_list, U, hipe_vectors:get(Adj_list, U) -- Vs). + +%%---------------------------------------------------------------------- +%% Function: remove_edge +%% +%% Description: Removes an edge between two nodes +%% +%% Parameters: +%% U -- A node +%% V -- Another node +%% Adj_list -- Old adjacency lists +%% +%% Returns: +%% New adjacency lists with (U and V not connected) +%% +%%---------------------------------------------------------------------- + +remove_edge(U, U, Adj_list) -> Adj_list; +remove_edge(U, V, Adj_list) when is_integer(U), is_integer(V) -> + remove_edges(U, [V], Adj_list). + +%%---------------------------------------------------------------------- +%% Function: edges +%% +%% Description: Tells where the edges of a node go +%% +%% Parameters: +%% U -- A node +%% Adj_list -- Adjacency lists +%% +%% Returns: +%% The set of nodes connected to U +%% +%%---------------------------------------------------------------------- + +edges(U, Adj_list) -> + hipe_vectors:get(Adj_list, U). diff --git a/lib/hipe/regalloc/hipe_amd64_specific.erl b/lib/hipe/regalloc/hipe_amd64_specific.erl new file mode 100644 index 0000000000..91a8c7253a --- /dev/null +++ b/lib/hipe/regalloc/hipe_amd64_specific.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-define(HIPE_AMD64, true). +-include("hipe_x86_specific.erl"). diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl new file mode 100644 index 0000000000..5654455b44 --- /dev/null +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -0,0 +1,175 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_amd64_specific_sse2). + +-export([number_of_temporaries/1]). + +% The following exports are used as M:F(...) calls from other modules; +%% e.g. hipe_amd64_ra_ls. +-export([analyze/1, + bb/2, + args/1, + labels/1, + livein/2, + liveout/2, + uses/1, + defines/1, + def_use/1, + is_arg/1, %% used by hipe_ls_regalloc + is_move/1, + is_fixed/1, %% used by hipe_graph_coloring_regalloc + is_global/1, + is_precoloured/1, + reg_nr/1, + non_alloc/1, + allocatable/0, + physical_name/1, + all_precoloured/0, + new_spill_index/1, %% used by hipe_ls_regalloc + var_range/1, + breadthorder/1, + postorder/1, + reverse_postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +%%---------------------------------------------------------------------------- + +-include("../flow/cfg.hrl"). + +%%---------------------------------------------------------------------------- + +defun_to_cfg(Defun) -> + hipe_x86_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite(Defun, Coloring). + +reverse_postorder(CFG) -> + hipe_x86_cfg:reverse_postorder(CFG). + +breadthorder(CFG) -> + hipe_x86_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_x86_cfg:postorder(CFG). + +is_global(_Reg) -> + false. + +is_fixed(_Reg) -> + false. + +is_arg(_Reg) -> + false. + +-spec args(#cfg{}) -> []. +args(_CFG) -> + []. + +non_alloc(_) -> + []. + +%% Liveness stuff + +analyze(CFG) -> + hipe_amd64_liveness:analyze(CFG). + +livein(Liveness, L) -> + [X || X <- hipe_amd64_liveness:livein(Liveness, L), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double']. + +liveout(BB_in_out_liveness, Label) -> + [X || X <- hipe_amd64_liveness:liveout(BB_in_out_liveness, Label), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double']. + +%% Registers stuff + +allocatable() -> + hipe_amd64_registers:allocatable_sse2(). + +all_precoloured() -> + allocatable(). + +is_precoloured(Reg) -> + lists:member(Reg,all_precoloured()). + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_x86_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(x86). + +-spec number_of_temporaries(#cfg{}) -> non_neg_integer(). +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(x86), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG, L) -> + hipe_x86_cfg:bb(CFG, L). + +%% AMD64 stuff + +def_use(Instruction) -> + {[X || X <- hipe_amd64_defuse:insn_def(Instruction), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double'], + [X || X <- hipe_amd64_defuse:insn_use(Instruction), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double'] + }. + +uses(I) -> + [X || X <- hipe_amd64_defuse:insn_use(I), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double']. + +defines(I) -> + [X || X <- hipe_amd64_defuse:insn_def(I), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double']. + +is_move(Instruction) -> + case hipe_x86:is_fmove(Instruction) of + true -> + Src = hipe_x86:fmove_src(Instruction), + Dst = hipe_x86:fmove_dst(Instruction), + hipe_x86:is_temp(Src) andalso hipe_x86:temp_is_allocatable(Src) + andalso hipe_x86:is_temp(Dst) andalso hipe_x86:temp_is_allocatable(Dst); + false -> false + end. + +reg_nr(Reg) -> + hipe_x86:temp_reg(Reg). + +-spec new_spill_index(non_neg_integer()) -> pos_integer(). +new_spill_index(SpillIndex) when is_integer(SpillIndex) -> + SpillIndex + 1. diff --git a/lib/hipe/regalloc/hipe_amd64_specific_x87.erl b/lib/hipe/regalloc/hipe_amd64_specific_x87.erl new file mode 100644 index 0000000000..b5e8253ae1 --- /dev/null +++ b/lib/hipe/regalloc/hipe_amd64_specific_x87.erl @@ -0,0 +1,20 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-define(HIPE_AMD64, true). +-include("hipe_x86_specific_x87.erl"). diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl new file mode 100644 index 0000000000..246095e926 --- /dev/null +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -0,0 +1,168 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_specific). + +%% for hipe_coalescing_regalloc: +-export([number_of_temporaries/1 + ,analyze/1 + ,labels/1 + ,all_precoloured/0 + ,bb/2 + ,liveout/2 + ,reg_nr/1 + ,def_use/1 + ,is_move/1 + ,is_precoloured/1 + ,var_range/1 + ,allocatable/0 + ,non_alloc/1 + ,physical_name/1 + ,reverse_postorder/1 + ,livein/2 + ,uses/1 + ,defines/1 + ]). + +%% for hipe_graph_coloring_regalloc: +-export([is_fixed/1]). + +%% for hipe_ls_regalloc: +-export([args/1, is_arg/1, is_global/1, new_spill_index/1]). +-export([breadthorder/1, postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +defun_to_cfg(Defun) -> + hipe_arm_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + hipe_arm_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). + +reverse_postorder(CFG) -> + hipe_arm_cfg:reverse_postorder(CFG). + +non_alloc(CFG) -> + non_alloc(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)). + +%% same as hipe_arm_frame:fix_formals/2 +non_alloc(0, Rest) -> Rest; +non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); +non_alloc(_, []) -> []. + +%% Liveness stuff + +analyze(CFG) -> + hipe_arm_liveness_gpr:analyse(CFG). + +livein(Liveness,L) -> + [X || X <- hipe_arm_liveness_gpr:livein(Liveness,L), + hipe_arm:temp_is_allocatable(X)]. + +liveout(BB_in_out_liveness,Label) -> + [X || X <- hipe_arm_liveness_gpr:liveout(BB_in_out_liveness,Label), + hipe_arm:temp_is_allocatable(X)]. + +%% Registers stuff + +allocatable() -> + hipe_arm_registers:allocatable_gpr(). + +all_precoloured() -> + hipe_arm_registers:all_precoloured(). + +is_precoloured(Reg) -> + hipe_arm_registers:is_precoloured_gpr(Reg). + +is_fixed(R) -> + hipe_arm_registers:is_fixed(R). + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_arm_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(arm). + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(arm), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG,L) -> + hipe_arm_cfg:bb(CFG,L). + +%% ARM stuff + +def_use(Instruction) -> + {defines(Instruction), uses(Instruction)}. + +uses(I) -> + [X || X <- hipe_arm_defuse:insn_use_gpr(I), + hipe_arm:temp_is_allocatable(X)]. + +defines(I) -> + [X || X <- hipe_arm_defuse:insn_def_gpr(I), + hipe_arm:temp_is_allocatable(X)]. + +is_move(Instruction) -> + case hipe_arm:is_pseudo_move(Instruction) of + true -> + Dst = hipe_arm:pseudo_move_dst(Instruction), + case hipe_arm:temp_is_allocatable(Dst) of + false -> false; + _ -> + Src = hipe_arm:pseudo_move_src(Instruction), + hipe_arm:temp_is_allocatable(Src) + end; + false -> false + end. + +reg_nr(Reg) -> + hipe_arm:temp_reg(Reg). + +%%% Linear Scan stuff + +new_spill_index(SpillIndex) when is_integer(SpillIndex) -> + SpillIndex+1. + +breadthorder(CFG) -> + hipe_arm_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_arm_cfg:postorder(CFG). + +is_global(R) -> + R =:= hipe_arm_registers:temp1() orelse + R =:= hipe_arm_registers:temp2() orelse + R =:= hipe_arm_registers:temp3() orelse + hipe_arm_registers:is_fixed(R). + +is_arg(R) -> + hipe_arm_registers:is_arg(R). + +args(CFG) -> + hipe_arm_registers:args(hipe_arm_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl new file mode 100644 index 0000000000..5a4b017c71 --- /dev/null +++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl @@ -0,0 +1,1029 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%----------------------------------------------------------------------- +%% File : hipe_coalescing_regalloc.erl +%% Authors : Andreas Wallin +%% Thorild Selén +%% Ingemar Åberg +%% Purpose : Play paintball with registers on a target machine. We win +%% if they are all colored. This is an iterated coalescing +%% register allocator. +%% Created : 4 Mar 2000 +%%----------------------------------------------------------------------- + +-module(hipe_coalescing_regalloc). +-export([regalloc/5]). + +%%-ifndef(DEBUG). +%%-define(DEBUG,true). +%%-endif. +-include("../main/hipe.hrl"). + +%%----------------------------------------------------------------------- +%% Function: regalloc +%% +%% Description: Creates a K coloring for a function. +%% Parameters: +%% CFG -- A control flow graph +%% SpillIndex -- Last index of spill variable +%% SpillLimit -- Temporaries with numbers higher than this have +%% infinite spill cost. +%% Consider changing this to a set. +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% Coloring -- A coloring for specified CFG +%% SpillIndex0 -- A new spill index +%%----------------------------------------------------------------------- + +regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> + %% Build interference graph + ?debug_msg("Build IG\n", []), + IG = hipe_ig:build(CFG, Target), + %% io:format("IG: ~p\n", [IG]), + + ?debug_msg("Init\n", []), + Num_Temps = Target:number_of_temporaries(CFG), + ?debug_msg("Coalescing RA: num_temps = ~p~n", [Num_Temps]), + Allocatable = Target:allocatable(), + K = length(Allocatable), + All_colors = colset_from_list(Allocatable), + + %% Add registers with their own coloring + ?debug_msg("Moves\n", []), + Move_sets = hipe_moves:new(IG), + + ?debug_msg("Build Worklist\n", []), + Worklists = hipe_reg_worklists:new(IG, Target, CFG, Move_sets, K, Num_Temps), + Alias = initAlias(Num_Temps), + + ?debug_msg("Do coloring\n~p~n", [Worklists]), + {_IG0, Worklists0, _Moves0, Alias0} = + do_coloring(IG, Worklists, Move_sets, Alias, K, SpillLimit, Target), + %% io:format("SelStk0 ~w\n",[SelStk0]), + ?debug_msg("Init node sets\n", []), + Node_sets = hipe_node_sets:new(), + %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + ?debug_msg("Default coloring\n", []), + {Color0,Node_sets1} = + defaultColoring(Target:all_precoloured(), + initColor(Num_Temps), Node_sets, Target), + + ?debug_msg("Assign colors\n", []), + {Color1,Node_sets2} = + assignColors(hipe_reg_worklists:stack(Worklists0), Node_sets1, Color0, + Alias0, All_colors, Target), + %% io:format("color0:~w\nColor1:~w\nNodes:~w\nNodes2:~w\nNum_Temps:~w\n",[Color0,Color1,Node_sets,Node_sets2,Num_Temps]), + + ?debug_msg("Build mapping ~p\n", [Node_sets2]), + Coloring = build_namelist(Node_sets2, SpillIndex, Alias0, Color1), + ?debug_msg("Coloring ~p\n", [Coloring]), + Coloring. + +%%---------------------------------------------------------------------- +%% Function: do_coloring +%% +%% Description: Create a coloring. That is, play paintball. +%% Parameters: +%% IG -- An interference graph +%% Worklists -- Worklists, that is simplify, spill and freeze +%% Moves -- Moves sets, that is coalesced, constrained +%% and so on. +%% Alias -- Tells if two temporaries can have their value +%% in the same register. +%% K -- Want to create a K coloring. +%% SpillLimit -- Try not to spill nodes that are above the spill limit. +%% +%% Returns: +%% IG -- Updated interference graph +%% Worklists -- Updated Worklists structure +%% Moves -- Updated Moves structure +%% Alias -- Updates Alias structure +%% +%%---------------------------------------------------------------------- + +do_coloring(IG, Worklists, Moves, Alias, K, SpillLimit, Target) -> + Simplify = not(hipe_reg_worklists:is_empty_simplify(Worklists)), + Coalesce = not(hipe_moves:is_empty_worklist(Moves)), + Freeze = not(hipe_reg_worklists:is_empty_freeze(Worklists)), + Spill = not(hipe_reg_worklists:is_empty_spill(Worklists)), + if Simplify =:= true -> + {IG0, Worklists0, Moves0} = + simplify(hipe_reg_worklists:simplify(Worklists), + IG, + Worklists, + Moves, + K), + do_coloring(IG0, Worklists0, Moves0, Alias, K, SpillLimit, Target); + Coalesce =:= true -> + {Moves0, IG0, Worklists0, Alias0} = + coalesce(Moves, IG, Worklists, Alias, K, Target), + do_coloring(IG0, Worklists0, Moves0, Alias0, K, SpillLimit, Target); + Freeze =:= true -> + {Worklists0,Moves0} = + freeze(K, Worklists, Moves, IG, Alias), + do_coloring(IG, Worklists0, Moves0, Alias, + K, SpillLimit, Target); + Spill =:= true -> + {Worklists0, Moves0} = + selectSpill(Worklists, Moves, IG, K, Alias, SpillLimit), + do_coloring(IG, Worklists0, Moves0, Alias, K, SpillLimit, Target); + true -> % Catchall case + {IG, Worklists, Moves, Alias} + end. + +%%---------------------------------------------------------------------- +%% Function: adjacent +%% +%% Description: Adjacent nodes that's not coalesced, on the stack or +%% precoloured. +%% Parameters: +%% Node -- Node that you want to adjacents of +%% IG -- The interference graph +%% +%% Returns: +%% A set with nodes/temporaries that are not coalesced, on the +%% stack or precoloured. +%%---------------------------------------------------------------------- + +adjacent(Node, IG, Worklists) -> + Adjacent_edges = hipe_ig:node_adj_list(Node, IG), + hipe_reg_worklists:non_stacked_or_coalesced_nodes(Adjacent_edges, Worklists). + +%%---------------------------------------------------------------------- +%% Function: simplify +%% +%% Description: Simplify graph by removing nodes of low degree. This +%% function simplifies all nodes it can at once. +%% Parameters: +%% [Node|Nodes] -- The simplify worklist +%% IG -- The interference graph +%% Worklists -- The worklists data-structure +%% Moves -- The moves data-structure +%% K -- Produce a K coloring +%% +%% Returns: +%% IG -- An updated interference graph +%% Worklists -- An updated worklists data-structure +%% Moves -- An updated moves data-structure +%%---------------------------------------------------------------------- + +simplify([], IG, Worklists, Moves, _K) -> + {IG, Worklists, Moves}; +simplify([Node|Nodes], IG, Worklists, Moves, K) -> + Worklists0 = hipe_reg_worklists:remove_simplify(Node, Worklists), + ?debug_msg("putting ~w on stack~n",[Node]), + Adjacent = adjacent(Node, IG, Worklists0), + Worklists01 = hipe_reg_worklists:push_stack(Node, Adjacent, Worklists0), + {New_ig, Worklists1, New_moves} = + decrement_degree(Adjacent, IG, Worklists01, Moves, K), + simplify(Nodes, New_ig, Worklists1, New_moves, K). + +%%---------------------------------------------------------------------- +%% Function: decrement_degree +%% +%% Description: Decrement the degree on a number of nodes/temporaries. +%% Parameters: +%% [Node|Nodes] -- Decrement degree on these nodes +%% IG -- The interference graph +%% Worklists -- The Worklists data structure +%% Moves -- The Moves data structure. +%% K -- We want to create a coloring with K colors +%% +%% Returns: +%% IG -- An updated interference graph (the degrees) +%% Worklists -- Updated Worklists. Changed if one degree goes +%% down to K. +%% Moves -- Updated Moves. Changed if a move related temporary +%% gets degree K. +%%---------------------------------------------------------------------- + +decrement_degree([], IG, Worklists, Moves, _K) -> + {IG, Worklists, Moves}; +decrement_degree([Node|Nodes], IG, Worklists, Moves, K) -> + PrevDegree = hipe_ig:get_node_degree(Node, IG), + IG0 = hipe_ig:dec_node_degree(Node, IG), + if PrevDegree =:= K -> + AdjList = hipe_ig:node_adj_list(Node, IG0), + %% Ok since Node (a) is still in IG, and (b) cannot be adjacent to itself + Moves00 = enable_moves_active_to_worklist(hipe_moves:node_movelist(Node, Moves), + Moves), + Moves0 = enable_moves(AdjList, Worklists, Moves00), + Worklists0 = hipe_reg_worklists:remove_spill(Node, Worklists), + case hipe_moves:move_related(Node, Moves0) of + true -> + Worklists1 = hipe_reg_worklists:add_freeze(Node, Worklists0), + decrement_degree(Nodes, IG0, Worklists1, Moves0, K); + _ -> + Worklists1 = hipe_reg_worklists:add_simplify(Node, Worklists0), + decrement_degree(Nodes, IG0, Worklists1, Moves0, K) + end; + true -> + decrement_degree(Nodes, IG0, Worklists, Moves, K) + end. + +%%---------------------------------------------------------------------- +%% Function: enable_moves +%% +%% Description: Make (move-related) nodes that are not yet considered for +%% coalescing, ready for possible coalescing. +%% +%% Parameters: +%% [Node|Nodes] -- A list of move nodes +%% Moves -- The moves data-structure +%% +%% Returns: +%% An updated moves data-structure +%%---------------------------------------------------------------------- + +enable_moves([], _Worklists, Moves) -> Moves; +enable_moves([Node|Nodes], Worklists, Moves) -> + case hipe_reg_worklists:member_stack_or_coalesced(Node, Worklists) of + true -> enable_moves(Nodes, Worklists, Moves); + _ -> + %% moveList[n] suffices since we're checking for activeMoves membership + Node_moves = hipe_moves:node_movelist(Node, Moves), + New_moves = enable_moves_active_to_worklist(Node_moves, Moves), + enable_moves(Nodes, Worklists, New_moves) + end. + +%%---------------------------------------------------------------------- +%% Function: enable_moves_active_to_worklist +%% +%% Description: Make (move-related) nodes that are not yet considered for +%% coalescing, ready for possible coalescing. +%% +%% Parameters: +%% [Node|Nodes] -- A list of move nodes +%% Moves -- The moves data structure +%% +%% Returns: +%% An updated moves data structure +%%---------------------------------------------------------------------- + +enable_moves_active_to_worklist([], Moves) -> Moves; +enable_moves_active_to_worklist([Node|Nodes], Moves) -> + NewMoves = + case hipe_moves:member_active(Node, Moves) of + true -> + hipe_moves:add_worklist(Node, hipe_moves:remove_active(Node, Moves)); + _ -> + Moves + end, + enable_moves_active_to_worklist(Nodes, NewMoves). + +%% Build the namelists, these functions are fast hacks, they use knowledge +%% about data representation that they shouldn't know, bad abstraction. + +build_namelist(NodeSets, Index, Alias, Color) -> + ?debug_msg("Building mapping\n",[]), + ?debug_msg("Vector to list\n",[]), + AliasList = build_alias_list(aliasToList(Alias), + 0, %% The first temporary has index 0 + []), %% Accumulator + ?debug_msg("Alias list:~p\n",[AliasList]), + ?debug_msg("Coalesced\n",[]), + NL1 = build_coalescedlist(AliasList, Color, Alias, []), + ?debug_msg("Coalesced list:~p\n",[NL1]), + ?debug_msg("Regs\n",[]), + NL2 = build_reglist(hipe_node_sets:colored(NodeSets), Color, NL1), + ?debug_msg("Regs list:~p\n",[NL2]), + ?debug_msg("Spills\n",[]), + build_spillist(hipe_node_sets:spilled(NodeSets), Index, NL2). + +build_spillist([], Index, List) -> + {List,Index}; +build_spillist([Node|Nodes], Index, List) -> + ?debug_msg("[~p]: Spill ~p to ~p\n", [?MODULE,Node,Index]), + build_spillist(Nodes, Index+1, [{Node,{spill,Index}}|List]). + +build_coalescedlist([], _Color, _Alias, List) -> + List; +build_coalescedlist([Node|Ns], Color, Alias, List) when is_integer(Node) -> + ?debug_msg("Alias of ~p is ~p~n", [Node, getAlias(Node,Alias)]), + AC = getColor(getAlias(Node, Alias), Color), + build_coalescedlist(Ns, Color, Alias, [{Node,{reg,AC}}|List]). + +build_reglist([], _Color, List) -> + List; +build_reglist([Node|Ns], Color, List) -> + build_reglist(Ns, Color, [{Node,{reg,getColor(Node,Color)}}|List]). + +build_alias_list([], _I, List) -> + List; +build_alias_list([Alias|Aliases], I, List) when is_integer(Alias) -> + build_alias_list(Aliases, I+1, [I|List]); +build_alias_list([_Alias|Aliases], I, List) -> + build_alias_list(Aliases, I+1, List). + +%%---------------------------------------------------------------------- +%% Function: assignColors +%% +%% Description: Tries to assign colors to nodes in a stack. +%% Parameters: +%% Stack -- The SelectStack built by the Select function, +%% this stack contains tuples in the form {Node,Edges} +%% where Node is the Node number and Edges is an ordset +%% containing the numbers of all the adjacent nodes. +%% NodeSets -- This is a record containing all the different node +%% sets that are used in the register allocator. +%% Alias -- This is a mapping from nodes to nodes, if a node has +%% been coalesced this mapping shows the alias for that +%% node. +%% AllColors -- This is an ordset containing all the available colors +%% +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% Color -- A mapping from nodes to their respective color. +%% NodeSets -- The updated node sets. +%%---------------------------------------------------------------------- + +assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) -> + case Stack of + [] -> + {Color,NodeSets}; + [{Node,Edges}|Stack1] -> + ?debug_msg("Coloring Node: ~p~n",[Node]), + ?IF_DEBUG(lists:foreach(fun (_E) -> + ?msg(" Edge ~w-><~w>->~w~n", + begin A = getAlias(_E,Alias), + [_E,A,getColor(A,Color)] + end) + end, Edges), + []), + %% When debugging, check that Node isn't precoloured. + OkColors = findOkColors(Edges, AllColors, Color, Alias), + case colset_is_empty(OkColors) of + true -> % Spill case + NodeSets1 = hipe_node_sets:add_spilled(Node, NodeSets), + assignColors(Stack1, NodeSets1, Color, Alias, AllColors, Target); + false -> % Colour case + Col = colset_smallest(OkColors), + NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), + Color1 = setColor(Node, Target:physical_name(Col), Color), + assignColors(Stack1, NodeSets1, Color1, Alias, AllColors, Target) + end + end. + +%%--------------------------------------------------------------------- +%% Function: defaultColoring +%% +%% Description: Make the default coloring +%% Parameters: +%% Regs -- The list of registers to be default colored +%% Color -- The color mapping that shall be changed +%% NodeSets -- The node sets that shall be updated +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% NewColor -- The updated color mapping +%% NewNodeSets -- The updated node sets +%%--------------------------------------------------------------------- + +defaultColoring([], Color, NodeSets, _Target) -> + {Color,NodeSets}; +defaultColoring([Reg|Regs], Color, NodeSets, Target) -> + Color1 = setColor(Reg,Target:physical_name(Reg), Color), + NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets), + defaultColoring(Regs, Color1, NodeSets1, Target). + +%% Find the colors that are OK for a node with certain edges. + +findOkColors(Edges, AllColors, Color, Alias) -> + find(Edges, AllColors, Color, Alias). + +%% Find all the colors of the nodes in the list [Node|Nodes] and remove them +%% from the set OkColors, when the list is empty, return OkColors. + +find([], OkColors, _Color, _Alias) -> + OkColors; +find([Node0|Nodes], OkColors, Color, Alias) -> + Node = getAlias(Node0, Alias), + case getColor(Node, Color) of + [] -> + find(Nodes, OkColors, Color, Alias); + Col -> + OkColors1 = colset_del_element(Col, OkColors), + find(Nodes, OkColors1, Color, Alias) + end. + +%%% +%%% ColSet -- ADT for the set of available colours while +%%% assigning colours. +%%% +-ifdef(notdef). % old ordsets-based implementation +colset_from_list(Allocatable) -> + ordsets:from_list(Allocatable). + +colset_del_element(Colour, ColSet) -> + ordsets:del_element(Colour, ColSet). + +colset_is_empty(ColSet) -> + case ColSet of + [] -> true; + [_|_] -> false + end. + +colset_smallest([Colour|_]) -> + Colour. +-endif. + +-ifdef(notdef). % new gb_sets-based implementation +colset_from_list(Allocatable) -> + gb_sets:from_list(Allocatable). + +colset_del_element(Colour, ColSet) -> + %% Must use gb_sets:delete_any/2 since gb_sets:del_element/2 + %% fails if the element isn't present. Bummer. + gb_sets:delete_any(Colour, ColSet). + +colset_is_empty(ColSet) -> + gb_sets:is_empty(ColSet). + +colset_smallest(ColSet) -> + gb_sets:smallest(ColSet). +-endif. + +%%-ifdef(notdef). % new bitmask-based implementation +colset_from_list(Allocatable) -> + colset_from_list(Allocatable, 0). + +colset_from_list([], ColSet) -> + ColSet; +colset_from_list([Colour|Allocatable], ColSet) -> + colset_from_list(Allocatable, ColSet bor (1 bsl Colour)). + +colset_del_element(Colour, ColSet) -> + ColSet band bnot(1 bsl Colour). + +colset_is_empty(0) -> true; +colset_is_empty(_) -> false. + +colset_smallest(ColSet) -> + bitN_log2(ColSet band -ColSet, 0). + +bitN_log2(BitN, ShiftN) -> + if BitN > 16#ffff -> + bitN_log2(BitN bsr 16, ShiftN + 16); + true -> + ShiftN + hweight16(BitN - 1) + end. + +hweight16(W) -> + Res1 = ( W band 16#5555) + (( W bsr 1) band 16#5555), + Res2 = (Res1 band 16#3333) + ((Res1 bsr 2) band 16#3333), + Res3 = (Res2 band 16#0F0F) + ((Res2 bsr 4) band 16#0F0F), + (Res3 band 16#00FF) + ((Res3 bsr 8) band 16#00FF). +%%-endif. + +%%% +%%% Colour ADT providing a partial mapping from nodes to colours. +%%% + +initColor(NrNodes) -> + {colmap, hipe_bifs:array(NrNodes, [])}. + +getColor(Node, {colmap, ColMap}) -> + hipe_bifs:array_sub(ColMap, Node). + +setColor(Node, Colour, {colmap, ColMap} = Col) -> + hipe_bifs:array_update(ColMap, Node, Colour), + Col. + +%%% +%%% Alias ADT providing a partial mapping from nodes to nodes. +%%% + +initAlias(NrNodes) -> + {alias, hipe_bifs:array(NrNodes, [])}. + +getAlias(Node, {alias, AliasMap} = Alias) -> + case hipe_bifs:array_sub(AliasMap, Node) of + [] -> + Node; + AliasNode -> + getAlias(AliasNode, Alias) + end. + +setAlias(Node, AliasNode, {alias, AliasMap} = Alias) -> + hipe_bifs:array_update(AliasMap, Node, AliasNode), + Alias. + +aliasToList({alias,AliasMap}) -> + aliasToList(AliasMap, hipe_bifs:array_length(AliasMap), []). + +aliasToList(AliasMap, I1, Tail) -> + I0 = I1 - 1, + if I0 >= 0 -> + aliasToList(AliasMap, I0, [hipe_bifs:array_sub(AliasMap, I0)|Tail]); + true -> + Tail + end. + +%%---------------------------------------------------------------------- +%% Function: coalesce +%% +%% Description: Coalesces nodes in worklist +%% Parameters: +%% Moves -- Current move information +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Alias -- Current aliases for temporaries +%% K -- Number of registers +%% +%% Returns: +%% {Moves, IG, Worklists, Alias} +%% (Updated versions of above structures, after coalescing) +%%---------------------------------------------------------------------- + +coalesce(Moves, IG, Worklists, Alias, K, Target) -> + case hipe_moves:worklist_get_and_remove(Moves) of + {[],Moves0} -> + %% Moves marked for removal from worklistMoves by FreezeMoves() + %% are removed by worklist_get_and_remove(). This case is unlikely, + %% but can occur if only stale moves remain in worklistMoves. + {Moves0,IG,Worklists,Alias}; + {Move,Moves0} -> + {Dest,Source} = hipe_moves:get_move(Move, Moves0), + ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), + Alias_src = getAlias(Source, Alias), + Alias_dst = getAlias(Dest, Alias), + {U,V} = case Target:is_precoloured(Alias_dst) of + true -> {Alias_dst, Alias_src}; + false -> {Alias_src, Alias_dst} + end, + %% When debugging, check that neither V nor U is on the stack. + if U =:= V -> + Moves1 = Moves0, % drop coalesced move Move + Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), + {Moves1, IG, Worklists1, Alias}; + true -> + case (Target:is_precoloured(V) orelse + hipe_ig:nodes_are_adjacent(U, V, IG)) of + true -> + Moves1 = Moves0, % drop constrained move Move + Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), + Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target), + {Moves1, IG, Worklists2, Alias}; + false -> + case (case Target:is_precoloured(U) of + true -> + AdjV = hipe_ig:node_adj_list(V, IG), + all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); + false -> + AdjV = hipe_ig:node_adj_list(V, IG), + AdjU = hipe_ig:node_adj_list(U, IG), + conservative(AdjU, AdjV, U, Worklists, IG, K) + end) of + true -> + Moves1 = Moves0, % drop coalesced move Move + {IG1,Worklists1,Moves2,Alias1} = + combine(U, V, IG, Worklists, Moves1, Alias, K, Target), + Worklists2 = add_worklist(Worklists1, U, K, Moves2, IG1, Target), + {Moves2, IG1, Worklists2, Alias1}; + false -> + Moves1 = hipe_moves:add_active(Move, Moves0), + {Moves1, IG, Worklists, Alias} + end + end + end + end. + +%%---------------------------------------------------------------------- +%% Function: add_worklist +%% +%% Description: Builds new worklists where U is transferred from freeze +%% to simplify, if possible +%% +%% Parameters: +%% Worklists -- Current worklists +%% U -- Node to operate on +%% K -- Number of registers +%% Moves -- Current move information +%% IG -- Interference graph +%% Target -- The containing the target-specific functions +%% +%% Returns: +%% Worklists (updated) +%%---------------------------------------------------------------------- + +add_worklist(Worklists, U, K, Moves, IG, Target) -> + case (not(Target:is_precoloured(U)) + andalso not(hipe_moves:move_related(U, Moves)) + andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of + true -> + hipe_reg_worklists:transfer_freeze_simplify(U, Worklists); + false -> + Worklists + end. + +%%---------------------------------------------------------------------- +%% Function: combine +%% +%% Description: Combines two nodes into one (used when coalescing) +%% +%% Parameters: +%% U -- First node to operate on +%% V -- Second node to operate on +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Moves -- Current move information +%% Alias -- Current aliases for temporaries +%% K -- Number of registers +%% +%% Returns: +%% {IG, Worklists, Moves, Alias} (updated) +%%---------------------------------------------------------------------- + +combine(U, V, IG, Worklists, Moves, Alias, K, Target) -> + Worklists1 = case hipe_reg_worklists:member_freeze(V, Worklists) of + true -> hipe_reg_worklists:remove_freeze(V, Worklists); + false -> hipe_reg_worklists:remove_spill(V, Worklists) + end, + Worklists11 = hipe_reg_worklists:add_coalesced(V, Worklists1), + + ?debug_msg("Coalescing ~p and ~p to ~p~n",[V,U,U]), + + Alias1 = setAlias(V, U, Alias), + + %% Typo in published algorithm: s/nodeMoves/moveList/g to fix. + %% XXX: moveList[u] \union moveList[v] OR NodeMoves(u) \union NodeMoves(v) ??? + %% XXX: NodeMoves() is correct, but unnecessarily strict. The ordsets:union + %% constrains NodeMoves() to return an ordset. + Moves1 = hipe_moves:update_movelist(U, + ordsets:union(hipe_moves:node_moves(U, Moves), + hipe_moves:node_moves(V, Moves)), + Moves), + %% Missing in published algorithm. From Tiger book Errata. + Moves2 = enable_moves_active_to_worklist(hipe_moves:node_movelist(V, Moves1), Moves1), + AdjV = hipe_ig:node_adj_list(V, IG), + + {IG1, Worklists2, Moves3} = + combine_edges(AdjV, U, IG, Worklists11, Moves2, K, Target), + + New_worklists = case (not(hipe_ig:is_trivially_colourable(U, K, IG1)) + andalso + hipe_reg_worklists:member_freeze(U, Worklists2)) of + true -> + hipe_reg_worklists:transfer_freeze_spill(U, Worklists2); + false -> Worklists2 + end, + {IG1, New_worklists, Moves3, Alias1}. + +%%---------------------------------------------------------------------- +%% Function: combine_edges +%% +%% Description: For each node in a list, make an edge between that node +%% and node U, and decrement its degree by 1 +%% (Used when two nodes are coalesced, to connect all nodes +%% adjacent to one node to the other node) +%% +%% Parameters: +%% [T|Ts] -- List of nodes to make edges to +%% U -- Node to make edges from +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Moves -- Current move information +%% K -- Number of registers +%% +%% Returns: +%% {IG, Worklists, Moves} (updated) +%%---------------------------------------------------------------------- + +combine_edges([], _U, IG, Worklists, Moves, _K, _Target) -> + {IG, Worklists, Moves}; +combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> + case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of + true -> combine_edges(Ts, U, IG, Worklists, Moves, K, Target); + _ -> + %% XXX: The issue below occurs because the T->V edge isn't removed. + %% This causes adjList[T] to contain stale entries, to possibly grow + %% (if T isn't already adjacent to U), and degree[T] to possibly + %% increase (again, if T isn't already adjacent to U). + %% The decrement_degree() call repairs degree[T] but not adjList[T]. + %% It would be better to physically replace T->V with T->U, and only + %% decrement_degree(T) if T->U already existed. + %% + %% add_edge() may change a low-degree move-related node to be of + %% significant degree. In this case the node belongs in the spill + %% worklist, and that's where decrement_degree() expects to find it. + %% This issue is not covered in the published algorithm. + OldDegree = hipe_ig:get_node_degree(T, IG), + IG1 = hipe_ig:add_edge(T, U, IG, Target), + NewDegree = hipe_ig:get_node_degree(T, IG1), + Worklists0 = + if NewDegree =:= K, OldDegree =:= K-1 -> + %% io:format("~w:combine_edges(): repairing worklist membership for node ~w\n", [?MODULE,T]), + %% The node T must be on the freeze worklist: + %% 1. Since we're coalescing, the simplify worklist must have been + %% empty when combine_edges() started. + %% 2. decrement_degree() may put the node T back on the simplify + %% worklist, but that occurs after the worklists repair step. + %% 3. There are no duplicates among the edges. + Worklists00 = hipe_reg_worklists:remove_freeze(T, Worklists), + hipe_reg_worklists:add_spill(T, Worklists00); + true -> + Worklists + end, + {IG2, Worklists1, Moves1} = + decrement_degree([T], IG1, Worklists0, Moves, K), + combine_edges(Ts, U, IG2, Worklists1, Moves1, K, Target) + end. + +%%---------------------------------------------------------------------- +%% Function: ok +%% +%% Description: Checks if a node T is suitable to coalesce with R +%% +%% Parameters: +%% T -- Node to test +%% R -- Other node to test +%% IG -- Interference graph +%% K -- Number of registers +%% Target -- The module containing the target-specific functions +%% +%% Returns: +%% true iff coalescing is OK +%%---------------------------------------------------------------------- + +ok(T, R, IG, K, Target) -> + ((hipe_ig:is_trivially_colourable(T, K, IG)) + orelse Target:is_precoloured(T) + orelse hipe_ig:nodes_are_adjacent(T, R, IG)). + +%%---------------------------------------------------------------------- +%% Function: all_ok +%% +%% Description: True iff, for every T in the list, OK(T,U) +%% +%% Parameters: +%% [T|Ts] -- Nodes to test +%% U -- Node to test for coalescing +%% IG -- Interference graph +%% K -- Number of registers +%% Target -- The module containing the target-specific functions +%% +%% Returns: +%% true iff coalescing is OK for all nodes in the list +%%---------------------------------------------------------------------- + +all_adjacent_ok([], _U, _Worklists, _IG, _K, _Target) -> true; +all_adjacent_ok([T|Ts], U, Worklists, IG, K, Target) -> + case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of + true -> all_adjacent_ok(Ts, U, Worklists, IG, K, Target); + _ -> + %% 'andalso' does not preserve tail-recursion + case ok(T, U, IG, K, Target) of + true -> all_adjacent_ok(Ts, U, Worklists, IG, K, Target); + false -> false + end + end. + +%%---------------------------------------------------------------------- +%% Function: conservative +%% +%% Description: Checks if nodes can be safely coalesced according to +%% the Briggs' conservative coalescing heuristic +%% +%% Parameters: +%% Nodes -- Adjacent nodes +%% IG -- Interference graph +%% K -- Number of registers +%% +%% Returns: +%% true iff coalescing is safe +%%---------------------------------------------------------------------- + +conservative(AdjU, AdjV, U, Worklists, IG, K) -> + conservative_countU(AdjU, AdjV, U, Worklists, IG, K, 0). + +%%---------------------------------------------------------------------- +%% Function: conservative_count +%% +%% Description: Counts degrees for conservative (Briggs' heuristic) +%% +%% Parameters: +%% Nodes -- (Remaining) adjacent nodes +%% IG -- Interference graph +%% K -- Number of registers +%% Cnt -- Accumulator for counting +%% +%% Returns: +%% Final value of accumulator +%%---------------------------------------------------------------------- + +conservative_countU([], AdjV, U, Worklists, IG, K, Cnt) -> + conservative_countV(AdjV, U, Worklists, IG, K, Cnt); +conservative_countU([Node|AdjU], AdjV, U, Worklists, IG, K, Cnt) -> + case hipe_reg_worklists:member_stack_or_coalesced(Node, Worklists) of + true -> conservative_countU(AdjU, AdjV, U, Worklists, IG, K, Cnt); + _ -> + case hipe_ig:is_trivially_colourable(Node, K, IG) of + true -> conservative_countU(AdjU, AdjV, U, Worklists, IG, K, Cnt); + _ -> + Cnt1 = Cnt + 1, + if Cnt1 < K -> + conservative_countU(AdjU, AdjV, U, Worklists, IG, K, Cnt1); + true -> false + end + end + end. + +conservative_countV([], _U, _Worklists, _IG, _K, _Cnt) -> true; +conservative_countV([Node|AdjV], U, Worklists, IG, K, Cnt) -> + case hipe_reg_worklists:member_stack_or_coalesced(Node, Worklists) of + true -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt); + _ -> + case hipe_ig:nodes_are_adjacent(Node, U, IG) of + true -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt); + _ -> + case hipe_ig:is_trivially_colourable(Node, K, IG) of + true -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt); + _ -> + Cnt1 = Cnt + 1, + if Cnt1 < K -> + conservative_countV(AdjV, U, Worklists, IG, K, Cnt1); + true -> false + end + end + end + end. + +%%--------------------------------------------------------------------- +%% Function: selectSpill +%% +%% Description: Select the node to spill and spill it +%% Parameters: +%% WorkLists -- A datatype containing the different worklists +%% Moves -- A datatype containing the move sets +%% IG -- The interference graph +%% K -- The number of available registers +%% Alias -- The alias mapping +%% SpillLimit -- Try not to spill any nodes above the spill limit +%% +%% Returns: +%% WorkLists -- The updated worklists +%% Moves -- The updated moves +%%--------------------------------------------------------------------- + +selectSpill(WorkLists, Moves, IG, K, Alias, SpillLimit) -> + [CAR|CDR] = hipe_reg_worklists:spill(WorkLists), + + SpillCost = getCost(CAR, IG, SpillLimit), + M = findCheapest(CDR, IG, SpillCost, CAR, SpillLimit), + + WorkLists1 = hipe_reg_worklists:remove_spill(M, WorkLists), + %% The published algorithm adds M to the simplify worklist + %% before the freezeMoves() call. That breaks the worklist + %% invariants, which is why the order is switched here. + {WorkLists2,Moves1} = freezeMoves(M, K, WorkLists1, Moves, IG, Alias), + WorkLists3 = hipe_reg_worklists:add_simplify(M, WorkLists2), + {WorkLists3,Moves1}. + +%% Find the node that is cheapest to spill + +findCheapest([], _IG, _Cost, Cheapest, _SpillLimit) -> + Cheapest; +findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) -> + ThisCost = getCost(Node, IG, SpillLimit), + case ThisCost < Cost of + true -> + findCheapest(Nodes, IG, ThisCost, Node, SpillLimit); + false -> + findCheapest(Nodes, IG, Cost, Cheapest, SpillLimit) + end. + +%% Get the cost for spilling a certain node, node numbers above the spill +%% limit are extremely expensive. + +getCost(Node, IG, SpillLimit) -> + case Node > SpillLimit of + true -> inf; + false -> hipe_ig:node_spill_cost(Node, IG) + end. + +%%---------------------------------------------------------------------- +%% Function: freeze +%% +%% Description: When both simplifying and coalescing is impossible we +%% rather freezes a node in stead of spilling, this function +%% selects a node for freezing (it just picks the first one in +%% the list) +%% +%% Parameters: +%% K -- The number of available registers +%% WorkLists -- A datatype containing the different worklists +%% Moves -- A datatype containing the different movelists +%% IG -- Interference graph +%% Alias -- An alias mapping, shows the alias of all coalesced +%% nodes +%% +%% Returns: +%% WorkLists -- The updated worklists +%% Moves -- The updated movelists +%%---------------------------------------------------------------------- + +freeze(K, WorkLists, Moves, IG, Alias) -> + [U|_] = hipe_reg_worklists:freeze(WorkLists), % Smarter routine? + ?debug_msg("freezing node ~p~n", [U]), + WorkLists0 = hipe_reg_worklists:remove_freeze(U, WorkLists), + %% The published algorithm adds U to the simplify worklist + %% before the freezeMoves() call. That breaks the worklist + %% invariants, which is why the order is switched here. + {WorkLists1,Moves1} = freezeMoves(U,K,WorkLists0,Moves,IG,Alias), + WorkLists2 = hipe_reg_worklists:add_simplify(U, WorkLists1), + {WorkLists2,Moves1}. + +%%---------------------------------------------------------------------- +%% Function: freezeMoves +%% +%% Description: Make all move related interferences for a certain node +%% into ordinary interference arcs. +%% +%% Parameters: +%% U -- The node we want to freeze +%% K -- The number of available registers +%% WorkLists -- A datatype containing the different worklists +%% Moves -- A datatype containing the different movelists +%% IG -- Interference graph +%% Alias -- An alias mapping, shows the alias of all coalesced +%% nodes +%% +%% Returns: +%% WorkLists -- The updated worklists +%% Moves -- The updated movelists +%%---------------------------------------------------------------------- + +freezeMoves(U, K, WorkLists, Moves, IG, Alias) -> + Nodes = hipe_moves:node_moves(U, Moves), + freezeEm(U, Nodes, K, WorkLists, Moves, IG, Alias). + +%% Find what the other value in a copy instruction is, return false if +%% the instruction isn't a move with the first argument in it. + +moves(U, Move, Alias, Moves) -> + {X,Y} = hipe_moves:get_move(Move, Moves), + %% The old code (which followed the published algorithm) did + %% not follow aliases before looking for "the other" node. + %% This caused moves() to skip some moves, making some nodes + %% still move-related after freezeMoves(). These move-related + %% nodes were then added to the simplify worklist (by freeze() + %% or selectSpill()), breaking the worklist invariants. Nodes + %% already simplified appeared in coalesce(), were re-added to + %% the simplify worklist by add_worklist(), simplified again, + %% and coloured multiple times by assignColors(). Ouch! + X1 = getAlias(X, Alias), + Y1 = getAlias(Y, Alias), + if U =:= X1 -> Y1; + U =:= Y1 -> X1; + true -> exit({?MODULE,moves}) % XXX: shouldn't happen + end. + +freezeEm(_U, [], _K, WorkLists, Moves, _IG, _Alias) -> + {WorkLists,Moves}; +freezeEm(U,[M|Ms], K, WorkLists, Moves, IG, Alias) -> + V = moves(U, M, Alias, Moves), + {WorkLists2,Moves2} = freezeEm2(U, V, M, K, WorkLists, Moves, IG, Alias), + freezeEm(U, Ms, K, WorkLists2, Moves2, IG, Alias). + +freezeEm2(U, V, M, K, WorkLists, Moves, IG, Alias) -> + case hipe_moves:member_active(M, Moves) of + true -> + Moves1 = hipe_moves:remove_active(M, Moves), + freezeEm3(U, V, M, K, WorkLists, Moves1, IG, Alias); + false -> + Moves1 = hipe_moves:remove_worklist(M, Moves), + freezeEm3(U, V, M, K, WorkLists, Moves1, IG, Alias) + end. + +freezeEm3(_U, V, _M, K, WorkLists, Moves, IG, _Alias) -> + Moves1 = Moves, % drop frozen move M + V1 = V, % getAlias(V,Alias), + %% "not MoveRelated(v)" is cheaper than "NodeMoves(v) = {}" + case ((not hipe_moves:move_related(V1, Moves1)) andalso + hipe_ig:is_trivially_colourable(V1, K, IG)) of + true -> + ?debug_msg("freezing move to ~p~n", [V]), + Worklists1 = hipe_reg_worklists:transfer_freeze_simplify(V1, WorkLists), + {Worklists1, Moves1}; + false -> + {WorkLists, Moves1} + end. diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl new file mode 100644 index 0000000000..ac555b933c --- /dev/null +++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl @@ -0,0 +1,806 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% GRAPH COLORING REGISTER ALLOCATOR +%% +%% A simple graph coloring register allocator: +%% +%% - build interference graph + estimate spill costs +%% - simplify graph (push on stack + spill) +%% - select colors +%% +%% Emits a coloring: a list of {TempName,Location} +%% where Location is {reg,N} or {spill,M} +%% and {reg,N} denotes some register N +%% and {spill,M} denotes the Mth spilled node +%% You have to figure out how to rewrite the code yourself. +%% +%% This version uses vectors rather than hash tables, and uses +%% faster algorithms since all vars are known at the start. +%% The result should be considerably quicker than earlier versions. +%% +%% Deficiencies: +%% - no renaming (to reduce unnecessary register pressure) +%% - spill costs are naive (should use better; e.g., exec.estimates) +%% - no biased coloring (which coalesces moves) +%% - no live range splitting (possibly not critical) +%% +%% *** NOTE *** +%% Uses apply for target specific functions, takes the module name as +%% argument. This target specific module should implement all target +%% specific functions, see the end of the file. +%% + +-module(hipe_graph_coloring_regalloc). +-export([regalloc/5]). + +%%-ifndef(DO_ASSERT). +%%-define(DO_ASSERT, true). +%%-endif. + +%%-ifndef(DEBUG). +%%-define(DEBUG,0). +%%-endif. +-include("../main/hipe.hrl"). + +%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. +-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). +-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). +-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). +-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). + +%% Given CFG and number of colors K, produce a coloring list +%% of items {reg,N} (0 =< N =< K) and {spill,M}, where M is +%% an index denoting 'a location'. +%% (You might use it as a stack index, perhaps.) +%% +%% You can in principle delete check_coloring/2; it merely checks +%% that the coloring agrees with the interference graph (that is, that +%% no neighbors have the same register or spill location). + +%% @spec regalloc(#cfg{}, non_neg_fixnum(), non_neg_fixnum(), atom(), list()) -> {, non_neg_fixnum()} + +regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> + PhysRegs = Target:allocatable(), + ?report2("building IG~n", []), + {IG, Spill} = build_ig(CFG, Target), + + %% check_ig(IG), + ?report3("graph: ~p~nphysical regs: ~p~n", [list_ig(IG), PhysRegs]), + + %% These nodes *can't* be allocated to registers. + NotAllocatable = [Target:reg_nr(X) || X <- Target:non_alloc(CFG)], + %% i.e. Arguments on x86 + ?report2("Nonalloc ~w~n", [NotAllocatable]), + + {Cols, NewSpillIndex} = + color(IG, Spill, + ordsets:from_list(PhysRegs), + SpillIndex, + SpillLimit, + Target:number_of_temporaries(CFG), + Target, NotAllocatable), + Coloring = [{X, {reg, X}} || X <- NotAllocatable] ++ Cols, + ?ASSERT(check_coloring(Coloring, IG, Target)), + + {Coloring, NewSpillIndex}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** BUILD THE INTERFERENCE GRAPH *** +%% +%% Returns {Interference_graph, Spill_cost_dictionary} +%% + +build_ig(CFG, Target) -> + try build_ig0(CFG, Target) + catch error:Rsn -> exit({?MODULE, build_ig, Rsn}) + end. + +build_ig0(CFG, Target) -> + Live = Target:analyze(CFG), + NumN = Target:number_of_temporaries(CFG), % poss. N-1? + {IG, Spill} = build_ig_bbs(Target:labels(CFG), + CFG, + Live, + empty_ig(NumN), + empty_spill(NumN), + Target), + {normalize_ig(IG), Spill}. + +build_ig_bbs([], _CFG, _Live, IG, Spill, _Target) -> + {IG, Spill}; +build_ig_bbs([L|Ls], CFG, Live, IG, Spill, Target) -> + Xs = bb(CFG, L, Target), + {_, NewIG, NewSpill} = + build_ig_bb(Xs, liveout(Live, L, Target), IG, Spill, Target), + build_ig_bbs(Ls, CFG, Live, NewIG, NewSpill, Target). + +build_ig_bb([], LiveOut, IG, Spill, _Target) -> + {LiveOut, IG, Spill}; +build_ig_bb([X|Xs], LiveOut, IG, Spill, Target) -> + {Live,NewIG,NewSpill} = build_ig_bb(Xs, LiveOut, IG, Spill, Target), + build_ig_instr(X, Live, NewIG, NewSpill, Target). + +%% Note: We could add move-related arcs here as well. +%% +%% Note: Ideally, we would like to add all registers to the IG +%% at once rather than doing 'add_nodes' for each instruction. +%% (This is costly, since nodes that already are present are checked!) + +build_ig_instr(X, Live, IG, Spill, Target) -> + {Def, Use} = def_use(X, Target), + ?report3("Live ~w\n~w : Def: ~w Use ~w\n", [Live, X, Def,Use]), + DefList = ordsets:to_list(Def), + NewSpill = inc_spill_costs(DefList, + inc_spill_costs(ordsets:to_list(Use), Spill)), + NewIG = interference_arcs(DefList, ordsets:to_list(Live), IG), + NewLive = ordsets:union(Use, ordsets:subtract(Live, Def)), + {NewLive, NewIG, NewSpill}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +interference_arcs([], _Live, IG) -> + IG; +interference_arcs([X|Xs], Live, IG) -> + interference_arcs(Xs, Live, i_arcs(X, Live, IG)). + +i_arcs(_X, [], IG) -> + IG; +i_arcs(X, [Y|Ys], IG) -> + i_arcs(X, Ys, add_edge(X,Y, IG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +inc_spill_costs([], Spill) -> Spill; +inc_spill_costs([X|Xs], Spill) -> + inc_spill_costs(Xs, inc_spill_cost(X, Spill)). + +inc_spill_cost(X, Spill) -> + set_spill_cost(X, get_spill_cost(X, Spill)+1, Spill). + +get_spill_cost(X, Spill) -> + spill_cost_lookup(X, Spill). + +set_spill_cost(X, N, Spill) -> + spill_cost_update(X, N, Spill). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** COLORING *** +%% +%% Coloring is done straightforwardly: +%% - find the low-degree nodes, put them in low +%% - while low non-empty: +%% * remove x from low +%% * push x on stack +%% * decrement degree of neighbors of x +%% * for each neighbor y of low degree, put y on low +%% - when low empty: +%% - if graph empty, return stack +%% - otherwise +%% * select a node z to spill +%% * push z on stack +%% * decrement degree of neighbors of z +%% * add low-degree neighbors of z to low +%% * restart the while-loop above + +color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, NotAllocatable) -> + try color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, + NumNodes, Target, NotAllocatable) + catch + error:Rsn -> + ?error_msg("Coloring failed with ~p~n", [Rsn]), + ?EXIT(Rsn) + end. + +color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, + NotAllocatable) -> + ?report("simplification of IG~n", []), + K = ordsets:size(PhysRegs), + Nodes = list_ig(IG), + + Low = low_degree_nodes(Nodes, K, NotAllocatable), + + %% Any nodes above the spillimit must be colored first... + MustNotSpill = + if NumNodes > SpillLimit+1 -> + sort_on_degree(lists:seq(SpillLimit+1,NumNodes-1) -- Low,IG); + true -> [] + end, + + ?report(" starting with low degree nodes ~p~n",[Low]), + EmptyStk = [], + Precolored = Target:all_precoloured(), + {Stk, NewSpillIx} = + simplify(Low, NumNodes, Precolored, + IG, Spill, K, SpillIx, EmptyStk, + SpillLimit, Target, NotAllocatable, MustNotSpill), + ?report("selecting colors~n",[]), + {select(Stk, Precolored, IG, K, PhysRegs, NumNodes, Target), + NewSpillIx}. + +sort_on_degree(Nodes, IG) -> + [ Node3 || {_,Node3} <- + lists:sort([{degree(Info),Node2} || + {Info,Node2} <- [{hipe_vectors:get(IG, Node), + Node} || Node <- + Nodes]])]. + +%%%%%%%%%%%%%%%%%%%% +%% +%% Simplification: push all easily colored nodes on a stack; +%% when the list of easy nodes becomes empty, see if graph is +%% empty as well. If it is not, spill a node and continue. +%% If it is empty, return the stack. +%% +%% Notes: +%% - We keep the set of visited nodes around for spill purposes +%% (visited nodes are not considered for spilling) +%% +%% - At present, nodes can be pushed onto the stack even if they +%% already are on the stack. This can be fixed by another 'Vis' +%% dictionary that keeps track of what is on the stack. +%% Currently, we just skip already colored nodes. +%% +%% - Arguments: +%% Low: low-degree nodes (ready to color) +%% NumNodes: number of remaining nodes in graph +%% IG: interference graph +%% Spill: spill costs of nodes +%% K: number of colors +%% Ix: next spill index +%% Stk: stack of already simplified nodes +%% +%% Physical registers are marked as 'visited' prior to simplify. +%% This has the following effect: +%% - they are not considered for spilling +%% - they are not pushed on the stack +%% - since we do NOT decrement degrees of surrounding vars, the +%% non-physreg variables must still take them into account. + +simplify(Low, NumNodes, PreC, IG, Spill, K, Ix, Stk, SpillLimit, + Target, NotAllocatable, MustNotSpill) -> + Vis = visit_all(PreC, none_visited(NumNodes)), + Vis1 = visit_all(NotAllocatable, Vis), + ActualNumNodes = (NumNodes-length(PreC))-length(NotAllocatable), + %% Make sure that the registers that must not be spilled + %% get a degree less than K by spilling other regs. + {Stk2, Ix2, Vis2, Low2} = + handle_non_spill(MustNotSpill, IG, Spill, K, Ix, Stk, Vis1, Low, + SpillLimit, Target), + simplify_ig(Low2, ActualNumNodes-length(Stk2), IG, Spill, K, Ix2, Stk2, Vis2, + SpillLimit, Target). + +handle_non_spill([], _IG, _Spill, _K, Ix, Stk, Vis, Low, _SpillLimit, _Target) -> + {Stk, Ix, Vis, Low}; +handle_non_spill([X|Xs] = L, IG, Spill, K, Ix, Stk, Vis, Low, SpillLimit, Target) -> + Info = hipe_vectors:get(IG, X), + Degree = degree(Info), + ?report("Can't Spill ~w with degree ~w\n", [X,Degree]), + if Degree > K -> + ?report(" *** spill required (N<~w)***~n", [SpillLimit]), + {Y, NewLow, NewIG} = spill(IG, Vis, Spill, K, SpillLimit, Target), + NewVis = visit(Y,Vis), + {NewStk, NewIx} = push_spill_node(Y, Ix, Stk), + ?report(" node ~w spilled~n", [Y]), + handle_non_spill(L, NewIG, Spill, K, NewIx, NewStk, NewVis, + Low ++ NewLow, SpillLimit, Target); + true -> + {NewLow, NewIG} = decrement_neighbors(X, Low, IG, Vis, K), + ?report(" node ~w pushed\n(~w now ready)~n", [X,NewLow]), + NewStk = push_colored(X, Stk), + handle_non_spill(Xs, NewIG, Spill, K, Ix, NewStk, visit(X,Vis), + NewLow, SpillLimit, Target) + end. + +simplify_ig([], 0, _IG, _Spill, _K, Ix, Stk, _Vis, _SpillLimit, _Target) -> + {Stk, Ix}; +simplify_ig([], N, IG, Spill, K, Ix, Stk, Vis, SpillLimit, Target) + when N > 0 -> + ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]), + ?report(" *** spill required (N<~w)***~n", [SpillLimit]), + {X, Low, NewIG} = spill(IG, Vis, Spill, K, SpillLimit, Target), + NewVis = visit(X,Vis), + {NewStk, NewIx} = push_spill_node(X, Ix, Stk), + ?report(" node ~w spilled\n(~w now ready)~n", [X, Low]), + simplify_ig(Low, N-1, NewIG, Spill, K, NewIx, NewStk, NewVis, + SpillLimit, Target); +simplify_ig([X|Xs], N, IG, Spill, K, Ix, Stk, Vis, SpillLimit, Target) -> + ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]), + case is_visited(X,Vis) of + true -> + ?report(" node ~p already visited~n",[X]), + simplify_ig(Xs, N, IG, Spill, K, Ix, Stk, Vis, SpillLimit, Target); + false -> + ?report("Stack ~w\n", [Stk]), + {NewLow, NewIG} = decrement_neighbors(X, Xs, IG, Vis, K), + ?report(" node ~w pushed\n(~w now ready)~n", [X,NewLow]), + NewStk = push_colored(X, Stk), + simplify_ig(NewLow, N-1, NewIG, Spill, K, Ix, NewStk, visit(X,Vis), + SpillLimit, Target) + end. + +%% Returns { NowLowDegreeNeighbors, NewIG } + +decrement_neighbors(X, Xs, IG, Vis, K) -> + Ns = unvisited_neighbors(X, Vis, IG), + ?report(" node ~p has neighbors ~w\n(unvisited ~p)~n", + [X, neighbors(X, IG), Ns]), + decrement_each(Ns, Xs, IG, Vis, K). + +%% For each node, decrement its degree and check if it is now +%% a low-degree node. In that case, add it to the 'low list'. + +decrement_each([], Low, IG, _Vis, _K) -> + {Low, IG}; +decrement_each([N|Ns], OldLow, IG, Vis, K) -> + {Low, CurrIG} = Res = decrement_each(Ns, OldLow, IG, Vis, K), + case is_visited(N, Vis) of + true -> + Res; + false -> + {D, NewIG} = decrement_degree(N, CurrIG), + if + D =:= K-1 -> + {[N|Low], NewIG}; + true -> + {Low, NewIG} + end + end. + +%%%%%%%%%%%%%%%%%%%% +%% +%% The spill cost of a node is: +%% est_spill_cost / current_degree +%% +%% For all unvisited nodes, compute spill cost and select the minimum. +%% This node is chosen to be spilled. Then decrement the degree of its +%% neighbors, and return those of low degree. +%% +%% Notes: +%% - A better method for computing spill costs is to just keep the +%% minimum cost node. But for debugging purposes, we compute a list +%% of {node,spillcost} pairs and select the minimum. +%% +%% Returns: +%% {Spilled_node, Low_degree_neighbors, New_interference_graph} + +spill(IG, Vis, Spill, K, SpillLimit, Target) -> + Ns = list_ig(IG), + Costs = spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target), + ?report3("spill costs are ~p~n",[Costs]), + ActualCosts = lists:sort(Costs), + ?report3("actual costs are ~p~n",[ActualCosts]), + case ActualCosts of + [] -> + ?error_msg("There is no node to spill",[]), + ?EXIT('no node to spill'); + [{_Cost,N}|_] -> + {Low, NewIG} = decrement_neighbors(N, [], IG, Vis, K), + %?report("spilled node ~p at cost ~p (~p now ready)~n",[N,Cost,Low]), + {N, Low, NewIG} + end. + +spill_costs([], _IG, _Vis, _Spill, _SpillLimit, _Target) -> + []; +spill_costs([{N,Info}|Ns], IG, Vis, Spill, SpillLimit, Target) -> + case degree(Info) of + 0 -> spill_costs(Ns,IG,Vis,Spill, SpillLimit, Target); + Deg -> + case is_visited(N,Vis) of + true -> + spill_costs(Ns,IG,Vis,Spill, SpillLimit, Target); + _ -> + case Target:is_fixed(N) of + true -> + spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); + false -> + if N > SpillLimit -> + spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); + true -> + [{spill_cost_of(N,Spill)/Deg,N} | + spill_costs(Ns,IG, Vis, Spill, SpillLimit, Target)] + end + end + end + end. + +%%%%%%%%%%%%%%%%%%%% +%% +%% Returns a list of {Name,Location}, where Location is +%% either {spill,M} or {reg,R} +%% +%% Note: we use pessimistic coloring here. +%% - we could use optimistic coloring: for spilled node, check if there is +%% an unused color among the neighbors and choose that. + +select(Stk, PreC, IG, K, PhysRegs, NumNodes, Target) -> + %% NumNodes = length(Stk)+length(PreC), + {PhysColors, Cols} = precolor(PreC, none_colored(NumNodes), Target), + ?report("precoloring has yielded ~p~n",[list_coloring(Cols)]), + PhysColors ++ select_colors(Stk, IG, Cols, PhysRegs, K). + +select_colors([], _IG, _Cols, _PhysRegs, _K) -> + ?report("all nodes colored~n",[]), + []; +select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs, K) -> + ?report("color of ~p\n",[X]), + {Reg,NewCols} = select_color(X, IG, Cols, PhysRegs), + ?report("~p~n",[Reg]), + [{X,{reg,Reg}} | select_colors(Xs, IG, NewCols, PhysRegs, K)]; +%select_colors([{X,{spill,M}}|Xs], IG, Cols, PhysRegs, K) -> +% ?report('spilled: ~p~n',[X]), +% %% Check if optimistic coloring could have found a color +% case catch select_color(X,IG,Cols,K) of +% {'EXIT',_} -> % no color possible +% ?report('(no optimistic color)~n',[]), +% [{X,{spill,M}}|select_colors(Xs, IG, Cols, PhysRegs, K)]; +% {Reg,NewCols} -> +% ?report('(optimistic color: ~p)~n',[Reg]), +% [{X,{reg,Reg}}|select_colors(Xs, IG, Cols, PhysRegs, K)] +% end. + +%% Old code / pessimistic coloring: +select_colors([{X,{spill,M}}|Xs], IG, Cols, PhysRegs, K) -> + ?report("spilled: ~p~n",[X]), + %% Check if optimistic coloring could have found a color +% case catch select_color(X,IG,Cols,K) of +% {'EXIT',_} -> % no color possible +% ?report('(no optimistic color)~n',[]); +% {Reg,NewCols} -> +% ?report('(optimistic color: ~p)~n',[Reg]) +% end, + [{X,{spill,M}} | select_colors(Xs, IG, Cols, PhysRegs, K)]. + +select_color(X, IG, Cols, PhysRegs) -> + UsedColors = get_colors(neighbors(X, IG), Cols), + Reg = select_unused_color(UsedColors, PhysRegs), + {Reg, set_color(X, Reg, Cols)}. + +%%%%%%%%%%%%%%%%%%%% + +get_colors([], _Cols) -> []; +get_colors([X|Xs], Cols) -> + case color_of(X, Cols) of + uncolored -> + get_colors(Xs, Cols); + {color,R} -> + [R|get_colors(Xs, Cols)] + end. + +select_unused_color(UsedColors, PhysRegs) -> + Summary = ordsets:from_list(UsedColors), + AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), + hd(AvailRegs). + %% select_avail_reg(AvailRegs). + +%% We choose the register to use randomly from the set of available +%% registers. +%% +%% Note: Another way of doing it is LRU-order: +%% - Have an LRU-queue of register names; when coloring, try the colors in that +%% order (some may be occupied). +%% - When a color has been selected, put it at the end of the LRU. + +%% select_avail_reg(Regs) -> +%% case get(seeded) of +%% undefined -> +%% random:seed(), +%% put(seeded,true); +%% true -> +%% ok +%% end, +%% NReg = length(Regs), +%% RegNo = random:uniform(NReg), +%% lists:nth(RegNo, Regs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +push_spill_node(X, M, Stk) -> + {[{X,{spill,M}}|Stk], M+1}. + +push_colored(X, Stk) -> + [{X, colorable} | Stk]. + +%%%%%%%%%%%%%%%%%%%% + +low_degree_nodes([], _K, _NotAllocatable) -> []; +low_degree_nodes([{N,Info}|Xs], K, NotAllocatable) -> + case lists:member(N, NotAllocatable) of + true -> + low_degree_nodes(Xs,K, NotAllocatable); + false -> + ?report0("node ~p has degree ~p: ~w~n",[N,degree(Info),neighbors(Info)]), + Deg = degree(Info), + if + Deg < K -> + [N|low_degree_nodes(Xs, K, NotAllocatable)]; + true -> + low_degree_nodes(Xs, K, NotAllocatable) + end + end. + +%%%%%%%%%%%%%%%%%%%% + +unvisited_neighbors(X, Vis, IG) -> + ordsets:from_list(unvisited(neighbors(X,IG), Vis)). + +unvisited([], _Vis) -> []; +unvisited([X|Xs], Vis) -> + case is_visited(X, Vis) of + true -> + unvisited(Xs, Vis); + false -> + [X|unvisited(Xs, Vis)] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** ABSTRACT DATATYPES *** + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The ig datatype: +%% +%% Note: if we know the number of temps used, we can use a VECTOR +%% instead, which will speed up things. +%% +%% Note: later on, we may wish to add 'move-related' support. + +-record(ig_info, {neighbors=[], degree=0 :: integer()}). + +empty_ig(NumNodes) -> + hipe_vectors:new(NumNodes, #ig_info{neighbors=[], degree=0}). + +degree(Info) -> + Info#ig_info.degree. + +neighbors(Info) -> + Info#ig_info.neighbors. + +add_edge(X, X, IG) -> IG; +add_edge(X, Y, IG) -> + add_arc(X, Y, add_arc(Y, X, IG)). + +add_arc(X, Y, IG) -> + Info = hipe_vectors:get(IG, X), + Old = neighbors(Info), + New = Info#ig_info{neighbors=[Y|Old]}, + hipe_vectors:set(IG, X, New). + +normalize_ig(IG) -> + Size = hipe_vectors:size(IG), + normalize_ig(Size-1, IG). + +normalize_ig(-1, IG) -> + IG; +normalize_ig(I, IG) -> + Info = hipe_vectors:get(IG, I), + N = ordsets:from_list(neighbors(Info)), + NewIG = hipe_vectors:set(IG, I, Info#ig_info{neighbors=N, degree=length(N)}), + normalize_ig(I-1, NewIG). + +%%degree(X, IG) -> +%% Info = hipe_vectors:get(IG, X), +%% Info#ig_info.degree. + +neighbors(X, IG) -> + Info = hipe_vectors:get(IG, X), + Info#ig_info.neighbors. + +decrement_degree(X, IG) -> + Info = hipe_vectors:get(IG, X), + Degree = degree(Info), + NewDegree = Degree-1, + NewInfo = Info#ig_info{degree=NewDegree}, + {NewDegree, hipe_vectors:set(IG,X,NewInfo)}. + +list_ig(IG) -> + hipe_vectors:list(IG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The spill cost datatype: +%% +%% Note: if we know the number of temps used, we can use a VECTOR +%% instead, which will speed up things. + +empty_spill(NumNodes) -> + hipe_vectors:new(NumNodes, 0). + +spill_cost_of(X, Spill) -> + hipe_vectors:get(Spill, X). + +spill_cost_lookup(X, Spill) -> + spill_cost_of(X, Spill). + +spill_cost_update(X, N, Spill) -> + hipe_vectors:set(Spill, X, N). + +%%list_spill_costs(Spill) -> +%% hipe_vectors:list(Spill). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The coloring datatype: + +none_colored(NumNodes) -> + hipe_vectors:new(NumNodes,uncolored). + +color_of(X,Cols) -> + hipe_vectors:get(Cols,X). + +set_color(X,R,Cols) -> + hipe_vectors:set(Cols,X,{color,R}). + +-ifdef(DEBUG). +list_coloring(Cols) -> + hipe_vectors:list(Cols). +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Note: there might be a slight gain in separating the two versions +%% of visit/2 and visited/2. (So that {var,X} selects X and calls the +%% integer version. + +none_visited(NumNodes) -> + hipe_vectors:new(NumNodes, false). + +visit(X,Vis) -> + hipe_vectors:set(Vis, X, true). + +is_visited(X,Vis) -> + hipe_vectors:get(Vis, X). + +visit_all([], Vis) -> Vis; +visit_all([X|Xs], Vis) -> + visit_all(Xs, visit(X, Vis)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Check that all arcs in IG are bidirectional + degree is correct + +%% check_ig(IG) -> +%% check_ig(list_ig(IG),IG). + +%% check_ig([],IG) -> +%% ok; +%% check_ig([{N,Info}|Xs],IG) -> +%% Ns = neighbors(Info), +%% NumNs = length(Ns), +%% D = degree(Info), +%% if +%% D =:= NumNs -> +%% ok; +%% true -> +%% ?WARNING_MSG('node ~p has degree ~p but ~p neighbors~n',[N,D,NumNs]) +%% end, +%% check_neighbors(N,Ns,IG), +%% check_ig(Xs,IG). + +%% check_neighbors(N,[],IG) -> +%% ok; +%% check_neighbors(N,[M|Ms],IG) -> +%% Ns = neighbors(M,IG), +%% case member(N,Ns) of +%% true -> +%% ok; +%% true -> +%% ?WARNING_MSG('node ~p should have ~p as neighbor (has ~p)~n',[M,N,Ns]) +%% end, +%% check_neighbors(N,Ms,IG). + +-ifdef(DO_ASSERT). +%%%%%%%%%%%%%%%%%%%% +%% Check that the coloring is correct (if the IG is correct): +%% + +check_coloring(Coloring, IG, Target) -> + ?report0("checking coloring ~p~n",[Coloring]), + check_cols(list_ig(IG),init_coloring(Coloring, Target)). + +init_coloring(Xs, Target) -> + hipe_temp_map:cols2tuple(Xs, Target). + +check_color_of(X, Cols) -> +%% if +%% is_precoloured(X) -> +%% phys_reg_color(X,Cols); +%% true -> + case hipe_temp_map:find(X, Cols) of + unknown -> + ?WARNING_MSG("node ~p: color not found~n", [X]), + uncolored; + C -> + C + end. + +check_cols([], Cols) -> + ?report("coloring valid~n",[]), + true; +check_cols([{X,Info}|Xs], Cols) -> + Cs = [{N, check_color_of(N, Cols)} || N <- neighbors(Info)], + C = check_color_of(X, Cols), + case valid_coloring(X, C, Cs) of + yes -> + check_cols(Xs, Cols); + {no,Invalids} -> + ?WARNING_MSG("node ~p has same color (~p) as ~p~n", [X,C,Invalids]), + check_cols(Xs, Cols) + end. + +valid_coloring(X, C, []) -> + yes; +valid_coloring(X, C, [{Y,C}|Ys]) -> + case valid_coloring(X, C, Ys) of + yes -> {no, [Y]}; + {no,Zs} -> {no, [Y|Zs]} + end; +valid_coloring(X, C, [_|Ys]) -> + valid_coloring(X, C, Ys). +-endif. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** INTERFACES TO OTHER MODULES *** +%% + +liveout(CFG, L, Target) -> + ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). + +bb(CFG, L, Target) -> + hipe_bb:code(Target:bb(CFG, L)). + +def_use(X, Target) -> + {ordsets:from_list(reg_names(Target:defines(X), Target)), + ordsets:from_list(reg_names(Target:uses(X), Target))}. + +reg_names(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. + +%% +%% Precoloring: use this version when a proper implementation of +%% physical_name(X) is available! +%% + +precolor(Xs, Cols, Target) -> + ?report("precoloring ~p~n", [Xs]), + {_Cs, _NewCol} = Res = precolor0(Xs, Cols, Target), + ?report(" yielded ~p~n", [_Cs]), + Res. + +precolor0([], Cols, _Target) -> + {[], Cols}; +precolor0([R|Rs], Cols, Target) -> + {Cs, Cols1} = precolor0(Rs, Cols, Target), + {[{R, {reg, physical_name(R, Target)}}|Cs], + set_color(R, physical_name(R, Target), Cols1)}. + +physical_name(X, Target) -> + Target:physical_name(X). diff --git a/lib/hipe/regalloc/hipe_ig.erl b/lib/hipe/regalloc/hipe_ig.erl new file mode 100644 index 0000000000..4991e73e53 --- /dev/null +++ b/lib/hipe/regalloc/hipe_ig.erl @@ -0,0 +1,776 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_ig.erl +%% Author : Andreas Wallin +%% Purpose : Creates an interference graph that tells which temporaries +%% interfere with each other. +%% Created : 5 Feb 2000 +%%---------------------------------------------------------------------- + +-module(hipe_ig). + +-export([build/2, + nodes_are_adjacent/3, + node_spill_cost/2, + node_adj_list/2, + get_moves/1, + %% degree/1, + %% number_of_temps/1, + spill_costs/1, + adj_list/1, + %% adj_set/1, + add_edge/4, + remove_edge/4, + %% set_adj_set/2, + %% set_adj_list/2, + %% set_ig_moves/2, + %% set_spill_costs/2, + %% set_degree/2 + get_node_degree/2, + dec_node_degree/2, + is_trivially_colourable/3 + ]). +-ifdef(DEBUG_PRINTOUTS). +-export([print_spill_costs/1, + print_adjacent/1, + print_degrees/1 + ]). +-endif. + +%%-ifndef(DEBUG). +%%-define(DEBUG,true). +%%-endif. + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). +-include("hipe_spillcost.hrl"). + +%%---------------------------------------------------------------------- + +-record(igraph, {adj_set, adj_list, ig_moves, degree, + spill_costs :: #spill_cost{}, + num_temps :: non_neg_integer()}). + +%%---------------------------------------------------------------------- +%% Degree: array mapping nodes to integer degrees. +%% Precoloured nodes have 'infinite' degrees: they are initialised with +%% degrees K + number_of_temporaries. +%% Operations include incrementing, decrementing, and querying a node's +%% degree, and testing for trivial colourability (degree < K). +%%---------------------------------------------------------------------- + +degree_new(No_temporaries, Target) -> + Degree = hipe_bifs:array(No_temporaries, 0), + K = length(Target:allocatable()), + Inf = K + No_temporaries, + precoloured_to_inf_degree(Target:all_precoloured(), Inf, Degree). + +precoloured_to_inf_degree([], _Inf, Degree) -> Degree; +precoloured_to_inf_degree([P|Ps], Inf, Degree) -> + hipe_bifs:array_update(Degree, P, Inf), + precoloured_to_inf_degree(Ps, Inf, Degree). + +degree_inc(Node, Degree) -> + hipe_bifs:array_update(Degree, Node, hipe_bifs:array_sub(Degree, Node) + 1). + +degree_dec(Node, Degree) -> + hipe_bifs:array_update(Degree, Node, hipe_bifs:array_sub(Degree, Node) - 1). + +degree_get(Node, Degree) -> + hipe_bifs:array_sub(Degree, Node). + +degree_is_trivially_colourable(Node, K, Degree) -> + hipe_bifs:array_sub(Degree, Node) < K. + +%%---------------------------------------------------------------------- +%% AdjSet: +%% Implements sets of adjacent nodes. +%% Symmetry implies that when (U,V) is a member, then so is (V,U). +%% Hence, only (U,V), where U + ArrayWords = (ArrayBits + (?BITS_PER_WORD - 1)) bsr ?LOG2_BITS_PER_WORD, + Byte = + case Val of + true -> 16#FF; + false -> 16#00 + end, + hipe_bifs:bytearray(ArrayWords, Byte). + +hipe_bifs_bitarray_update(Array, BitNr, Val) -> + WordNr = BitNr bsr ?LOG2_BITS_PER_WORD, + WordMask = 1 bsl (BitNr band (?BITS_PER_WORD - 1)), + Word = hipe_bifs:bytearray_sub(Array, WordNr), + NewWord = + case Val of + true -> Word bor WordMask; + false -> Word band (bnot WordMask) + end, + hipe_bifs:bytearray_update(Array, WordNr, NewWord). + +hipe_bifs_bitarray_sub(Array, BitNr) -> + WordNr = BitNr bsr ?LOG2_BITS_PER_WORD, + WordMask = 1 bsl (BitNr band (?BITS_PER_WORD - 1)), + Word = hipe_bifs:bytearray_sub(Array, WordNr), + Word band WordMask =/= 0. + +-define(HIPE_BIFS_BITARRAY(ArrayBits, Val), hipe_bifs_bitarray(ArrayBits, Val)). +-define(HIPE_BIFS_BITARRAY_UPDATE(Array, BitNr, Val), hipe_bifs_bitarray_update(Array, BitNr, Val)). +-define(HIPE_BIFS_BITARRAY_SUB(Array, BitNr), hipe_bifs_bitarray_sub(Array, BitNr)). + +-endif. % EMULATE_BITARRAY_BIFS + +-record(adjset, {index, array}). +-record(adjset_chunked, {index, chunks}). + +-spec adjset_new(non_neg_integer()) -> #adjset{} | #adjset_chunked{}. + +adjset_new(NrTemps) -> + ArrayBits = (NrTemps * (NrTemps - 1)) div 2, + Index = adjset_mk_index(NrTemps, []), + try ?HIPE_BIFS_BITARRAY(ArrayBits, false) of + Array -> + #adjset{index=Index,array=Array} + catch + _:_ -> + #adjset_chunked{index=Index,chunks=adjset_mk_chunks(ArrayBits)} + end. + +-define(LOG2_CHUNK_BITS, 19). % 2^19 bits == 64KB +-define(CHUNK_BITS, (1 bsl ?LOG2_CHUNK_BITS)). + +adjset_mk_chunks(ArrayBits) -> + Tail = + case ArrayBits band (?CHUNK_BITS - 1) of + 0 -> []; + LastChunkBits -> [?HIPE_BIFS_BITARRAY(LastChunkBits, false)] + end, + N = ArrayBits bsr ?LOG2_CHUNK_BITS, + adjset_mk_chunks(N, Tail). + +adjset_mk_chunks(0, Tail) -> + list_to_tuple(Tail); +adjset_mk_chunks(N, Tail) -> + adjset_mk_chunks(N-1, [?HIPE_BIFS_BITARRAY(?CHUNK_BITS, false) | Tail]). + +adjset_mk_index(0, Tail) -> + list_to_tuple(Tail); +adjset_mk_index(N, Tail) -> + I = N - 1, + adjset_mk_index(I, [(I * (I-1)) div 2 | Tail]). + +adjset_add_edge(U0, V0, #adjset{index=Index,array=Array}) -> % PRE: U0 =/= V0 + {U,V} = + if U0 < V0 -> {U0,V0}; + true -> {V0,U0} + end, + %% INV: U < V + BitNr = element(V+1, Index) + U, + ?HIPE_BIFS_BITARRAY_UPDATE(Array, BitNr, true); +adjset_add_edge(U0, V0, #adjset_chunked{index=Index,chunks=Chunks}) -> % PRE: U0 =/= V0 + {U,V} = + if U0 < V0 -> {U0,V0}; + true -> {V0,U0} + end, + %% INV: U < V + BitNr = element(V+1, Index) + U, + %% here things become different + ChunkNr = BitNr bsr ?LOG2_CHUNK_BITS, + ChunkBit = BitNr band (?CHUNK_BITS - 1), + Chunk = element(ChunkNr+1, Chunks), + ?HIPE_BIFS_BITARRAY_UPDATE(Chunk, ChunkBit, true). + +adjset_remove_edge(U0, V0, #adjset{index=Index,array=Array}) -> % PRE: U0 =/= V0 + {U,V} = + if U0 < V0 -> {U0,V0}; + true -> {V0,U0} + end, + %% INV: U < V + BitNr = element(V+1, Index) + U, + ?HIPE_BIFS_BITARRAY_UPDATE(Array, BitNr, false); +adjset_remove_edge(U0, V0, #adjset_chunked{index=Index,chunks=Chunks}) -> % PRE: U0 =/= V0 + {U,V} = + if U0 < V0 -> {U0,V0}; + true -> {V0,U0} + end, + %% INV: U < V + BitNr = element(V+1, Index) + U, + %% here things become different + ChunkNr = BitNr bsr ?LOG2_CHUNK_BITS, + ChunkBit = BitNr band (?CHUNK_BITS - 1), + Chunk = element(ChunkNr+1, Chunks), + ?HIPE_BIFS_BITARRAY_UPDATE(Chunk, ChunkBit, false). + +adjset_are_adjacent(U0, V0, #adjset{index=Index,array=Array}) -> + {U,V} = + if U0 < V0 -> {U0,V0}; + U0 =:= V0 -> exit({?MODULE,adjacent,U0,V0}); % XXX: probably impossible + true -> {V0,U0} + end, + %% INV: U < V + BitNr = element(V+1, Index) + U, + ?HIPE_BIFS_BITARRAY_SUB(Array, BitNr); +adjset_are_adjacent(U0, V0, #adjset_chunked{index=Index,chunks=Chunks}) -> + {U,V} = + if U0 < V0 -> {U0,V0}; + U0 =:= V0 -> exit({?MODULE,adjacent,U0,V0}); % XXX: probably impossible + true -> {V0,U0} + end, + %% INV: U < V + BitNr = element(V+1, Index) + U, + %% here things become different + ChunkNr = BitNr bsr ?LOG2_CHUNK_BITS, + ChunkBit = BitNr band (?CHUNK_BITS - 1), + Chunk = element(ChunkNr+1, Chunks), + ?HIPE_BIFS_BITARRAY_SUB(Chunk, ChunkBit). + +%%--------------------------------------------------------------------- +%% Print functions - only used for debugging + +-ifdef(DEBUG_PRINTOUTS). +print_adjacent(IG) -> + ?debug_msg("Adjacent nodes:\n", []), + adjset_print(number_of_temps(IG),IG). + +adjset_print(2, IG) -> + adjset_print(1, 0, IG); +adjset_print(Ntemps, IG) -> + adjset_print(Ntemps - 1, Ntemps - 2, IG), + adjset_print(Ntemps - 1, IG). + +adjset_print(U, 0, IG) -> + case nodes_are_adjacent(U, 0, IG) of + true -> ?debug_msg("edge ~w ~w\n", [U, 0]); + _ -> true + end; +adjset_print(U, V, IG) -> + case nodes_are_adjacent(U, V, IG) of + true -> ?debug_msg("edge ~w ~w\n", [U, V]); + _ -> true + end, + adjset_print(U, V - 1, IG). +-endif. + +%%---------------------------------------------------------------------- +%% Function: adj_set, adj_list, degree, spill_costs +%% +%% Description: Selector functions. Used to get one of the encapsulated +%% data-structure contained in the IG structure. +%% Parameters: +%% IG -- An interference graph +%% +%% Returns: +%% One of the encapsulated data-structures. +%%---------------------------------------------------------------------- +adj_set(IG) -> IG#igraph.adj_set. +adj_list(IG) -> IG#igraph.adj_list. +ig_moves(IG) -> IG#igraph.ig_moves. +degree(IG) -> IG#igraph.degree. + +-spec spill_costs(#igraph{}) -> #spill_cost{}. +spill_costs(IG) -> IG#igraph.spill_costs. + +-ifdef(DEBUG_PRINTOUTS). +number_of_temps(IG) -> IG#igraph.no_temps. +-endif. + +%%---------------------------------------------------------------------- +%% Function: set_adj_set, set_adj_list, set_degree, set_spill_costs +%% +%% Description: Modifier functions. Used to set one of the encapsulated +%% data-structure contained in the IG structure. +%% Parameters: +%% Data-structure -- Data-structure you want to set. An adj_set +%% data-structure for example. +%% IG -- An interference graph +%% +%% Returns: +%% An updated interference graph. +%%---------------------------------------------------------------------- + +%%set_adj_set(Adj_set, IG) -> IG#igraph{adj_set = Adj_set}. +set_adj_list(Adj_list, IG) -> IG#igraph{adj_list = Adj_list}. +set_ig_moves(IG_moves, IG) -> IG#igraph{ig_moves = IG_moves}. +%%set_degree(Degree, IG) -> IG#igraph{degree = Degree}. +set_spill_costs(Spill_costs, IG) -> IG#igraph{spill_costs = Spill_costs}. + +%%---------------------------------------------------------------------- +%% Function: initial_ig +%% +%% Description: The initial interference record that we start with when +%% building the interference graph. +%% Parameters: +%% NumTemps -- Number of temporaries in the CFG we work on. This is +%% because we have some data structures built out of vectors. +%% +%% Returns: +%% A new interference record +%%---------------------------------------------------------------------- + +-spec initial_ig(non_neg_integer(), atom()) -> #igraph{}. + +initial_ig(NumTemps, Target) -> + #igraph{adj_set = adjset_new(NumTemps), + adj_list = hipe_adj_list:new(NumTemps), + ig_moves = hipe_ig_moves:new(NumTemps), + degree = degree_new(NumTemps, Target), + spill_costs = hipe_spillcost:new(NumTemps), + num_temps = NumTemps + }. + +%%---------------------------------------------------------------------- +%% Function: build +%% +%% Description: Constructs an interference graph for the specifyed CFG. +%% +%% Parameters: +%% CFG -- A Control Flow Graph +%% Target -- The module that contains the target-specific functions +%% +%% Returns: +%% An interference graph for the given CFG. +%%---------------------------------------------------------------------- + +-spec build(#cfg{}, atom()) -> #igraph{}. + +build(CFG, Target) -> + BBs_in_out_liveness = Target:analyze(CFG), + Labels = Target:labels(CFG), + %% How many temporaries exist? + NumTemps = Target:number_of_temporaries(CFG), + IG0 = initial_ig(NumTemps, Target), + %%?debug_msg("initial adjset: ~p\n",[element(2, IG0)]), + %%?debug_msg("initial adjset array: ~.16b\n",[element(3, element(2, IG0))]), + analyze_bbs(Labels, BBs_in_out_liveness, IG0, CFG, Target). + +%%---------------------------------------------------------------------- +%% Function: analyze_bbs +%% +%% Description: Looks up the code that exists in all basic blocks and +%% analyse instructions use and def's to see what +%% temporaries that interfere with each other. +%% +%% Parameters: +%% L -- A label +%% Ls -- Other labels that exits in the CFG +%% BBs_in_out_liveness -- The in and out liveness on all basic blocks +%% IG -- The interference graph in it's current state +%% CFG -- The Control Flow Graph that we constructs +%% the interference graph from. +%% Target -- The module containing the target-specific +%% functions +%% +%% Returns: +%% An interference graph for the given CFG. +%%---------------------------------------------------------------------- + +analyze_bbs([], _, IG, _, _) -> IG; +analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) -> + % Get basic block associated with label L + BB = Target:bb(CFG, L), + % Get basic block code + BB_code = hipe_bb:code(BB), + % Temporaries that are live out from this basic block + BB_liveout = Target:liveout(BBs_in_out_liveness, L), + % Only temporary numbers + BB_liveout_numbers = reg_numbers(BB_liveout, Target), + % {Liveness, New Interference Graph} + {_, New_ig, Ref} = analyze_bb_instructions(BB_code, + ordsets:from_list(BB_liveout_numbers), + IG, + Target), + Newer_ig = set_spill_costs(hipe_spillcost:ref_in_bb(Ref, + spill_costs(New_ig)), + New_ig), + analyze_bbs(Ls, BBs_in_out_liveness, Newer_ig, CFG, Target). + +%%---------------------------------------------------------------------- +%% Function: analyze_bb_instructions +%% +%% Description: Analyzes all instructions that is contained in a basic +%% block in reverse order. +%% +%% Parameters: +%% Instruction -- An instruction +%% Instructions -- The remaining instructions +%% Live -- All temporaries that are live at the time. +%% Live is a set of temporary "numbers only". +%% IG -- The interference graph in it's current state +%% Target -- The mopdule containing the target-specific functions +%% +%% Returns: +%% Live -- Temporaries that are live at entery of basic block +%% that we analyze. +%% IG -- Updated interference graph. +%% Ref -- Set of temporaries referred to in this bb. +%%---------------------------------------------------------------------- + +%% Ref: set of temporaries referred to in this bb +analyze_bb_instructions([], Live, IG, _) -> {Live, IG, ordsets:new()}; +analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) -> + %% Analyze last instruction first. + {Live0, IG0, Ref} = analyze_bb_instructions(Instructions, Live, + IG, Target), + %% Check for temporaries that are defined and used in instruction + {Def, Use} = Target:def_use(Instruction), + %% Convert to register numbers + Def_numbers = ordsets:from_list(reg_numbers(Def, Target)), + Use_numbers = ordsets:from_list(reg_numbers(Use, Target)), + Ref_numbers = ordsets:union(Ref, ordsets:union(Def_numbers, Use_numbers)), + %% Increase spill cost on all used temporaries + IG1 = set_spill_costs(hipe_spillcost:inc_costs(Use_numbers, + spill_costs(IG0)), + IG0), + {Live1, IG2} = analyze_move(Instruction, + Live0, + Def_numbers, + Use_numbers, + IG1, + Target), + %% Adding Def to Live here has the effect of creating edges between + %% the defined registers, which is O(N^2) for an instruction that + %% clobbers N registers. + %% + %% Adding Def to Live is redundant when: + %% 1. Def is empty, or + %% 2. Def is a singleton, or + %% 3. Def contains only precoloured registers, or + %% 4. Def contains exactly one non-precoloured register, and the + %% remaining ones are all non-allocatable precoloured registers. + %% + %% HiPE's backends only create multiple-element Def sets + %% for CALL instructions, and then all elements are precoloured. + %% + %% Therefore we can avoid adding Def to Live. The benefit is greatest + %% on backends with many physical registers, since CALLs clobber all + %% physical registers. + Live2 = Live1, % ordsets:union(Live1, Def_numbers), + IG3 = interfere(Def_numbers, Live2, IG2, Target), + Live3 = ordsets:union(Use_numbers, ordsets:subtract(Live2, Def_numbers)), + {Live3, IG3, Ref_numbers}. + +%%---------------------------------------------------------------------- +%% Function: analyze_move +%% +%% Description: If a move instructions is discovered, this function is +%% called. It is used to remember what move instructions +%% a temporary is associated with and all moves that exists +%% in the CFG. +%% +%% Parameters: +%% Instruction -- An instruction +%% Live -- All temporaries that are live at the time. +%% Live is a set of temporary "numbers only". +%% Def_numbers -- Temporaries that are defined at this instruction +%% Use_numbers -- Temporaries that are used at this instruction +%% IG -- The interference graph in its current state +%% Target -- The module containing the target-specific functions +%% Returns: +%% Live -- An updated live set +%% IG -- An updated interference graph +%%---------------------------------------------------------------------- + +analyze_move(Instruction, Live, Def_numbers, Use_numbers, IG, Target) -> + case Target:is_move(Instruction) of + true -> + case {Def_numbers, Use_numbers} of + {[Dst], [Src]} -> + New_IG = set_ig_moves(hipe_ig_moves:new_move(Dst, Src, ig_moves(IG)), IG), + New_live = ordsets:del_element(Src, Live), + {New_live, New_IG}; + _ -> + {Live, IG} + end; + _ -> + {Live, IG} + end. + +%%---------------------------------------------------------------------- +%% Function: interfere +%% +%% Description: A number of temporaries that are defined interfere with +%% everything in the current live set. +%% +%% Parameters: +%% Define -- A Define temporary +%% Defines -- Rest of temporaries. +%% Live -- Current live set +%% IG -- An interference graph +%% +%% Returns: +%% An updated interference graph. +%%---------------------------------------------------------------------- + +interfere([], _, IG, _) -> IG; +interfere([Define|Defines], Living, IG, Target) -> + New_ig = interfere_with_living(Define, Living, IG, Target), + interfere(Defines, Living, New_ig, Target). + +%%---------------------------------------------------------------------- +%% Function: interfere_with_living +%% +%% Description: Let one temporary that is in the define set interfere +%% with all live temporaries. +%% +%% Parameters: +%% Define -- A Define temporary +%% Live -- Current live set +%% Lives -- Rest of living temporaries. +%% IG -- An interference graph +%% Target -- The module containing the target-specific functions +%% Returns: +%% An updated interference graph +%%---------------------------------------------------------------------- + +interfere_with_living(_, [], IG, _) -> IG; +interfere_with_living(Define, [Live|Living], IG, Target) -> + New_ig = add_edge(Define, Live, IG, Target), + interfere_with_living(Define, Living, New_ig, Target). + +%% +%% nodes_are_adjacent(U, V, IG) +%% returns true if nodes U and V are adjacent in interference graph IG +%% +-spec nodes_are_adjacent(integer(), integer(), #igraph{}) -> boolean(). +nodes_are_adjacent(U, V, IG) -> + adjset_are_adjacent(U, V, adj_set(IG)). + +%% +%% node_adj_set(Node, IG) +%% returns list of Node's adjacent nodes in interference graph IG +%% +node_adj_list(Node, IG) -> + hipe_adj_list:edges(Node, adj_list(IG)). + +%% +%% node_spill_cost(Node, IG) +%% returns the Node's spill cost +%% +node_spill_cost(Node, IG) -> + hipe_spillcost:spill_cost(Node, spill_costs(IG)). + +%%---------------------------------------------------------------------- +%% Print functions - only used for debugging + +-ifdef(DEBUG_PRINTOUTS). +print_spill_costs(IG) -> + ?debug_msg("Spill costs:\n", []), + print_spill_costs(number_of_temps(IG), IG). + +print_spill_costs(0, _) -> + true; +print_spill_costs(Node, IG) -> + NextNode = Node - 1, + case hipe_spillcost:nr_of_use(NextNode, spill_costs(IG)) of + 0 -> + ?debug_msg("node ~w not used\n", [NextNode]); + _ -> + ?debug_msg("node ~w sc ~p\n", [NextNode, node_spill_cost(NextNode, IG)]) + end, + print_spill_costs(NextNode, IG). +-endif. + +%%---------------------------------------------------------------------- + +get_moves(IG) -> + hipe_ig_moves:get_moves(ig_moves(IG)). + +%%---------------------------------------------------------------------- +%% Function: add_edge +%% +%% Description: Adds an edge to the adj_set data structure if it is +%% not already a part of it and if U is not precoloured +%% we add V to its adj_list. If V is not precoloured +%% we add U to its adj_list. +%% +%% Parameters: +%% U -- A temporary number +%% V -- A temporary number +%% Target -- The module containing the target-specific functions +%% Returns: +%% An updated interference graph. +%%---------------------------------------------------------------------- + +add_edge(U, U, IG, _) -> IG; +add_edge(U, V, IG, Target) -> + case nodes_are_adjacent(U, V, IG) of + true -> + IG; + false -> + _ = adjset_add_edge(U, V, adj_set(IG)), + Degree = degree(IG), + AdjList0 = interfere_if_uncolored(U, V, adj_list(IG), Degree, Target), + AdjList1 = interfere_if_uncolored(V, U, AdjList0, Degree, Target), + set_adj_list(AdjList1, IG) + end. + +%%---------------------------------------------------------------------- +%% Function: remove_edge +%% +%% Description: Removes an edge to the adj_set data-structure if it's +%% a part of it and if U is not precoloured +%% we remove V from it's adj_list. If V is not precoloured +%% we remove U from it's adj_list. +%% +%% Parameters: +%% U -- A temporary number +%% V -- A temporary number +%% Target -- The module containing the target-specific functions +%% Returns: +%% An updated interference graph. +%%---------------------------------------------------------------------- + +remove_edge(U, U, IG, _) -> IG; +remove_edge(U, V, IG, Target) -> + case nodes_are_adjacent(U, V, IG) of + false -> + IG; + true -> + _ = adjset_remove_edge(U, V, adj_set(IG)), + Degree = degree(IG), + AdjList0 = remove_if_uncolored(U, V, adj_list(IG), Degree, Target), + AdjList1 = remove_if_uncolored(V, U, AdjList0, Degree, Target), + set_adj_list(AdjList1, IG) + end. + +%%---------------------------------------------------------------------- +%% Function: remove_if_uncolored +%% +%% Description: +%% +%% Parameters: +%% Temporary -- A temporary that is added to the adjacent +%% list if it's not precoloured. +%% Interfere_temporary -- Temporary will interfere with +%% Interfere_temporary if temporary is not +%% precoloured. +%% Adj_list -- An adj_list +%% Degree -- The degree that all nodes currently have +%% Target -- The module containing the target-specific +%% functions +%% +%% Returns: +%% Adj_list -- An updated adj_list data structure +%% Degree -- An updated degree data structure (via side-effects) +%%---------------------------------------------------------------------- + +remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> + case Target:is_precoloured(Temp) of + false -> + New_adj_list = hipe_adj_list:remove_edge(Temp, InterfereTemp, Adj_list), + degree_dec(Temp, Degree), + New_adj_list; + true -> + Adj_list + end. + +%%---------------------------------------------------------------------- +%% Function: interfere_if_uncolored +%% +%% Description: Let a not precoloured temporary interfere with another. +%% +%% Parameters: +%% Temporary -- A temporary that is added to the adjacent +%% list if it's not precoloured. +%% Interfere_temporary -- Temporary will interfere with +%% Interfere_temporary if temporary is not +%% precoloured. +%% Adj_list -- An adj_list +%% Degree -- The degree that all nodes currently have +%% Target -- The module containing the target-specific +%% functions +%% +%% Returns: +%% Adj_list -- An updated adj_list data structure +%% Degree -- An updated degree data structure (via side-effects) +%%---------------------------------------------------------------------- + +interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> + case Target:is_precoloured(Temp) of + false -> + New_adj_list = hipe_adj_list:add_edge(Temp, InterfereTemp, Adj_list), + degree_inc(Temp, Degree), + New_adj_list; + true -> + Adj_list + end. + +%%---------------------------------------------------------------------- +%% Function: reg_numbers +%% +%% Description: Converts a list of tuple with {something, reg_number} +%% to a list of register numbers. +%% +%% Parameters: +%% TRs -- A list of temporary registers +%% Target -- The module containing the target-specific functions +%% Returns: +%% A list of register numbers. +%%---------------------------------------------------------------------- + +reg_numbers(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. + +%%--------------------------------------------------------------------- +%% Print functions - only used for debugging + +-ifdef(DEBUG_PRINTOUTS). +print_degrees(IG) -> + ?debug_msg("The nodes degrees:\n", []), + print_node_degree(number_of_temps(IG), IG). + +print_node_degree(0, _) -> + true; +print_node_degree(Node, IG) -> + NextNode = Node - 1, + ?debug_msg("node ~w ~w\n", [NextNode, get_node_degree(NextNode, IG)]), + print_node_degree(NextNode, IG). +-endif. + +%%---------------------------------------------------------------------- + +get_node_degree(Node, IG) -> + degree_get(Node, degree(IG)). + +dec_node_degree(Node, IG) -> + degree_dec(Node, degree(IG)), + IG. + +is_trivially_colourable(Node, K, IG) -> + degree_is_trivially_colourable(Node, K, degree(IG)). diff --git a/lib/hipe/regalloc/hipe_ig_moves.erl b/lib/hipe/regalloc/hipe_ig_moves.erl new file mode 100644 index 0000000000..186c87a690 --- /dev/null +++ b/lib/hipe/regalloc/hipe_ig_moves.erl @@ -0,0 +1,81 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%============================================================================= + +-module(hipe_ig_moves). +-export([new/1, + new_move/3, + get_moves/1]). + +-include("../util/hipe_vectors.hrl"). + +%%----------------------------------------------------------------------------- +%% The main data structure; its fields are: +%% - movelist : mapping from temp to set of associated move numbers +%% - nrmoves : number of distinct move instructions seen so far +%% - moveinsns : list of move instructions, in descending move number order +%% - moveset : set of move instructions + +-record(ig_moves, {movelist :: hipe_vector(), + nrmoves = 0 :: non_neg_integer(), + moveinsns = [] :: [{_,_}], + moveset = gb_sets:empty() :: gb_set()}). + +%%----------------------------------------------------------------------------- + +-spec new(non_neg_integer()) -> #ig_moves{}. + +new(NrTemps) -> + MoveList = hipe_vectors:new(NrTemps, ordsets:new()), + #ig_moves{movelist = MoveList}. + +-spec new_move(_, _, #ig_moves{}) -> #ig_moves{}. + +new_move(Dst, Src, IG_moves) -> + MoveSet = IG_moves#ig_moves.moveset, + MoveInsn = {Dst, Src}, + case gb_sets:is_member(MoveInsn, MoveSet) of + true -> + IG_moves; + false -> + MoveNr = IG_moves#ig_moves.nrmoves, + Movelist0 = IG_moves#ig_moves.movelist, + Movelist1 = add_movelist(MoveNr, Dst, + add_movelist(MoveNr, Src, Movelist0)), + IG_moves#ig_moves{nrmoves = MoveNr+1, + movelist = Movelist1, + moveinsns = [MoveInsn|IG_moves#ig_moves.moveinsns], + moveset = gb_sets:insert(MoveInsn, MoveSet)} + end. + +-spec add_movelist(non_neg_integer(), non_neg_integer(), hipe_vector()) -> hipe_vector(). + +add_movelist(MoveNr, Temp, MoveList) -> + AssocMoves = hipe_vectors:get(MoveList, Temp), + %% XXX: MoveNr does not occur in moveList[Temp], but the new list must be an + %% ordset due to the ordsets:union in hipe_coalescing_regalloc:combine(). + hipe_vectors:set(MoveList, Temp, ordsets:add_element(MoveNr, AssocMoves)). + +-spec get_moves(#ig_moves{}) -> {hipe_vector(), non_neg_integer(), tuple()}. + +get_moves(IG_moves) -> % -> {MoveList, NrMoves, MoveInsns} + {IG_moves#ig_moves.movelist, + IG_moves#ig_moves.nrmoves, + list_to_tuple(lists:reverse(IG_moves#ig_moves.moveinsns))}. diff --git a/lib/hipe/regalloc/hipe_ls_regalloc.erl b/lib/hipe/regalloc/hipe_ls_regalloc.erl new file mode 100644 index 0000000000..d06b938bea --- /dev/null +++ b/lib/hipe/regalloc/hipe_ls_regalloc.erl @@ -0,0 +1,788 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ===================================================================== +%% @doc +%%
+%%  Module   :	hipe_ls_regalloc
+%%  Purpose  :  Perform a register allocation based on the 
+%%              "linear-scan algorithm".
+%%  Notes    :  * This is an implementation of 
+%%                "Linear Scan Register Allocation" by 
+%%                Massimiliano Poletto & Vivek Sarkar described in
+%%                ACM TOPLAS Vol 21, No 5, September 1999.
+%%
+%%              * This implementation is target-independent and
+%%                requires a target specific interface module
+%%                as argument.  
+%%                (Still waiting for a modular module system for Erlang.)
+%% 
+%% @end +%% +%% History : * 2000-04-07 Erik Johansson (happi@it.uu.se): Created. +%% * 2001-07-16 Erik Johansson: Made less sparc-specific. +%% ===================================================================== +%% Exported functions (short description): +%% regalloc(CFG,PhysRegs,Entrypoints, Options) -> +%% {Coloring, NumberOfSpills} +%% Takes a CFG and returns a coloring of all used registers. +%% PhysRegs should be a list of available physical registers. +%% Entrypoints should be a list of names of Basic Blocks that have +%% external entry points. +%% +%% The Coloring will be in the form of the "allocation datastructure" +%% described below, that is, a list of tuples on the form +%% {Name, {reg, PhysicalRegister}} or +%% {Name, {spill, SpillIndex}} +%% The NumberOfSpills is either 0 indicating no spill or the +%% SpillIndex of the last spilled register. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_ls_regalloc). +-export([regalloc/7]). + +%%-define(DEBUG,1). +-define(HIPE_INSTRUMENT_COMPILER, true). + +-include("../main/hipe.hrl"). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @spec +%% regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, +%% Target) -> +%% {Coloring, NumberOfSpills} +%% CFG = cfg() +%% PhysRegs = [reg()] +%% Entrypoints = [labelname()] +%% DontSpill = reg() +%% Options = proplist:proplist() +%% Target = atom() +%% Coloring = [{temp(), pos()}] +%% NumberOfSpills = integer() +%% reg() = integer() +%% temp() = integer() +%% pos() = {reg, reg()} | {spill, integer()} +%% +%% @doc +%% Calculates an allocation of registers using a linear_scan algorithm. +%% There are three steps in the algorithm: +%%
    +%%
  1. Calculate live-ranges for all registers.
  2. +%%
  3. Calculate live-intervals for each register. +%% The live interval consists of a start position and an end +%% position. These are the first definition and last use of the +%% register given as instruction numbers in a breadth-first +%% traversal of the control-flow-graph.
  4. +%%
  5. Perform a linear scan allocation over the live intervals.
  6. +%%
+%% @end +%%- - - - - - - - - - - - - - - - - - - - - - - - +regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> + ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), + %% Step 1: Calculate liveness (Call external implementation.) + Liveness = liveness(CFG, Target), + ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), + USIntervals = calculate_intervals(CFG, Liveness, + Entrypoints, Options, Target), + ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), + Intervals = sort_on_start(USIntervals), + ?debug_msg("sort intervals (done) ~w\n", [erlang:statistics(runtime)]), + %% ?debug_msg("Intervals ~w\n", [Intervals]), + ?debug_msg("No intervals: ~w\n",[length(Intervals)]), + ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), + Allocation = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), + ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), + Allocation. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% Step 2: Calculate live-intervals for each register. %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% calculate_intervals(CFG,Liveness,Entrypoints, Options, Target) +%% CFG: The Control-Flow Graph. +%% Liveness: A map of live-in and live-out sets for each Basic-Block. +%% Entrypoints: A set of BB names that have external entrypoints. +%% +calculate_intervals(CFG,Liveness,_Entrypoints, Options, Target) -> + %% Add start point for the argument registers. + Args = arg_vars(CFG, Target), + Interval = + add_def_point(Args, 0, empty_interval(Target:number_of_temporaries(CFG))), + %% Interval = add_livepoint(Args, 0, empty_interval()), + Worklist = + case proplists:get_value(ls_order, Options) of + reversepostorder -> + Target:reverse_postorder(CFG); + breadth -> + Target:breadthorder(CFG); + postorder -> + Target:postorder(CFG); + inorder -> + Target:inorder(CFG); + reverse_inorder -> + Target:reverse_inorder(CFG); + preorder -> + Target:preorder(CFG); + prediction -> + Target:predictionorder(CFG); + random -> + Target:labels(CFG); + _ -> + Target:reverse_postorder(CFG) + end, + %% ?inc_counter(bbs_counter, length(Worklist)), + %% ?debug_msg("No BBs ~w\n",[length(Worklist)]), + intervals(Worklist, Interval, 1, CFG, Liveness, Target). + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% intervals(WorkList, Intervals, InstructionNr, CFG, Liveness, Target) +%% WorkList: List of BB-names to handle. +%% Intervals: Intervals seen so far (sorted on register names). +%% InstructionNr: The number of examined insturctions. +%% CFG: The Control-Flow Graph. +%% Liveness: A map of live-in and live-out sets for each Basic-Block. +%% Target: The backend for which we generate code. +%%- - - - - - - - - - - - - - - - - - - - - - - - +intervals([L|ToDO], Intervals, InstructionNr, CFG, Liveness, Target) -> + %% Add all variables that are live at the entry of this block + %% to the interval data structure. + LiveIn = livein(Liveness, L, Target), + Intervals2 = add_def_point(LiveIn, InstructionNr, Intervals), + LiveOut = liveout(Liveness, L, Target), + + %% Traverse this block instruction by instruction and add all + %% uses and defines to the intervals. + Code = hipe_bb:code(bb(CFG, L, Target)), + {Intervals3, NewINr} = + traverse_block(Code, InstructionNr+1, Intervals2, Target), + + %% Add end points for the registers that are in the live-out set. + Intervals4 = add_use_point(LiveOut, NewINr+1, Intervals3), + + intervals(ToDO, Intervals4, NewINr+1, CFG, Liveness, Target); +intervals([], Intervals, _, _, _, _) -> + %% Return the calculated intervals + LI = interval_to_list(Intervals), + %% io:format("Intervals:~n~p~n", [LI]), + LI. + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% traverse_block(Code, InstructionNo, Intervals, Unchanged) +%% Examine each instruction in the Code: +%% For each temporary T used or defined by instruction number N: +%% extend the interval of T to include N. +%%- - - - - - - - - - - - - - - - - - - - - - - - +traverse_block([Instruction|Is],InstrNo,Intervals, Target) -> + %% Get defined temps. + DefsSet = defines(Instruction, Target), + Intervals1 = add_def_point(DefsSet, InstrNo, Intervals), + + %% Get used temps. + UsesSet = uses(Instruction, Target), + %% Extend the intervals for these temporaries to include InstrNo. + Intervals2 = add_use_point(UsesSet, InstrNo, Intervals1), + + %% Handle the next instruction. + traverse_block(Is,InstrNo+1,Intervals2,Target); +traverse_block([], InstrNo, Intervals, _) -> + %% Return the new intervals and the number of the next instruction. + {Intervals,InstrNo}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% Step 3. Do a linear scan allocation over the live intervals. %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% allocate(Intervals, PhysicalRegisters, DontSpill, Target) +%% +%% This function performs the linear scan algorithm. +%% Intervals contains the start and stop position of each register, +%% sorted on increasing startpositions +%% PhysicalRegisters is a list of available Physical registers to use. +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - +allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target) -> + ActiveRegisters =[], + AllocatedRegisters = empty_allocation(), + AllFree = create_freeregs(PhysRegs), + allocate(Intervals, AllFree, ActiveRegisters, + AllocatedRegisters, SpillIndex, DontSpill, Target). +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% allocate(Intervals, Free, Active, Allocated, SpillIndex, Target) +%% Iterates of each register interval. +%% Intervals: The list of register intervals. +%% Free: Currently available physical registers. +%% Active: Currently used physical registers (sorted on increasing +%% interval enpoints) +%% Allocated: The mapping of register names to physical registers or +%% to spill positions. +%% SpillIndex: The number of spilled registers. +%%- - - - - - - - - - - - - - - - - - - - - - - - +allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) -> + %io:format("~nAlloc:~n~p", [Alloc]), + %% Remove from the active list those registers who's intervals + %% ends before the start of the current interval. + {NewActive, NewFree} = + expire_old_intervals(Active, startpoint(RegInt), Free, Target), + ?debug_msg("Alloc interval: ~w, Free ~w\n",[RegInt, NewFree]), + %% Get the name of the temp in the current interval. + Temp = reg(RegInt), + case is_precoloured(Temp, Target) of + true -> + %% This is a precoloured register we don't need to find a color + %% Get the physical name of the register. + PhysName = physical_name(Temp, Target), + %% Bind it to the precoloured name. + NewAlloc = alloc(Temp, PhysName, Alloc), + case is_global(Temp, Target) of + true -> + %% this is a global precoloured register + allocate(RIS, NewFree, NewActive, + NewAlloc, SpillIndex, DontSpill, Target); + false -> + case is_free(PhysName, NewFree) of + {true,Rest} -> + allocate(RIS, Rest, + add_active(endpoint(RegInt), startpoint(RegInt), + PhysName, Temp, NewActive), + NewAlloc, + SpillIndex, DontSpill, Target); + false -> + %% Some other temp has taken this precoloured register, + %% throw it out. + {OtherActive, NewActive2} = deactivate(PhysName, NewActive), + OtherTemp = active_name(OtherActive), + OtherEnd = active_endpoint(OtherActive), + OtherStart = active_startpoint(OtherActive), + NewActive3 = add_active(endpoint(RegInt), startpoint(RegInt), + PhysName, Temp, NewActive2), + case exists_free_register(OtherStart, NewFree) of + {true, NewPhys, RestFree} -> + allocate(RIS, RestFree, + add_active(OtherEnd, OtherStart, + NewPhys, OtherTemp, NewActive3), + alloc(OtherTemp,NewPhys,NewAlloc), + SpillIndex, DontSpill, Target); + false -> + NewSpillIndex = Target:new_spill_index(SpillIndex), + {NewAlloc2, NewActive4} = + spill(OtherTemp, OtherEnd, OtherStart, NewActive3, + NewAlloc, SpillIndex, DontSpill, Target), + allocate(RIS, + NewFree, + NewActive4, + NewAlloc2, NewSpillIndex, DontSpill, Target) + end + end + end; + false -> + %% This is not a precoloured register. + case NewFree of + [] -> + %% No physical registers available, we have to spill. + NewSpillIndex = Target:new_spill_index(SpillIndex), + {NewAlloc, NewActive2} = + spill(Temp, endpoint(RegInt), startpoint(RegInt), + Active, Alloc, SpillIndex, DontSpill, Target), + %% io:format("Spilled ~w\n",[NewAlloc]), + allocate(RIS, NewFree, NewActive2, NewAlloc, NewSpillIndex, + DontSpill, Target); + + [{FreeReg,_Start} | Regs] -> + %% The register FreeReg is available, let's use it. + %%io:format("Allocating Reg:~p~n",[FreeReg]), + allocate(RIS,Regs, + add_active(endpoint(RegInt), startpoint(RegInt), + FreeReg, Temp, NewActive), + alloc(Temp, FreeReg, Alloc), + SpillIndex, DontSpill, Target) + end + end; +allocate([],_,_,Alloc,SpillIndex, _, _) -> + %% No more register intervals to handle + %% return the result. + %%io:format("~nAlloc:~n~p", [Alloc]), + {Alloc, SpillIndex}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% expire_old_intervals(ActiveRegisters, CurrentPos, FreeRegisters) +%% Remove all registers that have live-ranges that ends before the +%% current position from the active list and put them into the free +%% list instead. +%% +%% --------------------------------------------------------------------- +expire_old_intervals([Act|Acts] = AllActives, CurrentPos, Free, Target) -> + %% Does the live-range of the first active register end before + %% the current position? + + %% We expand multimove before regalloc, ignore the next 2 lines. + %% %% We don't free registers that end at the current position, + %% %% since a multimove can decide to do the moves in another order... + case active_endpoint(Act) =< CurrentPos of + true -> %% Yes -> Then we can free that register. + Reg = active_reg(Act), + %% Add the register to the free pool. + NewFree = + case is_arg(Reg, Target) of + true -> + [{Reg, CurrentPos}|Free]; + false -> + [{Reg, CurrentPos}|Free] + %% Here we could try appending the + %% register to get a more widespread + %% use of registers. + %% Free ++ [active_reg(Act)]); + %% At the moment this does not seem to + %% improve performance at all, + %% on the other hand, the cost is very low. + end, + expire_old_intervals(Acts, CurrentPos, NewFree, Target); + false -> + %% No -> Then we cannot free any more registers. + %% (Since they are sorted on endpoints...) + {AllActives, Free} + end; +expire_old_intervals([], _, Free, _) -> + {[], Free}. + +deactivate(Reg, [Active|Actives]) -> + case Reg =:= active_reg(Active) of + true -> + {Active, Actives}; + false -> + {TheActive, NewActives} = deactivate(Reg, Actives), + {TheActive, [Active|NewActives]} + end; +deactivate(_,[]) -> {no,[]}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% spill(CurrentReg, CurrentEndpoint, Active, Alloc, SpillIndex, +%% DontSpill, Target) +%% Find the register with the longest live range and spill it to memory. +%% +%% --------------------------------------------------------------------- +spill(CurrentReg, CurrentEndpoint,CurrentStartpoint, + Active = [_|_], + Alloc, SpillIndex, + DontSpill, Target) -> + ?debug_msg("spilling one of ~w\nDOnt spill ~w\n", + [[CurrentReg|Active], DontSpill]), + + %% Find a spill candidate (one of the active): + %% The register with the longest live-range. + {NewActive, SpillCandidate} = butlast_last(Active), + + SpillStartpoint = active_startpoint(SpillCandidate) , + SpillEndpoint = active_endpoint(SpillCandidate) , + SpillName = active_name(SpillCandidate), + SpillPhysName = active_reg(SpillCandidate), + + case SpillEndpoint > CurrentEndpoint of + true -> + %% There is an already allocated register that has + %% a longer live-range than the current register. + case can_spill(SpillName, DontSpill, Target) and + (SpillStartpoint =< CurrentStartpoint) of + false -> + {NewAlloc, NewActive2} = + spill(CurrentReg, CurrentEndpoint, CurrentStartpoint, + NewActive, Alloc, SpillIndex, DontSpill, Target), + {NewAlloc, + add_active(SpillEndpoint, SpillStartpoint, SpillPhysName, + SpillName, NewActive2)}; + true -> + %% It is not precoloured... or have too short liverange + + %% Allocate SpillCandidate to spill-slot SpillIndex + SpillAlloc = + spillalloc(active_name(SpillCandidate), SpillIndex, + Alloc), + + %% Allocated the current register to the physical register + %% used by the spill candidate. + NewAlloc = alloc(CurrentReg, SpillPhysName, SpillAlloc), + + %% Add the current register to the active registers + NewActive2 = + add_active(CurrentEndpoint, CurrentStartpoint, + SpillPhysName, CurrentReg, NewActive), + {NewAlloc, NewActive2} + end; + + false -> + %% The current register has the longest live-range. + + case can_spill(CurrentReg, DontSpill, Target) of + false -> + %% Cannot spill a precoloured register + {NewAlloc, NewActive2} = + spill(SpillName, SpillEndpoint, SpillStartpoint, + NewActive, Alloc, SpillIndex, DontSpill, Target), + NewActive3 = + add_active(CurrentEndpoint, CurrentStartpoint, + SpillPhysName, CurrentReg, NewActive2), + {NewAlloc, NewActive3}; + true -> + %% It is not precoloured... + %% Allocate the current register to spill-slot SpillIndex + {spillalloc(CurrentReg, SpillIndex, Alloc), Active} + end + end; +spill(CurrentReg, _CurrentEndpoint, _CurrentStartpoint, [], + Alloc, SpillIndex, DontSpill, Target) -> + case can_spill(CurrentReg, DontSpill, Target) of + false -> %% Can't spill current! + ?error_msg("Can't allocate registers\n",[]), + ?EXIT({cannot_allocate_regs}); + true -> %% Can spill current. + %% Allocate the current register to spill-slot SpillIndex + {spillalloc(CurrentReg, SpillIndex, Alloc), []} + end. + +can_spill(Name, DontSpill, Target) -> + (Name < DontSpill) and (not is_precoloured(Name, Target)). + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% D A T A S T R U C T U R E S %% +%% & %% +%% A U X I L I A R Y F U N C T I O N S %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The "allocation datastructure" +%% +%% This is an order list of register names paired with their allocations. +%% {Name, Allocation} +%% The allocation is either {reg, physical register} or +%% {spill, spill index} +%% +%% --------------------------------------------------------------------- +empty_allocation() -> []. + +alloc(Name,Reg,[{Name,_}|A]) -> + [{Name,{reg,Reg}}|A]; +alloc(Name,Reg,[{Name2,Binding}|Bindings]) when Name > Name2 -> + [{Name2,Binding}|alloc(Name,Reg,Bindings)]; +alloc(Name,Reg,Bindings) -> + [{Name,{reg,Reg}}|Bindings]. + +spillalloc(Name,N,[{Name,_}|A]) -> + ?debug_msg("Spilled ~w\n",[Name]), + [{Name,{spill,N}}|A]; +spillalloc(Name,N,[{Name2,Binding}|Bindings]) when Name > Name2 -> + [{Name2,Binding}|spillalloc(Name,N,Bindings)]; +spillalloc(Name,N,Bindings) -> + [{Name,{spill,N}}|Bindings]. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% +butlast_last([X]) -> + {[],X}; +butlast_last([X|Y]) -> + {L,Last} = butlast_last(Y), + {[X|L],Last}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The active datastructure. +%% Keeps tracks of currently active (allocated) physical registers. +%% It is sorted on end points in the intervals +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_active(Endpoint, StartPoint, PhysReg, RegName, + [{P1,R1,O1,S1}|Active]) when P1 < Endpoint -> + [{P1,R1,O1,S1}|add_active(Endpoint, StartPoint, PhysReg, RegName, Active)]; +add_active(Endpoint, StartPoint, PhysReg, RegName, Active) -> + [{Endpoint, PhysReg, RegName, StartPoint}|Active]. + +active_reg({_,PhysReg,_,_}) -> + PhysReg. +active_endpoint({EndPoint,_,_,_}) -> + EndPoint. +active_startpoint({_,_,_,StartPoint}) -> + StartPoint. +active_name({_,_,RegName,_}) -> + RegName. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Interval data structure. +%% +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - + +%% mk_interval(Name, Start, End) -> +%% {Name, Start, End}. + +endpoint({_R,_S,Endpoint}) -> + Endpoint. +startpoint({_R,Startpoint,_E}) -> + Startpoint. +reg({RegName,_S,_E}) -> + RegName. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Intervals data structure. + +sort_on_start(I) -> + lists:keysort(2, I). + +-ifdef(gb_intervals). +empty_interval(_) -> + gb_trees:empty(). + +interval_to_list(Intervals) -> + lists:flatten( + lists:map( + fun({T, I}) when list(I) -> + lists:map( + fun ({none, End}) -> + {T,End,End}; + ({Beg, none}) -> + {T,Beg, Beg} + end, + I); + ({T,{B,E}}) -> {T, B, E} + end, + gb_trees:to_list(Intervals))). + +add_use_point([Temp|Temps],Pos,Intervals) -> + %% Extend the old interval... + NewInterval = + case gb_trees:lookup(Temp, Intervals) of + %% This temp has an old interval... + {value, Value} -> + %% ... extend it. + extend_interval(Pos, Value); + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos} + end, + %% Add or update the extended interval. + Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals), + %% Add the rest of teh temporaries. + add_use_point(Temps, Pos, Intervals2); +add_use_point([], _, I) -> + %% No more to add return the interval. + I. + +add_def_point([Temp|Temps],Pos,Intervals) -> + %% Extend the old interval... + NewInterval = + case gb_trees:lookup(Temp, Intervals) of + %% This temp has an old interval... + {value, Value} -> + %% ... extend it. + extend_interval(Pos, Value); + + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos} + end, + %% Add or update the extended interval. + Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals), + %% Add the rest of the temporaries. + add_def_point(Temps, Pos, Intervals2); +add_def_point([], _, I) -> + %% No more to add return the interval. + I. + +extend_interval(Pos, {Beginning, End}) -> + %% If this position occures before the beginning + %% of the interval, then extend the beginning to + %% this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occures after the end + %% of the interval, then extend the end to + %% this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}. + +-else. %% isdef gb_intervals + +empty_interval(N) -> + hipe_vectors:new(N, none). + +interval_to_list(Intervals) -> + add_indices(hipe_vectors:vector_to_list(Intervals),0). + +add_indices([{B,E}|Xs],N) -> + [{N,B,E}|add_indices(Xs,N+1)]; +add_indices([List|Xs],N) when is_list(List) -> + flatten(List,N,Xs); +add_indices([none|Xs],N) -> + add_indices(Xs,N+1); +add_indices([],_N) -> []. + +flatten([{none, End}|Rest], N, More) -> + [{N,End,End} | flatten(Rest, N, More)]; +flatten([{Beg, none}|Rest], N ,More) -> + [{N,Beg,Beg} | flatten(Rest, N, More)]; +flatten([],N,More) -> + add_indices(More,N+1). + +add_use_point([Temp|Temps],Pos,Intervals) -> + %% Extend the old interval... + NewInterval = + case hipe_vectors:get(Intervals, Temp) of + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos}; + %% This temp has an old interval... + Value -> + %% ... extend it. + extend_interval(Pos, Value) + end, + %% Add or update the extended interval. + Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval), + %% Add the rest of the temporaries. + add_use_point(Temps, Pos, Intervals2); +add_use_point([], _, I) -> + %% No more to add return the interval. + I. + +add_def_point([Temp|Temps],Pos,Intervals) -> + %% Extend the old interval... + NewInterval = + case hipe_vectors:get(Intervals, Temp) of + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos}; + %% This temp has an old interval... + Value -> + %% ... extend it. + extend_interval(Pos, Value) + end, + %% Add or update the extended interval. + Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval), + %% Add the rest of teh temporaries. + add_def_point(Temps, Pos, Intervals2); +add_def_point([], _, I) -> + %% No more to add return the interval. + I. + +extend_interval(Pos, {Beginning, End}) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occures after the end + %% of the interval, then extend the end to + %% this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}. +-endif. %% gb_intervals + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Freel data structure. +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - + +is_free(R, Free) -> + is_free(R, Free, []). + +is_free(R, [{R,_}|Rest], Acc) -> + {true,lists:reverse(Acc)++Rest}; +is_free(R, [X|Rs],Acc) -> + is_free(R, Rs, [X|Acc]); +is_free(_, [], _) -> + false. + +exists_free_register(Start, Regs) -> + exists_free_register(Start, Regs, []). + +exists_free_register(Start, [{Phys, Start0}|Rest], Acc) + when Start > Start0 -> + {true, Phys, lists:reverse(Acc)++Rest}; +exists_free_register(Start, [Free|Rest], Acc) -> + exists_free_register(Start, Rest, [Free|Acc]); +exists_free_register(_, [], _) -> + false. + +create_freeregs([Phys|Rest]) -> + [{Phys,-1}|create_freeregs(Rest)]; +create_freeregs([]) -> + []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% XXX: Make this efficient somehow... +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +liveness(CFG, Target) -> + Target:analyze(CFG). + +bb(CFG, L, Target) -> + Target:bb(CFG,L). + +livein(Liveness,L, Target) -> + regnames(Target:livein(Liveness,L), Target). + +liveout(Liveness,L, Target) -> + regnames(Target:liveout(Liveness,L), Target). + +uses(I, Target) -> + regnames(Target:uses(I), Target). + +defines(I, Target) -> + regnames(Target:defines(I), Target). + +is_precoloured(R, Target) -> + Target:is_precoloured(R). + +is_global(R, Target) -> + Target:is_global(R). + +physical_name(R, Target) -> + Target:physical_name(R). + +regnames(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. + +arg_vars(CFG, Target) -> + Target:args(CFG). + +is_arg(Reg, Target) -> + Target:is_arg(Reg). diff --git a/lib/hipe/regalloc/hipe_moves.erl b/lib/hipe/regalloc/hipe_moves.erl new file mode 100644 index 0000000000..afec4aa4ce --- /dev/null +++ b/lib/hipe/regalloc/hipe_moves.erl @@ -0,0 +1,165 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_moves). +-export([new/1, + update_movelist/3, + node_moves/2, + move_related/2, + node_movelist/2, + get_move/2, + is_empty_worklist/1, + worklist_get_and_remove/1, + remove_worklist/2, + remove_active/2, + add_worklist/2, + add_active/2, + member_active/2 + ]). +-ifdef(DEBUG_PRINTOUTS). +-export([print_memberships/1]). +-endif. + +-record(movesets, + {worklist, % Moves enabled for possible coalescing + membership, % Maps move numbers to 'worklist' or 'active' or 'none' + moveinsns, % Maps move numbers to move insns ({Dst,Src}-tuples) + movelist % Mapping from node to list of moves it's associated with + }). + +%%-ifndef(DEBUG). +%%-define(DEBUG,true). +%%-endif. +-include("../main/hipe.hrl"). + +worklist(MoveSets) -> MoveSets#movesets.worklist. +movelist(MoveSets) -> MoveSets#movesets.movelist. + +set_worklist(New_worklist, MoveSets) -> + MoveSets#movesets{worklist = New_worklist}. +set_movelist(New_movelist, MoveSets) -> + MoveSets#movesets{movelist = New_movelist}. + +update_movelist(Node, MoveList, MoveSets) -> + set_movelist(hipe_vectors:set(movelist(MoveSets), Node, MoveList), + MoveSets). + +new(IG) -> + {MoveList,NrMoves,MoveInsns} = hipe_ig:get_moves(IG), + Worklist = case NrMoves of 0 -> []; _ -> lists:seq(0, NrMoves-1) end, + #movesets{worklist = Worklist, + membership = hipe_bifs:array(NrMoves, 'worklist'), + moveinsns = MoveInsns, + movelist = MoveList}. + +remove_worklist(Element, MoveSets) -> + Membership = MoveSets#movesets.membership, + %% check for 'worklist' membership here, if debugging + hipe_bifs:array_update(Membership, Element, 'none'), + %% Implementing this faithfully would require a SET structure, such + %% as an ordset or a gb_set. However, removal of elements not at the + %% head of the structure is a fairly infrequent event (only done by + %% FreezeMoves()), so instead we let the elements remain but mark + %% them as being removed. It is the task of worklist_get_and_remove() + %% to filter out any stale elements. + MoveSets. + +remove_active(Element, MoveSets) -> + Membership = MoveSets#movesets.membership, + %% check for 'active' membership here, if debugging + hipe_bifs:array_update(Membership, Element, 'none'), + MoveSets. + +add_worklist(Element, MoveSets) -> + Membership = MoveSets#movesets.membership, + %% check for 'none' membership here, if debugging + hipe_bifs:array_update(Membership, Element, 'worklist'), + set_worklist([Element | worklist(MoveSets)], MoveSets). + +add_active(Element, MoveSets) -> + Membership = MoveSets#movesets.membership, + %% check for 'none' membership here, if debugging + hipe_bifs:array_update(Membership, Element, 'active'), + MoveSets. + +member_active(Element, MoveSets) -> + hipe_bifs:array_sub(MoveSets#movesets.membership, Element) =:= 'active'. + +is_empty_worklist(MoveSets) -> + %% This is an approximation. See worklist_get_and_remove(). + worklist(MoveSets) =:= []. + +worklist_get_and_remove(MoveSets) -> + worklist_get_and_remove(worklist(MoveSets), MoveSets#movesets.membership, MoveSets). + +worklist_get_and_remove([], _Membership, MoveSets) -> + {[], set_worklist([], MoveSets)}; +worklist_get_and_remove([Move|Worklist], Membership, MoveSets) -> + case hipe_bifs:array_sub(Membership, Move) of + 'worklist' -> + hipe_bifs:array_update(Membership, Move, 'none'), + {Move, set_worklist(Worklist, MoveSets)}; + _ -> + worklist_get_and_remove(Worklist, Membership, MoveSets) + end. + +node_moves(Node, MoveSets) -> + Associated = node_movelist(Node, MoveSets), + Membership = MoveSets#movesets.membership, + %% The ordsets:union() in hipe_coalescing_regalloc:combine() + %% constrains us to return an ordset here. + [X || X <- Associated, hipe_bifs:array_sub(Membership, X) =/= 'none']. + +move_related(Node, MoveSets) -> + %% Same as node_moves(Node, MoveSets) =/= [], but less expensive to compute. + %% XXX: George&Appel'96 hints that this should be maintained as a per-node counter. + move_related2(node_movelist(Node, MoveSets), MoveSets#movesets.membership). + +move_related2([], _Membership) -> false; +move_related2([Move|MoveSets], Membership) -> + case hipe_bifs:array_sub(Membership, Move) of + 'none' -> move_related2(MoveSets, Membership); + _ -> true % 'active' or 'worklist' + end. + +node_movelist(Node, MoveSets) -> + hipe_vectors:get(movelist(MoveSets), Node). + +get_move(Move, MoveSets) -> + element(Move+1, MoveSets#movesets.moveinsns). + +%%---------------------------------------------------------------------- +%% Print functions - only used for debugging + +-ifdef(DEBUG_PRINTOUTS). +print_memberships(MoveSets) -> + ?debug_msg("Move memeberships:\n", []), + Membership = MoveSets#movesets.membership, + NrMoves = hipe_bifs:array_length(Membership), + print_membership(NrMoves, Membership). + +print_membership(0, _) -> + true; +print_membership(Element, Membership) -> + NextElement = Element - 1, + ?debug_msg("move ~w ~w\n", [NextElement, hipe_bifs:array_sub(Membership, NextElement)]), + print_membership(NextElement, Membership). +-endif. + diff --git a/lib/hipe/regalloc/hipe_node_sets.erl b/lib/hipe/regalloc/hipe_node_sets.erl new file mode 100644 index 0000000000..b5e2971c4d --- /dev/null +++ b/lib/hipe/regalloc/hipe_node_sets.erl @@ -0,0 +1,48 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_node_sets). + +-export([new/0, + spilled/1, + colored/1, + add_spilled/2, + add_colored/2 + ]). + +-record(node_sets, + {spilled, % Nodes marked for spilling + colored % Nodes succesfully colored + }). + +spilled(Node_sets) -> Node_sets#node_sets.spilled. +colored(Node_sets) -> Node_sets#node_sets.colored. + +set_spilled(Spilled, Node_sets) -> Node_sets#node_sets{spilled = Spilled}. +set_colored(Colored, Node_sets) -> Node_sets#node_sets{colored = Colored}. + +new() -> + #node_sets{spilled = [], colored = []}. + +add_spilled(Node, Node_sets) -> + set_spilled([Node | spilled(Node_sets)], Node_sets). + +add_colored(Node, Node_sets) -> + set_colored([Node | colored(Node_sets)], Node_sets). diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl new file mode 100644 index 0000000000..183ec1994c --- /dev/null +++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl @@ -0,0 +1,2043 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%----------------------------------------------------------------------- +%% File : hipe_optimistic_regalloc.erl +%% Authors : NilsOla Linnermark +%% Petter Holmberg +%% Purpose : Play paintball with registers on a target machine. We win +%% if they are all colored. This is an optimistic coalescing +%% register allocator. +%% Created : Spring 2005 +%%----------------------------------------------------------------------- + +-module(hipe_optimistic_regalloc). +-export([regalloc/5]). + +-ifndef(DEBUG). +%%-define(DEBUG,true). +-else. +-ifndef(COMPARE_ITERATED_OPTIMISTIC). +%% If this macro is turned on you can easily compare +%% each intermediate step in the iterated coalescing +%% register allocator and the optimitsitc coalescing +%% register allocator. This is useful for debugging - +%% many small erlang functions should render the same +%% register allocaton for both allocators. +-define(COMPARE_ITERATED_OPTIMISTIC, true). +-endif. +-endif. +-include("../main/hipe.hrl"). +-ifdef(DEBUG_PRINTOUTS). +-define(print_adjacent(IG), hipe_ig:print_adjacent(IG)). +-define(print_degrees(IG), hipe_ig:print_degrees(IG)). +-define(print_spill_costs(IG), hipe_ig:print_spill_costs(IG)). +-define(mov_print_memberships(MV), hipe_moves:print_memberships(MV)). +-define(reg_print_memberships(WL), hipe_reg_worklists:print_memberships(WL)). +-define(print_alias(A), printAlias(A)). +-define(print_colors(T,C), printColors(T,C)). +-else. +-define(print_adjacent(IG), no_print). +-define(print_degrees(IG), no_print). +-define(print_spill_costs(IG), no_print). +-define(mov_print_memberships(MV), no_print). +-define(reg_print_memberships(WL), no_print). +-define(print_alias(A), no_print). +-define(print_colors(T,C), no_print). +-endif. + + +%%----------------------------------------------------------------------- +%% Function: regalloc +%% +%% Description: Creates a K coloring for a function. +%% Parameters: +%% CFG -- A control flow graph +%% SpillIndex -- Last index of spill variable +%% SpillLimit -- Temporaris with numbers higher than this have +%% infinit spill cost. +%% Consider changing this to a set. +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% Coloring -- A coloring for specified CFG +%% SpillIndex0 -- A new spill index +%%----------------------------------------------------------------------- +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> + ?debug_msg("optimistic ~w\n",[Target]), + ?debug_msg("CFG: ~p\n",[CFG]), + %% Build interference graph + ?debug_msg("Build IG\n",[]), + IG_O = hipe_ig:build(CFG, Target), + IG = hipe_ig:build(CFG, Target), + ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), + ?debug_msg("IG:\n",[]), + ?print_adjacent(IG), + ?print_degrees(IG), + ?print_spill_costs(IG), + + SavedSpillCosts = hipe_ig:spill_costs(IG), + SavedAdjList = hipe_ig:adj_list(IG), + + ?debug_msg("Init\n",[]), + No_temporaries = Target:number_of_temporaries(CFG), + ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), + Allocatable = Target:allocatable(), + K = length(Allocatable), + All_colors = colset_from_list(Allocatable), + ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]), + + %% Add registers with their own coloring + ?debug_msg("Moves\n",[]), + Move_sets_O = hipe_moves:new(IG_O), + Move_sets = hipe_moves:new(IG), + ?debug_msg("Move_sets:\n ~p\n",[Move_sets]), + ?mov_print_memberships(Move_sets), + + ?debug_msg("Build Worklist\n",[]), + Worklists_O = hipe_reg_worklists:new(IG_O, Target, CFG, Move_sets_O, K, No_temporaries), + ?debug_msg("Worklists:\n ~p\n", [Worklists_O]), + ?reg_print_memberships(Worklists_O), + + Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), + ?debug_msg("New Worklists:\n ~p\n", [Worklists]), + ?reg_print_memberships(Worklists), + + Alias_O = initAlias(No_temporaries), + Alias = initAlias(No_temporaries), + ?print_alias(Alias), + + ?debug_msg("Do coloring\n~p~n",[Worklists_O]), + {IG0_O, Worklists0_O, Moves0_O, Alias0_O} = + do_coloring(IG_O, Worklists_O, Move_sets_O, Alias_O, + K, SpillLimit, Target), + ?debug_msg("IG_O after color:\n ~p\n",[IG0_O]), + ?print_adjacent(IG0_O), + ?print_degrees(IG0_O), + ?print_spill_costs(IG0_O), + ?debug_msg("Move_sets after color:\n ~p\n",[Moves0_O]), + ?mov_print_memberships(Moves0_O), + ?debug_msg("Worklists after color:\n ~p\n", [Worklists0_O]), + ?reg_print_memberships(Worklists0_O), + + {IG0, Moves0, Alias0, Worklists0} = + do_coalescing(IG, Worklists, Move_sets, Alias, K, Target), + ?debug_msg("IG after coalescing:\n",[]), + ?print_adjacent(IG0), + ?print_degrees(IG0), + ?print_spill_costs(IG0), + ?debug_msg("Move_sets after coalescing:\n ~p\n",[Moves0]), + ?mov_print_memberships(Moves0), + ?debug_msg("New Worklists after coalescing:\n ~p\n", + [Worklists0]), + ?reg_print_memberships(Worklists0), + + {IG1, Worklists1, Moves1, Alias1} = + do_simplify_or_spill(IG0, Worklists0, Moves0, Alias0, + K, SpillLimit, Target), + ?debug_msg("IG after simplify_or_spill:\n",[]), + ?print_adjacent(IG1), + ?print_degrees(IG1), + ?print_spill_costs(IG1), + ?debug_msg("Saved spill costs ~p~n", [SavedSpillCosts]), + ?debug_msg("Move_sets after simplify_or_spill:\n ~p\n",[Moves1]), + ?mov_print_memberships(Moves1), + ?debug_msg("New Worklists after simplify_or_spill:\n ~p\n", + [Worklists1]), + ?reg_print_memberships(Worklists1), + ?print_alias(Alias1), + + %% only for testing undoCoalescing and member_coalesced_to + %test_undoCoalescing(No_temporaries, Alias1, Worklists1), + + %% only for testing fixAdj + %?debug_msg("adj_lists_before_fixAdj ~n~p~n", [hipe_ig:adj_list(IG1)]), + %IG2 = test_fixAdj(No_temporaries, SavedAdjList, IG1, Target), + %?debug_msg("adj_lists__after_fixAdj ~n~p~n", [hipe_ig:adj_list(IG2)]), + + ?debug_msg("Init node sets\n",[]), + Node_sets = hipe_node_sets:new(), + %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + ?debug_msg("Default coloring\n",[]), + {Color0,Node_sets1} = + defaultColoring(Target:all_precoloured(), + initColor(No_temporaries), Node_sets, Target), + ?debug_msg("Color0\n",[]), + ?print_colors(No_temporaries, Color0), + + ?debug_msg("----------------------Assign colors _N\n",[]), + + Stack = hipe_reg_worklists:stack(Worklists1), + ?debug_msg("The stack _N ~p~n", [Stack]), + %SortedStack = sort_stack(Stack), + %?debug_msg("The stack _N ~p~n", [SortedStack]), + + %?debug_msg("Nodes _N ~w~n", [Node_sets1]), + + {Color1,Node_sets2,Alias2} = + assignColors(Worklists1, Stack, Node_sets1, Color0, + No_temporaries, SavedAdjList, SavedSpillCosts, IG1, Alias1, All_colors, Target), + ?print_colors(No_temporaries, Color1), + ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), + + ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), + Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + ?debug_msg("Coloring ~p\n",[Coloring]), + SortedColoring = { sort_stack(element(1, Coloring)), element(2, Coloring)}, + ?debug_msg("SortedColoring ~p\n",[SortedColoring]), + %%Coloring. + ?debug_msg("----------------------Assign colors _O\n",[]), + {Color1_O,Node_sets2_O} = + assignColors_O(hipe_reg_worklists:stack(Worklists0_O), Node_sets1, Color0, + Alias0_O, All_colors, Target), + ?print_colors(No_temporaries, Color1_O), + ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2_O,No_temporaries]), + + ?debug_msg("Build mapping ~w\n",[Node_sets2_O]), + Coloring_O = build_namelist_O(Node_sets2_O,SpillIndex,Alias0_O,Color1_O), + ?debug_msg("Coloring_O ~p\n",[Coloring_O]), + SortedColoring_O = {sort_stack(element(1, Coloring_O)), element(2, Coloring_O)}, + ?debug_msg("SortedColoring_O ~p\n",[SortedColoring_O]), + sanity_compare(SortedColoring_O, SortedColoring), + Coloring. +-else. +regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> + ?debug_msg("optimistic ~w\n",[Target]), + ?debug_msg("CFG: ~p\n",[CFG]), + %% Build interference graph + ?debug_msg("Build IG\n",[]), + IG = hipe_ig:build(CFG, Target), + ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), + ?debug_msg("IG:\n",[]), + ?print_adjacent(IG), + ?print_degrees(IG), + ?print_spill_costs(IG), + + SavedSpillCosts = hipe_ig:spill_costs(IG), + SavedAdjList = hipe_ig:adj_list(IG), + + ?debug_msg("Init\n",[]), + No_temporaries = Target:number_of_temporaries(CFG), + ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), + Allocatable = Target:allocatable(), + K = length(Allocatable), + All_colors = colset_from_list(Allocatable), + ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]), + + %% Add registers with their own coloring + ?debug_msg("Moves\n",[]), + Move_sets = hipe_moves:new(IG), + ?debug_msg("Move_sets:\n ~p\n",[Move_sets]), + ?mov_print_memberships(Move_sets), + + ?debug_msg("Build Worklist\n",[]), + + Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), + ?debug_msg("New Worklists:\n ~p\n", [Worklists]), + ?reg_print_memberships(Worklists), + + Alias = initAlias(No_temporaries), + ?print_alias(Alias), + + {IG0, Moves0, Alias0, Worklists0} = + do_coalescing(IG, Worklists, Move_sets, Alias, K, Target), + ?debug_msg("IG after coalescing:\n",[]), + ?print_adjacent(IG0), + ?print_degrees(IG0), + ?print_spill_costs(IG0), + ?debug_msg("Move_sets after coalescing:\n ~p\n",[Moves0]), + ?mov_print_memberships(Moves0), + ?debug_msg("New Worklists after coalescing:\n ~p\n", + [Worklists0]), + ?reg_print_memberships(Worklists0), + + {IG1, Worklists1, _Moves1, Alias1} = + do_simplify_or_spill(IG0, Worklists0, Moves0, Alias0, + K, SpillLimit, Target), + ?debug_msg("IG after simplify_or_spill:\n",[]), + ?print_adjacent(IG1), + ?print_degrees(IG1), + ?print_spill_costs(IG1), + ?debug_msg("Saved spill costs ~p~n", [SavedSpillCosts]), + ?debug_msg("New Worklists after simplify_or_spill:\n ~p\n", + [Worklists1]), + ?reg_print_memberships(Worklists1), + ?print_alias(Alias1), + + %% only for testing undoCoalescing and member_coalesced_to + %test_undoCoalescing(No_temporaries, Alias1, Worklists1), + + %% only for testing fixAdj + %?debug_msg("adj_lists_before_fixAdj ~n~p~n", [hipe_ig:adj_list(IG1)]), + %IG2 = test_fixAdj(No_temporaries, SavedAdjList, IG1, Target), + %?debug_msg("adj_lists__after_fixAdj ~n~p~n", [hipe_ig:adj_list(IG2)]), + + ?debug_msg("Init node sets\n",[]), + Node_sets = hipe_node_sets:new(), + %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + ?debug_msg("Default coloring\n",[]), + {Color0,Node_sets1} = + defaultColoring(Target:all_precoloured(), + initColor(No_temporaries), Node_sets, Target), + ?debug_msg("Color0\n",[]), + ?print_colors(No_temporaries, Color0), + + ?debug_msg("----------------------Assign colors _N\n",[]), + + Stack = hipe_reg_worklists:stack(Worklists1), + ?debug_msg("The stack _N ~p~n", [Stack]), + %SortedStack = sort_stack(Stack), + %?debug_msg("The stack _N ~p~n", [SortedStack]), + + %?debug_msg("Nodes _N ~w~n", [Node_sets1]), + + {Color1,Node_sets2,Alias2} = + assignColors(Worklists1, Stack, Node_sets1, Color0, + No_temporaries, SavedAdjList, SavedSpillCosts, IG1, Alias1, All_colors, Target), + ?print_colors(No_temporaries, Color1), + ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), + + ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), + Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + ?debug_msg("Coloring ~p\n",[Coloring]), + Coloring. +-endif. + +%%---------------------------------------------------------------------- +%% Function: do_coloring +%% +%% Description: Create a coloring. That is, play paintball. +%% Parameters: +%% IG -- An interference graph +%% Worklists -- Worklists, that is simplify, spill and freeze +%% Moves -- Moves sets, that is coalesced, constrained +%% and so on. +%% Alias -- Tells if two temporaries can have their value +%% in the same register. +%% K -- Want to create a K coloring. +%% SpillLimit -- Try not to spill nodes that are above the spill limit. +%% +%% Returns: +%% IG -- Updated interference graph +%% Worklists -- Updated Worklists structure +%% Moves -- Updated Moves structure +%% Alias -- Updates Alias structure +%% +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +do_coloring(IG, Worklists, Moves, Alias, K, SpillLimit, Target) -> + Simplify = not(hipe_reg_worklists:is_empty_simplify(Worklists)), + Coalesce = not(hipe_moves:is_empty_worklist(Moves)), + Freeze = not(hipe_reg_worklists:is_empty_freeze(Worklists)), + Spill = not(hipe_reg_worklists:is_empty_spill(Worklists)), + if Simplify =:= true -> + {IG0, Worklists0, Moves0} = + simplify_O(hipe_reg_worklists:simplify(Worklists), + IG, + Worklists, + Moves, + K), + do_coloring(IG0, Worklists0, Moves0, Alias, K, SpillLimit, Target); + Coalesce =:= true -> + {Moves0, IG0, Worklists0, Alias0} = + coalesce_O(Moves, IG, Worklists, Alias, K, Target), + do_coloring(IG0, Worklists0, Moves0, Alias0, K, SpillLimit, Target); + Freeze =:= true -> + {Worklists0, Moves0} = + freeze(K, Worklists, Moves, IG, Alias), + do_coloring(IG, Worklists0, Moves0, Alias, K, SpillLimit, Target); + Spill =:= true -> + {Worklists0, Moves0} = + selectSpill_O(Worklists, Moves, IG, K, Alias, SpillLimit), + do_coloring(IG, Worklists0, Moves0, Alias, K, SpillLimit, Target); + true -> % Catchall case + {IG, Worklists, Moves, Alias} + end. +-endif. + +%%---------------------------------------------------------------------- +%% Function: do_coalescing +%% +%% Description: Try to coalesce everything (find out later if it was +%% possible). +%% Parameters: +%% IG -- An interference graph +%% Moves -- Moves sets, that is coalesced, constrained +%% and so on. +%% Alias -- Tells if two temporaries can have their value +%% in the same register. +%% +%% Returns: +%% IG -- Updated interference graph +%% Moves -- Updated Moves structure +%% Alias -- Updates Alias structure +%% +%%---------------------------------------------------------------------- + +do_coalescing(IG, Worklists, Moves, Alias, K, Target) -> + case hipe_moves:is_empty_worklist(Moves) of + true -> + {IG, Moves, Alias, Worklists}; + _ -> + {Moves0, IG0, Alias0, Worklists0} = + coalesce(Moves, IG, Worklists, Alias, K, Target), + do_coalescing(IG0, Worklists0, Moves0, Alias0, K, Target) + end. + +%%---------------------------------------------------------------------- +%% Function: do_simplify_or_spill +%% +%% Parameters: +%% IG -- An interference graph +%% Worklists -- Worklists, that is simplify, spill and freeze +%% Moves -- Moves sets, that is coalesced, constrained +%% and so on. +%% Alias -- Tells if two temporaries can have their value +%% in the same register. +%% K -- Want to create a K coloring. +%% SpillLimit -- Try not to spill nodes that are above the spill limit. +%% +%% Returns: +%% IG -- Updated interference graph +%% Worklists -- Updated Worklists structure +%% Moves -- Updated Moves structure +%% Alias -- Updates Alias structure +%% +%%---------------------------------------------------------------------- + +do_simplify_or_spill(IG, Worklists, Moves, Alias, K, SpillLimit, Target) -> + Simplify = not(hipe_reg_worklists:is_empty_simplify(Worklists)), + Spill = not(hipe_reg_worklists:is_empty_spill(Worklists)), + if Simplify =:= true -> + {IG0, Worklists0, Moves0} = + simplify(hipe_reg_worklists:simplify(Worklists), + IG, + Worklists, + Moves, + K), + do_simplify_or_spill(IG0, Worklists0, Moves0, Alias, + K, SpillLimit, Target); + Spill =:= true -> + Worklists0 = + selectSpill(Worklists, IG, SpillLimit), + do_simplify_or_spill(IG, Worklists0, Moves, Alias, + K, SpillLimit, Target); + true -> % Catchall case + {IG, Worklists, Moves, Alias} + end. + +%%---------------------------------------------------------------------- +%% Function: adjacent +%% +%% Description: Adjacent nodes that's not coalesced, on the stack or +%% precoloured. +%% Parameters: +%% Node -- Node that you want to adjacents of +%% IG -- The interference graph +%% +%% Returns: +%% A set with nodes/temporaries that are not coalesced, on the +%% stack or precoloured. +%%---------------------------------------------------------------------- + +adjacent(Node, IG, Worklists) -> + Adjacent_edges = hipe_ig:node_adj_list(Node, IG), + hipe_reg_worklists:non_stacked_or_coalesced_nodes(Adjacent_edges, Worklists). + +%%---------------------------------------------------------------------- +%% Function: simplify +%% +%% Description: Simplify graph by removing nodes of low degree. This +%% function simplify all nodes it can at once. +%% Parameters: +%% [Node|Nodes] -- The simplify worklist +%% IG -- The interference graph +%% Worklists -- The worklists data-structure +%% Moves -- The moves data-structure +%% K -- Produce a K coloring +%% +%% Returns: +%% IG -- An updated interference graph +%% Worklists -- An updated worklists data-structure +%% Moves -- An updated moves data-structure +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +simplify_O([], IG, Worklists, Moves, _K) -> + {IG, Worklists, Moves}; +simplify_O([Node|Nodes], IG, Worklists, Moves, K) -> + Worklists0 = hipe_reg_worklists:remove_simplify(Node, Worklists), + ?debug_msg("putting ~w on stack~n",[Node]), + Adjacent = adjacent(Node, IG, Worklists0), + Worklists01 = hipe_reg_worklists:push_stack(Node, Adjacent, Worklists0), + {New_ig, Worklists1, New_moves} = + decrement_degree_O(Adjacent, IG, Worklists01, Moves, K), + simplify_O(Nodes, New_ig, Worklists1, New_moves, K). +-endif. + +%%---------------------------------------------------------------------- +%% Function: simplify +%% +%% Description: Simplify graph by removing nodes of low degree. This +%% function simplify all nodes it can at once. +%% Parameters: +%% [Node|Nodes] -- The simplify worklist +%% IG -- The interference graph +%% Worklists -- The worklists data-structure +%% Moves -- The moves data-structure +%% K -- Produce a K coloring +%% +%% Returns: +%% IG -- An updated interference graph +%% Worklists -- An updated worklists data-structure +%% Moves -- An updated moves data-structure +%%---------------------------------------------------------------------- + +simplify([], IG, Worklists, Moves, _K) -> + {IG, Worklists, Moves}; +simplify([Node|Nodes], IG, Worklists, Moves, K) -> + Worklists0 = hipe_reg_worklists:remove_simplify(Node, Worklists), + ?debug_msg("putting ~w on stack~n",[Node]), + Adjacent = adjacent(Node, IG, Worklists0), + Worklists01 = hipe_reg_worklists:push_stack(Node, Adjacent, Worklists0), + {New_ig, Worklists1} = decrement_degree(Adjacent, IG, Worklists01, K), + simplify(Nodes, New_ig, Worklists1, Moves, K). + +%%---------------------------------------------------------------------- +%% Function: decrement_degree +%% +%% Description: Decrement the degree on a number of nodes/temporaries. +%% Parameters: +%% [Node|Nodes] -- Decrement degree on these nodes +%% IG -- The interference graph +%% Worklists -- The Worklists data structure +%% Moves -- The Moves data structure. +%% K -- We want to create a coloring with K colors +%% +%% Returns: +%% IG -- An updated interference graph (the degrees) +%% Worklists -- Updated Worklists. Changed if one degree goes +%% down to K. +%% Moves -- Updated Moves. Changed if a move related temporary +%% gets degree K. +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +decrement_degree_O([], IG, Worklists, Moves, _K) -> + {IG, Worklists, Moves}; +decrement_degree_O([Node|Nodes], IG, Worklists, Moves, K) -> + PrevDegree = hipe_ig:get_node_degree(Node, IG), + IG0 = hipe_ig:dec_node_degree(Node, IG), + case PrevDegree =:= K of + true -> + AdjList = hipe_ig:node_adj_list(Node, IG0), + %% OK since Node (a) is still in IG, and (b) cannot be adjacent to itself. + Moves00 = enable_moves_active_to_worklist(hipe_moves:node_movelist(Node, Moves), + Moves), + Moves0 = enable_moves(AdjList, Worklists, Moves00), + Worklists0 = hipe_reg_worklists:remove_spill(Node, Worklists), + case hipe_moves:move_related(Node, Moves0) of + true -> + Worklists1 = hipe_reg_worklists:add_freeze(Node, Worklists0), + decrement_degree_O(Nodes, IG0, Worklists1, Moves0, K); + _ -> + Worklists1 = hipe_reg_worklists:add_simplify(Node, Worklists0), + decrement_degree_O(Nodes, IG0, Worklists1, Moves0, K) + end; + _ -> + decrement_degree_O(Nodes, IG0, Worklists, Moves, K) + end. +-endif. + +%%---------------------------------------------------------------------- +%% Function: decrement_degree +%% +%% Description: Decrement the degree on a number of nodes/temporaries. +%% Parameters: +%% [Node|Nodes] -- Decrement degree on these nodes +%% IG -- The interference graph +%% Worklists -- The Worklists data structure +%% Moves -- The Moves data structure. +%% K -- We want to create a coloring with K colors +%% +%% Returns: +%% IG -- An updated interference graph (the degrees) +%% Worklists -- Updated Worklists. Changed if one degree goes +%% down to K. +%% Moves -- Updated Moves. Changed if a move related temporary +%% gets degree K. +%%---------------------------------------------------------------------- + +decrement_degree([], IG, Worklists, _K) -> + {IG, Worklists}; +decrement_degree([Node|Nodes], IG, Worklists, K) -> + PrevDegree = hipe_ig:get_node_degree(Node, IG), + IG0 = hipe_ig:dec_node_degree(Node, IG), + case PrevDegree =:= K of + true -> + Worklists0 = hipe_reg_worklists:remove_spill(Node, Worklists), + Worklists1 = hipe_reg_worklists:add_simplify(Node, Worklists0), + decrement_degree(Nodes, IG0, Worklists1, K); + _ -> + decrement_degree(Nodes, IG0, Worklists, K) + end. + +%%---------------------------------------------------------------------- +%% Function: enable_moves +%% +%% Description: Make (move-related) nodes that are not yet considered for +%% coalescing, ready for possible coalescing. +%% +%% Parameters: +%% [Node|Nodes] -- A list of move nodes +%% Moves -- The moves data-structure +%% +%% Returns: +%% An updated moves data-structure +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +enable_moves([], _Worklists, Moves) -> Moves; +enable_moves([Node|Nodes], Worklists, Moves) -> + case hipe_reg_worklists:member_stack_or_coalesced(Node, Worklists) of + true -> enable_moves(Nodes, Worklists, Moves); + _ -> + %% moveList[n] suffices since we're checking for activeMoves membership + Node_moves = hipe_moves:node_movelist(Node, Moves), + New_moves = enable_moves_active_to_worklist(Node_moves, Moves), + enable_moves(Nodes, Worklists, New_moves) + end. +-endif. + +%%---------------------------------------------------------------------- +%% Function: enable_moves_active_to_worklist +%% +%% Description: Make (move-related) nodes that are not yeat considered for +%% coalescing, ready for possible coalescing. +%% +%% Parameters: +%% [Node|Nodes] -- A list of move nodes +%% Moves -- The moves data-structure +%% +%% Returns: +%% An updated moves data-structure +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +enable_moves_active_to_worklist([], Moves) -> Moves; +enable_moves_active_to_worklist([Node|Nodes], Moves) -> + case hipe_moves:member_active(Node, Moves) of + true -> + New_moves = + hipe_moves:add_worklist(Node, hipe_moves:remove_active(Node, Moves)), + enable_moves_active_to_worklist(Nodes, New_moves); + _ -> + enable_moves_active_to_worklist(Nodes, Moves) + end. +-endif. + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +sanity_compare(Coloring, Coloring_N) -> + case compare_sanity(Coloring, Coloring_N) of + false -> + ?debug_msg("mismatch for coloring: ~n~p~n~p", [Coloring, Coloring_N]); + _ -> true + end. +compare_sanity({[], _C}, {[], _C_N}) -> + ?debug_msg("Sanity - OK!~n", []), + true; +compare_sanity({_Coloring_list, _C}, {[], _C_N}) -> + ?debug_msg("Sanity - unequal numbers~n", []), + false; +compare_sanity({[], _C}, {_Coloring_list_N, _C_N}) -> + ?debug_msg("Sanity - unequal numbers~n", []), + false; +compare_sanity({[Color|Coloring_list], C}, {[Color_N|Coloring_list_N], C_N}) -> + case element(1, Color) =:= element(1, Color_N) of + false -> + ?debug_msg("Sanity - unequal measure~n", []), + false; + _ -> + case element(2, Color) =:= element(2, Color_N) of + false -> + ?debug_msg("Sanity - unequal color~n", []), + false; + _ -> + case C =:= C_N of + false -> + ?debug_msg("Sanity - unequal last element~n", []), + false; + _ -> + compare_sanity({Coloring_list, C}, {Coloring_list_N, C_N}) + end + end + end. +-endif. + + +%% Build the namelists, these functions are fast hacks, they use knowledge +%% about data representation that they shouldn't know, bad abstraction. + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +build_namelist_O(NodeSets,Index,Alias,Color) -> + ?debug_msg("NodeSets ~w~n", [NodeSets]), + ?debug_msg("Building mapping\n",[]), + ?debug_msg("Vector to list\n",[]), + AliasList = + build_alias_list(aliasToList(Alias), + 0, %% The first temporary has index 0 + []), %% Accumulator + ?debug_msg("Alias list:~p\n",[AliasList]), + ?debug_msg("Coalesced\n",[]), + NL1 = build_coalescedlist(AliasList,Color,Alias,[]), + ?debug_msg("Coalesced list:~p\n",[NL1]), + ?debug_msg("Regs\n",[]), + NL2 = build_reglist_O(hipe_node_sets:colored(NodeSets),Color,NL1), + ?debug_msg("Regs list:~p\n",[NL2]), + ?debug_msg("Spills\n",[]), + build_spillist(hipe_node_sets:spilled(NodeSets),Index,NL2). +-endif. + +build_namelist(NodeSets,Index,Alias,Color) -> + ?debug_msg("NodeSets _N ~w~n", [NodeSets]), + ?debug_msg("Building mapping _N\n",[]), + ?debug_msg("Vector to list _N\n",[]), + AliasList = + build_alias_list(aliasToList(Alias), + 0, %% The first temporary has index 0 + []), %% Accumulator + ?debug_msg("Alias list _N:~p\n",[AliasList]), + ?debug_msg("Coalesced\n",[]), + NL1 = build_coalescedlist(AliasList,Color,Alias,[]), + ?debug_msg("Coalesced list:~p\n",[NL1]), + ?debug_msg("Regs _N\n",[]), + ColoredNodes = hipe_node_sets:colored(NodeSets), + ?debug_msg("ColoredNodes ~p~n", [ColoredNodes]), + NL2 = build_reglist_N(ColoredNodes,Color,NL1,NL1), + ?debug_msg("Regs list _N:~p\n",[NL2]), + ?debug_msg("Spills _N\n",[]), + build_spillist(hipe_node_sets:spilled(NodeSets),Index,NL2). + +build_spillist([],Index,List) -> + {List,Index}; +build_spillist([Node|Nodes],Index,List) -> + ?debug_msg("[~p]: Spill ~p to ~p\n", [?MODULE,Node,Index]), + build_spillist(Nodes,Index+1,[{Node,{spill,Index}}|List]). + +build_coalescedlist([],_Color,_Alias,List) -> + List; +build_coalescedlist([Node|Ns],Color,Alias,List) when is_integer(Node) -> + ?debug_msg("Alias of ~p is ~p~n",[Node,getAlias(Node,Alias)]), + AC = getColor(getAlias(Node,Alias),Color), + build_coalescedlist(Ns,Color,Alias,[{Node,{reg,AC}}|List]). + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +build_reglist_O([],_Color,List) -> + List; +build_reglist_O([Node|Ns],Color,List) -> + build_reglist_O(Ns,Color,[{Node,{reg,getColor(Node,Color)}}|List]). +-endif. + +build_reglist_N([],_Color,List,_OrgList) -> + List; +build_reglist_N([Node|Ns],Color,List,OrgList) -> + %% XXX this could be done more efficiently if both lists were sorted + case is_already_in_list(Node, OrgList) of + true -> build_reglist_N(Ns, Color, List, OrgList); + _ -> build_reglist_N(Ns,Color,[{Node,{reg,getColor(Node,Color)}}|List], OrgList) + end. + +is_already_in_list(_Node, []) -> + false; +is_already_in_list(Node, [L|List]) -> + ?debug_msg("---test--- Node ~w element ~w~n", [Node, element(1, L)]), + case Node =:= element(1, L) of + true -> true; + _ -> is_already_in_list(Node, List) + end. + +build_alias_list([], _I, List) -> + List; +build_alias_list([Alias|Aliases], I, List) when is_integer(Alias) -> + build_alias_list(Aliases, I+1, [I|List]); +build_alias_list([_Alias|Aliases], I, List) -> + build_alias_list(Aliases, I+1, List). + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +sort_stack([]) -> []; +sort_stack([Pivot|Rest]) -> + {Smaller, Bigger} = sort_stack_split(Pivot, Rest), + lists:append(sort_stack(Smaller), [Pivot|sort_stack(Bigger)]). + +sort_stack_split(Pivot, L) -> + sort_stack_split(Pivot, L, [], []). + +sort_stack_split(_Pivot, [], Smaller, Bigger) -> + {Smaller, Bigger}; +sort_stack_split(Pivot, [H|T], Smaller, Bigger) when element(1, H) > element(1, Pivot) -> + sort_stack_split(Pivot, T, [H|Smaller], Bigger); +sort_stack_split(Pivot, [H|T], Smaller, Bigger) -> + sort_stack_split(Pivot, T, Smaller, [H|Bigger]). +-endif. + +%sort([]) -> []; +%sort([Pivot|Rest]) -> +% {Smaller, Bigger} = sort_split(Pivot, Rest), +% lists:append(sort(Smaller), [Pivot|sort(Bigger)]). +% +%sort_split(Pivot, L) -> +% sort_split(Pivot, L, [], []). +% +%sort_split(_Pivot, [], Smaller, Bigger) -> {Smaller, Bigger}; +%sort_split(Pivot, [H|T], Smaller, Bigger) when H > Pivot -> +% sort_split(Pivot, T, [H|Smaller], Bigger); +%sort_split(Pivot, [H|T], Smaller, Bigger) -> +% sort_split(Pivot, T, Smaller, [H|Bigger]). + +%%---------------------------------------------------------------------- +%% Function: assignColors +%% +%% Description: Tries to assign colors to nodes in a stack. +%% Parameters: +%% Worklists -- The Worklists data structure. +%% Stack -- The SelectStack built by the Select function, +%% this stack contains tuples in the form {Node,Edges} +%% where Node is the Node number and Edges is an ordset +%% containing the numbers of all the adjacent nodes. +%% NodeSets -- This is a record containing all the different node +%% sets that are used in the register allocator. +%% Color -- A mapping from nodes to their respective color. +%% No_temporaries -- Number of temporaries. +%% SavedAdjList -- Saved adjacency list (from before coalescing). +%% SavedSpillCosts -- Saved spill costs (from before coalescing). +%% IG -- The interference graph. +%% Alias -- This is a mapping from nodes to nodes. If a node has +%% been coalesced, this mapping shows the alias for that +%% node. +%% AllColors -- This is an ordset containing all the available colors +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% Color -- A mapping from nodes to their respective color. +%% NodeSets -- The updated node sets. +%% Alias -- The updated aliases. +%%---------------------------------------------------------------------- + +assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries, + SavedAdjList, SavedSpillCosts, IG, Alias, AllColors, Target) -> + case Stack of + [] -> + {Color,NodeSets,Alias}; + [{Node,Edges}|Stack1] -> + ?debug_msg("Coloring Node: ~p~n",[Node]), + ?IF_DEBUG(lists:foreach(fun (_E) -> + ?msg(" Edge ~w-><~w>->~w~n", + begin A = getAlias(_E,Alias), + [_E,A,getColor(A,Color)] + end) + end, Edges), + []), + %% When debugging, check that Node isn't precoloured. + OkColors = findOkColors(Edges, AllColors, Color, Alias), + case colset_is_empty(OkColors) of + true -> % Spill case + case hipe_reg_worklists:member_coalesced_to(Node, Worklists) of + true -> + ?debug_msg("Alias case. Undoing coalescing.~n", []), + {Alias1, IG1, NodeSets1, Color1, Stack2} = tryPrimitiveNodes(Node, Stack1, NodeSets, AllColors, Color, No_Temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, Target), + %{Alias1, IG1, NodeSets1, Color1, Stack2} = {Alias, IG, NodeSets, Color, Stack1}, + assignColors(Worklists, Stack2, NodeSets1, Color1, No_Temporaries, SavedAdjList, SavedSpillCosts, IG1, Alias1, AllColors, Target); + false -> + ?debug_msg("Spill case. Spilling node.~n", []), + NodeSets1 = hipe_node_sets:add_spilled(Node, NodeSets), + assignColors(Worklists, Stack1, NodeSets1, Color, No_Temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, AllColors, Target) + end; + false -> % Color case + Col = colset_smallest(OkColors), + NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), + Color1 = setColor(Node, Target:physical_name(Col), Color), + ?debug_msg("Color case. Assigning color ~p to node.~n", [Col]), + assignColors(Worklists, Stack1, NodeSets1, Color1, No_Temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, AllColors, Target) + end + end. + +%%---------------------------------------------------------------------- +%% Function: tryPrimitiveNodes +%% +%% Description: Undoes coalescing of a non-colorable coalesced node and tries +%% to assign colors to its primitives, such that the cheapest +%% potential spill cost is achieved. +%% Parameters: +%% Node -- The representative node to undo coalescing for. +%% Stack -- The SelectStack built by the Select function, +%% this stack contains tuples in the form {Node,Edges} +%% where Node is the Node number and Edges is an ordset +%% containing the numbers of all the adjacent nodes. +%% NodeSets -- This is a record containing all the different node +%% sets that are used in the register allocator. +%% AllColors -- This is an ordset containing all the available colors. +%% No_temporaries -- Number of temporaries. +%% SavedAdjList -- Saved adjacency list (from before coalescing). +%% SavedSpillCosts -- Saved spill costs (from before coalescing). +%% IG -- The interference graph. +%% Alias -- This is a mapping from nodes to nodes. If a node has +%% been coalesced, this mapping shows the alias for that +%% node. +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% Alias -- The restored aliases after the uncoalescing. +%% IG -- An updated interference graph after the uncoalescing. +%% NodeSets -- The updated node sets. +%% Color -- A mapping from nodes to their respective color. +%% Stack -- The updated SelectStack with non-colored primitives +%% placed at the bottom. +%%---------------------------------------------------------------------- + +tryPrimitiveNodes(Node, Stack, NodeSets, AllColors, Color, No_temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, Target) -> + ?debug_msg("Undoing coalescing of node ~p.~n", [Node]), + {PrimitiveNodes, Alias1, IG1} = undoCoalescing(Node, No_temporaries, Alias, SavedAdjList, IG, Target), + ?debug_msg("Spilling non-colorable primitives.~n", []), + {ColorableNodes, NodeSets1} = spillNonColorablePrimitives([], PrimitiveNodes, NodeSets, AllColors, Color, SavedAdjList, Alias1), + ?debug_msg("Generating splits of colorable nodes.~n", []), + Splits = splits(ColorableNodes, SavedSpillCosts), + {NodeSets2, Color1, Stack1} = processSplits(Splits, AllColors, IG1, Color, NodeSets1, Alias1, Target, Stack), + {Alias1, IG1, NodeSets2, Color1, Stack1}. + +%% Spill all non-colorable primitives and return the remaining set of nodes. + +spillNonColorablePrimitives(ColorableNodes, [], NodeSets, _AllColors, _Color, _SavedAdjList, _Alias) -> + {ColorableNodes, NodeSets}; +spillNonColorablePrimitives(ColorableNodes, [Primitive|Primitives], NodeSets, AllColors, Color, SavedAdjList, Alias) -> + OkColors = findOkColors(hipe_adj_list:edges(Primitive, SavedAdjList), AllColors, Color, Alias), + case colset_is_empty(OkColors) of + true -> % Spill case + ?debug_msg(" Spilling primitive node ~p.~n", [Primitive]), + NodeSets1 = hipe_node_sets:add_spilled(Primitive, NodeSets), + spillNonColorablePrimitives(ColorableNodes, Primitives, NodeSets1, AllColors, Color, SavedAdjList, Alias); + false -> % Colorable case + ?debug_msg(" Primitive node ~p is colorable.~n", [Primitive]), + spillNonColorablePrimitives([Primitive|ColorableNodes], Primitives, NodeSets, AllColors, Color, SavedAdjList, Alias) + end. + +%% Generate all splits of colorable primitives, sorted in spill cost order. + +splits([], _SavedSpillCosts) -> + [{[], [], 0}]; +splits([L|Ls], SavedSpillCosts) -> + Spl = splits(Ls, SavedSpillCosts), + SpillCost = hipe_spillcost:spill_cost(L, SavedSpillCosts), + Spl1 = [splits_1(S, L) || S <- Spl], + Spl2 = [splits_2(S, L, SpillCost) || S <- Spl], + spillCostOrderedMerge(Spl1, Spl2, []). + +splits_1({Cols, NonCols, OldSpillCost}, L) -> + {[L|Cols], NonCols, OldSpillCost}. + +splits_2({Cols, NonCols, OldSpillCost}, L, SpillCost) -> + {Cols, [L|NonCols], OldSpillCost + SpillCost}. + +%% Merge two ordered sub-splits into one. + +spillCostOrderedMerge(Spl1, [], Spl) -> + lists:reverse(Spl) ++ Spl1; +spillCostOrderedMerge([], Spl2, Spl) -> + lists:reverse(Spl) ++ Spl2; +spillCostOrderedMerge(Spl1, Spl2, Spl) -> + {_, _, SpillCost1} = hd(Spl1), + {_, _, SpillCost2} = hd(Spl2), + case SpillCost1 =< SpillCost2 of + true -> + spillCostOrderedMerge(tl(Spl1), Spl2, [hd(Spl1)|Spl]); + false -> + spillCostOrderedMerge(Spl1, tl(Spl2), [hd(Spl2)|Spl]) + end. + +%% Process splits, finding the one with the smallest spill cost that +%% can be assigned one color. + +processSplits([], _AllColors, _IG, Color, NodeSets, _Alias, _Target, Stack) -> + {NodeSets, Color, Stack}; +processSplits([{Cols, NonCols, _SpillCost}|Splits], AllColors, IG, Color, NodeSets, Alias, Target, Stack) -> + OkColors = findCommonColors(Cols, IG, Color, Alias, AllColors), + case colset_is_empty(OkColors) of + false -> % This split can be colored with one color - use it + ?debug_msg("Found a colorable split.~n", []), + Col = colset_smallest(OkColors), + {NodeSets1, Color1} = colorSplit(Cols, Col, NodeSets, Color, Target), + Stack1 = enqueueSplit(NonCols, IG, Stack), + {NodeSets1, Color1, Stack1}; + true -> % This split cannot be colored with one color - try another + ?debug_msg("Unable to color split.~n", []), + processSplits(Splits, AllColors, IG, Color, NodeSets, Alias, Target, Stack) + end. + +%% Find the set of colors that can be assigned to one split. + +findCommonColors([], _IG, _Color, _Alias, OkColors) -> + OkColors; +findCommonColors([Primitive|Primitives], IG, Color, Alias, OkColors) -> + OkColors1 = findOkColors(hipe_ig:node_adj_list(Primitive, IG), OkColors, Color, Alias), + findCommonColors(Primitives, IG, Color, Alias, OkColors1). + +%% Color nodes in a split. + +colorSplit([], _Col, NodeSets, Color, _Target) -> + {NodeSets, Color}; +colorSplit([Node|Nodes], Col, NodeSets, Color, Target) -> + ?debug_msg(" Coloring node ~p with color ~p.~n", [Node, Col]), + NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), + Color1 = setColor(Node, Target:physical_name(Col), Color), + colorSplit(Nodes, Col, NodeSets1, Color1, Target). + +%% Place non-colorable nodes in a split at the bottom of the SelectStack. + +enqueueSplit([], _IG, Stack) -> + Stack; +enqueueSplit([Node|Nodes], IG, Stack) -> + ?debug_msg(" Placing node ~p at the bottom of the stack.~n", [Node]), + Edges = hipe_ig:node_adj_list(Node, IG), + Stack1 = Stack ++ [{Node, Edges}], + enqueueSplit(Nodes, IG, Stack1). + +%%---------------------------------------------------------------------- +%% Function: assignColors +%% +%% Description: Tries to assign colors to nodes in a stack. +%% Parameters: +%% Stack -- The SelectStack built by the Select function, +%% this stack contains tuples in the form {Node,Edges} +%% where Node is the Node number and Edges is an ordset +%% containing the numbers of all the adjacent nodes. +%% NodeSets -- This is a record containing all the different node +%% sets that are used in the register allocator. +%% Alias -- This is a mapping from nodes to nodes, if a node has +%% been coalesced this mapping shows the alias for that +%% node. +%% AllColors -- This is an ordset containing all the available colors +%% +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% Color -- A mapping from nodes to their respective color. +%% NodeSets -- The updated node sets. +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> + case Stack of + [] -> + {Color,NodeSets}; + [{Node,Edges}|Stack1] -> + ?debug_msg("Coloring Node: ~p~n",[Node]), + ?IF_DEBUG(lists:foreach(fun (_E) -> + ?msg(" Edge ~w-><~w>->~w~n", + begin A = getAlias(_E,Alias), + [_E,A,getColor(A,Color)] + end) + end, Edges), + []), + %% When debugging, check that Node isn't precoloured. + OkColors = findOkColors(Edges, AllColors, Color, Alias), + case colset_is_empty(OkColors) of + true -> % Spill case + NodeSets1 = hipe_node_sets:add_spilled(Node, NodeSets), + assignColors_O(Stack1, NodeSets1, Color, Alias, AllColors, Target); + false -> % Colour case + Col = colset_smallest(OkColors), + NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), + Color1 = setColor(Node, Target:physical_name(Col), Color), + assignColors_O(Stack1, NodeSets1, Color1, Alias, AllColors, Target) + end + end. +-endif. + +%%--------------------------------------------------------------------- +%% Function: defaultColoring +%% +%% Description: Make the default coloring +%% Parameters: +%% Regs -- The list of registers to be default colored +%% Color -- The color mapping that shall be changed +%% NodeSets -- The node sets that shall be updated +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% NewColor -- The updated color mapping +%% NewNodeSets -- The updated node sets +%%--------------------------------------------------------------------- + +defaultColoring([], Color, NodeSets, _Target) -> + {Color,NodeSets}; +defaultColoring([Reg|Regs], Color, NodeSets, Target) -> + Color1 = setColor(Reg,Target:physical_name(Reg), Color), + NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets), + defaultColoring(Regs, Color1, NodeSets1, Target). + +%% Find the colors that are OK for a node with certain edges. + +findOkColors(Edges, AllColors, Color, Alias) -> + find(Edges, AllColors, Color, Alias). + +%% Find all the colors of the nodes in the list [Node|Nodes] and remove them +%% from the set OkColors, when the list is empty, return OkColors. + +find([], OkColors, _Color, _Alias) -> + OkColors; +find([Node0|Nodes], OkColors, Color, Alias) -> + Node = getAlias(Node0, Alias), + case getColor(Node, Color) of + [] -> + find(Nodes, OkColors, Color, Alias); + Col -> + OkColors1 = colset_del_element(Col, OkColors), + find(Nodes, OkColors1, Color, Alias) + end. + +%%% +%%% ColSet -- ADT for the set of available colours while +%%% assigning colours. +%%% +-ifdef(notdef). % old ordsets-based implementation +colset_from_list(Allocatable) -> + ordsets:from_list(Allocatable). + +colset_del_element(Colour, ColSet) -> + ordsets:del_element(Colour, ColSet). + +colset_is_empty(ColSet) -> + case ColSet of + [] -> true; + [_|_] -> false + end. + +colset_smallest([Colour|_]) -> + Colour. +-endif. + +-ifdef(notdef). % new gb_sets-based implementation +colset_from_list(Allocatable) -> + gb_sets:from_list(Allocatable). + +colset_del_element(Colour, ColSet) -> + %% Must use gb_sets:delete_any/2 since gb_sets:del_element/2 + %% fails if the element isn't present. Bummer. + gb_sets:delete_any(Colour, ColSet). + +colset_is_empty(ColSet) -> + gb_sets:is_empty(ColSet). + +colset_smallest(ColSet) -> + gb_sets:smallest(ColSet). +-endif. + +%%-ifdef(notdef). % new bitmask-based implementation +colset_from_list(Allocatable) -> + colset_from_list(Allocatable, 0). + +colset_from_list([], ColSet) -> + ColSet; +colset_from_list([Colour|Allocatable], ColSet) -> + colset_from_list(Allocatable, ColSet bor (1 bsl Colour)). + +colset_del_element(Colour, ColSet) -> + ColSet band bnot(1 bsl Colour). + +colset_is_empty(0) -> true; +colset_is_empty(_) -> false. + +colset_smallest(ColSet) -> + bitN_log2(ColSet band -ColSet, 0). + +bitN_log2(BitN, ShiftN) -> + case BitN > 16#ffff of + true -> + bitN_log2(BitN bsr 16, ShiftN + 16); + _ -> + ShiftN + hweight16(BitN - 1) + end. + +hweight16(W) -> + Res1 = ( W band 16#5555) + (( W bsr 1) band 16#5555), + Res2 = (Res1 band 16#3333) + ((Res1 bsr 2) band 16#3333), + Res3 = (Res2 band 16#0F0F) + ((Res2 bsr 4) band 16#0F0F), + (Res3 band 16#00FF) + ((Res3 bsr 8) band 16#00FF). +%%-endif. + +%%% +%%% Colour ADT providing a partial mapping from nodes to colours. +%%% + +initColor(NrNodes) -> + {colmap, hipe_bifs:array(NrNodes, [])}. + +getColor(Node, {colmap, ColMap}) -> + hipe_bifs:array_sub(ColMap, Node). + +setColor(Node, Color, {colmap, ColMap} = C) -> + hipe_bifs:array_update(ColMap, Node, Color), + C. + +-ifdef(DEBUG_PRINTOUTS). +printColors(0, _) -> + true; +printColors(Node, {colmap, ColMap} = C) -> + NextNode = Node - 1, + ?debug_msg("node ~w color ~w~n", [NextNode, hipe_bifs:array_sub(ColMap, NextNode)]), + printColors(NextNode, C). +-endif. + +%%% +%%% Alias ADT providing a partial mapping from nodes to nodes. +%%% + +initAlias(NrNodes) -> + {alias, hipe_bifs:array(NrNodes, [])}. + +%% Get alias for a node. +%% Note that non-aliased nodes could be represented in +%% two ways, either not aliased or aliased to itself. +%% Including the latter case prevents looping bugs. +getAlias(Node, {alias, AliasMap} = Alias) -> + case hipe_bifs:array_sub(AliasMap, Node) of + [] -> + Node; + Node -> + Node; + AliasNode -> + getAlias(AliasNode, Alias) + end. + +-ifdef(DEBUG_PRINTOUTS). +printAlias({alias, AliasMap} = Alias) -> + ?debug_msg("Aliases:\n",[]), + printAlias(hipe_bifs:array_length(AliasMap), Alias). + +printAlias(0, {alias, _}) -> + true ; +printAlias(Node, {alias, _AliasMap} = Alias) -> + ?debug_msg("alias ~p ~p\n", [Node - 1, getAlias(Node - 1, Alias)]), + printAlias(Node - 1, Alias). +-endif. + +setAlias(Node, AliasNode, {alias, AliasMap} = Alias) -> + hipe_bifs:array_update(AliasMap, Node, AliasNode), + Alias. + +aliasToList({alias, AliasMap}) -> + aliasToList(AliasMap, hipe_bifs:array_length(AliasMap), []). + +aliasToList(AliasMap, I1, Tail) -> + I0 = I1 - 1, + case I0 >= 0 of + true -> + aliasToList(AliasMap, I0, [hipe_bifs:array_sub(AliasMap, I0)|Tail]); + _ -> + Tail + end. + +%%---------------------------------------------------------------------- +%% Function: coalesce +%% +%% Description: Coalesces nodes in worklist +%% Parameters: +%% Moves -- Current move information +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Alias -- Current aliases for temporaries +%% K -- Number of registers +%% +%% Returns: +%% {Moves, IG, Worklists, Alias} +%% (Updated versions of above structures, after coalescing) +%%---------------------------------------------------------------------- + +coalesce(Moves, IG, Worklists, Alias, K, Target) -> + case hipe_moves:worklist_get_and_remove(Moves) of + {[],Moves0} -> + %% Moves marked for removal from worklistMoves by FreezeMoves() + %% are removed by worklist_get_and_remove(). This case is unlikely, + %% but can occur if only stale moves remain in worklistMoves. + {Moves0, IG, Alias}; + {Move,Moves0} -> + {Dest,Source} = hipe_moves:get_move(Move, Moves0), + ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), + Alias_src = getAlias(Source, Alias), + Alias_dst = getAlias(Dest, Alias), + {U,V} = case Target:is_precoloured(Alias_dst) of + true -> {Alias_dst, Alias_src}; + false -> {Alias_src, Alias_dst} + end, + %% When debugging, check that neither V nor U is on the stack. + case U =:= V of + true -> + %% drop coalesced move Move + {Moves0, IG, Alias, Worklists}; + _ -> + case (Target:is_precoloured(V) orelse + hipe_ig:nodes_are_adjacent(U, V, IG)) of + true -> + %% drop constrained move Move + {Moves0, IG, Alias, Worklists}; + false -> + case (case Target:is_precoloured(U) of + true -> + AdjV = hipe_ig:node_adj_list(V, IG), + all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); + false -> + AdjV = hipe_ig:node_adj_list(V, IG), + AdjU = hipe_ig:node_adj_list(U, IG), + conservative(AdjU, AdjV, U, Worklists, IG, K) + end) of + true -> + %% drop coalesced move Move + {IG1, Alias1, Worklists1} = + combine(U, V, IG, Alias, Worklists, K, Target), + {Moves0, IG1, Alias1, Worklists1}; + false -> + Moves1 = hipe_moves:add_active(Move, Moves0), + {Moves1, IG, Alias, Worklists} + end + end + end + end. + +%%---------------------------------------------------------------------- +%% Function: coalesce_O +%% +%% Description: Coalesces nodes in worklist +%% Parameters: +%% Moves -- Current move information +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Alias -- Current aliases for temporaries +%% K -- Number of registers +%% +%% Returns: +%% {Moves, IG, Worklists, Alias} +%% (Updated versions of above structures, after coalescing) +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> + case hipe_moves:worklist_get_and_remove(Moves) of + {[],Moves0} -> + %% Moves marked for removal from worklistMoves by FreezeMoves() + %% are removed by worklist_get_and_remove(). This case is unlikely, + %% but can occur if only stale moves remain in worklistMoves. + {Moves0,IG,Worklists,Alias}; + {Move,Moves0} -> + {Dest,Source} = hipe_moves:get_move(Move, Moves0), + ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), + Alias_src = getAlias(Source, Alias), + Alias_dst = getAlias(Dest, Alias), + {U,V} = case Target:is_precoloured(Alias_dst) of + true -> {Alias_dst, Alias_src}; + false -> {Alias_src, Alias_dst} + end, + %% When debugging, check that neither V nor U is on the stack. + case U =:= V of + true -> + Moves1 = Moves0, % drop coalesced move Move + Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), + {Moves1, IG, Worklists1, Alias}; + _ -> + case (Target:is_precoloured(V) orelse + hipe_ig:nodes_are_adjacent(U, V, IG)) of + true -> + Moves1 = Moves0, % drop constrained move Move + Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), + Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target), + {Moves1, IG, Worklists2, Alias}; + false -> + case (case Target:is_precoloured(U) of + true -> + AdjV = hipe_ig:node_adj_list(V, IG), + all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); + false -> + AdjV = hipe_ig:node_adj_list(V, IG), + AdjU = hipe_ig:node_adj_list(U, IG), + conservative(AdjU, AdjV, U, Worklists, IG, K) + end) of + true -> + Moves1 = Moves0, % drop coalesced move Move + {IG1,Worklists1,Moves2,Alias1} = + combine_O(U, V, IG, Worklists, Moves1, Alias, K, Target), + Worklists2 = add_worklist(Worklists1, U, K, Moves2, IG1, Target), + {Moves2, IG1, Worklists2, Alias1}; + false -> + Moves1 = hipe_moves:add_active(Move, Moves0), + {Moves1, IG, Worklists, Alias} + end + end + end + end. +-endif. + +%%---------------------------------------------------------------------- +%% Function: add_worklist +%% +%% Description: Builds new worklists where U is transferred from freeze +%% to simplify, if possible +%% +%% Parameters: +%% Worklists -- Current worklists +%% U -- Node to operate on +%% K -- Number of registers +%% Moves -- Current move information +%% IG -- Interference graph +%% Target -- The containing the target-specific functions +%% +%% Returns: +%% Worklists (updated) +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +add_worklist(Worklists, U, K, Moves, IG, Target) -> + case (not(Target:is_precoloured(U)) + andalso not(hipe_moves:move_related(U, Moves)) + andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of + true -> + hipe_reg_worklists:transfer_freeze_simplify(U, Worklists); + false -> + Worklists + end. +-endif. + +%%---------------------------------------------------------------------- +%% Function: combine +%% +%% Description: Combines two nodes into one (used when coalescing) +%% +%% Parameters: +%% U -- First node to operate on +%% V -- Second node to operate on +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Moves -- Current move information +%% Alias -- Current aliases for temporaries +%% K -- Number of registers +%% +%% Returns: +%% {IG, Worklists, Moves, Alias} (updated) +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +combine_O(U, V, IG, Worklists, Moves, Alias, K, Target) -> + Worklists1 = case hipe_reg_worklists:member_freeze(V, Worklists) of + true -> hipe_reg_worklists:remove_freeze(V, Worklists); + false -> hipe_reg_worklists:remove_spill(V, Worklists) + end, + Worklists11 = hipe_reg_worklists:add_coalesced(V, Worklists1), + + ?debug_msg("Coalescing ~p and ~p to ~p~n",[V,U,U]), + + Alias1 = setAlias(V, U, Alias), + + %% Typo in published algorithm: s/nodeMoves/moveList/g to fix. + %% XXX: moveList[u] \union moveList[v] OR NodeMoves(u) \union NodeMoves(v) ??? + %% XXX: NodeMoves() is correct, but unnecessarily strict. The ordsets:union + %% constrains NodeMoves() to return an ordset. + Moves1 = hipe_moves:update_movelist(U, + ordsets:union(hipe_moves:node_moves(U, Moves), + hipe_moves:node_moves(V, Moves)), + Moves), + %% Missing in published algorithm. From Tiger book Errata. + Moves2 = enable_moves_active_to_worklist(hipe_moves:node_movelist(V, Moves1), Moves1), + AdjV = hipe_ig:node_adj_list(V, IG), + + {IG1, Worklists2, Moves3} = + combine_edges_O(AdjV, U, IG, Worklists11, Moves2, K, Target), + + New_worklists = case (not(hipe_ig:is_trivially_colourable(U, K, IG1)) + andalso hipe_reg_worklists:member_freeze(U, Worklists2)) of + true -> hipe_reg_worklists:transfer_freeze_spill(U, Worklists2); + false -> Worklists2 + end, + {IG1, New_worklists, Moves3, Alias1}. +-endif. + +%%---------------------------------------------------------------------- +%% Function: combine +%% +%% Description: Combines two nodes into one (used when coalescing) +%% +%% Parameters: +%% U -- First node to operate on +%% V -- Second node to operate on +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Moves -- Current move information +%% Alias -- Current aliases for temporaries +%% K -- Number of registers +%% +%% Returns: +%% {IG, Worklists, Moves, Alias} (updated) +%%---------------------------------------------------------------------- + +combine(U, V, IG, Alias, Worklists, K, Target) -> + ?debug_msg("N_Coalescing ~p and ~p to ~p~n",[V,U,U]), + Worklists1 = hipe_reg_worklists:add_coalesced(V, U, Worklists), + Alias1 = setAlias(V, U, Alias), + AdjV = hipe_ig:node_adj_list(V, IG), + IG1 = combine_edges(AdjV, U, IG, Worklists1, K, Target), + {IG1, Alias1, Worklists1}. + +%%---------------------------------------------------------------------- +%% Function: combine_edges +%% +%% Description: For each node in a list, make an edge between that node +%% and node U, and decrement its degree by 1 +%% (Used when two nodes are coalesced, to connect all nodes +%% adjacent to one node to the other node) +%% +%% Parameters: +%% [T|Ts] -- List of nodes to make edges to +%% U -- Node to make edges from +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Moves -- Current move information +%% K -- Number of registers +%% +%% Returns: +%% {IG, Worklists, Moves} (updated) +%%---------------------------------------------------------------------- + +combine_edges([], _U, IG, _Worklists, _K, _Target) -> + IG; +combine_edges([T|Ts], U, IG, Worklists, K, Target) -> + case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of + true -> combine_edges(Ts, U, IG, Worklists, K, Target); + _ -> + IG1 = hipe_ig:add_edge(T, U, IG, Target), + IG2 = case Target:is_precoloured(T) of + true -> IG1; + false -> hipe_ig:dec_node_degree(T, IG1) + end, + combine_edges(Ts, U, IG2, Worklists, K, Target) + end. + +%%---------------------------------------------------------------------- +%% Function: combine_edges +%% +%% Description: For each node in a list, make an edge between that node +%% and node U, and decrement its degree by 1 +%% (Used when two nodes are coalesced, to connect all nodes +%% adjacent to one node to the other node) +%% +%% Parameters: +%% [T|Ts] -- List of nodes to make edges to +%% U -- Node to make edges from +%% IG -- Interference graph +%% Worklists -- Current worklists +%% Moves -- Current move information +%% K -- Number of registers +%% +%% Returns: +%% {IG, Worklists, Moves} (updated) +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +combine_edges_O([], _U, IG, Worklists, Moves, _K, _Target) -> + {IG, Worklists, Moves}; +combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> + case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of + true -> combine_edges_O(Ts, U, IG, Worklists, Moves, K, Target); + _ -> + %% XXX: The issue below occurs because the T->V edge isn't removed. + %% This causes adjList[T] to contain stale entries, to possibly grow + %% (if T isn't already adjacent to U), and degree[T] to possibly + %% increase (again, if T isn't already adjacent to U). + %% The decrement_degree() call repairs degree[T] but not adjList[T]. + %% It would be better to physically replace T->V with T->U, and only + %% decrement_degree(T) if T->U already existed. + %% + %% add_edge() may change a low-degree move-related node to be of + %% significant degree. In this case the node belongs in the spill + %% worklist, and that's where decrement_degree() expects to find it. + %% This issue is not covered in the published algorithm. + OldDegree = hipe_ig:get_node_degree(T, IG), + IG1 = hipe_ig:add_edge(T, U, IG, Target), + NewDegree = hipe_ig:get_node_degree(T, IG1), + Worklists0 = + if NewDegree =:= K, OldDegree =:= K-1 -> + %% ?debug_msg("~w:combine_edges_O(): repairing worklist membership for node ~w\n", [?MODULE,T]), + %% The node T must be on the freeze worklist: + %% 1. Since we're coalescing, the simplify worklist must have been + %% empty when combine_edges_O() started. + %% 2. decrement_degree() may put the node T back on the simplify + %% worklist, but that occurs after the worklists repair step. + %% 3. There are no duplicates among the edges. + Worklists00 = hipe_reg_worklists:remove_freeze(T, Worklists), + hipe_reg_worklists:add_spill(T, Worklists00); + true -> + Worklists + end, + {IG2, Worklists1, Moves1} = + decrement_degree_O([T], IG1, Worklists0, Moves, K), + combine_edges_O(Ts, U, IG2, Worklists1, Moves1, K, Target) + end. +-endif. + +%%---------------------------------------------------------------------- +%% Function: undoCoalescing +%% +%% Description: Returns necessary information for a coalesced node +%% +%% Parameters: +%% N -- The node to uncoalesce +%% No_temporaries -- Number of temporaries +%% Alias -- The Alias vector before undoing +%% SavedAdj -- Saved adjacency list +%% IG -- Interference graph +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% list of primitive nodes, that is all nodes that were previously +%% coalesced to N +%% updated alias vector +%% updated Interferece graph +%%---------------------------------------------------------------------- +undoCoalescing(N, No_temporaries, Alias, SavedAdj, IG, Target) -> + Primitives = findPrimitiveNodes(No_temporaries, N, Alias), + Alias1 = restoreAliases(Primitives, Alias), + IG1 = fixAdj(N, SavedAdj, IG, Target), + {Primitives, Alias1, IG1}. + +%% Restore aliasinfo for primitive nodes, that is +%% unalias the node sthat were aliased to the primitive +%% nodes. Note that an unaliased node could be +%% represented in two ways, either not aliased or aliased +%% to itself. See also getAlias +restoreAliases([], Alias) -> + Alias; +restoreAliases([Primitive|Primitives], Alias) -> + Alias1 = setAlias(Primitive, Primitive, Alias), + restoreAliases(Primitives, Alias1). + +%% find the primitive nodes to N, that is find all +%% nodes that are aliased to N +findPrimitiveNodes(No_temporaries, N, Alias) -> + findPrimitiveNodes(No_temporaries, N, Alias, []). + +findPrimitiveNodes(0, _N, _Alias, PrimitiveNodes) -> + PrimitiveNodes; +findPrimitiveNodes(Node, N, Alias, PrimitiveNodes) -> + NextNode = Node - 1, + case (getAlias(NextNode, Alias) =:= N) of + true -> findPrimitiveNodes(NextNode, N, Alias, [NextNode | PrimitiveNodes]); + _ -> findPrimitiveNodes(NextNode, N, Alias, PrimitiveNodes) + end. + +%test_undoCoalescing(No_temporaries, Alias, Worklists) -> +% test_undoCoalescing(No_temporaries, No_temporaries, Alias, Worklists). +% +%test_undoCoalescing(0, _No_temporaries, _Alias, _Worklists) -> +% true; +%test_undoCoalescing(Node, No_temporaries, Alias, Worklists) -> +% %?debug_msg("++ the adj list: ~p~n", [SavedAdj]), +% %?debug_msg("Node ~p~n", [Node]), +% NextNode = Node - 1, +% Coalesced_to = hipe_reg_worklists:member_coalesced_to(NextNode, Worklists), +% ?debug_msg("³³-- member coalesced: ~p~n", [Coalesced_to]), +% {Primitives, Alias1} = undoCoalescing(NextNode, No_temporaries, Alias), +% ?debug_msg("½½-- primitivenodes ~w\n", [Primitives]), +% case (Coalesced_to) of +% true -> printAlias(Alias1); +% _ -> true +% end, +% test_undoCoalescing(NextNode, No_temporaries, Alias, Worklists). + +%%---------------------------------------------------------------------- +%% Function: fixAdj +%% +%% Description: Fixes adajency set and adjacency list when undoing coalescing +%% +%% Parameters: +%% N -- Node that should be uncoalesced +%% SavedAdj -- Saved adjacency list +%% IG -- Interference graph +%% Target -- The module containing the target-specific functions. +%% +%% Returns: +%% updated Interferece graph +%%---------------------------------------------------------------------- +fixAdj(N, SavedAdj, IG, Target) -> + %Saved = hipe_vectors:get(SavedAdj, N), + Saved = hipe_adj_list:edges(N, SavedAdj), + ?debug_msg("§§--adj to ~p: ~p~n", [N, Saved]), + Adj = hipe_ig:node_adj_list(N, IG), + ?debug_msg("««--adj to ~p: ~p~n", [N, Adj]), + New = findNew(Adj, Saved), + ?debug_msg("++--new adj to ~p: ~p~n", [N, New]), + removeAdj(New, N, IG, Target), + %% XXX the following lines seems to make double nodes in + %% some adj_lists, which is a bug, apart from that they + %% don't seem to make any difference at all (even though + %% they are in the pseudocode of "optimistic coalescing") + %% addedge for all in the restored adj_list + %%RestoredAdj = hipe_ig:node_adj_list(N, IG), + %%?debug_msg("adj_lists_before_restore_o ~n~p~n", [hipe_ig:adj_list(IG)]), + %%restoreAdj(RestoredAdj, N, IG, Alias, Target). + IG. + +removeAdj([], _N, _IG, _Target) -> + true; +removeAdj([V| New], N, IG, Target) -> + hipe_ig:remove_edge(V, N, IG, Target), + removeAdj(New, N, IG, Target). + +%%restoreAdj([], _N, IG, _Alias, _Target) -> +%% %%?debug_msg("adj_lists__after_restore_o ~n~p~n", [hipe_ig:adj_list(IG)]), +%% IG; +%%restoreAdj([V| AdjToN], N, IG, Alias, Target) -> +%% AliasToV = getAlias(V, Alias), +%% IG1 = hipe_ig:add_edge(N, AliasToV, IG, Target), +%% restoreAdj(AdjToN, N, IG1, Alias, Target). + +%% XXX This is probably a clumsy way of doing it +%% better to assure the lists are sorted from the beginning +%% also coalesce findNew and removeAdj should improve performance +findNew(Adj, Saved) -> + findNew(Adj, Saved, []). + +findNew([], _Saved, New) -> + New; +findNew([A| Adj], Saved, New) -> + case lists:member(A, Saved) of + true -> findNew(Adj, Saved, New); + _ -> findNew(Adj, Saved, [A| New]) + end. + +%test_fixAdj(0, _SavedAdj, IG, _Target) -> +% IG; +%test_fixAdj(Node, SavedAdj, IG, Target) -> +% NextNode = Node - 1, +% IG1 = fixAdj(NextNode, SavedAdj, IG, Target), +% test_fixAdj(NextNode, SavedAdj, IG1, Target). +%%---------------------------------------------------------------------- +%% Function: ok +%% +%% Description: Checks if a node T is suitable to coalesce with R +%% +%% Parameters: +%% T -- Node to test +%% R -- Other node to test +%% IG -- Interference graph +%% K -- Number of registers +%% Target -- The module containing the target-specific functions +%% +%% Returns: +%% true iff coalescing is OK +%%---------------------------------------------------------------------- + +ok(T, R, IG, K, Target) -> + ((hipe_ig:is_trivially_colourable(T, K, IG)) + orelse Target:is_precoloured(T) + orelse hipe_ig:nodes_are_adjacent(T, R, IG)). + +%%---------------------------------------------------------------------- +%% Function: all_ok +%% +%% Description: True iff, for every T in the list, OK(T,U) +%% +%% Parameters: +%% [T|Ts] -- Nodes to test +%% U -- Node to test for coalescing +%% IG -- Interference graph +%% K -- Number of registers +%% Target -- The module containing the target-specific functions +%% +%% Returns: +%% true iff coalescing is OK for all nodes in the list +%%---------------------------------------------------------------------- + +all_adjacent_ok([], _U, _Worklists, _IG, _K, _Target) -> true; +all_adjacent_ok([T|Ts], U, Worklists, IG, K, Target) -> + case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of + true -> all_adjacent_ok(Ts, U, Worklists, IG, K, Target); + _ -> + %% 'andalso' does not preserve tail-recursion + case ok(T, U, IG, K, Target) of + true -> all_adjacent_ok(Ts, U, Worklists, IG, K, Target); + false -> false + end + end. + +%%---------------------------------------------------------------------- +%% Function: conservative +%% +%% Description: Checks if nodes can be safely coalesced according to +%% the Briggs' conservative coalescing heuristic +%% +%% Parameters: +%% Nodes -- Adjacent nodes +%% IG -- Interference graph +%% K -- Number of registers +%% +%% Returns: +%% true iff coalescing is safe +%%---------------------------------------------------------------------- + +conservative(AdjU, AdjV, U, Worklists, IG, K) -> + conservative_countU(AdjU, AdjV, U, Worklists, IG, K, 0). + +%%---------------------------------------------------------------------- +%% Function: conservative_count +%% +%% Description: Counts degrees for conservative (Briggs' heuristics) +%% +%% Parameters: +%% Nodes -- (Remaining) adjacent nodes +%% IG -- Interference graph +%% K -- Number of registers +%% Cnt -- Accumulator for counting +%% +%% Returns: +%% Final value of accumulator +%%---------------------------------------------------------------------- + +conservative_countU([], AdjV, U, Worklists, IG, K, Cnt) -> + conservative_countV(AdjV, U, Worklists, IG, K, Cnt); +conservative_countU([Node|AdjU], AdjV, U, Worklists, IG, K, Cnt) -> + case hipe_reg_worklists:member_stack_or_coalesced(Node, Worklists) of + true -> conservative_countU(AdjU, AdjV, U, Worklists, IG, K, Cnt); + _ -> + case hipe_ig:is_trivially_colourable(Node, K, IG) of + true -> conservative_countU(AdjU, AdjV, U, Worklists, IG, K, Cnt); + _ -> + Cnt1 = Cnt + 1, + if Cnt1 < K -> conservative_countU(AdjU, AdjV, U, Worklists, IG, K, Cnt1); + true -> false + end + end + end. + +conservative_countV([], _U, _Worklists, _IG, _K, _Cnt) -> true; +conservative_countV([Node|AdjV], U, Worklists, IG, K, Cnt) -> + case hipe_reg_worklists:member_stack_or_coalesced(Node, Worklists) of + true -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt); + _ -> + case hipe_ig:nodes_are_adjacent(Node, U, IG) of + true -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt); + _ -> + case hipe_ig:is_trivially_colourable(Node, K, IG) of + true -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt); + _ -> + Cnt1 = Cnt + 1, + if Cnt1 < K -> conservative_countV(AdjV, U, Worklists, IG, K, Cnt1); + true -> false + end + end + end + end. + +%%--------------------------------------------------------------------- +%% Function: selectSpill +%% +%% Description: Select the node to spill and spill it +%% Parameters: +%% WorkLists -- A datatype containing the different worklists +%% IG -- The interference graph +%% K -- The number of available registers +%% Alias -- The alias mapping +%% SpillLimit -- Try not to spill any nodes above the spill limit +%% +%% Returns: +%% WorkLists -- The updated worklists +%%--------------------------------------------------------------------- + +selectSpill(WorkLists, IG, SpillLimit) -> + [CAR|CDR] = hipe_reg_worklists:spill(WorkLists), + SpillCost = getCost(CAR, IG, SpillLimit), + M = findCheapest(CDR, IG, SpillCost, CAR, SpillLimit), + WorkLists1 = hipe_reg_worklists:remove_spill(M, WorkLists), + hipe_reg_worklists:add_simplify(M, WorkLists1). + +%%--------------------------------------------------------------------- +%% Function: selectSpill +%% +%% Description: Select the node to spill and spill it +%% Parameters: +%% WorkLists -- A datatype containing the different worklists +%% Moves -- A datatype containing the move sets +%% IG -- The interference graph +%% K -- The number of available registers +%% Alias -- The alias mapping +%% SpillLimit -- Try not to spill any nodes above the spill limit +%% +%% Returns: +%% WorkLists -- The updated worklists +%% Moves -- The updated moves +%%--------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +selectSpill_O(WorkLists, Moves, IG, K, Alias, SpillLimit) -> + [CAR|CDR] = hipe_reg_worklists:spill(WorkLists), + + SpillCost = getCost(CAR, IG, SpillLimit), + M = findCheapest(CDR, IG, SpillCost, CAR, SpillLimit), + + WorkLists1 = hipe_reg_worklists:remove_spill(M, WorkLists), + %% The published algorithm adds M to the simplify worklist + %% before the freezeMoves() call. That breaks the worklist + %% invariants, which is why the order is switched here. + {WorkLists2,Moves1} = freezeMoves(M, K, WorkLists1, Moves, IG, Alias), + WorkLists3 = hipe_reg_worklists:add_simplify(M, WorkLists2), + {WorkLists3,Moves1}. +-endif. + +%% Find the node that is cheapest to spill + +findCheapest([], _IG, _Cost, Cheapest, _SpillLimit) -> + Cheapest; +findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) -> + ThisCost = getCost(Node, IG, SpillLimit), + case ThisCost < Cost of + true -> + findCheapest(Nodes, IG, ThisCost, Node, SpillLimit); + false -> + findCheapest(Nodes, IG, Cost, Cheapest, SpillLimit) + end. + +%% Get the cost for spilling a certain node, node numbers above the spill +%% limit are extremely expensive. + +getCost(Node, IG, SpillLimit) -> + case Node > SpillLimit of + true -> inf; + false -> + SpillCost = hipe_ig:node_spill_cost(Node, IG), + ?debug_msg("Actual spillcost f node ~w is ~w~n", [Node, SpillCost]), + SpillCost + end. + +%%---------------------------------------------------------------------- +%% Function: freeze +%% +%% Description: When both simplifying and coalescing is impossible we +%% rather freezes a node in stead of spilling, this function +%% selects a node for freezing (it just picks the first one in +%% the list) +%% +%% Parameters: +%% K -- The number of available registers +%% WorkLists -- A datatype containing the different worklists +%% Moves -- A datatype containing the different movelists +%% IG -- Interference graph +%% Alias -- An alias mapping, shows the alias of all coalesced +%% nodes +%% +%% Returns: +%% WorkLists -- The updated worklists +%% Moves -- The updated movelists +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +freeze(K, WorkLists, Moves, IG, Alias) -> + [U|_] = hipe_reg_worklists:freeze(WorkLists), % Smarter routine? + ?debug_msg("freezing node ~p~n", [U]), + WorkLists0 = hipe_reg_worklists:remove_freeze(U, WorkLists), + %% The published algorithm adds U to the simplify worklist + %% before the freezeMoves() call. That breaks the worklist + %% invariants, which is why the order is switched here. + {WorkLists1, Moves1} = freezeMoves(U, K, WorkLists0, Moves, IG, Alias), + WorkLists2 = hipe_reg_worklists:add_simplify(U, WorkLists1), + {WorkLists2, Moves1}. +-endif. + +%%---------------------------------------------------------------------- +%% Function: freezeMoves +%% +%% Description: Make all move related interferences for a certain node +%% into ordinary interference arcs. +%% +%% Parameters: +%% U -- The node we want to freeze +%% K -- The number of available registers +%% WorkLists -- A datatype containing the different worklists +%% Moves -- A datatype containing the different movelists +%% IG -- Interference graph +%% Alias -- An alias mapping, shows the alias of all coalesced +%% nodes +%% +%% Returns: +%% WorkLists -- The updated worklists +%% Moves -- The updated movelists +%%---------------------------------------------------------------------- + +-ifdef(COMPARE_ITERATED_OPTIMISTIC). +freezeMoves(U, K, WorkLists, Moves, IG, Alias) -> + Nodes = hipe_moves:node_moves(U, Moves), + freezeEm(U, Nodes, K, WorkLists, Moves, IG, Alias). + +%% Find what the other value in a copy instruction is, return false if +%% the instruction isn't a move with the first argument in it. + +moves(U, Move, Alias, Moves) -> + {X,Y} = hipe_moves:get_move(Move, Moves), + %% The old code (which followed the published algorithm) did + %% not follow aliases before looking for "the other" node. + %% This caused moves() to skip some moves, making some nodes + %% still move-related after freezeMoves(). These move-related + %% nodes were then added to the simplify worklist (by freeze() + %% or selectSpill()), breaking the worklist invariants. Nodes + %% already simplified appeared in coalesce_O(), were re-added to + %% the simplify worklist by add_worklist(), simplified again, + %% and coloured multiple times by assignColors(). Ouch! + X1 = getAlias(X, Alias), + Y1 = getAlias(Y, Alias), + if U =:= X1 -> Y1; + U =:= Y1 -> X1; + true -> exit({?MODULE,moves}) % XXX: shouldn't happen + end. + +freezeEm(_U, [], _K, WorkLists, Moves, _IG, _Alias) -> + {WorkLists,Moves}; +freezeEm(U, [M|Ms], K, WorkLists, Moves, IG, Alias) -> + V = moves(U, M, Alias, Moves), + {WorkLists2,Moves2} = freezeEm2(U, V, M, K, WorkLists, Moves, IG, Alias), + freezeEm(U, Ms, K, WorkLists2, Moves2, IG, Alias). + +freezeEm2(U, V, M, K, WorkLists, Moves, IG, Alias) -> + case hipe_moves:member_active(M, Moves) of + true -> + Moves1 = hipe_moves:remove_active(M, Moves), + freezeEm3(U, V, M, K, WorkLists, Moves1, IG, Alias); + false -> + Moves1 = hipe_moves:remove_worklist(M, Moves), + freezeEm3(U, V, M, K, WorkLists, Moves1, IG, Alias) + end. + +freezeEm3(_U,V,_M,K,WorkLists,Moves,IG,_Alias) -> + Moves1 = Moves, % drop frozen move M + V1 = V, % getAlias(V,Alias), + %% "not MoveRelated(v)" is cheaper than "NodeMoves(v) = {}" + case ((not hipe_moves:move_related(V1,Moves1)) andalso + hipe_ig:is_trivially_colourable(V1,K,IG)) of + true -> + ?debug_msg("freezing move to ~p~n", [V]), + Worklists1 = hipe_reg_worklists:transfer_freeze_simplify(V1, WorkLists), + {Worklists1,Moves1}; + false -> + {WorkLists,Moves1} + end. +-endif. diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl new file mode 100644 index 0000000000..dd2855208b --- /dev/null +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -0,0 +1,168 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_specific). + +%% for hipe_coalescing_regalloc: +-export([number_of_temporaries/1 + ,analyze/1 + ,labels/1 + ,all_precoloured/0 + ,bb/2 + ,liveout/2 + ,reg_nr/1 + ,def_use/1 + ,is_move/1 + ,is_precoloured/1 + ,var_range/1 + ,allocatable/0 + ,non_alloc/1 + ,physical_name/1 + ,reverse_postorder/1 + ,livein/2 + ,uses/1 + ,defines/1 + ]). + +%% for hipe_graph_coloring_regalloc: +-export([is_fixed/1]). + +%% for hipe_ls_regalloc: +-export([args/1, is_arg/1, is_global/1, new_spill_index/1]). +-export([breadthorder/1, postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +defun_to_cfg(Defun) -> + hipe_ppc_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + hipe_ppc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). + +reverse_postorder(CFG) -> + hipe_ppc_cfg:reverse_postorder(CFG). + +non_alloc(CFG) -> + non_alloc(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)). + +%% same as hipe_ppc_frame:fix_formals/2 +non_alloc(0, Rest) -> Rest; +non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); +non_alloc(_, []) -> []. + +%% Liveness stuff + +analyze(CFG) -> + hipe_ppc_liveness_gpr:analyse(CFG). + +livein(Liveness,L) -> + [X || X <- hipe_ppc_liveness_gpr:livein(Liveness,L), + hipe_ppc:temp_is_allocatable(X)]. + +liveout(BB_in_out_liveness,Label) -> + [X || X <- hipe_ppc_liveness_gpr:liveout(BB_in_out_liveness,Label), + hipe_ppc:temp_is_allocatable(X)]. + +%% Registers stuff + +allocatable() -> + hipe_ppc_registers:allocatable_gpr(). + +all_precoloured() -> + hipe_ppc_registers:all_precoloured(). + +is_precoloured(Reg) -> + hipe_ppc_registers:is_precoloured_gpr(Reg). + +is_fixed(R) -> + hipe_ppc_registers:is_fixed(R). + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_ppc_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(ppc). + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(ppc), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG,L) -> + hipe_ppc_cfg:bb(CFG,L). + +%% PowerPC stuff + +def_use(Instruction) -> + {defines(Instruction), uses(Instruction)}. + +uses(I) -> + [X || X <- hipe_ppc_defuse:insn_use_gpr(I), + hipe_ppc:temp_is_allocatable(X)]. + +defines(I) -> + [X || X <- hipe_ppc_defuse:insn_def_gpr(I), + hipe_ppc:temp_is_allocatable(X)]. + +is_move(Instruction) -> + case hipe_ppc:is_pseudo_move(Instruction) of + true -> + Dst = hipe_ppc:pseudo_move_dst(Instruction), + case hipe_ppc:temp_is_allocatable(Dst) of + false -> false; + _ -> + Src = hipe_ppc:pseudo_move_src(Instruction), + hipe_ppc:temp_is_allocatable(Src) + end; + false -> false + end. + +reg_nr(Reg) -> + hipe_ppc:temp_reg(Reg). + +%%% Linear Scan stuff + +new_spill_index(SpillIndex) when is_integer(SpillIndex) -> + SpillIndex+1. + +breadthorder(CFG) -> + hipe_ppc_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_ppc_cfg:postorder(CFG). + +is_global(R) -> + R =:= hipe_ppc_registers:temp1() orelse + R =:= hipe_ppc_registers:temp2() orelse + R =:= hipe_ppc_registers:temp3() orelse + hipe_ppc_registers:is_fixed(R). + +is_arg(R) -> + hipe_ppc_registers:is_arg(R). + +args(CFG) -> + hipe_ppc_registers:args(hipe_ppc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl new file mode 100644 index 0000000000..35623e7994 --- /dev/null +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -0,0 +1,146 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_specific_fp). + +%% for hipe_coalescing_regalloc: +-export([number_of_temporaries/1 + ,analyze/1 + ,labels/1 + ,all_precoloured/0 + ,bb/2 + ,liveout/2 + ,reg_nr/1 + ,def_use/1 + ,is_move/1 + ,is_precoloured/1 + ,var_range/1 + ,allocatable/0 + ,non_alloc/1 + ,physical_name/1 + ,reverse_postorder/1 + ,livein/2 + ,uses/1 + ,defines/1 + ]). + +%% for hipe_graph_coloring_regalloc: +-export([is_fixed/1]). + +%% for hipe_ls_regalloc: +%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). +%%-export([breadthorder/1, postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +defun_to_cfg(Defun) -> + hipe_ppc_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + hipe_ppc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). + +reverse_postorder(CFG) -> + hipe_ppc_cfg:reverse_postorder(CFG). + +non_alloc(_CFG) -> + []. + +%% Liveness stuff + +analyze(CFG) -> + hipe_ppc_liveness_fpr:analyse(CFG). + +livein(Liveness, L) -> + hipe_ppc_liveness_fpr:livein(Liveness, L). + +liveout(BB_in_out_liveness, Label) -> + hipe_ppc_liveness_fpr:liveout(BB_in_out_liveness, Label). + +%% Registers stuff + +allocatable() -> + hipe_ppc_registers:allocatable_fpr(). + +all_precoloured() -> + allocatable(). + +is_precoloured(Reg) -> + hipe_ppc_registers:is_precoloured_fpr(Reg). + +is_fixed(_Reg) -> + false. + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_ppc_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(ppc). + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(ppc), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG, L) -> + hipe_ppc_cfg:bb(CFG, L). + +%% PowerPC stuff + +def_use(I) -> + {defines(I), uses(I)}. + +uses(I) -> + hipe_ppc_defuse:insn_use_fpr(I). + +defines(I) -> + hipe_ppc_defuse:insn_def_fpr(I). + +is_move(I) -> + hipe_ppc:is_pseudo_fmove(I). + +reg_nr(Reg) -> + hipe_ppc:temp_reg(Reg). + +-ifdef(notdef). +new_spill_index(SpillIndex) -> + SpillIndex+1. + +breadthorder(CFG) -> + hipe_ppc_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_ppc_cfg:postorder(CFG). + +is_global(_R) -> + false. + +is_arg(_R) -> + false. + +args(_CFG) -> + []. +-endif. diff --git a/lib/hipe/regalloc/hipe_reg_worklists.erl b/lib/hipe/regalloc/hipe_reg_worklists.erl new file mode 100644 index 0000000000..67a5788c7c --- /dev/null +++ b/lib/hipe/regalloc/hipe_reg_worklists.erl @@ -0,0 +1,360 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%---------------------------------------------------------------------- +%%% File : hipe_reg_worklists.erl +%%% Author : Andreas Wallin +%%% Purpose : Represents sets of nodes/temporaries that we are +%%% working on, such as simplify and spill sets. +%%% Created : 3 Feb 2000 by Andreas Wallin +%%% Modified: Spring 2005 by NilsOla Linnermark +%%% to suit the optimistic coalesching allocator +%%%---------------------------------------------------------------------- + +-module(hipe_reg_worklists). +-author(['Andreas Wallin', 'Thorild Selén']). +-export([new/5, % only used by optimistic allocator + new/6, + simplify/1, + spill/1, + freeze/1, + stack/1, + add_simplify/2, + add_freeze/2, + add_coalesced/2, + add_coalesced/3, % only used by optimistic allocator + add_spill/2, + push_stack/3, + remove_simplify/2, + remove_spill/2, + remove_freeze/2, + is_empty_simplify/1, + is_empty_spill/1, + is_empty_freeze/1, + member_freeze/2, + member_coalesced_to/2, % only used by optimistic allocator + member_stack_or_coalesced/2, + non_stacked_or_coalesced_nodes/2, + transfer_freeze_simplify/2, + transfer_freeze_spill/2 + ]). +-ifdef(DEBUG_PRINTOUTS). +-export([print_memberships/1]). +-endif. + +-record(worklists, + {simplify, % Low-degree nodes (if coalescing non move-related) + stack, % Stack of removed low-degree nodes, with adjacency lists + membership, % Mapping from temp to which set it is in + coalesced_to, % if the node is coalesced to (only used by optimistic allocator) + spill, % Significant-degree nodes + freeze % Low-degree move-related nodes + }). + +%%-ifndef(DEBUG). +%%-define(DEBUG,true). +%%-endif. +-include("../main/hipe.hrl"). + +%%%---------------------------------------------------------------------- +%% Function: new +%% +%% Description: Constructor for worklists structure +%% +%% Parameters: +%% IG -- Interference graph +%% Target -- Target module name +%% CFG -- Target-specific CFG +%% Move_sets -- Move information +%% K -- Number of registers +%% +%% Returns: +%% A new worklists data structure +%% +%%%---------------------------------------------------------------------- + +new(IG, Target, CFG, K, No_temporaries) -> % only used by optimistic allocator + CoalescedTo = hipe_bifs:array(No_temporaries, 'none'), + init(initial(Target, CFG), K, IG, empty(No_temporaries, CoalescedTo)). + +new(IG, Target, CFG, Move_sets, K, No_temporaries) -> + init(initial(Target, CFG), K, IG, Move_sets, empty(No_temporaries, [])). + +initial(Target, CFG) -> + {Min_temporary, Max_temporary} = Target:var_range(CFG), + NonAlloc = Target:non_alloc(CFG), + non_precoloured(Target, Min_temporary, Max_temporary, []) + -- [Target:reg_nr(X) || X <- NonAlloc]. + +non_precoloured(Target, Current, Max_temporary, Initial) -> + if Current > Max_temporary -> + Initial; + true -> + NewInitial = + case Target:is_precoloured(Current) of + true -> Initial; + false -> [Current|Initial] + end, + non_precoloured(Target, Current+1, Max_temporary, NewInitial) + end. + +%% construct an empty initialized worklists data structure +empty(No_temporaries, CoalescedTo) -> + #worklists{ + membership = hipe_bifs:array(No_temporaries, 'none'), + coalesced_to = CoalescedTo, % only used by optimistic allocator + simplify = ordsets:new(), + stack = [], + spill = ordsets:new(), + freeze = ordsets:new() + }. + +%% Selectors for worklists record + +simplify(Worklists) -> Worklists#worklists.simplify. +spill(Worklists) -> Worklists#worklists.spill. +freeze(Worklists) -> Worklists#worklists.freeze. +stack(Worklists) -> Worklists#worklists.stack. + +%% Updating worklists records + +set_simplify(Simplify, Worklists) -> + Worklists#worklists{simplify = Simplify}. +set_spill(Spill, Worklists) -> + Worklists#worklists{spill = Spill}. +set_freeze(Freeze, Worklists) -> + Worklists#worklists{freeze = Freeze}. + + +%%---------------------------------------------------------------------- +%% Function: init +%% +%% Description: Initializes worklists +%% +%% Parameters: +%% Initials -- Not precoloured temporaries +%% K -- Number of registers +%% IG -- Interference graph +%% Move_sets -- Move information +%% Worklists -- (Empty) worklists structure +%% +%% Returns: +%% Initialized worklists structure +%% +%%---------------------------------------------------------------------- + +init([], _, _, Worklists) -> Worklists; +init([Initial|Initials], K, IG, Worklists) -> + case hipe_ig:is_trivially_colourable(Initial, K, IG) of + false -> + New_worklists = add_spill(Initial, Worklists), + init(Initials, K, IG, New_worklists); + _ -> + New_worklists = add_simplify(Initial, Worklists), + init(Initials, K, IG, New_worklists) + end. + +init([], _, _, _, Worklists) -> Worklists; +init([Initial|Initials], K, IG, Move_sets, Worklists) -> + case hipe_ig:is_trivially_colourable(Initial, K, IG) of + false -> + New_worklists = add_spill(Initial, Worklists), + init(Initials, K, IG, Move_sets, New_worklists); + _ -> + case hipe_moves:move_related(Initial, Move_sets) of + true -> + New_worklists = add_freeze(Initial, Worklists), + init(Initials, K, IG, Move_sets, New_worklists); + _ -> + New_worklists = add_simplify(Initial, Worklists), + init(Initials, K, IG, Move_sets, New_worklists) + end + end. + +%%%---------------------------------------------------------------------- +%% Function: is_empty +%% +%% Description: Tests if the selected worklist if empty or not. +%% +%% Parameters: +%% Worklists -- A worklists data structure +%% +%% Returns: +%% true -- If the worklist was empty +%% false -- otherwise +%% +%%%---------------------------------------------------------------------- + +is_empty_simplify(Worklists) -> + simplify(Worklists) =:= []. + +is_empty_spill(Worklists) -> + spill(Worklists) =:= []. + +is_empty_freeze(Worklists) -> + freeze(Worklists) =:= []. + +%%%---------------------------------------------------------------------- +%% Function: add +%% +%% Description: Adds one element to one of the worklists. +%% +%% Parameters: +%% Element -- An element you want to add to the +%% selected worklist. The element should +%% be a node/temporary. +%% Worklists -- A worklists data structure +%% +%% Returns: +%% An worklists data-structure that have Element in selected +%% worklist. +%% +%%%---------------------------------------------------------------------- +add_coalesced(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'stack_or_coalesced'), + Worklists. + +add_coalesced(From, To, Worklists) -> % only used by optimistic allocator + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, From, 'stack_or_coalesced'), + Coalesced_to = Worklists#worklists.coalesced_to, + hipe_bifs:array_update(Coalesced_to, To, 'coalesced_to'), + Worklists. + +add_simplify(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'simplify'), + Simplify = ordsets:add_element(Element, simplify(Worklists)), + set_simplify(Simplify, Worklists). + +add_spill(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'spill'), + Spill = ordsets:add_element(Element, spill(Worklists)), + set_spill(Spill, Worklists). + +add_freeze(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'freeze'), + Freeze = ordsets:add_element(Element, freeze(Worklists)), + set_freeze(Freeze, Worklists). + +push_stack(Node, AdjList, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Node, 'stack_or_coalesced'), + Stack = Worklists#worklists.stack, + Worklists#worklists{stack = [{Node,AdjList}|Stack]}. + +%%%---------------------------------------------------------------------- +%% Function: remove +%% +%% Description: Removes one element to one of the worklists. +%% +%% Parameters: +%% Element -- An element you want to remove from the +%% selected worklist. The element should +%% be a node/temporary. +%% Worklists -- A worklists data structure +%% +%% Returns: +%% A worklists data-structure that don't have Element in selected +%% worklist. +%% +%%%---------------------------------------------------------------------- +remove_simplify(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'none'), + Simplify = ordsets:del_element(Element, simplify(Worklists)), + set_simplify(Simplify, Worklists). + +remove_spill(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'none'), + Spill = ordsets:del_element(Element, spill(Worklists)), + set_spill(Spill, Worklists). + +remove_freeze(Element, Worklists) -> + Membership = Worklists#worklists.membership, + hipe_bifs:array_update(Membership, Element, 'none'), + Freeze = ordsets:del_element(Element, freeze(Worklists)), + set_freeze(Freeze, Worklists). + +%%%---------------------------------------------------------------------- +%% Function: transfer +%% +%% Description: Moves element from one worklist to another. +%% +%%%---------------------------------------------------------------------- +transfer_freeze_simplify(Element, Worklists) -> + add_simplify(Element, remove_freeze(Element, Worklists)). + +transfer_freeze_spill(Element, Worklists) -> + add_spill(Element, remove_freeze(Element, Worklists)). + +%%%---------------------------------------------------------------------- +%% Function: member +%% +%% Description: Checks if one element if member of selected worklist. +%% +%% Parameters: +%% Element -- Element you want to know if it's a +%% member of selected worklist. +%% Worklists -- A worklists data structure +%% +%% Returns: +%% true -- if Element is a member of selected worklist +%% false -- Otherwise +%% +%%%---------------------------------------------------------------------- + +member_coalesced_to(Element, Worklists) -> % only used by optimistic allocator + hipe_bifs:array_sub(Worklists#worklists.coalesced_to, Element) =:= 'coalesced_to'. + +member_freeze(Element, Worklists) -> + hipe_bifs:array_sub(Worklists#worklists.membership, Element) =:= 'freeze'. + +member_stack_or_coalesced(Element, Worklists) -> + hipe_bifs:array_sub(Worklists#worklists.membership, Element) =:= 'stack_or_coalesced'. + +non_stacked_or_coalesced_nodes(Nodes, Worklists) -> + Membership = Worklists#worklists.membership, + [Node || Node <- Nodes, + hipe_bifs:array_sub(Membership, Node) =/= 'stack_or_coalesced']. + +%%%---------------------------------------------------------------------- +%% Print functions - only used for debugging + +-ifdef(DEBUG_PRINTOUTS). +print_memberships(Worklists) -> + ?debug_msg("Worklist memeberships:\n", []), + Membership = Worklists#worklists.membership, + NrElems = hipe_bifs:array_length(Membership), + Coalesced_to = Worklists#worklists.coalesced_to, + print_membership(NrElems, Membership, Coalesced_to). + +print_membership(0, _, _) -> + true; +print_membership(Element, Membership, Coalesced_to) -> + NextElement = Element - 1, + ?debug_msg("worklist ~w ~w ~w\n", + [NextElement, hipe_bifs:array_sub(Membership, NextElement), + hipe_bifs:array_sub(Coalesced_to, NextElement)]), + print_membership(NextElement, Membership, Coalesced_to). +-endif. diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl new file mode 100644 index 0000000000..428a82c87b --- /dev/null +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -0,0 +1,68 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Common wrapper for graph_coloring and coalescing regallocs. + +-module(hipe_regalloc_loop). +-export([ra/5, ra_fp/4]). + +%%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> + {NewDefun, Coloring, _NewSpillIndex} = + ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod), + {NewDefun, Coloring}. + +ra_fp(Defun, Options, RegAllocMod, TargetMod) -> + ra_common(Defun, 0, Options, RegAllocMod, TargetMod). + +ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> + ?inc_counter(ra_calls_counter, 1), + CFG = TargetMod:defun_to_cfg(Defun), + SpillLimit = TargetMod:number_of_temporaries(CFG), + alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod). + +alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> + ?inc_counter(ra_iteration_counter, 1), + CFG = TargetMod:defun_to_cfg(Defun), + {Coloring, _NewSpillIndex} = + RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit, TargetMod, Options), + {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring), + case DidSpill of + false -> %% No new temps, we are done. + ?add_spills(Options, _NewSpillIndex), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + TargetMod, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + %% case proplists:get_bool(verbose_spills, Options) of + %% true -> + %% ?msg("Num spill slots used: ~p~n", [NewSpillIndex2-SpillIndex]); + %% false -> + %% ok + %% end, + {NewDefun, Coloring2, NewSpillIndex2}; + _ -> + %% Since SpillLimit is used as a low-water-mark + %% the list of temps not to spill is uninteresting. + alloc(NewDefun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) + end. diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl new file mode 100644 index 0000000000..7b8c62e802 --- /dev/null +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -0,0 +1,168 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_specific). + +%% for hipe_coalescing_regalloc: +-export([number_of_temporaries/1 + ,analyze/1 + ,labels/1 + ,all_precoloured/0 + ,bb/2 + ,liveout/2 + ,reg_nr/1 + ,def_use/1 + ,is_move/1 + ,is_precoloured/1 + ,var_range/1 + ,allocatable/0 + ,non_alloc/1 + ,physical_name/1 + ,reverse_postorder/1 + ,livein/2 + ,uses/1 + ,defines/1 + ]). + +%% for hipe_graph_coloring_regalloc: +-export([is_fixed/1]). + +%% for hipe_ls_regalloc: +-export([args/1, is_arg/1, is_global/1, new_spill_index/1]). +-export([breadthorder/1, postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +defun_to_cfg(Defun) -> + hipe_sparc_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + hipe_sparc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). + +reverse_postorder(CFG) -> + hipe_sparc_cfg:reverse_postorder(CFG). + +non_alloc(CFG) -> + non_alloc(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)). + +%% same as hipe_sparc_frame:fix_formals/2 +non_alloc(0, Rest) -> Rest; +non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); +non_alloc(_, []) -> []. + +%% Liveness stuff + +analyze(CFG) -> + hipe_sparc_liveness_gpr:analyse(CFG). + +livein(Liveness,L) -> + [X || X <- hipe_sparc_liveness_gpr:livein(Liveness,L), + hipe_sparc:temp_is_allocatable(X)]. + +liveout(BB_in_out_liveness,Label) -> + [X || X <- hipe_sparc_liveness_gpr:liveout(BB_in_out_liveness,Label), + hipe_sparc:temp_is_allocatable(X)]. + +%% Registers stuff + +allocatable() -> + hipe_sparc_registers:allocatable_gpr(). + +all_precoloured() -> + hipe_sparc_registers:all_precoloured(). + +is_precoloured(Reg) -> + hipe_sparc_registers:is_precoloured_gpr(Reg). + +is_fixed(R) -> + hipe_sparc_registers:is_fixed(R). + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_sparc_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(sparc). + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(sparc), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG,L) -> + hipe_sparc_cfg:bb(CFG,L). + +%% SPARC stuff + +def_use(Instruction) -> + {defines(Instruction), uses(Instruction)}. + +uses(I) -> + [X || X <- hipe_sparc_defuse:insn_use_gpr(I), + hipe_sparc:temp_is_allocatable(X)]. + +defines(I) -> + [X || X <- hipe_sparc_defuse:insn_def_gpr(I), + hipe_sparc:temp_is_allocatable(X)]. + +is_move(Instruction) -> + case hipe_sparc:is_pseudo_move(Instruction) of + true -> + Dst = hipe_sparc:pseudo_move_dst(Instruction), + case hipe_sparc:temp_is_allocatable(Dst) of + false -> false; + _ -> + Src = hipe_sparc:pseudo_move_src(Instruction), + hipe_sparc:temp_is_allocatable(Src) + end; + false -> false + end. + +reg_nr(Reg) -> + hipe_sparc:temp_reg(Reg). + +%%% Linear Scan stuff + +new_spill_index(SpillIndex) when is_integer(SpillIndex) -> + SpillIndex+1. + +breadthorder(CFG) -> + hipe_sparc_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_sparc_cfg:postorder(CFG). + +is_global(R) -> + R =:= hipe_sparc_registers:temp1() orelse + R =:= hipe_sparc_registers:temp2() orelse + R =:= hipe_sparc_registers:temp3() orelse + hipe_sparc_registers:is_fixed(R). + +is_arg(R) -> + hipe_sparc_registers:is_arg(R). + +args(CFG) -> + hipe_sparc_registers:args(hipe_sparc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl new file mode 100644 index 0000000000..8a27f84e67 --- /dev/null +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -0,0 +1,146 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_specific_fp). + +%% for hipe_coalescing_regalloc: +-export([number_of_temporaries/1 + ,analyze/1 + ,labels/1 + ,all_precoloured/0 + ,bb/2 + ,liveout/2 + ,reg_nr/1 + ,def_use/1 + ,is_move/1 + ,is_precoloured/1 + ,var_range/1 + ,allocatable/0 + ,non_alloc/1 + ,physical_name/1 + ,reverse_postorder/1 + ,livein/2 + ,uses/1 + ,defines/1 + ]). + +%% for hipe_graph_coloring_regalloc: +-export([is_fixed/1]). + +%% for hipe_ls_regalloc: +%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). +%%-export([breadthorder/1, postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +defun_to_cfg(Defun) -> + hipe_sparc_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + hipe_sparc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). + +reverse_postorder(CFG) -> + hipe_sparc_cfg:reverse_postorder(CFG). + +non_alloc(_CFG) -> + []. + +%% Liveness stuff + +analyze(CFG) -> + hipe_sparc_liveness_fpr:analyse(CFG). + +livein(Liveness, L) -> + hipe_sparc_liveness_fpr:livein(Liveness, L). + +liveout(BB_in_out_liveness, Label) -> + hipe_sparc_liveness_fpr:liveout(BB_in_out_liveness, Label). + +%% Registers stuff + +allocatable() -> + hipe_sparc_registers:allocatable_fpr(). + +all_precoloured() -> + allocatable(). + +is_precoloured(Reg) -> + hipe_sparc_registers:is_precoloured_fpr(Reg). + +is_fixed(_Reg) -> + false. + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_sparc_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(sparc). + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(sparc), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG, L) -> + hipe_sparc_cfg:bb(CFG, L). + +%% SPARC stuff + +def_use(I) -> + {defines(I), uses(I)}. + +uses(I) -> + hipe_sparc_defuse:insn_use_fpr(I). + +defines(I) -> + hipe_sparc_defuse:insn_def_fpr(I). + +is_move(I) -> + hipe_sparc:is_pseudo_fmove(I). + +reg_nr(Reg) -> + hipe_sparc:temp_reg(Reg). + +-ifdef(notdef). +new_spill_index(SpillIndex)-> + SpillIndex+1. + +breadthorder(CFG) -> + hipe_sparc_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_sparc_cfg:postorder(CFG). + +is_global(_R) -> + false. + +is_arg(_R) -> + false. + +args(_CFG) -> + []. +-endif. diff --git a/lib/hipe/regalloc/hipe_spillcost.erl b/lib/hipe/regalloc/hipe_spillcost.erl new file mode 100644 index 0000000000..04b25f6339 --- /dev/null +++ b/lib/hipe/regalloc/hipe_spillcost.erl @@ -0,0 +1,101 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_spillcost). + +-export([new/1, + inc_costs/2, + ref_in_bb/2, + spill_cost/2]). +%% The following is exported only for debugging purposes. +-ifdef(DEBUG_PRINTOUTS). +-export([nr_of_use/2]). +-endif. + +%%---------------------------------------------------------------------------- + +-include("hipe_spillcost.hrl"). + +%%---------------------------------------------------------------------------- + +-spec new(non_neg_integer()) -> #spill_cost{}. + +new(NrTemps) -> + #spill_cost{uses = hipe_bifs:array(NrTemps, 0), + bb_uses = hipe_bifs:array(NrTemps, 0)}. + +%%---------------------------------------------------------------------------- +%% Function: inc_costs +%% +%% Description: Registers usage of a list of temporaries (for spill_cost) +%%---------------------------------------------------------------------------- + +-spec inc_costs([non_neg_integer()], #spill_cost{}) -> #spill_cost{}. + +inc_costs(Temps, SC) -> + Uses = SC#spill_cost.uses, + lists:foreach(fun (T) -> inc_use(T, Uses) end, Temps), + SC. % updated via side-effects + +inc_use(Temp, Uses) -> + hipe_bifs:array_update(Uses, Temp, get_uses(Temp, Uses) + 1). + +nr_of_use(Temp, SC) -> + get_uses(Temp, SC#spill_cost.uses). + +get_uses(Temp, Uses) -> + hipe_bifs:array_sub(Uses, Temp). + +%%---------------------------------------------------------------------------- +%% Function: ref_in_bb +%% +%% Description: Registers that a set of temporaries are used in one basic +%% block; should be done exactly once per basic block +%%---------------------------------------------------------------------------- + +-spec ref_in_bb([non_neg_integer()], #spill_cost{}) -> #spill_cost{}. + +ref_in_bb(Temps, SC) -> + BBUses = SC#spill_cost.bb_uses, + lists:foreach(fun (T) -> inc_bb_use(T, BBUses) end, Temps), + SC. % updated via side-effects + +inc_bb_use(Temp, BBUses) -> + hipe_bifs:array_update(BBUses, Temp, get_bb_uses(Temp, BBUses) + 1). + +bb_use(Temp, SC) -> + get_bb_uses(Temp, SC#spill_cost.bb_uses). + +get_bb_uses(Temp, BBUses) -> + hipe_bifs:array_sub(BBUses, Temp). + +%%---------------------------------------------------------------------------- +%% Function: spill_cost +%% +%% Description: Computes a spill cost for a temporary +%% +%% Returns: +%% Spill cost (a real number -- higher means worse to spill) +%%---------------------------------------------------------------------------- + +-spec spill_cost(non_neg_integer(), #spill_cost{}) -> float(). + +spill_cost(Temp, SC) -> + nr_of_use(Temp, SC) / bb_use(Temp, SC). diff --git a/lib/hipe/regalloc/hipe_spillcost.hrl b/lib/hipe/regalloc/hipe_spillcost.hrl new file mode 100644 index 0000000000..e736a561d7 --- /dev/null +++ b/lib/hipe/regalloc/hipe_spillcost.hrl @@ -0,0 +1,27 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-type hipe_array() :: integer(). + +-record(spill_cost, + {uses :: hipe_array(), % number of uses of each temp + bb_uses :: hipe_array() % number of basic blocks each temp occurs in + }). + diff --git a/lib/hipe/regalloc/hipe_temp_map.erl b/lib/hipe/regalloc/hipe_temp_map.erl new file mode 100644 index 0000000000..85678edd54 --- /dev/null +++ b/lib/hipe/regalloc/hipe_temp_map.erl @@ -0,0 +1,125 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% =========================================================================== +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% Time-stamp: <2008-04-20 14:54:00 richard> +%% =========================================================================== +%% Module : hipe_temp_map +%% Purpose : +%% Notes : +%% History : * 2001-07-24 Erik Johansson (happi@it.uu.se): Created. +%% =========================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_temp_map). + +-export([cols2tuple/2, is_spilled/2, to_substlist/1]). + +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------------- +%% Convert a list of [{R0, C1}, {R1, C2}, ...] to a temp_map +%% (Currently implemented as a tuple) tuple {C1, C2, ...}. +%% +%% The indices (Ri) must be unique but do not have to be sorted and +%% they can be sparse. +%% Note that the first allowed index is 0 -- this will be mapped to +%% element 1 +%%---------------------------------------------------------------------------- + +-spec cols2tuple(hipe_map(), atom()) -> hipe_temp_map(). + +cols2tuple(Map, Target) -> + ?ASSERT(check_list(Map)), + SortedMap = lists:keysort(1, Map), + cols2tuple(0, SortedMap, [], Target). + +%% sorted_cols2tuple(Map, Target) -> +%% ?ASSERT(check_list(Map)), +%% ?ASSERT(Map =:= lists:keysort(1, Map)), +%% cols2tuple(0, Map, [], Target). + +%% Build a dense mapping +cols2tuple(_, [], Vs, _) -> + %% Done reverse the list and convert to tuple. + list_to_tuple(lists:reverse(Vs)); +cols2tuple(N, [{R, C}|Ms], Vs, Target) when N =:= R -> + %% N makes sure the mapping is dense. N is he next key. + cols2tuple(N+1, Ms, [C|Vs], Target); +cols2tuple(N, SourceMapping, Vs, Target) -> + %% The source was sparse, make up some placeholders... + Val = + case Target:is_precoloured(N) of + %% If it is precoloured, we know what to map it to. + true -> {reg, N}; + false -> unknown + end, + cols2tuple(N+1, SourceMapping, [Val|Vs], Target). + +%% +%% True if temp Temp is spilled. +%% +-spec is_spilled(non_neg_integer(), hipe_temp_map()) -> boolean(). + +is_spilled(Temp, Map) -> + case element(Temp+1, Map) of + {reg, _R} -> false; + {fp_reg, _R}-> false; + {spill, _N} -> true; + unknown -> false + end. + +%% %% True if temp Temp is allocated to a reg. +%% in_reg(Temp, Map) -> +%% case element(Temp+1, Map) of +%% {reg, _R} -> true; +%% {fp_reg, _R}-> false; +%% {spill, _N} -> false; +%% unknown -> false +%% end. +%% +%% %% True if temp Temp is allocated to a fp_reg. +%% in_fp_reg(Temp, Map) -> +%% case element(Temp+1, Map) of +%% {fp_reg, _R} -> true; +%% {reg, _R} -> false; +%% {spill, _N} -> false; +%% unknown -> false +%% end. +%% +%% %% Returns the inf temp Temp is mapped to. +%% find(Temp, Map) -> element(Temp+1, Map). + + +%% +%% Converts a temp_map tuple back to a (sorted) key-list. +%% +-spec to_substlist(hipe_temp_map()) -> hipe_map(). + +to_substlist(Map) -> + T = tuple_to_list(Map), + mapping(T, 0). + +mapping([R|Rs], Temp) -> + [{Temp, R}| mapping(Rs, Temp+1)]; +mapping([], _) -> + []. diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl new file mode 100644 index 0000000000..0f490ba14d --- /dev/null +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -0,0 +1,203 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-else. +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-endif. + +-module(?HIPE_X86_SPECIFIC). + +-export([number_of_temporaries/1]). + +%% The following exports are used as M:F(...) calls from other modules; +%% e.g. hipe_x86_ra_ls. +-export([analyze/1, + bb/2, + args/1, + labels/1, + livein/2, + liveout/2, + uses/1, + defines/1, + def_use/1, + is_arg/1, % used by hipe_ls_regalloc + is_move/1, + is_fixed/1, % used by hipe_graph_coloring_regalloc + is_global/1, + is_precoloured/1, + reg_nr/1, + non_alloc/1, + allocatable/0, + physical_name/1, + all_precoloured/0, + new_spill_index/1, % used by hipe_ls_regalloc + var_range/1, + breadthorder/1, + postorder/1, + reverse_postorder/1]). + +%% callbacks for hipe_regalloc_loop +-export([defun_to_cfg/1, + check_and_rewrite/2]). + +defun_to_cfg(Defun) -> + hipe_x86_cfg:init(Defun). + +check_and_rewrite(Defun, Coloring) -> + ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(Defun, Coloring, 'normal'). + +reverse_postorder(CFG) -> + hipe_x86_cfg:reverse_postorder(CFG). + +breadthorder(CFG) -> + hipe_x86_cfg:breadthorder(CFG). + +postorder(CFG) -> + hipe_x86_cfg:postorder(CFG). + +%% Globally defined registers for linear scan +is_global(R) -> + ?HIPE_X86_REGISTERS:temp1() =:= R orelse + ?HIPE_X86_REGISTERS:temp0() =:= R orelse + ?HIPE_X86_REGISTERS:is_fixed(R). + +is_fixed(R) -> + ?HIPE_X86_REGISTERS:is_fixed(R). + +is_arg(R) -> + ?HIPE_X86_REGISTERS:is_arg(R). + +args(CFG) -> + ?HIPE_X86_REGISTERS:args(hipe_x86_cfg:arity(CFG)). + +non_alloc(CFG) -> + non_alloc(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)). + +%% same as hipe_x86_frame:fix_formals/2 +non_alloc(0, Rest) -> Rest; +non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); +non_alloc(_, []) -> []. + +%% Liveness stuff + +analyze(CFG) -> + ?HIPE_X86_LIVENESS:analyze(CFG). + +livein(Liveness,L) -> + [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), + hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:heap_limit(), + hipe_x86:temp_type(X) =/= 'double']. + +liveout(BB_in_out_liveness,Label) -> + [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), + hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:heap_limit(), + hipe_x86:temp_type(X) =/= 'double']. + +%% Registers stuff + +allocatable() -> + ?HIPE_X86_REGISTERS:allocatable(). + +all_precoloured() -> + ?HIPE_X86_REGISTERS:all_precoloured(). + +is_precoloured(Reg) -> + ?HIPE_X86_REGISTERS:is_precoloured(Reg). + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_x86_cfg:labels(CFG). + +var_range(_CFG) -> + hipe_gensym:var_range(x86). + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(x86), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG,L) -> + hipe_x86_cfg:bb(CFG,L). + +%% X86 stuff + +def_use(Instruction) -> + {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =/= 'double'], + [X || X <- ?HIPE_X86_DEFUSE:insn_use(Instruction), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =/= 'double'] + }. + +uses(I) -> + [X || X <- ?HIPE_X86_DEFUSE:insn_use(I), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =/= 'double']. + +defines(I) -> + [X || X <- ?HIPE_X86_DEFUSE:insn_def(I), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =/= 'double']. + +is_move(Instruction) -> + case hipe_x86:is_move(Instruction) of + true -> + Src = hipe_x86:move_src(Instruction), + Dst = hipe_x86:move_dst(Instruction), + case hipe_x86:is_temp(Src) of + true -> + case hipe_x86:temp_is_allocatable(Src) of + true -> + case hipe_x86:is_temp(Dst) of + true -> + hipe_x86:temp_is_allocatable(Dst); + false -> false + end; + false -> false + end; + false -> false + end; + false -> false + end. + +reg_nr(Reg) -> + hipe_x86:temp_reg(Reg). + +new_spill_index(SpillIndex) when is_integer(SpillIndex) -> + SpillIndex+1. diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl new file mode 100644 index 0000000000..7fd80b63d8 --- /dev/null +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -0,0 +1,164 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-else. +-define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-endif. + +-module(?HIPE_X86_SPECIFIC_X87). +-export([allocatable/0, + is_precoloured/1, + %% var_range/1, + %% def_use/1, + %% is_fixed/1, + is_arg/1, + %% non_alloc/1, + new_spill_index/1, + number_of_temporaries/1 + ]). + +%% The following exports are used as M:F(...) calls from other modules; +%% e.g. hipe_x86_ra_ls. +-export([analyze/1, + bb/2, + args/1, + labels/1, + livein/2, + liveout/2, + uses/1, + defines/1, + is_global/1, + reg_nr/1, + physical_name/1, + breadthorder/1, + postorder/1, + reverse_postorder/1]). + +breadthorder(CFG) -> + hipe_x86_cfg:breadthorder(CFG). +postorder(CFG) -> + hipe_x86_cfg:postorder(CFG). +reverse_postorder(CFG) -> + hipe_x86_cfg:reverse_postorder(CFG). + +is_global(_) -> + false. + +-ifdef(notdef). +is_fixed(_) -> + false. +-endif. + +is_arg(_) -> + false. + +args(_) -> + []. + +-ifdef(notdef). +non_alloc(_) -> + []. +-endif. + +%% Liveness stuff + +analyze(CFG) -> + ?HIPE_X86_LIVENESS:analyze(CFG). + +livein(Liveness,L) -> + [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double']. + +liveout(BB_in_out_liveness,Label) -> + [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label), + hipe_x86:temp_is_allocatable(X), + hipe_x86:temp_type(X) =:= 'double']. + +%% Registers stuff + +allocatable() -> + ?HIPE_X86_REGISTERS:allocatable_x87(). + +is_precoloured(Reg) -> + ?HIPE_X86_REGISTERS:is_precoloured_x87(Reg). + +physical_name(Reg) -> + Reg. + +%% CFG stuff + +labels(CFG) -> + hipe_x86_cfg:labels(CFG). + +-ifdef(notdef). +var_range(_CFG) -> + {Min,Max} = hipe_gensym:var_range(x86), + %% io:format("Var_range: ~w\n",[{Min,Max}]), + {Min,Max}. +-endif. + +number_of_temporaries(_CFG) -> + Highest_temporary = hipe_gensym:get_var(x86), + %% Since we can have temps from 0 to Max adjust by +1. + Highest_temporary + 1. + +bb(CFG,L) -> + hipe_x86_cfg:bb(CFG,L). + +%% X86 stuff + +-ifdef(notdef). +def_use(Instruction) -> + {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction), + hipe_x86:temp_is_allocatable(X), + temp_is_double(X)], + [X || X <- ?HIPE_X86_DEFUSE:insn_use(Instruction), + hipe_x86:temp_is_allocatable(X), + temp_is_double(X)] + }. +-endif. + +uses(I) -> + [X || X <- ?HIPE_X86_DEFUSE:insn_use(I), + hipe_x86:temp_is_allocatable(X), + temp_is_double(X)]. + +defines(I) -> + [X || X <- ?HIPE_X86_DEFUSE:insn_def(I), + hipe_x86:temp_is_allocatable(X), + temp_is_double(X)]. + +temp_is_double(Temp) -> + hipe_x86:temp_type(Temp) =:= 'double'. + +reg_nr(Reg) -> + hipe_x86:temp_reg(Reg). + +new_spill_index(SpillIndex) -> + SpillIndex+1. diff --git a/lib/hipe/rtl/Makefile b/lib/hipe/rtl/Makefile new file mode 100644 index 0000000000..beab8da547 --- /dev/null +++ b/lib/hipe/rtl/Makefile @@ -0,0 +1,142 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +ifdef HIPE_ENABLED +HIPE_MODULES = hipe_rtl hipe_rtl_cfg \ + hipe_rtl_liveness \ + hipe_icode2rtl hipe_rtl_mk_switch \ + hipe_rtl_primops \ + hipe_rtl_varmap hipe_rtl_exceptions \ + hipe_rtl_binary_match hipe_rtl_binary_construct \ + hipe_rtl_arith_32 hipe_rtl_arith_64 \ + hipe_rtl_ssa hipe_rtl_ssa_const_prop \ + hipe_rtl_cleanup_const hipe_rtl_symbolic hipe_rtl_lcm \ + hipe_rtl_ssapre hipe_rtl_binary hipe_rtl_ssa_avail_expr \ + hipe_rtl_arch hipe_tagscheme +else +HIPE_MODULES = +endif + +MODULES = $(HIPE_MODULES) + +HRL_FILES= hipe_literals.hrl +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) + +# APP_FILE= +# App_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS: Please keep +inline below +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +inline + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: + +clean: + rm -f hipe_literals.hrl + rm -f $(TARGET_FILES) + rm -f core erl_crash.dump + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/rtl + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/rtl + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + + +HIPE_MKLITERALS=$(ERL_TOP)/bin/$(TARGET)/hipe_mkliterals + +hipe_literals.hrl: $(HIPE_MKLITERALS) + $(HIPE_MKLITERALS) -e > hipe_literals.hrl + +../main/hipe.hrl: ../vsn.mk ../main/hipe.hrl.src + sed -e "s;%VSN%;$(HIPE_VSN);" ../main/hipe.hrl.src > ../main/hipe.hrl + +$(EBIN)/hipe_rtl.beam: hipe_rtl.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_arch.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_binary.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_bin_util.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_cfg.beam: hipe_rtl.hrl ../flow/cfg.hrl ../flow/cfg.inc ../main/hipe.hrl +$(EBIN)/hipe_rtl_cleanup_const.beam: hipe_rtl.hrl +$(EBIN)/hipe_rtl_liveness.beam: hipe_rtl.hrl ../flow/cfg.hrl ../flow/liveness.inc +$(EBIN)/hipe_icode2rtl.beam: hipe_literals.hrl ../main/hipe.hrl ../icode/hipe_icode.hrl +$(EBIN)/hipe_tagscheme.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_primops.beam: hipe_rtl.hrl ../icode/hipe_icode_primops.hrl hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_arith_32.beam: ../main/hipe.hrl hipe_rtl_arith.inc +$(EBIN)/hipe_rtl_arith_64.beam: ../main/hipe.hrl hipe_rtl_arith.inc +$(EBIN)/hipe_rtl_bs_ops.beam: hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_cerl_bs_ops.beam: ../main/hipe.hrl hipe_literals.hrl hipe_rtl.hrl +$(EBIN)/hipe_rtl_exceptions.beam: hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_inline_bs_ops.beam: hipe_rtl.hrl hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_mk_switch.beam: ../main/hipe.hrl +$(EBIN)/hipe_rtl_lcm.beam: ../flow/cfg.hrl hipe_rtl.hrl +$(EBIN)/hipe_rtl_symbolic.beam: hipe_rtl.hrl hipe_literals.hrl ../flow/cfg.hrl ../icode/hipe_icode_primops.hrl +$(EBIN)/hipe_rtl_varmap.beam: ../main/hipe.hrl ../icode/hipe_icode.hrl + +$(EBIN)/hipe_rtl_ssa.beam: ../ssa/hipe_ssa.inc ../main/hipe.hrl ../ssa/hipe_ssa_liveness.inc hipe_rtl.hrl +$(EBIN)/hipe_rtl_ssa_const_prop.beam: hipe_rtl.hrl ../main/hipe.hrl ../flow/cfg.hrl ../ssa/hipe_ssa_const_prop.inc +$(EBIN)/hipe_rtl_ssapre.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_rtl.hrl diff --git a/lib/hipe/rtl/hipe_icode2rtl.erl b/lib/hipe/rtl/hipe_icode2rtl.erl new file mode 100644 index 0000000000..034153a3cb --- /dev/null +++ b/lib/hipe/rtl/hipe_icode2rtl.erl @@ -0,0 +1,727 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%======================================================================= +%% File : hipe_icode2rtl.erl +%% Author(s) : Erik Johansson +%% Description : Translates Icode to RTL +%%======================================================================= +%% +%% $Id$ +%% +%% TODO: Better handling of switches... + +-module(hipe_icode2rtl). + +-export([translate/2]). +-export([translate_instrs/4]). %% used in hipe_rtl_mk_switch + +%%------------------------------------------------------------------------- + +%% -define(DEBUG,1). % used by hipe.hrl below + +-include("../main/hipe.hrl"). +-include("../icode/hipe_icode.hrl"). +-include("hipe_literals.hrl"). + +%%------------------------------------------------------------------------- + +%% @spec translate(IcodeRecord::#icode{}, Options::options()) -> term() +%% +%% options() = [option()] +%% option() = term() +%% +%% @doc Translates a linear form of Icode for a single function to a +%% linear form of RTL-code. +%% +translate(IcodeRecord = #icode{}, Options) -> + ?IF_DEBUG_LEVEL(2, put(hipe_mfa, hipe_icode:icode_fun(IcodeRecord)), ok), + %% hipe_icode_pp:pp(Fun), + + %% Initialize gensym and varmap + {Args, VarMap} = hipe_rtl_varmap:init(IcodeRecord), + %% Get the name and other info of the function to translate. + MFA = hipe_icode:icode_fun(IcodeRecord), + ConstTab = hipe_consttab:new(), % hipe_icode:icode_data(IcodeRecord), + %% io:format("~w\n", [ConstTab]), + Icode = hipe_icode:icode_code(IcodeRecord), + IsClosure = hipe_icode:icode_is_closure(IcodeRecord), + IsLeaf = hipe_icode:icode_is_leaf(IcodeRecord), + IcodeInfo = hipe_icode:icode_info(IcodeRecord), + + %% Translate Icode instructions to RTL instructions + ?opt_start_timer("Icode to nested RTL"), + {Code, _VarMap1, ConstTab1} = + translate_instrs(Icode, VarMap, ConstTab, Options), + ?opt_stop_timer("Icode to nested RTL"), + %% We build the code as list of lists of... + %% in order to avoid appends. + ?opt_start_timer("Flatten RTL"), + Code1 = lists:flatten(Code), + ?opt_stop_timer("Flatten RTL"), + %% Build the RTL structure. + Rtl = hipe_rtl:mk_rtl(MFA, + Args, + IsClosure, + IsLeaf, + Code1, + ConstTab1, + {1, hipe_gensym:get_var(rtl)}, + {1, hipe_gensym:get_label(rtl)}), + %% hipe_rtl:pp(Rtl), + %% Propagate info from Icode to RTL. + hipe_rtl:rtl_info_update(Rtl, IcodeInfo). + +%%------------------------------------------------------------------------- + +%% +%% @doc Translates a list of Icode instructions to a list of RTL instructions. +%% +translate_instrs(Is, VarMap, ConstTab, Options) -> + translate_instrs(Is, VarMap, [], ConstTab, Options). + +translate_instrs([], VarMap, RTL_Code, ConstTab, _Options) -> + {RTL_Code, VarMap, ConstTab}; +translate_instrs([I|Is], VarMap, AccCode, ConstTab, Options) -> + %% Translate one instruction. + {Code, VarMap0, ConstTab0} = + translate_instruction(I, VarMap, ConstTab, Options), + %% ?IF_DEBUG_LEVEL(3,?msg(" To Instr: ~w~n",[Code]),no_debug), + ?IF_DEBUG(?when_option(rtl_show_translation, Options, + ?msg(" To Instr: ~w~n", [Code])), ok), + translate_instrs(Is, VarMap0, [AccCode,Code], ConstTab0, Options). + +%% +%% @doc Translates an Icode instruction to one or more RTL instructions. +%% + +translate_instruction(I, VarMap, ConstTab, Options) -> + %% ?IF_DEBUG_LEVEL(3,?msg("From Instr: ~w~n",[I]),no_debug), + ?IF_DEBUG(?when_option(rtl_show_translation, Options, + ?msg("From Instr: ~w~n", [I])), ok), + case I of + #icode_call{} -> + gen_call(I, VarMap, ConstTab); + #icode_comment{} -> + {hipe_rtl:mk_comment(hipe_icode:comment_text(I)), VarMap, ConstTab}; + #icode_enter{} -> + gen_enter(I, VarMap, ConstTab); + #icode_fail{} -> + gen_fail(I, VarMap, ConstTab); + #icode_goto{} -> + gen_goto(I, VarMap, ConstTab); + #icode_if{} -> + gen_if(I, VarMap, ConstTab); + #icode_label{} -> + gen_label(I, VarMap, ConstTab); + #icode_move{} -> + gen_move(I, VarMap, ConstTab); + #icode_begin_handler{} -> + hipe_rtl_exceptions:gen_begin_handler(I, VarMap, ConstTab); + #icode_return{} -> + gen_return(I, VarMap, ConstTab); + #icode_switch_val{} -> + gen_switch_val(I, VarMap, ConstTab, Options); + #icode_switch_tuple_arity{} -> + gen_switch_tuple(I, VarMap, ConstTab, Options); + #icode_type{} -> + gen_type(I, VarMap, ConstTab); + X -> + exit({?MODULE,{"unknown Icode instruction",X}}) + end. + +%%------------------------------------------------------------------------- + +%% +%% CALL +%% + +gen_call(I, VarMap, ConstTab) -> + Fun = hipe_icode:call_fun(I), + {Dst, VarMap0} = hipe_rtl_varmap:ivs2rvs(hipe_icode:call_dstlist(I), VarMap), + Fail = hipe_icode:call_fail_label(I), + + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:call_args(I), VarMap0, ConstTab), + + IsGuard = hipe_icode:call_in_guard(I), + + {FailLblName, VarMap3} = + case Fail of + [] -> %% Not in a catch + {[], VarMap1}; + _ -> + {FLbl, VarMap2} = + hipe_rtl_varmap:icode_label2rtl_label(Fail, VarMap1), + {hipe_rtl:label_name(FLbl), VarMap2} + end, + + {ContLblName, ContLbl, VarMap4} = + case hipe_icode:call_continuation(I) of + [] -> %% This call does not end a BB. + CLbl = hipe_rtl:mk_new_label(), + {hipe_rtl:label_name(CLbl), CLbl, VarMap3}; + Cont -> + {CLbl, NewVarMap} = + hipe_rtl_varmap:icode_label2rtl_label(Cont, VarMap3), + {hipe_rtl:label_name(CLbl), [], NewVarMap} + end, + + {Code, ConstTab2} = + case hipe_icode:call_type(I) of + primop -> + hipe_rtl_primops:gen_primop( + {Fun, Dst, Args, ContLblName, FailLblName}, + IsGuard, ConstTab1); + Type -> + Call = gen_call_1(Fun, Dst, Args, IsGuard, ContLblName, + FailLblName, Type), + {Call, ConstTab1} + end, + {[InitCode,Code,ContLbl], VarMap4, ConstTab2}. + +%% This catches those standard functions that we inline expand + +gen_call_1(Fun={_M,_F,_A}, Dst, Args, IsGuard, Cont, Fail, Type) -> + case hipe_rtl_primops:gen_call_builtin(Fun, Dst, Args, IsGuard, Cont, + Fail) of + [] -> + hipe_rtl:mk_call(Dst, Fun, Args, Cont, Fail, conv_call_type(Type)); + Code -> + Code + end. + +conv_call_type(remote) -> remote; +conv_call_type(local) -> not_remote. + +%% -------------------------------------------------------------------- + +%% +%% ENTER +%% + +gen_enter(I, VarMap, ConstTab) -> + Fun = hipe_icode:enter_fun(I), + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:enter_args(I), VarMap, ConstTab), + {Code1, ConstTab2} = + case hipe_icode:enter_type(I) of + primop -> + IsGuard = false, % enter can not happen in a guard + hipe_rtl_primops:gen_enter_primop({Fun, Args}, IsGuard, ConstTab1); + Type -> + Call = gen_enter_1(Fun, Args, Type), + {Call, ConstTab1} + end, + {[InitCode,Code1], VarMap1, ConstTab2}. + +%% This catches those standard functions that we inline expand + +gen_enter_1(Fun, Args, Type) -> + case hipe_rtl_primops:gen_enter_builtin(Fun, Args) of + [] -> + hipe_rtl:mk_enter(Fun, Args, conv_call_type(Type)); + Code -> + Code + end. + +%% -------------------------------------------------------------------- + +%% +%% FAIL +%% + +gen_fail(I, VarMap, ConstTab) -> + Fail = hipe_icode:fail_label(I), + {Label, VarMap0} = + if Fail =:= [] -> + %% not in a catch + {[], VarMap}; + true -> + {Lbl, Map} = hipe_rtl_varmap:icode_label2rtl_label(Fail, VarMap), + {hipe_rtl:label_name(Lbl), Map} + end, + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:fail_args(I), VarMap0, ConstTab), + Class = hipe_icode:fail_class(I), + FailCode = hipe_rtl_exceptions:gen_fail(Class, Args, Label), + {[InitCode, FailCode], VarMap1, ConstTab1}. + +%% -------------------------------------------------------------------- + +%% +%% GOTO +%% + +gen_goto(I, VarMap, ConstTab) -> + {Label, Map0} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:goto_label(I), VarMap), + {hipe_rtl:mk_goto(hipe_rtl:label_name(Label)), Map0, ConstTab}. + +%% -------------------------------------------------------------------- + +%% +%% IF +%% + +gen_if(I, VarMap, ConstTab) -> + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:if_args(I), VarMap, ConstTab), + {TrueLbl, VarMap2} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:if_true_label(I), VarMap1), + {FalseLbl, VarMap3} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:if_false_label(I),VarMap2), + CondCode = + gen_cond(hipe_icode:if_op(I), + Args, + hipe_rtl:label_name(TrueLbl), + hipe_rtl:label_name(FalseLbl), + hipe_icode:if_pred(I)), + {[InitCode,CondCode], VarMap3, ConstTab1}. + + +%% -------------------------------------------------------------------- + +%% +%% LABEL +%% + +gen_label(I, VarMap, ConstTab) -> + LabelName = hipe_icode:label_name(I), + {NewLabel,Map0} = hipe_rtl_varmap:icode_label2rtl_label(LabelName, VarMap), + {NewLabel,Map0,ConstTab}. + +%% -------------------------------------------------------------------- + +%% +%% MOVE +%% + +gen_move(I, VarMap, ConstTab) -> + MovedSrc = hipe_icode:move_src(I), + {Dst, VarMap0} = + hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:move_dst(I), VarMap), + case hipe_icode:is_const(MovedSrc) of + true -> + {Code, NewConstMap} = gen_const_move(Dst, MovedSrc, ConstTab), + {[Code], VarMap0, NewConstMap}; + false -> + {Src, VarMap1} = hipe_rtl_varmap:icode_var2rtl_var(MovedSrc, VarMap0), + Code = + case hipe_icode:is_fvar(MovedSrc) of + true -> + hipe_rtl:mk_fmove(Dst, Src); + false -> % It is a var or reg + hipe_rtl:mk_move(Dst, Src) + end, + {[Code], VarMap1, ConstTab} + end. + +%% -------------------------------------------------------------------- + +%% +%% RETURN +%% + +gen_return(I, VarMap, ConstTab) -> + {RetVars, VarMap0, ConstTab0, Code} = + args_to_vars(hipe_icode:return_vars(I), VarMap, ConstTab), + {Code ++ [hipe_rtl:mk_return(RetVars)], VarMap0, ConstTab0}. + +%% -------------------------------------------------------------------- + +%% +%% SWITCH +%% + +%% +%% Rewrite switch_val to the equivalent Icode if-then-else sequence, +%% then translate that sequence instead. +%% Doing this at the RTL level would generate the exact same code, +%% but would also require _a_lot_ more work. +%% (Don't believe me? Try it. I did, and threw the code away in disgust. +%% The main ugliness comes from (1) maintaining ConstTab for the constants +%% that may be added there [switch_val is not limited to immediates!], +%% (2) maintaining Map for the translated labels, and (3) expanding +%% equality tests to eq-or-call-primop-exact_eqeq_2.) +%% +%% TODO: +%% - separate immediate and non-immediate cases, +%% and translate each list separately +%% +-ifdef(usesjumptable). +-define(uumess,?msg("~w Use jtab: ~w\n", + [Options,proplists:get_bool(use_jumptable, Options)])). +-else. +-define(uumess,ok). +-endif. + +gen_switch_val(I, VarMap, ConstTab, Options) -> + %% If you want to see whether jumptables are used or not... + ?uumess, + hipe_rtl_mk_switch:gen_switch_val(I, VarMap, ConstTab, Options). + +gen_switch_tuple(I, Map, ConstTab, Options) -> + hipe_rtl_mk_switch:gen_switch_tuple(I, Map, ConstTab, Options). + +%% -------------------------------------------------------------------- + +%% +%% TYPE +%% + +gen_type(I, VarMap, ConstTab) -> + {Vars, Map0, NewConstTab, Code1} = + args_to_vars(hipe_icode:type_args(I), VarMap, ConstTab), + {TrueLbl, Map1} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:type_true_label(I), Map0), + {FalseLbl, Map2} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:type_false_label(I), Map1), + {Code2, NewConstTab1} = gen_type_test(Vars, hipe_icode:type_test(I), + hipe_rtl:label_name(TrueLbl), + hipe_rtl:label_name(FalseLbl), + hipe_icode:type_pred(I), + NewConstTab), + {Code1 ++ Code2, Map2, NewConstTab1}. + +%% -------------------------------------------------------------------- + +%% +%% Generate code for a type test. If X is not of type Type then goto Label. +%% + +gen_type_test([X], Type, TrueLbl, FalseLbl, Pred, ConstTab) -> + case Type of + atom -> + {hipe_tagscheme:test_atom(X, TrueLbl, FalseLbl, Pred), ConstTab}; + bignum -> + {hipe_tagscheme:test_bignum(X, TrueLbl, FalseLbl, Pred), ConstTab}; + binary -> + {hipe_tagscheme:test_binary(X, TrueLbl, FalseLbl, Pred), ConstTab}; + bitstr -> + {hipe_tagscheme:test_bitstr(X, TrueLbl, FalseLbl, Pred), ConstTab}; + boolean -> + TmpT = hipe_rtl:mk_new_var(), + TmpF = hipe_rtl:mk_new_var(), + Lbl = hipe_rtl:mk_new_label(), + {[hipe_rtl:mk_load_atom(TmpT, true), + hipe_rtl:mk_branch(X, eq, TmpT, TrueLbl,hipe_rtl:label_name(Lbl),Pred), + Lbl, + hipe_rtl:mk_load_atom(TmpF, false), + hipe_rtl:mk_branch(X, eq, TmpF, TrueLbl, FalseLbl, Pred)], ConstTab}; + cons -> + {hipe_tagscheme:test_cons(X, TrueLbl, FalseLbl, Pred), ConstTab}; + constant -> + {hipe_tagscheme:test_constant(X, TrueLbl, FalseLbl, Pred), ConstTab}; + fixnum -> + {hipe_tagscheme:test_fixnum(X, TrueLbl, FalseLbl, Pred), ConstTab}; + float -> + {hipe_tagscheme:test_flonum(X, TrueLbl, FalseLbl, Pred), ConstTab}; + function -> + {hipe_tagscheme:test_fun(X, TrueLbl, FalseLbl, Pred), ConstTab}; + integer -> + {hipe_tagscheme:test_integer(X, TrueLbl, FalseLbl, Pred), ConstTab}; + list -> + {hipe_tagscheme:test_list(X, TrueLbl, FalseLbl, Pred), ConstTab}; + nil -> + {hipe_tagscheme:test_nil(X, TrueLbl, FalseLbl, Pred), ConstTab}; + number -> + {hipe_tagscheme:test_number(X, TrueLbl, FalseLbl, Pred), ConstTab}; + pid -> + {hipe_tagscheme:test_any_pid(X, TrueLbl, FalseLbl, Pred), ConstTab}; + port -> + {hipe_tagscheme:test_any_port(X, TrueLbl, FalseLbl, Pred), ConstTab}; + reference -> + {hipe_tagscheme:test_ref(X, TrueLbl, FalseLbl, Pred), ConstTab}; + tuple -> + {hipe_tagscheme:test_tuple(X, TrueLbl, FalseLbl, Pred), ConstTab}; + {atom, Atom} -> + Tmp = hipe_rtl:mk_new_var(), + {[hipe_rtl:mk_load_atom(Tmp, Atom), + hipe_rtl:mk_branch(X, eq, Tmp, TrueLbl, FalseLbl, Pred)], ConstTab}; + {integer, N} when is_integer(N) -> + %% XXX: warning, does not work for bignums + case hipe_tagscheme:is_fixnum(N) of + true -> + Int = hipe_tagscheme:mk_fixnum(N), + {hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(Int), + TrueLbl, FalseLbl, Pred), + ConstTab}; + false -> + BignumLbl = hipe_rtl:mk_new_label(), + RetLbl = hipe_rtl:mk_new_label(), + BigN = hipe_rtl:mk_new_var(), + Tmp = hipe_rtl:mk_new_var(), + {BigCode,NewConstTab} = gen_big_move(BigN, N, ConstTab), + {[hipe_tagscheme:test_fixnum(X, FalseLbl, + hipe_rtl:label_name(BignumLbl),1-Pred), + BignumLbl, BigCode] + ++ + [hipe_rtl:mk_call([Tmp], op_exact_eqeq_2 , [X,BigN], + hipe_rtl:label_name(RetLbl),[],not_remote), + RetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)], + NewConstTab} + end; + {record, A, S} -> + TupleLbl = hipe_rtl:mk_new_label(), + TupleLblName = hipe_rtl:label_name(TupleLbl), + AtomLab = hipe_rtl:mk_new_label(), + AtomLabName = hipe_rtl:label_name(AtomLab), + TagVar = hipe_rtl:mk_new_var(), + TmpAtomVar = hipe_rtl:mk_new_var(), + {UntagCode, ConstTab1} = + hipe_rtl_primops:gen_primop({{unsafe_element,1},[TagVar],[X], + AtomLabName,[]}, + false, ConstTab), + Code = + hipe_tagscheme:test_tuple_N(X, S, TupleLblName, FalseLbl, Pred) ++ + [TupleLbl|UntagCode] ++ + [AtomLab, + hipe_rtl:mk_load_atom(TmpAtomVar, A), + hipe_rtl:mk_branch(TagVar, eq, TmpAtomVar, TrueLbl, FalseLbl, Pred)], + {Code, + ConstTab1}; + {tuple, N} -> + {hipe_tagscheme:test_tuple_N(X, N, TrueLbl, FalseLbl, Pred), ConstTab}; + Other -> + exit({?MODULE,{"unknown type",Other}}) + end; +gen_type_test(Z = [X,Y], Type, TrueLbl, FalseLbl, Pred, ConstTab) -> + case Type of + function2 -> + {hipe_tagscheme:test_fun2(X, Y, TrueLbl, FalseLbl, Pred), ConstTab}; + fixnum -> + {hipe_tagscheme:test_fixnums(Z, TrueLbl, FalseLbl, Pred), ConstTab}; + Other -> + exit({?MODULE,{"unknown type",Other}}) + end; +gen_type_test(X, Type, TrueLbl, FalseLbl, Pred, ConstTab) -> + case Type of + fixnum -> + {hipe_tagscheme:test_fixnums(X, TrueLbl, FalseLbl, Pred), ConstTab}; + Other -> + exit({?MODULE,{"type cannot have several arguments",Other}}) + end. + + +%% -------------------------------------------------------------------- +%% +%% Generate code for the if-conditional. +%% + +gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + GenLbl = hipe_rtl:mk_new_label(), + TestRetLbl = hipe_rtl:mk_new_label(), + TestRetName = hipe_rtl:label_name(TestRetLbl), + + case CondOp of + 'fixnum_eq' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl, + FalseLbl, Pred)]; + '=:=' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl, + hipe_rtl:label_name(GenLbl), Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, + TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + 'fixnum_neq' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl, + TrueLbl, 1-Pred)]; + '=/=' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl, + hipe_rtl:label_name(GenLbl), 1-Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, + TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + FalseLbl, TrueLbl, Pred)]; + '==' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, + TrueLbl, hipe_rtl:label_name(GenLbl), Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '/=' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, + FalseLbl, hipe_rtl:label_name(GenLbl), 1-Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + 'fixnum_gt' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_gt(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + 'fixnum_ge' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_ge(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + 'fixnum_lt' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_lt(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + 'fixnum_le' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_le(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + '>' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_gt(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, gt, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '<' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_lt(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, lt, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '>=' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_ge(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ge, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '=<' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_le(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, le, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + _Other -> + [hipe_rtl:mk_call([Tmp], CondOp, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)] + end. + +%% -------------------------------------------------------------------- +%% +%% Translate a list argument list of icode vars to rtl vars. Also +%% handles constants in arguments. +%% + +args_to_vars([Arg|Args],VarMap, ConstTab) -> + {Vars, VarMap1, ConstTab1, Code} = + args_to_vars(Args, VarMap, ConstTab), + case hipe_icode:is_variable(Arg) of + true -> + {Var, VarMap2} = hipe_rtl_varmap:icode_var2rtl_var(Arg, VarMap1), + {[Var|Vars], VarMap2, ConstTab1, Code}; + false -> + case type_of_const(Arg) of + big -> + ConstVal = hipe_icode:const_value(Arg), + {ConstTab2, Label} = hipe_consttab:insert_term(ConstTab1, ConstVal), + NewArg = hipe_rtl:mk_const_label(Label), + {[NewArg|Vars], VarMap1, ConstTab2, Code}; + fixnum -> + ConstVal = hipe_icode:const_value(Arg), + NewArg = hipe_rtl:mk_imm(tagged_val_of(ConstVal)), + {[NewArg|Vars], VarMap1, ConstTab1, Code}; + nil -> + NewArg = hipe_rtl:mk_imm(tagged_val_of([])), + {[NewArg|Vars], VarMap1, ConstTab1, Code}; + _ -> + Var = hipe_rtl:mk_new_var(), + {Code2, ConstTab2} = gen_const_move(Var, Arg, ConstTab1), + {[Var|Vars], VarMap1, ConstTab2, [Code2,Code]} + end + end; +args_to_vars([], VarMap, ConstTab) -> + {[], VarMap, ConstTab, []}. + +%% -------------------------------------------------------------------- + +%% +%% Translate a move where the source is a constant +%% + +gen_const_move(Dst, Const, ConstTab) -> + ConstVal = hipe_icode:const_value(Const), + case type_of_const(Const) of + %% const_fun -> + %% gen_fun_move(Dst, ConstVal, ConstTab); + nil -> + Src = hipe_rtl:mk_imm(tagged_val_of([])), + {hipe_rtl:mk_move(Dst, Src), ConstTab}; + fixnum -> + Src = hipe_rtl:mk_imm(tagged_val_of(ConstVal)), + {hipe_rtl:mk_move(Dst, Src), ConstTab}; + atom -> + {hipe_rtl:mk_load_atom(Dst, ConstVal), ConstTab}; + big -> + gen_big_move(Dst, ConstVal, ConstTab) + end. + +%% gen_fun_move(Dst, Fun, ConstTab) -> +%% ?WARNING_MSG("Funmove ~w! -- NYI\n", [Fun]), +%% {NewTab, Label} = hipe_consttab:insert_fun(ConstTab, Fun), +%% {hipe_rtl:mk_load_address(Dst, Label, constant), NewTab}. + +gen_big_move(Dst, Big, ConstTab) -> + {NewTab, Label} = hipe_consttab:insert_term(ConstTab, Big), + {hipe_rtl:mk_move(Dst, hipe_rtl:mk_const_label(Label)), + NewTab}. + +type_of_const(Const) -> + case hipe_icode:const_value(Const) of + [] -> + nil; + X when is_integer(X) -> + case hipe_tagscheme:is_fixnum(X) of + true -> fixnum; + false -> big + end; + A when is_atom(A) -> + atom; + _ -> + big + end. + +tagged_val_of([]) -> hipe_tagscheme:mk_nil(); +tagged_val_of(X) when is_integer(X) -> hipe_tagscheme:mk_fixnum(X). diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl new file mode 100644 index 0000000000..ef06b2abf8 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl.erl @@ -0,0 +1,1655 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% +%% Provides an abstract datatype for HiPE's RTL (Register Transfer Language). +%% +%%

RTL - Register Transfer Language

+%% +%% Consists of the instructions: +%%
    +%%
  • {alu, Dst, Src1, Op, Src2}
  • +%%
  • {alub, Dst, Src1, Op, Src2, RelOp, TrueLabel, FalseLabel, P}
  • +%%
  • {branch, Src1, Src2, RelOp, TrueLabel, FalseLabel, P}
  • +%%
  • {call, DsListt, Fun, ArgList, Type, Continuation, FailContinuation} +%% Type is one of {local, remote, primop, closure}
  • +%%
  • {comment, Text}
  • +%%
  • {enter, Fun, ArgList, Type} +%% Type is one of {local, remote, primop, closure}
  • +%%
  • {fconv, Dst, Src}
  • +%%
  • {fload, Dst, Src, Offset}
  • +%%
  • {fmove, Dst, Src}
  • +%%
  • {fp, Dst, Src1, Op, Src2}
  • +%%
  • {fp_unop, Dst, Src, Op}
  • +%%
  • {fstore, Base, Offset, Src}
  • +%%
  • {gctest, Words}
  • +%%
  • {goto, Label}
  • +%%
  • {goto_index, Block, Index, LabelList}
  • +%%
  • {label, Name}
  • +%%
  • {load, Dst, Src, Offset, Size, Sign}
  • +%%
  • {load_address, Dst, Addr, Type}
  • +%%
  • {load_atom, Dst, Atom}
  • +%%
  • {load_word_index, Dst, Block, Index}
  • +%%
  • {move, Dst, Src}
  • +%%
  • {multimove, [Dst1, ..., DstN], [Src1, ..., SrcN]}
  • +%%
  • {phi, Dst, Id, [Src1, ..., SrcN]}
  • +%%
  • {return, VarList}
  • +%%
  • {store, Base, Offset, Src, Size}
  • +%%
  • {switch, Src1, Labels, SortedBy}
  • +%%
+%% +%% There are three kinds of 'registers' in RTL. +%%
    +%%
  1. Variables containing tagged data that are traced by the GC.
  2. +%%
  3. Registers that are ignored by the GC.
  4. +%%
  5. Floating point registers.
  6. +%%
+%% These registers all share the same namespace. +%% +%% IMPORTANT: +%% +%% The variables contain tagged Erlang terms, the registers +%% contain untagged values (that can be all sorts of things) and +%% the floating point registers contain untagged floating point +%% values. This means that the different kinds of 'registers' are +%% incompatible and CANNOT be assigned to each other unless the +%% proper conversions are made. +%% +%% When performing optimizations, it is reasonably safe to move +%% values stored in variables. However, when moving around untagged +%% values from either registers or floating point registers make +%% sure you know what you are doing. +%% +%% Example 1: A register might contain the untagged pointer to +%% something on the heap. If this value is moved across +%% a program point where a garbage collection might +%% occur, the pointer can be invalid. If you are lucky +%% you will end up with a segmentation fault; if unlucky, +%% you will be stuck on a wild goose chase. +%% +%% Example 2: Floating point arithmetic instructions must occur in +%% a floating point block. Otherwise, exceptions can be +%% masked. +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl). +-include("../main/hipe.hrl"). + +-export([mk_rtl/8, + rtl_fun/1, + rtl_params/1, + rtl_is_closure/1, + rtl_is_leaf/1, + rtl_code/1, + rtl_code_update/2, + rtl_data/1, + %% rtl_data_update/2, + %% rtl_var_range/1, + %% rtl_var_range_update/2, + %% rtl_label_range/1, + %% rtl_label_range_update/2, + rtl_info/1, + rtl_info_update/2]). + +-export([mk_move/2, + move_dst/1, + move_src/1, + %% move_src_update/2, + %% is_move/1, + + mk_multimove/2, + multimove_dstlist/1, + multimove_srclist/1, + %% multimove_srclist_update/2, + %% is_multimove/1, + + mk_phi/1, + phi_dst/1, + phi_id/1, + phi_arg/2, + phi_arglist/1, + is_phi/1, + phi_enter_pred/3, + phi_remove_pred/2, + + mk_alu/4, + alu_dst/1, + alu_src1/1, + alu_src1_update/2, + alu_src2/1, + alu_src2_update/2, + alu_op/1, + %% is_alu_op/1, + is_shift_op/1, + + mk_load/3, + mk_load/5, + load_dst/1, + load_src/1, + load_offset/1, + load_size/1, + load_sign/1, + + mk_load_atom/2, + load_atom_dst/1, + load_atom_atom/1, + + mk_load_word_index/3, + load_word_index_dst/1, + %% load_word_index_index/1, + %% load_word_index_block/1, + + mk_goto_index/3, + goto_index_index/1, + %% goto_index_block/1, + goto_index_labels/1, + + mk_load_address/3, + load_address_dst/1, + %% load_address_dst_update/2, + load_address_addr/1, + load_address_addr_update/2, + load_address_type/1, + %% load_address_type_update/2, + + mk_store/3, + mk_store/4, + store_base/1, + store_src/1, + store_offset/1, + store_size/1, + + mk_label/1, + mk_new_label/0, + label_name/1, + is_label/1, + + mk_branch/5, + mk_branch/6, + branch_src1/1, + branch_src2/1, + branch_cond/1, + branch_true_label/1, + branch_false_label/1, + branch_pred/1, + %% is_branch/1, + %% branch_true_label_update/2, + %% branch_false_label_update/2, + + mk_alub/7, + mk_alub/8, + alub_dst/1, + alub_src1/1, + alub_op/1, + alub_src2/1, + alub_cond/1, + alub_true_label/1, + %% alub_true_label_update/2, + alub_false_label/1, + %% alub_false_label_update/2, + alub_pred/1, + %% is_alub/1, + + mk_switch/2, + %% mk_switch/3, + mk_sorted_switch/3, + switch_src/1, + %% switch_src_update/2, + switch_labels/1, + %% switch_labels_update/2, + switch_sort_order/1, + %% switch_sort_order_update/2, + + mk_goto/1, + goto_label/1, + is_goto/1, + %% goto_label_update/2, + + mk_call/6, + call_fun/1, + call_dstlist/1, + call_dstlist_update/2, + call_arglist/1, + call_continuation/1, + call_fail/1, + call_type/1, + %% call_continuation_update/2, + %% call_fail_update/2, + is_call/1, + + mk_enter/3, + enter_fun/1, + enter_arglist/1, + enter_type/1, + + mk_return/1, + return_varlist/1, + + mk_gctest/1, + gctest_words/1, + + mk_comment/1, + comment_text/1, + is_comment/1, + + mk_fload/3, + fload_dst/1, + fload_src/1, + %% fload_src_update/2, + fload_offset/1, + %% fload_offset_update/2, + + mk_fstore/3, + fstore_base/1, + fstore_src/1, + fstore_offset/1, + + mk_fp/4, + fp_dst/1, + fp_src1/1, + %% fp_src1_update/2, + fp_src2/1, + %% fp_src2_update/2, + fp_op/1, + + mk_fp_unop/3, + fp_unop_dst/1, + fp_unop_src/1, + %% fp_unop_src_update/2, + fp_unop_op/1, + + mk_fmove/2, + fmove_dst/1, + fmove_src/1, + %% fmove_src_update/2, + %% is_fmove/1, + + mk_fconv/2, + fconv_dst/1, + fconv_src/1, + %% fconv_src_update/2, + %% is_fconv/1, + + %% mk_var/1, + mk_new_var/0, + is_var/1, + var_index/1, + + %% change_vars_to_regs/1, + + mk_fixnumop/3, + fixnumop_dst/1, + fixnumop_src/1, + fixnumop_type/1, + + mk_reg/1, % assumes non gc-safe + mk_reg_gcsafe/1, + mk_new_reg/0, % assumes non gc-safe + mk_new_reg_gcsafe/0, + is_reg/1, + reg_index/1, + reg_is_gcsafe/1, + + %% mk_fpreg/1, + mk_new_fpreg/0, + is_fpreg/1, + fpreg_index/1, + + mk_imm/1, + is_imm/1, + imm_value/1, + + mk_const_label/1, + const_label_label/1, + is_const_label/1, + + args/1, + uses/1, + %% subst/2, + subst_uses/2, + subst_defines/2, + defines/1, + redirect_jmp/3, + is_safe/1, + %% highest_var/1, + pp/1, + pp/2, + pp_block/1, + + %% FIXME _dst_update command. Ok to export these? + alu_dst_update/2, + fconv_dst_update/2, + fload_dst_update/2, + %% fmove_dst_update/2, + fp_dst_update/2, + fp_unop_dst_update/2, + load_dst_update/2, + load_address_dst_update/2, + load_atom_dst_update/2, + load_word_index_dst_update/2, + %% move_dst_update/2, + fixnumop_dst_update/2, + pp_instr/2, + %% pp_arg/2, + phi_arglist_update/2, + phi_redirect_pred/3]). + +%% +%% RTL +%% + +-record(rtl, {'fun', %% Name of the function (MFA) + arglist, %% List of argument names (formals) + is_closure, %% True if this is code for a closure. + is_leaf, %% True if this is a leaf function. + code, %% Linear list of RTL-instructions. + data, %% Data segment + var_range, %% {Min,Max} First and last name used for + %% regs, fpregs, or vars. + %% (they use a common namespace) + label_range, %% {Min,Max} First and last name used for labels + info=[] %% A keylist with arbitrary information. + }). + +mk_rtl(Fun, ArgList, Closure, Leaf, Code, Data, VarRange, LabelRange) -> + #rtl{'fun'=Fun, arglist=ArgList, code=Code, + data=Data, is_closure=Closure, is_leaf=Leaf, + var_range=VarRange, label_range=LabelRange}. +rtl_fun(#rtl{'fun'=Fun}) -> Fun. +rtl_params(#rtl{arglist=ArgList}) -> ArgList. +rtl_is_closure(#rtl{is_closure=Closure}) -> Closure. +rtl_is_leaf(#rtl{is_leaf=Leaf}) -> Leaf. +rtl_code(#rtl{code=Code}) -> Code. +rtl_code_update(Rtl, Code) -> Rtl#rtl{code=Code}. +rtl_data(#rtl{data=Data}) -> Data. +%% rtl_data_update(Rtl, Data) -> Rtl#rtl{data=Data}. +%% rtl_var_range(#rtl{var_range=VarRange}) -> VarRange. +%% rtl_var_range_update(Rtl, VarRange) -> Rtl#rtl{var_range=VarRange}. +%% rtl_label_range(#rtl{label_range=LabelRange}) -> LabelRange. +%% rtl_label_range_update(Rtl, LabelRange) -> Rtl#rtl{label_range=LabelRange}. +rtl_info(#rtl{info=Info}) -> Info. +rtl_info_update(Rtl, Info) -> Rtl#rtl{info=Info}. + +%%----------------------------------------------------------------------------- + +-include("hipe_rtl.hrl"). + +%%----------------------------------------------------------------------------- + +%% +%% move +%% + +mk_move(Dst, Src) -> #move{dst=Dst, src=Src}. +move_dst(#move{dst=Dst}) -> Dst. +move_dst_update(M, NewDst) -> M#move{dst=NewDst}. +move_src(#move{src=Src}) -> Src. +move_src_update(M, NewSrc) -> M#move{src=NewSrc}. +%% is_move(#move{}) -> true; +%% is_move(_) -> false. + +%% +%% multimove +%% + +mk_multimove(DstList, SrcList) -> + case length(DstList) =:= length(SrcList) of + true -> true; + false -> + exit({?MODULE,mk_multimove, + {"different arities",{dstlist,DstList},{srclist,SrcList}}}) + end, + #multimove{dstlist=DstList, srclist=SrcList}. +multimove_dstlist(#multimove{dstlist=DstList}) -> DstList. +multimove_dstlist_update(M, NewDstList) -> M#multimove{dstlist=NewDstList}. +multimove_srclist(#multimove{srclist=SrcList}) -> SrcList. +multimove_srclist_update(M, NewSrcList) -> M#multimove{srclist=NewSrcList}. +%% is_multimove(#multimove{}) -> true; +%% is_multimove(_) -> false. + +%% +%% phi +%% + +%% The id field is not entirely redundant. It is used in mappings +%% in the SSA pass since the dst field can change. +mk_phi(Var) -> #phi{dst = Var, id = Var, arglist = []}. +%% mk_phi(Var, ArgList) -> #phi{dst = Var, id = Var, arglist = ArgList}. +phi_dst(#phi{dst=Dst}) -> Dst. +phi_dst_update(Phi, NewDst) -> Phi#phi{dst = NewDst}. +phi_id(#phi{id=Id}) -> Id. +phi_args(Phi) -> [X || {_,X} <- phi_arglist(Phi)]. +phi_arg(Phi, Pred) -> + case lists:keyfind(Pred, 1, phi_arglist(Phi)) of + false -> + exit({?MODULE,phi_arg,{"Uknown Phi predecessor",Phi,{pred,Pred}}}); + {_, Var} -> Var + end. +phi_arglist(#phi{arglist=ArgList}) -> ArgList. +phi_arglist_update(P,NewArgList) ->P#phi{arglist=NewArgList}. +is_phi(#phi{}) -> true; +is_phi(_) -> false. +phi_enter_pred(Phi, Pred, Var) -> + Phi#phi{arglist=[{Pred,Var}|lists:keydelete(Pred, 1, phi_arglist(Phi))]}. +phi_remove_pred(Phi, Pred) -> + NewArgList = lists:keydelete(Pred, 1, phi_arglist(Phi)), + case NewArgList of + [Arg] -> %% the phi should be turned into a move instruction + {_Label,Var} = Arg, + mk_move(phi_dst(Phi), Var); + %% io:format("~nPhi (~w) turned into move (~w) when removing pred ~w~n",[Phi,Move,Pred]), + [_|_] -> + Phi#phi{arglist=NewArgList} + end. +phi_argvar_subst(Phi, Subst) -> + NewArgList = [{Pred,subst1(Subst, Var)} || {Pred,Var} <- phi_arglist(Phi)], + Phi#phi{arglist=NewArgList}. +phi_redirect_pred(P, OldPred, NewPred)-> + Subst = [{OldPred, NewPred}], + NewArgList = [{subst1(Subst, Pred), Var} || {Pred,Var} <- phi_arglist(P)], + P#phi{arglist=NewArgList}. + + +%% +%% alu +%% + +mk_alu(Dst, Src1, Op, Src2) -> + #alu{dst=Dst, src1=Src1, op=Op, src2=Src2}. +alu_dst(#alu{dst=Dst}) -> Dst. +alu_dst_update(Alu, NewDst) -> Alu#alu{dst=NewDst}. +alu_src1(#alu{src1=Src1}) -> Src1. +alu_src1_update(Alu, NewSrc) -> Alu#alu{src1=NewSrc}. +alu_src2(#alu{src2=Src2}) -> Src2. +alu_src2_update(Alu, NewSrc) -> Alu#alu{src2=NewSrc}. +alu_op(#alu{op=Op}) -> Op. + +%% +%% load +%% + +mk_load(Dst, Src, Offset) -> mk_load(Dst, Src, Offset, word, unsigned). +mk_load(Dst, Src, Offset, Size, Sign) -> + ?ASSERT((Sign =:= unsigned) orelse (Sign =:= signed)), + ?ASSERT((Size =:= word) orelse (Size =:= int32) orelse + (Size =:= int16) orelse (Size =:= byte)), + #load{dst=Dst, src=Src, offset=Offset, size=Size, sign=Sign}. +load_dst(#load{dst=Dst}) -> Dst. +load_dst_update(L, NewDst) -> L#load{dst=NewDst}. +load_src(#load{src=Src}) -> Src. +load_src_update(L, NewSrc) -> L#load{src=NewSrc}. +load_offset(#load{offset=Offset}) -> Offset. +load_offset_update(L, NewOffset) -> L#load{offset=NewOffset}. +load_size(#load{size=Size}) -> Size. +load_sign(#load{sign=Sign}) -> Sign. + +%% +%% load_atom +%% + +mk_load_atom(Dst, Atom) -> #load_atom{dst=Dst,atom=Atom}. +load_atom_dst(#load_atom{dst=Dst}) -> Dst. +load_atom_dst_update(L, NewDst) -> L#load_atom{dst=NewDst}. +load_atom_atom(#load_atom{atom=Atom}) -> Atom. + +mk_load_word_index(Dst, Block, Index) -> + #load_word_index{dst=Dst, block=Block, index=Index}. +load_word_index_dst(#load_word_index{dst=Dst}) -> Dst. +load_word_index_dst_update(L, NewDst) -> L#load_word_index{dst=NewDst}. +load_word_index_block(#load_word_index{block=Block}) -> Block. +load_word_index_index(#load_word_index{index=Index}) -> Index. + +mk_goto_index(Block, Index, Labels) -> + #goto_index{block=Block, index=Index, labels=Labels}. +goto_index_block(#goto_index{block=Block}) -> Block. +goto_index_index(#goto_index{index=Index}) -> Index. +goto_index_labels(#goto_index{labels=Labels}) -> Labels. + +%% +%% load_address +%% + +mk_load_address(Dst, Addr, Type) -> + #load_address{dst=Dst, addr=Addr, type=Type}. +load_address_dst(#load_address{dst=Dst}) -> Dst. +load_address_dst_update(LA, NewDst) -> LA#load_address{dst=NewDst}. +load_address_addr(#load_address{addr=Addr}) -> Addr. +load_address_addr_update(LoadAddress, NewAdr) -> + LoadAddress#load_address{addr=NewAdr}. +load_address_type(#load_address{type=Type}) -> Type. +%% load_address_type_update(LA, NewType) -> LA#load_address{type=NewType}. + +%% +%% store +%% + +mk_store(Base, Offset, Src) -> mk_store(Base, Offset, Src, word). +mk_store(Base, Offset, Src, Size) -> + ?ASSERT((Size =:= word) orelse (Size =:= int32) orelse + (Size =:= int16) orelse (Size =:= byte)), + #store{base=Base, src=Src, offset=Offset, size=Size}. +store_base(#store{base=Base}) -> Base. +store_base_update(S, NewBase) -> S#store{base=NewBase}. +store_offset(#store{offset=Offset}) -> Offset. +store_offset_update(S, NewOffset) -> S#store{offset=NewOffset}. +store_src(#store{src=Src}) -> Src. +store_src_update(S, NewSrc) -> S#store{src=NewSrc}. +store_size(#store{size=Size}) -> Size. + +%% +%% label +%% + +mk_label(Name) -> #label{name=Name}. +mk_new_label() -> mk_label(hipe_gensym:get_next_label(rtl)). +label_name(#label{name=Name}) -> Name. +is_label(#label{}) -> true; +is_label(_) -> false. + +%% +%% branch +%% + +mk_branch(Src1, Op, Src2, True, False) -> + mk_branch(Src1, Op, Src2, True, False, 0.5). +mk_branch(Src1, Op, Src2, True, False, P) -> + #branch{src1=Src1, 'cond'=Op, src2=Src2, true_label=True, + false_label=False, p=P}. +branch_src1(#branch{src1=Src1}) -> Src1. +branch_src1_update(Br, NewSrc) -> Br#branch{src1=NewSrc}. +branch_src2(#branch{src2=Src2}) -> Src2. +branch_src2_update(Br, NewSrc) -> Br#branch{src2=NewSrc}. +branch_cond(#branch{'cond'=Cond}) -> Cond. +branch_true_label(#branch{true_label=TrueLbl}) -> TrueLbl. +branch_true_label_update(Br, NewTrue) -> Br#branch{true_label=NewTrue}. +branch_false_label(#branch{false_label=FalseLbl}) -> FalseLbl. +branch_false_label_update(Br, NewFalse) -> Br#branch{false_label=NewFalse}. +branch_pred(#branch{p=P}) -> P. + +%% +%% alub +%% + +mk_alub(Dst, Src1, Op, Src2, Cond, True, False) -> + mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5). +mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) -> + #alub{dst=Dst, src1=Src1, op=Op, src2=Src2, 'cond'=Cond, + true_label=True, false_label=False, p=P}. +alub_dst(#alub{dst=Dst}) -> Dst. +alub_dst_update(A, NewDst) -> A#alub{dst=NewDst}. +alub_src1(#alub{src1=Src1}) -> Src1. +alub_src1_update(A, NewSrc) -> A#alub{src1=NewSrc}. +alub_op(#alub{op=Op}) -> Op. +alub_src2(#alub{src2=Src2}) -> Src2. +alub_src2_update(A, NewSrc) -> A#alub{src2=NewSrc}. +alub_cond(#alub{'cond'=Cond}) -> Cond. +alub_true_label(#alub{true_label=TrueLbl}) -> TrueLbl. +alub_true_label_update(A, NewTrue) -> A#alub{true_label=NewTrue}. +alub_false_label(#alub{false_label=FalseLbl}) -> FalseLbl. +alub_false_label_update(A, NewFalse) -> A#alub{false_label=NewFalse}. +alub_pred(#alub{p=P}) -> P. + +%% +%% switch +%% + +mk_switch(Src, Labels) -> #switch{src=Src, labels=Labels}. +mk_sorted_switch(Src, Labels, Order) -> + #switch{src=Src, labels=Labels, sorted_by=Order}. +switch_src(#switch{src=Src}) -> Src. +switch_src_update(I, N) -> I#switch{src=N}. +switch_labels(#switch{labels=Labels}) -> Labels. +switch_labels_update(I,N) -> I#switch{labels=N}. +switch_sort_order(#switch{sorted_by=Order}) -> Order. +%% switch_sort_order_update(I,N) -> I#switch{sorted_by=N}. + +%% +%% goto +%% + +mk_goto(Label) -> #goto{label=Label}. +goto_label(#goto{label=Label}) -> Label. +goto_label_update(I, NewLabel) -> + I#goto{label=NewLabel}. +is_goto(#goto{}) -> true; +is_goto(_) -> false. + +%% +%% call +%% + +mk_call(DstList, Fun, ArgList, Continuation, FailContinuation, Type) -> + case Type of + remote -> ok; + not_remote -> ok + end, + #call{dstlist=DstList, 'fun'=Fun, arglist=ArgList, type=Type, + continuation=Continuation, + failcontinuation=FailContinuation}. +call_dstlist(#call{dstlist=DstList}) -> DstList. +call_dstlist_update(C, NewDstList) -> C#call{dstlist=NewDstList}. +call_fun(#call{'fun'=Fun}) -> Fun. +call_fun_update(C, F) -> C#call{'fun'=F}. +call_arglist(#call{arglist=ArgList}) -> ArgList. +call_arglist_update(C, NewArgList) -> C#call{arglist=NewArgList}. +call_continuation(#call{continuation=Continuation}) -> Continuation. +call_fail(#call{failcontinuation=FailContinuation}) -> FailContinuation. +call_type(#call{type=Type}) -> Type. +call_continuation_update(C, NewCont) -> C#call{continuation=NewCont}. +call_fail_update(C, NewCont) -> C#call{failcontinuation=NewCont}. +is_call(#call{}) -> true; +is_call(_) -> false. +call_is_known(C) -> + Fun = call_fun(C), + call_or_enter_fun_is_known(Fun). + +call_or_enter_fun_is_known(Fun) -> + case is_atom(Fun) of + true -> true; %% make the expected common case fast + false -> + case is_reg(Fun) of + true -> false; + false -> + case is_var(Fun) of + true -> false; + false -> + case Fun of + {M,F,A} when is_atom(M), is_atom(F), is_integer(A), A >= 0 -> + true; + {F,A} when is_atom(F), is_integer(A), A >= 0 -> + true; + _ -> %% colored versions of rtl_reg or rtl_var (used in SSA) + false + end + end + end + end. + +%% +%% enter +%% + +mk_enter(Fun, ArgList, Type) -> + case Type of + remote -> ok; + not_remote -> ok % {local,primop,closure,pointer} + end, + #enter{'fun'=Fun, arglist=ArgList, type=Type}. +enter_fun(#enter{'fun'=Fun}) -> Fun. +enter_fun_update(I, F) -> I#enter{'fun' = F}. +enter_arglist(#enter{arglist=ArgList}) -> ArgList. +enter_arglist_update(E, NewArgList) -> E#enter{arglist=NewArgList}. +enter_type(#enter{type=Type}) -> Type. +enter_is_known(E) -> + Fun = enter_fun(E), + call_or_enter_fun_is_known(Fun). + +%% +%% return +%% + +mk_return(VarList) -> #return{varlist=VarList}. +return_varlist(#return{varlist=VarList}) -> VarList. +return_varlist_update(R, NewVarList) -> R#return{varlist=NewVarList}. + +%% +%% gctests +%% + +mk_gctest(Words) when is_integer(Words) -> #gctest{words=mk_imm(Words)}; +mk_gctest(Reg) -> #gctest{words=Reg}. % This handles rtl_regs and rtl_vars +gctest_words(#gctest{words=Words}) -> Words. +gctest_words_update(S, NewWords) -> S#gctest{words=NewWords}. + + +%% +%% fixnumop +%% + +mk_fixnumop(Dst, Src, Type) -> + #fixnumop{dst=Dst, src=Src, type=Type}. +fixnumop_dst(#fixnumop{dst=Dst}) -> Dst. +fixnumop_dst_update(S, Dst) -> S#fixnumop{dst=Dst}. +fixnumop_src(#fixnumop{src=Src}) -> Src. +fixnumop_src_update(S, Src) -> S#fixnumop{src=Src}. +fixnumop_type(#fixnumop{type=Type}) -> Type. + +%% +%% comments +%% + +mk_comment(Text) -> #comment{text=Text}. +comment_text(#comment{text=Text}) -> Text. +is_comment(#comment{}) -> true; +is_comment(_) -> false. + +%%------------------------------------------------------------------------- +%% Floating point stuff. +%%------------------------------------------------------------------------- + +%% +%% fload +%% + +mk_fload(Dst, Src, Offset) -> #fload{dst=Dst, src=Src, offset=Offset}. +fload_dst(#fload{dst=Dst}) -> Dst. +fload_dst_update(L, NewDst) -> L#fload{dst=NewDst}. +fload_src(#fload{src=Src}) -> Src. +fload_src_update(L, NewSrc) -> L#fload{src=NewSrc}. +fload_offset(#fload{offset=Offset}) -> Offset. +fload_offset_update(L, NewOffset) -> L#fload{offset=NewOffset}. + +%% +%% fstore +%% + +mk_fstore(Base, Offset, Src) -> + #fstore{base=Base, offset=Offset, src=Src}. +fstore_base(#fstore{base=Base}) -> Base. +fstore_base_update(F, NewBase) -> F#fstore{base=NewBase}. +fstore_offset(#fstore{offset=Offset}) -> Offset. +fstore_offset_update(F, NewOff) -> F#fstore{offset=NewOff}. +fstore_src(#fstore{src=Src}) -> Src. +fstore_src_update(F, NewSrc) -> F#fstore{src=NewSrc}. + +%% +%% fp +%% + +mk_fp(Dst, Src1, Op, Src2) -> + #fp{dst=Dst, src1=Src1, op=Op, src2=Src2}. +fp_dst(#fp{dst=Dst}) -> Dst. +fp_dst_update(Fp, NewDst) -> Fp#fp{dst=NewDst}. +fp_src1(#fp{src1=Src1}) -> Src1. +fp_src1_update(Fp, NewSrc) -> Fp#fp{src1=NewSrc}. +fp_src2(#fp{src2=Src2}) -> Src2. +fp_src2_update(Fp, NewSrc) -> Fp#fp{src2=NewSrc}. +fp_op(#fp{op=Op}) -> Op. + +%% +%% fp_unop +%% + +mk_fp_unop(Dst, Src, Op) -> + #fp_unop{dst=Dst, src=Src, op=Op}. +fp_unop_dst(#fp_unop{dst=Dst}) -> Dst. +fp_unop_dst_update(Fp, NewDst) -> Fp#fp_unop{dst=NewDst}. +fp_unop_src(#fp_unop{src=Src}) -> Src. +fp_unop_src_update(Fp, NewSrc) -> Fp#fp_unop{src=NewSrc}. +fp_unop_op(#fp_unop{op=Op}) -> Op. + +%% +%% fmove +%% + +mk_fmove(X, Y) -> #fmove{dst=X, src=Y}. +fmove_dst(#fmove{dst=Dst}) -> Dst. +fmove_dst_update(M, NewDst) -> M#fmove{dst=NewDst}. +fmove_src(#fmove{src=Src}) -> Src. +fmove_src_update(M, NewSrc) -> M#fmove{src=NewSrc}. + +%% +%% fconv +%% + +mk_fconv(X, Y) -> #fconv{dst=X, src=Y}. +fconv_dst(#fconv{dst=Dst}) -> Dst. +fconv_dst_update(C, NewDst) -> C#fconv{dst=NewDst}. +fconv_src(#fconv{src=Src}) -> Src. +fconv_src_update(C, NewSrc) -> C#fconv{src=NewSrc}. + +%% +%% The values +%% +%% change_vars_to_regs(Vars) -> +%% change_vars_to_regs(Vars, []). +%% change_vars_to_regs([Var|Rest], Acc) -> +%% change_vars_to_regs(Rest,[change_var_to_reg(Var)|Acc]); +%% change_vars_to_regs([], Acc) -> +%% lists:reverse(Acc). +%% +%% change_var_to_reg(Var) -> +%% mk_reg(var_index(Var)). + +-record(rtl_reg, {index :: integer(), + is_gc_safe :: boolean()}). + +mk_reg(Num, IsGcSafe) when is_integer(Num), Num >= 0 -> + #rtl_reg{index=Num,is_gc_safe=IsGcSafe}. +mk_reg(Num) -> mk_reg(Num, false). +mk_reg_gcsafe(Num) -> mk_reg(Num, true). +mk_new_reg() -> mk_reg(hipe_gensym:get_next_var(rtl), false). +mk_new_reg_gcsafe() -> mk_reg(hipe_gensym:get_next_var(rtl), true). +reg_index(#rtl_reg{index=Index}) -> Index. +reg_is_gcsafe(#rtl_reg{is_gc_safe=IsGcSafe}) -> IsGcSafe. +is_reg(#rtl_reg{}) -> true; +is_reg(_) -> false. + +-record(rtl_var, {index :: non_neg_integer()}). + +mk_var(Num) when is_integer(Num), Num >= 0 -> #rtl_var{index=Num}. +mk_new_var() -> mk_var(hipe_gensym:get_next_var(rtl)). +var_index(#rtl_var{index=Index}) -> Index. +is_var(#rtl_var{}) -> true; +is_var(_) -> false. + +-record(rtl_fpreg, {index :: non_neg_integer()}). + +mk_fpreg(Num) when is_integer(Num), Num >= 0 -> #rtl_fpreg{index=Num}. +mk_new_fpreg() -> mk_fpreg(hipe_gensym:get_next_var(rtl)). +fpreg_index(#rtl_fpreg{index=Index}) -> Index. +is_fpreg(#rtl_fpreg{}) -> true; +is_fpreg(_) -> false. + +-record(rtl_imm, {value}). + +mk_imm(Value) -> #rtl_imm{value=Value}. +imm_value(#rtl_imm{value=Value}) -> Value. +is_imm(#rtl_imm{}) -> true; +is_imm(_) -> false. + +-record(rtl_const_lbl, {label}). + +mk_const_label(Label) -> #rtl_const_lbl{label=Label}. +const_label_label(#rtl_const_lbl{label=Label}) -> Label. +is_const_label(#rtl_const_lbl{}) -> true; +is_const_label(_) -> false. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Utilities - no representation visible below this point +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% +%% @doc Returns the list of variables, constant labels and immediates +%% an RTL instruction uses. +%% + +uses(I) -> + remove_imms_and_const_lbls(args(I)). + +%% +%% @doc Returns the list of variables an RTL instruction uses. +%% + +args(I) -> + case I of + #alu{} -> [alu_src1(I), alu_src2(I)]; + #alub{} -> [alub_src1(I), alub_src2(I)]; + #branch{} -> [branch_src1(I), branch_src2(I)]; + #call{} -> + Args = call_arglist(I) ++ hipe_rtl_arch:call_used(), + case call_is_known(I) of + false -> [call_fun(I) | Args]; + true -> Args + end; + #comment{} -> []; + #enter{} -> + Args = enter_arglist(I) ++ hipe_rtl_arch:tailcall_used(), + case enter_is_known(I) of + false -> [enter_fun(I) | Args]; + true -> Args + end; + #fconv{} -> [fconv_src(I)]; + #fixnumop{} -> [fixnumop_src(I)]; + #fload{} -> [fload_src(I), fload_offset(I)]; + #fmove{} -> [fmove_src(I)]; + #fp{} -> [fp_src1(I), fp_src2(I)]; + #fp_unop{} -> [fp_unop_src(I)]; + #fstore{} -> [fstore_base(I), fstore_offset(I), fstore_src(I)]; + #goto{} -> []; + #goto_index{} -> []; + #gctest{} -> [gctest_words(I)]; + #label{} -> []; + #load{} -> [load_src(I), load_offset(I)]; + #load_address{} -> []; + #load_atom{} -> []; + #load_word_index{} -> []; + #move{} -> [move_src(I)]; + #multimove{} -> multimove_srclist(I); + #phi{} -> phi_args(I); + #return{} -> return_varlist(I) ++ hipe_rtl_arch:return_used(); + #store{} -> [store_base(I), store_offset(I), store_src(I)]; + #switch{} -> [switch_src(I)] + end. + +%% +%% @doc Returns a list of variables that an RTL instruction defines. +%% + +defines(Instr) -> + Defs = case Instr of + #alu{} -> [alu_dst(Instr)]; + #alub{} -> [alub_dst(Instr)]; + #branch{} -> []; + #call{} -> call_dstlist(Instr) ++ hipe_rtl_arch:call_defined(); + #comment{} -> []; + #enter{} -> []; + #fconv{} -> [fconv_dst(Instr)]; + #fixnumop{} -> [fixnumop_dst(Instr)]; + #fload{} -> [fload_dst(Instr)]; + #fmove{} -> [fmove_dst(Instr)]; + #fp{} -> [fp_dst(Instr)]; + #fp_unop{} -> [fp_unop_dst(Instr)]; + #fstore{} -> []; + #gctest{} -> []; + #goto{} -> []; + #goto_index{} -> []; + #label{} -> []; + #load{} -> [load_dst(Instr)]; + #load_address{} -> [load_address_dst(Instr)]; + #load_atom{} -> [load_atom_dst(Instr)]; + #load_word_index{} -> [load_word_index_dst(Instr)]; + #move{} -> [move_dst(Instr)]; + #multimove{} -> multimove_dstlist(Instr); + #phi{} -> [phi_dst(Instr)]; + #return{} -> []; + #store{} -> []; + #switch{} -> [] + end, + remove_imms_and_const_lbls(Defs). + +%% @spec remove_imms_and_const_lbls([rtl_argument()]) -> [rtl_argument()] +%% +%% @doc Removes all RTL immediates and constant labels from a list of arguments. + +remove_imms_and_const_lbls([]) -> + []; +remove_imms_and_const_lbls([Arg|Args]) -> + case is_imm(Arg) orelse is_const_label(Arg) of + true -> remove_imms_and_const_lbls(Args); + false -> [Arg | remove_imms_and_const_lbls(Args)] + end. + +%% +%% Substitution: replace occurrences of X by Y if {X,Y} is in Subst. +%% +%% subst(Subst, X) -> +%% subst_defines(Subst, subst_uses(Subst,X)). + +subst_uses(Subst, I) -> + case I of + #alu{} -> + I0 = alu_src1_update(I, subst1(Subst, alu_src1(I))), + alu_src2_update(I0, subst1(Subst, alu_src2(I))); + #alub{} -> + I0 = alub_src1_update(I, subst1(Subst, alub_src1(I))), + alub_src2_update(I0, subst1(Subst, alub_src2(I))); + #branch{} -> + I0 = branch_src1_update(I, subst1(Subst, branch_src1(I))), + branch_src2_update(I0, subst1(Subst, branch_src2(I))); + #call{} -> + case call_is_known(I) of + false -> + I0 = call_fun_update(I, subst1(Subst, call_fun(I))), + call_arglist_update(I0, subst_list(Subst, call_arglist(I0))); + true -> + call_arglist_update(I, subst_list(Subst, call_arglist(I))) + end; + #comment{} -> + I; + #enter{} -> + case enter_is_known(I) of + false -> + I0 = enter_fun_update(I, subst1(Subst, enter_fun(I))), + enter_arglist_update(I0, subst_list(Subst, enter_arglist(I0))); + true -> + enter_arglist_update(I, subst_list(Subst, enter_arglist(I))) + end; + #fconv{} -> + fconv_src_update(I, subst1(Subst, fconv_src(I))); + #fixnumop{} -> + fixnumop_src_update(I, subst1(Subst, fixnumop_src(I))); + #fload{} -> + I0 = fload_src_update(I, subst1(Subst, fload_src(I))), + fload_offset_update(I0, subst1(Subst, fload_offset(I))); + #fmove{} -> + fmove_src_update(I, subst1(Subst, fmove_src(I))); + #fp{} -> + I0 = fp_src1_update(I, subst1(Subst, fp_src1(I))), + fp_src2_update(I0, subst1(Subst, fp_src2(I))); + #fp_unop{} -> + fp_unop_src_update(I, subst1(Subst, fp_unop_src(I))); + #fstore{} -> + I0 = fstore_src_update(I, subst1(Subst, fstore_src(I))), + I1 = fstore_base_update(I0, subst1(Subst, fstore_base(I))), + fstore_offset_update(I1, subst1(Subst, fstore_offset(I))); + #goto{} -> + I; + #goto_index{} -> + I; + #gctest{} -> + gctest_words_update(I, subst1(Subst, gctest_words(I))); + #label{} -> + I; + #load{} -> + I0 = load_src_update(I, subst1(Subst, load_src(I))), + load_offset_update(I0, subst1(Subst, load_offset(I))); + #load_address{} -> + I; + #load_atom{} -> + I; + #load_word_index{} -> + I; + #move{} -> + move_src_update(I, subst1(Subst, move_src(I))); + #multimove{} -> + multimove_srclist_update(I, subst_list(Subst, multimove_srclist(I))); + #phi{} -> + phi_argvar_subst(I, Subst); + #return{} -> + return_varlist_update(I, subst_list(Subst, return_varlist(I))); + #store{} -> + I0 = store_src_update(I, subst1(Subst, store_src(I))), + I1 = store_base_update(I0, subst1(Subst, store_base(I))), + store_offset_update(I1, subst1(Subst, store_offset(I))); + #switch{} -> + switch_src_update(I, subst1(Subst, switch_src(I))) + end. + +subst_defines(Subst, I)-> + case I of + #alu{} -> + alu_dst_update(I, subst1(Subst, alu_dst(I))); + #alub{} -> + alub_dst_update(I, subst1(Subst, alub_dst(I))); + #branch{} -> + I; + #call{} -> + call_dstlist_update(I, subst_list(Subst, call_dstlist(I))); + #comment{} -> + I; + #enter{} -> + I; + #fconv{} -> + fconv_dst_update(I, subst1(Subst, fconv_dst(I))); + #fixnumop{} -> + fixnumop_dst_update(I, subst1(Subst, fixnumop_dst(I))); + #fload{} -> + fload_dst_update(I, subst1(Subst, fload_dst(I))); + #fmove{} -> + fmove_dst_update(I, subst1(Subst, fmove_dst(I))); + #fp{} -> + fp_dst_update(I, subst1(Subst, fp_dst(I))); + #fp_unop{} -> + fp_unop_dst_update(I, subst1(Subst, fp_unop_dst(I))); + #fstore{} -> + I; + #gctest{} -> + I; + #goto{} -> + I; + #goto_index{} -> + I; + #label{} -> + I; + #load{} -> + load_dst_update(I, subst1(Subst, load_dst(I))); + #load_address{} -> + load_address_dst_update(I, subst1(Subst, load_address_dst(I))); + #load_atom{} -> + load_atom_dst_update(I, subst1(Subst, load_atom_dst(I))); + #load_word_index{} -> + load_word_index_dst_update(I, subst1(Subst, load_word_index_dst(I))); + #move{} -> + move_dst_update(I, subst1(Subst, move_dst(I))); + #multimove{} -> + multimove_dstlist_update(I, subst_list(Subst, multimove_dstlist(I))); + #phi{} -> + phi_dst_update(I, subst1(Subst, phi_dst(I))); + #return{} -> + I; + #store{} -> + I; + #switch{} -> + I + end. + +subst_list(S, Xs) -> + [subst1(S, X) || X <- Xs]. + +subst1([], X) -> X; +subst1([{X,Y}|_], X) -> Y; +subst1([_|Xs], X) -> subst1(Xs,X). + +%% @spec is_safe(rtl_instruction()) -> boolean() +%% +%% @doc Succeeds if an RTL instruction is safe and can be deleted if the +%% result is not used. + +is_safe(Instr) -> + case Instr of + #alu{} -> true; + #alub{} -> false; + #branch{} -> false; + #call{} -> false; + #comment{} -> false; + #enter{} -> false; + #fconv{} -> true; + #fixnumop{} -> true; + #fload{} -> true; + #fmove{} -> true; + #fp{} -> false; + #fp_unop{} -> false; + #fstore{} -> false; + #gctest{} -> false; + #goto{} -> false; + #goto_index{} -> false; % ??? + #label{} -> true; + #load{} -> true; + #load_address{} -> true; + #load_atom{} -> true; + #load_word_index{} -> true; + #move{} -> true; + #multimove{} -> true; + #phi{} -> true; + #return{} -> false; + #store{} -> false; + #switch{} -> false %% Maybe this is safe... + end. + +%% +%% True if argument is an alu-operator +%% + +%% is_alu_op(add) -> true; +%% is_alu_op(sub) -> true; +%% is_alu_op('or') -> true; +%% is_alu_op('and') -> true; +%% is_alu_op('xor') -> true; +%% is_alu_op(andnot) -> true; +%% is_alu_op(sll) -> true; +%% is_alu_op(srl) -> true; +%% is_alu_op(sra) -> true; +%% is_alu_op(_) -> false. + +%% @spec is_shift_op(rtl_operator()) -> boolean() +%% +%% @doc Succeeds if its argument is an RTL operator. +is_shift_op(sll) -> true; +is_shift_op(srl) -> true; +is_shift_op(sra) -> true; +is_shift_op(_) -> false. + + +%% +%% True if argument is an relational operator +%% + +%% is_rel_op(eq) -> true; +%% is_rel_op(ne) -> true; +%% is_rel_op(gt) -> true; +%% is_rel_op(gtu) -> true; +%% is_rel_op(ge) -> true; +%% is_rel_op(geu) -> true; +%% is_rel_op(lt) -> true; +%% is_rel_op(ltu) -> true; +%% is_rel_op(le) -> true; +%% is_rel_op(leu) -> true; +%% is_rel_op(overflow) -> true; +%% is_rel_op(not_overflow) -> true; +%% is_rel_op(_) -> false. + +redirect_jmp(Jmp, ToOld, ToNew) -> + %% OBS: In a jmp instruction more than one labels may be identical + %% and thus need redirection! + case Jmp of + #branch{} -> + TmpJmp = case branch_true_label(Jmp) of + ToOld -> branch_true_label_update(Jmp, ToNew); + _ -> Jmp + end, + case branch_false_label(TmpJmp) of + ToOld -> + branch_false_label_update(TmpJmp, ToNew); + _ -> + TmpJmp + end; + #switch{} -> + NewLbls = [case Lbl =:= ToOld of + true -> ToNew; + false -> Lbl + end || Lbl <- switch_labels(Jmp)], + switch_labels_update(Jmp, NewLbls); + #alub{} -> + TmpJmp = case alub_true_label(Jmp) of + ToOld -> alub_true_label_update(Jmp, ToNew); + _ -> Jmp + end, + case alub_false_label(TmpJmp) of + ToOld -> alub_false_label_update(TmpJmp, ToNew); + _ -> TmpJmp + end; + #goto{} -> + case goto_label(Jmp) of + ToOld -> goto_label_update(Jmp, ToNew); + _ -> Jmp + end; + #call{} -> + TmpJmp = case call_continuation(Jmp) of + ToOld -> call_continuation_update(Jmp, ToNew); + _ -> Jmp + end, + case call_fail(TmpJmp) of + ToOld -> call_fail_update(TmpJmp, ToNew); + _ -> TmpJmp + end; + _ -> + Jmp + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% highest_var(Code) -> +%% highest_var(Code,0). +%% +%% highest_var([I|Is],Max) -> +%% Defs = defines(I), +%% Uses = uses(I), +%% highest_var(Is,new_max(Defs++Uses,Max)); +%% highest_var([],Max) -> +%% Max. +%% +%% new_max([V|Vs],Max) -> +%% VName = +%% case is_var(V) of +%% true -> +%% var_index(V); +%% false -> +%% case is_fpreg(V) of +%% true -> +%% fpreg_index(V); +%% _ -> +%% reg_index(V) +%% end +%% end, +%% if VName > Max -> +%% new_max(Vs, VName); +%% true -> +%% new_max(Vs, Max) +%% end; +%% new_max([],Max) -> +%% Max. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% @doc Pretty-printer for RTL. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +pp(Rtl) -> + pp(standard_io, Rtl). + +pp_block(Instrs) -> + pp_instrs(standard_io, Instrs). + +pp(Dev, Rtl) -> + io:format(Dev, "~w(", [rtl_fun(Rtl)]), + pp_args(Dev, rtl_params(Rtl)), + io:format(Dev, ") ->~n", []), + case rtl_is_closure(Rtl) of + true -> + io:format(Dev, ";; Closure\n", []); + false -> ok + end, + case rtl_is_leaf(Rtl) of + true -> + io:format(Dev, ";; Leaf function\n", []); + false -> ok + end, + io:format(Dev, ";; Info: ~w\n", [rtl_info(Rtl)]), + io:format(Dev, ".DataSegment\n", []), + hipe_data_pp:pp(Dev, rtl_data(Rtl), rtl, ""), + io:format(Dev, ".CodeSegment\n", []), + pp_instrs(Dev, rtl_code(Rtl)). + +pp_instrs(_Dev, []) -> + ok; +pp_instrs(Dev, [I|Is]) -> + try pp_instr(Dev, I) + catch _:_ -> io:format("*** ~w ***\n", [I]) + end, + pp_instrs(Dev, Is). + +pp_instr(Dev, I) -> + case I of + #phi{} -> + io:format(Dev, " ", []), + pp_arg(Dev, phi_dst(I)), + io:format(Dev, " <- phi(", []), + pp_phi_args(Dev, phi_arglist(I)), + io:format(Dev, ")~n", []); + #move{} -> + io:format(Dev, " ", []), + pp_arg(Dev, move_dst(I)), + io:format(Dev, " <- ", []), + pp_arg(Dev, move_src(I)), + io:format(Dev, "~n", []); + #multimove{} -> + io:format(Dev, " ", []), + pp_args(Dev, multimove_dstlist(I)), + io:format(Dev, " <= ", []), + pp_args(Dev, multimove_srclist(I)), + io:format(Dev, "~n", []); + #alu{} -> + io:format(Dev, " ", []), + pp_arg(Dev, alu_dst(I)), + io:format(Dev, " <- ", []), + pp_arg(Dev, alu_src1(I)), + io:format(Dev, " ~w ", [alu_op(I)]), + pp_arg(Dev, alu_src2(I)), + io:format(Dev, "~n", []); + #load{} -> + io:format(Dev, " ", []), + pp_arg(Dev, load_dst(I)), + io:format(Dev, " <- [", []), + pp_arg(Dev, load_src(I)), + io:format(Dev, "+", []), + pp_arg(Dev, load_offset(I)), + io:format(Dev, "]", []), + case load_sign(I) of + signed -> io:format(Dev, " -signed",[]); + _ -> ok + end, + case load_size(I) of + byte -> io:format(Dev, " -byte",[]); + int16 -> io:format(Dev, " -int16",[]); + int32 -> io:format(Dev, " -int32",[]); + _ -> ok + end, + io:format(Dev, "~n", []); + #load_atom{} -> + io:format(Dev, " ", []), + pp_arg(Dev, load_atom_dst(I)), + io:format(Dev, " <- atom_no(\'~s\')~n", [load_atom_atom(I)]); + #load_word_index{} -> + io:format(Dev, " ", []), + pp_arg(Dev, load_word_index_dst(I)), + io:format(Dev, " <- word_index_no( DL~p[~p] )~n", + [load_word_index_block(I),load_word_index_index(I)]); + #goto_index{} -> + io:format(Dev, " ", []), + io:format(Dev, "goto_index DL~p[~p]~n", + [goto_index_block(I), goto_index_index(I)]); + #load_address{} -> + io:format(Dev, " ", []), + pp_arg(Dev, load_address_dst(I)), + case load_address_type(I) of + constant -> + io:format(Dev, " <- DL~p~n", [load_address_addr(I)]); + closure -> + io:format(Dev, " <- L~p [closure]~n", [load_address_addr(I)]); + Type -> + io:format(Dev, " <- L~p [~p]~n", [load_address_addr(I),Type]) + end; + #store{} -> + io:format(Dev, " [", []), + pp_arg(Dev, store_base(I)), + io:format(Dev, "+", []), + pp_arg(Dev, store_offset(I)), + io:format(Dev, "] <- ", []), + pp_arg(Dev, store_src(I)), + case store_size(I) of + byte -> io:format(Dev, " -byte",[]); + int16 -> io:format(Dev, " -int16",[]); + int32 -> io:format(Dev, " -int32",[]); + _ -> ok + end, + io:format(Dev, "~n", []); + #label{} -> + io:format(Dev, "L~w:~n", [label_name(I)]); + #branch{} -> + io:format(Dev, " if (", []), + pp_arg(Dev, branch_src1(I)), + io:format(Dev, " ~w ", [branch_cond(I)]), + pp_arg(Dev, branch_src2(I)), + io:format(Dev, ") then L~w (~.2f) else L~w~n", + [branch_true_label(I), branch_pred(I), branch_false_label(I)]); + #switch{} -> + io:format(Dev, " switch (", []), + pp_arg(Dev, switch_src(I)), + io:format(Dev, ") <", []), + pp_switch_labels(Dev, switch_labels(I)), + io:format(Dev, ">\n", []); + #alub{} -> + io:format(Dev, " ", []), + pp_arg(Dev, alub_dst(I)), + io:format(Dev, " <- ", []), + pp_arg(Dev, alub_src1(I)), + io:format(Dev, " ~w ", [alub_op(I)]), + pp_arg(Dev, alub_src2(I)), + io:format(Dev, " if",[]), + io:format(Dev, " ~w ", [alub_cond(I)]), + io:format(Dev, "then L~w (~.2f) else L~w~n", + [alub_true_label(I), alub_pred(I), alub_false_label(I)]); + #goto{} -> + io:format(Dev, " goto L~w~n", [goto_label(I)]); + #call{} -> + io:format(Dev, " ", []), + pp_args(Dev, call_dstlist(I)), + io:format(Dev, " <- ", []), + case call_is_known(I) of + true -> + case call_fun(I) of + F when is_atom(F) -> + io:format(Dev, "~w(", [F]); + {M,F,A} when is_atom(M), is_atom(F), is_integer(A), A >= 0 -> + io:format(Dev, "~w:~w(", [M, F]); + {F,A} when is_atom(F), is_integer(A), A >=0 -> + io:format(Dev, "~w(", [F]) + end; + false -> + io:format(Dev, "(",[]), + pp_arg(Dev, call_fun(I)), + io:format(Dev, ")(",[]) + end, + pp_args(Dev, call_arglist(I)), + io:format(Dev, ")", []), + case call_continuation(I) of + [] -> true; + CC -> + io:format(Dev, " then L~w", [CC]) + end, + case call_fail(I) of + [] -> true; + L -> + io:format(Dev, " fail to L~w", [L]) + end, + io:format(Dev, "~n", []); + #enter{} -> + io:format(Dev, " ", []), + case enter_is_known(I) of + true -> + case enter_fun(I) of + F when is_atom(F) -> + io:format(Dev, "~w(", [F]); + {M,F,A} when is_atom(M), is_atom(F), is_integer(A), A >= 0 -> + io:format(Dev, "~w:~w(", [M, F]); + {F,A} when is_atom(F), is_integer(A), A >= 0 -> + io:format(Dev, "~w(", [F]) + end; + false -> + io:format(Dev, "(",[]), + pp_arg(Dev, enter_fun(I)), + io:format(Dev, ")(",[]) + end, + pp_args(Dev, enter_arglist(I)), + io:format(Dev, ")~n", []); + #return{} -> + io:format(Dev, " return(", []), + pp_args(Dev, return_varlist(I)), + io:format(Dev, ")~n", []); + #comment{} -> + io:format(Dev, " ;; ~p~n", [comment_text(I)]); + #fixnumop{} -> + io:format(Dev, " ", []), + pp_arg(Dev, fixnumop_dst(I)), + io:format(Dev, " <- ", []), + case fixnumop_type(I) of + tag -> + io:format(Dev, "fixnum_tag(", []); + untag -> + io:format(Dev, "fixnum_untag(", []) + end, + pp_arg(Dev, fixnumop_src(I)), + io:format(Dev, ")~n", []); + #gctest{} -> + io:format(Dev, " gctest(", []), + pp_arg(Dev, gctest_words(I)), + io:format(Dev, ")~n", []); + %% Floating point handling instructions below + #fload{} -> + io:format(Dev, " ", []), + pp_arg(Dev, fload_dst(I)), + io:format(Dev, " <-f [", []), + pp_arg(Dev, fload_src(I)), + io:format(Dev, "+", []), + pp_arg(Dev, fload_offset(I)), + io:format(Dev, "]~n", []); + #fstore{} -> + io:format(Dev, " [", []), + pp_arg(Dev, fstore_base(I)), + io:format(Dev, "+", []), + pp_arg(Dev, fstore_offset(I)), + io:format(Dev, "] <- ", []), + pp_arg(Dev, fstore_src(I)), + io:format(Dev, "~n", []); + #fp{} -> + io:format(Dev, " ", []), + pp_arg(Dev, fp_dst(I)), + io:format(Dev, " <- ", []), + pp_arg(Dev, fp_src1(I)), + io:format(Dev, " ~w ", [fp_op(I)]), + pp_arg(Dev, fp_src2(I)), + io:format(Dev, "~n", []); + #fp_unop{} -> + io:format(Dev, " ", []), + pp_arg(Dev, fp_unop_dst(I)), + io:format(Dev, " <- ", []), + io:format(Dev, " ~w ", [fp_unop_op(I)]), + pp_arg(Dev, fp_unop_src(I)), + io:format(Dev, "~n", []); + #fmove{} -> + io:format(Dev, " ", []), + pp_arg(Dev, fmove_dst(I)), + io:format(Dev, " <- ", []), + pp_arg(Dev, fmove_src(I)), + io:format(Dev, "~n", []); + #fconv{} -> + io:format(Dev, " ", []), + pp_arg(Dev, fconv_dst(I)), + io:format(Dev, " <-fconv ", []), + pp_arg(Dev, fconv_src(I)), + io:format(Dev, "~n", []); + Other -> + exit({?MODULE,pp_instr,{"unknown RTL instruction",Other}}) + end. + +pp_args(_Dev, []) -> + ok; +pp_args(Dev, [A]) -> + pp_arg(Dev, A); +pp_args(Dev, [A|As]) -> + pp_arg(Dev, A), + io:format(Dev, ", ", []), + pp_args(Dev, As). + +pp_phi_args(_Dev, []) -> ok; +pp_phi_args(Dev, [{Pred,A}]) -> + io:format(Dev, "{~w, ", [Pred]), + pp_arg(Dev, A), + io:format(Dev, "}", []); +pp_phi_args(Dev, [{Pred,A}|Args]) -> + io:format(Dev, "{~w, ", [Pred]), + pp_arg(Dev, A), + io:format(Dev, "}, ", []), + pp_phi_args(Dev, Args); +pp_phi_args(Dev, Args) -> + pp_args(Dev, Args). + +pp_hard_reg(Dev, N) -> + io:format(Dev, "~s", [hipe_rtl_arch:reg_name(N)]). + +pp_reg(Dev, Arg) -> + case hipe_rtl_arch:is_precoloured(Arg) of + true -> + pp_hard_reg(Dev, reg_index(Arg)); + false -> + io:format(Dev, "r~w", [reg_index(Arg)]) + end. + +pp_var(Dev, Arg) -> + case hipe_rtl_arch:is_precoloured(Arg) of + true -> + pp_hard_reg(Dev, var_index(Arg)); + false -> + io:format(Dev, "v~w", [var_index(Arg)]) + end. + +pp_arg(Dev, A) -> + case is_var(A) of + true -> + pp_var(Dev, A); + false -> + case is_reg(A) of + true -> + pp_reg(Dev, A); + false -> + case is_imm(A) of + true -> + io:format(Dev, "~w", [imm_value(A)]); + false -> + case is_fpreg(A) of + true -> + io:format(Dev, "f~w", [fpreg_index(A)]); + false -> + case is_const_label(A) of + true -> + io:format(Dev, "DL~w", [const_label_label(A)]); + false -> + exit({?MODULE,pp_arg,{"bad RTL arg",A}}) + end + end + end + end + end. + +pp_switch_labels(Dev,Lbls) -> + pp_switch_labels(Dev,Lbls,1). + +pp_switch_labels(Dev, [L], _Pos) -> + io:format(Dev, "L~w", [L]); +pp_switch_labels(Dev, [L|Ls], Pos) -> + io:format(Dev, "L~w, ", [L]), + NewPos = + case Pos of + 5 -> io:format(Dev, "\n ",[]), + 0; + N -> N + 1 + end, + pp_switch_labels(Dev, Ls, NewPos); +pp_switch_labels(_Dev, [], _) -> + ok. diff --git a/lib/hipe/rtl/hipe_rtl.hrl b/lib/hipe/rtl/hipe_rtl.hrl new file mode 100644 index 0000000000..974e40f830 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl.hrl @@ -0,0 +1,61 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Provides abstract datatypes for HiPE's RTL (Register Transfer Language). +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------- + +-record(alu, {dst, src1, op, src2}). +-record(alub, {dst, src1, op, src2, 'cond', true_label, false_label, p}). +-record(branch, {src1, src2, 'cond', true_label, false_label, p}). +-record(call, {dstlist, 'fun', arglist, type, continuation, failcontinuation}). +-record(comment, {text}). +-record(enter, {'fun', arglist, type}). +-record(fconv, {dst, src}). +-record(fixnumop, {dst, src, type}). +-record(fload, {dst, src, offset}). +-record(fmove, {dst, src}). +-record(fp, {dst, src1, op, src2}). +-record(fp_unop, {dst, src, op}). +-record(fstore, {base, offset, src}). +-record(gctest, {words}). +-record(goto, {label}). +-record(goto_index, {block, index, labels}). +-record(label, {name}). +-record(load, {dst, src, offset, size, sign}). +-record(load_address, {dst, addr, type}). +-record(load_atom, {dst, atom}). +-record(load_word_index, {dst, block, index}). +-record(move, {dst, src}). +-record(multimove, {dstlist, srclist}). +-record(phi, {dst, id, arglist}). +-record(return, {varlist}). +-record(store, {base, offset, src, size}). +-record(switch, {src, labels, sorted_by=[]}). + +%%--------------------------------------------------------------------- + +%% An efficient macro to convert byte sizes to bit sizes +-define(bytes_to_bits(Bytes), ((Bytes) bsl 3)). % (N * 8) + +%%--------------------------------------------------------------------- diff --git a/lib/hipe/rtl/hipe_rtl_arch.erl b/lib/hipe/rtl/hipe_rtl_arch.erl new file mode 100644 index 0000000000..2afdf4eb6b --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arch.erl @@ -0,0 +1,612 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. +%%===================================================================== +%% Filename : hipe_rtl_arch.erl +%% History : * 2001-04-10 Erik Johansson (happi@it.uu.se): Created. +%%===================================================================== +%% @doc +%% +%% This module contains interface functions whose semantics and +%% implementation depend on the target architecture. +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_arch). + +-export([first_virtual_reg/0, + heap_pointer/0, + heap_limit/0, + fcalls/0, + reg_name/1, + is_precoloured/1, + call_defined/0, + call_used/0, + tailcall_used/0, + return_used/0, + live_at_return/0, + endianess/0, + load_big_2/4, + load_little_2/4, + load_big_4/4, + load_little_4/4, + %% store_4/3, + eval_alu/3, + %% eval_alub/4, + eval_cond/3, + eval_cond_bits/5, + fwait/0, + handle_fp_exception/0, + pcb_load/2, + pcb_load/3, + pcb_store/2, + pcb_store/3, + pcb_address/2, + call_bif/5, + %% alignment/0, + nr_of_return_regs/0, + log2_word_size/0, + word_size/0 + ]). + +-include("hipe_literals.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% ____________________________________________________________________ +%% +%% ARCH-specific stuff +%% ____________________________________________________________________ +%% +%% +%% XXX: x86 might not _have_ real registers for some of these things +%% + +first_virtual_reg() -> + case get(hipe_target_arch) of + ultrasparc -> + hipe_sparc_registers:first_virtual(); + powerpc -> + hipe_ppc_registers:first_virtual(); + arm -> + hipe_arm_registers:first_virtual(); + x86 -> + hipe_x86_registers:first_virtual(); + amd64 -> + hipe_amd64_registers:first_virtual() + end. + +heap_pointer() -> % {GetHPInsn, HPReg, PutHPInsn} + case get(hipe_target_arch) of + ultrasparc -> + heap_pointer_from_reg(hipe_sparc_registers:heap_pointer()); + powerpc -> + heap_pointer_from_reg(hipe_ppc_registers:heap_pointer()); + arm -> + heap_pointer_from_reg(hipe_arm_registers:heap_pointer()); + x86 -> + x86_heap_pointer(); + amd64 -> + amd64_heap_pointer() + end. + +heap_pointer_from_reg(Reg) -> + {hipe_rtl:mk_comment('get_heap_pointer'), + hipe_rtl:mk_reg(Reg), + hipe_rtl:mk_comment('put_heap_pointer')}. + +-ifdef(AMD64_HP_IN_REGISTER). +amd64_heap_pointer() -> + heap_pointer_from_reg(hipe_amd64_registers:heap_pointer()). +-else. +-define(HEAP_POINTER_FROM_PCB_NEEDED,1). +amd64_heap_pointer() -> + heap_pointer_from_pcb(). +-endif. + +-ifdef(X86_HP_IN_ESI). +x86_heap_pointer() -> + heap_pointer_from_reg(hipe_x86_registers:heap_pointer()). +-else. +-define(HEAP_POINTER_FROM_PCB_NEEDED,1). +x86_heap_pointer() -> + heap_pointer_from_pcb(). +-endif. + +-ifdef(HEAP_POINTER_FROM_PCB_NEEDED). +heap_pointer_from_pcb() -> + Reg = hipe_rtl:mk_new_reg(), + {pcb_load(Reg, ?P_HP), Reg, pcb_store(?P_HP, Reg)}. +-endif. + +heap_limit() -> % {GetHLIMITInsn, HLIMITReg} + case get(hipe_target_arch) of + ultrasparc -> + heap_limit_from_pcb(); + powerpc -> + heap_limit_from_pcb(); + arm -> + heap_limit_from_pcb(); + x86 -> + heap_limit_from_reg(hipe_x86_registers:heap_limit()); + amd64 -> + heap_limit_from_reg(hipe_amd64_registers:heap_limit()) + end. + +heap_limit_from_reg(Reg) -> + {hipe_rtl:mk_comment('get_heap_limit'), + hipe_rtl:mk_reg(Reg)}. + +heap_limit_from_pcb() -> + Reg = hipe_rtl:mk_new_reg(), + {pcb_load(Reg, ?P_HP_LIMIT), Reg}. + +fcalls() -> % {GetFCallsInsn, FCallsReg, PutFCallsInsn} + case get(hipe_target_arch) of + ultrasparc -> + fcalls_from_pcb(); + powerpc -> + fcalls_from_pcb(); + arm -> + fcalls_from_pcb(); + x86 -> + fcalls_from_reg(hipe_x86_registers:fcalls()); + amd64 -> + fcalls_from_reg(hipe_amd64_registers:fcalls()) + end. + +fcalls_from_reg(Reg) -> + {hipe_rtl:mk_comment('get_fcalls'), + hipe_rtl:mk_reg(Reg), + hipe_rtl:mk_comment('put_fcalls')}. + +fcalls_from_pcb() -> + Reg = hipe_rtl:mk_new_reg(), + {pcb_load(Reg, ?P_FCALLS), Reg, pcb_store(?P_FCALLS, Reg)}. + +reg_name(Reg) -> + case get(hipe_target_arch) of + ultrasparc -> + hipe_sparc_registers:reg_name_gpr(Reg); + powerpc -> + hipe_ppc_registers:reg_name_gpr(Reg); + arm -> + hipe_arm_registers:reg_name_gpr(Reg); + x86 -> + hipe_x86_registers:reg_name(Reg); + amd64 -> + hipe_amd64_registers:reg_name(Reg) + end. + +%% @spec is_precoloured(rtl_arg()) -> boolean() +%% +%% @doc Succeeds if Arg is mapped to a precoloured register in the target. +%% +is_precoloured(Arg) -> + case hipe_rtl:is_reg(Arg) of + true -> + is_precolored_regnum(hipe_rtl:reg_index(Arg)); + false -> + hipe_rtl:is_var(Arg) andalso + is_precolored_regnum(hipe_rtl:var_index(Arg)) + end. + +is_precolored_regnum(RegNum) -> + case get(hipe_target_arch) of + ultrasparc -> + hipe_sparc_registers:is_precoloured_gpr(RegNum); + powerpc -> + hipe_ppc_registers:is_precoloured_gpr(RegNum); + arm -> + hipe_arm_registers:is_precoloured_gpr(RegNum); + x86 -> + hipe_x86_registers:is_precoloured(RegNum); + amd64 -> + hipe_amd64_registers:is_precoloured(RegNum) + end. + +call_defined() -> + call_used(). + +call_used() -> + live_at_return(). + +tailcall_used() -> + call_used(). + +return_used() -> + tailcall_used(). + +live_at_return() -> + case get(hipe_target_arch) of + ultrasparc -> + ordsets:from_list([hipe_rtl:mk_reg(R) + || {R,_} <- hipe_sparc_registers:live_at_return()]); + powerpc -> + ordsets:from_list([hipe_rtl:mk_reg(R) + || {R,_} <- hipe_ppc_registers:live_at_return()]); + arm -> + ordsets:from_list([hipe_rtl:mk_reg(R) + || {R,_} <- hipe_arm_registers:live_at_return()]); + x86 -> + ordsets:from_list([hipe_rtl:mk_reg(R) + || {R,_} <- hipe_x86_registers:live_at_return()]); + amd64 -> + ordsets:from_list([hipe_rtl:mk_reg(R) + || {R,_} <- hipe_amd64_registers:live_at_return()]) + end. + +%% @spec word_size() -> integer() +%% +%% @doc Returns the target's word size. +%% +word_size() -> + case get(hipe_target_arch) of + ultrasparc -> 4; + powerpc -> 4; + arm -> 4; + x86 -> 4; + amd64 -> 8 + end. + +%% alignment() -> +%% case get(hipe_target_arch) of +%% ultrasparc -> 4; +%% powerpc -> 4; +%% arm -> 4; +%% x86 -> 4; +%% amd64 -> 8 +%% end. + +%% @spec log2_word_size() -> integer() +%% +%% @doc Returns log2 of the target's word size. +%% +log2_word_size() -> + case get(hipe_target_arch) of + ultrasparc -> 2; + powerpc -> 2; + arm -> 2; + x86 -> 2; + amd64 -> 3 + end. + +%% @spec endianess() -> big | little +%% +%% @doc Returns the target's endianess. +%% +endianess() -> + case get(hipe_target_arch) of + ultrasparc -> big; + powerpc -> big; + x86 -> little; + amd64 -> little; + arm -> ?ARM_ENDIANESS + end. + +%%%------------------------------------------------------------------------ +%%% Reading integers from binaries, in various sizes and endianesses. +%%% Operand-sized alignment is NOT guaranteed, only byte alignment. +%%%------------------------------------------------------------------------ + +%%% Load a 2-byte big-endian integer from a binary. +%%% Increment Offset by 2. +load_big_2(Dst, Base, Offset, Signedness) -> + case get(hipe_target_arch) of + powerpc -> + load_2_directly(Dst, Base, Offset, Signedness); + %% Note: x86 could use a "load;xchgb" or "load;rol $8,<16-bit reg>" + %% sequence here. This has been implemented, but unfortunately didn't + %% make consistent improvements to our benchmarks. + _ -> + load_big_2_in_pieces(Dst, Base, Offset, Signedness) + end. + +%%% Load a 2-byte little-endian integer from a binary. +%%% Increment Offset by 2. +load_little_2(Dst, Base, Offset, Signedness) -> + case get(hipe_target_arch) of + x86 -> + load_2_directly(Dst, Base, Offset, Signedness); + powerpc -> + [hipe_rtl:mk_call([Dst], 'lhbrx', [Base,Offset], [], [], not_remote), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(2)) | + case Signedness of + unsigned -> []; + signed -> [hipe_rtl:mk_call([Dst], 'extsh', [Dst], [], [], not_remote)] + end]; + _ -> + load_little_2_in_pieces(Dst, Base, Offset, Signedness) + end. + +load_2_directly(Dst, Base, Offset, Signedness) -> + [hipe_rtl:mk_load(Dst, Base, Offset, int16, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(2))]. + +load_big_2_in_pieces(Dst, Base, Offset, Signedness) -> + Tmp1 = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]. + +load_little_2_in_pieces(Dst, Base, Offset, Signedness) -> + Tmp1 = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_load(Dst, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]. + +%%% Load a 4-byte big-endian integer from a binary. +%%% Increment Offset by 4. +load_big_4(Dst, Base, Offset, Signedness) -> + case get(hipe_target_arch) of + powerpc -> + load_4_directly(Dst, Base, Offset, Signedness); + %% Note: x86 could use a "load;bswap" sequence here. + %% This has been implemented, but unfortunately didn't + %% make any noticeable improvements in our benchmarks. + arm -> + %% When loading 4 bytes into a 32-bit register, the + %% signedness of the high-order byte doesn't matter. + %% ARM prefers unsigned byte loads so we'll use that. + load_big_4_in_pieces(Dst, Base, Offset, unsigned); + _ -> + load_big_4_in_pieces(Dst, Base, Offset, Signedness) + end. + +%%% Load a 4-byte little-endian integer from a binary. +%%% Increment Offset by 4. +load_little_4(Dst, Base, Offset, Signedness) -> + case get(hipe_target_arch) of + x86 -> + load_4_directly(Dst, Base, Offset, Signedness); + powerpc -> + [hipe_rtl:mk_call([Dst], 'lwbrx', [Base,Offset], [], [], not_remote), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4))]; + arm -> + %% When loading 4 bytes into a 32-bit register, the + %% signedness of the high-order byte doesn't matter. + %% ARM prefers unsigned byte loads so we'll use that. + load_little_4_in_pieces(Dst, Base, Offset, unsigned); + _ -> + load_little_4_in_pieces(Dst, Base, Offset, Signedness) + end. + +load_4_directly(Dst, Base, Offset, Signedness) -> + [hipe_rtl:mk_load(Dst, Base, Offset, word, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4))]. + +load_big_4_in_pieces(Dst, Base, Offset, Signedness) -> + Tmp1 = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]. + +load_little_4_in_pieces(Dst, Base, Offset, Signedness) -> + Tmp1 = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_load(Dst, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(16)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(24)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]. + +-ifdef(STORE_4_NEEDED). +store_4(Base, Offset, Src) -> + case get(hipe_target_arch) of + x86 -> + store_4_directly(Base, Offset, Src); + powerpc -> + store_4_directly(Base, Offset, Src); + arm -> + store_big_4_in_pieces(Base, Offset, Src); + ultrasparc -> + store_big_4_in_pieces(Base, Offset, Src); + amd64 -> + store_4_directly(Base, Offset, Src) + end. + +store_4_directly(Base, Offset, Src) -> + [hipe_rtl:mk_store(Base, Offset, Src, int32), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4))]. + +store_big_4_in_pieces(Base, Offset, Src) -> + [hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(3)), + hipe_rtl:mk_store(Base, Offset, Src, byte), + hipe_rtl:mk_alu(Offset, Offset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Src, Src, srl, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, Offset, Src, byte), + hipe_rtl:mk_alu(Offset, Offset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Src, Src, srl, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, Offset, Src, byte), + hipe_rtl:mk_alu(Offset, Offset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Src, Src, srl, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, Offset, Src, byte), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4))]. +-endif. + +%%---------------------------------------------------------------------- +%% Handling of arithmetic -- depends on the size of word. +%%---------------------------------------------------------------------- + +eval_alu(Op, Arg1, Arg2) -> + %% io:format("Evaluated alu: ~w ~w ~w = ",[Arg1, Op, Arg2]), + Res = case word_size() of + 4 -> + hipe_rtl_arith_32:eval_alu(Op, Arg1, Arg2); + 8 -> + hipe_rtl_arith_64:eval_alu(Op, Arg1, Arg2) + end, + %% io:format("~w~n ",[Res]), + Res. + +-ifdef(EVAL_ALUB_NEEDED). +eval_alub(Op, Cond, Arg1, Arg2) -> + %% io:format("Evaluated alub: ~w ~w ~w cond ~w = ",[Arg1, Op, Arg2, Cond]), + Res = case word_size() of + 4 -> + hipe_rtl_arith_32:eval_alub(Op, Cond, Arg1, Arg2); + 8 -> + hipe_rtl_arith_64:eval_alub(Op, Cond, Arg1, Arg2) + end, + %% io:format("~w~n ",[Res]), + Res. +-endif. + +eval_cond(Cond, Arg1, Arg2) -> + %% io:format("Evaluated cond: ~w ~w ~w = ",[Arg1, Cond, Arg2]), + Res = case word_size() of + 4 -> + hipe_rtl_arith_32:eval_cond(Cond, Arg1, Arg2); + 8 -> + hipe_rtl_arith_64:eval_cond(Cond, Arg1, Arg2) + end, + %% io:format("~w~n ",[Res]), + Res. + +eval_cond_bits(Cond, N, Z, V, C) -> + %% io:format("Evaluated cond: ~w ~w ~w = ",[Arg1, Cond, Arg2]), + Res = case word_size() of + 4 -> + hipe_rtl_arith_32:eval_cond_bits(Cond, N, Z, V, C); + 8 -> + hipe_rtl_arith_64:eval_cond_bits(Cond, N, Z, V, C) + end, + %% io:format("~w~n ",[Res]), + Res. + +%%---------------------------------------------------------------------- + +fwait() -> + case get(hipe_target_arch) of + x86 -> [hipe_rtl:mk_call([], 'fwait', [], [], [], not_remote)]; + amd64 -> [hipe_rtl:mk_call([], 'fwait', [], [], [], not_remote)]; + arm -> []; + powerpc -> []; + ultrasparc -> [] + end. + +%% @spec handle_fp_exception() -> [term()] +%% +%% @doc +%% Returns RTL code to restore the FPU after a floating-point exception. +%% @end +handle_fp_exception() -> + case get(hipe_target_arch) of + x86 -> + ContLbl = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_call([], handle_fp_exception, [], + hipe_rtl:label_name(ContLbl), [], not_remote), + ContLbl]; + amd64 -> + ContLbl = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_call([], handle_fp_exception, [], + hipe_rtl:label_name(ContLbl), [], not_remote), + ContLbl]; + arm -> + []; + powerpc -> + []; + ultrasparc -> + [] + end. + +%% +%% PCB accesses. +%% Wrapped to avoid leaking the PCB pointer to the wrong places. +%% + +pcb_load(Dst, Off) -> pcb_load(Dst, Off, word). + +pcb_load(Dst, Off, Size) -> + hipe_rtl:mk_load(Dst, proc_pointer(), hipe_rtl:mk_imm(Off), Size, unsigned). + +pcb_store(Off, Src) -> pcb_store(Off, Src, word). + +pcb_store(Off, Src, Size) -> + hipe_rtl:mk_store(proc_pointer(), hipe_rtl:mk_imm(Off), Src, Size). + +pcb_address(Dst, Off) -> + hipe_rtl:mk_alu(Dst, proc_pointer(), 'add', hipe_rtl:mk_imm(Off)). + +proc_pointer() -> % must not be exported + case get(hipe_target_arch) of + ultrasparc -> + hipe_rtl:mk_reg_gcsafe(hipe_sparc_registers:proc_pointer()); + powerpc -> + hipe_rtl:mk_reg_gcsafe(hipe_ppc_registers:proc_pointer()); + arm -> + hipe_rtl:mk_reg_gcsafe(hipe_arm_registers:proc_pointer()); + x86 -> + hipe_rtl:mk_reg_gcsafe(hipe_x86_registers:proc_pointer()); + amd64 -> + hipe_rtl:mk_reg_gcsafe(hipe_amd64_registers:proc_pointer()) + end. + +%% +%% Special BIF calls. +%% Wrapped to avoid leaking the PCB pointer to the wrong places, +%% and to allow ARCH-specific expansion. +%% + +call_bif(Dst, Name, Args, Cont, Fail) -> + hipe_rtl:mk_call(Dst, Name, Args, Cont, Fail, not_remote). + +nr_of_return_regs() -> + case get(hipe_target_arch) of + ultrasparc -> + 1; + %% hipe_sparc_registers:nr_rets(); + powerpc -> + 1; + %% hipe_ppc_registers:nr_rets(); + arm -> + 1; + x86 -> + hipe_x86_registers:nr_rets(); + amd64 -> + 1 + %% hipe_amd64_registers:nr_rets(); + end. diff --git a/lib/hipe/rtl/hipe_rtl_arith.inc b/lib/hipe/rtl/hipe_rtl_arith.inc new file mode 100644 index 0000000000..31fedd927e --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arith.inc @@ -0,0 +1,177 @@ +%% -*- Erlang -*- +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_rtl_arith.inc +%% Created : Feb 2004 +%% Purpose : Implements arithmetic which is parameterized by the size +%% of the word of the target architecture (given as defines). +%%---------------------------------------------------------------------- + + +%% Returns a tuple +%% {Res, Sign, Zero, Overflow, Carry} +%% Res will be a number in the range +%% MAX_SIGNED_INT >= Res >= MIN_SIGNED_INT +%% The other four values are flags that are either true or false +%% +eval_alu(Op, Arg1, Arg2) + when Arg1 =< ?MAX_SIGNED_INT, + Arg1 >= ?MIN_SIGNED_INT, + Arg2 =< ?MAX_SIGNED_INT, + Arg2 >= ?MIN_SIGNED_INT -> + + Sign1 = sign_bit(Arg1), + Sign2 = sign_bit(Arg2), + + case Op of + 'sub' -> + Res = (Arg1 - Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = (Sign1 and (not Sign2) and (not N)) + or + ((not Sign1) and Sign2 and N), + C = ((not Sign1) and Sign2) + or + (N and ((not Sign1) or Sign2)); + 'add' -> + Res = (Arg1 + Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = (Sign1 and Sign2 and (not N)) + or + ((not Sign1) and (not Sign2) and N), + C = (Sign1 and Sign2) + or + ((not N) and (Sign1 or Sign2)); + 'mul' -> + FullRes = Arg1 * Arg2, + Res = FullRes band ?WORDMASK, + ResHi = FullRes bsr ?BITS, + N = sign_bit(Res), + Z = zero(Res), + V = (N and (ResHi =/= -1)) or ((not N) and (ResHi =/= 0)), + C = V; + 'sra' -> + Res = (Arg1 bsr Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = 0, + C = 0; + 'srl' -> + Res = (Arg1 bsr Arg2) band shiftmask(Arg2), + N = sign_bit(Res), + Z = zero(Res), + V = 0, + C = 0; + 'sll' -> + Res = (Arg1 bsl Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = 0, + C = 0; + 'or' -> + Res = (Arg1 bor Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = 0, + C = 0; + 'and' -> + Res = (Arg1 band Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = 0, + C = 0; + 'xor' -> + Res = (Arg1 bxor Arg2) band ?WORDMASK, + N = sign_bit(Res), + Z = zero(Res), + V = 0, + C = 0; + Op -> + Res = N = Z = V = C = 0, + ?EXIT({"unknown alu op", Op}) + end, + {two_comp_to_erl(Res), N, Z, V, C}; +eval_alu(Op, Arg1, Arg2) -> + ?EXIT({argument_overflow,Op,Arg1,Arg2}). + +%% Björn & Bjarni: +%% We need to be able to do evaluations based only on the bits, since +%% there are cases where we can evaluate a subset of the bits, but can +%% not do a full eval-alub call (eg. a + 0 gives no carry) +%% +-spec eval_cond_bits(atom(), boolean(), boolean(), boolean(), boolean()) -> boolean(). + +eval_cond_bits(Cond, N, Z, V, C) -> + case Cond of + 'eq' -> + Z; + 'ne' -> + not Z; + 'gt' -> + not (Z or (N xor V)); + 'gtu' -> + not (C or Z); + 'ge' -> + not (N xor V); + 'geu'-> + not C; + 'lt' -> + N xor V; + 'ltu'-> + C; + 'le' -> + Z or (N xor V); + 'leu'-> + C or Z; + 'overflow' -> + V; + 'not_overflow' -> + not V; + _ -> + ?EXIT({'condition code not handled',Cond}) + end. + +eval_alub(Op, Cond, Arg1, Arg2) -> + {Res, N, Z, V, C} = eval_alu(Op, Arg1, Arg2), + {Res, eval_cond_bits(Cond, N, Z, V, C)}. + +eval_cond(Cond, Arg1, Arg2) -> + {_, Bool} = eval_alub('sub', Cond, Arg1, Arg2), + Bool. + +sign_bit(Val) -> + ((Val bsr ?SIGN_BIT) band 1) =:= 1. + +two_comp_to_erl(V) -> + if V > ?MAX_SIGNED_INT -> + - ((?MAX_UNSIGNED_INT + 1) - V); + true -> V + end. + +shiftmask(Arg) -> + Setbits = ?BITS - Arg, + (1 bsl Setbits) - 1. + +zero(Val) -> + Val =:= 0. + diff --git a/lib/hipe/rtl/hipe_rtl_arith_32.erl b/lib/hipe/rtl/hipe_rtl_arith_32.erl new file mode 100644 index 0000000000..a8a6043cda --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arith_32.erl @@ -0,0 +1,50 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2002 by Erik Johansson. +%% ==================================================================== +%% Filename : hipe_rtl_arith_32.erl +%% Module : hipe_rtl_arith_32 +%% Purpose : To implement 32-bit RTL-arithmetic +%% Notes : The arithmetic works on 32-bit signed integers. +%% The implementation is taken from the implementation +%% of arithmetic on SPARC. +%% XXX: This code is seldom used, and hence also +%% seldom tested. +%% Look here for strange bugs appearing when +%% turning on rtl_prop. +%% +%% History : * 2002-10-23 Erik Stenman (happi@it.uu.se): Created. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_arith_32). + +-export([eval_alu/3, eval_alub/4, eval_cond/3, eval_cond_bits/5]). + +-define(BITS, 32). +-define(SIGN_BIT, 31). +-define(WORDMASK, 16#ffffffff). +-define(MAX_SIGNED_INT, 16#7fffffff). +-define(MIN_SIGNED_INT, -16#80000000). +-define(MAX_UNSIGNED_INT, 16#ffffffff). + +-include("../main/hipe.hrl"). %% for ?EXIT + +-include("hipe_rtl_arith.inc"). diff --git a/lib/hipe/rtl/hipe_rtl_arith_64.erl b/lib/hipe/rtl/hipe_rtl_arith_64.erl new file mode 100644 index 0000000000..d0d576b65e --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arith_64.erl @@ -0,0 +1,38 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_rtl_arith_64.erl +%% Created : Feb 2004 +%% Purpose : Implements arithmetic for 64-bit target architectures. +%%---------------------------------------------------------------------- + +-module(hipe_rtl_arith_64). +-export([eval_alu/3, eval_alub/4, eval_cond/3, eval_cond_bits/5]). + +-define(BITS, 64). +-define(SIGN_BIT, 63). +-define(WORDMASK, 16#ffffffffffffffff). +-define(MAX_SIGNED_INT, 16#7fffffffffffffff). +-define(MIN_SIGNED_INT, -16#8000000000000000). +-define(MAX_UNSIGNED_INT,16#ffffffffffffffff). + +-include("../main/hipe.hrl"). %% for ?EXIT + +-include("hipe_rtl_arith.inc"). diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl new file mode 100644 index 0000000000..5ea51acedb --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_binary.erl @@ -0,0 +1,80 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_rtl_binary_2.erl +%%% Author : Per Gustafsson +%%% Description : +%%% +%%% Created : 5 Mar 2007 by Per Gustafsson +%%%------------------------------------------------------------------- +-module(hipe_rtl_binary). + +-export([gen_rtl/7]). + +gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SysLimName, ConstTab) -> + case type_of_operation(BsOP) of + match -> + {hipe_rtl_binary_match:gen_rtl( + BsOP, Dst, Args, TrueLblName, FalseLblName),ConstTab}; + construct -> + hipe_rtl_binary_construct:gen_rtl( + BsOP, Dst, Args, TrueLblName, FalseLblName, SysLimName, ConstTab) + end. + +type_of_operation({bs_start_match,_}) -> match; +type_of_operation({{bs_start_match,_},_}) -> match; +type_of_operation({bs_get_binary,_,_}) -> match; +type_of_operation({bs_get_binary_all,_,_}) -> match; +type_of_operation({bs_get_binary_all_2,_,_}) -> match; +type_of_operation({bs_get_integer,_,_}) -> match; +type_of_operation({bs_get_float,_,_}) -> match; +type_of_operation({bs_skip_bits,_}) -> match; +type_of_operation({bs_skip_bits_all,_,_}) -> match; +type_of_operation({bs_test_tail,_}) -> match; +type_of_operation({bs_restore,_}) -> match; +type_of_operation({bs_save,_}) -> match; +type_of_operation({bs_test_unit,_}) -> match; +type_of_operation({bs_match_string,_,_}) -> match; +type_of_operation(bs_context_to_binary) -> match; +type_of_operation({bs_add,_}) -> construct; +type_of_operation({bs_add,_,_}) -> construct; +type_of_operation(bs_bits_to_bytes) -> construct; +type_of_operation(bs_bits_to_bytes2) -> construct; +type_of_operation({bs_init,_}) -> construct; +type_of_operation({bs_init,_,_}) -> construct; +type_of_operation({bs_init_bits,_}) -> construct; +type_of_operation({bs_init_bits,_,_}) -> construct; +type_of_operation({bs_put_binary,_,_}) -> construct; +type_of_operation({bs_put_binary_all,_}) -> construct; +type_of_operation({bs_put_float,_,_,_}) -> construct; +type_of_operation({bs_put_integer,_,_,_}) -> construct; +type_of_operation({bs_put_string,_,_}) -> construct; +type_of_operation({unsafe_bs_put_integer,_,_,_}) -> construct; +type_of_operation(bs_utf8_size) -> construct; +type_of_operation(bs_put_utf8) -> construct; +type_of_operation(bs_get_utf8) -> match; +type_of_operation(bs_utf16_size) -> construct; +type_of_operation({bs_put_utf16,_}) -> construct; +type_of_operation({bs_get_utf16,_}) -> match; +type_of_operation(bs_validate_unicode) -> construct; +type_of_operation(bs_validate_unicode_retract) -> match; +type_of_operation(bs_final) -> construct; +type_of_operation({bs_append,_,_,_,_}) -> construct; +type_of_operation({bs_private_append,_,_}) -> construct; +type_of_operation(bs_init_writable) -> construct. diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl new file mode 100644 index 0000000000..29993b9715 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl @@ -0,0 +1,1363 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ==================================================================== +%% Module : hipe_rtl_inline_bs_ops +%% Purpose : +%% Notes : +%% History : * 2001-06-14 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_binary_construct). +-export([gen_rtl/7]). +-import(hipe_tagscheme, [set_field_from_term/3, + get_field_from_term/3, + set_field_from_pointer/3, + get_field_from_pointer/3]). +%%------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). +-include("hipe_literals.hrl"). + +-define(BYTE_SHIFT, hipe_rtl:mk_imm(3)). %% Turn bits into bytes or vice versa +-define(LOW_BITS, hipe_rtl:mk_imm(7)). %% Three lowest bits set +-define(LOW_BITS_INT, 7). +-define(BYTE_SIZE, 8). +-define(MAX_BINSIZE, ((1 bsl ((hipe_rtl_arch:word_size()*?BYTE_SIZE)-3)) - 1)). + + +%% ------------------------------------------------------------------------- +%% The code is generated as a list of lists, it will be flattened later. +%% + +gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab) -> + %%io:format("~w, ~w, ~w~n", [BsOP, Args, Dst]), + case BsOP of + {bs_put_string, String, SizeInBytes} -> + [NewOffset] = get_real(Dst), + [Base, Offset] = Args, + put_string(NewOffset, ConstTab, String, SizeInBytes, Base, Offset, + TrueLblName); + _ -> + Code = + case BsOP of + {bs_init, Size, _Flags} -> + [] = Args, + [Dst0, Base, Offset] = Dst, + case is_illegal_const(Size bsl 3) of + true -> + hipe_rtl:mk_goto(SystemLimitLblName); + false -> + const_init2(Size, Dst0, Base, Offset, TrueLblName) + end; + + {bs_init, _Flags} -> + [Size] = Args, + [Dst0, Base, Offset] = Dst, + var_init2(Size, Dst0, Base, Offset, TrueLblName, + SystemLimitLblName, FalseLblName); + + {bs_init_bits, Size, _Flags} -> + [] = Args, + [Dst0, Base, Offset] = Dst, + case is_illegal_const(Size) of + true -> + hipe_rtl:mk_goto(SystemLimitLblName); + false -> + const_init_bits(Size, Dst0, Base, Offset, TrueLblName) + end; + + {bs_init_bits, _Flags} -> + [Size] = Args, + [Dst0, Base, Offset] = Dst, + var_init_bits(Size, Dst0, Base, Offset, TrueLblName, + SystemLimitLblName, FalseLblName); + + {bs_put_binary_all, _Flags} -> + [Src, Base, Offset] = Args, + [NewOffset] = get_real(Dst), + put_binary_all(NewOffset, Src, Base, Offset, TrueLblName, FalseLblName); + + {bs_put_binary, Size, _Flags} -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + [NewOffset] = get_real(Dst), + case Args of + [Src, Base, Offset] -> + put_static_binary(NewOffset, Src, Size, Base, Offset, + TrueLblName, FalseLblName); + [Src, Bits, Base, Offset] -> + {SizeCode, SizeReg} = make_size(Size, Bits, FalseLblName), + InCode = put_dynamic_binary(NewOffset, Src, SizeReg, Base, + Offset, TrueLblName, FalseLblName), + SizeCode ++ InCode + end + end; + + {bs_put_float, Size, Flags, ConstInfo} -> + [NewOffset] = get_real(Dst), + Aligned = aligned(Flags), + LittleEndian = littleendian(Flags), + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + case Args of + [Src, Base, Offset] -> + CCode = static_float_c_code(NewOffset, Src, Base, Offset, Size, Flags, + TrueLblName, FalseLblName), + put_float(NewOffset, Src, Base, Offset, Size, CCode, Aligned, + LittleEndian, ConstInfo, TrueLblName); + [Src, Bits, Base, Offset] -> + {SizeCode, SizeReg} = make_size(Size, Bits, FalseLblName), + InCode = float_c_code(NewOffset, Src, Base, Offset, SizeReg, + Flags, TrueLblName, FalseLblName), + SizeCode ++ InCode + end + end; + + {bs_put_integer, Size, Flags, ConstInfo} -> + Aligned = aligned(Flags), + LittleEndian = littleendian(Flags), + [NewOffset] = get_real(Dst), + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + case ConstInfo of + fail -> + [hipe_rtl:mk_goto(FalseLblName)]; + _ -> + case Args of + [Src, Base, Offset] -> + CCode = static_int_c_code(NewOffset, Src, + Base, Offset, Size, + Flags, TrueLblName, + FalseLblName), + put_static_int(NewOffset, Src, Base, Offset, Size, + CCode, Aligned, LittleEndian, TrueLblName); + [Src, Bits, Base, Offset] -> + {SizeCode, SizeReg} = make_size(Size, Bits, + FalseLblName), + CCode = int_c_code(NewOffset, Src, Base, + Offset, SizeReg, Flags, + TrueLblName, FalseLblName), + InCode = + put_dynamic_int(NewOffset, Src, Base, Offset, + SizeReg, CCode, Aligned, + LittleEndian, TrueLblName), + SizeCode ++ InCode + end + end + end; + + {unsafe_bs_put_integer, 0, _Flags, _ConstInfo} -> + [NewOffset] = get_real(Dst), + case Args of + [_Src, _Base, Offset] -> + [hipe_rtl:mk_move(NewOffset,Offset), + hipe_rtl:mk_goto(TrueLblName)]; + [_Src, _Bits, _Base, Offset] -> + [hipe_rtl:mk_move(NewOffset,Offset), + hipe_rtl:mk_goto(TrueLblName)] + end; + + {unsafe_bs_put_integer, Size, Flags, ConstInfo} -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Aligned = aligned(Flags), + LittleEndian = littleendian(Flags), + [NewOffset] = get_real(Dst), + case ConstInfo of + fail -> + [hipe_rtl:mk_goto(FalseLblName)]; + _ -> + case Args of + [Src, Base, Offset] -> + CCode = static_int_c_code(NewOffset, Src, + Base, Offset, Size, + Flags, TrueLblName, + FalseLblName), + put_unsafe_static_int(NewOffset, Src, Base, + Offset, Size, + CCode, Aligned, LittleEndian, + TrueLblName); + [Src, Bits, Base, Offset] -> + {SizeCode, SizeReg} = make_size(Size, Bits, + FalseLblName), + CCode = int_c_code(NewOffset, Src, Base, + Offset, SizeReg, Flags, + TrueLblName, FalseLblName), + InCode = + put_unsafe_dynamic_int(NewOffset, Src, Base, + Offset, SizeReg, CCode, + Aligned, LittleEndian, + TrueLblName), + SizeCode ++ InCode + end + end + end; + + bs_utf8_size -> + case Dst of + [_DstVar] -> + [_Arg] = Args, + [hipe_rtl:mk_call(Dst, bs_utf8_size, Args, + TrueLblName, [], not_remote)]; + [] -> + [hipe_rtl:mk_goto(TrueLblName)] + end; + + bs_put_utf8 -> + [_Src, _Base, _Offset] = Args, + NewDsts = get_real(Dst), + [hipe_rtl:mk_call(NewDsts, bs_put_utf8, Args, + TrueLblName, FalseLblName, not_remote)]; + + bs_utf16_size -> + case Dst of + [_DstVar] -> + [_Arg] = Args, + [hipe_rtl:mk_call(Dst, bs_utf16_size, Args, + TrueLblName, [], not_remote)]; + [] -> + [hipe_rtl:mk_goto(TrueLblName)] + end; + + {bs_put_utf16, Flags} -> + [_Src, _Base, _Offset] = Args, + NewDsts = get_real(Dst), + PrimOp = % workaround for bif/primop arity restrictions + case littleendian(Flags) of + false -> bs_put_utf16be; + true -> bs_put_utf16le + end, + [hipe_rtl:mk_call(NewDsts, PrimOp, Args, + TrueLblName, FalseLblName, not_remote)]; + + bs_validate_unicode -> + [_Arg] = Args, + [hipe_rtl:mk_call([], bs_validate_unicode, Args, + TrueLblName, FalseLblName, not_remote)]; + + bs_final -> + Zero = hipe_rtl:mk_imm(0), + [Src, Offset] = Args, + [BitSize, ByteSize] = create_regs(2), + [ShortLbl, LongLbl] = create_lbls(2), + case Dst of + [DstVar] -> + [hipe_rtl:mk_alub(BitSize, Offset, 'and', ?LOW_BITS, eq, + hipe_rtl:label_name(ShortLbl), + hipe_rtl:label_name(LongLbl)), ShortLbl, + hipe_rtl:mk_move(DstVar, Src), + hipe_rtl:mk_goto(TrueLblName), + LongLbl, + hipe_rtl:mk_alu(ByteSize, Offset, 'srl', ?BYTE_SHIFT), + hipe_tagscheme:mk_sub_binary(DstVar, ByteSize, + Zero, BitSize, Zero, Src), + hipe_rtl:mk_goto(TrueLblName)]; + [] -> + [hipe_rtl:mk_goto(TrueLblName)] + end; + + bs_init_writable -> + Zero = hipe_rtl:mk_imm(0), + [Size] = Args, + [DstVar] = Dst, + [SizeReg] = create_regs(1), + [Base] = create_unsafe_regs(1), + [hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE + ?SUB_BIN_WORDSIZE), + check_and_untag_fixnum(Size, SizeReg, FalseLblName), + allocate_writable(DstVar, Base, SizeReg, Zero, Zero), + hipe_rtl:mk_goto(TrueLblName)]; + + {bs_private_append, _U, _F} -> + [Size, Bin] = Args, + [DstVar, Base, Offset] = Dst, + [ProcBin] = create_vars(1), + [SubSize, SizeReg, EndSubSize, EndSubBitSize] = create_regs(4), + SubBinSize = {sub_binary, binsize}, + [get_field_from_term({sub_binary, orig}, Bin, ProcBin), + get_field_from_term(SubBinSize, Bin, SubSize), + check_and_untag_fixnum(Size, SizeReg, FalseLblName), + realloc_binary(SizeReg, ProcBin, Base), + calculate_sizes(Bin, SizeReg, Offset, EndSubSize, EndSubBitSize), + set_field_from_term(SubBinSize, Bin, EndSubSize), + set_field_from_term({sub_binary, bitsize}, Bin, EndSubBitSize), + hipe_rtl:mk_move(DstVar, Bin), + hipe_rtl:mk_goto(TrueLblName)]; + + {bs_append, _U, _F, _B, _Bla} -> + [Size, Bin] = Args, + [DstVar, Base, Offset] = Dst, + [ProcBin] = create_vars(1), + [Flags, SizeReg, IsWritable, EndSubSize, EndSubBitSize] = + create_regs(5), + [ContLbl,ContLbl2,ContLbl3,WritableLbl,NotWritableLbl] = Lbls = + create_lbls(5), + [ContLblName, ContLbl2Name, ContLbl3Name, Writable, NotWritable] = + [hipe_rtl:label_name(Lbl) || Lbl <- Lbls], + Zero = hipe_rtl:mk_imm(0), + SubIsWritable = {sub_binary, is_writable}, + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE + ?PROC_BIN_WORDSIZE), + check_and_untag_fixnum(Size, SizeReg, FalseLblName), + hipe_tagscheme:test_bitstr(Bin, ContLblName, FalseLblName, 0.99), + ContLbl, + hipe_tagscheme:test_subbinary(Bin,ContLbl2Name, NotWritable), + ContLbl2, + get_field_from_term(SubIsWritable, Bin, IsWritable), + hipe_rtl:mk_branch(IsWritable, 'ne', Zero, + ContLbl3Name, NotWritable), + ContLbl3, + get_field_from_term({sub_binary, orig}, Bin, ProcBin), + get_field_from_term({proc_bin, flags}, ProcBin, Flags), + hipe_rtl:mk_alub(Flags, Flags, 'and', + hipe_rtl:mk_imm(?PB_IS_WRITABLE), + eq, NotWritable, Writable, 0.01), + WritableLbl, + set_field_from_term(SubIsWritable, Bin, Zero), + realloc_binary(SizeReg, ProcBin, Base), + calculate_sizes(Bin, SizeReg, Offset, EndSubSize, EndSubBitSize), + hipe_tagscheme:mk_sub_binary(DstVar, EndSubSize, Zero, + EndSubBitSize, Zero, + hipe_rtl:mk_imm(1), ProcBin), + hipe_rtl:mk_goto(TrueLblName), + NotWritableLbl, + not_writable_code(Bin, SizeReg, DstVar, Base, Offset, + TrueLblName, FalseLblName)] + end, + {Code, ConstTab} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Code that is used in the append and init writeable functions +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +not_writable_code(Bin, SizeReg, Dst, Base, Offset, + TrueLblName, FalseLblName) -> + [SrcBase] = create_unsafe_regs(1), + [SrcOffset, SrcSize, TotSize, TotBytes, UsedBytes] = create_regs(5), + [IncLbl,AllLbl] = Lbls = create_lbls(2), + [IncLblName,AllLblName] = get_label_names(Lbls), + [get_base_offset_size(Bin, SrcBase, SrcOffset, SrcSize, FalseLblName), + hipe_rtl:mk_alu(TotSize, SrcSize, add, SizeReg), + hipe_rtl:mk_alu(TotBytes, TotSize, add, ?LOW_BITS), + hipe_rtl:mk_alu(TotBytes, TotBytes, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(UsedBytes, TotBytes, sll, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_branch(UsedBytes, ge, hipe_rtl:mk_imm(256), + AllLblName, IncLblName), + IncLbl, + hipe_rtl:mk_move(UsedBytes, hipe_rtl:mk_imm(256)), + AllLbl, + allocate_writable(Dst, Base, UsedBytes, TotBytes, TotSize), + put_binary_all(Offset, Bin, Base, hipe_rtl:mk_imm(0), + TrueLblName, FalseLblName)]. + +allocate_writable(Dst, Base, UsedBytes, TotBytes, TotSize) -> + Zero = hipe_rtl:mk_imm(0), + [NextLbl] = create_lbls(1), + [EndSubSize, EndSubBitSize, ProcBin] = create_regs(3), + [hipe_rtl:mk_call([Base], bs_allocate, [UsedBytes], + hipe_rtl:label_name(NextLbl), [], not_remote), + NextLbl, + hipe_tagscheme:create_refc_binary(Base, TotBytes, + hipe_rtl:mk_imm(?PB_IS_WRITABLE bor + ?PB_ACTIVE_WRITER), + ProcBin), + hipe_rtl:mk_alu(EndSubSize, TotSize, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(EndSubBitSize, TotSize, 'and', ?LOW_BITS), + hipe_tagscheme:mk_sub_binary(Dst, EndSubSize, Zero, EndSubBitSize, + Zero, hipe_rtl:mk_imm(1), ProcBin)]. + +check_and_untag_fixnum(Size, SizeReg, FalseLblName) -> + [ContLbl,NextLbl] = Lbls = create_lbls(2), + [ContLblName,NextLblName] = get_label_names(Lbls), + [hipe_tagscheme:test_fixnum(Size, ContLblName, FalseLblName, 0.99), + ContLbl, + hipe_tagscheme:untag_fixnum(SizeReg,Size), + hipe_rtl:mk_branch(SizeReg, ge, hipe_rtl:mk_imm(0), NextLblName, + FalseLblName), + NextLbl]. + +realloc_binary(SizeReg, ProcBin, Base) -> + [NoReallocLbl, ReallocLbl, NextLbl, ContLbl] = Lbls = create_lbls(4), + [NoReallocLblName, ReallocLblName, NextLblName, ContLblName] = + [hipe_rtl:label_name(Lbl) || Lbl <- Lbls], + [PBSize, Tmp, ByteSize, NewSize, Flags, ResultingSize, OrigSize, + BinPointer] = create_regs(8), + ProcBinSizeTag = {proc_bin, binsize}, + ProcBinFlagsTag = {proc_bin, flags}, + ProcBinValTag = {proc_bin, val}, + ProcBinBytesTag = {proc_bin, bytes}, + BinOrigSizeTag = {binary, orig_size}, + [get_field_from_term(ProcBinSizeTag, ProcBin, PBSize), + hipe_rtl:mk_alu(Tmp, SizeReg, 'add', ?LOW_BITS), + hipe_rtl:mk_alu(ByteSize, Tmp, 'srl', ?BYTE_SHIFT), + hipe_rtl:mk_alu(ResultingSize, ByteSize, 'add', PBSize), + set_field_from_term(ProcBinSizeTag, ProcBin, ResultingSize), + get_field_from_term(ProcBinFlagsTag, ProcBin, Flags), + hipe_rtl:mk_alu(Flags, Flags, 'or', hipe_rtl:mk_imm(?PB_ACTIVE_WRITER)), + set_field_from_term(ProcBinFlagsTag, ProcBin, Flags), + get_field_from_term(ProcBinValTag, ProcBin, BinPointer), + get_field_from_pointer(BinOrigSizeTag, BinPointer, OrigSize), + hipe_rtl:mk_branch(OrigSize, 'lt', ResultingSize, + ReallocLblName, NoReallocLblName), + NoReallocLbl, + get_field_from_term(ProcBinBytesTag, ProcBin, Base), + hipe_rtl:mk_goto(ContLblName), + ReallocLbl, + hipe_rtl:mk_alu(NewSize, ResultingSize, 'sll', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_call([BinPointer], bs_reallocate, [BinPointer, NewSize], + NextLblName, [], not_remote), + NextLbl, + set_field_from_pointer(BinOrigSizeTag, BinPointer, NewSize), + set_field_from_term(ProcBinValTag, ProcBin, BinPointer), + hipe_tagscheme:extract_binary_bytes(BinPointer, Base), + set_field_from_term(ProcBinBytesTag, ProcBin, Base), + ContLbl]. + +calculate_sizes(Bin, SizeReg, Offset, EndSubSize, EndSubBitSize) -> + [SubSize, SubBitSize, EndSize] = create_regs(3), + [get_field_from_term({sub_binary, binsize}, Bin, SubSize), + get_field_from_term({sub_binary, bitsize}, Bin, SubBitSize), + hipe_rtl:mk_alu(Offset, SubSize, 'sll', ?BYTE_SHIFT), + hipe_rtl:mk_alu(Offset, Offset, 'add', SubBitSize), + hipe_rtl:mk_alu(EndSize, Offset, 'add', SizeReg), + hipe_rtl:mk_alu(EndSubSize, EndSize, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(EndSubBitSize, EndSize, 'and', ?LOW_BITS)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Code that is used to create calls to beam functions +%% +%% X_c_code/8, used for putting terms into binaries +%% +%% X_get_c_code/10, used for getting terms from binaries +%% +%% - gen_test_sideffect_bs_call/4 is used to make a C-call that might +%% fail but doesn't return an erlang value. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +static_float_c_code(NewOffset, Src, Base, Offset, Size, Flags, + TrueLblName, FalseLblName) -> + [SizeReg] = create_regs(1), + [hipe_rtl:mk_move(SizeReg, hipe_rtl:mk_imm(Size))| + float_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName)]. + +float_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName) -> + put_c_code(bs_put_small_float, NewOffset, Src, Base, Offset, SizeReg, + Flags, TrueLblName, FalseLblName). + +static_int_c_code(NewOffset, Src, Base, Offset, Size, Flags, + TrueLblName, FalseLblName) -> + [SizeReg] = create_regs(1), + [hipe_rtl:mk_move(SizeReg, hipe_rtl:mk_imm(Size))| + int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName)]. + +int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName) -> + put_c_code(bs_put_big_integer, NewOffset, Src, Base, Offset, SizeReg, + Flags, TrueLblName, FalseLblName). + +binary_c_code(NewOffset, Src, Base, Offset, Size, TrueLblName) -> + PassedLbl = hipe_rtl:mk_new_label(), + [SizeReg, FlagsReg] = create_regs(2), + [hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_move(SizeReg, Size), + hipe_rtl:mk_call([], bs_put_bits, [Src, SizeReg, Base, Offset, FlagsReg], + hipe_rtl:label_name(PassedLbl),[],not_remote), + PassedLbl, + hipe_rtl:mk_alu(NewOffset, Offset, add, SizeReg), + hipe_rtl:mk_goto(TrueLblName)]. + +put_c_code(Func, NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName) -> + PassedLbl = hipe_rtl:mk_new_label(), + [FlagsReg] = create_regs(1), + [hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(Flags)), + gen_test_sideffect_bs_call(Func, [Src, SizeReg, Base, Offset, FlagsReg], + hipe_rtl:label_name(PassedLbl), FalseLblName), + PassedLbl, + hipe_rtl:mk_alu(NewOffset, Offset, add, SizeReg), + hipe_rtl:mk_goto(TrueLblName)]. + +gen_test_sideffect_bs_call(Name, Args, TrueLblName, FalseLblName) -> + [Tmp1] = create_regs(1), + RetLbl = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_call([Tmp1], Name, Args, + hipe_rtl:label_name(RetLbl), [], not_remote), + RetLbl, + hipe_rtl:mk_branch(Tmp1, eq, hipe_rtl:mk_imm(0), + FalseLblName, TrueLblName, 0.01)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Small utility functions: +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +create_regs(X) when X > 0 -> + [hipe_rtl:mk_new_reg_gcsafe()|create_regs(X-1)]; +create_regs(0) -> + []. + +create_unsafe_regs(X) when X > 0 -> + [hipe_rtl:mk_new_reg()|create_unsafe_regs(X-1)]; +create_unsafe_regs(0) -> + []. + +create_vars(X) when X > 0 -> + [hipe_rtl:mk_new_var()|create_vars(X-1)]; +create_vars(0) -> + []. + +create_lbls(X) when X > 0 -> + [hipe_rtl:mk_new_label()|create_lbls(X-1)]; +create_lbls(0) -> + []. + +get_label_names(Lbls) -> + [hipe_rtl:label_name(Lbl) || Lbl <- Lbls]. + +aligned(Flags) -> + case Flags band ?BSF_ALIGNED of + 1 -> true; + 0 -> false + end. + +littleendian(Flags) -> + case Flags band 2 of + 2 -> true; + 0 -> false + end. + +is_illegal_const(Const) -> + Const >= (1 bsl (hipe_rtl_arch:word_size() * ?BYTE_SIZE)) orelse Const < 0. + +get_real(Dst) -> + case Dst of + [_NewOffset] -> Dst; + [] -> create_regs(1) + end. + +%%----------------------------------------------------------------------------- +%% Help functions implementing the bs operations in rtl code. +%% +%% The following functions are called from the translation switch: +%% +%% - put_string/7 creates code to copy a string to a binary +%% starting at base+offset and ending at base+newoffset +%% +%% - const_init2/6 initializes the creation of a binary of constant size +%% +%% - var_init2/6 initializes the creation of a binary of variable size +%% +%% - get_int_from_unaligned_bin/11 creates code to extract a fixed +%% size integer from a binary or makes a c-call if it does not +%% conform to some certain rules. +%% +%% - get_unknown_size_int/11 creates code to extract a variable size +%% byte-aligned integer from a binary or makes a c-call if it +%% does not conform to some certain rules. +%% +%% - skip_no_of_bits/5 creates code to skip a variable amount of bits +%% in a binary. +%% +%% - load_match_buffer/7 reloads the C-matchbuffer to RTL registers. +%% +%% - expand_runtime/4 creates code that calculates a maximal heap need +%% before a binary match +%%----------------------------------------------------------------------------- + +put_string(NewOffset, ConstTab, String, SizeInBytes, Base, Offset, TLName) -> + [StringBase] = create_regs(1), + {NewTab, Lbl} = hipe_consttab:insert_block(ConstTab, byte, String), + {[hipe_rtl:mk_load_address(StringBase, Lbl, constant)| + copy_string(StringBase, SizeInBytes, Base, Offset, + NewOffset, TLName)], + NewTab}. + +const_init2(Size, Dst, Base, Offset, TrueLblName) -> + Log2WordSize = hipe_rtl_arch:log2_word_size(), + WordSize = hipe_rtl_arch:word_size(), + NextLbl = hipe_rtl:mk_new_label(), + case Size =< ?MAX_HEAP_BIN_SIZE of + true -> + [hipe_rtl:mk_gctest(((Size + 3*WordSize-1) bsr Log2WordSize)+?SUB_BIN_WORDSIZE), + hipe_tagscheme:create_heap_binary(Base, Size, Dst), + hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_goto(TrueLblName)]; + false -> + ByteSize = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE+?SUB_BIN_WORDSIZE), + hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_move(ByteSize, hipe_rtl:mk_imm(Size)), + hipe_rtl:mk_call([Base], bs_allocate, [ByteSize], + hipe_rtl:label_name(NextLbl), [], not_remote), + NextLbl, + hipe_tagscheme:create_refc_binary(Base, ByteSize, Dst), + hipe_rtl:mk_goto(TrueLblName)] + end. + +const_init_bits(Size, Dst, Base, Offset, TrueLblName) -> + Log2WordSize = hipe_rtl_arch:log2_word_size(), + WordSize = hipe_rtl_arch:word_size(), + [NextLbl] = create_lbls(1), + TmpDst = hipe_rtl:mk_new_var(), + Zero = hipe_rtl:mk_imm(0), + {ExtraSpace, SubBinCode} = + if (Size rem ?BYTE_SIZE) =:= 0 -> + {0,[hipe_rtl:mk_move(Dst, TmpDst)]}; + true -> + {?SUB_BIN_WORDSIZE, + hipe_tagscheme:mk_sub_binary(Dst, hipe_rtl:mk_imm(Size bsr 3), Zero, + hipe_rtl:mk_imm(Size band ?LOW_BITS_INT), + Zero, TmpDst)} + end, + BaseBinCode = + if Size =< (?MAX_HEAP_BIN_SIZE * 8) -> + ByteSize = (Size + 7) div 8, + [hipe_rtl:mk_gctest(((ByteSize+ 3*WordSize-1) bsr Log2WordSize)+ ExtraSpace), + hipe_tagscheme:create_heap_binary(Base, ByteSize, TmpDst), + hipe_rtl:mk_move(Offset, Zero)]; + true -> + ByteSize = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE+ExtraSpace), + hipe_rtl:mk_move(Offset, Zero), + hipe_rtl:mk_move(ByteSize, hipe_rtl:mk_imm((Size+7) bsr 3)), + hipe_rtl:mk_call([Base], bs_allocate, [ByteSize], + hipe_rtl:label_name(NextLbl),[],not_remote), + NextLbl, + hipe_tagscheme:create_refc_binary(Base, ByteSize, TmpDst)] + end, + [BaseBinCode, SubBinCode, hipe_rtl:mk_goto(TrueLblName)]. + +var_init2(Size, Dst, Base, Offset, TrueLblName, SystemLimitLblName, FalseLblName) -> + Log2WordSize = hipe_rtl_arch:log2_word_size(), + WordSize = hipe_rtl_arch:word_size(), + [ContLbl,HeapLbl,REFCLbl,NextLbl] = create_lbls(4), + [USize,Tmp] = create_unsafe_regs(2), + [get_32_bit_value(Size, USize, SystemLimitLblName, FalseLblName), + hipe_rtl:mk_branch(USize, le, hipe_rtl:mk_imm(?MAX_BINSIZE), + hipe_rtl:label_name(ContLbl), + SystemLimitLblName), + ContLbl, + hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_branch(USize, le, hipe_rtl:mk_imm(?MAX_HEAP_BIN_SIZE), + hipe_rtl:label_name(HeapLbl), + hipe_rtl:label_name(REFCLbl)), + HeapLbl, + hipe_rtl:mk_alu(Tmp, USize, add, hipe_rtl:mk_imm(3*WordSize-1)), + hipe_rtl:mk_alu(Tmp, Tmp, srl, hipe_rtl:mk_imm(Log2WordSize)), + hipe_rtl:mk_alu(Tmp, Tmp, add, hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE)), + hipe_rtl:mk_gctest(Tmp), + hipe_tagscheme:create_heap_binary(Base, USize, Dst), + hipe_rtl:mk_goto(TrueLblName), + REFCLbl, + hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE+?SUB_BIN_WORDSIZE), + hipe_rtl:mk_call([Base], bs_allocate, [USize], + hipe_rtl:label_name(NextLbl), [], not_remote), + NextLbl, + hipe_tagscheme:create_refc_binary(Base, USize, Dst), + hipe_rtl:mk_goto(TrueLblName)]. + +var_init_bits(Size, Dst, Base, Offset, TrueLblName, SystemLimitLblName, FalseLblName) -> + [HeapLbl,REFCLbl,NextLbl,NoSubLbl,SubLbl,ContLbl, + NoCreateSubBin, CreateSubBin, JoinLbl, JoinLbl2] = create_lbls(10), + [USize,ByteSize,TotByteSize,OffsetBits] = create_regs(4), + [TmpDst] = create_unsafe_regs(1), + Log2WordSize = hipe_rtl_arch:log2_word_size(), + WordSize = hipe_rtl_arch:word_size(), + MaximumWords = + erlang:max((?MAX_HEAP_BIN_SIZE + 3*WordSize) bsr Log2WordSize, + ?PROC_BIN_WORDSIZE) + ?SUB_BIN_WORDSIZE, + Zero = hipe_rtl:mk_imm(0), + [hipe_rtl:mk_gctest(MaximumWords), + get_32_bit_value(Size, USize, SystemLimitLblName, FalseLblName), + hipe_rtl:mk_alu(ByteSize, USize, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alub(OffsetBits, USize, 'and', ?LOW_BITS, eq, + hipe_rtl:label_name(NoSubLbl), + hipe_rtl:label_name(SubLbl)), + NoSubLbl, + hipe_rtl:mk_move(TotByteSize, ByteSize), + hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl)), + SubLbl, + hipe_rtl:mk_alu(TotByteSize, ByteSize, 'add', hipe_rtl:mk_imm(1)), + JoinLbl, + hipe_rtl:mk_branch(USize, le, hipe_rtl:mk_imm(?MAX_BINSIZE), + hipe_rtl:label_name(ContLbl), + SystemLimitLblName), + ContLbl, + hipe_rtl:mk_branch(TotByteSize, 'le', hipe_rtl:mk_imm(?MAX_HEAP_BIN_SIZE), + hipe_rtl:label_name(HeapLbl), + hipe_rtl:label_name(REFCLbl)), + HeapLbl, + hipe_tagscheme:create_heap_binary(Base, TotByteSize, TmpDst), + hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl2)), + REFCLbl, + hipe_rtl:mk_call([Base], bs_allocate, [TotByteSize], + hipe_rtl:label_name(NextLbl),[],not_remote), + NextLbl, + hipe_tagscheme:create_refc_binary(Base, TotByteSize, TmpDst), + JoinLbl2, + hipe_rtl:mk_move(Offset, Zero), + hipe_rtl:mk_branch(OffsetBits, 'eq', Zero, + hipe_rtl:label_name(NoCreateSubBin), + hipe_rtl:label_name(CreateSubBin)), + CreateSubBin, + hipe_tagscheme:mk_sub_binary(Dst, ByteSize, Zero, OffsetBits, Zero, TmpDst), + hipe_rtl:mk_goto(TrueLblName), + NoCreateSubBin, + hipe_rtl:mk_move(Dst, TmpDst), + hipe_rtl:mk_goto(TrueLblName)]. + +put_binary_all(NewOffset, Src, Base, Offset, TLName, FLName) -> + [SrcBase,SrcOffset,NumBits] = create_regs(3), + CCode = binary_c_code(NewOffset, Src, Base, Offset, NumBits, TLName), + AlignedCode = copy_aligned_bytes(SrcBase, SrcOffset, NumBits, Base, Offset, + NewOffset, TLName), + get_base_offset_size(Src, SrcBase, SrcOffset, NumBits,FLName) ++ + test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode). + +test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) -> + [Tmp] = create_regs(1), + [AlignedLbl,CLbl] = create_lbls(2), + [hipe_rtl:mk_alu(Tmp, SrcOffset, 'or', NumBits), + hipe_rtl:mk_alu(Tmp, Tmp, 'or', Offset), + hipe_rtl:mk_alub(Tmp, Tmp, 'and', ?LOW_BITS, 'eq', + hipe_rtl:label_name(AlignedLbl), + hipe_rtl:label_name(CLbl)), + AlignedLbl, + AlignedCode, + CLbl, + CCode]. + +put_static_binary(NewOffset, Src, Size, Base, Offset, TLName, FLName) -> + [SrcBase] = create_unsafe_regs(1), + [SrcOffset, SrcSize] = create_regs(2), + case Size of + 0 -> + get_base_offset_size(Src, SrcBase, SrcOffset, SrcSize, FLName) ++ + [hipe_rtl:mk_move(NewOffset, Offset), + hipe_rtl:mk_goto(TLName)]; + _ -> + SizeImm = hipe_rtl:mk_imm(Size), + CCode = binary_c_code(NewOffset, Src, Base, Offset, SizeImm, TLName), + AlignedCode = copy_aligned_bytes(SrcBase, SrcOffset, SizeImm, Base, + Offset, NewOffset, TLName), + get_base_offset_size(Src, SrcBase, SrcOffset, SrcSize, FLName) ++ + small_check(SizeImm, SrcSize, FLName) ++ + test_alignment(SrcOffset, SizeImm, Offset, AlignedCode, CCode) + end. + +put_dynamic_binary(NewOffset, Src, SizeReg, Base, Offset, TLName, FLName) -> + [SrcBase] = create_unsafe_regs(1), + [SrcOffset, SrcSize] = create_regs(2), + CCode = binary_c_code(NewOffset, Src, Base, Offset, SizeReg, TLName), + AlignedCode = copy_aligned_bytes(SrcBase, SrcOffset, SizeReg, Base, Offset, + NewOffset, TLName), + get_base_offset_size(Src, SrcBase, SrcOffset, SrcSize, FLName) ++ + small_check(SizeReg, SrcSize, FLName) ++ + test_alignment(SrcOffset, SizeReg, Offset, AlignedCode, CCode). + +put_float(NewOffset, Src, Base, Offset, 64, CCode, Aligned, LittleEndian, + ConstInfo, TrueLblName) -> + [CLbl] = create_lbls(1), + case {Aligned, LittleEndian} of + {true, false} -> + copy_float_big(Base, Offset, NewOffset, Src, + hipe_rtl:label_name(CLbl), TrueLblName, ConstInfo) ++ + [CLbl|CCode]; + {true, true} -> + copy_float_little(Base, Offset, NewOffset, Src, + hipe_rtl:label_name(CLbl), TrueLblName, ConstInfo) ++ + [CLbl|CCode]; + {false, _} -> + CCode + end; +put_float(_NewOffset, _Src, _Base, _Offset, _Size, CCode, _Aligned, + _LittleEndian, _ConstInfo, _TrueLblName) -> + CCode. + +put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, + LittleEndian, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName), + case {Aligned, LittleEndian} of + {true, true} -> + Init ++ + copy_int_little(Base, Offset, NewOffset, Size, UntaggedSrc) ++ + End; + {true, false} -> + Init ++ + copy_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ + End; + {false, true} -> + CCode; + {false, false} -> + Init ++ + copy_offset_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ + End + end. + +put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, + LittleEndian, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName), + case {Aligned, LittleEndian} of + {true, true} -> + Init ++ + copy_int_little(Base, Offset, NewOffset, Size, UntaggedSrc) ++ + End; + {true, false} -> + Init ++ + copy_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ + End; + {false, true} -> + CCode; + {false, false} -> + Init ++ + copy_offset_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ + End + end. + +put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, + LittleEndian, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName), + case Aligned of + true -> + case LittleEndian of + true -> + Init ++ + copy_int_little(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ + End; + false -> + Init ++ + copy_int_big(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ + End + end; + false -> + CCode + end. + +put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, + LittleEndian, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName), + case Aligned of + true -> + case LittleEndian of + true -> + Init ++ + copy_int_little(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ + End; + false -> + Init ++ + copy_int_big(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ + End + end; + false -> + CCode + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Help functions used by the above +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +make_init_end(Src, CCode, TrueLblName) -> + [CLbl, SuccessLbl] = create_lbls(2), + [UntaggedSrc] = create_regs(1), + Init = [hipe_tagscheme:test_fixnum(Src, hipe_rtl:label_name(SuccessLbl), + hipe_rtl:label_name(CLbl), 0.99), + SuccessLbl, + hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)], + End = [hipe_rtl:mk_goto(TrueLblName), CLbl| CCode], + {Init, End, UntaggedSrc}. + +make_init_end(Src, TrueLblName) -> + [UntaggedSrc] = create_regs(1), + Init = [hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)], + End = [hipe_rtl:mk_goto(TrueLblName)], + {Init, End, UntaggedSrc}. + +get_base_offset_size(Binary, SrcBase, SrcOffset, SrcSize, FLName) -> + [JoinLbl, EndLbl, SuccessLbl, SubLbl, OtherLbl, HeapLbl, REFCLbl] = + Lbls = create_lbls(7), + [JoinLblName, EndLblName, SuccessLblName, SubLblName, + OtherLblName, HeapLblName, REFCLblName] = get_label_names(Lbls), + [BitSize,BitOffset] = create_regs(2), + [Orig] = create_vars(1), + [hipe_tagscheme:test_bitstr(Binary, SuccessLblName, FLName, 0.99), + SuccessLbl, + get_field_from_term({sub_binary,binsize}, Binary, SrcSize), + hipe_rtl:mk_alu(SrcSize, SrcSize, sll, ?BYTE_SHIFT), + hipe_tagscheme:test_subbinary(Binary, SubLblName, OtherLblName), + SubLbl, + get_field_from_term({sub_binary,bitsize}, Binary, BitSize), + get_field_from_term({sub_binary,offset}, Binary, SrcOffset), + hipe_rtl:mk_alu(SrcSize, SrcSize, add, BitSize), + get_field_from_term({sub_binary,bitoffset}, Binary, BitOffset), + hipe_rtl:mk_alu(SrcOffset, SrcOffset, sll, ?BYTE_SHIFT), + hipe_rtl:mk_alu(SrcOffset, SrcOffset, add, BitOffset), + get_field_from_term({sub_binary,orig}, Binary, Orig), + hipe_rtl:mk_goto(JoinLblName), + OtherLbl, + hipe_rtl:mk_move(SrcOffset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_move(Orig, Binary), + JoinLbl, + hipe_tagscheme:test_heap_binary(Orig, HeapLblName, REFCLblName), + HeapLbl, + hipe_rtl:mk_alu(SrcBase, Orig, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA-2)), + hipe_rtl:mk_goto(EndLblName), + REFCLbl, + get_field_from_term({proc_bin,bytes}, Orig, SrcBase), + EndLbl]. + +copy_aligned_bytes(CopyBase, CopyOffset, Size, Base, Offset, NewOffset, TrueLblName) -> + [BaseDst, BaseSrc] = create_unsafe_regs(2), + [Iter, Extra, BothOffset] = create_regs(3), + initializations(BaseSrc, BaseDst, BothOffset, CopyOffset, Offset, CopyBase, Base) ++ + [hipe_rtl:mk_alu(Extra, Size, 'and', ?LOW_BITS), + hipe_rtl:mk_alu(Iter, Size, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', Size)] ++ + easy_loop(BaseSrc, BaseDst, BothOffset, Iter, Extra, TrueLblName). + +copy_string(StringBase, StringSize, BinBase, BinOffset, NewOffset, TrueLblName) -> + [TmpOffset,BothOffset,InitOffs] = create_regs(3), + [NewBinBase] = create_unsafe_regs(1), + [EasyLbl,HardLbl] = create_lbls(2), + [hipe_rtl:mk_alu(TmpOffset, BinOffset, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(NewBinBase, BinBase, add, TmpOffset), + hipe_rtl:mk_move(BothOffset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_alub(InitOffs, BinOffset, 'and', ?LOW_BITS, eq, + hipe_rtl:label_name(EasyLbl), hipe_rtl:label_name(HardLbl)), + EasyLbl, + hipe_rtl:mk_alu(NewOffset, BinOffset, add, + hipe_rtl:mk_imm(?bytes_to_bits(StringSize)))] ++ + easy_loop(StringBase, NewBinBase, BothOffset, + hipe_rtl:mk_imm(StringSize), hipe_rtl:mk_imm(0), TrueLblName) ++ + [HardLbl, + hipe_rtl:mk_alu(NewOffset, BinOffset, add, + hipe_rtl:mk_imm(?bytes_to_bits(StringSize)))] ++ + hard_loop(StringBase, NewBinBase, BothOffset, hipe_rtl:mk_imm(StringSize), + InitOffs, TrueLblName). + +small_check(SizeVar, CopySize, FalseLblName) -> + SuccessLbl = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_branch(SizeVar, le, CopySize, + hipe_rtl:label_name(SuccessLbl), FalseLblName), + SuccessLbl]. + +easy_loop(BaseSrc, BaseDst, BothOffset, Iterations, Extra, TrueLblName) -> + [Tmp1,Shift] = create_regs(2), + [LoopLbl,TopLbl,EndLbl,ExtraLbl] = create_lbls(4), + [TopLbl, + hipe_rtl:mk_branch(BothOffset, ne, Iterations, hipe_rtl:label_name(LoopLbl), + hipe_rtl:label_name(EndLbl), 0.99), + LoopLbl, + hipe_rtl:mk_load(Tmp1, BaseSrc, BothOffset, byte, unsigned), + hipe_rtl:mk_store(BaseDst, BothOffset, Tmp1, byte), + hipe_rtl:mk_alu(BothOffset, BothOffset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_goto(hipe_rtl:label_name(TopLbl)), + EndLbl, + hipe_rtl:mk_branch(Extra, eq, hipe_rtl:mk_imm(0), TrueLblName, + hipe_rtl:label_name(ExtraLbl)), + ExtraLbl, + hipe_rtl:mk_load(Tmp1, BaseSrc, BothOffset, byte, unsigned), + hipe_rtl:mk_alu(Shift, hipe_rtl:mk_imm(?BYTE_SIZE), sub, Extra), + hipe_rtl:mk_alu(Tmp1, Tmp1, srl, Shift), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, Shift), + hipe_rtl:mk_store(BaseDst, BothOffset, Tmp1, byte), + hipe_rtl:mk_goto(TrueLblName)]. + +hard_loop(BaseSrc, BaseDst, BothOffset, Iterations, + InitOffset, TrueLblName) -> + [Tmp1, Tmp2, OldByte, NewByte, SaveByte] = create_regs(5), + [LoopLbl,EndLbl,TopLbl] = create_lbls(3), + [hipe_rtl:mk_load(OldByte, BaseDst, BothOffset, byte, unsigned), + hipe_rtl:mk_alu(Tmp1, hipe_rtl:mk_imm(?BYTE_SIZE), sub, InitOffset), + TopLbl, + hipe_rtl:mk_branch(BothOffset, ne, Iterations, + hipe_rtl:label_name(LoopLbl), + hipe_rtl:label_name(EndLbl)), + LoopLbl, + hipe_rtl:mk_load(NewByte, BaseSrc, BothOffset, byte, unsigned), + hipe_rtl:mk_alu(Tmp2, NewByte, srl, InitOffset), + hipe_rtl:mk_alu(SaveByte, OldByte, 'or', Tmp2), + hipe_rtl:mk_store(BaseDst, BothOffset, SaveByte, byte), + hipe_rtl:mk_alu(OldByte, NewByte, sll, Tmp1), + hipe_rtl:mk_alu(BothOffset, BothOffset, 'add', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_goto(hipe_rtl:label_name(TopLbl)), + EndLbl, + hipe_rtl:mk_store(BaseDst, BothOffset, OldByte, byte), + hipe_rtl:mk_goto(TrueLblName)]. + +initializations(BaseTmp1, BaseTmp2, BothOffset, CopyOffset, Offset, CopyBase, Base) -> + [OffsetTmp1,OffsetTmp2] = create_regs(2), + [hipe_rtl:mk_alu(OffsetTmp1, CopyOffset, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(OffsetTmp2, Offset, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(BaseTmp1, CopyBase, add, OffsetTmp1), + hipe_rtl:mk_alu(BaseTmp2, Base, add, OffsetTmp2), + hipe_rtl:mk_move(BothOffset, hipe_rtl:mk_imm(0))]. + +copy_int_little(Base, Offset, NewOffset, Size, Tmp1) when is_integer(Size) -> + [Tmp2,TmpOffset] = create_regs(2), + ByteSize = Size div ?BYTE_SIZE, + [hipe_rtl:mk_alu(TmpOffset, Offset, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(Tmp2, hipe_rtl:mk_imm(ByteSize), 'add', TmpOffset)] ++ + + little_loop(Tmp1, Tmp2, TmpOffset, Base) ++ + + case Size band 7 of + 0 -> + [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size))]; + Bits -> + [hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(?BYTE_SIZE-Bits)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size))] + end; + +copy_int_little(Base, Offset, NewOffset, Size, Tmp1) -> + [Tmp2, Tmp3, Tmp4, TmpOffset] = create_regs(4), + + [hipe_rtl:mk_alu(Tmp2, Size, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(TmpOffset, Offset, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(Tmp3, Tmp2, 'add', TmpOffset)] ++ + + little_loop(Tmp1, Tmp3, TmpOffset, Base) ++ + + [hipe_rtl:mk_alu(Tmp4, Size, 'and', ?LOW_BITS), + hipe_rtl:mk_alu(Tmp4, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', Tmp4), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, Tmp4), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', Size)]. + +little_loop(Tmp1, Tmp3, TmpOffset, Base) -> + [BranchLbl, BodyLbl, EndLbl] = create_lbls(3), + [BranchLbl, + hipe_rtl:mk_branch(TmpOffset, 'ne', Tmp3, + hipe_rtl:label_name(BodyLbl), + hipe_rtl:label_name(EndLbl)), + BodyLbl, + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_goto(hipe_rtl:label_name(BranchLbl)), + EndLbl]. + +big_loop(Tmp1, Tmp3, TmpOffset, Base) -> + [BranchLbl, BodyLbl, EndLbl] = create_lbls(3), + [BranchLbl, + hipe_rtl:mk_branch(TmpOffset, 'ne', Tmp3, + hipe_rtl:label_name(BodyLbl), + hipe_rtl:label_name(EndLbl)), + BodyLbl, + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_goto(hipe_rtl:label_name(BranchLbl)), + EndLbl]. + +copy_int_big(_Base, Offset, NewOffset, 0, _Tmp1) -> + [hipe_rtl:mk_move(NewOffset, Offset)]; +copy_int_big(Base, Offset, NewOffset, ?BYTE_SIZE, Tmp1) -> + TmpOffset = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_alu(TmpOffset, Offset, 'srl', hipe_rtl:mk_imm(3)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(8))]; +copy_int_big(Base, Offset, NewOffset, 2*?BYTE_SIZE, Tmp1) -> + TmpOffset = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_alu(TmpOffset, Offset, 'srl', hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(16))]; +copy_int_big(Base, Offset, NewOffset, 3*?BYTE_SIZE, Tmp1) -> + TmpOffset = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_alu(TmpOffset, Offset, srl, hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, add, hipe_rtl:mk_imm(2)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp1, Tmp1, sra, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp1, Tmp1, sra, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(NewOffset, Offset, add, hipe_rtl:mk_imm(24))]; +copy_int_big(Base, Offset,NewOffset, 4*?BYTE_SIZE, Tmp1) -> + copy_big_word(Base, Offset, NewOffset, Tmp1); +copy_int_big(Base, Offset, NewOffset, Size, Tmp1) when is_integer(Size) -> + [OldOffset, TmpOffset, Bits] = create_regs(3), + ByteSize = (Size + 7) div ?BYTE_SIZE, + case Size band 7 of + 0 -> + [hipe_rtl:mk_alu(OldOffset, Offset, sra, hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(TmpOffset, OldOffset, add, hipe_rtl:mk_imm(ByteSize))]; + Rest -> + [hipe_rtl:mk_alu(OldOffset, Offset, sra, hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(TmpOffset, OldOffset, add, hipe_rtl:mk_imm(ByteSize-1)), + hipe_rtl:mk_alu(Bits, Tmp1, sll, hipe_rtl:mk_imm(?BYTE_SIZE-Rest)), + hipe_rtl:mk_store(Base, TmpOffset, Bits, byte), + hipe_rtl:mk_alu(Tmp1, Tmp1, sra, hipe_rtl:mk_imm(Rest))] + end ++ + big_loop(Tmp1, OldOffset, TmpOffset, Base) ++ + [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size))]; +copy_int_big(Base, Offset, NewOffset, Size, Tmp1) -> + Tmp2 = hipe_rtl:mk_new_reg(), + Tmp3 = hipe_rtl:mk_new_reg(), + Tmp4 = hipe_rtl:mk_new_reg(), + Tmp5 = hipe_rtl:mk_new_reg(), + Tmp6 = hipe_rtl:mk_new_reg(), + TmpOffset = hipe_rtl:mk_new_reg(), + EvenLbl = hipe_rtl:mk_new_label(), + OddLbl = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_alu(Tmp2, Size, 'srl', hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(Tmp3, Offset, 'srl', hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(TmpOffset, Tmp2, 'add', Tmp3), + hipe_rtl:mk_alub(Tmp4, Size, 'and', hipe_rtl:mk_imm(7), 'eq', + hipe_rtl:label_name(EvenLbl), hipe_rtl:label_name(OddLbl)), + OddLbl, + hipe_rtl:mk_alu(Tmp6, hipe_rtl:mk_imm(8), 'sub', Tmp4), + hipe_rtl:mk_alu(Tmp5, Tmp1, 'sll', Tmp6), + hipe_rtl:mk_store(Base, TmpOffset, Tmp5, byte), + EvenLbl, + hipe_rtl:mk_alu(Tmp1, Tmp1, srl, Tmp4)] ++ + + big_loop(Tmp1, Tmp3, TmpOffset, Base) ++ + + [hipe_rtl:mk_alu(NewOffset, Offset, 'add', Size)]. + +copy_big_word(Base, Offset, NewOffset, Word) -> + TmpOffset = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_alu(TmpOffset, Offset, 'srl', hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(3)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(8)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(32))]. + +copy_little_word(Base, Offset, NewOffset, Word) -> + TmpOffset = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_alu(TmpOffset, Offset, 'srl', ?BYTE_SHIFT), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_store(Base, TmpOffset, Word, byte), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(32))]. + +copy_offset_int_big(Base, Offset, NewOffset, Size, Tmp1) when is_integer(Size) -> + Tmp2 = hipe_rtl:mk_new_reg(), + Tmp3 = hipe_rtl:mk_new_reg(), + Tmp4 = hipe_rtl:mk_new_reg(), + Tmp5 = hipe_rtl:mk_new_reg(), + Tmp6 = hipe_rtl:mk_new_reg(), + Tmp7 = hipe_rtl:mk_new_reg(), + Tmp8 = hipe_rtl:mk_new_reg(), + Tmp9 = hipe_rtl:mk_new_reg(), + OldByte = hipe_rtl:mk_new_reg(), + TmpOffset = hipe_rtl:mk_new_reg(), + BranchLbl = hipe_rtl:mk_new_label(), + BodyLbl = hipe_rtl:mk_new_label(), + EndLbl = hipe_rtl:mk_new_label(), + NextLbl = hipe_rtl:mk_new_label(), + WordSize = hipe_rtl_arch:word_size(), + [hipe_rtl:mk_alu(Tmp2, Offset, 'and', ?LOW_BITS), + hipe_rtl:mk_alu(Tmp3, Offset, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size)), + hipe_rtl:mk_alu(Tmp9, NewOffset, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(TmpOffset, Tmp9, srl, ?BYTE_SHIFT), + hipe_rtl:mk_alu(Tmp4, NewOffset, 'and', ?LOW_BITS), + hipe_rtl:mk_alu(Tmp6, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', Tmp4), + hipe_rtl:mk_alu(Tmp6, Tmp6, 'and', ?LOW_BITS), + hipe_rtl:mk_alu(Tmp4, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', Tmp6), + hipe_rtl:mk_move(Tmp5, Tmp1), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sll', Tmp6), + hipe_rtl:mk_branch(TmpOffset, 'ne', Tmp3, hipe_rtl:label_name(NextLbl), + hipe_rtl:label_name(EndLbl)), + NextLbl, + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_move(Tmp1, Tmp5), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', Tmp4), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)), + BranchLbl, + hipe_rtl:mk_branch(TmpOffset, 'ne', Tmp3, hipe_rtl:label_name(BodyLbl), + hipe_rtl:label_name(EndLbl)), + BodyLbl, + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_goto(hipe_rtl:label_name(BranchLbl)), + EndLbl, + hipe_rtl:mk_load(OldByte, Base, TmpOffset, byte, unsigned), + hipe_rtl:mk_alu(Tmp8, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', Tmp2), + hipe_rtl:mk_alu(OldByte, OldByte, 'srl', Tmp8), + hipe_rtl:mk_alu(OldByte, OldByte, 'sll', Tmp8), + hipe_rtl:mk_alu(Tmp7, Tmp2, 'add', + hipe_rtl:mk_imm(?bytes_to_bits(WordSize-1))), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'sll', Tmp7), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'srl', Tmp7), + hipe_rtl:mk_alu(Tmp1, Tmp1, 'or', OldByte), + hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte)]. + +copy_float_little(_Base, _Offset, _NewOffset, _Src, FalseLblName, _TrueLblName, fail) -> + [hipe_rtl:mk_goto(FalseLblName)]; +copy_float_little(Base, Offset, NewOffset, Src, _FalseLblName, TrueLblName, pass) -> + FloatLo = hipe_rtl:mk_new_reg(), + FloatHi = hipe_rtl:mk_new_reg(), + TmpOffset = hipe_rtl:mk_new_reg(), + hipe_tagscheme:unsafe_load_float(FloatLo, FloatHi, Src) ++ + copy_little_word(Base, Offset, TmpOffset, FloatLo) ++ + copy_little_word(Base, TmpOffset, NewOffset, FloatHi) ++ + [hipe_rtl:mk_goto(TrueLblName)]; +copy_float_little(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName, var) -> + SuccessLbl = hipe_rtl:mk_new_label(), + hipe_tagscheme:test_flonum(Src, hipe_rtl:label_name(SuccessLbl), FalseLblName, 0.99) ++ + [SuccessLbl|copy_float_little(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName, pass)]. + +copy_float_big(_Base, _Offset, _NewOffset, _Src, FalseLblName, _TrueLblName, fail) -> + [hipe_rtl:mk_goto(FalseLblName)]; +copy_float_big(Base, Offset, NewOffset, Src, _FalseLblName, TrueLblName,pass) -> + FloatLo = hipe_rtl:mk_new_reg(), + FloatHi = hipe_rtl:mk_new_reg(), + TmpOffset =hipe_rtl:mk_new_reg(), + hipe_tagscheme:unsafe_load_float(FloatLo, FloatHi, Src) ++ + copy_big_word(Base, Offset, TmpOffset, FloatHi) ++ + copy_big_word(Base, TmpOffset, NewOffset, FloatLo) ++ + [hipe_rtl:mk_goto(TrueLblName)]; +copy_float_big(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName, var) -> + SuccessLbl = hipe_rtl:mk_new_label(), + hipe_tagscheme:test_flonum(Src, hipe_rtl:label_name(SuccessLbl), FalseLblName, 0.99) ++ + [SuccessLbl|copy_float_big(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName, pass)]. + +make_size(1, BitsVar, FalseLblName) -> + [DstReg] = create_regs(1), + {first_part(BitsVar, DstReg, FalseLblName), DstReg}; +make_size(?BYTE_SIZE, BitsVar, FalseLblName) -> + [DstReg] = create_regs(1), + Code = + first_part(BitsVar, DstReg, FalseLblName) ++ + [hipe_rtl:mk_alu(DstReg, DstReg, 'sll', ?BYTE_SHIFT)], + {Code, DstReg}; +make_size(UnitImm, BitsVar, FalseLblName) -> + [DstReg] = create_regs(1), + UnitList = number2list(UnitImm), + Code = multiply_code(UnitList, BitsVar, DstReg, FalseLblName), + {Code, DstReg}. + +multiply_code(List=[Head|_Tail], Variable, Result, FalseLblName) -> + Test = set_high(Head), + Tmp1 = hipe_rtl:mk_new_reg(), + SuccessLbl = hipe_rtl:mk_new_label(), + Register = hipe_rtl:mk_new_reg(), + Code = [hipe_rtl:mk_move(Result, hipe_rtl:mk_imm(0))| + first_part(Variable, Register, FalseLblName)] + ++ + [hipe_rtl:mk_alub(Tmp1, Register, 'and', hipe_rtl:mk_imm(Test), + 'eq', hipe_rtl:label_name(SuccessLbl), + FalseLblName, 0.99), + SuccessLbl], + multiply_code(List, Register, Result, FalseLblName, Tmp1, Code). + +multiply_code([ShiftSize|Rest], Register, Result, FalseLblName, Tmp1, OldCode) -> + SuccessLbl = hipe_rtl:mk_new_label(), + Code = OldCode ++ [hipe_rtl:mk_alu(Tmp1, Register, 'sll', + hipe_rtl:mk_imm(ShiftSize)), + hipe_rtl:mk_alub(Result, Tmp1, 'add', Result, not_overflow, hipe_rtl:label_name(SuccessLbl), FalseLblName, 0.99), + SuccessLbl], + multiply_code(Rest, Register, Result, FalseLblName, Tmp1, Code); +multiply_code([], _Register, _Result, _FalseLblName, _Tmp1, Code) -> + Code. + +number2list(X) when is_integer(X), X >= 0 -> + number2list(X, []). + +number2list(1, Acc) -> + lists:reverse([0|Acc]); +number2list(0, Acc) -> + lists:reverse(Acc); +number2list(X, Acc) -> + F = floorlog2(X), + number2list(X-(1 bsl F), [F|Acc]). + +floorlog2(X) -> + round(math:log(X)/math:log(2)-0.5). + +set_high(X) -> + set_high(X, 0). + +set_high(0, Y) -> + Y; +set_high(X, Y) -> + set_high(X-1, Y+(1 bsl (27-X))). + +get_32_bit_value(Size, USize, SystemLimitLblName, NegLblName) -> + Lbls = [FixLbl, BigLbl, OkLbl, PosBigLbl] = create_lbls(4), + [FixLblName, BigLblName, OkLblName, PosBigLblName] = [hipe_rtl:label_name(Lbl) || Lbl <- Lbls], + [hipe_tagscheme:test_fixnum(Size, FixLblName, BigLblName, 0.99), + FixLbl, + hipe_tagscheme:untag_fixnum(USize, Size), + hipe_rtl:mk_branch(USize, ge, hipe_rtl:mk_imm(0), OkLblName, NegLblName), + BigLbl, + hipe_tagscheme:test_pos_bignum(Size, PosBigLblName, NegLblName, 0.99), + PosBigLbl, + hipe_tagscheme:get_one_word_pos_bignum(USize, Size, SystemLimitLblName), + OkLbl]. + + +first_part(Var, Register, FalseLblName) -> + [SuccessLbl1, SuccessLbl2] = create_lbls(2), + [hipe_tagscheme:test_fixnum(Var, hipe_rtl:label_name(SuccessLbl1), + FalseLblName, 0.99), + SuccessLbl1, + hipe_tagscheme:fixnum_ge(Var, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(0)), + hipe_rtl:label_name(SuccessLbl2), FalseLblName, 0.99), + SuccessLbl2, + hipe_tagscheme:untag_fixnum(Register, Var)]. + + diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl new file mode 100644 index 0000000000..d147bed6d8 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl @@ -0,0 +1,1134 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_rtl_binary_match.erl +%%% Author : Per Gustafsson +%%% Description : +%%% +%%% Created : 5 Mar 2007 by Per Gustafsson +%%%------------------------------------------------------------------- +-module(hipe_rtl_binary_match). + +-export([gen_rtl/5]). + +-import(hipe_tagscheme, [set_field_from_term/3, get_field_from_term/3]). + +-include("hipe_literals.hrl"). + +%%-------------------------------------------------------------------- + +-define(MAX_BINSIZE, trunc(?MAX_HEAP_BIN_SIZE / hipe_rtl_arch:word_size()) + 2). +-define(BYTE_SHIFT, 3). %% Turn bits into bytes or vice versa +-define(LOW_BITS, 7). %% Three lowest bits set +-define(BYTE_SIZE, 8). +-define(MAX_SMALL_BITS, (hipe_rtl_arch:word_size() * ?BYTE_SIZE - 5)). + +%%-------------------------------------------------------------------- + +gen_rtl({bs_start_match, 0}, [Ms], [Binary], TrueLblName, FalseLblName) -> + ReInitLbl = hipe_rtl:mk_new_label(), + BinaryLbl = hipe_rtl:mk_new_label(), + TestCode = + [hipe_rtl:mk_move(Ms,Binary), + hipe_tagscheme:test_matchstate(Binary, + hipe_rtl:label_name(ReInitLbl), + hipe_rtl:label_name(BinaryLbl), + 0.99)], + ReInitCode = reinit_matchstate(Ms, TrueLblName), + OrdinaryCode = make_matchstate(Binary, 0, Ms, TrueLblName, FalseLblName), + [TestCode,[ReInitLbl|ReInitCode],[BinaryLbl|OrdinaryCode]]; +gen_rtl({bs_start_match, Max}, [Ms], [Binary], TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + BinaryLbl = hipe_rtl:mk_new_label(), + ReSizeLbl = hipe_rtl:mk_new_label(), + ReInitLbl = hipe_rtl:mk_new_label(), + TestCode = + [hipe_rtl:mk_move(Ms,Binary), + hipe_tagscheme:test_matchstate(Binary, + hipe_rtl:label_name(MatchStateLbl), + hipe_rtl:label_name(BinaryLbl), + 0.99)], + MatchStateTestCode = + [hipe_tagscheme:compare_matchstate(Max, Ms, + hipe_rtl:label_name(ReInitLbl), + hipe_rtl:label_name(ReSizeLbl))], + ReSizeCode = resize_matchstate(Ms, Max, TrueLblName), + ReInitCode = reinit_matchstate(Ms, TrueLblName), + OrdinaryCode = make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName), + [TestCode, [MatchStateLbl|MatchStateTestCode], [ReSizeLbl|ReSizeCode], + [ReInitLbl|ReInitCode], [BinaryLbl|OrdinaryCode]]; +gen_rtl({bs_start_match, _Max}, [], [Binary], TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_bitstr(Binary, TrueLblName, + hipe_rtl:label_name(MatchStateLbl), 0.99), + MatchStateLbl, + hipe_tagscheme:test_matchstate(Binary, TrueLblName, FalseLblName, 0.99)]; +gen_rtl({{bs_start_match, bitstr}, Max}, [Ms], [Binary], + TrueLblName, FalseLblName) -> + make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName); +gen_rtl({{bs_start_match, bitstr}, _Max}, [], [_Binary], + TrueLblName, _FalseLblName) -> + [hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({{bs_start_match,ok_matchstate}, Max}, [Ms], [Binary], + TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + BinaryLbl = hipe_rtl:mk_new_label(), + TestCode = + [hipe_rtl:mk_move(Ms,Binary), + hipe_tagscheme:test_matchstate(Binary, + hipe_rtl:label_name(MatchStateLbl), + hipe_rtl:label_name(BinaryLbl), + 0.99)], + MatchStateCode = reinit_matchstate(Ms, TrueLblName), + OrdinaryCode = make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName), + TestCode ++ [MatchStateLbl|MatchStateCode] ++ [BinaryLbl|OrdinaryCode]; +gen_rtl({{bs_start_match, ok_matchstate}, _Max}, [], [Binary], + TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_bitstr(Binary, TrueLblName, + hipe_rtl:label_name(MatchStateLbl), 0.99), + MatchStateLbl, + hipe_tagscheme:test_matchstate(Binary, TrueLblName, FalseLblName, 0.99)]; +gen_rtl({bs_get_integer, 0, _Flags}, [Dst, NewMs], [Ms], + TrueLblName, _FalseLblName) -> + update_ms(NewMs, Ms) ++ + [hipe_rtl:mk_move(Dst, hipe_rtl:mk_imm(15)), + hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({bs_get_integer,Size,Flags}, [Dst,NewMs], Args, + TrueLblName, FalseLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Signed = signed(Flags), + LittleEndian = littleendian(Flags), + Aligned = aligned(Flags), + UnSafe = unsafe(Flags), + case Args of + [Ms] -> + CCode= int_get_c_code(Dst, Ms, hipe_rtl:mk_imm(Size), + Flags, TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ + get_static_int(Dst, Ms, Size, CCode, + Signed, LittleEndian, Aligned, UnSafe, + TrueLblName, FalseLblName); + [Ms, Arg] -> + {SizeCode1, SizeReg1} = + make_size(Size, Arg, FalseLblName), + CCode = int_get_c_code(Dst, Ms, SizeReg1, Flags, + TrueLblName, FalseLblName), + InCode = get_dynamic_int(Dst, Ms, SizeReg1, CCode, + Signed, LittleEndian, Aligned, + TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ SizeCode1 ++ InCode + end + end; +gen_rtl({bs_get_float,Size,Flags}, [Dst1,NewMs], Args, + TrueLblName, FalseLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + [hipe_rtl:mk_gctest(3)] ++ + case Args of + [Ms] -> + CCode = float_get_c_code(Dst1, Ms, hipe_rtl:mk_imm(Size), Flags, + TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ CCode; + [Ms,Arg] -> + {SizeCode, SizeReg} = make_size(Size, Arg, + FalseLblName), + CCode = float_get_c_code(Dst1, Ms, SizeReg, Flags, + TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ SizeCode ++ CCode + end + end; +gen_rtl({bs_get_binary_all, Unit, _Flags}, [Dst], [Ms], + TrueLblName, FalseLblName) -> + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE)] ++ + get_binary_all(Dst, Unit, Ms, TrueLblName,FalseLblName); +gen_rtl({bs_get_binary_all_2, Unit, _Flags}, [Dst,NewMs], [Ms], + TrueLblName, FalseLblName) -> + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE)] ++ + update_ms(NewMs, Ms) ++ + get_binary_all(Dst, Unit, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_get_binary,Size,Flags}, [Dst,NewMs], Args, + TrueLblName, FalseLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Unsafe = unsafe(Flags), + case Args of + [Ms] -> + SizeReg = hipe_rtl:mk_new_reg(), + SizeCode = [hipe_rtl:mk_move(SizeReg, hipe_rtl:mk_imm(Size))]; + [Ms, BitsVar] -> + {SizeCode, SizeReg} = make_size(Size, BitsVar, FalseLblName) + end, + InCode = get_binary(Dst, Ms, SizeReg, Unsafe, + TrueLblName, FalseLblName), + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE)] ++ + update_ms(NewMs, Ms) ++ SizeCode ++ InCode + end; +gen_rtl(bs_get_utf8, [Dst,NewMs], [Ms], TrueLblName, FalseLblName) -> + update_ms(NewMs, Ms) ++ utf8_get_c_code(Dst, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_get_utf16,Flags}, [Dst,NewMs], [Ms], TrueLblName, FalseLblName) -> + update_ms(NewMs, Ms) ++ utf16_get_c_code(Flags, Dst, Ms, TrueLblName, FalseLblName); +gen_rtl(bs_validate_unicode_retract, [NewMs], [Src,Ms], TrueLblName, FalseLblName) -> + update_ms(NewMs, Ms) ++ validate_unicode_retract_c_code(Src, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_test_tail, NumBits}, [NewMs], [Ms], TrueLblName, FalseLblName) -> + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + update_ms(NewMs, Ms) ++ ExCode ++ + [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(NumBits), FalseLblName), + hipe_rtl:mk_branch(Offset, eq, BinSize, TrueLblName, FalseLblName)]; +gen_rtl({bs_test_unit, Unit}, [], [Ms], TrueLblName, FalseLblName) -> + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + SizeReg = hipe_rtl:mk_new_reg(), + ExCode ++ + [hipe_rtl:mk_alu(SizeReg, BinSize, sub, Offset)| + test_alignment_code(SizeReg, Unit, TrueLblName, FalseLblName)]; +gen_rtl({bs_test_tail, NumBits}, [], [Ms], TrueLblName, FalseLblName) -> + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + ExCode ++ + [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(NumBits), FalseLblName), + hipe_rtl:mk_branch(Offset, eq, BinSize, TrueLblName, FalseLblName)]; +gen_rtl({bs_skip_bits_all, Unit, _Flags}, Dst, [Ms], + TrueLblName, FalseLblName) -> + opt_update_ms(Dst, Ms) ++ + skip_bits_all(Unit, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_skip_bits, Bits}, Dst, [Ms|Args], TrueLblName, FalseLblName) -> + opt_update_ms(Dst,Ms) ++ + case Args of + [] -> + skip_bits2(Ms, hipe_rtl:mk_imm(Bits), TrueLblName, FalseLblName); + [Arg] -> + {SizeCode, SizeReg} = make_size(Bits, Arg, FalseLblName), + InCode = skip_bits2(Ms, SizeReg, TrueLblName, FalseLblName), + SizeCode ++ InCode + end; +gen_rtl({bs_restore, Slot}, [NewMs], [Ms], TrueLblName, _FalseLblName) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + update_ms(NewMs, Ms) ++ + [get_field_from_term({matchstate, {saveoffset, Slot}}, Ms, Tmp1), + set_field_from_term({matchstate, {matchbuffer, offset}}, Ms, Tmp1), + hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({bs_save, Slot}, [NewMs], [Ms], TrueLblName, _FalseLblName) -> + {Offset, Instr} = extract_matchstate_var(offset, Ms), + update_ms(NewMs, Ms) ++ + [Instr, + set_field_from_term({matchstate, {saveoffset, Slot}}, Ms, Offset), + hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({bs_match_string, String, ByteSize}, [NewMs], + [Ms], TrueLblName, FalseLblName) -> + {[Offset, BinSize, Base], Instrs} = + extract_matchstate_vars([offset, binsize, base], Ms), + [SuccessLbl, ALbl, ULbl] = create_lbls(3), + [NewOffset,BitOffset] = create_gcsafe_regs(2), + Unit = hipe_rtl_arch:word_size() - 1, + Loops = ByteSize div Unit, + Init = + [Instrs, + update_ms(NewMs,Ms), + check_size(Offset, hipe_rtl:mk_imm(ByteSize*?BYTE_SIZE), BinSize, + NewOffset, hipe_rtl:label_name(SuccessLbl), FalseLblName), + SuccessLbl], + SplitCode = + [hipe_rtl:mk_alub(BitOffset, Offset, 'and', hipe_rtl:mk_imm(?LOW_BITS), eq, + hipe_rtl:label_name(ALbl), hipe_rtl:label_name(ULbl))], + Loops = ByteSize div Unit, + SkipSize = Loops * Unit, + {ACode1,UCode1} = + case Loops of + 0 -> + {[],[]}; + _ -> + create_loops(Loops, Unit, String, Base, + Offset, BitOffset, FalseLblName) + end, + <<_:SkipSize/binary, RestString/binary>> = String, + {ACode2, UCode2} = + case ByteSize rem Unit of + 0 -> + {[],[]}; + Rem -> + create_rests(Rem, RestString, Base, Offset, BitOffset, FalseLblName) + end, + End = [update_offset(NewOffset, NewMs), hipe_rtl:mk_goto(TrueLblName)], + [Init, SplitCode, ALbl, ACode1, ACode2, End, ULbl, UCode1, UCode2,End]; +gen_rtl(bs_context_to_binary, [Bin], [Var], TrueLblName, _FalseLblName) -> + MSLabel = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_move(Bin, Var), + hipe_tagscheme:test_matchstate(Var, hipe_rtl:label_name(MSLabel), + TrueLblName, 0.5), + MSLabel, + hipe_tagscheme:convert_matchstate(Bin), + hipe_rtl:mk_goto(TrueLblName)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Calls to C %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +int_get_c_code(Dst1, Ms, Size, Flags, TrueLblName, FalseLblName) -> + make_int_gc_code(Size) ++ + get_c_code(bs_get_integer_2, Dst1, Ms, Size, Flags, + TrueLblName, FalseLblName). + +float_get_c_code(Dst1, Ms, Size, Flags, TrueLblName, FalseLblName) -> + get_c_code(bs_get_float_2, Dst1, Ms, Size, Flags, TrueLblName, FalseLblName). + +get_c_code(Func, Dst1, Ms, Size, Flags, TrueLblName, FalseLblName) -> + SizeReg = hipe_rtl:mk_new_reg_gcsafe(), + FlagsReg = hipe_rtl:mk_new_reg_gcsafe(), + MatchBuf = hipe_rtl:mk_new_reg(), + RetLabel = hipe_rtl:mk_new_label(), + NonVal = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()), + [hipe_rtl:mk_move(SizeReg, Size), + hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(Flags)), + hipe_tagscheme:extract_matchbuffer(MatchBuf, Ms), + hipe_rtl_arch:call_bif([Dst1], Func, [SizeReg, FlagsReg, MatchBuf], + hipe_rtl:label_name(RetLabel), FalseLblName), + RetLabel, + hipe_rtl:mk_branch(Dst1, eq, NonVal, + FalseLblName, + TrueLblName, 0.01)]. + +utf8_get_c_code(Dst, Ms, TrueLblName, FalseLblName) -> + MatchBuf = hipe_rtl:mk_new_reg(), + NonVal = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()), + [hipe_tagscheme:extract_matchbuffer(MatchBuf, Ms), + hipe_rtl_arch:call_bif([Dst], bs_get_utf8, [MatchBuf], [], []), + hipe_rtl:mk_branch(Dst, eq, NonVal, FalseLblName, TrueLblName, 0.01)]. + +utf16_get_c_code(Flags, Dst, Ms, TrueLblName, FalseLblName) -> + MatchBuf = hipe_rtl:mk_new_reg(), + NonVal = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()), + FlagsReg = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_tagscheme:extract_matchbuffer(MatchBuf, Ms), + hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(Flags)), + hipe_rtl_arch:call_bif([Dst], bs_get_utf16, [MatchBuf, FlagsReg], [], []), + hipe_rtl:mk_branch(Dst, eq, NonVal, FalseLblName, TrueLblName, 0.01)]. + +validate_unicode_retract_c_code(Src, Ms, TrueLblName, FalseLblName) -> + MatchBuf = hipe_rtl:mk_new_reg(), + Zero = hipe_rtl:mk_imm(0), + Tmp = hipe_rtl:mk_new_reg(), + [hipe_tagscheme:extract_matchbuffer(MatchBuf, Ms), + hipe_rtl_arch:call_bif([Tmp], bs_validate_unicode_retract, + [MatchBuf,Src], [], []), + hipe_rtl:mk_branch(Tmp, eq, Zero, FalseLblName, TrueLblName, 0.01)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Int Code %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +create_loops(Loops, Unit, String, Base, Offset, BitOffset, FalseLblName) -> + [Reg] = create_gcsafe_regs(1), + AlignedFun = fun(Value) -> + [get_int_to_reg(Reg, Unit*?BYTE_SIZE, Base, Offset, 'srl', + {unsigned, big}), + update_and_test(Reg, Unit, Offset, Value, FalseLblName)] + end, + UnAlignedFun = fun(Value) -> + [get_unaligned_int_to_reg(Reg, Unit*?BYTE_SIZE, + Base, Offset, BitOffset, + 'srl', {unsigned, big})| + update_and_test(Reg, Unit, Offset, Value, FalseLblName)] + end, + {create_loops(Loops, Unit, String, AlignedFun), + create_loops(Loops, Unit, String, UnAlignedFun)}. + +create_rests(Rem, String, Base, Offset, BitOffset, FalseLblName) -> + [Reg] = create_gcsafe_regs(1), + AlignedFun = fun(Value) -> + [get_int_to_reg(Reg, Rem*?BYTE_SIZE, Base, Offset, 'srl', + {unsigned, big})| + just_test(Reg, Value, FalseLblName)] + end, + UnAlignedFun = fun(Value) -> + [get_unaligned_int_to_reg(Reg, Rem*?BYTE_SIZE, + Base, Offset, BitOffset, + 'srl', {unsigned, big})| + just_test(Reg, Value, FalseLblName)] + end, + {create_loops(1, Rem, String, AlignedFun), + create_loops(1, Rem, String, UnAlignedFun)}. + +create_loops(0, _Unit, _String, _IntFun) -> + []; +create_loops(N, Unit, String, IntFun) -> + {Value, RestString} = get_value(Unit,String), + [IntFun(Value), + create_loops(N-1, Unit, RestString, IntFun)]. + +update_and_test(Reg, Unit, Offset, Value, FalseLblName) -> + [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(Unit*?BYTE_SIZE), FalseLblName), + just_test(Reg, Value, FalseLblName)]. + +just_test(Reg, Value, FalseLblName) -> + [ContLbl] = create_lbls(1), + [hipe_rtl:mk_branch(Reg, eq, hipe_rtl:mk_imm(Value), + hipe_rtl:label_name(ContLbl), FalseLblName), + ContLbl]. + +get_value(N,String) -> + <> = String, + {I, Rest}. + +make_int_gc_code(I) when is_integer(I) -> + case hipe_tagscheme:bignum_sizeneed(I) of + 0 -> []; + X when is_integer(X) -> [hipe_rtl:mk_gctest(X)] + end; +make_int_gc_code(SReg) -> + FixNumLbl = hipe_rtl:mk_new_label(), + FixNumLblName = hipe_rtl:label_name(FixNumLbl), + {ResReg,Code} = hipe_tagscheme:bignum_sizeneed_code(SReg, FixNumLblName), + Code ++ + [hipe_rtl:mk_gctest(ResReg), + hipe_rtl:mk_goto(FixNumLblName), + FixNumLbl]. + +get_static_int(Dst1, Ms, Size, CCode, Signed, LittleEndian, Aligned, + Unsafe, TrueLblName, FalseLblName) -> + WordSize = hipe_rtl_arch:word_size(), + case Size =< WordSize*?BYTE_SIZE of + true -> + case {Aligned, LittleEndian} of + {true, false} -> + get_int_from_bin(Ms, Size, Dst1,Signed, LittleEndian, + Unsafe, FalseLblName, TrueLblName); + {true, true} -> + case Size rem ?BYTE_SIZE of + 0 -> + get_int_from_bin(Ms, Size, Dst1, Signed, LittleEndian, + Unsafe, FalseLblName, TrueLblName); + _ -> + CCode + end; + {false, false} -> + get_int_from_unaligned_bin(Ms, Size, Dst1, Signed, + Unsafe, FalseLblName, TrueLblName); + {false, true} -> + CCode + end; + false -> + CCode + end. + +get_dynamic_int(Dst1, Ms, SizeReg, CCode, Signed, LittleEndian, true, + TrueLblName, FalseLblName) -> + {Init, End} = make_dyn_prep(SizeReg, CCode), + Init ++ + get_unknown_size_int(SizeReg, Ms, Dst1, Signed, LittleEndian, + FalseLblName, TrueLblName) ++ + End; +get_dynamic_int(_Dst1, _Ms, _SizeReg, CCode, _Signed, _LittleEndian, false, + _TrueLblName, _FalseLblName) -> + CCode. + +get_int_from_bin(Ms, Size, Dst1, Signed, LittleEndian, + Unsafe, FalseLblName, TrueLblName) -> + Shiftr = shift_type(Signed), + Type = get_type(Signed, LittleEndian), + NewOffset = hipe_rtl:mk_new_reg_gcsafe(), + [SuccessLbl] = create_lbls(1), + {[Base,Offset,BinSize], ExCode} = + extract_matchstate_vars([base,offset,binsize], Ms), + ExCode ++ + [check_size(Offset, hipe_rtl:mk_imm(Size), BinSize, NewOffset, + Unsafe, hipe_rtl:label_name(SuccessLbl), FalseLblName), + SuccessLbl] ++ + [update_offset(NewOffset, Ms)] ++ + get_int(Dst1, Size, Base, Offset, Shiftr, Type, TrueLblName). + +get_int_from_unaligned_bin(Ms, Size, Dst1, Signed, + UnSafe, FalseLblName, TrueLblName) -> + Shiftr = shift_type(Signed), + Type = get_type(Signed, false), + NewOffset = hipe_rtl:mk_new_reg_gcsafe(), + [SuccessLbl] = create_lbls(1), + {[Base,Offset,BinSize], ExCode} = + extract_matchstate_vars([base,offset,binsize], Ms), + ExCode ++ + [check_size(Offset, hipe_rtl:mk_imm(Size), BinSize, NewOffset, + UnSafe, hipe_rtl:label_name(SuccessLbl), FalseLblName), + SuccessLbl] ++ + [update_offset(NewOffset, Ms)] ++ + get_unaligned_int(Dst1, Size, Base, Offset, Shiftr, Type, TrueLblName). + +get_unknown_size_int(SizeReg, Ms, Dst1, Signed, Little, + FalseLblName, TrueLblName) -> + Shiftr = shift_type(Signed), + Type = get_type(Signed, false), + [NewOffset] = create_gcsafe_regs(1), + [SuccessLbl] = create_lbls(1), + {[Base,Offset,BinSize], ExCode} = + extract_matchstate_vars([base,offset,binsize], Ms), + ExCode ++ + [check_size(Offset, SizeReg, BinSize, NewOffset, + hipe_rtl:label_name(SuccessLbl), FalseLblName), + SuccessLbl, + update_offset(NewOffset, Ms)] ++ + case Little of + true -> + get_little_unknown_int(Dst1, Base, Offset, NewOffset, + Shiftr, Type, TrueLblName); + false -> + get_big_unknown_int(Dst1, Base, Offset, NewOffset, + Shiftr, Type, TrueLblName) + end. + +make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName) -> + Base = hipe_rtl:mk_new_reg(), + Orig = hipe_rtl:mk_new_var(), + BinSize = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + Lbl = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_gctest(?MS_MIN_SIZE+Max), + get_binary_bytes(Binary, BinSize, Base, Offset, + Orig, hipe_rtl:label_name(Lbl), FalseLblName), + Lbl, + hipe_tagscheme:create_matchstate(Max, BinSize, Base, Offset, Orig, Ms), + hipe_rtl:mk_goto(TrueLblName)]. + +resize_matchstate(Ms, Max, TrueLblName) -> + Base = hipe_rtl:mk_new_reg(), + Orig = hipe_rtl:mk_new_var(), + BinSize = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_gctest(?MS_MIN_SIZE+Max), + get_field_from_term({matchstate, {matchbuffer, binsize}}, Ms, BinSize), + get_field_from_term({matchstate, {matchbuffer, base}}, Ms, Base), + get_field_from_term({matchstate, {matchbuffer, orig}}, Ms, Orig), + get_field_from_term({matchstate, {matchbuffer, offset}}, Ms, Offset), + hipe_tagscheme:create_matchstate(Max, BinSize, Base, Offset, Orig, Ms), + hipe_rtl:mk_goto(TrueLblName)]. + +reinit_matchstate(Ms, TrueLblName) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [get_field_from_term({matchstate, {matchbuffer, offset}}, Ms, Tmp), + set_field_from_term({matchstate, {saveoffset, 0}}, Ms, Tmp), + hipe_rtl:mk_goto(TrueLblName)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%% Binary Code %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +get_binary_all(Dst1, 1, Ms, TrueLblName, _FalseLblName) -> + [SizeReg] = create_gcsafe_regs(1), + {[Offset,BinSize,Orig], ExCode} = + extract_matchstate_vars([offset,binsize,orig], Ms), + MakeCode = + [hipe_rtl:mk_alu(SizeReg, BinSize, sub, Offset)| + construct_subbin(Dst1,SizeReg,Offset,Orig)] ++ + [update_offset(BinSize, Ms), + hipe_rtl:mk_goto(TrueLblName)], + ExCode ++ MakeCode; +get_binary_all(Dst1, Unit, Ms, TrueLblName, FalseLblName) -> + [SizeReg] = create_gcsafe_regs(1), + [SuccessLbl] = create_lbls(1), + SLblName = hipe_rtl:label_name(SuccessLbl), + {[Offset,BinSize,Orig], ExCode} = + extract_matchstate_vars([offset,binsize,orig], Ms), + MakeCode = + [hipe_rtl:mk_alu(SizeReg, BinSize, sub, Offset)| + test_alignment_code(SizeReg,Unit,SLblName,FalseLblName)] ++ + [SuccessLbl| + construct_subbin(Dst1,SizeReg,Offset,Orig)] ++ + [update_offset(BinSize, Ms), + hipe_rtl:mk_goto(TrueLblName)], + ExCode ++ MakeCode. + +get_binary(Dst1, Ms, SizeReg, + UnSafe, TrueLblName, FalseLblName) -> + [SuccessLbl] = create_lbls(1), + [EndOffset] = create_gcsafe_regs(1), + {[Offset,BinSize,Orig], ExCode} = + extract_matchstate_vars([offset,binsize,orig], Ms), + CheckCode = + [check_size(Offset, SizeReg, BinSize, EndOffset, + UnSafe, hipe_rtl:label_name(SuccessLbl), + FalseLblName), + SuccessLbl], + MakeCode = + construct_subbin(Dst1,SizeReg,Offset,Orig) + ++ [update_offset(EndOffset, Ms), + hipe_rtl:mk_goto(TrueLblName)], + ExCode ++ CheckCode ++ MakeCode. + +construct_subbin(Dst,Size,Offset,Orig) -> + [BitOffset, ByteOffset, BitSize, ByteSize] = create_gcsafe_regs(4), + [hipe_rtl:mk_alu(ByteSize, Size, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + hipe_rtl:mk_alu(BitSize, Size, 'and', hipe_rtl:mk_imm(?LOW_BITS)), + hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + hipe_rtl:mk_alu(BitOffset, Offset, 'and', hipe_rtl:mk_imm(?LOW_BITS)), + hipe_tagscheme:mk_sub_binary(Dst, ByteSize, ByteOffset, + BitSize, BitOffset, Orig)]. + +%%%%%%%%%%%%%%%%%%%%%%%%% Skip Bits %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +skip_bits_all(1, Ms, TrueLblName,_FalseLblName) -> + {[BinSize], ExCode} = extract_matchstate_vars([binsize], Ms), + ExCode ++ + [update_offset(BinSize,Ms), + hipe_rtl:mk_goto(TrueLblName)]; +skip_bits_all(Unit,Ms, TrueLblName,FalseLblName) -> + [Size] = create_gcsafe_regs(1), + [SuccessLbl] = create_lbls(1), + SLblName = hipe_rtl:label_name(SuccessLbl), + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + ExCode ++ + [hipe_rtl:mk_alu(Size,BinSize,sub,Offset)] + ++ + test_alignment_code(Size,Unit,SLblName,FalseLblName) ++ + [SuccessLbl, + update_offset(BinSize,Ms), + hipe_rtl:mk_goto(TrueLblName)]. + +test_alignment_code(Size,Unit,SLblName,FalseLblName) -> + case Unit of + 1 -> [hipe_rtl:mk_goto(SLblName)]; + 2 -> get_fast_test_code(Size,1,SLblName,FalseLblName); + 4 -> get_fast_test_code(Size,3,SLblName,FalseLblName); + 8 -> get_fast_test_code(Size,7,SLblName,FalseLblName); + 16 -> get_fast_test_code(Size,15,SLblName,FalseLblName); + 32 -> get_fast_test_code(Size,31,SLblName,FalseLblName); + _ -> get_slow_test_code(Size,Unit,SLblName,FalseLblName) + end. + +get_fast_test_code(Size,AndTest,SLblName,FalseLblName) -> + [Tmp] = create_gcsafe_regs(1), + [hipe_rtl:mk_alub(Tmp,Size,'and',hipe_rtl:mk_imm(AndTest), + eq,SLblName,FalseLblName)]. + +%% This is really slow +get_slow_test_code(Size,Unit,SLblName,FalseLblName) -> + [Tmp] = create_gcsafe_regs(1), + [LoopLbl,Lbl1,Lbl2] = create_lbls(3), + LoopLblName = hipe_rtl:label_name(LoopLbl), + Lbl1Name = hipe_rtl:label_name(Lbl1), + Lbl2Name = hipe_rtl:label_name(Lbl2), + [hipe_rtl:mk_move(Tmp,Size), + LoopLbl, + hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), SLblName, Lbl1Name), + Lbl1, + hipe_rtl:mk_branch(Tmp, lt, hipe_rtl:mk_imm(0), FalseLblName, Lbl2Name), + Lbl2, + hipe_rtl:mk_alu(Tmp,Tmp,sub,hipe_rtl:mk_imm(Unit)), + hipe_rtl:mk_goto(LoopLblName)]. + +skip_bits2(Ms, NoOfBits, TrueLblName, FalseLblName) -> + [NewOffset] = create_gcsafe_regs(1), + [TempLbl] = create_lbls(1), + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + ExCode ++ + add_to_offset(NewOffset, NoOfBits, Offset, FalseLblName) ++ + [hipe_rtl:mk_branch(BinSize, 'ltu', NewOffset, FalseLblName, + hipe_rtl:label_name(TempLbl), 0.01), + TempLbl, + update_offset(NewOffset,Ms), + hipe_rtl:mk_goto(TrueLblName)]. + +add_to_offset(Result, Extra, Original, FalseLblName) -> + TrueLbl = hipe_rtl:mk_new_label(), + %% Note: 'ltu' means 'unsigned overflow'. + [hipe_rtl:mk_alub(Result, Extra, 'add', Original, 'ltu', + FalseLblName, hipe_rtl:label_name(TrueLbl)), + TrueLbl]. + +%%%%%%%%%%%%%%%%%%%%%%% Code for start match %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +get_binary_bytes(Binary, BinSize, Base, Offset, Orig, + TrueLblName, FalseLblName) -> + [OrigOffset,BitSize,BitOffset] = create_gcsafe_regs(3), + [SuccessLbl,SubLbl,OtherLbl,JoinLbl] = create_lbls(4), + [hipe_tagscheme:test_bitstr(Binary, hipe_rtl:label_name(SuccessLbl), + FalseLblName, 0.99), + SuccessLbl, + get_field_from_term({sub_binary, binsize}, Binary, BinSize), + hipe_rtl:mk_alu(BinSize, BinSize, sll, hipe_rtl:mk_imm(?BYTE_SHIFT)), + hipe_tagscheme:test_subbinary(Binary, hipe_rtl:label_name(SubLbl), + hipe_rtl:label_name(OtherLbl)), + SubLbl, + get_field_from_term({sub_binary, offset}, Binary, OrigOffset), + hipe_rtl:mk_alu(Offset, OrigOffset, sll, hipe_rtl:mk_imm(?BYTE_SHIFT)), + get_field_from_term({sub_binary, bitoffset}, Binary, BitOffset), + hipe_rtl:mk_alu(Offset, Offset, add, BitOffset), + get_field_from_term({sub_binary, bitsize}, Binary, BitSize), + hipe_rtl:mk_alu(BinSize, BinSize, add, Offset), + hipe_rtl:mk_alu(BinSize, BinSize, add, BitSize), + get_field_from_term({sub_binary, orig}, Binary, Orig), + hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl)), + OtherLbl, + hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_move(Orig, Binary), + JoinLbl] ++ + get_base(Orig,Base) ++ + [hipe_rtl:mk_goto(TrueLblName)]. + +%%%%%%%%%%%%%%%%%%%%%%%%% UTILS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +get_base(Orig,Base) -> + [HeapLbl,REFCLbl,EndLbl] = create_lbls(3), + [hipe_tagscheme:test_heap_binary(Orig, hipe_rtl:label_name(HeapLbl), + hipe_rtl:label_name(REFCLbl)), + HeapLbl, + hipe_rtl:mk_alu(Base, Orig, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA-2)), + hipe_rtl:mk_goto(hipe_rtl:label_name(EndLbl)), + REFCLbl, + hipe_rtl:mk_load(Base, Orig, hipe_rtl:mk_imm(?PROC_BIN_BYTES-2)), + EndLbl]. + +extract_matchstate_var(binsize, Ms) -> + BinSize = hipe_rtl:mk_new_reg_gcsafe(), + {BinSize, + get_field_from_term({matchstate, {matchbuffer, binsize}}, Ms, BinSize)}; +extract_matchstate_var(offset, Ms) -> + Offset = hipe_rtl:mk_new_reg_gcsafe(), + {Offset, + get_field_from_term({matchstate, {matchbuffer, offset}}, Ms, Offset)}; +extract_matchstate_var(base, Ms) -> + Base = hipe_rtl:mk_new_reg(), + {Base, + get_field_from_term({matchstate, {matchbuffer, base}}, Ms, Base)}; +extract_matchstate_var(orig, Ms) -> + Orig = hipe_rtl:mk_new_var(), + {Orig, + get_field_from_term({matchstate, {matchbuffer, orig}}, Ms, Orig)}. + +extract_matchstate_vars(List, Ms) -> + lists:unzip([extract_matchstate_var(Name, Ms) || Name <- List]). + +check_size(Offset, Size, BinSize, Tmp1, ContLblName, FalseLblName) -> + [add_to_offset(Tmp1, Offset, Size, FalseLblName), + hipe_rtl:mk_branch(Tmp1, leu, BinSize, ContLblName, FalseLblName, 0.99)]. + +check_size(Offset, Size, _BinSize, Tmp1, true, ContLblName, _FalseLblName) -> + [hipe_rtl:mk_alu(Tmp1, Offset, add, Size), + hipe_rtl:mk_goto(ContLblName)]; +check_size(Offset, Size, BinSize, Tmp1, false, ContLblName, FalseLblName) -> + check_size(Offset, Size, BinSize, Tmp1, ContLblName, FalseLblName). + +shift_type(true) -> + sra; +shift_type(false) -> + srl. + +get_type(true, LittleEndian) -> + {signed, endianess(LittleEndian)}; +get_type(false, LittleEndian) -> + {unsigned, endianess(LittleEndian)}. + +endianess(true) -> + little; +endianess(false) -> + big. + +aligned(Flags) -> + case Flags band ?BSF_ALIGNED of + 1 -> true; + 0 -> false + end. + +littleendian(Flags) -> + case Flags band 2 of + 2 -> true; + 0 -> false + end. + +signed(Flags) -> + case Flags band 4 of + 4 -> true; + 0 -> false + end. + +unsafe(Flags) -> + case Flags band 16 of + 16 -> true; + 0 -> false + end. + +update_offset(NewOffset, Ms) -> + set_field_from_term({matchstate,{matchbuffer,offset}}, + Ms, NewOffset). + +opt_update_ms([NewMs], OldMs) -> + [hipe_rtl:mk_move(NewMs, OldMs)]; +opt_update_ms([], _OldMs) -> + []. + +update_ms(NewMs, OldMs) -> + [hipe_rtl:mk_move(NewMs, OldMs)]. + +create_lbls(0) -> + []; +create_lbls(X) when X > 0-> + [hipe_rtl:mk_new_label()|create_lbls(X-1)]. + +make_dyn_prep(SizeReg, CCode) -> + [CLbl, SuccessLbl] = create_lbls(2), + Init = [hipe_rtl:mk_branch(SizeReg, le, hipe_rtl:mk_imm(?MAX_SMALL_BITS), + hipe_rtl:label_name(SuccessLbl), + hipe_rtl:label_name(CLbl)), + SuccessLbl], + End = [CLbl|CCode], + {Init, End}. + +%%------------------------------------------------------------------------ +%% From hipe_rtl_binutil.erl +%%------------------------------------------------------------------------ + +get_unaligned_int(Dst1, Size, Base, Offset, Shiftr, Type, TrueLblName) -> + [Reg] = create_regs(1), + [get_maybe_unaligned_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type), + do_bignum_code(Size, Type, Reg, Dst1, TrueLblName)]. + +get_maybe_unaligned_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type) -> + [LowBits] = create_regs(1), + [AlignedLbl, UnAlignedLbl, EndLbl] = create_lbls(3), + [hipe_rtl:mk_alub(LowBits, Offset, 'and', hipe_rtl:mk_imm(?LOW_BITS), + eq, hipe_rtl:label_name(AlignedLbl), + hipe_rtl:label_name(UnAlignedLbl)), + AlignedLbl, + get_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type), + hipe_rtl:mk_goto(hipe_rtl:label_name(EndLbl)), + UnAlignedLbl, + get_unaligned_int_to_reg(Reg, Size, Base, Offset, LowBits, Shiftr, Type), + EndLbl]. + +get_unaligned_int_to_reg(Reg, Size, Base, Offset, LowBits, Shiftr, Type) -> + [ByteOffset, ShiftBits, LoadDst, Tmp, TotBits] = create_gcsafe_regs(5), + [MoreLbl, LessLbl, JoinLbl] = create_lbls(3), + WordSize = hipe_rtl_arch:word_size(), + MinLoad = (Size-1) div ?BYTE_SIZE +1, + MaxLoad = MinLoad + 1, + Code1 = + [hipe_rtl:mk_alu(TotBits, LowBits, 'add', hipe_rtl:mk_imm(Size)), + hipe_rtl:mk_alu(ByteOffset, Offset, 'srl', hipe_rtl:mk_imm(?BYTE_SHIFT))], + Code2 = + case {Size rem ?BYTE_SIZE, MinLoad} of + {1, _} -> + [load_bytes(LoadDst, Base, ByteOffset, Type, MinLoad), + hipe_rtl:mk_alu(ShiftBits, LowBits, 'add', + hipe_rtl:mk_imm((WordSize-MinLoad)*?BYTE_SIZE))]; + {_, WordSize} -> + UnsignedBig = {unsigned, big}, + [hipe_rtl:mk_branch(TotBits, le, hipe_rtl:mk_imm(MinLoad*?BYTE_SIZE), + hipe_rtl:label_name(LessLbl), + hipe_rtl:label_name(MoreLbl)), + LessLbl, + load_bytes(LoadDst, Base, ByteOffset, Type, MinLoad), + hipe_rtl:mk_alu(ShiftBits, LowBits, 'add', + hipe_rtl:mk_imm((WordSize-MinLoad)*?BYTE_SIZE)), + hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl)), + MoreLbl, + load_bytes(LoadDst, Base, ByteOffset, UnsignedBig, MinLoad), + hipe_rtl:mk_alu(LoadDst, LoadDst, 'sll', LowBits), + load_bytes(Tmp, Base, ByteOffset, UnsignedBig, 1), + hipe_rtl:mk_alu(LowBits, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', LowBits), + hipe_rtl:mk_alu(Tmp, Tmp, 'srl', LowBits), + hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp), + hipe_rtl:mk_move(ShiftBits, hipe_rtl:mk_imm(0)), + JoinLbl]; + {_, _} -> + [load_bytes(LoadDst, Base, ByteOffset, Type, MaxLoad), + hipe_rtl:mk_alu(ShiftBits, LowBits, 'add', + hipe_rtl:mk_imm((WordSize-MaxLoad)*?BYTE_SIZE))] + end, + Code3 = + [hipe_rtl:mk_alu(Tmp, LoadDst, sll, ShiftBits), + hipe_rtl:mk_alu(Reg, Tmp, Shiftr, + hipe_rtl:mk_imm(WordSize*?BYTE_SIZE-Size))], + Code1 ++ Code2 ++ Code3. + +get_int(Dst1, Size, Base, Offset, Shiftr, Type, TrueLblName) -> + [Reg] = create_gcsafe_regs(1), + [get_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type), + do_bignum_code(Size, Type, Reg, Dst1, TrueLblName)]. + +get_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type) -> + [ByteOffset] = create_gcsafe_regs(1), + Code1 = + [hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + load_bytes(Reg, Base, ByteOffset, Type, ((Size-1) div ?BYTE_SIZE +1))], + Code2 = + case Size rem ?BYTE_SIZE of + 0 -> + []; + _ -> + [hipe_rtl:mk_alu(Reg, Reg, Shiftr, + hipe_rtl:mk_imm(?BYTE_SIZE -Size rem ?BYTE_SIZE))] + end, + Code1 ++ Code2. + +get_big_unknown_int(Dst1, Base, Offset, NewOffset, + Shiftr, Type, TrueLblName) -> + [LoadDst, ByteOffset, Limit, Tmp, LowBits] = create_gcsafe_regs(5), + [ContLbl, BackLbl, LoopLbl, TagLbl, LastLbl, EndLbl] = create_lbls(6), + [hipe_rtl:mk_move(LoadDst, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_branch(NewOffset, ne, Offset, hipe_rtl:label_name(ContLbl), + hipe_rtl:label_name(TagLbl), 0.99), + ContLbl, + hipe_rtl:mk_alu(Limit, NewOffset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Limit, Limit, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + load_bytes(LoadDst, Base, ByteOffset, Type, 1), + BackLbl, + hipe_rtl:mk_branch(ByteOffset, le, Limit, hipe_rtl:label_name(LoopLbl), + hipe_rtl:label_name(EndLbl)), + LoopLbl, + load_bytes(Tmp, Base, ByteOffset, {unsigned, big}, 1), + hipe_rtl:mk_alu(LoadDst, LoadDst, sll, hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp), + hipe_rtl:mk_goto(hipe_rtl:label_name(BackLbl)), + EndLbl, + hipe_rtl:mk_alub(LowBits, NewOffset, 'and', hipe_rtl:mk_imm(?LOW_BITS), eq, + hipe_rtl:label_name(TagLbl), hipe_rtl:label_name(LastLbl)), + LastLbl, + hipe_rtl:mk_alu(LowBits, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', LowBits), + hipe_rtl:mk_alu(LoadDst, LoadDst, Shiftr, LowBits), + TagLbl] ++ + do_bignum_code(64, Type, LoadDst, Dst1, TrueLblName). + +get_little_unknown_int(Dst1, Base, Offset, NewOffset, + Shiftr, Type, TrueLblName) -> + [LoadDst, ByteOffset, Limit, ShiftReg, LowBits, Tmp] = create_gcsafe_regs(6), + [ContLbl, BackLbl, LoopLbl, DoneLbl, TagLbl] = create_lbls(5), + [hipe_rtl:mk_move(LoadDst, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_branch(NewOffset, ne, Offset, hipe_rtl:label_name(ContLbl), + hipe_rtl:label_name(TagLbl), 0.99), + ContLbl, + hipe_rtl:mk_alu(Tmp, NewOffset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + hipe_rtl:mk_alu(Limit, Tmp, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)), + hipe_rtl:mk_move(ShiftReg, hipe_rtl:mk_imm(0)), + BackLbl, + hipe_rtl:mk_branch(ByteOffset, lt, Limit, + hipe_rtl:label_name(LoopLbl), + hipe_rtl:label_name(DoneLbl)), + LoopLbl, + load_bytes(Tmp, Base, ByteOffset, {unsigned, big}, 1), + hipe_rtl:mk_alu(Tmp, Tmp, sll, ShiftReg), + hipe_rtl:mk_alu(ShiftReg, ShiftReg, add, hipe_rtl:mk_imm(?BYTE_SIZE)), + hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp), + hipe_rtl:mk_goto(hipe_rtl:label_name(BackLbl)), + DoneLbl, + hipe_rtl:mk_alu(LowBits, NewOffset, 'and', hipe_rtl:mk_imm(?LOW_BITS)), + hipe_rtl:mk_alu(LowBits, hipe_rtl:mk_imm(?BYTE_SIZE), sub, LowBits), + hipe_rtl:mk_alu(LowBits, LowBits, 'and', hipe_rtl:mk_imm(?LOW_BITS)), + load_bytes(Tmp, Base, ByteOffset, Type, 1), + hipe_rtl:mk_alu(Tmp, Tmp, Shiftr, LowBits), + hipe_rtl:mk_alu(Tmp, Tmp, sll, ShiftReg), + hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp), + TagLbl] ++ + do_bignum_code(64, Type, LoadDst, Dst1, TrueLblName). + +do_bignum_code(Size, {Signedness,_}, Src, Dst1, TrueLblName) + when is_integer(Size) -> + case {Size > ?MAX_SMALL_BITS, Signedness} of + {false, _} -> + [hipe_tagscheme:tag_fixnum(Dst1, Src), + hipe_rtl:mk_goto(TrueLblName)]; + {true, signed} -> + make_int_gc_code(Size) ++ + signed_bignum(Dst1, Src, TrueLblName); + {true, unsigned} -> + make_int_gc_code(Size) ++ + unsigned_bignum(Dst1, Src, TrueLblName) + end. + +signed_bignum(Dst1, Src, TrueLblName) -> + Tmp1 = hipe_rtl:mk_new_reg(), + BignumLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:realtag_fixnum(Dst1, Src), + hipe_tagscheme:realuntag_fixnum(Tmp1, Dst1), + hipe_rtl:mk_branch(Tmp1, eq, Src, TrueLblName, + hipe_rtl:label_name(BignumLabel)), + BignumLabel, + hipe_tagscheme:unsafe_mk_big(Dst1, Src, signed), + hipe_rtl:mk_goto(TrueLblName)]. + +unsigned_bignum(Dst1, Src, TrueLblName) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + BignumLbl = hipe_rtl:mk_new_label(), + BignumLblName = hipe_rtl:label_name(BignumLbl), + NxtLbl = hipe_rtl:mk_new_label(), + NxtLblName = hipe_rtl:label_name(NxtLbl), + [hipe_rtl:mk_branch(Src, lt, hipe_rtl:mk_imm(0), BignumLblName, NxtLblName), + NxtLbl, + hipe_tagscheme:realtag_fixnum(Dst1, Src), + hipe_tagscheme:realuntag_fixnum(Tmp1, Dst1), + hipe_rtl:mk_branch(Tmp1, eq, Src, TrueLblName, BignumLblName), + BignumLbl, + hipe_tagscheme:unsafe_mk_big(Dst1, Src, unsigned), + hipe_rtl:mk_goto(TrueLblName)]. + +load_bytes(Dst, Base, Offset, {Signedness, _Endianess},1) -> + [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]; +load_bytes(Dst, Base, Offset, {Signedness, Endianess},2) -> + case Endianess of + big -> + hipe_rtl_arch:load_big_2(Dst, Base, Offset, Signedness); + little -> + hipe_rtl_arch:load_little_2(Dst, Base, Offset, Signedness) + end; +load_bytes(Dst, Base, Offset, {Signedness, Endianess},3) -> + Tmp1 = hipe_rtl:mk_new_reg(), + case Endianess of + big -> + [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]; + little -> + [hipe_rtl:mk_load(Dst, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte,unsigned), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, Offset, byte,Signedness), + hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(16)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))] + end; +load_bytes(Dst, Base, Offset, {Signedness, Endianess}, 4) -> + case Endianess of + big -> + hipe_rtl_arch:load_big_4(Dst, Base, Offset, Signedness); + little -> + hipe_rtl_arch:load_little_4(Dst, Base, Offset, Signedness) + end; + +load_bytes(Dst, Base, Offset, {Signedness, Endianess}, X) when X > 1 -> + [LoopLbl, EndLbl] = create_lbls(2), + [Tmp1, Limit, TmpOffset] = create_regs(3), + case Endianess of + big -> + [hipe_rtl:mk_alu(Limit, Offset, add, hipe_rtl:mk_imm(X)), + hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + LoopLbl, + hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned), + hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_branch(Offset, lt, Limit, hipe_rtl:label_name(LoopLbl), + hipe_rtl:label_name(EndLbl)), + EndLbl]; + little -> + [hipe_rtl:mk_alu(Limit, Offset, add, hipe_rtl:mk_imm(X)), + hipe_rtl:mk_alu(TmpOffset, Limit, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Dst, Base, TmpOffset, byte, Signedness), + LoopLbl, + hipe_rtl:mk_alu(TmpOffset, TmpOffset, sub, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_load(Tmp1, Base, TmpOffset, byte, Signedness), + hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)), + hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1), + hipe_rtl:mk_branch(Offset, lt, TmpOffset, hipe_rtl:label_name(LoopLbl), + hipe_rtl:label_name(EndLbl)), + EndLbl, + hipe_rtl:mk_move(Offset, Limit)] + end. + +create_regs(X) when X > 0 -> + [hipe_rtl:mk_new_reg()|create_regs(X-1)]; +create_regs(0) -> + []. + +create_gcsafe_regs(X) when X > 0 -> + [hipe_rtl:mk_new_reg_gcsafe()|create_gcsafe_regs(X-1)]; +create_gcsafe_regs(0) -> + []. + +first_part(Var, Register, FalseLblName) -> + [SuccessLbl1, SuccessLbl2] = create_lbls(2), + [hipe_tagscheme:test_fixnum(Var, hipe_rtl:label_name(SuccessLbl1), + FalseLblName, 0.99), + SuccessLbl1, + hipe_tagscheme:fixnum_ge(Var, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(0)), + hipe_rtl:label_name(SuccessLbl2), FalseLblName, 0.99), + SuccessLbl2, + hipe_tagscheme:untag_fixnum(Register, Var)]. + +make_size(1, BitsVar, FalseLblName) -> + [DstReg] = create_regs(1), + {first_part(BitsVar, DstReg, FalseLblName), DstReg}; +make_size(?BYTE_SIZE, BitsVar, FalseLblName) -> + [DstReg] = create_regs(1), + Code = + first_part(BitsVar, DstReg, FalseLblName) ++ + [hipe_rtl:mk_alu(DstReg, DstReg, sll, hipe_rtl:mk_imm(?BYTE_SHIFT))], + {Code, DstReg}; +make_size(UnitImm, BitsVar, FalseLblName) -> + [DstReg] = create_regs(1), + UnitList = number2list(UnitImm), + Code = multiply_code(UnitList, BitsVar, DstReg, FalseLblName), + {Code, DstReg}. + +multiply_code(List=[Head|_Tail], Variable, Result, FalseLblName) -> + Test = set_high(Head), + Tmp1 = hipe_rtl:mk_new_reg(), + SuccessLbl = hipe_rtl:mk_new_label(), + Register = hipe_rtl:mk_new_reg(), + Code = [hipe_rtl:mk_move(Result, hipe_rtl:mk_imm(0))| + first_part(Variable, Register, FalseLblName)] + ++ + [hipe_rtl:mk_alub(Tmp1, Register, 'and', hipe_rtl:mk_imm(Test), + eq, hipe_rtl:label_name(SuccessLbl), + FalseLblName, 0.99), + SuccessLbl], + multiply_code(List, Register, Result, FalseLblName, Tmp1, Code). + +multiply_code([ShiftSize|Rest], Register, Result, FalseLblName, Tmp1, OldCode) -> + SuccessLbl = hipe_rtl:mk_new_label(), + Code = OldCode ++ [hipe_rtl:mk_alu(Tmp1, Register, sll, hipe_rtl:mk_imm(ShiftSize)), + hipe_rtl:mk_alub(Result, Tmp1, 'add', Result, not_overflow, hipe_rtl:label_name(SuccessLbl), FalseLblName, 0.99), + SuccessLbl], + multiply_code(Rest, Register, Result, FalseLblName, Tmp1, Code); +multiply_code([], _Register, _Result, _FalseLblName, _Tmp1, Code) -> + Code. + +number2list(X) when is_integer(X), X >= 0 -> + number2list(X, []). + +number2list(1, Acc) -> + lists:reverse([0|Acc]); +number2list(0, Acc) -> + lists:reverse(Acc); +number2list(X, Acc) -> + F = floorlog2(X), + number2list(X-(1 bsl F), [F|Acc]). + +floorlog2(X) -> + round(math:log(X)/math:log(2)-0.5). + +set_high(X) -> + set_high(X, 0). + +set_high(0, Y) -> + Y; +set_high(X, Y) -> + set_high(X-1, Y+(1 bsl (27-X))). + +is_illegal_const(Const) -> + Const >= 1 bsl (hipe_rtl_arch:word_size() * ?BYTE_SIZE) orelse Const < 0. diff --git a/lib/hipe/rtl/hipe_rtl_cfg.erl b/lib/hipe/rtl/hipe_rtl_cfg.erl new file mode 100644 index 0000000000..b6c1d63262 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_cfg.erl @@ -0,0 +1,201 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_rtl_cfg). + +-export([init/1, + labels/1, + params/1, params_update/2, + start_label/1, + succ/2, + pred/2, + bb/2, bb_add/3, bb_insert_between/5, + redirect/4, + remove_trivial_bbs/1, remove_unreachable_code/1, + linearize/1, + pp/1, pp/2]). +-export([preorder/1, postorder/1, reverse_postorder/1]). + +-define(RTL_CFG, true). % needed for cfg.inc below + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% CFG interface to RTL. +%% + +init(Rtl) -> + %% hipe_rtl:pp(Rtl), + Code = hipe_rtl:rtl_code(Rtl), + StartLabel = hipe_rtl:label_name(hd(Code)), + CFG0 = mk_empty_cfg(hipe_rtl:rtl_fun(Rtl), + StartLabel, + hipe_rtl:rtl_data(Rtl), + hipe_rtl:rtl_is_closure(Rtl), + hipe_rtl:rtl_is_leaf(Rtl), + hipe_rtl:rtl_params(Rtl)), + CFG = info_update(CFG0, hipe_rtl:rtl_info(Rtl)), + take_bbs(Code, CFG). + +%% @spec is_comment(hipe_rtl:rtl_instruction()) -> boolean() +%% @doc Succeeds if Instr has no effect. +is_comment(Instr) -> + hipe_rtl:is_comment(Instr). + +%% @spec is_goto(hipe_rtl:rtl_instruction()) -> boolean() +%% @doc Succeeds if Instr is just a jump (no side-effects). +is_goto(Instr) -> + hipe_rtl:is_goto(Instr). + +is_label(Instr) -> + hipe_rtl:is_label(Instr). + +label_name(Instr) -> + hipe_rtl:label_name(Instr). + +mk_label(Name) -> + hipe_rtl:mk_label(Name). + +mk_goto(Name) -> + hipe_rtl:mk_goto(Name). + +branch_successors(Instr) -> + case Instr of + #branch{} -> [hipe_rtl:branch_true_label(Instr), + hipe_rtl:branch_false_label(Instr)]; + #alub{} -> [hipe_rtl:alub_true_label(Instr), + hipe_rtl:alub_false_label(Instr)]; + #switch{} -> hipe_rtl:switch_labels(Instr); + #call{} -> + case hipe_rtl:call_fail(Instr) of + [] -> [hipe_rtl:call_continuation(Instr)]; + Fail -> [hipe_rtl:call_continuation(Instr),Fail] + end; + #goto{} -> [hipe_rtl:goto_label(Instr)]; + #goto_index{} -> hipe_rtl:goto_index_labels(Instr); + _ -> [] + end. + +fails_to(Instr) -> + case Instr of + #call{} -> [hipe_rtl:call_fail(Instr)]; + _ -> [] + end. + +is_branch(Instr) -> + case Instr of + #branch{} -> true; + #alub{} -> true; + #switch{} -> true; + #goto{} -> true; + #goto_index{} -> true; + #enter{} -> true; + #return{} -> true; + #call{} -> + case hipe_rtl:call_fail(Instr) of + [] -> + case hipe_rtl:call_continuation(Instr) of + [] -> false; + _ -> true + end; + _ -> true + end; + _ -> false + end. + +is_pure_branch(Instr) -> + case Instr of + #branch{} -> true; + #switch{} -> true; + #goto{} -> true; + _ -> false + end. + +redirect_jmp(Jmp, ToOld, ToNew) -> + hipe_rtl:redirect_jmp(Jmp, ToOld, ToNew). + +redirect_ops([Label|Labels], CFG, Map) -> + BB = bb(CFG, Label), + Code = hipe_bb:code(BB), + NewCode = [rewrite(I,Map) || I <- Code], + NewCFG = bb_add(CFG, Label, hipe_bb:code_update(BB, NewCode)), + redirect_ops(Labels, NewCFG, Map); +redirect_ops([],CFG,_) -> CFG. + +rewrite(I, Map) -> + case I of + #load_address{} -> + case hipe_rtl:load_address_type(I) of + constant -> I; + _ -> + NewL = + find_new_label(hipe_rtl:load_address_addr(I), Map), + hipe_rtl:load_address_addr_update(I, NewL) + end; + _ -> I + end. + + +pp(CFG) -> + hipe_rtl:pp(linearize(CFG)). + +pp(Dev, CFG) -> + hipe_rtl:pp(Dev, linearize(CFG)). + +linearize(CFG) -> + Code = linearize_cfg(CFG), + Rtl = hipe_rtl:mk_rtl(function(CFG), + params(CFG), + is_closure(CFG), + is_leaf(CFG), + Code, + data(CFG), + hipe_gensym:var_range(rtl), + hipe_gensym:label_range(rtl)), + hipe_rtl:rtl_info_update(Rtl, info(CFG)). + +%% %% Warning: this arity might not be the true arity. +%% %% The true arity of a closure usually differs. +%% arity(CFG) -> +%% {_M,_F,A} = function(CFG), +%% A. + +%% init_gensym(CFG)-> +%% HighestVar = find_highest_var(CFG), +%% HighestLabel = find_highest_label(CFG), +%% hipe_gensym:init(), +%% hipe_gensym:set_var(rtl, HighestVar), +%% hipe_gensym:set_label(rtl, HighestLabel). +%% +%% highest_var(Code)-> +%% hipe_rtl:highest_var(Code). + +is_phi(I) -> + hipe_rtl:is_phi(I). + +phi_remove_pred(I, Pred) -> + hipe_rtl:phi_remove_pred(I, Pred). + +phi_redirect_pred(I, OldPred, NewPred) -> + hipe_rtl:phi_redirect_pred(I, OldPred, NewPred). diff --git a/lib/hipe/rtl/hipe_rtl_cleanup_const.erl b/lib/hipe/rtl/hipe_rtl_cleanup_const.erl new file mode 100644 index 0000000000..d3e71a56c1 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_cleanup_const.erl @@ -0,0 +1,85 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_rtl_cleanup_const.erl +%%% Author : Tobias Lindahl +%%% Description : +%%% +%%% Created : 5 Mar 2004 by Tobias Lindahl +%%%------------------------------------------------------------------- + +%% Big constants (floats, bignums) can be used as arguments to +%% arbitrary instructions in RTL. Since these are located in the +%% constants area and the only instruction that currently can access +%% them is load_address, the constants have to be moved out of the +%% instruction and loaded into temporary variables before the +%% instruction. +%% +%% Some backends can make use of the information that the arguments +%% are really constants. Here is the place to add new backend-specific +%% behaviour depending on this. + +%%-------------------------------------------------------------------- + +-module(hipe_rtl_cleanup_const). + +-export([cleanup/1]). + +-include("hipe_rtl.hrl"). + +%%-------------------------------------------------------------------- + +%%-spec cleanup(#rtl{}) -> #rtl{}. + +cleanup(Rtl) -> + Code = cleanup(hipe_rtl:rtl_code(Rtl), []), + hipe_rtl:rtl_code_update(Rtl, Code). + +cleanup([I|Left], Acc) -> + Args = hipe_rtl:args(I), + case [X || X <- Args, hipe_rtl:is_const_label(X)] of + [] -> + cleanup(Left, [I|Acc]); + ConstArgs -> + NewIns = cleanup_instr(ConstArgs, I), + cleanup(Left, NewIns ++ Acc) + end; +cleanup([], Acc) -> + lists:reverse(Acc). + +cleanup_instr(Consts, I) -> + cleanup_instr(ordsets:from_list(Consts), I, []). + +cleanup_instr([Const|Left], I, Acc) -> + Dst = hipe_rtl:mk_new_var(), + ConstLabel = hipe_rtl:const_label_label(Const), + Load = hipe_rtl:mk_load_address(Dst, ConstLabel, constant), + case I of + X when is_record(X, fp_unop) orelse is_record(X, fp) -> + Fdst = hipe_rtl:mk_new_fpreg(), + Fconv = hipe_tagscheme:unsafe_untag_float(Fdst, Dst), + NewI = hipe_rtl:subst_uses([{Const, Fdst}], I), + cleanup_instr(Left, NewI, Fconv ++ [Load|Acc]); + _ -> + NewI = hipe_rtl:subst_uses([{Const, Dst}], I), + cleanup_instr(Left, NewI, [Load|Acc]) + end; +cleanup_instr([], I, Acc) -> + [I|Acc]. diff --git a/lib/hipe/rtl/hipe_rtl_exceptions.erl b/lib/hipe/rtl/hipe_rtl_exceptions.erl new file mode 100644 index 0000000000..879b84c0b0 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_exceptions.erl @@ -0,0 +1,120 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_rtl_exceptions.erl +%% Module : hipe_rtl_exceptions +%% Purpose : +%% Notes : +%% History : * 2001-04-10 Erik Johansson (happi@it.uu.se): +%% Created. +%% CVS : +%% $Id$ +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_exceptions). + +-export([gen_fail/3, gen_begin_handler/3]). + +-include("../main/hipe.hrl"). +-include("hipe_literals.hrl"). + +%% -------------------------------------------------------------------- +%% Handle the Icode instruction +%% FAIL +%% +gen_fail(Class, Args, L) -> + case Args of + [Reason] -> + case Class of + exit -> + gen_exit(Reason, L); + throw -> + gen_throw(Reason, L); + error -> + gen_error(Reason, L) + end; + [Arg1,Arg2] -> + case Class of + error -> + Reason = Arg1, ArgList = Arg2, + gen_error(Reason, ArgList, L); + rethrow -> + Exception = Arg1, Reason = Arg2, + gen_rethrow(Exception, Reason, L) + end + end. + +%% -------------------------------------------------------------------- +%% Exception handler glue; interfaces between the runtime system's +%% exception state and the Icode view of exception handling. + +gen_begin_handler(I, VarMap, ConstTab) -> + Ds = hipe_icode:begin_handler_dstlist(I), + {Vars, VarMap1} = hipe_rtl_varmap:ivs2rvs(Ds, VarMap), + [FTagVar,FValueVar,FTraceVar] = Vars, + {[hipe_rtl:mk_comment('begin_handler'), + hipe_rtl_arch:pcb_load(FValueVar, ?P_FVALUE), + hipe_rtl_arch:pcb_load(FTraceVar, ?P_FTRACE), + %% synthesized from P->freason by hipe_handle_exception() + hipe_rtl_arch:pcb_load(FTagVar, ?P_ARG0) + ], + VarMap1, ConstTab}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Exceptions + +gen_exit(Reason, L) -> + gen_fail_call({erlang,exit,1}, [Reason], L). + +gen_throw(Reason, L) -> + gen_fail_call({erlang,throw,1}, [Reason], L). + +gen_error(Reason, L) -> + gen_fail_call({erlang,error,1}, [Reason], L). + +gen_error(Reason, ArgList, L) -> + gen_fail_call({erlang,error,2}, [Reason,ArgList], L). + +gen_rethrow(Exception, Reason, L) -> + gen_fail_call(rethrow, [Exception,Reason], L). + +%% Generic fail. We can't use 'enter' with a fail label (there can be no +%% stack descriptor info for an enter), so for a non-nil fail label we +%% generate a call followed by a dummy return. +%% +%% Update: The runtime system now interprets the return address of +%% the BIF call in order to list the invoking MFA in the stack trace. +%% Generating tailcalls here defeats that purpose, so we no longer do that. + +%%gen_fail_call(Fun, Args, []) -> +%% [hipe_rtl:mk_enter(Fun, Args, remote)]; +gen_fail_call(Fun, Args, L) -> + ContLbl = hipe_rtl:mk_new_label(), + Cont = hipe_rtl:label_name(ContLbl), + Zero = hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(0)), + [hipe_rtl:mk_call([], Fun, Args, Cont, L, remote), + ContLbl, + hipe_rtl:mk_return([Zero])]. diff --git a/lib/hipe/rtl/hipe_rtl_lcm.erl b/lib/hipe/rtl/hipe_rtl_lcm.erl new file mode 100644 index 0000000000..5d65389d48 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_lcm.erl @@ -0,0 +1,1696 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% File : hipe_rtl_lcm.erl +%% Author : Henrik Nyman and Erik Cedheim +%% Description : Performs Lazy Code Motion on RTL +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% +%% This module implements Lazy Code Motion on RTL. +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_lcm). + +-export([rtl_lcm/2]). + +-define(SETS, ordsets). %% Which set implementation module to use + %% We have tried gb_sets, sets and ordsets and + %% ordsets seems to be a lot faster according to + %% our test runs. + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). +-include("../flow/cfg.hrl"). + +%%-define(LCM_DEBUG, true). %% When defined and true, produces debug printouts + +%%============================================================================= + +%% +%% @doc Performs Lazy Code Motion on RTL. +%% + +-spec rtl_lcm(cfg(), comp_options()) -> cfg(). + +rtl_lcm(CFG, Options) -> + %% Perform pre-calculation of the data sets. + ?opt_start_timer("RTL LCM precalc"), + {NodeInfo, EdgeInfo, AllExpr, ExprMap, IdMap, Labels} = lcm_precalc(CFG, Options), + ?opt_stop_timer("RTL LCM precalc"), + %% {NodeInfo, EdgeInfo, AllExpr, ExprMap, Labels} = + %% ?option_time(lcm_precalc(CFG, Options), "RTL LCM precalc", Options), + + pp_debug("-------------------------------------------------~n",[]), + %% pp_debug( "~w~n", [MFA]), + + %% A check if we should pretty print the result. + case proplists:get_bool(pp_rtl_lcm, Options) of + true-> + pp_debug("-------------------------------------------------~n",[]), + %% pp_debug("AllExpr: ~w~n", [AllExpr]), + pp_debug("AllExpr:~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(AllExpr)), + %% pp_sets(ExprMap, NodeInfo, EdgeInfo, AllExpr, CFG2<-ERROR!, Labels); + pp_sets(ExprMap, IdMap, NodeInfo, EdgeInfo, AllExpr, CFG, Labels); + _ -> + ok + end, + + pp_debug("-------------------------------------------------~n",[]), + ?option_time({CFG1, MoveSet} = perform_lcm(CFG, NodeInfo, EdgeInfo, ExprMap, + IdMap, AllExpr, mk_edge_bb_map(), + ?SETS:new(), Labels), + "RTL LCM perform_lcm", Options), + + %% Scan through list of moved expressions and replace their + %% assignments with the new temporary created for that expression + MoveList = ?SETS:to_list(MoveSet), + ?option_time(CFG2 = moved_expr_replace_assignments(CFG1, ExprMap, IdMap, + MoveList), + "RTL LCM moved_expr_replace_assignments", Options), + pp_debug("-------------------------------------------------~n~n",[]), + + CFG2. + +%%============================================================================= +%% Performs lazy code motion given the pre-calculated data sets. +perform_lcm(CFG, _, _, _, _, _, _, MoveSet, []) -> + {CFG, MoveSet}; +perform_lcm(CFG0, NodeInfo, EdgeInfo, ExprMap, IdMap, AllExp, BetweenMap, + MoveSet0, [Label|Labels]) -> + Code0 = hipe_bb:code(hipe_rtl_cfg:bb(CFG0, Label)), + DeleteSet = delete(NodeInfo, Label), + + %% Check if something should be deleted from this block. + {CFG1, MoveSet1} = + case ?SETS:size(DeleteSet) > 0 of + true -> + pp_debug("Label ~w: Expressions Deleted: ~n", [Label]), + Code1 = delete_exprs(Code0, ExprMap, IdMap, ?SETS:to_list(DeleteSet)), + BB = hipe_bb:mk_bb(Code1), + {hipe_rtl_cfg:bb_add(CFG0, Label, BB), + ?SETS:union(MoveSet0, DeleteSet)}; + false -> + {CFG0, MoveSet0} + end, + + Succs = hipe_rtl_cfg:succ(CFG1, Label), + + %% Go through the list of successors and insert expression where needed. + %% Also collect a list of expressions that are inserted somewhere + {CFG2, NewBetweenMap, MoveSet2} = + lists:foldl(fun(Succ, {CFG, BtwMap, MoveSet}) -> + InsertSet = calc_insert_edge(NodeInfo, EdgeInfo, + Label, Succ), + %% Check if something should be inserted on this edge. + case ?SETS:size(InsertSet) > 0 of + true -> + pp_debug("Label ~w: Expressions Inserted for Successor: ~w~n", [Label, Succ]), + InsertList = ?SETS:to_list(InsertSet), + {NewCFG, NewBtwMap} = + insert_exprs(CFG, Label, Succ, ExprMap, IdMap, + BtwMap, InsertList), + {NewCFG, NewBtwMap, ?SETS:union(MoveSet, InsertSet)}; + false -> + {CFG, BtwMap, MoveSet} + end + end, + {CFG1, BetweenMap, MoveSet1}, Succs), + + perform_lcm(CFG2, NodeInfo, EdgeInfo, ExprMap, IdMap, AllExp, NewBetweenMap, + MoveSet2, Labels). + +%%============================================================================= +%% Scan through list of moved expressions and replace their +%% assignments with the new temporary created for that expression. +moved_expr_replace_assignments(CFG, _, _, []) -> + CFG; +moved_expr_replace_assignments(CFG0, ExprMap, IdMap, [ExprId|Exprs]) -> + Expr = expr_id_map_get_expr(IdMap, ExprId), + case expr_map_lookup(ExprMap, Expr) of + {value, {_, ReplaceList, NewReg}} -> + CFG1 = lists:foldl(fun({Label, Reg}, CFG) -> + %% Find and replace expression in block + pp_debug("Label ~w: Expressions Replaced:~n", [Label]), + Code0 = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)), + Code1 = + moved_expr_do_replacement(expr_set_dst(Expr, Reg), + Reg, NewReg, Code0), + hipe_rtl_cfg:bb_add(CFG, Label, hipe_bb:mk_bb(Code1)) + end, CFG0, ReplaceList), + moved_expr_replace_assignments(CFG1, ExprMap, IdMap, Exprs); + none -> + moved_expr_replace_assignments(CFG0, ExprMap, IdMap, Exprs) + end. + +moved_expr_do_replacement(_, _, _, []) -> + []; +moved_expr_do_replacement(Expr, Reg, NewReg, [Expr|Instrs]) -> + NewExpr = expr_set_dst(Expr, NewReg), + Move = mk_expr_move_instr(Reg, NewReg), + pp_debug(" Replacing:~n", []), + pp_debug_instr(Expr), + pp_debug(" With:~n", []), + pp_debug_instr(NewExpr), + pp_debug_instr(Move), + [NewExpr, Move | moved_expr_do_replacement(Expr, Reg, NewReg, Instrs)]; +moved_expr_do_replacement(Expr, Reg, NewReg, [Instr|Instrs]) -> + [Instr | moved_expr_do_replacement(Expr, Reg, NewReg, Instrs)]. + +%%============================================================================= +%% Goes through the given list of expressions and deletes them from the code. +%% NOTE We do not actually delete an expression, but instead we replace it +%% with an assignment from the new temporary containing the result of the +%% expressions which is guaranteed to have been calculated earlier in +%% the code. +delete_exprs(Code, _, _, []) -> + Code; +delete_exprs(Code, ExprMap, IdMap, [ExprId|Exprs]) -> + Expr = expr_id_map_get_expr(IdMap, ExprId), + %% Perform a foldl that goes through the code and deletes all + %% occurences of the expression. + NewCode = + lists:reverse + (lists:foldl(fun(CodeExpr, Acc) -> + case is_expr(CodeExpr) of + true -> + case expr_clear_dst(CodeExpr) =:= Expr of + true -> + pp_debug(" Deleting: ", []), + pp_debug_instr(CodeExpr), + %% Lookup expression entry. + Defines = + case expr_map_lookup(ExprMap, Expr) of + {value, {_, _, Defs}} -> + Defs; + none -> + exit({?MODULE, expr_map_lookup, + "expression missing"}) + end, + MoveCode = + mk_expr_move_instr(hipe_rtl:defines(CodeExpr), + Defines), + pp_debug(" Replacing with: ", []), + pp_debug_instr(MoveCode), + [MoveCode|Acc]; + false -> + [CodeExpr|Acc] + end; + false -> + [CodeExpr|Acc] + end + end, + [], Code)), + delete_exprs(NewCode, ExprMap, IdMap, Exprs). + +%%============================================================================= +%% Goes through the given list of expressions and inserts them at +%% appropriate places in the code. +insert_exprs(CFG, _, _, _, _, BetweenMap, []) -> + {CFG, BetweenMap}; +insert_exprs(CFG, Pred, Succ, ExprMap, IdMap, BetweenMap, [ExprId|Exprs]) -> + Expr = expr_id_map_get_expr(IdMap, ExprId), + Instr = expr_map_get_instr(ExprMap, Expr), + case hipe_rtl_cfg:succ(CFG, Pred) of + [_] -> + pp_debug(" Inserted last: ", []), + pp_debug_instr(Instr), + NewCFG = insert_expr_last(CFG, Pred, Instr), + insert_exprs(NewCFG, Pred, Succ, ExprMap, IdMap, BetweenMap, Exprs); + _ -> + case hipe_rtl_cfg:pred(CFG, Succ) of + [_] -> + pp_debug(" Inserted first: ", []), + pp_debug_instr(Instr), + NewCFG = insert_expr_first(CFG, Succ, Instr), + insert_exprs(NewCFG, Pred, Succ, ExprMap, IdMap, BetweenMap, Exprs); + _ -> + pp_debug(" Inserted between: ", []), + pp_debug_instr(Instr), + {NewCFG, NewBetweenMap} = + insert_expr_between(CFG, BetweenMap, Pred, Succ, Instr), + insert_exprs(NewCFG, Pred, Succ, ExprMap, IdMap, NewBetweenMap, Exprs) + end + end. + +%%============================================================================= +%% Recursively goes through the code in a block and returns a new block +%% with the new code inserted second to last (assuming the last expression +%% is a branch operation). +insert_expr_last(CFG0, Label, Instr) -> + Code0 = hipe_bb:code(hipe_rtl_cfg:bb(CFG0, Label)), + %% FIXME: Use hipe_bb:butlast() instead? + Code1 = insert_expr_last_work(Label, Instr, Code0), + hipe_rtl_cfg:bb_add(CFG0, Label, hipe_bb:mk_bb(Code1)). + +%%============================================================================= +%% Recursively goes through the code in a block and returns a new block +%% with the new code inserted second to last (assuming the last expression +%% is a branch operation). +insert_expr_last_work(_, Instr, []) -> + %% This case should not happen since this means that block was completely + %% empty when the function was called. For compability we insert it last. + [Instr]; +insert_expr_last_work(_, Instr, [Code1]) -> + %% We insert the code next to last. + [Instr, Code1]; +insert_expr_last_work(Label, Instr, [Code|Codes]) -> + [Code|insert_expr_last_work(Label, Instr, Codes)]. + +%%============================================================================= +%% Inserts expression first in the block for the given label. +insert_expr_first(CFG0, Label, Instr) -> + %% The first instruction is always a label + [Lbl|Code0] = hipe_bb:code(hipe_rtl_cfg:bb(CFG0, Label)), + Code1 = [Lbl, Instr | Code0], + hipe_rtl_cfg:bb_add(CFG0, Label, hipe_bb:mk_bb(Code1)). + +%%============================================================================= +%% Inserts an expression on and edge between two existing blocks. +%% It creates a new basic block to hold the expression. +%% Created bbs are inserted into BetweenMap to be able to reuse them for +%% multiple inserts on the same edge. +%% NOTE Currently creates multiple blocks for identical expression with the +%% same successor. Since the new bb usually contains very few instructions +%% this should not be a problem. +insert_expr_between(CFG0, BetweenMap, Pred, Succ, Instr) -> + PredSucc = {Pred, Succ}, + case edge_bb_map_lookup(BetweenMap, PredSucc) of + none -> + NewLabel = hipe_rtl:mk_new_label(), + NewLabelName = hipe_rtl:label_name(NewLabel), + pp_debug(" Creating new bb ~w~n", [NewLabel]), + Code = [Instr, hipe_rtl:mk_goto(Succ)], + CFG1 = hipe_rtl_cfg:bb_add(CFG0, NewLabelName, hipe_bb:mk_bb(Code)), + CFG2 = hipe_rtl_cfg:redirect(CFG1, Pred, Succ, NewLabelName), + NewBetweenMap = edge_bb_map_insert(BetweenMap, PredSucc, NewLabelName), + pp_debug(" Mapping edge (~w,~w) to label ~w~n", + [Pred, Succ, NewLabelName]), + {CFG2, NewBetweenMap}; + {value, Label} -> + pp_debug(" Using existing new bb for edge (~w,~w) with label ~w~n", + [Pred, Succ, Label]), + {insert_expr_last(CFG0, Label, Instr), BetweenMap} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%% GENERAL UTILITY FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Returns true if the list of registers only contains virtual registers and +%% no machine registers. +no_machine_regs([]) -> + true; +no_machine_regs([Reg|Regs]) -> + case hipe_rtl:is_reg(Reg) of + true -> + N = hipe_rtl:reg_index(Reg), + (N >= hipe_rtl_arch:first_virtual_reg()) andalso no_machine_regs(Regs); + _ -> + case hipe_rtl:is_fpreg(Reg) of + true -> + N = hipe_rtl:fpreg_index(Reg), + (N >= hipe_rtl_arch:first_virtual_reg()) andalso no_machine_regs(Regs); + _ -> + no_machine_regs(Regs) + end + end. + +%%============================================================================= +%% Returns true if an RTL instruction is an expression. +%% +is_expr(I) -> + Defines = hipe_rtl:defines(I), + Uses = hipe_rtl:uses(I), + + %% We don't cosider something that doesn't define anything as an expression. + %% Also we don't consider machine registers to be expressions. + case length(Defines) > 0 andalso no_machine_regs(Defines) + andalso no_machine_regs(Uses) of + true -> + case I of + #alu{} -> true; +%% #alu{} -> +%% Dst = hipe_rtl:alu_dst(I), +%% Src1 = hipe_rtl:alu_src1(I), +%% Src2 = hipe_rtl:alu_src2(I), + + %% Check if dst updates src +%% case Dst =:= Src1 orelse Dst =:= Src2 of +%% true -> +%% false; +%% false -> +%% true +%% end; + + %% Check if alu expression is untagging of boxed (rX <- vX sub 2) +%% case hipe_rtl:is_reg(Dst) andalso hipe_rtl:is_var(Src1) andalso +%% (hipe_rtl:alu_op(I) =:= sub) andalso hipe_rtl:is_imm(Src2) of +%% true -> +%% case hipe_rtl:imm_value(Src2) of +%% 2 -> false; %% Tag for boxed. TODO: Should not be hardcoded... +%% _ -> true +%% end; +%% false -> +%% true +%% end; + + #alub{} -> false; %% TODO: Split instruction to consider alu expression? + #branch{} -> false; + #call{} -> false; %% We cannot prove that a call has no side-effects + #comment{} -> false; + #enter{} -> false; + %% #fail_to{} -> false; %% Deprecated? + #fconv{} -> true; + #fixnumop{} -> true; + #fload{} -> true; + #fmove{} -> false; + #fp{} -> true; + #fp_unop{} -> true; + #fstore{} -> false; + #goto{} -> false; + #goto_index{} -> false; + #gctest{} -> false; + #label{} -> false; + #load{} -> true; + #load_address{} -> + case hipe_rtl:load_address_type(I) of + c_const -> false; + closure -> false; %% not sure whether safe to move; + %% also probably not worth it + constant -> true + end; + #load_atom{} -> true; + #load_word_index{} -> true; + #move{} -> false; + #multimove{} -> false; + #phi{} -> false; + #return{} -> false; + #store{} -> false; + #switch{} -> false + end; + false -> + false + end. + +%%============================================================================= +%% Replaces destination of RTL expression with empty list. +%% +expr_set_dst(I, [Dst|_Dsts] = DstList) -> + case I of + #alu{} -> hipe_rtl:alu_dst_update(I, Dst); + #call{} -> hipe_rtl:call_dstlist_update(I, DstList); + #fconv{} -> hipe_rtl:fconv_dst_update(I, Dst); + #fixnumop{} -> hipe_rtl:fixnumop_dst_update(I, Dst); + #fload{} -> hipe_rtl:fload_dst_update(I, Dst); + %% #fmove{} -> hipe_rtl:fmove_dst_update(I, Dst); + #fp{} -> hipe_rtl:fp_dst_update(I, Dst); + #fp_unop{} -> hipe_rtl:fp_unop_dst_update(I, Dst); + #load{} -> hipe_rtl:load_dst_update(I, Dst); + #load_address{} -> hipe_rtl:load_address_dst_update(I, Dst); + #load_atom{} -> hipe_rtl:load_atom_dst_update(I, Dst); + #load_word_index{} -> hipe_rtl:load_word_index_dst_update(I, Dst); + %% #move{} -> hipe_rtl:move_dst_update(I, Dst); + _ -> exit({?MODULE, expr_set_dst, "bad expression"}) + end. + +%%============================================================================= +%% Replaces destination of RTL expression with empty list. +%% +expr_clear_dst(I) -> + case I of + #alu{} -> hipe_rtl:alu_dst_update(I, nil); + #call{} -> hipe_rtl:call_dstlist_update(I, nil); + #fconv{} -> hipe_rtl:fconv_dst_update(I, nil); + #fixnumop{} -> hipe_rtl:fixnumop_dst_update(I, nil); + #fload{} -> hipe_rtl:fload_dst_update(I, nil); + %% #fmove{} -> hipe_rtl:fmove_dst_update(I, nil); + #fp{} -> hipe_rtl:fp_dst_update(I, nil); + #fp_unop{} -> hipe_rtl:fp_unop_dst_update(I, nil); + #load{} -> hipe_rtl:load_dst_update(I, nil); + #load_address{} -> hipe_rtl:load_address_dst_update(I, nil); + #load_atom{} -> hipe_rtl:load_atom_dst_update(I, nil); + #load_word_index{} -> hipe_rtl:load_word_index_dst_update(I, nil); + %% #move{} -> hipe_rtl:move_dst_update(I, nil); + _ -> exit({?MODULE, expr_clear_dst, "bad expression"}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%% PRECALC FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Pre-calculates the flow analysis and puts the calculated sets in maps for +%% easy access later. +lcm_precalc(CFG, Options) -> + %% Calculate use map and expression map. + ?option_time({ExprMap, IdMap} = mk_expr_map(CFG), + "RTL LCM mk_expr_map", Options), + ?option_time(UseMap = mk_use_map(CFG, ExprMap), + "RTL LCM mk_use_map", Options), + %% Labels = hipe_rtl_cfg:reverse_postorder(CFG), + Labels = hipe_rtl_cfg:labels(CFG), + %% StartLabel = hipe_rtl_cfg:start_label(CFG), + %% AllExpr = all_exprs(CFG, Labels), + AllExpr = ?SETS:from_list(gb_trees:keys(IdMap)), + + %% Calculate the data sets. + ?option_time(NodeInfo0 = mk_node_info(Labels), "RTL LCM mk_node_info", + Options), + %% ?option_time(EdgeInfo0 = mk_edge_info(), "RTL LCM mk_edge_info", + %% Options), + EdgeInfo0 = mk_edge_info(), + ?option_time(NodeInfo1 = calc_up_exp(CFG, ExprMap, NodeInfo0, Labels), + "RTL LCM calc_up_exp", Options), + ?option_time(NodeInfo2 = calc_down_exp(CFG, ExprMap, NodeInfo1, Labels), + "RTL LCM calc_down_exp", Options), + ?option_time(NodeInfo3 = calc_killed_expr(CFG, NodeInfo2, UseMap, AllExpr, + Labels), + "RTL LCM calc_killed_exp", Options), + ?option_time(NodeInfo4 = calc_avail(CFG, NodeInfo3), + "RTL LCM calc_avail", Options), + ?option_time(NodeInfo5 = calc_antic(CFG, NodeInfo4, AllExpr), + "RTL LCM calc_antic", Options), + ?option_time(EdgeInfo1 = calc_earliest(CFG, NodeInfo5, EdgeInfo0, Labels), + "RTL LCM calc_earliest", Options), + ?option_time({NodeInfo6, EdgeInfo2} = calc_later(CFG, NodeInfo5, EdgeInfo1), + "RTL LCM calc_later", Options), + ?option_time(NodeInfo7 = calc_delete(CFG, NodeInfo6, Labels), + "RTL LCM calc_delete", Options), + {NodeInfo7, EdgeInfo2, AllExpr, ExprMap, IdMap, Labels}. + +%%%%%%%%%%%%%%%%%%% AVAILABLE IN/OUT FLOW ANALYSIS %%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fixpoint calculation of anticipated in/out sets. +%% Uses a worklist algorithm. +%% Performs the avail in/out flow analysis. + +%%============================================================================= +%% Calculates the available in/out sets, and returns an updated NodeInfo. + +calc_avail(CFG, NodeInfo) -> + StartLabel = hipe_rtl_cfg:start_label(CFG), + Work = init_work([StartLabel]), + %% Initialize start node + NewNodeInfo = set_avail_in(NodeInfo, StartLabel, ?SETS:new()), + calc_avail_fixpoint(Work, CFG, NewNodeInfo). + +calc_avail_fixpoint(Work, CFG, NodeInfo) -> + case get_work(Work) of + fixpoint -> + NodeInfo; + {Label, NewWork} -> + {NewNodeInfo, NewLabels} = calc_avail_node(Label, CFG, NodeInfo), + NewWork2 = add_work(NewWork, NewLabels), + calc_avail_fixpoint(NewWork2, CFG, NewNodeInfo) + end. + +calc_avail_node(Label, CFG, NodeInfo) -> + %% Get avail in + AvailIn = avail_in(NodeInfo, Label), + + %% Calculate avail out + AvailOut = ?SETS:union(down_exp(NodeInfo, Label), + ?SETS:subtract(AvailIn, + killed_expr(NodeInfo, Label))), + + {Changed, NodeInfo2} = + case avail_out(NodeInfo, Label) of + none -> + %% If there weren't any old avail out we use this one. + {true, set_avail_out(NodeInfo, Label, AvailOut)}; + OldAvailOut -> + %% Check if the avail outs are equal. + case AvailOut =:= OldAvailOut of + true -> + {false, NodeInfo}; + false -> + {true, set_avail_out(NodeInfo, Label, AvailOut)} + end + end, + + case Changed of + true -> + %% Update AvailIn-sets of successors and add them to worklist + Succs = hipe_rtl_cfg:succ(CFG, Label), + NodeInfo3 = + lists:foldl + (fun(Succ, NewNodeInfo) -> + case avail_in(NewNodeInfo, Succ) of + none -> + %% Initialize avail in to all expressions + set_avail_in(NewNodeInfo, Succ, AvailOut); + OldAvailIn -> + set_avail_in(NewNodeInfo, Succ, + ?SETS:intersection(OldAvailIn, AvailOut)) + end + end, + NodeInfo2, Succs), + {NodeInfo3, Succs}; + false -> + {NodeInfo2, []} + end. + +%%%%%%%%%%%%%%%%%% ANTICIPATED IN/OUT FLOW ANALYSIS %%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fixpoint calculation of anticipated in/out sets. +%% Uses a worklist algorithm. + +%%============================================================================= +%% Calculates the anicipated in/out sets, and returns an updated NodeInfo. +calc_antic(CFG, NodeInfo, AllExpr) -> + %% Initialize worklist with all nodes in postorder + Labels = hipe_rtl_cfg:postorder(CFG), + Work = init_work(Labels), + calc_antic_fixpoint(Work, CFG, NodeInfo, AllExpr). + +calc_antic_fixpoint(Work, CFG, NodeInfo, AllExpr) -> + case get_work(Work) of + fixpoint -> + NodeInfo; + {Label, NewWork} -> + {NewNodeInfo, NewLabels} = calc_antic_node(Label, CFG, NodeInfo, AllExpr), + NewWork2 = add_work(NewWork, NewLabels), + calc_antic_fixpoint(NewWork2, CFG, NewNodeInfo, AllExpr) + end. + +calc_antic_node(Label, CFG, NodeInfo, AllExpr) -> + %% Get antic out + AnticOut = + case antic_out(NodeInfo, Label) of + none -> + case is_exit_label(CFG, Label) of + true -> + ?SETS:new(); + false -> + AllExpr + end; + + AnticOutTemp -> AnticOutTemp + end, + + %% Calculate antic in + AnticIn = ?SETS:union(up_exp(NodeInfo, Label), + ?SETS:subtract(AnticOut, + killed_expr(NodeInfo, Label))), + {Changed, NodeInfo2} = + case antic_in(NodeInfo, Label) of + %% If there weren't any old antic in we use this one. + none -> + {true, set_antic_in(NodeInfo, Label, AnticIn)}; + + OldAnticIn -> + %% Check if the antic in:s are equal. + case AnticIn =:= OldAnticIn of + true -> + {false, NodeInfo}; + false -> + {true, + set_antic_in(NodeInfo, Label, AnticIn)} + end + end, + + case Changed of + true -> + %% Update AnticOut-sets of predecessors and add them to worklist + Preds = hipe_rtl_cfg:pred(CFG, Label), + NodeInfo3 = + lists:foldl + (fun(Pred, NewNodeInfo) -> + case antic_out(NewNodeInfo, Pred) of + none -> + %% Initialize antic out to all expressions + set_antic_out(NewNodeInfo, Pred, AnticIn); + OldAnticOut -> + set_antic_out(NewNodeInfo, Pred, + ?SETS:intersection(OldAnticOut, AnticIn)) + end + end, + NodeInfo2, Preds), + {NodeInfo3, Preds}; + false -> + {NodeInfo2, []} + end. + +%%%%%%%%%%%%%%%%%%%%% LATER / LATER IN FLOW ANALYSIS %%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fixpoint calculations of Later and LaterIn sets. +%% Uses a worklist algorithm. +%% Note that the Later set is calculated on edges. + +%%============================================================================= +%% Calculates the Later and LaterIn sets, and returns updates of both +%% NodeInfo (with LaterIn sets) and EdgeInfo (with Later sets). + +calc_later(CFG, NodeInfo, EdgeInfo) -> + StartLabel = hipe_rtl_cfg:start_label(CFG), + Work = init_work([{node, StartLabel}]), + %% Initialize start node + NewNodeInfo = set_later_in(NodeInfo, StartLabel, ?SETS:new()), + calc_later_fixpoint(Work, CFG, NewNodeInfo, EdgeInfo). + +calc_later_fixpoint(Work, CFG, NodeInfo, EdgeInfo) -> + case get_work(Work) of + {{edge, From, To}, Work2} -> + {NewNodeInfo, NewEdgeInfo, AddWork} = + calc_later_edge(From, To, CFG, NodeInfo, EdgeInfo), + Work3 = add_work(Work2, AddWork), + calc_later_fixpoint(Work3, CFG, NewNodeInfo, NewEdgeInfo); + {{node, Label}, Work2} -> + AddWork = calc_later_node(Label, CFG), + Work3 = add_work(Work2, AddWork), + calc_later_fixpoint(Work3, CFG, NodeInfo, EdgeInfo); + fixpoint -> + {NodeInfo, EdgeInfo} + end. + +calc_later_node(Label, CFG) -> + Succs = hipe_rtl_cfg:succ(CFG, Label), + [{edge, Label, Succ} || Succ <- Succs]. + +calc_later_edge(From, To, _CFG, NodeInfo, EdgeInfo) -> + FromTo = {From, To}, + Earliest = earliest(EdgeInfo, FromTo), + LaterIn = later_in(NodeInfo, From), + UpExp = up_exp(NodeInfo, From), + Later = ?SETS:union(Earliest, ?SETS:subtract(LaterIn, UpExp)), + {Changed, EdgeInfo2} = + case lookup_later(EdgeInfo, FromTo) of + none -> {true, set_later(EdgeInfo, FromTo, Later)}; + Later -> {false, EdgeInfo}; + _Old -> {true, set_later(EdgeInfo, FromTo, Later)} + end, + case Changed of + true -> + %% Update later in set of To-node + case lookup_later_in(NodeInfo, To) of + %% If the data isn't set initialize to all expressions + none -> + {set_later_in(NodeInfo, To, Later), EdgeInfo2, [{node, To}]}; + OldLaterIn -> + NewLaterIn = ?SETS:intersection(OldLaterIn, Later), + %% Check if something changed + %% FIXME: Implement faster equality test? + case NewLaterIn =:= OldLaterIn of + true -> + {NodeInfo, EdgeInfo2, []}; + false -> + {set_later_in(NodeInfo, To, NewLaterIn), + EdgeInfo2, [{node, To}]} + end + end; + false -> + {NodeInfo, EdgeInfo2, []} + end. + +%%%%%%%%%%%%%%%%%% UPWARDS/DOWNWARDS EXPOSED EXPRESSIONS %%%%%%%%%%%%%%%%%%%%%% +%% Calculates upwards and downwards exposed expressions. + +%%============================================================================= +%% Calculates the downwards exposed expression sets for the given labels in +%% the CFG. +calc_down_exp(_, _, NodeInfo, []) -> + NodeInfo; +calc_down_exp(CFG, ExprMap, NodeInfo, [Label|Labels]) -> + Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)), + %% Data = ?SETS:from_list(lists:map(fun expr_clear_dst/1, exp_work(Code))), + Data = ?SETS:from_list(get_expr_ids(ExprMap, exp_work(Code))), + NewNodeInfo = set_down_exp(NodeInfo, Label, Data), + calc_down_exp(CFG, ExprMap, NewNodeInfo, Labels). + +%%============================================================================= +%% Calculates the upwards exposed expressions sets for the given labels in +%% the CFG. +calc_up_exp(_, _, NodeInfo, []) -> + NodeInfo; +calc_up_exp(CFG, ExprMap, NodeInfo, [Label|Labels]) -> + BB = hipe_rtl_cfg:bb(CFG, Label), + RevCode = lists:reverse(hipe_bb:code(BB)), + Data = ?SETS:from_list(get_expr_ids(ExprMap, exp_work(RevCode))), + NewNodeInfo = set_up_exp(NodeInfo, Label, Data), + calc_up_exp(CFG, ExprMap, NewNodeInfo, Labels). + +%%============================================================================= +%% Given a list of expression instructions, gets a list of expression ids +%% from an expression map. +get_expr_ids(ExprMap, Instrs) -> + [expr_map_get_id(ExprMap, expr_clear_dst(I)) || I <- Instrs]. + +%%============================================================================= +%% Does the work of the calc_*_exp functions. +exp_work(Code) -> + exp_work([], Code). + +exp_work([], [Instr|Instrs]) -> + case is_expr(Instr) of + true -> + exp_work([Instr], Instrs); + false -> + exp_work([], Instrs) + end; +exp_work(Exprs, []) -> + Exprs; +exp_work(Exprs, [Instr|Instrs]) -> + NewExprs = case is_expr(Instr) of + true -> + exp_kill_expr(Instr, [Instr|Exprs]); + false -> + exp_kill_expr(Instr, Exprs) + end, + exp_work(NewExprs, Instrs). + +%%============================================================================= +%% Checks if the given instruction redefines any operands of +%% instructions in the instruction list. +%% It returns the list of expressions with those instructions that has +%% operands redefined removed. +exp_kill_expr(_Instr, []) -> + []; +exp_kill_expr(Instr, [CheckedExpr|Exprs]) -> + %% Calls, gctests and stores potentially clobber everything + case Instr of + #call{} -> []; + #gctest{} -> []; + #store{} -> []; %% FIXME: Only regs and vars clobbered, not fregs... + #fstore{} -> + %% fstore potentially clobber float expressions + [ExprDefine|_] = hipe_rtl:defines(CheckedExpr), + case hipe_rtl:is_fpreg(ExprDefine) of + true -> + exp_kill_expr(Instr, Exprs); + false -> + [CheckedExpr | exp_kill_expr(Instr, Exprs)] + end; + _ -> + InstrDefines = hipe_rtl:defines(Instr), + ExprUses = hipe_rtl:uses(CheckedExpr), + Diff = ExprUses -- InstrDefines, + case length(Diff) < length(ExprUses) of + true -> + exp_kill_expr(Instr, Exprs); + false -> + [CheckedExpr | exp_kill_expr(Instr, Exprs)] + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%% KILLED EXPRESSIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Calculates the killed expression sets for all given labels. +calc_killed_expr(_, NodeInfo, _, _, []) -> + NodeInfo; +calc_killed_expr(CFG, NodeInfo, UseMap, AllExpr, [Label|Labels]) -> + Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)), + KilledExprs = calc_killed_expr_bb(Code, UseMap, AllExpr, ?SETS:new()), + NewNodeInfo = set_killed_expr(NodeInfo, Label, KilledExprs), + calc_killed_expr(CFG, NewNodeInfo, UseMap, AllExpr, Labels). + +%%============================================================================= +%% Calculates the killed expressions set for one basic block. +calc_killed_expr_bb([], _UseMap, _AllExpr, KilledExprs) -> + KilledExprs; +calc_killed_expr_bb([Instr|Instrs], UseMap, AllExpr, KilledExprs) -> + %% Calls, gctests and stores potentially clobber everything + case Instr of + #call{} -> AllExpr; + #gctest{} -> AllExpr; + #store{} -> AllExpr; %% FIXME: Only regs and vars clobbered, not fregs... + #fstore{} -> + %% Kill all float expressions + %% FIXME: Make separate function is_fp_expr + ?SETS:from_list + (lists:foldl(fun(Expr, Fexprs) -> + [Define|_] = hipe_rtl:defines(Expr), + case hipe_rtl:is_fpreg(Define) of + true -> + [Expr|Fexprs]; + false -> + Fexprs + end + end, [], ?SETS:to_list(AllExpr))); + _ -> + case hipe_rtl:defines(Instr) of + [] -> + calc_killed_expr_bb(Instrs, UseMap, AllExpr, KilledExprs); + [Define|_] -> + NewKilledExprs = use_map_get_expr_uses(UseMap, Define), + calc_killed_expr_bb(Instrs, UseMap, AllExpr, + ?SETS:union(NewKilledExprs, KilledExprs)) + end + end. +%%%%%%%%%%%%%%%%%%%%%%%%%%%% EARLIEST %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Calculates the earliest set for all edges in the CFG. + +calc_earliest(_, _, EdgeInfo, []) -> + EdgeInfo; +calc_earliest(CFG, NodeInfo, EdgeInfo, [To|Labels]) -> + EmptySet = ?SETS:new(), + Preds = hipe_rtl_cfg:pred(CFG, To), + NewEdgeInfo = + case EmptySet =:= antic_in(NodeInfo, To) of + true -> + %% Earliest is empty for all edges into this block. + lists:foldl(fun(From, EdgeInfoAcc) -> + set_earliest(EdgeInfoAcc, {From, To}, EmptySet) + end, EdgeInfo, Preds); + false -> + lists:foldl(fun(From, EdgeInfoAcc) -> + IsStartLabel = (From =:= hipe_rtl_cfg:start_label(CFG)), + Earliest = + calc_earliest_edge(NodeInfo, IsStartLabel, From, To), + set_earliest(EdgeInfoAcc, {From, To}, Earliest) + end, EdgeInfo, Preds) + end, + calc_earliest(CFG, NodeInfo, NewEdgeInfo, Labels). + +%%============================================================================= +%% Calculates the earliest set for one edge. + +calc_earliest_edge(NodeInfo, IsStartLabel, From, To) -> + AnticIn = antic_in(NodeInfo, To), + AvailOut = avail_out(NodeInfo, From), + + case IsStartLabel of + true -> + ?SETS:subtract(AnticIn, AvailOut); + false -> + AnticOut = antic_out(NodeInfo, From), + ExprKill = killed_expr(NodeInfo, From), + ?SETS:subtract(?SETS:subtract(AnticIn, AvailOut), + ?SETS:subtract(AnticOut, ExprKill)) + end. +%% The above used to be: +%% +%% ?SETS:intersection(?SETS:subtract(AnticIn, AvailOut), +%% ?SETS:union(ExprKill, ?SETS:subtract(AllExpr, AnticOut))) +%% +%% But it is costly to use the AllExpr, so let's do some tricky set algebra. +%% +%% Let A = AnticIn, B = AvailOut, C = ExprKill, D = AnticOut, U = AllExpr +%% Let n = intersection, u = union, ' = inverse +%% +%% Then +%% (A - B) n (C u (U - D)) = +%% = (A - B) n ((C u U) - (D - C)) = +%% = (A - B) n (U - (D - C)) = +%% = (A - B) n (D - C)' = +%% = (A - B) - (D - C) +%% +%% or in other words +%% ?SETS:subtract(?SETS:subtract(AnticIn, AvailOut), +%% ?SETS:subtract(AnticOut, ExprKill)) + + + +%%%%%%%%%%%%%%%%%%%%%%%% INSERT / DELETE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Calculates the insert set for one edge and returns the resulting set. +%% NOTE This does not modify the EdgeInfo set, since the resulting set is +%% returned and used immediately, instead of being pre-calculated as are +%% the other sets. +calc_insert_edge(NodeInfo, EdgeInfo, From, To) -> + Later = later(EdgeInfo, {From, To}), + LaterIn = later_in(NodeInfo, To), + ?SETS:subtract(Later, LaterIn). + +%%============================================================================= +%% Calculates the delete set for all given labels in a CFG. +calc_delete(_, NodeInfo, []) -> + NodeInfo; +calc_delete(CFG, NodeInfo, [Label|Labels]) -> + case Label =:= hipe_rtl_cfg:start_label(CFG) of + true -> + NewNodeInfo = set_delete(NodeInfo, Label, ?SETS:new()); + false -> + UpExp = up_exp(NodeInfo, Label), + LaterIn = later_in(NodeInfo, Label), + Delete = ?SETS:subtract(UpExp, LaterIn), + NewNodeInfo = set_delete(NodeInfo, Label, Delete) + end, + calc_delete(CFG, NewNodeInfo, Labels). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%% FIXPOINT FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Worklist used by the fixpoint calculations. +%% +%% We use gb_sets here, which is optimized for continuous inserts and +%% membership tests. + +init_work(Labels) -> + {Labels, [], gb_sets:from_list(Labels)}. + +get_work({[Label|Left], List, Set}) -> + NewWork = {Left, List, gb_sets:delete(Label, Set)}, + {Label, NewWork}; +get_work({[], [], _Set}) -> + fixpoint; +get_work({[], List, Set}) -> + get_work({lists:reverse(List), [], Set}). + +add_work(Work = {List1, List2, Set}, [Label|Labels]) -> + case gb_sets:is_member(Label, Set) of + true -> + add_work(Work, Labels); + false -> + %%io:format("Adding work: ~w\n", [Label]), + add_work({List1, [Label|List2], gb_sets:insert(Label, Set)}, Labels) + end; +add_work(Work, []) -> + Work. + +%%============================================================================= +%% Calculates the labels that are the exit labels. +%% FIXME We do not detect dead-end loops spanning more than one block. +%% This could potentially cause a bug in the future... +%% exit_labels(CFG) -> +%% Labels = hipe_rtl_cfg:labels(CFG), +%% lists:foldl(fun(Label, ExitLabels) -> +%% Succs = hipe_rtl_cfg:succ(CFG, Label), +%% case Succs of +%% [] -> +%% [Label|ExitLabels]; +%% [Label] -> %% Count single bb dead-end loops as exit labels +%% [Label|ExitLabels]; +%% _ -> +%% ExitLabels +%% end +%% end, [], Labels ). + +%%============================================================================= +%% Return true if label is an exit label, +%% i.e. its bb has no successors or itself as only successor. +is_exit_label(CFG, Label) -> + case hipe_rtl_cfg:succ(CFG, Label) of + [] -> true; + [Label] -> true; + _ -> false + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%% DATASET FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The dataset is a collection of data about the CFG. +%% It is divided into two parts, NodeInfo and EdgeInfo. +%% The pre-calculation step stores the calculated sets here. + +-record(node_data, {up_exp = none, + down_exp = none, + killed_expr = none, + avail_in = none, + avail_out = none, + antic_in = none, + antic_out = none, + later_in = none, + delete = none}). + +-record(edge_data, {earliest = none, + later = none, + insert = none}). + +%%============================================================================= +%% Creates a node info from a CFG (one entry for each Label). +mk_node_info(Labels) -> + lists:foldl(fun(Label, DataTree) -> + gb_trees:insert(Label, #node_data{}, DataTree) + %%gb_trees:enter(Label, #node_data{}, DataTree) + end, + gb_trees:empty(), Labels). + +%%mk_edge_info(Labels) -> +%% FIXME Should we traverse cfg and initialize edges? +mk_edge_info() -> + gb_trees:empty(). + +%%============================================================================= +%% Get methods +up_exp(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.up_exp. + +down_exp(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.down_exp. + +killed_expr(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.killed_expr. + +avail_in(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.avail_in. + +avail_out(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.avail_out. + +antic_in(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.antic_in. + +antic_out(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.antic_out. + +later_in(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.later_in. + +lookup_later_in(NodeInfo, Label) -> + case gb_trees:lookup(Label, NodeInfo) of + none -> + none; + {value, #node_data{later_in = Data}} -> + Data + end. + +delete(NodeInfo, Label) -> + Data = gb_trees:get(Label, NodeInfo), + Data#node_data.delete. + +earliest(EdgeInfo, Edge) -> + Data = gb_trees:get(Edge, EdgeInfo), + Data#edge_data.earliest. + +-ifdef(LOOKUP_EARLIEST_NEEDED). +lookup_earliest(EdgeInfo, Edge) -> + case gb_trees:lookup(Edge, EdgeInfo) of + none -> + none; + {value, #edge_data{earliest = Data}} -> + Data + end. +-endif. + +later(EdgeInfo, Edge) -> + Data = gb_trees:get(Edge, EdgeInfo), + Data#edge_data.later. + +lookup_later(EdgeInfo, Edge) -> + case gb_trees:lookup(Edge, EdgeInfo) of + none -> + none; + {value, #edge_data{later = Data}} -> + Data + end. + +%% insert(EdgeInfo, Edge) -> +%% case gb_trees:lookup(Edge, EdgeInfo) of +%% none -> +%% exit({?MODULE, insert, "edge info not found"}), +%% none; +%% {value, #edge_data{insert = Data}} -> +%% Data +%% end. + +%%============================================================================= +%% Set methods +set_up_exp(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{up_exp = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{up_exp = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_down_exp(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{down_exp = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{down_exp = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_killed_expr(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{killed_expr = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{killed_expr = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_avail_in(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{avail_in = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{avail_in = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_avail_out(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{avail_out = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{avail_out = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_antic_in(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{antic_in = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{antic_in = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_antic_out(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{antic_out = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{antic_out = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_later_in(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{later_in = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{later_in = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_delete(NodeInfo, Label, Data) -> + NodeData = + case gb_trees:lookup(Label, NodeInfo) of + none -> + #node_data{delete = Data}; + {value, OldNodeData} -> + OldNodeData#node_data{delete = Data} + end, + gb_trees:enter(Label, NodeData, NodeInfo). + +set_earliest(EdgeInfo, Edge, Data) -> + EdgeData = + case gb_trees:lookup(Edge, EdgeInfo) of + none -> + #edge_data{earliest = Data}; + {value, OldEdgeData} -> + OldEdgeData#edge_data{earliest = Data} + end, + gb_trees:enter(Edge, EdgeData, EdgeInfo). + +set_later(EdgeInfo, Edge, Data) -> + EdgeData = + case gb_trees:lookup(Edge, EdgeInfo) of + none -> + #edge_data{later = Data}; + {value, OldEdgeData} -> + OldEdgeData#edge_data{later = Data} + end, + gb_trees:enter(Edge, EdgeData, EdgeInfo). + +%% set_insert(EdgeInfo, Edge, Data) -> +%% EdgeData = +%% case gb_trees:lookup(Edge, EdgeInfo) of +%% none -> +%% #edge_data{insert = Data}; +%% {value, OldEdgeData} -> +%% OldEdgeData#edge_data{insert = Data} +%% end, +%% gb_trees:enter(Edge, EdgeData, EdgeInfo). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%% USE MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The use map is a mapping from "use" (which is an rtl register/variable) +%% to a set of expressions (IDs) where that register/variable is used. +%% It is used by calc_killed_expr to know what expressions are affected by +%% a definition. + +%%============================================================================= +%% Creates and calculates the use map for a CFG. +%% It uses ExprMap to lookup the expression IDs. +mk_use_map(CFG, ExprMap) -> + Labels = hipe_rtl_cfg:reverse_postorder(CFG), + NewMap = mk_use_map(gb_trees:empty(), CFG, ExprMap, Labels), + gb_trees:balance(NewMap). + +mk_use_map(Map, _, _, []) -> + Map; +mk_use_map(Map, CFG, ExprMap, [Label|Labels]) -> + Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)), + NewMap = mk_use_map_bb(Map, ExprMap, Code), + mk_use_map(NewMap, CFG, ExprMap, Labels). + +mk_use_map_bb(UseMap, _, []) -> + UseMap; +mk_use_map_bb(UseMap, ExprMap, [Instr|Instrs]) -> + case is_expr(Instr) of + true -> + Uses = hipe_rtl:uses(Instr), + ExprId = expr_map_get_id(ExprMap, expr_clear_dst(Instr)), + NewUseMap = mk_use_map_insert_uses(UseMap, ExprId, Uses), + mk_use_map_bb(NewUseMap, ExprMap, Instrs); + false -> + mk_use_map_bb(UseMap, ExprMap, Instrs) + end. + +%%============================================================================= +%% Worker function for mk_use_map that inserts the expression id for every +%% rtl register the expression uses in a use map. +mk_use_map_insert_uses(Map, _, []) -> + Map; +mk_use_map_insert_uses(Map, Expr, [Use|Uses]) -> + case gb_trees:lookup(Use, Map) of + {value, UseSet} -> + NewUseSet = ?SETS:add_element(Expr, UseSet), + mk_use_map_insert_uses(gb_trees:update(Use, NewUseSet, Map), Expr, Uses); + none -> + UseSet = ?SETS:new(), + NewUseSet = ?SETS:add_element(Expr, UseSet), + mk_use_map_insert_uses(gb_trees:insert(Use, NewUseSet, Map), Expr, Uses) + end. + +%%============================================================================= +%% Gets a set of expressions where the given rtl register is used. +use_map_get_expr_uses(Map, Reg) -> + case gb_trees:lookup(Reg, Map) of + {value, UseSet} -> + UseSet; + none -> + ?SETS:new() + end. + +%%%%%%%%%%%%%%%%%%%%%% EXPRESSION MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The expression map is a mapping from expression to +%% (1) Expression Id (Integer used to speed up set operations) +%% (2) List of definitions (labels where the expression is defined and the +%% list of registers or variables defined by an instruction in that label, +%% represented as a tuple {Label, Defines}) +%% (3) The list of replacement registers created for the expression + +%%============================================================================= +%% Creates and calculates the expression map for a CFG. +mk_expr_map(CFG) -> + init_expr_id(), + Labels = hipe_rtl_cfg:reverse_postorder(CFG), + {ExprMap, IdMap} = mk_expr_map(gb_trees:empty(), gb_trees:empty(), + CFG, Labels), + {gb_trees:balance(ExprMap), gb_trees:balance(IdMap)}. + +mk_expr_map(ExprMap, IdMap, _, []) -> + {ExprMap, IdMap}; +mk_expr_map(ExprMap, IdMap, CFG, [Label|Labels]) -> + Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)), + {NewExprMap, NewIdMap} = mk_expr_map_bb(ExprMap, IdMap, Label, Code), + mk_expr_map(NewExprMap, NewIdMap, CFG, Labels). + +mk_expr_map_bb(ExprMap, IdMap, _, []) -> + {ExprMap, IdMap}; +mk_expr_map_bb(ExprMap, IdMap, Label, [Instr|Instrs]) -> + case is_expr(Instr) of + true -> + Expr = expr_clear_dst(Instr), + Defines = hipe_rtl:defines(Instr), + case gb_trees:lookup(Expr, ExprMap) of + {value, {ExprId, DefinesList, ReplRegs}} -> + NewExprMap = gb_trees:update(Expr, {ExprId, + [{Label, Defines}|DefinesList], + ReplRegs}, ExprMap), + mk_expr_map_bb(NewExprMap, IdMap, Label, Instrs); + none -> + NewExprId = new_expr_id(), + NewReplRegs = mk_replacement_regs(Defines), + NewExprMap = gb_trees:insert(Expr, {NewExprId, + [{Label, Defines}], + NewReplRegs}, ExprMap), + NewIdMap = gb_trees:insert(NewExprId, Expr, IdMap), + mk_expr_map_bb(NewExprMap, NewIdMap, Label, Instrs) + end; + false -> + mk_expr_map_bb(ExprMap, IdMap, Label, Instrs) + end. + +%%============================================================================= +%% Creates new temporaries to replace defines in moved expressions. +mk_replacement_regs([]) -> + []; +mk_replacement_regs(Defines) -> + mk_replacement_regs(Defines, []). + +mk_replacement_regs([], NewRegs) -> + lists:reverse(NewRegs); +mk_replacement_regs([Define|Defines], NewRegs) -> + case hipe_rtl:is_reg(Define) of + true -> + NewReg = + case hipe_rtl:reg_is_gcsafe(Define) of + true -> hipe_rtl:mk_new_reg_gcsafe(); + false -> hipe_rtl:mk_new_reg() + end, + mk_replacement_regs(Defines, [NewReg|NewRegs]); + false -> + case hipe_rtl:is_var(Define) of + true -> + mk_replacement_regs(Defines, [hipe_rtl:mk_new_var()|NewRegs]); + false -> + true = hipe_rtl:is_fpreg(Define), + mk_replacement_regs(Defines, [hipe_rtl:mk_new_fpreg()|NewRegs]) + end + end. + +%%============================================================================= +%% Performs a lookup, which returns a tuple +%% {expression ID, list of definitions, list of replacement registers} +expr_map_lookup(Map, Expr) -> + gb_trees:lookup(Expr, Map). + +%%============================================================================= +%% Gets the actual RTL instruction to be generated for insertions of an +%% expression. +expr_map_get_instr(Map, Expr) -> + case gb_trees:lookup(Expr, Map) of + {value, {_, _, Regs}} -> + expr_set_dst(Expr, Regs); + none -> + exit({?MODULE, expr_map_get_instr, "expression missing"}) + end. + +%%============================================================================= +%% Gets expression id. +expr_map_get_id(Map, Expr) -> + case gb_trees:lookup(Expr, Map) of + {value, {ExprId, _, _}} -> + ExprId; + none -> + exit({?MODULE, expr_map_get_instr, "expression missing"}) + end. + +%%============================================================================= +%% Creates an rtl instruction that moves a value +mk_expr_move_instr([Reg], [Define]) -> + case hipe_rtl:is_fpreg(Reg) of + true -> + hipe_rtl:mk_fmove(Reg, Define); + false -> + %% FIXME Check is_var() orelse is_reg() ? + hipe_rtl:mk_move(Reg, Define) + end; +mk_expr_move_instr([_Reg|_Regs] = RegList, Defines) -> + %% FIXME Does this really work? What about floats... + %% (Multiple defines does not seem to be used by any of the + %% instructions considered by rtl_lcm at the moment so this is pretty much + %% untested/unused.) + hipe_rtl:mk_multimove(RegList, Defines); +mk_expr_move_instr(_, []) -> + exit({?MODULE, mk_expr_move_instr, "bad match"}). + +%%============================================================================= +%% Returns a set of all expressions in the code. +%% all_exprs(_CFG, []) -> +%% ?SETS:new(); +%% all_exprs(CFG, [Label|Labels]) -> +%% BB = hipe_rtl_cfg:bb(CFG, Label), +%% Code = hipe_bb:code(BB), +%% ?SETS:union(all_exprs_bb(Code), +%% all_exprs(CFG, Labels)). + +%%============================================================================= +%% Returns a set of expressions in a basic block. +%% all_exprs_bb([]) -> +%% ?SETS:new(); +%% all_exprs_bb([Instr|Instrs]) -> +%% case is_expr(Instr) of +%% true -> +%% Expr = expr_clear_dst(Instr), +%% ExprSet = all_exprs_bb(Instrs), +%% ?SETS:add_element(Expr, ExprSet); +%% false -> +%% all_exprs_bb(Instrs) +%% end. + +%%%%%%%%%%%%%%%%%% EXPRESSION ID -> EXPRESSION MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Map from expression IDs to expressions. +%%============================================================================= +%% mk_expr_id_map() -> +%% gb_trees:empty(). + +%% expr_id_map_insert(Map, ExprId, Expr) -> +%% gb_trees:insert(ExprId, Expr, Map). + +%% expr_id_map_lookup(Map, ExprId) -> +%% gb_trees:lookup(ExprId, Map). + +%%============================================================================= +%% Given expression id, gets expression. +expr_id_map_get_expr(Map, ExprId) -> + case gb_trees:lookup(ExprId, Map) of + {value, Expr} -> + Expr; + none -> + exit({?MODULE, expr_id_map_get_expr, "expression id missing"}) + end. + +%%============================================================================= +%% Expression ID counter +init_expr_id() -> + put({rtl_lcm,expr_id_count}, 0), + ok. + +-spec new_expr_id() -> non_neg_integer(). +new_expr_id() -> + Obj = {rtl_lcm, expr_id_count}, + V = get(Obj), + put(Obj, V+1), + V. + +%%%%%%%%%%%%%%%%%% EDGE BB (INSERT BETWEEN) MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Map from edges to labels. +%% This is used by insert_expr_between to remember what new bbs it has created +%% for insertions on edges, and thus for multiple insertions on the same edge +%% to end up in the same bb. +%%============================================================================= +mk_edge_bb_map() -> + gb_trees:empty(). + +edge_bb_map_insert(Map, Edge, Label) -> + gb_trees:enter(Edge, Label, Map). + +edge_bb_map_lookup(Map, Edge) -> + gb_trees:lookup(Edge, Map). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PRETTY-PRINTING %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%============================================================================= +%% Prints debug messages. +-ifdef(LCM_DEBUG). + +pp_debug(Str, Args) -> + case ?LCM_DEBUG of + true -> + io:format(standard_io, Str, Args); + false -> + ok + end. + +pp_debug_instr(Instr) -> + case ?LCM_DEBUG of + true -> + hipe_rtl:pp_instr(standard_io, Instr); + false -> + ok + end. + +-else. + +pp_debug(_, _) -> + ok. + +pp_debug_instr(_) -> + ok. + +-endif. %% DEBUG + +%%============================================================================= +%% Pretty-prints the calculated sets for the lazy code motion. +pp_sets(_, _, _, _, _, _, []) -> + ok; +pp_sets(ExprMap, IdMap, NodeInfo, EdgeInfo, AllExpr, CFG, [Label|Labels]) -> + Preds = hipe_rtl_cfg:pred(CFG, Label), + Succs = hipe_rtl_cfg:succ(CFG, Label), + + io:format(standard_io, "Label ~w~n", [Label]), + io:format(standard_io, " Preds: ~w~n", [Preds]), + io:format(standard_io, " Succs: ~w~n", [Succs]), + + case up_exp(NodeInfo, Label) of + none -> ok; + UpExp -> + case ?SETS:size(UpExp) =:= 0 of + false -> + io:format(standard_io, " UEExpr: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(UpExp)); + true -> ok + end + end, + case down_exp(NodeInfo, Label) of + none -> ok; + DownExp -> + case ?SETS:size(DownExp) =:= 0 of + false -> + io:format(standard_io, " DEExpr: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(DownExp)); + true -> ok + end + end, + case killed_expr(NodeInfo, Label) of + none -> ok; + KilledExpr -> + case ?SETS:size(KilledExpr) =:= 0 of + false -> + io:format(standard_io, " ExprKill: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(KilledExpr)); + true -> ok + end + end, + case avail_in(NodeInfo, Label) of + none -> ok; + AvailIn -> + case ?SETS:size(AvailIn) =:= 0 of + false -> + io:format(standard_io, " AvailIn: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(AvailIn)); + true -> ok + end + end, + case avail_out(NodeInfo, Label) of + none -> ok; + AvailOut -> + case ?SETS:size(AvailOut) =:= 0 of + false -> + io:format(standard_io, " AvailOut: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(AvailOut)); + true -> ok + end + end, + case antic_in(NodeInfo, Label) of + none -> ok; + AnticIn -> + case ?SETS:size(AnticIn) =:= 0 of + false -> + io:format(standard_io, " AnticIn: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(AnticIn)); + true -> ok + end + end, + case antic_out(NodeInfo, Label) of + none -> ok; + AnticOut -> + case ?SETS:size(AnticOut) =:= 0 of + false -> + io:format(standard_io, " AnticOut: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(AnticOut)); + true -> ok + end + end, + case later_in(NodeInfo, Label) of + none -> ok; + LaterIn -> + case ?SETS:size(LaterIn) =:= 0 of + false -> + io:format(standard_io, " LaterIn: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(LaterIn)); + true -> ok + end + end, + + pp_earliest(ExprMap, IdMap, EdgeInfo, Label, Succs), + pp_later(ExprMap, IdMap, EdgeInfo, Label, Succs), + + case delete(NodeInfo, Label) of + none -> ok; + Delete -> + case ?SETS:size(Delete) =:= 0 of + false -> + io:format(standard_io, " Delete: ~n", []), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(Delete)); + true -> ok + end + end, + pp_sets(ExprMap, IdMap, NodeInfo, EdgeInfo, AllExpr, CFG, Labels). + +%%============================================================================= +%% Pretty-prints the later set. +pp_later(_, _, _, _, []) -> + ok; +pp_later(ExprMap, IdMap, EdgeInfo, Pred, [Succ|Succs]) -> + case later(EdgeInfo, {Pred, Succ}) of + none -> ok; + Later -> + case ?SETS:size(Later) =:= 0 of + false -> + io:format(standard_io, " Later(~w->~w): ~n", [Pred,Succ]), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(Later)); + true -> ok + end + end, + pp_later(ExprMap, IdMap, EdgeInfo, Pred, Succs). + +%%============================================================================= +%% Pretty-prints the earliest set. +pp_earliest(_, _, _, _, []) -> + ok; +pp_earliest(ExprMap, IdMap, EdgeInfo, Pred, [Succ|Succs]) -> + case earliest(EdgeInfo, {Pred, Succ}) of + none -> ok; + Earliest -> + case ?SETS:size(Earliest) =:= 0 of + false -> + io:format(standard_io, " Earliest(~w->~w): ~n", [Pred,Succ]), + pp_exprs(ExprMap, IdMap, ?SETS:to_list(Earliest)); + true -> ok + end + end, + pp_earliest(ExprMap, IdMap, EdgeInfo, Pred, Succs). + +%%============================================================================= +%% Pretty-prints an expression +pp_expr(ExprMap, IdMap, ExprId) -> + Expr = expr_id_map_get_expr(IdMap, ExprId), + hipe_rtl:pp_instr(standard_io, expr_map_get_instr(ExprMap, Expr)). + +pp_exprs(_, _, []) -> + ok; +pp_exprs(ExprMap, IdMap, [E|Es]) -> + pp_expr(ExprMap, IdMap, E), + pp_exprs(ExprMap, IdMap, Es). diff --git a/lib/hipe/rtl/hipe_rtl_liveness.erl b/lib/hipe/rtl/hipe_rtl_liveness.erl new file mode 100644 index 0000000000..3cfada9d6c --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_liveness.erl @@ -0,0 +1,145 @@ +%% $Id$ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% LIVENESS ANALYSIS +%% +%% Exports: +%% ~~~~~~~ +%% analyze(CFG) - returns a liveness analysis of CFG. +%% liveout(Liveness, Label) - returns a set of variables that are live on +%% exit from basic block named Label. +%% livein(Liveness, Label) - returns a set of variables that are live on +%% entry to the basic block named Label. +%% list(Instructions, LiveOut) - Given a list of instructions and a liveout +%% set, returns a set of variables live at the first instruction. +%% + +-module(hipe_rtl_liveness). + +%% -define(LIVEOUT_NEEDED,true). % needed for liveness.inc below. +-define(PRETTY_PRINT,false). + +-include("hipe_rtl.hrl"). +-include("../flow/liveness.inc"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to CFG and RTL. +%% + +cfg_bb(CFG, L) -> + hipe_rtl_cfg:bb(CFG, L). + +cfg_postorder(CFG) -> + hipe_rtl_cfg:postorder(CFG). + +cfg_succ(CFG, L) -> + hipe_rtl_cfg:succ(CFG, L). + +uses(Instr) -> + hipe_rtl:uses(Instr). + +defines(Instr) -> + hipe_rtl:defines(Instr). + +%% +%% This is the list of registers that are live at exit from a function +%% + +liveout_no_succ() -> + hipe_rtl_arch:live_at_return(). + +%% +%% The following are used only if annotation of the code is requested. +%% + +cfg_labels(CFG) -> + hipe_rtl_cfg:reverse_postorder(CFG). + +pp_block(Label, CFG) -> + BB=hipe_rtl_cfg:bb(CFG, Label), + Code=hipe_bb:code(BB), + hipe_rtl:pp_block(Code). + +pp_liveness_info(LiveList) -> + NewList=remove_precoloured(LiveList), + print_live_list(NewList). + +print_live_list([]) -> + io:format(" none~n", []); +print_live_list([Last]) -> + io:format(" ", []), + print_var(Last), + io:format("~n", []); +print_live_list([Var|Rest]) -> + io:format(" ", []), + print_var(Var), + io:format(",", []), + print_live_list(Rest). + +print_var(A) -> + case hipe_rtl:is_var(A) of + true -> + pp_var(A); + false -> + case hipe_rtl:is_reg(A) of + true -> + pp_reg(A); + false -> + case hipe_rtl:is_fpreg(A) of + true -> + io:format("f~w", [hipe_rtl:fpreg_index(A)]); + false -> + io:format("unknown:~w", [A]) + end + end + end. + +pp_hard_reg(N) -> + io:format("~s", [hipe_rtl_arch:reg_name(N)]). + +pp_reg(Arg) -> + case hipe_rtl_arch:is_precoloured(Arg) of + true -> + pp_hard_reg(hipe_rtl:reg_index(Arg)); + false -> + io:format("r~w", [hipe_rtl:reg_index(Arg)]) + end. + +pp_var(Arg) -> + case hipe_rtl_arch:is_precoloured(Arg) of + true -> + pp_hard_reg(hipe_rtl:var_index(Arg)); + false -> + io:format("v~w", [hipe_rtl:var_index(Arg)]) + end. + +remove_precoloured(List) -> + List. + %% [X || X <- List, not hipe_rtl_arch:is_precoloured(X)]. + +-ifdef(DEBUG_LIVENESS). +cfg_bb_add(CFG, L, NewBB) -> + hipe_rtl_cfg:bb_add(CFG, L, NewBB). + +mk_comment(Text) -> + hipe_rtl:mk_comment(Text). +-endif. diff --git a/lib/hipe/rtl/hipe_rtl_mk_switch.erl b/lib/hipe/rtl/hipe_rtl_mk_switch.erl new file mode 100644 index 0000000000..e5175217d6 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_mk_switch.erl @@ -0,0 +1,985 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_rtl_mk_switch.erl +%% Module : hipe_rtl_mk_switch +%% Purpose : Implements switching on Erlang values. +%% Notes : Only fixnums are supported well, +%% atoms work with table search, +%% the inline search of atoms might have some bugs. +%% Should be extended to handle bignums and floats. +%% +%% History : * 2001-02-28 Erik Johansson (happi@it.uu.se): +%% Created. +%% * 2001-04-01 Erik Trulsson (ertr1013@csd.uu.se): +%% Stefan Lindström (stli3993@csd.uu.se): +%% Added clustering and inlined binary search trees. +%% * 2001-07-30 EJ (happi@it.uu.se): +%% Fixed some bugs and started cleanup. +%% ==================================================================== +%% Exports : +%% gen_switch_val(I, VarMap, ConstTab, Options) +%% gen_switch_tuple(I, Map, ConstTab, Options) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_mk_switch). + +-export([gen_switch_val/4, gen_switch_tuple/4]). + +%%------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). + +%%------------------------------------------------------------------------- + +-define(MINFORJUMPTABLE,9). + % Minimum number of integers needed to use something else than an inline search. +-define(MINFORINTSEARCHTREE,65). % Must be at least 3 + % Minimum number of integer elements needed to use a non-inline binary search. + +-define(MININLINEATOMSEARCH,8). + % Minimum number of atoms needed to use an inline binary search instead + % of a fast linear search. + +-define(MINFORATOMSEARCHTREE,20). % Must be at least 3 + % Minimum number of atoms needed to use a non-inline binary search instead + % of a linear search. + +-define(MAXINLINEATOMSEARCH,64). % Must be at least 3 + % The cutoff point between inlined and non-inlined binary search for atoms + +-define(WORDSIZE, hipe_rtl_arch:word_size()). +-define(MINDENSITY, 0.5). + % Minimum density required to use a jumptable instead of a binary search. + +%% The reason why MINFORINTSEARCHTREE and MINFORATOMSEARCHTREE must be +%% at least 3 is that the function tab/5 will enter an infinite loop +%% and hang when faced with a switch of size 1 or 2. + + +%% Options used by this module: +%% +%% [no_]use_indexing +%% Determines if any indexing be should be done at all. Turned on +%% by default at optimization level o2 and higher. +%% +%% [no_]use_clusters +%% Controls whether we attempt to divide sparse integer switches +%% into smaller dense clusters for which jumptables are practical. +%% Turned off by default since it can increase compilation time +%% considerably and most programs will gain little benefit from it. +%% +%% [no_]use_inline_atom_search +%% Controls whether we use an inline binary search for small number +%% of atoms. Turned off by default since this is currently only +%% supported on SPARC (and not on x86) and probably needs a bit +%% more testing before it can be turned on by default. + +gen_switch_val(I, VarMap, ConstTab, Options) -> + case proplists:get_bool(use_indexing, Options) of + false -> gen_slow_switch_val(I, VarMap, ConstTab, Options); + true -> gen_fast_switch_val(I, VarMap, ConstTab, Options) + end. + +gen_fast_switch_val(I, VarMap, ConstTab, Options) -> + {Arg, VarMap0} = + hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:switch_val_term(I), VarMap), + IcodeFail = hipe_icode:switch_val_fail_label(I), + {Fail, VarMap1} = hipe_rtl_varmap:icode_label2rtl_label(IcodeFail, VarMap0), + %% Important that the list of cases is sorted when handling integers. + UnsortedCases = hipe_icode:switch_val_cases(I), + Cases = lists:sort(UnsortedCases), + + check_duplicates(Cases), + %% This check is currently not really necessary. The checking + %% happens at an earlier phase of the compilation. + {Types, InitCode} = split_types(Cases, Arg), + handle_types(Types, InitCode, VarMap1, ConstTab, Arg, {I, Fail, Options}). + +handle_types([{Type,Lbl,Cases}|Types], Code, VarMap, ConstTab, Arg, Info) -> + {Code1,VarMap1,ConstTab1} = gen_fast_switch_on(Type, Cases, + VarMap, + ConstTab, Arg, Info), + handle_types(Types, [Code,Lbl,Code1], VarMap1, ConstTab1, Arg, Info); +handle_types([], Code, VarMap, ConstTab, _, _) -> + {Code, VarMap, ConstTab}. + + +gen_fast_switch_on(integer, Cases, VarMap, ConstTab, Arg, {I, Fail, Options}) -> + {First,_} = hd(Cases), + Min = hipe_icode:const_value(First), + if length(Cases) < ?MINFORJUMPTABLE -> + gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options); + true -> + case proplists:get_bool(use_clusters, Options) of + false -> + M = list_to_tuple(Cases), + D = density(M, 1, tuple_size(M)), + if + D >= ?MINDENSITY -> + gen_jump_table(Arg,Fail,hipe_icode:switch_val_fail_label(I),VarMap,ConstTab,Cases,Min); + true -> + gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options) + end; + true -> + MC = minclusters(Cases), + Cl = cluster_split(Cases,MC), + CM = cluster_merge(Cl), + find_cluster(CM,VarMap,ConstTab,Options,Arg,Fail,hipe_icode:switch_val_fail_label(I)) + end + end; +gen_fast_switch_on(atom, Cases, VarMap, ConstTab, Arg, {_I, Fail, Options}) -> + case proplists:get_bool(use_inline_atom_search, Options) of + true -> + if + length(Cases) < ?MININLINEATOMSEARCH -> + gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); + length(Cases) > ?MAXINLINEATOMSEARCH -> + gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); + true -> + gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options) + end; + false -> + if length(Cases) < ?MINFORATOMSEARCHTREE -> + gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); + true -> + gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options) + end + end; +gen_fast_switch_on(_, _, VarMap, ConstTab, _, {I,_Fail,Options}) -> + %% We can only handle smart indexing of integers and atoms + %% TODO: Consider bignum + gen_slow_switch_val(I, VarMap, ConstTab, Options). + + +%% Split different types into separate switches. +split_types([Case|Cases], Arg) -> + Type1 = casetype(Case), + Types = split(Cases,Type1,[Case],[]), + switch_on_types(Types,[], [], Arg); +split_types([],_) -> + %% Cant happen. + ?EXIT({empty_caselist}). + +switch_on_types([{Type,Cases}], AccCode, AccCases, _Arg) -> + Lbl = hipe_rtl:mk_new_label(), + I = hipe_rtl:mk_goto(hipe_rtl:label_name(Lbl)), + {[{Type,Lbl,lists:reverse(Cases)} | AccCases], lists:reverse([I|AccCode])}; +switch_on_types([{other,Cases} | Rest], AccCode, AccCases, Arg) -> + %% Make sure the general case is handled last. + switch_on_types(Rest ++ [{other,Cases}], AccCode, AccCases, Arg); +switch_on_types([{Type,Cases} | Rest], AccCode, AccCases, Arg) -> + TLab = hipe_rtl:mk_new_label(), + FLab = hipe_rtl:mk_new_label(), + TestCode = + case Type of + integer -> + hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(TLab), + hipe_rtl:label_name(FLab), 0.5); + atom -> + hipe_tagscheme:test_atom(Arg, hipe_rtl:label_name(TLab), + hipe_rtl:label_name(FLab), 0.5); + bignum -> + hipe_tagscheme:test_bignum(Arg, hipe_rtl:label_name(TLab), + hipe_rtl:label_name(FLab), 0.5); + _ -> ?EXIT({ooops, type_not_handled, Type}) + end, + switch_on_types(Rest, [[TestCode,FLab] | AccCode], + [{Type,TLab,lists:reverse(Cases)} | AccCases], Arg). + +split([Case|Cases], Type, Current, Rest) -> + case casetype(Case) of + Type -> + split(Cases, Type, [Case|Current],Rest); + Other -> + split(Cases, Other, [Case], [{Type,Current}|Rest]) + end; +split([], Type, Current, Rest) -> + [{Type, Current} | Rest]. + +%% Determine what type an entry in the caselist has + +casetype({Const,_}) -> + casetype(hipe_icode:const_value(Const)); +casetype(A) -> + if + is_integer(A) -> + case hipe_tagscheme:is_fixnum(A) of + true -> integer; + false -> bignum + end; + is_float(A) -> float; + is_atom(A) -> atom; + true -> other + end. + +%% check that no duplicate values occur in the case list and also +%% check that all case values have the same type. +check_duplicates([]) -> true; +check_duplicates([_]) -> true; +check_duplicates([{Const1,_},{Const2,L2}|T]) -> + C1 = hipe_icode:const_value(Const1), + C2 = hipe_icode:const_value(Const2), + %% T1 = casetype(C1), + %% T2 = casetype(C2), + if C1 =/= C2 -> %% , T1 =:= T2 -> + check_duplicates([{Const2,L2}|T]); + true -> + ?EXIT({bad_values_in_switchval,C1}) + end. + +%% +%% Determine the optimal way to divide Cases into clusters such that each +%% cluster is dense. +%% +%% See: +%% Producing Good Code for the Case Statement, Robert L. Bernstein +%% Software - Practice and Experience vol 15, 1985, no 10, pp 1021--1024 +%% And +%% Correction to "Producing Good Code for the Case Statement" +%% Sampath Kannan and Todd A. Proebsting, +%% Software - Practice and Experience vol 24, 1994, no 2, p 233 +%% +%% (The latter is where the algorithm comes from.) + +%% This function will return a tuple with the first element being 0 +%% The rest of the elements being integers. A value of M at index N +%% (where the first element is considered to have index 0) means that +%% the first N cases can be divided into M (but no fewer) clusters where +%% each cluster is dense. + +minclusters(Cases) when is_list(Cases) -> + minclusters(list_to_tuple(Cases)); +minclusters(Cases) when is_tuple(Cases) -> + N = tuple_size(Cases), + MinClusters = list_to_tuple([0|n_list(N,inf)]), + i_loop(1,N,MinClusters,Cases). + +%% Create a list with N elements initialized to Init +n_list(0,_) -> []; +n_list(N,Init) -> [Init | n_list(N-1,Init)]. + +%% Do the dirty work of minclusters +i_loop(I,N,MinClusters,_Cases) when I > N -> + MinClusters; +i_loop(I,N,MinClusters,Cases) when I =< N -> + M = j_loop(0, I-1, MinClusters, Cases), + i_loop(I+1, N, M, Cases). + +%% More dirty work +j_loop(J,I1,MinClusters,_Cases) when J > I1 -> + MinClusters; +j_loop(J,I1,MinClusters,Cases) when J =< I1 -> + D = density(Cases,J+1,I1+1), + A0 = element(J+1,MinClusters), + A = if + is_number(A0) -> + A0+1; + true -> + A0 + end, + B = element(I1+2,MinClusters), + M = if + D >= ?MINDENSITY, A + setelement(I1+2,MinClusters,A); + true -> + MinClusters + end, + j_loop(J+1,I1,M,Cases). + + +%% Determine the density of a (subset of a) case list +%% A is a tuple with the cases in order from smallest to largest +%% I is the index of the first element and J of the last + +density(A,I,J) -> + {AI,_} = element(I,A), + {AJ,_} = element(J,A), + (J-I+1)/(hipe_icode:const_value(AJ)-hipe_icode:const_value(AI)+1). + + +%% Split a case list into dense clusters +%% Returns a list of lists of cases. +%% +%% Cases is the case list and Clust is a list describing the optimal +%% clustering as returned by minclusters +%% +%% If the value in the last place in minclusters is M then we can +%% split the case list into M clusters. We then search for the last +%% (== right-most) occurance of the value M-1 in minclusters. That +%% indicates the largest number of cases that can be split into M-1 +%% clusters. This means that the cases in between constitute one +%% cluster. Then we recurse on the remainder of the cases. +%% +%% The various calls to lists:reverse are just to ensure that the +%% cases remain in the correct, sorted order. + +cluster_split(Cases, Clust) -> + A = tl(tuple_to_list(Clust)), + Max = element(tuple_size(Clust), Clust), + L1 = lists:reverse(Cases), + L2 = lists:reverse(A), + cluster_split(Max, [], [], L1, L2). + +cluster_split(0, [], Res, Cases, _Clust) -> + L = lists:reverse(Cases), + {H,_} = hd(L), + {T,_} = hd(Cases), + [{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),L}|Res]; +cluster_split(N, [], Res, Cases, [N|_] = Clust) -> + cluster_split(N-1, [], Res, Cases, Clust); +cluster_split(N,Sofar,Res,Cases,[N|Clust]) -> + {H,_} = hd(Sofar), + {T,_} = lists:last(Sofar), + cluster_split(N-1,[],[{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),Sofar}|Res],Cases,[N|Clust]); +cluster_split(N,Sofar,Res,[C|Cases],[_|Clust]) -> + cluster_split(N,[C|Sofar],Res,Cases,Clust). + +%% +%% Merge adjacent small clusters into larger sparse clusters +%% +cluster_merge([C]) -> [C]; +cluster_merge([{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE -> + C2 = cluster_merge(T), + [{dense,Min,Max,C}|C2]; +cluster_merge([{sparse,Min,_,C},{sparse,_,Max,D}|T]) -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([{sparse,Min,_,C},{dense,_,Max,D}|T]) when length(D) < ?MINFORJUMPTABLE -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([{dense,Min,_,C},{dense,_,Max,D}|T]) when length(C) < ?MINFORJUMPTABLE, length(D) < ?MINFORJUMPTABLE -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([{dense,Min,_,D},{sparse,_,Max,C}|T]) when length(D) < ?MINFORJUMPTABLE -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([A,{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE -> + R = cluster_merge([{dense,Min,Max,C}|T]), + [A|R]. + + +%% Generate code to search for the correct cluster + +find_cluster([{sparse,_Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,_IcodeFail) -> + case length(C) < ?MINFORINTSEARCHTREE of + true -> + gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options); + _ -> + gen_search_switch_val(Arg,C,Fail,VarMap,ConstTab,Options) + end; +find_cluster([{dense,Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) -> + case length(C) < ?MINFORJUMPTABLE of + true -> + gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options); + _ -> + gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,C,Min) + end; +find_cluster([{Density,Min,Max,C}|T],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) -> + ClustLab = hipe_rtl:mk_new_label(), + NextLab = hipe_rtl:mk_new_label(), + {ClustCode,V1,C1} = find_cluster([{Density,Min,Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail), + + {Rest,V2,C2} = find_cluster(T,V1,C1,Options,Arg,Fail,IcodeFail), + + {[ + hipe_rtl:mk_branch(Arg, gt, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Max)), + hipe_rtl:label_name(NextLab), + hipe_rtl:label_name(ClustLab), 0.50), + ClustLab + ] ++ + ClustCode ++ + [NextLab] ++ + Rest, + V2,C2}. + +%% Generate efficient code for a linear search through the case list. +%% Only works for atoms and integer. +gen_linear_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> + {Values,_Labels} = split_cases(Cases), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + Code = fast_linear_search(Arg,Values,LabMap,Fail), + {Code,VarMap1,ConstTab}. + +fast_linear_search(_Arg,[],[],Fail) -> + [hipe_rtl:mk_goto(hipe_rtl:label_name(Fail))]; +fast_linear_search(Arg,[Case|Cases],[Label|Labels],Fail) -> + Reg = hipe_rtl:mk_new_reg_gcsafe(), + NextLab = hipe_rtl:mk_new_label(), + C2 = fast_linear_search(Arg,Cases,Labels,Fail), + C1 = + if + is_integer(Case) -> + TVal = hipe_tagscheme:mk_fixnum(Case), + [ + hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(TVal)), + hipe_rtl:mk_branch(Arg,eq,Reg, + Label, + hipe_rtl:label_name(NextLab), 0.5), + NextLab + ]; + is_atom(Case) -> + [ + hipe_rtl:mk_load_atom(Reg,Case), + hipe_rtl:mk_branch(Arg,eq,Reg, + Label, + hipe_rtl:label_name(NextLab), 0.5), + NextLab + ]; + true -> % This should never happen ! + ?EXIT({internal_error_in_switch_val,Case}) + end, + [C1,C2]. + + +%% Generate code to search through a small cluster of integers using +%% binary search +gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> + {Values,_Labels} = split_cases(Cases), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + Keys = [hipe_tagscheme:mk_fixnum(X) % Add tags to the values + || X <- Values], + Code = inline_search(Keys, LabMap, Arg, Fail), + {Code, VarMap1, ConstTab}. + + +%% Generate code to search through a small cluster of atoms +gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> + {Values, _Labels} = split_cases(Cases), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + LMap = [{label,L} || L <- LabMap], + {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values), + {NewConstTab2,LabId} = + hipe_consttab:insert_sorted_block(NewConstTab, word, LMap, Values), + Code = inline_atom_search(0, length(Cases)-1, Id, LabId, Arg, Fail, LabMap), + {Code, VarMap1, NewConstTab2}. + + +%% calculate the middle position of a list (+ 1 because of 1-indexing of lists) +get_middle(List) -> + N = length(List), + N div 2 + 1. + +%% get element [N1, N2] from a list +get_cases(_, 0, 0) -> + []; +get_cases([H|T], 0, N) -> + [H | get_cases(T, 0, N - 1)]; +get_cases([_|T], N1, N2) -> + get_cases(T, N1 - 1, N2 - 1). + + +%% inline_search/4 creates RTL code for a inlined binary search. +%% It requires two sorted tables - one with the keys to search +%% through and one with the corresponding labels to jump to. +%% +%% Input: +%% KeyList - A list of keys to search through. +%% LableList - A list of labels to jump to. +%% KeyReg - A register containing the key to search for. +%% Default - A label to jump to if the key is not found. +%% + +inline_search([], _LabelList, _KeyReg, _Default) -> []; +inline_search(KeyList, LabelList, KeyReg, Default) -> + %% Create some registers and labels that we need. + Reg = hipe_rtl:mk_new_reg_gcsafe(), + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + + Length = length(KeyList), + + if + Length >= 3 -> + %% Get middle element and keys/labels before that and after + Middle_pos = get_middle(KeyList), + Middle_key = lists:nth(Middle_pos, KeyList), + Keys_beginning = get_cases(KeyList, 0, Middle_pos - 1), + Labels_beginning = get_cases(LabelList, 0, Middle_pos - 1), + Keys_ending = get_cases(KeyList, Middle_pos, Length), + Labels_ending = get_cases(LabelList, Middle_pos, Length), + + %% Create the code. + + %% Get the label and build it up properly + Middle_label = lists:nth(Middle_pos, LabelList), + + A = [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(Middle_key)), + hipe_rtl:mk_branch(KeyReg, lt, Reg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1), 0.5), + Lab1, + hipe_rtl:mk_branch(KeyReg, gt, Reg, + hipe_rtl:label_name(Lab3), + Middle_label , 0.5), + Lab2], + %% build search tree for keys less than the middle element + B = inline_search(Keys_beginning, Labels_beginning, KeyReg, Default), + %% ...and for keys bigger than the middle element + D = inline_search(Keys_ending, Labels_ending, KeyReg, Default), + + %% append the code and return it + A ++ B ++ [Lab3] ++ D; + + Length =:= 2 -> + %% get the first and second elements and theirs labels + Key_first = hd(KeyList), + First_label = hd(LabelList), + + %% Key_second = hipe_tagscheme:mk_fixnum(lists:nth(2, KeyList)), + Key_second = lists:nth(2, KeyList), + Second_label = lists:nth(2, LabelList), + + NewLab = hipe_rtl:mk_new_label(), + + %% compare them + A = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_first)), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + First_label, + hipe_rtl:label_name(NewLab) , 0.5), + NewLab], + + B = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_second)), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + Second_label, + hipe_rtl:label_name(Default) , 0.5)], + A ++ B; + + Length =:= 1 -> + Key = hd(KeyList), + Label = hd(LabelList), + + [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key)), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + Label, + hipe_rtl:label_name(Default) , 0.5)] + end. + + +inline_atom_search(Start, End, Block, LBlock, KeyReg, Default, Labels) -> + Reg = hipe_rtl:mk_new_reg_gcsafe(), + + Length = (End - Start) + 1, + + if + Length >= 3 -> + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + Lab4 = hipe_rtl:mk_new_label(), + + Mid = ((End-Start) div 2)+Start, + End1 = Mid-1, + Start1 = Mid+1, + A = [ + hipe_rtl:mk_load_word_index(Reg,Block,Mid), + hipe_rtl:mk_branch(KeyReg, lt, Reg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1), 0.5), + Lab1, + hipe_rtl:mk_branch(KeyReg, gt, Reg, + hipe_rtl:label_name(Lab3), + hipe_rtl:label_name(Lab4), 0.5), + Lab4, + hipe_rtl:mk_goto_index(LBlock, Mid, Labels), + Lab2 + ], + B = [inline_atom_search(Start,End1,Block,LBlock,KeyReg,Default,Labels)], + C = [inline_atom_search(Start1,End,Block,LBlock,KeyReg,Default,Labels)], + A ++ B ++ [Lab3] ++ C; + + Length =:= 2 -> + L1 = hipe_rtl:mk_new_label(), + L2 = hipe_rtl:mk_new_label(), + L3 = hipe_rtl:mk_new_label(), + [ + hipe_rtl:mk_load_word_index(Reg,Block,Start), + hipe_rtl:mk_branch(KeyReg,eq,Reg, + hipe_rtl:label_name(L1), + hipe_rtl:label_name(L2), 0.5), + L1, + hipe_rtl:mk_goto_index(LBlock,Start,Labels), + + L2, + hipe_rtl:mk_load_word_index(Reg,Block,End), + hipe_rtl:mk_branch(KeyReg,eq,Reg, + hipe_rtl:label_name(L3), + hipe_rtl:label_name(Default), 0.5), + L3, + hipe_rtl:mk_goto_index(LBlock, End, Labels) + ]; + + Length =:= 1 -> + NewLab = hipe_rtl:mk_new_label(), + [ + hipe_rtl:mk_load_word_index(Reg,Block,Start), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + hipe_rtl:label_name(NewLab), + hipe_rtl:label_name(Default), 0.9), + NewLab, + hipe_rtl:mk_goto_index(LBlock, Start, Labels) + ] + end. + + +%% Create a jumptable +gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,Cases,Min) -> + %% Map is a rtl mapping of Dense + {Max,DenseTbl} = dense_interval(Cases,Min,IcodeFail), + {Map,VarMap2} = lbls_from_cases(DenseTbl,VarMap), + + %% Make some labels and registers that we need. + BelowLab = hipe_rtl:mk_new_label(), + UntaggedR = hipe_rtl:mk_new_reg_gcsafe(), + StartR = hipe_rtl:mk_new_reg_gcsafe(), + + %% Generate the code to do the switch... + {[ + %% Untag the index. + hipe_tagscheme:untag_fixnum(UntaggedR, Arg)| + %% Check that the index is within Min and Max. + case Min of + 0 -> %% First element is 0 this is simple. + [hipe_rtl:mk_branch(UntaggedR, gtu, hipe_rtl:mk_imm(Max), + hipe_rtl:label_name(Fail), + hipe_rtl:label_name(BelowLab), 0.01), + BelowLab, + %% StartR contains the index into the jumptable + hipe_rtl:mk_switch(UntaggedR, Map)]; + _ -> %% First element is not 0 + [hipe_rtl:mk_alu(StartR, UntaggedR, sub, + hipe_rtl:mk_imm(Min)), + hipe_rtl:mk_branch(StartR, gtu, hipe_rtl:mk_imm(Max-Min), + hipe_rtl:label_name(Fail), + hipe_rtl:label_name(BelowLab), 0.01), + BelowLab, + %% StartR contains the index into the jumptable + hipe_rtl:mk_switch(StartR, Map)] + end], + VarMap2, + ConstTab}. + + +%% Generate the jumptable for Cases while filling in unused positions +%% with the fail label + +dense_interval(Cases, Min, IcodeFail) -> + dense_interval(Cases, Min, IcodeFail, 0, 0). +dense_interval([Pair = {Const,_}|Rest], Pos, Fail, Range, NoEntries) -> + Val = hipe_icode:const_value(Const), + if + Pos < Val -> + {Max, Res} = + dense_interval([Pair|Rest], Pos+1, Fail, Range+1, NoEntries), + {Max,[{hipe_icode:mk_const(Pos), Fail}|Res]}; + true -> + {Max, Res} = dense_interval(Rest, Pos+1, Fail, Range+1, NoEntries+1), + {Max, [Pair | Res]} + end; +dense_interval([], Max, _, _, _) -> + {Max-1, []}. + + +%%------------------------------------------------------------------------- +%% switch_val without jumptable +%% + +gen_slow_switch_val(I, VarMap, ConstTab, Options) -> + Is = rewrite_switch_val(I), + ?IF_DEBUG_LEVEL(3,?msg("Switch: ~w\n", [Is]), no_debug), + hipe_icode2rtl:translate_instrs(Is, VarMap, ConstTab, Options). + +rewrite_switch_val(I) -> + Var = hipe_icode:switch_val_term(I), + Fail = hipe_icode:switch_val_fail_label(I), + Cases = hipe_icode:switch_val_cases(I), + rewrite_switch_val_cases(Cases, Fail, Var). + +rewrite_switch_val_cases([{C,L}|Cases], Fail, Arg) -> + Tmp = hipe_icode:mk_new_var(), + NextLab = hipe_icode:mk_new_label(), + [hipe_icode:mk_move(Tmp, C), + hipe_icode:mk_if(op_exact_eqeq_2, [Arg, Tmp], L, + hipe_icode:label_name(NextLab)), + NextLab | + rewrite_switch_val_cases(Cases, Fail, Arg)]; +rewrite_switch_val_cases([], Fail, _Arg) -> + [hipe_icode:mk_goto(Fail)]. + + +%%------------------------------------------------------------------------- +%% switch_val with binary search jumptable +%% + +gen_search_switch_val(Arg, Cases, Default, VarMap, ConstTab, _Options) -> + ValTableR = hipe_rtl:mk_new_reg_gcsafe(), + + {Values,_Labels} = split_cases(Cases), + {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + + Code = + [hipe_rtl:mk_load_address(ValTableR, Id, constant)| + tab(Values,LabMap,Arg,ValTableR,Default)], + {Code, VarMap1, NewConstTab}. + + +%%------------------------------------------------------------------------- +%% +%% tab/5 creates RTL code for a binary search. +%% It requires two sorted tables one with the keys to search +%% through and one with the corresponding labels to jump to. +%% +%% The implementation is derived from John Bentlys +%% Programming Pearls. +%% +%% Input: +%% KeyList - A list of keys to search through. +%% (Just used to calculate the number of elements.) +%% LableList - A list of labels to jump to. +%% KeyReg - A register containing the key to search for. +%% TablePntrReg - A register containing a pointer to the +%% tables with keys +%% Default - A lable to jump to if the key is not found. +%% +%% Example: +%% KeyTbl: < a, b, d, f, h, i, z > +%% Lbls: < 5, 3, 2, 4, 1, 7, 6 > +%% Default: 8 +%% KeyReg: v37 +%% TablePntrReg: r41 +%% +%% should give code like: +%% r41 <- KeyTbl +%% r42 <- 0 +%% r43 <- [r41+16] +%% if (r43 gt v37) then L17 (0.50) else L16 +%% L16: +%% r42 <- 16 +%% goto L17 +%% L17: +%% r46 <- r42 add 16 +%% r45 <- [r41+r46] +%% if (r45 gt v37) then L21 (0.50) else L20 +%% L20: +%% r42 <- r46 +%% goto L21 +%% L21: +%% r48 <- r42 add 8 +%% r47 <- [r41+r48] +%% if (r47 gt v37) then L23 (0.50) else L22 +%% L22: +%% r42 <- r48 +%% goto L23 +%% L23: +%% r50 <- r42 add 4 +%% r49 <- [r41+r50] +%% if (r49 gt v37) then L25 (0.50) else L24 +%% L24: +%% r42 <- r42 add 4 +%% goto L25 +%% L25: +%% if (r42 gt 28) then L6 (0.50) else L18 +%% L18: +%% r44 <- [r41+r42] +%% if (r44 eq v37) then L19 (0.90) else L8 +%% L19: +%% r42 <- r42 sra 2 +%% switch (r42) + +%% +%% The search is done like a rolled out binary search, +%% but instead of starting in the middle we start at +%% the power of two closest above the middle. +%% +%% We let IndexReg point to the lower bound of our +%% search, and then we speculatively look at a +%% position at IndexReg + I where I is a power of 2. +%% +%% Example: Looking for 'h' in +%% KeyTbl: < a, b, d, f, h, i, z > +%% +%% We start with IndexReg=0 and I=4 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg + I +%% +%% 'f' < 'h' so we add I to IndexReg and divide I with 2 +%% IndexReg=4 and I=2 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg + I +%% +%% 'i' > 'h' so we keep IndexReg and divide I with 2 +%% IndexReg=4 and I=1 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg+ I +%% Now we have found 'h' so we add I to IndexReg -> 5 +%% And we can load switch to the label at position 5 in +%% the label table. +%% +%% Now since the wordsize is 4 all numbers above are +%% Multiples of 4. + +tab(KeyList, LabelList, KeyReg, TablePntrReg, Default) -> + %% Calculate the size of the table: + %% the number of keys * wordsize + LastOffset = (length(KeyList)-1)*?WORDSIZE, + + %% Calculate the power of two closest to the size of the table. + Pow2 = 1 bsl trunc(math:log(LastOffset) / math:log(2)), + + %% Create some registers and lables that we need + IndexReg = hipe_rtl:mk_new_reg_gcsafe(), + Temp = hipe_rtl:mk_new_reg_gcsafe(), + Temp2 = hipe_rtl:mk_new_reg_gcsafe(), + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + Lab4 = hipe_rtl:mk_new_label(), + + %% Calculate the position to start looking at + Init = (LastOffset)-Pow2, + + %% Create the code + [ + hipe_rtl:mk_move(IndexReg,hipe_rtl:mk_imm(0)), + hipe_rtl:mk_load(Temp,TablePntrReg,hipe_rtl:mk_imm(Init)), + hipe_rtl:mk_branch(Temp, geu, KeyReg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1), 0.5), + Lab1, + hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(Init+?WORDSIZE)), + hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), + Lab2] ++ + + step(Pow2 div 2, TablePntrReg, IndexReg, KeyReg) ++ + + [hipe_rtl:mk_branch(IndexReg, gt, hipe_rtl:mk_imm(LastOffset), + hipe_rtl:label_name(Default), + hipe_rtl:label_name(Lab3), 0.5), + Lab3, + hipe_rtl:mk_load(Temp2,TablePntrReg,IndexReg), + hipe_rtl:mk_branch(Temp2, eq, KeyReg, + hipe_rtl:label_name(Lab4), + hipe_rtl:label_name(Default), 0.9), + Lab4, + hipe_rtl:mk_alu(IndexReg, IndexReg, sra, + hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), + hipe_rtl:mk_sorted_switch(IndexReg, LabelList, KeyList) + ]. + +step(I,TablePntrReg,IndexReg,KeyReg) -> + Temp = hipe_rtl:mk_new_reg_gcsafe(), + TempIndex = hipe_rtl:mk_new_reg_gcsafe(), + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_alu(TempIndex, IndexReg, add, hipe_rtl:mk_imm(I)), + hipe_rtl:mk_load(Temp,TablePntrReg,TempIndex), + hipe_rtl:mk_branch(Temp, gtu, KeyReg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1) , 0.5), + Lab1] ++ + case ?WORDSIZE of + I -> %% Recursive base case + [hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(I)), + hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), + Lab2 + ]; + _ -> %% Recursion case + [hipe_rtl:mk_move(IndexReg, TempIndex), + hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), + Lab2 + | step(I div 2, TablePntrReg, IndexReg, KeyReg) + ] + end. + +%%------------------------------------------------------------------------- + +lbls_from_cases([{_,L}|Rest], VarMap) -> + {Map,VarMap1} = lbls_from_cases(Rest, VarMap), + {RtlL, VarMap2} = hipe_rtl_varmap:icode_label2rtl_label(L,VarMap1), + %% {[{label,hipe_rtl:label_name(RtlL)}|Map],VarMap2}; + {[hipe_rtl:label_name(RtlL)|Map],VarMap2}; +lbls_from_cases([], VarMap) -> + {[], VarMap}. + +%%------------------------------------------------------------------------- + +split_cases(L) -> + split_cases(L, [], []). + +split_cases([], Vs, Ls) -> {lists:reverse(Vs),lists:reverse(Ls)}; +split_cases([{V,L}|Rest], Vs, Ls) -> + split_cases(Rest, [hipe_icode:const_value(V)|Vs], [L|Ls]). + +%%------------------------------------------------------------------------- +%% +%% {switch_tuple_arity,X,Fail,N,[{A1,L1},...,{AN,LN}]} +%% +%% if not boxed(X) goto Fail +%% Hdr := *boxed_val(X) +%% switch_int(Hdr,Fail,[{H(A1),L1},...,{H(AN),LN}]) +%% where H(Ai) = make_arityval(Ai) +%% +%%------------------------------------------------------------------------- + +gen_switch_tuple(I, Map, ConstTab, _Options) -> + Var = hipe_icode:switch_tuple_arity_term(I), + {X, Map1} = hipe_rtl_varmap:icode_var2rtl_var(Var, Map), + Fail0 = hipe_icode:switch_tuple_arity_fail_label(I), + {Fail1, Map2} = hipe_rtl_varmap:icode_label2rtl_label(Fail0, Map1), + FailLab = hipe_rtl:label_name(Fail1), + {Cases, Map3} = + lists:foldr(fun({A,L}, {Rest,M}) -> + {L1,M1} = hipe_rtl_varmap:icode_label2rtl_label(L, M), + L2 = hipe_rtl:label_name(L1), + A1 = hipe_icode:const_value(A), + H1 = hipe_tagscheme:mk_arityval(A1), + {[{H1,L2}|Rest], M1} end, + {[], Map2}, + hipe_icode:switch_tuple_arity_cases(I)), + Hdr = hipe_rtl:mk_new_reg_gcsafe(), + IsBoxedLab = hipe_rtl:mk_new_label(), + {[hipe_tagscheme:test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), + FailLab, 0.9), + IsBoxedLab, + hipe_tagscheme:get_header(Hdr, X) | + gen_switch_int(Hdr, FailLab, Cases)], + Map3, ConstTab}. + +%% +%% RTL-level switch-on-int +%% + +gen_switch_int(X, FailLab, [{C,L}|Rest]) -> + NextLab = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(C), L, + hipe_rtl:label_name(NextLab), 0.5), + NextLab | + gen_switch_int(X, FailLab, Rest)]; +gen_switch_int(_, FailLab, []) -> + [hipe_rtl:mk_goto(FailLab)]. + diff --git a/lib/hipe/rtl/hipe_rtl_primops.erl b/lib/hipe/rtl/hipe_rtl_primops.erl new file mode 100644 index 0000000000..560e0259f8 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_primops.erl @@ -0,0 +1,1259 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_rtl_primops.erl +%% Purpose : +%% Notes : +%% History : * 2001-03-15 Erik Johansson (happi@it.uu.se): +%% Created. +%% +%% $Id$ +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_primops). + +-export([gen_primop/3, gen_enter_primop/3, gen_call_builtin/6, + gen_enter_builtin/2]). + +%% -------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../icode/hipe_icode_primops.hrl"). +-include("hipe_rtl.hrl"). +-include("hipe_literals.hrl"). + +%% -------------------------------------------------------------------- +%% Handling of known MFA builtins that are inline expanded + +gen_call_builtin(Fun, Dst, Args, IsGuard, Cont, Fail) -> + case Fun of + {erlang, apply, 3} -> + gen_apply(Dst, Args, Cont, Fail); + + {erlang, element, 2} -> + gen_element(Dst, Args, IsGuard, Cont, Fail); + + {erlang, self, 0} -> + gen_self(Dst, Cont); + + {erlang, is_tuple, 1} -> + gen_is_tuple(Dst, Args, Cont); + + {hipe_bifs, in_native, 0} -> + Dst1 = + case Dst of + [] -> %% The result is not used. + hipe_rtl:mk_new_var(); + [Dst0] -> Dst0 + end, + [hipe_rtl:mk_load_atom(Dst1, true), hipe_rtl:mk_goto(Cont)]; + + _ -> [] % not a builtin + end. + +%% (Recall that enters cannot occur within a catch-region in the same +%% function, so we do not need to consider fail-continuations here.) +%% TODO: should we inline expand more functions here? Cf. above. +gen_enter_builtin(Fun, Args) -> + case Fun of + {erlang, apply, 3} -> + gen_enter_apply(Args); + +%% TODO +%% {erlang, element, 2} -> +%% gen_enter_element(Args, IsGuard); + +%% TODO +%% {erlang, self, 0} -> +%% gen_enter_self(); + + {hipe_bifs, in_native, 0} -> + Dst = hipe_rtl:mk_new_var(), + [hipe_rtl:mk_load_atom(Dst, true), hipe_rtl:mk_return([Dst])]; + + _ -> [] % not a builtin + end. + +%% -------------------------------------------------------------------- +%% Generate code to jump to in case the inlined function fails. + +gen_fail_code(Fail, Type) -> + gen_fail_code(Fail, Type, false). + +gen_fail_code(Fail, Type, IsGuard) -> + case IsGuard of + true when Fail =/= [] -> + {Fail, []}; % go directly to target + false -> + NewLabel = hipe_rtl:mk_new_label(), + NewLabelName = hipe_rtl:label_name(NewLabel), + {NewLabelName, [NewLabel | fail_code(Fail, Type)]} + end. + +fail_code(Fail, Type) when is_atom(Type) -> + Var = hipe_rtl:mk_new_var(), + [hipe_rtl:mk_load_atom(Var, Type), + hipe_rtl_exceptions:gen_fail(error, [Var], Fail)]; +fail_code(Fail, {Type, Value}) when is_atom(Type) -> + Var = hipe_rtl:mk_new_var(), + [hipe_rtl:mk_load_atom(Var, Type), + hipe_rtl:mk_gctest(3), % room for a 2-tuple + gen_mk_tuple(Var,[Var,Value]), + hipe_rtl_exceptions:gen_fail(error, [Var], Fail)]. + +fp_fail_code(TmpFailLbl, FailLbl) -> + [TmpFailLbl | + hipe_rtl_arch:handle_fp_exception() ++ + [fail_code(FailLbl, badarith)]]. + +%% -------------------------------------------------------------------- +%% CALL PRIMOP +%% +%% @doc +%% Generates RTL code for primops. This is mostly a dispatch function. +%% Tail calls to primops (enter_fun, apply, etc.) are not handled here! +%% @end + +gen_primop({Op,Dst,Args,Cont,Fail}, IsGuard, ConstTab) -> + GotoCont = hipe_rtl:mk_goto(Cont), + case Op of + %% + %% Binary Syntax + %% + {hipe_bs_primop, BsOP} -> + {FailLabelName, FailCode1} = gen_fail_code(Fail, badarg, IsGuard), + {SysLimLblName, FailCode2} = gen_fail_code(Fail, system_limit, IsGuard), + {Code1,NewConstTab} = + hipe_rtl_binary:gen_rtl(BsOP, Dst, Args, Cont, FailLabelName, + SysLimLblName, ConstTab), + {[Code1,FailCode1,FailCode2], NewConstTab}; + %% + %% Other primops + %% + _ -> + Code = + case Op of + %% Arithmetic + '+' -> + %gen_extra_unsafe_add_2(Dst, Args, Cont); + gen_add_sub_2(Dst, Args, Cont, Fail, Op, add); + '-' -> + gen_add_sub_2(Dst, Args, Cont, Fail, Op, sub); + '*' -> + gen_mul_2(Dst, Args, Cont, Fail); + '/' -> + %% BIF call: am_Div -> nbif_div_2 -> erts_mixed_div + [hipe_rtl:mk_call(Dst, '/', Args, Cont, Fail, not_remote)]; + 'gen_add' -> + gen_general_add_sub(Dst, Args, Cont, Fail, '+'); + 'gen_sub' -> + gen_general_add_sub(Dst, Args, Cont, Fail, '-'); + 'unsafe_add' -> + %gen_extra_unsafe_add_2(Dst, Args, Cont); + gen_unsafe_add_sub_2(Dst, Args, Cont, Fail, '+', add); + 'extra_unsafe_add' -> + gen_extra_unsafe_add_2(Dst, Args, Cont); + 'unsafe_sub' -> + gen_unsafe_add_sub_2(Dst, Args, Cont, Fail, '-', sub); + 'extra_unsafe_sub' -> + gen_extra_unsafe_sub_2(Dst, Args, Cont); + %'unsafe_mul' -> + % gen_unsafe_mul_2(Dst, Args, Cont, Fail, '*'); + 'div' -> + %% BIF call: am_div -> nbif_intdiv_2 -> intdiv_2 + [hipe_rtl:mk_call(Dst, 'div', Args, Cont, Fail, not_remote)]; + 'rem' -> + %% BIF call: am_rem -> nbif_rem_2 -> rem_2 + [hipe_rtl:mk_call(Dst, 'rem', Args, Cont, Fail, not_remote)]; + 'band' -> + gen_bitop_2(Dst, Args, Cont, Fail, Op, 'and'); + 'bor' -> + gen_bitop_2(Dst, Args, Cont, Fail, Op, 'or'); + 'bxor' -> + gen_bitop_2(Dst, Args, Cont, Fail, Op, 'xor'); + 'bnot' -> + gen_bnot_2(Dst, Args, Cont, Fail, Op); + 'bsr'-> + %% BIF call: am_bsr -> nbif_bsr_2 -> bsr_2 + gen_bsr_2(Dst, Args, Cont, Fail, Op); + %[hipe_rtl:mk_call(Dst, 'bsr', Args, Cont, Fail, not_remote)]; + 'bsl' -> + %% BIF call: am_bsl -> nbif_bsl_2 -> bsl_2 + [hipe_rtl:mk_call(Dst, 'bsl', Args, Cont, Fail, not_remote)]; + unsafe_band -> + gen_unsafe_bitop_2(Dst, Args, Cont, 'and'); + unsafe_bor -> + gen_unsafe_bitop_2(Dst, Args, Cont, 'or'); + unsafe_bxor -> + gen_unsafe_bitop_2(Dst, Args, Cont, 'xor'); + unsafe_bnot -> + gen_unsafe_bnot_2(Dst, Args, Cont); + unsafe_bsr -> + gen_unsafe_bsr_2(Dst, Args, Cont); + unsafe_bsl -> + gen_unsafe_bsl_2(Dst, Args, Cont); + %%--------------------------------------------- + %% List handling + %%--------------------------------------------- + cons -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + [gen_cons(Dst1, Args), GotoCont] + end; + unsafe_hd -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + [gen_unsafe_hd(Dst1, Args), GotoCont] + end; + unsafe_tl -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + [gen_unsafe_tl(Dst1, Args),GotoCont] + end; + %%--------------------------------------------- + %% Tuple handling + %%--------------------------------------------- + mktuple -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + [gen_mk_tuple(Dst1, Args),GotoCont] + end; + #unsafe_element{index=N} -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + [Tuple] = Args, + [gen_unsafe_element(Dst1, hipe_rtl:mk_imm(N), Tuple),GotoCont] + end; + #unsafe_update_element{index=N} -> + [Dst1] = Dst, + [Tuple, Value] = Args, + [gen_unsafe_update_element(Tuple, hipe_rtl:mk_imm(N), Value), + hipe_rtl:mk_move(Dst1, Tuple), + GotoCont]; + {element, [TupleInfo, IndexInfo]} -> + Dst1 = + case Dst of + [] -> %% The result is not used. + hipe_rtl:mk_new_var(); + [Dst0] -> Dst0 + end, + [Index, Tuple] = Args, + [gen_element_1(Dst1, Index, Tuple, IsGuard, Cont, Fail, + TupleInfo, IndexInfo)]; + + %%--------------------------------------------- + %% Apply-fixarity + %%--------------------------------------------- + #apply_N{arity = Arity} -> + gen_apply_N(Dst, Arity, Args, Cont, Fail); + + %%--------------------------------------------- + %% GC test + %%--------------------------------------------- + #gc_test{need = Need} -> + [hipe_rtl:mk_gctest(Need), GotoCont]; + + %%--------------------------------------------- + %% Process handling + %%--------------------------------------------- + redtest -> + [gen_redtest(1), GotoCont]; + %%--------------------------------------------- + %% Receives + %%--------------------------------------------- + check_get_msg -> + gen_check_get_msg(Dst, GotoCont, Fail); + next_msg -> + gen_next_msg(Dst, GotoCont); + select_msg -> + gen_select_msg(Dst, Cont); + clear_timeout -> + gen_clear_timeout(Dst, GotoCont); + set_timeout -> + %% BIF call: am_set_timeout -> nbif_set_timeout -> hipe_set_timeout + [hipe_rtl:mk_call(Dst, set_timeout, Args, Cont, Fail, not_remote)]; + suspend_msg -> + gen_suspend_msg(Dst, Cont); + %%--------------------------------------------- + %% Closures + %%--------------------------------------------- + call_fun -> + gen_call_fun(Dst, Args, Cont, Fail); + #mkfun{mfa=MFA, magic_num=MagicNum, index=Index} -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + _ -> + [gen_mkfun(Dst, MFA, MagicNum, Index, Args), GotoCont] + end; + #closure_element{n=N} -> + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + [Closure] = Args, + [gen_closure_element(Dst1, hipe_rtl:mk_imm(N), Closure), + GotoCont] + end; + %%--------------------------------------------- + %% Floating point instructions. + %%--------------------------------------------- + fp_add -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fadd', Arg2); + [Dst1] -> + hipe_rtl:mk_fp(Dst1, Arg1, 'fadd', Arg2) + end; + fp_sub -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fsub', Arg2); + [Dst1] -> + hipe_rtl:mk_fp(Dst1, Arg1, 'fsub', Arg2) + end; + fp_mul -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fmul', Arg2); + [Dst1] -> + hipe_rtl:mk_fp(Dst1, Arg1, 'fmul', Arg2) + end; + fp_div -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fdiv', Arg2); + [Dst1] -> + hipe_rtl:mk_fp(Dst1, Arg1, 'fdiv', Arg2) + end; + fnegate -> + [Arg] = Args, + case Dst of + [] -> + hipe_rtl:mk_fp_unop(hipe_rtl:mk_new_fpreg(), Arg, 'fchs'); + [Dst1] -> + hipe_rtl:mk_fp_unop(Dst1, Arg, 'fchs') + end; + fclearerror -> + gen_fclearerror(); + fcheckerror -> + gen_fcheckerror(Cont, Fail); + conv_to_float -> + case Dst of + [] -> + gen_conv_to_float(hipe_rtl:mk_new_fpreg(), Args, Cont, Fail); + [Dst1] -> + gen_conv_to_float(Dst1, Args, Cont, Fail) + end; + unsafe_untag_float -> + [Arg] = Args, + case Dst of + [] -> + hipe_tagscheme:unsafe_untag_float(hipe_rtl:mk_new_fpreg(), + Arg); + [Dst1]-> + hipe_tagscheme:unsafe_untag_float(Dst1, Arg) + end; + unsafe_tag_float -> + [Arg] = Args, + case Dst of + [] -> + hipe_tagscheme:unsafe_tag_float(hipe_rtl:mk_new_var(), Arg); + [Dst1]-> + hipe_tagscheme:unsafe_tag_float(Dst1, Arg) + end; + + %% Only names listed above are accepted! MFA:s are not primops! + _ -> + erlang:error({bad_primop, Op}) + end, + {Code, ConstTab} + end. + +gen_enter_primop({Op, Args}, IsGuard, ConstTab) -> + case Op of + enter_fun -> + %% Tail-call to a closure must preserve tail-callness! + %% (Passing Continuation = [] to gen_call_fun/5 does this.) + Code = gen_call_fun([], Args, [], []), + {Code, ConstTab}; + + #apply_N{arity=Arity} -> + %% Tail-call to a closure must preserve tail-callness! + %% (Passing Continuation = [] to gen_apply_N/5 does this.) + Code = gen_apply_N([], Arity, Args, [], []), + {Code, ConstTab}; + + _ -> + %% All other primop tail calls are converted to call + return. + Dst = [hipe_rtl:mk_new_var()], + OkLab = hipe_rtl:mk_new_label(), + {Code,ConstTab1} = + gen_primop({Op,Dst,Args,hipe_rtl:label_name(OkLab),[]}, + IsGuard, ConstTab), + {Code ++ [OkLab, hipe_rtl:mk_return(Dst)], ConstTab1} + end. + + +%% -------------------------------------------------------------------- +%% ARITHMETIC +%% -------------------------------------------------------------------- + +%% +%% Inline addition & subtraction +%% + +gen_general_add_sub(Dst, Args, Cont, Fail, Op) -> + case Dst of + [] -> + [hipe_rtl:mk_call([hipe_rtl:mk_new_var()], + Op, Args, Cont, Fail, not_remote)]; + [Res] -> + [hipe_rtl:mk_call([Res], Op, Args, Cont, Fail, not_remote)] + end. + +gen_add_sub_2(Dst, Args, Cont, Fail, Op, AluOp) -> + [Arg1, Arg2] = Args, + GenCaseLabel = hipe_rtl:mk_new_label(), + case Dst of + [] -> + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenCaseLabel))| + gen_op_general_case(hipe_rtl:mk_new_var(), + Op, Args, Cont, Fail, GenCaseLabel)]; + [Res] -> + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenCaseLabel)), + hipe_tagscheme:fixnum_addsub(AluOp, Arg1, Arg2, Res, GenCaseLabel)| + gen_op_general_case(Res,Op, Args, Cont, Fail, GenCaseLabel)] + end. + +gen_unsafe_add_sub_2(Dst, Args, Cont, Fail, Op, AluOp) -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + [hipe_rtl:mk_goto(Cont)]; + [Res] -> + case Fail of + []-> + GenCaseLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:fixnum_addsub(AluOp, Arg1, Arg2, Res, GenCaseLabel)| + gen_op_general_case(Res,Op, Args, Cont, Fail, GenCaseLabel)]; + _ -> + [hipe_tagscheme:fixnum_addsub(AluOp, Arg1, Arg2, Res, + hipe_rtl:mk_label(Fail))] + end + end. + +gen_extra_unsafe_add_2(Dst, Args, Cont) -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + [hipe_rtl:mk_goto(Cont)]; + [Res] -> + hipe_tagscheme:unsafe_fixnum_add(Arg1, Arg2, Res) + end. + +gen_extra_unsafe_sub_2(Dst, Args, Cont) -> + [Arg1, Arg2] = Args, + case Dst of + [] -> + [hipe_rtl:mk_goto(Cont)]; + [Res] -> + hipe_tagscheme:unsafe_fixnum_sub(Arg1, Arg2, Res) + end. + +gen_op_general_case(Res, Op, Args, Cont, Fail, GenCaseLabel) -> + [hipe_rtl:mk_goto(Cont), + GenCaseLabel, + hipe_rtl:mk_call([Res], Op, Args, Cont, Fail, not_remote)]. + +%% +%% Inline multiplication +%% + +gen_mul_2(Dst, Args, Cont, Fail) -> + [Arg1,Arg2] = Args, + GenCaseLabel = hipe_rtl:mk_new_label(), + {Res1,I2} = + case Dst of + [] -> + {hipe_rtl:mk_new_var(), []}; + [Res0] -> + {Res0, hipe_tagscheme:fixnum_mul(Arg1, Arg2, Res0, GenCaseLabel)} + end, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, hipe_rtl:label_name(GenCaseLabel)), + I2, + %% BIF call: am_Times -> nbif_mul_2 -> erts_mixed_times + gen_op_general_case(Res1, '*', Args, Cont, Fail, GenCaseLabel)]. + +%% gen_unsafe_mul_2([Res], Args, Cont, Fail, Op) -> +%% [Arg1, Arg2] = Args, +%% GenCaseLabel = hipe_rtl:mk_new_label(), +%% [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, +%% hipe_rtl:label_name(GenCaseLabel)), +%% hipe_tagscheme:fixnum_mul(Arg1, Arg2, Res, GenCaseLabel)| +%% gen_op_general_case(Res, Op, Args, Cont, Fail, GenCaseLabel)]. + +%% +%% Inline bitoperations. +%% Only works for band, bor and bxor. +%% The shift operations are too expensive to inline. +%% + +gen_bitop_2(Res, Args, Cont, Fail, Op, BitOp) -> + [Arg1, Arg2] = Args, + GenCaseLabel = hipe_rtl:mk_new_label(), + case Res of + [] -> %% The result is not used. + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenCaseLabel))| + gen_op_general_case(hipe_rtl:mk_new_var(), + Op, Args, Cont, Fail, GenCaseLabel)]; + [Res0] -> + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenCaseLabel)), + hipe_tagscheme:fixnum_andorxor(BitOp, Arg1, Arg2, Res0)| + gen_op_general_case(Res0, Op, Args, Cont, Fail, GenCaseLabel)] + end. + +gen_unsafe_bitop_2(Res, Args, Cont, BitOp) -> + case Res of + [] -> %% The result is not used. + [hipe_rtl:mk_goto(Cont)]; + [Res0] -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_andorxor(BitOp, Arg1, Arg2, Res0), + hipe_rtl:mk_goto(Cont)] + end. + +gen_bsr_2(Res, Args, Cont, Fail, Op) -> + [Arg1, Arg2] = Args, + GenCaseLabel = hipe_rtl:mk_new_label(), + case hipe_rtl:is_imm(Arg2) of + true -> + Val = hipe_tagscheme:fixnum_val(hipe_rtl:imm_value(Arg2)), + Limit = ?bytes_to_bits(hipe_rtl_arch:word_size()), + if + Val < Limit, Val >= 0 -> + case Res of + [] -> + FixLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_fixnum(Arg1, + hipe_rtl:label_name(FixLabel), + hipe_rtl:label_name(GenCaseLabel), + 0.99), + FixLabel, + gen_op_general_case(hipe_rtl:mk_new_var(), Op, Args, Cont, Fail, + GenCaseLabel)]; + [Res0] -> + FixLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_fixnum(Arg1, + hipe_rtl:label_name(FixLabel), + hipe_rtl:label_name(GenCaseLabel), + 0.99), + FixLabel, + hipe_tagscheme:fixnum_bsr(Arg1, Arg2, Res0), + gen_op_general_case(Res0, Op, Args, Cont, Fail, GenCaseLabel)] + end; + true -> + [hipe_rtl:mk_call(Res, 'bsr', Args, Cont, Fail, not_remote)] + end; + false -> + [hipe_rtl:mk_call(Res, 'bsr', Args, Cont, Fail, not_remote)] + end. + +gen_unsafe_bsr_2(Res, Args, Cont) -> + case Res of + [] -> %% The result is not used. + [hipe_rtl:mk_goto(Cont)]; + [Res0] -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_bsr(Arg1, Arg2, Res0), + hipe_rtl:mk_goto(Cont)] + end. + +gen_unsafe_bsl_2(Res, Args, Cont) -> + case Res of + [] -> %% The result is not used. + [hipe_rtl:mk_goto(Cont)]; + [Res0] -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_bsl(Arg1, Arg2, Res0), + hipe_rtl:mk_goto(Cont)] + end. + +%% +%% Inline not. +%% + +gen_bnot_2(Res, Args, Cont, Fail, Op) -> + [Arg] = Args, + GenCaseLabel = hipe_rtl:mk_new_label(), + case Res of + [] -> %% The result is not used. + FixLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(FixLabel), + hipe_rtl:label_name(GenCaseLabel), 0.99), + FixLabel, + gen_op_general_case(hipe_rtl:mk_new_var(), Op, Args, Cont, Fail, + GenCaseLabel)]; + + [Res0] -> + FixLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(FixLabel), + hipe_rtl:label_name(GenCaseLabel), 0.99), + FixLabel, + hipe_tagscheme:fixnum_not(Arg, Res0), + gen_op_general_case(Res0, Op, Args, Cont, Fail, GenCaseLabel)] + end. + +gen_unsafe_bnot_2(Res, Args, Cont) -> + case Res of + [] -> %% The result is not used. + [hipe_rtl:mk_goto(Cont)]; + [Res0] -> + [Arg1] = Args, + [hipe_tagscheme:fixnum_not(Arg1, Res0), + hipe_rtl:mk_goto(Cont)] + end. + + +%% -------------------------------------------------------------------- +%% + +%% +%% Inline cons +%% + +gen_cons(Dst, [Arg1, Arg2]) -> + Tmp = hipe_rtl:mk_new_reg(), + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + WordSize = hipe_rtl_arch:word_size(), + HeapNeed = 2*WordSize, + [GetHPInsn, + hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0), Arg1), + hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(WordSize), Arg2), + hipe_rtl:mk_move(Tmp, HP), + hipe_tagscheme:tag_cons(Dst, Tmp), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(HeapNeed)), + PutHPInsn]. + +%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% -------------------------------------------------------------------- +%% Handling of closures... +%% -------------------------------------------------------------------- + +%% -------------------------------------------------------------------- +%% gen_mkfun +%% +%% The gc_test should have expanded to +%% unsigned needed = ERL_FUN_SIZE + num_free; +%% ErlFunThing* funp = (ErlFunThing *) HAlloc(p, needed); +%% +%% The code generated should do the equivalent of: +%% Copy arguments to the fun thing +%% Eterm* hp = funp->env; +%% for (i = 0; i < num_free; i++) { +%% *hp++ = reg[i]; +%% } +%% +%% Fill in fileds +%% funp->thing_word = HEADER_FUN; +%% funp->fe = fe; +%% funp->num_free = num_free; +%% funp->creator = p->id; +%% funp->native_code = fe->native_code; +%% Increase refcount +%% fe->refc++; +%% +%% Link to the process off_heap.funs list +%% funp->next = p->off_heap.funs; +%% p->off_heap.funs = funp; +%% +%% Tag the thing +%% return make_fun(funp); +%% +gen_mkfun([Dst], {_Mod, _FunId, _Arity} = MFidA, MagicNr, Index, FreeVars) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + NumFree = length(FreeVars), + + %% Copy arguments to the fun thing + %% Eterm* hp = funp->env; + %% for (i = 0; i < num_free; i++) { + %% *hp++ = reg[i]; + %% } + CopyFreeVarsCode = gen_free_vars(FreeVars, HP), + + %% Fill in fields + %% funp->thing_word = HEADER_FUN; + %% funp->fe = fe; + %% funp->num_free = num_free; + %% funp->creator = p->id; + %% funp->native_code = fe->native_code; + %% Increase refcount + %% fe->refc++; + SkeletonCode = gen_fun_thing_skeleton(HP, MFidA, NumFree, MagicNr, Index), + + %% Link to the process off_heap.funs list + %% funp->next = p->off_heap.funs; + %% p->off_heap.funs = funp; + LinkCode = gen_link_closure(HP), + + %% Tag the thing and increase the heap_pointer. + %% make_fun(funp); + WordSize = hipe_rtl_arch:word_size(), + HeapNeed = (?ERL_FUN_SIZE + NumFree) * WordSize, + TagCode = [hipe_tagscheme:tag_fun(Dst, HP), + %% AdjustHPCode + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(HeapNeed)), + PutHPInsn], + [[GetHPInsn | CopyFreeVarsCode], SkeletonCode, LinkCode, TagCode]. + + +gen_fun_thing_skeleton(FunP, FunName={_Mod,_FunId,Arity}, NumFree, + MagicNr, Index) -> + %% Assumes that funp == heap_pointer + %% Fill in fields + %% funp->thing_word = HEADER_FUN; + %% funp->fe = fe; + %% funp->num_free = num_free; + %% funp->creator = p->id; + %% funp->native_code = fe->native_code; + %% And creates a fe (at load time). + FeVar = hipe_rtl:mk_new_reg(), + PidVar = hipe_rtl:mk_new_reg_gcsafe(), + NativeVar = hipe_rtl:mk_new_reg(), + + [hipe_rtl:mk_load_address(FeVar, {FunName, MagicNr, Index}, closure), + store_struct_field(FunP, ?EFT_FE, FeVar), + load_struct_field(NativeVar, FeVar, ?EFE_NATIVE_ADDRESS), + store_struct_field(FunP, ?EFT_NATIVE_ADDRESS, NativeVar), + + store_struct_field(FunP, ?EFT_ARITY, hipe_rtl:mk_imm(Arity-NumFree)), + + gen_inc_refc(FeVar, ?EFE_REFC), + + store_struct_field(FunP, ?EFT_NUM_FREE, hipe_rtl:mk_imm(NumFree)), + load_p_field(PidVar, ?P_ID), + store_struct_field(FunP, ?EFT_CREATOR, PidVar), + store_struct_field(FunP, ?EFT_THING, hipe_tagscheme:mk_fun_header())]. + +gen_inc_refc(Ptr, Offset) -> + case ?ERTS_IS_SMP of + 0 -> gen_inc_refc_notsmp(Ptr, Offset); + 1 -> gen_inc_refc_smp(Ptr, Offset) + end. + +gen_inc_refc_notsmp(Ptr, Offset) -> + Refc = hipe_rtl:mk_new_reg(), + [load_struct_field(Refc, Ptr, Offset, int32), + hipe_rtl:mk_alu(Refc, Refc, add, hipe_rtl:mk_imm(1)), + store_struct_field(Ptr, Offset, Refc, int32)]. + +gen_inc_refc_smp(Ptr, Offset) -> + Refc = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_alu(Refc, Ptr, 'add', hipe_rtl:mk_imm(Offset)), + hipe_rtl:mk_call([], 'atomic_inc', [Refc], [], [], not_remote)]. + +gen_link_closure(FUNP) -> + case ?P_OFF_HEAP_FUNS of + [] -> gen_link_closure_non_private(FUNP); + _ -> gen_link_closure_private(FUNP) + end. + +gen_link_closure_private(FUNP) -> + %% Link to the process off_heap.funs list + %% funp->next = p->off_heap.funs; + %% p->off_heap.funs = funp; + FunsVar = hipe_rtl:mk_new_reg(), + + [load_p_field(FunsVar,?P_OFF_HEAP_FUNS), + hipe_rtl:mk_store(FUNP, hipe_rtl:mk_imm(?EFT_NEXT), FunsVar), + store_p_field(FUNP,?P_OFF_HEAP_FUNS)]. + +gen_link_closure_non_private(_FUNP) -> []. + +load_p_field(Dst,Offset) -> + hipe_rtl_arch:pcb_load(Dst, Offset). +store_p_field(Src, Offset) -> + hipe_rtl_arch:pcb_store(Offset, Src). + +store_struct_field(StructP, Offset, Src) -> + hipe_rtl:mk_store(StructP, hipe_rtl:mk_imm(Offset), Src). + +load_struct_field(Dest, StructP, Offset) -> + hipe_rtl:mk_load(Dest, StructP, hipe_rtl:mk_imm(Offset)). + +store_struct_field(StructP, Offset, Src, int32) -> + hipe_rtl:mk_store(StructP, hipe_rtl:mk_imm(Offset), Src, int32). + +load_struct_field(Dest, StructP, Offset, int32) -> + hipe_rtl:mk_load(Dest, StructP, hipe_rtl:mk_imm(Offset), int32, signed). + +gen_free_vars(Vars, HPReg) -> + HPVar = hipe_rtl:mk_new_var(), + WordSize = hipe_rtl_arch:word_size(), + [hipe_rtl:mk_alu(HPVar, HPReg, add, hipe_rtl:mk_imm(?EFT_ENV)) | + gen_free_vars(Vars, HPVar, 0, WordSize, [])]. + +gen_free_vars([Var|Vars], EnvPVar, Offset, WordSize, AccCode) -> + Code = hipe_rtl:mk_store(EnvPVar, hipe_rtl:mk_imm(Offset), Var), + gen_free_vars(Vars, EnvPVar, Offset + WordSize, WordSize, + [Code|AccCode]); +gen_free_vars([], _, _, _, AccCode) -> AccCode. + +%% ------------------------------------------------------------------ +%% +%% call_fun (also handles enter_fun when Continuation = []) + +gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> + NAddressReg = hipe_rtl:mk_new_reg(), + ArityReg = hipe_rtl:mk_new_reg_gcsafe(), + [Fun|RevArgs] = lists:reverse(ArgsAndFun), + + %% {BadFunLabName, BadFunCode} = gen_fail_code(Fail, {badfun, Fun}), + Args = lists:reverse(RevArgs), + NonClosureLabel = hipe_rtl:mk_new_label(), + CallNonClosureLabel = hipe_rtl:mk_new_label(), + BadFunLabName = hipe_rtl:label_name(NonClosureLabel), + BadFunCode = + [NonClosureLabel, + hipe_rtl:mk_call([NAddressReg], + 'nonclosure_address', + [Fun, hipe_rtl:mk_imm(length(Args))], + hipe_rtl:label_name(CallNonClosureLabel), + Fail, + not_remote), + CallNonClosureLabel, + case Continuation of + [] -> + hipe_rtl:mk_enter(NAddressReg, Args, not_remote); + _ -> + hipe_rtl:mk_call(Dst, NAddressReg, Args, + Continuation, Fail, not_remote) + end], + + {BadArityLabName, BadArityCode} = gen_fail_code(Fail, {badarity, Fun}), + + CheckGetCode = + hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, NAddressReg, + Fun, BadFunLabName, + 0.9), + CheckArityCode = check_arity(ArityReg, length(RevArgs), BadArityLabName), + CallCode = + case Continuation of + [] -> %% This is a tailcall + [hipe_rtl:mk_enter(NAddressReg, ArgsAndFun, not_remote)]; + _ -> %% Ordinary call + [hipe_rtl:mk_call(Dst, NAddressReg, ArgsAndFun, + Continuation, Fail, not_remote)] + end, + [CheckGetCode, CheckArityCode, CallCode, BadFunCode, BadArityCode]. + +check_arity(ArityReg, Arity, BadArityLab) -> + TrueLab1 = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_branch(ArityReg, eq, hipe_rtl:mk_imm(Arity), + hipe_rtl:label_name(TrueLab1), BadArityLab, 0.9), + TrueLab1]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% apply +%% +%% The tail call case is not handled here. + +gen_apply(Dst, Args = [_M,_F,_AppArgs], Cont, Fail) -> + %% Dst can be [Res] or []. + [hipe_rtl:mk_call(Dst, hipe_apply, Args, Cont, Fail, not_remote)]. + +gen_enter_apply(Args=[_M,_F,_AppArgs]) -> + %% 'apply' in tail-call context + [hipe_rtl:mk_enter(hipe_apply, Args, not_remote)]. + +%% +%% apply_N +%% also handles tailcall case (Cont=[]) +%% + +gen_apply_N(Dst, Arity, [M,F|CallArgs], Cont, Fail) -> + MM = hipe_rtl:mk_new_var(), + NotModuleLbl = hipe_rtl:mk_new_label(), + NotModuleLblName = hipe_rtl:label_name(NotModuleLbl), + Tuple = M, + Index = hipe_rtl:mk_imm(1), + IndexInfo = 1, + [hipe_tagscheme:element(MM, Index, Tuple, NotModuleLblName, unknown, IndexInfo), + gen_apply_N_common(Dst, Arity+1, MM, F, CallArgs ++ [M], Cont, Fail), + NotModuleLbl, + gen_apply_N_common(Dst, Arity, M, F, CallArgs, Cont, Fail)]. + +gen_apply_N_common(Dst, Arity, M, F, CallArgs, Cont, Fail) -> + CallLabel = hipe_rtl:mk_new_label(), + CodeAddress = hipe_rtl:mk_new_reg(), + [hipe_rtl:mk_call([CodeAddress], find_na_or_make_stub, + [M,F,hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Arity))], + hipe_rtl:label_name(CallLabel), + Fail, not_remote), + CallLabel, + case Cont of + [] -> % tailcall + hipe_rtl:mk_enter(CodeAddress, CallArgs, not_remote); + _ -> % recursive call + hipe_rtl:mk_call(Dst, CodeAddress, CallArgs, Cont, Fail, not_remote) + end]. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% mkTuple +%% + +gen_mk_tuple(Dst, Elements) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + Arity = length(Elements), + WordSize = hipe_rtl_arch:word_size(), + HeapNeed = (Arity+1)*WordSize, + [GetHPInsn, + gen_tuple_header(HP, Arity), + set_tuple_elements(HP, WordSize, WordSize, Elements, []), + hipe_tagscheme:tag_tuple(Dst, HP), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(HeapNeed)), + PutHPInsn]. + +set_tuple_elements(HP, Offset, WordSize, [Element|Elements], Stores) -> + Store = hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(Offset), Element), + set_tuple_elements(HP, Offset+WordSize, WordSize, Elements, [Store|Stores]); +set_tuple_elements(_, _, _, [], Stores) -> + lists:reverse(Stores). + +%% +%% @doc Generate RTL code for the reduction test. +%% +gen_redtest(Amount) -> + {GetFCallsInsn, FCalls, PutFCallsInsn} = hipe_rtl_arch:fcalls(), + SuspendLabel = hipe_rtl:mk_new_label(), + StayLabel = hipe_rtl:mk_new_label(), + ContinueLabel = hipe_rtl:mk_new_label(), + [GetFCallsInsn, + hipe_rtl:mk_alub(FCalls, FCalls, 'sub', hipe_rtl:mk_imm(Amount), 'lt', + hipe_rtl:label_name(SuspendLabel), + hipe_rtl:label_name(StayLabel), 0.01), + SuspendLabel, + %% The suspend path should not execute PutFCallsInsn. + hipe_rtl:mk_call([], suspend_0, [], + hipe_rtl:label_name(ContinueLabel), [], not_remote), + StayLabel, + PutFCallsInsn, + ContinueLabel]. + +gen_self(Dst, Cont) -> + case Dst of + [] -> %% The result is not used. + [hipe_rtl:mk_goto(Cont)]; + [Dst1] -> + [load_p_field(Dst1, ?P_ID), + hipe_rtl:mk_goto(Cont)] + end. + +%% +%% @doc Generate is_tuple/1 test +%% +gen_is_tuple(Dst, [Arg], Cont) -> + GotoCont = hipe_rtl:mk_goto(Cont), + case Dst of + [] -> %% The result is not used. + [GotoCont]; + [Dst1] -> + TrueLabel = hipe_rtl:mk_new_label(), + FalseLabel = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_tuple(Arg, hipe_rtl:label_name(TrueLabel), + hipe_rtl:label_name(FalseLabel), 0.5), + TrueLabel, + hipe_rtl:mk_load_atom(Dst1, true), + GotoCont, + FalseLabel, + hipe_rtl:mk_load_atom(Dst1, false), + GotoCont] + end. + +%% +%% @doc Generate unsafe head +%% +gen_unsafe_hd(Dst, [Arg]) -> hipe_tagscheme:unsafe_car(Dst, Arg). + +%% +%% @doc Generate unsafe tail +%% +gen_unsafe_tl(Dst, [Arg]) -> hipe_tagscheme:unsafe_cdr(Dst, Arg). + +%% +%% element +%% +gen_element(Dst, Args, IsGuard, Cont, Fail) -> + Dst1 = + case Dst of + [] -> %% The result is not used. + hipe_rtl:mk_new_var(); + [Dst0] -> Dst0 + end, + [Index, Tuple] = Args, + gen_element_1(Dst1, Index, Tuple, IsGuard, Cont, Fail, unknown, unknown). + +gen_element_1(Dst, Index, Tuple, IsGuard, Cont, Fail, TupleInfo, IndexInfo) -> + {FailLblName, FailCode} = gen_fail_code(Fail, badarg, IsGuard), + [hipe_tagscheme:element(Dst, Index, Tuple, FailLblName, TupleInfo, IndexInfo), + hipe_rtl:mk_goto(Cont), + FailCode]. + +%% +%% unsafe element +%% +gen_unsafe_element(Dst, Index, Tuple) -> + case hipe_rtl:is_imm(Index) of + true -> hipe_tagscheme:unsafe_constant_element(Dst, Index, Tuple); + false -> ?EXIT({illegal_index_to_unsafe_element,Index}) + end. + +gen_unsafe_update_element(Tuple, Index, Value) -> + case hipe_rtl:is_imm(Index) of + true -> + hipe_tagscheme:unsafe_update_element(Tuple, Index, Value); + false -> + ?EXIT({illegal_index_to_unsafe_update_element,Index}) + end. + + +gen_closure_element(Dst, Index, Closure) -> + hipe_tagscheme:unsafe_closure_element(Dst, Index, Closure). + +%% +%% @doc Generate RTL code that writes a tuple header. +%% +gen_tuple_header(Ptr, Arity) -> + Header = hipe_tagscheme:mk_arityval(Arity), + hipe_rtl:mk_store(Ptr, hipe_rtl:mk_imm(0), hipe_rtl:mk_imm(Header)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% +%%% Receives + +gen_check_get_msg(Dsts, GotoCont, Fail) -> + gen_check_get_msg_outofline(Dsts, GotoCont, Fail). + +gen_clear_timeout([], GotoCont) -> + case ?ERTS_IS_SMP of + 0 -> gen_clear_timeout_notsmp(GotoCont); + 1 -> gen_clear_timeout_smp(GotoCont) + end. + +-ifdef(notdef). % for reference, currently unused +%%% check_get_msg is: +%%% if (!PEEK_MESSAGE(p)) goto Fail; +%%% Dst = ERL_MESSAGE_TERM(PEEK_MESSAGE(p)); +%%% i.e., +%%% ErlMessage **save = p->msg.save; +%%% ErlMessage *msg = *save; +%%% if (!msg) goto Fail; +%%% Dst = msg->m[0]; +gen_check_get_msg_inline(Dsts, GotoCont, Fail) -> + Save = hipe_rtl:mk_new_reg(), + Msg = hipe_rtl:mk_new_reg(), + TrueLbl = hipe_rtl:mk_new_label(), + [load_p_field(Save, ?P_MSG_SAVE), + load_struct_field(Msg, Save, 0), + hipe_rtl:mk_branch(Msg, eq, hipe_rtl:mk_imm(0), Fail, + hipe_rtl:label_name(TrueLbl), 0.1), + TrueLbl | + case Dsts of + [Dst] -> + [load_struct_field(Dst, Msg, ?MSG_MESSAGE), + GotoCont]; + [] -> % receive which throws away the message + [GotoCont] + end]. +-endif. + +%%% next_msg is: +%%% SAVE_MESSAGE(p); +%%% i.e., +%%% ErlMessage **save = p->msg.save; +%%% ErlMessage *msg = *save; +%%% ErlMessage **next = &msg->next; +%%% p->msg.save = next; +gen_next_msg([], GotoCont) -> + Save = hipe_rtl:mk_new_reg(), + Msg = hipe_rtl:mk_new_reg(), + Next = hipe_rtl:mk_new_reg(), + [load_p_field(Save, ?P_MSG_SAVE), + load_struct_field(Msg, Save, 0), + hipe_rtl:mk_alu(Next, Msg, 'add', hipe_rtl:mk_imm(?MSG_NEXT)), + store_p_field(Next, ?P_MSG_SAVE), + GotoCont]. + +%%% clear_timeout is: +%%% p->flags &= ~F_TIMO; JOIN_MESSAGE(p); +%%% i.e., +%%% p->flags &= ~F_TIMO; +%%% p->msg.save = &p->msg.first; +gen_clear_timeout_notsmp(GotoCont) -> + Flags1 = hipe_rtl:mk_new_reg(), + Flags2 = hipe_rtl:mk_new_reg_gcsafe(), + First = hipe_rtl:mk_new_reg_gcsafe(), + [load_p_field(Flags1, ?P_FLAGS), + hipe_rtl:mk_alu(Flags2, Flags1, 'and', hipe_rtl:mk_imm(bnot(?F_TIMO))), + store_p_field(Flags2, ?P_FLAGS), + hipe_rtl_arch:pcb_address(First, ?P_MSG_FIRST), + store_p_field(First, ?P_MSG_SAVE), + GotoCont]. + +gen_check_get_msg_outofline(Dsts, GotoCont, Fail) -> + RetLbl = hipe_rtl:mk_new_label(), + TrueLbl = hipe_rtl:mk_new_label(), + Tmp = hipe_rtl:mk_new_reg(), + TheNonValue = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()), + [hipe_rtl_arch:call_bif([Tmp], check_get_msg, [], + hipe_rtl:label_name(RetLbl), []), + RetLbl, + hipe_rtl:mk_branch(Tmp, eq, TheNonValue, Fail, + hipe_rtl:label_name(TrueLbl), 0.1), + TrueLbl | + case Dsts of + [Dst] -> + [hipe_rtl:mk_move(Dst, Tmp), + GotoCont]; + [] -> % receive which throws away the message + [GotoCont] + end]. + +gen_clear_timeout_smp(GotoCont) -> + RetLbl = hipe_rtl:mk_new_label(), + [hipe_rtl_arch:call_bif([], clear_timeout, [], + hipe_rtl:label_name(RetLbl), []), + RetLbl, + GotoCont]. + +gen_select_msg([], Cont) -> + [hipe_rtl_arch:call_bif([], select_msg, [], Cont, [])]. + +gen_suspend_msg([], Cont) -> + [hipe_rtl:mk_call([], suspend_msg, [], Cont, [], not_remote)]. + +%% -------------------------------------------------------------------- +%% +%% Floating point handling +%% + +gen_fclearerror() -> + case ?P_FP_EXCEPTION of + [] -> + []; + Offset -> + Tmp = hipe_rtl:mk_new_reg(), + FailLbl = hipe_rtl:mk_new_label(), + ContLbl = hipe_rtl:mk_new_label(), + ContLblName = hipe_rtl:label_name(ContLbl), + [hipe_rtl_arch:pcb_load(Tmp, Offset), + hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), ContLblName, + hipe_rtl:label_name(FailLbl), 0.9), + FailLbl, + hipe_rtl:mk_call([], 'fclearerror_error', [], [], [], not_remote), + hipe_rtl:mk_goto(ContLblName), + ContLbl] + end. + +gen_fcheckerror(ContLbl, FailLbl) -> + case ?P_FP_EXCEPTION of + [] -> + []; + Offset -> + Tmp = hipe_rtl:mk_new_reg(), + TmpFailLbl0 = hipe_rtl:mk_new_label(), + FailCode = fp_fail_code(TmpFailLbl0, FailLbl), + PreFailLbl = hipe_rtl:mk_new_label(), + hipe_rtl_arch:fwait() ++ + [hipe_rtl_arch:pcb_load(Tmp, Offset), + hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), ContLbl, + hipe_rtl:label_name(PreFailLbl), 0.9), + PreFailLbl, + hipe_rtl_arch:pcb_store(Offset, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_goto(hipe_rtl:label_name(TmpFailLbl0)) | + FailCode] + end. + +gen_conv_to_float(Dst, [Src], ContLbl, FailLbl) -> + case hipe_rtl:is_var(Src) of + true -> + Tmp = hipe_rtl:mk_new_var(), + TmpReg = hipe_rtl:mk_new_reg_gcsafe(), + TrueFixNum = hipe_rtl:mk_new_label(), + ContFixNum = hipe_rtl:mk_new_label(), + TrueFp = hipe_rtl:mk_new_label(), + ContFp = hipe_rtl:mk_new_label(), + ContBigNum = hipe_rtl:mk_new_label(), + TestFixNum = hipe_tagscheme:test_fixnum(Src, + hipe_rtl:label_name(TrueFixNum), + hipe_rtl:label_name(ContFixNum), + 0.5), + TestFp = hipe_tagscheme:test_flonum(Src, hipe_rtl:label_name(TrueFp), + hipe_rtl:label_name(ContFp), 0.5), + GotoCont = hipe_rtl:mk_goto(ContLbl), + TmpFailLbl0 = hipe_rtl:mk_new_label(), + FailCode = fp_fail_code(TmpFailLbl0, FailLbl), + + TestFixNum ++ + [TrueFixNum, + hipe_tagscheme:untag_fixnum(TmpReg, Src), + hipe_rtl:mk_fconv(Dst, TmpReg), + GotoCont, + ContFixNum] ++ + TestFp ++ + [TrueFp, + hipe_tagscheme:unsafe_untag_float(Dst, Src), + GotoCont, + ContFp] ++ + [hipe_rtl:mk_call([Tmp], conv_big_to_float, [Src], + hipe_rtl:label_name(ContBigNum), + hipe_rtl:label_name(TmpFailLbl0), not_remote)]++ + FailCode ++ + [ContBigNum, + hipe_tagscheme:unsafe_untag_float(Dst, Tmp)]; + _ -> + %% This must be an attempt to convert an illegal term. + [gen_fail_code(FailLbl, badarith)] + end. + diff --git a/lib/hipe/rtl/hipe_rtl_ssa.erl b/lib/hipe/rtl/hipe_rtl_ssa.erl new file mode 100644 index 0000000000..f55cc0dd5c --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_ssa.erl @@ -0,0 +1,93 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_rtl_ssa.erl +%% Author : Kostis Sagonas +%% Created : 30 Jan 2004 +%% Purpose : Provides interface functions for converting RTL code into +%% SSA form and back using the generic SSA converter. +%%---------------------------------------------------------------------- + +-module(hipe_rtl_ssa). + +-export([uses_to_rename/1]). %% needed by hipe_rtl_ssa_const_prop + +%% The following defines are needed by the included file below +-define(CODE, hipe_rtl). +-define(CFG, hipe_rtl_cfg). +-define(LIVENESS, hipe_rtl_liveness). + +-include("hipe_rtl.hrl"). +-include("../ssa/hipe_ssa.inc"). + +%%---------------------------------------------------------------------- +%% Auxiliary operations which seriously differ between Icode and RTL. +%%---------------------------------------------------------------------- + +defs_to_rename(Statement) -> + Defs = hipe_rtl:defines(Statement), + [D || D <- Defs, not hipe_rtl_arch:is_precoloured(D)]. + +uses_to_rename(Statement) -> + Uses = hipe_rtl:uses(Statement), + [U || U <- Uses, not hipe_rtl_arch:is_precoloured(U)]. + +liveout_no_succ() -> + hipe_rtl_arch:live_at_return(). + +%----------------------------------------------------------------------- + +reset_var_indx() -> + hipe_gensym:set_var(rtl, hipe_rtl_arch:first_virtual_reg()). + +%%---------------------------------------------------------------------- + +is_fp_temp(Temp) -> + hipe_rtl:is_fpreg(Temp). + +mk_new_fp_temp() -> + hipe_rtl:mk_new_fpreg(). + +%----------------------------------------------------------------------- +%% Procedure : makePhiMove +%% Purpose : Create an RTL-specific version of a move instruction +%% depending on the type of the arguments. +%% Arguments : Dst, Src - the arguments of a Phi instruction that is +%% to be moved up the predecessor block as part +%% of the SSA un-convert phase. +%% Returns : Code +%% Note : ?CODE here is hipe_rtl +%%---------------------------------------------------------------------- + +makePhiMove(Dst, Src) -> + case hipe_rtl:is_fpreg(Dst) of + false -> + case hipe_rtl:is_fpreg(Src) of %% this test is just a sanity check + false -> + hipe_rtl:mk_move(Dst, Src) + end; + true -> + case hipe_rtl:is_fpreg(Src) of %% this test is just a sanity check + true -> + hipe_rtl:mk_fmove(Dst, Src) + end + end. + +%----------------------------------------------------------------------- diff --git a/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl b/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl new file mode 100644 index 0000000000..cae6da542f --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl @@ -0,0 +1,357 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_rtl_ssa_avail_expr.erl +%%% Author : Per Gustafsson +%%% Description : A couple of optimizations on rtl_ssa +%%% 1. Remove unnecessary loads (Global) +%%% 2. Remove unnecessary stores (Local) +%%% 3. Remove unnecessary tag/untag operations +%%% +%%% Changed : 7 Feb 2007 by Per Gustafsson +%%%------------------------------------------------------------------- +-module(hipe_rtl_ssa_avail_expr). + +-export([cfg/1]). + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). + +cfg(CFG) -> + CFG1 = remove_loads(CFG), + CFG2 = remove_stores(CFG1), + CFG3 = optimize_fixnums(CFG2), + hipe_rtl_ssa:remove_dead_code(CFG3). + +%%%============================================================================= +%%% +%%% Remove unnecessary loads +%%% +%%%============================================================================= + +remove_loads(CFG) -> + LoadsFun = fun spread_info/2, + Info=fix_point(CFG, LoadsFun), + pass_through(CFG, LoadsFun, Info). + +spread_info(Code, Info) -> + lists:foldl(fun do_instr/2, {[],Info}, Code). + +do_instr(Instr, {Acc,Info}) -> + case Instr of + #call{} -> + {Acc++[Instr], new_env()}; + #store{} -> + {Acc++[Instr], new_env()}; + #gctest{} -> + {Acc++[Instr], new_env()}; + #load{} -> + Dst = hipe_rtl:load_dst(Instr), + LoadType = {hipe_rtl:load_src(Instr), hipe_rtl:load_offset(Instr), + hipe_rtl:load_size(Instr), hipe_rtl:load_sign(Instr)}, + NewInstr = + case lookup_y(LoadType, Info) of + none -> + Instr; + Var -> + hipe_rtl:mk_move(Dst, Var) + end, + Fun = fun load_filter_fun/2, + {Acc++[NewInstr], insert(Dst,LoadType,remove_defines(Instr,Info,Fun))}; + _ -> + {Acc++[Instr],remove_defines(Instr,Info,fun load_filter_fun/2)} + end. + +load_filter_fun({X1,{X2,X3,_,_}},PreColDefs) -> + not (lists:member(X1,PreColDefs) or + lists:member(X2,PreColDefs) or + lists:member(X3,PreColDefs)). + +%%%============================================================================= +%%% +%%% Remove unnecessary stores (local optimization) +%%% +%%%============================================================================= + +remove_stores(CFG) -> + pass_through(CFG, fun remove_store/2, new_info()). + +remove_store(Code,_) -> + remove_store_from_bb(Code). + +remove_store_from_bb(Code) -> + remove_store_from_bb(lists:reverse(Code), new_env(), []). + +remove_store_from_bb([Instr|Instrs], Env, Acc) -> + {NewAcc, NewEnv} = + case Instr of + #call{} -> + {[Instr|Acc],new_env()}; + #gctest{} -> + {[Instr|Acc], new_env()}; + #store{} -> + Base = hipe_rtl:store_base(Instr), + Offset = hipe_rtl:store_offset(Instr), + Size = hipe_rtl:store_size(Instr), + StoreType = {Base, Offset, Size}, + case lookup_y(StoreType, Env) of + none -> + {[Instr|Acc], insert(StoreType, true, Env)}; + true -> + {Acc, Env} + end; + #load{} -> + {[Instr|Acc],new_env()}; + _ -> + {[Instr|Acc],remove_defines(Instr,Env,fun store_filter_fun/2)} + end, + remove_store_from_bb(Instrs, NewEnv, NewAcc); +remove_store_from_bb([], Env, Acc) -> + {Acc,Env}. + +store_filter_fun({{X1,X2,_},_},PreColDefs) -> + not (lists:member(X1,PreColDefs) or + lists:member(X2,PreColDefs)). + +%%%============================================================================= +%%% +%%% Optimize Fixnum Operations +%%% +%%%============================================================================= + +optimize_fixnums(CFG) -> + FixFun = fun fixnum_opt/2, + Info=fix_point(CFG, FixFun), + pass_through(CFG, FixFun, Info). + +fixnum_opt(Code,Info) -> + lists:foldl(fun do_fixnums/2, {[],Info}, Code). + +do_fixnums(Instr, {Acc,Env}) -> + case Instr of + #call{} -> + {Acc++[Instr],Env}; + #gctest{} -> + {Acc++[Instr],Env}; + #fixnumop{dst=Dst,src=Src} -> + case lookup_y(Src,Env) of + none -> + case lookup_x(Src,Env) of + none -> + case hipe_rtl_arch:is_precoloured(Src) or + hipe_rtl_arch:is_precoloured(Dst) of + true -> + {Acc++[Instr],Env}; %% To Avoid non ssa problems + false -> + {Acc++[Instr],insert(Dst,Src,Env)} + end; + OtherSrc -> + {Acc++[hipe_rtl:mk_move(Dst,OtherSrc)],Env} + end; + OtherDst -> + {Acc++[hipe_rtl:mk_move(Dst,OtherDst)],Env} + end; + _ -> + {Acc++[Instr],Env} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Code handling functions +%% + +get_code_from_label(Label,CFG) -> + CurrentBB = hipe_rtl_cfg:bb(CFG, Label), + hipe_bb:code(CurrentBB). + +put_code_at_label(Label,Code,CFG) -> + NewBB = hipe_bb:mk_bb(Code), + hipe_rtl_cfg:bb_add(CFG, Label, NewBB). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The info environment. +%% An info environment is a mapping from labels to info_out +%% + +new_info() -> + gb_trees:empty(). + +get_info(Label,Info) -> + case gb_trees:lookup(Label, Info) of + {value, V} -> V; + none -> none + end. + +add_info(Label, NewInfo, OldInfo) -> + gb_trees:enter(Label, NewInfo, OldInfo). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Simple worklist utility +%% + +add_succ_to_list(NewList, OldList) -> + RealNew = [New || New <- NewList, lists:member(New,OldList)], + OldList ++ RealNew. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Generic Fixpoint Code +%% + +fix_point(CFG, Fun) -> + Start = hipe_rtl_cfg:start_label(CFG), + Info = new_info(), + fix_point([Start], CFG, Fun, Info). + +fix_point([Label|Labels], CFG, Fun, Info) -> + case initial_stage(Label,CFG,Fun,Info) of + {true, _, _} -> + fix_point(Labels, CFG, Fun, Info); + {false, _, NewInfoOut} -> + Succ = hipe_rtl_cfg:succ(CFG, Label), + NewList = add_succ_to_list(Succ, Labels), + NewInfo = add_info(Label, NewInfoOut, Info), + fix_point(NewList, CFG, Fun, NewInfo) + end; +fix_point([], _CFG, _Fun, Info) -> + Info. + +pass_through(CFG, Fun, Info) -> + pass_through(hipe_rtl_cfg:reverse_postorder(CFG), + CFG, Fun, Info). + +pass_through([Label|Labels], CFG, Fun, Info) -> + {_, NewCode, _} = initial_stage(Label,CFG,Fun,Info), + NewCFG = put_code_at_label(Label,NewCode,CFG), + pass_through(Labels, NewCFG, Fun, Info); +pass_through([], CFG, _Fun, _Info) -> + CFG. + +initial_stage(Label,CFG,Fun,Info) -> + OldInfoOut = get_info(Label,Info), + Pred = hipe_rtl_cfg:pred(CFG,Label), + InfoEnv = join([get_info(L,Info) || L <- Pred]), + OldCode = get_code_from_label(Label,CFG), + {PhiCode,Code} = split_code(OldCode), + InfoIn = join_phi(PhiCode,Info,InfoEnv), + {NewCode, NewInfoOut} = Fun(Code, InfoIn), + {OldInfoOut=:=NewInfoOut,PhiCode++NewCode, NewInfoOut}. + +join_phi([#phi{dst=Dst,arglist=AList}|Rest], Info, Env) -> + case lists:foldl(fun(Val,Acc) -> + check_label(Val,Info,Acc) + end, none, AList) of + no_val -> + join_phi(Rest,Info,Env); + none -> + join_phi(Rest,Info,Env); + Expr -> + join_phi(Rest,Info,insert(Dst,Expr,Env)) + end; +join_phi([], _Info, Env) -> + Env. + +check_label({Lbl,Var}, Info, Acc) -> + case gb_trees:lookup(Lbl,Info) of + none -> Acc; + {value,Env} -> + case lookup_x(Var,Env) of + none -> no_val; + Acc -> Acc; + V -> + if Acc =:= none -> V; + true -> no_val + end + end + end. + +split_code(Code) -> + Phis = extract_phis(Code), + {Phis,Code--Phis}. + +extract_phis(Code) -> + [I || #phi{}=I <- Code]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% One2One Environment +%% + +new_env() -> + {gb_trees:empty(),gb_trees:empty()}. + +insert(X,Y,{XtoY,YtoX}) -> + NewYtoX = remove_old_binding(X,XtoY,YtoX), + NewXtoY = remove_old_binding(Y,YtoX,XtoY), + {gb_trees:enter(X,Y,NewXtoY), + gb_trees:enter(Y,X,NewYtoX)}. + +remove_old_binding(Key,LookupTree,ChangeTree) -> + case gb_trees:lookup(Key,LookupTree) of + none -> + ChangeTree; + {value,V} -> + gb_trees:balance(gb_trees:delete(V,ChangeTree)) + end. + +lookup_x(X,{XtoY,_YtoX}) -> + case gb_trees:lookup(X,XtoY) of + none -> none; + {value,Val} -> Val + end. + +lookup_y(Y,{_XtoY,YtoX}) -> + case gb_trees:lookup(Y,YtoX) of + none -> none; + {value,Val} -> Val + end. + +join([]) -> new_env(); +join([none]) -> new_env(); +join([E]) -> E; +join([E1,E2|Rest]) -> join([join(E1,E2)|Rest]). + +join({MapXY1,MapYX1},{MapXY2,MapYX2}) -> + {join_maps(MapXY1,MapXY2), + join_maps(MapYX1,MapYX2)}; +join(none,E) -> E; +join(E,none) -> E. + +join_maps(Map1,Map2) -> + OrdDict = ordsets:intersection(gb_trees:to_list(Map1), + gb_trees:to_list(Map2)), + gb_trees:from_orddict(OrdDict). + +remove_defines(Instr,Info,Fun) -> + Defs = hipe_rtl:defines(Instr), + case [Def || Def <- Defs, hipe_rtl_arch:is_precoloured(Def)] of + [] -> + Info; + PreColDefs -> + filter_environments(PreColDefs,Info,Fun) + end. + +filter_environments(PreColDefs,{M1,_M2},Fun) -> + L1 = gb_trees:to_list(M1), + F1 = [Tup || Tup <- L1, Fun(Tup,PreColDefs)], + F2 = [{Y,X} || {X,Y} <- F1], + {gb_trees:from_orddict(F1),gb_trees:from_orddict(orddict:from_list(F2))}. diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl new file mode 100644 index 0000000000..76c0a88933 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl @@ -0,0 +1,1082 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ============================================================================ +%% Filename : hipe_rtl_ssa_const_prop.erl +%% Authors : Bjorn Bergman, Bjarni Juliusson +%% Purpose : Perform sparse conditional constant propagation on RTL. +%% Notes : Works on an SSA-converted control-flow graph. +%% +%% History : * 2004-03-14: Blatantly stolen from Icode (code by +%% Daniel Luna and Erik Andersson) and query-replaced for RTL. +%% * 2004-04-30: Added in the repository. +%% ============================================================================ +%% +%% Exports: propagate/1. +%% +%% ============================================================================ +%% +%% Some things to note: +%% +%% 1. All precoloured registers are assumed to contain bottom. We can not +%% do anything with them since they are not in SSA-form. This might be +%% possible to resolve in some way, but we decided to not go there. +%% +%% 2. const_labels are assumed to be bottom, we can not find the address +%% in any nice way (that I know of, maybe someone can help ?). I +%% suppose they don't get a value until linking (or some step that +%% resembles it). They are only affecting bignums and floats (at least +%% as far as I can tell), which are both stored in memory and hence +%% not handled very well by us anyway. +%% +%% 3. can v <- Constant be removed ? I think so. all uses of v will be +%% replaced with an immediate. So why not ? +%% +%% ============================================================================ +%% +%% TODO: +%% +%% Take care of failures in call and replace operation with apropriate +%% failure. +%% +%% Handle ifs with non-binary operators +%% +%% We want multisets for easier (and faster) creation of env->ssa_edges +%% +%% Propagation of constant arguments when some of the arguments are bottom +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +-module(hipe_rtl_ssa_const_prop). +-export([propagate/1]). + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). +-include("../flow/cfg.hrl"). + +%-define(DEBUG, true). + +-ifdef(DEBUG). +-define(SCCPDBG(W), W). +-define(DEBUG_TST, true). % make sure that we can use ?DEBUG in if-cases... +-else. +-define(DEBUG_TST, false). % make sure that we can use ?DEBUG in if-cases... +-define(SCCPDBG(W), ok). +-endif. + +%%----------------------------------------------------------------------------- +%% Include stuff shared between SCCP on Icode and RTL. +%% NOTE: Needs to appear after DEBUG is possibly defined. +%%----------------------------------------------------------------------------- + +-define(CODE, hipe_rtl). +-define(CFG, hipe_rtl_cfg). +-include("../ssa/hipe_ssa_const_prop.inc"). + +-type bool_lattice() :: 'true' | 'false' | 'top' | 'bottom'. +-type conditional() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le' + | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_expression/2 +%% Purpose : do a symbolic execution of the given instruction. This is just +%% a wrapper that chooses the right function to handle a particular +%% instruction. +%% Arguments : Instructions - the instruction +%% Environment - have a guess. +%% Returns : {FlowWorkList, SSAWorkList, Environment} +%%----------------------------------------------------------------------------- +visit_expression(Instruction, Environment) -> + case Instruction of + #alu{} -> + visit_alu(Instruction, Environment); + #alub{} -> + visit_alub(Instruction, Environment); + #branch{} -> + visit_branch(Instruction, Environment); + #call{} -> + visit_call(Instruction, Environment); +%% #comment{} -> +%% visit_comment(Instruction, Environment); +%% #enter{} -> +%% visit_enter(Instruction, Environment); + #fconv{} -> + visit_fconv(Instruction, Environment); + #fixnumop{} -> + visit_fixnumop(Instruction, Environment); + #fload{} -> + visit_fload(Instruction, Environment); + #fmove{} -> + visit_fmove(Instruction, Environment); + #fp{} -> + visit_fp(Instruction, Environment); + #fp_unop{} -> + visit_fp_unop(Instruction, Environment); +%% #fstore{} -> +%% visit_fstore(Instruction, Environment); +%% #gctest{} -> +%% visit_gctest(Instruction, Environment); + #goto{} -> + visit_goto(Instruction, Environment); + #goto_index{} -> + visit_goto_index(Instruction, Environment); +%% #label{} -> +%% visit_label(Instruction, Environment); + #load{} -> + visit_load(Instruction, Environment); + #load_address{} -> + visit_load_address(Instruction, Environment); + #load_atom{} -> + visit_load_atom(Instruction, Environment); + #load_word_index{} -> + visit_load_word_index(Instruction, Environment); + #move{} -> + visit_move(Instruction, Environment); + #multimove{} -> + visit_multimove(Instruction, Environment); +%% phi-nodes are handled in scc +%% #phi{} -> +%% visit_phi(Instruction, Environment); +%% #return{} -> +%% visit_return(Instruction, Environment); +%% #store{} -> +%% visit_store(Instruction, Environment); + #switch{} -> + visit_switch(Instruction, Environment); + _ -> + %% label, end_try, comment, return, fail, et al + {[], [], Environment} + end. + + +%%----------------------------------------------------------------------------- +%% Procedure : set_to/3 +%% Purpose : many of the visit_ functions ends in a update of the +%% environment (and resulting SSA-edges) this function does the +%% update in a nice way and formats the result so that it can be +%% imediatly returned to visit_expression +%% Arguments : Dst - the destination may be a list of destinations. +%% Val - the new value (bottom, or some constant). +%% Env - the environment in which the update should be done. +%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +set_to(Dst, Val, Env) -> + {Env1, SSAWork} = update_lattice_value({Dst, Val}, Env), + {[], SSAWork, Env1}. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_branch/2 +%% Purpose : do symbolic exection of branch instructions. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_branch(Inst, Env) -> %% Titta också på exekverbarflagga + Val1 = lookup_lattice_value(hipe_rtl:branch_src1(Inst), Env), + Val2 = lookup_lattice_value(hipe_rtl:branch_src2(Inst), Env), + CFGWL = case evaluate_relop(Val1, hipe_rtl:branch_cond(Inst), Val2) of + true -> [hipe_rtl:branch_true_label(Inst)]; + false -> [hipe_rtl:branch_false_label(Inst)]; + bottom -> [hipe_rtl:branch_true_label(Inst), + hipe_rtl:branch_false_label(Inst)]; + top -> [] + end, + {CFGWL, [], Env}. + +%%----------------------------------------------------------------------------- +%% Procedure : evaluate_relop/3 +%% Purpose : evaluate the given relop. While taking care to handle top & +%% bottom in some sane way. +%% Arguments : Val1, Val2 - The operands Integers or top or bottom +%% RelOp - some relop atom from rtl. +%% Returns : bottom, top, true or false +%%----------------------------------------------------------------------------- + +evaluate_relop(Val1, RelOp, Val2) -> + if + (Val1==bottom) or (Val2==bottom) -> bottom ; + (Val1==top) or (Val2==top) -> top; + true -> hipe_rtl_arch:eval_cond(RelOp, Val1, Val2) + end. + +%%----------------------------------------------------------------------------- +%% Procedure : evaluate_fixnumop/2 +%% Purpose : try to evaluate a fixnumop. +%% Arguments : Val1 - operand (an integer, 'top' or 'bottom') +%% Op - the operation. +%% Returns : Result +%% where result is an integer, 'top' or 'bottom' +%%----------------------------------------------------------------------------- + +evaluate_fixnumop(Val1, Op) -> + if Val1 =:= top -> + top; + Val1 =:= bottom -> + bottom; + is_integer(Val1) -> + case Op of + tag -> + hipe_tagscheme:mk_fixnum(Val1); + untag -> + hipe_tagscheme:fixnum_val(Val1) + end + end. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_alu/2 +%% Purpose : do symbolic exection of a alu +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_alu(Inst, Env) -> + Val1 = lookup_lattice_value(hipe_rtl:alu_src1(Inst), Env), + Val2 = lookup_lattice_value(hipe_rtl:alu_src2(Inst), Env), + {NewVal, _, _, _, _} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2), + set_to(hipe_rtl:alu_dst(Inst), NewVal, Env). + +%% Here follows the alu-evaluation stuff. This is the most involved part I +%% guess. The function that you may want to use is evaluate_alu/3. The +%% evaluation functions returns +%% { Result, SignFlag, ZeroFlag, Overflow flag, CarryBit} +%% it uses some helpers which are explained breifly: +%% lattice_meet/2 - handles the general case of most alu-operations, called +%% when at least one of the operands is nonconstant, and the +%% operation-specifics have been taken care of. +%% all_ones/0 - returns the value of a rtl-word set to all 1 bits. +%% partial_eval_alu - tries to catch some operation specific special cases +%% when one (or both) of the operands is nonconstant. + +lattice_meet(Val1, Val2) -> + M = if (Val1 =:= top) or (Val2 =:= top) -> top; + (Val1 =:= bottom) or (Val2 =:= bottom) -> bottom + % the check is realy just sanity + end, + {M, M, M, M, M}. + +all_ones() -> + (1 bsl ?bytes_to_bits(hipe_rtl_arch:word_size())) - 1. + +%% when calling partial_eval*() we know that at least one of the Values +%% are bottom or top. They return { Value, Sign, Zero, Overflow, Carry }. +%% (just like hipe_rtl_arch:eval_alu) + +%% logic shifts are very similar each other. Limit is the number of +%% bits in the words. +partial_eval_shift(Limit, Val1, Val2) -> + if + Val2 =:= 0 -> {Val1, Val1, Val1, Val1, Val1}; + Val1 =:= 0 -> {0, false, true, false, false}; + is_integer(Val2), Val2 >= Limit -> % (Val2 =/= top) and (Val2 =/= bottom) + {0, false, true, Val1, Val1}; % OVerflow & carry we dont know about. + true -> lattice_meet(Val1, Val2) + end. + +%%----------------------------------------------------------------------------- +%% Procedure : partial_eval_alu/3 +%% Purpose : try to evaluate as much as possible an alu operation where at +%% least one of the operands is not constant. +%% Arguments : Val1, Val2 - operands (integer, top or bottom) +%% Op - the operation. +%% Returns : {Result, Sign, Zero, Overflow, Carry} +%% where Result is an integer, 'top' or 'bottom' +%% and the others are bool, 'top' or 'bottom'. +%%----------------------------------------------------------------------------- + +partial_eval_alu(Val1, add, Val2) -> + if + (Val1 == 0) -> {Val2, Val2, Val2, false, false}; + (Val2 == 0) -> {Val1, Val1, Val1, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, sub, Val2) -> + if + (Val2 == 0) -> {Val1, Val1, Val1, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, 'or', Val2) -> + All_ones = all_ones(), + if + (Val1 == 0) -> {Val2, Val2, Val2, false, false}; + (Val2 == 0) -> {Val1, Val1, Val1, false, false}; + (Val1 == All_ones) or (Val2 == All_ones) -> + {All_ones, true, false, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, 'and', Val2) -> + All_ones = all_ones(), + if + Val1 == All_ones -> {Val2, Val2, Val2, false, false}; + Val2 == All_ones -> {Val1, Val1, Val1, false, false}; + (Val1 == 0) or (Val2 == 0) -> {0, false, true, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, 'xor', Val2) -> + if + (Val1 == 0) -> {Val2, Val2, Val2, false, false}; + (Val2 == 0) -> {Val1, Val1, Val1, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, 'xornot', Val2) -> + All_ones = all_ones(), + if + Val1 == All_ones -> {Val2, Val2, Val2, false, false}; + Val2 == All_ones -> {Val1, Val1, Val1, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, andnot, Val2) -> + All_ones = all_ones(), + if + (Val2 == 0) -> {Val1, Val1, Val1, false, false}; + (Val1 == 0) or (Val2 == All_ones) -> {0, false, true, false, false}; + true -> lattice_meet(Val1, Val2) + end; +partial_eval_alu(Val1, Op, Val2) when (Op =:= 'sll') or (Op =:= 'srl') -> + BitSize = ?bytes_to_bits(hipe_rtl_arch:word_size()), + partial_eval_shift(BitSize, Val1, Val2); +partial_eval_alu(Val1, Op, Val2) when (Op =:= 'sllx') or (Op =:= 'srlx') -> + partial_eval_shift(64, Val1, Val2); +partial_eval_alu(Val1, mul, Val2) -> lattice_meet(Val1, Val2); % XXX: suboptimal + +% arithmetic shifts are more tricky, shifting something unknown can +% generate all_ones() and 0 depenging on the sign of Val1. +partial_eval_alu(Val1, Op, Val2) when (Op =:= 'sra') or (Op =:= 'srax') -> + if + (Val2 == 0) -> {Val1, Val1, Val1, false, false}; + (Val1 == 0) -> {0, false, true, false, false}; + true -> lattice_meet(Val1, Val2) + end. + +%%----------------------------------------------------------------------------- +%% Procedure : evaluate_alu/3 +%% Purpose : try to evaluate as much as possible of a alu operation. +%% Arguments : Val1, Val2 - operands (an integer, 'top' or 'bottom') +%% Op - the operation. +%% Returns : {Result, Sign, Zero, Overflow, Carry} +%% where result is an integer, 'top' or 'bottom' +%% and the others are Bool, 'top' or 'bottom'. +%%----------------------------------------------------------------------------- + +evaluate_alu(Val1, Op, Val2) -> + if + (Val1 =:= top) or (Val2 =:= top) or + (Val1 =:= bottom) or (Val2 =:= bottom) -> partial_eval_alu(Val1, Op, Val2); + true -> + case Op of + sllx -> hipe_rtl_arith_64:eval_alu('sll', Val1, Val2); + srlx -> hipe_rtl_arith_64:eval_alu('srl', Val1, Val2); + srax -> hipe_rtl_arith_64:eval_alu('sra', Val1, Val2); + _ -> hipe_rtl_arch:eval_alu(Op, Val1, Val2) + end + end. + +maybe_top_or_bottom(List) -> + maybe_top_or_bottom(List, false). + +maybe_top_or_bottom([], TB) -> TB; +maybe_top_or_bottom([top | Rest], _) -> maybe_top_or_bottom(Rest, top); +maybe_top_or_bottom([bottom | _], _) -> bottom; +maybe_top_or_bottom([_ | Rest], TB) -> maybe_top_or_bottom(Rest, TB). + +-spec partial_eval_branch(conditional(), bool_lattice(), bool_lattice(), + bool_lattice() | 0, bool_lattice() | 0) -> + bool_lattice(). +partial_eval_branch(Cond, N0, Z0, V0, C0) -> + {N, Z, V, C} = + if Cond =:= 'eq'; + Cond =:= 'ne' -> {true, Z0, true, true}; + Cond =:= 'gt'; + Cond =:= 'le' -> {N0, Z0, V0, true}; + Cond =:= 'gtu' -> {true, Z0, true, C0 }; + Cond =:= 'lt'; + Cond =:= 'ge' -> {N0, true, V0, true}; + Cond =:= 'geu'; + Cond =:= 'ltu' -> {true, true, true, C0 }; + Cond =:= 'overflow'; + Cond =:= 'not_overflow' -> {true, true, V0, true} + end, + case maybe_top_or_bottom([N, Z, V, C]) of + false -> hipe_rtl_arch:eval_cond_bits(Cond, N, Z, V, C); + top -> top; + bottom -> bottom + end. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_alub/2 +%% Purpose : do symbolic exection of a alub instruction +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_alub(Inst, Env) -> + Val1 = lookup_lattice_value(hipe_rtl:alub_src1(Inst), Env), + Val2 = lookup_lattice_value(hipe_rtl:alub_src2(Inst), Env), + {NewVal, N, Z, C, V} = evaluate_alu(Val1, hipe_rtl:alub_op(Inst), Val2), + Labels = + case NewVal of + bottom -> [hipe_rtl:alub_true_label(Inst), + hipe_rtl:alub_false_label(Inst)]; + top -> []; + _ -> + %if the partial branch cannot be evaluated we must execute the + % instruction at runtime. + case partial_eval_branch(hipe_rtl:alub_cond(Inst), N, Z, C, V) of + bottom -> [hipe_rtl:alub_true_label(Inst), + hipe_rtl:alub_false_label(Inst)]; + top -> []; + true -> [hipe_rtl:alub_true_label(Inst) ]; + false -> [hipe_rtl:alub_false_label(Inst) ] + end + end, + {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal, Env), + {Labels, NewSSA, NewEnv}. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_fixnumop/2 +%% Purpose : do symbolic exection of a fixnumop instruction. +%% fixnumop is like a specialized alu. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_fixnumop(Inst, Env) -> + Val = lookup_lattice_value(hipe_rtl:fixnumop_src(Inst), Env), + Res = evaluate_fixnumop(Val, hipe_rtl:fixnumop_type(Inst)), + set_to(hipe_rtl:fixnumop_dst(Inst), Res, Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_f* +%% Purpose : Do symbolic execution of floating point instructions. +%% All floating-point hitngs are mapped to bottom. In order to +%% implement them we would have to add hipe_rtl_arch:eval_f* +%% instructions since floating point is no exact science. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_fconv(Inst, Env) -> + set_to(hipe_rtl:fconv_dst(Inst), bottom, Env). + +visit_fp(Inst, Env) -> + set_to(hipe_rtl:fp_dst(Inst), bottom, Env). + +visit_fp_unop(Inst, Env) -> + set_to(hipe_rtl:fp_unop_dst(Inst), bottom, Env). + +visit_fload(Inst, Env) -> + set_to(hipe_rtl:fload_dst(Inst), bottom, Env). + +visit_fmove(Inst, Env) -> + set_to(hipe_rtl:fmove_dst(Inst), bottom, Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_move/2 +%% Purpose : execute a register-copy +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_move(Inst, Env) -> + Src = hipe_rtl:move_src(Inst), + Dst = hipe_rtl:move_dst(Inst), + set_to(Dst, lookup_lattice_value(Src, Env), Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_goto/2 +%% Purpose : execute a goto +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_goto(Instruction, Environment) -> + GotoLabel = hipe_rtl:goto_label(Instruction), + {[GotoLabel], [], Environment}. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_goto_index/2 +%% Purpose : execute a goto_index +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_goto_index(Inst, Env) -> + Index = hipe_rtl:goto_index_index(Inst), + case lookup_lattice_value(Index, Env) of + top -> { [], [], Env }; + bottom -> %% everything is reachable + { hipe_rtl:goto_index_labels(Inst), [], Env }; + I -> %% only the ith label will be taken. + io:format("hipe_rtl_ssa_const_prop foud goto-index with constant index ~w in ~w\n", + [I, Inst]), + { [ lists:nth(hipe_rtl:goto_index_labels(Inst), I) ], [], Env } + end. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_load/2 +%% Purpose : do a visit_load. Its hard to track whats in memory, and it's +%% not in ssa form, so let's assume bottom-values ! +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_load(Inst, Env) -> + set_to(hipe_rtl:load_dst(Inst), bottom, Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_load_address/2 +%% Purpose : execute a load_address instruction, while there might be things +%% here that are runtime-constant they are not compile-time +%% constant since code loading interferes with addresses. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_load_address(Inst, Env) -> + Dst = hipe_rtl:load_address_dst(Inst), + Val = bottom, %% all these are probably run-time, but not + %% compile-time constants + set_to(Dst, Val, Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_load_atom/2 +%% Purpose : Like loadadress this one gets something that is not +%% compiletime-constant +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_load_atom(Inst, Env) -> + set_to(hipe_rtl:load_atom_dst(Inst), bottom, Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_load_word_index/2 +%% Purpose : execute a load_word_index. Here is probably room for +%% improvement, we should be able to find some constants here, +%% since we can get the labeled values from the environment, and +%% then find the value with the given index. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_load_word_index(Inst, Env) -> + io:format(" this is load word index: ~w\n", [Inst]), + set_to(hipe_rtl:load_word_index_dst(Inst), bottom, Env). + +%%----------------------------------------------------------------------------- +%% Procedure : visit_multimove/2 & visit_multimove/4 +%% Purpose : execute a multimove instruction. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_multimove([Dst | Dsts], [Val | Vals], MyEnv, MySSA) -> + {NewEnv, NewSSA} = update_lattice_value({Dst, Val}, MyEnv), + visit_multimove(Dsts, Vals, NewEnv, MySSA ++ NewSSA); +visit_multimove([], [], MyEnv, MySSA) -> + {MyEnv, MySSA}. + +visit_multimove(Inst, Env) -> + Srcs = [lookup_lattice_value(S, Env) || + S <- hipe_rtl:multimove_srclist(Inst)], + {NewEnv, NewSSA} = visit_multimove(hipe_rtl:multimove_dstlist(Inst), + Srcs, Env, []), + {[], NewSSA, NewEnv}. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_call/2 +%% Purpose : execute a call-instruction. All calls return bottom. We make +%% this assumption since the icode-leel have taken care of BIF's +%% and we belive that we are left with the things that can not be +%% done att compile time. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +visit_call(Inst, Env) -> + {Env1, SSAWork} = + update_lattice_value({hipe_rtl:call_dstlist(Inst), bottom}, Env), + % remeber to add both continuation & failto things to the cfgwl + Cont = case hipe_rtl:call_continuation(Inst) of + [] -> []; + C -> [C] + end, + Succ = case hipe_rtl:call_fail(Inst) of + [] -> Cont; + Fail -> [Fail | Cont] + end, + {Succ, SSAWork, Env1}. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_switch/2 +%% Purpose : execute a switch-statement. +%% Arguments : Inst - The instruction +%% Env - The environment +%% Returns : {FlowWorkList, SSAWorkList, NewEnvironment} +%%----------------------------------------------------------------------------- + +%% first two helpers that are used to handle the mapping from value to label. +%% why isn't there a function that does this ? + +find_switch_label(Inst, Val) -> + Labels = hipe_rtl:switch_labels(Inst), + ?SCCPDBG(io:format("finding switch_label, ~w in ~w\n", [Val,Inst])), + %% it seems like the index is zero based. nth uses 1-based indexing. + lists:nth(Val + 1, Labels). + +%% Switches seem tricky. the sort-order is a list of key-values to be +%% tested in order. (if elem i matches then we should jump to elem i of +%% the labels-list) +visit_switch(Inst, Env) -> + case lookup_lattice_value(hipe_rtl:switch_src(Inst), Env) of + top -> + {[], [], Env}; + bottom -> + {hipe_rtl:switch_labels(Inst), [], Env}; + Val -> + {[find_switch_label(Inst, Val) ], [], Env} + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_instruction/2 +%% Purpose : update the given instruction using any information found in +%% the environment. +%% Arguments : Inst - the instruction +%% Environment - in which everything happens. +%% Returns : list of new instructions. +%%----------------------------------------------------------------------------- + +%% idea: what to do with vi <- Constant. wouldn't it be possible to +%% remove those ? (and similarily for alu-instructions. and alub +%% instructions also ! (of course this will be done in some later step dead +%% code elimination ? but it's a simple check.) +update_instruction(Inst, Env) -> + case Inst of + #alu{} -> + update_alu(Inst, Env); + #alub{} -> + update_alub(Inst, Env); + #branch{} -> + update_branch(Inst, Env); + #call{} -> + subst_all_uses(Inst, Env); +%% #comment{} -> +%% [Inst]; + #enter{} -> + subst_all_uses(Inst, Env); + #fconv{} -> + subst_all_uses(Inst, Env); + #fload{} -> + subst_all_uses(Inst, Env); + #fmove{} -> + subst_all_uses(Inst, Env); + #fp{} -> + subst_all_uses(Inst, Env); + #fp_unop{} -> + subst_all_uses(Inst, Env); + #fstore{} -> + subst_all_uses(Inst, Env); + #gctest{} -> + subst_all_uses(Inst, Env); +%% #goto{} -> +%% [ Inst ]; + #goto_index{} -> + update_goto_index(Inst, Env); +%% #label{} -> +%% [ Inst ]; + #load{} -> + subst_all_uses(Inst, Env); + #load_address{} -> + subst_all_uses(Inst, Env); + #load_atom{} -> + subst_all_uses(Inst, Env); + #load_word_index{} -> + subst_all_uses(Inst, Env); + #move{} -> + subst_all_uses(Inst, Env); + #multimove{} -> + subst_all_uses(Inst, Env); + #return{} -> + subst_all_uses(Inst, Env); + #store{} -> + subst_all_uses(Inst, Env); + #switch{} -> + update_switch(Inst, Env); + #phi{} -> + update_phi(Inst, Env); + _ -> % for the others it's sufficient to just update any thing they use. + [ Inst ] + end. + +%%----------------------------------------------------------------------------- +%% Procedure : subst_uses/2 +%% Purpose : looks up all things that an instruction uses and replaces +%% anything that is determined to be constant. +%% Arguments : Inst - the instruction +%% Env - in which everything happen. +%% Returns : list of instructions to replace Inst with. +%%----------------------------------------------------------------------------- + +subst_all_uses(Inst, Env) -> + Uses = hipe_rtl_ssa:uses_to_rename(Inst), + [ hipe_rtl:subst_uses(update_srcs(Uses, Env), Inst) ]. + +%%----------------------------------------------------------------------------- +%% Procedure : update_srcs/2 +%% Purpose : given the things that a instruction use return a list +%% {Src, NewValue} pairs that can be sent to subs_uses. +%% Arguments : Srcs - list of uses +%% Env - in which everything happens. +%% Returns : list of {Src, NewValue} pairs. +%%----------------------------------------------------------------------------- + +update_srcs(Srcs, Env) -> + Update = + fun(Src, Os) -> + case lookup_lattice_value(Src, Env) of + bottom -> Os; + top -> % this would be realy strange. + ?EXIT({"update_src, top", Src }); + Constant -> + [ {Src, hipe_rtl:mk_imm(Constant)} | Os] + end + end, + lists:foldl(Update, [], Srcs ). + +%%----------------------------------------------------------------------------- +%% functions for performing partial evaluation of alu-operations. They can +%% return either an integer (the actual result), move_src1 or move_src2 in +%% which case the alu-operation can be replace with a move, or keep_it in +%% which case the instruction must be kept. + +%%----------------------------------------------------------------------------- +%% Procedure : partial_update_shift/3 +%% Purpose : perform a shift +%% Arguments : Limit - the number of bits in the word to shift. +%% Val1 - the shiftee +%% Val2 - number of bits to shift +%% Returns : Integer, move_src1, keep_it +%%----------------------------------------------------------------------------- + +partial_update_shift(Limit, Val1, Val2) -> + if + (Val1 =:= bottom) and (Val2 =:= 0) -> move_src1; + (Val1 =:= 0) or ((Val2 =/= bottom) and (Val2 >= Limit)) -> 0; + true -> keep_it + end. + +%%----------------------------------------------------------------------------- +%% Procedure : partial_update_alu/3 +%% Purpose : perform as much of alu-operations where exatcly one of the +%% operands is bottom. +%% Arguments : Val1, Val2 - operands +%% Op - the operation. +%% Returns : Integer, move_src1, move_src2, keep_it +%%----------------------------------------------------------------------------- + +%% we know that exactly one of the operands are bottom this one +%% returns what to do with the instruction (it's either replace with +%% src1, replace src2 replace with constant or keep it. + +partial_update_alu(Val1, 'add', Val2) -> + if + (Val1 == 0) -> move_src2; + (Val2 == 0) -> move_src1; + true -> keep_it + end; +partial_update_alu(_Val1, 'sub', Val2) -> + if + (Val2 == 0) -> move_src1; + true -> keep_it + end; +partial_update_alu(Val1, 'or', Val2) -> + All_ones = all_ones(), + if + (Val1 == 0) -> move_src2; + (Val2 == 0) -> move_src1; + (Val1 == All_ones) or (Val2 == All_ones) -> All_ones; + true -> keep_it + end; +partial_update_alu(Val1, 'and', Val2) -> + All_ones = all_ones(), + if + Val1 == All_ones -> move_src2; + Val2 == All_ones -> move_src1; + (Val1 == 0) or (Val2 == 0) -> 0; + true -> keep_it + end; +partial_update_alu(Val1, 'xor', Val2) -> + if + (Val1 == 0) -> move_src2; + (Val2 == 0) -> move_src1; + true -> keep_it + end; +partial_update_alu(Val1, 'xornot', Val2) -> + All_ones = all_ones(), + if + (Val1 == All_ones) -> move_src2; + (Val2 == All_ones) -> move_src1; + true -> keep_it + end; +partial_update_alu(Val1, andnot, Val2) -> + All_ones = all_ones(), + if + Val2 == 0 -> move_src1; + (Val1 == 0) or (Val2 == All_ones) -> 0; + true -> keep_it + end; +partial_update_alu(Val1, Op, Val2) when (Op =:= 'sll') or (Op =:= 'srl') -> + BitSize = ?bytes_to_bits(hipe_rtl_arch:word_size()), + partial_update_shift(BitSize, Val1, Val2); +partial_update_alu(Val1, Op, Val2) when (Op =:= 'sllx') or (Op =:= 'srlx') -> + partial_update_shift(64, Val1, Val2); +partial_update_alu(Val1, Op, Val2) when (Op =:= 'sra') or (Op =:= 'srax') -> + if + Val2 == 0 -> move_src1; + Val1 == 0 -> 0; + true -> keep_it + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_alu/2 +%% Purpose : update an alu-instruction. +%% Arguments : Inst - the instruction. +%% Env - in which everything happens. +%% Returns : list of new instruction +%%----------------------------------------------------------------------------- + +update_alu(Inst, Env) -> + Val1 = lookup_lattice_value(hipe_rtl:alu_src1(Inst), Env), + Val2 = lookup_lattice_value(hipe_rtl:alu_src2(Inst), Env), + if + (Val1 =:= bottom) and (Val2 =:= bottom) -> + [Inst]; + (Val1 =:= bottom) or (Val2 =:= bottom) -> + NewInst = + case partial_update_alu(Val1, hipe_rtl:alu_op(Inst), Val2) of + move_src1 -> + hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:alu_src1(Inst)); + move_src2 -> + hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:alu_src2(Inst)); + keep_it -> + S1 = make_alub_subst_list(Val1, hipe_rtl:alu_src1(Inst), []), + S2 = make_alub_subst_list(Val2, hipe_rtl:alu_src2(Inst), S1), + hipe_rtl:subst_uses(S2, Inst); + Constant -> + hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Constant)) + end, + [NewInst]; + true -> + {Val,_,_,_,_} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2), + [hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Val))] + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_branch/2 +%% Purpose : update an branch-instruction +%% Arguments : Inst - the instruction. +%% Env - in which everything happens. +%% Returns : list of new instruction +%%----------------------------------------------------------------------------- + +update_branch(Inst, Env) -> + Src1 = hipe_rtl:branch_src1(Inst), + Src2 = hipe_rtl:branch_src2(Inst), + Val1 = lookup_lattice_value(Src1, Env), + Val2 = lookup_lattice_value(Src2, Env), + if + (Val1 =:= bottom) and (Val2 =:= bottom) -> + [Inst]; + Val1 =:= bottom -> + [hipe_rtl:subst_uses([{Src2, hipe_rtl:mk_imm(Val2)}], Inst)]; + Val2 =:= bottom -> + [hipe_rtl:subst_uses([{Src1, hipe_rtl:mk_imm(Val1)}], Inst)]; + true -> + case hipe_rtl_arch:eval_cond(hipe_rtl:branch_cond(Inst), Val1, Val2) of + true -> [hipe_rtl:mk_goto(hipe_rtl:branch_true_label(Inst))]; + false -> [hipe_rtl:mk_goto(hipe_rtl:branch_false_label(Inst))] + end + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_alub/2 +%% Purpose : update an alub-instruction. Here are some finer points, we might +%% be able to do the math (think b = a+0), but it's hard to replace +%% the branch, since the mapping b/w AluOp,RelOp to BranchInstr is +%% boring to do. (lazyness is a bliss). +%% Arguments : Inst - the instruction. +%% Env - in which everything happens. +%% Returns : list of new instructions +%%----------------------------------------------------------------------------- + +%% some small helpers. +alub_to_move(Inst, Res, Lab) -> + [ hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res), + hipe_rtl:mk_goto(Lab) ]. + +make_alub_subst_list(bottom, _, Tail) -> Tail; +make_alub_subst_list(top, Src, _) -> + ?EXIT({"~w is top during update",Src }); +make_alub_subst_list(Val, Src, Tail) -> + case hipe_rtl:is_imm(Src) of + true -> Tail; + false -> [{Src, hipe_rtl:mk_imm(Val)} | Tail] + end. + +update_alub(Inst, Env) -> + Src1 = hipe_rtl:alub_src1(Inst), + Src2 = hipe_rtl:alub_src2(Inst), + Val1 = lookup_lattice_value(Src1, Env), + Val2 = lookup_lattice_value(Src2, Env), + {ResVal, N, Z, C, V} = evaluate_alu(Val1, hipe_rtl:alub_op(Inst), Val2), + CondRes = partial_eval_branch(hipe_rtl:alub_cond(Inst), N, Z, C, V), + case CondRes of + bottom -> + %% if we can't evaluate the branch, we have to keep it as a alub isnt + %% since other optimizations might insert other instructions b/w the + %% move and the branch. We can however replace variable with constants: + S1 = make_alub_subst_list(Val1, Src1, []), + S2 = make_alub_subst_list(Val2, Src2, S1), + [ hipe_rtl:subst_uses(S2, Inst) ]; + _ -> % we know where we will be going, let's find out what Dst should be. + % knowing where we are going means that at most one of the values is + % bottom, hence we can replace the alu-instr with a move. + % remember, a = b + 0 can give us enough info to know what jump to + % do without knowing the value of a. (I wonder if this will ever + % actualy happen ;) + Res = case ResVal of + bottom -> % something nonconstant. + if (Val1 =:= bottom) -> Src1; + (Val2 =:= bottom) -> Src2 + end; + _ -> hipe_rtl:mk_imm(ResVal) + end, + case CondRes of + top -> io:format("oops. something VERY bad: ~w ~w V1 & 2 ~w ~w\n", + [Inst, {ResVal, N, Z, C, V} , Val1, Val2]), + [Inst ]; + true -> alub_to_move(Inst, Res, hipe_rtl:alub_true_label(Inst)); + false -> alub_to_move(Inst, Res, hipe_rtl:alub_false_label(Inst)) + end + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_goto_index/2 +%% Purpose : update a goto_index instruction. +%% Arguments : Inst - the instruction. +%% Env - in which everything happens. +%% Returns : list of new instructions. +%%----------------------------------------------------------------------------- + +update_goto_index(Inst, Env) -> + Index = hipe_rtl:goto_index_index(Inst), + case lookup_lattice_value(Index, Env) of + bottom -> %% everything is reachable + [Inst]; + I -> %% only the ith label will be taken. + [hipe_rtl:mk_goto(lists:nth(hipe_rtl:goto_index_labels(Inst), I))] + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_switch/2 +%% Purpose : update a switch instruction. +%% Arguments : Inst - the instruction. +%% Env - in which everything happens. +%% Returns : list of new instructions. +%%----------------------------------------------------------------------------- + +update_switch(Inst, Env) -> + case lookup_lattice_value(hipe_rtl:switch_src(Inst), Env) of + bottom -> + [Inst]; + Const -> + Lab = find_switch_label(Inst, Const), + [hipe_rtl:mk_goto(Lab)] + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_phi/3 +%% Purpose : Update a phi-function w.r.t. constants. do nothing for now. +%% Arguments : Instruction - The instruction +%% Environment - The environment +%% Returns : [NewInstruction] +%%----------------------------------------------------------------------------- + +update_phi(Instruction, Environment) -> + Destination = hipe_rtl:phi_dst(Instruction), + case lookup_lattice_value(Destination, Environment) of + bottom -> + [Instruction]; + top -> + ?WARNING_MSG("The dst of ~w is top after SCCP. Strange\n",[Instruction]), + ?EXIT({"bang !", Instruction}), + [Instruction]; + Value -> + [hipe_rtl:mk_move(Destination, hipe_rtl:mk_imm(Value))] + end. + +%%----------------------------------------------------------------------------- + +%% make sure that all precoloured rgisters are taken out of the equation. +lookup_lattice_value(X, Environment) -> + case hipe_rtl_arch:is_precoloured(X) or hipe_rtl:is_const_label(X) of + true -> + bottom; + false -> + lookup_lattice_value2(X, Environment) + end. + +lookup_lattice_value2(X, Environment) -> + LatticeValues = env__lattice_values(Environment), + case hipe_rtl:is_imm(X) of + true -> + hipe_rtl:imm_value(X); + false -> + case gb_trees:lookup(X, LatticeValues) of + none -> + io:format("~w~n",[LatticeValues]), + ?WARNING_MSG("Earlier compiler steps generated erroneous " + "code for X = ~w. We are ignoring this.\n",[X]), + bottom; + {value, top} -> + ?EXIT({"lookup_lattice_value, top", X}), + top; + {value, Y} -> + Y + end + end. + +%%----------------------------- End of file ----------------------------------- diff --git a/lib/hipe/rtl/hipe_rtl_ssapre.erl b/lib/hipe/rtl/hipe_rtl_ssapre.erl new file mode 100644 index 0000000000..a9e92e5688 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_ssapre.erl @@ -0,0 +1,1679 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% File : hipe_rtl_ssapre.erl +%% Author : He Bingwen and Frédéric Haziza +%% Description : Performs Partial Redundancy Elimination on SSA form. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% +%% This module implements the Anticipation-SSAPRE algorithm, +%% with several modifications for Partial Redundancy Elimination on SSA form. +%% We actually found problems in this algorithm, so +%% we implement another version with several advantages: +%% - No loop for Xsi insertions +%% - No fix point iteration for the downsafety part +%% - Less computations for Will Be Available part +%% - Complexity of the overall algorithm is improved +%% +%% We were supposed to publish these results anyway :D +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_ssapre). + +-export([rtl_ssapre/2]). + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). + +%%-define(SSAPRE_DEBUG, true ). %% When uncommented, produces debug printouts +-define( SETS, ordsets ). %% Which set implementation module to use +-define( CFG, hipe_rtl_cfg ). +-define( RTL, hipe_rtl ). +-define( BB, hipe_bb ). +-define( ARCH, hipe_rtl_arch ). +-define( GRAPH, digraph ). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Debugging stuff +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-ifndef(SSAPRE_DEBUG). +-define(pp_debug(_Str, _Args), ok). +-else. +-define(pp_debug(Str, Args), io:format(standard_io, Str, Args)). +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Records / Structures +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(xsi_link, {num}). %% Number is the index of the temporary (a Key into the Xsi Tree) +-record(temp, {key, var}). +-record(bottom, {key, var}). +-record(xsi, {inst, %% Associated instruction + def, %% Hypothetical temporary variable + %% that stores the result of the computation + label, %% Block Label where the xsi is inserted + opList, %% List of operands + cba, %% + later, %% + wba + }). + +-record(pre_candidate, {alu, def}). +-record(xsi_op, {pred, op}). + +-record(mp, {xsis, maps, preds, defs, uses, ndsSet}). +-record(block, {type, attributes}). + +-record(eop, {expr, var, stopped_by}). +-record(insertion, {code, from}). + +-record(const_expr, {var, value}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Main function +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +rtl_ssapre(RtlSSACfg, Options) -> + %% io:format("\n################ Original CFG ################\n"), + %% hipe_rtl_cfg:pp(RtlSSACfg), + %% io:format("\n\n############ SSA-Form CHECK ==> ~w\n",[hipe_rtl_ssa:check(RtlSSACfg)]), + + {CFG2,XsiGraph,CFGGraph,MPs} = perform_Xsi_insertion(RtlSSACfg,Options), + %%?pp_debug("~n~n################ Xsi CFG ################\n",[]),pp_cfg(CFG2,XsiGraph), + XsiList = ?GRAPH:vertices(XsiGraph), + case XsiList of + [] -> + %% No Xsi + ?option_time(?pp_debug("~n~n################ No Xsi Inserted ################~n",[]),"RTL A-SSAPRE No Xsi inserted (skip Downsafety and Will Be Available)",Options), + ok; + _ -> + ?pp_debug("~n############ Downsafety ##########~n",[]), + ?option_time(perform_downsafety(MPs,CFGGraph,XsiGraph),"RTL A-SSAPRE Downsafety",Options), + ?pp_debug("~n~n################ CFG Graph ################~n",[]),pp_cfggraph(CFGGraph), + ?pp_debug("~n############ Will Be Available ##########~n",[]), + ?option_time(perform_will_be_available(XsiGraph,CFGGraph,Options),"RTL A-SSAPRE WillBeAvailable",Options) + end, + + ?pp_debug("~n############ No more need for the CFG Graph....Deleting...",[]),?GRAPH:delete(CFGGraph), + ?pp_debug("~n~n################ Xsi Graph ################~n",[]),pp_xsigraph(XsiGraph), + + ?pp_debug("~n############ Code Motion ##########~n",[]), + Labels = ?CFG:preorder(CFG2), + + ?pp_debug("~n~n################ Xsi CFG ################~n",[]),pp_cfg(CFG2,XsiGraph), + + init_redundancy_count(), + ?option_time(FinalCFG=perform_code_motion(Labels,CFG2,XsiGraph),"RTL A-SSAPRE Code Motion",Options), + + ?pp_debug("\n############ No more need for the Xsi Graph....Deleting...",[]),?GRAPH:delete(XsiGraph), + + %% io:format("\n################ Final CFG ################\n"), + %% hipe_rtl_cfg:pp(FinalCFG), + %% io:format("\n\n############ SSA-Form CHECK ==> ~w\n", + %% [hipe_rtl_ssa:check(FinalCFG)]), + ?pp_debug("\nSSAPRE : ~w redundancies were found\n",[get_redundancy_count()]), + + FinalCFG. + +%% ########################################################################## +%% ######################## XSI INSERTION ################################### +%% ########################################################################## + +perform_Xsi_insertion(Cfg, Options) -> + init_counters(), %% Init counters for Bottoms and Temps + DigraphOpts = [cyclic, private], + XsiGraph = digraph:new(DigraphOpts), + %% Be carefull, the digraph component is NOT garbage collected, + %% so don't create 20 millions of instances! + %% finds the longest depth + %% Depth-first, preorder traversal over Basic Blocks. + %%Labels = ?CFG:reverse_postorder(Cfg), + Labels = ?CFG:preorder(Cfg), + + ?pp_debug("~n~n############# Finding definitions for computation~n~n",[]), + ?option_time({Cfg2,XsiGraph} = find_definition_for_computations(Labels,Cfg,XsiGraph),"RTL A-SSAPRE Xsi Insertion, searching from instructions",Options), + + %% Active List creation + GeneratorXsiList = lists:sort(?GRAPH:vertices(XsiGraph)), + ?pp_debug("~n~n############# Inserted Xsis ~w",[GeneratorXsiList]), + ?pp_debug("~n~n############# Finding operands~n",[]), + ?option_time({Cfg3,XsiGraph} = find_operands(Cfg2,XsiGraph,GeneratorXsiList,0),"RTL A-SSAPRE Xsi Insertion, finding operands",Options), + + %% Creating the CFGGraph + ?pp_debug("~n~n############# Creating CFG Graph",[]), + ?pp_debug("~n############# Labels = ~w",[Labels]), + CFGGraph = digraph:new(DigraphOpts), + [StartLabel|Others] = Labels, % adding the start label as a leaf + ?pp_debug("~nAdding a vertex for the start label: ~w",[StartLabel]), + ?GRAPH:add_vertex(CFGGraph, StartLabel, #block{type = top}), + % Doing the others + ?option_time(MPs=create_cfggraph(Others,Cfg3,CFGGraph,[],[],[],XsiGraph),"RTL A-SSAPRE Xsi Insertion, creating intermediate 'SSAPRE Graph'",Options), + + %% Return the bloody collected information + {Cfg3,XsiGraph,CFGGraph,MPs}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +find_definition_for_computations([], Cfg, XsiGraph) -> + {Cfg,XsiGraph}; %% No more block to inspect in the depth-first order +find_definition_for_computations([Label|Rest], Cfg, XsiGraph) -> + Code = ?BB:code(?CFG:bb(Cfg,Label)), + {NewCfg,XsiGraph} = find_definition_for_computations_in_block(Label,Code,Cfg,[],XsiGraph), + find_definition_for_computations(Rest, NewCfg, XsiGraph). + +%%=========================================================================== +%% Searches from instruction for one block BlockLabel. +%% We process forward over instructions. + +find_definition_for_computations_in_block(BlockLabel,[],Cfg, + VisitedInstructions,XsiGraph)-> + Code = lists:reverse(VisitedInstructions), + NewBB = ?BB:mk_bb(Code), + NewCfg = ?CFG:bb_add(Cfg,BlockLabel,NewBB), + {NewCfg,XsiGraph}; %% No more instructions to inspect in this block +find_definition_for_computations_in_block(BlockLabel,[Inst|Rest],Cfg, + VisitedInstructions,XsiGraph) -> + %% ?pp_debug(" Inspecting instruction: ",[]),pp_instr(Inst,nil), + case Inst of + #alu{} -> + %% Is Inst interesting for SSAPRE? + %% i.e., is Inst an arithmetic operation which doesn't deal with precoloured? + %% Note that since we parse forward, we have no 'pre_candidate'-type so far. + case check_definition(Inst,VisitedInstructions,BlockLabel,Cfg,XsiGraph) of + {def_found,Def} -> + %% Replacing Inst in Cfg + NewInst = #pre_candidate{alu=Inst,def=Def}, + NewVisited = [NewInst|VisitedInstructions], + %% Recurse forward over instructions, same CFG, same XsiGraph + find_definition_for_computations_in_block(BlockLabel,Rest,Cfg, + NewVisited,XsiGraph); + {merge_point,Xsi} -> + Def = Xsi#xsi.def, + Key = Def#temp.key, + NewInst = #pre_candidate{alu=Inst,def=Def}, + XsiLink = #xsi_link{num=Key}, + + %% Add a vertex to the Xsi Graph + ?GRAPH:add_vertex(XsiGraph,Key,Xsi), + ?pp_debug(" Inserting Xsi: ",[]),pp_xsi(Xsi), + + Label = Xsi#xsi.label, + case BlockLabel =:= Label of + false -> + %% Insert the Xsi in the appropriate block + Code = hipe_bb:code(?CFG:bb(Cfg,Label)), + {BeforeCode,AfterCode} = split_for_xsi(lists:reverse(Code),[]), + NewCode = BeforeCode++[XsiLink|AfterCode], + NewBB = hipe_bb:mk_bb(NewCode), + NewCfg = ?CFG:bb_add(Cfg,Label,NewBB), + NewVisited = [NewInst|VisitedInstructions]; + _-> + {BeforeCode,AfterCode} = split_for_xsi(VisitedInstructions,[]), + TempVisited = BeforeCode++[XsiLink|AfterCode], + TempVisited2 = lists:reverse(TempVisited), + NewVisited = [NewInst|TempVisited2], + NewCfg = Cfg + end, + find_definition_for_computations_in_block(BlockLabel, Rest, NewCfg, + NewVisited, XsiGraph) + end; + _ -> + %%?pp_debug("~n [L~w] Not concerned with: ~w",[BlockLabel,Inst]), + %% If the instruction is not a SSAPRE candidate, we skip it and keep on + %% processing instructions + %% Prepend Inst, so that we have all in reverse order. + %% Easy to parse backwards + find_definition_for_computations_in_block(BlockLabel, Rest, Cfg, + [Inst|VisitedInstructions], XsiGraph) + end. + +%% ############################################################################ +%% We have E as an expression, I has an alu (arithmetic operation), and +%% we inspect backwards the previous instructions to find a definition for E. +%% Since we parse in forward order, we know that the previous SSAPRE +%% instruction will have a definition. + +check_definition(E,[],BlockLabel,Cfg,XsiGraph)-> + %% No more instructions in that block + %% No definition found in that block + %% Search is previous blocks + Preds = ?CFG:pred(Cfg, BlockLabel), + %% ?pp_debug("~n CHECKING DEFINITION ####### Is L~w a merge block? It has ~w preds. So far E=",[BlockLabel,length(Preds)]),pp_expr(E), + case Preds of + [] -> + %% Entry Point + {def_found,bottom}; + [P] -> + %% One predecessor only, we just keep looking for a definition in that block + VisitedInstructions = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,P))), + check_definition(E,VisitedInstructions,P,Cfg,XsiGraph); + _ -> + Temp = new_temp(), + %% It's a merge point + OpList = [#xsi_op{pred=X} || X<-Preds], + Xsi = #xsi{inst=E,def=Temp,label=BlockLabel,opList=OpList}, + {merge_point,Xsi} + end; +check_definition(E,[CC|Rest],BlockLabel,Cfg,XsiGraph) -> + SRC1 = ?RTL:alu_src1(E), + SRC2 = ?RTL:alu_src2(E), + case CC of + #alu{} -> + exit({?MODULE,should_not_be_an_alu, + {"Why the hell do we still have an alu???",CC}}); + #pre_candidate{} -> + %% C is the previous instruction + C = CC#pre_candidate.alu, + DST = ?RTL:alu_dst(C), + case DST =:= SRC1 orelse DST =:= SRC2 of + false -> + case check_match(E,C) of + true -> %% It's a computation of E! + %% Get the dst of the alu + {def_found,DST}; + _-> + check_definition(E,Rest,BlockLabel,Cfg,XsiGraph) + end; + true -> + %% Get the definition of C, since C is PRE-candidate AND has been processed before + DEF = CC#pre_candidate.def, + case DEF of + bottom -> + %% Def(E)=bottom, STOP + {def_found,bottom}; + _ -> + %% Emend E with this def(C) + %%?pp_debug("Parameters are E=~w, DST=~w, DEF=~w",[E,DST,DEF]), + F = emend(E,DST,DEF), + check_definition(F,Rest,BlockLabel,Cfg,XsiGraph) %% Continue the search + end + end; + #move{} -> + %% It's a move, we emend E, and continue the definition search + DST = ?RTL:move_dst(CC), + F = case SRC1 =:= DST orelse SRC2 =:= DST of + true -> + SRC = ?RTL:move_src(CC), + emend(E,DST,SRC); + _ -> + E + end, + check_definition(F,Rest,BlockLabel,Cfg,XsiGraph); %% Continue the search + #xsi_link{} -> + {_K,Xsi} = ?GRAPH:vertex(XsiGraph,CC#xsi_link.num), + C = Xsi#xsi.inst, + case check_match(C,E) of + true -> %% There is a Xsi already with a computation of E! + %% fetch definition of C, and give it to E + {def_found,Xsi#xsi.def}; + _-> + check_definition(E,Rest,BlockLabel,Cfg,XsiGraph) + end; + #phi{} -> + %% skip them. NOTE: Important to separate this case from the next one + check_definition(E,Rest,BlockLabel,Cfg,XsiGraph); + _ -> + %% Note: the function calls or some other instructions can change the pre-coloured registers + %% which are able to be redefined. This breaks of course the SSA form. + %% If there is a redefinition we can give bottom to the computation, and no xsi will be inserted. + %% (In some sens, the result of the computation is new at that point.) + PreColouredTest = ?ARCH:is_precoloured(SRC1) orelse ?ARCH:is_precoloured(SRC2), + + %%RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)) orelse ?RTL:is_reg(SRC1) orelse ?RTL:is_reg(SRC2), + RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)), %% That means we cannot reuse the result held in this register... + + case PreColouredTest orelse RegisterTest of + true -> + {def_found,bottom}; + false -> + DC = ?RTL:defines(CC), + case lists:member(SRC1,DC) orelse lists:member(SRC2,DC) of + true -> + {def_found,bottom}; + false -> + %% Orthogonal to E, we continue the search + check_definition(E,Rest,BlockLabel,Cfg,XsiGraph) + end + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +check_match(E, C) -> + OpE = ?RTL:alu_op(E), + OpC = ?RTL:alu_op(C), + case OpE =:= OpC of + false -> + false; + true -> + Src1E = ?RTL:alu_src1(E), + Src2E = ?RTL:alu_src2(E), + Src1C = ?RTL:alu_src1(C), + Src2C = ?RTL:alu_src2(C), + case Src1E =:= Src1C of + true -> + Src2E =:= Src2C; + false -> + Src1E =:= Src2C andalso Src2E =:= Src1C + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +expr_is_const(E) -> + ?RTL:is_imm(?RTL:alu_src1(E)) andalso ?RTL:is_imm(?RTL:alu_src2(E)). +%% is_number(?RTL:alu_src1(E)) andalso is_number(?RTL:alu_src2(E)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Must be an arithmetic operation, i.e. #alu{} +emend(Expr, S, Var) -> + SRC1 = ?RTL:alu_src1(Expr), + NewExpr = case SRC1 =:= S of + true -> ?RTL:alu_src1_update(Expr,Var); + false -> Expr + end, + SRC2 = ?RTL:alu_src2(NewExpr), + case SRC2 =:= S of + true -> ?RTL:alu_src2_update(NewExpr,Var); + false -> NewExpr + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +split_for_xsi([], Acc) -> + {[], Acc}; % no_xsi_no_phi_found; +split_for_xsi([I|Is] = Code, Acc) -> %% [I|Is] in backward order, Acc in order + case I of + #xsi_link{} -> + {lists:reverse(Code), Acc}; + #phi{} -> + {lists:reverse(Code), Acc}; + _ -> + split_for_xsi(Is, [I|Acc]) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Phase 1.B : Search for operands +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +find_operands(Cfg,XsiGraph,[],_Count) -> + {Cfg,XsiGraph}; +find_operands(Cfg,XsiGraph,ActiveList,Count) -> + {NewCfg,TempActiveList} = find_operands_for_active_list(Cfg,XsiGraph,ActiveList,[]), + NewActiveList = lists:reverse(TempActiveList), + ?pp_debug("~n################ Finding operands (iteration ~w): ~w have been introduced. Now ~w in total~n", + [Count+1, length(NewActiveList), length(?GRAPH:vertices(XsiGraph))]), + find_operands(NewCfg,XsiGraph,NewActiveList,Count+1). + +find_operands_for_active_list(Cfg,_XsiGraph,[],ActiveListAcc) -> + {Cfg,ActiveListAcc}; +find_operands_for_active_list(Cfg,XsiGraph,[K|Ks],ActiveListAcc) -> + {_Key,Xsi} = ?GRAPH:vertex(XsiGraph,K), + ?pp_debug("~n Inspecting operands of : ~n",[]),pp_xsi(Xsi), + Preds = ?CFG:pred(Cfg, Xsi#xsi.label), + {NewCfg,NewActiveListAcc}=determine_operands(Xsi,Preds,Cfg,K,XsiGraph,ActiveListAcc), + {_Key2,Xsi2} = ?GRAPH:vertex(XsiGraph,K), + ?pp_debug("~n ** Final Xsi: ~n",[]),pp_xsi(Xsi2), + ?pp_debug("~n #####################################################~n",[]), + find_operands_for_active_list(NewCfg,XsiGraph,Ks,NewActiveListAcc). + +determine_operands(_Xsi,[],Cfg,_K,_XsiGraph,ActiveAcc) -> + %% All operands have been determined. + %% The CFG is not updated, only the XsiGraph + {Cfg,ActiveAcc}; +determine_operands(Xsi,[P|Ps],Cfg,K,XsiGraph,ActiveAcc) -> + Label = Xsi#xsi.label, + ReverseCode = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,Label))), + VisitedInstructions = get_visited_instructions(Xsi,ReverseCode), + Res = determine_e_prime(Xsi#xsi.inst,VisitedInstructions,P,XsiGraph), + case Res of + operand_is_bottom -> + NewXsi = xsi_arg_update(Xsi,P,new_bottom()), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc); + operand_is_const_expr -> + NewXsi = xsi_arg_update(Xsi,P,new_bottom()), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc); + {sharing_operand,Op} -> + NewXsi = xsi_arg_update(Xsi,P,Op), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc); + {revised_expression,E_prime} -> + ?pp_debug(" E' is determined : ",[]),pp_expr(E_prime), + ?pp_debug(" and going along the edge L~w~n",[P]), + %% Go along the edge P + RevCode = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,P))), + case check_one_operand(E_prime,RevCode,P,Cfg,K,XsiGraph) of + {def_found,Def} -> + NewXsi = xsi_arg_update(Xsi,P,Def), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc); + + {expr_found,ChildExpr} -> + NewXsi = xsi_arg_update(Xsi,P,ChildExpr), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc); + + {expr_is_const, Op} -> + %% We detected that the expression is of the form: 'N op M' + %% where N and M are constant. + NewXsi = xsi_arg_update(Xsi,P,Op), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc); + + {merge_point,XsiChild} -> + %% Update that Xsi, give its definition as Operand for the + %% search, and go on + XsiChildDef = XsiChild#xsi.def, + NewXsi = xsi_arg_update(Xsi,P,XsiChildDef), + ?GRAPH:add_vertex(XsiGraph,K,NewXsi), + + KeyChild = XsiChildDef#temp.key, + XsiChildLink = #xsi_link{num=KeyChild}, + ?GRAPH:add_vertex(XsiGraph,KeyChild,XsiChild), + + %% Should not be the same block !!!!!!! + RCode = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,XsiChild#xsi.label))), + {BCode,ACode} = split_code_for_xsi(RCode,[]), + + NewCode = BCode++[XsiChildLink|ACode], + NewBB = hipe_bb:mk_bb(NewCode), + NewCfg = ?CFG:bb_add(Cfg, XsiChild#xsi.label, NewBB), + + ?pp_debug(" -- ",[]),pp_arg(Xsi#xsi.def),?pp_debug(" causes insertion of: ~n",[]),pp_xsi(XsiChild), + ?pp_debug(" -- Adding an edge ",[]),pp_arg(Xsi#xsi.def),?pp_debug(" -> ",[]),pp_arg(XsiChild#xsi.def), + + %% Adding an edge... + %%?GRAPH:add_edge(XsiGraph,K,KeyChild,"family"), + ?GRAPH:add_edge(XsiGraph,K,KeyChild), + determine_operands(NewXsi,Ps,NewCfg,K,XsiGraph,[KeyChild|ActiveAcc]) + end + end. + +determine_e_prime(Expr,VisitedInstructions,Pred,XsiGraph) -> + %% MUST FETCH FROM THE XSI TREE, since Xsis are not updated yet in the CFG + NewExpr = emend_with_phis(Expr,VisitedInstructions,Pred), + emend_with_processed_xsis(NewExpr,VisitedInstructions,Pred,XsiGraph). + +emend_with_phis(EmendedE, [], _) -> + EmendedE; +emend_with_phis(E, [I|Rest], Pred) -> + case I of + #phi{} -> + Dst = ?RTL:phi_dst(I), + UE = ?RTL:uses(E), %% Should we get SRC1 and SRC2 instead? + case lists:member(Dst, UE) of + false -> + emend_with_phis(E, Rest, Pred); + true -> + NewE = emend(E, Dst, ?RTL:phi_arg(I,Pred)), + emend_with_phis(NewE, Rest, Pred) + end; + _ -> + emend_with_phis(E, Rest, Pred) + end. + +emend_with_processed_xsis(EmendedE, [], _, _) -> + {revised_expression,EmendedE}; +emend_with_processed_xsis(E, [I|Rest], Pred, XsiGraph) -> + case I of + #xsi_link{} -> + Key = I#xsi_link.num, + {_KK,Xsi} = ?GRAPH:vertex(XsiGraph,Key), + Def = Xsi#xsi.def, + UE = ?RTL:uses(E), %% Should we get SRC1 and SRC2 instead? + case lists:member(Def,UE) of + false -> + CE = Xsi#xsi.inst, + case check_match(E,CE) of + true -> %% It's a computation of E! + case xsi_arg(Xsi,Pred) of + undetermined_operand -> + exit({?MODULE,check_operand_sharing,"######## Ôh Dear, we trusted Kostis !!!!!!!!! #############"}); + XsiOp -> + {sharing_operand,XsiOp} %% They share operands + end; + _-> + emend_with_processed_xsis(E,Rest,Pred,XsiGraph) + end; + true -> + A = xsi_arg(Xsi,Pred), + %% ?pp_debug(" ######### xsi_arg(I:~w,Pred:~w) = ~w~n",[I,Pred,A]), + case A of + #bottom{} -> + operand_is_bottom; + #const_expr{} -> + operand_is_const_expr; + #eop{} -> + NewE = emend(E,Def,A#eop.var), + emend_with_processed_xsis(NewE,Rest,Pred,XsiGraph); + undetermined_operand -> + exit({?MODULE,emend_with_processed_xsis,"######## Ôh Dear, we trusted Kostis, again !!!!!!!!! #############"}); + XsiOp -> + NewE = emend(E,Def,XsiOp), + emend_with_processed_xsis(NewE,Rest,Pred,XsiGraph) + end + end; + _ -> + emend_with_processed_xsis(E,Rest,Pred,XsiGraph) + end. + +%% get_visited_instructions(Xsi,[]) -> +%% ?pp_debug("~nWe don't find this xsi with def ",[]),pp_arg(Xsi#xsi.def),?pp_debug(" in L~w : ",[Xsi#xsi.label]), +%% exit({?MODULE,no_such_xsi_in_block,"We didn't find that Xsi in the block"}); +get_visited_instructions(Xsi, [I|Is]) -> + case I of + #xsi_link{} -> + XsiDef = Xsi#xsi.def, + Key = XsiDef#temp.key, + case I#xsi_link.num =:= Key of + true -> + Is; + false -> + get_visited_instructions(Xsi, Is) + end; + _ -> + get_visited_instructions(Xsi, Is) + end. + +split_code_for_xsi([], Acc) -> + {[],Acc}; +split_code_for_xsi([I|Is] = Code, Acc) -> + case I of + #xsi_link{} -> + {lists:reverse(Code), Acc}; + #phi{} -> + {lists:reverse(Code), Acc}; + _ -> + split_code_for_xsi(Is, [I|Acc]) + end. + +check_one_operand(E, [], BlockLabel, Cfg, XsiKey, XsiGraph) -> + %% No more instructions in that block + %% No definition found in that block + %% Search is previous blocks + Preds = ?CFG:pred(Cfg, BlockLabel), + case Preds of + [] -> + %% Entry Point + {def_found,new_bottom()}; + [P] -> + %% One predecessor only, we just keep looking for a definition in that block + case expr_is_const(E) of + true -> + ?pp_debug("\n\n############## Wow expr is constant: ~w",[E]), + Var = ?RTL:mk_new_var(), + Value = eval_expr(E), + Op = #const_expr{var = Var, value = Value}, + {expr_is_const, Op}; + false -> + VisitedInstructions = lists:reverse(?BB:code(?CFG:bb(Cfg,P))), + check_one_operand(E, VisitedInstructions, P, Cfg, XsiKey, XsiGraph) + end; + _ -> + %% It's a merge point + case expr_is_const(E) of + true -> + ?pp_debug("\n\n############## Wow expr is constant at merge point: ~w",[E]), + Var = ?RTL:mk_new_var(), + Value = eval_expr(E), + Op = #const_expr{var = Var, value = Value}, + {expr_is_const, Op}; + false -> + Temp = new_temp(), + OpList = [#xsi_op{pred = X} || X <- Preds], + Xsi = #xsi{inst = E, def = Temp, label = BlockLabel, opList = OpList}, + {merge_point, Xsi} + end + end; +check_one_operand(E, [CC|Rest], BlockLabel, Cfg, XsiKey, XsiGraph) -> + SRC1 = ?RTL:alu_src1(E), + SRC2 = ?RTL:alu_src2(E), + %% C is the previous instruction + case CC of + #alu{} -> + exit({?MODULE,should_not_be_an_alu, + {"Why the hell do we still have an alu???",CC}}); + #xsi{} -> + exit({?MODULE,should_not_be_a_xsi, + {"Why the hell do we still have a xsi???",CC}}); + #pre_candidate{} -> + C = CC#pre_candidate.alu, + DST = ?RTL:alu_dst(C), + case DST =:= SRC1 orelse DST =:= SRC2 of + true -> + %% Get the definition of C, since C is PRE-candidate AND has + %% been processed before + DEF = CC#pre_candidate.def, + case DEF of + bottom -> + %% Def(E)=bottom, STOP + %% No update of the XsiGraph + {def_found,new_bottom()}; + _-> + %% Simply emend + F = emend(E,DST,DEF), + ?pp_debug("~nEmendation : E= ",[]),pp_expr(E),?pp_debug(" ==> E'= ",[]),pp_expr(F),?pp_debug("~n",[]), + check_one_operand(F,Rest,BlockLabel,Cfg,XsiKey,XsiGraph) + end; + false -> + case check_match(C,E) of + true -> %% It's a computation of E! + %% It should give DST and not Def + %% No update of the XsiGraph, cuz we use DST and not Def + %% The operand is therefore gonna be a real variable + {def_found,DST}; + _-> + %% Nothing to do with E + check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph) + end + end; + #move{} -> + %% It's a move, we emend E, and continue the definition search + DST = ?RTL:move_dst(CC), + case SRC1 =:= DST orelse SRC2 =:= DST of + true -> + SRC = ?RTL:move_src(CC), + F = emend(E,DST,SRC), + check_one_operand(F,Rest,BlockLabel,Cfg,XsiKey,XsiGraph); %% Continue the search + _ -> + check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph) %% Continue the search + end; + #xsi_link{} -> + Key = CC#xsi_link.num, + %% Is Key a family member of XsiDef ? + {_KK,Xsi} = ?GRAPH:vertex(XsiGraph,Key), + C = Xsi#xsi.inst, + case check_match(E,C) of + true -> %% There is a Xsi already with a computation of E! + %% fetch definition of C, and give it to E + %% Must update an edge in the XsiGraph, and here, we know it's a Temp + %% Note: this can create a loop (= a cycle of length 1) + ?pp_debug(" -- Found a cycle with match: Adding an edge t~w -> t~w",[XsiKey,Key]), + ?GRAPH:add_edge(XsiGraph,XsiKey,Key), + {def_found,Xsi#xsi.def}; + _ -> + case ?GRAPH:get_path(XsiGraph,Key,XsiKey) of + false -> + %% Is it a loop back to itself??? + case Key =:= XsiKey of + false -> + check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph); + _ -> + {expr_found,#eop{expr=E,var=?RTL:mk_new_var(),stopped_by=Key}} + end; + _ -> + %% Returning the expression instead of looping + %% And in case of no match + ExprOp = #eop{expr=E,var=?RTL:mk_new_var(),stopped_by=Key}, + {expr_found,ExprOp} + end + end; + #phi{} -> %% skip them + check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph); + _ -> + PreColouredTest = ?ARCH:is_precoloured(SRC1) orelse ?ARCH:is_precoloured(SRC2), + + %%RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)) orelse ?RTL:is_reg(SRC1) orelse ?RTL:is_reg(SRC2), + RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)), + case PreColouredTest orelse RegisterTest of + true -> + {def_found,new_bottom()}; + _-> + DC = ?RTL:defines(CC), + case lists:member(SRC1,DC) orelse lists:member(SRC2,DC) of + true -> + {def_found,new_bottom()}; + _ -> + %% Orthogonal to E, we continue the search + check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph) + end + end + end. + +eval_expr(E) -> + ?pp_debug("~n Evaluating the result of ~w~n", [E]), + Op1 = ?RTL:alu_src1(E), + Op2 = ?RTL:alu_src2(E), + true = ?RTL:is_imm(Op1), + Val1 = ?RTL:imm_value(Op1), + true = ?RTL:is_imm(Op2), + Val2 = ?RTL:imm_value(Op2), + {Result, _Sign, _Zero, _Overflow, _Carry} = ?ARCH:eval_alu(?RTL:alu_op(E), Val1, Val2), + ?pp_debug("~n Result is then ~w~n", [Result]), + ?RTL:mk_imm(Result). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%% CREATTING CFGGRAPH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +create_cfggraph([],_Cfg,CFGGraph,ToBeFactorizedAcc,MPAcc,LateEdges,_XsiGraph) -> + ?pp_debug("~n~n ############# PostProcessing ~n~w~n",[LateEdges]), + post_process(LateEdges,CFGGraph), + ?pp_debug("~n~n ############# Factorizing ~n~w~n",[ToBeFactorizedAcc]), + factorize(ToBeFactorizedAcc,CFGGraph), + MPAcc; +create_cfggraph([Label|Ls],Cfg,CFGGraph,ToBeFactorizedAcc,MPAcc,LateEdges,XsiGraph) -> + Preds = ?CFG:pred(Cfg, Label), + case Preds of + [] -> + exit({?MODULE,do_not_call_on_top,{"Why the hell do we call that function on the start label???",Label}}); + [P] -> + Code = ?BB:code(?CFG:bb(Cfg, Label)), + Defs = get_defs_in_non_merge_block(Code, []), + ?pp_debug("~nAdding a vertex for ~w", [Label]), + Succs = ?CFG:succ(Cfg, Label), + case Succs of + [] -> %% Exit point + ?GRAPH:add_vertex(CFGGraph, Label, #block{type = exit}), + NewToBeFactorizedAcc = ToBeFactorizedAcc; + _ -> %% Split point + ?GRAPH:add_vertex(CFGGraph,Label,#block{type=not_mp,attributes={P,Succs}}), + NewToBeFactorizedAcc = [Label|ToBeFactorizedAcc] + end, + ?pp_debug("~nAdding an edge ~w -> ~w (~w)",[P,Label,Defs]), + case ?GRAPH:add_edge(CFGGraph,P,Label,Defs) of + {error,Reason} -> + exit({?MODULE,forget_that_for_christs_sake_bingwen_please,{"Bad edge",Reason}}); + _ -> + ok + end, + create_cfggraph(Ls,Cfg,CFGGraph,NewToBeFactorizedAcc,MPAcc,LateEdges,XsiGraph); + _ -> %% Merge point + Code = ?BB:code(?CFG:bb(Cfg,Label)), + {Defs,Xsis,Maps,Uses} = get_info_in_merge_block(Code,XsiGraph,[],[],gb_trees:empty(),gb_trees:empty()), + Attributes = #mp{preds=Preds,xsis=Xsis,defs=Defs,maps=Maps,uses=Uses}, + MergeBlock = #block{type=mp,attributes=Attributes}, + ?pp_debug("~nAdding a vertex for ~w with Defs= ~w",[Label,Defs]), + ?GRAPH:add_vertex(CFGGraph,Label,MergeBlock), + %% Add edges + NewLateEdges = add_edges_for_mp(Preds,Label,LateEdges), + create_cfggraph(Ls,Cfg,CFGGraph,ToBeFactorizedAcc,[Label|MPAcc],NewLateEdges,XsiGraph) + end. + +get_defs_in_non_merge_block([], Acc) -> + ?SETS:from_list(Acc); +get_defs_in_non_merge_block([Inst|Rest], Acc) -> + case Inst of + #pre_candidate{} -> + Def = Inst#pre_candidate.def, + case Def of + #temp{} -> + %% {temp,Key,_Var} -> + %% get_defs_in_non_merge_block(Rest,[Key|Acc]); + get_defs_in_non_merge_block(Rest, [Def#temp.key|Acc]); + _-> %% Real variables or bottom + get_defs_in_non_merge_block(Rest, Acc) + end; + _ -> + get_defs_in_non_merge_block(Rest, Acc) + end. + +get_info_in_merge_block([],_XsiGraph,Defs,Xsis,Maps,Uses) -> + {?SETS:from_list(Defs),Xsis,Maps,Uses}; %% Xsis are in backward order +get_info_in_merge_block([Inst|Rest],XsiGraph,Defs,Xsis,Maps,Uses) -> + case Inst of + #pre_candidate{} -> + Def = Inst#pre_candidate.def, + case Def of + #temp{} -> + get_info_in_merge_block(Rest,XsiGraph,[Def#temp.key|Defs],Xsis,Maps,Uses); + _ -> + get_info_in_merge_block(Rest,XsiGraph,Defs,Xsis,Maps,Uses) + end; + #xsi_link{} -> + Key = Inst#xsi_link.num, + {_Key,Xsi} = ?GRAPH:vertex(XsiGraph,Key), + OpList = xsi_oplist(Xsi), + {NewMaps,NewUses} = add_map_and_uses(OpList,Key,Maps,Uses), + get_info_in_merge_block(Rest,XsiGraph,Defs,[Key|Xsis],NewMaps,NewUses); + _ -> + get_info_in_merge_block(Rest,XsiGraph,Defs,Xsis,Maps,Uses) + end. + +add_edges_for_mp([], _Label, LateEdges) -> + LateEdges; +add_edges_for_mp([P|Ps], Label, LateEdges) -> + add_edges_for_mp(Ps,Label,[{P,Label}|LateEdges]). + +%% Doesn't do anything so far +add_map_and_uses([], _Key, Maps, Uses) -> + {Maps,Uses}; +add_map_and_uses([XsiOp|Ops], Key, Maps, Uses) -> + case XsiOp#xsi_op.op of + #bottom{} -> + Set = case gb_trees:lookup(XsiOp,Maps) of + {value, V} -> + ?SETS:add_element(Key,V); + none -> + ?SETS:from_list([Key]) + end, + NewMaps = gb_trees:enter(XsiOp,Set,Maps), + NewUses = Uses; + #temp{} -> + Set = case gb_trees:lookup(XsiOp,Maps) of + {value, V} -> + ?SETS:add_element(Key,V); + none -> + ?SETS:from_list([Key]) + end, + NewMaps = gb_trees:enter(XsiOp,Set,Maps), + Pred = XsiOp#xsi_op.pred, + OOP = XsiOp#xsi_op.op, + SSet = case gb_trees:lookup(Pred,Uses) of + {value, VV} -> + ?SETS:add_element(OOP#temp.key,VV); + none -> + ?SETS:from_list([OOP#temp.key]) + end, + NewUses = gb_trees:enter(Pred,SSet,Uses); + #eop{} -> + Set = case gb_trees:lookup(XsiOp,Maps) of + {value, V} -> + ?SETS:add_element(Key,V); + none -> + ?SETS:from_list([Key]) + end, + NewMaps = gb_trees:enter(XsiOp,Set,Maps), + Pred = XsiOp#xsi_op.pred, + Op = XsiOp#xsi_op.op, + SSet = case gb_trees:lookup(Pred,Uses) of + {value, VV} -> + ?SETS:add_element(Op#eop.stopped_by,VV); + none -> + ?SETS:from_list([Op#eop.stopped_by]) + end, + NewUses = gb_trees:enter(Pred,SSet,Uses); + _-> + NewMaps = Maps, + NewUses = Uses + end, + add_map_and_uses(Ops, Key, NewMaps, NewUses). + +post_process([], _CFGGraph) -> ok; +post_process([E|Es], CFGGraph) -> + {Pred,Label} = E, + {_PP,Block} = ?GRAPH:vertex(CFGGraph,Label), + Att = Block#block.attributes, + Uses = Att#mp.uses, + SetToAdd = case gb_trees:lookup(Pred,Uses) of + {value, Set} -> + Set; + none -> + ?SETS:new() + end, + %% ?pp_debug("~nAdding an edge ~w -> ~w (~w)",[Pred,Label,SetToAdd]), + ?GRAPH:add_edge(CFGGraph, Pred, Label, SetToAdd), + post_process(Es, CFGGraph). + +factorize([], _CFGGraph) -> ok; +factorize([P|Ps], CFGGraph) -> + [OE|OEs] = ?GRAPH:out_edges(CFGGraph,P), + %% ?pp_debug("~nIn_degrees ~w : ~w",[P,?GRAPH:in_degree(CFGGraph,P)]), + [InEdge] = ?GRAPH:in_edges(CFGGraph,P), + {E,V1,V2,Label} = ?GRAPH:edge(CFGGraph,InEdge), + {_OEE,_OEV1,_OEV2,LOE} = ?GRAPH:edge(CFGGraph,OE), + List = shoot_info_upwards(OEs,LOE,CFGGraph), + NewLabel = ?SETS:union(Label,List), + ?GRAPH:add_edge(CFGGraph,E,V1,V2,NewLabel), + factorize(Ps, CFGGraph). + +shoot_info_upwards([], Acc, _CFGGraph) -> Acc; +shoot_info_upwards([E|Es], Acc, CFGGraph) -> + {_E,_V1,_V2,Set} = ?GRAPH:edge(CFGGraph,E), + NewAcc = ?SETS:intersection(Acc, Set), + case ?SETS:size(NewAcc) of + 0 -> NewAcc; + _ -> shoot_info_upwards(Es,NewAcc,CFGGraph) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% DOWNSAFETY %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +perform_downsafety([], _G, _XsiG) -> + ok; +perform_downsafety([MP|MPs], G, XG) -> + {V,Block} = ?GRAPH:vertex(G, MP), + NDS = ?SETS:new(), + Att = Block#block.attributes, + Maps = Att#mp.maps, + Defs = Att#mp.defs, + OutEdges = ?GRAPH:out_edges(G, MP), + %% ?pp_debug("~n Inspection Maps : ~w",[Maps]), + NewNDS = parse_keys(gb_trees:keys(Maps),Maps,OutEdges,G,Defs,NDS,XG), + NewAtt = Att#mp{ndsSet = NewNDS}, + ?GRAPH:add_vertex(G, V, Block#block{attributes = NewAtt}), + ?pp_debug("~n Not Downsafe at L~w: ~w", [V, NewNDS]), + %%io:format(standard_io,"~n Not Downsafe at L~w: ~w",[V,NewNDS]), + perform_downsafety(MPs, G, XG). + +parse_keys([], _Maps, _OutEdges, _G, _Defs, NDS, _XsiG) -> + NDS; +parse_keys([M|Ms], Maps, OutEdges, G, Defs, NDS, XsiG) -> + KillerSet = gb_trees:get(M,Maps), + %% ?pp_debug("~n Inspection ~w -> ~w",[M,KillerSet]), + TempSet = ?SETS:intersection(KillerSet,Defs), + NewNDS = case ?SETS:size(TempSet) of + 0 -> getNDS(M,KillerSet,NDS,OutEdges,G,XsiG); + _ -> + %% One Xsi which has M as operand has killed it + %% M is then Downsafe + %% and is not added to the NotDownsafeSet (NDS) + NDS + end, + parse_keys(Ms, Maps, OutEdges, G, Defs, NewNDS, XsiG). + +getNDS(_M, _KillerSet, NDS, [], _G, _XsiG) -> + NDS; +getNDS(M, KillerSet, NDS, [E|Es], G, XsiG) -> + {_EE,_V1,_V2,Label} = ?GRAPH:edge(G, E), + Set = ?SETS:intersection(KillerSet, Label), + %% ?pp_debug("~n ######## Intersection between KillerSet: ~w and Label: ~w",[KillerSet,Label]), + %% ?pp_debug("~n ######## ~w",[Set]), + case ?SETS:size(Set) of + 0 -> + %% M is not downsafe + ?SETS:add_element(M, NDS); + _ -> + %% Try the other edges + getNDS(M, KillerSet, NDS, Es, G, XsiG) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% WILL BE AVAILABLE %%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +perform_will_be_available(XsiGraph,CFGGraph,Options) -> + Keys = ?GRAPH:vertices(XsiGraph), + ?pp_debug("~n############ Can Be Available ##########~n",[]), + ?option_time(perform_can_be_available(Keys,XsiGraph,CFGGraph),"RTL A-SSAPRE WillBeAvailable - Compute CanBeAvailable",Options), + ?pp_debug("~n############ Later ##########~n",[]), + ?option_time(perform_later(Keys,XsiGraph),"RTL A-SSAPRE WillBeAvailable - Compute Later",Options). + +perform_can_be_available([],_XsiGraph,_CFGGraph) -> ok; +perform_can_be_available([Key|Keys],XsiGraph,CFGGraph) -> + {V,Xsi} = ?GRAPH:vertex(XsiGraph,Key), + case Xsi#xsi.cba of + undefined -> + {_VV,Block} = ?GRAPH:vertex(CFGGraph,Xsi#xsi.label), + Att = Block#block.attributes, + NDS = Att#mp.ndsSet, + OpList = ?SETS:from_list(xsi_oplist(Xsi)), + Set = ?SETS:intersection(NDS,OpList), + case ?SETS:size(Set) of + 0 -> + ?GRAPH:add_vertex(XsiGraph, V, Xsi#xsi{cba = true}), + perform_can_be_available(Keys, XsiGraph, CFGGraph); + _ -> + LIST = [X || #temp{key=X} <- ?SETS:to_list(Set)], + case LIST of + [] -> + ?GRAPH:add_vertex(XsiGraph, V, Xsi#xsi{cba = false}), + ImmediateParents = ?GRAPH:in_neighbours(XsiGraph, Key), + propagate_cba(ImmediateParents,XsiGraph,Xsi#xsi.def,CFGGraph); + _ -> + ok + end, + perform_can_be_available(Keys, XsiGraph, CFGGraph) + end; + _ -> %% True or False => recurse + perform_can_be_available(Keys, XsiGraph, CFGGraph) + end. + +propagate_cba([],_XG,_Def,_CFGG) -> ok; +propagate_cba([IPX|IPXs],XsiGraph,XsiDef,CFGGraph) -> + {V,IPXsi} = ?GRAPH:vertex(XsiGraph,IPX), + {_VV,Block} = ?GRAPH:vertex(CFGGraph,IPXsi#xsi.label), + Att = Block#block.attributes, + NDS = Att#mp.ndsSet, + List = ?SETS:to_list(?SETS:intersection(NDS,?SETS:from_list(xsi_oplist(IPXsi)))), + case IPXsi#xsi.cba of + false -> ok; + _ -> + case lists:keymember(XsiDef, #xsi_op.op, List) of + true -> + ?GRAPH:add_vertex(XsiGraph, V, IPXsi#xsi{cba = false}), + ImmediateParents = ?GRAPH:in_neighbours(XsiGraph, IPX), + propagate_cba(ImmediateParents,XsiGraph,IPXsi#xsi.def,CFGGraph); + _ -> + ok + end + end, + propagate_cba(IPXs,XsiGraph,XsiDef,CFGGraph). + +perform_later([], _XsiGraph) -> ok; +perform_later([Key|Keys], XsiGraph) -> + {V, Xsi} = ?GRAPH:vertex(XsiGraph, Key), + %% ?pp_debug("~n DEBUG : inspecting later of ~w (~w)~n",[Key,Xsi#xsi.later]), + case Xsi#xsi.later of + undefined -> + OpList = xsi_oplist(Xsi), + case parse_ops(OpList,fangpi) of %% It means "fart" in chinese :D + has_temp -> + perform_later(Keys,XsiGraph); + has_real -> + case Xsi#xsi.cba of + true -> + ?GRAPH:add_vertex(XsiGraph,V,Xsi#xsi{later=false,wba=true}); + undefined -> + ?GRAPH:add_vertex(XsiGraph,V,Xsi#xsi{later=false,wba=true}); + _ -> + ?GRAPH:add_vertex(XsiGraph,V,Xsi#xsi{later=false,wba=false}) + end, + AllParents = digraph_utils:reaching([Key], XsiGraph), + ?pp_debug("~nPropagating to all parents of t~w: ~w",[Key,AllParents]), + propagate_later(AllParents,XsiGraph), + perform_later(Keys,XsiGraph); + _ -> %% Just contains bottoms and/or expressions + ?GRAPH:add_vertex(XsiGraph,V,Xsi#xsi{later=true}), + perform_later(Keys,XsiGraph) + end; + _ -> %% True or False => recurse + perform_later(Keys,XsiGraph) + end. + +propagate_later([], _XG) -> ok; +propagate_later([IPX|IPXs], XsiGraph) -> + {V,IPXsi} = ?GRAPH:vertex(XsiGraph,IPX), + case IPXsi#xsi.later of + false -> + ?pp_debug("~nThrough propagation, later of t~w is already reset",[IPX]), + propagate_later(IPXs,XsiGraph); + _ -> + ?pp_debug("~nThrough propagation, resetting later of t~w",[IPX]), + case IPXsi#xsi.cba of + true -> + ?GRAPH:add_vertex(XsiGraph,V,IPXsi#xsi{later=false,wba=true}); + undefined -> + ?GRAPH:add_vertex(XsiGraph,V,IPXsi#xsi{later=false,wba=true}); + _ -> + ?GRAPH:add_vertex(XsiGraph,V,IPXsi#xsi{later=false,wba=false}) + end, + propagate_later(IPXs,XsiGraph) + end. + +parse_ops([], Res) -> + Res; +parse_ops([Op|Ops], Res) -> + case Op#xsi_op.op of + #temp{} -> + NewRes = has_temp, + parse_ops(Ops,NewRes); + #bottom{} -> + parse_ops(Ops,Res); + #eop{} -> + parse_ops(Ops,Res); + _ -> + has_real + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% CODE MOTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +perform_code_motion([], Cfg, _XsiG) -> + Cfg; +perform_code_motion([L|Labels], Cfg, XsiG) -> + Code=?BB:code(?CFG:bb(Cfg,L)), + ?pp_debug("~n################ Code Motion in L~w~n",[L]), + ?pp_debug("~nCode to move ~n",[]), + pp_instrs(Code,XsiG), + NewCfg = code_motion_in_block(L,Code,Cfg,XsiG,[],gb_trees:empty()), + ?pp_debug("~n################ Code Motion successful in L~w~n",[L]), + perform_code_motion(Labels,NewCfg,XsiG). + +code_motion_in_block(Label,[],Cfg,_XsiG,Visited,InsertionsAcc) -> + InsertionsAlong = gb_trees:keys(InsertionsAcc), + Code = lists:reverse(Visited), + NewBB = ?BB:mk_bb(Code), + Cfg2 = ?CFG:bb_add(Cfg,Label,NewBB), + %% Must come after the bb_add, since redirect will update the Phis too... + Cfg3 = make_insertions(Label,InsertionsAlong,InsertionsAcc,Cfg2), + %% ?pp_debug("~nChecking the Code at L~w:~n~p",[Label,?BB:code(?CFG:bb(Cfg3,Label))]), + Cfg3; +code_motion_in_block(L,[Inst|Insts],Cfg,XsiG,Visited,InsertionsAcc) -> + ?pp_debug("~nInspecting Inst : ~n",[]),pp_instr(Inst,XsiG), + case Inst of + #pre_candidate{} -> + Def = Inst#pre_candidate.def, + Alu = Inst#pre_candidate.alu, + case Def of + bottom -> + InstToAdd = Alu; + #temp{} -> + Key = Def#temp.key, + {_V,Xsi} = ?GRAPH:vertex(XsiG,Key), + case Xsi#xsi.wba of + true -> + %% Turn into a move + Dst = ?RTL:alu_dst(Alu), + Move = ?RTL:mk_move(Dst,Def#temp.var), + pp_instr(Inst#pre_candidate.alu,nil), ?pp_debug(" ==> ",[]), pp_instr(Move,nil), + %% Counting redundancies + redundancy_add(), + InstToAdd = Move; + _ -> + InstToAdd = Alu + end; + _ -> %% Def is a real variable + %% Turn into a move + Dst = ?RTL:alu_dst(Alu), + Move = ?RTL:mk_move(Dst,Def), + pp_instr(Alu,nil), ?pp_debug(" ==> ",[]), pp_instr(Move,nil), + %% Counting redundancies + redundancy_add(), + InstToAdd = Move + end, + code_motion_in_block(L,Insts,Cfg,XsiG,[InstToAdd|Visited],InsertionsAcc); + #xsi_link{} -> + Key = Inst#xsi_link.num, + {_V,Xsi} = ?GRAPH:vertex(XsiG,Key), + case Xsi#xsi.wba of + true -> + %% Xsi is a WBA, it might trigger insertions + OpList = xsi_oplist(Xsi), + ?pp_debug(" This Xsi is a 'Will be available'",[]), + %% Cleaning the instruction + Expr = prepare_inst(Xsi#xsi.inst), + {NewOpList,NewInsertionsAcc} = get_insertions(OpList,[],InsertionsAcc,Visited,Expr,XsiG), + %% Making Xsi a Phi with Oplist + PhiOpList = [{Pred,Var} || #xsi_op{pred=Pred,op=Var} <- NewOpList], + Def = Xsi#xsi.def, + Phi = ?RTL:phi_arglist_update(?RTL:mk_phi(Def#temp.var),PhiOpList), + ?pp_debug("~n Xsi is turned into Phi : ~w",[Phi]), + code_motion_in_block(L,Insts,Cfg,XsiG,[Phi|Visited],NewInsertionsAcc); + _ -> + ?pp_debug(" This Xsi is not a 'Will be available'",[]), + code_motion_in_block(L,Insts,Cfg,XsiG,Visited,InsertionsAcc) + end; +%% phi -> +%% code_motion_in_block(L,Insts,Cfg,XsiG,[Inst|Visited],InsertionsAcc); + _ -> + %% Other instructions.... Phis too + code_motion_in_block(L,Insts,Cfg,XsiG,[Inst|Visited],InsertionsAcc) + end. + +prepare_inst(Expr) -> + S1 = ?RTL:alu_src1(Expr), + S2 = ?RTL:alu_src2(Expr), + NewInst = case S1 of + #temp{} -> ?RTL:alu_src1_update(Expr,S1#temp.var); + _ -> Expr + end, + case S2 of + #temp{} -> ?RTL:alu_src2_update(NewInst,S2#temp.var); + _ -> NewInst + end. + +get_insertions([],OpAcc,InsertionsAcc,_Visited,_Expr,_XsiG) -> + {OpAcc,InsertionsAcc}; +get_insertions([XsiOp|Ops],OpAcc,InsertionsAcc,Visited,Expr,XsiG) -> + Pred = XsiOp#xsi_op.pred, + Op = XsiOp#xsi_op.op, + case Op of + #bottom{} -> + case gb_trees:lookup(Pred,InsertionsAcc) of + {value,Insertion} -> + From = Insertion#insertion.from, + case lists:keyfind(Op, 1, From) of + false -> + ?pp_debug("~nThere has been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op), + Dst = Op#bottom.var, + Expr2 = ?RTL:alu_dst_update(Expr,Dst), + Inst = manufacture_computation(Pred,Expr2,Visited), + Code = Insertion#insertion.code, + NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]}, + NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc); + {_, Val} -> + ?pp_debug("~nThere has been insertions along the edge L~w already, and for that operand too | Op=",[Pred]),pp_arg(Op), + Dst = Val, + NewInsertionsAcc = InsertionsAcc + end; + none -> + ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course)| Op=",[Pred]),pp_arg(Op), + Dst = Op#bottom.var, + Expr2 = ?RTL:alu_dst_update(Expr,Dst), + Inst = manufacture_computation(Pred,Expr2,Visited), + NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]}, + NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc) + end; + #const_expr{} -> + case gb_trees:lookup(Pred,InsertionsAcc) of + {value,Insertion} -> + From = Insertion#insertion.from, + case lists:keyfind(Op, 1, From) of + false -> + ?pp_debug("~nThere have been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op), + Dst = Op#const_expr.var, + Val = Op#const_expr.value, + Inst = ?RTL:mk_move(Dst,Val), + Code = Insertion#insertion.code, + NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]}, + NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc); + {_, Val} -> + ?pp_debug("~nThere have been insertions along the edge L~w already, and for that operand too | Op=",[Pred]),pp_arg(Op), + Dst = Val, + NewInsertionsAcc = InsertionsAcc + end; + none -> + ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course)| Op=",[Pred]),pp_arg(Op), + Dst = Op#const_expr.var, + Val = Op#const_expr.value, + Inst = ?RTL:mk_move(Dst,Val), + NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]}, + NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc) + end; + #eop{} -> + %% We treat expressions like bottoms + %% The value must be recomputed, and therefore not available... + case gb_trees:lookup(Pred,InsertionsAcc) of + {value,Insertion} -> + From = Insertion#insertion.from, + case lists:keyfind(Op, 1, From) of + false -> + ?pp_debug("~nThere has been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op), + Dst = Op#eop.var, + Expr2 = ?RTL:alu_dst_update(Expr,Dst), + Inst = manufacture_computation(Pred,Expr2,Visited), + Code = Insertion#insertion.code, + NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]}, + NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc); + {_, Val} -> + ?pp_debug("~nThere has been insertions along the edge L~w already, and for that operand too | Op=",[Pred]),pp_arg(Op), + Dst = Val, + NewInsertionsAcc = InsertionsAcc + end; + none -> + ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course)| Op=",[Pred]),pp_arg(Op), + Dst = Op#eop.var, + Expr2 = ?RTL:alu_dst_update(Expr,Dst), + Inst = manufacture_computation(Pred,Expr2,Visited), + NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]}, + NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc) + end; + #temp{} -> + case gb_trees:lookup(Pred,InsertionsAcc) of + {value,Insertion} -> + From = Insertion#insertion.from, + case lists:keyfind(Op, 1, From) of + false -> + ?pp_debug("~nThere has been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op), + Key = Op#temp.key, + {_V,Xsi} = ?GRAPH:vertex(XsiG,Key), + case Xsi#xsi.wba of + true -> + ?pp_debug("~nBut the operand is a WBA Xsi: no need for insertion",[]), + Dst = Op#temp.var, + NewInsertionsAcc = InsertionsAcc; + _ -> + ?pp_debug("~nBut the operand is a NOT WBA Xsi: we must make an insertion",[]), + Dst = ?RTL:mk_new_var(), + Expr2 = ?RTL:alu_dst_update(Expr,Dst), + Inst = manufacture_computation(Pred,Expr2,Visited), + Code = Insertion#insertion.code, + NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]}, + NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc) + end; + {_, Val} -> + ?pp_debug("~nThere has been insertions along the edge L~w already, and for that operand too (Op=~w)",[Pred,Op]), + ?pp_debug("~nThis means, this temp is a WBA Xsi's definition",[]), + Dst = Val, + NewInsertionsAcc = InsertionsAcc + end; + none -> + ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course | Op=",[Pred]),pp_arg(Op), + Key = Op#temp.key, + {_V,Xsi} = ?GRAPH:vertex(XsiG,Key), + case Xsi#xsi.wba of + true -> + ?pp_debug("~nBut the operand is a WBA Xsi: no need for insertion",[]), + Dst = Op#temp.var, + NewInsertionsAcc = InsertionsAcc; + _ -> + ?pp_debug("~nBut the operand is a NOT WBA Xsi: we must make an insertion",[]), + Dst = ?RTL:mk_new_var(), + Expr2 = ?RTL:alu_dst_update(Expr,Dst), + Inst = manufacture_computation(Pred,Expr2,Visited), + NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]}, + NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc) + end + end; + _ -> + ?pp_debug("~nThe operand (Op=",[]),pp_arg(Op),?pp_debug(") is a real variable, no need for insertion along L~w",[Pred]), + Dst = Op, + NewInsertionsAcc = InsertionsAcc + end, + NewXsiOp = XsiOp#xsi_op{op=Dst}, + get_insertions(Ops, [NewXsiOp|OpAcc], NewInsertionsAcc, Visited, Expr, XsiG). + +manufacture_computation(_Pred, Expr, []) -> + ?pp_debug("~n Manufactured computation : ~w", [Expr]), + Expr; +manufacture_computation(Pred, Expr, [I|Rest]) -> + %% ?pp_debug("~n Expr = ~w",[Expr]), + SRC1 = ?RTL:alu_src1(Expr), + SRC2 = ?RTL:alu_src2(Expr), + case I of + #xsi_link{} -> + exit({?MODULE,should_not_be_a_xsi_link,{"Why the hell do we still have a xsi link???",I}}); + #xsi{} -> + exit({?MODULE,should_not_be_a_xsi,{"Why the hell do we still have a xsi ???",I}}); + #phi{} -> + DST = ?RTL:phi_dst(I), + Arg = ?RTL:phi_arg(I,Pred), + NewInst = case DST =:= SRC1 of + true -> ?RTL:alu_src1_update(Expr,Arg); + false -> Expr + end, + NewExpr = case DST =:= SRC2 of + true -> ?RTL:alu_src2_update(NewInst,Arg); + false -> NewInst + end, + manufacture_computation(Pred,NewExpr,Rest) + end. + +make_insertions(_L, [], _ITree, Cfg) -> + Cfg; +make_insertions(L, [OldPred|Is], ITree, Cfg) -> + NewPred = ?RTL:label_name(?RTL:mk_new_label()), + I = gb_trees:get(OldPred, ITree), + CodeToInsert = lists:reverse([?RTL:mk_goto(L)|I#insertion.code]), + BBToInsert = ?BB:mk_bb(CodeToInsert), + NewCfg = ?CFG:bb_insert_between(Cfg, NewPred, BBToInsert, OldPred, L), + make_insertions(L, Is, ITree, NewCfg). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% XSI INTERFACE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +xsi_oplist(#xsi{opList=OpList}) -> + case OpList of undefined -> [] ; _ -> OpList end. +xsi_arg(Xsi, Pred) -> + case lists:keyfind(Pred, #xsi_op.pred, xsi_oplist(Xsi)) of + false -> + undetermined_operand; + R -> + R#xsi_op.op + end. +xsi_arg_update(Xsi, Pred, Op) -> + NewOpList = lists:keyreplace(Pred, #xsi_op.pred, xsi_oplist(Xsi), + #xsi_op{pred=Pred,op=Op}), + Xsi#xsi{opList=NewOpList}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PRETTY-PRINTING %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-ifndef(SSAPRE_DEBUG). + +%%pp_cfg(Cfg,_) -> ?CFG:pp(Cfg). +pp_cfg(_,_) -> ok. +pp_instr(_,_) -> ok. +pp_instrs(_,_) -> ok. +pp_expr(_) -> ok. +pp_xsi(_) -> ok. +pp_arg(_) -> ok. +pp_xsigraph(_) -> ok. +pp_cfggraph(_) -> ok. +%% pp_xsigraph(G) -> +%% Vertices = lists:sort(?GRAPH:vertices(G)), +%% io:format(standard_io, "Size of the Xsi Graph: ~w", [length(Vertices)]). +%% pp_cfggraph(G) -> +%% Vertices = lists:sort(?GRAPH:vertices(G)), +%% io:format(standard_io, "Size of the CFG Graph: ~w", [length(Vertices)]). + +-else. + +pp_cfg(Cfg, Graph) -> + Labels = ?CFG:preorder(Cfg), + pp_blocks(Labels, Cfg, Graph). + +pp_blocks([], _, _) -> + ok; +pp_blocks([L|Ls], Cfg, Graph) -> + Code = hipe_bb:code(?CFG:bb(Cfg,L)), + io:format(standard_io,"~n########## Label L~w~n", [L]), + pp_instrs(Code, Graph), + pp_blocks(Ls, Cfg, Graph). + +pp_instrs([], _) -> + ok; +pp_instrs([I|Is], Graph) -> + pp_instr(I, Graph), + pp_instrs(Is, Graph). + +pp_xsi_link(Key, Graph) -> + {_Key,Xsi} = ?GRAPH:vertex(Graph, Key), + pp_xsi(Xsi). + +pp_xsi(Xsi) -> + io:format(standard_io, " [L~w] ", [Xsi#xsi.label]), + io:format(standard_io, "[", []), pp_expr(Xsi#xsi.inst), + io:format(standard_io, "] Xsi(", []), pp_xsi_args(xsi_oplist(Xsi)), + io:format(standard_io, ") (", []), pp_xsi_def(Xsi#xsi.def), + io:format(standard_io, ") cba=~w, later=~w | wba=~w~n", [Xsi#xsi.cba,Xsi#xsi.later,Xsi#xsi.wba]). + +pp_instr(I, Graph) -> + case I of + #alu{} -> + io:format(standard_io, " ", []), + pp_arg(?RTL:alu_dst(I)), + io:format(standard_io, " <- ", []), + pp_expr(I), + io:format(standard_io, "~n", []); + _ -> + try ?RTL:pp_instr(standard_io, I) + catch _:_ -> + case I of + #pre_candidate{} -> + pp_pre(I); + #xsi{} -> + pp_xsi(I); + #xsi_link{} -> + pp_xsi_link(I#xsi_link.num, Graph); + _-> + io:format(standard_io,"*** ~w ***~n", [I]) + end + end + end. + +pp_pre(I) -> + A = I#pre_candidate.alu, + io:format(standard_io, " ", []), + pp_arg(?RTL:alu_dst(A)), + io:format(standard_io, " <- ", []),pp_expr(A), + io:format(standard_io, " [ ", []),pp_arg(I#pre_candidate.def), + %%io:format(standard_io, "~w", [I#pre_candidate.def]), + io:format(standard_io, " ]~n",[]). + +pp_expr(I) -> + pp_arg(?RTL:alu_dst(I)), + io:format(standard_io, " <- ", []), + pp_arg(?RTL:alu_src1(I)), + io:format(standard_io, " ~w ", [?RTL:alu_op(I)]), + pp_arg(?RTL:alu_src2(I)). + +pp_arg(Arg) -> + case Arg of + bottom -> + io:format(standard_io, "_|_", []); + #bottom{} -> + io:format(standard_io, "_|_:~w (", [Arg#bottom.key]),pp_arg(Arg#bottom.var),io:format(standard_io,")",[]); + #temp{} -> + pp_xsi_def(Arg); + #eop{} -> + io:format(standard_io,"#",[]),pp_expr(Arg#eop.expr),io:format(standard_io,"(",[]),pp_arg(Arg#eop.var),io:format(standard_io,")#",[]); + #const_expr{} -> + io:format(standard_io,"*",[]),pp_arg(Arg#const_expr.var),io:format(standard_io," -> ",[]),pp_arg(Arg#const_expr.value),io:format(standard_io,"*",[]); + undefined -> + io:format(standard_io, "...", []); %%"undefined", []); + _-> + case Arg of + #alu{} -> + pp_expr(Arg); + _-> + ?RTL:pp_arg(standard_io, Arg) + end + end. + +pp_args([]) -> + ok; +pp_args(undefined) -> + io:format(standard_io, "...,...,...", []); +pp_args([A]) -> + pp_arg(A); +pp_args([A|As]) -> + pp_arg(A), + io:format(standard_io, ", ", []), + pp_args(As). + +pp_xsi_args([]) -> ok; +pp_xsi_args([XsiOp]) -> + io:format(standard_io, "{~w| ", [XsiOp#xsi_op.pred]), + pp_arg(XsiOp#xsi_op.op), + io:format(standard_io, "}", []); +pp_xsi_args([XsiOp|Args]) -> + io:format(standard_io, "{~w| ", [XsiOp#xsi_op.pred]), + pp_arg(XsiOp#xsi_op.op), + io:format(standard_io, "}, ", []), + pp_xsi_args(Args); +pp_xsi_args(Args) -> + pp_args(Args). + +pp_xsi_def(Arg) -> + D = Arg#temp.key, + V = Arg#temp.var, + io:format(standard_io, "t~w (", [D]),pp_arg(V),io:format(standard_io,")",[]). + +pp_cfggraph(G) -> + Vertices = lists:sort(?GRAPH:vertices(G)), + io:format(standard_io, "Size of the CFG Graph: ~w ~n", [length(Vertices)]), + pp_cfgvertex(Vertices, G). + +pp_xsigraph(G) -> + Vertices = lists:sort(?GRAPH:vertices(G)), + io:format(standard_io, "Size of the Xsi Graph: ~w ~n", [length(Vertices)]), + pp_xsivertex(Vertices,G). + +pp_xsivertex([], _G) -> + ok; +pp_xsivertex([Key|Keys], G) -> + {V,Xsi} = ?GRAPH:vertex(G, Key), + OutNeighbours = ?GRAPH:out_neighbours(G, V), + ?pp_debug(" ~w -> ~w", [V,OutNeighbours]), pp_xsi(Xsi), + pp_xsivertex(Keys, G). + +pp_cfgvertex([], _G) -> + ok; +pp_cfgvertex([Key|Keys], G) -> + {V,Block} = ?GRAPH:vertex(G,Key), + case Block#block.type of + mp -> + ?pp_debug("~n Block ~w's attributes: ~n", [V]), + pp_attributes(Block), + ?pp_debug("~n Block ~w's edges: ~n", [V]), + pp_edges(G, ?GRAPH:in_edges(G,Key), ?GRAPH:out_edges(G,Key)); + _-> + ok + end, + pp_cfgvertex(Keys, G). + +pp_attributes(Block) -> + Att = Block#block.attributes, + case Att of + undefined -> + ok; + _ -> + ?pp_debug(" Maps: ~n",[]),pp_maps(gb_trees:keys(Att#mp.maps),Att#mp.maps), + ?pp_debug(" Uses: ~n",[]),pp_uses(gb_trees:keys(Att#mp.uses),Att#mp.uses), + ?pp_debug(" Defs: ~w~n",[Att#mp.defs]), + ?pp_debug(" Xsis: ~w~n",[Att#mp.xsis]), + ?pp_debug(" NDS : ",[]),pp_nds(?SETS:to_list(Att#mp.ndsSet)) + end. + +pp_maps([], _Maps) -> ok; +pp_maps([K|Ks], Maps) -> + ?pp_debug(" ",[]),pp_arg(K#xsi_op.op),?pp_debug("-> ~w~n",[?SETS:to_list(gb_trees:get(K,Maps))]), + pp_maps(Ks, Maps). + +pp_uses([], _Maps) -> ok; +pp_uses([K|Ks], Maps) -> + ?pp_debug(" ~w -> ~w~n",[K,?SETS:to_list(gb_trees:get(K,Maps))]), + pp_uses(Ks, Maps). + +pp_nds([]) -> ?pp_debug("~n",[]); +pp_nds(undefined) -> ?pp_debug("None",[]); +pp_nds([K]) -> + pp_arg(K#xsi_op.op), ?pp_debug("~n",[]); +pp_nds([K|Ks]) -> + pp_arg(K#xsi_op.op), ?pp_debug(", ",[]), + pp_nds(Ks). + +pp_edges(_G, [], []) -> ok; +pp_edges(G, [], [OUT|OUTs]) -> + {_E,V1,V2,Label} = ?GRAPH:edge(G,OUT), + ?pp_debug(" Out edge ~w -> ~w (~w)~n", [V1,V2,?SETS:to_list(Label)]), + pp_edges(G, [], OUTs); +pp_edges(G, [IN|INs], Outs) -> + {_E,V1,V2,Label} = ?GRAPH:edge(G,IN), + ?pp_debug(" In edge ~w -> ~w (~w)~n", [V1,V2,?SETS:to_list(Label)]), + pp_edges(G, INs, Outs). + +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% COUNTERS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init_counters() -> + put({ssapre_temp,temp_count}, 0), + put({ssapre_index,index_count}, 0). + +new_bottom() -> + IndxCountPair = {ssapre_index, index_count}, + V = get(IndxCountPair), + put(IndxCountPair, V+1), + #bottom{key = V, var = ?RTL:mk_new_var()}. + +new_temp() -> + TmpCountPair = {ssapre_temp, temp_count}, + V = get(TmpCountPair), + put(TmpCountPair, V+1), + #temp{key = V, var = ?RTL:mk_new_var()}. + +init_redundancy_count() -> + put({ssapre_redundancy,redundancy_count}, 0). + +redundancy_add() -> + RedCountPair = {ssapre_redundancy, redundancy_count}, + V = get(RedCountPair), + put(RedCountPair, V+1). + +-ifdef(SSAPRE_DEBUG). +get_redundancy_count() -> + get({ssapre_redundancy,redundancy_count}). +-endif. diff --git a/lib/hipe/rtl/hipe_rtl_symbolic.erl b/lib/hipe/rtl/hipe_rtl_symbolic.erl new file mode 100644 index 0000000000..bc8640dec9 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_symbolic.erl @@ -0,0 +1,99 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%------------------------------------------------------------------- +%% File : hipe_rtl_symbolic.erl +%% Author : Per Gustafsson +%% Description : Expansion of symbolic instructions. +%% +%% Created : 18 May 2004 by Per Gustafsson +%%------------------------------------------------------------------- + +-module(hipe_rtl_symbolic). + +-export([expand/1]). + +-include("hipe_rtl.hrl"). +-include("hipe_literals.hrl"). +-include("../icode/hipe_icode_primops.hrl"). + +expand(Cfg) -> + Linear = hipe_rtl_cfg:linearize(Cfg), + Code = hipe_rtl:rtl_code(Linear), + NonFlatCode = [expand_instr(Instr) || Instr <- Code], + NewCode = lists:flatten(NonFlatCode), + Linear1 = hipe_rtl:rtl_code_update(Linear, NewCode), + hipe_rtl_cfg:init(Linear1). + +expand_instr(Instr) -> + case Instr of + #fixnumop{} -> + expand_fixnumop(Instr); + #gctest{} -> + expand_gctest(Instr); + _ -> + Instr + end. + +expand_fixnumop(Instr) -> + case hipe_rtl:fixnumop_type(Instr) of + untag -> + Dst = hipe_rtl:fixnumop_dst(Instr), + Src = hipe_rtl:fixnumop_src(Instr), + hipe_tagscheme:realuntag_fixnum(Dst, Src); + tag -> + Dst = hipe_rtl:fixnumop_dst(Instr), + Src = hipe_rtl:fixnumop_src(Instr), + hipe_tagscheme:realtag_fixnum(Dst, Src) + end. + +expand_gctest(Instr) -> + HeapNeed = hipe_rtl:gctest_words(Instr), + {GetHPInsn, HP, _PutHPInsn} = hipe_rtl_arch:heap_pointer(), + {GetHLIMITInsn, H_LIMIT} = hipe_rtl_arch:heap_limit(), + ContLabel = hipe_rtl:mk_new_label(), + GCLabel = hipe_rtl:mk_new_label(), + ContLabelName = hipe_rtl:label_name(ContLabel), + GCLabelName = hipe_rtl:label_name(GCLabel), + Tmp = hipe_rtl:mk_new_reg(), % diff between two gc-unsafe pointers + StartCode = + [GetHPInsn, + GetHLIMITInsn, + hipe_rtl:mk_alu(Tmp, H_LIMIT, 'sub', HP)], + {SeparateCode, GCAmount, HPAmount} = + case hipe_rtl:is_reg(HeapNeed) of + true -> + GA = hipe_rtl:mk_new_reg_gcsafe(), + HA = hipe_rtl:mk_new_reg_gcsafe(), + {[hipe_rtl:mk_alu(HA, HeapNeed, sll, + hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size()))| + hipe_tagscheme:realtag_fixnum(GA, HeapNeed)], GA, HA}; + false -> + WordsNeeded = hipe_rtl:imm_value(HeapNeed), + GA = hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(WordsNeeded)), + HA = hipe_rtl:mk_imm(WordsNeeded*hipe_rtl_arch:word_size()), + {[], GA, HA} + end, + EndCode = + [hipe_rtl:mk_branch(Tmp, 'lt', HPAmount, GCLabelName, ContLabelName, 0.01), + GCLabel, + hipe_rtl:mk_call([], 'gc_1', [GCAmount], ContLabelName, [], not_remote), + ContLabel], + StartCode ++ SeparateCode ++ EndCode. + diff --git a/lib/hipe/rtl/hipe_rtl_varmap.erl b/lib/hipe/rtl/hipe_rtl_varmap.erl new file mode 100644 index 0000000000..9bd5e88611 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_varmap.erl @@ -0,0 +1,161 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% Time-stamp: <2008-04-20 14:55:35 richard> +%% ==================================================================== +%% Module : hipe_rtl_varmap +%% Purpose : +%% Notes : +%% History : * 2001-04-10 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_varmap). + +-export([init/1, + ivs2rvs/2, + icode_var2rtl_var/2, + icode_label2rtl_label/2]). + +%------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../icode/hipe_icode.hrl"). + +%------------------------------------------------------------------------- + +%% @spec init(IcodeRecord::#icode{}) -> {Args, VarMap} +%% +%% @doc Initializes gensym for RTL. + +-spec init(#icode{}) -> {[_], _}. % XXX: fix me please + +init(IcodeRecord) -> + hipe_gensym:init(rtl), + hipe_gensym:set_var(rtl, hipe_rtl_arch:first_virtual_reg()), + hipe_gensym:set_label(rtl, 0), + VarMap = new_var_map(), + {_Args, _VarMap1} = ivs2rvs(hipe_icode:icode_params(IcodeRecord), VarMap). + + +%%------------------------------------------------------------------------ +%% +%% Mapping of labels and variables from Icode to RTL. +%% +%%------------------------------------------------------------------------ + + +%% @spec icode_label2rtl_label(Icode_Label::term(), LabelMap::term()) -> +%% {RTL_Label, NewLabelMap} +%% +%% @doc Converts an Icode label to an RTL label. + +icode_label2rtl_label(LabelName, Map) -> + case lookup(LabelName, Map) of + {value, NewLabel} -> + {NewLabel, Map}; + none -> + NewLabel = hipe_rtl:mk_new_label(), + {NewLabel, insert(LabelName, NewLabel, Map)} + end. + + +%% @spec ivs2rvs(Icode_Vars::[term()], VarMap::term()) -> {[RTL_Var],NewVarMap} +%% +%% @doc Converts a list of Icode variables to a list of RTL variables. + +ivs2rvs([], VarMap) -> + {[], VarMap}; +ivs2rvs([V|Vs], VarMap) -> + {NewV, VarMap0} = icode_var2rtl_var(V, VarMap), + {NewVs, VarMap1} = ivs2rvs(Vs, VarMap0), + {[NewV|NewVs], VarMap1}. + + +%% @spec icode_var2rtl_var(Icode_Var::term(), VarMap::term()) -> +%% {RTL_Var, NewVarMap} +%% +%% @doc Converts an Icode variable to an RTL variable. + +icode_var2rtl_var(Var, Map) -> + Value = lookup(Var, Map), + case Value of + none -> + case type_of_var(Var) of + fvar -> + NewVar = hipe_rtl:mk_new_fpreg(), + {NewVar, insert(Var, NewVar, Map)}; + var -> + NewVar = hipe_rtl:mk_new_var(), + {NewVar, insert(Var, NewVar, Map)}; + {reg, IsGcSafe} -> + NewVar = + case IsGcSafe of + %% true -> hipe_rtl:mk_new_reg_gcsafe(); + false -> hipe_rtl:mk_new_reg() + end, + {NewVar, insert(Var, NewVar, Map)} + end; + {value, NewVar} -> + {NewVar, Map} + end. + +%% +%% Simple type test +%% + +type_of_var(X) -> + case hipe_icode:is_fvar(X) of + true -> + fvar; + false -> + case hipe_icode:is_var(X) of + true -> + var; + false -> + case hipe_icode:is_reg(X) of + true -> + {reg, hipe_icode:reg_is_gcsafe(X)}; + false -> + %% Sanity check + case hipe_icode:is_const(X) of + true -> const; + false -> + exit({"Unknown Icode variable", X}) + end + end + end + end. + +%% +%% Helping utilities +%% + +new_var_map() -> + gb_trees:empty(). + +lookup(V, Map) -> + gb_trees:lookup(V, Map). + +insert(Key, Val, Map) -> + gb_trees:insert(Key, Val, Map). diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl new file mode 100644 index 0000000000..dc44b803a1 --- /dev/null +++ b/lib/hipe/rtl/hipe_tagscheme.erl @@ -0,0 +1,1209 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%======================================================================== +%% +%% Filename : hipe_tagscheme.erl +%% Note : This is specific to Erlang 5.* (i.e. R9 to R13). +%% +%% Modifications: +%% 020904: Happi - added support for external pids and ports. +%% +%%======================================================================== +%% $Id$ +%%======================================================================== + +-module(hipe_tagscheme). + +-export([mk_nil/0, mk_fixnum/1, mk_arityval/1, mk_non_value/0]). +-export([is_fixnum/1]). +-export([tag_tuple/2, tag_cons/2]). +-export([test_is_boxed/4, get_header/2]). +-export([test_nil/4, test_cons/4, test_flonum/4, test_fixnum/4, + test_tuple/4, test_atom/4, test_bignum/4, test_pos_bignum/4, + test_any_pid/4, test_any_port/4, + test_ref/4, test_fun/4, test_fun2/5, test_matchstate/4, + test_binary/4, test_bitstr/4, test_list/4, + test_integer/4, test_number/4, test_constant/4, test_tuple_N/5]). +-export([realtag_fixnum/2, tag_fixnum/2, realuntag_fixnum/2, untag_fixnum/2]). +-export([test_two_fixnums/3, test_fixnums/4, unsafe_fixnum_add/3, + unsafe_fixnum_sub/3, + fixnum_gt/5, fixnum_lt/5, fixnum_ge/5, fixnum_le/5, fixnum_val/1, + fixnum_mul/4, + fixnum_addsub/5, fixnum_andorxor/4, fixnum_not/2, + fixnum_bsr/3, fixnum_bsl/3]). +-export([unsafe_car/2, unsafe_cdr/2, + unsafe_constant_element/3, unsafe_update_element/3, element/6]). +-export([unsafe_closure_element/3]). +-export([mk_fun_header/0, tag_fun/2]). +-export([unsafe_untag_float/2, unsafe_tag_float/2]). +-export([mk_sub_binary/6,mk_sub_binary/7]). +-export([unsafe_mk_big/3, unsafe_load_float/3]). +-export([bignum_sizeneed/1,bignum_sizeneed_code/2, get_one_word_pos_bignum/3]). +-export([test_subbinary/3, test_heap_binary/3]). +-export([create_heap_binary/3, create_refc_binary/3, create_refc_binary/4]). +-export([create_matchstate/6, convert_matchstate/1, compare_matchstate/4]). +-export([get_field_from_term/3, get_field_from_pointer/3, + set_field_from_term/3, set_field_from_pointer/3, + extract_matchbuffer/2, extract_binary_bytes/2]). + +-include("hipe_rtl.hrl"). +-include("hipe_literals.hrl"). + +-ifdef(EFT_NATIVE_ADDRESS). +-export([if_fun_get_arity_and_address/5]). +-endif. + +-undef(TAG_PRIMARY_BOXED). +-undef(TAG_IMMED2_MASK). +-undef(TAG_IMMED2_CATCH). +-undef(TAG_IMMED2_SIZE). + +%%------------------------------------------------------------------------ + +-define(TAG_PRIMARY_SIZE, 2). +-define(TAG_PRIMARY_MASK, 16#3). +-define(TAG_PRIMARY_HEADER, 16#0). +-define(TAG_PRIMARY_LIST, 16#1). +-define(TAG_PRIMARY_BOXED, 16#2). +-define(TAG_PRIMARY_IMMED1, 16#3). + +-define(TAG_IMMED1_SIZE, 4). +-define(TAG_IMMED1_MASK, 16#F). +-define(TAG_IMMED1_PID, ((16#0 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)). +-define(TAG_IMMED1_PORT, ((16#1 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)). +-define(TAG_IMMED1_IMMED2,((16#2 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)). +-define(TAG_IMMED1_SMALL, ((16#3 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)). + +-define(TAG_IMMED2_SIZE, 6). +-define(TAG_IMMED2_MASK, 16#3F). +-define(TAG_IMMED2_ATOM, ((16#0 bsl ?TAG_IMMED1_SIZE) bor ?TAG_IMMED1_IMMED2)). +-define(TAG_IMMED2_CATCH, ((16#1 bsl ?TAG_IMMED1_SIZE) bor ?TAG_IMMED1_IMMED2)). +-define(TAG_IMMED2_NIL, ((16#3 bsl ?TAG_IMMED1_SIZE) bor ?TAG_IMMED1_IMMED2)). + +-define(TAG_HEADER_ARITYVAL,((16#0 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_BIN_MATCHSTATE, ((16#1 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_POS_BIG, ((16#2 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_NEG_BIG, ((16#3 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(BIG_SIGN_BIT, (16#1 bsl ?TAG_PRIMARY_SIZE)). +-define(TAG_HEADER_REF, ((16#4 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_FUN, ((16#5 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_FLOAT, ((16#6 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_EXPORT, ((16#7 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(BINARY_XXX_MASK, (16#3 bsl ?TAG_PRIMARY_SIZE)). +-define(TAG_HEADER_REFC_BIN,((16#8 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_HEAP_BIN,((16#9 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_SUB_BIN, ((16#A bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_EXTERNAL_PID, ((16#C bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_EXTERNAL_PORT,((16#D bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). +-define(TAG_HEADER_EXTERNAL_REF, ((16#E bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)). + +-define(TAG_HEADER_MASK, 16#3F). +-define(HEADER_ARITY_OFFS, 6). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_header(SZ,TAG) -> (SZ bsl ?HEADER_ARITY_OFFS) + TAG. +mk_arityval(SZ) -> mk_header(SZ, ?TAG_HEADER_ARITYVAL). + +size_from_header(Sz, Header) -> + [hipe_rtl:mk_alu(Sz, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))]. + +mk_var_header(Header, Size, Tag) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp, Size, sll, hipe_rtl:mk_imm(?HEADER_ARITY_OFFS)), + hipe_rtl:mk_alu(Header, Tmp, 'add', hipe_rtl:mk_imm(Tag))]. + +mk_fixnum(X) -> (X bsl ?TAG_IMMED1_SIZE) + ?TAG_IMMED1_SMALL. + +-define(NIL, ((-1 bsl ?TAG_IMMED2_SIZE) bor ?TAG_IMMED2_NIL)). +mk_nil() -> ?NIL. +%% mk_atom(X) -> (X bsl ?TAG_IMMED2_SIZE) + ?TAG_IMMED2_ATOM. +mk_non_value() -> ?THE_NON_VALUE. + +-spec is_fixnum(integer()) -> boolean(). +is_fixnum(N) when is_integer(N) -> + Bits = ?bytes_to_bits(hipe_rtl_arch:word_size()) - ?TAG_IMMED1_SIZE, + (N =< ((1 bsl (Bits - 1)) - 1)) and (N >= -(1 bsl (Bits - 1))). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-define(HEADER_EXPORT, mk_header(1, ?TAG_HEADER_EXPORT)). +-define(HEADER_FUN, mk_header(?ERL_FUN_SIZE-2, ?TAG_HEADER_FUN)). +-define(HEADER_PROC_BIN, mk_header(?PROC_BIN_WORDSIZE-1, ?TAG_HEADER_REFC_BIN)). +-define(HEADER_SUB_BIN, mk_header(?SUB_BIN_WORDSIZE-2, ?TAG_HEADER_SUB_BIN)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +tag_boxed(Res, X) -> + hipe_rtl:mk_alu(Res, X, 'add', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)). + +%% tag_bignum(Res, X) -> tag_boxed(Res, X). +tag_flonum(Res, X) -> tag_boxed(Res, X). +tag_tuple(Res, X) -> tag_boxed(Res, X). + +tag_cons(Res, X) -> + hipe_rtl:mk_alu(Res, X, 'add', hipe_rtl:mk_imm(?TAG_PRIMARY_LIST)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%% Operations to test if an object has a known type T. + +test_nil(X, TrueLab, FalseLab, Pred) -> + hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(?NIL), TrueLab, FalseLab, Pred). + +test_cons(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_LIST), + hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). + +test_is_boxed(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_BOXED), + hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). + +get_header(Res, X) -> + hipe_rtl:mk_load(Res, X, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED))). + +mask_and_compare(X, Mask, Value, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp, X, 'and', hipe_rtl:mk_imm(Mask)), + hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(Value), TrueLab, FalseLab, Pred)]. + +test_immed1(X, Value, TrueLab, FalseLab, Pred) -> + mask_and_compare(X, ?TAG_IMMED1_MASK, Value, TrueLab, FalseLab, Pred). + +test_internal_pid(X, TrueLab, FalseLab, Pred) -> + test_immed1(X, ?TAG_IMMED1_PID, TrueLab, FalseLab, Pred). + +test_any_pid(X, TrueLab, FalseLab, Pred) -> + NotInternalPidLab = hipe_rtl:mk_new_label(), + [test_internal_pid(X, TrueLab, hipe_rtl:label_name(NotInternalPidLab), Pred), + NotInternalPidLab, + test_external_pid(X, TrueLab, FalseLab, Pred)]. + +test_external_pid(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + ExternalPidMask = ?TAG_HEADER_MASK, + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, ExternalPidMask, ?TAG_HEADER_EXTERNAL_PID, + TrueLab, FalseLab, Pred)]. + +test_internal_port(X, TrueLab, FalseLab, Pred) -> + test_immed1(X, ?TAG_IMMED1_PORT, TrueLab, FalseLab, Pred). + +test_any_port(X, TrueLab, FalseLab, Pred) -> + NotInternalPortLab = hipe_rtl:mk_new_label(), + [test_internal_port(X, TrueLab, hipe_rtl:label_name(NotInternalPortLab), Pred), + NotInternalPortLab, + test_external_port(X, TrueLab, FalseLab, Pred)]. + +test_external_port(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + ExternalPortMask = ?TAG_HEADER_MASK, + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, ExternalPortMask, ?TAG_HEADER_EXTERNAL_PORT, + TrueLab, FalseLab, Pred)]. + +test_fixnum(X, TrueLab, FalseLab, Pred) -> + test_immed1(X, ?TAG_IMMED1_SMALL, TrueLab, FalseLab, Pred). + +test_atom(X, TrueLab, FalseLab, Pred) -> + mask_and_compare(X, ?TAG_IMMED2_MASK, ?TAG_IMMED2_ATOM, + TrueLab, FalseLab, Pred). + +test_tuple(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + hipe_rtl:mk_alub(Tmp2, Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + TrueLab, FalseLab, Pred)]. + +test_tuple_N(X, N, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(mk_arityval(N)), + TrueLab, FalseLab, Pred)]. + +test_ref(X, TrueLab, FalseLab, Pred) -> + Hdr = hipe_rtl:mk_new_reg_gcsafe(), + Tag = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + TwoThirdsTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Hdr, X), + hipe_rtl:mk_alu(Tag, Hdr, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), + hipe_rtl:mk_branch(Tag, 'eq', hipe_rtl:mk_imm(?TAG_HEADER_REF), + TrueLab, hipe_rtl:label_name(TwoThirdsTrueLab), Pred), + TwoThirdsTrueLab, + hipe_rtl:mk_branch(Tag, 'eq', hipe_rtl:mk_imm(?TAG_HEADER_EXTERNAL_REF), + TrueLab, FalseLab, Pred) + ]. + +-ifdef(EFT_NATIVE_ADDRESS). +test_closure(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_FUN, + TrueLab, FalseLab, Pred)]. +-endif. + +test_fun(X, TrueLab, FalseLab, Pred) -> + Hdr = hipe_rtl:mk_new_reg_gcsafe(), + Tag = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + TwoThirdsTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Hdr, X), + hipe_rtl:mk_alu(Tag, Hdr, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), + hipe_rtl:mk_branch(Tag, 'eq', hipe_rtl:mk_imm(?TAG_HEADER_FUN), + TrueLab, hipe_rtl:label_name(TwoThirdsTrueLab), Pred), + TwoThirdsTrueLab, + hipe_rtl:mk_branch(Tag, 'eq', hipe_rtl:mk_imm(?TAG_HEADER_EXPORT), + TrueLab, FalseLab, Pred)]. + +test_fun2(X, Arity, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + TFalse = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_call([Tmp], {erlang,is_function,2}, [X,Arity], + hipe_rtl:label_name(HalfTrueLab), FalseLab, 'not_remote'), + HalfTrueLab, + hipe_rtl:mk_load_atom(TFalse, 'false'), + hipe_rtl:mk_branch(Tmp, 'ne', TFalse, TrueLab, FalseLab, Pred)]. + +flonum_header() -> + mk_header(8 div hipe_rtl_arch:word_size(), ?TAG_HEADER_FLOAT). + +test_flonum(X, TrueLab, FalseLab, Pred) -> + HeaderFlonum = flonum_header(), + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(HeaderFlonum), + TrueLab, FalseLab, Pred)]. + +test_bignum(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + BigMask = ?TAG_HEADER_MASK - ?BIG_SIGN_BIT, + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, BigMask, ?TAG_HEADER_POS_BIG, + TrueLab, FalseLab, Pred)]. + +test_pos_bignum(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + BigMask = ?TAG_HEADER_MASK, + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, BigMask, ?TAG_HEADER_POS_BIG, + TrueLab, FalseLab, Pred)]. + +test_matchstate(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_BIN_MATCHSTATE, + TrueLab, FalseLab, Pred)]. + +test_bitstr(X, TrueLab, FalseLab, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + HalfTrueLab = hipe_rtl:mk_new_label(), + Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, + [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), + HalfTrueLab, + get_header(Tmp, X), + mask_and_compare(Tmp, Mask, ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred)]. + +test_binary(X, TrueLab, FalseLab, Pred) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + IsBoxedLab = hipe_rtl:mk_new_label(), + IsBitStrLab = hipe_rtl:mk_new_label(), + IsSubBinLab = hipe_rtl:mk_new_label(), + Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, + [test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), FalseLab, Pred), + IsBoxedLab, + get_header(Tmp1, X), + mask_and_compare(Tmp1, Mask, ?TAG_HEADER_REFC_BIN, + hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred), + IsBitStrLab, + mask_and_compare(Tmp1, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN, + hipe_rtl:label_name(IsSubBinLab), TrueLab, 0.5), + IsSubBinLab, + get_field_from_term({sub_binary, bitsize}, X, Tmp2), + hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(0), TrueLab, FalseLab, Pred)]. + +test_list(X, TrueLab, FalseLab, Pred) -> + Lab = hipe_rtl:mk_new_label(), + [test_cons(X, TrueLab, hipe_rtl:label_name(Lab), 0.5), + Lab, + test_nil(X, TrueLab, FalseLab, Pred)]. + +test_integer(X, TrueLab, FalseLab, Pred) -> + Lab = hipe_rtl:mk_new_label(), + [test_fixnum(X, TrueLab, hipe_rtl:label_name(Lab), 0.5), + Lab, + test_bignum(X, TrueLab, FalseLab, Pred)]. + +test_number(X, TrueLab, FalseLab, Pred) -> + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + BigMask = ?TAG_HEADER_MASK - ?BIG_SIGN_BIT, + HeaderFlonum = flonum_header(), + [test_fixnum(X, TrueLab, hipe_rtl:label_name(Lab1), 0.5), + Lab1, + test_is_boxed(X, hipe_rtl:label_name(Lab2), FalseLab, 0.5), + Lab2, + get_header(Tmp, X), + mask_and_compare(Tmp, BigMask, ?TAG_HEADER_POS_BIG, + TrueLab, hipe_rtl:label_name(Lab3), 0.5), + Lab3, + hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(HeaderFlonum), + TrueLab, FalseLab, Pred)]. + +%% CONS, NIL, and TUPLE are not constants, everything else is +test_constant(X, TrueLab, FalseLab, Pred) -> + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Pred1 = 1-Pred, + [test_cons(X, FalseLab, hipe_rtl:label_name(Lab1), Pred1), + Lab1, + test_nil(X, FalseLab, hipe_rtl:label_name(Lab2), Pred1), + Lab2, + test_tuple(X, FalseLab, TrueLab, Pred1)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +tag_fixnum(DestVar, SrcReg) -> + [hipe_rtl:mk_fixnumop(DestVar, SrcReg, tag)]. +%% [hipe_rtl:mk_alu(DestVar, SrcReg, sll, hipe_rtl:mk_imm(?TAG_IMMED1_SIZE)), +%% hipe_rtl:mk_alu(DestVar, DestVar, add, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]. + +realtag_fixnum(DestVar, SrcReg) -> + [hipe_rtl:mk_alu(DestVar, SrcReg, sll, hipe_rtl:mk_imm(?TAG_IMMED1_SIZE)), + hipe_rtl:mk_alu(DestVar, DestVar, add, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]. + +untag_fixnum(DestReg, SrcVar) -> + hipe_rtl:mk_fixnumop(DestReg, SrcVar, untag). +%% hipe_rtl:mk_alu(DestReg, SrcVar, 'sra', hipe_rtl:mk_imm(?TAG_IMMED1_SIZE)). + +realuntag_fixnum(DestReg, SrcVar) -> + hipe_rtl:mk_alu(DestReg, SrcVar, 'sra', hipe_rtl:mk_imm(?TAG_IMMED1_SIZE)). + +fixnum_val(Fixnum) -> + Fixnum bsr ?TAG_IMMED1_SIZE. + +test_fixnums(Args, TrueLab, FalseLab, Pred) -> + {Reg, Ands} = test_fixnums_1(Args, []), + Ands ++ [test_fixnum(Reg, TrueLab, FalseLab, Pred)]. + +test_fixnums_1([Arg1, Arg2], Acc) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + {Tmp, lists:reverse([hipe_rtl:mk_alu(Tmp, Arg1, 'and', Arg2)|Acc])}; +test_fixnums_1([Arg1, Arg2|Args], Acc) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + test_fixnums_1([Tmp|Args], [hipe_rtl:mk_alu(Tmp, Arg1, 'and', Arg2)|Acc]). + +test_two_fixnums(Arg1, Arg2, FalseLab) -> + TrueLab = hipe_rtl:mk_new_label(), + case hipe_rtl:is_imm(Arg2) of + true -> + Value = hipe_rtl:imm_value(Arg2), + case Value band ?TAG_IMMED1_MASK of + ?TAG_IMMED1_SMALL -> + [test_fixnum(Arg1, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), + TrueLab]; + _ -> + [hipe_rtl:mk_goto(FalseLab)] + end; + false -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp, Arg1, 'and', Arg2), + test_fixnum(Tmp, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), + TrueLab] + end. + +fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, CmpOp) -> + hipe_rtl:mk_branch(Arg1, CmpOp, Arg2, TrueLab, FalseLab, Pred). + +fixnum_gt(Arg1, Arg2, TrueLab, FalseLab, Pred) -> + fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, gt). + +fixnum_lt(Arg1, Arg2, TrueLab, FalseLab, Pred) -> + fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, lt). + +fixnum_ge(Arg1, Arg2, TrueLab, FalseLab, Pred) -> + fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, ge). + +fixnum_le(Arg1, Arg2, TrueLab, FalseLab, Pred) -> + fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, le). + +%% We know the answer will be a fixnum +unsafe_fixnum_add(Arg1, Arg2, Res) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alu(Res, Arg1, add, Tmp)]. + +%% We know the answer will be a fixnum +unsafe_fixnum_sub(Arg1, Arg2, Res) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alu(Res, Arg1, sub, Tmp)]. + +%%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag +%%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag +fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + %% XXX: Consider moving this test to the users of fixnum_addsub. + case Arg1 =/= Res andalso Arg2 =/= Res of + true -> + %% Args differ from res. + NoOverflowLab = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alub(Res, Arg1, AluOp, Tmp, not_overflow, + hipe_rtl:label_name(NoOverflowLab), + hipe_rtl:label_name(OtherLab), 0.99), + NoOverflowLab]; + false -> + %% At least one of the arguments is the same as Res. + Tmp2 = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg? + NoOverflowLab = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alub(Tmp2, Arg1, AluOp, Tmp, not_overflow, + hipe_rtl:label_name(NoOverflowLab), + hipe_rtl:label_name(OtherLab), 0.99), + NoOverflowLab, + hipe_rtl:mk_move(Res, Tmp2)] + end. + +%%% ((16X+tag) div 16) * ((16Y+tag)-tag) + tag = X*16Y+tag = 16(XY)+tag +fixnum_mul(Arg1, Arg2, Res, OtherLab) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + U1 = hipe_rtl:mk_new_reg_gcsafe(), + U2 = hipe_rtl:mk_new_reg_gcsafe(), + NoOverflowLab = hipe_rtl:mk_new_label(), + [untag_fixnum(U1, Arg1), + hipe_rtl:mk_alu(U2, Arg2, 'sub', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alub(Tmp, U1, 'mul', U2, overflow, hipe_rtl:label_name(OtherLab), + hipe_rtl:label_name(NoOverflowLab), 0.01), + NoOverflowLab, + hipe_rtl:mk_alu(Res, Tmp, 'add', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]. + +fixnum_andorxor(AluOp, Arg1, Arg2, Res) -> + case AluOp of + 'xor' -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp, Arg1, 'xor', Arg2), % clears tag :-( + hipe_rtl:mk_alu(Res, Tmp, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]; + _ -> hipe_rtl:mk_alu(Res, Arg1, AluOp, Arg2) + end. + +fixnum_not(Arg, Res) -> + Mask = (-1 bsl ?TAG_IMMED1_SIZE), + hipe_rtl:mk_alu(Res, Arg, 'xor', hipe_rtl:mk_imm(Mask)). + +fixnum_bsr(Arg1, Arg2, Res) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + [untag_fixnum(Tmp1, Arg2), + hipe_rtl:mk_alu(Tmp2, Arg1, 'sra', Tmp1), + hipe_rtl:mk_alu(Res, Tmp2, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]. + +%% If someone knows how to make this better, please do. +fixnum_bsl(Arg1, Arg2, Res) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp3 = hipe_rtl:mk_new_reg_gcsafe(), + [untag_fixnum(Tmp2, Arg2), + hipe_rtl:mk_alu(Tmp1, Arg1, 'sub', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alu(Tmp3, Tmp1, 'sll', Tmp2), + hipe_rtl:mk_alu(Res, Tmp3, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +unsafe_car(Dst, Arg) -> + hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(-(?TAG_PRIMARY_LIST))). + +unsafe_cdr(Dst, Arg) -> + WordSize = hipe_rtl_arch:word_size(), + hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(-(?TAG_PRIMARY_LIST)+WordSize)). + +unsafe_constant_element(Dst, Index, Tuple) -> % Index is an immediate + WordSize = hipe_rtl_arch:word_size(), + Offset = -(?TAG_PRIMARY_BOXED) + WordSize * hipe_rtl:imm_value(Index), + hipe_rtl:mk_load(Dst, Tuple, hipe_rtl:mk_imm(Offset)). + +unsafe_update_element(Tuple, Index, Value) -> % Index is an immediate + WordSize = hipe_rtl_arch:word_size(), + Offset = -(?TAG_PRIMARY_BOXED) + WordSize * hipe_rtl:imm_value(Index), + hipe_rtl:mk_store(Tuple, hipe_rtl:mk_imm(Offset), Value). + +%%% wrong semantics +%% unsafe_variable_element(Dst, Index, Tuple) -> % Index is an unknown fixnum +%% %% Load word at (Tuple - 2) + ((Index >> 4) << 2). +%% %% Offset = ((Index >> 4) << 2) - 2. +%% %% Index = x..x1111 (fixnum tag is 2#1111). +%% %% (Index >> 2) = 00x..x11 and ((Index >> 4) << 2) = 00x..x00. +%% %% Therefore, ((Index >> 4) << 2) = (Index >> 2) - 3. +%% %% So Offset = ((Index >> 4) << 2) - 2 = (Index >> 2) - (3 + 2). +%% Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), +%% Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), +%% Shift = ?TAG_IMMED1_SIZE - 2, +%% OffAdj = (?TAG_IMMED1_SMALL bsr Shift) + ?TAG_PRIMARY_BOXED, +%% [hipe_rtl:mk_alu(Tmp1, Index, 'srl', hipe_rtl:mk_imm(Shift)), +%% hipe_rtl:mk_alu(Tmp2, Tmp1, 'sub', hipe_rtl:mk_imm(OffAdj)), +%% hipe_rtl:mk_load(Dst, Tuple, Tmp2)]. + +element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) -> + FixnumOkLab = hipe_rtl:mk_new_label(), + IndexOkLab = hipe_rtl:mk_new_label(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple + UIndex = hipe_rtl:mk_new_reg_gcsafe(), + Arity = hipe_rtl:mk_imm(A), + InvIndex = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + case IndexInfo of + valid -> + %% This is no branch, 1 load and 3 alus = 4 instr + [untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_alu(Offset, UIndex, 'sll', + hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), + hipe_rtl:mk_load(Dst, Ptr, Offset)]; + fixnums -> + %% This is 1 branch, 1 load and 4 alus = 6 instr + [untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, + FailLabName, IndexOkLab)]; + _ -> + %% This is 3 branches, 1 load and 5 alus = 9 instr + [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), + FailLabName, 0.99), + FixnumOkLab, + untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, + FailLabName, IndexOkLab)] + end; +element(Dst, Index, Tuple, FailLabName, tuple, IndexInfo) -> + FixnumOkLab = hipe_rtl:mk_new_label(), + IndexOkLab = hipe_rtl:mk_new_label(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple + Header = hipe_rtl:mk_new_reg_gcsafe(), + UIndex = hipe_rtl:mk_new_reg_gcsafe(), + Arity = hipe_rtl:mk_new_reg_gcsafe(), + InvIndex = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + case IndexInfo of + fixnums -> + %% This is 1 branch, 2 loads and 5 alus = 8 instr + [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, + FailLabName, IndexOkLab)]; + Num when is_integer(Num) -> + %% This is 1 branch, 1 load and 3 alus = 5 instr + [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| + gen_element_tail(Dst, Ptr, InvIndex, hipe_rtl:mk_imm(Num), + Offset, UIndex, FailLabName, IndexOkLab)]; + _ -> + %% This is 2 branches, 2 loads and 6 alus = 10 instr + [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99), + FixnumOkLab, + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, + FailLabName, IndexOkLab)] + end; +element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> + FixnumOkLab = hipe_rtl:mk_new_label(), + BoxedOkLab = hipe_rtl:mk_new_label(), + TupleOkLab = hipe_rtl:mk_new_label(), + IndexOkLab = hipe_rtl:mk_new_label(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple + Header = hipe_rtl:mk_new_reg_gcsafe(), + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + UIndex = hipe_rtl:mk_new_reg_gcsafe(), + Arity = hipe_rtl:mk_new_reg_gcsafe(), + InvIndex = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + case IndexInfo of + fixnums -> + %% This is 3 branches, 2 loads and 5 alus = 10 instr + [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), + FailLabName, 0.99), + BoxedOkLab, + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_alub(Tmp, Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + TupleOkLab, + untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Arity, Header, 'srl', + hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, + UIndex, FailLabName, IndexOkLab)]; + Num when is_integer(Num) -> + %% This is 3 branches, 2 loads and 4 alus = 9 instr + [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), + FailLabName, 0.99), + BoxedOkLab, + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_alub(Tmp, Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + TupleOkLab, + hipe_rtl:mk_alu(Arity, Header, 'srl', + hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, + hipe_rtl:mk_imm(Num), FailLabName, IndexOkLab)]; + _ -> + %% This is 4 branches, 2 loads, and 6 alus = 12 instr :( + [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), + FailLabName, 0.99), + FixnumOkLab, + test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), + FailLabName, 0.99), + BoxedOkLab, + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + hipe_rtl:mk_alub(Tmp, Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + TupleOkLab, + untag_fixnum(UIndex, Index), + hipe_rtl:mk_alu(Arity, Header, 'srl', + hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| + gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, + UIndex, FailLabName, IndexOkLab)] + end. + +gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, + UIndex, FailLabName, IndexOkLab) -> + %% now check that 1 <= UIndex <= Arity + %% if UIndex < 1, then (Arity - UIndex) >= Arity + %% if UIndex > Arity, then (Arity - UIndex) < 0, which is >=u Arity + %% otherwise, 0 <= (Arity - UIndex) < Arity + [hipe_rtl:mk_alu(InvIndex, Arity, 'sub', UIndex), + hipe_rtl:mk_branch(InvIndex, 'geu', Arity, FailLabName, + hipe_rtl:label_name(IndexOkLab), 0.01), + IndexOkLab, + hipe_rtl:mk_alu(Offset, UIndex, 'sll', + hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), + hipe_rtl:mk_load(Dst, Ptr, Offset)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +unsafe_closure_element(Dst, Index, Closure) -> % Index is an immediate + Offset = -(?TAG_PRIMARY_BOXED) %% Untag + + ?EFT_ENV %% Field offset + %% Index from 1 to N hence -1) + + (hipe_rtl_arch:word_size() * (hipe_rtl:imm_value(Index)-1)), + hipe_rtl:mk_load(Dst, Closure, hipe_rtl:mk_imm(Offset)). + +mk_fun_header() -> + hipe_rtl:mk_imm(?HEADER_FUN). + +tag_fun(Res, X) -> + tag_boxed(Res, X). + +%% untag_fun(Res, X) -> +%% hipe_rtl:mk_alu(Res, X, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)). + +-ifdef(EFT_NATIVE_ADDRESS). +if_fun_get_arity_and_address(ArityReg, AddressReg, FunP, BadFunLab, Pred) -> + %% EmuAddressPtrReg = hipe_rtl:mk_new_reg(), + %% FEPtrReg = hipe_rtl:mk_new_reg(), + %% ArityReg = hipe_rtl:mk_new_reg(), + %% NumFreeReg = hipe_rtl:mk_new_reg(), + %% RealArityReg = hipe_rtl:mk_new_reg(), + TrueLab0 = hipe_rtl:mk_new_label(), + %% TrueLab1 = hipe_rtl:mk_new_label(), + IsFunCode = test_closure(FunP, hipe_rtl:label_name(TrueLab0), BadFunLab, Pred), + GetArityCode = + [TrueLab0, + %% Funp->arity contains the arity + hipe_rtl:mk_load(ArityReg, FunP, + hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)+ + ?EFT_ARITY)), + hipe_rtl:mk_load(AddressReg, FunP, + hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)+ + ?EFT_NATIVE_ADDRESS))], + IsFunCode ++ GetArityCode. +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Binary Code +%% + +create_heap_binary(Base, Size, Dst) when is_integer(Size) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + WordSize = hipe_rtl_arch:word_size(), + NoWords=(Size + 3*WordSize-1) div WordSize, + NoBytes = NoWords*WordSize, + HeapBinHeader = hipe_rtl:mk_imm(mk_header(NoWords-1, + ?TAG_HEADER_HEAP_BIN)), + [GetHPInsn, + tag_boxed(Dst, HP), + set_field_from_pointer({heap_bin, thing_word}, HP, HeapBinHeader), + set_field_from_pointer({heap_bin, binsize}, HP, hipe_rtl:mk_imm(Size)), + hipe_rtl:mk_alu(Base, HP, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA)), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(NoBytes)), + PutHPInsn]; + +create_heap_binary(Base, Size, Dst) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + WordSize = hipe_rtl_arch:word_size(), + Log2WordSize = hipe_rtl_arch:log2_word_size(), + EvenWordSize = hipe_rtl:mk_new_reg_gcsafe(), + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + Header = hipe_rtl:mk_new_reg_gcsafe(), + Tmp3 = hipe_rtl:mk_new_reg(), % offset from HP + Tmp4 = hipe_rtl:mk_new_reg(), % offset from HP + [GetHPInsn, + hipe_rtl:mk_alu(Tmp1, Size, add, hipe_rtl:mk_imm(WordSize-1)), + hipe_rtl:mk_alu(EvenWordSize, Tmp1, sra, hipe_rtl:mk_imm(Log2WordSize)), + hipe_rtl:mk_alu(Tmp2, EvenWordSize, add, hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Base, HP, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA)), + mk_var_header(Header, Tmp2, ?TAG_HEADER_HEAP_BIN), + set_field_from_pointer({heap_bin, thing_word}, HP, Header), + set_field_from_pointer({heap_bin, binsize}, HP, Size), + tag_boxed(Dst, HP), + hipe_rtl:mk_alu(Tmp3, HP, add, Size), + hipe_rtl:mk_alu(Tmp4, Tmp3, add, hipe_rtl:mk_imm(3*WordSize-1)), + hipe_rtl:mk_alu(HP, Tmp4, 'and', hipe_rtl:mk_imm(-WordSize)), + PutHPInsn]. + +create_refc_binary(Base, Size, Dst) -> + create_refc_binary(Base, Size, hipe_rtl:mk_imm(0), Dst). + +create_refc_binary(Base, Size, Flags, Dst) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + ProcBinHeader = hipe_rtl:mk_imm(?HEADER_PROC_BIN), + WordSize = hipe_rtl_arch:word_size(), + Val = hipe_rtl:mk_new_reg(), % offset from Base + [GetHPInsn, + tag_boxed(Dst, HP), + set_field_from_pointer({proc_bin, thing_word}, HP, ProcBinHeader), + set_field_from_pointer({proc_bin, binsize}, HP, Size), + heap_arch_spec(HP), + hipe_rtl:mk_alu(Val, Base, sub, hipe_rtl:mk_imm(?BINARY_ORIG_BYTES)), + set_field_from_pointer({proc_bin, val}, HP, Val), + set_field_from_pointer({proc_bin, bytes}, HP, Base), + set_field_from_pointer({proc_bin, flags}, HP, Flags), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(?PROC_BIN_WORDSIZE*WordSize)), + PutHPInsn]. + +heap_arch_spec(HP) -> + Tmp1 = hipe_rtl:mk_new_reg(), % MSO state + [hipe_rtl_arch:pcb_load(Tmp1, ?P_OFF_HEAP_MSO), + set_field_from_pointer({proc_bin, next}, HP, Tmp1), + hipe_rtl_arch:pcb_store(?P_OFF_HEAP_MSO, HP)]. + +test_heap_binary(Binary, TrueLblName, FalseLblName) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + [get_header(Tmp1, Binary), + hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), + hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_HEAP_BIN), + TrueLblName, FalseLblName)]. + +mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Orig) -> + mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, + hipe_rtl:mk_imm(0), Orig). + +mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, + Writable, Orig) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + WordSize = hipe_rtl_arch:word_size(), + [GetHPInsn, + tag_boxed(Dst, HP), + build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Writable, Orig), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE*WordSize)), + PutHPInsn]. + +build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, + Writable, Orig) -> + Head = hipe_rtl:mk_imm(?HEADER_SUB_BIN), + [set_field_from_term({sub_binary, thing_word}, Dst, Head), + set_field_from_term({sub_binary, binsize}, Dst, ByteSize), + set_field_from_term({sub_binary, offset}, Dst, ByteOffs), + set_field_from_term({sub_binary, bitsize}, Dst, BitSize), + set_field_from_term({sub_binary, bitoffset}, Dst, BitOffs), + set_field_from_term({sub_binary, is_writable}, Dst, Writable), + set_field_from_term({sub_binary, orig}, Dst, Orig)]. + +test_subbinary(Binary, TrueLblName, FalseLblName) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + [get_header(Tmp1, Binary), + hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), + hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_SUB_BIN), TrueLblName, FalseLblName)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Float Code + +unsafe_load_float(DstLo, DstHi, Src) -> + WordSize = hipe_rtl_arch:word_size(), + Offset1 = -(?TAG_PRIMARY_BOXED) + WordSize, + Offset2 = Offset1 + 4, %% This should really be 4 and not WordSize + case hipe_rtl_arch:endianess() of + little -> + [hipe_rtl:mk_load(DstLo, Src, hipe_rtl:mk_imm(Offset1), int32, unsigned), + hipe_rtl:mk_load(DstHi, Src, hipe_rtl:mk_imm(Offset2), int32, unsigned)]; + big -> + [hipe_rtl:mk_load(DstHi, Src, hipe_rtl:mk_imm(Offset1), int32, unsigned), + hipe_rtl:mk_load(DstLo, Src, hipe_rtl:mk_imm(Offset2), int32, unsigned)] + end. + +unsafe_untag_float(Dst, Src) -> + Offset = -(?TAG_PRIMARY_BOXED) + hipe_rtl_arch:word_size(), + [hipe_rtl:mk_fload(Dst, Src, hipe_rtl:mk_imm(Offset))]. + +unsafe_tag_float(Dst, Src) -> + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + Head = hipe_rtl:mk_imm(flonum_header()), + WordSize = hipe_rtl_arch:word_size(), + [GetHPInsn, + hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0), Head), + hipe_rtl:mk_fstore(HP, hipe_rtl:mk_imm(WordSize), Src), + tag_flonum(Dst, HP), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(WordSize+8)), + PutHPInsn]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% BigNum Code + +unsafe_mk_big(Dst, Src, Signedness) -> + WordSize = hipe_rtl_arch:word_size(), + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + PosHead = hipe_rtl:mk_imm(mk_header(1, ?TAG_HEADER_POS_BIG)), + NegHead = hipe_rtl:mk_imm(mk_header(1, ?TAG_HEADER_NEG_BIG)), + PosLabel = hipe_rtl:mk_new_label(), + NegLabel = hipe_rtl:mk_new_label(), + JoinLabel = hipe_rtl:mk_new_label(), + PutHeaderCode = + case Signedness of + unsigned -> + [hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0*WordSize), PosHead)]; + signed -> + [hipe_rtl:mk_branch(Src, ge, hipe_rtl:mk_imm(0), + hipe_rtl:label_name(PosLabel), + hipe_rtl:label_name(NegLabel)), + PosLabel, + hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0*WordSize), PosHead), + hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLabel)), + NegLabel, + hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0*WordSize), NegHead), + JoinLabel] + end, + RestCode = + [hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(1*WordSize), Src), + tag_boxed(Dst, HP), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(2*WordSize)), + PutHPInsn], + [GetHPInsn] ++ PutHeaderCode ++ RestCode. + +get_one_word_pos_bignum(USize, Size, Fail) -> + Header = hipe_rtl:mk_new_reg(), + HalfLbl = hipe_rtl:mk_new_label(), + HalfLblName = hipe_rtl:label_name(HalfLbl), + WordSize = hipe_rtl_arch:word_size(), + PosHead = hipe_rtl:mk_imm(mk_header(1, ?TAG_HEADER_POS_BIG)), + [get_header(Header, Size), + hipe_rtl:mk_branch(Header, eq, PosHead, HalfLblName, Fail), + HalfLbl, + hipe_rtl:mk_load(USize, Size, hipe_rtl:mk_imm(1*WordSize + -?TAG_PRIMARY_BOXED))]. + +-spec bignum_sizeneed(non_neg_integer()) -> non_neg_integer(). + +bignum_sizeneed(Size) -> + WordSizeBits = hipe_rtl_arch:word_size() * 8, + case is_fixnum(1 bsl Size) of + true -> + 0; + false -> + ((Size + (WordSizeBits-1)) div WordSizeBits) + 1 + end. + +bignum_sizeneed_code(SizeReg,FixNumLblName) -> + WordSizeBits = hipe_rtl_arch:word_size() * 8, + WordShifts = hipe_rtl_arch:log2_word_size() + 3, + MaxFixNum = WordSizeBits - ?TAG_IMMED1_SIZE - 1, + ResReg = hipe_rtl:mk_new_reg_gcsafe(), + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + BigLbl = hipe_rtl:mk_new_label(), + Code = + [hipe_rtl:mk_branch(SizeReg, le, hipe_rtl:mk_imm(MaxFixNum), + FixNumLblName, hipe_rtl:label_name(BigLbl)), + BigLbl, + hipe_rtl:mk_alu(Tmp1,SizeReg,add,hipe_rtl:mk_imm(WordSizeBits-1)), + hipe_rtl:mk_alu(ResReg,Tmp1,srl,hipe_rtl:mk_imm(WordShifts)), + hipe_rtl:mk_alu(ResReg,ResReg,add,hipe_rtl:mk_imm(1))], + {ResReg,Code}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% MatchState Code + +create_matchstate(Max, BinSize, Base, Offset, Orig, Ms) -> + WordSize = hipe_rtl_arch:word_size(), + {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(), + ByteSize = (Max+1)*WordSize + ?MS_SAVEOFFSET, + SizeInWords = ((ByteSize div WordSize) - 1), + Header = hipe_rtl:mk_imm(mk_header(SizeInWords, ?TAG_HEADER_BIN_MATCHSTATE)), + [GetHPInsn, + hipe_rtl:mk_alu(Ms, HP, add, hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), + set_field_from_term({matchstate,thing_word}, Ms, Header), + set_field_from_term({matchstate,{matchbuffer,orig}}, Ms, Orig), + set_field_from_term({matchstate,{matchbuffer,base}}, Ms, Base), + set_field_from_term({matchstate,{matchbuffer,binsize}}, Ms, BinSize), + set_field_from_term({matchstate,{matchbuffer,offset}}, Ms, Offset), + set_field_from_term({matchstate,{saveoffset, 0}}, Ms, Offset), + hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(ByteSize)), + PutHPInsn]. + +convert_matchstate(Ms) -> + WordSize = hipe_rtl_arch:word_size(), + Header = hipe_rtl:mk_new_reg_gcsafe(), + TmpSize = hipe_rtl:mk_new_reg_gcsafe(), + SavedOffset = hipe_rtl:mk_new_reg_gcsafe(), + Orig = hipe_rtl:mk_new_reg_gcsafe(), + BinSize = hipe_rtl:mk_new_reg_gcsafe(), + ByteSize = hipe_rtl:mk_new_reg_gcsafe(), + BitSize = hipe_rtl:mk_new_reg_gcsafe(), + ByteOffset = hipe_rtl:mk_new_reg_gcsafe(), + BitOffset = hipe_rtl:mk_new_reg_gcsafe(), + SizeInWords = hipe_rtl:mk_new_reg_gcsafe(), + Hole = hipe_rtl:mk_new_reg_gcsafe(), + BigIntHeader = hipe_rtl:mk_new_reg_gcsafe(), + [get_field_from_term({matchstate, {matchbuffer, orig}}, Ms, Orig), + get_field_from_term({matchstate, {matchbuffer, binsize}}, Ms, BinSize), + get_field_from_term({matchstate, {saveoffset, 0}}, Ms, SavedOffset), + get_field_from_term({matchstate, thing_word}, Ms, Header), + hipe_rtl:mk_alu(TmpSize, BinSize, sub, SavedOffset), + hipe_rtl:mk_alu(BitSize, TmpSize, 'and', hipe_rtl:mk_imm(7)), + hipe_rtl:mk_alu(BitOffset, SavedOffset, 'and', hipe_rtl:mk_imm(7)), + hipe_rtl:mk_alu(ByteSize, TmpSize, srl, hipe_rtl:mk_imm(3)), + hipe_rtl:mk_alu(ByteOffset, SavedOffset, srl, hipe_rtl:mk_imm(3)), + build_sub_binary(Ms, ByteSize, ByteOffset, BitSize, BitOffset, + hipe_rtl:mk_imm(0), Orig), + size_from_header(SizeInWords, Header), + hipe_rtl:mk_alu(Hole, SizeInWords, sub, hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE-1)), + mk_var_header(BigIntHeader, Hole, ?TAG_HEADER_POS_BIG), + hipe_rtl:mk_store(Ms, hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE*WordSize-?TAG_PRIMARY_BOXED), + BigIntHeader)]. + +compare_matchstate(Max, Ms, LargeEnough, TooSmall) -> + WordSize = hipe_rtl_arch:word_size(), + ByteSize = (Max+1)*WordSize + ?MS_SAVEOFFSET, + SizeInWords = ((ByteSize div WordSize) - 1), + Header = hipe_rtl:mk_imm(mk_header(SizeInWords, ?TAG_HEADER_BIN_MATCHSTATE)), + RealHeader = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_load(RealHeader, Ms, hipe_rtl:mk_imm(-?TAG_PRIMARY_BOXED)), + hipe_rtl:mk_branch(RealHeader, ge, Header, LargeEnough, TooSmall)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Struct manipulation code + +get_field_offset({matchstate, thing_word}) -> + ?MS_THING_WORD; +get_field_offset({matchstate, matchbuffer}) -> + ?MS_MATCHBUFFER; +get_field_offset({matchstate, {matchbuffer, _} = Field}) -> + ?MS_MATCHBUFFER + get_field_offset(Field); +get_field_offset({matchstate, {saveoffset, N}} = Field) -> + ?MS_SAVEOFFSET + N*get_field_size1(Field); +get_field_offset({sub_binary, thing_word}) -> + ?SUB_BIN_THING_WORD; +get_field_offset({sub_binary, binsize}) -> + ?SUB_BIN_BINSIZE; +get_field_offset({sub_binary, bitsize}) -> + ?SUB_BIN_BITSIZE; +get_field_offset({sub_binary, offset}) -> + ?SUB_BIN_OFFS; +get_field_offset({sub_binary, bitoffset}) -> + ?SUB_BIN_BITOFFS; +get_field_offset({sub_binary, is_writable}) -> + ?SUB_BIN_WRITABLE; +get_field_offset({sub_binary, orig}) -> + ?SUB_BIN_ORIG; +get_field_offset({proc_bin, thing_word}) -> + ?PROC_BIN_THING_WORD; +get_field_offset({proc_bin, binsize}) -> + ?PROC_BIN_BINSIZE; +get_field_offset({proc_bin, next}) -> + ?PROC_BIN_NEXT; +get_field_offset({proc_bin, val}) -> + ?PROC_BIN_VAL; +get_field_offset({proc_bin, bytes}) -> + ?PROC_BIN_BYTES; +get_field_offset({proc_bin, flags}) -> + ?PROC_BIN_FLAGS; +get_field_offset({binary, orig_bytes}) -> + ?BINARY_ORIG_BYTES; +get_field_offset({binary, orig_size}) -> + ?BINARY_ORIG_SIZE; +get_field_offset({heap_bin, thing_word}) -> + ?HEAP_BIN_THING_WORD; +get_field_offset({heap_bin, binsize}) -> + ?HEAP_BIN_SIZE; +get_field_offset({heap_bin, {data, N}} = Field) -> + ?HEAP_BIN_DATA+N*get_field_size1(Field); +get_field_offset({matchbuffer, offset}) -> + ?MB_OFFSET; +get_field_offset({matchbuffer, orig}) -> + ?MB_ORIG; +get_field_offset({matchbuffer, base}) -> + ?MB_BASE; +get_field_offset({matchbuffer, binsize}) -> + ?MB_SIZE. + +get_field_size(Field) -> + size_to_atom(get_field_size1(Field)). + +size_to_atom(Bytes) -> + WordSize = hipe_rtl_arch:word_size(), + case Bytes of + WordSize -> word; + 4 -> int32; + %%2 -> int16; So far there are no 2 byte fields + 1 -> byte + end. + +get_field_size1({matchstate, thing_word}) -> + ?MS_THING_WORD_SIZE; +get_field_size1({matchstate, {matchbuffer, _} = Field}) -> + get_field_size1(Field); +get_field_size1({matchstate, {saveoffset, _N}}) -> + ?MS_SAVEOFFSET_SIZE; +get_field_size1({sub_binary, thing_word}) -> + ?SUB_BIN_THING_WORD_SIZE; +get_field_size1({sub_binary, binsize}) -> + ?SUB_BIN_BINSIZE_SIZE; +get_field_size1({sub_binary, bitsize}) -> + ?SUB_BIN_BITSIZE_SIZE; +get_field_size1({sub_binary, offset}) -> + ?SUB_BIN_OFFS_SIZE; +get_field_size1({sub_binary, bitoffset}) -> + ?SUB_BIN_BITOFFS_SIZE; +get_field_size1({sub_binary, is_writable}) -> + ?SUB_BIN_WRITABLE_SIZE; +get_field_size1({sub_binary, orig}) -> + ?SUB_BIN_ORIG_SIZE; +get_field_size1({proc_bin, thing_word}) -> + ?PROC_BIN_THING_WORD_SIZE; +get_field_size1({proc_bin, binsize}) -> + ?PROC_BIN_BINSIZE_SIZE; +get_field_size1({proc_bin, next}) -> + ?PROC_BIN_NEXT_SIZE; +get_field_size1({proc_bin, val}) -> + ?PROC_BIN_VAL_SIZE; +get_field_size1({proc_bin, bytes}) -> + ?PROC_BIN_BYTES_SIZE; +get_field_size1({proc_bin, flags}) -> + ?PROC_BIN_FLAGS_SIZE; +get_field_size1({binary, orig_bytes}) -> + ?BINARY_ORIG_BYTES_SIZE; +get_field_size1({binary, orig_size}) -> + ?BINARY_ORIG_SIZE_SIZE; +get_field_size1({heap_bin, thing_word}) -> + ?HEAP_BIN_THING_WORD_SIZE; +get_field_size1({heap_bin, binsize}) -> + ?HEAP_BIN_SIZE_SIZE; +get_field_size1({heap_bin, {data, _}}) -> + ?HEAP_BIN_DATA_SIZE; +get_field_size1({matchbuffer, offset}) -> + ?MB_OFFSET_SIZE; +get_field_size1({matchbuffer, orig}) -> + ?MB_ORIG_SIZE; +get_field_size1({matchbuffer, base}) -> + ?MB_BASE_SIZE; +get_field_size1({matchbuffer, binsize}) -> + ?MB_SIZE_SIZE. + +get_field_from_term(Struct, Term, Dst) -> + Offset = hipe_rtl:mk_imm(get_field_offset(Struct) - ?TAG_PRIMARY_BOXED), + Size = get_field_size(Struct), + hipe_rtl:mk_load(Dst, Term, Offset, Size, unsigned). + +set_field_from_term(Struct, Term, Value) -> + Offset = hipe_rtl:mk_imm(get_field_offset(Struct) - ?TAG_PRIMARY_BOXED), + Size = get_field_size(Struct), + hipe_rtl:mk_store(Term, Offset, Value, Size). + +get_field_from_pointer(Struct, Term, Dst) -> + Offset = hipe_rtl:mk_imm(get_field_offset(Struct)), + Size = get_field_size(Struct), + hipe_rtl:mk_load(Dst, Term, Offset, Size, unsigned). + +set_field_from_pointer(Struct, Term, Value) -> + Offset = hipe_rtl:mk_imm(get_field_offset(Struct)), + Size = get_field_size(Struct), + hipe_rtl:mk_store(Term, Offset, Value, Size). + +extract_matchbuffer(Mb, Ms) -> + What = {matchstate, matchbuffer}, + Offset = hipe_rtl:mk_imm(get_field_offset(What) - ?TAG_PRIMARY_BOXED), + hipe_rtl:mk_alu(Mb, Ms, add, Offset). + +extract_binary_bytes(Binary, Base) -> + Offset = hipe_rtl:mk_imm(get_field_offset({binary, orig_bytes})), + hipe_rtl:mk_alu(Base, Binary, add, Offset). diff --git a/lib/hipe/sparc/Makefile b/lib/hipe/sparc/Makefile new file mode 100644 index 0000000000..efd4996046 --- /dev/null +++ b/lib/hipe/sparc/Makefile @@ -0,0 +1,120 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. +MODULES=hipe_rtl_to_sparc \ + hipe_sparc \ + hipe_sparc_assemble \ + hipe_sparc_cfg \ + hipe_sparc_defuse \ + hipe_sparc_encode \ + hipe_sparc_finalise \ + hipe_sparc_frame \ + hipe_sparc_liveness_all \ + hipe_sparc_liveness_fpr \ + hipe_sparc_liveness_gpr \ + hipe_sparc_main \ + hipe_sparc_pp \ + hipe_sparc_ra \ + hipe_sparc_ra_finalise \ + hipe_sparc_ra_ls \ + hipe_sparc_ra_naive \ + hipe_sparc_ra_postconditions \ + hipe_sparc_ra_postconditions_fp \ + hipe_sparc_registers + +HRL_FILES=hipe_sparc.hrl +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. +$(EBIN)/hipe_rtl_to_sparc.beam: ../rtl/hipe_rtl.hrl +$(EBIN)/hipe_sparc_assemble.beam: ../main/hipe.hrl ../../kernel/src/hipe_ext_format.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl +$(EBIN)/hipe_sparc_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc +$(EBIN)/hipe_sparc_frame.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_sparc_liveness_all.beam: ../flow/liveness.inc +$(EBIN)/hipe_sparc_liveness_fpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_sparc_liveness_gpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_sparc_registers.beam: ../rtl/hipe_literals.hrl + +$(TARGET_FILES): hipe_sparc.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl new file mode 100644 index 0000000000..df5e2b0077 --- /dev/null +++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl @@ -0,0 +1,972 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_rtl_to_sparc). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl"). + +translate(RTL) -> + hipe_gensym:init(sparc), + hipe_gensym:set_var(sparc, hipe_sparc_registers:first_virtual()), + hipe_gensym:set_label(sparc, hipe_gensym:get_label(rtl)), + Map0 = vmap_empty(), + {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0), + OldData = hipe_rtl:rtl_data(RTL), + {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData), + {RegFormals, _} = split_args(Formals), + Code = + case RegFormals of + [] -> Code0; + _ -> [hipe_sparc:mk_label(hipe_gensym:get_next_label(sparc)) | + move_formals(RegFormals, Code0)] + end, + IsClosure = hipe_rtl:rtl_is_closure(RTL), + IsLeaf = hipe_rtl:rtl_is_leaf(RTL), + hipe_sparc:mk_defun(hipe_rtl:rtl_fun(RTL), + Formals, + IsClosure, + IsLeaf, + Code, + NewData, + [], + []). + +conv_insn_list([H|T], Map, Data) -> + {NewH, NewMap, NewData1} = conv_insn(H, Map, Data), + %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]), + {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1), + {NewH ++ NewT, NewData2}; +conv_insn_list([], _, Data) -> + {[], Data}. + +conv_insn(I, Map, Data) -> + case I of + #alu{} -> conv_alu(I, Map, Data); + #alub{} -> conv_alub(I, Map, Data); + #branch{} -> conv_branch(I, Map, Data); + #call{} -> conv_call(I, Map, Data); + #comment{} -> conv_comment(I, Map, Data); + #enter{} -> conv_enter(I, Map, Data); + #goto{} -> conv_goto(I, Map, Data); + #label{} -> conv_label(I, Map, Data); + #load{} -> conv_load(I, Map, Data); + #load_address{} -> conv_load_address(I, Map, Data); + #load_atom{} -> conv_load_atom(I, Map, Data); + #move{} -> conv_move(I, Map, Data); + #return{} -> conv_return(I, Map, Data); + #store{} -> conv_store(I, Map, Data); + #switch{} -> conv_switch(I, Map, Data); % XXX: only switch uses/updates Data + #fconv{} -> conv_fconv(I, Map, Data); + #fmove{} -> conv_fmove(I, Map, Data); + #fload{} -> conv_fload(I, Map, Data); + #fstore{} -> conv_fstore(I, Map, Data); + #fp{} -> conv_fp_binary(I, Map, Data); + #fp_unop{} -> conv_fp_unary(I, Map, Data); + _ -> exit({?MODULE,conv_insn,I}) + end. + +conv_fconv(I, Map, Data) -> + %% Dst := (double)Src, where Dst is FP reg and Src is int reg + {Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map), % exclude imm src + {Dst, Map2} = conv_fpreg(hipe_rtl:fconv_dst(I), Map1), + I2 = mk_fconv(Src, Dst), + {I2, Map2, Data}. + +mk_fconv(Src, Dst) -> + CSP = hipe_sparc:mk_temp(14, 'untagged'), % o6 + Disp = hipe_sparc:mk_simm13(100), + [hipe_sparc:mk_store('stw', Src, CSP, Disp), + hipe_sparc:mk_pseudo_fload(CSP, Disp, Dst, true), + hipe_sparc:mk_fp_unary('fitod', Dst, Dst)]. + +conv_fmove(I, Map, Data) -> + %% Dst := Src, where both Dst and Src are FP regs + {Src, Map1} = conv_fpreg(hipe_rtl:fmove_src(I), Map), + {Dst, Map2} = conv_fpreg(hipe_rtl:fmove_dst(I), Map1), + I2 = mk_fmove(Src, Dst), + {I2, Map2, Data}. + +mk_fmove(Src, Dst) -> + [hipe_sparc:mk_pseudo_fmove(Src, Dst)]. + +conv_fload(I, Map, Data) -> + %% Dst := MEM[Base+Off], where Dst is FP reg + {Base1, Map1} = conv_src(hipe_rtl:fload_src(I), Map), + {Base2, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), + {Dst, Map3} = conv_fpreg(hipe_rtl:fload_dst(I), Map2), + I2 = mk_fload(Base1, Base2, Dst), + {I2, Map3, Data}. + +mk_fload(Base1, Base2, Dst) -> + case hipe_sparc:is_temp(Base1) of + true -> + case hipe_sparc:is_temp(Base2) of + true -> + mk_fload_rr(Base1, Base2, Dst); + _ -> + mk_fload_ri(Base1, Base2, Dst) + end; + _ -> + case hipe_sparc:is_temp(Base2) of + true -> + mk_fload_ri(Base2, Base1, Dst); + _ -> + mk_fload_ii(Base1, Base2, Dst) + end + end. + +mk_fload_rr(Base1, Base2, Dst) -> + Tmp = new_untagged_temp(), + Disp = hipe_sparc:mk_simm13(0), + [hipe_sparc:mk_alu('add', Base1, Base2, Tmp), + hipe_sparc:mk_pseudo_fload(Tmp, Disp, Dst, false)]. + +mk_fload_ii(Base1, Base2, Dst) -> + io:format("~w: RTL fload with two immediates\n", [?MODULE]), + Tmp = new_untagged_temp(), + mk_set(Base1, Tmp, + mk_fload_ri(Tmp, Base2, Dst)). + +mk_fload_ri(Base, Disp, Dst) -> + hipe_sparc:mk_fload(Base, Disp, Dst, 'new'). + +conv_fstore(I, Map, Data) -> + %% MEM[Base+Off] := Src, where Src is FP reg + {Base1, Map1} = conv_dst(hipe_rtl:fstore_base(I), Map), + {Base2, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1), + {Src, Map3} = conv_fpreg(hipe_rtl:fstore_src(I), Map2), + I2 = mk_fstore(Src, Base1, Base2), + {I2, Map3, Data}. + +mk_fstore(Src, Base1, Base2) -> + case hipe_sparc:is_temp(Base2) of + true -> + mk_fstore_rr(Src, Base1, Base2); + _ -> + mk_fstore_ri(Src, Base1, Base2) + end. + +mk_fstore_rr(Src, Base1, Base2) -> + Tmp = new_untagged_temp(), + Disp = hipe_sparc:mk_simm13(0), + [hipe_sparc:mk_alu('add', Base1, Base2, Tmp), + hipe_sparc:mk_pseudo_fstore(Src, Tmp, Disp)]. + +mk_fstore_ri(Src, Base, Disp) -> + hipe_sparc:mk_fstore(Src, Base, Disp, 'new'). + +conv_fp_binary(I, Map, Data) -> + {Src1, Map1} = conv_fpreg(hipe_rtl:fp_src1(I), Map), + {Src2, Map2} = conv_fpreg(hipe_rtl:fp_src2(I), Map1), + {Dst, Map3} = conv_fpreg(hipe_rtl:fp_dst(I), Map2), + RtlFpOp = hipe_rtl:fp_op(I), + I2 = mk_fp_binary(RtlFpOp, Src1, Src2, Dst), + {I2, Map3, Data}. + +mk_fp_binary(RtlFpOp, Src1, Src2, Dst) -> + FpBinOp = + case RtlFpOp of + 'fadd' -> 'faddd'; + 'fdiv' -> 'fdivd'; + 'fmul' -> 'fmuld'; + 'fsub' -> 'fsubd' + end, + [hipe_sparc:mk_fp_binary(FpBinOp, Src1, Src2, Dst)]. + +conv_fp_unary(I, Map, Data) -> + {Src, Map1} = conv_fpreg(hipe_rtl:fp_unop_src(I), Map), + {Dst, Map2} = conv_fpreg(hipe_rtl:fp_unop_dst(I), Map1), + RtlFpUnOp = hipe_rtl:fp_unop_op(I), + I2 = mk_fp_unary(RtlFpUnOp, Src, Dst), + {I2, Map2, Data}. + +mk_fp_unary(RtlFpUnOp, Src, Dst) -> + FpUnOp = + case RtlFpUnOp of + 'fchs' -> 'fnegd' + end, + [hipe_sparc:mk_fp_unary(FpUnOp, Src, Dst)]. + +conv_alu(I, Map, Data) -> + %% dst = src1 aluop src2 + {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map), + {Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0), + {Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1), + AluOp = conv_aluop(hipe_rtl:alu_op(I)), + {I2, _DidCommute} = mk_alu(AluOp, Src1, Src2, Dst), + {I2, Map2, Data}. + +mk_alu(XAluOp, Src1, Src2, Dst) -> + case hipe_sparc:is_temp(Src1) of + true -> + case hipe_sparc:is_temp(Src2) of + true -> + {mk_alu_rs(XAluOp, Src1, Src2, Dst), + false}; + _ -> + {mk_alu_ri(XAluOp, Src1, Src2, Dst), + false} + end; + _ -> + case hipe_sparc:is_temp(Src2) of + true -> + mk_alu_ir(XAluOp, Src1, Src2, Dst); + _ -> + {mk_alu_ii(XAluOp, Src1, Src2, Dst), + false} + end + end. + +mk_alu_ii(XAluOp, Src1, Src2, Dst) -> + io:format("~w: ALU with two immediates (~w ~w ~w ~w)\n", + [?MODULE, XAluOp, Src1, Src2, Dst]), + Tmp = new_untagged_temp(), + mk_set(Src1, Tmp, + mk_alu_ri(XAluOp, Tmp, Src2, Dst)). + +mk_alu_ir(XAluOp, Src1, Src2, Dst) -> + case xaluop_commutes(XAluOp) of + true -> + {mk_alu_ri(XAluOp, Src2, Src1, Dst), + true}; + _ -> + Tmp = new_untagged_temp(), + {mk_set(Src1, Tmp, + mk_alu_rs(XAluOp, Tmp, Src2, Dst)), + false} + end. + +mk_alu_ri(XAluOp, Src1, Src2, Dst) -> + case xaluop_is_shift(XAluOp) of + true -> + mk_shift_ri(XAluOp, Src1, Src2, Dst); + false -> + mk_arith_ri(XAluOp, Src1, Src2, Dst) + end. + +mk_shift_ri(XShiftOp, Src1, Src2, Dst) when is_integer(Src2) -> + if Src2 >= 0, Src2 < 32 -> % XXX: sparc64: < 64 + mk_alu_rs(XShiftOp, Src1, hipe_sparc:mk_uimm5(Src2), Dst); + true -> + exit({?MODULE,mk_shift_ri,Src2}) % excessive shifts are errors + end. + +mk_arith_ri(XAluOp, Src1, Src2, Dst) when is_integer(Src2) -> + if -4096 =< Src2, Src2 < 4096 -> + mk_alu_rs(XAluOp, Src1, hipe_sparc:mk_simm13(Src2), Dst); + true -> + Tmp = new_untagged_temp(), + mk_set(Src2, Tmp, + mk_alu_rs(XAluOp, Src1, Tmp, Dst)) + end. + +mk_alu_rs(XAluOp, Src1, Src2, Dst) -> + [hipe_sparc:mk_alu(xaluop_normalise(XAluOp), Src1, Src2, Dst)]. + +conv_alub(I, Map, Data) -> + %% dst = src1 aluop src2; if COND goto label + {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), + {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + Cond = conv_cond(hipe_rtl:alub_cond(I)), + RtlAlubOp = hipe_rtl:alub_op(I), + I2 = + case RtlAlubOp of + 'mul' -> + %% To check for overflow in 32x32->32 multiplication: + %% smul Src1,Src2,Dst % Dst is lo32(Res), %y is %hi32(Res) + %% rd %y,TmpHi + %% sra Dst,31,TmpSign % fill TmpSign with sign of Dst + %% subcc TmpSign,TmpHi,%g0 + %% [bne OverflowLabel] + NewCond = + case Cond of + vs -> ne; + vc -> eq + end, + TmpHi = hipe_sparc:mk_new_temp('untagged'), + TmpSign = hipe_sparc:mk_new_temp('untagged'), + G0 = hipe_sparc:mk_g0(), + {I1, _DidCommute} = mk_alu('smul', Src1, Src2, Dst), + I1 ++ + [hipe_sparc:mk_rdy(TmpHi), + hipe_sparc:mk_alu('sra', Dst, hipe_sparc:mk_uimm5(31), TmpSign) | + conv_alub2(G0, TmpSign, 'sub', NewCond, TmpHi, I)]; + _ -> + conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) + end, + {I2, Map2, Data}. + +-ifdef(notdef). % XXX: only for sparc64, alas +conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> + case conv_cond_rcond(Cond) of + [] -> + conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I); + RCond -> + conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) + end. + +conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) -> + TrueLab = hipe_rtl:alub_true_label(I), + FalseLab = hipe_rtl:alub_false_label(I), + Pred = hipe_rtl:alub_pred(I), + %% "Dst = Src1 AluOp Src2; if COND" becomes + %% "Dst = Src1 AluOp Src2; if-COND(Dst)" + {I2, _DidCommute} = mk_alu(conv_alubop_nocc(RtlAlubOp), Src1, Src2, Dst), + I2 ++ mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred). + +conv_cond_rcond(Cond) -> + case Cond of + 'e' -> 'z'; + 'ne' -> 'nz'; + 'g' -> 'gz'; + 'ge' -> 'gez'; + 'l' -> 'lz'; + 'le' -> 'lez'; + _ -> [] % vs, vc, gu, geu, lu, leu + end. + +conv_alubop_nocc(RtlAlubOp) -> + case RtlAlubOp of + 'add' -> 'add'; + 'sub' -> 'sub'; + %% mul: handled elsewhere + 'or' -> 'or'; + 'and' -> 'and'; + 'xor' -> 'xor' + %% no shift ops + end. + +mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred) -> + [hipe_sparc:mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred)]. +-else. +conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> + conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I). +-endif. + +conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> + TrueLab = hipe_rtl:alub_true_label(I), + FalseLab = hipe_rtl:alub_false_label(I), + Pred = hipe_rtl:alub_pred(I), + %% "Dst = Src1 AluOp Src2; if COND" becomes + %% "Dst = Src1 AluOpCC Src22; if-COND(CC)" + {I2, _DidCommute} = mk_alu(conv_alubop_cc(RtlAlubOp), Src1, Src2, Dst), + I2 ++ mk_pseudo_bp(Cond, TrueLab, FalseLab, Pred). + +conv_alubop_cc(RtlAlubOp) -> + case RtlAlubOp of + 'add' -> 'addcc'; + 'sub' -> 'subcc'; + %% mul: handled elsewhere + 'or' -> 'orcc'; + 'and' -> 'andcc'; + 'xor' -> 'xorcc' + %% no shift ops + end. + +conv_branch(I, Map, Data) -> + %% = src1 - src2; if COND goto label + {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), + Cond = conv_cond(hipe_rtl:branch_cond(I)), + I2 = conv_branch2(Src1, Cond, Src2, I), + {I2, Map1, Data}. + +-ifdef(notdef). % XXX: only for sparc64, alas +conv_branch2(Src1, Cond, Src2, I) -> + case conv_cond_rcond(Cond) of + [] -> + conv_branch_bp(Src1, Cond, Src2, I); + RCond -> + conv_branch_br(Src1, RCond, Src2, I) + end. + +conv_branch_br(Src1, RCond, Src2, I) -> + TrueLab = hipe_rtl:branch_true_label(I), + FalseLab = hipe_rtl:branch_false_label(I), + Pred = hipe_rtl:branch_pred(I), + %% "if src1-COND-src2" becomes + %% "sub src1,src2,tmp; if-COND(tmp)" + Dst = hipe_sparc:mk_new_temp('untagged'), + XAluOp = 'cmp', % == a sub that commutes + {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), + NewRCond = + case DidCommute of + true -> commute_rcond(RCond); + false -> RCond + end, + I1 ++ mk_pseudo_br(NewRCond, Dst, TrueLab, FalseLab, Pred). + +commute_rcond(RCond) -> % if x RCond y, then y commute_rcond(RCond) x + case RCond of + 'z' -> 'z'; % ==, == + 'nz' -> 'nz'; % !=, != + 'gz' -> 'lz'; % >, < + 'gez' -> 'lez'; % >=, <= + 'lz' -> 'gz'; % <, > + 'lez' -> 'gez' % <=, >= + end. +-else. +conv_branch2(Src1, Cond, Src2, I) -> + conv_branch_bp(Src1, Cond, Src2, I). +-endif. + +conv_branch_bp(Src1, Cond, Src2, I) -> + TrueLab = hipe_rtl:branch_true_label(I), + FalseLab = hipe_rtl:branch_false_label(I), + Pred = hipe_rtl:branch_pred(I), + %% "if src1-COND-src2" becomes + %% "subcc src1,src2,%g0; if-COND(CC)" + Dst = hipe_sparc:mk_g0(), + XAluOp = 'cmpcc', % == a subcc that commutes + {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), + NewCond = + case DidCommute of + true -> commute_cond(Cond); + false -> Cond + end, + I1 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred). + +conv_call(I, Map, Data) -> + {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), + {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), + {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1), + ContLab = hipe_rtl:call_continuation(I), + ExnLab = hipe_rtl:call_fail(I), + Linkage = hipe_rtl:call_type(I), + I2 = mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage), + {I2, Map2, Data}. + +mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + case hipe_sparc:is_prim(Fun) of + true -> + mk_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage); + false -> + mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) + end. + +mk_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) -> + case hipe_sparc:prim_prim(Prim) of + %% no SPARC-specific primops defined yet + _ -> + mk_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) + end. + +mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + %% The backend does not support pseudo_calls without a + %% continuation label, so we make sure each call has one. + {RealContLab, Tail} = + case mk_call_results(Dsts) of + [] -> + %% Avoid consing up a dummy basic block if the moves list + %% is empty, as is typical for calls to suspend/0. + %% This should be subsumed by a general "optimise the CFG" + %% module, and could probably be removed. + case ContLab of + [] -> + NewContLab = hipe_gensym:get_next_label(sparc), + {NewContLab, [hipe_sparc:mk_label(NewContLab)]}; + _ -> + {ContLab, []} + end; + Moves -> + %% Change the call to continue at a new basic block. + %% In this block move the result registers to the Dsts, + %% then continue at the call's original continuation. + NewContLab = hipe_gensym:get_next_label(sparc), + case ContLab of + [] -> + %% This is just a fallthrough + %% No jump back after the moves. + {NewContLab, + [hipe_sparc:mk_label(NewContLab) | + Moves]}; + _ -> + %% The call has a continuation. Jump to it. + {NewContLab, + [hipe_sparc:mk_label(NewContLab) | + Moves ++ + [hipe_sparc:mk_b_label(ContLab)]]} + end + end, + SDesc = hipe_sparc:mk_sdesc(ExnLab, 0, length(Args), {}), + CallInsn = hipe_sparc:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage), + {RegArgs,StkArgs} = split_args(Args), + mk_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])). + +mk_call_results(Dsts) -> + case Dsts of + [] -> []; + [Dst] -> + RV = hipe_sparc:mk_rv(), + [hipe_sparc:mk_pseudo_move(RV, Dst)] + end. + +mk_push_args(StkArgs, Tail) -> + case length(StkArgs) of + 0 -> + Tail; + NrStkArgs -> + [hipe_sparc:mk_pseudo_call_prepare(NrStkArgs) | + mk_store_args(StkArgs, NrStkArgs * word_size(), Tail)] + end. + +mk_store_args([Arg|Args], PrevOffset, Tail) -> + Offset = PrevOffset - word_size(), + {Src,FixSrc} = + case hipe_sparc:is_temp(Arg) of + true -> + {Arg, []}; + _ -> + Tmp = new_tagged_temp(), + {Tmp, mk_set(Arg, Tmp)} + end, + %% XXX: sparc64: stx + Store = hipe_sparc:mk_store('stw', Src, hipe_sparc:mk_sp(), hipe_sparc:mk_simm13(Offset)), + mk_store_args(Args, Offset, FixSrc ++ [Store | Tail]); +mk_store_args([], _, Tail) -> + Tail. + +conv_comment(I, Map, Data) -> + I2 = [hipe_sparc:mk_comment(hipe_rtl:comment_text(I))], + {I2, Map, Data}. + +conv_enter(I, Map, Data) -> + {Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map), + {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0), + I2 = mk_enter(Fun, Args, hipe_rtl:enter_type(I)), + {I2, Map1, Data}. + +mk_enter(Fun, Args, Linkage) -> + Arity = length(Args), + {RegArgs,StkArgs} = split_args(Args), + move_actuals(RegArgs, + [hipe_sparc:mk_pseudo_tailcall_prepare(), + hipe_sparc:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]). + +conv_goto(I, Map, Data) -> + I2 = [hipe_sparc:mk_b_label(hipe_rtl:goto_label(I))], + {I2, Map, Data}. + +conv_label(I, Map, Data) -> + I2 = [hipe_sparc:mk_label(hipe_rtl:label_name(I))], + {I2, Map, Data}. + +conv_load(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), + {Base1, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), + LdOp = conv_ldop(hipe_rtl:load_size(I), hipe_rtl:load_sign(I)), + {I2, _DidCommute} = mk_alu(LdOp, Base1, Base2, Dst), + {I2, Map2, Data}. + +conv_ldop(LoadSize, LoadSign) -> + case LoadSize of + word -> 'lduw'; % XXX: sparc64: ldx + int32 -> 'lduw'; % XXX: sparc64: lduw or ldsw + int16 -> + case LoadSign of + signed -> 'ldsh'; + unsigned -> 'lduh' + end; + byte -> + case LoadSign of + signed -> 'ldsb'; + unsigned -> 'ldub' + end + end. + +conv_load_address(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map), + Addr = hipe_rtl:load_address_addr(I), + Type = hipe_rtl:load_address_type(I), + Src = {Addr,Type}, + I2 = [hipe_sparc:mk_pseudo_set(Src, Dst)], + {I2, Map0, Data}. + +conv_load_atom(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map), + Src = hipe_rtl:load_atom_atom(I), + I2 = [hipe_sparc:mk_pseudo_set(Src, Dst)], + {I2, Map0, Data}. + +conv_move(I, Map, Data) -> + {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), + {Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), + I2 = mk_move(Src, Dst, []), + {I2, Map1, Data}. + +mk_move(Src, Dst, Tail) -> + case hipe_sparc:is_temp(Src) of + true -> [hipe_sparc:mk_pseudo_move(Src, Dst) | Tail]; + _ -> mk_set(Src, Dst, Tail) + end. + +conv_return(I, Map, Data) -> + %% TODO: multiple-value returns + {[Arg], Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map), + I2 = mk_move(Arg, hipe_sparc:mk_rv(), + [hipe_sparc:mk_pseudo_ret()]), + {I2, Map0, Data}. + +conv_store(I, Map, Data) -> + {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), % no immediates allowed + {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), + {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), + StOp = conv_stop(hipe_rtl:store_size(I)), + I2 = mk_store(StOp, Src, Base1, Base2), + {I2, Map2, Data}. + +conv_stop(StoreSize) -> + case StoreSize of + word -> 'stw'; % XXX: sparc64: stx + int32 -> 'stw'; + byte -> 'stb' + end. + +mk_store(StOp, Src, Base1, Base2) -> + case hipe_sparc:is_temp(Src) of + true -> + mk_store2(StOp, Src, Base1, Base2); + _ -> + Tmp = new_untagged_temp(), + mk_set(Src, Tmp, + mk_store2(StOp, Tmp, Base1, Base2)) + end. + +mk_store2(StOp, Src, Base1, Base2) -> + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_rr(StOp, Src, Base1, Base2); + _ -> + mk_store_ri(StOp, Src, Base1, Base2) + end. + +mk_store_ri(StOp, Src, Base, Disp) -> + hipe_sparc:mk_store(StOp, Src, Base, Disp, 'new', []). + +mk_store_rr(StOp, Src, Base1, Base2) -> + [hipe_sparc:mk_store(StOp, Src, Base1, Base2)]. + +conv_switch(I, Map, Data) -> + Labels = hipe_rtl:switch_labels(I), + LMap = [{label,L} || L <- Labels], + {NewData, JTabLab} = + case hipe_rtl:switch_sort_order(I) of + [] -> + hipe_consttab:insert_block(Data, word, LMap); + SortOrder -> + hipe_consttab:insert_sorted_block( + Data, word, LMap, SortOrder) + end, + %% no immediates allowed here + {IndexR, Map1} = conv_dst(hipe_rtl:switch_src(I), Map), + JTabR = new_untagged_temp(), + OffsetR = new_untagged_temp(), + DestR = new_untagged_temp(), + I2 = + [hipe_sparc:mk_pseudo_set({JTabLab,constant}, JTabR), + %% XXX: sparc64: << 3 + hipe_sparc:mk_alu('sll', IndexR, hipe_sparc:mk_uimm5(2), OffsetR), + %% XXX: sparc64: ldx + hipe_sparc:mk_alu('lduw', JTabR, OffsetR, DestR), + hipe_sparc:mk_jmp(DestR, hipe_sparc:mk_simm13(0), Labels)], + {I2, Map1, NewData}. + +%%% Create a conditional branch. + +mk_pseudo_bp(Cond, TrueLabel, FalseLabel, Pred) -> + [hipe_sparc:mk_pseudo_bp(Cond, TrueLabel, FalseLabel, Pred)]. + +%%% Load an integer constant into a register. + +mk_set(Value, Dst) -> mk_set(Value, Dst, []). + +mk_set(Value, Dst, Tail) -> + hipe_sparc:mk_set(Value, Dst, Tail). + +%%% Convert an RTL ALU op. + +conv_aluop(RtlAluOp) -> + case RtlAluOp of + 'add' -> 'add'; + 'sub' -> 'sub'; + 'mul' -> 'mulx'; + 'or' -> 'or'; + 'and' -> 'and'; + 'xor' -> 'xor'; + 'sll' -> 'sll'; % XXX: sparc64: sllx + 'srl' -> 'srl'; % XXX: sparc64: srlx + 'sra' -> 'sra' % XXX: sparc64: srax + end. + +%%% Check if an extended SPARC AluOp commutes. + +xaluop_commutes(XAluOp) -> + case XAluOp of + 'cmp' -> true; + 'cmpcc' -> true; + 'add' -> true; + 'addcc' -> true; + 'and' -> true; + 'andcc' -> true; + 'or' -> true; + 'orcc' -> true; + 'xor' -> true; + 'xorcc' -> true; + 'sub' -> false; + 'subcc' -> false; + 'mulx' -> true; + 'smul' -> true; + 'sll' -> false; + 'srl' -> false; + 'sra' -> false; + 'sllx' -> false; + 'srlx' -> false; + 'srax' -> false; + 'ldsb' -> true; + 'ldsh' -> true; + 'ldsw' -> true; + 'ldub' -> true; + 'lduh' -> true; + 'lduw' -> true; + 'ldx' -> true + end. + +%%% Check if an extended SPARC AluOp is a shift. + +xaluop_is_shift(XAluOp) -> + case XAluOp of + 'sll' -> true; + 'srl' -> true; + 'sra' -> true; + 'sllx' -> true; + 'srlx' -> true; + 'srax' -> true; + _ -> false + end. + +%%% Convert an extended SPARC AluOp back to a plain AluOp. +%%% This just maps cmp{,cc} to sub{,cc}. + +xaluop_normalise(XAluOp) -> + case XAluOp of + 'cmp' -> 'sub'; + 'cmpcc' -> 'subcc'; + _ -> XAluOp + end. + +%%% Convert an RTL condition code. + +conv_cond(RtlCond) -> + case RtlCond of + eq -> 'e'; + ne -> 'ne'; + gt -> 'g'; + gtu -> 'gu'; % >u + ge -> 'ge'; + geu -> 'geu'; % >=u + lt -> 'l'; + ltu -> 'lu'; % 'le'; + leu -> 'leu'; % <=u + overflow -> 'vs'; + not_overflow -> 'vc' + end. + +%%% Commute a SPARC condition code. + +commute_cond(Cond) -> % if x Cond y, then y commute_cond(Cond) x + case Cond of + 'e' -> 'e'; % ==, == + 'ne' -> 'ne'; % !=, != + 'g' -> 'l'; % >, < + 'ge' -> 'le'; % >=, <= + 'l' -> 'g'; % <, > + 'le' -> 'ge'; % <=, >= + 'gu' -> 'lu'; % >u, 'leu'; % >=u, <=u + 'lu' -> 'gu'; % u + 'leu' -> 'geu' % <=u, >=u + %% vs/vc: n/a + end. + +%%% Split a list of formal or actual parameters into the +%%% part passed in registers and the part passed on the stack. +%%% The parameters passed in registers are also tagged with +%%% the corresponding registers. + +split_args(Args) -> + split_args(0, hipe_sparc_registers:nr_args(), Args, []). + +split_args(I, N, [Arg|Args], RegArgs) when I < N -> + Reg = hipe_sparc_registers:arg(I), + Temp = hipe_sparc:mk_temp(Reg, 'tagged'), + split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]); +split_args(_, _, StkArgs, RegArgs) -> + {RegArgs, StkArgs}. + +%%% Convert a list of actual parameters passed in +%%% registers (from split_args/1) to a list of moves. + +move_actuals([{Src,Dst}|Actuals], Rest) -> + move_actuals(Actuals, mk_move(Src, Dst, Rest)); +move_actuals([], Rest) -> + Rest. + +%%% Convert a list of formal parameters passed in +%%% registers (from split_args/1) to a list of moves. + +move_formals([{Dst,Src}|Formals], Rest) -> + move_formals(Formals, [hipe_sparc:mk_pseudo_move(Src, Dst) | Rest]); +move_formals([], Rest) -> + Rest. + +%%% Convert a 'fun' operand (MFA, prim, or temp) + +conv_fun(Fun, Map) -> + case hipe_rtl:is_var(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case hipe_rtl:is_reg(Fun) of + true -> + conv_dst(Fun, Map); + false -> + if is_atom(Fun) -> + {hipe_sparc:mk_prim(Fun), Map}; + true -> + {conv_mfa(Fun), Map} + end + end + end. + +%%% Convert an MFA operand. + +conv_mfa({M,F,A}) when is_atom(M), is_atom(F), is_integer(A) -> + hipe_sparc:mk_mfa(M, F, A). + +%%% Convert an RTL source operand (imm/var/reg). +%%% Returns a temp or a naked integer. + +conv_src(Opnd, Map) -> + case hipe_rtl:is_imm(Opnd) of + true -> + Value = hipe_rtl:imm_value(Opnd), + if is_integer(Value) -> + {Value, Map} + end; + false -> + conv_dst(Opnd, Map) + end. + +conv_src_list([O|Os], Map) -> + {V, Map1} = conv_src(O, Map), + {Vs, Map2} = conv_src_list(Os, Map1), + {[V|Vs], Map2}; +conv_src_list([], Map) -> + {[], Map}. + +%%% Convert an RTL destination operand (var/reg). + +conv_fpreg(Opnd, Map) -> + true = hipe_rtl:is_fpreg(Opnd), + conv_dst(Opnd, Map). + +conv_dst(Opnd, Map) -> + {Name, Type} = + case hipe_rtl:is_var(Opnd) of + true -> + {hipe_rtl:var_index(Opnd), 'tagged'}; + false -> + case hipe_rtl:is_fpreg(Opnd) of + true -> + {hipe_rtl:fpreg_index(Opnd), 'double'}; + false -> + {hipe_rtl:reg_index(Opnd), 'untagged'} + end + end, + IsPrecoloured = + case Type of + 'double' -> false; %hipe_sparc_registers:is_precoloured_fpr(Name); + _ -> hipe_sparc_registers:is_precoloured_gpr(Name) + end, + case IsPrecoloured of + true -> + {hipe_sparc:mk_temp(Name, Type), Map}; + false -> + case vmap_lookup(Map, Opnd) of + {value, NewTemp} -> + {NewTemp, Map}; + _ -> + NewTemp = hipe_sparc:mk_new_temp(Type), + {NewTemp, vmap_bind(Map, Opnd, NewTemp)} + end + end. + +conv_dst_list([O|Os], Map) -> + {Dst, Map1} = conv_dst(O, Map), + {Dsts, Map2} = conv_dst_list(Os, Map1), + {[Dst|Dsts], Map2}; +conv_dst_list([], Map) -> + {[], Map}. + +conv_formals(Os, Map) -> + conv_formals(hipe_sparc_registers:nr_args(), Os, Map, []). + +conv_formals(N, [O|Os], Map, Res) -> + Type = + case hipe_rtl:is_var(O) of + true -> 'tagged'; + _ -> 'untagged' + end, + Dst = + if N > 0 -> hipe_sparc:mk_new_temp(Type); % allocatable + true -> hipe_sparc:mk_new_nonallocatable_temp(Type) + end, + Map1 = vmap_bind(Map, O, Dst), + conv_formals(N-1, Os, Map1, [Dst|Res]); +conv_formals(_, [], Map, Res) -> + {lists:reverse(Res), Map}. + +%%% new_untagged_temp -- conjure up an untagged scratch reg + +new_untagged_temp() -> + hipe_sparc:mk_new_temp('untagged'). + +%%% new_tagged_temp -- conjure up a tagged scratch reg + +new_tagged_temp() -> + hipe_sparc:mk_new_temp('tagged'). + +%%% Map from RTL var/reg operands to temps. + +vmap_empty() -> + gb_trees:empty(). + +vmap_lookup(Map, Key) -> + gb_trees:lookup(Key, Map). + +vmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +word_size() -> + hipe_rtl_arch:word_size(). diff --git a/lib/hipe/sparc/hipe_sparc.erl b/lib/hipe/sparc/hipe_sparc.erl new file mode 100644 index 0000000000..9fcb94afb6 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc.erl @@ -0,0 +1,407 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc). +-export([ + mk_temp/2, + mk_new_temp/1, + mk_new_nonallocatable_temp/1, + is_temp/1, + temp_reg/1, + temp_type/1, + temp_is_allocatable/1, + temp_is_precoloured/1, + + mk_g0/0, + mk_ra/0, + mk_rv/0, + mk_sp/0, + mk_temp1/0, + mk_temp2/0, + + mk_simm13/1, + mk_uimm5/1, + + mk_mfa/3, + + mk_prim/1, + is_prim/1, + prim_prim/1, + + mk_sdesc/4, + + mk_alu/4, + mk_mov/2, + mk_load/6, + + mk_bp/3, + mk_b_label/1, + + %% mk_br/4, + + mk_call_rec/3, + + mk_call_tail/2, + + mk_comment/1, + + mk_label/1, + is_label/1, + label_label/1, + + mk_jmp/3, + mk_jmpl/2, + + mk_pseudo_bp/4, + negate_cond/1, + + %% mk_pseudo_br/5, + %% negate_rcond/1, + + mk_pseudo_call/4, + pseudo_call_contlab/1, + pseudo_call_funv/1, + pseudo_call_linkage/1, + pseudo_call_sdesc/1, + + mk_pseudo_call_prepare/1, + pseudo_call_prepare_nrstkargs/1, + + mk_pseudo_move/2, + is_pseudo_move/1, + pseudo_move_dst/1, + pseudo_move_src/1, + + mk_pseudo_ret/0, + + mk_pseudo_set/2, + + mk_pseudo_tailcall/4, + pseudo_tailcall_funv/1, + pseudo_tailcall_linkage/1, + pseudo_tailcall_stkargs/1, + + mk_pseudo_tailcall_prepare/0, + + mk_rdy/1, + + %% mk_sethi/2, + mk_nop/0, + mk_set/2, + mk_set/3, + mk_addi/4, + + mk_store/4, + mk_store/6, + + mk_fp_binary/4, + + mk_fp_unary/3, + + mk_pseudo_fload/4, + mk_fload/4, + + mk_pseudo_fmove/2, + is_pseudo_fmove/1, + pseudo_fmove_src/1, + pseudo_fmove_dst/1, + + mk_pseudo_fstore/3, + mk_fstore/4, + + mk_defun/8, + defun_code/1, + defun_data/1, + defun_formals/1, + defun_is_closure/1, + defun_is_leaf/1, + defun_mfa/1, + defun_var_range/1 + ]). + +-include("hipe_sparc.hrl"). + +mk_temp(Reg, Type, Allocatable) -> + #sparc_temp{reg=Reg, type=Type, allocatable=Allocatable}. +mk_temp(Reg, Type) -> mk_temp(Reg, Type, true). +mk_new_temp(Type, Allocatable) -> + mk_temp(hipe_gensym:get_next_var(sparc), Type, Allocatable). +mk_new_temp(Type) -> mk_new_temp(Type, true). +mk_new_nonallocatable_temp(Type) -> mk_new_temp(Type, false). +is_temp(X) -> case X of #sparc_temp{} -> true; _ -> false end. +temp_reg(#sparc_temp{reg=Reg}) -> Reg. +temp_type(#sparc_temp{type=Type}) -> Type. +temp_is_allocatable(#sparc_temp{allocatable=A}) -> A. +temp_is_precoloured(#sparc_temp{reg=Reg,type=Type}) -> + case Type of + %% 'double' -> hipe_sparc_registers:is_precoloured_fpr(Reg); + _ -> hipe_sparc_registers:is_precoloured_gpr(Reg) + end. + +mk_g0() -> mk_temp(hipe_sparc_registers:g0(), 'untagged'). +mk_ra() -> mk_temp(hipe_sparc_registers:return_address(), 'untagged'). +mk_rv() -> mk_temp(hipe_sparc_registers:return_value(), 'tagged'). +mk_sp() -> mk_temp(hipe_sparc_registers:stack_pointer(), 'untagged'). +mk_temp1() -> mk_temp(hipe_sparc_registers:temp1(), 'untagged'). +mk_temp2() -> mk_temp(hipe_sparc_registers:temp2(), 'untagged'). + +mk_simm13(Value) -> #sparc_simm13{value=Value}. +mk_uimm5(Value) -> #sparc_uimm5{value=Value}. +mk_uimm22(Value) -> #sparc_uimm22{value=Value}. + +mk_mfa(M, F, A) -> #sparc_mfa{m=M, f=F, a=A}. + +mk_prim(Prim) -> #sparc_prim{prim=Prim}. +is_prim(X) -> case X of #sparc_prim{} -> true; _ -> false end. +prim_prim(#sparc_prim{prim=Prim}) -> Prim. + +mk_sdesc(ExnLab, FSize, Arity, Live) -> + #sparc_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}. + +mk_alu(AluOp, Src1, Src2, Dst) -> + #alu{aluop=AluOp, src1=Src1, src2=Src2, dst=Dst}. +mk_mov(Src, Dst) -> mk_alu('or', mk_g0(), Src, Dst). + +mk_bp(Cond, Label, Pred) -> #bp{'cond'=Cond, label=Label, pred=Pred}. +mk_b_label(Label) -> mk_bp('a', Label, 1.0). + +-ifdef(notdef). % XXX: only for sparc64, alas +mk_br(RCond, Src, Label, Pred) -> + #br{rcond=RCond, src=Src, label=Label, pred=Pred}. +-endif. + +mk_call_rec(Fun, SDesc, Linkage) -> + #call_rec{'fun'=Fun, sdesc=SDesc, linkage=Linkage}. + +mk_call_tail(Fun, Linkage) -> #call_tail{'fun'=Fun, linkage=Linkage}. + +mk_comment(Term) -> #comment{term=Term}. + +mk_label(Label) -> #label{label=Label}. +is_label(I) -> case I of #label{} -> true; _ -> false end. +label_label(#label{label=Label}) -> Label. + +mk_jmp(Src1, Src2, Labels) -> #jmp{src1=Src1, src2=Src2, labels=Labels}. + +mk_jmpl(Src, SDesc) -> #jmpl{src=Src, sdesc=SDesc}. + +mk_pseudo_bp(Cond, TrueLab, FalseLab, Pred) -> + if Pred >= 0.5 -> + mk_pseudo_bp_simple(negate_cond(Cond), FalseLab, + TrueLab, 1.0-Pred); + true -> + mk_pseudo_bp_simple(Cond, TrueLab, FalseLab, Pred) + end. + +mk_pseudo_bp_simple(Cond, TrueLab, FalseLab, Pred) when Pred =< 0.5 -> + #pseudo_bp{'cond'=Cond, true_label=TrueLab, + false_label=FalseLab, pred=Pred}. + +negate_cond(Cond) -> + case Cond of + 'l' -> 'ge'; % <, >= + 'ge' -> 'l'; % >=, < + 'g' -> 'le'; % >, <= + 'le' -> 'g'; % <=, > + 'e' -> 'ne'; % ==, != + 'ne' -> 'e'; % !=, == + 'gu' -> 'leu'; % >u, <=u + 'leu'-> 'gu'; % <=u, >u + 'geu'-> 'lu'; % >=u, 'geu'; % =u + 'vs' -> 'vc'; % overflow, not_overflow + 'vc' -> 'vs' % not_overflow, overflow + end. + +-ifdef(notdef). % XXX: only for sparc64, alas +mk_pseudo_br(RCond, Src, TrueLab, FalseLab, Pred) -> + if Pred >= 0.5 -> + mk_pseudo_br_simple(negate_rcond(RCond), Src, FalseLab, + TrueLab, 1.0-Pred); + true -> + mk_pseudo_br_simple(RCond, Src, TrueLab, FalseLab, Pred) + end. + +mk_pseudo_br_simple(RCond, Src, TrueLab, FalseLab, Pred) when Pred =< 0.5 -> + #pseudo_br{rcond=RCond, src=Src, true_label=TrueLab, + false_label=FalseLab, pred=Pred}. + +negate_rcond(RCond) -> + case RCond of + 'z' -> 'nz'; % ==, != + 'nz' -> 'z'; % !=, == + 'gz' -> 'lez'; % >, <= + 'lez' -> 'gz'; % <=, > + 'gez' -> 'lz'; % >=, < + 'lz' -> 'gez' % <, >= + end. +-endif. + +mk_pseudo_call(FunV, SDesc, ContLab, Linkage) -> + #pseudo_call{funv=FunV, sdesc=SDesc, contlab=ContLab, linkage=Linkage}. +pseudo_call_funv(#pseudo_call{funv=FunV}) -> FunV. +pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab. +pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage. +pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc. + +mk_pseudo_call_prepare(NrStkArgs) -> + #pseudo_call_prepare{nrstkargs=NrStkArgs}. +pseudo_call_prepare_nrstkargs(#pseudo_call_prepare{nrstkargs=NrStkArgs}) -> + NrStkArgs. + +mk_pseudo_move(Src, Dst) -> #pseudo_move{src=Src, dst=Dst}. +is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. +pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. +pseudo_move_src(#pseudo_move{src=Src}) -> Src. + +mk_pseudo_ret() -> #pseudo_ret{}. + +mk_pseudo_set(Imm, Dst) -> #pseudo_set{imm=Imm, dst=Dst}. + +mk_pseudo_tailcall(FunV, Arity, StkArgs, Linkage) -> + #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage}. +pseudo_tailcall_funv(#pseudo_tailcall{funv=FunV}) -> FunV. +pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage. +pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs. + +mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}. + +mk_rdy(Dst) -> #rdy{dst=Dst}. + +mk_sethi(UImm22, Dst) -> #sethi{uimm22=UImm22, dst=Dst}. +mk_nop() -> mk_sethi(mk_uimm22(0), mk_g0()). + +%%% Load an integer constant into a register. +mk_set(Value, Dst) -> mk_set(Value, Dst, []). + +mk_set(Value, Dst, Tail) -> + if -4096 =< Value, Value < 4096 -> + [mk_alu('or', mk_g0(), mk_simm13(Value), Dst) | Tail]; + true -> + Hi22 = mk_uimm22((Value bsr 10) band 16#003FFFFF), + case (Value band 16#3FF) of + 0 -> + [mk_sethi(Hi22, Dst) | Tail]; + Lo10 -> + [mk_sethi(Hi22, Dst), + mk_alu('or', Dst, mk_simm13(Lo10), Dst) | + Tail] + end + end. + +%%% Add an integer constant. Dst may equal Src, +%%% in which case temp2 may be clobbered. +mk_addi(Src, Value, Dst, Tail) -> + if -4096 =< Value, Value < 4096 -> + [mk_alu('add', Src, mk_simm13(Value), Dst) | Tail]; + true -> + Tmp = + begin + DstReg = temp_reg(Dst), + SrcReg = temp_reg(Src), + if DstReg =:= SrcReg -> mk_temp2(); + true -> Dst + end + end, + mk_set(Value, Tmp, [mk_alu('add', Src, Tmp, Dst) | Tail]) + end. + +mk_store(StOp, Src, Base, Disp) -> + #store{stop=StOp, src=Src, base=Base, disp=Disp}. + +mk_store(StOp, Src, Base, Offset, Scratch, Rest) when is_integer(Offset) -> + if -4096 =< Offset, Offset < 4096 -> + [mk_store(StOp, Src, Base, mk_simm13(Offset)) | Rest]; + true -> + Index = mk_scratch(Scratch), + mk_set(Offset, Index, [mk_store(StOp, Src, Base, Index) | Rest]) + end. + +mk_load(LdOp, Base, Disp, Dst) -> + mk_alu(LdOp, Base, Disp, Dst). + +mk_load(LdOp, Base, Offset, Dst, Scratch, Rest) when is_integer(Offset) -> + if -4096 =< Offset, Offset < 4096 -> + [mk_load(LdOp, Base, mk_simm13(Offset), Dst) | Rest]; + true -> + Index = + begin + DstReg = temp_reg(Dst), + BaseReg = temp_reg(Base), + if DstReg =/= BaseReg -> Dst; + true -> mk_scratch(Scratch) + end + end, + mk_set(Offset, Index, [mk_load(LdOp, Base, Index, Dst) | Rest]) + end. + +mk_scratch(Scratch) -> + case Scratch of + 'temp2' -> mk_temp2(); + 'new' -> mk_new_temp('untagged') + end. + +mk_fp_binary(FpBinOp, Src1, Src2, Dst) -> + #fp_binary{fp_binop=FpBinOp, src1=Src1, src2=Src2, dst=Dst}. + +mk_fp_unary(FpUnOp, Src, Dst) -> #fp_unary{fp_unop=FpUnOp, src=Src, dst=Dst}. + +mk_pseudo_fload(Base, Disp, Dst, IsSingle) -> + #pseudo_fload{base=Base, disp=Disp, dst=Dst, is_single=IsSingle}. + +mk_fload(Base, Disp, Dst, Scratch) when is_integer(Disp) -> + if -4096 =< Disp, Disp < (4096-4) -> + [mk_pseudo_fload(Base, mk_simm13(Disp), Dst, false)]; + true -> + Tmp = mk_scratch(Scratch), + mk_set(Disp, Tmp, + [mk_alu('add', Tmp, Base, Tmp), + mk_pseudo_fload(Tmp, mk_simm13(0), Dst, false)]) + end. + +mk_pseudo_fmove(Src, Dst) -> #pseudo_fmove{src=Src, dst=Dst}. +is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end. +pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src. +pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst. + +mk_pseudo_fstore(Src, Base, Disp) -> + #pseudo_fstore{src=Src, base=Base, disp=Disp}. + +mk_fstore(Src, Base, Disp, Scratch) when is_integer(Disp) -> + if -4096 =< Disp, Disp < (4096-4) -> + [mk_pseudo_fstore(Src, Base, hipe_sparc:mk_simm13(Disp))]; + true -> + Tmp = mk_scratch(Scratch), + mk_set(Disp, Tmp, + [mk_alu('add', Tmp, Base, Tmp), + mk_pseudo_fstore(Src, Tmp, mk_simm13(0))]) + end. + +mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> + #defun{mfa=MFA, formals=Formals, code=Code, data=Data, + isclosure=IsClosure, isleaf=IsLeaf, + var_range=VarRange, label_range=LabelRange}. +defun_code(#defun{code=Code}) -> Code. +defun_data(#defun{data=Data}) -> Data. +defun_formals(#defun{formals=Formals}) -> Formals. +defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure. +defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf. +defun_mfa(#defun{mfa=MFA}) -> MFA. +defun_var_range(#defun{var_range=VarRange}) -> VarRange. diff --git a/lib/hipe/sparc/hipe_sparc.hrl b/lib/hipe/sparc/hipe_sparc.hrl new file mode 100644 index 0000000000..107541f96a --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc.hrl @@ -0,0 +1,116 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%%%-------------------------------------------------------------------- +%%% Basic Values: +%%% +%%% temp ::= #sparc_temp{reg, type, allocatable} +%%% reg ::= +%%% type ::= tagged | untagged | double +%%% allocatable ::= true | false +%%% +%%% sdesc ::= #sparc_sdesc{exnlab, fsize, arity, live} +%%% exnlab ::= [] | label +%%% fsize ::= int32 (frame size in words) +%%% live ::= (word offsets) +%%% arity ::= uint8 +%%% +%%% mfa ::= #sparc_mfa{atom, atom, arity} +%%% prim ::= #sparc_prim{atom} + +-record(sparc_mfa, {m::atom(), f::atom(), a::arity()}). +-record(sparc_prim, {prim}). +-record(sparc_sdesc, {exnlab, fsize, arity::arity(), live}). +-record(sparc_temp, {reg, type, allocatable}). +-record(sparc_simm13, {value}). +-record(sparc_uimm5, {value}). +-record(sparc_uimm6, {value}). % shift counts in 64-bit mode +-record(sparc_uimm22, {value}). + +%%% Instruction Operands: +%%% +%%% aluop ::= add | addcc | and | andcc | or | orcc +%%% | xor | xorcc | sub | subcc | mulx | smul +%%% | sll | srl | sra | sllx | srlx | srax +%%% | ldsb | ldsh | ldsw | ldub | lduh | lduw | ldx +%%% (HW has andn{,cc}, orn{,cc}, xnor{,cc}, addc{,cc}, +%%% and subc{,cc}, but we don't use them) +%%% cond ::= n | e | le | l | leu | lu | neg | vs | +%%% | a | ne | g | ge | gu | geu | pos | vc +%%% rcond ::= z | lez | lz | nz | gz | gez +%%% stop ::= stb | stw | stx (HW has sth, but we don't use it) +%%% +%%% immediate ::= int32 | atom | {label, label_type} +%%% label_type ::= constant | closure | c_const +%%% +%%% dst ::= temp +%%% src ::= temp +%%% src1 ::= temp +%%% src2 ::= temp +%%% | simm13 (only in alu.src2, jmp.src2, jmpl.src2) +%%% base ::= src1 +%%% disp ::= src2 +%%% +%%% fun ::= mfa | prim +%%% funv ::= fun | temp +%%% +%%% fp_binop ::= faddd | fdivd | fmuld | fsubd +%%% fp_unop ::= fitod | fmovd | fnegd + +%%% Instructions: + +-record(alu, {aluop, src1, src2, dst}). +-record(bp, {'cond', label, pred}). % local jump on %icc +-ifdef(notdef). % XXX: only for sparc64, alas +-record(br, {rcond, src, label, pred}). % local jump on register +-endif. +-record(call_rec, {'fun', sdesc, linkage}). % known recursive call +-record(call_tail, {'fun', linkage}). % known tailcall +-record(comment, {term}). +-record(jmp, {src1, src2, labels}). % return, switch, or computed tailcall +-record(jmpl, {src, sdesc}). % computed recursive call (jmpl [src+0],%o7) +-record(label, {label}). +-record(pseudo_bp, {'cond', true_label, false_label, pred}). +%%-record(pseudo_br, {rcond, src, true_label, false_label, pred}). +-record(pseudo_call, {funv, sdesc, contlab, linkage}). +-record(pseudo_call_prepare, {nrstkargs}). +-record(pseudo_move, {src, dst}). +-record(pseudo_ret, {}). +-record(pseudo_set, {imm, dst}). +-record(pseudo_tailcall, {funv, arity, stkargs, linkage}). +-record(pseudo_tailcall_prepare, {}). +-record(rdy, {dst}). +-record(sethi, {uimm22, dst}). +-record(store, {stop, src, base, disp}). +-record(fp_binary, {fp_binop, src1, src2, dst}). +-record(fp_unary, {fp_unop, src, dst}). +-record(pseudo_fload, {base, disp, dst, is_single}). +-record(pseudo_fmove, {src, dst}). +-record(pseudo_fstore, {src, base, disp}). + +%%% Function definitions. + +-include("../misc/hipe_consttab.hrl"). + +-record(defun, {mfa :: mfa(), formals, code, + data :: hipe_consttab(), + isclosure :: boolean(), + isleaf :: boolean(), + var_range, label_range}). diff --git a/lib/hipe/sparc/hipe_sparc_assemble.erl b/lib/hipe/sparc/hipe_sparc_assemble.erl new file mode 100644 index 0000000000..b534fe20ec --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_assemble.erl @@ -0,0 +1,588 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_assemble). +-export([assemble/4]). + +-include("../main/hipe.hrl"). % for VERSION_STRING, when_option +-include("hipe_sparc.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). +-include("../rtl/hipe_literals.hrl"). +-include("../misc/hipe_sdi.hrl"). +-undef(ASSERT). +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +assemble(CompiledCode, Closures, Exports, Options) -> + print("****************** Assembling *******************\n", [], Options), + %% + Code = [{MFA, + hipe_sparc:defun_code(Defun), + hipe_sparc:defun_data(Defun)} + || {MFA, Defun} <- CompiledCode], + %% + {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = + hipe_pack_constants:pack_constants(Code, 4), + %% + {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = + encode(translate(Code, ConstMap), Options), + print("Total num bytes=~w\n", [CodeSize], Options), + %% + SC = hipe_pack_constants:slim_constmap(ConstMap), + DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap), + SSE = slim_sorted_exportmap(ExportMap,Closures,Exports), + SlimRefs = hipe_pack_constants:slim_refs(AccRefs), + Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC}, + ConstAlign, ConstSize, + SC, + DataRelocs, % nee LM, LabelMap + SSE, + CodeSize,CodeBinary,SlimRefs, + 0,[] % ColdCodeSize, SlimColdRefs + ]), + %% + Bin. + +%%% +%%% Assembly Pass 1. +%%% Process initial {MFA,Code,Data} list. +%%% Translate each MFA's body, choosing operand & instruction kinds. +%%% +%%% Assembly Pass 2. +%%% Perform short/long form optimisation for jumps. +%%% +%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. +%%% + +translate(Code, ConstMap) -> + translate_mfas(Code, ConstMap, []). + +translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode) -> + {NewInsns,CodeSize,LabelMap} = + translate_insns(Insns, MFA, ConstMap, hipe_sdi:pass1_init(), 0, []), + translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode]); +translate_mfas([], _ConstMap, NewCode) -> + lists:reverse(NewCode). + +translate_insns([I|Insns], MFA, ConstMap, SdiPass1, Address, NewInsns) -> + NewIs = translate_insn(I, MFA, ConstMap), + add_insns(NewIs, Insns, MFA, ConstMap, SdiPass1, Address, NewInsns); +translate_insns([], _MFA, _ConstMap, SdiPass1, Address, NewInsns) -> + {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1), + {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}. + +add_insns([I|Is], Insns, MFA, ConstMap, SdiPass1, Address, NewInsns) -> + NewSdiPass1 = + case I of + {'.label',L,_} -> + hipe_sdi:pass1_add_label(SdiPass1, Address, L); + {bp_sdi,{_,_,{label,L}},_} -> % BP has 19-bit offset + SdiInfo = #sdi_info{incr=(12-4),lb=-16#40000*4,ub=16#3FFFF*4}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + %% {br_sdi,_,_} -> add_insns_br(I, SdiPass1, Address); + _ -> + SdiPass1 + end, + Address1 = Address + insn_size(I), + add_insns(Is, Insns, MFA, ConstMap, NewSdiPass1, Address1, [I|NewInsns]); +add_insns([], Insns, MFA, ConstMap, SdiPass1, Address, NewInsns) -> + translate_insns(Insns, MFA, ConstMap, SdiPass1, Address, NewInsns). + +-ifdef(notdef). % XXX: only for sparc64, alas +add_insns_br(I, SdiPass1, Address) -> % BR has 16-bit offset + {br_sdi,{_,_,_,{label,L}},_} = I, + SdiInfo = #sdi_info{incr=(12-4),lb=-16#8000*4,ub=16#7FFF*4}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo). +-endif. + +insn_size(I) -> + case I of + {'.label',_,_} -> 0; + {'.reloc',_,_} -> 0; + _ -> 4 % b{p,r}_sdi included in this case + end. + +translate_insn(I, MFA, ConstMap) -> % -> [{Op,Opnd,OrigI}] + case I of + #alu{} -> do_alu(I); + #bp{} -> do_bp(I); + %% #br{} -> do_br(I); + #call_rec{} -> do_call_rec(I); + #call_tail{} -> do_call_tail(I); + #comment{} -> []; + #jmp{} -> do_jmp(I); + #jmpl{} -> do_jmpl(I); + #label{} -> do_label(I); + %% pseudo_bp: eliminated before assembly + %% pseudo_br: eliminated before assembly + %% pseudo_call: eliminated before assembly + %% pseudo_call_prepare: eliminated before assembly + %% pseudo_move: eliminated before assembly + %% pseudo_ret: eliminated before assembly + #pseudo_set{} -> do_pseudo_set(I, MFA, ConstMap); + %% pseudo_tailcall: eliminated before assembly + %% pseudo_tailcall_prepare: eliminated before assembly + #rdy{} -> do_rdy(I); + #sethi{} -> do_sethi(I); + #store{} -> do_store(I); + #fp_binary{} -> do_fp_binary(I); + #fp_unary{} -> do_fp_unary(I); + #pseudo_fload{} -> do_pseudo_fload(I); + %% #pseudo_fmove: eliminated before assembly + #pseudo_fstore{} -> do_pseudo_fstore(I); + _ -> exit({?MODULE,translate_insn,I}) + end. + +do_alu(I) -> + #alu{aluop=AluOp,src1=Src1,src2=Src2,dst=Dst} = I, + NewDst = do_reg(Dst), + NewSrc1 = do_reg(Src1), + NewSrc2 = do_reg_or_imm(Src2), + [{AluOp, {NewSrc1,NewSrc2,NewDst}, I}]. + +do_bp(I) -> + #bp{'cond'=Cond,pred=Pred,label=Label} = I, + NewLabel = {label,Label}, + case Cond of + 'a' -> + [{ba, NewLabel, I}]; % 3 more offset bits + _ -> + NewCond = {'cond',Cond}, + NewPred = {pred,Pred}, + [{bp_sdi, {NewCond,NewPred,NewLabel}, I}] + end. + +-ifdef(notdef). % XXX: only for sparc64, alas +do_br(I) -> + #br{rcond=RCond,pred=Pred,src=Src,label=Label} = I, + NewRCond = {rcond,RCond}, + NewPred = {pred,Pred}, + NewSrc = do_reg(Src), + NewLabel = {label,Label}, + [{br_sdi, {NewRCond,NewPred,NewSrc,NewLabel}, I}]. +-endif. + +do_call_rec(I) -> + #call_rec{'fun'=Fun,sdesc=SDesc,linkage=Linkage} = I, + [{'.reloc', {call,Fun,Linkage}, #comment{term='fun'}}, + {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}, + {call, {disp30,0}, I}]. + +do_call_tail(I) -> + #call_tail{'fun'=Fun,linkage=Linkage} = I, + [{'.reloc', {call,Fun,Linkage}, #comment{term='fun'}}, + {call, {disp30,0}, I}]. + +do_jmp(I) -> + #jmp{src1=Src1,src2=Src2} = I, + NewSrc1 = do_reg(Src1), + NewSrc2 = do_reg_or_imm(Src2), + NewDst = {r,0}, + [{jmpl, {NewSrc1,NewSrc2,NewDst}, I}]. + +do_jmpl(I) -> + #jmpl{src=Src,sdesc=SDesc} = I, + NewSrc1 = do_reg(Src), + NewSrc2 = {simm13,0}, + NewDst = {r,15}, % %o7 + [{'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}, + {jmpl, {NewSrc1,NewSrc2,NewDst}, I}]. + +do_label(I) -> + #label{label=Label} = I, + [{'.label', Label, I}]. + +do_pseudo_set(I, MFA, ConstMap) -> + #pseudo_set{imm=Imm,dst=Dst} = I, + RelocData = + case Imm of + Atom when is_atom(Atom) -> + {load_atom, Atom}; +%%% {mfa,MFAorPrim,Linkage} -> +%%% Tag = +%%% case Linkage of +%%% remote -> remote_function; +%%% not_remote -> local_function +%%% end, +%%% {load_address, {Tag,untag_mfa_or_prim(MFAorPrim)}}; + {Label,constant} -> + ConstNo = find_const({MFA,Label}, ConstMap), + {load_address, {constant,ConstNo}}; + {Label,closure} -> + {load_address, {closure,Label}}; + {Label,c_const} -> + {load_address, {c_const,Label}} + end, + NewDst = do_reg(Dst), + [{'.reloc', RelocData, #comment{term=reloc}}, + {sethi, {{uimm22,0},NewDst}, I}, + {'or', {NewDst,{simm13,0},NewDst}, I}]. + +do_rdy(I) -> + #rdy{dst=Dst} = I, + NewDst = do_reg(Dst), + [{rd, {y,NewDst}, I}]. + +do_sethi(I) -> + #sethi{uimm22=#sparc_uimm22{value=UImm22},dst=Dst} = I, + NewUImm22 = {uimm22,UImm22}, + NewDst = do_reg(Dst), + [{sethi, {NewUImm22,NewDst}, I}]. + +do_store(I) -> + #store{stop=StOp,src=Src,base=Base,disp=Disp} = I, + NewSrc = do_reg(Src), + NewBase = do_reg(Base), + NewDisp = do_reg_or_imm(Disp), + [{StOp, {NewSrc,NewBase,NewDisp}, I}]. + +do_fp_binary(I) -> + #fp_binary{fp_binop=FpBinOp,src1=Src1,src2=Src2,dst=Dst} = I, + NewSrc1 = do_fpreg(Src1), + NewSrc2 = do_fpreg(Src2), + NewDst = do_fpreg(Dst), + [{FpBinOp, {NewSrc1,NewSrc2,NewDst}, I}]. + +do_fp_unary(I) -> + #fp_unary{fp_unop=FpUnOp,src=Src,dst=Dst} = I, + NewSrc = do_fpreg(Src), + NewDst = do_fpreg(Dst), + [{FpUnOp, {NewSrc,NewDst}, I}]. + +do_pseudo_fload(I) -> + #pseudo_fload{base=Base,disp=Disp,dst=Dst,is_single=IsSingle} = I, + NewBase = do_reg(Base), + #sparc_simm13{value=RawDisp} = Disp, + {fr,RawDst} = FrRawDst = do_fpreg(Dst), + case IsSingle of + true -> + [{'ldf', {NewBase,{simm13,RawDisp},FrRawDst}, I}]; + _ -> + [{'ldf', {NewBase,{simm13,RawDisp},FrRawDst}, I}, + {'ldf', {NewBase,{simm13,RawDisp+4},{fr,RawDst+1}}, I}] + end. + +do_pseudo_fstore(I) -> + #pseudo_fstore{src=Src,base=Base,disp=Disp} = I, + {fr,RawSrc} = FrRawSrc = do_fpreg(Src), + NewBase = do_reg(Base), + #sparc_simm13{value=RawDisp} = Disp, + [{'stf', {FrRawSrc,NewBase,{simm13,RawDisp}}, I}, + {'stf', {{fr,RawSrc+1},NewBase,{simm13,RawDisp+4}}, I}]. + +%% map a virtual double-precision fp reg in [0,15] to its +%% corresponding single-precision fp reg in [0,2,4,...,28,30] +do_fpreg(#sparc_temp{reg=Reg,type='double'}) + when is_integer(Reg), 0 =< Reg, Reg < 16 -> + {fr,2*Reg}. + +do_reg(#sparc_temp{reg=Reg,type=Type}) + when is_integer(Reg), 0 =< Reg, Reg < 32, Type =/= 'double' -> + {r,Reg}. + +do_reg_or_imm(Src) -> + case Src of + #sparc_temp{} -> + do_reg(Src); + #sparc_simm13{value=Value} when is_integer(Value), -4096 =< Value, Value =< 4095 -> + {simm13, Value band 16#1fff}; + #sparc_uimm5{value=Value} when is_integer(Value), 0 =< Value, Value =< 31 -> + {uimm5, Value}; + #sparc_uimm6{value=Value} when is_integer(Value), 0 =< Value, Value =< 63 -> + {uimm6, Value} + end. + +%%% +%%% Assembly Pass 3. +%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. +%%% Translate to a single binary code segment. +%%% Collect relocation patches. +%%% Build ExportMap (MFA-to-address mapping). +%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). +%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. +%%% + +encode(Code, Options) -> + CodeSize = compute_code_size(Code, 0), + ExportMap = build_export_map(Code, 0, []), + {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), + CodeBinary = list_to_binary(lists:reverse(AccCode)), + ?ASSERT(CodeSize =:= byte_size(CodeBinary)), + CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), + {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. + +compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> + compute_code_size(Code, Size+CodeSize); +compute_code_size([], Size) -> Size. + +build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> + build_export_map(Code, Address+CodeSize, [{Address,M,F,A}|ExportMap]); +build_export_map([], _Address, ExportMap) -> ExportMap. + +combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> + NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), + combine_label_maps(Code, Address+CodeSize, NewCLM); +combine_label_maps([], _Address, CLM) -> CLM. + +merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> + NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), + merge_label_map(Rest, MFA, Address, NewCLM); +merge_label_map([], _MFA, _Address, CLM) -> CLM. + +encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> + print("Generating code for: ~w\n", [MFA], Options), + print("Offset | Opcode | Instruction\n", [], Options), + {Address1,Relocs1,AccCode1} = + encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), + ExpectedAddress = Address + CodeSize, + ?ASSERT(Address1 =:= ExpectedAddress), + print("Finished.\n", [], Options), + encode_mfas(Code, Address1, AccCode1, Relocs1, Options); +encode_mfas([], _Address, AccCode, Relocs, _Options) -> + {AccCode,Relocs}. + +encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case I of + {'.label',L,_} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ?ASSERT(Address =:= LabelAddress), % sanity check + print_insn(Address, [], I, Options), + encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); + {'.reloc',Data,_} -> + Reloc = encode_reloc(Data, Address, FunAddress, LabelMap), + encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); + {bp_sdi,_,_} -> + encode_insns(fix_bp_sdi(I, Insns, Address, FunAddress, LabelMap), + Address, FunAddress, LabelMap, Relocs, AccCode, Options); + %% {br_sdi,_,_} -> + %% encode_insns(fix_br_sdi(I, Insns, Address, FunAddress, LabelMap), + %% Address, FunAddress, LabelMap, Relocs, AccCode, Options); + _ -> + {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap), + Word = hipe_sparc_encode:insn_encode(Op, Arg), + print_insn(Address, Word, I, Options), + Segment = <>, + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+4, FunAddress, LabelMap, Relocs, NewAccCode, Options) + end; +encode_insns([], Address, _FunAddress, _LabelMap, Relocs, AccCode, _Options) -> + {Address,Relocs,AccCode}. + +encode_reloc(Data, Address, FunAddress, LabelMap) -> + case Data of + {call,MFAorPrim,Linkage} -> + %% call_rec and call_tail are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case Linkage of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + {PatchTypeExt, Address, untag_mfa_or_prim(MFAorPrim)}; + {load_atom,Atom} -> + {?LOAD_ATOM, Address, Atom}; + {load_address,X} -> + {?LOAD_ADDRESS, Address, X}; + {sdesc,SDesc} -> + #sparc_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, + ExnRA = + case ExnLab of + [] -> []; % don't cons up a new one + ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress + end, + {?SDESC, Address, + ?STACK_DESC(ExnRA, FSize, Arity, Live)} + end. + +untag_mfa_or_prim(#sparc_mfa{m=M,f=F,a=A}) -> {M,F,A}; +untag_mfa_or_prim(#sparc_prim{prim=Prim}) -> Prim. + +fix_bp_sdi(I, Insns, InsnAddress, FunAddress, LabelMap) -> + {bp_sdi,Opnds,OrigI} = I, + {{'cond',Cond},{pred,Pred},Label} = Opnds, + {label,L} = Label, + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + BD = (LabelAddress - InsnAddress) div 4, + if BD >= -16#40000, BD =< 16#3FFFF -> + [{bp, Opnds, OrigI} | Insns]; + true -> + %% bp, L; Delay + %% --> + %% bp, 1f; Delay; ba L; nop; 1: + [Delay|Rest] = Insns, + NewCond = hipe_sparc:negate_cond(Cond), + NewPred = 1.0 - Pred, + [{bp, + {{'cond',NewCond},{pred,NewPred},'.+16'}, + #bp{'cond'=NewCond,pred=NewPred,label='.+16'}}, % pp will be ugly + Delay, % should be a NOP + {ba, Label, #bp{'cond'='a',pred=1.0,label=L}}, + {sethi, {{uimm22,0},{r,0}}, #comment{term=nop}} | + Rest] + end. + +-ifdef(notdef). % XXX: only for sparc64, alas +fix_br_sdi(I, Insns, InsnAddress, FunAddress, LabelMap) -> + {br_sdi,Opnds,OrigI} = I, + {{rcond,RCond},{pred,Pred},Src,{label,L}} = Opnds, + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + BD = (LabelAddress - InsnAddress) div 4, + if BD >= -16#8000, BD =< 16#7FFF -> + [{br, Opnds, OrigI} | Insns]; + true -> + %% br, reg, L; Delay + %% --> + %% br, reg, 1f; Delay; ba L; nop; 1: + [Delay|Rest] = Insns, + {reg,SrcReg} = Src, + NewRCond = hipe_sparc:negate_rcond(RCond), + NewPred = 1.0 - Pred, + [{br, + {{rcond,NewRCond},{pred,NewPred},Src,'.+16'}, + #br{rcond=NewRCond,pred=NewPred,src=SrcReg,label='.+16'}}, % pp will be ugly + Delay, % should be a NOP + {ba, {label,L}, #bp{'cond'='a',pred=1.0,label=L}}, + {sethi, {{uimm22,0},{r,0}}, #comment{term=nop}} | + Rest] + end. +-endif. + +fix_jumps(I, InsnAddress, FunAddress, LabelMap) -> + case I of + {ba, {label,L}, OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + BD = (LabelAddress - InsnAddress) div 4, + %% ensure BD fits in a 22 bit sign-extended field + ?ASSERT(BD =< 16#1FFFFF), + ?ASSERT(BD >= -16#200000), + {ba, {disp22,BD band 16#3FFFFF}, OrigI}; + {bp, {Cond,Pred,Target}, OrigI} -> + LabelAddress = + case Target of + {label,L} -> gb_trees:get(L, LabelMap) + FunAddress; + '.+16' -> InsnAddress + 16 + end, + BD = (LabelAddress - InsnAddress) div 4, + %% ensure BD fits in a 19 bit sign-extended field + ?ASSERT(BD =< 16#3FFFF), + ?ASSERT(BD >= -16#40000), + {bp, {Cond,px(Pred),{disp19,BD band 16#7FFFF}}, OrigI}; + %% {br, _, _} -> fix_br(I, InsnAddress, FunAddress, LabelMap); + _ -> I + end. + +-ifdef(notdef). % XXX: only for sparc64, alas +fix_br(I, InsnAddress, FunAddress, LabelMap) -> + {br, {RCond,Pred,Src,Target}, OrigI} = I, + LabelAddress = + case Target of + {label,L} -> gb_trees:get(L, LabelMap) + FunAddress; + '.+16' -> InsnAddress + 16 + end, + BD = (LabelAddress - InsnAddress) div 4, + %% ensure BD fits in a 16 bit sign-extended field + ?ASSERT(BD =< 16#7FFF), + ?ASSERT(BD >= -16#8000), + {br, {RCond,px(Pred),Src,{disp16,BD band 16#FFFF}}, OrigI}. +-endif. + +px({pred,Pred}) -> % XXX: use pt/pn throughout entire backend + {pred, if Pred >= 0.5 -> 'pt'; true -> 'pn' end}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_data_relocs(RefsFromConsts, LabelMap) -> + lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])). + +mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) -> + Map = [case Label of + {L,Pos} -> + Offset = find({MFA,L}, LabelMap), + {Pos,Offset}; + {sorted,Base,OrderedLabels} -> + {sorted, Base, [begin + Offset = find({MFA,L}, LabelMap), + {Order, Offset} + end + || {L,Order} <- OrderedLabels]} + end + || Label <- Labels], + %% msg("Map: ~w Map\n",[Map]), + mk_data_relocs(Rest, LabelMap, [Map,Acc]); +mk_data_relocs([],_,Acc) -> Acc. + +find({_MFA,_L} = MFAL, LabelMap) -> + gb_trees:get(MFAL, LabelMap). + +slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) -> + IsClosure = lists:member({M,F,A}, Closures), + IsExported = is_exported(F, A, Exports), + [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)]; +slim_sorted_exportmap([],_,_) -> []. + +is_exported(F, A, Exports) -> lists:member({F,A}, Exports). + +%%% +%%% Assembly listing support (pp_asm option). +%%% + +print(String, Arglist, Options) -> + ?when_option(pp_asm, Options, io:format(String, Arglist)). + +print_insn(Address, Word, I, Options) -> + ?when_option(pp_asm, Options, print_insn_2(Address, Word, I)). + +print_insn_2(Address, Word, {_,_,OrigI}) -> + io:format("~8.16.0b | ", [Address]), + print_code_list(word_to_bytes(Word), 0), + hipe_sparc_pp:pp_insn(OrigI). + +word_to_bytes(W) -> + case W of + [] -> []; % label or other pseudo instruction + _ -> [(W bsr 24) band 16#FF, (W bsr 16) band 16#FF, + (W bsr 8) band 16#FF, W band 16#FF] + end. + +print_code_list([Byte|Rest], Len) -> + print_byte(Byte), + print_code_list(Rest, Len+1); +print_code_list([], Len) -> + fill_spaces(8-(Len*2)), + io:format(" | "). + +print_byte(Byte) -> + io:format("~2.16.0b", [Byte band 16#FF]). + +fill_spaces(N) when N > 0 -> + io:format(" "), + fill_spaces(N-1); +fill_spaces(0) -> + []. + +%%% +%%% Lookup a constant in a ConstMap. +%%% + +find_const({MFA,Label},[{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) -> + ConstNo; +find_const(N,[_|R]) -> + find_const(N,R); +find_const(C,[]) -> + ?EXIT({constant_not_found,C}). diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl new file mode 100644 index 0000000000..d938a3bdf1 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_cfg.erl @@ -0,0 +1,134 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_cfg). + +-export([init/1, + labels/1, start_label/1, + succ/2, + bb/2, bb_add/3]). +-export([postorder/1, reverse_postorder/1]). +-export([linearise/1]). +-export([params/1]). +-export([arity/1]). % for linear scan + +-define(SPARC_CFG, true). % needed for cfg.inc + +-include("../main/hipe.hrl"). +-include("hipe_sparc.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +%%---------------------------------------------------------------------------- +%% CFG interface to SPARC +%%---------------------------------------------------------------------------- + +init(Defun) -> + Code = hipe_sparc:defun_code(Defun), + StartLab = hipe_sparc:label_label(hd(Code)), + Data = hipe_sparc:defun_data(Defun), + IsClosure = hipe_sparc:defun_is_closure(Defun), + Name = hipe_sparc:defun_mfa(Defun), + IsLeaf = hipe_sparc:defun_is_leaf(Defun), + Formals = hipe_sparc:defun_formals(Defun), + CFG = mk_empty_cfg(Name, StartLab, Data, IsClosure, IsLeaf, Formals), + take_bbs(Code, CFG). + +is_branch(I) -> + case I of + #bp{'cond'='a'} -> true; + %% not br + #call_tail{} -> true; + #jmp{} -> true; + %% not jmpl + #pseudo_bp{} -> true; + %% #pseudo_br{} -> true; + #pseudo_call{} -> true; + #pseudo_ret{} -> true; + #pseudo_tailcall{} -> true; + _ -> false + end. + +branch_successors(Branch) -> + case Branch of + #bp{'cond'='a',label=Label} -> [Label]; + #call_tail{} -> []; + #jmp{labels=Labels} -> Labels; + #pseudo_bp{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; + %% #pseudo_br{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; + #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=ExnLab}} -> + case ExnLab of + [] -> [ContLab]; + _ -> [ContLab,ExnLab] + end; + #pseudo_ret{} -> []; + #pseudo_tailcall{} -> [] + end. + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). +fails_to(_Instr) -> []. +-endif. + +-ifdef(notdef). +redirect_jmp(I, Old, New) -> + case I of + #b_label{label=Label} -> + if Old =:= Label -> I#b_label{label=New}; + true -> I + end; + #pseudo_bc{true_label=TrueLab, false_label=FalseLab} -> + I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New}; + true -> I + end, + if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; + true -> I1 + end; + %% handle pseudo_call too? + _ -> I + end. +-endif. + +mk_goto(Label) -> + hipe_sparc:mk_b_label(Label). + +is_label(I) -> + hipe_sparc:is_label(I). + +label_name(Label) -> + hipe_sparc:label_label(Label). + +mk_label(Name) -> + hipe_sparc:mk_label(Name). + +linearise(CFG) -> % -> defun, not insn list + MFA = function(CFG), + Formals = params(CFG), + Code = linearize_cfg(CFG), + Data = data(CFG), + VarRange = hipe_gensym:var_range(sparc), + LabelRange = hipe_gensym:label_range(sparc), + IsClosure = is_closure(CFG), + IsLeaf = is_leaf(CFG), + hipe_sparc:mk_defun(MFA, Formals, IsClosure, IsLeaf, + Code, Data, VarRange, LabelRange). + +arity(CFG) -> + {_M, _F, A} = function(CFG), + A. diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl new file mode 100644 index 0000000000..d59ad436b5 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_defuse.erl @@ -0,0 +1,143 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_defuse). +-export([insn_def_all/1, insn_use_all/1]). +-export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_def_fpr/1, insn_use_fpr/1]). +-include("hipe_sparc.hrl"). + +%%% +%%% Defs and uses for both general-purpose and floating-point registers. +%%% This is needed for the frame module, alas. +%%% +insn_def_all(I) -> + addtemps(insn_def_fpr(I), insn_def_gpr(I)). + +insn_use_all(I) -> + addtemps(insn_use_fpr(I), insn_use_gpr(I)). + +%%% +%%% Defs and uses for general-purpose (integer) registers only. +%%% +insn_def_gpr(I) -> + case I of + #alu{dst=Dst} -> [Dst]; + %% #jmpl{} -> [hipe_sparc:mk_ra()]; % XXX: can jmpl occur this early? + #pseudo_call{} -> call_clobbered_gpr(); + #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_set{dst=Dst} -> [Dst]; + #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); + #rdy{dst=Dst} -> [Dst]; + #sethi{dst=Dst} -> [Dst]; + _ -> [] + end. + +call_clobbered_gpr() -> + [hipe_sparc:mk_temp(R, T) + || {R,T} <- hipe_sparc_registers:call_clobbered() ++ all_fp_pseudos()]. + +all_fp_pseudos() -> []. % XXX: for now + +tailcall_clobbered_gpr() -> + [hipe_sparc:mk_temp(R, T) + || {R,T} <- hipe_sparc_registers:tailcall_clobbered() ++ all_fp_pseudos()]. + +insn_use_gpr(I) -> + case I of + #alu{src1=Src1,src2=Src2} -> addsrc(Src2, [Src1]); + %% #br{src=Src} -> [Src]; % XXX: can br occur this early? + #jmp{src1=Src1,src2=Src2} -> addsrc(Src2, [Src1]); + %% #jmpl{src=Src} -> [Src]; % XXX: can jmpl occur this early? + %% #pseudo_br{src=Src} -> [Src]; + #pseudo_call{funv=FunV,sdesc=#sparc_sdesc{arity=Arity}} -> + funv_use(FunV, arity_use_gpr(Arity)); + #pseudo_move{src=Src} -> [Src]; + #pseudo_ret{} -> [hipe_sparc:mk_rv()]; + #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} -> + addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity)))); + #store{src=Src,base=Base,disp=Disp} -> + addtemp(Src, addsrc(Disp, [Base])); + #pseudo_fload{base=Base} -> [Base]; + #pseudo_fstore{base=Base} -> [Base]; + _ -> [] + end. + +arity_use_gpr(Arity) -> + [hipe_sparc:mk_temp(R, 'tagged') + || R <- hipe_sparc_registers:args(Arity)]. + +funv_use(FunV, Set) -> + case FunV of + #sparc_temp{} -> addtemp(FunV, Set); + _ -> Set + end. + +addsrcs([Arg|Args], Set) -> + addsrcs(Args, addsrc(Arg, Set)); +addsrcs([], Set) -> + Set. + +addsrc(Src, Set) -> + case Src of + #sparc_temp{} -> addtemp(Src, Set); + _ -> Set + end. + +%%% +%%% Defs and uses for floating-point registers only. +%%% +insn_def_fpr(I) -> + case I of + #pseudo_call{} -> call_clobbered_fpr(); + #fp_binary{dst=Dst} -> [Dst]; + #fp_unary{dst=Dst} -> [Dst]; + #pseudo_fload{dst=Dst} -> [Dst]; + #pseudo_fmove{dst=Dst} -> [Dst]; + _ -> [] + end. + +call_clobbered_fpr() -> + [hipe_sparc:mk_temp(R, 'double') || R <- hipe_sparc_registers:allocatable_fpr()]. + +insn_use_fpr(I) -> + case I of + #fp_binary{src1=Src1,src2=Src2} -> addtemp(Src1, [Src2]); + #fp_unary{src=Src} -> [Src]; + #pseudo_fmove{src=Src} -> [Src]; + #pseudo_fstore{src=Src} -> [Src]; + _ -> [] + end. + +%%% +%%% Auxiliary operations on sets of temps +%%% These sets are small. No point using gb_trees, right? +%%% + +addtemps([Arg|Args], Set) -> + addtemps(Args, addtemp(Arg, Set)); +addtemps([], Set) -> + Set. + +addtemp(Temp, Set) -> + case lists:member(Temp, Set) of + false -> [Temp|Set]; + _ -> Set + end. diff --git a/lib/hipe/sparc/hipe_sparc_encode.erl b/lib/hipe/sparc/hipe_sparc_encode.erl new file mode 100644 index 0000000000..8a28f33ab9 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_encode.erl @@ -0,0 +1,476 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Encode symbolic SPARC instructions to binary form. +%%% Copyright (C) 2007-2008 Mikael Pettersson + +-module(hipe_sparc_encode). + +-export([insn_encode/2]). + +%%-define(TESTING,1). +-ifdef(TESTING). +-export([dotest/0, dotest/1]). +-endif. + +-define(ASSERT(G), + if G -> []; + true -> exit({assertion_failed,?MODULE,?LINE,??G}) + end). + +bf(LeftBit, RightBit, Value) -> + ?ASSERT(32 > LeftBit), + ?ASSERT(LeftBit >= RightBit), + ?ASSERT(RightBit >= 0), + ?ASSERT(Value >= 0), + ?ASSERT(Value < (1 bsl ((LeftBit - RightBit) + 1))), + Value bsl RightBit. + +-define(BF(LB,RB,V), bf(LB,RB,V)). +-define(BIT(Pos,Val), ?BF(Pos,Pos,Val)). +%%-define(BITS(N,Val), ?BF(N,0,Val)). + +%%% +%%% Instruction Formats +%%% + +format1(Disp30) -> + ?BIT(30,1) bor ?BF(29,0,Disp30). + +format2a(Rd, Op2, Imm22) -> + ?BF(29,25,Rd) bor ?BF(24,22,Op2) bor ?BF(21,0,Imm22). + +format2b(A, Cond, Op2, Disp22) -> + ?BIT(29,A) bor ?BF(28,25,Cond) bor ?BF(24,22,Op2) bor ?BF(21,0,Disp22). + +format2c(A, Cond, Op2, CC1, CC0, P, Disp19) -> + ?BIT(29,A) bor ?BF(28,25,Cond) bor ?BF(24,22,Op2) bor ?BIT(21,CC1) + bor ?BIT(20,CC0) bor ?BIT(19,P) bor ?BF(18,0,Disp19). + +format2d(A, RCond, Op2, P, Rs1, Disp16) -> + D16Hi = Disp16 bsr 14, + D16Lo = Disp16 band 16#3FFF, + ?BIT(29,A) bor ?BF(27,25,RCond) bor ?BF(24,22,Op2) bor ?BF(21,20,D16Hi) + bor ?BIT(19,P) bor ?BF(18,14,Rs1) bor ?BF(13,0,D16Lo). + +format3common(Op, Rd, Op3, Rs1) -> % format 3, bits 31..14 + ?BF(31,30,Op) bor ?BF(29,25,Rd) bor ?BF(24,19,Op3) bor ?BF(18,14,Rs1). + +format3a(Op, Rd, Op3, Rs1, Rs2) -> + format3common(Op, Rd, Op3, Rs1) bor ?BF(4,0,Rs2). + +format3ax(Op, Rd, Op3, Rs1, Rs2) -> + format3a(Op, Rd, Op3, Rs1, Rs2) bor ?BIT(12,1). + +format3b(Op, Rd, Op3, Rs1, Simm13) -> + format3common(Op, Rd, Op3, Rs1) bor ?BIT(13,1) bor ?BF(12,0,Simm13). + +format3b32(Op, Rd, Op3, Rs1, Shcnt32) -> + format3a(Op, Rd, Op3, Rs1, Shcnt32) bor ?BIT(13,1). + +format3b64(Op, Rd, Op3, Rs1, Shcnt64) -> + format3common(Op, Rd, Op3, Rs1) bor ?BIT(13,1) bor ?BF(5,0,Shcnt64). + +format3ab(Op, {r,Rd}, Op3, {r,Rs1}, Src2) -> + case Src2 of + {r,Rs2} -> + format3a(Op, Rd, Op3, Rs1, Rs2); + {simm13,Simm13} -> + format3b(Op, Rd, Op3, Rs1, Simm13) + end. + +format3ab({Rs1,Src2,Rd}, Op3, Op) -> format3ab(Op, Rd, Op3, Rs1, Src2). + +-ifdef(notdef). +format3c(Op, Rd, Op3, Rs1, Opf, Rs2) -> + format3h(Op, Rd, Op3, Rs1) bor (Opf bsl 5) bor Rs2. + +format3d(Op, Rd, Op3, Rs1, I, Rs2) -> + format3h(Op, Rd, Op3, Rs1) bor (I bsl 13) bor Rs2. +-endif. + +%%% +%%% Instruction Operands +%%% + +'cond'(Cond) -> + case Cond of + 'n' -> 2#0000; + 'e' -> 2#0001; + 'le' -> 2#0010; + 'l' -> 2#0011; + 'leu' -> 2#0100; + 'lu' -> 2#0101; % a.k.a. 'cs' + 'neg' -> 2#0110; + 'vs' -> 2#0111; + 'a' -> 2#1000; + 'ne' -> 2#1001; + 'g' -> 2#1010; + 'ge' -> 2#1011; + 'gu' -> 2#1100; + 'geu' -> 2#1101; % a.k.a. 'cc' + 'pos' -> 2#1110; + 'vc' -> 2#1111 + end. + +rcond(RCond) -> + case RCond of + 'z' -> 2#001; + 'lez' -> 2#010; + 'lz' -> 2#011; + 'nz' -> 2#101; + 'gz' -> 2#110; + 'gez' -> 2#111 + end. + +pred(Pred) -> + case Pred of + 'pt' -> 1; + 'pn' -> 0 + end. + +%%% +%%% Branch Instructions +%%% + +call({disp30,Disp30}) -> + format1(Disp30). + +ba({disp22,Disp22}) -> % V7 Bicc, only used for unconditional branches + format2b(0, 'cond'('a'), 2#010, Disp22). + +bp({{'cond',Cond},{pred,Pred},{disp19,Disp19}}) -> + %% XXX: sparc64 will need CC1=1 here + format2c(0, 'cond'(Cond), 2#001, 0, 0, pred(Pred), Disp19). + +br({{rcond,RCond},{pred,Pred},{r,Rs1},{disp16,Disp16}}) -> + format2d(0, rcond(RCond), 2#011, pred(Pred), Rs1, Disp16). + +%%% +%%% Integer Arithmetic Instructions +%%% + +alu(Opnds, Op3) -> format3ab(Opnds, Op3, 2#10). + +add(Opnds) -> alu(Opnds, 2#000000). +addcc(Opnds) -> alu(Opnds, 2#010000). +%%addc(Opnds) -> alu(Opnds, 2#001000). +%%addccc(Opnds) -> alu(Opnds, 2#011000). + +sub(Opnds) -> alu(Opnds, 2#000100). +subcc(Opnds) -> alu(Opnds, 2#010100). +%%subc(Opnds) -> alu(Opnds, 2#001100). % XXX: hipe_sparc_op has bug here +%%subccc(Opnds) -> alu(Opnds, 2#011100). % XXX: hipe_sparc_op has bug here + +%%taddcc(Opnds) -> alu(Opnds, 2#100000). +%%taddcctv(Opnds) -> alu(Opnds, 2#100010). + +%%tsubcc(Opnds) -> alu(Opnds, 2#100001). +%%tsubcctv(Opnds) -> alu(Opnds, 2#100011). + +mulx(Opnds) -> alu(Opnds, 2#001001). +%%sdivx(Opnds) -> alu(Opnds, 2#101101). +%%udivx(Opnds) -> alu(Opnds, 2#001101). + +%%umul(Opnds) -> alu(Opnds, 2#001010). +smul(Opnds) -> alu(Opnds, 2#001011). +%%umulcc(Opnds) -> alu(Opnds, 2#011010). +%%smulcc(Opnds) -> alu(Opnds, 2#011011). + +'and'(Opnds) -> alu(Opnds, 2#000001). +andcc(Opnds) -> alu(Opnds, 2#010001). +%%andn(Opnds) -> alu(Opnds, 2#000101). +%%andncc(Opnds) -> alu(Opnds, 2#010101). + +'or'(Opnds) -> alu(Opnds, 2#000010). +orcc(Opnds) -> alu(Opnds, 2#010010). +%%orn(Opnds) -> alu(Opnds, 2#000110). +%%orncc(Opnds) -> alu(Opnds, 2#010110). + +'xor'(Opnds) -> alu(Opnds, 2#000011). +xorcc(Opnds) -> alu(Opnds, 2#010011). +%%xnor(Opnds) -> alu(Opnds, 2#000111). +%%xnorcc(Opnds) -> alu(Opnds, 2#010111). + +shift32({{r,Rs1},Src2,{r,Rd}}, Op3) -> + case Src2 of + {r,Rs2} -> + format3a(2#10, Rd, Op3, Rs1, Rs2); + {uimm5,Shcnt32} -> + format3b32(2#10, Rd, Op3, Rs1, Shcnt32) + end. + +shift64({{r,Rs1},Src2,{r,Rd}}, Op3) -> + case Src2 of + {r,Rs2} -> + format3ax(2#10, Rd, Op3, Rs1, Rs2); + {uimm6,Shcnt64} -> + format3b64(2#10, Rd, Op3, Rs1, Shcnt64) + end. + +sll(Opnds) -> shift32(Opnds, 2#100101). +sllx(Opnds) -> shift64(Opnds, 2#100101). +srl(Opnds) -> shift32(Opnds, 2#100110). +srlx(Opnds) -> shift64(Opnds, 2#100110). +sra(Opnds) -> shift32(Opnds, 2#100111). +srax(Opnds) -> shift64(Opnds, 2#100111). + +jmpl(Opnds) -> alu(Opnds, 2#111000). + +rd({y,{r,Rd}}) -> format3a(2#10, Rd, 2#101000, 0, 0). + +sethi({{uimm22,UImm22},{r,Rd}}) -> format2a(Rd, 2#100, UImm22). + +ld(Opnds, Op3) -> format3ab(Opnds, Op3, 2#11). + +ldsb(Opnds) -> ld(Opnds, 2#001001). +ldsh(Opnds) -> ld(Opnds, 2#001010). +ldsw(Opnds) -> ld(Opnds, 2#001000). +ldub(Opnds) -> ld(Opnds, 2#000001). +lduh(Opnds) -> ld(Opnds, 2#000010). +lduw(Opnds) -> ld(Opnds, 2#000000). +ldx(Opnds) -> ld(Opnds, 2#001011). +%%ldd(Opnds) -> ld(Opnds, 2#000011). + +st({Rd,Rs1,Src2}, Op3) -> format3ab(2#11, Rd, Op3, Rs1, Src2). + +stb(Opnds) -> st(Opnds, 2#000101). +%%sth(Opnds) -> st(Opnds, 2#000110). +stw(Opnds) -> st(Opnds, 2#000100). +stx(Opnds) -> st(Opnds, 2#001110). +%%std(Opnds) -> st(Opnds, 2#000111). + +%%% +%%% Floating-Point Instructions +%%% + +format3f(Rd, Rs1, Opf, Rs2) -> + format3a(2#10, Rd, 2#110100, Rs1, Rs2) bor ?BF(13,5,Opf). + +fpop1binary(Opf, {{fr,Rs1},{fr,Rs2},{fr,Rd}}) -> + format3f(Rd, Rs1, Opf, Rs2). + +faddd(Opnds) -> fpop1binary(2#001000010, Opnds). +fdivd(Opnds) -> fpop1binary(2#001001110, Opnds). +fmuld(Opnds) -> fpop1binary(2#001001010, Opnds). +fsubd(Opnds) -> fpop1binary(2#001000110, Opnds). + +fpop1unary(Opf, {{fr,Rs2},{fr,Rd}}) -> + format3f(Rd, 0, Opf, Rs2). + +fitod(Opnds) -> fpop1unary(2#011001000, Opnds). +fmovd(Opnds) -> fpop1unary(2#000000010, Opnds). +fnegd(Opnds) -> fpop1unary(2#000000110, Opnds). + +ldf({{r,Rs1},{simm13,Simm13},{fr,Rd}}) -> + format3b(2#11, Rd, 2#100000, Rs1, Simm13). + +stf({{fr,Rd},{r,Rs1},{simm13,Simm13}}) -> + format3b(2#11, Rd, 2#100100, Rs1, Simm13). + +-ifdef(notdef). +fpop1(Rs1,Opf,Rs2,Rd) -> format3a(2#10, Rd, 2#110100, Rs1, Opf, Rs2). +%% fpop2(Rs1,Opf,Rs2,Rd) -> format3a(2#10, Rd, 2#110101, Rs1, Opf, Rs2). + +%% fxtos(Rs2, Rd) -> fpop1(0,2#010000100,Rs2,Rd). +%% fxtod(Rs2, Rd) -> fpop1(0,2#010001000,Rs2,Rd). +%% fxtoq(Rs2, Rd) -> fpop1(0,2#010001100,Rs2,Rd). +fitos(Rs2, Rd) -> fpop1(0,2#011000100,Rs2,Rd). +fitoq(Rs2, Rd) -> fpop1(0,2#011001100,Rs2,Rd). + +%% fstox(Rs2, Rd) -> fpop1(0,2#010000001,Rs2,Rd). +%% fdtox(Rs2, Rd) -> fpop1(0,2#010000010,Rs2,Rd). +%% fqtox(Rs2, Rd) -> fpop1(0,2#010000011,Rs2,Rd). +%% fstoi(Rs2, Rd) -> fpop1(0,2#011010001,Rs2,Rd). +%% fdtoi(Rs2, Rd) -> fpop1(0,2#011010010,Rs2,Rd). +%% fqtoi(Rs2, Rd) -> fpop1(0,2#011010011,Rs2,Rd). + +%% fstod(Rs2, Rd) -> fpop1(0,2#011001001,Rs2,Rd). +%% fstoq(Rs2, Rd) -> fpop1(0,2#011001101,Rs2,Rd). +%% fdtos(Rs2, Rd) -> fpop1(0,2#011000110,Rs2,Rd). +%% fdtoq(Rs2, Rd) -> fpop1(0,2#011001110,Rs2,Rd). +%% fqtos(Rs2, Rd) -> fpop1(0,2#011000111,Rs2,Rd). +%% fqtod(Rs2, Rd) -> fpop1(0,2#011001011,Rs2,Rd). + +fmovs(Rs2, Rd) -> fpop1(0,2#000000001,Rs2,Rd). +fnegs(Rs2, Rd) -> fpop1(0,2#000000101,Rs2,Rd). +fabss(Rs2, Rd) -> fpop1(0,2#000001001,Rs2,Rd). +fabsd(Rs2, Rd) -> fpop1(0,2#000001010,Rs2,Rd). +fmovq(Rs2, Rd) -> fpop1(0,2#000000011,Rs2,Rd). +fnegq(Rs2, Rd) -> fpop1(0,2#000000111,Rs2,Rd). +fabsq(Rs2, Rd) -> fpop1(0,2#000001011,Rs2,Rd). + +%% fsqrts(Rs2, Rd) -> fpop1(0,2#000101001,Rs2,Rd). +%% fsqrtd(Rs2, Rd) -> fpop1(0,2#000101010,Rs2,Rd). +%% fsqrtq(Rs2, Rd) -> fpop1(0,2#000101011,Rs2,Rd). + +fadds(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001000001,Rs2,Rd). +faddq(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001000011,Rs2,Rd). +fsubs(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001000101,Rs2,Rd). +fsubq(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001000111,Rs2,Rd). + +fmuls(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001001001,Rs2,Rd). +fmulq(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001001011,Rs2,Rd). +%% fsmuld(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001101001,Rs2,Rd). +%% fdmulq(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001101110,Rs2,Rd). +fdivs(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001001101,Rs2,Rd). +fdivq(Rs1, Rs2, Rd) -> fpop1(Rs1,2#001001111,Rs2,Rd). + +%% Uses fcc0 +%% fcmps(Rs1, Rs2) -> fpop2(Rs1,2#001010001,Rs2,0). +%% fcmpd(Rs1, Rs2) -> fpop2(Rs1,2#001010010,Rs2,0). +%% fcmpq(Rs1, Rs2) -> fpop2(Rs1,2#001010011,Rs2,0). +%% fcmpes(Rs1, Rs2) -> fpop2(Rs1,2#001010101,Rs2,0). +%% fcmped(Rs1, Rs2) -> fpop2(Rs1,2#001010110,Rs2,0). +%% fcmpeq(Rs1, Rs2) -> fpop2(Rs1,2#001010111,Rs2,0). + +%% fcmps(N, Rs1, Rs2) -> fpcn(N,2#001010001,Rs1,Rs2). +%% fcmpd(N, Rs1, Rs2) -> fpcn(N,2#001010010,Rs1,Rs2). +%% fcmpq(N, Rs1, Rs2) -> fpcn(N,2#001010011,Rs1,Rs2). +%% fcmpes(N, Rs1, Rs2) -> fpcn(N,2#001010101,Rs1,Rs2). +%% fcmped(N, Rs1, Rs2) -> fpcn(N,2#001010110,Rs1,Rs2). +%% fcmpeq(N, Rs1, Rs2) -> fpcn(N,2#001010111,Rs1,Rs2). + +stfi(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100100, Rs1, Offset). +stdf(Rd, Rs1, Rs2) -> format3a(2#11, Rd, 2#100111, Rs1, 0, Rs2). +stdfi(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100111, Rs1, Offset). +stqf(Rd, Rs1, Rs2) -> format3a(2#11, Rd, 2#100110, Rs1, 0, Rs2). +stqfi(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100110, Rs1, Offset). +%% stfsr(Rd, Rs1, Rs2) -> format3a(2#11, Rd, 2#100101, Rs1, 0, Rs2). +%% stfsri(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100101, Rs1, Offset). + +ldfi(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100000, Rs1, Offset). +lddf(Rd, Rs1, Rs2) -> format3a(2#11, Rd, 2#100011, Rs1, 0, Rs2). +lddfi(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100011, Rs1, Offset). +ldqf(Rd, Rs1, Rs2) -> format3a(2#11, Rd, 2#100010, Rs1, 0, Rs2). +ldqfi(Rd, Rs1, Offset) -> format3b(2#11, Rd, 2#100010, Rs1, Offset). +%% ldxfsr(Rs1, Rs2) -> format3a(2#11, 1, 2#100001, Rs1, 0, Rs2). +%% ldxfsri(Rs1, Offset) -> format3b(2#11, 1, 2#100001, Rs1, Offset). + +%% fpcn(N, Opf, Rs1, Rs2) -> +%% case N of +%% 0 -> fpc0(Opf, Rs1, Rs2); +%% 1 -> fpc1(Opf, Rs1, Rs2); +%% 2 -> fpc2(Opf, Rs1, Rs2); +%% 3 -> fpc3(Opf, Rs1, Rs2) +%% end. + +%% fpc0(Opf, Rs1, Rs2) -> format3c(2#10, 2#00000, 2#110101, Rs1, Opf, Rs2). +%% fpc1(Opf, Rs1, Rs2) -> format3c(2#10, 2#00001, 2#110101, Rs1, Opf, Rs2). +%% fpc2(Opf, Rs1, Rs2) -> format3c(2#10, 2#00010, 2#110101, Rs1, Opf, Rs2). +%% fpc3(Opf, Rs1, Rs2) -> format3c(2#10, 2#00011, 2#110101, Rs1, Opf, Rs2). +-endif. % FP insns + +%%% +%%% Main Encode Dispatch +%%% + +insn_encode(Op, Opnds) -> + case Op of + 'add' -> add(Opnds); + 'addcc' -> addcc(Opnds); + 'and' -> 'and'(Opnds); + 'andcc' -> andcc(Opnds); + 'ba' -> ba(Opnds); + 'bp' -> bp(Opnds); + 'br' -> br(Opnds); + 'call' -> call(Opnds); + 'jmpl' -> jmpl(Opnds); + 'ldsb' -> ldsb(Opnds); + 'ldsh' -> ldsh(Opnds); + 'ldsw' -> ldsw(Opnds); + 'ldub' -> ldub(Opnds); + 'lduh' -> lduh(Opnds); + 'lduw' -> lduw(Opnds); + 'ldx' -> ldx(Opnds); + 'mulx' -> mulx(Opnds); + 'or' -> 'or'(Opnds); + 'orcc' -> orcc(Opnds); + 'rd' -> rd(Opnds); + 'sethi' -> sethi(Opnds); + 'sll' -> sll(Opnds); + 'sllx' -> sllx(Opnds); + 'smul' -> smul(Opnds); + 'sra' -> sra(Opnds); + 'srax' -> srax(Opnds); + 'srl' -> srl(Opnds); + 'srlx' -> srlx(Opnds); + 'stb' -> stb(Opnds); + 'stw' -> stw(Opnds); + 'stx' -> stx(Opnds); + 'sub' -> sub(Opnds); + 'subcc' -> subcc(Opnds); + 'xor' -> 'xor'(Opnds); + 'xorcc' -> xorcc(Opnds); + 'faddd' -> faddd(Opnds); + 'fdivd' -> fdivd(Opnds); + 'fmuld' -> fmuld(Opnds); + 'fsubd' -> fsubd(Opnds); + 'fitod' -> fitod(Opnds); + 'fmovd' -> fmovd(Opnds); + 'fnegd' -> fnegd(Opnds); + 'ldf' -> ldf(Opnds); + 'stf' -> stf(Opnds); + _ -> exit({?MODULE,insn_encode,Op}) + end. + +%%% +%%% Testing Interface +%%% + +-ifdef(TESTING). + +say(OS, Str) -> + file:write(OS, Str). + +hex_digit(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, [hex_digit(Byte bsr 4)]), + say(OS, [hex_digit(Byte)]). + +say_word(OS, Word) -> + say(OS, "0x"), + say_byte(OS, Word bsr 24), + say_byte(OS, Word bsr 16), + say_byte(OS, Word bsr 8), + say_byte(OS, Word). + +t(OS, Op, Opnds) -> + Word = insn_encode(Op, Opnds), + say(OS, "\t.long "), + say_word(OS, Word), + say(OS, "\n"). + +dotest1(OS) -> + say(OS, "\t.text\n\t.align 4\n"), + []. + +dotest() -> dotest1(group_leader()). + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). + +-endif. diff --git a/lib/hipe/sparc/hipe_sparc_finalise.erl b/lib/hipe/sparc/hipe_sparc_finalise.erl new file mode 100644 index 0000000000..b44a21f7c0 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_finalise.erl @@ -0,0 +1,138 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_finalise). +-export([finalise/1]). +-include("hipe_sparc.hrl"). + +finalise(Defun) -> + #defun{code=Code0} = Defun, + Code1 = peep(expand(Code0)), + Defun#defun{code=Code1}. + +expand(Insns) -> + expand_list(Insns, []). + +expand_list([I|Insns], Accum) -> + expand_list(Insns, expand_insn(I, Accum)); +expand_list([], Accum) -> + lists:reverse(Accum). + +expand_insn(I, Accum) -> + case I of + #bp{'cond'='a'} -> + [hipe_sparc:mk_nop(), + I | + Accum]; + #call_rec{} -> + [hipe_sparc:mk_nop(), + I | + Accum]; + #call_tail{} -> + RA = hipe_sparc:mk_ra(), + TempRA = hipe_sparc:mk_temp1(), + [hipe_sparc:mk_mov(TempRA, RA), + I, % becomes a call, which clobbers RA + hipe_sparc:mk_mov(RA, TempRA) | + Accum]; + #jmp{} -> + [hipe_sparc:mk_nop(), + I | + Accum]; + #pseudo_bp{'cond'=Cond,true_label=TrueLab,false_label=FalseLab, pred=Pred} -> + [hipe_sparc:mk_nop(), + hipe_sparc:mk_b_label(FalseLab), + hipe_sparc:mk_nop(), + hipe_sparc:mk_bp(Cond, TrueLab, Pred) | + Accum]; + %% #pseudo_br{} -> expand_pseudo_br(I, Accum); + #pseudo_call{funv=FunV,sdesc=SDesc,contlab=ContLab,linkage=Linkage} -> + [hipe_sparc:mk_nop(), + hipe_sparc:mk_b_label(ContLab), + hipe_sparc:mk_nop(), + case FunV of + #sparc_temp{} -> + hipe_sparc:mk_jmpl(FunV, SDesc); + _ -> + hipe_sparc:mk_call_rec(FunV, SDesc, Linkage) + end | + Accum]; + #pseudo_ret{} -> + RA = hipe_sparc:mk_ra(), + [hipe_sparc:mk_nop(), + hipe_sparc:mk_jmp(RA, hipe_sparc:mk_simm13(8), []) | + Accum]; + #pseudo_tailcall_prepare{} -> + Accum; + _ -> + XXX = + case I of + #alu{} -> true; + #comment{} -> true; + #label{} -> true; + #pseudo_set{} -> true; + #rdy{} -> true; + #sethi{} -> true; + #store{} -> true; + #bp{} -> false; + %% #br{} -> false; + #call_rec{} -> false; + #call_tail{} -> false; + #jmp{} -> false; + #jmpl{} -> false; + #pseudo_bp{} -> false; + %% #pseudo_br{} -> false; + #pseudo_call{} -> false; + #pseudo_call_prepare{} -> false; + #pseudo_move{} -> false; + #pseudo_ret{} -> false; + #pseudo_tailcall{} -> false; + #pseudo_tailcall_prepare{} -> false; + #fp_binary{} -> true; + #fp_unary{} -> true; + #pseudo_fload{} -> true; + #pseudo_fstore{} -> true + end, + case XXX of + true -> []; + false -> exit({?MODULE,expand_insn,I}) + end, + [I|Accum] + end. + +-ifdef(notdef). % XXX: only for sparc64, alas +expand_pseudo_br(I, Accum) -> + #pseudo_br{rcond=RCond,src=Src,true_label=TrueLab,false_label=FalseLab, pred=Pred} = I, + [hipe_sparc:mk_nop(), + hipe_sparc:mk_b_label(FalseLab), + hipe_sparc:mk_nop(), + hipe_sparc:mk_br(RCond, Src, TrueLab, Pred) | + Accum]. +-endif. + +peep(Insns) -> + peep_list(Insns, []). + +peep_list([#bp{'cond'='a',label=Label}, #sethi{uimm22=#sparc_uimm22{value=0},dst=#sparc_temp{reg=0}} | (Insns = [#label{label=Label}|_])], Accum) -> + peep_list(Insns, Accum); +peep_list([I|Insns], Accum) -> + peep_list(Insns, [I|Accum]); +peep_list([], Accum) -> + lists:reverse(Accum). diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl new file mode 100644 index 0000000000..f7d7f40df3 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -0,0 +1,636 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_frame). +-export([frame/1]). + +-include("hipe_sparc.hrl"). +-include("../rtl/hipe_literals.hrl"). + +frame(Defun) -> + Formals = fix_formals(hipe_sparc:defun_formals(Defun)), + Temps0 = all_temps(hipe_sparc:defun_code(Defun), Formals), + MinFrame = defun_minframe(Defun), + Temps = ensure_minframe(MinFrame, Temps0), + ClobbersRA = clobbers_ra(hipe_sparc:defun_code(Defun)), + CFG0 = hipe_sparc_cfg:init(Defun), + Liveness = hipe_sparc_liveness_all:analyse(CFG0), + CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersRA), + hipe_sparc_cfg:linearise(CFG1). + +fix_formals(Formals) -> + fix_formals(hipe_sparc_registers:nr_args(), Formals). + +fix_formals(0, Rest) -> Rest; +fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest); +fix_formals(_, []) -> []. + +do_body(CFG0, Liveness, Formals, Temps, ClobbersRA) -> + Context = mk_context(Liveness, Formals, Temps, ClobbersRA), + CFG1 = do_blocks(CFG0, Context), + do_prologue(CFG1, Context). + +do_blocks(CFG, Context) -> + Labels = hipe_sparc_cfg:labels(CFG), + do_blocks(Labels, CFG, Context). + +do_blocks([Label|Labels], CFG, Context) -> + Liveness = context_liveness(Context), + LiveOut = hipe_sparc_liveness_all:liveout(Liveness, Label), + Block = hipe_sparc_cfg:bb(CFG, Label), + Code = hipe_bb:code(Block), + NewCode = do_block(Code, LiveOut, Context), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_sparc_cfg:bb_add(CFG, Label, NewBlock), + do_blocks(Labels, NewCFG, Context); +do_blocks([], CFG, _) -> + CFG. + +do_block(Insns, LiveOut, Context) -> + do_block(Insns, LiveOut, Context, context_framesize(Context), []). + +do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> + {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), + do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); +do_block([], _, Context, FPoff, RevCode) -> + FPoff0 = context_framesize(Context), + if FPoff =:= FPoff0 -> []; + true -> exit({?MODULE,do_block,FPoff}) + end, + lists:reverse(RevCode, []). + +do_insn(I, LiveOut, Context, FPoff) -> + case I of + #pseudo_call{} -> + do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_call_prepare{} -> + do_pseudo_call_prepare(I, FPoff); + #pseudo_move{} -> + {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_ret{} -> + {do_pseudo_ret(I, Context, FPoff), context_framesize(Context)}; + #pseudo_tailcall{} -> + {do_pseudo_tailcall(I, Context), context_framesize(Context)}; + #pseudo_fmove{} -> + {do_pseudo_fmove(I, Context, FPoff), FPoff}; + _ -> + {[I], FPoff} + end. + +%%% +%%% Moves, with Dst or Src possibly a pseudo +%%% + +do_pseudo_move(I, Context, FPoff) -> + Dst = hipe_sparc:pseudo_move_dst(I), + Src = hipe_sparc:pseudo_move_src(I), + case temp_is_pseudo(Dst) of + true -> + Offset = pseudo_offset(Dst, FPoff, Context), + mk_store(Src, hipe_sparc:mk_sp(), Offset, []); + _ -> + case temp_is_pseudo(Src) of + true -> + Offset = pseudo_offset(Src, FPoff, Context), + mk_load(hipe_sparc:mk_sp(), Offset, Dst, []); + _ -> + [hipe_sparc:mk_mov(Src, Dst)] + end + end. + +do_pseudo_fmove(I, Context, FPoff) -> + Dst = hipe_sparc:pseudo_fmove_dst(I), + Src = hipe_sparc:pseudo_fmove_src(I), + case temp_is_pseudo(Dst) of + true -> + Offset = pseudo_offset(Dst, FPoff, Context), + mk_fstore(Src, hipe_sparc:mk_sp(), Offset); + _ -> + case temp_is_pseudo(Src) of + true -> + Offset = pseudo_offset(Src, FPoff, Context), + mk_fload(hipe_sparc:mk_sp(), Offset, Dst); + _ -> + [hipe_sparc:mk_fp_unary('fmovd', Src, Dst)] + end + end. + +pseudo_offset(Temp, FPoff, Context) -> + FPoff + context_offset(Context, Temp). + +%%% +%%% Return - deallocate frame and emit 'ret $N' insn. +%%% + +do_pseudo_ret(I, Context, FPoff) -> + %% XXX: typically only one instruction between + %% the move-to-RA and the jmp-via-RA, ouch + restore_ra(FPoff, Context, + adjust_sp(FPoff + word_size() * context_arity(Context), + [I])). + +restore_ra(FPoff, Context, Rest) -> + case context_clobbers_ra(Context) of + false -> Rest; + true -> + RA = hipe_sparc:mk_ra(), + mk_load(hipe_sparc:mk_sp(), FPoff - word_size(), RA, Rest) + end. + +adjust_sp(N, Rest) -> + if N =:= 0 -> + Rest; + true -> + SP = hipe_sparc:mk_sp(), + hipe_sparc:mk_addi(SP, N, SP, Rest) + end. + +%%% +%%% Recursive calls. +%%% + +do_pseudo_call_prepare(I, FPoff0) -> + %% Create outgoing arguments area on the stack. + NrStkArgs = hipe_sparc:pseudo_call_prepare_nrstkargs(I), + Offset = NrStkArgs * word_size(), + {adjust_sp(-Offset, []), FPoff0 + Offset}. + +do_pseudo_call(I, LiveOut, Context, FPoff0) -> + #sparc_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_sparc:pseudo_call_sdesc(I), + FunV = hipe_sparc:pseudo_call_funv(I), + LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)], + SDesc = mk_sdesc(ExnLab, Context, LiveTemps), + ContLab = hipe_sparc:pseudo_call_contlab(I), + Linkage = hipe_sparc:pseudo_call_linkage(I), + CallCode = [hipe_sparc:mk_pseudo_call(FunV, SDesc, ContLab, Linkage)], + StkArity = erlang:max(0, OrigArity - hipe_sparc_registers:nr_args()), + context_need_stack(Context, stack_need(FPoff0, StkArity, FunV)), + ArgsBytes = word_size() * StkArity, + {CallCode, FPoff0 - ArgsBytes}. + +stack_need(FPoff, StkArity, FunV) -> + case FunV of + #sparc_prim{} -> FPoff; + #sparc_mfa{m=M,f=F,a=A} -> + case erlang:is_builtin(M, F, A) of + true -> FPoff; + false -> stack_need_general(FPoff, StkArity) + end; + _ -> stack_need_general(FPoff, StkArity) + end. + +stack_need_general(FPoff, StkArity) -> + erlang:max(FPoff, FPoff + (?SPARC_LEAF_WORDS - StkArity) * word_size()). + +%%% +%%% Create stack descriptors for call sites. +%%% + +mk_sdesc(ExnLab, Context, Temps) -> % for normal calls + Temps0 = only_tagged(Temps), + Live = mk_live(Context, Temps0), + Arity = context_arity(Context), + FSize = context_framesize(Context), + hipe_sparc:mk_sdesc(ExnLab, (FSize div word_size())-1, Arity, + list_to_tuple(Live)). + +only_tagged(Temps)-> + [X || X <- Temps, hipe_sparc:temp_type(X) =:= 'tagged']. + +mk_live(Context, Temps) -> + lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]). + +temp_to_slot(Context, Temp) -> + (context_framesize(Context) + context_offset(Context, Temp)) + div word_size(). + +mk_minimal_sdesc(Context) -> % for inc_stack_0 calls + hipe_sparc:mk_sdesc([], 0, context_arity(Context), {}). + +%%% +%%% Tailcalls. +%%% + +do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context) + Arity = context_arity(Context), + Args = hipe_sparc:pseudo_tailcall_stkargs(I), + FunV = hipe_sparc:pseudo_tailcall_funv(I), + Linkage = hipe_sparc:pseudo_tailcall_linkage(I), + {Insns, FPoff1} = do_tailcall_args(Args, Context), + context_need_stack(Context, FPoff1), + StkArity = length(Args), + FPoff2 = FPoff1 + (Arity - StkArity) * word_size(), + context_need_stack(Context, stack_need(FPoff2, StkArity, FunV)), + I2 = + case FunV of + #sparc_temp{} -> + hipe_sparc:mk_jmp(FunV, hipe_sparc:mk_simm13(0), []); + Fun -> + hipe_sparc:mk_call_tail(Fun, Linkage) + end, + %% XXX: break out the RA restore, just like for pseudo_ret? + restore_ra(context_framesize(Context), Context, + Insns ++ adjust_sp(FPoff2, [I2])). + +do_tailcall_args(Args, Context) -> + FPoff0 = context_framesize(Context), + Arity = context_arity(Context), + FrameTop = word_size()*Arity, + DangerOff = FrameTop - word_size()*length(Args), + %% + Moves = mk_moves(Args, FrameTop, []), + %% + {Stores, Simple, Conflict} = + split_moves(Moves, Context, DangerOff, [], [], []), + %% sanity check (shouldn't trigger any more) + if DangerOff < -FPoff0 -> + exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0}); + true -> [] + end, + FPoff1 = FPoff0, + %% + {Pushes, Pops, FPoff2} = split_conflict(Conflict, FPoff1, [], []), + %% + TempReg = hipe_sparc_registers:temp1(), + %% + {adjust_sp(-(FPoff2 - FPoff1), + simple_moves(Pushes, FPoff2, TempReg, + store_moves(Stores, FPoff2, TempReg, + simple_moves(Simple, FPoff2, TempReg, + simple_moves(Pops, FPoff2, TempReg, + []))))), + FPoff2}. + +mk_moves([Arg|Args], Off, Moves) -> + Off1 = Off - word_size(), + mk_moves(Args, Off1, [{Arg,Off1}|Moves]); +mk_moves([], _, Moves) -> + Moves. + +split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) -> + {Src,DstOff} = Move, + case src_is_pseudo(Src) of + false -> + split_moves(Moves, Context, DangerOff, [Move|Stores], + Simple, Conflict); + true -> + SrcOff = context_offset(Context, Src), + Type = typeof_temp(Src), + if SrcOff =:= DstOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, Conflict); + SrcOff >= DangerOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, [{SrcOff,DstOff,Type}|Conflict]); + true -> + split_moves(Moves, Context, DangerOff, Stores, + [{SrcOff,DstOff,Type}|Simple], Conflict) + end + end; +split_moves([], _, _, Stores, Simple, Conflict) -> + {Stores, Simple, Conflict}. + +split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Pops) -> + FPoff1 = FPoff + word_size(), + Push = {SrcOff,-FPoff1,Type}, + Pop = {-FPoff1,DstOff,Type}, + split_conflict(Conflict, FPoff1, [Push|Pushes], [Pop|Pops]); +split_conflict([], FPoff, Pushes, Pops) -> + {lists:reverse(Pushes), Pops, FPoff}. + +simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) -> + Temp = hipe_sparc:mk_temp(TempReg, Type), + SP = hipe_sparc:mk_sp(), + LoadOff = FPoff+SrcOff, + StoreOff = FPoff+DstOff, + simple_moves(Moves, FPoff, TempReg, + mk_load(SP, LoadOff, Temp, + mk_store(Temp, SP, StoreOff, + Rest))); +simple_moves([], _, _, Rest) -> + Rest. + +store_moves([{Src,DstOff}|Moves], FPoff, TempReg, Rest) -> + %% Type = typeof_temp(Src), + SP = hipe_sparc:mk_sp(), + StoreOff = FPoff+DstOff, + {NewSrc,FixSrc} = + case hipe_sparc:is_temp(Src) of + true -> + {Src, []}; + _ -> + Temp = hipe_sparc:mk_temp(TempReg, 'untagged'), + {Temp, hipe_sparc:mk_set(Src, Temp)} + end, + store_moves(Moves, FPoff, TempReg, + FixSrc ++ mk_store(NewSrc, SP, StoreOff, Rest)); +store_moves([], _, _, Rest) -> + Rest. + +%%% +%%% Contexts +%%% + +-record(context, {liveness, framesize, arity, map, clobbers_ra, ref_maxstack}). + +mk_context(Liveness, Formals, Temps, ClobbersRA) -> + {Map, MinOff} = mk_temp_map(Formals, ClobbersRA, Temps), + FrameSize = (-MinOff), + RefMaxStack = hipe_bifs:ref(FrameSize), + #context{liveness=Liveness, + framesize=FrameSize, arity=length(Formals), + map=Map, clobbers_ra=ClobbersRA, ref_maxstack=RefMaxStack}. + +context_need_stack(#context{ref_maxstack=RM}, N) -> + M = hipe_bifs:ref_get(RM), + if N > M -> hipe_bifs:ref_set(RM, N); + true -> [] + end. + +context_maxstack(#context{ref_maxstack=RM}) -> + hipe_bifs:ref_get(RM). + +context_arity(#context{arity=Arity}) -> + Arity. + +context_framesize(#context{framesize=FrameSize}) -> + FrameSize. + +context_liveness(#context{liveness=Liveness}) -> + Liveness. + +context_offset(#context{map=Map}, Temp) -> + tmap_lookup(Map, Temp). + +context_clobbers_ra(#context{clobbers_ra=ClobbersRA}) -> ClobbersRA. + +mk_temp_map(Formals, ClobbersRA, Temps) -> + {Map, 0} = enter_vars(Formals, word_size() * length(Formals), + tmap_empty()), + TempsList = tset_to_list(Temps), + AllTemps = + case ClobbersRA of + false -> TempsList; + true -> + RA = hipe_sparc:mk_new_temp('untagged'), + [RA|TempsList] + end, + enter_vars(AllTemps, 0, Map). + +enter_vars([V|Vs], PrevOff, Map) -> + Off = + case hipe_sparc:temp_type(V) of + 'double' -> PrevOff - 2*word_size(); % XXX: sparc64: 1*word_size() + _ -> PrevOff - word_size() + end, + enter_vars(Vs, Off, tmap_bind(Map, V, Off)); +enter_vars([], Off, Map) -> + {Map, Off}. + +tmap_empty() -> + gb_trees:empty(). + +tmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +tmap_lookup(Map, Key) -> + gb_trees:get(Key, Map). + +%%% +%%% do_prologue: prepend stack frame allocation code. +%%% +%%% NewStart: +%%% temp1 = *(P + P_SP_LIMIT) +%%% temp2 = SP - MaxStack +%%% cmp temp2, temp1 +%%% if (ltu) goto IncStack else goto AllocFrame +%%% AllocFrame: +%%% SP = temp2 [if FrameSize == MaxStack] +%%% SP -= FrameSize [if FrameSize != MaxStack] +%%% *(SP + FrameSize-WordSize) = RA [if ClobbersRA] +%%% goto OldStart +%%% OldStart: +%%% ... +%%% IncStack: +%%% temp1 = RA +%%% call inc_stack; nop +%%% RA = temp1 +%%% goto NewStart + +do_prologue(CFG, Context) -> + MaxStack = context_maxstack(Context), + if MaxStack > 0 -> + FrameSize = context_framesize(Context), + OldStartLab = hipe_sparc_cfg:start_label(CFG), + NewStartLab = hipe_gensym:get_next_label(sparc), + %% + P = hipe_sparc:mk_temp(hipe_sparc_registers:proc_pointer(), 'untagged'), + Temp1 = hipe_sparc:mk_temp1(), + SP = hipe_sparc:mk_sp(), + %% + RA = hipe_sparc:mk_ra(), + ClobbersRA = context_clobbers_ra(Context), + GotoOldStartCode = [hipe_sparc:mk_b_label(OldStartLab)], + AllocFrameCodeTail = + case ClobbersRA of + false -> GotoOldStartCode; + true -> mk_store(RA, SP, FrameSize-word_size(), GotoOldStartCode) + end, + %% + Arity = context_arity(Context), + Guaranteed = erlang:max(0, (?SPARC_LEAF_WORDS - Arity) * word_size()), + %% + {CFG1,NewStartCode} = + if MaxStack =< Guaranteed -> + %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameCode = adjust_sp(-FrameSize, AllocFrameCodeTail), + NewStartCode0 = AllocFrameCode, % no mflr needed + {CFG,NewStartCode0}; + true -> + %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameLab = hipe_gensym:get_next_label(sparc), + IncStackLab = hipe_gensym:get_next_label(sparc), + Temp2 = hipe_sparc:mk_temp2(), + %% + NewStartCodeTail2 = + [hipe_sparc:mk_pseudo_bp('lu', IncStackLab, AllocFrameLab, 0.01)], + NewStartCodeTail1 = NewStartCodeTail2, % no mflr needed + NewStartCode0 = + mk_load(P, ?P_NSP_LIMIT, Temp1, + hipe_sparc:mk_addi(SP, -MaxStack, Temp2, + [hipe_sparc:mk_alu('subcc', Temp2, Temp1, hipe_sparc:mk_g0()) | + NewStartCodeTail1])), + %% + AllocFrameCode = + if MaxStack =:= FrameSize -> + %% io:format("~w: MaxStack =:= FrameSize =:= ~w :-)\n", [?MODULE,MaxStack]), + [hipe_sparc:mk_mov(Temp2, SP) | + AllocFrameCodeTail]; + true -> + %% io:format("~w: MaxStack ~w =/= FrameSize ~w :-(\n", [?MODULE,MaxStack,FrameSize]), + adjust_sp(-FrameSize, AllocFrameCodeTail) + end, + %% + IncStackCodeTail = + [hipe_sparc:mk_call_rec(hipe_sparc:mk_prim('inc_stack_0'), + mk_minimal_sdesc(Context), not_remote), + hipe_sparc:mk_mov(Temp1, RA), + hipe_sparc:mk_b_label(NewStartLab)], + IncStackCode = + [hipe_sparc:mk_mov(RA, Temp1) | IncStackCodeTail], % mflr always needed + %% + CFG0a = hipe_sparc_cfg:bb_add(CFG, AllocFrameLab, + hipe_bb:mk_bb(AllocFrameCode)), + CFG0b = hipe_sparc_cfg:bb_add(CFG0a, IncStackLab, + hipe_bb:mk_bb(IncStackCode)), + %% + {CFG0b,NewStartCode0} + end, + %% + CFG2 = hipe_sparc_cfg:bb_add(CFG1, NewStartLab, + hipe_bb:mk_bb(NewStartCode)), + hipe_sparc_cfg:start_label_update(CFG2, NewStartLab); + true -> + CFG + end. + +%%% Create a load instruction. +%%% May clobber Dst early for large offsets. In principle we could +%%% clobber TEMP2 if Dst =:= Base, but Dst =/= Base here in frame. + +mk_load(Base, Offset, Dst, Rest) -> + LdOp = 'lduw', % XXX: sparc64: ldx + hipe_sparc:mk_load(LdOp, Base, Offset, Dst, 'error', Rest). + +mk_fload(Base, Offset, Dst) -> + hipe_sparc:mk_fload(Base, Offset, Dst, 'temp2'). + +%%% Create a store instruction. +%%% May clobber TEMP2 for large offsets. + +mk_store(Src, Base, Offset, Rest) -> + StOp = 'stw', % XXX: sparc64: stx + hipe_sparc:mk_store(StOp, Src, Base, Offset, 'temp2', Rest). + +mk_fstore(Src, Base, Offset) -> + hipe_sparc:mk_fstore(Src, Base, Offset, 'temp2'). + +%%% typeof_temp -- what's temp's type? + +typeof_temp(Temp) -> + hipe_sparc:temp_type(Temp). + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + hipe_sparc:is_temp(Src) andalso temp_is_pseudo(Src). + +temp_is_pseudo(Temp) -> + not(hipe_sparc:temp_is_precoloured(Temp)). + +%%% +%%% Detect if a Defun's body clobbers RA. +%%% + +clobbers_ra(Insns) -> + case Insns of + [#pseudo_call{}|_] -> true; + %% moves to RA cannot occur yet + [_|Rest] -> clobbers_ra(Rest); + [] -> false + end. + +%%% +%%% Build the set of all temps used in a Defun's body. +%%% + +all_temps(Code, Formals) -> + S0 = find_temps(Code, tset_empty()), + S1 = tset_del_list(S0, Formals), + tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). + +find_temps([I|Insns], S0) -> + S1 = tset_add_list(S0, hipe_sparc_defuse:insn_def_all(I)), + S2 = tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)), + find_temps(Insns, S2); +find_temps([], S) -> + S. + +tset_empty() -> + gb_sets:new(). + +tset_size(S) -> + gb_sets:size(S). + +tset_insert(S, T) -> + gb_sets:add_element(T, S). + +tset_add_list(S, Ts) -> + gb_sets:union(S, gb_sets:from_list(Ts)). + +tset_del_list(S, Ts) -> + gb_sets:subtract(S, gb_sets:from_list(Ts)). + +tset_filter(S, F) -> + gb_sets:filter(F, S). + +tset_to_list(S) -> + gb_sets:to_list(S). + +%%% +%%% Compute minimum permissible frame size, ignoring spilled temps. +%%% This is done to ensure that we won't have to adjust the frame size +%%% in the middle of a tailcall. +%%% + +defun_minframe(Defun) -> + MaxTailArity = body_mta(hipe_sparc:defun_code(Defun), 0), + MyArity = length(fix_formals(hipe_sparc:defun_formals(Defun))), + erlang:max(MaxTailArity - MyArity, 0). + +body_mta([I|Code], MTA) -> + body_mta(Code, insn_mta(I, MTA)); +body_mta([], MTA) -> + MTA. + +insn_mta(I, MTA) -> + case I of + #pseudo_tailcall{arity=Arity} -> + erlang:max(MTA, Arity - hipe_sparc_registers:nr_args()); + _ -> MTA + end. + +%%% +%%% Ensure that we have enough temps to satisfy the minimum frame size, +%%% if necessary by prepending unused dummy temps. +%%% + +ensure_minframe(MinFrame, Temps) -> + ensure_minframe(MinFrame, tset_size(Temps), Temps). + +ensure_minframe(MinFrame, Frame, Temps) -> + if MinFrame > Frame -> + Temp = hipe_sparc:mk_new_temp('untagged'), + ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp)); + true -> Temps + end. + +word_size() -> + hipe_rtl_arch:word_size(). diff --git a/lib/hipe/sparc/hipe_sparc_liveness_all.erl b/lib/hipe/sparc/hipe_sparc_liveness_all.erl new file mode 100644 index 0000000000..c6f78f9f7a --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_liveness_all.erl @@ -0,0 +1,38 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_liveness_all). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_sparc.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_sparc_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_sparc_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_sparc_cfg:succ(CFG, L). +uses(Insn) -> hipe_sparc_defuse:insn_use_all(Insn). +defines(Insn) -> hipe_sparc_defuse:insn_def_all(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_sparc:mk_temp(Reg, Type) + end, + hipe_sparc_registers:live_at_return())). diff --git a/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl b/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl new file mode 100644 index 0000000000..ac67e499ad --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl @@ -0,0 +1,34 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_liveness_fpr). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_sparc.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_sparc_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_sparc_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_sparc_cfg:succ(CFG, L). +uses(Insn) -> hipe_sparc_defuse:insn_use_fpr(Insn). +defines(Insn) -> hipe_sparc_defuse:insn_def_fpr(Insn). +liveout_no_succ() -> []. diff --git a/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl b/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl new file mode 100644 index 0000000000..0b07ae5c9d --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl @@ -0,0 +1,38 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_liveness_gpr). +-export([analyse/1]). +-export([liveout/2]). + +-include("hipe_sparc.hrl"). +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_sparc_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_sparc_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_sparc_cfg:succ(CFG, L). +uses(Insn) -> hipe_sparc_defuse:insn_use_gpr(Insn). +defines(Insn) -> hipe_sparc_defuse:insn_def_gpr(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_sparc:mk_temp(Reg, Type) + end, + hipe_sparc_registers:live_at_return())). diff --git a/lib/hipe/sparc/hipe_sparc_main.erl b/lib/hipe/sparc/hipe_sparc_main.erl new file mode 100644 index 0000000000..2e5c8e0494 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_main.erl @@ -0,0 +1,58 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_main). +-export([rtl_to_sparc/3]). + +rtl_to_sparc(MFA, RTL, Options) -> + Defun1 = hipe_rtl_to_sparc:translate(RTL), + %% io:format("~w: after translate\n", [?MODULE]), + %% hipe_sparc_pp:pp(Defun1), + Defun2 = hipe_sparc_ra:ra(Defun1, Options), + %% io:format("~w: after regalloc\n", [?MODULE]), + %% hipe_sparc_pp:pp(Defun2), + Defun3 = hipe_sparc_frame:frame(Defun2), + %% io:format("~w: after frame\n", [?MODULE]), + %% hipe_sparc_pp:pp(Defun3), + Defun4 = hipe_sparc_finalise:finalise(Defun3), + %% io:format("~w: after finalise\n", [?MODULE]), + pp(Defun4, MFA, Options), + {native, sparc, {unprofiled, Defun4}}. + +pp(Defun, MFA, Options) -> + case proplists:get_value(pp_native, Options) of + true -> + hipe_sparc_pp:pp(Defun); + {only,Lst} when is_list(Lst) -> + case lists:member(MFA,Lst) of + true -> + hipe_sparc_pp:pp(Defun); + false -> + ok + end; + {only,MFA} -> + hipe_sparc_pp:pp(Defun); + {file,FileName} -> + {ok, File} = file:open(FileName, [write,append]), + hipe_sparc_pp:pp(File, Defun), + ok = file:close(File); + _ -> + ok + end. diff --git a/lib/hipe/sparc/hipe_sparc_pp.erl b/lib/hipe/sparc/hipe_sparc_pp.erl new file mode 100644 index 0000000000..6b49acdd11 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_pp.erl @@ -0,0 +1,342 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_pp). +-export([pp/1, pp/2, pp_insn/1]). +-include("hipe_sparc.hrl"). + +pp(Defun) -> + pp(standard_io, Defun). + +pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) -> + Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A), + io:format(Dev, "\t.text\n", []), + io:format(Dev, "\t.align 4\n", []), + io:format(Dev, "\t.global ~s\n", [Fname]), + io:format(Dev, "~s:\n", [Fname]), + pp_insns(Dev, Code, Fname), + io:format(Dev, "\t.rodata\n", []), + io:format(Dev, "\t.align 4\n", []), + hipe_data_pp:pp(Dev, Data, sparc, Fname), + io:format(Dev, "\n", []). + +pp_insns(Dev, [I|Is], Fname) -> + pp_insn(Dev, I, Fname), + pp_insns(Dev, Is, Fname); +pp_insns(_, [], _) -> + []. + +pp_insn(I) -> + pp_insn(standard_io, I, ""). + +pp_insn(Dev, I, Pre) -> + case I of + #alu{aluop=AluOp, dst=Dst, src1=Src1, src2=Src2} -> + io:format(Dev, "\t~s ", [alu_op_name(AluOp)]), + case aluop_is_ldop(AluOp) of + true -> + io:format(Dev, "[", []), + pp_temp(Dev, Src1), + io:format(Dev, " + ", []), + pp_src(Dev, Src2), + io:format(Dev, "]", []); + false -> + pp_temp(Dev, Src1), + io:format(Dev, ", ", []), + pp_src(Dev, Src2) + end, + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #bp{'cond'=Cond, label=Label, pred=Pred} -> + io:format(Dev, "\tb~w,~w .~s_~w\n", + [cond_name(Cond), pred_name(Pred), Pre, Label]); + %% #br{} -> pp_br(Dev, I, Pre); + #call_rec{'fun'=Fun, sdesc=SDesc, linkage=Linkage} -> + io:format(Dev, "\tcall ", []), + pp_fun(Dev, Fun), + io:format(Dev, " #", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #call_tail{'fun'=Fun, linkage=Linkage} -> + io:format(Dev, "\tb ", []), + pp_fun(Dev, Fun), + io:format(Dev, " # ~w\n", [Linkage]); + #comment{term=Term} -> + io:format(Dev, "\t# ~p\n", [Term]); + #jmp{src1=Src1, src2=Src2, labels=Labels} -> + io:format(Dev, "\tjmp [", []), + pp_temp(Dev, Src1), + io:format(Dev, " + ", []), + pp_src(Dev, Src2), + io:format(Dev, "]", []), + case Labels of + [] -> []; + _ -> + io:format(Dev, " #", []), + pp_labels(Dev, Labels, Pre) + end, + io:format(Dev, "\n", []); + #jmpl{src=Src, sdesc=SDesc} -> + io:format(Dev, "\tjmpl [", []), + pp_temp(Dev, Src), + io:format(Dev, " + 0], %o7 # ", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, "\n", []); + #label{label=Label} -> + io:format(Dev, ".~s_~w:~n", [Pre, Label]); + #pseudo_bp{'cond'=Cond, true_label=TrueLab, false_label=FalseLab, pred=Pred} -> + io:format(Dev, "\tpseudo_b~w,~w .~s_~w # .~s_~w\n", + [cond_name(Cond), pred_name(Pred), Pre, TrueLab, Pre, FalseLab]); + %% #pseudo_br{} -> pp_pseudo_br(Dev, I, Pre); + #pseudo_call{funv=FunV, sdesc=SDesc, contlab=ContLab, linkage=Linkage} -> + io:format(Dev, "\tpseudo_call ", []), + pp_funv(Dev, FunV), + io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #pseudo_call_prepare{nrstkargs=NrStkArgs} -> + SP = hipe_sparc_registers:reg_name_gpr(hipe_sparc_registers:stack_pointer()), + io:format(Dev, "\tsub ~s, ~w, ~s # pseudo_call_prepare\n", + [SP, 4*NrStkArgs, SP]); + #pseudo_move{src=Src, dst=Dst} -> + io:format(Dev, "\tpseudo_move ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_ret{} -> + io:format(Dev, "\tpseudo_ret\n", []); + #pseudo_set{imm=Imm, dst=Dst} -> + io:format(Dev, "\tpseudo_set ", []), + pp_imm(Dev, Imm), + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> + io:format(Dev, "\tpseudo_tailcall ", []), + pp_funv(Dev, FunV), + io:format(Dev, "/~w (", [Arity]), + pp_args(Dev, StkArgs), + io:format(Dev, ") ~w\n", [Linkage]); + #pseudo_tailcall_prepare{} -> + io:format(Dev, "\tpseudo_tailcall_prepare\n", []); + #rdy{dst=Dst} -> + io:format(Dev, "\trd %y, ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #sethi{dst=Dst, uimm22=#sparc_uimm22{value=Value}} -> + io:format(Dev, "\tsethi ", []), + pp_hex(Dev, Value), + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #store{stop=StOp, src=Src, base=Base, disp=Disp} -> + io:format(Dev, "\t~s ", [stop_name(StOp)]), + pp_temp(Dev, Src), + io:format(Dev, ", [", []), + pp_temp(Dev, Base), + io:format(Dev, " + ", []), + pp_src(Dev, Disp), + io:format(Dev, "]\n", []); + #fp_binary{fp_binop=FpBinOp, src1=Src1, src2=Src2, dst=Dst} -> + io:format(Dev, "\t~s ", [FpBinOp]), + pp_temp(Dev, Src1), + io:format(Dev, ", ", []), + pp_temp(Dev, Src2), + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #fp_unary{fp_unop=FpUnOp, src=Src, dst=Dst} -> + io:format(Dev, "\t~s ", [FpUnOp]), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_fload{base=Base, disp=Disp, dst=Dst, is_single=IsSingle} -> + io:format(Dev, "\t~s [", + [case IsSingle of + true -> 'ldf'; + _ -> 'pseudo_fload' end]), + pp_temp(Dev, Base), + io:format(Dev, " + ", []), + pp_simm13(Dev, Disp), + io:format(Dev, "], ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_fmove{src=Src, dst=Dst} -> + io:format(Dev, "\tpseudo_fmove ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_fstore{src=Src, base=Base, disp=Disp} -> + io:format(Dev, "\tpseudo_fstore ", []), + pp_temp(Dev, Src), + io:format(Dev, ", [", []), + pp_temp(Dev, Base), + io:format(Dev, " + ", []), + pp_simm13(Dev, Disp), + io:format(Dev, "]\n", []); + _ -> + exit({?MODULE, pp_insn, I}) + end. + +-ifdef(notdef). % XXX: only for sparc64, alas +pp_br(Dev, I, Pre) -> + #br{rcond=RCond, src=Src, label=Label, pred=Pred} = I, + io:format(Dev, "\tbr~w,~w ", [rcond_name(RCond), pred_name(Pred)]), + pp_temp(Dev, Src), + io:format(Dev, ", .~s_~w\n", [Pre, Label]). + +pp_pseudo_br(Dev, I, Pre) -> + #pseudo_br{rcond=RCond, src=Src, true_label=TrueLab, false_label=FalseLab, pred=Pred} = I, + io:format(Dev, "\tpseudo_br~w,~w ", [rcond_name(RCond), pred_name(Pred)]), + pp_src(Dev, Src), + io:format(Dev, ", .~s_~w # .~s_~w\n", [Pre, TrueLab, Pre, FalseLab]). +-endif. + +to_hex(N) -> + io_lib:format("~.16x", [N, "0x"]). + +pp_sdesc(Dev, Pre, #sparc_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) -> + pp_sdesc_exnlab(Dev, Pre, ExnLab), + io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]), + pp_sdesc_live(Dev, Live), + io:format(Dev, "]", []). + +pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []); +pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]). + +pp_sdesc_live(_, {}) -> []; +pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1). + +pp_sdesc_live(Dev, Live, I) -> + io:format(Dev, "~s", [to_hex(element(I, Live))]), + if I < tuple_size(Live) -> + io:format(Dev, ",", []), + pp_sdesc_live(Dev, Live, I+1); + true -> [] + end. + +pp_labels(Dev, [Label|Labels], Pre) -> + io:format(Dev, " .~s_~w", [Pre, Label]), + pp_labels(Dev, Labels, Pre); +pp_labels(_, [], _) -> + []. + +pp_fun(Dev, Fun) -> + case Fun of + #sparc_mfa{m=M, f=F, a=A} -> + io:format(Dev, "~w:~w/~w", [M, F, A]); + #sparc_prim{prim=Prim} -> + io:format(Dev, "~w", [Prim]) + end. + +pp_funv(Dev, FunV) -> + case FunV of + #sparc_temp{} -> + pp_temp(Dev, FunV); + Fun -> + pp_fun(Dev, Fun) + end. + +alu_op_name(Op) -> Op. + +aluop_is_ldop(AluOp) -> + case AluOp of + 'ldsb' -> true; + 'ldsh' -> true; + 'ldsw' -> true; + 'ldub' -> true; + 'lduh' -> true; + 'lduw' -> true; + 'ldx' -> true; + _ -> false + end. + +cond_name(Cond) -> Cond. +%%rcond_name(RCond) -> RCond. + +pred_name(Pred) -> + if Pred >= 0.5 -> 'pt'; + true -> 'pn' + end. + +stop_name(StOp) -> StOp. + +pp_temp(Dev, Temp=#sparc_temp{reg=Reg, type=Type}) -> + case hipe_sparc:temp_is_precoloured(Temp) of + true -> + Name = + case Type of + double -> hipe_sparc_registers:reg_name_fpr(Reg); + _ -> hipe_sparc_registers:reg_name_gpr(Reg) + end, + io:format(Dev, "~s", [Name]); + false -> + Tag = + case Type of + double -> "f"; + tagged -> "t"; + untagged -> "u" + end, + io:format(Dev, "~s~w", [Tag, Reg]) + end. + +pp_hex(Dev, Value) -> io:format(Dev, "~s", [to_hex(Value)]). +pp_simm13(Dev, #sparc_simm13{value=Value}) -> pp_hex(Dev, Value). +pp_uimm5(Dev, #sparc_uimm5{value=Value}) -> pp_hex(Dev, Value). + +pp_imm(Dev, Value) -> + if is_integer(Value) -> pp_hex(Dev, Value); + true -> io:format(Dev, "~w", [Value]) + end. + +pp_src(Dev, Src) -> + case Src of + #sparc_temp{} -> + pp_temp(Dev, Src); + #sparc_simm13{} -> + pp_simm13(Dev, Src); + #sparc_uimm5{} -> % XXX: sparc64: uimm6 + pp_uimm5(Dev, Src) + end. + +pp_arg(Dev, Arg) -> + case Arg of + #sparc_temp{} -> + pp_temp(Dev, Arg); + _ -> + pp_hex(Dev, Arg) + end. + +pp_args(Dev, [A|As]) -> + pp_arg(Dev, A), + pp_comma_args(Dev, As); +pp_args(_, []) -> + []. + +pp_comma_args(Dev, [A|As]) -> + io:format(Dev, ", ", []), + pp_arg(Dev, A), + pp_comma_args(Dev, As); +pp_comma_args(_, []) -> + []. diff --git a/lib/hipe/sparc/hipe_sparc_ra.erl b/lib/hipe/sparc/hipe_sparc_ra.erl new file mode 100644 index 0000000000..40360e97fe --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_ra.erl @@ -0,0 +1,56 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_ra). +-export([ra/2]). + +ra(Defun0, Options) -> + %% hipe_sparc_pp:pp(Defun0), + {Defun1, Coloring_fp, SpillIndex} + = case proplists:get_bool(inline_fp, Options) of + true -> + hipe_regalloc_loop:ra_fp(Defun0, Options, + hipe_coalescing_regalloc, + hipe_sparc_specific_fp); + false -> + {Defun0,[],0} + end, + %% hipe_sparc_pp:pp(Defun1), + {Defun2, Coloring} + = case proplists:get_value(regalloc, Options, coalescing) of + coalescing -> + ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + optimistic -> + ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + graph_color -> + ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + linear_scan -> + hipe_sparc_ra_ls:ra(Defun1, SpillIndex, Options); + naive -> + hipe_sparc_ra_naive:ra(Defun1, Coloring_fp, Options); + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) + end, + %% hipe_sparc_pp:pp(Defun2), + hipe_sparc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + +ra(Defun, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_sparc_specific). diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl new file mode 100644 index 0000000000..3403636118 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl @@ -0,0 +1,254 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_ra_finalise). +-export([finalise/3]). +-include("hipe_sparc.hrl"). + +finalise(Defun, TempMap, FPMap0) -> + Code = hipe_sparc:defun_code(Defun), + {_, SpillLimit} = hipe_sparc:defun_var_range(Defun), + Map = mk_ra_map(TempMap, SpillLimit), + FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), + NewCode = ra_code(Code, Map, FPMap1, []), + Defun#defun{code=NewCode}. + +ra_code([I|Insns], Map, FPMap, Accum) -> + ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); +ra_code([], _Map, _FPMap, Accum) -> + lists:reverse(Accum). + +ra_insn(I, Map, FPMap) -> + case I of + #alu{} -> ra_alu(I, Map); + #jmp{} -> ra_jmp(I, Map); + %% #pseudo_br{} -> ra_pseudo_br(I, Map); + #pseudo_call{} -> ra_pseudo_call(I, Map); + #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_set{} -> ra_pseudo_set(I, Map); + #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); + #rdy{} -> ra_rdy(I, Map); + #sethi{} -> ra_sethi(I, Map); + #store{} -> ra_store(I, Map); + #fp_binary{} -> ra_fp_binary(I, FPMap); + #fp_unary{} -> ra_fp_unary(I, FPMap); + #pseudo_fload{} -> ra_pseudo_fload(I, Map, FPMap); + #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap); + #pseudo_fstore{} -> ra_pseudo_fstore(I, Map, FPMap); + _ -> I + end. + +ra_alu(I=#alu{src1=Src1,src2=Src2,dst=Dst}, Map) -> + NewSrc1 = ra_temp(Src1, Map), + NewSrc2 = ra_src(Src2, Map), + NewDst = ra_temp(Dst, Map), + I#alu{src1=NewSrc1,src2=NewSrc2,dst=NewDst}. + +ra_jmp(I=#jmp{src1=Src1,src2=Src2}, Map) -> + NewSrc1 = ra_temp(Src1, Map), + NewSrc2 = ra_src(Src2, Map), + I#jmp{src1=NewSrc1,src2=NewSrc2}. + +-ifdef(notdef). % XXX: only for sparc64, alas +ra_pseudo_br(I=#pseudo_br{src=Src}, Map) -> + NewSrc = ra_temp(Src, Map), + I#pseudo_br{src=NewSrc}. +-endif. + +ra_pseudo_call(I=#pseudo_call{funv=FunV}, Map) -> + NewFunV = ra_funv(FunV, Map), + I#pseudo_call{funv=NewFunV}. + +ra_pseudo_move(I=#pseudo_move{src=Src,dst=Dst}, Map) -> + NewSrc = ra_temp(Src, Map), + NewDst = ra_temp(Dst, Map), + I#pseudo_move{src=NewSrc,dst=NewDst}. + +ra_pseudo_set(I=#pseudo_set{dst=Dst}, Map) -> + NewDst = ra_temp(Dst, Map), + I#pseudo_set{dst=NewDst}. + +ra_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV,stkargs=StkArgs}, Map) -> + NewFunV = ra_funv(FunV, Map), + NewStkArgs = ra_args(StkArgs, Map), + I#pseudo_tailcall{funv=NewFunV,stkargs=NewStkArgs}. + +ra_rdy(I=#rdy{dst=Dst}, Map) -> + NewDst = ra_temp(Dst, Map), + I#rdy{dst=NewDst}. + +ra_sethi(I=#sethi{dst=Dst}, Map) -> + NewDst = ra_temp(Dst, Map), + I#sethi{dst=NewDst}. + +ra_store(I=#store{src=Src,base=Base,disp=Disp}, Map) -> + NewSrc = ra_temp(Src, Map), + NewBase = ra_temp(Base, Map), + NewDisp = ra_src(Disp, Map), + I#store{src=NewSrc,base=NewBase,disp=NewDisp}. + +ra_fp_binary(I=#fp_binary{src1=Src1,src2=Src2,dst=Dst}, FPMap) -> + NewSrc1 = ra_temp_fp(Src1, FPMap), + NewSrc2 = ra_temp_fp(Src2, FPMap), + NewDst = ra_temp_fp(Dst, FPMap), + I#fp_binary{src1=NewSrc1,src2=NewSrc2,dst=NewDst}. + +ra_fp_unary(I=#fp_unary{src=Src,dst=Dst}, FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewDst = ra_temp_fp(Dst, FPMap), + I#fp_unary{src=NewSrc,dst=NewDst}. + +ra_pseudo_fload(I=#pseudo_fload{base=Base,dst=Dst}, Map, FPMap) -> + NewBase = ra_temp(Base, Map), + NewDst = ra_temp_fp(Dst, FPMap), + I#pseudo_fload{base=NewBase,dst=NewDst}. + +ra_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewDst = ra_temp_fp(Dst, FPMap), + I#pseudo_fmove{src=NewSrc,dst=NewDst}. + +ra_pseudo_fstore(I=#pseudo_fstore{src=Src,base=Base}, Map, FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewBase = ra_temp(Base, Map), + I#pseudo_fstore{src=NewSrc,base=NewBase}. + +%%% Tailcall stack arguments. + +ra_args([Arg|Args], Map) -> + [ra_temp_or_imm(Arg, Map) | ra_args(Args, Map)]; +ra_args([], _) -> + []. + +ra_temp_or_imm(Arg, Map) -> + case hipe_sparc:is_temp(Arg) of + true -> + ra_temp(Arg, Map); + false -> + Arg + end. + +%%% FunV, Src, and Temp operands. + +ra_funv(FunV, Map) -> + case FunV of + #sparc_temp{} -> ra_temp(FunV, Map); + _ -> FunV + end. + +ra_src(Src, Map) -> + case Src of + #sparc_temp{} -> ra_temp(Src, Map); + _ -> Src + end. + +ra_temp_fp(Temp, FPMap) -> + Reg = hipe_sparc:temp_reg(Temp), + double = hipe_sparc:temp_type(Temp), + case hipe_sparc_registers:is_precoloured_fpr(Reg) of + true -> Temp; + _ -> ra_temp_common(Reg, Temp, FPMap) + end. + +ra_temp(Temp, Map) -> + Reg = hipe_sparc:temp_reg(Temp), + case hipe_sparc:temp_type(Temp) of + 'double' -> + exit({?MODULE,ra_temp,Temp}); + _ -> + case hipe_sparc_registers:is_precoloured_gpr(Reg) of + true -> Temp; + _ -> ra_temp_common(Reg, Temp, Map) + end + end. + +ra_temp_common(Reg, Temp, Map) -> + case gb_trees:lookup(Reg, Map) of + {value, NewReg} -> Temp#sparc_temp{reg=NewReg}; + _ -> Temp + end. + +mk_ra_map(TempMap, SpillLimit) -> + %% Build a partial map from pseudo to reg or spill. + %% Spills are represented as pseudos with indices above SpillLimit. + %% (I'd prefer to use negative indices, but that breaks + %% hipe_sparc_registers:is_precoloured/1.) + %% The frame mapping proper is unchanged, since spills look just like + %% ordinary (un-allocated) pseudos. + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, is_precoloured_gpr), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + TempMap). + +conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> + %% From should be a pseudo, or a hard reg mapped to itself. + if is_integer(From), From =< SpillLimit -> + case hipe_sparc_registers:IsPrecoloured(From) of + false -> []; + _ -> + case To of + {reg, From} -> []; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + %% NewReg should be a hard reg, or a pseudo mapped + %% to itself (formals are handled this way). + if is_integer(NewReg) -> + case hipe_sparc_registers:IsPrecoloured(NewReg) of + true -> []; + _ -> if From =:= NewReg -> []; + true -> + exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of NewReg check + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. + if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(sparc), + if MaxTempNum >= ToTempNum -> ok; + true -> hipe_gensym:set_var(sparc, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end. + +mk_ra_map_fp(FPMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_fpr), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FPMap). diff --git a/lib/hipe/sparc/hipe_sparc_ra_ls.erl b/lib/hipe/sparc/hipe_sparc_ra_ls.erl new file mode 100644 index 0000000000..cdb15e738c --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_ra_ls.erl @@ -0,0 +1,56 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Linear Scan register allocator for SPARC + +-module(hipe_sparc_ra_ls). +-export([ra/3]). + +ra(Defun, SpillIndex, Options) -> + NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), + CFG = hipe_sparc_cfg:init(NewDefun), + SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG), + alloc(NewDefun, SpillIndex, SpillLimit, Options). + +alloc(Defun, SpillIndex, SpillLimit, Options) -> + CFG = hipe_sparc_cfg:init(Defun), + {Coloring, _NewSpillIndex} = + regalloc( + CFG, + hipe_sparc_registers:allocatable_gpr()-- + [hipe_sparc_registers:temp3(), + hipe_sparc_registers:temp2(), + hipe_sparc_registers:temp1()], + [hipe_sparc_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + hipe_sparc_specific), + {NewDefun, _DidSpill} = + hipe_sparc_ra_postconditions:check_and_rewrite( + Defun, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), + {TempMap2,_NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + hipe_sparc_specific, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + {NewDefun, Coloring2}. + +regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> + hipe_ls_regalloc:regalloc( + CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). diff --git a/lib/hipe/sparc/hipe_sparc_ra_naive.erl b/lib/hipe/sparc/hipe_sparc_ra_naive.erl new file mode 100644 index 0000000000..8c378c4850 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_ra_naive.erl @@ -0,0 +1,29 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_ra_naive). +-export([ra/3]). + +-include("hipe_sparc.hrl"). + +ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} + {NewDefun,_DidSpill} = + hipe_sparc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), + {NewDefun, []}. diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl new file mode 100644 index 0000000000..f7fdae0491 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl @@ -0,0 +1,222 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_ra_postconditions). + +-export([check_and_rewrite/3, check_and_rewrite2/3]). + +-include("hipe_sparc.hrl"). + +check_and_rewrite(Defun, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), + check_and_rewrite2(Defun, TempMap, Allocator). + +check_and_rewrite2(Defun, TempMap, Allocator) -> + Strategy = strategy(Allocator), + #defun{code=Code0} = Defun, + {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), + VarRange = {0, hipe_gensym:get_var(sparc)}, + {Defun#defun{code=Code1, var_range=VarRange}, + DidSpill}. + +strategy(Allocator) -> + case Allocator of + 'normal' -> 'new'; + 'linearscan' -> 'fixed'; + 'naive' -> 'fixed' + end. + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap, Strategy) -> + case I of + #alu{} -> do_alu(I, TempMap, Strategy); + #jmp{} -> do_jmp(I, TempMap, Strategy); + %% #pseudo_br{} -> do_pseudo_br(I, TempMap, Strategy); + #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy); + #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #pseudo_set{} -> do_pseudo_set(I, TempMap, Strategy); + #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy); + #rdy{} -> do_rdy(I, TempMap, Strategy); + #sethi{} -> do_sethi(I, TempMap, Strategy); + #store{} -> do_store(I, TempMap, Strategy); + #pseudo_fload{} -> do_pseudo_fload(I, TempMap, Strategy); + #pseudo_fstore{} -> do_pseudo_fstore(I, TempMap, Strategy); + _ -> {[I], false} + end. + +%%% Fix relevant instruction types. + +do_alu(I=#alu{dst=Dst,src1=Src1,src2=Src2}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill1} = fix_dst(Dst, TempMap, Strategy), + {FixSrc1,NewSrc1,DidSpill2} = fix_src1(Src1, TempMap, Strategy), + {FixSrc2,NewSrc2,DidSpill3} = fix_src2_or_imm(Src2, TempMap, Strategy), + NewI = I#alu{dst=NewDst,src1=NewSrc1,src2=NewSrc2}, + {FixSrc1 ++ FixSrc2 ++ [NewI | FixDst], DidSpill1 or DidSpill2 or DidSpill3}. + +do_jmp(I=#jmp{src1=Src1,src2=Src2}, TempMap, Strategy) -> + {FixSrc1,NewSrc1,DidSpill1} = fix_src1(Src1, TempMap, Strategy), + {FixSrc2,NewSrc2,DidSpill2} = fix_src2_or_imm(Src2, TempMap, Strategy), + NewI = I#jmp{src1=NewSrc1,src2=NewSrc2}, + {FixSrc1 ++ FixSrc2 ++ [NewI], DidSpill1 or DidSpill2}. + +-ifdef(notdef). % XXX: only for sparc64, alas +do_pseudo_br(I=#pseudo_br{src=Src}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), + NewI = I#pseudo_br{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}. +-endif. + +do_pseudo_call(I=#pseudo_call{funv=FunV}, TempMap, Strategy) -> + {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy), + NewI = I#pseudo_call{funv=NewFunV}, + {FixFunV ++ [NewI], DidSpill}. + +do_pseudo_move(I=#pseudo_move{src=Src,dst=Dst}, TempMap, Strategy) -> + %% Either Dst or Src (but not both) may be a pseudo temp. + %% pseudo_move is a special case: in [XXX: not pseudo_tailcall] + %% all other instructions, all temps must be non-pseudos + %% after register allocation. + case temp_is_spilled(Dst, TempMap) of + true -> % Src must not be a pseudo + {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), + NewI = I#pseudo_move{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}; + _ -> + {[I], false} + end. + +do_pseudo_set(I=#pseudo_set{dst=Dst}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap, Strategy), + NewI = I#pseudo_set{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV}, TempMap, Strategy) -> + {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy), + NewI = I#pseudo_tailcall{funv=NewFunV}, + {FixFunV ++ [NewI], DidSpill}. + +do_rdy(I=#rdy{dst=Dst}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap, Strategy), + NewI = I#rdy{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_sethi(I=#sethi{dst=Dst}, TempMap, Strategy) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap, Strategy), + NewI = I#sethi{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_store(I=#store{src=Src,base=Base,disp=Disp}, TempMap, Strategy) -> + {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), + {FixBase,NewBase,DidSpill2} = fix_src2(Base, TempMap, Strategy), + {FixDisp,NewDisp,DidSpill3} = fix_src3_or_imm(Disp, TempMap, Strategy), + NewI = I#store{src=NewSrc,base=NewBase,disp=NewDisp}, + {FixSrc ++ FixBase ++ FixDisp ++ [NewI], DidSpill1 or DidSpill2 or DidSpill3}. + +do_pseudo_fload(I=#pseudo_fload{base=Base}, TempMap, Strategy) -> + {FixBase,NewBase,DidSpill} = fix_src1(Base, TempMap, Strategy), + NewI = I#pseudo_fload{base=NewBase}, + {FixBase ++ [NewI], DidSpill}. + +do_pseudo_fstore(I=#pseudo_fstore{base=Base}, TempMap, Strategy) -> + {FixBase,NewBase,DidSpill} = fix_src1(Base, TempMap, Strategy), + NewI = I#pseudo_fstore{base=NewBase}, + {FixBase ++ [NewI], DidSpill}. + +%%% Fix Dst and Src operands. + +fix_funv(FunV, TempMap, Strategy) -> + case FunV of + #sparc_temp{} -> fix_src3(FunV, TempMap, Strategy); + _ -> {[], FunV, false} + end. + +fix_src2_or_imm(Src2, TempMap, Strategy) -> + case Src2 of + #sparc_temp{} -> fix_src2(Src2, TempMap, Strategy); + _ -> {[], Src2, false} + end. + +fix_src3_or_imm(Src3, TempMap, Strategy) -> + case Src3 of + #sparc_temp{} -> fix_src3(Src3, TempMap, Strategy); + _ -> {[], Src3, false} + end. + +fix_src1(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp1(Strategy)). + +temp1('new') -> []; +temp1('fixed') -> hipe_sparc_registers:temp1(). + +fix_src2(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp2(Strategy)). + +temp2('new') -> []; +temp2('fixed') -> hipe_sparc_registers:temp2(). + +fix_src3(Src, TempMap, Strategy) -> + fix_src(Src, TempMap, temp3(Strategy)). + +temp3('new') -> []; +temp3('fixed') -> hipe_sparc_registers:temp3(). + +fix_src(Src, TempMap, RegOpt) -> + case temp_is_spilled(Src, TempMap) of + true -> + NewSrc = clone(Src, RegOpt), + {[hipe_sparc:mk_pseudo_move(Src, NewSrc)], NewSrc, true}; + _ -> + {[], Src, false} + end. + +fix_dst(Dst, TempMap, Strategy) -> + case temp_is_spilled(Dst, TempMap) of + true -> + NewDst = clone(Dst, temp1(Strategy)), + {[hipe_sparc:mk_pseudo_move(NewDst, Dst)], NewDst, true}; + _ -> + {[], Dst, false} + end. + +%%% Check if an operand is a pseudo-temp. + +temp_is_spilled(Temp, []) -> % special case for naive regalloc + not(hipe_sparc:temp_is_precoloured(Temp)); +temp_is_spilled(Temp, TempMap) -> + case hipe_sparc:temp_is_allocatable(Temp) of + true -> + Reg = hipe_sparc:temp_reg(Temp), + tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap); + false -> true + end. + +%%% Make a certain reg into a clone of Temp. + +clone(Temp, RegOpt) -> + Type = hipe_sparc:temp_type(Temp), + case RegOpt of + [] -> hipe_sparc:mk_new_temp(Type); + Reg -> hipe_sparc:mk_temp(Reg, Type) + end. diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl new file mode 100644 index 0000000000..17dc0f88d5 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl @@ -0,0 +1,120 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_ra_postconditions_fp). + +-export([check_and_rewrite/2]). + +-include("hipe_sparc.hrl"). + +check_and_rewrite(Defun, Coloring) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp), + #defun{code=Code0} = Defun, + {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), + VarRange = {0, hipe_gensym:get_var(sparc)}, + {Defun#defun{code=Code1, var_range=VarRange}, + DidSpill}. + +do_insns([I|Insns], TempMap, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap), + do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap) -> + case I of + #fp_binary{} -> do_fp_binary(I, TempMap); + #fp_unary{} -> do_fp_unary(I, TempMap); + #pseudo_fload{} -> do_pseudo_fload(I, TempMap); + #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap); + #pseudo_fstore{} -> do_pseudo_fstore(I, TempMap); + _ -> {[I], false} + end. + +%%% Fix relevant instruction types. + +do_fp_binary(I=#fp_binary{src1=Src1,src2=Src2,dst=Dst}, TempMap) -> + {FixSrc1,NewSrc1,DidSpill1} = fix_src(Src1, TempMap), + {FixSrc2,NewSrc2,DidSpill2} = fix_src(Src2, TempMap), + {FixDst,NewDst,DidSpill3} = fix_dst(Dst, TempMap), + NewI = I#fp_binary{src1=NewSrc1,src2=NewSrc2,dst=NewDst}, + {FixSrc1 ++ FixSrc2 ++ [NewI | FixDst], DidSpill1 or DidSpill2 or DidSpill3}. + +do_fp_unary(I=#fp_unary{src=Src,dst=Dst}, TempMap) -> + {FixSrc,NewSrc,DidSpill1} = fix_src(Src, TempMap), + {FixDst,NewDst,DidSpill2} = fix_dst(Dst, TempMap), + NewI = I#fp_unary{src=NewSrc,dst=NewDst}, + {FixSrc ++ [NewI | FixDst], DidSpill1 or DidSpill2}. + +do_pseudo_fload(I=#pseudo_fload{dst=Dst}, TempMap) -> + {FixDst,NewDst,DidSpill} = fix_dst(Dst, TempMap), + NewI = I#pseudo_fload{dst=NewDst}, + {[NewI | FixDst], DidSpill}. + +do_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, TempMap) -> + case temp_is_spilled(Dst, TempMap) of + true -> + {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), + NewI = I#pseudo_fmove{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}; + _ -> + {[I], false} + end. + +do_pseudo_fstore(I=#pseudo_fstore{src=Src}, TempMap) -> + {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), + NewI = I#pseudo_fstore{src=NewSrc}, + {FixSrc ++ [NewI], DidSpill}. + +%%% Fix Dst and Src operands. + +fix_src(Src, TempMap) -> + case temp_is_spilled(Src, TempMap) of + true -> + NewSrc = clone(Src), + {[hipe_sparc:mk_pseudo_fmove(Src, NewSrc)], NewSrc, true}; + _ -> + {[], Src, false} + end. + +fix_dst(Dst, TempMap) -> + case temp_is_spilled(Dst, TempMap) of + true -> + NewDst = clone(Dst), + {[hipe_sparc:mk_pseudo_fmove(NewDst, Dst)], NewDst, true}; + _ -> + {[], Dst, false} + end. + +%%% Check if an operand is a pseudo-temp. + +temp_is_spilled(Temp, TempMap) -> + case hipe_sparc:temp_is_allocatable(Temp) of + true -> + Reg = hipe_sparc:temp_reg(Temp), + tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap); + false -> true + end. + +%%% Create a new temp with the same type as an old one. + +clone(Temp) -> + Type = hipe_sparc:temp_type(Temp), % XXX: always double? + hipe_sparc:mk_new_temp(Type). diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl new file mode 100644 index 0000000000..adb01a65ca --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_registers.erl @@ -0,0 +1,291 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_registers). + +-export([reg_name_gpr/1, + reg_name_fpr/1, + first_virtual/0, + is_precoloured_gpr/1, + is_precoloured_fpr/1, + all_precoloured/0, % for coalescing ra + return_value/0, + temp1/0, + temp2/0, + temp3/0, + heap_pointer/0, + stack_pointer/0, + proc_pointer/0, + return_address/0, + g0/0, + %% heap_limit/0, + %% fcalls/0, + allocatable_gpr/0, % for coalescing ra + allocatable_fpr/0, + is_fixed/1, % for graph_coloring ra + nr_args/0, + arg/1, + args/1, + is_arg/1, % for linear_scan ra + call_clobbered/0, + tailcall_clobbered/0, + live_at_return/0 + ]). + +-include("../rtl/hipe_literals.hrl"). + +-define(G0, 0). +-define(G1, 1). +-define(G2, 2). +-define(G3, 3). +-define(G4, 4). +-define(G5, 5). +-define(G6, 6). +-define(G7, 7). +-define(O0, 8). +-define(O1, 9). +-define(O2, 10). +-define(O3, 11). +-define(O4, 12). +-define(O5, 13). +-define(O6, 14). +-define(O7, 15). +-define(L0, 16). +-define(L1, 17). +-define(L2, 18). +-define(L3, 19). +-define(L4, 20). +-define(L5, 21). +-define(L6, 22). +-define(L7, 23). +-define(I0, 24). +-define(I1, 25). +-define(I2, 26). +-define(I3, 27). +-define(I4, 28). +-define(I5, 29). +-define(I6, 30). +-define(I7, 31). +-define(LAST_PRECOLOURED,31). % must handle both GRP and FPR ranges + +-define(ARG0, ?O1). +-define(ARG1, ?O2). +-define(ARG2, ?O3). +-define(ARG3, ?O4). +-define(ARG4, ?O5). +-define(ARG5, ?O0). + +-define(TEMP1, ?I3). % stores RA around inc_stack calls, must be C calleE-save +-define(TEMP2, ?I4). +-define(TEMP3, ?I5). + +-define(RETURN_VALUE, ?O0). +-define(HEAP_POINTER, ?I2). +-define(STACK_POINTER, ?I1). +-define(PROC_POINTER, ?I0). + +reg_name_gpr(R) -> + case R of + ?G0 -> "%g0"; + ?G1 -> "%g1"; + ?G2 -> "%g2"; + ?G3 -> "%g3"; + ?G4 -> "%g4"; + ?G5 -> "%g5"; + ?G6 -> "%g6"; + ?G7 -> "%g7"; + ?O0 -> "%o0"; + ?O1 -> "%o1"; + ?O2 -> "%o2"; + ?O3 -> "%o3"; + ?O4 -> "%o4"; + ?O5 -> "%o5"; + ?O6 -> "%sp"; + ?O7 -> "%o7"; + ?L0 -> "%l0"; + ?L1 -> "%l1"; + ?L2 -> "%l2"; + ?L3 -> "%l3"; + ?L4 -> "%l4"; + ?L5 -> "%l5"; + ?L6 -> "%l6"; + ?L7 -> "%l7"; + ?I0 -> "%i0"; + ?I1 -> "%i1"; + ?I2 -> "%i2"; + ?I3 -> "%i3"; + ?I4 -> "%i4"; + ?I5 -> "%i5"; + ?I6 -> "%fp"; + ?I7 -> "%i7"; + %% to handle code before regalloc: + _ -> "%r" ++ integer_to_list(R) + end. + +reg_name_fpr(R) -> [$f | integer_to_list(2*R)]. + +%%% Must handle both GPR and FPR ranges. +first_virtual() -> ?LAST_PRECOLOURED + 1. + +%%% These two tests have the same implementation, but that's +%%% not something we should cast in stone in the interface. +is_precoloured_gpr(R) -> R =< ?LAST_PRECOLOURED. +is_precoloured_fpr(R) -> R =< ?LAST_PRECOLOURED. + +all_precoloured() -> + %% <%g6, %g7, %o6, %i6> should be skipped as they are unused. + %% Unfortunately, gaps in the list of precoloured registers + %% cause the graph_color register allocator to create bogus + %% assignments for those "registers", which in turn causes + %% the "precoloured reg must map to itself" sanity check in + %% the frame module to signal errors. + [?G0, ?G1, ?G2, ?G3, ?G4, ?G5, ?G6, ?G7, + ?O0, ?O1, ?O2, ?O3, ?O4, ?O5, ?O6, ?O7, + ?L0, ?L1, ?L2, ?L3, ?L4, ?L5, ?L6, ?L7, + ?I0, ?I1, ?I2, ?I3, ?I4, ?I5, ?I6, ?I7]. + +return_value() -> ?RETURN_VALUE. + +temp1() -> ?TEMP1. +temp2() -> ?TEMP2. +temp3() -> ?TEMP3. + +heap_pointer() -> ?HEAP_POINTER. + +stack_pointer() -> ?STACK_POINTER. + +proc_pointer() -> ?PROC_POINTER. + +return_address() -> ?O7. + +g0() -> ?G0. + +allocatable_gpr() -> + %% %g0 is not writable + %% %g6, %g7, %o6, and %i6 are reserved for C + %% %i0, %i1, and %i2 are fixed global registers + %% %i4 may be used by the frame module for large load/store offsets + [ ?G1, ?G2, ?G3, ?G4, ?G5, + ?O0, ?O1, ?O2, ?O3, ?O4, ?O5, ?O7, + ?L0, ?L1, ?L2, ?L3, ?L4, ?L5, ?L6, ?L7, + ?I3, ?I5, ?I7]. + +allocatable_fpr() -> + %% We expose 16 virtual fp regs, 0-15, corresponding to the + %% f0/f2/f4/.../f28/f30 double-precision hardware fp regs. + %% The mapping is done by reg_name_fpr/1 and the assembler. + %% We ignore f32/.../f60 since they cannot be used in loads + %% or stores for non 8-byte aligned addresses. + [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]. + +%% Needed for hipe_graph_coloring_regalloc. +%% Presumably true for Reg in AllPrecoloured \ Allocatable. +is_fixed(Reg) -> + case Reg of + ?HEAP_POINTER -> true; + ?STACK_POINTER -> true; + ?PROC_POINTER -> true; + %% The following cases are required for linear scan: + %% it gets confused if it sees a register which is + %% neither allocatable nor global (fixed or one of + %% the scratch registers set aside for linear scan). + ?G0 -> true; + ?G6 -> true; + ?G7 -> true; + ?O6 -> true; + ?I6 -> true; + _ -> false + end. + +nr_args() -> ?SPARC_NR_ARG_REGS. + +args(Arity) when is_integer(Arity) -> + N = erlang:min(Arity, ?SPARC_NR_ARG_REGS), + args(N-1, []). + +args(I, Rest) when is_integer(I), I < 0 -> Rest; +args(I, Rest) -> args(I-1, [arg(I) | Rest]). + +arg(N) -> + if N < ?SPARC_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + 5 -> ?ARG5 + end + end. + +is_arg(R) -> + case R of + ?ARG0 -> ?SPARC_NR_ARG_REGS > 0; + ?ARG1 -> ?SPARC_NR_ARG_REGS > 1; + ?ARG2 -> ?SPARC_NR_ARG_REGS > 2; + ?ARG3 -> ?SPARC_NR_ARG_REGS > 3; + ?ARG4 -> ?SPARC_NR_ARG_REGS > 4; + ?ARG5 -> ?SPARC_NR_ARG_REGS > 5; + _ -> false + end. + +call_clobbered() -> % does the RA strip the type or not? + [%% ?G0 is the non-allocatable constant zero + {?G1,tagged},{?G1,untagged}, + {?G2,tagged},{?G2,untagged}, + {?G3,tagged},{?G3,untagged}, + {?G4,tagged},{?G4,untagged}, + {?G5,tagged},{?G5,untagged}, + %% ?G6 is reserved for C + %% ?G7 is reserved for C + {?O0,tagged},{?O0,untagged}, + {?O1,tagged},{?O1,untagged}, + {?O2,tagged},{?O2,untagged}, + {?O3,tagged},{?O3,untagged}, + {?O4,tagged},{?O4,untagged}, + {?O5,tagged},{?O5,untagged}, + %% ?O6 is reserved for C + {?O7,tagged},{?O7,untagged}, + {?L0,tagged},{?L0,untagged}, + {?L1,tagged},{?L1,untagged}, + {?L2,tagged},{?L2,untagged}, + {?L3,tagged},{?L3,untagged}, + {?L4,tagged},{?L4,untagged}, + {?L5,tagged},{?L5,untagged}, + {?L6,tagged},{?L6,untagged}, + {?L7,tagged},{?L7,untagged}, + %% ?I0 is fixed (P) + %% ?I1 is fixed (NSP) + %% ?I2 is fixed (HP) + {?I3,tagged},{?I3,untagged}, + {?I4,tagged},{?I4,untagged}, + {?I5,tagged},{?I5,untagged}, + %% ?I6 is reserved for C + {?I7,tagged},{?I7,untagged} + ]. + +tailcall_clobbered() -> % tailcall crapola needs one temp + [{?TEMP1,tagged},{?TEMP1,untagged}]. + +live_at_return() -> + [{?HEAP_POINTER,untagged}, + {?STACK_POINTER,untagged}, + {?PROC_POINTER,untagged} + ]. diff --git a/lib/hipe/ssa/hipe_ssa.inc b/lib/hipe/ssa/hipe_ssa.inc new file mode 100644 index 0000000000..d15b5ddd56 --- /dev/null +++ b/lib/hipe/ssa/hipe_ssa.inc @@ -0,0 +1,978 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%---------------------------------------------------------------------- +%% File : hipe_ssa.inc +%% Authors : Christoffer Vikström, Daniel Deogun, and Jesper Bengtsson +%% Created : March 2002 +%% Purpose : Provides code which converts the code of a CFG into SSA +%% (Static Single Assignment) form and back. +%% A routine to check for SSA-ness is also provided. +%% +%% Major Modifications: +%% * Feb 2003: Per Gustafsson - added SSA checker. +%% * Aug 2003: Per Gustafsson - added removal of dead code. +%% * Feb 2004: Kostis Sagonas - made it work on RTL level too. +%% * Feb 2004: Tobias Lindahl - re-wrote the unconvert/1 function. +%%---------------------------------------------------------------------- + +-export([convert/1, check/1, unconvert/1, remove_dead_code/1]). + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). %% needed for the specs +-include("../ssa/hipe_ssa_liveness.inc"). %% needed for dead code removal + +%%---------------------------------------------------------------------- +%% +%% NOTE! When the phi-instructions are placed, it is important that +%% the internal order is preserved. Otherwise the (correct) order: +%% +%% v1 := phi({1, v2}, {2, v11}) +%% v2 := phi({1, v11}, {2, v12}) +%% +%% can become (the incorrect) +%% +%% v2 := phi({1, v11}, {2, v12}) +%% v1 := phi({1, v2}, {2, v11}) +%% +%% that will set v1 to the _new_ value of v2 instead of the old value. +%% +%%---------------------------------------------------------------------- + +-spec convert(#cfg{}) -> #cfg{}. + +convert(CFG) -> + CFG1 = insertNewStartNode(CFG), + + ?opt_start_timer("Dominator Tree construction"), + DomTree = hipe_dominators:domTree_create(CFG1), + ?opt_stop_timer("Dominator Tree construction done"), + + ?opt_start_timer("Dominance Frontier"), + DomFrontier = hipe_dominators:domFrontier_create(CFG1, DomTree), + ?opt_stop_timer("Dominance Frontier done"), + + ?opt_start_timer("placement of Phi-nodes"), + CFG2 = place_phi(CFG1, DomFrontier), + ?opt_stop_timer("placement of Phi-nodes done"), + + ?opt_start_timer("Rename"), + CFG3 = rename(CFG2, DomTree), + ?opt_stop_timer("Rename done"), + + CFG3. + +%%---------------------------------------------------------------------- + +insertNewStartNode(CFG) -> + StartLabel = ?CFG:start_label(CFG), + NewStartLabel = ?CODE:label_name(?CODE:mk_new_label()), + BB = hipe_bb:mk_bb([?CODE:mk_goto(StartLabel)]), + CFG2 = ?CFG:bb_add(CFG, NewStartLabel, BB), + ?CFG:start_label_update(CFG2, NewStartLabel). + + +%%====================================================================== +%% PlacePhi Algorithm +%%====================================================================== + +%%---------------------------------------------------------------------- +%% Procedure : place_phi/2 +%% Purpose : Places phi nodes at appropriate places in the CFG. +%% Arguments : CFG - Control Flow Graph. +%% DF - Dominance Frontier. +%% Returns : CFG with phi functions. +%%---------------------------------------------------------------------- + +place_phi(CFG, DF) -> + AssMap = insertParams(CFG), + AssMap2 = preProcess(CFG, AssMap), + VarList = gb_trees:to_list(AssMap2), + Liveness = ?LIVENESS:analyze(CFG), + variableTraverse(CFG, DF, gb_trees:empty(), gb_trees:empty(), + 0, AssMap2, Liveness, VarList). + +%%---------------------------------------------------------------------- +%% Procedure : insertParams/1 +%% Purpose : Inserts the parameters of the CFG into the AssMap. +%% Arguments : CFG - Control Flow Graph +%% Returns : AssMap - Assignment map. +%%---------------------------------------------------------------------- + +insertParams(CFG) -> + StartLabel = ?CFG:start_label(CFG), + Params = ?CFG:params(CFG), + insertParams(Params, StartLabel, gb_trees:empty()). + +insertParams([Param|T], StartLabel, AssMap) -> + insertParams(T, StartLabel, gb_trees:insert(Param, [StartLabel], AssMap)); +insertParams([], _, AssMap) -> AssMap. + +%%---------------------------------------------------------------------- +%% Procedure : preProcessg/2 +%% Purpose : Creates the assignment map. +%% Arguments : CFG - Control Flow Graph +%% AssMap - Assignment map +%% Returns : AssMap. +%%---------------------------------------------------------------------- + +preProcess(CFG, AssMap) -> + traverseLabels(CFG, ?CFG:labels(CFG), AssMap). + +%%---------------------------------------------------------------------- +%% Procedure : traverseLabels/3 +%% Purpose : Traverses all labels and adds all assignments in the basic +%% block to the assignment map. +%% Arguments : CFG - Control Flow Graph +%% AssMap - Assignment Map +%% Label - A label for a node +%% Returns : AssMap. +%%---------------------------------------------------------------------- + +traverseLabels(CFG, [Label|T], AssMap) -> + Code = get_code_from_label(CFG, Label), + NewVarList = getAssignments(Code), + traverseLabels(CFG, T, updateAssMap(NewVarList, Label, AssMap)); +traverseLabels(_, [], AssMap) -> AssMap. + +%%---------------------------------------------------------------------- +%% Procedure : getAssignments/1 +%% Purpose : Retrieves all assigned variables in a basic block. +%% Arguments : InstrLst - A list of instructions from a basic block. +%% VarList - A list of variables. +%% Returns : VarList. +%% Notes : This function may return a list containing duplicates. +%%---------------------------------------------------------------------- + +getAssignments(InstrList) -> getAssignments(InstrList, []). + +getAssignments([Instr|T], VarList) -> + getAssignments(T, defs_to_rename(Instr) ++ VarList); +getAssignments([], VarList) -> VarList. + +%%---------------------------------------------------------------------- +%% Procedure : updateAssMap/3 +%% Purpose : Updates the assignment map with. Each variable in the AssVar +%% list is inserted with the value Label. +%% Arguments : Label - a label of a node +%% AssVar - a variable that is assigned at Label +%% AssMap - Assignment map. +%% Returns : AssMap. +%%---------------------------------------------------------------------- + +updateAssMap([AssVar|T], Label, AssMap) -> + Lst = getAssMap(AssVar, AssMap), + updateAssMap(T, Label, gb_trees:enter(AssVar, [Label|Lst], AssMap)); +updateAssMap([], _, AssMap) -> AssMap. + +getAssMap(AssVar, AssMap) -> + case gb_trees:lookup(AssVar, AssMap) of + {value, L} -> L; + none -> [] + end. + +%%---------------------------------------------------------------------- +%% Procedure : variableTraverse/7 +%% Purpose : This function traverses all variables and adds phi functions +%% at appropriate nodes. +%% Arguments : CFG - Control Flow Graph +%% DFMap - Dominance Frontier Map +%% HasAlready - A map of nodes which already have phi functions +%% Work - +%% IterCount - Counter of how many iterations have been done +%% AssMap - Assignment map +%% VarLst - Variable list that is traversed +%% Returns : CFG. +%%---------------------------------------------------------------------- + +variableTraverse(CFG, DFMap, HasAlready, Work, + IterCount, AssMap, Liveness, [{Var,_}|VarLst]) -> + IterCount2 = IterCount + 1, + DefLst = getAssMap(Var, AssMap), + {Work2, WorkLst2} = workListBuilder(DefLst, Work, [], IterCount2), + {CFG2, HasAlready2, Work3} = doWork(CFG, DFMap, HasAlready, + Work2, IterCount2, WorkLst2, + Var, Liveness), + variableTraverse(CFG2, DFMap, HasAlready2, Work3, + IterCount2, AssMap, Liveness, VarLst); +variableTraverse(CFG, _, _, _, _, _, _, []) -> CFG. + +%%---------------------------------------------------------------------- +%% Procedure : workListBuilder/4 +%% Purpose : Builds the worklist that the algorithm is working on. +%% Arguments : Work - +%% WorkLst - The worklist that is worked through +%% IterCount - Counter of how many itterations that has been done +%% Node - A node in the CFG +%% Returns : +%%---------------------------------------------------------------------- + +workListBuilder([Node|T], Work, WorkLst, IterCount) -> + case getCount(Node, Work) of + 0 -> + Work2 = gb_trees:enter(Node, IterCount, Work), + workListBuilder(T, Work2, [Node|WorkLst], IterCount); + _ -> + Work2 = gb_trees:enter(Node, IterCount, Work), + workListBuilder(T, Work2, [Node|WorkLst], IterCount) + end; +workListBuilder([], Work, WorkLst, _IterCount) -> + {Work, WorkLst}. + +getCount(Key, Dict) -> + case gb_trees:lookup(Key, Dict) of + {value, V} -> V; + none -> 0 + end. + +%%---------------------------------------------------------------------- +%% Procedure : doWork/7 +%% Purpose : This procedure works itself through the worklist and checks +%% if a node needs any phi functions. +%% Arguments : CFG - Control Flow Graph +%% DFMap - Dominance Frontier Map +%% HasAlready - A map of nodes that already have phi functions +%% Work - +%% IterCount - Counter of how many iterations have taken place +%% WorkLst - The worklist that is worked through +%% Var - Variable +%% Returns : {CFG, HasAlready, Work} +%%---------------------------------------------------------------------- + +doWork(CFG, DFMap, HasAlready, Work, IterCount, + [Node|WorkLst], Var, Liveness) -> + DFofX = hipe_dominators:domFrontier_get(Node, DFMap), + {CFG2, HasAlready2, Work2, WorkLst2} = + checkPhiNeeds(CFG, DFofX, HasAlready, Work, + IterCount, WorkLst, Var, Liveness), + doWork(CFG2, DFMap, HasAlready2, Work2, + IterCount, WorkLst2, Var, Liveness); +doWork(CFG, _, HasAlready, Work, _, [], _, _) -> + {CFG, HasAlready, Work}. + +%%---------------------------------------------------------------------- +%% Procedure : checkPhiNeeds/7 +%% Purpose : This function checks if a node needs a phi function and adds +%% one if its needed. +%% Arguments : CFG - Control Flow Graph +%% DFofX - Dominance Frontier of a node +%% HasAlready - A map of nodes that already have phi functions +%% Work - +%% IterCount - Counter of how many iterations have taken place +%% WorkLst - The worklist that is worked through +%% Var - Variable +%% Returns : {CFG, HasAlready, Work, WorkLst} +%%---------------------------------------------------------------------- + +checkPhiNeeds(CFG, [Node|DFofX], HasAlready, Work, + IterCount, WorkLst, Var, Liveness) -> + case getCount(Node, HasAlready) < IterCount of + true -> + LiveIn = ?LIVENESS:livein(Liveness, Node), + case lists:member(Var, LiveIn) of + true -> + CFG2 = insertPhiCode(CFG, Node, Var), + HasAlready2 = gb_trees:enter(Node, IterCount, HasAlready), + case getCount(Node, Work) < IterCount of + true -> + Work2 = gb_trees:enter(Node, IterCount, Work), + WorkLst2 = [Node|WorkLst], + checkPhiNeeds(CFG2, DFofX, HasAlready2, Work2, + IterCount, WorkLst2, Var, Liveness); + false -> + checkPhiNeeds(CFG2, DFofX, HasAlready2, Work, + IterCount, WorkLst, Var, Liveness) + end; + false -> + checkPhiNeeds(CFG, DFofX, HasAlready, Work, IterCount, + WorkLst, Var, Liveness) + end; + false -> + checkPhiNeeds(CFG, DFofX, HasAlready, Work, IterCount, + WorkLst, Var, Liveness) + end; +checkPhiNeeds(CFG, [], HasAlready, Work, _, WorkLst, _, _) -> + {CFG, HasAlready, Work, WorkLst}. + +%%---------------------------------------------------------------------- +%% Procedure : insertPhiCode/3 +%% Purpose : +%% Arguments : CFG - Control Flow Graph +%% Node - A node +%% Var - A variable +%% Returns : CFG +%%---------------------------------------------------------------------- + +insertPhiCode(CFG, Node, Var) -> + BB = ?CFG:bb(CFG, Node), + Phi = ?CODE:mk_phi(Var), + Code = [Phi | hipe_bb:code(BB)], + ?CFG:bb_add(CFG, Node, hipe_bb:code_update(BB, Code)). + + +%%====================================================================== +%% SSA Renaming pass +%%====================================================================== + +%%---------------------------------------------------------------------- +%% Procedure : rename/2 +%% Purpose : Renames all the variables in the CFG according to the SSA +%% conversion algorithm. +%% Arguments : CFG - The CFG being translated. +%% DomTree - The dominator tree of the CFG. +%% Returns : A CFG where all variables are renamed. +%%---------------------------------------------------------------------- + +rename(CFG, DomTree) -> + %% Reset the appropriate variable index so that we start from low + %% variable numbers again + reset_var_indx(), + {CFG2,Current} = insertRenamedParams(CFG), + rename(CFG2, ?CFG:start_label(CFG2), DomTree, Current). + +rename(CFG, Node, DomTree, Current) -> + BB = ?CFG:bb(CFG, Node), + Statements = hipe_bb:code(BB), + {Statements2,Current2} = renameVars(Statements, Current), + CFG1 = ?CFG:bb_add(CFG, Node, hipe_bb:code_update(BB, Statements2)), + Succ = ?CFG:succ(CFG1, Node), + CFG2 = updateSuccPhi(Succ, Node, CFG1, Current2), + Children = hipe_dominators:domTree_getChildren(Node, DomTree), + childrenRename(Children, CFG2, DomTree, Current2). + +%%---------------------------------------------------------------------- +%% Procedure : childrenRename/5 +%% Purpose : Renames all the nodes in a list according to the SSA +%% conversion algorithm. +%% Arguments : ChildList - the list of nodes being renamed +%% CFG - the CFG that the children are a part of +%% DomTree - The dominator tree for the CFG +%% Current - the current index of all variables encountered +%% Returns : CFG +%%---------------------------------------------------------------------- + +childrenRename([Child|Children], CFG, DomTree, Current) -> + CFG2 = rename(CFG, Child, DomTree, Current), + childrenRename(Children, CFG2, DomTree, Current); +childrenRename([], CFG, _, _) -> + CFG. + +%%---------------------------------------------------------------------- +%% Procedure : renameVars/3 +%% Purpose : Renames the variables in basic block +%% Arguments : Statements - the basic block +%% Current - the current index of all variables encountered +%% Returns : {Statements,Current} +%%---------------------------------------------------------------------- + +renameVars(Statements, Current) -> + renameVars(Statements, Current, []). + +renameVars([Statement|Statements], Current, Result) -> + Statement2 = renameUses(Statement, Current), + {Statement3,Current2} = renameDefs(Statement2, Current), + renameVars(Statements, Current2, [Statement3|Result]); +renameVars([], Current, Result) -> + {lists:reverse(Result),Current}. + +%%---------------------------------------------------------------------- +%% Procedure : renameUses/2 +%% Purpose : Renames all the uses of a variable in a statement. +%% Arguments : Statement - the statement being renamed. +%% Current - the current index of all variables encountered. +%% Returns : Statement +%%---------------------------------------------------------------------- + +renameUses(Statement, Current) -> + case ?CODE:is_phi(Statement) of + true -> Statement; + false -> VarList = uses_to_rename(Statement), + updateStatementUses(VarList, Statement, Current) + end. + +%%---------------------------------------------------------------------- +%% Procedure : updateStatementUses/3 +%% Purpose : Traverses the variable list and renames all the instances +%% of a variable in the Statement uses to its current value. +%% Arguments : VarList - the list of variables being updated. +%% Statement - the statement being updated. +%% Current - the current index of all variables encountered. +%% Returns : An updated statement. +%%---------------------------------------------------------------------- + +updateStatementUses(Vars, Statement, Current) -> + Substs = [{Var,gb_trees:get(Var, Current)} || Var <- Vars], + ?CODE:subst_uses(Substs, Statement). + +%%---------------------------------------------------------------------- +%% Procedure : renameDefs/3 +%% Purpose : Renames all the definitons in Statement. +%% Arguments : Statement - the statement where the definitions are being +%% renamed. +%% Current - the current index of all variables encountered. +%% Returns : Statement +%%---------------------------------------------------------------------- + +renameDefs(Statement, Current) -> + VarList = defs_to_rename(Statement), + updateStatementDefs(VarList, Statement, Current). + +%%---------------------------------------------------------------------- +%% Procedure : updateStatementDefs/4 +%% Purpose : traverses a variable list and exchanges all instances of +%% the variable in the statements definitions by its current +%% value. +%% Arguments : VariableList - the list of varibles being renamed +%% Statement - the statement whos definitions are being changed +%% Current - the current index of all variables encountered +%% Returns : {Statement, Current} +%% Notes : Per Gustafsson: +%% I changed this function to update the statement only when +%% all substitutions are found. +%%---------------------------------------------------------------------- + +updateStatementDefs(Vars, Statement, Current) -> + updateStatementDefs(Vars, Statement, Current, []). + +updateStatementDefs([Var|Vars], Statement, Current, Acc) -> + {NewVar,Current2} = updateIndices(Current, Var), + updateStatementDefs(Vars, Statement, Current2, [{Var,NewVar}|Acc]); +updateStatementDefs([], Statement, Current, Acc) -> + Statement2 = ?CODE:subst_defines(Acc, Statement), + {Statement2,Current}. + +%%---------------------------------------------------------------------- +%% Procedure : updateIndices/3 +%% Purpose : This function is used for updating the Current hash table +%% and for getting a new variable/fp variable/register. +%% Arguments : Current - Hash table containg the current index for a +%% particular variable. +%% Variable - The variable that is used as key in the hash table. +%% Returns : A two-tuple containing the new variable and Current. +%%---------------------------------------------------------------------- + +updateIndices(Current, Variable) -> + case ?CODE:is_var(Variable) of + true -> + NewVar = ?CODE:mk_new_var(), + {NewVar,gb_trees:enter(Variable, NewVar, Current)}; + false -> + case is_fp_temp(Variable) of + true -> + NewFVar = mk_new_fp_temp(), + {NewFVar,gb_trees:enter(Variable, NewFVar, Current)}; + false -> + NewReg = ?CODE:mk_new_reg(), + {NewReg,gb_trees:enter(Variable, NewReg, Current)} + end + end. + +%%---------------------------------------------------------------------- +%% Procedure : updateSuccPhi/4 +%% Purpose : This function is used for updating phi functions in a +%% particular node's successors. That is, the function +%% traverses the successor list of a node and updates the +%% arguments in the phi function calls. +%% Arguments : Succ - A successor to the node Parent. +%% T - The remainder of the successor list +%% Parent - The parent of the node Succ +%% CFG - Control Flow Graph +%% Current - Hash table containg the current index for a +%% particular variable +%% Returns : An updated version of the CFG +%%---------------------------------------------------------------------- + +updateSuccPhi([Succ|T], Parent, CFG, Current) -> + CFG2 = updatePhi(Succ, Parent, CFG, Current), + updateSuccPhi(T, Parent, CFG2, Current); +updateSuccPhi([], _, CFG, _) -> + CFG. + +%%---------------------------------------------------------------------- +%% Procedure : updatePhi/4 +%% Purpose : This function prepares for an update of a phi function call. +%% That is, if a statement contains a phi function call +%% then the number of predecessors are computed and the index +%% of the parent in the predecessor list is used for computing +%% which variable in the argument list of the phi function call +%% that need to be updated. +%% Arguments : Node - A node in the CFG +%% Parent - The parent of the node Node in the dominator tree +%% CFG - Control Flow Graph +%% Current - Hash table containg the current index for a +%% particular variable +%% Returns : An updated version of the CFG +%%---------------------------------------------------------------------- + +updatePhi(Node, Parent, CFG, Current) -> + BB = ?CFG:bb(CFG, Node), + case hipe_bb:code(BB) of + [Code|_] = Statements -> + case ?CODE:is_phi(Code) of + true -> + Code2 = updateCode(Statements, Parent, Current), + ?CFG:bb_add(CFG, Node, hipe_bb:code_update(BB, Code2)); + _ -> + CFG + end; + _ -> + CFG + end. + +%%---------------------------------------------------------------------- +%% Procedure : updateCode/3 +%% Purpose : This function updates a statement that contains a phi +%% function, i.e. it changes the arguments in the phi +%% function to their correct names. +%% Arguments : Code - A list of code +%% Pred - A predecessor of the node containing the +%% phi-function +%% Current - Hash table containing the current index for a +%% particular variable +%% Returns : A list of Code +%%---------------------------------------------------------------------- + +updateCode(Code, Pred, Current) -> + updateCode(Code, Pred, Current, []). + +updateCode([Stat|Stats] = Statements, Pred, Current, Result) -> + case ?CODE:is_phi(Stat) of + true -> + Var = ?CODE:phi_id(Stat), + Result2 = case gb_trees:lookup(Var, Current) of + none -> + [Stat|Result]; + {value,Var2} -> + Stat2 = ?CODE:phi_enter_pred(Stat, Pred, Var2), + [Stat2|Result] + end, + updateCode(Stats, Pred, Current, Result2); + _ -> + Result ++ Statements + end. + +%%---------------------------------------------------------------------- +%% Procedure : insertRenamedParams/1 +%% Purpose : Inserts the parameters of the CFG into the working hashmaps. +%% Arguments : CFG - the target control flow graph. +%% Returns : {CFG,Current} +%%---------------------------------------------------------------------- + +insertRenamedParams(CFG) -> + Params = ?CFG:params(CFG), + %% Current - the current variable we are working on. + {Current,Params2} = insertRenamedParams(Params, gb_trees:empty(), []), + CFG2 = ?CFG:params_update(CFG, Params2), + {CFG2,Current}. + +insertRenamedParams([Param|Params], Current, Result) -> + {Var,Current2} = updateIndices(Current, Param), + insertRenamedParams(Params, Current2, [Var|Result]); +insertRenamedParams([], Current, Result) -> + {Current,lists:reverse(Result)}. + + +%%====================================================================== +%% SSA Checker +%%====================================================================== + +%% +%% @doc Checks the control flow graph CFG of a function for SSA-ness. +%% More specifically, it checks that all variables in the CFG are only +%% defined once and that all uses of each variable in the function are +%% dominated by a define. If a variable does not abide by these rules, +%% a warning message will be printed on stdout. +%% +-spec check(#cfg{}) -> 'ok'. + +check(CFG) -> + Labels = ?CFG:labels(CFG), + VarTree = traverse_labels(Labels, CFG), + DomTree = hipe_dominators:domTree_create(CFG), + test_uses(Labels, VarTree, DomTree, CFG). + +%% +%% @doc Traverses all the labels in a CFG. +%% +traverse_labels(Labels, CFG) -> + VarTree = add_args(?CFG:params(CFG)), + traverse_labels(Labels, VarTree, CFG). + +traverse_labels([Label|Rest], VarTree, CFG) -> + Code = get_code_from_label(CFG, Label), + NewVarTree = traverse_code(Code, VarTree, Label), + traverse_labels(Rest, NewVarTree, CFG); +traverse_labels([], VarTree, _CFG) -> + VarTree. + +%% +%% @doc Traverses the code in a basic block. +%% +traverse_code([Instr|Rest], VarTree, Label) -> + Defined = defs_to_rename(Instr), + NewVarTree = add_to_var_tree(Defined, VarTree, Instr, Label), + traverse_code(Rest, NewVarTree, Label); +traverse_code([], VarTree, _) -> + VarTree. + +%% +%% @doc +%% Adds a variable to the variable tree if the variable is defined. +%% The entry in the variable tree will have the variable as key and a +%% two tuple consisting of a list of Instructions and a list of labels +%% where the variable is defined. If a variable is defined a second +%% time a warning message to this effect is printed on stdout. +%% +add_to_var_tree([Var|Rest], VarTree, Instr, Label) -> + NewVarTree = + case gb_trees:lookup(Var, VarTree) of + {value,{OldInstr,OldLabel}} -> + ?WARNING_MSG("Variable: ~w defined a second time\n"++ + "in Instr: ~w\n"++ + "at Label: ~w\n"++ + "variable was first defined at Label(s) ~w\n"++ + "in Instr(s): ~w\n -> non SSA form\n", + [Var,Instr,Label,OldLabel,OldInstr]), + gb_trees:update(Var, {[Instr|OldInstr],[Label|OldLabel]}, VarTree); + none -> + gb_trees:insert(Var, {[Instr],[Label]}, VarTree) + end, + add_to_var_tree(Rest, NewVarTree, Instr, Label); +add_to_var_tree([], VarTree, _, _) -> + VarTree. + +%% +%% @doc Adds the argument of a function to the VarTree. +%% They are defined at Label 0. +%% +add_args(Args) -> + add_args(Args, gb_trees:empty()). + +add_args([Arg|Rest], VarTree) -> + add_args(Rest, gb_trees:insert(Arg, {[argument_variable],[0]}, VarTree)); +add_args([], VarTree) -> + VarTree. + +%% +%% The functions below test that a use is dominated by a corresponding def. +%% + +%% +%% This function is analogous to traverse_labels. +%% +test_uses([Label|Rest], VarTree, DomTree,CFG) -> + Code = get_code_from_label(CFG, Label), + test_code(Code, VarTree, Label, DomTree, CFG, []), + test_uses(Rest, VarTree, DomTree, CFG); +test_uses([], _VarTree, _DomTree, _CFG) -> + ok. + +%% +%% This function is analogous to traverse_code. +%% +test_code([Instr|Instrs], VarTree, Label, DomTree, CFG, Old) -> + case ?CODE:is_phi(Instr) of + true -> + ArgList = ?CODE:phi_arglist(Instr), + case ArgList of + [_Arg] -> + ?WARNING_MSG("Phi with only one source at BB with label ~w:\n", + [Label]), + %% case ?CODE of + %% hipe_rtl -> ?CODE:pp_block(get_code_from_label(CFG, Label)); + %% _ -> ok + %% end, + ok; + [_|_] -> ok + end, + lists:foreach(fun ({Pred,Var}) -> + def_doms_use([Var], VarTree, Pred, DomTree, + get_code_from_label(CFG,Pred)) + end, ArgList); + false -> + Uses = uses_to_rename(Instr), + def_doms_use(Uses, VarTree, Label, DomTree, Old) + end, + test_code(Instrs, VarTree, Label, DomTree, CFG, [Instr|Old]); +test_code([], _VarTree, _Label, _DomTree, _CFG, _Old) -> + ok. + +get_code_from_label(CFG, Label) -> + case ?CFG:bb(CFG,Label) of + not_found -> + ?error_msg("Basic block with label ~w was not found\n", [Label]); + %% ?EXIT('Detected serious problem in SSA form'); + BB -> + hipe_bb:code(BB) + end. + +%% +%% This function checks whether a use is dominated by a def. +%% There are five different cases: +%% 1. A use of an argument register. This use is dominated by the def. +%% 2. Use and Def in same basic block if Use comes first this will +%% lead to a warning message, otherwise it is ok. +%% 3. The deinition is in a basic block that dominates the basic block +%% of the use. This is ok. +%% 4. The definition is in a basic block that does not dominate the use. +%% This will result in a warning message being printed. +%% 5. A use without any definition. This will result in a warning message +%% being printed. +%% +def_doms_use([Var|Vars], VarTree, Label, DomTree, Old) -> + case gb_trees:lookup(Var, VarTree) of + {value,{_,[DefLabel|_]}} -> + case DefLabel of + 0 -> + ok; + Label -> + Fun = fun(X) -> Defs = defs_to_rename(X), + lists:any(fun(Y) -> Var == Y end, Defs) + end, + case lists:any(Fun, Old) of + true -> + ok; + false -> + ?WARNING_MSG("Variable : ~w used before definition in bb: ~w\n", + [Var,Label]) + end; + _ -> + case hipe_dominators:domTree_dominates(DefLabel, Label, DomTree) of + true -> + ok; + false -> + ?WARNING_MSG("Definition does not dominate use for variable: ~w "++ + "at label: ~w (definition label: ~w)\n", + [Var, Label, DefLabel]) + end + end; + none -> + ?WARNING_MSG("Use with no definition of variable: ~w at label: ~w\n", + [Var, Label]) + end, + def_doms_use(Vars, VarTree, Label, DomTree, Old); +def_doms_use([], _VarTree, _Label, _DomTree, _Old) -> + ok. + + +%%====================================================================== +%% SSA Un-Converter +%%====================================================================== + +%%---------------------------------------------------------------------- +%% Procedure : unconvert/2 +%% Purpose : Removes all phi functions and propagates all +%% assignments up to the appropriate predecessors. +%% Arguments : CFG - Control Flow Graph +%% Node - A node in the CFG +%% Returns : CFG +%% Note : The call to remove_trivial_bbs is needed so that moves, +%% which are introduced in new basic blocks as part of the +%% un-conversion, are merged with the basic blocks of their +%% predecessors, if possible. +%%---------------------------------------------------------------------- + +-spec unconvert(#cfg{}) -> #cfg{}. + +unconvert(CFG) -> + ?CFG:remove_trivial_bbs(unconvert(?CFG:reverse_postorder(CFG), CFG)). + +unconvert([Node|Nodes], CFG) -> + BB = ?CFG:bb(CFG, Node), + Code = hipe_bb:code(BB), + {Phis,Code2} = getPhiFuncts(Code, []), + case Phis of + [] -> + unconvert(Nodes, CFG); + _ -> + BB2 = hipe_bb:code_update(BB, Code2), + CFG2 = ?CFG:bb_add(CFG, Node, BB2), + Pred = ?CFG:pred(CFG2, Node), + PredMoveMap = get_moves(Pred, Phis), + CFG3 = insert_move_bbs(PredMoveMap, Node, CFG2), + unconvert(Nodes, CFG3) + end; +unconvert([], CFG) -> + CFG. + +%%---------------------------------------------------------------------- +%% Procedure : get_moves/2 and /3 +%% Purpose : Find the moves that corresponds to phi-instructions of +%% a block. Try to merge incoming edges to avoid duplicate +%% blocks. +%% Arguments : Preds - The predecessors to this block. +%% Phis - The phi instructions that used to start this block. +%% Returns : [{ListOfMoves, [Preds]}] +%%---------------------------------------------------------------------- + +get_moves(Preds, Phis) -> + get_moves(Preds, Phis, gb_trees:empty()). + +get_moves([Pred|Left], Phis, Map)-> + Moves = get_moves_from_phis(Pred, Phis, []), + NewMap = + case gb_trees:lookup(Moves, Map) of + none -> gb_trees:insert(Moves, [Pred], Map); + {value,List} -> gb_trees:update(Moves, [Pred|List], Map) + end, + get_moves(Left, Phis, NewMap); +get_moves([], _Phis, Map) -> + gb_trees:to_list(Map). + +%%---------------------------------------------------------------------- +%% Procedure : get_moves_from_phis/3 +%% Purpose : Find all the moves that should be done in the edge +%% coming in from Pred. +%% Arguments : Pred - The predecessor +%% Phis - Reverse list of phi instructions. +%% Returns : [{Dst,Src}] representing the move instructions; +%% ORDERING IS SIGNIFICANT! +%%---------------------------------------------------------------------- + +get_moves_from_phis(Pred, [Phi|Left], Acc) -> + Dst = ?CODE:phi_dst(Phi), + Src = ?CODE:phi_arg(Phi, Pred), + NewAcc = [{Dst, Src}|Acc], + get_moves_from_phis(Pred, Left, NewAcc); +get_moves_from_phis(_Pred, [], Acc) -> + Acc. + +%%---------------------------------------------------------------------- +%% Procedure : insert_move_bbs/3 +%% Purpose : Create the bbs that contains the moves. +%% Arguments : Ordset - The move instruction tuples {Dst, Src} +%% Preds - The predecessors that needs the moves in Ordset +%% Label - The original label that contained the phis. +%% Cfg - The current cfg +%% Returns : The new Cfg. +%%---------------------------------------------------------------------- + +insert_move_bbs([{Ordset,Preds}|Left], Label, Cfg) -> + Code = create_moves(Ordset, []) ++ [?CODE:mk_goto(Label)], + BB = hipe_bb:mk_bb(Code), + NewLabel = ?CODE:label_name(?CODE:mk_new_label()), + NewCfg1 = ?CFG:bb_add(Cfg, NewLabel, BB), + NewCfg2 = lists:foldl(fun(X, Acc) -> + ?CFG:redirect(Acc, X, Label, NewLabel) + end, + NewCfg1, Preds), + insert_move_bbs(Left, Label, NewCfg2); +insert_move_bbs([], _Label, Cfg) -> + Cfg. + +create_moves([{X,X}|Left], Acc) -> + create_moves(Left, Acc); +create_moves([{Dst,Src}|Left], Acc) -> + create_moves(Left, [makePhiMove(Dst, Src)|Acc]); +create_moves([], Acc) -> + %% NOTE: ORDERING IS SIGNIFICANT! + lists:reverse(Acc). + +%%---------------------------------------------------------------------- +%% Procedure : getPhiFuncts/2 +%% Purpose : This function returns the list of phi-functions from a +%% list of intermediate code instructions. +%% Arguments : +%% List - A list of Code +%% Result - Accumulative parameter to store the result +%% Returns : Reverse list of the phi instructions. ORDERING IS SIGNIFICANT! +%%---------------------------------------------------------------------- + +getPhiFuncts([I|T] = List, Result) -> + case ?CODE:is_phi(I) of + true -> + getPhiFuncts(T, [I|Result]); + false -> + {Result,List} + end; +getPhiFuncts([], Result) -> + {Result,[]}. + + +%%====================================================================== +%% Dead Code Elimination on SSA form +%%====================================================================== + +-spec remove_dead_code(#cfg{}) -> #cfg{}. + +remove_dead_code(CFG) -> + Lbls = ?CFG:reverse_postorder(CFG), + Liveness = ssa_liveness__analyze(CFG), + case do_lbls(Lbls, CFG, Liveness, false) of + {CFG1,true} -> + remove_dead_code(CFG1); + {CFG1,false} -> + CFG1 + end. + +do_lbls([Lbl|Rest], CFG, Liveness, Changed) -> + LiveOut = gb_sets:from_list(ssa_liveness__liveout(Liveness, Lbl)), + BB = ?CFG:bb(CFG, Lbl), + Code = hipe_bb:code(BB), + {NewCode,NewChanged} = do_code(lists:reverse(Code), LiveOut, Changed, []), + NewBB = hipe_bb:code_update(BB, NewCode), + NewCFG = ?CFG:bb_add(CFG, Lbl, NewBB), + do_lbls(Rest, NewCFG, Liveness, NewChanged); +do_lbls([], CFG, _Liveness, Changed) -> + {CFG,Changed}. + +do_code([Instr|Instrs], LiveOut, Changed, Acc) -> + Def = ?CODE:defines(Instr), + Use = ?CODE:uses(Instr), + DefSet = gb_sets:from_list(Def), + UseSet = gb_sets:from_list(Use), + LiveIn = gb_sets:union(gb_sets:difference(LiveOut, DefSet), UseSet), + case gb_sets:is_empty(gb_sets:intersection(DefSet, LiveOut)) of + false -> + do_code(Instrs, LiveIn, Changed, [Instr|Acc]); + true -> + case ?CODE:is_safe(Instr) of + true -> + case ?CODE:is_call(Instr) of + true -> + case ?CODE:call_continuation(Instr) of + [] -> + do_code(Instrs, LiveOut, true, Acc); + SuccLblName -> + NewInstr = ?CODE:mk_goto(SuccLblName), + do_code(Instrs, LiveOut, true, [NewInstr|Acc]) + end; + false -> + do_code(Instrs, LiveOut, true, Acc) + end; + false -> %% not a safe instruction - cannot be removed + case ?CODE:is_call(Instr) of + true -> + case ?CODE:call_dstlist(Instr) of + [] -> %% result was not used anyway; no change + do_code(Instrs, LiveIn, Changed, [Instr|Acc]); + [_Dst] -> %% remove the unused assignment to call's destination + NewInstr = ?CODE:call_dstlist_update(Instr, []), + do_code(Instrs, LiveIn, true, [NewInstr|Acc]); + [_|_] -> %% calls with multiple dests are left untouched + do_code(Instrs, LiveIn, Changed, [Instr|Acc]) + end; + false -> + do_code(Instrs, LiveIn, Changed, [Instr|Acc]) + end + end + end; +do_code([], _LiveOut, Changed, Acc) -> + {Acc,Changed}. + diff --git a/lib/hipe/ssa/hipe_ssa_const_prop.inc b/lib/hipe/ssa/hipe_ssa_const_prop.inc new file mode 100644 index 0000000000..2fce384197 --- /dev/null +++ b/lib/hipe/ssa/hipe_ssa_const_prop.inc @@ -0,0 +1,522 @@ +%% -*- Erlang -*- +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%----------------------------------------------------------------------------- +%% File : hipe_ssa_const_prop.inc +%% Author : Kostis Sagonas +%% Description : Supporting routines for sparse conditional constant +%% propagation on SSA form. +%% +%% Created : 21 June 2004 by Kostis Sagonas +%%----------------------------------------------------------------------------- + +%%----------------------------------------------------------------------------- +%% Procedure : propagate/1 +%% Purpose : Perform sparse conditional constant propagation on a +%% control flow graph +%% Arguments : CFG - The cfg to work on +%% Returns : A new cfg. +%%----------------------------------------------------------------------------- + +-spec propagate(#cfg{}) -> #cfg{}. + +propagate(CFG) -> + Environment = create_env(CFG), + StartEdge = {?CFG:start_label(CFG), ?CFG:start_label(CFG)}, + NewEnvironment = scc([StartEdge], [], Environment), + NewCFG = update_cfg(NewEnvironment), + NewCFG. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_expressions/2 & visit_expressions/4 +%% Purpose : visit each instruction in a list of instructions. +%% Arguments : Instructions - the list of instructions to visit +%% Environment - have a guess. +%% FlowWork - list of destination part of flowgraph edges +%% from the visited instructions +%% SSAWork - resulting ssa-edges from visited instrs. +%% Returns : {FlowWorkList, SSAWorkList, Environment} +%%----------------------------------------------------------------------------- + +visit_expressions(Instructions, Environment) -> + visit_expressions(Instructions, Environment, [], []). + +visit_expressions([], Environment, FlowWork, SSAWork) -> + {FlowWork, SSAWork, Environment}; +visit_expressions([Inst | Insts], Environment, FlowWork, SSAWork) -> + {MoreFlowWork, MoreSSAWork, Environment1} + = visit_expression(Inst, Environment), + FlowWork1 = MoreFlowWork ++ FlowWork, + SSAWork1 = MoreSSAWork ++ SSAWork, + visit_expressions(Insts, Environment1, FlowWork1, SSAWork1). + +%%----------------------------------------------------------------------------- +%% The environment record: Shared between incarnations of SCCP. +%%----------------------------------------------------------------------------- + +-record(env, {cfg :: #cfg{}, + executable_flags = gb_sets:empty() :: gb_set(), + handled_blocks = gb_sets:empty() :: gb_set(), + lattice_values = gb_trees:empty() :: gb_tree(), + ssa_edges = gb_trees:empty() :: gb_tree() + }). + +create_env(CFG) -> + #env{cfg = CFG, + executable_flags = gb_sets:empty(), + handled_blocks = gb_sets:empty(), + lattice_values = initialize_lattice(CFG), + ssa_edges = initialize_ssa_edges(CFG) + }. + +env__cfg(#env{cfg=CFG}) -> CFG. +env__executable_flags(#env{executable_flags=Flags}) -> Flags. +env__lattice_values(#env{lattice_values=Values}) -> Values. +env__ssa_edges(#env{ssa_edges=Edges}) -> Edges. + +%%----------------------------------------------------------------------------- +%% Procedure : initialize_lattice/1 +%% Purpose : Compute the initial value-lattice for the CFG +%% Arguments : CFG a control flow graph +%% Returns : a value-latice (gb_tree) +%%----------------------------------------------------------------------------- + +initialize_lattice(CFG) -> + Lattice = gb_trees:empty(), + Parameters = ?CFG:params(CFG), + Inserter = fun(Parameter, Tree) -> + gb_trees:insert(Parameter, bottom, Tree) + end, + lists:foldl(Inserter, Lattice, Parameters). + +%%----------------------------------------------------------------------------- +%% Procedure : initialize_ssa_edges/1 +%% Purpose : Compute the SSA edges in the CFG. SSA edges are used to map +%% the definition of a value to its uses. +%% Arguments : CFG - the cfg +%% Returns : A gb_tree of values (variables & registers) to +%% lists of {Node, Instruction} pairs. +%%----------------------------------------------------------------------------- + +initialize_ssa_edges(CFG) -> + IterateNodes = + fun(Node, Tree1) -> + IterateInstructions = + fun(Instruction, Tree2) -> + IterateArguments = + fun(Argument, Tree3) -> + Data = gb_trees:lookup(Argument, Tree3), + NewEdge = {Node, Instruction}, + case Data of + none -> + %% insert assumes key is not present + gb_trees:insert(Argument, [NewEdge], Tree3); + {value, EdgeList} -> + %% update assumes key is present + gb_trees:update(Argument, [NewEdge|EdgeList], Tree3) + end + end, + Arguments = ?CODE:uses(Instruction), + lists:foldl(IterateArguments, Tree2, Arguments) + end, + Instructions = hipe_bb:code(?CFG:bb(CFG, Node)), + lists:foldl(IterateInstructions, Tree1, Instructions) + end, + NodeList = ?CFG:labels(CFG), + lists:foldl(IterateNodes, gb_trees:empty(), NodeList). + +%%----------------------------------------------------------------------------- +%% Procedure : scc/3 +%% Purpose : Do the symbolic execution of a cfg and compute the resulting +%% value-lattice, and reachability information (Environment). +%% This is the main loop that does a fixpoint computation of the +%% lattice-values for each variable and register. +%% Arguments : FlowWorkList - work list of control-flow edges +%% SSAWorkList - work list of ssa-edges +%% Environment - the environment that have been computed so far. +%% Returns : The environment after execution +%%----------------------------------------------------------------------------- + +scc([], [], Environment) -> + Environment; +%% Take an element from the FlowWorkList and process it +scc([{Source,Destination} | FlowWorkList], SSAWorkList, Environment) -> + case executable({Source, Destination}, Environment) of + true -> + scc(FlowWorkList, SSAWorkList, Environment); + false -> + Environment1 = mark_as_executable({Source,Destination}, Environment), + Code = extract_code(Destination, Environment), + {Environment2, Code1, ExtraSSA} = + visit_phi_nodes(Code, Destination, Environment1, []), + case handled(Destination, Environment2) of + true -> + scc(FlowWorkList, ExtraSSA ++ SSAWorkList, Environment2); + false -> + {MoreFlowDests, MoreSSAWork, Environment3} = + visit_expressions(Code1, Environment2), + MoreFlowWork = [{Destination, Node} || Node <- MoreFlowDests], + FlowWorkList1 = MoreFlowWork ++ FlowWorkList, + SSAWorkList1 = ExtraSSA ++ MoreSSAWork ++ SSAWorkList, + Environment4 = mark_as_handled(Destination, Environment3), + scc(FlowWorkList1, SSAWorkList1, Environment4) + end + end; +%% Take an element from the SSAWorkList and process it +scc([], [{Node, Instruction} | SSAWorkList], Environment) -> + case reachable(Node, Environment) of + true -> + case ?CODE:is_phi(Instruction) of + true -> + {Environment1, MoreSSA} = visit_phi(Instruction, Node, Environment), + scc([], MoreSSA ++ SSAWorkList, Environment1); + false -> + {MoreFlowDests, MoreSSAWork, Environment1} = + visit_expression(Instruction, Environment), + SSAWorkList1 = MoreSSAWork ++ SSAWorkList, + MoreFlowWork = [{Node, Destination} || Destination<-MoreFlowDests], + scc(MoreFlowWork, SSAWorkList1, Environment1) + end; + false -> + scc([], SSAWorkList, Environment) + end. + +%%----------------------------------------------------------------------------- +%% Procedure : update_cfg/1 +%% Purpose : Transforms the cfg into something more pleasant. +%% Here the mapping of variables & registers to lattice-values is +%% used to actually change the code. +%% Arguments : Environment - in which everything happens. +%% Returns : A new CFG. +%%----------------------------------------------------------------------------- + +update_cfg(Environment) -> + NodeList = get_nodelist(Environment), + CFG1 = update_nodes(NodeList, Environment), + %% why not hipe_???_ssa:remove_dead_code ? + CFG2 = ?CFG:remove_unreachable_code(CFG1), + CFG2. + +%%----------------------------------------------------------------------------- +%% Procedure : update_nodes/2 +%% Purpose : loop over all nodes in a list of nodes, ignoring any +%% non-reachable node. +%% Arguments : NodeList - the list of nodes. +%% Environment - in which everything happens. +%% Returns : a new cfg. +%%----------------------------------------------------------------------------- + +update_nodes([], Environment) -> + env__cfg(Environment); +update_nodes([Node | NodeList], Environment) -> + NewEnvironment = + case reachable(Node, Environment) of + true -> + Instructions = extract_code(Node, Environment), + Updater = fun(Instruction) -> + update_instruction(Instruction, Environment) + end, + NewInstructions = lists:flatmap(Updater, Instructions), + update_code(Node, NewInstructions, Environment); + false -> + Environment + end, + update_nodes(NodeList, NewEnvironment). + +%%----------------------------------------------------------------------------- +%% Procedure : update_code/3 +%% Purpose : Insert a list of new instructions into the cfg in the +%% environment +%% Arguments : Node - name of the bb whose instructions we replace. +%% NewInstructions - The list of new instructions +%% Env - The environment +%% Returns : A new environment +%%----------------------------------------------------------------------------- + +update_code(Node, NewInstructions, Environment) -> + CFG = env__cfg(Environment), + BB = ?CFG:bb(CFG, Node), + OrderedInstructions = put_phi_nodes_first(NewInstructions), + NewBB = hipe_bb:code_update(BB, OrderedInstructions), + NewCFG = ?CFG:bb_add(CFG, Node, NewBB), + Environment#env{cfg = NewCFG}. + +%%----------------------------------------------------------------------------- +%% Procedure : put_phi_nodes_first/1 +%% Purpose : Move all phi-instructions to the beginning of the basic block. +%% Arguments : Instructions - The list of instructions +%% Returns : A list of instructions where the phi-nodes are first. +%%----------------------------------------------------------------------------- + +put_phi_nodes_first(Instructions) -> + {PhiInstructions, OtherInstructions} = + partition(fun(X) -> ?CODE:is_phi(X) end, Instructions), + PhiInstructions ++ OtherInstructions. + +%%----------------------------------------------------------------------------- + +partition(Function, List) -> + partition(Function, List, [], []). + +partition(_Function, [], True, False) -> + {lists:reverse(True), lists:reverse(False)}; + +partition(Function, [Hd | Tail], True, False) -> + case Function(Hd) of + true -> + partition(Function, Tail, [Hd | True], False); + false -> + partition(Function, Tail, True, [Hd | False]) + end. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_phi_nodes/4 +%% Purpose : visit all the phi-nodes in a bb and return the list of +%% remaining instructions, new ssa-edges and a new environment. +%% Arguments : [Inst|Insts] - The list of instructions in the bb +%% Node - Name of the current node. +%% Environment - the environment +%% SSAWork - the ssawork found so far. +%% Returns : {Environment, Instruction list, SSAWorkList} +%%----------------------------------------------------------------------------- + +visit_phi_nodes([], CurrentNode, _Environment, _SSAWork) -> + ?EXIT({"~w: visit_phi_nodes/4 Basic block contains no code", + ?MODULE, CurrentNode}); +visit_phi_nodes(Is = [Inst | Insts], Node, Environment, SSAWork) -> + case ?CODE:is_phi(Inst) of + true -> + {Environment1, NewSSA} = visit_phi(Inst, Node, Environment), + visit_phi_nodes(Insts, Node, Environment1, NewSSA ++ SSAWork); + false -> + {Environment, Is, SSAWork} + end. + +%%----------------------------------------------------------------------------- +%% Procedure : visit_phi/3 +%% Purpose : visit a phi-node +%% Arguments : PhiInstruction- The instruction +%% CurrentNode - Name of the current node. +%% Environment - the environment +%% Returns : {NewEnvironment, SSAWork} +%%----------------------------------------------------------------------------- + +visit_phi(PhiInstruction, CurrentNode, Environment) -> + ArgumentList = ?CODE:phi_arglist(PhiInstruction), + Value = get_phi_value(ArgumentList, CurrentNode, Environment, top), + Name = ?CODE:phi_dst(PhiInstruction), + {Environment1, SSAWork} = update_lattice_value({Name, Value}, Environment), + {Environment1, SSAWork}. + +%%----------------------------------------------------------------------------- +%% Procedure : get_phi_value/4 +%% Purpose : compute the result of a phi-function, taking care to ignore +%% edges that are not yet executable. +%% Arguments : ArgList - the list of arguments {Node, Value pair} +%% CurrentNode - the current node +%% Environment - well... +%% CurrentValue - the meet of the relevant already processed values +%% Returns : Integer, top or bottom +%%----------------------------------------------------------------------------- + +%% the arglist contains {predecessor, variable} elements. Remember +%% to be optimistic in this part, hopefully, topvalues will fall down +%% to become constants. Hence topvalues are more or less ignored here. +get_phi_value([], _CurrentNode, _Environment, CurrentValue) -> + CurrentValue; +get_phi_value([{PredecessorNode, Variable}| ArgList], + CurrentNode, + Environment, + CurrentValue) -> + case executable({PredecessorNode, CurrentNode}, Environment) of + true -> + NewValue = lookup_lattice_value(Variable, Environment), + case NewValue of + bottom -> + bottom; + top -> + get_phi_value(ArgList, CurrentNode, Environment, CurrentValue); + _ -> + case CurrentValue of + top -> + get_phi_value(ArgList, CurrentNode, Environment, NewValue); + _ -> + case (NewValue =:= CurrentValue) of + true -> + get_phi_value(ArgList, CurrentNode, Environment, NewValue); + false -> %% two different constants. + bottom + end + end + end; + false -> %% non-executable transitions don't affect the value. + get_phi_value(ArgList, CurrentNode, Environment, CurrentValue) + end. + +%%------------------------------ environment ---------------------------------- + +reachable(Node, Environment) -> + Predecessors = predecessors(Node, Environment), + Executable = fun(Pred) -> executable({Pred, Node}, Environment) end, + lists:any(Executable, Predecessors). + +%%----------------------------------------------------------------------------- + +mark_as_executable(Edge, Environment) -> + ExecutableFlags = env__executable_flags(Environment), + ExecutableFlags1 = gb_sets:add(Edge, ExecutableFlags), + Environment#env{executable_flags = ExecutableFlags1}. + +%%----------------------------------------------------------------------------- + +mark_as_handled(Node, Environment = #env{handled_blocks=Handled}) -> + NewHandled = gb_sets:add_element(Node, Handled), + Environment#env{handled_blocks=NewHandled}. + +handled(Node, #env{handled_blocks=Handled}) -> + gb_sets:is_element(Node, Handled). + +%%----------------------------------------------------------------------------- + +extract_code(Node, Environment) -> + CFG = env__cfg(Environment), + case ?CFG:bb(CFG, Node) of + not_found -> ?WARNING_MSG("Could not find label ~w.\n", [Node]), + []; + BB -> hipe_bb:code(BB) + end. + +%%----------------------------------------------------------------------------- + +predecessors(Node, Environment) -> + CFG = env__cfg(Environment), + ?CFG:pred(CFG, Node). + +%%----------------------------------------------------------------------------- + +executable(Edge, Environment) -> + ExecutableFlags = env__executable_flags(Environment), + gb_sets:is_member(Edge, ExecutableFlags). + +%%----------------------------------------------------------------------------- + +update_lattice_value({[], _NewValue}, Environment) -> + {Environment, []}; +update_lattice_value({Names, NewValue}, Environment) when is_list(Names) -> + Update = + fun(Dst, {Env, SSA}) -> + {NewEnv, NewSSA} = + update_lattice_value({Dst, NewValue}, Env), + {NewEnv, SSA ++ NewSSA} + end, + lists:foldl(Update, {Environment, []}, Names); +%% update_lattice_value({Name, {Res, N, Z, C, V} }, _) -> +%% ?EXIT({"inserting dumt grejs", {Name, {Res, N, Z, C, V} } }); +update_lattice_value({Name, NewValue}, Environment) -> + LatticeValues = env__lattice_values(Environment), + {LatticeValues1, SSAWork} = + case gb_trees:lookup(Name, LatticeValues) of + none -> + {gb_trees:insert(Name, NewValue, LatticeValues), + lookup_ssa_edges(Name, Environment)}; + {value, NewValue} -> + {LatticeValues, []}; + {value, _} -> + {gb_trees:update(Name, NewValue, LatticeValues), + lookup_ssa_edges(Name, Environment)} + end, + {Environment#env{lattice_values = LatticeValues1}, SSAWork}. + +%%----------------------------------------------------------------------------- + +lookup_ssa_edges(Variable, Environment) -> + SSAEdges = env__ssa_edges(Environment), + case gb_trees:lookup(Variable, SSAEdges) of + {value, X} -> + X; + _ -> % Unused variable + [] + end. + +%%----------------------------------------------------------------------------- + +get_nodelist(Environment) -> + CFG = env__cfg(Environment), + ?CFG:labels(CFG). + +%%----------------------------------------------------------------------------- + +-ifdef(DEBUG). + +%%----------------------------------------------------------------------------- +%%---------------------------------- DEBUG ------------------------------------ + +error(Text) -> + error(Text, []). + +error(Text, Data) -> + io:format("Internal compiler error in ~w\n",[?MODULE]), + io:format(Text, Data), + io:format("\n\n"), + halt(). + +%%----------------------------------------------------------------------------- + +print_environment(Environment) -> + io:format("============================================================\n"), + io:format("Executable flags: "), + print_executable_flags(env__executable_flags(Environment)), + io:format("Lattice values --->\n"), + print_lattice_values(env__lattice_values(Environment)), + io:format("SSA edges --->\n"), + print_ssa_edges(env__ssa_edges(Environment)), + io:format("============================================================\n"). + +%%----------------------------------------------------------------------------- + +print_executable_flags(ExecutableFlags) -> + ListOfFlags = gb_sets:to_list(ExecutableFlags), + Printer = fun ({Source, Destination}) -> + io:format("(~w, ~w), ", [Source, Destination]) end, + lists:foreach(Printer, ListOfFlags), + io:format("()\n"). + +%%----------------------------------------------------------------------------- + +print_lattice_values(LatticeValues) -> + ListOfLatticeValues = gb_trees:to_list(LatticeValues), + Printer = fun ({Key, Value}) -> + io:format("~w = ~w\n", [Key, Value]) end, + lists:foreach(Printer, ListOfLatticeValues). + +%%----------------------------------------------------------------------------- + +print_ssa_edges(SSAEdges) -> + ListOfSSAEdges = gb_trees:to_list(SSAEdges), + Printer = fun ({Key, Value}) -> + io:format("~w: ~w\n", [Key, Value]) end, + lists:foreach(Printer, ListOfSSAEdges). + +%%----------------------------------------------------------------------------- + +-endif. %% DEBUG + +%%----------------------------------------------------------------------------- + diff --git a/lib/hipe/ssa/hipe_ssa_copy_prop.inc b/lib/hipe/ssa/hipe_ssa_copy_prop.inc new file mode 100644 index 0000000000..311940a1fc --- /dev/null +++ b/lib/hipe/ssa/hipe_ssa_copy_prop.inc @@ -0,0 +1,198 @@ +%%% -*- Erlang -*- +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_ssa_copy_prop.inc +%%% Author : Tobias Lindahl +%%% Description : Copy propagation on SSA form. +%%% +%%% Created : 4 Apr 2003 by Tobias Lindahl +%%%------------------------------------------------------------------- + +-export([cfg/1]). + +%%-------------------------------------------------------------------- +%% Two passes through the code visiting the blocks in reverse +%% postorder. The first pass binds all destinations of copying moves +%% to the sources, and the second propagates the copies and removes +%% the copying moves. +%% +%% Problem: +%% Since phi-nodes are implemented as instructions they are not +%% atomic. If we are not careful we can get the situation (after propagation): +%% +%% v0 = phi(v0, v2) +%% v1 = phi(v0, v3) +%% ^^ +%% where the underlined v0 really corresponds to the v0 before the first +%% phi-instruction. +%% +%% Solution: +%% * Find all dependencies between the uses of a phi-instruction to +%% the destination of any earlier phi-instruction in the same phi-node; +%% * Keep the copying move that defines the variable used in the +%% latter phi-instruction; and +%% * Do not propagate the copy into the phi-instruction +%% +%%-------------------------------------------------------------------- + +-spec cfg(#cfg{}) -> #cfg{}. + +cfg(Cfg) -> + Labels = ?cfg:reverse_postorder(Cfg), + {Info,PhiDep} = analyse(Labels, Cfg, gb_trees:empty(), gb_trees:empty()), + rewrite(Labels, Cfg, Info, PhiDep). + +analyse([Label|Left], Cfg, Info, PhiDep) -> + BB = ?cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + NewPhiDep = get_phi_dep(Code, gb_sets:empty(), PhiDep), + NewInfo = analyse_code(Code, Info), + analyse(Left, Cfg, NewInfo, NewPhiDep); +analyse([], _Cfg, Info, PhiDep) -> + {Info,PhiDep}. + +get_phi_dep([I|Left], Defined, Dep) -> + case ?code:is_phi(I) of + true -> + Use = ?code:uses(I), + [Def] = ?code:defines(I), + NewDep = add_dep(Use, Defined, Dep), + get_phi_dep(Left, gb_sets:insert(Def, Defined), NewDep); + false -> + Dep + end; +get_phi_dep([], _Defined, Dep) -> + Dep. + +add_dep([Use|Left], Defined, Dep) -> + case gb_trees:lookup(Use, Dep) of + none -> + add_dep(Left, Defined, gb_trees:insert(Use, Defined, Dep)); + {value, Set} -> + NewSet = gb_sets:union(Defined, Set), + add_dep(Left, Defined, gb_trees:enter(Use, NewSet, Dep)) + end; +add_dep([], _Defined, Dep) -> + Dep. + +has_dep(Use, Def, Dep) -> + case gb_trees:lookup(Use, Dep) of + none -> + false; + {value, Set} -> + gb_sets:is_member(Def, Set) + end. + +analyse_code([I|Left], Info) -> + case ?code:is_move(I) of + true -> + NewInfo = get_info_move(I, Info), + analyse_code(Left, NewInfo); + false -> + analyse_code(Left, Info) + end; +analyse_code([], Info) -> + Info. + +get_info_move(I, Info) -> + case ?code:uses(I) of + [] -> %% Constant. + Info; + [Src] -> + add_binding(?code:defines(I), Src, Info) + end. + +rewrite([Label|Left], Cfg, Info, PhiDep) -> + BB = ?cfg:bb(Cfg, Label), + Code = hipe_bb:code(BB), + NewCode = rewrite_code(Code, Info, PhiDep, []), + NewBB = hipe_bb:code_update(BB, NewCode), + rewrite(Left, ?cfg:bb_add(Cfg, Label, NewBB), Info, PhiDep); +rewrite([], Cfg, _Info, _PhiDep) -> + Cfg. + +rewrite_code([I|Left], Info, PhiDep, Acc) -> + case ?code:is_move(I) of + true -> + Fun = fun(X, Y) -> ?code:mk_move(X, Y) end, + NewI = rewrite_move(I, Fun, Info, PhiDep), + rewrite_code(Left, Info, PhiDep, NewI++Acc); + false -> + NewI = rewrite_instr(I, Info, PhiDep), + rewrite_code(Left, Info, PhiDep, [NewI|Acc]) + end; +rewrite_code([], _Info, _PhiDep, Acc) -> + lists:reverse(Acc). + +rewrite_move(I, Fun, Info, PhiDep) -> + case ?code:uses(I) of + [] ->%% Constant move. Keep it! + [I]; + _ -> + Dst = hd(?code:defines(I)), + case gb_trees:lookup(Dst, Info) of + {value, Root} -> + case has_dep(Dst, Root, PhiDep) of + true -> %% Must keep the copying move! + [Fun(Dst, Root)]; + false -> + [] + end; + none -> + [] + end + end. + +rewrite_instr(I, Info, PhiDep) -> + rewrite_instr0(I, ?code:uses(I), Info, PhiDep, []). + +rewrite_instr0(I, [Key|Left], Info, PhiDep, UpdateInfo) -> + case gb_trees:lookup(Key, Info) of + none -> + rewrite_instr0(I, Left, Info, PhiDep, UpdateInfo); + {value, Root} -> + case gb_trees:lookup(Key, Info) of + {value, Root} -> + case has_dep(Key, Root, PhiDep) of + true -> %% Must keep Key! + rewrite_instr0(I, Left, Info, PhiDep, UpdateInfo); + false -> + rewrite_instr0(I, Left, Info, PhiDep, [{Key, Root}|UpdateInfo]) + end; + _ -> + rewrite_instr0(I, Left, Info, PhiDep, UpdateInfo) + end + end; +rewrite_instr0(I, [], _Info, _PhiDep, UpdateInfo) -> + ?code:subst(UpdateInfo, I). + +add_binding([Key|Left], Val, Info) -> + %% Make sure the key is bound to the end of any copy-chains. + NewInfo = + case gb_trees:lookup(Val, Info) of + {value, NewVal} -> + gb_trees:insert(Key, NewVal, Info); + none -> + gb_trees:insert(Key, Val, Info) + end, + add_binding(Left, Val, NewInfo); +add_binding([], _, Info) -> + Info. diff --git a/lib/hipe/ssa/hipe_ssa_liveness.inc b/lib/hipe/ssa/hipe_ssa_liveness.inc new file mode 100644 index 0000000000..05c8a88059 --- /dev/null +++ b/lib/hipe/ssa/hipe_ssa_liveness.inc @@ -0,0 +1,328 @@ +%% -*- Erlang -*- +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% GENERIC MODULE TO PERFORM LIVENESS ANALYSIS ON SSA FORM +%% +%% Exports: +%% ~~~~~~~ +%% analyze(CFG) - returns a liveness analysis of CFG. +%% liveout(Liveness, Label) - returns the list of variables that are +%% live at exit from basic block named Label. +%% livein(Liveness, Label) - returns the list of variables that are +%% live on entry to the basic block named Label. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Uncomment the following if this is ever needed as an independent module +%% +-ifdef(LIVENESS_NEEDED). +-export([ssa_liveness__analyze/1, + ssa_liveness__livein/2]). +%% ssa_liveness__livein/3], +%% ssa_liveness__liveout/2]). +-endif. +%% -ifdef(DEBUG_LIVENESS). +%% -export([pp_liveness/1]). +%% -endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface functions that MUST be implemented in the supporting files +%% +%% In the CFG file: +%% ---------------- +%% - bb(CFG, L) -> BasicBlock, extract a basic block from a cfg. +%% - postorder(CFG) -> [Labels], the labels of the cfg in postorder +%% - succ(CFG, L) -> [Labels], +%% - function(CFG) -> {M,F,A} +%% +%% In the CODE file: +%% ----------------- +%% - uses(Instr) -> +%% - defines(Instr) -> +%% - is_phi(Instr) -> Boolean +%% - phi_arglist(Instr) -> [{Pred, Var}] + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The generic liveness analysis on SSA form +%% +ssa_liveness__analyze(CFG) -> + PO = ?CFG:postorder(CFG), + InitLiveness = liveness_init(init(PO, CFG)), + merry_go_around(PO, InitLiveness). + +%% +%% The fixpoint iteration +%% + +merry_go_around(Labels, Liveness) -> + case doit_once(Labels, Liveness) of + {fixpoint, NewLiveness} -> + NewLiveness; + {value, NewLiveness} -> + merry_go_around(Labels, NewLiveness) + end. + +%% +%% One iteration +%% + +doit_once(Labels, Liveness) -> + doit_once(Labels, Liveness, true). + +doit_once([], Liveness, FixPoint) -> + if FixPoint -> {fixpoint, Liveness}; + true -> {value, Liveness} + end; +doit_once([L|Ls], Liveness, FixPoint) -> + LiveOut = join_livein(Liveness, L), + NewLiveness = update_liveout(L, LiveOut, Liveness), + Kill = set_subtract(LiveOut, kill(L, NewLiveness)), + LiveIn = set_union(Kill, gen(L, NewLiveness)), + case update_livein(L, LiveIn, NewLiveness) of + fixpoint -> doit_once(Ls, NewLiveness, FixPoint); + {value, NewLiveness1} -> doit_once(Ls, NewLiveness1, false) + end. + +%% +%% updates liveness for a basic block +%% + +update_livein(Label, NewLiveIn, Liveness) -> + {GKD, LiveIn, LiveOut, Succ} = liveness_lookup(Label, Liveness), + case LiveIn of + NewLiveIn -> + fixpoint; + _ -> + {value, liveness_update(Label, {GKD,NewLiveIn,LiveOut,Succ}, Liveness)} + end. + +update_liveout(Label, NewLiveOut, Liveness) -> + {GKD, LiveIn, _LiveOut, Succ} = liveness_lookup(Label, Liveness), + liveness_update(Label, {GKD,LiveIn,NewLiveOut,Succ}, Liveness). + +%% +%% Join live in to get the new live out. +%% + +join_livein(Liveness, L) -> + Succ = successors(L, Liveness), + case Succ of + [] -> % special case if no successors + gb_sets:from_list(liveout_no_succ()); + _ -> + join_livein1(L, Succ, Liveness) + end. + +join_livein1(Pred, Labels, Liveness) -> + join_livein1(Pred, Labels, Liveness, new_set()). + +join_livein1(_Pred, [], _Liveness, Live) -> + Live; +join_livein1(Pred, [L|Ls], Liveness, Live) -> + OldLivein = livein_set(Liveness, L, Pred), + NewLive = set_union(OldLivein, Live), + join_livein1(Pred, Ls, Liveness, NewLive). + + +ssa_liveness__liveout(Liveness, L) -> + {_GKD, _LiveIn, LiveOut, Successors} = liveness_lookup(L, Liveness), + case Successors of + [] -> % special case if no successors + liveout_no_succ(); + _ -> + set_to_list(LiveOut) + end. + +-ifdef(LIVENESS_NEEDED). +ssa_liveness__livein(Liveness, L) -> + set_to_list(livein_set(Liveness, L)). + +%% ssa_liveness__livein(Liveness, L, Pred) -> +%% set_to_list(livein_set(Liveness, L, Pred)). + +livein_set(Liveness, L) -> + {{_Gen,_Kill,{TotalDirGen, _DirGen}}, LiveIn, _LiveOut, _Successors} = + liveness_lookup(L, Liveness), + set_union(TotalDirGen, LiveIn). +-endif. + +livein_set(Liveness, L, Pred) -> + {{_Gen,_Kill,{_TotalDirGen, DirGen}}, LiveIn, _LiveOut, _Successors} = + liveness_lookup(L, Liveness), + case gb_trees:lookup(Pred, DirGen) of + none -> + LiveIn; + {value, LiveInFromPred} -> + set_union(LiveInFromPred, LiveIn) + end. + +successors(L, Liveness) -> + {_GKD, _LiveIn, _LiveOut, Successors} = liveness_lookup(L, Liveness), + Successors. + +kill(L, Liveness) -> + {{_Gen,Kill,_DirGen},_LiveIn,_LiveOut,_Successors} = + liveness_lookup(L, Liveness), + Kill. + +gen(L, Liveness) -> + {{Gen,_Kill,_DirGen},_LiveIn,_LiveOut,_Successors} = + liveness_lookup(L, Liveness), + Gen. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% init returns a list of: {Label, {{Gen, Kill}, LiveIn, Successors}} +%% - Label is the name of the basic block. +%% - Gen is the set of varables that are used by this block. +%% - Kill is the set of varables that are defined by this block. +%% - LiveIn is the set of variables that are alive at entry to the +%% block (initially empty). +%% - Successors is a list of the successors to the block. + +init([], _) -> + []; +init([L|Ls], CFG) -> + BB = ?CFG:bb(CFG, L), + Code = hipe_bb:code(BB), + Succ = ?CFG:succ(CFG, L), + {Gen, Kill} = make_bb_transfer(Code, Succ), + DirectedGen = get_directed_gen(Code), + [{L, {{Gen, Kill, DirectedGen}, new_set(), new_set(), Succ}} + | init(Ls, CFG)]. + +make_bb_transfer([], _Succ) -> + {new_set(), new_set()}; % {Gen, Kill} +make_bb_transfer([I|Is], Succ) -> + {Gen, Kill} = make_bb_transfer(Is, Succ), + case ?CODE:is_phi(I) of + true -> + InstrKill = set_from_list(?CODE:defines(I)), + Gen1 = set_subtract(Gen, InstrKill), + Kill1 = set_union(Kill, InstrKill), + {Gen1, Kill1}; + false -> + InstrGen = set_from_list(?CODE:uses(I)), + InstrKill = set_from_list(?CODE:defines(I)), + Gen1 = set_subtract(Gen, InstrKill), + Gen2 = set_union(Gen1, InstrGen), + Kill1 = set_union(Kill, InstrKill), + Kill2 = set_subtract(Kill1, InstrGen), + {Gen2, Kill2} + end. + +get_directed_gen(Code) -> + Map = get_directed_gen_1(Code), + TotalGen = lists:foldl(fun({_Pred, Gen}, Acc) -> + set_union(Gen, Acc) + end, new_set(), gb_trees:to_list(Map)), + {TotalGen, Map}. + +get_directed_gen_1([I|Left])-> + case ?CODE:is_phi(I) of + false -> + gb_trees:empty(); + true -> + Map = get_directed_gen_1(Left), + ArgList = ?CODE:phi_arglist(I), + lists:foldl(fun update_directed_gen/2, Map, ArgList) + end. + +update_directed_gen({Pred, Var}, Map)-> + case gb_trees:lookup(Pred, Map) of + none -> gb_trees:insert(Pred, set_from_list([Var]), Map); + {value, Set} -> gb_trees:update(Pred, set_add(Var, Set), Map) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% liveness +%% + +liveness_init(List) -> + liveness_init1(List, gb_trees:empty()). + +liveness_init1([{Label, Info}|Left], Map) -> + liveness_init1(Left, gb_trees:insert(Label, Info, Map)); +liveness_init1([], Map) -> + Map. + +liveness_lookup(Label, Map) -> + {value, Info} = gb_trees:lookup(Label, Map), + Info. + +liveness_update(Label, NewInfo, Map) -> + gb_trees:update(Label, NewInfo, Map). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Sets +%% + +new_set() -> + gb_sets:empty(). + +set_union(S1, S2) -> + gb_sets:union(S1, S2). + +set_subtract(S1, S2) -> + gb_sets:subtract(S1, S2). + +set_from_list(List) -> + gb_sets:from_list(List). + +set_to_list(Set) -> + gb_sets:to_list(Set). + +set_add(Var, Set) -> + gb_sets:add(Var, Set). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Pretty printer +%% + +-ifdef(DEBUG_LIVENESS). + +pp_liveness(CFG) -> + io:format("Liveness for ~p:\n", [?CFG:function(CGF)]), + Liveness = analyze(CFG), + RevPostorder = lists:reverse(?CFG:postorder(CFG)), + Edges = [{X, Y} || X <- RevPostorder, Y <- ?CFG:succ(CFG, X)], + pp_liveness_edges(Edges, Liveness). + +pp_liveness_edges([{From, To}|Left], Liveness)-> + LiveIn = livein(Liveness, To, From), + io:format("Label ~w -> Label ~w: ~p\n", [From, To, LiveIn]), + LiveOut = liveout(Liveness, From), + io:format("Total live out from Label ~w: ~p\n", [From, LiveOut]), + pp_liveness_edges(Left, Liveness); +pp_liveness_edges([], _Liveness) -> + ok. + +-endif. diff --git a/lib/hipe/tools/Makefile b/lib/hipe/tools/Makefile new file mode 100644 index 0000000000..6ce5cb1b8b --- /dev/null +++ b/lib/hipe/tools/Makefile @@ -0,0 +1,111 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2002-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = hipe_tool hipe_profile hipe_ceach hipe_jit +# hipe_timer + +HRL_FILES= +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +distclean: clean +realclean: clean + + +# ---------------------------------------------------- +# Include dependencies +# ---------------------------------------------------- + +$(EBIN)/hipe_ceach.beam: ../main/hipe.hrl +$(EBIN)/hipe_tool.beam: ../main/hipe.hrl + diff --git a/lib/hipe/tools/hipe_ceach.erl b/lib/hipe/tools/hipe_ceach.erl new file mode 100644 index 0000000000..b29615e169 --- /dev/null +++ b/lib/hipe/tools/hipe_ceach.erl @@ -0,0 +1,74 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Module : hipe_ceach +%% Purpose : Compile each function in a module, possibly applying a +%% fun between each compilation. Useful for bug hunting by +%% pinpointing a function that when compiled causes a bug. +%% Notes : +%% History : * 2001-12-11 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_ceach). + +-export([c/1, c/2, c/3]). + +-include("../main/hipe.hrl"). + +%%--------------------------------------------------------------------- + +-spec c(atom()) -> 'ok'. + +c(M) -> + lists:foreach(fun({F,A}) -> comp(M, F, A) end, + M:module_info(functions)). + +-spec c(atom(), comp_options()) -> 'ok'. + +c(M, Opts) -> + lists:foreach(fun({F,A}) -> comp(M, F, A, Opts) end, + M:module_info(functions)). + +-spec c(atom(), comp_options(), fun(() -> any())) -> 'ok'. + +c(M, Opts, Fn) -> + lists:foreach(fun({F,A}) -> comp(M, F, A, Opts), Fn() end, + M:module_info(functions)). + +-spec comp(atom(), atom(), arity()) -> 'ok'. + +comp(M, F, A) -> + io:format("~w:~w/~w... ", [M, F, A]), + MFA = {M, F, A}, + {ok, MFA} = hipe:c(MFA), + io:format("OK\n"). + +-spec comp(atom(), atom(), arity(), comp_options()) -> 'ok'. + +comp(M, F, A, Opts) -> + io:format("~w:~w/~w... ", [M, F, A]), + MFA = {M, F, A}, + {ok, MFA} = hipe:c(MFA, Opts), + io:format("OK\n"). diff --git a/lib/hipe/tools/hipe_jit.erl b/lib/hipe/tools/hipe_jit.erl new file mode 100644 index 0000000000..0ac84388ae --- /dev/null +++ b/lib/hipe/tools/hipe_jit.erl @@ -0,0 +1,87 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2002 by Erik Johansson. +%% ==================================================================== +%% Module : hipe_jit +%% Purpose : +%% Notes : +%% History : * 2002-03-14 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% @doc +%% A tool to enable using the HiPE compiler as an automatic JIT +%% compiler rather than a user-controlled one. +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_jit). + +-export([start/0]). + +-record(state, {mode = start :: 'sleep' | 'start' | 'wait', + threshold = 5000 :: non_neg_integer(), + sleep = 5000 :: non_neg_integer(), + time = 1000 :: non_neg_integer()}). + +%%--------------------------------------------------------------------- + +-spec start() -> pid(). +%% @doc +%% Starts an Erlang process which calls the HiPE compiler every +%% now and then (when it sees it fit to do so). +%% @end +start() -> + spawn(fun () -> loop(#state{}) end). + +loop(State) -> + case State#state.mode of + start -> + start(State); + wait -> + wait(State); + _ -> + sleep(State) + end. + +sleep(State) -> + receive + quit -> ok + after State#state.sleep -> + loop(State#state{mode=start}) + end. + +start(State) -> + catch hipe_profile:prof(), + catch hipe_profile:clear(), + loop(State#state{mode=wait}). + +wait(State) -> + receive + quit -> ok + after State#state.time -> + R = [M || {M,C} <- (catch hipe_profile:mods_res()), + C > State#state.threshold], + catch hipe_profile:prof_off(), + lists:foreach(fun(M) -> + io:format("Compile ~w\n",[M]), + hipe:c(M,[o2,verbose]) + end, R) + end, + loop(State#state{mode=sleep}). diff --git a/lib/hipe/tools/hipe_profile.erl b/lib/hipe/tools/hipe_profile.erl new file mode 100644 index 0000000000..7566acb8f4 --- /dev/null +++ b/lib/hipe/tools/hipe_profile.erl @@ -0,0 +1,191 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% Time-stamp: <2008-04-20 14:53:42 richard> +%% ==================================================================== +%% Module : hipe_profile +%% Purpose : +%% History : * 2001-07-12 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_profile). + +-export([%% profile/1, mods_profile/1, + prof/0, prof_off/0, clear/0, res/0, + mods_res/0, + %% clear_module/1, res_module/1, + prof_module/1, prof_module_off/1]). + +%% %% @spec mods_profile(F) -> [{mod(),calls()}] +%% %% F = () -> term() +%% %% mod() = atom() +%% %% calls()= integer() +%% %% +%% %% @doc Returns the number of calls per module generated by +%% %% applying F(). +%% %% The resulting lists is sorted with the most called +%% %% module first. +%% mods_profile(F) -> +%% F(), +%% prof(), +%% clear(), +%% F(), +%% R = mods_res(), +%% prof_off(), +%% R. + +-spec mods_res() -> [{atom(), non_neg_integer()}]. +%% @doc Returns the number of calls per module currently +%% recordeed since hipe_bifs:call_count_on(). +%% The resulting list is sorted with the most called +%% module first. +mods_res() -> + lists:reverse(lists:keysort(2, calls())). + +-spec calls() -> [{atom(), non_neg_integer()}]. +%% @doc Returns the number of calls per module currently +%% recordeed since hipe_bifs:call_count_on(). +calls() -> + [{Mod, total_calls(Mod)} || Mod <- mods(), + total_calls(Mod) > 1, + Mod =/= hipe_profile]. + +%% %% @spec profile(F) -> [{mfa(),calls()}] +%% %% F = () -> term() +%% %% mfa() = {mod(),function(),arity()} +%% %% function() = atom() +%% %% arity() = intger() +%% %% +%% %% @doc Returns the number of calls per module generated by +%% %% applying F(). +%% %% The resulting lists is sorted with the most called +%% %% module first. +%% profile(F) -> +%% %% Make sure all code is loaded. +%% F(), +%% %% Turn profiling on. +%% prof(), +%% clear(), +%% %% Apply the closure to profile. +%% F(), +%% %% Get result. +%% R = res(), +%% %% Turn of profiling. +%% prof_off(), +%% R. + +-spec prof() -> 'ok'. +%% @doc Turns on profiling of all loaded modules. +prof() -> + lists:foreach(fun prof_module/1, mods()). + +-spec prof_off() -> 'ok'. +%% @doc Turns off profiling of all loaded modules. + prof_off() -> + lists:foreach(fun prof_module_off/1, mods()). + +-spec clear() -> 'ok'. +%% @doc Clears all counters. +clear() -> + lists:foreach(fun clear_module/1, mods()). + +-spec res() -> [{mfa(), non_neg_integer()}]. +%% @doc Returns a list of the numbers of calls to each profiled function. +%% The list is sorted with the most called function first. +res() -> + lists:reverse(lists:keysort(2, lists:flatten([res_module(M) || M <- mods()]))). + +%% -------------------------------------------------------------------- +-spec mods() -> [atom()]. +%% @doc Returns a list of all loaded modules. +%@ -------------------------------------------------------------------- + +mods() -> + [Mod || {Mod,_} <- code:all_loaded()]. + +%% -------------------------------------------------------------------- +-spec prof_module(atom()) -> 'ok'. +%% @doc Turns on profiling for given module. +%@ ____________________________________________________________________ + +prof_module(Mod) -> + Funs = Mod:module_info(functions), + lists:foreach(fun ({F,A}) -> catch hipe_bifs:call_count_on({Mod,F,A}) end, + Funs), + ok. + +%% -------------------------------------------------------------------- +-spec prof_module_off(atom()) -> 'ok'. +%% @doc Turns off profiling of the module Mod. +%@ -------------------------------------------------------------------- + +prof_module_off(Mod) -> + Funs = Mod:module_info(functions), + lists:foreach(fun ({F,A}) -> catch hipe_bifs:call_count_off({Mod,F,A}) end, + Funs), + ok. + +%% -------------------------------------------------------------------- +-spec clear_module(atom()) -> 'ok'. +%% @doc Clears the call counters for all functions in module Mod. +%@ -------------------------------------------------------------------- + +clear_module(Mod) -> + Funs = Mod:module_info(functions), + lists:foreach(fun ({F,A}) -> catch hipe_bifs:call_count_clear({Mod,F,A}) end, + Funs), + ok. + +%% -------------------------------------------------------------------- +-spec res_module(atom()) -> [{mfa(), non_neg_integer()}]. +%% @doc Returns the number of profiled calls to each function (MFA) +%% in the module Mod. +%@ -------------------------------------------------------------------- + +res_module(Mod) -> + Fun = fun ({F,A}) when is_atom(F), is_integer(A) -> + MFA = {Mod,F,A}, + {MFA, try hipe_bifs:call_count_get(MFA) of + N when is_integer(N) -> N; + false -> 0 + catch + _:_ -> 0 + end + } + end, + lists:reverse(lists:keysort(2, [Fun(FA) || FA <- Mod:module_info(functions)])). + +-spec total_calls(atom()) -> non_neg_integer(). + +total_calls(Mod) -> + Funs = Mod:module_info(functions), + SumF = fun ({F,A}, Acc) -> + MFA = {Mod,F,A}, + try hipe_bifs:call_count_get(MFA) of + N when is_integer(N) -> N+Acc; + false -> Acc + catch + _:_ -> Acc + end; + (_, Acc) -> Acc + end, + lists:foldl(SumF, 0, Funs). diff --git a/lib/hipe/tools/hipe_timer.erl b/lib/hipe/tools/hipe_timer.erl new file mode 100644 index 0000000000..03cc358f17 --- /dev/null +++ b/lib/hipe/tools/hipe_timer.erl @@ -0,0 +1,159 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% Time-stamp: <2008-04-20 14:53:36 richard> +%% ==================================================================== +%% Module : hipe_timer +%% Purpose : +%% Notes : +%% History : * 2001-03-15 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_timer). + +-export([tr/1, t/1, timer/1, time/1, empty_time/0]). +-export([advanced/2]). + +t(F) -> + {EWT,ERT} = empty_time(), + {WT,RT} = time(F), + {WT-EWT,(RT-ERT)/1000}. + +tr(F) -> + {EWT,ERT} = empty_time(), + {R,{WT,RT}} = timer(F), + {R,{WT-EWT,(RT-ERT)/1000}}. + +empty_time() -> + {WT1,WT2,WT3} = erlang:now(), + {A,_} = erlang:statistics(runtime), + {WT12,WT22,WT32} = erlang:now(), + {B,_} = erlang:statistics(runtime), + {(WT12-WT1)*1000000+(WT22-WT2)+(WT32-WT3)/1000000,B-A}. + +time(F) -> + {WT1,WT2,WT3} = erlang:now(), + {A,_} = erlang:statistics(runtime), + F(), + {WT12,WT22,WT32} = erlang:now(), + {B,_} = erlang:statistics(runtime), + {(WT12-WT1)*1000000+(WT22-WT2)+(WT32-WT3)/1000000,B-A}. + +timer(F) -> + {WT1,WT2,WT3} = erlang:now(), + {A,_} = erlang:statistics(runtime), + R = F(), + {WT12,WT22,WT32} = erlang:now(), + {B,_} = erlang:statistics(runtime), + {R,{(WT12-WT1)*1000000+(WT22-WT2)+(WT32-WT3)/1000000,B-A}}. + +advanced(_Fun, I) when I < 2 -> false; +advanced(Fun, Iterations) -> + R = Fun(), + Measurements = [t(Fun) || _ <- lists:seq(1, Iterations)], + {Wallclock, RunTime} = split(Measurements), + WMin = lists:min(Wallclock), + RMin = lists:min(RunTime), + WMax = lists:max(Wallclock), + RMax = lists:max(RunTime), + WMean = mean(Wallclock), + RMean = mean(RunTime), + WMedian = median(Wallclock), + RMedian = median(RunTime), + WVariance = variance(Wallclock), + RVariance = variance(RunTime), + WStddev = stddev(Wallclock), + RStddev = stddev(RunTime), + WVarCoff = 100 * WStddev / WMean, + RVarCoff = 100 * RStddev / RMean, + WSum = lists:sum(Wallclock), + RSum = lists:sum(RunTime), + [{wallclock,[{min, WMin}, + {max, WMax}, + {mean, WMean}, + {median, WMedian}, + {variance, WVariance}, + {stdev, WStddev}, + {varcoff, WVarCoff}, + {sum, WSum}, + {values, Wallclock}]}, + {runtime,[{min, RMin}, + {max, RMax}, + {mean, RMean}, + {median, RMedian}, + {variance, RVariance}, + {stdev, RStddev}, + {varcoff, RVarCoff}, + {sum, RSum}, + {values, RunTime}]}, + {iterations, Iterations}, + {result, R}]. + +split(M) -> + split(M, [], []). + +split([{W,R}|More], AccW, AccR) -> + split(More, [W|AccW], [R|AccR]); +split([], AccW, AccR) -> + {AccW, AccR}. + +mean(L) -> + mean(L, 0, 0). + +mean([V|Vs], No, Sum) -> + mean(Vs, No+1, Sum+V); +mean([], No, Sum) when No > 0 -> + Sum/No; +mean([], _No, _Sum) -> + exit(empty_list). + +median(L) -> + S = length(L), + SL = lists:sort(L), + case even(S) of + true -> + (lists:nth((S div 2), SL) + lists:nth((S div 2) + 1, SL)) / 2; + false -> + lists:nth((S div 2), SL) + end. + +even(S) -> + (S band 1) =:= 0. + +%% diffs(L, V) -> +%% [X - V || X <- L]. + +square_diffs(L, V) -> + [(X - V) * (X - V) || X <- L]. + +variance(L) -> + Mean = mean(L), + N = length(L), + if N > 1 -> + lists:sum(square_diffs(L,Mean)) / (N-1); + true -> exit('too few values') + end. + +stddev(L) -> + math:sqrt(variance(L)). diff --git a/lib/hipe/tools/hipe_tool.erl b/lib/hipe/tools/hipe_tool.erl new file mode 100644 index 0000000000..ae1cad06cc --- /dev/null +++ b/lib/hipe/tools/hipe_tool.erl @@ -0,0 +1,513 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2002 by Erik Johansson. +%% ==================================================================== +%% Module : hipe_tool +%% Purpose : +%% Notes : +%% History : * 2002-03-13 Erik Johansson (happi@it.uu.se): Created. +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_tool). + +-export([start/0]). + +%%--------------------------------------------------------------------- + +-include("../main/hipe.hrl"). + +%%--------------------------------------------------------------------- + +-define(WINDOW_WIDTH, 920). +-define(WINDOW_HEIGHT, 460). +-define(DEFAULT_BG_COLOR, {217,217,217}). +-define(POLL_INTERVAL, 5000). +-define(FONT, {screen, 12}). +-define(HEADER_FONT, {screen, [bold], 12}). +-define(NORMAL_FG_COLOR, {0,0,0}). + +%%--------------------------------------------------------------------- + +-type fa() :: {atom(), arity()}. % {Fun,Arity} +-type fa_address() :: {atom(), arity(), non_neg_integer()}. % {F,A,Address} + +%%--------------------------------------------------------------------- + +-record(state, {win_created = false :: boolean(), + mindex = 0 :: integer(), + mod :: module(), + funs = [] :: [fa()], + mods = [] :: [module()], + options = [o2] :: comp_options(), + compiling = false :: 'false' | pid() + }). + +%%--------------------------------------------------------------------- + +-spec start() -> pid(). + +start() -> + spawn(fun () -> init() end). + +init() -> + process_flag(trap_exit, true), + gs:start(), + S = init_window(#state{}), + loop(S). + +-spec loop(#state{}) -> no_return(). + +loop(State) -> + receive + {gs, code_listbox, click, Data, [Idx, Txt | _]} -> + NewState = update_module_box(State,Idx,Data,Txt), + loop(NewState); + {gs, module_listbox, click, Data, [Idx, _Txt | _]} -> + NewState = update_fun(State,Idx,Data), + loop(NewState); + {gs, compmod, click, _, _} -> + loop(compile(State)); + {gs, prof, click, [], ["Turn off\nProfiling"]} -> + hipe_profile:prof_module_off(State#state.mod), + loop(update_module_box(State,State#state.mindex,State#state.mods,"")); + {gs, prof, click, [], _} -> + hipe_profile:prof_module(State#state.mod), + loop(update_module_box(State,State#state.mindex,State#state.mods,"")); + {gs, win, configure, _, _} -> + gs:config(win, [{width, ?WINDOW_WIDTH}, {height, ?WINDOW_HEIGHT}]), + loop(State); + + show_window when State#state.win_created =:= true -> + gs:config(win, [raise]), + loop(State); + show_window when State#state.win_created =:= false -> + loop((init_window(State))#state{win_created = true}); + + {gs, _Id, click, close_menu, _Args} -> + gs:destroy(win), + loop(State#state{win_created = false}); + {gs, _Id, keypress, _Data, [c, _, 0, 1 | _]} -> + gs:destroy(win), + loop(State#state{win_created = false}); + {gs, _Id, keypress, _Data, ['C', _, 1, 1 | _]} -> + gs:destroy(win), + loop(State#state{win_created = false}); + {gs, _Id, keypress, _Data, _Args} -> + loop(State); + {gs, _, destroy, _, _} -> + loop(State#state{win_created = false}); + + {compilation_done, _Res, Sender} -> + case State#state.compiling of + Sender -> + catch gs:config(compmod, [{enable, true}]), + update_text(compiling, ""), + loop(update_module_box(State, + State#state.mindex, + State#state.mods, "")); + _ -> + loop(State) + end; + + {'EXIT', _Pid, _Reason} -> + exit(normal); + _Other -> + io:format("HiPE window received message ~p ~n", [_Other]), + loop(State) + after + ?POLL_INTERVAL -> + loop(update_code_listbox(State)) + end. + +-spec init_window(#state{}) -> #state{}. + +init_window(State) -> + create_window(State), + gs:config(win, [{map,true}]), + update_code_listbox(State#state{win_created = true}). + +-spec create_window(#state{}) -> 'ok'. + +create_window(State) -> + gs:window(win, gs:start(), [{width, ?WINDOW_WIDTH}, + {height, ?WINDOW_HEIGHT}, + {bg, ?DEFAULT_BG_COLOR}, + {title, "[HiPE] Code list"}, + {configure, true}, + {destroy, true}, + {cursor, arrow}, + {keypress, true} + ]), + create_menu(), + Xpos = 4, + Ypos1 = 60, + Width = (?WINDOW_WIDTH - (Xpos*4)) div 3, + create_labels([{mods,Ypos1-20,"Loaded Modules"}], Xpos + 1 + 3), + Xpos2 = Xpos*2+Width, + create_labels([{mod,Ypos1-20,"Module:"++atom_to_list(State#state.mod)}, + {ver,Ypos1,""}, + {time,Ypos1+20,""}, + {native,Ypos1+40,""}, + {compiling,Ypos1+60,""}], Xpos2), + create_labels([{function,Ypos1-20,"Function:"}, + {nativefun,Ypos1,""}], Xpos*3+Width*2), + Ypos = 240, + Height1 = ?WINDOW_HEIGHT - Ypos1 - Xpos, + Height = ?WINDOW_HEIGHT - Ypos - Xpos, + gs:listbox(code_listbox, win, [{x, Xpos}, + {y, Ypos1}, + {width, Width}, + {height, Height1}, + {bg, {255,255,255}}, + {vscroll, right}, + {hscroll, true}, + {click, true}]), + gs:listbox(module_listbox, win, [{x, Xpos*2+Width}, + {y, Ypos}, + {width, Width}, + {height, Height}, + {bg, {255,255,255}}, + {vscroll, right}, + {hscroll, true}, + {click, true}]), + gs:listbox(profile_listbox, win, [{x, Xpos*3+Width*2}, + {y, Ypos1+40}, + {width, Width}, + {height, Height-60}, + {bg, {255,255,255}}, + {vscroll, right}, + {hscroll, true}, + {click, true}]), + gs:button(compmod,win,[{label,{text,"Compile\nModule"}}, + {justify,center}, + {x,Xpos*2+Width*1}, + {height,60}, + {y,Ypos-80}]), + gs:button(prof,win,[{label,{text,"Profile\nModule"}}, + {justify,center}, + {x,Xpos*2+Width*1+100}, + {height,60}, + {y,Ypos-80}]), + gs:button(clearprof,win,[{label, {text,"Clear\nProfile"}}, + {justify, center}, + {x, Xpos*2+Width*1+200}, + {height, 60}, + {y, Ypos-80}]), + gs:editor(edoc,win,[{x, Xpos*3+Width*2}, {y, Ypos}, + {width, Width}, {height, Height}, + {insert, {'end',"Edit this text!"}}, + {vscroll, right}, + {hscroll, true}, + {wrap, none}]), + ok. + +-spec create_menu() -> 'ok'. + +create_menu() -> + gs:menubar(menubar, win, [{bg, ?DEFAULT_BG_COLOR}]), + create_sub_menus([{mbutt, fmenu, " File", + [{" Close Ctrl-C ",close_menu}]}, + {mbuttc,cmenu, " Compile ", + [{" Compile Module", comp_mod}]}, + {mbuttp,pmenu, " Profile ", + [{" Profile Module", prof_mod}]}, + {mbutte,emenu, " Edoc", [separator]}, + {mbutta,amenu, " Analyze ", [separator]}, + {mbuttb,bmenu, " Benchmark ", [separator]}, + {mbuttj,jmenu, " Jit ", [separator]}]), + ok. + +create_menuitems(Parent, [{Text,Data}|Rest]) -> + gs:menuitem(Parent, [{bg, ?DEFAULT_BG_COLOR}, + {fg, {178, 34, 34}}, + {label, {text, Text}}, + {data, Data}, + {underline, 1} + ]), + create_menuitems(Parent, Rest); +create_menuitems(Parent, [separator|Rest]) -> + gs:menuitem(Parent, [{itemtype, separator}]), + create_menuitems(Parent, Rest); +create_menuitems(_, []) -> ok. + +create_sub_menus([{Parent, Name, Text, Items}|Rest]) -> + BG = {bg, ?DEFAULT_BG_COLOR}, + FG = {fg, {178, 34, 34}}, % firebrick + Label = {label, {text, Text}}, + gs:menubutton(Parent, menubar, [BG, FG, Label, {underline, 1}]), + gs:menu(Name, Parent, [BG, FG]), + create_menuitems(Name, Items), + create_sub_menus(Rest); +create_sub_menus([]) -> ok. + +create_labels([{Name,Y,Text}|Rest], Xpos) -> + gs:label(Name, win, [{width, (?WINDOW_WIDTH - 16) div 3}, + {height, 20}, + {x, Xpos + 1 + 3}, + {y, Y}, + {bg, ?DEFAULT_BG_COLOR}, + {fg, ?NORMAL_FG_COLOR}, + {font, ?HEADER_FONT}, + {align, w}, + {label, {text, Text}} + ]), + create_labels(Rest,Xpos); +create_labels([],_) -> ok. + +-spec update_code_listbox(#state{}) -> #state{}. + +update_code_listbox(State) -> + Mods = lists:sort(mods()), + case State#state.win_created of + false -> + State; + true -> + case Mods =:= State#state.mods of + true -> State; + false -> + update_text(mods, + "Loaded Modules ("++ + integer_to_list(length(Mods))++")"), + catch gs:config(code_listbox, [{data, Mods}, + {items, Mods}, + {selection, 0} + ]), + update_module_box(State#state{mods = Mods}, 0, Mods, "") + end + end. + +-spec update_fun(#state{}, integer(), [mfa()]) -> #state{}. + +update_fun(State, Idx, Data) -> + case State#state.win_created of + false -> + State; + true -> + MFA = {M,F,A} = get_selection(Idx, Data, {?MODULE,start,0}), + update_text(function, "Function: "++mfa_to_string(MFA)), + case in_native(F, A, native_code(M)) of + true -> update_text(nativefun, "Native"); + false -> update_text(nativefun, "Emulated") + end, + State + end. + +get_selection(Idx, Data, Default) -> + try lists:nth(Idx+1, Data) catch _:_ -> Default end. + +-spec update_module_box(#state{}, integer(), [atom()], string()) -> #state{}. + +update_module_box(State, Idx, Data, _Txt) -> + case State#state.win_created of + false -> + State; + true -> + Mod = get_selection(Idx, Data, hipe_tool), + %% io:format("~w\n", [Mod:module_info()]), + Info = Mod:module_info(), + Funs = lists:usort(funs(Mod)), + MFAs = mfas(Mod, Funs), + ModText = atom_to_list(Mod), + update_text(mod, "Module:"++ModText), + update_text(compmod, "Compile\nModule\n"++ModText), + Options = get_compile(Info), + update_text(ver, get_version(Options)), + update_text(time, get_time(Options)), + NativeCode = native_code(Mod), + + Prof = is_profiled(Mod), + if Prof -> update_text(prof, "Turn off\nProfiling"); + true -> update_text(prof, "Profile\n"++ModText) + end, + + Mode = get_mode(Funs, NativeCode), + + update_text(native, Mode), + Items = fun_names(Mod, Funs, NativeCode, Prof), + + Selection = {selection, 0}, + catch gs:config(module_listbox, [{data, MFAs}, + {items, Items}, + Selection]), + ProfData = [mfa_to_string(element(1, X)) ++ " " ++ + integer_to_list(element(2,X)) + || X <- hipe_profile:res(), element(2, X) > 0], + catch gs:config(profile_listbox, [{data, ProfData}, + {items, ProfData}, + Selection]), + get_edoc(Mod), + update_fun(State#state{mindex = Idx, mod = Mod, funs = Funs}, 0, MFAs) + end. + +update_text(Lab, Text) -> + catch gs:config(Lab, [{label, {text, Text}}]). + +%%--------------------------------------------------------------------- +%% @doc Returns a list of all loaded modules. +%%--------------------------------------------------------------------- + +-spec mods() -> [module()]. + +mods() -> + [Mod || {Mod,_File} <- code:all_loaded()]. + +-spec funs(module()) -> [fa()]. + +funs(Mod) -> + Mod:module_info(functions). + +-spec native_code(module()) -> [fa_address()]. + +native_code(Mod) -> + Mod:module_info(native_addresses). + +-spec mfas(module(), [fa()]) -> [mfa()]. + +mfas(Mod, Funs) -> + [{Mod,F,A} || {F,A} <- Funs]. + +-spec fun_names(module(), [fa()], [fa_address()], boolean()) -> string(). + +fun_names(M, Funs, NativeCode, Prof) -> + [list_to_atom(atom_to_list(F) ++ "/" ++ integer_to_list(A) ++ + (case in_native(F, A, NativeCode) of + true -> " [native] "; + false -> "" + end) + ++ + if Prof -> + (catch integer_to_list(hipe_bifs:call_count_get({M,F,A}))); + true -> "" + end) || + {F,A} <- Funs]. + +-spec in_native(atom(), arity(), [fa_address()]) -> boolean(). + +in_native(F, A, NativeCode) -> + lists:any(fun({Fun,Arity,_}) -> + (Fun =:= F andalso Arity =:= A) + end, + NativeCode). + +-spec mfa_to_string(mfa()) -> [char(),...]. + +mfa_to_string({M,F,A}) -> + atom_to_list(M) ++ ":" ++ atom_to_list(F) ++ "/" ++ integer_to_list(A). + +get_mode(Funs, NativeCode) -> + case NativeCode of + [] -> "Emulated"; + InNative when is_list(InNative) -> + if length(InNative) =:= length(Funs) -> + "Native"; + true -> "Mixed" + end + end. + +get_time(Comp) -> + case lists:keyfind(time, 1, Comp) of + {_, {Y,Month,D,H,Min,S}} -> + integer_to_list(Y) ++ + "-" ++ integer_to_list(Month) ++ + "-" ++ integer_to_list(D) ++ " " ++ + integer_to_list(H) ++ ":" ++ integer_to_list(Min) ++ + ":" ++ integer_to_list(S); + false -> "" + end. + +get_version(Comp) -> + case lists:keyfind(version, 1, Comp) of + {_, V} when is_list(V) -> V; + false -> "" + end. + +get_cwd(Options) -> + case lists:keyfind(cwd, 1, Options) of + {_, V} when is_atom(V) -> atom_to_list(V); + {_, V} -> V; + false -> "" + end. + +get_options(Comp) -> + case lists:keyfind(options, 1, Comp) of + {_, V} when is_list(V) -> V; + false -> "" + end. + +get_compile(Info) -> + case lists:keyfind(compile, 1, Info) of + {_, O} when is_list(O) -> O; + false -> [] + end. + +-spec is_profiled(module()) -> boolean(). + +is_profiled(Mod) -> + case hipe_bifs:call_count_get({Mod,module_info,0}) of + false -> false; + C when is_integer(C) -> true + end. + +-spec compile(#state{}) -> #state{}. + +compile(State) -> + catch gs:config(compmod, [{enable, false}]), + update_text(compiling, "Compiling..."), + Parent = self(), + P = spawn(fun() -> c(Parent, State#state.mod, State#state.options) end), + State#state{compiling = P}. + +-spec c(pid(), module(), comp_options()) -> 'ok'. + +c(Parent, Mod, Options) -> + Res = hipe:c(Mod, Options), + Parent ! {compilation_done,Res,self()}, + ok. + +get_edoc(Mod) -> + Info = Mod:module_info(), + Comp = get_compile(Info), + Options = get_options(Comp), + Dir = get_cwd(Options), + File = + case Dir of + "" -> atom_to_list(Mod) ++ ".erl"; + _ -> Dir ++"/" ++ atom_to_list(Mod) ++ ".erl" + end, + %% io:format("Get ~s\n", [File]), + Text = try edoc(File, [{xml_export,xmerl_text}, no_output]) + catch _:_ -> "error" + end, + gs:config(edoc, {enable, true}), + gs:config(edoc, clear), + gs:config(edoc, {insert, {insert, Text}}), + gs:config(edoc, {enable, false}), + ok. + +edoc(Name, Opts) -> + Doc = edoc:get_doc([Name, Opts]), + %% Comments = edoc:read_comments(Name, Opts), + %% Text = edoc:forms(Forms, Comments, Name, Opts), + edoc:layout([Doc, Opts]), + ok. diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile new file mode 100644 index 0000000000..27cacedf11 --- /dev/null +++ b/lib/hipe/util/Makefile @@ -0,0 +1,109 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +ifdef HIPE_ENABLED +HIPE_MODULES = hipe_vectors +else +HIPE_MODULES = +endif +MODULES = hipe_timing hipe_dot hipe_digraph $(HIPE_MODULES) + +HRL_FILES= +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/util + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/util + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + + +$(EBIN)/hipe_timing.beam: ../main/hipe.hrl +$(EBIN)/hipe_vectors.beam: hipe_vectors.hrl diff --git a/lib/hipe/util/hipe_digraph.erl b/lib/hipe/util/hipe_digraph.erl new file mode 100644 index 0000000000..a62e913fe5 --- /dev/null +++ b/lib/hipe/util/hipe_digraph.erl @@ -0,0 +1,238 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%----------------------------------------------------------------------- +%% File : hipe_digraph.erl +%% Author : Tobias Lindahl +%% Purpose : Provides a simple implementation of a directed graph. +%% +%% Created : 9 Feb 2005 by Tobias Lindahl +%%----------------------------------------------------------------------- +-module(hipe_digraph). + +-export([new/0, add_edge/3, add_node/2, add_node_list/2, + from_list/1, to_list/1, get_parents/2, get_children/2]). +-export([reverse_preorder_sccs/1]). + +%%------------------------------------------------------------------------ + +-type ordset(T) :: [T]. % XXX: temporarily + +-record(hipe_digraph, {edges = dict:new() :: dict(), + rev_edges = dict:new() :: dict(), + leaves = ordsets:new() :: ordset(_), % ??? + nodes = sets:new() :: set()}). + +-opaque hdg() :: #hipe_digraph{}. + +%%------------------------------------------------------------------------ + +-spec new() -> hdg(). + +new() -> + #hipe_digraph{edges = dict:new(), rev_edges = dict:new(), + leaves = ordsets:new(), nodes = sets:new()}. + +-spec from_list([_]) -> hdg(). + +from_list(List) -> + Edges = lists:foldl(fun({From, To}, Dict) -> + Fun = fun(Set) -> ordsets:add_element(To, Set) end, + dict:update(From, Fun, [To], Dict) + end, + dict:new(), List), + RevEdges = lists:foldl(fun({From, To}, Dict) -> + Fun = fun(Set) -> + ordsets:add_element(From, Set) + end, + dict:update(To, Fun, [From], Dict) + end, + dict:new(), List), + Keys1 = sets:from_list(dict:fetch_keys(Edges)), + Keys2 = sets:from_list(dict:fetch_keys(RevEdges)), + Nodes = sets:union(Keys1, Keys2), + #hipe_digraph{edges = Edges, rev_edges = RevEdges, + leaves = [], nodes = Nodes}. + +-spec to_list(hdg()) -> [_]. + +to_list(#hipe_digraph{edges = Edges}) -> + List1 = dict:to_list(Edges), + List2 = lists:foldl(fun({From, ToList}, Acc) -> + [[{From, To} || To <- ToList]|Acc] + end, [], List1), + lists:flatten(List2). + +-spec add_node(_, hdg()) -> hdg(). + +add_node(NewNode, DG = #hipe_digraph{nodes=Nodes}) -> + DG#hipe_digraph{nodes = sets:add_element(NewNode, Nodes)}. + +-spec add_node_list([_], hdg()) -> hdg(). + +add_node_list(NewNodes, DG = #hipe_digraph{nodes=Nodes}) -> + Set = sets:from_list(NewNodes), + DG#hipe_digraph{nodes = sets:union(Set, Nodes)}. + +-spec add_edge(_, _, hdg()) -> hdg(). + +add_edge(From, To, #hipe_digraph{edges = Edges, rev_edges = RevEdges, + leaves = Leaves, nodes = Nodes}) -> + Fun1 = fun(Set) -> ordsets:add_element(To, Set) end, + NewEdges = dict:update(From, Fun1, [To], Edges), + Fun2 = fun(Set) -> ordsets:add_element(From, Set) end, + NewRevEdges = dict:update(To, Fun2, [From], RevEdges), + NewLeaves = ordsets:del_element(From, Leaves), + #hipe_digraph{edges = NewEdges, + rev_edges = NewRevEdges, + leaves = NewLeaves, + nodes = sets:add_element(From, sets:add_element(To, Nodes))}. + +%%------------------------------------------------------------------------- + +-spec take_indep_scc(hdg()) -> 'none' | {'ok', [_], hdg()}. + +take_indep_scc(DG = #hipe_digraph{edges = Edges, rev_edges = RevEdges, + leaves = Leaves, nodes = Nodes}) -> + case sets:size(Nodes) =:= 0 of + true -> none; + false -> + {SCC, NewLeaves} = + case Leaves of + [H|T] -> + {[H], T}; + [] -> + case find_all_leaves(Edges) of + [] -> + {[Node|_], _} = dfs(Nodes, RevEdges), + {SCC1, _} = dfs(Node, Edges), + {SCC1, []}; + [H|T] -> + {[H], T} + end + end, + NewEdges = remove_edges(SCC, Edges, RevEdges), + NewRevEdges = remove_edges(SCC, RevEdges, Edges), + NewNodes = sets:subtract(Nodes, sets:from_list(SCC)), + {ok, reverse_preorder(SCC, Edges), + DG#hipe_digraph{edges = NewEdges, rev_edges = NewRevEdges, + leaves = NewLeaves, nodes = NewNodes}} + end. + +find_all_leaves(Edges) -> + List = dict:fold(fun(Key, [Key], Acc) -> [Key|Acc]; + (_, _, Acc) -> Acc + end, [], Edges), + ordsets:from_list(List). + +remove_edges(Nodes0, Edges, RevEdges) -> + Nodes = ordsets:from_list(Nodes0), + Fun = fun(N, Dict) -> dict:erase(N, Dict) end, + Edges1 = lists:foldl(Fun, Edges, Nodes), + remove_edges_in(Nodes, Edges1, RevEdges). + +remove_edges_in([Node|Nodes], Edges, RevEdges) -> + NewEdges = + case dict:find(Node, RevEdges) of + error -> + Edges; + {ok, Set} -> + Fun = fun(Key, Dict) -> + case dict:find(Key, Dict) of + error -> + Dict; + {ok, OldTo} -> + case ordsets:del_element(Node, OldTo) of + [] -> dict:store(Key, [Key], Dict); + NewSet -> dict:store(Key, NewSet, Dict) + end + end + end, + lists:foldl(Fun, Edges, Set) + end, + remove_edges_in(Nodes, NewEdges, RevEdges); +remove_edges_in([], Edges, _RevEdges) -> + Edges. + +reverse_preorder([_] = Nodes, _Edges) -> + Nodes; +reverse_preorder([N|_] = Nodes, Edges) -> + NodeSet = sets:from_list(Nodes), + {PreOrder, _} = dfs(N, Edges), + DFS = [X || X <- PreOrder, sets:is_element(X, NodeSet)], + lists:reverse(DFS). + +%%--------------------------------------------------------------------- + +-spec reverse_preorder_sccs(hdg()) -> [[_]]. + +reverse_preorder_sccs(DG) -> + reverse_preorder_sccs(DG, []). + +reverse_preorder_sccs(DG, Acc) -> + case take_indep_scc(DG) of + none -> lists:reverse(Acc); + {ok, SCC, DG1} -> reverse_preorder_sccs(DG1, [SCC|Acc]) + end. + +%%--------------------------------------------------------------------- + +-spec get_parents(_, hdg()) -> [_]. + +get_parents(Node, #hipe_digraph{rev_edges = RevEdges}) -> + case dict:is_key(Node, RevEdges) of + true -> dict:fetch(Node, RevEdges); + false -> [] + end. + +-spec get_children(_, hdg()) -> [_]. + +get_children(Node, #hipe_digraph{edges = Edges}) -> + case dict:is_key(Node, Edges) of + true -> dict:fetch(Node, Edges); + false -> [] + end. + +%%--------------------------------------------------------------------- +%% dfs/2 returns a preordered depth first search and the nodes visited. + +dfs(Node, Edges) -> + case sets:is_set(Node) of + true -> + dfs(sets:to_list(Node), Edges, sets:new(), []); + false -> + dfs([Node], Edges, sets:new(), []) + end. + +dfs([Node|Left], Edges, Visited, Order) -> + case sets:is_element(Node, Visited) of + true -> + dfs(Left, Edges, Visited, Order); + false -> + NewVisited = sets:add_element(Node, Visited), + case dict:find(Node, Edges) of + error -> + dfs(Left, Edges, NewVisited, [Node|Order]); + {ok, Succ} -> + {NewOrder, NewVisited1} = dfs(Succ, Edges, NewVisited, Order), + dfs(Left, Edges, NewVisited1, [Node|NewOrder]) + end + end; +dfs([], _Edges, Visited, Order) -> + {Order, Visited}. diff --git a/lib/hipe/util/hipe_dot.erl b/lib/hipe/util/hipe_dot.erl new file mode 100755 index 0000000000..d6ef801c88 --- /dev/null +++ b/lib/hipe/util/hipe_dot.erl @@ -0,0 +1,217 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_dot.erl +%%% Author : Per Gustafsson +%%% Description : +%%% +%%% Created : 25 Nov 2004 by Per Gustafsson +%%%------------------------------------------------------------------- + +-module(hipe_dot). + +-export([translate_digraph/3, translate_digraph/5, + translate_list/3, translate_list/4, translate_list/5]). + +%%-------------------------------------------------------------------- + +-type gnode() :: any(). +-type edge() :: {gnode(), gnode()}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% This module creates .dot representations of graphs from their +%% Erlang representations. There are two different forms of Erlang +%% representations that the module accepts, digraphs and lists of two +%% tuples (where each tuple represents a directed edge). +%% +%% The functions also require a FileName and a name of the graph. The +%% filename is the name of the resulting .dot file the GraphName is +%% pretty much useless. +%% +%% The resulting .dot reprsentation will be stored in the flie FileName. +%% +%% Interfaces: +%% +%% translate_list(Graph::[{Node,Node}], FileName::string(), +%% GraphName::string()) -> ok +%% +%% translate_list(Graph::[{Node,Node}], FileName::string(), +%% GraphName::string(), Options::[option] ) -> ok +%% +%% translate_list(Graph::[{Node,Node}], FileName::string(), +%% GraphName::string(), Fun::fun(term() -> string()), +%% Options::[option]) -> ok +%% +%% The optional Fun argument dictates how the node/names should be output. +%% +%% The option list can be used to pass options to .dot to decide how +%% different nodes and edges should be displayed. +%% +%% translate_digraph has the same interface as translate_list except +%% it takes a digraph rather than a list. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec translate_digraph(digraph(), string(), string()) -> 'ok'. + +translate_digraph(G, FileName, GName) -> + translate_digraph(G, FileName, GName, + fun(X) -> io_lib:format("~p", [X]) end, []). + +-spec translate_digraph(digraph(), string(), string(), + fun((_) -> string()), [_]) -> 'ok'. + +translate_digraph(G, FileName, GName, Fun, Opts) -> + Edges = [digraph:edge(G, X) || X <- digraph:edges(G)], + EdgeList = [{X, Y} || {_, X, Y, _} <- Edges], + translate_list(EdgeList, FileName, GName, Fun, Opts). + +%%-------------------------------------------------------------------- + +-spec translate_list([edge()], string(), string()) -> 'ok'. + +translate_list(List, FileName, GName) -> + translate_list(List, FileName, GName, + fun(X) -> lists:flatten(io_lib:format("~p", [X])) end, []). + +-spec translate_list([edge()], string(), string(), [_]) -> 'ok'. + +translate_list(List, FileName, GName, Opts) -> + translate_list(List, FileName, GName, + fun(X) -> lists:flatten(io_lib:format("~p", [X])) end, Opts). + +-spec translate_list([edge()], string(), string(), + fun((_) -> string()), [_]) -> 'ok'. + +translate_list(List, FileName, GName, Fun, Opts) -> + {NodeList1, NodeList2} = lists:unzip(List), + NodeList = NodeList1 ++ NodeList2, + NodeSet = ordsets:from_list(NodeList), + Start = ["digraph ",GName ," {"], + VertexList = [node_format(Opts, Fun, V) ||V <- NodeSet], + End = ["graph [", GName, "=", GName, "]}"], + EdgeList = [edge_format(Opts, Fun, X, Y) || {X,Y} <- List], + String = [Start, VertexList, EdgeList, End], + %% io:format("~p~n", [lists:flatten([String])]), + ok = file:write_file(FileName, list_to_binary(String)). + +%%-------------------------------------------------------------------- + +node_format(Opt, Fun, V) -> + OptText = nodeoptions(Opt, Fun ,V), + Tmp = io_lib:format("~p", [Fun(V)]), + String = lists:flatten(Tmp), + %% io:format("~p", [String]), + {Width, Heigth} = calc_dim(String), + W = ((Width div 7) + 1) * 0.55, + H = Heigth * 0.4, + SL = io_lib:format("~f", [W]), + SH = io_lib:format("~f", [H]), + [String, " [width=", SL, " heigth=", SH, " ", OptText,"];\n"]. + +edge_format(Opt, Fun, V1, V2) -> + OptText = + case lists:flatten(edgeoptions(Opt, Fun ,V1, V2)) of + [] -> + []; + [_|X] -> + X + end, + String = [io_lib:format("~p", [Fun(V1)]), " -> ", + io_lib:format("~p", [Fun(V2)])], + [String, " [", OptText, "];\n"]. + +calc_dim(String) -> + calc_dim(String, 1, 0, 0). + +calc_dim("\\n" ++ T, H, TmpW, MaxW) -> + calc_dim(T, H+1, 0, erlang:max(TmpW, MaxW)); +calc_dim([_|T], H, TmpW, MaxW) -> + calc_dim(T, H, TmpW+1, MaxW); +calc_dim([], H, TmpW, MaxW) -> + {erlang:max(TmpW, MaxW), H}. + +edgeoptions([{all_edges, {OptName, OptVal}}|T], Fun, V1, V2) -> + case legal_edgeoption(OptName) of + true -> + [io_lib:format(",~p=~p ", [OptName, OptVal])|edgeoptions(T, Fun, V1, V2)] + %% false -> + %% edgeoptions(T, Fun, V1, V2) + end; +edgeoptions([{N1, N2, {OptName, OptVal}}|T], Fun, V1, V2) -> + case %% legal_edgeoption(OptName) andalso + Fun(N1) =:= Fun(V1) andalso Fun(N2) =:= Fun(V2) of + true -> + [io_lib:format(",~p=~p ", [OptName, OptVal])|edgeoptions(T, Fun, V1, V2)]; + false -> + edgeoptions(T, Fun, V1, V2) + end; +edgeoptions([_|T], Fun, V1, V2) -> + edgeoptions(T, Fun, V1, V2); +edgeoptions([], _, _, _) -> + []. + +nodeoptions([{all_nodes, {OptName, OptVal}}|T], Fun, V) -> + case legal_nodeoption(OptName) of + true -> + [io_lib:format(",~p=~p ", [OptName, OptVal])|nodeoptions(T, Fun, V)]; + false -> + nodeoptions(T, Fun, V) + end; +nodeoptions([{Node, {OptName, OptVal}}|T], Fun, V) -> + case Fun(Node) =:= Fun(V) andalso legal_nodeoption(OptName) of + true -> + [io_lib:format("~p=~p ", [OptName, OptVal])|nodeoptions(T, Fun, V)]; + false -> + nodeoptions(T, Fun, V) + end; +nodeoptions([_|T], Fun, V) -> + nodeoptions(T, Fun, V); +nodeoptions([], _Fun, _V) -> + []. + +legal_nodeoption(bottomlabel) -> true; +legal_nodeoption(color) -> true; +legal_nodeoption(comment) -> true; +legal_nodeoption(distortion) -> true; +legal_nodeoption(fillcolor) -> true; +legal_nodeoption(fixedsize) -> true; +legal_nodeoption(fontcolor) -> true; +legal_nodeoption(fontname) -> true; +legal_nodeoption(fontsize) -> true; +legal_nodeoption(group) -> true; +legal_nodeoption(height) -> true; +legal_nodeoption(label) -> true; +legal_nodeoption(layer) -> true; +legal_nodeoption(orientation) -> true; +legal_nodeoption(peripheries) -> true; +legal_nodeoption(regular) -> true; +legal_nodeoption(shape) -> true; +legal_nodeoption(shapefile) -> true; +legal_nodeoption(sides) -> true; +legal_nodeoption(skew) -> true; +legal_nodeoption(style) -> true; +legal_nodeoption(toplabel) -> true; +legal_nodeoption('URL') -> true; +legal_nodeoption(z) -> true; +legal_nodeoption(Option) when is_atom(Option) -> false. + +legal_edgeoption(Option) when is_atom(Option) -> true. diff --git a/lib/hipe/util/hipe_timing.erl b/lib/hipe/util/hipe_timing.erl new file mode 100644 index 0000000000..191db497e2 --- /dev/null +++ b/lib/hipe/util/hipe_timing.erl @@ -0,0 +1,131 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%==================================================================== +%% Note: Uses the process keys: +%% hipe_time - Indicates what to time. +%% hipe_timers - A stack of timers. +%% {hipe_timer,T} - Delata times for named timers. +%% T - Acc times for all named timers T. +%%==================================================================== + +-module(hipe_timing). +-export([start/2, stop/2, + %% start_timer/0, stop_timer/1, + %% get_hipe_timer_val/1, set_hipe_timer_val/2, + %% start_hipe_timer/1, stop_hipe_timer/1, + start_optional_timer/2, stop_optional_timer/2]). + +-include("../main/hipe.hrl"). + +%%===================================================================== + +-spec start(string(), atom()) -> 'ok'. + +start(Text, Mod) when is_atom(Mod) -> + Timers = + case get(hipe_timers) of + undefined -> []; + Ts -> Ts + end, + Space = [$| || _ <- Timers], + Total = start_timer(), + put(hipe_timers, [Total|Timers]), + ?msg("[@~7w]" ++ Space ++ "> ~s~n", [Total,Text]). + +-spec stop(string(), atom()) -> 'ok'. + +stop(Text, Mod) when is_atom(Mod) -> + {Total,_Last} = erlang:statistics(runtime), + case get(hipe_timers) of + [StartTime|Timers] -> + Space = [$| || _ <- Timers], + put(hipe_timers,Timers), + ?msg("[@~7w]" ++ Space ++ "< ~s: ~w~n", [Total, Text, Total-StartTime]); + _ -> + put(hipe_timers, []), + ?msg("[@~7w]< ~s: ~w~n", [Total, Text, Total]) + end. + +-spec start_optional_timer(string(), atom()) -> 'ok'. + +start_optional_timer(Text, Mod) -> + case get(hipe_time) of + true -> start(Text, Mod); + all -> start(Text, Mod); + Mod -> start(Text, Mod); + List when is_list(List) -> + case lists:member(Mod, List) of + true -> start(Text, Mod); + false -> ok + end; + _ -> ok + end. + +-spec stop_optional_timer(string(), atom()) -> 'ok'. + +stop_optional_timer(Text, Mod) -> + case get(hipe_time) of + true -> stop(Text, Mod); + all -> stop(Text, Mod); + Mod -> stop(Text, Mod); + List when is_list(List) -> + case lists:member(Mod, List) of + true -> stop(Text, Mod); + false -> ok + end; + _ -> ok + end. + +-spec start_timer() -> non_neg_integer(). + +start_timer() -> + {Total, _Last} = erlang:statistics(runtime), + Total. + +%% stop_timer(T) -> +%% {Total, _Last} = erlang:statistics(runtime), +%% Total - T. +%% +%% start_hipe_timer(Timer) -> +%% Time = erlang:statistics(runtime), +%% put({hipe_timer,Timer}, Time). +%% +%% stop_hipe_timer(Timer) -> +%% {T2, _} = erlang:statistics(runtime), +%% T1 = +%% case get({hipe_timer,Timer}) of +%% {T0, _} -> T0; +%% _ -> 0 +%% end, +%% AccT = +%% case get(Timer) of +%% T when is_integer(T) -> T; +%% _ -> 0 +%% end, +%% put(Timer,AccT+T2-T1). +%% +%% get_hipe_timer_val(Timer) -> +%% case get(Timer) of +%% T when is_integer(T) -> T; +%% _ -> 0 +%% end. +%% +%% set_hipe_timer_val(Timer, Val) -> +%% put(Timer, Val). diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl new file mode 100644 index 0000000000..d153f3a50d --- /dev/null +++ b/lib/hipe/util/hipe_vectors.erl @@ -0,0 +1,111 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% VECTORS IN ERLANG +%% +%% Abstract interface to vectors, indexed from 0 to size-1. + +-module(hipe_vectors). +-export([new/2, + set/3, + get/2, + size/1, + vector_to_list/1, + %% list_to_vector/1, + list/1]). + +-include("hipe_vectors.hrl"). + +%% --------------------------------------------------------------------- + +-ifdef(USE_TUPLES). + +new(N, V) -> + erlang:make_tuple(N, V). + +size(V) -> erlang:tuple_size(V). + +list(Vec) -> + index(tuple_to_list(Vec), 0). + +index([X|Xs],N) -> + [{N,X} | index(Xs,N+1)]; +index([],_) -> + []. + +%% list_to_vector(Xs) -> +%% list_to_tuple(Xs). + +vector_to_list(V) -> + tuple_to_list(V). + +set(Vec, Ix, V) -> + setelement(Ix+1, Vec, V). + +get(Vec, Ix) -> element(Ix+1, Vec). + +-endif. %% ifdef USE_TUPLES + +%% --------------------------------------------------------------------- + +-ifdef(USE_GBTREES). + +-spec new(non_neg_integer(), _) -> hipe_vector(). +new(N, V) when is_integer(N), N >= 0 -> + gb_trees:from_orddict(mklist(N, V)). + +mklist(N, V) -> + mklist(0, N, V). + +mklist(M, N, V) when M < N -> + [{M, V} | mklist(M+1, N, V)]; +mklist(_, _, _) -> + []. + +-spec size(hipe_vector()) -> non_neg_integer(). +size(V) -> gb_trees:size(V). + +-spec list(hipe_vector()) -> [{_, _}]. +list(Vec) -> + gb_trees:to_list(Vec). + +%% -spec list_to_vector([_]) -> hipe_vector(). +%% list_to_vector(Xs) -> +%% gb_trees:from_orddict(index(Xs, 0)). +%% +%% index([X|Xs], N) -> +%% [{N, X} | index(Xs, N+1)]; +%% index([],_) -> +%% []. + +-spec vector_to_list(hipe_vector()) -> [_]. +vector_to_list(V) -> + gb_trees:values(V). + +-spec set(hipe_vector(), non_neg_integer(), _) -> hipe_vector(). +set(Vec, Ix, V) -> + gb_trees:update(Ix, V, Vec). + +-spec get(hipe_vector(), non_neg_integer()) -> any(). +get(Vec, Ix) -> + gb_trees:get(Ix, Vec). + +-endif. %% ifdef USE_GBTREES diff --git a/lib/hipe/util/hipe_vectors.hrl b/lib/hipe/util/hipe_vectors.hrl new file mode 100644 index 0000000000..043faf4c91 --- /dev/null +++ b/lib/hipe/util/hipe_vectors.hrl @@ -0,0 +1,28 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%-define(USE_TUPLES, true). +-define(USE_GBTREES, true). + +-ifdef(USE_TUPLES). +-type hipe_vector() :: tuple(). +-endif. + +-ifdef(USE_GBTREES). +-type hipe_vector() :: gb_tree(). +-endif. diff --git a/lib/hipe/vsn.mk b/lib/hipe/vsn.mk new file mode 100644 index 0000000000..c75ac5efe7 --- /dev/null +++ b/lib/hipe/vsn.mk @@ -0,0 +1 @@ +HIPE_VSN = 3.7.4 diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile new file mode 100644 index 0000000000..065b56fce3 --- /dev/null +++ b/lib/hipe/x86/Makefile @@ -0,0 +1,134 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. +MODULES=hipe_rtl_to_x86 \ + hipe_x86 \ + hipe_x86_assemble \ + hipe_x86_cfg \ + hipe_x86_defuse \ + hipe_x86_encode \ + hipe_x86_frame \ + hipe_x86_liveness \ + hipe_x86_main \ + hipe_x86_postpass \ + hipe_x86_pp \ + hipe_x86_ra \ + hipe_x86_ra_finalise \ + hipe_x86_ra_ls \ + hipe_x86_ra_naive \ + hipe_x86_ra_postconditions \ + hipe_x86_ra_x87_ls \ + hipe_x86_registers \ + hipe_x86_spill_restore \ + hipe_x86_x87 + +HRL_FILES=hipe_x86.hrl +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC=$(APP_FILE).src +# APP_TARGET=$(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC=$(APPUP_FILE).src +# APPUP_TARGET=$(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. +$(EBIN)/hipe_rtl_to_x86.beam: ../rtl/hipe_rtl.hrl +$(EBIN)/hipe_x86_assemble.beam: ../main/hipe.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl +$(EBIN)/hipe_x86_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc +$(EBIN)/hipe_x86_frame.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_x86_liveness.beam: ../flow/liveness.inc +$(EBIN)/hipe_x86_main.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl + +$(TARGET_FILES): hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/x86/NOTES.OPTIM b/lib/hipe/x86/NOTES.OPTIM new file mode 100644 index 0000000000..4c241cacb4 --- /dev/null +++ b/lib/hipe/x86/NOTES.OPTIM @@ -0,0 +1,200 @@ +$Id$ + +Partial x86 code optimisation guide +=================================== +Priority should be given to P6 and P4, then K7, +then P5, and last to K6. + +Rules that are blatantly obvious or irrelevant for HiPE are +generally not listed. These includes things like alignment +of basic data types, store-forwarding rules when alignment +or sizes don't match, and partial register stalls. + +Intel P4 +-------- +The P6 4-1-1 insn decode template no longer applies. + +Simple insns (add/sub/cmp/test/and/or/xor/neg/not/mov/sahf) +are twice as fast as in P6. + +Shifts are "movsx" (sign-extend) are slower than in P6. + +Always avoid "inc" and "dec", use "add" and "sub" instead, +due to condition codes dependencies overhead. + +"fxch" is slightly more expensive than in P6, where it was free. + +Use "setcc" or "cmov" to eliminate unpredictable branches. + +For hot code executing out of the trace cache, alignment of +branch targets is less of an issue compared to P6. + +Do use "fxch" to simulate a flat FP register file, but only +for that purpose, not for manual scheduling for parallelism. + +Using "lea" is highly recommended. + +Eliminate redundant loads. Use regs as much as possible. + +Left shifts up to 3 have longer latencies than the equivalent +sequence of adds. + +Do utilise the addressing modes, to save registers and trace +cache bandwidth. + +"xor reg,reg" or "sub reg,reg" preferred over moving zero to reg. + +"test reg,reg" preferred over "cmp" with zero or "and". + +Avoid explicit cmp/test;jcc if the preceeding insn (alu, but not +mov or lea) set the condition codes. + +Load-execute alu insns (mem src) are Ok. + +Add-reg-to-mem slightly better than add-mem-to-reg. + +Add-reg-to-mem is better than load;add;store. + +Intel P6 +-------- +4-1-1 instruction decoding template: can decode one semi-complex +(max 4 uops) and two simple (1 uop) insns per clock; follow a +complex insn by two simple ones, otherwise the decoders will stall. + +Load-execute (mem src) alu insns are 2 uops. +Read-modify-write (mem dst) alu insns are 4 uops. + +Insns longer than 7 bytes block parallel decoding. +Avoid insns longer than 7 bytes. + +Lea is useful. + +"movzx" is preferred for zero-extension; the xor;mov alternative +causes a partial register stall. + +Use "test" instead of "cmp" with zero. + +Pull address calculations into load and store insn addressing modes. + +Clear a reg with "xor", not by moving zero to it. + +Many alu insns set the condition codes. Replace "alu;cmp;jcc" +with "alu;jcc". This is not applicable for "mov" or "lea". + +For FP code, simulate a flat register file on the x87 stack by +using fxch to reorder it. + +AMD K7 +------ +Select DirectPath insns. Avoid VectorPath insns due to slower decode. + +Alu insns with mem src are very efficient. +Alu insns with mem dst are very efficient. + +Fetches from I-cache are 16-byte aligned. Align functions and frequently +used labels at or near the start of 16-byte aligned blocks. + +"movzx" preferred over "xor;mov" for zero-extension. + +"push mem" preferred over "load;push reg". + +"xor reg,reg" preferred over moving zero to the reg. + +"test" preferred over "cmp". + +"pop" insns are VectorPath. "pop mem" has latency 3, "pop reg" has +latency 4. + +"push reg" and "push imm" are DirectPath, "push mem" is VectorPath. +The latency is 3 clocks. + +Intel P5 +-------- +If a loop header is less than 8 bytes away from a 16-byte +boundary, align it to the 16-byte boundary. + +If a return address is less than 8 bytes away from a 16-byte +boundary, align it to the 16-byte boundary. + +Align function entry points to 16-byte boundaries. + +Ensure that doubles are 64-bit aligned. + +Data cache line size is 32 bytes. The whole line is brought +in on a read miss. + +"push mem" is not pairable; loading a temp reg and pushing +the reg pairs better -- this is also faster on the 486. + +No conditional move instruction. + +Insns longer than 7 bytes can't go down the V-pipe or share +the insn FIFO with other insns. +Avoid insns longer than 7 bytes. + +Lea is useful when it replaces several other add/shift insns. +Lea is not a good replacement for a single shl since a scaled +index requires a disp32 (or base), making the insn longer. + +"movzx" is worse than the xor;mov alternative -- the opcode +prefix causes a slowdown and it is not pariable. + +Use "test" instead of "cmp" with zero. + +"test eax,imm" and "test reg,reg" are pairable, other forms are not. + +Pull address calculations into load and store insn addressing modes. + +Clear a reg with "xor", not by moving zero to it. + +Many alu insns set the condition codes. Replace "alu;cmp;jcc" +with "alu;jcc". This is not applicable for "mov" or "lea". + +For FP code, simulate a flat register file on the x87 stack by +using fxch to reorder it. + +"neg" and "not" are not pairable. "test imm,reg" and "test imm,mem" +are not pairable. Shifts by "cl" are not pairable. Shifts by "1" or +"imm" are pairable but only execute in the U-pipe. + +AMD K6 +------ +The insn size predecoder has a 3-byte window. Insns with both prefix +and SIB bytes cannot be short-decoded. + +Use short and simple insns, including mem src alu insns. + +Avoid insns longer than 7 bytes. They cannot be short-decoded. +Short-decode: max 7 bytes, max 2 uops. +Long-decode: max 11 bytes, max 4 uops. +Vector-decode: longer than 11 bytes or more than 4 uops. + +Prefer read-modify-write alu insns (mem dst) over "load;op;store" +sequences, for code density and register pressure reasons. + +Avoid the "(esi)" addressing mode: it forces the insn to be vector-decoded. +Use a different reg or add an explicit zero displacement. + +"add reg,reg" preferred over a shl by 1, it parallelises better. + +"movzx" preferred over "xor;mov" for zero-extension. + +Moving zero to a reg preferred over "xor reg,reg" due to dependencies +and condition codes overhead. + +"push mem" preferred over "load;push reg" due to code density and +register pressure. (Page 64.) +Explicit moves preferred when pushing args for fn calls, due to +%esp dependencies and random access possibility. (Page 58.) +[hmm, these two are in conflict] + +There is no penalty for seg reg prefix unless there are multiple prefixes. + +Align function entries and frequent branch targets to 16-byte boundaries. + +Shifts by imm only go down one of the pipes. + +"test reg,reg" preferred over "cmp" with zero. +"test reg,imm" is a long-decode insn. + +No conditional move insn. diff --git a/lib/hipe/x86/NOTES.RA b/lib/hipe/x86/NOTES.RA new file mode 100644 index 0000000000..ce80411642 --- /dev/null +++ b/lib/hipe/x86/NOTES.RA @@ -0,0 +1,32 @@ +$Id$ + +Register Allocation +=================== + +These are the rules that HiPE x86 register allocators must abide by. + +- Before RA, every Temp (precoloured or pseudo) is semantically + equivalent to Reg. Any operand may be Temp. + +- Before RA, only FIXED registers may occur in precoloured Temps. + Exception 1 is move: src or dst may be an argument register. + Exception 2 is call: the dst (if any) must be %eax. + +- After RA, an operand (src or dst) may refer to at most one memory cell. + Therefore, a pseudo-Temp MAY NOT occur as base or offset in an + explicit memory operand after RA. + +- After RA, a binary operation (alu, cmp, move) may refer to at most + one memory cell. Therefore, AT MOST ONE of src and dst may be a + pseudo-Temp after RA. If one of the operands (src or dst) is an + explicit memory operand, then the other operand MUST NOT be a + pseudo-Temp after RA. + +- After RA, the index in a jmp_switch must be a register. + +- After RA, the temp in a lea must be a register. + +- After RA, the temp in an imul must be a register. + +- After RA, a function's formal parameters must reside on the stack. + Therefore, the RA MUST NOT map the formals to actual registers. diff --git a/lib/hipe/x86/TODO b/lib/hipe/x86/TODO new file mode 100644 index 0000000000..7c93f7daf3 --- /dev/null +++ b/lib/hipe/x86/TODO @@ -0,0 +1,31 @@ +rtl_to_x86: +* recognise alub(X,X,sub,1,lt,L1,L2,P) and turn it into 'dec', + this might improve the reduction test code slightly (X is + the pseudo for FCALLS) +* recognise alu(Z,X,add,Y) and turn it into 'lea'. +* rewrite tailcalls as parallel assignments before regalloc + +x86: +* Use separate constructors for real regs (x86_reg) and pseudos (x86_temp). + +Frame: +* drop tailcall rewrite + +Registers: +* make the 2 regs now reserved for frame's tailcall rewrite available for arg passing + +Optimizations: +* replace jcc cc,L1; jmp L0; L1: with jcc L0; L1: (length:len/2) +* Kill move X,X insns, either in frame or finalise +* Instruction scheduling module +* We can now choose to not have HP in %esi. However, this currently loses + performance due to (a) repeated moves to/from P_HP(P), and (b) spills of + the temp that contains a copy of P_HP(P). Both of these problems should be + fixed, and then, if we don't have any noticeable performance degradation, we + should permanently change to a non-reserved HP strategy. + +Loader: + +Assembler: + +Encode: diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl new file mode 100644 index 0000000000..d77e4fed3b --- /dev/null +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -0,0 +1,865 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% Translate 3-address RTL code to 2-address pseudo-x86 code. + +-ifdef(HIPE_AMD64). +-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(ECX, rcx). +-define(EAX, rax). +-else. +-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(ECX, ecx). +-define(EAX, eax). +-endif. + +-module(?HIPE_RTL_TO_X86). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl"). + +translate(RTL) -> % RTL function -> x86 defun + hipe_gensym:init(x86), + hipe_gensym:set_var(x86, ?HIPE_X86_REGISTERS:first_virtual()), + hipe_gensym:set_label(x86, hipe_gensym:get_label(rtl)), + Map0 = vmap_empty(), + {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0), + OldData = hipe_rtl:rtl_data(RTL), + {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData), + {RegFormals,_} = split_args(Formals), + Code = + case RegFormals of + [] -> Code0; + _ -> [hipe_x86:mk_label(hipe_gensym:get_next_label(x86)) | + move_formals(RegFormals, Code0)] + end, + IsClosure = hipe_rtl:rtl_is_closure(RTL), + IsLeaf = hipe_rtl:rtl_is_leaf(RTL), + hipe_x86:mk_defun(hipe_rtl:rtl_fun(RTL), + Formals, + IsClosure, + IsLeaf, + Code, + NewData, + [], + []). + +conv_insn_list([H|T], Map, Data) -> + {NewH, NewMap, NewData1} = conv_insn(H, Map, Data), + %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]), + {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1), + {NewH ++ NewT, NewData2}; +conv_insn_list([], _, Data) -> + {[], Data}. + +conv_insn(I, Map, Data) -> + case I of + #alu{} -> + %% dst = src1 binop src2 + BinOp = conv_binop(hipe_rtl:alu_op(I)), + {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map), + {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0), + {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1), + I2 = + case hipe_rtl:is_shift_op(hipe_rtl:alu_op(I)) of + true -> + conv_shift(Dst, Src1, BinOp, Src2); + false -> + conv_alu(Dst, Src1, BinOp, Src2, []) + end, + {FixSrc1++FixSrc2++I2, Map2, Data}; + #alub{} -> + %% dst = src1 op src2; if COND goto label + BinOp = conv_binop(hipe_rtl:alub_op(I)), + {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), + {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + Cc = conv_cond(hipe_rtl:alub_cond(I)), + I1 = [hipe_x86:mk_pseudo_jcc(Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I))], + I2 = conv_alu(Dst, Src1, BinOp, Src2, I1), + {FixSrc1++FixSrc2++I2, Map2, Data}; + #branch{} -> + %% = src1 - src2; if COND goto label + {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), + {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), + Cc = conv_cond(hipe_rtl:branch_cond(I)), + I2 = conv_branch(Src1, Cc, Src2, + hipe_rtl:branch_true_label(I), + hipe_rtl:branch_false_label(I), + hipe_rtl:branch_pred(I)), + {FixSrc1++FixSrc2++I2, Map1, Data}; + #call{} -> + %% push + %% ... + %% push + %% eax := call ; if exn goto else goto Next + %% Next: + %% := eax + %% goto + {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), + {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), + {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1), + I2 = conv_call(Dsts, Fun, Args, + hipe_rtl:call_continuation(I), + hipe_rtl:call_fail(I), + hipe_rtl:call_type(I)), + %% XXX Fixme: this ++ is probably inefficient. + {FixArgs++I2, Map2, Data}; + #comment{} -> + I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))], + {I2, Map, Data}; + #enter{} -> + {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map), + {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0), + I2 = conv_tailcall(Fun, Args, hipe_rtl:enter_type(I)), + {FixArgs++I2, Map1, Data}; + #goto{} -> + I2 = [hipe_x86:mk_jmp_label(hipe_rtl:goto_label(I))], + {I2, Map, Data}; + #label{} -> + I2 = [hipe_x86:mk_label(hipe_rtl:label_name(I))], + {I2, Map, Data}; + #load{} -> + {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), + {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), + I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of + {byte, signed} -> + [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)]; + {byte, unsigned} -> + [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)]; + {int16, signed} -> + [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)]; + {int16, unsigned} -> + [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)]; + {LoadSize, LoadSign} -> + mk_load(LoadSize, LoadSign, Src, Off, Dst) + end, + {FixSrc++FixOff++I2, Map2, Data}; + #load_address{} -> + {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map), + Addr = hipe_rtl:load_address_addr(I), + Type = hipe_rtl:load_address_type(I), + Src = hipe_x86:mk_imm_from_addr(Addr, Type), + I2 = mk_load_address(Type, Src, Dst), + {I2, Map0, Data}; + #load_atom{} -> + {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map), + Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), + I2 = [hipe_x86:mk_move(Src, Dst)], + {I2, Map0, Data}; + #move{} -> + {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), + {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), + I2 = [hipe_x86:mk_move(Src, Dst)], + {FixSrc++I2, Map1, Data}; + #return{} -> + {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map), + %% frame will fill in npop later, hence the "mk_ret(-1)" + I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), + {FixArgs++I2, Map0, Data}; + #store{} -> + {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), + {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), + I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), + {FixSrc++FixOff++I2, Map2, Data}; + #switch{} -> % this one also updates Data :-( + %% from hipe_rtl2sparc, but we use a hairy addressing mode + %% instead of doing the arithmetic manually + Labels = hipe_rtl:switch_labels(I), + LMap = [{label,L} || L <- Labels], + {NewData, JTabLab} = + case hipe_rtl:switch_sort_order(I) of + [] -> + hipe_consttab:insert_block(Data, word, LMap); + SortOrder -> + hipe_consttab:insert_sorted_block( + Data, word, LMap, SortOrder) + end, + %% no immediates allowed here + {Index, Map1} = conv_dst(hipe_rtl:switch_src(I), Map), + I2 = mk_jmp_switch(Index, JTabLab, Labels), + {I2, Map1, NewData}; + #fload{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), + I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], + {I2, Map2, Data}; + #fstore{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fstore_src(I), Map0), + {[], Off, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1), + I2 = [hipe_x86:mk_fmove(Src, hipe_x86:mk_mem(Dst, Off, 'double'))], + {I2, Map2, Data}; + #fp{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fp_dst(I), Map), + {[], Src1, Map1} = conv_src(hipe_rtl:fp_src1(I), Map0), + {[], Src2, Map2} = conv_src(hipe_rtl:fp_src2(I), Map1), + FpBinOp = conv_fp_binop(hipe_rtl:fp_op(I)), + I2 = conv_fp_binary(Dst, Src1, FpBinOp, Src2), + {I2, Map2, Data}; + #fp_unop{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fp_unop_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fp_unop_src(I), Map0), + FpUnOp = conv_fp_unop(hipe_rtl:fp_unop_op(I)), + I2 = conv_fp_unary(Dst, Src, FpUnOp), + {I2, Map1, Data}; + #fmove{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fmove_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fmove_src(I), Map0), + I2 = [hipe_x86:mk_fmove(Src, Dst)], + {I2, Map1, Data}; + #fconv{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fconv_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0), + I2 = [hipe_x86:mk_fmove(Src, Dst)], + {I2, Map1, Data}; + X -> + %% gctest?? + %% jmp, jmp_link, jsr, esr, multimove, + %% stackneed, pop_frame, restore_frame, save_frame + throw({?MODULE, {"unknown RTL instruction", X}}) + end. + +%%% Finalise the conversion of a 3-address ALU operation, taking +%%% care to not introduce more temps and moves than necessary. + +conv_alu(Dst, Src1, 'imul', Src2, Tail) -> + mk_imul(Src1, Src2, Dst, Tail); +conv_alu(Dst, Src1, BinOp, Src2, Tail) -> + case same_opnd(Dst, Src1) of + true -> % x = x op y + [hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % x op= y + false -> % z = x op y, where z != x + case same_opnd(Dst, Src2) of + false -> % z = x op y, where z != x && z != y + [hipe_x86:mk_move(Src1, Dst), % z = x + hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % z op= y + true -> % y = x op y, where y != x + case binop_commutes(BinOp) of + true -> % y = y op x + [hipe_x86:mk_alu(BinOp, Src1, Dst) | Tail]; % y op= x + false -> % y = x op y, where op doesn't commute + Tmp = clone_dst(Dst), + [hipe_x86:mk_move(Src1, Tmp), % t = x + hipe_x86:mk_alu(BinOp, Src2, Tmp), % t op= y + hipe_x86:mk_move(Tmp, Dst) | Tail] % y = t + end + end + end. + +mk_imul(Src1, Src2, Dst, Tail) -> + case hipe_x86:is_imm(Src1) of + true -> + case hipe_x86:is_imm(Src2) of + true -> + mk_imul_iit(Src1, Src2, Dst, Tail); + _ -> + mk_imul_itt(Src1, Src2, Dst, Tail) + end; + _ -> + case hipe_x86:is_imm(Src2) of + true -> + mk_imul_itt(Src2, Src1, Dst, Tail); + _ -> + mk_imul_ttt(Src1, Src2, Dst, Tail) + end + end. + +mk_imul_iit(Src1, Src2, Dst, Tail) -> + io:format("~w: RTL mul with two immediates\n", [?MODULE]), + Tmp2 = new_untagged_temp(), + [hipe_x86:mk_move(Src2, Tmp2) | + mk_imul_itt(Src1, Tmp2, Dst, Tail)]. + +mk_imul_itt(Src1, Src2, Dst, Tail) -> + [hipe_x86:mk_imul(Src1, Src2, Dst) | Tail]. + +mk_imul_ttt(Src1, Src2, Dst, Tail) -> + case same_opnd(Dst, Src1) of + true -> + [hipe_x86:mk_imul([], Src2, Dst) | Tail]; + false -> + case same_opnd(Dst, Src2) of + true -> + [hipe_x86:mk_imul([], Src1, Dst) | Tail]; + false -> + [hipe_x86:mk_move(Src1, Dst), + hipe_x86:mk_imul([], Src2, Dst) | Tail] + end + end. + +conv_shift(Dst, Src1, BinOp, Src2) -> + {NewSrc2,I1} = + case hipe_x86:is_imm(Src2) of + true -> + {Src2, []}; + false -> + NewSrc = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?ECX(), 'untagged'), + {NewSrc, [hipe_x86:mk_move(Src2, NewSrc)]} + end, + I2 = case same_opnd(Dst, Src1) of + true -> % x = x op y + [hipe_x86:mk_shift(BinOp, NewSrc2, Dst)]; % x op= y + false -> % z = x op y, where z != x + case same_opnd(Dst, Src2) of + false -> % z = x op y, where z != x && z != y + [hipe_x86:mk_move(Src1, Dst), % z = x + hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];% z op= y + true -> % y = x op y, no shift op commutes + Tmp = clone_dst(Dst), + [hipe_x86:mk_move(Src1, Tmp), % t = x + hipe_x86:mk_shift(BinOp, NewSrc2, Tmp), % t op= y + hipe_x86:mk_move(Tmp, Dst)] % y = t + end + end, + I1 ++ I2. + +%%% Finalise the conversion of a conditional branch operation, taking +%%% care to not introduce more temps and moves than necessary. + +conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> + case hipe_x86:is_imm(Src1) of + false -> + mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred); + true -> + case hipe_x86:is_imm(Src2) of + false -> + NewCc = commute_cc(Cc), + mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred); + true -> + %% two immediates, let the optimiser clean it up + Tmp = new_untagged_temp(), + [hipe_x86:mk_move(Src1, Tmp) | + mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)] + end + end. + +mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> + %% PRE: not(is_imm(Src1)) + [hipe_x86:mk_cmp(Src2, Src1), + hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)]. + +%%% Convert an RTL ALU or ALUB binary operator. + +conv_binop(BinOp) -> + case BinOp of + 'add' -> 'add'; + 'sub' -> 'sub'; + 'or' -> 'or'; + 'and' -> 'and'; + 'xor' -> 'xor'; + 'sll' -> 'shl'; + 'srl' -> 'shr'; + 'sra' -> 'sar'; + 'mul' -> 'imul'; + %% andnot ??? + _ -> exit({?MODULE, {"unknown binop", BinOp}}) + end. + +binop_commutes(BinOp) -> + case BinOp of + 'add' -> true; + 'or' -> true; + 'and' -> true; + 'xor' -> true; + _ -> false + end. + +%%% Convert an RTL conditional operator. + +conv_cond(Cond) -> + case Cond of + eq -> 'e'; + ne -> 'ne'; + gt -> 'g'; + gtu -> 'a'; + ge -> 'ge'; + geu -> 'ae'; + lt -> 'l'; + ltu -> 'b'; + le -> 'le'; + leu -> 'be'; + overflow -> 'o'; + not_overflow -> 'no'; + _ -> exit({?MODULE, {"unknown rtl cond", Cond}}) + end. + +commute_cc(Cc) -> % if x Cc y, then y commute_cc(Cc) x + case Cc of + 'e' -> 'e'; % ==, == + 'ne' -> 'ne'; % !=, != + 'g' -> 'l'; % >, < + 'a' -> 'b'; % >u, 'le'; % >=, <= + 'ae' -> 'be'; % >=u, <=u + 'l' -> 'g'; % <, > + 'b' -> 'a'; % u + 'le' -> 'ge'; % <=, >= + 'be' -> 'ae'; % <=u, >=u + %% overflow/not_overflow: n/a + _ -> exit({?MODULE, {"unknown cc", Cc}}) + end. + +%%% Test if Dst and Src are the same operand. + +same_opnd(Dst, Src) -> Dst =:= Src. + +%%% Finalise the conversion of a tailcall instruction. + +conv_tailcall(Fun, Args, Linkage) -> + Arity = length(Args), + {RegArgs,StkArgs} = split_args(Args), + move_actuals(RegArgs, + [hipe_x86:mk_pseudo_tailcall_prepare(), + hipe_x86:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]). + +split_args(Args) -> + split_args(0, ?HIPE_X86_REGISTERS:nr_args(), Args, []). +split_args(I, N, [Arg|Args], RegArgs) when I < N -> + Reg = ?HIPE_X86_REGISTERS:arg(I), + Temp = hipe_x86:mk_temp(Reg, 'tagged'), + split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]); +split_args(_, _, StkArgs, RegArgs) -> + {RegArgs, StkArgs}. + +move_actuals([], Rest) -> Rest; +move_actuals([{Src,Dst}|Actuals], Rest) -> + move_actuals(Actuals, [hipe_x86:mk_move(Src, Dst) | Rest]). + +move_formals([], Rest) -> Rest; +move_formals([{Dst,Src}|Formals], Rest) -> + move_formals(Formals, [hipe_x86:mk_move(Src, Dst) | Rest]). + +%%% Finalise the conversion of a call instruction. + +conv_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + case hipe_x86:is_prim(Fun) of + true -> + conv_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage); + false -> + conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) + end. + +conv_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) -> + case hipe_x86:prim_prim(Prim) of + 'fwait' -> + conv_fwait_call(Dsts, Args, ContLab, ExnLab, Linkage); + _ -> + conv_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) + end. + +conv_fwait_call([], [], [], [], not_remote) -> + [hipe_x86:mk_fp_unop('fwait', [])]. + +conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + %% The backend does not support pseudo_calls without a + %% continuation label, so we make sure each call has one. + {RealContLab, Tail} = + case do_call_results(Dsts) of + [] -> + %% Avoid consing up a dummy basic block if the moves list + %% is empty, as is typical for calls to suspend/0. + %% This should be subsumed by a general "optimise the CFG" + %% module, and could probably be removed. + case ContLab of + [] -> + NewContLab = hipe_gensym:get_next_label(x86), + {NewContLab, [hipe_x86:mk_label(NewContLab)]}; + _ -> + {ContLab, []} + end; + Moves -> + %% Change the call to continue at a new basic block. + %% In this block move the result registers to the Dsts, + %% then continue at the call's original continuation. + %% + %% This should be fixed to propagate "fallthrough calls" + %% When the rest of the backend supports them. + NewContLab = hipe_gensym:get_next_label(x86), + case ContLab of + [] -> + %% This is just a fallthrough + %% No jump back after the moves. + {NewContLab, + [hipe_x86:mk_label(NewContLab) | + Moves]}; + _ -> + %% The call has a continuation + %% jump to it. + {NewContLab, + [hipe_x86:mk_label(NewContLab) | + Moves ++ + [hipe_x86:mk_jmp_label(ContLab)]]} + end + end, + SDesc = hipe_x86:mk_sdesc(ExnLab, 0, length(Args), {}), + CallInsn = hipe_x86:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage), + {RegArgs,StkArgs} = split_args(Args), + do_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])). + +do_push_args([Arg|Args], Tail) -> + [hipe_x86:mk_push(Arg) | do_push_args(Args, Tail)]; +do_push_args([], Tail) -> + Tail. + +%%% Move return values from the return value registers. + +do_call_results(DstList) -> + do_call_results(DstList, 0, []). + +do_call_results([Dst|DstList], I, Rest) -> + Src = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'), + Move = hipe_x86:mk_move(Src, Dst), + do_call_results(DstList, I+1, [Move|Rest]); +do_call_results([], _, Insns) -> Insns. + +%%% Move return values to the return value registers. + +move_retvals(SrcLst, Rest) -> + move_retvals(SrcLst, 0, Rest). + +move_retvals([Src|SrcLst], I, Rest) -> + Dst = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'), + Move = hipe_x86:mk_move(Src, Dst), + move_retvals(SrcLst, I+1, [Move|Rest]); +move_retvals([], _, Insns) -> Insns. + +%%% Convert a 'fun' operand (MFA, prim, or temp) + +conv_fun(Fun, Map) -> + case hipe_rtl:is_var(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case hipe_rtl:is_reg(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case Fun of + Prim when is_atom(Prim) -> + {hipe_x86:mk_prim(Prim), Map}; + {M,F,A} when is_atom(M), is_atom(F), is_integer(A) -> + {hipe_x86:mk_mfa(M,F,A), Map}; + _ -> + exit({?MODULE,conv_fun,Fun}) + end + end + end. + +%%% Convert an RTL source operand (imm/var/reg). + +conv_src(Opnd, Map) -> + case hipe_rtl:is_imm(Opnd) of + true -> + conv_imm(Opnd, Map); + false -> + {NewOpnd,NewMap} = conv_dst(Opnd, Map), + {[], NewOpnd, NewMap} + end. + +-ifdef(HIPE_AMD64). +conv_imm(Opnd, Map) -> + ImmVal = hipe_rtl:imm_value(Opnd), + case is_imm64(ImmVal) of + true -> + Temp = hipe_x86:mk_new_temp('untagged'), + {[hipe_x86:mk_move64(hipe_x86:mk_imm(ImmVal), Temp)], Temp, Map}; + false -> + {[], hipe_x86:mk_imm(ImmVal), Map} + end. + +is_imm64(Value) when is_integer(Value) -> + (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1); +is_imm64({_,atom}) -> false; % Atoms are 32 bits. +is_imm64({_,c_const}) -> false; % c_consts are 32 bits. +is_imm64({_,_}) -> true . % Other relocs are 64 bits. +-else. +conv_imm(Opnd, Map) -> + {[], hipe_x86:mk_imm(hipe_rtl:imm_value(Opnd)), Map}. +-endif. + +conv_src_list([O|Os], Map) -> + {NewInstr, V, Map1} = conv_src(O, Map), + {Instrs, Vs, Map2} = conv_src_list(Os, Map1), + {Instrs++NewInstr, [V|Vs], Map2}; +conv_src_list([], Map) -> + {[], [], Map}. + +%%% Convert an RTL destination operand (var/reg). + +conv_dst(Opnd, Map) -> + {Name, Type} = + case hipe_rtl:is_var(Opnd) of + true -> + {hipe_rtl:var_index(Opnd), 'tagged'}; + false -> + case hipe_rtl:is_fpreg(Opnd) of + true -> + {hipe_rtl:fpreg_index(Opnd), 'double'}; + false -> + {hipe_rtl:reg_index(Opnd), 'untagged'} + end + end, + case ?HIPE_X86_REGISTERS:is_precoloured(Name) of + true -> + case ?HIPE_X86_REGISTERS:proc_offset(Name) of + false -> + {hipe_x86:mk_temp(Name, Type), Map}; + Offset -> + Preg = ?HIPE_X86_REGISTERS:proc_pointer(), + Pbase = hipe_x86:mk_temp(Preg, 'untagged'), + Poff = hipe_x86:mk_imm(Offset), + {hipe_x86:mk_mem(Pbase, Poff, Type), Map} + end; + false -> + case vmap_lookup(Map, Opnd) of + {value, NewTemp} -> + {NewTemp, Map}; + _ -> + NewTemp = hipe_x86:mk_new_temp(Type), + {NewTemp, vmap_bind(Map, Opnd, NewTemp)} + end + end. + +conv_dst_list([O|Os], Map) -> + {Dst, Map1} = conv_dst(O, Map), + {Dsts, Map2} = conv_dst_list(Os, Map1), + {[Dst|Dsts], Map2}; +conv_dst_list([], Map) -> + {[], Map}. + +conv_formals(Os, Map) -> + conv_formals(?HIPE_X86_REGISTERS:nr_args(), Os, Map, []). + +conv_formals(N, [O|Os], Map, Res) -> + Type = + case hipe_rtl:is_var(O) of + true -> 'tagged'; + false ->'untagged' + end, + Dst = + if N > 0 -> hipe_x86:mk_new_temp(Type); % allocatable + true -> hipe_x86:mk_new_nonallocatable_temp(Type) + end, + Map1 = vmap_bind(Map, O, Dst), + conv_formals(N-1, Os, Map1, [Dst|Res]); +conv_formals(_, [], Map, Res) -> + {lists:reverse(Res), Map}. + +%%% typeof_src -- what's src's type? + +typeof_src(Src) -> + case hipe_x86:is_imm(Src) of + true -> + 'untagged'; + _ -> + typeof_dst(Src) + end. + +%%% typeof_dst -- what's dst's type? + +typeof_dst(Dst) -> + case hipe_x86:is_temp(Dst) of + true -> + hipe_x86:temp_type(Dst); + _ -> + hipe_x86:mem_type(Dst) + end. + +%%% clone_dst -- conjure up a scratch reg with same type as dst + +clone_dst(Dst) -> + hipe_x86:mk_new_temp(typeof_dst(Dst)). + +%%% new_untagged_temp -- conjure up an untagged scratch reg + +new_untagged_temp() -> + hipe_x86:mk_new_temp('untagged'). + +%%% Map from RTL var/reg operands to x86 temps. + +vmap_empty() -> + gb_trees:empty(). + +vmap_lookup(Map, Key) -> + gb_trees:lookup(Key, Map). + +vmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +%%% Finalise the conversion of a 2-address FP operation. + +conv_fp_unary(Dst, Src, FpUnOp) -> + case same_opnd(Dst, Src) of + true -> + [hipe_x86:mk_fp_unop(FpUnOp, Dst)]; + _ -> + [hipe_x86:mk_fmove(Src, Dst), + hipe_x86:mk_fp_unop(FpUnOp, Dst)] + end. + +conv_fp_unop(RtlFpUnOp) -> + case RtlFpUnOp of + 'fchs' -> 'fchs' + end. + +%%% Finalise the conversion of a 3-address FP operation. + +conv_fp_binary(Dst, Src1, FpBinOp, Src2) -> + case same_opnd(Dst, Src1) of + true -> % x = x op y + [hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % x op= y + false -> % z = x op y, where z != x + case same_opnd(Dst, Src2) of + false -> % z = x op y, where z != x && z != y + [hipe_x86:mk_fmove(Src1, Dst), % z = x + hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % z op= y + true -> % y = x op y, where y != x + case fp_binop_commutes(FpBinOp) of + true -> % y = y op x + [hipe_x86:mk_fp_binop(FpBinOp, Src1, Dst)]; % y op= x + false -> % y = x op y, where op doesn't commute + RevFpBinOp = reverse_fp_binop(FpBinOp), + [hipe_x86:mk_fp_binop(RevFpBinOp, Src1, Dst)] + end + end + end. + +%%% Convert an RTL FP binary operator. + +conv_fp_binop(RtlFpBinOp) -> + case RtlFpBinOp of + 'fadd' -> 'fadd'; + 'fdiv' -> 'fdiv'; + 'fmul' -> 'fmul'; + 'fsub' -> 'fsub' + end. + +fp_binop_commutes(FpBinOp) -> + case FpBinOp of + 'fadd' -> true; + 'fmul' -> true; + _ -> false + end. + +reverse_fp_binop(FpBinOp) -> + case FpBinOp of + 'fsub' -> 'fsubr'; + 'fdiv' -> 'fdivr' + end. + +%%% Create a jmp_switch instruction. + +-ifdef(HIPE_AMD64). +mk_jmp_switch(Index, JTabLab, Labels) -> + JTabReg = hipe_x86:mk_new_temp('untagged'), + JTabImm = hipe_x86:mk_imm_from_addr(JTabLab, constant), + [hipe_x86:mk_move64(JTabImm, JTabReg), + hipe_x86:mk_jmp_switch(Index, JTabReg, Labels)]. +-else. +mk_jmp_switch(Index, JTabLab, Labels) -> + %% this is equivalent to "jmp *JTabLab(,Index,4)" + %% ("r = Index; r *= 4; r += &JTab; jmp *r" isn't as nice) + [hipe_x86:mk_jmp_switch(Index, JTabLab, Labels)]. +-endif. + +%%% Finalise the translation of a load_address instruction. + +-ifdef(HIPE_AMD64). +mk_load_address(Type, Src, Dst) -> + case Type of + c_const -> % 32 bits + [hipe_x86:mk_move(Src, Dst)]; + _ -> + [hipe_x86:mk_move64(Src, Dst)] + end. +-else. +mk_load_address(_Type, Src, Dst) -> + [hipe_x86:mk_move(Src, Dst)]. +-endif. + +%%% Translate 32-bit and larger loads. + +-ifdef(HIPE_AMD64). +mk_load(LoadSize, LoadSign, Src, Off, Dst) -> + case {LoadSize, LoadSign} of + {int32, signed} -> + [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)]; + {int32, unsigned} -> + %% The processor zero-extends for us. No need for 'movzx'. + [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)]; + {_, _} -> + mk_load_word(Src, Off, Dst) + end. +-else. +mk_load(_LoadSize, _LoadSign, Src, Off, Dst) -> + mk_load_word(Src, Off, Dst). +-endif. + +mk_load_word(Src, Off, Dst) -> + Type = typeof_dst(Dst), + [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, Type), Dst)]. + +%%% Finalise the translation of a store instruction. + +-ifdef(HIPE_AMD64). +mk_store(RtlStoreSize, Src, Ptr, Off) -> + Type = case RtlStoreSize of + word -> + typeof_src(Src); + OtherType -> + OtherType + end, + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]. +-else. +mk_store(RtlStoreSize, Src, Ptr, Off) -> + case RtlStoreSize of + word -> + Type = typeof_src(Src), + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; + int32 -> + Type = typeof_src(Src), + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; + int16 -> + Type = 'int16', + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; + byte -> + Type = 'byte', + {NewSrc, I1} = conv_small_store(Src), + I1 ++ [hipe_x86:mk_move(NewSrc, hipe_x86:mk_mem(Ptr, Off, Type))] + end. + +conv_small_store(Src) -> + case hipe_x86:is_imm(Src) of + true -> + {Src, []}; + false -> + NewSrc = hipe_x86:mk_temp(hipe_x86_registers:eax(), 'untagged'), + {NewSrc, [hipe_x86:mk_move(Src, NewSrc)]} + end. +-endif. diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl new file mode 100644 index 0000000000..3298151366 --- /dev/null +++ b/lib/hipe/x86/hipe_x86.erl @@ -0,0 +1,496 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% representation of 2-address pseudo-amd64 code + +-module(hipe_x86). + +-include("hipe_x86.hrl"). + +%% Commented out are interface functions which are currently not used. +-export([mk_temp/2, + %% mk_nonallocatable_temp/2, + mk_new_temp/1, + mk_new_nonallocatable_temp/1, + is_temp/1, + temp_reg/1, + temp_type/1, + temp_is_allocatable/1, + + mk_imm/1, + mk_imm_from_addr/2, + mk_imm_from_atom/1, + is_imm/1, + %% imm_value/1, + + mk_mem/3, + %% is_mem/1, + %% mem_base/1, + %% mem_off/1, + mem_type/1, + + mk_fpreg/1, + mk_fpreg/2, + %% is_fpreg/1, + %% fpreg_is_pseudo/1, + %% fpreg_reg/1, + + mk_mfa/3, + %% is_mfa/1, + + mk_prim/1, + is_prim/1, + prim_prim/1, + + mk_sdesc/4, + + %% insn_type/1, + + mk_alu/3, + %% is_alu/1, + alu_op/1, + alu_src/1, + alu_dst/1, + + mk_call/3, + %% is_call/1, + call_fun/1, + call_sdesc/1, + call_linkage/1, + + %% mk_cmovcc/3, + %% is_cmovcc/1, + cmovcc_cc/1, + cmovcc_src/1, + cmovcc_dst/1, + + mk_cmp/2, + %% is_cmp/1, + cmp_src/1, + cmp_dst/1, + + mk_comment/1, + %% is_comment/1, + %% comment_term/1, + + mk_fmove/2, + is_fmove/1, + fmove_src/1, + fmove_dst/1, + + mk_fp_unop/2, + %% is_fp_unop/1, + fp_unop_arg/1, + fp_unop_op/1, + + mk_fp_binop/3, + %% is_fp_binop/1, + fp_binop_src/1, + fp_binop_dst/1, + fp_binop_op/1, + + mk_imul/3, + imul_imm_opt/1, + imul_src/1, + imul_temp/1, + + mk_jcc/2, + %% is_jcc/1, + jcc_cc/1, + jcc_label/1, + + mk_jmp_fun/2, + %% is_jmp_fun/1, + jmp_fun_fun/1, + jmp_fun_linkage/1, + + mk_jmp_label/1, + %% is_jmp_label/1, + jmp_label_label/1, + + mk_jmp_switch/3, + %% is_jmp_switch/1, + jmp_switch_temp/1, + jmp_switch_jtab/1, + %% jmp_switch_labels/1, + + mk_label/1, + is_label/1, + label_label/1, + + mk_lea/2, + %% is_lea/1, + lea_mem/1, + lea_temp/1, + + mk_move/2, + is_move/1, + move_src/1, + move_dst/1, + mk_move64/2, + %% is_move64/1, + move64_src/1, + move64_dst/1, + + mk_movsx/2, + %% is_movsx/1, + movsx_src/1, + movsx_dst/1, + + mk_movzx/2, + %% is_movzx/1, + movzx_src/1, + movzx_dst/1, + + mk_pseudo_call/4, + %% is_pseudo_call/1, + pseudo_call_fun/1, + pseudo_call_sdesc/1, + pseudo_call_contlab/1, + pseudo_call_linkage/1, + + mk_pseudo_jcc/4, + %% is_pseudo_jcc/1, + %% pseudo_jcc_cc/1, + %% pseudo_jcc_true_label/1, + %% pseudo_jcc_false_label/1, + %% pseudo_jcc_pred/1, + + mk_pseudo_spill/1, + + mk_pseudo_tailcall/4, + %% is_pseudo_tailcall/1, + pseudo_tailcall_fun/1, + %% pseudo_tailcall_arity/1, + pseudo_tailcall_stkargs/1, + pseudo_tailcall_linkage/1, + + mk_pseudo_tailcall_prepare/0, + %% is_pseudo_tailcall_prepare/1, + + mk_push/1, + %% is_push/1, + push_src/1, + + %% mk_pop/1, + pop_dst/1, + + mk_ret/1, + %% is_ret/1, + ret_npop/1, + + mk_shift/3, + %% is_shift/1, + shift_op/1, + shift_src/1, + shift_dst/1, + + %% mk_test/2, + test_src/1, + test_dst/1, + + mk_defun/8, + defun_mfa/1, + defun_formals/1, + defun_is_closure/1, + defun_is_leaf/1, + defun_code/1, + defun_data/1, + defun_var_range/1 + %% defun_label_range/1, + + %% highest_temp/1 + ]). + +%%% +%%% Low-level accessors. +%%% + +mk_temp(Reg, Type) when is_integer(Reg) -> + #x86_temp{reg=Reg, type=Type, allocatable=true}. +mk_nonallocatable_temp(Reg, Type) when is_integer(Reg) -> + #x86_temp{reg=Reg, type=Type, allocatable=false}. +mk_new_temp(Type) -> + mk_temp(hipe_gensym:get_next_var(x86), Type). +mk_new_nonallocatable_temp(Type) -> + mk_nonallocatable_temp(hipe_gensym:get_next_var(x86), Type). +is_temp(X) -> case X of #x86_temp{} -> true; _ -> false end. +temp_reg(#x86_temp{reg=Reg}) when is_integer(Reg) -> Reg. +temp_type(#x86_temp{type=Type}) -> Type. +temp_is_allocatable(#x86_temp{allocatable=A}) -> A. + +mk_imm(Value) -> #x86_imm{value=Value}. +mk_imm_from_addr(Addr, Type) -> + mk_imm({Addr, Type}). +mk_imm_from_atom(Atom) -> + mk_imm(Atom). +is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end. +%% imm_value(#x86_imm{value=Value}) -> Value. + +mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}. +%% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end. +%% mem_base(#x86_mem{base=Base}) -> Base. +%% mem_off(#x86_mem{off=Off}) -> Off. +mem_type(#x86_mem{type=Type}) -> Type. + +mk_fpreg(Reg) -> #x86_fpreg{reg=Reg, pseudo=true}. +mk_fpreg(Reg, Pseudo) -> #x86_fpreg{reg=Reg, pseudo=Pseudo}. +%% is_fpreg(F) -> case F of #x86_fpreg{} -> true;_ -> false end. +%% fpreg_is_pseudo(#x86_fpreg{pseudo=Pseudo}) -> Pseudo. +%% fpreg_reg(#x86_fpreg{reg=Reg}) -> Reg. + +mk_mfa(M, F, A) -> #x86_mfa{m=M, f=F, a=A}. +%% is_mfa(X) -> case X of #x86_mfa{} -> true; _ -> false end. + +mk_prim(Prim) -> #x86_prim{prim=Prim}. +is_prim(X) -> case X of #x86_prim{} -> true; _ -> false end. +prim_prim(#x86_prim{prim=Prim}) -> Prim. + +mk_sdesc(ExnLab, FSize, Arity, Live) -> + #x86_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}. + +insn_type(Insn) -> + element(1, Insn). + +is_insn_type(Insn, Type) -> + case insn_type(Insn) of + Type -> true; + _ -> false + end. + +mk_alu(Op, Src, Dst) -> #alu{aluop=Op, src=Src, dst=Dst}. +%% is_alu(Insn) -> is_insn_type(Insn, alu). +alu_op(#alu{aluop=Op}) -> Op. +alu_src(#alu{src=Src}) -> Src. +alu_dst(#alu{dst=Dst}) -> Dst. + +mk_call(Fun, SDesc, Linkage) -> + check_linkage(Linkage), + #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage}. +%% is_call(Insn) -> is_insn_type(Insn, call). +call_fun(#call{'fun'=Fun}) -> Fun. +call_sdesc(#call{sdesc=SDesc}) -> SDesc. +call_linkage(#call{linkage=Linkage}) -> Linkage. + +check_linkage(Linkage) -> + case Linkage of + remote -> []; + not_remote -> [] + end. + +%% mk_cmovcc(Cc, Src, Dst) -> #cmovcc{cc=Cc, src=Src, dst=Dst}. +%% is_cmovcc(Insn) -> is_insn_type(Insn, cmovcc). +cmovcc_cc(#cmovcc{cc=Cc}) -> Cc. +cmovcc_src(#cmovcc{src=Src}) -> Src. +cmovcc_dst(#cmovcc{dst=Dst}) -> Dst. + +mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}. +%% is_cmp(Insn) -> is_insn_type(Insn, cmp). +cmp_src(#cmp{src=Src}) -> Src. +cmp_dst(#cmp{dst=Dst}) -> Dst. + +%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. +test_src(#test{src=Src}) -> Src. +test_dst(#test{dst=Dst}) -> Dst. + +mk_comment(Term) -> #comment{term=Term}. +%% is_comment(Insn) -> is_insn_type(Insn, comment). +%% comment_term(#comment{term=Term}) -> Term. + +mk_fmove(Src, Dst) -> #fmove{src=Src, dst=Dst}. +is_fmove(F) -> is_insn_type(F, fmove). +fmove_src(#fmove{src=Src}) -> Src. +fmove_dst(#fmove{dst=Dst}) -> Dst. + +mk_fp_unop(Op, Arg) -> #fp_unop{op=Op, arg=Arg}. +%% is_fp_unop(F) -> is_insn_type(F, fp_unop). +fp_unop_arg(#fp_unop{arg=Arg}) -> Arg. +fp_unop_op(#fp_unop{op=Op}) -> Op. + +mk_fp_binop(Op, Src, Dst) -> #fp_binop{op=Op, src=Src, dst=Dst}. +%% is_fp_binop(F) -> is_insn_type(F, fp_binop). +fp_binop_src(#fp_binop{src=Src}) -> Src. +fp_binop_dst(#fp_binop{dst=Dst}) -> Dst. +fp_binop_op(#fp_binop{op=Op}) -> Op. + +mk_imul(ImmOpt, Src, Temp) -> #imul{imm_opt=ImmOpt, src=Src, temp=Temp}. +imul_imm_opt(#imul{imm_opt=ImmOpt}) -> ImmOpt. +imul_src(#imul{src=Src}) -> Src. +imul_temp(#imul{temp=Temp}) -> Temp. + +mk_jcc(Cc, Label) -> #jcc{cc=Cc, label=Label}. +%% is_jcc(Insn) -> is_insn_type(Insn, jcc). +jcc_cc(#jcc{cc=Cc}) -> Cc. +jcc_label(#jcc{label=Label}) -> Label. + +mk_jmp_fun(Fun, Linkage) -> + check_linkage(Linkage), + #jmp_fun{'fun'=Fun, linkage=Linkage}. +%% is_jmp_fun(Insn) -> is_insn_type(Insn, jmp_fun). +jmp_fun_fun(#jmp_fun{'fun'=Fun}) -> Fun. +jmp_fun_linkage(#jmp_fun{linkage=Linkage}) -> Linkage. + +mk_jmp_label(Label) -> #jmp_label{label=Label}. +%% is_jmp_label(Insn) -> is_insn_type(Insn, jmp_label). +jmp_label_label(#jmp_label{label=Label}) -> Label. + +mk_jmp_switch(Temp, JTab, Labels) -> + #jmp_switch{temp=Temp, jtab=JTab, labels=Labels}. +%% is_jmp_switch(Insn) -> is_insn_type(Insn, jmp_switch). +jmp_switch_temp(#jmp_switch{temp=Temp}) -> Temp. +jmp_switch_jtab(#jmp_switch{jtab=JTab}) -> JTab. +%% jmp_switch_labels(#jmp_switch{labels=Labels}) -> Labels. + +mk_label(Label) -> #label{label=Label}. +is_label(Insn) -> is_insn_type(Insn, label). +label_label(#label{label=Label}) -> Label. + +mk_lea(Mem, Temp) -> #lea{mem=Mem, temp=Temp}. +%% is_lea(Insn) -> is_insn_type(Insn, lea). +lea_mem(#lea{mem=Mem}) -> Mem. +lea_temp(#lea{temp=Temp}) -> Temp. + +mk_move(Src, Dst) -> #move{src=Src, dst=Dst}. +is_move(Insn) -> is_insn_type(Insn, move). +move_src(#move{src=Src}) -> Src. +move_dst(#move{dst=Dst}) -> Dst. + +mk_move64(Imm, Dst) -> #move64{imm=Imm, dst=Dst}. +%% is_move64(Insn) -> is_insn_type(Insn, move64). +move64_src(#move64{imm=Imm}) -> Imm. +move64_dst(#move64{dst=Dst}) -> Dst. + +mk_movsx(Src, Dst) -> #movsx{src=Src, dst=Dst}. +%% is_movsx(Insn) -> is_insn_type(Insn, movsx). +movsx_src(#movsx{src=Src}) -> Src. +movsx_dst(#movsx{dst=Dst}) -> Dst. + +mk_movzx(Src, Dst) -> #movzx{src=Src, dst=Dst}. +%% is_movzx(Insn) -> is_insn_type(Insn, movzx). +movzx_src(#movzx{src=Src}) -> Src. +movzx_dst(#movzx{dst=Dst}) -> Dst. + +mk_pseudo_call(Fun, SDesc, ContLab, Linkage) -> + check_linkage(Linkage), + #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage}. +%% is_pseudo_call(Insn) -> is_insn_type(Insn, pseudo_call). +pseudo_call_fun(#pseudo_call{'fun'=Fun}) -> Fun. +pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc. +pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab. +pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage. + +mk_pseudo_jcc(Cc, TrueLabel, FalseLabel, Pred) -> % 'smart' constructor + if Pred >= 0.5 -> + mk_pseudo_jcc_simple(neg_cc(Cc), FalseLabel, TrueLabel, 1.0-Pred); + true -> + mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) + end. +neg_cc(Cc) -> + case Cc of + 'e' -> 'ne'; % ==, != + 'ne' -> 'e'; % !=, == + 'g' -> 'le'; % >, <= + 'a' -> 'be'; % >u, <=u + 'ge' -> 'l'; % >=, < + 'ae' -> 'b'; % >=u, 'ge'; % <, >= + 'b' -> 'ae'; % =u + 'le' -> 'g'; % <=, > + 'be' -> 'a'; % <=u, >u + 'o' -> 'no'; % overflow, not_overflow + 'no' -> 'o'; % not_overflow, overflow + _ -> exit({?MODULE, {"unknown cc", Cc}}) + end. +mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) -> + #pseudo_jcc{cc=Cc, true_label=TrueLabel, false_label=FalseLabel, pred=Pred}. +%% is_pseudo_jcc(Insn) -> is_insn_type(Insn, pseudo_jcc). +%% pseudo_jcc_cc(#pseudo_jcc{cc=Cc}) -> Cc. +%% pseudo_jcc_true_label(#pseudo_jcc{true_label=TrueLabel}) -> TrueLabel. +%% pseudo_jcc_false_label(#pseudo_jcc{false_label=FalseLabel}) -> FalseLabel. +%% pseudo_jcc_pred(#pseudo_jcc{pred=Pred}) -> Pred. + +mk_pseudo_spill(List) -> + #pseudo_spill{args=List}. + +mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) -> + check_linkage(Linkage), + #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}. +%% is_pseudo_tailcall(Insn) -> is_insn_type(Insn, pseudo_tailcall). +pseudo_tailcall_fun(#pseudo_tailcall{'fun'=Fun}) -> Fun. +%% pseudo_tailcall_arity(#pseudo_tailcall{arity=Arity}) -> Arity. +pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs. +pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage. + +mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}. +%% is_pseudo_tailcall_prepare(Insn) -> is_insn_type(Insn, pseudo_tailcall_prepare). + +mk_push(Src) -> #push{src=Src}. +%% is_push(Insn) -> is_insn_type(Insn, push). +push_src(#push{src=Src}) -> Src. + +%% mk_pop(Dst) -> #pop{dst=Dst}. +%% is_push(Insn) -> is_insn_type(Insn, push). +pop_dst(#pop{dst=Dst}) -> Dst. + +mk_ret(NPop) -> #ret{npop=NPop}. +%% is_ret(Insn) -> is_insn_type(Insn, ret). +ret_npop(#ret{npop=NPop}) -> NPop. + +mk_shift(ShiftOp, Src, Dst) -> + #shift{shiftop=ShiftOp, src=Src, dst=Dst}. +%% is_shift(Insn) -> is_insn_type(Insn, shift). +shift_op(#shift{shiftop=ShiftOp}) -> ShiftOp. +shift_src(#shift{src=Src}) -> Src. +shift_dst(#shift{dst=Dst}) -> Dst. + +mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> + #defun{mfa=MFA, formals=Formals, code=Code, data=Data, + isclosure=IsClosure, isleaf=IsLeaf, + var_range=VarRange, label_range=LabelRange}. +defun_mfa(#defun{mfa=MFA}) -> MFA. +defun_formals(#defun{formals=Formals}) -> Formals. +defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure. +defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf. +defun_code(#defun{code=Code}) -> Code. +defun_data(#defun{data=Data}) -> Data. +defun_var_range(#defun{var_range=VarRange}) -> VarRange. +%% defun_label_range(#defun{label_range=LabelRange}) -> LabelRange. + +%% highest_temp(Code) -> +%% highest_temp(Code,0). +%% +%% highest_temp([I|Is],Max) -> +%% Defs = hipe_x86_defuse:insn_def(I), +%% Uses = hipe_x86_defuse:insn_use(I), +%% highest_temp(Is,new_max(Defs++Uses,Max)); +%% highest_temp([],Max) -> +%% Max. +%% +%% new_max([V|Vs],Max) -> +%% case is_temp(V) of +%% true -> +%% TReg = temp_reg(V), +%% if TReg > Max -> +%% new_max(Vs, TReg); +%% true -> +%% new_max(Vs, Max) +%% end; +%% false -> +%% new_max(Vs, Max) +%% end; +%% new_max([],Max) -> Max. diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl new file mode 100644 index 0000000000..3d22fb381f --- /dev/null +++ b/lib/hipe/x86/hipe_x86.hrl @@ -0,0 +1,116 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% concrete representation of 2-address pseudo-x86 code + +%%%-------------------------------------------------------------------- +%%% x86 operands: +%%% +%%% int32 ::= +%%% reg ::= +%%% type ::= 'tagged' | 'untagged' +%%% label ::= +%%% label_type ::= 'label' | 'constant' +%%% aluop ::= +%%% term ::= +%%% cc ::= +%%% pred ::= +%%% npop ::= +%%% +%%% temp ::= {x86_temp, reg, type, allocatable} +%%% allocatable ::= 'true' | 'false' +%%% +%%% imm ::= {x86_imm, value} +%%% value ::= int32 | atom | {label, label_type} +%%% +%%% mem ::= {x86_mem, base, off, mem_type} +%%% base ::= temp | [] (XXX BUG: not quite true before RA) +%%% off ::= imm | temp +%%% mem_type ::= 'byte' | 'int16' (only valid with mov{s,z}x) +%%% | type +%%% +%%% src ::= temp | mem | imm +%%% dst ::= temp | mem +%%% arg ::= src +%%% args ::= +%%% +%%% mfa ::= {x86_mfa, atom, atom, byte} +%%% prim ::= {x86_prim, atom} +%%% fun ::= mfa | prim | temp | mem +%%% +%%% jtab ::= label (equiv. to {x86_imm,{label,'constant'}}) +%%% +%%% sdesc ::= {x86_sdesc, exnlab, fsize, arity, live} +%%% exnlab ::= [] | label +%%% fsize ::= (frame size in words) +%%% live ::= (word offsets) +%%% arity ::= int32 + +-record(x86_temp, {reg, type, allocatable}). +-record(x86_imm, {value}). +-record(x86_mem, {base, off, type}). +-record(x86_fpreg, {reg, pseudo}). +-record(x86_mfa, {m::atom(), f::atom(), a::arity()}). +-record(x86_prim, {prim}). +-record(x86_sdesc, {exnlab, fsize, arity::arity(), live::tuple()}). + +%%% Basic instructions. +%%% These follow the AT&T convention, i.e. op src,dst (dst := dst op src) +%%% After register allocation, at most one operand in a binary +%%% instruction (alu, cmp, move) may denote a memory cell. +%%% After frame allocation, every temp must denote a physical register. + +-record(alu, {aluop, src, dst}). +-record(call, {'fun', sdesc, linkage}). +-record(cmovcc, {cc, src, dst}). +-record(cmp, {src, dst}). % a 'sub' alu which doesn't update dst +-record(comment, {term}). +-record(fmove, {src, dst}). +-record(fp_binop, {op, src, dst}). +-record(fp_unop, {op, arg}). % arg may be [] :-( +-record(imul, {imm_opt, src, temp}). % imm_opt:[]|imm, src:temp|mem +-record(jcc, {cc, label}). +-record(jmp_fun, {'fun', linkage}). % tailcall, direct or indirect +-record(jmp_label, {label}). % local jmp, direct +-record(jmp_switch, {temp, jtab, labels}). % local jmp, indirect +-record(label, {label}). +-record(lea, {mem, temp}). +-record(move, {src, dst}). +-record(move64, {imm, dst}). +-record(movsx, {src, dst}). +-record(movzx, {src, dst}). +-record(pseudo_call, {'fun', sdesc, contlab, linkage}). +-record(pseudo_jcc, {cc, true_label, false_label, pred}). +-record(pseudo_spill, {args=[]}). +-record(pseudo_tailcall, {'fun', arity, stkargs, linkage}). +-record(pseudo_tailcall_prepare, {}). +-record(push, {src}). +-record(pop, {dst}). +-record(ret, {npop}). % EAX is live-in +-record(shift, {shiftop, src, dst}). +-record(test, {src, dst}). + +%%% Function definitions. + +-include("../misc/hipe_consttab.hrl"). + +-record(defun, {mfa :: mfa(), formals, code, + data :: hipe_consttab(), + isclosure :: boolean(), + isleaf :: boolean(), + var_range, label_range}). diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl new file mode 100644 index 0000000000..4e65736db3 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -0,0 +1,1014 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% HiPE/x86 assembler +%%% +%%% TODO: +%%% - Simplify combine_label_maps and mk_data_relocs. +%%% - Move find_const to hipe_pack_constants? + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_ASSEMBLE, hipe_amd64_assemble). +-define(HIPE_X86_ENCODE, hipe_amd64_encode). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_PP, hipe_amd64_pp). +-ifdef(AMD64_SIMULATE_NSP). +-define(X86_SIMULATE_NSP, ?AMD64_SIMULATE_NSP). +-endif. +-define(EAX, rax). +-define(REGArch, reg64). +-define(RMArch, rm64). +-define(EA_DISP32_ABSOLUTE, ea_disp32_sindex). +-else. +-define(HIPE_X86_ASSEMBLE, hipe_x86_assemble). +-define(HIPE_X86_ENCODE, hipe_x86_encode). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(EAX, eax). +-define(REGArch, reg32). +-define(RMArch, rm32). +-define(EA_DISP32_ABSOLUTE, ea_disp32). +-endif. + +-module(?HIPE_X86_ASSEMBLE). +-export([assemble/4]). + +-define(DEBUG,true). + +-include("../main/hipe.hrl"). +-include("../x86/hipe_x86.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). +-include("../rtl/hipe_literals.hrl"). +-include("../misc/hipe_sdi.hrl"). +-undef(ASSERT). +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +assemble(CompiledCode, Closures, Exports, Options) -> + ?when_option(time, Options, ?start_timer("x86 assembler")), + print("****************** Assembling *******************\n", [], Options), + %% + Code = [{MFA, + hipe_x86:defun_code(Defun), + hipe_x86:defun_data(Defun)} + || {MFA, Defun} <- CompiledCode], + %% + {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = + hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()), + %% + {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = + encode(translate(Code, ConstMap, Options), Options), + print("Total num bytes=~w\n", [CodeSize], Options), + %% put(code_size, CodeSize), + %% put(const_size, ConstSize), + %% ?when_option(verbose, Options, + %% ?debug_msg("Constants are ~w bytes\n",[ConstSize])), + %% + SC = hipe_pack_constants:slim_constmap(ConstMap), + DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap), + SSE = slim_sorted_exportmap(ExportMap,Closures,Exports), + SlimRefs = hipe_pack_constants:slim_refs(AccRefs), + Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC}, + ConstAlign, ConstSize, + SC, + DataRelocs, % nee LM, LabelMap + SSE, + CodeSize,CodeBinary,SlimRefs, + 0,[] % ColdCodeSize, SlimColdRefs + ]), + %% + %% ?when_option(time, Options, ?stop_timer("x86 assembler")), + Bin. + +%%% +%%% Assembly Pass 1. +%%% Process initial {MFA,Code,Data} list. +%%% Translate each MFA's body, choosing operand & instruction kinds. +%%% +%%% Assembly Pass 2. +%%% Perform short/long form optimisation for jumps. +%%% Build LabelMap for each MFA. +%%% +%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. +%%% + +translate(Code, ConstMap, Options) -> + translate_mfas(Code, ConstMap, [], Options). + +translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode, Options) -> + {NewInsns,CodeSize,LabelMap} = + translate_insns(Insns, {MFA,ConstMap}, hipe_sdi:pass1_init(), 0, [], Options), + translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode], Options); +translate_mfas([], _ConstMap, NewCode, _Options) -> + lists:reverse(NewCode). + +translate_insns([I|Insns], Context, SdiPass1, Address, NewInsns, Options) -> + NewIs = translate_insn(I, Context, Options), + add_insns(NewIs, Insns, Context, SdiPass1, Address, NewInsns, Options); +translate_insns([], _Context, SdiPass1, Address, NewInsns, _Options) -> + {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1), + {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}. + +add_insns([I|Is], Insns, Context, SdiPass1, Address, NewInsns, Options) -> + NewSdiPass1 = + case I of + {'.label',L,_} -> + hipe_sdi:pass1_add_label(SdiPass1, Address, L); + {jcc_sdi,{_,{label,L}},_} -> + SdiInfo = #sdi_info{incr=(6-2),lb=(-128)+2,ub=127+2}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + {jmp_sdi,{{label,L}},_} -> + SdiInfo = #sdi_info{incr=(5-2),lb=(-128)+2,ub=127+2}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + _ -> + SdiPass1 + end, + Address1 = Address + insn_size(I), + add_insns(Is, Insns, Context, NewSdiPass1, Address1, [I|NewInsns], Options); +add_insns([], Insns, Context, SdiPass1, Address, NewInsns, Options) -> + translate_insns(Insns, Context, SdiPass1, Address, NewInsns, Options). + +insn_size(I) -> + case I of + {'.label',_,_} -> 0; + {'.sdesc',_,_} -> 0; + {jcc_sdi,_,_} -> 2; + {jmp_sdi,_,_} -> 2; + {Op,Arg,_Orig} -> ?HIPE_X86_ENCODE:insn_sizeof(Op, Arg) + end. + +translate_insn(I, Context, Options) -> + case I of + #alu{} -> + Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), + [{hipe_x86:alu_op(I), Arg, I}]; + #call{} -> + translate_call(I); + #cmovcc{} -> + {Dst,Src} = resolve_move_args( + hipe_x86:cmovcc_src(I), hipe_x86:cmovcc_dst(I), + Context), + CC = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:cmovcc_cc(I))}, + Arg = {CC,Dst,Src}, + [{cmovcc, Arg, I}]; + #cmp{} -> + Arg = resolve_alu_args(hipe_x86:cmp_src(I), hipe_x86:cmp_dst(I), Context), + [{cmp, Arg, I}]; + #comment{} -> + []; + #fmove{} -> + {Op,Arg} = resolve_sse2_fmove_args(hipe_x86:fmove_src(I), + hipe_x86:fmove_dst(I)), + [{Op, Arg, I}]; + #fp_binop{} -> + case proplists:get_bool(x87, Options) of + true -> % x87 + Arg = resolve_x87_binop_args(hipe_x86:fp_binop_src(I), + hipe_x86:fp_binop_dst(I)), + [{hipe_x86:fp_binop_op(I), Arg, I}]; + false -> % sse2 + Arg = resolve_sse2_binop_args(hipe_x86:fp_binop_src(I), + hipe_x86:fp_binop_dst(I)), + [{resolve_sse2_op(hipe_x86:fp_binop_op(I)), Arg, I}] + end; + #fp_unop{} -> + case proplists:get_bool(x87, Options) of + true -> % x87 + Arg = resolve_x87_unop_arg(hipe_x86:fp_unop_arg(I)), + [{hipe_x86:fp_unop_op(I), Arg, I}]; + false -> % sse2 + case hipe_x86:fp_unop_op(I) of + 'fchs' -> + Arg = resolve_sse2_fchs_arg(hipe_x86:fp_unop_arg(I)), + [{'xorpd', Arg, I}]; + 'fwait' -> % no op on sse2, magic on x87 + [] + end + end; + #imul{} -> + translate_imul(I, Context); + #jcc{} -> + Cc = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:jcc_cc(I))}, + Label = translate_label(hipe_x86:jcc_label(I)), + [{jcc_sdi, {Cc,Label}, I}]; + #jmp_fun{} -> + %% call and jmp are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case hipe_x86:jmp_fun_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + Arg = translate_fun(hipe_x86:jmp_fun_fun(I), PatchTypeExt), + [{jmp, {Arg}, I}]; + #jmp_label{} -> + Arg = translate_label(hipe_x86:jmp_label_label(I)), + [{jmp_sdi, {Arg}, I}]; + #jmp_switch{} -> + RM32 = resolve_jmp_switch_arg(I, Context), + [{jmp, {RM32}, I}]; + #label{} -> + [{'.label', hipe_x86:label_label(I), I}]; + #lea{} -> + Arg = resolve_lea_args(hipe_x86:lea_mem(I), hipe_x86:lea_temp(I)), + [{lea, Arg, I}]; + #move{} -> + Arg = resolve_move_args(hipe_x86:move_src(I), hipe_x86:move_dst(I), + Context), + [{mov, Arg, I}]; + #move64{} -> + translate_move64(I, Context); + #movsx{} -> + Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)), + [{movsx, Arg, I}]; + #movzx{} -> + Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)), + [{movzx, Arg, I}]; + %% pseudo_call: eliminated before assembly + %% pseudo_jcc: eliminated before assembly + %% pseudo_tailcall: eliminated before assembly + %% pseudo_tailcall_prepare: eliminated before assembly + #pop{} -> + Arg = translate_dst(hipe_x86:pop_dst(I)), + [{pop, {Arg}, I}]; + #push{} -> + Arg = translate_src(hipe_x86:push_src(I), Context), + [{push, {Arg}, I}]; + #ret{} -> + translate_ret(I); + #shift{} -> + Arg = resolve_shift_args(hipe_x86:shift_src(I), hipe_x86:shift_dst(I), Context), + [{hipe_x86:shift_op(I), Arg, I}]; + #test{} -> + Arg = resolve_test_args(hipe_x86:test_src(I), hipe_x86:test_dst(I), Context), + [{test, Arg, I}] + end. + +-ifdef(X86_SIMULATE_NSP). +-ifdef(HIPE_AMD64). +translate_call(I) -> + WordSize = hipe_amd64_registers:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + FunOrig = hipe_x86:call_fun(I), + Fun = + case FunOrig of + #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> + FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; + _ -> FunOrig + end, + RegRA = + begin + RegTemp0 = hipe_amd64_registers:temp0(), + RegTemp1 = hipe_amd64_registers:temp1(), + case Fun of + #x86_temp{reg=RegTemp0} -> RegTemp1; + #x86_mem{base=#x86_temp{reg=RegTemp0}} -> RegTemp1; + _ -> RegTemp0 + end + end, + TempRA = hipe_x86:mk_temp(RegRA, untagged), + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + JmpArg = translate_fun(Fun, PatchTypeExt), + I4 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, + I3 = {jmp, {JmpArg}, #comment{term=call}}, + Size3 = hipe_amd64_encode:insn_sizeof(jmp, {JmpArg}), + MovArgs = {mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged)), + temp_to_regArch(TempRA)}, + I2 = {mov, MovArgs, #comment{term=call}}, + Size2 = hipe_amd64_encode:insn_sizeof(mov, MovArgs), + I1 = {lea, {temp_to_regArch(TempRA), + {ea, hipe_amd64_encode:ea_disp32_rip(Size2+Size3)}}, + #comment{term=call}}, + I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, + [I0,I1,I2,I3,I4]. +-else. +translate_call(I) -> + WordSize = ?HIPE_X86_REGISTERS:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + FunOrig = hipe_x86:call_fun(I), + Fun = + case FunOrig of + #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> + FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; + _ -> FunOrig + end, + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + JmpArg = translate_fun(Fun, PatchTypeExt), + I3 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, + I2 = {jmp, {JmpArg}, #comment{term=call}}, + Size2 = ?HIPE_X86_ENCODE:insn_sizeof(jmp, {JmpArg}), + I1 = {mov, {mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged)), + {imm32,{?X86ABSPCREL,4+Size2}}}, + #comment{term=call}}, + I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, + [I0,I1,I2,I3]. +-endif. + +translate_ret(I) -> + NPOP = hipe_x86:ret_npop(I) + ?HIPE_X86_REGISTERS:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + RegRA = 2#011, % ebx/rbx + TempRA = hipe_x86:mk_temp(RegRA, untagged), + [{mov, + {temp_to_regArch(TempRA), + mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged))}, + I}, + {add, + {temp_to_rmArch(TempSP), + case NPOP < 128 of + true -> {imm8,NPOP}; + false -> {imm32,NPOP} + end}, + #comment{term=ret}}, + {jmp, + {temp_to_rmArch(TempRA)}, + #comment{term=ret}}]. + +-else. % not X86_SIMULATE_NSP + +translate_call(I) -> + %% call and jmp are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + Arg = translate_fun(hipe_x86:call_fun(I), PatchTypeExt), + SDesc = hipe_x86:call_sdesc(I), + [{call, {Arg}, I}, {'.sdesc', SDesc, #comment{term=sdesc}}]. + +translate_ret(I) -> + Arg = + case hipe_x86:ret_npop(I) of + 0 -> {}; + N -> {{imm16,N}} + end, + [{ret, Arg, I}]. + +-endif. % X86_SIMULATE_NSP + +translate_imul(I, Context) -> + Temp = temp_to_regArch(hipe_x86:imul_temp(I)), + Src = temp_or_mem_to_rmArch(hipe_x86:imul_src(I)), + Args = + case hipe_x86:imul_imm_opt(I) of + [] -> {Temp,Src}; + Imm -> {Temp,Src,translate_imm(Imm, Context, true)} + end, + [{'imul', Args, I}]. + +temp_or_mem_to_rmArch(Src) -> + case Src of + #x86_temp{} -> temp_to_rmArch(Src); + #x86_mem{} -> mem_to_rmArch(Src) + end. + +translate_label(Label) when is_integer(Label) -> + {label,Label}. % symbolic, since offset is not yet computable + +translate_fun(Arg, PatchTypeExt) -> + case Arg of + #x86_temp{} -> + temp_to_rmArch(Arg); + #x86_mem{} -> + mem_to_rmArch(Arg); + #x86_mfa{m=M,f=F,a=A} -> + {rel32,{PatchTypeExt,{M,F,A}}}; + #x86_prim{prim=Prim} -> + {rel32,{PatchTypeExt,Prim}} + end. + +translate_src(Src, Context) -> + case Src of + #x86_imm{} -> + translate_imm(Src, Context, true); + _ -> + translate_dst(Src) + end. + +%%% MayTrunc8 controls whether negative Imm8s should be truncated +%%% to 8 bits or not. Truncation should always be done, except when +%%% the caller will widen the Imm8 to an Imm32 or Imm64. +translate_imm(#x86_imm{value=Imm}, Context, MayTrunc8) -> + if is_atom(Imm) -> + {imm32,{?LOAD_ATOM,Imm}}; + is_integer(Imm) -> + case (Imm =< 127) and (Imm >= -128) of + true -> + Imm8 = + case MayTrunc8 of + true -> Imm band 16#FF; + false -> Imm + end, + {imm8,Imm8}; + false -> + {imm32,Imm} + end; + true -> + Val = + case Imm of + {Label,constant} -> + {MFA,ConstMap} = Context, + ConstNo = find_const({MFA,Label}, ConstMap), + {constant,ConstNo}; + {Label,closure} -> + {closure,Label}; + {Label,c_const} -> + {c_const,Label} + end, + {imm32,{?LOAD_ADDRESS,Val}} + end. + +translate_dst(Dst) -> + case Dst of + #x86_temp{} -> + temp_to_regArch(Dst); + #x86_mem{type='double'} -> + mem_to_rm64fp(Dst); + #x86_mem{} -> + mem_to_rmArch(Dst); + #x86_fpreg{} -> + fpreg_to_stack(Dst) + end. + +%%% +%%% Assembly Pass 3. +%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. +%%% Translate to a single binary code segment. +%%% Collect relocation patches. +%%% Build ExportMap (MFA-to-address mapping). +%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). +%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. +%%% + +encode(Code, Options) -> + CodeSize = compute_code_size(Code, 0), + ExportMap = build_export_map(Code, 0, []), + {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), + CodeBinary = list_to_binary(lists:reverse(AccCode)), + ?ASSERT(CodeSize =:= byte_size(CodeBinary)), + CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), + {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. + +nr_pad_bytes(Address) -> (4 - (Address rem 4)) rem 4. % XXX: 16 or 32 instead? + +align_entry(Address) -> Address + nr_pad_bytes(Address). + +compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> + compute_code_size(Code, align_entry(Size+CodeSize)); +compute_code_size([], Size) -> Size. + +build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> + build_export_map(Code, align_entry(Address+CodeSize), [{Address,M,F,A}|ExportMap]); +build_export_map([], _Address, ExportMap) -> ExportMap. + +combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> + NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), + combine_label_maps(Code, align_entry(Address+CodeSize), NewCLM); +combine_label_maps([], _Address, CLM) -> CLM. + +merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> + NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), + merge_label_map(Rest, MFA, Address, NewCLM); +merge_label_map([], _MFA, _Address, CLM) -> CLM. + +encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> + print("Generating code for:~w\n", [MFA], Options), + print("Offset | Opcode | Instruction\n", [], Options), + {Address1,Relocs1,AccCode1} = + encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), + ExpectedAddress = align_entry(Address + CodeSize), + ?ASSERT(Address1 =:= ExpectedAddress), + print("Finished.\n\n", [], Options), + encode_mfas(Code, Address1, AccCode1, Relocs1, Options); +encode_mfas([], _Address, AccCode, Relocs, _Options) -> + {AccCode, Relocs}. + +encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case I of + {'.label',L,_} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ?ASSERT(Address =:= LabelAddress), % sanity check + print_insn(Address, [], I, Options), + encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); + {'.sdesc',SDesc,_} -> + #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, + ExnRA = + case ExnLab of + [] -> []; % don't cons up a new one + ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress + end, + Reloc = {?SDESC, Address, + ?STACK_DESC(ExnRA, FSize, Arity, Live)}, + encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); + _ -> + {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap), + {Bytes, NewRelocs} = ?HIPE_X86_ENCODE:insn_encode(Op, Arg, Address), + print_insn(Address, Bytes, I, Options), + Segment = list_to_binary(Bytes), + Size = byte_size(Segment), + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+Size, FunAddress, LabelMap, NewRelocs++Relocs, NewAccCode, Options) + end; +encode_insns([], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case nr_pad_bytes(Address) of + 0 -> + {Address,Relocs,AccCode}; + NrPadBytes -> % triggers at most once per function body + Padding = lists:duplicate(NrPadBytes, {nop,{},#comment{term=padding}}), + encode_insns(Padding, Address, FunAddress, LabelMap, Relocs, AccCode, Options) + end. + +fix_jumps(I, InsnAddress, FunAddress, LabelMap) -> + case I of + {jcc_sdi,{CC,{label,L}},OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ShortOffset = LabelAddress - (InsnAddress + 2), + if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> + {jcc,{CC,{rel8,ShortOffset band 16#FF}},OrigI}; + true -> + LongOffset = LabelAddress - (InsnAddress + 6), + {jcc,{CC,{rel32,LongOffset}},OrigI} + end; + {jmp_sdi,{{label,L}},OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ShortOffset = LabelAddress - (InsnAddress + 2), + if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> + {jmp,{{rel8,ShortOffset band 16#FF}},OrigI}; + true -> + LongOffset = LabelAddress - (InsnAddress + 5), + {jmp,{{rel32,LongOffset}},OrigI} + end; + _ -> I + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fpreg_to_stack(#x86_fpreg{reg=Reg}) -> + {fpst, Reg}. + +temp_to_regArch(#x86_temp{reg=Reg}) -> + {?REGArch, Reg}. + +-ifdef(HIPE_AMD64). +temp_to_reg64(#x86_temp{reg=Reg}) -> + {reg64, Reg}. +-endif. + +temp_to_reg32(#x86_temp{reg=Reg}) -> + {reg32, Reg}. +temp_to_reg16(#x86_temp{reg=Reg}) -> + {reg16, Reg}. +temp_to_reg8(#x86_temp{reg=Reg}) -> + {reg8, Reg}. + +temp_to_xmm(#x86_temp{reg=Reg}) -> + {xmm, Reg}. + +-ifdef(HIPE_AMD64). +temp_to_rm64(#x86_temp{reg=Reg}) -> + {rm64, hipe_amd64_encode:rm_reg(Reg)}. +-endif. + +temp_to_rmArch(#x86_temp{reg=Reg}) -> + {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. +temp_to_rm64fp(#x86_temp{reg=Reg}) -> + {rm64fp, ?HIPE_X86_ENCODE:rm_reg(Reg)}. + +mem_to_ea(Mem) -> + EA = mem_to_ea_common(Mem), + {ea, EA}. + +mem_to_rm32(Mem) -> + EA = mem_to_ea_common(Mem), + {rm32, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rmArch(Mem) -> + EA = mem_to_ea_common(Mem), + {?RMArch, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rm64fp(Mem) -> + EA = mem_to_ea_common(Mem), + {rm64fp, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +%%%%%%%%%%%%%%%%% +mem_to_rm8(Mem) -> + EA = mem_to_ea_common(Mem), + {rm8, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rm16(Mem) -> + EA = mem_to_ea_common(Mem), + {rm16, ?HIPE_X86_ENCODE:rm_mem(EA)}. +%%%%%%%%%%%%%%%%% + +mem_to_ea_common(#x86_mem{base=[], off=#x86_imm{value=Off}}) -> + ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(Off); +mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_temp{reg=Index}}) -> + case Base band 2#111 of + 5 -> % ebp/rbp or r13 + case Index band 2#111 of + 5 -> % ebp/rbp or r13 + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), + SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), + ?HIPE_X86_ENCODE:ea_disp8_sib(0, SIB); + _ -> + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Base), + SIB = ?HIPE_X86_ENCODE:sib(Index, SINDEX), + ?HIPE_X86_ENCODE:ea_sib(SIB) + end; + _ -> + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), + SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), + ?HIPE_X86_ENCODE:ea_sib(SIB) + end; +mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_imm{value=Off}}) -> + if + Off =:= 0 -> + case Base of + 4 -> %esp, use SIB w/o disp8 + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_sib(SIB); + 5 -> %ebp, use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); + 12 -> %r12, use SIB w/o disp8 + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_sib(SIB); + 13 -> %r13, use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); + _ -> %neither SIB nor disp8 needed + ?HIPE_X86_ENCODE:ea_base(Base) + end; + Off >= -128, Off =< 127 -> + Disp8 = Off band 16#FF, + case Base of + 4 -> %esp, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); + 12 -> %r12, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); + _ -> %use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Disp8, Base) + end; + true -> + case Base of + 4 -> %esp, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); + 12 -> %r12, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); + _ -> + ?HIPE_X86_ENCODE:ea_disp32_base(Off, Base) + end + end. + +%% jmp_switch +-ifdef(HIPE_AMD64). +resolve_jmp_switch_arg(I, _Context) -> + Base = hipe_x86:temp_reg(hipe_x86:jmp_switch_jtab(I)), + Index = hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I)), + SINDEX = hipe_amd64_encode:sindex(3, Index), + SIB = hipe_amd64_encode:sib(Base, SINDEX), + EA = + if (Base =:= 5) or (Base =:= 13) -> + hipe_amd64_encode:ea_disp8_sib(0, SIB); + true -> + hipe_amd64_encode:ea_sib(SIB) + end, + {rm64,hipe_amd64_encode:rm_mem(EA)}. +-else. +resolve_jmp_switch_arg(I, {MFA,ConstMap}) -> + ConstNo = find_const({MFA,hipe_x86:jmp_switch_jtab(I)}, ConstMap), + Disp32 = {?LOAD_ADDRESS,{constant,ConstNo}}, + SINDEX = ?HIPE_X86_ENCODE:sindex(2, hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I))), + EA = ?HIPE_X86_ENCODE:ea_disp32_sindex(Disp32, SINDEX), % this creates a SIB implicitly + {rm32,?HIPE_X86_ENCODE:rm_mem(EA)}. +-endif. + +%% lea reg, mem +resolve_lea_args(Src=#x86_mem{}, Dst=#x86_temp{}) -> + {temp_to_regArch(Dst),mem_to_ea(Src)}. + +resolve_sse2_op(Op) -> + case Op of + fadd -> addsd; + fdiv -> divsd; + fmul -> mulsd; + fsub -> subsd; + _ -> exit({?MODULE, unknown_sse2_operator, Op}) + end. + +%% OP xmm, mem +resolve_sse2_binop_args(Src=#x86_mem{type=double}, + Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst),mem_to_rm64fp(Src)}; +%% movsd mem, xmm +resolve_sse2_binop_args(Src=#x86_temp{type=double}, + Dst=#x86_mem{type=double}) -> + {mem_to_rm64fp(Dst),temp_to_xmm(Src)}; +%% OP xmm, xmm +resolve_sse2_binop_args(Src=#x86_temp{type=double}, + Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst),temp_to_rm64fp(Src)}. + +%%% fmove -> cvtsi2sd or movsd +resolve_sse2_fmove_args(Src, Dst) -> + case {Src,Dst} of + {#x86_temp{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, reg + {cvtsi2sd, {temp_to_xmm(Dst),temp_to_rmArch(Src)}}; + {#x86_mem{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, mem + {cvtsi2sd, {temp_to_xmm(Dst),mem_to_rmArch(Src)}}; + _ -> % movsd + {movsd, resolve_sse2_binop_args(Src, Dst)} + end. + +%%% xorpd xmm, mem +resolve_sse2_fchs_arg(Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst), + {rm64fp, {rm_mem, ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE( + {?LOAD_ADDRESS, + {c_const, sse2_fnegate_mask}})}}}. + +%% mov mem, imm +resolve_move_args(#x86_imm{value=ImmSrc}, Dst=#x86_mem{type=Type}, Context) -> + case Type of % to support byte, int16 and int32 stores + byte -> + ByteImm = ImmSrc band 255, %to ensure that it is a bytesized imm + {mem_to_rm8(Dst),{imm8,ByteImm}}; + int16 -> + {mem_to_rm16(Dst),{imm16,ImmSrc band 16#FFFF}}; + int32 -> + {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), + {mem_to_rm32(Dst),{imm32,Imm}}; + _ -> + RMArch = mem_to_rmArch(Dst), + {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), + {RMArch,{imm32,Imm}} + end; + +%% mov reg,mem +resolve_move_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}, _Context) -> + case Type of + int32 -> % must be unsigned + {temp_to_reg32(Dst),mem_to_rm32(Src)}; + _ -> + {temp_to_regArch(Dst),mem_to_rmArch(Src)} + end; + +%% mov mem,reg +resolve_move_args(Src=#x86_temp{}, Dst=#x86_mem{type=Type}, _Context) -> + case Type of % to support byte, int16 and int32 stores + byte -> + {mem_to_rm8(Dst),temp_to_reg8(Src)}; + int16 -> + {mem_to_rm16(Dst),temp_to_reg16(Src)}; + int32 -> + {mem_to_rm32(Dst),temp_to_reg32(Src)}; + tagged -> % tagged, untagged + {mem_to_rmArch(Dst),temp_to_regArch(Src)}; + untagged -> % tagged, untagged + {mem_to_rmArch(Dst),temp_to_regArch(Src)} + end; + +%% mov reg,reg +resolve_move_args(Src=#x86_temp{}, Dst=#x86_temp{}, _Context) -> + {temp_to_regArch(Dst),temp_to_rmArch(Src)}; + +%% mov reg,imm +resolve_move_args(Src=#x86_imm{value=_ImmSrc}, Dst=#x86_temp{}, Context) -> + {_,Imm} = translate_imm(Src, Context, false), + imm_move_args(Dst, Imm). + +-ifdef(HIPE_AMD64). +imm_move_args(Dst, Imm) -> + if is_number(Imm), Imm >= 0 -> + {temp_to_reg32(Dst),{imm32,Imm}}; + true -> + {temp_to_rm64(Dst),{imm32,Imm}} + end. +-else. +imm_move_args(Dst, Imm) -> + {temp_to_reg32(Dst),{imm32,Imm}}. +-endif. + +-ifdef(HIPE_AMD64). +translate_move64(I, Context) -> + Arg = resolve_move64_args(hipe_x86:move64_src(I), + hipe_x86:move64_dst(I), + Context), + [{mov, Arg, I}]. + +%% mov reg,imm64 +resolve_move64_args(Src=#x86_imm{}, Dst=#x86_temp{}, Context) -> + {_,Imm} = translate_imm(Src, Context, false), + {temp_to_reg64(Dst),{imm64,Imm}}. +-else. +translate_move64(I, _Context) -> exit({?MODULE, I}). +-endif. + +%%% mov{s,z}x +resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) -> + {temp_to_regArch(Dst), + case Type of + byte -> + mem_to_rm8(Src); + int16 -> + mem_to_rm16(Src); + int32 -> + mem_to_rm32(Src) + end}. + +%%% alu/cmp (_not_ test) +resolve_alu_args(Src, Dst, Context) -> + case {Src,Dst} of + {#x86_imm{}, #x86_mem{}} -> + {mem_to_rmArch(Dst), translate_imm(Src, Context, true)}; + {#x86_mem{}, #x86_temp{}} -> + {temp_to_regArch(Dst), mem_to_rmArch(Src)}; + {#x86_temp{}, #x86_mem{}} -> + {mem_to_rmArch(Dst), temp_to_regArch(Src)}; + {#x86_temp{}, #x86_temp{}} -> + {temp_to_regArch(Dst), temp_to_rmArch(Src)}; + {#x86_imm{}, #x86_temp{reg=0}} -> % eax,imm + NewSrc = translate_imm(Src, Context, true), + NewDst = + case NewSrc of + {imm8,_} -> temp_to_rmArch(Dst); + {imm32,_} -> ?EAX + end, + {NewDst, NewSrc}; + {#x86_imm{}, #x86_temp{}} -> + {temp_to_rmArch(Dst), translate_imm(Src, Context, true)} + end. + +%%% test +resolve_test_args(Src, Dst, Context) -> + case Src of + #x86_imm{} -> % imm8 not allowed + {_ImmSize,ImmValue} = translate_imm(Src, Context, false), + NewDst = + case Dst of + #x86_temp{reg=0} -> ?EAX; + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + {NewDst, {imm32,ImmValue}}; + #x86_temp{} -> + NewDst = + case Dst of + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + {NewDst, temp_to_regArch(Src)} + end. + +%%% shifts +resolve_shift_args(Src, Dst, Context) -> + RM32 = + case Dst of + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + Count = + case Src of + #x86_imm{value=1} -> 1; + #x86_imm{} -> translate_imm(Src, Context, true); % must be imm8 + #x86_temp{reg=1} -> cl % temp must be ecx + end, + {RM32, Count}. + +%% x87_binop mem +resolve_x87_unop_arg(Arg=#x86_mem{type=Type})-> + case Type of + 'double' -> {mem_to_rm64fp(Arg)}; + 'untagged' -> {mem_to_rmArch(Arg)}; + _ -> ?EXIT({fmovArgNotSupported,{Arg}}) + end; +resolve_x87_unop_arg(Arg=#x86_fpreg{}) -> + {fpreg_to_stack(Arg)}; +resolve_x87_unop_arg([]) -> + []. + +%% x87_binop mem, st(i) +resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_mem{})-> + {mem_to_rm64fp(Dst),fpreg_to_stack(Src)}; +%% x87_binop st(0), st(i) +resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_fpreg{})-> + {fpreg_to_stack(Dst),fpreg_to_stack(Src)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_data_relocs(RefsFromConsts, LabelMap) -> + lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])). + +mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) -> + Map = [case Label of + {L,Pos} -> + Offset = find({MFA,L}, LabelMap), + {Pos,Offset}; + {sorted,Base,OrderedLabels} -> + {sorted, Base, [begin + Offset = find({MFA,L}, LabelMap), + {Order, Offset} + end + || {L,Order} <- OrderedLabels]} + end + || Label <- Labels], + %% msg("Map: ~w Map\n",[Map]), + mk_data_relocs(Rest, LabelMap, [Map,Acc]); +mk_data_relocs([],_,Acc) -> Acc. + +find({MFA,L},LabelMap) -> + gb_trees:get({MFA,L}, LabelMap). + +slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) -> + IsClosure = lists:member({M,F,A}, Closures), + IsExported = is_exported(F, A, Exports), + [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)]; +slim_sorted_exportmap([],_,_) -> []. + +is_exported(F, A, Exports) -> lists:member({F,A}, Exports). + +%%% +%%% Assembly listing support (pp_asm option). +%%% + +print(String, Arglist, Options) -> + ?when_option(pp_asm, Options, io:format(String, Arglist)). + +print_insn(Address, Bytes, I, Options) -> + ?when_option(pp_asm, Options, print_insn_2(Address, Bytes, I)), + ?when_option(pp_cxmon, Options, print_code_list_2(Bytes)). + +print_code_list_2([H | Tail]) -> + print_byte(H), + io:format(","), + print_code_list_2(Tail); +print_code_list_2([]) -> + io:format(""). + +print_insn_2(Address, Bytes, {_,_,OrigI}) -> + io:format("~8.16b | ", [Address]), + print_code_list(Bytes, 0), + ?HIPE_X86_PP:pp_insn(OrigI). + +print_code_list([Byte|Rest], Len) -> + print_byte(Byte), + print_code_list(Rest, Len+1); +print_code_list([], Len) -> + fill_spaces(24-(Len*2)), + io:format(" | "). + +print_byte(Byte) -> + io:format("~2.16.0b", [Byte band 16#FF]). + +fill_spaces(N) when N > 0 -> + io:format(" "), + fill_spaces(N-1); +fill_spaces(0) -> + []. + +%%% +%%% Lookup a constant in a ConstMap. +%%% + +find_const({MFA,Label},[{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) -> + ConstNo; +find_const(N,[_|R]) -> + find_const(N,R); +find_const(C,[]) -> + ?EXIT({constant_not_found,C}). diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl new file mode 100644 index 0000000000..d15dcc061a --- /dev/null +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -0,0 +1,147 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_x86_cfg). + +-export([init/1, + labels/1, start_label/1, + succ/2, pred/2, + bb/2, bb_add/3]). +-export([postorder/1, reverse_postorder/1]). +-export([linearise/1, params/1, arity/1, redirect_jmp/3]). + +%%% these tell cfg.inc what to define (ugly as hell) +-define(PRED_NEEDED,true). +-define(BREADTH_ORDER,true). +-define(PARAMS_NEEDED,true). +-define(START_LABEL_UPDATE_NEEDED,true). + +-include("hipe_x86.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +init(Defun) -> + %% XXX: this assumes that the code starts with a label insn. + %% Is that guaranteed? + Code = hipe_x86:defun_code(Defun), + StartLab = hipe_x86:label_label(hd(Code)), + Data = hipe_x86:defun_data(Defun), + IsClosure = hipe_x86:defun_is_closure(Defun), + MFA = hipe_x86:defun_mfa(Defun), + IsLeaf = hipe_x86:defun_is_leaf(Defun), + Formals = hipe_x86:defun_formals(Defun), + CFG0 = mk_empty_cfg(MFA, StartLab, Data, IsClosure, IsLeaf, Formals), + take_bbs(Code, CFG0). + +is_branch(I) -> + case I of + #jmp_fun{} -> true; + #jmp_label{} -> true; + #jmp_switch{} -> true; + #pseudo_call{} -> true; + #pseudo_jcc{} -> true; + #pseudo_tailcall{} -> true; + #ret{} -> true; + _ -> false + end. + +branch_successors(Branch) -> + case Branch of + #jmp_fun{} -> []; + #jmp_label{label=Label} -> [Label]; + #jmp_switch{labels=Labels} -> Labels; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> + case ExnLab of + [] -> [ContLab]; + _ -> [ContLab,ExnLab] + end; + #pseudo_jcc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; + #pseudo_tailcall{} -> []; + #ret{} -> [] + end. + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). +fails_to(_Instr) -> []. +-endif. + +redirect_jmp(I, Old, New) -> + case I of + #jmp_label{label=Label} -> + if Old =:= Label -> I#jmp_label{label=New}; + true -> I + end; + #pseudo_jcc{true_label=TrueLab, false_label=FalseLab} -> + J0 = if Old =:= TrueLab -> I#pseudo_jcc{true_label=New}; + true -> I + end, + if Old =:= FalseLab -> J0#pseudo_jcc{false_label=New}; + true -> J0 + end; + %% handle pseudo_call too? + _ -> I + end. + +%%% XXX: fix if labels can occur in operands +%% redirect_ops(_Labels, CFG, _Map) -> +%% CFG. + +mk_goto(Label) -> + hipe_x86:mk_jmp_label(Label). + +is_label(I) -> + hipe_x86:is_label(I). + +label_name(Label) -> + hipe_x86:label_label(Label). + +mk_label(Name) -> + hipe_x86:mk_label(Name). + +%% is_comment(I) -> +%% hipe_x86:is_comment(I). +%% +%% is_goto(I) -> +%% hipe_x86:is_jmp_label(I). + +linearise(CFG) -> % -> defun, not insn list + MFA = function(CFG), + Formals = params(CFG), + Code = linearize_cfg(CFG), + Data = data(CFG), + VarRange = hipe_gensym:var_range(x86), + LabelRange = hipe_gensym:label_range(x86), + IsClosure = is_closure(CFG), + IsLeaf = is_leaf(CFG), + hipe_x86:mk_defun(MFA, Formals, IsClosure, IsLeaf, + Code, Data, VarRange, LabelRange). + +arity(CFG) -> + {_M,_F,A} = function(CFG), + A. + +%% init_gensym(CFG) -> +%% HighestVar = find_highest_var(CFG), +%% HighestLabel = find_highest_label(CFG), +%% hipe_gensym:init(), +%% hipe_gensym:set_var(x86, HighestVar), +%% hipe_gensym:set_label(x86, HighestLabel). +%% +%% highest_var(Code) -> +%% hipe_x86:highest_temp(Code). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl new file mode 100644 index 0000000000..3387f77595 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -0,0 +1,160 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% compute def/use sets for x86 insns +%%% +%%% TODO: +%%% - represent EFLAGS (condition codes) use/def by a virtual reg? +%%% - should push use/def %esp? + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(RV, rax). +-else. +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(RV, eax). +-endif. + +-module(?HIPE_X86_DEFUSE). +-export([insn_def/1, insn_use/1]). %% src_use/1]). +-include("../x86/hipe_x86.hrl"). + +%%% +%%% insn_def(Insn) -- Return set of temps defined by an instruction. +%%% + +insn_def(I) -> + case I of + #alu{dst=Dst} -> dst_def(Dst); + #cmovcc{dst=Dst} -> dst_def(Dst); + #fmove{dst=Dst} -> dst_def(Dst); + #fp_binop{dst=Dst} -> dst_def(Dst); + #fp_unop{arg=Arg} -> dst_def(Arg); + #imul{temp=Temp} -> [Temp]; + #lea{temp=Temp} -> [Temp]; + #move{dst=Dst} -> dst_def(Dst); + #move64{dst=Dst} -> dst_def(Dst); + #movsx{dst=Dst} -> dst_def(Dst); + #movzx{dst=Dst} -> dst_def(Dst); + #pseudo_call{} -> call_clobbered(); + #pseudo_spill{} -> []; + #pseudo_tailcall_prepare{} -> tailcall_clobbered(); + #shift{dst=Dst} -> dst_def(Dst); + %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label + %% pseudo_jcc, pseudo_tailcall, push, ret + _ -> [] + end. + +dst_def(Dst) -> + case Dst of + #x86_temp{} -> [Dst]; + #x86_fpreg{} -> [Dst]; + _ -> [] + end. + +call_clobbered() -> + [hipe_x86:mk_temp(R, T) + || {R,T} <- ?HIPE_X86_REGISTERS:call_clobbered()]. + +tailcall_clobbered() -> + [hipe_x86:mk_temp(R, T) + || {R,T} <- ?HIPE_X86_REGISTERS:tailcall_clobbered()]. + +%%% +%%% insn_use(Insn) -- Return set of temps used by an instruction. +%%% + +insn_use(I) -> + case I of + #alu{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #call{'fun'=Fun} -> addtemp(Fun, []); + #cmovcc{src=Src, dst=Dst} -> addtemp(Src, dst_use(Dst)); + #cmp{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #fmove{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #fp_unop{arg=Arg} -> addtemp(Arg, []); + #fp_binop{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #imul{imm_opt=ImmOpt,src=Src,temp=Temp} -> + addtemp(Src, case ImmOpt of [] -> addtemp(Temp, []); _ -> [] end); + #jmp_fun{'fun'=Fun} -> addtemp(Fun, []); + #jmp_switch{temp=Temp, jtab=JTab} -> addtemp(Temp, addtemp(JTab, [])); + #lea{mem=Mem} -> addtemp(Mem, []); + #move{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #move64{} -> []; + #movsx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #movzx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} -> + addtemp(Fun, arity_use(Arity)); + #pseudo_spill{args=Args} -> Args; + #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} -> + addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(), + arity_use(Arity)))); + #push{src=Src} -> addtemp(Src, []); + #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')]; + #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare + _ -> [] + end. + +arity_use(Arity) -> + [hipe_x86:mk_temp(R, 'tagged') + || R <- ?HIPE_X86_REGISTERS:args(Arity)]. + +dst_use(Dst) -> + case Dst of + #x86_mem{base=Base,off=Off} -> addbase(Base, addtemp(Off, [])); + _ -> [] + end. + +%%% +%%% src_use(Src) -- Return set of temps used by a source operand. +%%% + +%% src_use(Src) -> +%% addtemp(Src, []). + +%%% +%%% Auxiliary operations on sets of temps +%%% + +addtemps([Arg|Args], Set) -> + addtemps(Args, addtemp(Arg, Set)); +addtemps([], Set) -> + Set. + +addtemp(Arg, Set) -> + case Arg of + #x86_temp{} -> add(Arg, Set); + #x86_mem{base=Base,off=Off} -> addtemp(Off, addbase(Base, Set)); + #x86_fpreg{} -> add(Arg, Set); + _ -> Set + end. + +addbase(Base, Set) -> + case Base of + [] -> Set; + _ -> addtemp(Base, Set) + end. + +add(Arg, Set) -> + case lists:member(Arg, Set) of + false -> [Arg|Set]; + _ -> Set + end. diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl new file mode 100644 index 0000000000..db7f53ad26 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_encode.erl @@ -0,0 +1,1302 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Copyright (C) 2000-2005 Mikael Pettersson +%%% +%%% This is the syntax of x86 r/m operands: +%%% +%%% opnd ::= reg mod == 11 +%%% | MEM[ea] mod != 11 +%%% +%%% ea ::= disp32(reg) mod == 10, r/m != ESP +%%% | disp32 sib12 mod == 10, r/m == 100 +%%% | disp8(reg) mod == 01, r/m != ESP +%%% | disp8 sib12 mod == 01, r/m == 100 +%%% | (reg) mod == 00, r/m != ESP and EBP +%%% | sib0 mod == 00, r/m == 100 +%%% | disp32 mod == 00, r/m == 101 [on x86-32] +%%% | disp32(%rip) mod == 00, r/m == 101 [on x86-64] +%%% +%%% // sib0: mod == 00 +%%% sib0 ::= disp32(,index,scale) base == EBP, index != ESP +%%% | disp32 base == EBP, index == 100 +%%% | (base,index,scale) base != EBP, index != ESP +%%% | (base) base != EBP, index == 100 +%%% +%%% // sib12: mod == 01 or 10 +%%% sib12 ::= (base,index,scale) index != ESP +%%% | (base) index == 100 +%%% +%%% scale ::= 00 | 01 | 10 | 11 index << scale +%%% +%%% Notes: +%%% +%%% 1. ESP cannot be used as index register. +%%% 2. Use of ESP as base register requires a SIB byte. +%%% 3. disp(reg), when reg != ESP, can be represented without +%%% [r/m == reg] or with [r/m == 100, base == reg] a SIB byte. +%%% 4. disp32 can be represented without [mod == 00, r/m == 101] +%%% or with [mod == 00, r/m == 100, base == 101, index == 100] +%%% a SIB byte. +%%% 5. x86-32 and x86-64 interpret mod==00b r/m==101b EAs differently: +%%% on x86-32 the disp32 is an absolute address, but on x86-64 the +%%% disp32 is relative to the %rip of the next instruction. +%%% Absolute disp32s need a SIB on x86-64. + +-module(hipe_x86_encode). + +-export([% condition codes + cc/1, + % 8-bit registers + %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + % 32-bit registers + %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, + % operands + sindex/2, sib/1, sib/2, + ea_disp32_base/2, ea_disp32_sib/2, + ea_disp8_base/2, ea_disp8_sib/2, + ea_base/1, + %% ea_disp32_sindex/1, % XXX: do not use on x86-32, only on x86-64 + ea_disp32_sindex/2, + ea_sib/1, ea_disp32/1, + rm_reg/1, rm_mem/1, + % instructions + insn_encode/3, insn_sizeof/2]). + +%%-define(DO_HIPE_X86_ENCODE_TEST,true). +-ifdef(DO_HIPE_X86_ENCODE_TEST). +-export([dotest/0, dotest/1]). % for testing, don't use +-endif. + +-define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end). +%-define(ASSERT(F,G), []). + +%%% condition codes + +-define(CC_O, 2#0000). % overflow +-define(CC_NO, 2#0001). % no overflow +-define(CC_B, 2#0010). % below, =u +-define(CC_E, 2#0100). % equal +-define(CC_NE, 2#0101). % not equal +-define(CC_BE, 2#0110). % below or equal, <=u +-define(CC_A, 2#0111). % above, >u +-define(CC_S, 2#1000). % sign, + +-define(CC_NS, 2#1001). % not sign, - +-define(CC_PE, 2#1010). % parity even +-define(CC_PO, 2#1011). % parity odd +-define(CC_L, 2#1100). % less than, =s +-define(CC_LE, 2#1110). % less or equal, <=s +-define(CC_G, 2#1111). % greater than, >s + +cc(o) -> ?CC_O; +cc(no) -> ?CC_NO; +cc(b) -> ?CC_B; +cc(ae) -> ?CC_AE; +cc(e) -> ?CC_E; +cc(ne) -> ?CC_NE; +cc(be) -> ?CC_BE; +cc(a) -> ?CC_A; +cc(s) -> ?CC_S; +cc(ns) -> ?CC_NS; +cc(pe) -> ?CC_PE; +cc(po) -> ?CC_PO; +cc(l) -> ?CC_L; +cc(ge) -> ?CC_GE; +cc(le) -> ?CC_LE; +cc(g) -> ?CC_G. + +%%% 8-bit registers + +-define(AL, 2#000). +-define(CL, 2#001). +-define(DL, 2#010). +-define(BL, 2#011). +-define(AH, 2#100). +-define(CH, 2#101). +-define(DH, 2#110). +-define(BH, 2#111). + +%% al() -> ?AL. +%% cl() -> ?CL. +%% dl() -> ?DL. +%% bl() -> ?BL. +%% ah() -> ?AH. +%% ch() -> ?CH. +%% dh() -> ?DH. +%% bh() -> ?BH. + +%%% 32-bit registers + +-define(EAX, 2#000). +-define(ECX, 2#001). +-define(EDX, 2#010). +-define(EBX, 2#011). +-define(ESP, 2#100). +-define(EBP, 2#101). +-define(ESI, 2#110). +-define(EDI, 2#111). + +%% eax() -> ?EAX. +%% ecx() -> ?ECX. +%% edx() -> ?EDX. +%% ebx() -> ?EBX. +%% esp() -> ?ESP. +%% ebp() -> ?EBP. +%% esi() -> ?ESI. +%% edi() -> ?EDI. + +%%% r/m operands + +sindex(Scale, Index) when is_integer(Scale), is_integer(Index) -> + ?ASSERT(sindex, Scale >= 0), + ?ASSERT(sindex, Scale =< 3), + ?ASSERT(sindex, Index =/= ?ESP), + {sindex, Scale, Index}. + +-record(sib, {sindex_opt, base :: integer()}). +sib(Base) when is_integer(Base) -> #sib{sindex_opt=none, base=Base}. +sib(Base, Sindex) when is_integer(Base) -> #sib{sindex_opt=Sindex, base=Base}. + +ea_disp32_base(Disp32, Base) when is_integer(Base) -> + ?ASSERT(ea_disp32_base, Base =/= ?ESP), + {ea_disp32_base, Disp32, Base}. +ea_disp32_sib(Disp32, SIB) -> {ea_disp32_sib, Disp32, SIB}. +ea_disp8_base(Disp8, Base) when is_integer(Base) -> + ?ASSERT(ea_disp8_base, Base =/= ?ESP), + {ea_disp8_base, Disp8, Base}. +ea_disp8_sib(Disp8, SIB) -> {ea_disp8_sib, Disp8, SIB}. +ea_base(Base) when is_integer(Base) -> + ?ASSERT(ea_base, Base =/= ?ESP), + ?ASSERT(ea_base, Base =/= ?EBP), + {ea_base, Base}. +%% ea_disp32_sindex(Disp32) -> {ea_disp32_sindex, Disp32, none}. +ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}. +ea_sib(SIB) -> + ?ASSERT(ea_sib, SIB#sib.base =/= ?EBP), + {ea_sib, SIB}. +ea_disp32(Disp32) -> {ea_disp32, Disp32}. + +rm_reg(Reg) -> {rm_reg, Reg}. +rm_mem(EA) -> {rm_mem, EA}. + +mk_modrm(Mod, RO, RM) -> + (Mod bsl 6) bor (RO bsl 3) bor RM. + +mk_sib(Scale, Index, Base) -> + (Scale bsl 6) bor (Index bsl 3) bor Base. + +le16(Word, Tail) -> + [Word band 16#FF, (Word bsr 8) band 16#FF | Tail]. + +le32(Word, Tail) when is_integer(Word) -> + [Word band 16#FF, (Word bsr 8) band 16#FF, + (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF | Tail]; +le32({Tag,Val}, Tail) -> % a relocatable datum + [{le32,Tag,Val} | Tail]. + +enc_sindex_opt({sindex,Scale,Index}) -> {Scale, Index}; +enc_sindex_opt(none) -> {2#00, 2#100}. + +enc_sib(#sib{sindex_opt=SindexOpt, base=Base}) -> + {Scale, Index} = enc_sindex_opt(SindexOpt), + mk_sib(Scale, Index, Base). + +enc_ea(EA, RO, Tail) -> + case EA of + {ea_disp32_base, Disp32, Base} -> + [mk_modrm(2#10, RO, Base) | le32(Disp32, Tail)]; + {ea_disp32_sib, Disp32, SIB} -> + [mk_modrm(2#10, RO, 2#100), enc_sib(SIB) | le32(Disp32, Tail)]; + {ea_disp8_base, Disp8, Base} -> + [mk_modrm(2#01, RO, Base), Disp8 | Tail]; + {ea_disp8_sib, Disp8, SIB} -> + [mk_modrm(2#01, RO, 2#100), enc_sib(SIB), Disp8 | Tail]; + {ea_base, Base} -> + [mk_modrm(2#00, RO, Base) | Tail]; + {ea_disp32_sindex, Disp32, SindexOpt} -> + {Scale, Index} = enc_sindex_opt(SindexOpt), + SIB = mk_sib(Scale, Index, 2#101), + MODRM = mk_modrm(2#00, RO, 2#100), + [MODRM, SIB | le32(Disp32, Tail)]; + {ea_sib, SIB} -> + [mk_modrm(2#00, RO, 2#100), enc_sib(SIB) | Tail]; + {ea_disp32, Disp32} -> + [mk_modrm(2#00, RO, 2#101) | le32(Disp32, Tail)] + end. + +encode_rm(RM, RO, Tail) -> + case RM of + {rm_reg, Reg} -> [mk_modrm(2#11, RO, Reg) | Tail]; + {rm_mem, EA} -> enc_ea(EA, RO, Tail) + end. + +sizeof_ea(EA) -> + case element(1, EA) of + ea_disp32_base -> 5; + ea_disp32_sib -> 6; + ea_disp8_base -> 2; + ea_disp8_sib -> 3; + ea_base -> 1; + ea_disp32_sindex -> 6; + ea_sib -> 2; + ea_disp32 -> 5 + end. + +sizeof_rm(RM) -> + case RM of + {rm_reg, _} -> 1; + {rm_mem, EA} -> sizeof_ea(EA) + end. + +%%% Floating point stack positions + +-define(ST0, 2#000). +-define(ST1, 2#001). +-define(ST2, 2#010). +-define(ST3, 2#011). +-define(ST4, 2#100). +-define(ST5, 2#101). +-define(ST6, 2#110). +-define(ST7, 2#111). + +st(0) -> ?ST0; +st(1) -> ?ST1; +st(2) -> ?ST2; +st(3) -> ?ST3; +st(4) -> ?ST4; +st(5) -> ?ST5; +st(6) -> ?ST6; +st(7) -> ?ST7. + + +%%% Instructions +%%% +%%% Insn ::= {Op,Opnds} +%%% Opnds ::= {Opnd1,...,Opndn} (n >= 0) +%%% Opnd ::= eax | ax | al | 1 | cl +%%% | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8} +%%% | {rm32,RM32} | {rm16,RM16} | {rm8,RM8} +%%% | {rel32,Rel32} | {rel8,Rel8} +%%% | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8} +%%% | {cc,CC} +%%% | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8} +%%% | {ea,EA} + +-define(PFX_OPND, 16#66). + +arith_binop_encode(SubOpcode, Opnds) -> + %% add, or, adc, sbb, and, sub, xor, cmp + case Opnds of + {eax, {imm32,Imm32}} -> + [16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#81 | encode_rm(RM32, SubOpcode, le32(Imm32, []))]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#83 | encode_rm(RM32, SubOpcode, [Imm8])]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#01 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> + [16#03 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])] + end. + +arith_binop_sizeof(Opnds) -> + %% add, or, adc, sbb, and, sub, xor, cmp + case Opnds of + {eax, {imm32,_}} -> + 1 + 4; + {{rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4; + {{rm32,RM32}, {imm8,_}} -> + 1 + sizeof_rm(RM32) + 1; + {{rm32,RM32}, {reg32,_}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}, {rm32,RM32}} -> + 1 + sizeof_rm(RM32) + end. + +bs_op_encode(Opcode, {{reg32,Reg32}, {rm32,RM32}}) -> % bsf, bsr + [16#0F, Opcode | encode_rm(RM32, Reg32, [])]. + +bs_op_sizeof({{reg32,_}, {rm32,RM32}}) -> % bsf, bsr + 2 + sizeof_rm(RM32). + +bswap_encode({{reg32,Reg32}}) -> + [16#0F, 16#C8 bor Reg32]. + +bswap_sizeof({{reg32,_}}) -> + 2. + +bt_op_encode(SubOpcode, Opnds) -> % bt, btc, btr, bts + case Opnds of + {{rm32,RM32}, {reg32,Reg32}} -> + [16#0F, 16#A3 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#0F, 16#BA | encode_rm(RM32, SubOpcode, [Imm8])] + end. + +bt_op_sizeof(Opnds) -> % bt, btc, btr, bts + case Opnds of + {{rm32,RM32}, {reg32,_}} -> + 2 + sizeof_rm(RM32); + {{rm32,RM32}, {imm8,_}} -> + 2 + sizeof_rm(RM32) + 1 + end. + +call_encode(Opnds) -> + case Opnds of + {{rel32,Rel32}} -> + [16#E8 | le32(Rel32, [])]; + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, 2#010, [])] + end. + +call_sizeof(Opnds) -> + case Opnds of + {{rel32,_}} -> + 1 + 4; + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32) + end. + +cbw_encode({}) -> + [?PFX_OPND, 16#98]. + +cbw_sizeof({}) -> + 2. + +nullary_op_encode(Opcode, {}) -> + %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std + [Opcode]. + +nullary_op_sizeof({}) -> + %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std + 1. + +cmovcc_encode({{cc,CC}, {reg32,Reg32}, {rm32,RM32}}) -> + [16#0F, 16#40 bor CC | encode_rm(RM32, Reg32, [])]. + +cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) -> + 2 + sizeof_rm(RM32). + +incdec_encode(SubOpcode, Opnds) -> % SubOpcode is either 0 or 1 + case Opnds of + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, SubOpcode, [])]; + {{reg32,Reg32}} -> + [16#40 bor (SubOpcode bsl 3) bor Reg32] + end. + +incdec_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}} -> + 1 + end. + +arith_unop_encode(Opcode, {{rm32,RM32}}) -> % div, idiv, mul, neg, not + [16#F7 | encode_rm(RM32, Opcode, [])]. + +arith_unop_sizeof({{rm32,RM32}}) -> % div, idiv, mul, neg, not + 1 + sizeof_rm(RM32). + +enter_encode({{imm16,Imm16}, {imm8,Imm8}}) -> + [16#C8 | le16(Imm16, [Imm8])]. + +enter_sizeof({{imm16,_}, {imm8,_}}) -> + 1 + 2 + 1. + +imul_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> % *= rm32 + [16#F7 | encode_rm(RM32, 2#101, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> % reg *= rm32 + [16#0F, 16#AF | encode_rm(RM32, Reg32, [])]; + {{reg32,Reg32}, {rm32,RM32}, {imm8,Imm8}} -> % reg := rm32 * sext(imm8) + [16#6B | encode_rm(RM32, Reg32, [Imm8])]; + {{reg32,Reg32}, {rm32,RM32}, {imm32,Imm32}} -> % reg := rm32 * imm32 + [16#69 | encode_rm(RM32, Reg32, le32(Imm32, []))] + end. + +imul_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}, {rm32,RM32}} -> + 2 + sizeof_rm(RM32); + {{reg32,_}, {rm32,RM32}, {imm8,_}} -> + 1 + sizeof_rm(RM32) + 1; + {{reg32,_}, {rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4 + end. + +jcc_encode(Opnds) -> + case Opnds of + {{cc,CC}, {rel8,Rel8}} -> + [16#70 bor CC, Rel8]; + {{cc,CC}, {rel32,Rel32}} -> + [16#0F, 16#80 bor CC | le32(Rel32, [])] + end. + +jcc_sizeof(Opnds) -> + case Opnds of + {{cc,_}, {rel8,_}} -> + 2; + {{cc,_}, {rel32,_}} -> + 2 + 4 + end. + +jmp8_op_encode(Opcode, {{rel8,Rel8}}) -> % jecxz, loop, loope, loopne + [Opcode, Rel8]. + +jmp8_op_sizeof({{rel8,_}}) -> % jecxz, loop, loope, loopne + 2. + +jmp_encode(Opnds) -> + case Opnds of + {{rel8,Rel8}} -> + [16#EB, Rel8]; + {{rel32,Rel32}} -> + [16#E9 | le32(Rel32, [])]; + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, 2#100, [])] + end. + +jmp_sizeof(Opnds) -> + case Opnds of + {{rel8,_}} -> + 2; + {{rel32,_}} -> + 1 + 4; + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32) + end. + +lea_encode({{reg32,Reg32}, {ea,EA}}) -> + [16#8D | enc_ea(EA, Reg32, [])]. + +lea_sizeof({{reg32,_}, {ea,EA}}) -> + 1 + sizeof_ea(EA). + +mov_encode(Opnds) -> + case Opnds of + {{rm8,RM8}, {reg8,Reg8}} -> + [16#88 | encode_rm(RM8, Reg8, [])]; + {{rm16,RM16}, {reg16,Reg16}} -> + [?PFX_OPND, 16#89 | encode_rm(RM16, Reg16, [])]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#89 | encode_rm(RM32, Reg32, [])]; + {{reg8,Reg8}, {rm8,RM8}} -> + [16#8A | encode_rm(RM8, Reg8, [])]; + {{reg16,Reg16}, {rm16,RM16}} -> + [?PFX_OPND, 16#8B | encode_rm(RM16, Reg16, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> + [16#8B | encode_rm(RM32, Reg32, [])]; + {al, {moffs8,Moffs8}} -> + [16#A0 | le32(Moffs8, [])]; + {ax, {moffs16,Moffs16}} -> + [?PFX_OPND, 16#A1 | le32(Moffs16, [])]; + {eax, {moffs32,Moffs32}} -> + [16#A1 | le32(Moffs32, [])]; + {{moffs8,Moffs8}, al} -> + [16#A2 | le32(Moffs8, [])]; + {{moffs16,Moffs16}, ax} -> + [?PFX_OPND, 16#A3 | le32(Moffs16, [])]; + {{moffs32,Moffs32}, eax} -> + [16#A3 | le32(Moffs32, [])]; + {{reg8,Reg8}, {imm8,Imm8}} -> + [16#B0 bor Reg8, Imm8]; + {{reg16,Reg16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#B8 bor Reg16 | le16(Imm16, [])]; + {{reg32,Reg32}, {imm32,Imm32}} -> + [16#B8 bor Reg32 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [16#C6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#C7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#C7 | encode_rm(RM32, 2#000, le32(Imm32, []))] + end. + +mov_sizeof(Opnds) -> + case Opnds of + {{rm8,RM8}, {reg8,_}} -> + 1 + sizeof_rm(RM8); + {{rm16,RM16}, {reg16,_}} -> + 2 + sizeof_rm(RM16); + {{rm32,RM32}, {reg32,_}} -> + 1 + sizeof_rm(RM32); + {{reg8,_}, {rm8,RM8}} -> + 1 + sizeof_rm(RM8); + {{reg16,_}, {rm16,RM16}} -> + 2 + sizeof_rm(RM16); + {{reg32,_}, {rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {al, {moffs8,_}} -> + 1 + 4; + {ax, {moffs16,_}} -> + 2 + 4; + {eax, {moffs32,_}} -> + 1 + 4; + {{moffs8,_}, al} -> + 1 + 4; + {{moffs16,_}, ax} -> + 2 + 4; + {{moffs32,_}, eax} -> + 1 + 4; + {{reg8,_}, {imm8,_}} -> + 2; + {{reg16,_}, {imm16,_}} -> + 2 + 2; + {{reg32,_}, {imm32,_}} -> + 1 + 4; + {{rm8,RM8}, {imm8,_}} -> + 1 + sizeof_rm(RM8) + 1; + {{rm16,RM16}, {imm16,_}} -> + 2 + sizeof_rm(RM16) + 2; + {{rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4 + end. + +movx_op_encode(Opcode, Opnds) -> % movsx, movzx + case Opnds of + {{reg16,Reg16}, {rm8,RM8}} -> + [?PFX_OPND, 16#0F, Opcode | encode_rm(RM8, Reg16, [])]; + {{reg32,Reg32}, {rm8,RM8}} -> + [16#0F, Opcode | encode_rm(RM8, Reg32, [])]; + {{reg32,Reg32}, {rm16,RM16}} -> + [16#0F, Opcode bor 1 | encode_rm(RM16, Reg32, [])] + end. + +movx_op_sizeof(Opnds) -> + case Opnds of + {{reg16,_}, {rm8,RM8}} -> + 3 + sizeof_rm(RM8); + {{reg32,_}, {rm8,RM8}} -> + 2 + sizeof_rm(RM8); + {{reg32,_}, {rm16,RM16}} -> + 2 + sizeof_rm(RM16) + end. + +pop_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + [16#8F | encode_rm(RM32, 2#000, [])]; + {{reg32,Reg32}} -> + [16#58 bor Reg32] + end. + +pop_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}} -> + 1 + end. + +push_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, 2#110, [])]; + {{reg32,Reg32}} -> + [16#50 bor Reg32]; + {{imm8,Imm8}} -> % sign-extended + [16#6A, Imm8]; + {{imm32,Imm32}} -> + [16#68 | le32(Imm32, [])] + end. + +push_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}} -> + 1; + {{imm8,_}} -> + 2; + {{imm32,_}} -> + 1 + 4 + end. + +shift_op_encode(SubOpcode, Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr + case Opnds of + {{rm32,RM32}, 1} -> + [16#D1 | encode_rm(RM32, SubOpcode, [])]; + {{rm32,RM32}, cl} -> + [16#D3 | encode_rm(RM32, SubOpcode, [])]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#C1 | encode_rm(RM32, SubOpcode, [Imm8])]; + {{rm16,RM16}, {imm8,Imm8}} -> + [?PFX_OPND, 16#C1 | encode_rm(RM16, SubOpcode, [Imm8])] + end. + +shift_op_sizeof(Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr + case Opnds of + {{rm32,RM32}, 1} -> + 1 + sizeof_rm(RM32); + {{rm32,RM32}, cl} -> + 1 + sizeof_rm(RM32); + {{rm32,RM32}, {imm8,_Imm8}} -> + 1 + sizeof_rm(RM32) + 1; + {{rm16,RM16}, {imm8,_Imm8}} -> + 1 + 1 + sizeof_rm(RM16) + 1 + end. + +ret_encode(Opnds) -> + case Opnds of + {} -> + [16#C3]; + {{imm16,Imm16}} -> + [16#C2 | le16(Imm16, [])] + end. + +ret_sizeof(Opnds) -> + case Opnds of + {} -> + 1; + {{imm16,_}} -> + 1 + 2 + end. + +setcc_encode({{cc,CC}, {rm8,RM8}}) -> + [16#0F, 16#90 bor CC | encode_rm(RM8, 2#000, [])]. + +setcc_sizeof({{cc,_}, {rm8,RM8}}) -> + 2 + sizeof_rm(RM8). + +shd_op_encode(Opcode, Opnds) -> + case Opnds of + {{rm32,RM32}, {reg32,Reg32}, {imm8,Imm8}} -> + [16#0F, Opcode | encode_rm(RM32, Reg32, [Imm8])]; + {{rm32,RM32}, {reg32,Reg32}, cl} -> + [16#0F, Opcode bor 1 | encode_rm(RM32, Reg32, [])] + end. + +shd_op_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}, {reg32,_}, {imm8,_}} -> + 2 + sizeof_rm(RM32) + 1; + {{rm32,RM32}, {reg32,_}, cl} -> + 2 + sizeof_rm(RM32) + end. + +test_encode(Opnds) -> + case Opnds of + {eax, {imm32,Imm32}} -> + [16#A9 | le32(Imm32, [])]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#85 | encode_rm(RM32, Reg32, [])] + end. + +test_sizeof(Opnds) -> + case Opnds of + {eax, {imm32,_}} -> + 1 + 4; + {{rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4; + {{rm32,RM32}, {reg32,_}} -> + 1 + sizeof_rm(RM32) + end. + +fild_encode(Opnds) -> + %% The operand cannot be a register! + {{rm32, RM32}} = Opnds, + [16#DB | encode_rm(RM32, 2#000, [])]. + +fild_sizeof(Opnds) -> + {{rm32, RM32}} = Opnds, + 1 + sizeof_rm(RM32). + +fld_encode(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DD | encode_rm(RM64fp, 2#000, [])]; + {{fpst, St}} -> + [16#D9, 16#C0 bor st(St)] + end. + +fld_sizeof(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + 1 + sizeof_rm(RM64fp); + {{fpst, _}} -> + 2 + end. + +fp_comm_arith_encode(OpCode, Opnds) -> + %% fadd, fmul + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + [16#D8, (16#C0 bor (OpCode bsl 3)) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + [16#DC, (16#C0 bor (OpCode bsl 3)) bor st(St)] + end. + +fp_comm_arith_pop_encode(OpCode, Opnds) -> + %% faddp, fmulp + case Opnds of + [] -> + [16#DE, 16#C0 bor (OpCode bsl 3) bor st(1)]; + {{fpst,St},{fpst,0}} -> + [16#DE, 16#C0 bor (OpCode bsl 3) bor st(St)] + end. + +fp_arith_encode(OpCode, Opnds) -> + %% fdiv, fsub + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + OpCode0 = OpCode band 2#110, + [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + OpCode0 = OpCode bor 1, + [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_pop_encode(OpCode, Opnds) -> + %% fdivp, fsubp + OpCode0 = OpCode bor 1, + case Opnds of + [] -> + [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(1)]; + {{fpst,St}, {fpst,0}} -> + [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_rev_encode(OpCode, Opnds) -> + %% fdivr, fsubr + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + OpCode0 = OpCode bor 1, + [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + OpCode0 = OpCode band 2#110, + [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_rev_pop_encode(OpCode, Opnds) -> + %% fdivrp, fsubrp + OpCode0 = OpCode band 2#110, + case Opnds of + [] -> + [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(1)]; + {{fpst,St}, {fpst, 0}} -> + [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_sizeof(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + 1 + sizeof_rm(RM64fp); + {{fpst,0}, {fpst,_}} -> + 2; + {{fpst,_}, {fpst,0}} -> + 2 + end. + +fst_encode(OpCode, Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DD | encode_rm(RM64fp, OpCode, [])]; + {{fpst, St}} -> + [16#DD, 16#C0 bor (OpCode bsl 3) bor st(St)] + end. + +fst_sizeof(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + 1 + sizeof_rm(RM64fp); + {{fpst, _}} -> + 2 + end. + +fchs_encode() -> + [16#D9, 16#E0]. +fchs_sizeof() -> + 2. + +ffree_encode({{fpst, St}})-> + [16#DD, 16#C0 bor st(St)]. +ffree_sizeof() -> + 2. + +fwait_encode() -> + [16#9B]. +fwait_sizeof() -> + 1. + +fxch_encode(Opnds) -> + case Opnds of + [] -> + [16#D9, 16#C8 bor st(1)]; + {{fpst, St}} -> + [16#D9, 16#C8 bor st(St)] + end. +fxch_sizeof() -> + 2. + +insn_encode(Op, Opnds, Offset) -> + Bytes = insn_encode_internal(Op, Opnds), + case has_relocs(Bytes) of + false -> % the common case + {Bytes, []}; + _ -> + fix_relocs(Bytes, Offset, [], []) + end. + +has_relocs([{le32,_,_}|_]) -> true; +has_relocs([_|Bytes]) -> has_relocs(Bytes); +has_relocs([]) -> false. + +fix_relocs([{le32,Tag,Val}|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+4, + [16#00, 16#00, 16#00, 16#00 | Code], + [{Tag,Offset,Val}|Relocs]); +fix_relocs([Byte|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+1, [Byte|Code], Relocs); +fix_relocs([], _Offset, Code, Relocs) -> + {lists:reverse(Code), lists:reverse(Relocs)}. + +insn_encode_internal(Op, Opnds) -> + case Op of + 'adc' -> arith_binop_encode(2#010, Opnds); + 'add' -> arith_binop_encode(2#000, Opnds); + 'and' -> arith_binop_encode(2#100, Opnds); + 'bsf' -> bs_op_encode(16#BC, Opnds); + 'bsr' -> bs_op_encode(16#BD, Opnds); + 'bswap' -> bswap_encode(Opnds); + 'bt' -> bt_op_encode(2#100, Opnds); + 'btc' -> bt_op_encode(2#111, Opnds); + 'btr' -> bt_op_encode(2#110, Opnds); + 'bts' -> bt_op_encode(2#101, Opnds); + 'call' -> call_encode(Opnds); + 'cbw' -> cbw_encode(Opnds); + 'cdq' -> nullary_op_encode(16#99, Opnds); + 'clc' -> nullary_op_encode(16#F8, Opnds); + 'cld' -> nullary_op_encode(16#FC, Opnds); + 'cmc' -> nullary_op_encode(16#F5, Opnds); + 'cmovcc' -> cmovcc_encode(Opnds); + 'cmp' -> arith_binop_encode(2#111, Opnds); + 'cwde' -> nullary_op_encode(16#98, Opnds); + 'dec' -> incdec_encode(2#001, Opnds); + 'div' -> arith_unop_encode(2#110, Opnds); + 'enter' -> enter_encode(Opnds); + 'fadd' -> fp_comm_arith_encode(2#000, Opnds); + 'faddp' -> fp_comm_arith_pop_encode(2#000, Opnds); + 'fchs' -> fchs_encode(); + 'fdiv' -> fp_arith_encode(2#110, Opnds); + 'fdivp' -> fp_arith_pop_encode(2#110, Opnds); + 'fdivr' -> fp_arith_rev_encode(2#111, Opnds); + 'fdivrp' -> fp_arith_rev_pop_encode(2#111, Opnds); + 'ffree' -> ffree_encode(Opnds); + 'fild' -> fild_encode(Opnds); + 'fld' -> fld_encode(Opnds); + 'fmul' -> fp_comm_arith_encode(2#001, Opnds); + 'fmulp' -> fp_comm_arith_pop_encode(2#001, Opnds); + 'fst' -> fst_encode(2#010, Opnds); + 'fstp' -> fst_encode(2#011, Opnds); + 'fsub' -> fp_arith_encode(2#100, Opnds); + 'fsubp' -> fp_arith_pop_encode(2#100, Opnds); + 'fsubr' -> fp_arith_rev_encode(2#101, Opnds); + 'fsubrp' -> fp_arith_rev_pop_encode(2#101, Opnds); + 'fwait' -> fwait_encode(); + 'fxch' -> fxch_encode(Opnds); + 'idiv' -> arith_unop_encode(2#111, Opnds); + 'imul' -> imul_encode(Opnds); + 'inc' -> incdec_encode(2#000, Opnds); + 'into' -> nullary_op_encode(16#CE, Opnds); + 'jcc' -> jcc_encode(Opnds); + 'jecxz' -> jmp8_op_encode(16#E3, Opnds); + 'jmp' -> jmp_encode(Opnds); + 'lea' -> lea_encode(Opnds); + 'leave' -> nullary_op_encode(16#C9, Opnds); + 'loop' -> jmp8_op_encode(16#E2, Opnds); + 'loope' -> jmp8_op_encode(16#E1, Opnds); + 'loopne' -> jmp8_op_encode(16#E0, Opnds); + 'mov' -> mov_encode(Opnds); + 'movsx' -> movx_op_encode(16#BE, Opnds); + 'movzx' -> movx_op_encode(16#B6, Opnds); + 'mul' -> arith_unop_encode(2#100, Opnds); + 'neg' -> arith_unop_encode(2#011, Opnds); + 'nop' -> nullary_op_encode(16#90, Opnds); + 'not' -> arith_unop_encode(2#010, Opnds); + 'or' -> arith_binop_encode(2#001, Opnds); + 'pop' -> pop_encode(Opnds); + 'prefix_fs' -> nullary_op_encode(16#64, Opnds); + 'push' -> push_encode(Opnds); + 'rcl' -> shift_op_encode(2#010, Opnds); + 'rcr' -> shift_op_encode(2#011, Opnds); + 'ret' -> ret_encode(Opnds); + 'rol' -> shift_op_encode(2#000, Opnds); + 'ror' -> shift_op_encode(2#001, Opnds); + 'sar' -> shift_op_encode(2#111, Opnds); + 'sbb' -> arith_binop_encode(2#011, Opnds); + 'setcc' -> setcc_encode(Opnds); + 'shl' -> shift_op_encode(2#100, Opnds); + 'shld' -> shd_op_encode(16#A4, Opnds); + 'shr' -> shift_op_encode(2#101, Opnds); + 'shrd' -> shd_op_encode(16#AC, Opnds); + 'stc' -> nullary_op_encode(16#F9, Opnds); + 'std' -> nullary_op_encode(16#FD, Opnds); + 'sub' -> arith_binop_encode(2#101, Opnds); + 'test' -> test_encode(Opnds); + 'xor' -> arith_binop_encode(2#110, Opnds); + _ -> exit({?MODULE,insn_encode,Op}) + end. + +insn_sizeof(Op, Opnds) -> + case Op of + 'adc' -> arith_binop_sizeof(Opnds); + 'add' -> arith_binop_sizeof(Opnds); + 'and' -> arith_binop_sizeof(Opnds); + 'bsf' -> bs_op_sizeof(Opnds); + 'bsr' -> bs_op_sizeof(Opnds); + 'bswap' -> bswap_sizeof(Opnds); + 'bt' -> bt_op_sizeof(Opnds); + 'btc' -> bt_op_sizeof(Opnds); + 'btr' -> bt_op_sizeof(Opnds); + 'bts' -> bt_op_sizeof(Opnds); + 'call' -> call_sizeof(Opnds); + 'cbw' -> cbw_sizeof(Opnds); + 'cdq' -> nullary_op_sizeof(Opnds); + 'clc' -> nullary_op_sizeof(Opnds); + 'cld' -> nullary_op_sizeof(Opnds); + 'cmc' -> nullary_op_sizeof(Opnds); + 'cmovcc' -> cmovcc_sizeof(Opnds); + 'cmp' -> arith_binop_sizeof(Opnds); + 'cwde' -> nullary_op_sizeof(Opnds); + 'dec' -> incdec_sizeof(Opnds); + 'div' -> arith_unop_sizeof(Opnds); + 'enter' -> enter_sizeof(Opnds); + 'fadd' -> fp_arith_sizeof(Opnds); + 'faddp' -> fp_arith_sizeof(Opnds); + 'fchs' -> fchs_sizeof(); + 'fdiv' -> fp_arith_sizeof(Opnds); + 'fdivp' -> fp_arith_sizeof(Opnds); + 'fdivr' -> fp_arith_sizeof(Opnds); + 'fdivrp' -> fp_arith_sizeof(Opnds); + 'ffree' -> ffree_sizeof(); + 'fild' -> fild_sizeof(Opnds); + 'fld' -> fld_sizeof(Opnds); + 'fmul' -> fp_arith_sizeof(Opnds); + 'fmulp' -> fp_arith_sizeof(Opnds); + 'fst' -> fst_sizeof(Opnds); + 'fstp' -> fst_sizeof(Opnds); + 'fsub' -> fp_arith_sizeof(Opnds); + 'fsubp' -> fp_arith_sizeof(Opnds); + 'fsubr' -> fp_arith_sizeof(Opnds); + 'fsubrp' -> fp_arith_sizeof(Opnds); + 'fwait' -> fwait_sizeof(); + 'fxch' -> fxch_sizeof(); + 'idiv' -> arith_unop_sizeof(Opnds); + 'imul' -> imul_sizeof(Opnds); + 'inc' -> incdec_sizeof(Opnds); + 'into' -> nullary_op_sizeof(Opnds); + 'jcc' -> jcc_sizeof(Opnds); + 'jecxz' -> jmp8_op_sizeof(Opnds); + 'jmp' -> jmp_sizeof(Opnds); + 'lea' -> lea_sizeof(Opnds); + 'leave' -> nullary_op_sizeof(Opnds); + 'loop' -> jmp8_op_sizeof(Opnds); + 'loope' -> jmp8_op_sizeof(Opnds); + 'loopne' -> jmp8_op_sizeof(Opnds); + 'mov' -> mov_sizeof(Opnds); + 'movsx' -> movx_op_sizeof(Opnds); + 'movzx' -> movx_op_sizeof(Opnds); + 'mul' -> arith_unop_sizeof(Opnds); + 'neg' -> arith_unop_sizeof(Opnds); + 'nop' -> nullary_op_sizeof(Opnds); + 'not' -> arith_unop_sizeof(Opnds); + 'or' -> arith_binop_sizeof(Opnds); + 'pop' -> pop_sizeof(Opnds); + 'prefix_fs' -> nullary_op_sizeof(Opnds); + 'push' -> push_sizeof(Opnds); + 'rcl' -> shift_op_sizeof(Opnds); + 'rcr' -> shift_op_sizeof(Opnds); + 'ret' -> ret_sizeof(Opnds); + 'rol' -> shift_op_sizeof(Opnds); + 'ror' -> shift_op_sizeof(Opnds); + 'sar' -> shift_op_sizeof(Opnds); + 'sbb' -> arith_binop_sizeof(Opnds); + 'setcc' -> setcc_sizeof(Opnds); + 'shl' -> shift_op_sizeof(Opnds); + 'shld' -> shd_op_sizeof(Opnds); + 'shr' -> shift_op_sizeof(Opnds); + 'shrd' -> shd_op_sizeof(Opnds); + 'stc' -> nullary_op_sizeof(Opnds); + 'std' -> nullary_op_sizeof(Opnds); + 'sub' -> arith_binop_sizeof(Opnds); + 'test' -> test_sizeof(Opnds); + 'xor' -> arith_binop_sizeof(Opnds); + _ -> exit({?MODULE,insn_sizeof,Op}) + end. + +%%===================================================================== +%% testing interface +%%===================================================================== + +-ifdef(DO_HIPE_X86_ENCODE_TEST). + +say(OS, Str) -> + file:write(OS, Str). + +digit16(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, "0x"), + say(OS, [digit16(Byte bsr 4)]), + say(OS, [digit16(Byte)]). + +init(OS) -> + say(OS, "\t.text\n"). + +say_bytes(OS, Byte0, Bytes0) -> + say_byte(OS, Byte0), + case Bytes0 of + [] -> + say(OS, "\n"); + [Byte1|Bytes1] -> + say(OS, ","), + say_bytes(OS, Byte1, Bytes1) + end. + +t(OS, Op, Opnds) -> + insn_sizeof(Op, Opnds), + {[Byte|Bytes],[]} = insn_encode(Op, Opnds, 0), + say(OS, "\t.byte "), + say_bytes(OS, Byte, Bytes). + +dotest1(OS) -> + init(OS), + % exercise all rm32 types + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32(16#87654321)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321,sindex(2#10,?EDI))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_base(?ECX)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_base(16#3,?ECX)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_base(16#87654321,?EBP)}}), + t(OS,call,{{rm32,rm_reg(?EAX)}}), + t(OS,call,{{rm32,rm_mem(ea_disp32_sindex(16#87654321,sindex(2#10,?EDI)))}}), + t(OS,call,{{rel32,-5}}), + % default parameters for the tests below + Word32 = 16#87654321, + Word16 = 16#F00F, + Word8 = 16#80, + Imm32 = {imm32,Word32}, + Imm16 = {imm16,Word16}, + Imm8 = {imm8,Word8}, + RM32 = {rm32,rm_reg(?EDX)}, + RM16 = {rm16,rm_reg(?EDX)}, + RM8 = {rm8,rm_reg(?EDX)}, + Rel32 = {rel32,Word32}, + Rel8 = {rel8,Word8}, + Moffs32 = {moffs32,Word32}, + Moffs16 = {moffs16,Word32}, + Moffs8 = {moffs8,Word32}, + CC = {cc,?CC_G}, + Reg32 = {reg32,?EAX}, + Reg16 = {reg16,?EAX}, + Reg8 = {reg8,?AH}, + EA = {ea,ea_base(?ECX)}, + % exercise each instruction definition + t(OS,'adc',{eax,Imm32}), + t(OS,'adc',{RM32,Imm32}), + t(OS,'adc',{RM32,Imm8}), + t(OS,'adc',{RM32,Reg32}), + t(OS,'adc',{Reg32,RM32}), + t(OS,'add',{eax,Imm32}), + t(OS,'add',{RM32,Imm32}), + t(OS,'add',{RM32,Imm8}), + t(OS,'add',{RM32,Reg32}), + t(OS,'add',{Reg32,RM32}), + t(OS,'and',{eax,Imm32}), + t(OS,'and',{RM32,Imm32}), + t(OS,'and',{RM32,Imm8}), + t(OS,'and',{RM32,Reg32}), + t(OS,'and',{Reg32,RM32}), + t(OS,'bsf',{Reg32,RM32}), + t(OS,'bsr',{Reg32,RM32}), + t(OS,'bswap',{Reg32}), + t(OS,'bt',{RM32,Reg32}), + t(OS,'bt',{RM32,Imm8}), + t(OS,'btc',{RM32,Reg32}), + t(OS,'btc',{RM32,Imm8}), + t(OS,'btr',{RM32,Reg32}), + t(OS,'btr',{RM32,Imm8}), + t(OS,'bts',{RM32,Reg32}), + t(OS,'bts',{RM32,Imm8}), + t(OS,'call',{Rel32}), + t(OS,'call',{RM32}), + t(OS,'cbw',{}), + t(OS,'cdq',{}), + t(OS,'clc',{}), + t(OS,'cld',{}), + t(OS,'cmc',{}), + t(OS,'cmovcc',{CC,Reg32,RM32}), + t(OS,'cmp',{eax,Imm32}), + t(OS,'cmp',{RM32,Imm32}), + t(OS,'cmp',{RM32,Imm8}), + t(OS,'cmp',{RM32,Reg32}), + t(OS,'cmp',{Reg32,RM32}), + t(OS,'cwde',{}), + t(OS,'dec',{RM32}), + t(OS,'dec',{Reg32}), + t(OS,'div',{RM32}), + t(OS,'enter',{Imm16,{imm8,3}}), + t(OS,'idiv',{RM32}), + t(OS,'imul',{RM32}), + t(OS,'imul',{Reg32,RM32}), + t(OS,'imul',{Reg32,RM32,Imm8}), + t(OS,'imul',{Reg32,RM32,Imm32}), + t(OS,'inc',{RM32}), + t(OS,'inc',{Reg32}), + t(OS,'into',{}), + t(OS,'jcc',{CC,Rel8}), + t(OS,'jcc',{CC,Rel32}), + t(OS,'jecxz',{Rel8}), + t(OS,'jmp',{Rel8}), + t(OS,'jmp',{Rel32}), + t(OS,'jmp',{RM32}), + t(OS,'lea',{Reg32,EA}), + t(OS,'leave',{}), + t(OS,'loop',{Rel8}), + t(OS,'loope',{Rel8}), + t(OS,'loopne',{Rel8}), + t(OS,'mov',{RM8,Reg8}), + t(OS,'mov',{RM16,Reg16}), + t(OS,'mov',{RM32,Reg32}), + t(OS,'mov',{Reg8,RM8}), + t(OS,'mov',{Reg16,RM16}), + t(OS,'mov',{Reg32,RM32}), + t(OS,'mov',{al,Moffs8}), + t(OS,'mov',{ax,Moffs16}), + t(OS,'mov',{eax,Moffs32}), + t(OS,'mov',{Moffs8,al}), + t(OS,'mov',{Moffs16,ax}), + t(OS,'mov',{Moffs32,eax}), + t(OS,'mov',{Reg8,Imm8}), + t(OS,'mov',{Reg16,Imm16}), + t(OS,'mov',{Reg32,Imm32}), + t(OS,'mov',{RM8,Imm8}), + t(OS,'mov',{RM16,Imm16}), + t(OS,'mov',{RM32,Imm32}), + t(OS,'movsx',{Reg16,RM8}), + t(OS,'movsx',{Reg32,RM8}), + t(OS,'movsx',{Reg32,RM16}), + t(OS,'movzx',{Reg16,RM8}), + t(OS,'movzx',{Reg32,RM8}), + t(OS,'movzx',{Reg32,RM16}), + t(OS,'mul',{RM32}), + t(OS,'neg',{RM32}), + t(OS,'nop',{}), + t(OS,'not',{RM32}), + t(OS,'or',{eax,Imm32}), + t(OS,'or',{RM32,Imm32}), + t(OS,'or',{RM32,Imm8}), + t(OS,'or',{RM32,Reg32}), + t(OS,'or',{Reg32,RM32}), + t(OS,'pop',{RM32}), + t(OS,'pop',{Reg32}), + t(OS,'push',{RM32}), + t(OS,'push',{Reg32}), + t(OS,'push',{Imm8}), + t(OS,'push',{Imm32}), + t(OS,'rcl',{RM32,1}), + t(OS,'rcl',{RM32,cl}), + t(OS,'rcl',{RM32,Imm8}), + t(OS,'rcl',{RM16,Imm8}), + t(OS,'rcr',{RM32,1}), + t(OS,'rcr',{RM32,cl}), + t(OS,'rcr',{RM32,Imm8}), + t(OS,'rcr',{RM16,Imm8}), + t(OS,'ret',{}), + t(OS,'ret',{Imm16}), + t(OS,'rol',{RM32,1}), + t(OS,'rol',{RM32,cl}), + t(OS,'rol',{RM32,Imm8}), + t(OS,'rol',{RM16,Imm8}), + t(OS,'ror',{RM32,1}), + t(OS,'ror',{RM32,cl}), + t(OS,'ror',{RM32,Imm8}), + t(OS,'ror',{RM16,Imm8}), + t(OS,'sar',{RM32,1}), + t(OS,'sar',{RM32,cl}), + t(OS,'sar',{RM32,Imm8}), + t(OS,'sar',{RM16,Imm8}), + t(OS,'sbb',{eax,Imm32}), + t(OS,'sbb',{RM32,Imm32}), + t(OS,'sbb',{RM32,Imm8}), + t(OS,'sbb',{RM32,Reg32}), + t(OS,'sbb',{Reg32,RM32}), + t(OS,'setcc',{CC,RM8}), + t(OS,'shl',{RM32,1}), + t(OS,'shl',{RM32,cl}), + t(OS,'shl',{RM32,Imm8}), + t(OS,'shl',{RM16,Imm8}), + t(OS,'shld',{RM32,Reg32,Imm8}), + t(OS,'shld',{RM32,Reg32,cl}), + t(OS,'shr',{RM32,1}), + t(OS,'shr',{RM32,cl}), + t(OS,'shr',{RM32,Imm8}), + t(OS,'shr',{RM16,Imm8}), + t(OS,'shrd',{RM32,Reg32,Imm8}), + t(OS,'shrd',{RM32,Reg32,cl}), + t(OS,'stc',{}), + t(OS,'std',{}), + t(OS,'sub',{eax,Imm32}), + t(OS,'sub',{RM32,Imm32}), + t(OS,'sub',{RM32,Imm8}), + t(OS,'sub',{RM32,Reg32}), + t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{eax,Imm32}), + t(OS,'test',{RM32,Imm32}), + t(OS,'test',{RM32,Reg32}), + t(OS,'xor',{eax,Imm32}), + t(OS,'xor',{RM32,Imm32}), + t(OS,'xor',{RM32,Imm8}), + t(OS,'xor',{RM32,Reg32}), + t(OS,'xor',{Reg32,RM32}), + t(OS,'prefix_fs',{}), t(OS,'add',{{reg32,?EAX},{rm32,rm_mem(ea_disp32(16#20))}}), + []. + +dotest() -> dotest1(group_leader()). % stdout == group_leader + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). +-endif. diff --git a/lib/hipe/x86/hipe_x86_encode.txt b/lib/hipe/x86/hipe_x86_encode.txt new file mode 100644 index 0000000000..13746e2a47 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_encode.txt @@ -0,0 +1,213 @@ +$Id$ + +hipe_x86_encode USAGE GUIDE +Revision 0.4, 2001-10-09 + +This document describes how to use the hipe_x86_encode.erl module. + +Preliminaries +------------- +This is not a tutorial on the x86 architecture. The reader +should be familiar with both the programming model and +the general syntax of instructions and their operands. + +The hipe_x86_encode module follows the conventions in the +"Intel Architecture Software Developer's Manual, Volume 2: +Instruction Set Reference" document. In particular, the +order of source and destination operands in instructions +follows Intel's conventions: "add eax,edx" adds edx to eax. +The GNU Assembler "gas" follows the so-called AT&T syntax +which reverses the order of the source and destination operands. + +Basic Functionality +------------------- +The hipe_x86_encode module implements the mapping from symbolic x86 +instructions to their binary representation, as lists of bytes. + +Instructions and operands have to match actual x86 instructions +and operands exactly. The mapping from "abstract" instructions +to correct x86 instructions has to be done before the instructions +are passed to the hipe_x86_encode module. (In HiPE, this mapping +is done by the hipe_x86_assemble module.) + +The hipe_x86_encode module handles arithmetic operations on 32-bit +integers, data movement of 8, 16, and 32-bit words, and most +control flow operations. A 32-bit address and operand size process +mode is assumed, which is what Unix and Linux systems use. + +Operations and registers related to floating-point, MMX, SIMD, 3dnow!, +or operating system control are not implemented. Segment registers +are supported minimally: a 'prefix_fs' pseudo-instruction can be +used to insert an FS segment register override prefix. + +Instruction Syntax +------------------ +The function hipe_x86_encode:insn_encode/1 takes an instruction in +symbolic form and translates it to its binary representation, +as a list of bytes. + +Symbolic instructions are Erlang terms in the following syntax: + + Insn ::= {Op,Opnds} + Op ::= (an Erlang atom) + Opnds ::= {Opnd1,...,Opndn} (n >= 0) + Opnd ::= eax | ax | al | 1 | cl + | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8} + | {rm32,RM32} | {rm16,RM16} | {rm8,RM8} + | {rel32,Rel32} | {rel8,Rel8} + | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8} + | {cc,CC} + | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8} + | {ea,EA} + Imm32 ::= (a 32-bit integer; immediate value) + Imm16 ::= (a 16-bit integer; immediate value) + Imm8 ::= (an 8-bit integer; immediate value) + Rel32 ::= (a 32-bit integer; jump offset) + Rel8 ::= (an 8-bit integer; jump offset) + Moffs32 ::= (a 32-bit integer; address of 32-bit word) + Moffs16 ::= (a 32-bit integer; address of 16-bit word) + Moffs8 ::= (a 32-bit integer; address of 8-bit word) + CC ::= (a 4-bit condition code) + Reg32 ::= (a 3-bit register number of a 32-bit register) + Reg16 ::= (same as Reg32, but the register size is 16 bits) + Reg8 ::= (a 3-bit register number of an 8-bit register) + EA ::= (general operand; a memory cell) + RM32 ::= (general operand; a 32-bit register or memory cell) + RM16 ::= (same as RM32, but the operand size is 16 bits) + RM8 ::= (general operand; an 8-bit register or memory cell) + +To construct these terms, the hipe_x86_encode module exports several +helper functions: + +cc/1 + Converts an atom to a 4-bit condition code. + +al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0 + Returns a 3-bit register number for an 8-bit register. + +eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0 + Returns a 3-bit register number for a 32- or 16-bit register. + +A general operand can be a register or a memory operand. +An x86 memory operand is expressed as an "effective address": + + Displacement(Base register,Index register,Scale) +or + [base register] + [(index register) * (scale)] + [displacement] + +where the base register is any of the 8 integer registers, +the index register in any of the 8 integer registers except ESP, +scale is 0, 1, 2, or 3 (multiply index with 1, 2, 4, or 8), +and displacement is an 8- or 32-bit offset. +Most components are optional. + +An effective address is constructed by calling one of the following +nine functions: + +ea_base/1 + ea_base(Reg32), where Reg32 is not ESP or EBP, + constructs the EA "(Reg32)", i.e. Reg32. +ea_disp32/1 + ea_disp32(Disp32) construct the EA "Disp32" +ea_disp32_base/2 + ea_disp32(Disp32, Reg32), where Reg32 is not ESP, + constructs the EA "Disp32(Reg32)", i.e. Reg32+Disp32. +ea_disp8_base/2 + This is like ea_disp32_base/2, except the displacement + is 8 bits instead of 32 bits. The CPU will _sign-extend_ + the 8-bit displacement to 32 bits before using it. +ea_disp32_sindex/1 + ea_disp32_sindex(Disp32) constructs the EA "Disp32", + but uses a longer encoding than ea_disp32/1. + Hint: Don't use this one. + +The last four forms use index registers with or without scaling +factors and base registers, so-called "SIBs". To build these, call: + +sindex/2 + sindex(Scale, Index), where scale is 0, 1, 2, or 3, and + Index is a 32-bit integer register except ESP, constructs + part of a SIB representing "Index * 2^Scale". +sib/1 + sib(Reg32) constructs a SIB containing only a base register + and no scaled index, "(Reg32)", i.e. "Reg32". +sib/2 + sib(Reg32, sindex(Scale, Index)) constructs a SIB + "(Reg32,Index,Scale)", i.e. "Reg32 + (Index * 2^Scale)". + +ea_sib/1 + ea_sib(SIB), where SIB's base register is not EBP, + constructs an EA which is that SIB, i.e. "(Base)" or + "(Base,Index,Scale)". +ea_disp32_sib/2 + ea_disp32_sib(Disp32, SIB) constructs the EA "Disp32(SIB)", + i.e. "Base+Disp32" or "Base+(Index*2^Scale)+Disp32". +ea_disp32_sindex/2 + ea_disp32_sindex(Disp32, Sindex) constructs the EA + "Disp32(,Index,Scale)", i.e. "(Index*2^Scale)+Disp32". +ea_disp8_sib/2 + This is just like ea_disp32_sib/2, except the displacement + is 8 bits (with sign-extension). + +To construct a general operand, call one of these two functions: + +rm_reg/1 + rm_reg(Reg) constructs a general operand which is that register. +rm_mem/1 + rm_mem(EA) constucts a general operand which is the memory + cell addressed by EA. + +A symbolic instruction with name "Op" and the n operands "Opnd1" +to "Opndn" is represented as the tuple + + {Op, {Opnd1, ..., Opndn}} + +Usage +----- +Once a symbolic instruction "Insn" has been constructed, it can be +translated to binary by calling + + insn_encode(Insn) + +which returns a list of bytes. + +Since x86 instructions have varying size (as opposed to most +RISC machines), there is also a function + + insn_sizeof(Insn) + +which returns the number of bytes the binary encoding will occupy. +insn_sizeof(Insn) equals length(insn_encode(Insn)), but insn_sizeof +is cheaper to compute. This is useful for two purposes: (1) when +compiling to memory, one needs to know in advance how many bytes of +memory to allocate for a piece of code, and (2) when computing the +relative distance between a jump or call instruction and its target +label. + +Examples +-------- +1. nop +is constructed as + {nop, {}} + +2. add eax,edx (eax := eax + edx) +can be constructed as + {add, {eax, {reg32, hipe_x86_encode:edx()}}} +or as + Reg32 = {reg32, hipe_x86_encode:eax()}, + RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:edx())}, + {add, {Reg32, RM32}} + +3. mov edx,(eax) (edx := MEM[eax]) +is constructed as + Reg32 = {reg32, hipe_x86_encode:edx()}, + RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:eax())}, + {mov, {Reg32, RM32}} + +Addendum +-------- +The hipe_x86_encode.erl source code is the authoritative reference +for the hipe_x86_encode module. + +Please report errors in either hipe_x86_encode.erl or this guide +to mikpe@it.uu.se. diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl new file mode 100644 index 0000000000..0a3317a369 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -0,0 +1,687 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% x86 stack frame handling +%%% +%%% - map non-register temps to stack slots +%%% - add explicit stack management code to prologue and epilogue, +%%% and at calls and tailcalls +%%% +%%% TODO: +%%% - Compute max stack in a pre-pass? (get rid of ref cell updates) +%%% - Merge all_temps and defun_minframe to a single +%%% pass, for compile-time efficiency reasons. + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_FRAME, hipe_amd64_frame). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(LEAF_WORDS, ?AMD64_LEAF_WORDS). +-else. +-define(HIPE_X86_FRAME, hipe_x86_frame). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(LEAF_WORDS, ?X86_LEAF_WORDS). +-endif. + +-module(?HIPE_X86_FRAME). +-export([frame/2]). +-include("../x86/hipe_x86.hrl"). +-include("../rtl/hipe_literals.hrl"). + +frame(Defun, _Options) -> + Formals = fix_formals(hipe_x86:defun_formals(Defun)), + Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), + MinFrame = defun_minframe(Defun), + Temps = ensure_minframe(MinFrame, Temps0), + CFG0 = hipe_x86_cfg:init(Defun), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + CFG1 = do_body(CFG0, Liveness, Formals, Temps), + hipe_x86_cfg:linearise(CFG1). + +fix_formals(Formals) -> + fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). + +fix_formals(0, Rest) -> Rest; +fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest); +fix_formals(_, []) -> []. + +do_body(CFG0, Liveness, Formals, Temps) -> + Context = mk_context(Liveness, Formals, Temps), + CFG1 = do_blocks(CFG0, Context), + do_prologue(CFG1, Context). + +do_blocks(CFG, Context) -> + Labels = hipe_x86_cfg:labels(CFG), + do_blocks(Labels, CFG, Context). + +do_blocks([Label|Labels], CFG, Context) -> + Liveness = context_liveness(Context), + LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), + Block = hipe_x86_cfg:bb(CFG, Label), + Code = hipe_bb:code(Block), + NewCode = do_block(Code, LiveOut, Context), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), + do_blocks(Labels, NewCFG, Context); +do_blocks([], CFG, _) -> + CFG. + +do_block(Insns, LiveOut, Context) -> + do_block(Insns, LiveOut, Context, context_framesize(Context), []). + +do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> + {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), + do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); +do_block([], _, Context, FPoff, RevCode) -> + FPoff0 = context_framesize(Context), + if FPoff =:= FPoff0 -> []; + true -> exit({?MODULE,do_block,FPoff}) + end, + lists:reverse(RevCode, []). + +do_insn(I, LiveOut, Context, FPoff) -> + case I of + #alu{} -> + {[do_alu(I, Context, FPoff)], FPoff}; + #cmp{} -> + {[do_cmp(I, Context, FPoff)], FPoff}; + #fp_unop{} -> + {do_fp_unop(I, Context, FPoff), FPoff}; + #fp_binop{} -> + {do_fp_binop(I, Context, FPoff), FPoff}; + #fmove{} -> + {[do_fmove(I, Context, FPoff)], FPoff}; + #imul{} -> + {[do_imul(I, Context, FPoff)], FPoff}; + #move{} -> + {[do_move(I, Context, FPoff)], FPoff}; + #movsx{} -> + {[do_movsx(I, Context, FPoff)], FPoff}; + #movzx{} -> + {[do_movzx(I, Context, FPoff)], FPoff}; + #pseudo_call{} -> + do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_tailcall{} -> + {do_pseudo_tailcall(I, Context), context_framesize(Context)}; + #push{} -> + {[do_push(I, Context, FPoff)], FPoff+word_size()}; + #ret{} -> + {do_ret(I, Context, FPoff), context_framesize(Context)}; + #shift{} -> + {[do_shift(I, Context, FPoff)], FPoff}; + _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare + {[I], FPoff} + end. + +%%% +%%% Convert any pseudo-temp operand in a binary (alu, cmp, move) +%%% or unary (push) instruction to an explicit x86_mem operand. +%%% + +do_alu(I, Context, FPoff) -> + #alu{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#alu{src=Src,dst=Dst}. + +do_cmp(I, Context, FPoff) -> + #cmp{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#cmp{src=Src,dst=Dst}. + +do_fp_unop(I, Context, FPoff) -> + #fp_unop{arg=Arg0} = I, + Arg = conv_opnd(Arg0, FPoff, Context), + [I#fp_unop{arg=Arg}]. + +do_fp_binop(I, Context, FPoff) -> + #fp_binop{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + [I#fp_binop{src=Src,dst=Dst}]. + +do_fmove(I, Context, FPoff) -> + #fmove{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#fmove{src=Src,dst=Dst}. + +do_imul(I, Context, FPoff) -> + #imul{src=Src0} = I, + Src = conv_opnd(Src0, FPoff, Context), + I#imul{src=Src}. + +do_move(I, Context, FPoff) -> + #move{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#move{src=Src,dst=Dst}. + +do_movsx(I, Context, FPoff) -> + #movsx{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#movsx{src=Src,dst=Dst}. + +do_movzx(I, Context, FPoff) -> + #movzx{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#movzx{src=Src,dst=Dst}. + +do_push(I, Context, FPoff) -> + #push{src=Src0} = I, + Src = conv_opnd(Src0, FPoff, Context), + I#push{src=Src}. + +do_shift(I, Context, FPoff) -> + #shift{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#shift{src=Src,dst=Dst}. + +conv_opnd(Opnd, FPoff, Context) -> + case opnd_is_pseudo(Opnd) of + false -> + Opnd; + true -> + conv_pseudo(Opnd, FPoff, Context) + end. + +conv_pseudo(Temp, FPoff, Context) -> + Off = FPoff + context_offset(Context, Temp), + conv_pseudo(Temp, Off). + +conv_pseudo(Temp, Off) -> + hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(Off), hipe_x86:temp_type(Temp)). + +%%% +%%% Return - deallocate frame and emit 'ret $N' insn. +%%% + +do_ret(_I, Context, FPoff) -> + %% XXX: this conses up a new ret insn, ignoring the one rtl->x86 made + adjust_sp(FPoff, [hipe_x86:mk_ret(word_size()*context_arity(Context))]). + +adjust_sp(N, Rest) -> + if N =:= 0 -> + Rest; + true -> + [hipe_x86:mk_alu('add', hipe_x86:mk_imm(N), mk_sp()) | Rest] + end. + +%%% +%%% Recursive calls. +%%% + +do_pseudo_call(I, LiveOut, Context, FPoff0) -> + #x86_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_x86:pseudo_call_sdesc(I), + Fun0 = hipe_x86:pseudo_call_fun(I), + Fun1 = conv_opnd(Fun0, FPoff0, Context), + LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)], + SDesc = mk_sdesc(ExnLab, Context, LiveTemps), + ContLab = hipe_x86:pseudo_call_contlab(I), + Linkage = hipe_x86:pseudo_call_linkage(I), + CallCode = [hipe_x86:mk_pseudo_call(Fun1, SDesc, ContLab, Linkage)], + %% +word_size() for our RA and +word_size() for callee's RA should + %% it need to call inc_stack + StkArity = erlang:max(0, OrigArity - ?HIPE_X86_REGISTERS:nr_args()), + context_need_stack(Context, stack_need(FPoff0 + 2*word_size(), StkArity, Fun1)), + ArgsBytes = word_size() * StkArity, + {CallCode, FPoff0 - ArgsBytes}. + +stack_need(FPoff, StkArity, Fun) -> + case Fun of + #x86_prim{} -> FPoff; + #x86_mfa{m=M,f=F,a=A} -> + case erlang:is_builtin(M, F, A) of + true -> FPoff; + false -> stack_need_general(FPoff, StkArity) + end; + #x86_temp{} -> stack_need_general(FPoff, StkArity); + #x86_mem{} -> stack_need_general(FPoff, StkArity) + end. + +stack_need_general(FPoff, StkArity) -> + erlang:max(FPoff, FPoff + (?LEAF_WORDS - 2 - StkArity) * word_size()). + +%%% +%%% Create stack descriptors for call sites. +%%% + +mk_sdesc(ExnLab, Context, Temps) -> % for normal calls + Temps0 = only_tagged(Temps), + Live = mk_live(Context, Temps0), + Arity = context_arity(Context), + FSize = context_framesize(Context), + hipe_x86:mk_sdesc(ExnLab, FSize div word_size(), Arity, + list_to_tuple(Live)). + +only_tagged(Temps)-> + [X || X <- Temps, hipe_x86:temp_type(X) =:= 'tagged']. + +mk_live(Context, Temps) -> + lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]). + +temp_to_slot(Context, Temp) -> + (context_framesize(Context) + context_offset(Context, Temp)) + div word_size(). + +mk_minimal_sdesc(Context) -> % for inc_stack_0 calls + hipe_x86:mk_sdesc([], 0, context_arity(Context), {}). + +%%% +%%% Tailcalls. +%%% + +do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context) + Arity = context_arity(Context), + Args = hipe_x86:pseudo_tailcall_stkargs(I) ++ [context_ra(Context)], + Fun0 = hipe_x86:pseudo_tailcall_fun(I), + {Insns, FPoff1, Fun1} = do_tailcall_args(Args, Context, Fun0), + context_need_stack(Context, FPoff1), + FPoff2 = FPoff1 + word_size()+word_size()*Arity - word_size()*length(Args), + %% +word_size() for callee's inc_stack RA + StkArity = length(hipe_x86:pseudo_tailcall_stkargs(I)), + context_need_stack(Context, stack_need(FPoff2 + word_size(), StkArity, Fun1)), + I2 = hipe_x86:mk_jmp_fun(Fun1, hipe_x86:pseudo_tailcall_linkage(I)), + Insns ++ adjust_sp(FPoff2, [I2]). + +do_tailcall_args(Args, Context, Fun0) -> + FPoff0 = context_framesize(Context), + Arity = context_arity(Context), + FrameTop = word_size() + word_size()*Arity, + DangerOff = FrameTop - word_size()*length(Args), + Moves = mk_moves(Args, FrameTop, []), + {Stores, Simple, Conflict} = + split_moves(Moves, Context, DangerOff, [], [], []), + %% sanity check (shouldn't trigger any more) + if DangerOff < -FPoff0 -> + exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0}); + true -> [] + end, + FPoff1 = FPoff0, + %% + {Pushes, MoreSimple, FPoff2} = split_conflict(Conflict, FPoff1, [], []), + %% + {PushFun0, FPoff3, LoadFun1, Fun1} = + case opnd_is_pseudo(Fun0) of + false -> + {[], FPoff2, [], Fun0}; + true -> + Type = hipe_x86:temp_type(Fun0), + Temp1 = mk_temp1(Type), + Fun0Off = context_offset(Context, Fun0), + MEM0 = conv_pseudo(Fun0, FPoff2 + Fun0Off), + if Fun0Off >= DangerOff -> + Fun1Off = hipe_x86:mk_imm(0), + MEM1 = hipe_x86:mk_mem(mk_sp(), Fun1Off, Type), + {[hipe_x86:mk_push(MEM0)], + FPoff2 + word_size(), + [hipe_x86:mk_move(MEM1, Temp1)], + Temp1}; + true -> + {[], FPoff2, [hipe_x86:mk_move(MEM0, Temp1)], Temp1} + end + end, + %% + RegTemp0 = ?HIPE_X86_REGISTERS:temp0(), + TempReg = + case hipe_x86:is_temp(Fun1) of + true -> + RegFun1 = hipe_x86:temp_reg(Fun1), + if RegFun1 =/= RegTemp0 -> RegTemp0; + true -> ?HIPE_X86_REGISTERS:temp1() + end; + false -> + RegTemp0 + end, + %% + {Pushes ++ PushFun0 ++ + store_moves(Stores, FPoff3, LoadFun1 ++ + simple_moves(Simple, FPoff3, TempReg, + simple_moves(MoreSimple, FPoff3, TempReg, + []))), + FPoff3, Fun1}. + +mk_moves([Arg|Args], Off, Moves) -> + Off1 = Off - word_size(), + mk_moves(Args, Off1, [{Arg,Off1}|Moves]); +mk_moves([], _, Moves) -> + Moves. + +split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) -> + {Src,DstOff} = Move, + case src_is_pseudo(Src) of + false -> + split_moves(Moves, Context, DangerOff, [Move|Stores], + Simple, Conflict); + true -> + SrcOff = context_offset(Context, Src), + Type = typeof_src(Src), + if SrcOff =:= DstOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, Conflict); + SrcOff >= DangerOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, [{SrcOff,DstOff,Type}|Conflict]); + true -> + split_moves(Moves, Context, DangerOff, Stores, + [{SrcOff,DstOff,Type}|Simple], Conflict) + end + end; +split_moves([], _, _, Stores, Simple, Conflict) -> + {Stores, Simple, Conflict}. + +split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Simple) -> + Push = hipe_x86:mk_push( + hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+SrcOff), Type)), + split_conflict(Conflict, FPoff+word_size(), [Push|Pushes], + [{-(FPoff+word_size()),DstOff,Type}|Simple]); +split_conflict([], FPoff, Pushes, Simple) -> + {lists:reverse(Pushes), Simple, FPoff}. + +simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) -> + Temp = hipe_x86:mk_temp(TempReg, Type), + SP = mk_sp(), + LoadOff = hipe_x86:mk_imm(FPoff+SrcOff), + LD = hipe_x86:mk_move(hipe_x86:mk_mem(SP, LoadOff, Type), Temp), + StoreOff = hipe_x86:mk_imm(FPoff+DstOff), + ST = hipe_x86:mk_move(Temp, hipe_x86:mk_mem(SP, StoreOff, Type)), + simple_moves(Moves, FPoff, TempReg, [LD, ST | Rest]); +simple_moves([], _, _, Rest) -> + Rest. + +store_moves([{Src,DstOff}|Moves], FPoff, Rest) -> + Type = typeof_src(Src), + SP = mk_sp(), + StoreOff = hipe_x86:mk_imm(FPoff+DstOff), + ST = hipe_x86:mk_move(Src, hipe_x86:mk_mem(SP, StoreOff, Type)), + store_moves(Moves, FPoff, [ST | Rest]); +store_moves([], _, Rest) -> + Rest. + +%%% +%%% Contexts +%%% + +-record(context, {liveness, framesize, arity, map, ra, ref_maxstack}). + +mk_context(Liveness, Formals, Temps) -> + RA = hipe_x86:mk_new_temp('untagged'), + {Map, MinOff} = mk_temp_map(Formals, RA, Temps), + FrameSize = (-MinOff), + RefMaxStack = hipe_bifs:ref(FrameSize), + Context = #context{liveness=Liveness, + framesize=FrameSize, arity=length(Formals), + map=Map, ra=RA, ref_maxstack=RefMaxStack}, + Context. + +context_need_stack(#context{ref_maxstack=RM}, N) -> + M = hipe_bifs:ref_get(RM), + if N > M -> hipe_bifs:ref_set(RM, N); + true -> [] + end. + +context_maxstack(#context{ref_maxstack=RM}) -> + hipe_bifs:ref_get(RM). + +context_arity(#context{arity=Arity}) -> + Arity. + +context_framesize(#context{framesize=FrameSize}) -> + FrameSize. + +context_liveness(#context{liveness=Liveness}) -> + Liveness. + +context_offset(#context{map=Map}, Temp) -> + tmap_lookup(Map, Temp). + +context_ra(#context{ra=RA}) -> + RA. + +mk_temp_map(Formals, RA, Temps) -> + {Map, _} = enter_vars(Formals, word_size() * (length(Formals)+1), + tmap_bind(tmap_empty(), RA, 0)), + enter_vars(tset_to_list(Temps), 0, Map). + +enter_vars([V|Vs], PrevOff, Map) -> + Off = + case hipe_x86:temp_type(V) of + 'double' -> PrevOff - float_size(); + _ -> PrevOff - word_size() + end, + enter_vars(Vs, Off, tmap_bind(Map, V, Off)); +enter_vars([], Off, Map) -> + {Map, Off}. + +tmap_empty() -> + gb_trees:empty(). + +tmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +tmap_lookup(Map, Key) -> + gb_trees:get(Key, Map). + +%%% +%%% do_prologue: prepend stack frame allocation code. +%%% +%%% NewStart: +%%% temp0 = sp - MaxStack +%%% if( temp0 < SP_LIMIT(P) ) goto IncStack else goto AllocFrame +%%% AllocFrame: +%%% sp -= FrameSize +%%% goto OldStart +%%% OldStart: +%%% ... +%%% IncStack: +%%% call inc_stack +%%% goto NewStart + +do_prologue(CFG, Context) -> + do_check_stack(do_alloc_frame(CFG, Context), Context). + +do_alloc_frame(CFG, Context) -> + case context_framesize(Context) of + 0 -> + CFG; + FrameSize -> + OldStartLab = hipe_x86_cfg:start_label(CFG), + AllocFrameLab = hipe_gensym:get_next_label(x86), + SP = mk_sp(), + AllocFrameCode = + [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(FrameSize), SP), + hipe_x86:mk_jmp_label(OldStartLab)], + CFG1 = hipe_x86_cfg:bb_add(CFG, AllocFrameLab, + hipe_bb:mk_bb(AllocFrameCode)), + hipe_x86_cfg:start_label_update(CFG1, AllocFrameLab) + end. + +do_check_stack(CFG, Context) -> + MaxStack = context_maxstack(Context), + Arity = context_arity(Context), + Guaranteed = erlang:max(0, (?LEAF_WORDS - 1 - Arity) * word_size()), + if MaxStack =< Guaranteed -> + %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]), + CFG; + true -> + %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameLab = hipe_x86_cfg:start_label(CFG), + NewStartLab = hipe_gensym:get_next_label(x86), + IncStackLab = hipe_gensym:get_next_label(x86), + %% + Type = 'untagged', + Preg = ?HIPE_X86_REGISTERS:proc_pointer(), + Pbase = hipe_x86:mk_temp(Preg, Type), + SP_LIMIT_OFF = hipe_x86:mk_imm( + ?HIPE_X86_REGISTERS:sp_limit_offset()), + Temp0 = mk_temp0(Type), + SP = mk_sp(), + NewStartCode = + %% hopefully this lea is faster than the mov;sub it replaced + [hipe_x86:mk_lea( + hipe_x86:mk_mem(SP, hipe_x86:mk_imm(-MaxStack), 'untagged'), + Temp0), + hipe_x86:mk_cmp( + hipe_x86:mk_mem(Pbase, SP_LIMIT_OFF, Type), Temp0), + hipe_x86:mk_pseudo_jcc('b', IncStackLab, AllocFrameLab, 0.01)], + IncStackCode = + [hipe_x86:mk_call(hipe_x86:mk_prim('inc_stack_0'), + mk_minimal_sdesc(Context), not_remote), + hipe_x86:mk_jmp_label(NewStartLab)], + %% + CFG1 = hipe_x86_cfg:bb_add(CFG, NewStartLab, + hipe_bb:mk_bb(NewStartCode)), + CFG2 = hipe_x86_cfg:bb_add(CFG1, IncStackLab, + hipe_bb:mk_bb(IncStackCode)), + hipe_x86_cfg:start_label_update(CFG2, NewStartLab) + end. + +%%% typeof_src -- what's src's type? + +typeof_src(Src) -> + case Src of + #x86_imm{} -> + 'untagged'; + #x86_temp{} -> + hipe_x86:temp_type(Src); + #x86_mem{} -> + hipe_x86:mem_type(Src) + end. + +%%% Cons up an '%sp' Temp. + +mk_sp() -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:sp(), 'untagged'). + +%%% Cons up a '%temp0' Temp. + +mk_temp0(Type) -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). + +%%% Cons up a '%temp1' Temp. + +mk_temp1(Type) -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + opnd_is_pseudo(Src). + +opnd_is_pseudo(Opnd) -> + case hipe_x86:is_temp(Opnd) of + true -> temp_is_pseudo(Opnd); + false -> false + end. + +temp_is_pseudo(Temp) -> + case hipe_x86:is_temp(Temp) of + true -> + not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))); + false -> + false + end. + + +%%% +%%% Build the set of all temps used in a Defun's body. +%%% + +all_temps(Code, Formals) -> + S0 = find_temps(Code, tset_empty()), + S1 = tset_del_list(S0, Formals), + S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), + S2. + +find_temps([I|Insns], S0) -> + S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), + S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), + find_temps(Insns, S2); +find_temps([], S) -> + S. + +tset_empty() -> + gb_sets:new(). + +tset_size(S) -> + gb_sets:size(S). + +tset_insert(S, T) -> + gb_sets:add_element(T, S). + +tset_add_list(S, Ts) -> + gb_sets:union(S, gb_sets:from_list(Ts)). + +tset_del_list(S, Ts) -> + gb_sets:subtract(S, gb_sets:from_list(Ts)). + +tset_filter(S, F) -> + gb_sets:filter(F, S). + +tset_to_list(S) -> + gb_sets:to_list(S). + +%%% +%%% Compute minimum permissible frame size, ignoring spilled temps. +%%% This is done to ensure that we won't have to adjust the frame size +%%% in the middle of a tailcall. +%%% + +defun_minframe(Defun) -> + MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), + MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), + erlang:max(MaxTailArity - MyArity, 0). + +body_mta([I|Code], MTA) -> + body_mta(Code, insn_mta(I, MTA)); +body_mta([], MTA) -> + MTA. + +insn_mta(I, MTA) -> + case I of + #pseudo_tailcall{arity=Arity} -> + erlang:max(MTA, Arity - ?HIPE_X86_REGISTERS:nr_args()); + _ -> MTA + end. + +%%% +%%% Ensure that we have enough temps to satisfy the minimum frame size, +%%% if necessary by prepending unused dummy temps. +%%% + +ensure_minframe(MinFrame, Temps) -> + ensure_minframe(MinFrame, tset_size(Temps), Temps). + +ensure_minframe(MinFrame, Frame, Temps) -> + if MinFrame > Frame -> + Temp = hipe_x86:mk_new_temp('untagged'), + ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp)); + true -> Temps + end. + +word_size() -> + ?HIPE_X86_REGISTERS:wordsize(). + +float_size() -> + ?HIPE_X86_REGISTERS:float_size(). diff --git a/lib/hipe/x86/hipe_x86_liveness.erl b/lib/hipe/x86/hipe_x86_liveness.erl new file mode 100644 index 0000000000..6874b05a59 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_liveness.erl @@ -0,0 +1,57 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% x86_liveness -- compute register liveness for x86 CFGs + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-else. +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-endif. + +-module(?HIPE_X86_LIVENESS). + +-export([analyse/1]). +-export([liveout/2]). +-export([uses/1, defines/1]). % used in hipe_*_spill_restore modules + +-include("../x86/hipe_x86.hrl"). % ../x86/ is needed when included in amd64 +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_x86_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_x86_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_x86_cfg:succ(CFG, L). +uses(Insn) -> ?HIPE_X86_DEFUSE:insn_use(Insn). +defines(Insn) -> ?HIPE_X86_DEFUSE:insn_def(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_x86:mk_temp(Reg, Type) + end, + ?HIPE_X86_REGISTERS:live_at_return())). + +-ifdef(DEBUG_LIVENESS). +cfg_labels(CFG) -> hipe_x86_cfg:labels(CFG). +cfg_bb_add(CFG,L,NewBB) -> hipe_x86_cfg:bb_add(CFG,L,NewBB). +mk_comment(Text) -> hipe_x86:mk_comment(Text). +-endif. diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl new file mode 100644 index 0000000000..f45a49ca0a --- /dev/null +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -0,0 +1,70 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_MAIN, hipe_amd64_main). +-define(RTL_TO_X86, rtl_to_amd64). % XXX: kill this crap +-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). +-define(HIPE_X86_RA, hipe_amd64_ra). +-define(HIPE_X86_FRAME, hipe_amd64_frame). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(X86TAG, amd64). % XXX: kill this crap +-define(X86STR, "amd64"). +-define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). +-else. +-define(HIPE_X86_MAIN, hipe_x86_main). +-define(RTL_TO_X86, rtl_to_x86). % XXX: kill this crap +-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). +-define(HIPE_X86_RA, hipe_x86_ra). +-define(HIPE_X86_FRAME, hipe_x86_frame). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(X86TAG, x86). % XXX: kill this crap +-define(X86STR, "x86"). +-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). +-endif. + +-module(?HIPE_X86_MAIN). +-export([?RTL_TO_X86/3]). % XXX: change to 'from_rtl' to avoid $ARCH substring + +-ifndef(DEBUG). +-define(DEBUG,1). +-endif. +-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +?RTL_TO_X86(MFA, RTL, Options) -> + Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), + "RTL-to-"?X86STR, Options), + SpillRest = + case proplists:get_bool(caller_save_spill_restore, Options) of + true -> + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?X86STR" spill restore", Options); + false -> + Translated + end, + Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), + ?X86STR" register allocation", Options), + Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), + ?X86STR" frame", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), + ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), + {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl new file mode 100644 index 0000000000..34e3d7a11b --- /dev/null +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -0,0 +1,276 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%---------------------------------------------------------------------- +%%% File : hipe_x86_postpass.erl +%%% Author : Christoffer Vikström +%%% Purpose : Contain postpass optimisations for x86-assembler code. +%%% Created : 5 Aug 2003 by Christoffer Vikström +%%%---------------------------------------------------------------------- + +-ifndef(HIPE_X86_POSTPASS). +-define(HIPE_X86_POSTPASS, hipe_x86_postpass). +-endif. + +-module(?HIPE_X86_POSTPASS). +-export([postpass/2]). +-include("../x86/hipe_x86.hrl"). + +%%>----------------------------------------------------------------------< +% Procedure : postpass/2 +% Purpose : Function that performs a nr of postpass optimizations on +% the hipe x86-assembler code before it is encoded and loaded. +%%>----------------------------------------------------------------------< +postpass(#defun{code=Code0}=Defun, Options) -> + Code1 = pseudo_insn_expansion(Code0), + Code2 = case proplists:get_bool(peephole, Options) of + true -> peephole_optimization(Code1); + false -> Code1 + end, + Code3 = trivial_goto_elimination(Code2), + Defun#defun{code=Code3}. + + +%%>----------------------------------------------------------------------< +% Procedure : peep/1 +% Purpose : Function that does peephole optimizations. It works by +% moving a window over the code and looking at a sequence of +% a few instructions. Replaces long sequences of instructions +% with shorter ones and removes unnecesary ones. +% Arguments : Insns - List of pseudo x86-assembler records. +% Res - Returned list of pseudo x86-assembler records. +% Kept reversed, until it is returned. +% Return : An optimized list of pseudo x86-assembler records with +% (hopefully) fewer or faster instructions. +%%>----------------------------------------------------------------------< +peephole_optimization(Insns) -> + peep(Insns, [], []). + +%% MoveSelf related peep-opts +%% ------------------------------ +peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf1|Lst]); +peep([I=#fmove{src=Src, dst=Dst}, + #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf2|Lst]); +peep([#movsx{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf3|Lst]); +peep([I=#movsx{src=Src, dst=Dst}, + #movsx{src=Dst, dst=Src} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf4|Lst]); +peep([#movzx{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf5|Lst]); +peep([I=#movzx{src=Src, dst=Dst}, + #movzx{src=Dst, dst=Src} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf6|Lst]); +peep([#cmovcc{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf7|Lst]); +peep([I=#cmovcc{src=Src, dst=Dst}, + #cmovcc{src=Dst, dst=Src}|Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf8|Lst]); +peep([#move{src=#x86_temp{reg=X}, + dst=#x86_temp{reg=X}} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf9|Lst]); +peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}}, + #move{src=#x86_temp{reg=Dst}, dst=#x86_temp{reg=Src}} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf0|Lst]); + + +%% ElimBinALMDouble +%% ---------------- +peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) -> + peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]); + + +%% ElimFBinDouble +%% -------------- +peep([Move=#fmove{src=Src, dst=Dst}, + BinOp=#fp_binop{src=Src, dst=Dst}|Insns], Res, Lst) -> + peep([BinOp#fp_binop{src=Dst}|Insns], [Move|Res], [elimFBinDouble|Lst]); + + +%% CommuteBinALMD +%% -------------- +peep([#move{src=Src1, dst=Dst}, + #alu{aluop=Op,src=Src2,dst=Dst}|Insns], Res, Lst) + when (Src1 =:= #x86_imm{}) and (Src2 =/= #x86_imm{}) and + ((Op =:= 'add') or (Op =:= 'and') or (Op =:= 'or') or (Op =:= 'xor')) -> + peep(Insns, [#alu{aluop=Op,src=Src1,dst=Dst}, + #move{src=Src2, dst=Dst}|Res], + [commuteBinALMD|Lst]); + + +%% ElimCmp0 +%% -------- +peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) -> + case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and + ((Cond =:= 'eq') or (Cond =:= 'neq'))) of + true -> + Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, + Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end, + Test = #test{src=Src2, dst=#x86_imm{value=0}}, + Jump = #jcc{cc=Cond2, label=Lab}, + peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]); + _ -> + peep(Insns, [J,C|Res], Lst) + end; + + +%% ElimCmpTest +%% ----------- +peep([I|Insns],Res,Lst) when (I =:= #cmp{}) or (I =:= #test{}) -> + case check(Insns) of + #jcc{} -> + peep(Insns, [I|Res], Lst); + #jmp_fun{} -> + peep(Insns, [I|Res], Lst); + #jmp_label{} -> + peep(Insns, [I|Res], Lst); + #jmp_switch{} -> + peep(Insns, [I|Res], Lst); + #cmovcc{} -> + peep(Insns, [I|Res], Lst); + #ret{} -> + peep(Insns, [I|Res], Lst); + _ -> + peep(Insns, Res, [elimCmpTest|Lst]) + end; + + +%% ElimPushPop +%% ----------- +peep([#push{src=Opr}, #pop{dst=Opr} | Insns], Res, Lst) -> + peep(Insns, Res, [elimPushPop|Lst]); + + +% %% ElimIFF +% %% ------- +peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) -> + peep(Insns, [I, #jmp_label{label=Lab}|Res], [elimIFF|Lst]); + + +%% ElimSet0 +%% -------- +peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst) +when (Dst==#x86_temp{}) -> + peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]); + +%% ElimMDPow2 +%% ---------- +peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) -> + {IsLog2, Size, Sign} = log2(Val), + case ((Op =:= imul) or (Op =:= idiv)) and IsLog2 of + true -> + Sh = case Sign of positive -> 'bsl'; negative -> 'bsr' end, + peep(Insns, + [#shift{shiftop=Sh, src=#x86_imm{value=Size}, dst=Dst}|Res], + [elimMDPow2|Lst]); + false -> + peep(Insns, [B|Res], Lst) + end; + +%% SubToDec +%% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which +%% changes reduction counter tests to use decl instead of subl. +%% However, on Athlon64 this leads to a small but measurable decrease +%% in performance. The use of dec is also not recommended on P4, so +%% this transformation is disabled. +%% peep([#alu{aluop='sub',src=#x86_imm{value=1},dst=Dst},J=#jcc{cc='l'}|Insns], Res, Lst) -> +%% peep(Insns, [J, #dec{dst=Dst} | Res], [subToDec|Lst]); + +%% Standard list recursion clause +%% ------------------------------ +peep([I | Insns], Res, Lst) -> + peep(Insns, [I|Res], Lst); +peep([], Res, _Lst) -> + lists:reverse(Res). + +%% Simple goto elimination +%% ----------------------- +trivial_goto_elimination(Insns) -> goto_elim(Insns, []). + +goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) -> + goto_elim([I|Insns], Res); +goto_elim([I | Insns], Res) -> + goto_elim(Insns, [I|Res]); +goto_elim([], Res) -> + lists:reverse(Res). + + +%%>----------------------------------------------------------------------< +%% Procedure : expand/1 +%% Purpose : Expands pseudo instructions. +%% Arguments : Insns - An x86-instruction list. +%% Return : An expanded instruction list. +%% Notes : +%%>----------------------------------------------------------------------< +pseudo_insn_expansion(Insns) -> expand(Insns, []). +expand([I|Tail], Res) -> + case I of + #pseudo_jcc{cc=Cc,true_label=TrueLab,false_label=FalseLab} -> + expand(Tail, [hipe_x86:mk_jmp_label(FalseLab), + hipe_x86:mk_jcc(Cc, TrueLab) | Res]); + #pseudo_tailcall_prepare{} -> + expand(Tail, Res); + #pseudo_call{'fun'=Fun,sdesc=SDesc,contlab=ContLab,linkage=Linkage} -> + expand(Tail, [hipe_x86:mk_jmp_label(ContLab), + hipe_x86:mk_call(Fun, SDesc, Linkage) | Res]); + _ -> + expand(Tail, [I|Res]) + end; +expand([], Res) -> lists:reverse(Res). + +%% Log2 function +%% ------------- +%% Used by ElimMDPow2 clause of peep(..) +log2(Nr) -> log2(Nr, 0). +log2(0, _) -> {false, 0, positive}; +log2(Nr, I) -> + case (Nr band 1) =:= 1 of + true -> + case Nr of + 1 -> + {true, I, positive}; + -1 -> + {true, I, negative}; + _ -> + {false, 0, positive} + end; + false -> + log2((Nr bsr 1), I+1) + end. + +%% Skips through all comments and move instructions and returns the next one +%% ------------------------------------------------------------------------- +%% Used by ElimCmpTest above. +check([I|Ins]) -> + case I of + #comment{} -> + check(Ins); + #move{} -> + check(Ins); + #fmove{} -> + check(Ins); + #movsx{} -> + check(Ins); + #movzx{} -> + check(Ins); + OtherI -> + OtherI + end. diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl new file mode 100644 index 0000000000..555e21a446 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -0,0 +1,350 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% x86 pretty-printer + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-else. +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-endif. + +-module(?HIPE_X86_PP). +-export([% pp/1, pp/2, + pp_insn/1, optional_pp/3]). +-include("../x86/hipe_x86.hrl"). + +optional_pp(Defun, MFA, Options) -> + case proplists:get_value(pp_native, Options) of + true -> + pp(Defun); + {only,Lst} when is_list(Lst) -> + case lists:member(MFA, Lst) of + true -> pp(Defun); + false -> ok + end; + {only,MFA} -> + pp(Defun); + {file,FileName} -> + {ok, File} = file:open(FileName, [write,append]), + pp(File, Defun), + ok = file:close(File); + _ -> + ok + end. + +pp(Defun) -> + pp(standard_io, Defun). + +pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) -> + Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A), + io:format(Dev, "\t.text\n", []), + io:format(Dev, "\t.align 4\n", []), + io:format(Dev, "\t.global ~s\n", [Fname]), + io:format(Dev, "~s:\n", [Fname]), + pp_insns(Dev, Code, Fname), + io:format(Dev, "\t.rodata\n", []), + io:format(Dev, "\t.align 4\n", []), + hipe_data_pp:pp(Dev, Data, x86, Fname), + io:format(Dev, "\n", []). + +pp_insns(Dev, [I|Is], Fname) -> + pp_insn(Dev, I, Fname), + pp_insns(Dev, Is, Fname); +pp_insns(_, [], _) -> + ok. + +pp_insn(I) -> + pp_insn(standard_io, I, ""). + +pp_insn(Dev, I, Pre) -> + case I of + #alu{aluop=AluOp, src=Src, dst=Dst} -> + io:format(Dev, "\t~s ", [alu_op_name(AluOp)]), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage} -> + io:format(Dev, "\tcall ", []), + pp_fun(Dev, Fun), + io:format(Dev, " #", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #cmovcc{cc=Cc, src=Src, dst=Dst} -> + io:format(Dev, "\tcmov~s ", [cc_name(Cc)]), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #cmp{src=Src, dst=Dst} -> + io:format(Dev, "\tcmp ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #comment{term=Term} -> + io:format(Dev, "\t# ~p\n", [Term]); + #imul{imm_opt=ImmOpt, src=Src, temp=Temp} -> + io:format(Dev, "\timul ", []), + case ImmOpt of + [] -> ok; + Imm -> + pp_imm(Dev, Imm, true), + io:format(Dev, ", ", []) + end, + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Temp), + io:format(Dev, "\n", []); + #jcc{cc=Cc, label=Label} -> + io:format(Dev, "\tj~s .~s_~w\n", [cc_name(Cc), Pre, Label]); + #jmp_fun{'fun'=Fun, linkage=Linkage} -> + io:format(Dev, "\tjmp ", []), + pp_fun(Dev, Fun), + io:format(Dev, " ~w\n", [Linkage]); + #jmp_label{label=Label} -> + io:format(Dev, "\tjmp .~s_~w\n", [Pre, Label]); + #jmp_switch{temp=Temp, jtab=JTab, labels=Labels} -> + io:format(Dev, "\tjmp *{constant,~w}(,", [JTab]), + pp_temp(Dev, Temp), + io:format(Dev, ",4) #", []), + pp_labels(Dev, Labels, Pre), + io:format(Dev, "\n", []); + #label{label=Label} -> + io:format(Dev, ".~s_~w:~n", [Pre, Label]); + #lea{mem=Mem, temp=Temp} -> + io:format(Dev, "\tlea ", []), + pp_mem(Dev, Mem), + io:format(Dev, ", ", []), + pp_temp(Dev, Temp), + io:format(Dev, "\n", []); + #move{src=Src, dst=Dst} -> + io:format(Dev, "\tmov ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #move64{} -> + pp_move64(Dev, I); + #movsx{src=Src, dst=Dst} -> + io:format(Dev, "\tmovsx ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #movzx{src=Src, dst=Dst} -> + io:format(Dev, "\tmovzx ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage} -> + io:format(Dev, "\tpseudo_call ", []), + pp_fun(Dev, Fun), + io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #pseudo_jcc{cc=Cc, true_label=TrueLab, false_label=FalseLab, pred=Pred} -> + io:format(Dev, "\tpseudo_j~s ", [cc_name(Cc)]), + io:format(Dev, ".~s_~w # .~s_~w ~.2f\n", + [Pre, TrueLab, Pre, FalseLab, Pred]); + #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> + io:format(Dev, "\tpseudo_tailcall ", []), + pp_fun(Dev, Fun), + io:format(Dev, "~w (", [Arity]), + pp_args(Dev, StkArgs), + io:format(Dev, ") ~w\n", [Linkage]); + #pseudo_tailcall_prepare{} -> + io:format(Dev, "\tpseudo_tailcall_prepare\n", []); + #push{src=Src} -> + io:format(Dev, "\tpush ", []), + pp_src(Dev, Src), + io:format(Dev, "\n", []); + #ret{npop=NPop} -> + io:format(Dev, "\tret $~s\n", [to_hex(NPop)]); + #shift{shiftop=ShiftOp, src=Src, dst=Dst} -> + io:format(Dev, "\t~s ", [alu_op_name(ShiftOp)]), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #fp_binop{src=Src, dst=Dst, op=Op} -> + io:format(Dev, "\t~s ", [Op]), + pp_dst(Dev, Dst), + io:format(Dev, ", ", []), + pp_src(Dev, Src), + io:format(Dev, "\n", []); + #fp_unop{arg=Arg, op=Op} -> + io:format(Dev, "\t~s ", [Op]), + case Arg of + []-> + io:format(Dev, "\n", []); + _ -> + pp_args(Dev, [Arg]), + io:format(Dev, "\n", []) + end; + #fmove{src=Src, dst=Dst} -> + io:format(Dev, "\tfmove ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + _ -> + exit({?MODULE, pp_insn, {"unknown x86 instruction", I}}) + end. + +-ifdef(HIPE_AMD64). +pp_move64(Dev, I) -> + #move64{imm=Src, dst=Dst} = I, + io:format(Dev, "\tmov64 ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []). +-else. +pp_move64(_Dev, I) -> exit({?MODULE, I}). +-endif. + +to_hex(N) -> + io_lib:format("~.16x", [N, "0x"]). + +pp_sdesc(Dev, Pre, #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) -> + pp_sdesc_exnlab(Dev, Pre, ExnLab), + io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]), + pp_sdesc_live(Dev, Live), + io:format(Dev, "]", []). + +pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []); +pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]). + +pp_sdesc_live(_, {}) -> ok; +pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1). + +pp_sdesc_live(Dev, Live, I) -> + io:format(Dev, "~s", [to_hex(element(I, Live))]), + if I < tuple_size(Live) -> + io:format(Dev, ",", []), + pp_sdesc_live(Dev, Live, I+1); + true -> ok + end. + +pp_labels(Dev, [Label|Labels], Pre) -> + io:format(Dev, " .~s_~w", [Pre, Label]), + pp_labels(Dev, Labels, Pre); +pp_labels(_, [], _) -> + ok. + +pp_fun(Dev, Fun) -> + case Fun of + #x86_mfa{m=M, f=F, a=A} -> + io:format(Dev, "~w:~w/~w", [M, F, A]); + #x86_prim{prim=Prim} -> + io:format(Dev, "~w", [Prim]); + _ -> % temp or mem + io:format(Dev, "*", []), + pp_dst(Dev, Fun) + end. + +alu_op_name(Op) -> Op. + +cc_name(Cc) -> Cc. + +pp_hard_reg(Dev, Reg) -> + io:format(Dev, "~s", [?HIPE_X86_REGISTERS:reg_name(Reg)]). + +type_tag('tagged') -> "t"; +type_tag('untagged') -> "u"; +type_tag('double') -> "d". + +pp_temp(Dev, #x86_temp{reg=Reg, type=Type}) -> + case Type of + double -> + Tag = type_tag(Type), + io:format(Dev, "~s~w", [Tag, Reg]); + _ -> + case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of + true -> + pp_hard_reg(Dev, Reg); + false -> + Tag = type_tag(Type), + io:format(Dev, "~s~w", [Tag, Reg]) + end + end. + +pp_fpreg(Dev, #x86_fpreg{reg=Reg, pseudo=Pseudo})-> + case Pseudo of + true -> io:format(Dev, "pseudo_fp(~w)", [Reg]); + _ -> io:format(Dev, "st(~w)", [Reg]) + end. + +pp_imm(Dev, #x86_imm{value=Value}, Dollar) -> + if Dollar =:= true -> io:format(Dev, [$$], []); + true -> ok + end, + if is_integer(Value) -> io:format(Dev, "~s", [to_hex(Value)]); + true -> io:format(Dev, "~w", [Value]) + end. + +pp_mem(Dev, #x86_mem{base=Base, off=Off}) -> + pp_off(Dev, Off), + case Base of + [] -> + ok; + _ -> + io:format(Dev, "(", []), + pp_temp(Dev, Base), + io:format(Dev, ")", []) + end. + +pp_off(Dev, Off) -> + pp_src(Dev, Off, false). + +pp_src(Dev, Src) -> + pp_src(Dev, Src, true). + +pp_src(Dev, Src, Dollar) -> + case Src of + #x86_temp{} -> + pp_temp(Dev, Src); + #x86_imm{} -> + pp_imm(Dev, Src, Dollar); + #x86_mem{} -> + pp_mem(Dev, Src); + #x86_fpreg{} -> + pp_fpreg(Dev, Src) + end. + +pp_dst(Dev, Dst) -> + pp_src(Dev, Dst). + +pp_args(Dev, [A|As]) -> + pp_src(Dev, A), + pp_comma_args(Dev, As); +pp_args(_, []) -> + ok. + +pp_comma_args(Dev, [A|As]) -> + io:format(Dev, ", ", []), + pp_src(Dev, A), + pp_comma_args(Dev, As); +pp_comma_args(_, []) -> + ok. diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl new file mode 100644 index 0000000000..d50b9aabad --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -0,0 +1,99 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA, hipe_amd64_ra). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). +-define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive). +-define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-else. +-define(HIPE_X86_RA, hipe_x86_ra). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_RA_LS, hipe_x86_ra_ls). +-define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive). +-define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-endif. + +-module(?HIPE_X86_RA). +-export([ra/2]). + +%%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun0, Options) -> + %% ?HIPE_X86_PP:pp(Defun0), + {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), + %% ?HIPE_X86_PP:pp(Defun1), + ?start_ra_instrumentation(Options, + length(hipe_x86:defun_code(Defun1)), + element(2,hipe_x86:defun_var_range(Defun1))), + {Defun2, Coloring} + = case proplists:get_value(regalloc, Options, coalescing) of + coalescing -> + ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + optimistic -> + ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + graph_color -> + ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + linear_scan -> + ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + naive -> + ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) + end, + ?stop_ra_instrumentation(Options, + length(hipe_x86:defun_code(Defun2)), + element(2,hipe_x86:defun_var_range(Defun2))), + %% ?HIPE_X86_PP:pp(Defun2), + ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + +ra(Defun, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). + +-ifdef(HIPE_AMD64). +ra_fp(Defun, Options) -> + case proplists:get_bool(inline_fp, Options) and + (proplists:get_value(regalloc, Options) =/= naive) of + true -> + case proplists:get_bool(x87, Options) of + true -> + hipe_amd64_ra_x87_ls:ra(Defun, Options); + false -> + hipe_regalloc_loop:ra_fp(Defun, Options, + hipe_coalescing_regalloc, + hipe_amd64_specific_sse2) + end; + false -> + {Defun,[],0} + end. +-else. +ra_fp(Defun, Options) -> + case proplists:get_bool(inline_fp, Options) of + true -> + hipe_x86_ra_x87_ls:ra(Defun, Options); + false -> + {Defun,[],0} + end. +-endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl new file mode 100644 index 0000000000..10b4df05d2 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -0,0 +1,335 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% - apply temp -> reg/spill map from RA + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_X87, hipe_amd64_x87). +-else. +-define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_X87, hipe_x86_x87). +-endif. + +-module(?HIPE_X86_RA_FINALISE). +-export([finalise/4]). +-include("../x86/hipe_x86.hrl"). + +finalise(Defun, TempMap, FpMap, Options) -> + Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), + case proplists:get_bool(x87, Options) of + true -> + ?HIPE_X86_X87:map(Defun1); + _ -> + Defun1 + end. + +%%% +%%% Finalise the temp->reg/spill mapping. +%%% (XXX: maybe this should be merged with the main pass, +%%% but I just want this to work now) +%%% + +finalise_ra(Defun, [], [], _Options) -> + Defun; +finalise_ra(Defun, TempMap, FpMap, Options) -> + Code = hipe_x86:defun_code(Defun), + {_, SpillLimit} = hipe_x86:defun_var_range(Defun), + Map = mk_ra_map(TempMap, SpillLimit), + FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), + NewCode = ra_code(Code, Map, FpMap0), + Defun#defun{code=NewCode}. + +ra_code(Code, Map, FpMap) -> + [ra_insn(I, Map, FpMap) || I <- Code]. + +ra_insn(I, Map, FpMap) -> + case I of + #alu{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#alu{src=Src,dst=Dst}; + #call{} -> + I; + #cmovcc{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#cmovcc{src=Src,dst=Dst}; + #cmp{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#cmp{src=Src,dst=Dst}; + #comment{} -> + I; + #fmove{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#fmove{src=Src,dst=Dst}; + #fp_unop{arg=Arg0} -> + Arg = ra_opnd(Arg0, Map, FpMap), + I#fp_unop{arg=Arg}; + #fp_binop{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#fp_binop{src=Src,dst=Dst}; + #imul{src=Src0,temp=Temp0} -> + Src = ra_opnd(Src0, Map), + Temp = ra_temp(Temp0, Map), + I#imul{src=Src,temp=Temp}; + #jcc{} -> + I; + #jmp_fun{'fun'=Fun0} -> + Fun = ra_opnd(Fun0, Map), + I#jmp_fun{'fun'=Fun}; + #jmp_label{} -> + I; + #jmp_switch{temp=Temp0,jtab=JTab0} -> + Temp = ra_opnd(Temp0, Map), + JTab = ra_opnd(JTab0, Map), + I#jmp_switch{temp=Temp,jtab=JTab}; + #label{} -> + I; + #lea{mem=Mem0,temp=Temp0} -> + Mem = ra_mem(Mem0, Map), + Temp = ra_temp(Temp0, Map), + I#lea{mem=Mem,temp=Temp}; + #move{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#move{src=Src,dst=Dst}; + #move64{dst=Dst0} -> + Dst = ra_opnd(Dst0, Map), + I#move64{dst=Dst}; + #movsx{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#movsx{src=Src,dst=Dst}; + #movzx{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#movzx{src=Src,dst=Dst}; + #pseudo_call{'fun'=Fun0} -> + Fun = ra_opnd(Fun0, Map), + I#pseudo_call{'fun'=Fun}; + #pseudo_jcc{} -> + I; + #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} -> + Fun = ra_opnd(Fun0, Map), + StkArgs = ra_args(StkArgs0, Map), + I#pseudo_tailcall{'fun'=Fun,stkargs=StkArgs}; + #pseudo_tailcall_prepare{} -> + I; + #push{src=Src0} -> + Src = ra_opnd(Src0, Map), + I#push{src=Src}; + #ret{} -> + I; + #shift{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#shift{src=Src,dst=Dst}; + _ -> + exit({?MODULE,ra_insn,I}) + end. + +ra_args(Args, Map) -> + [ra_opnd(Opnd, Map) || Opnd <- Args]. + +ra_opnd(Opnd, Map) -> + ra_opnd(Opnd, Map, gb_trees:empty()). +ra_opnd(Opnd, Map, FpMap) -> + case Opnd of + #x86_temp{} -> ra_temp(Opnd, Map, FpMap); + #x86_mem{} -> ra_mem(Opnd, Map); + _ -> Opnd + end. + +ra_mem(Mem, Map) -> + #x86_mem{base=Base0,off=Off0} = Mem, + Base = ra_opnd(Base0, Map), + Off = ra_opnd(Off0, Map), + Mem#x86_mem{base=Base,off=Off}. + +ra_temp(Temp, Map) -> + ra_temp(Temp, Map, gb_trees:empty()). + +ra_temp(Temp, Map, FpMap) -> + Reg = hipe_x86:temp_reg(Temp), + case hipe_x86:temp_type(Temp) of + double -> + ra_temp_double(Temp, Reg, FpMap); + _-> + case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of + true -> + Temp; + _ -> + case gb_trees:lookup(Reg, Map) of + {value,NewReg} -> Temp#x86_temp{reg=NewReg}; + _ -> Temp + end + end + end. + +-ifdef(HIPE_AMD64). +ra_temp_double(Temp, Reg, FpMap) -> + case hipe_amd64_registers:is_precoloured_sse2(Reg) of + true -> + Temp; + _ -> + case gb_trees:lookup(Reg, FpMap) of + {value,NewReg} -> Temp#x86_temp{reg=NewReg}; + _ -> Temp + end + end. +-else. +ra_temp_double(Temp, Reg, FpMap) -> + case gb_trees:lookup(Reg, FpMap) of + {value,NewReg} -> + case hipe_x86_registers:is_precoloured_x87(NewReg) of + true -> hipe_x86:mk_fpreg(NewReg); + false -> + Temp#x86_temp{reg=NewReg} + end; + _ -> + Temp + end. +-endif. + +mk_ra_map(TempMap, SpillLimit) -> + %% Build a partial map from pseudo to reg or spill. + %% Spills are represented as pseudos with indices above SpillLimit. + %% (I'd prefer to use negative indices, but that breaks + %% ?HIPE_X86_REGISTERS:is_precoloured/1.) + %% The frame mapping proper is unchanged, since spills look just like + %% ordinary (un-allocated) pseudos. + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + TempMap). + +conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> + %% From should be a pseudo, or a hard reg mapped to itself. + if is_integer(From), From =< SpillLimit -> + case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of + false -> []; + _ -> + case To of + {reg, From} -> []; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + %% NewReg should be a hard reg, or a pseudo mapped + %% to itself (formals are handled this way). + if is_integer(NewReg) -> + case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of + true -> []; + _ -> if From =:= NewReg -> []; + true -> + exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of NewReg check + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. + if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(x86), + if MaxTempNum >= ToTempNum -> ok; + true -> hipe_gensym:set_var(x86, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end. + +mk_ra_map_x87(FpMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_x87), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FpMap). + +-ifdef(HIPE_AMD64). +mk_ra_map_sse2(FpMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_sse2), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FpMap). + +mk_ra_map_fp(FpMap, SpillLimit, Options) -> + case proplists:get_bool(x87, Options) of + true -> mk_ra_map_x87(FpMap, SpillLimit); + false -> mk_ra_map_sse2(FpMap, SpillLimit) + end. +-else. +mk_ra_map_fp(FpMap, SpillLimit, _Options) -> + mk_ra_map_x87(FpMap, SpillLimit). +-endif. + +-ifdef(notdef). +conv_ra_maplet_fp(MapLet = {From,To}, SpillLimit) -> + %% From should be a pseudo + if is_integer(From), From =< SpillLimit -> []; + true -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + case hipe_x86_registers:is_precoloured_x87(NewReg) of + true-> []; + false -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end, + %% end of NewReg check. + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. + if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(x86), + if MaxTempNum >= ToTempNum -> []; + true -> hipe_gensym:set_var(x86, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end. +-endif. diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl new file mode 100644 index 0000000000..ab7b6708ad --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -0,0 +1,85 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Linear Scan register allocator for x86 + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-else. +-define(HIPE_X86_RA_LS, hipe_x86_ra_ls). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-endif. + +-module(?HIPE_X86_RA_LS). +-export([ra/3,regalloc/7]). +-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun, SpillIndex, Options) -> + NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), + CFG = hipe_x86_cfg:init(NewDefun), + + SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( + CFG), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + alloc(NewDefun, SpillIndex, SpillLimit, Options). + + +alloc(Defun, SpillIndex, SpillLimit, Options) -> + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + CFG = hipe_x86_cfg:init(Defun), + {Coloring, NewSpillIndex} = + regalloc( + CFG, + ?HIPE_X86_REGISTERS:allocatable()-- + [?HIPE_X86_REGISTERS:temp1(), + ?HIPE_X86_REGISTERS:temp0()], + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + ?HIPE_X86_SPECIFIC), + {NewDefun, _DidSpill} = + ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( + Defun, Coloring, 'linearscan'), + %% ?HIPE_X86_PP:pp(NewDefun), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + ?HIPE_X86_SPECIFIC, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + case proplists:get_bool(verbose_spills, Options) of + true -> + ?msg("Stack slot size: ~p~n",[NewSpillIndex2-SpillIndex]); + false -> + ok + end, + ?add_spills(Options, NewSpillIndex), + {NewDefun, Coloring2}. + +regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> + hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, + DontSpill, Options, Target). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl new file mode 100644 index 0000000000..e9b99cd2c5 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -0,0 +1,409 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% simple local x86 regalloc + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_SPECIFIC_FP, hipe_amd64_specific_sse2). +-define(ECX, rcx). +-else. +-define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_SPECIFIC_FP, hipe_x86_specific_x87). +-define(ECX, ecx). +-endif. + +-module(?HIPE_X86_RA_NAIVE). +-export([ra/3]). + +-include("../x86/hipe_x86.hrl"). +-define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation +-include("../main/hipe.hrl"). + +ra(X86Defun, Coloring_fp, Options) -> + #defun{code=Code0} = X86Defun, + Code1 = do_insns(Code0), + NofSpilledFloats = count_non_float_spills(Coloring_fp), + NofFloats = length(Coloring_fp), + ?add_spills(Options, hipe_gensym:get_var(x86) - + ?HIPE_X86_REGISTERS:first_virtual()- + NofSpilledFloats - + NofFloats), + TempMap = [], + {X86Defun#defun{code=Code1, + var_range={0, hipe_gensym:get_var(x86)}}, + TempMap}. + +count_non_float_spills(Coloring_fp) -> + count_non_float_spills(Coloring_fp, 0). + +count_non_float_spills([{_,To}|Tail], Num) -> + case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of + true -> + count_non_float_spills(Tail, Num); + false -> + count_non_float_spills(Tail, Num+1) + end; +count_non_float_spills([], Num) -> + Num. + +do_insns([I|Insns]) -> + do_insn(I) ++ do_insns(Insns); +do_insns([]) -> + []. + +do_insn(I) -> % Insn -> Insn list + case I of + #alu{} -> + do_alu(I); + #cmp{} -> + do_cmp(I); + #imul{} -> + do_imul(I); + #jmp_switch{} -> + do_jmp_switch(I); + #lea{} -> + do_lea(I); + #move{} -> + do_move(I); + #move64{} -> + do_move64(I); + #movzx{} -> + do_movx(I); + #movsx{} -> + do_movx(I); + #fmove{} -> + do_fmove(I); + #fp_unop{} -> + do_fp_unop(I); + #fp_binop{} -> + do_fp_binop(I); + #shift{} -> + do_shift(I); + #label{} -> + [I]; + #pseudo_jcc{} -> + [I]; + #pseudo_call{} -> + [I]; + #ret{} -> + [I]; + #pseudo_tailcall_prepare{} -> + [I]; + #pseudo_tailcall{} -> + [I]; + #push{} -> + [I]; + #jmp_label{} -> + [I]; + #comment{} -> + [I]; + _ -> + io:format("Unknown Instruction = ~w\n", [I]), + exit({?MODULE, unknown_instruction, I}) + end. + +%%% Fix an alu op. + +do_alu(I) -> + #alu{src=Src0,dst=Dst0} = I, + {FixSrc,Src,FixDst,Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}]. + +%%% Fix a cmp op. + +do_cmp(I) -> + #cmp{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}]. + +%%% Fix an imul op. + +do_imul(I) -> + #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I, + {FixSrc,Src} = fix_src_operand(Src0), % may use temp0 + {FixTempSrc,Temp,FixTempDst} = + case temp_is_pseudo(Temp0) of + false -> + {[], Temp0, []}; + true -> + Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), 'untagged'), + {case ImmOpt of + [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src + _ -> [] % temp = src * imm + end, + Reg, + [hipe_x86:mk_move(Reg, Temp0)]} + end, + FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst. + +%%% Fix a jmp_switch op. + +-ifdef(HIPE_AMD64). +do_jmp_switch(I) -> + #jmp_switch{temp=Temp, jtab=Tab} = I, + case temp_is_pseudo(Temp) of + false -> + case temp_is_pseudo(Tab) of + false -> + [I]; + true -> + Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'), + [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{jtab=Reg}] + end; + true -> + Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp1(), 'untagged'), + case temp_is_pseudo(Tab) of + false -> + [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}]; + true -> + Reg2 = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'), + [hipe_x86:mk_move(Temp, Reg), + hipe_x86:mk_move(Tab, Reg2), + I#jmp_switch{temp=Reg, jtab=Reg2}] + end + end. +-else. +do_jmp_switch(I) -> + #jmp_switch{temp=Temp} = I, + case temp_is_pseudo(Temp) of + false -> + [I]; + true -> + Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'), + [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}] + end. +-endif. + +%%% Fix a lea op. + +do_lea(I) -> + #lea{temp=Temp} = I, + case temp_is_pseudo(Temp) of + false -> + [I]; + true -> + Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'), + [I#lea{temp=Reg}, hipe_x86:mk_move(Reg, Temp)] + end. + +%%% Fix a move op. + +do_move(I) -> + #move{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}]. + +-ifdef(HIPE_AMD64). +do_move64(I) -> + #move64{dst=Dst} = I, + case is_mem_opnd(Dst) of + false -> + [I]; + true -> + Reg = hipe_amd64_registers:temp1(), + NewDst = clone(Dst, Reg), + [I#move64{dst=NewDst}, hipe_x86:mk_move(NewDst, Dst)] + end. +-else. +do_move64(I) -> exit({?MODULE, I}). +-endif. + +do_movx(I) -> + {{FixSrc, Src}, {FixDst, Dst}} = + case I of + #movsx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0), fix_dst_operand(Dst0)}; + #movzx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0), fix_dst_operand(Dst0)} + end, + Reg = ?HIPE_X86_REGISTERS:temp0(), + Dst2 = clone(Dst, Reg), + I2 = case is_mem_opnd(Dst) of + true -> + Reg = ?HIPE_X86_REGISTERS:temp0(), + Dst2 = clone(Dst, Reg), + case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)] + end; + false -> + case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst)] + end + end, + FixSrc ++ FixDst ++ I2. + + +%%% Fix a fmove op. +%% conv_to_float +do_fmove(I=#fmove{src=#x86_temp{type=untagged}, + dst=#x86_temp{type=double}}) -> + #fmove{src=Src0,dst=Dst0} = I, + Src = clone(Src0, ?HIPE_X86_REGISTERS:temp0()), + Dst = clone(Dst0, ?HIPE_X86_REGISTERS:temp1()), + [hipe_x86:mk_move(Src0, Src), + I#fmove{src=Src, dst=Dst}, + hipe_x86:mk_fmove(Dst, Dst0)]; +%% fmove +do_fmove(I) -> + #fmove{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}]. + +do_fp_unop(I) -> + #fp_unop{arg=Arg} = I, + case is_mem_opnd(Arg) of + false -> + [I]; + true -> + Reg = ?HIPE_X86_REGISTERS:temp1(), + NewArg = clone(Arg, Reg), + [hipe_x86:mk_fmove(Arg, NewArg), + I#fp_unop{arg=NewArg}, + hipe_x86:mk_fmove(NewArg, Arg)] + end. + +do_fp_binop(I) -> + #fp_binop{src=Src0, dst=Dst0} = I, + {FixSrc, Src} = fix_src_operand(Src0), + {FixDst, Dst} = fix_dst_operand(Dst0), + Reg = ?HIPE_X86_REGISTERS:temp1(), + Dst2 = clone(Dst, Reg), + FixSrc ++ FixDst ++ [hipe_x86:mk_fmove(Dst, Dst2), + I#fp_binop{src=Src, dst=Dst2}, + hipe_x86:mk_fmove(Dst2, Dst)]. + +do_shift(I) -> + #shift{src=Src0,dst=Dst0} = I, + {FixDst, Dst} = fix_dst_operand(Dst0), + Reg = ?HIPE_X86_REGISTERS:?ECX(), + case Src0 of + #x86_imm{} -> + FixDst ++ [I#shift{dst=Dst}]; + #x86_temp{reg=Reg} -> + FixDst ++ [I#shift{dst=Dst}] + end. + +%%% Fix the operands of a binary op. +%%% 1. remove pseudos from any explicit memory operands +%%% 2. if both operands are (implicit or explicit) memory operands, +%%% move src to a reg and use reg as src in the original insn + +do_binary(Src0, Dst0) -> + {FixSrc, Src} = fix_src_operand(Src0), + {FixDst, Dst} = fix_dst_operand(Dst0), + {FixSrc3, Src3} = + case is_mem_opnd(Src) of + false -> + {FixSrc, Src}; + true -> + case is_mem_opnd(Dst) of + false -> + {FixSrc, Src}; + true -> + Reg = ?HIPE_X86_REGISTERS:temp0(), + Src2 = clone(Src, Reg), + FixSrc2 = FixSrc ++ [mk_move(Src, Src2)], + {FixSrc2, Src2} + end + end, + {FixSrc3, Src3, FixDst, Dst}. + +%%% Fix any x86_mem operand to not refer to any pseudos. +%%% The fixup may use additional instructions and registers. +%%% 'src' operands may clobber '%temp0'. +%%% 'dst' operands may clobber '%temp1'. + +fix_src_operand(Opnd) -> + fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp0()). + +fix_dst_operand(Opnd) -> + fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp1()). + +fix_mem_operand(Opnd, Reg) -> % -> {[fixupcode], newop} + case Opnd of + #x86_mem{base=Base,off=Off} -> + case is_mem_opnd(Base) of + false -> + case src_is_pseudo(Off) of + false -> + {[], Opnd}; + true -> % pseudo(reg) + Temp = clone(Off, Reg), + {[hipe_x86:mk_move(Off, Temp)], + Opnd#x86_mem{off=Temp}} + end; + true -> + Temp = clone(Base, Reg), + case src_is_pseudo(Off) of + false -> % imm/reg(pseudo) + {[hipe_x86:mk_move(Base, Temp)], + Opnd#x86_mem{base=Temp}}; + true -> % pseudo1(pseudo0) + {[hipe_x86:mk_move(Base, Temp), + hipe_x86:mk_alu('add', Off, Temp)], + Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}} + end + end; + _ -> + {[], Opnd} + end. + +%%% Check if an operand denotes a memory cell (mem or pseudo). + +is_mem_opnd(Opnd) -> + case Opnd of + #x86_mem{} -> true; + #x86_temp{} -> temp_is_pseudo(Opnd); + _ -> false + end. + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + case hipe_x86:is_temp(Src) of + true -> temp_is_pseudo(Src); + false -> false + end. + +temp_is_pseudo(Temp) -> + not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))). + +%%% Make Reg a clone of Dst (attach Dst's type to Reg). + +clone(Dst, Reg) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + hipe_x86:mk_temp(Reg, Type). + +mk_move(Src, Dst=#x86_temp{type=double}) -> + hipe_x86:mk_fmove(Src, Dst); +mk_move(Src, Dst) -> + hipe_x86:mk_move(Src, Dst). diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl new file mode 100644 index 0000000000..0b70764daf --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -0,0 +1,452 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-define(ECX, rcx). +-else. +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-define(ECX, ecx). +-endif. + +-module(?HIPE_X86_RA_POSTCONDITIONS). + +-export([check_and_rewrite/3]). + +-include("../x86/hipe_x86.hrl"). +-define(HIPE_INSTRUMENT_COMPILER, true). +-include("../main/hipe.hrl"). +-define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). + +check_and_rewrite(Defun, Coloring, Strategy) -> + %% io:format("Converting\n"), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + %% io:format("Rewriting\n"), + #defun{code=Code0} = Defun, + {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), + {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, + DidSpill}. + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} + case I of + #alu{} -> + do_alu(I, TempMap, Strategy); + #cmp{} -> + do_cmp(I, TempMap, Strategy); + #imul{} -> + do_imul(I, TempMap, Strategy); + #jmp_switch{} -> + do_jmp_switch(I, TempMap, Strategy); + #lea{} -> + do_lea(I, TempMap, Strategy); + #move{} -> + do_move(I, TempMap, Strategy); + #move64{} -> + do_move64(I, TempMap, Strategy); + #movsx{} -> + do_movx(I, TempMap, Strategy); + #movzx{} -> + do_movx(I, TempMap, Strategy); + #fmove{} -> + do_fmove(I, TempMap, Strategy); + #shift{} -> + do_shift(I, TempMap, Strategy); + _ -> + %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall, + %% pseudo_tailcall_prepare, push, ret + {[I], false} + end. + +%%% Fix an alu op. + +do_alu(I, TempMap, Strategy) -> + #alu{src=Src0,dst=Dst0} = I, + {FixSrc,Src,FixDst,Dst,DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}], DidSpill}. + +%%% Fix a cmp op. + +do_cmp(I, TempMap, Strategy) -> + #cmp{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}], DidSpill}. + +%%% Fix an imul op. + +do_imul(I, TempMap, Strategy) -> + #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I, + {FixSrc,Src,DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), % temp1 + {FixTempSrc,Temp,FixTempDst,DidSpill2} = + case is_spilled(Temp0, TempMap) of + false -> + {[], Temp0, [], false}; + true -> + Reg = spill_temp0('untagged', Strategy), + {case ImmOpt of + [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src + _ -> [] % temp = src * imm + end, + Reg, + [hipe_x86:mk_move(Reg, Temp0)], + true} + end, + {FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst, + DidSpill1 or DidSpill2}. + +%%% Fix a jmp_switch op. + +-ifdef(HIPE_AMD64). +do_jmp_switch(I, TempMap, Strategy) -> + #jmp_switch{temp=Temp, jtab=Tab} = I, + case is_spilled(Temp, TempMap) of + false -> + case is_spilled(Tab, TempMap) of + false -> + {[I], false}; + true -> + NewTab = spill_temp('untagged', Strategy), + {[hipe_x86:mk_move(Tab, NewTab), I#jmp_switch{jtab=Tab}], + true} + end; + true -> + case is_spilled(Tab, TempMap) of + false -> + NewTmp = spill_temp('untagged', Strategy), + {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}], + true}; + true -> + NewTmp = spill_temp('untagged', Strategy), + NewTab = spill_temp0('untagged', Strategy), + {[hipe_x86:mk_move(Temp, NewTmp), + hipe_x86:mk_move(Tab, NewTab), + I#jmp_switch{temp=NewTmp, jtab=NewTab}], + true} + end + end. +-else. % not AMD64 +do_jmp_switch(I, TempMap, Strategy) -> + #jmp_switch{temp=Temp} = I, + case is_spilled(Temp, TempMap) of + false -> + {[I], false}; + true -> + NewTmp = spill_temp('untagged', Strategy), + {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}], + true} + end. +-endif. % not AMD64 + +%%% Fix a lea op. + +do_lea(I, TempMap, Strategy) -> + #lea{temp=Temp} = I, + case is_spilled(Temp, TempMap) of + false -> + {[I], false}; + true -> + NewTmp = spill_temp('untagged', Strategy), + {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], + true} + end. + +%%% Fix a move op. + +do_move(I, TempMap, Strategy) -> + #move{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_check_byte_move(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], + DidSpill}. + +-ifdef(HIPE_AMD64). + +%%% AMD64 has no issues with byte moves. +do_check_byte_move(Src0, Dst0, TempMap, Strategy) -> + do_binary(Src0, Dst0, TempMap, Strategy). + +-else. % not AMD64 + +%%% x86 can only do byte moves to a subset of the integer registers. +do_check_byte_move(Src0, Dst0, TempMap, Strategy) -> + case Dst0 of + #x86_mem{type=byte} -> + do_byte_move(Src0, Dst0, TempMap, Strategy); + _ -> + do_binary(Src0, Dst0, TempMap, Strategy) + end. + +do_byte_move(Src0, Dst0, TempMap, Strategy) -> + {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), + {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), + Reg = hipe_x86_registers:eax(), + {FixSrc3, Src3} = % XXX: this just checks Src, the result is known! + case Src of + #x86_imm{} -> + {FixSrc, Src}; + #x86_temp{reg=Reg} -> % small moves must start from reg 1->4 + {FixSrc, Src} % so variable sources are always put in eax + end, + {FixSrc3, Src3, FixDst, Dst, + DidSpill2 or DidSpill1}. + +-endif. % not AMD64 + +%%% Fix a move64 op. + +do_move64(I, TempMap, Strategy) -> + #move64{dst=Dst} = I, + case is_spilled(Dst, TempMap) of + false -> + {[I], false}; + true -> + Reg = clone(Dst, Strategy), + {[I#move64{dst=Reg}, hipe_x86:mk_move(Reg, Dst)], true} + end. + +%%% Fix a movx op. + +do_movx(I, TempMap, Strategy) -> + {{FixSrc, Src, DidSpill1}, {FixDst, Dst, DidSpill2}} = + case I of + #movsx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0, TempMap, Strategy), + fix_dst_operand(Dst0, TempMap, Strategy)}; + #movzx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0, TempMap, Strategy), + fix_dst_operand(Dst0, TempMap, Strategy)} + end, + {I3, DidSpill3} = + case is_spilled(Dst, TempMap) of + false -> + I2 = case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst)] + end, + {I2, false}; + true -> + Dst2 = clone(Dst, Strategy), + I2 = + case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)] + end, + {I2, true} + end, + {FixSrc++FixDst++I3, + DidSpill3 or DidSpill2 or DidSpill1}. + +%%% Fix an fmove op. + +do_fmove(I, TempMap, Strategy) -> + #fmove{src=Src0,dst=Dst0} = I, + {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), + {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), + %% fmoves from memory position to memory position is handled + %% by the f.p. register allocator. + {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}], + DidSpill1 or DidSpill2}. + +%%% Fix a shift operation. +%%% 1. remove pseudos from any explicit memory operands +%%% 2. if the source is a register or memory position +%%% make sure to move it to %ecx + +do_shift(I, TempMap, Strategy) -> + #shift{src=Src0,dst=Dst0} = I, + {FixDst, Dst, DidSpill} = fix_dst_operand(Dst0, TempMap, Strategy), + Reg = ?HIPE_X86_REGISTERS:?ECX(), + case Src0 of + #x86_imm{} -> + {FixDst ++ [I#shift{dst=Dst}], DidSpill}; + #x86_temp{reg=Reg} -> + {FixDst ++ [I#shift{dst=Dst}], DidSpill} + end. + +%%% Fix the operands of a binary op. +%%% 1. remove pseudos from any explicit memory operands +%%% 2. if both operands are (implicit or explicit) memory operands, +%%% move src to a reg and use reg as src in the original insn + +do_binary(Src0, Dst0, TempMap, Strategy) -> + {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), + {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), + {FixSrc3, Src3, DidSpill3} = + case is_mem_opnd(Src, TempMap) of + false -> + {FixSrc, Src, false}; + true -> + case is_mem_opnd(Dst, TempMap) of + false -> + {FixSrc, Src, false}; + true -> + Src2 = clone(Src, Strategy), + FixSrc2 = FixSrc ++ [hipe_x86:mk_move(Src, Src2)], + {FixSrc2, Src2, true} + end + end, + {FixSrc3, Src3, FixDst, Dst, + DidSpill3 or DidSpill2 or DidSpill1}. + +%%% Fix any x86_mem operand to not refer to any spilled temps. + +fix_src_operand(Opnd, TmpMap, Strategy) -> + fix_mem_operand(Opnd, TmpMap, temp1(Strategy)). + +temp1('normal') -> []; +temp1('linearscan') -> ?HIPE_X86_REGISTERS:temp1(). + +fix_dst_operand(Opnd, TempMap, Strategy) -> + fix_mem_operand(Opnd, TempMap, temp0(Strategy)). + +temp0('normal') -> []; +temp0('linearscan') -> ?HIPE_X86_REGISTERS:temp0(). + +fix_mem_operand(Opnd, TempMap, RegOpt) -> % -> {[fixupcode], newop, DidSpill} + case Opnd of + #x86_mem{base=Base,off=Off} -> + case is_mem_opnd(Base, TempMap) of + false -> + case is_mem_opnd(Off, TempMap) of + false -> + {[], Opnd, false}; + true -> + Temp = clone2(Off, RegOpt), + {[hipe_x86:mk_move(Off, Temp)], + Opnd#x86_mem{off=Temp}, + true} + end; + true -> + Temp = clone2(Base, RegOpt), + case is_mem_opnd(Off, TempMap) of + false -> % imm/reg(pseudo) + {[hipe_x86:mk_move(Base, Temp)], + Opnd#x86_mem{base=Temp}, + true}; + true -> % pseudo(pseudo) + {[hipe_x86:mk_move(Base, Temp), + hipe_x86:mk_alu('add', Off, Temp)], + Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}, + true} + end + end; + _ -> + {[], Opnd, false} + end. + +%%% Check if an operand denotes a memory cell (mem or pseudo). + +is_mem_opnd(Opnd, TempMap) -> + R = + case Opnd of + #x86_mem{} -> true; + #x86_temp{} -> + Reg = hipe_x86:temp_reg(Opnd), + case hipe_x86:temp_is_allocatable(Opnd) of + true -> + case tuple_size(TempMap) > Reg of + true -> + case + hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> false + end; + _ -> + %% impossible, but was true in ls post and false in normal post + exit({?MODULE,is_mem_opnd,Reg}), + false + end; + false -> true + end; + _ -> false + end, + %% io:format("Op ~w mem: ~w\n",[Opnd,R]), + R. + +%%% Check if an operand is a spilled Temp. + +is_spilled(Temp, TempMap) -> + case hipe_x86:temp_is_allocatable(Temp) of + true -> + Reg = hipe_x86:temp_reg(Temp), + case tuple_size(TempMap) > Reg of + true -> + case hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> + false + end; + false -> + false + end; + false -> true + end. + +%%% Make Reg a clone of Dst (attach Dst's type to Reg). + +clone(Dst, Strategy) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + spill_temp(Type, Strategy). + +spill_temp0(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp0(Type, 'linearscan') -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). + +spill_temp(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp(Type, 'linearscan') -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). + +%%% Make a certain reg into a clone of Dst + +clone2(Dst, RegOpt) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + case RegOpt of + [] -> hipe_x86:mk_new_temp(Type); + Reg -> hipe_x86:mk_temp(Reg, Type) + end. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl new file mode 100644 index 0000000000..6bdb08c6fb --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl @@ -0,0 +1,63 @@ +%% $Id$ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% Linear Scan register allocator for x87 + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). +-define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). +-else. +-define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). +-define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_RA_LS, hipe_x86_ra_ls). +-endif. + +-module(?HIPE_X86_RA_X87_LS). +-export([ra/2]). + +%%-define(DEBUG,1). + +-define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun, Options) -> + ?inc_counter(ra_calls_counter,1), + CFG = hipe_x86_cfg:init(Defun), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? + + {Coloring,NewSpillIndex} = + ?HIPE_X86_RA_LS:regalloc(Cfg, + ?HIPE_X86_SPECIFIC_X87:allocatable(), + [hipe_x86_cfg:start_label(Cfg)], + SpillIndex, SpillLimit, Options, + ?HIPE_X86_SPECIFIC_X87), + + ?add_spills(Options, NewSpillIndex), + {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl new file mode 100644 index 0000000000..1cfa095995 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -0,0 +1,254 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% TODO: +%%% - Do we need a pseudo reg for the condition codes? + +-module(hipe_x86_registers). + +-export([reg_name/1, + first_virtual/0, + is_precoloured/1, + is_precoloured_x87/1, + all_precoloured/0, + eax/0, + ecx/0, + temp0/0, + temp1/0, + sp/0, + proc_pointer/0, + heap_limit/0, + fcalls/0, + proc_offset/1, + sp_limit_offset/0, + is_fixed/1, + %% fixed/0, + allocatable/0, + allocatable_x87/0, + nr_args/0, + arg/1, + is_arg/1, + args/1, + nr_rets/0, + ret/1, + call_clobbered/0, + tailcall_clobbered/0, + live_at_return/0, + float_size/0, + wordsize/0, + alignment/0]). + +-include("../rtl/hipe_literals.hrl"). + +-ifdef(X86_HP_IN_ESI). +-export([heap_pointer/0]). +-endif. + +-define(EAX, 0). +-define(ECX, 1). +-define(EDX, 2). +-define(EBX, 3). +-define(ESP, 4). +-define(EBP, 5). +-define(ESI, 6). +-define(EDI, 7). +-define(FCALLS, 8). % proc field alias +-define(HEAP_LIMIT, 9). % proc field alias +-define(LAST_PRECOLOURED, 9). + +-define(ARG0, ?EAX). +-define(ARG1, ?EDX). +-define(ARG2, ?ECX). +-define(ARG3, ?EBX). +-define(ARG4, ?EDI). + +-define(RET0, ?EAX). +-define(RET1, ?EDX). +-define(RET2, ?ECX). +-define(RET3, ?EBX). +-define(RET4, ?EDI). + +-define(TEMP0, ?EBX). % XXX: was EAX +-define(TEMP1, ?EDI). % XXX: was EDX then EDI + +-define(PROC_POINTER, ?EBP). + +reg_name(R) -> + case R of + ?EAX -> "%eax"; + ?ECX -> "%ecx"; + ?EDX -> "%edx"; + ?EBX -> "%ebx"; + ?ESP -> "%esp"; + ?EBP -> "%ebp"; + ?ESI -> "%esi"; + ?EDI -> "%edi"; + ?FCALLS -> "%fcalls"; + ?HEAP_LIMIT -> "%hplim"; + Other -> "%r" ++ integer_to_list(Other) + end. + +first_virtual() -> ?LAST_PRECOLOURED + 1. + +is_precoloured(X) -> X =< ?LAST_PRECOLOURED. + +is_precoloured_x87(X) -> X =< 6. + +all_precoloured() -> + [?EAX, + ?ECX, + ?EDX, + ?EBX, + ?ESP, + ?EBP, + ?ESI, + ?EDI, + ?FCALLS, + ?HEAP_LIMIT]. + +eax() -> ?EAX. +ecx() -> ?ECX. +temp0() -> ?TEMP0. +temp1() -> ?TEMP1. +sp() -> ?ESP. +proc_pointer() -> ?PROC_POINTER. +fcalls() -> ?FCALLS. +heap_limit() -> ?HEAP_LIMIT. + +-ifdef(X86_HP_IN_ESI). +-define(ESI_IS_FIXED,1). +-define(HEAP_POINTER, ?ESI). +heap_pointer() -> ?HEAP_POINTER. +is_heap_pointer(?HEAP_POINTER) -> true; +is_heap_pointer(_) -> false. +-define(LIST_HP_FIXED,[?HEAP_POINTER]). +-define(LIST_HP_LIVE_AT_RETURN,[{?HEAP_POINTER,untagged}]). +-else. +is_heap_pointer(_) -> false. +-define(LIST_HP_FIXED,[]). +-define(LIST_HP_LIVE_AT_RETURN,[]). +-endif. + +-ifdef(ESI_IS_FIXED). +-define(LIST_ESI_ALLOCATABLE,[]). +-define(LIST_ESI_CALL_CLOBBERED,[]). +-else. +-define(LIST_ESI_ALLOCATABLE,[?ESI]). +-define(LIST_ESI_CALL_CLOBBERED,[{?ESI,tagged},{?ESI,untagged}]). +-endif. + +proc_offset(?FCALLS) -> ?P_FCALLS; +proc_offset(?HEAP_LIMIT) -> ?P_HP_LIMIT; +proc_offset(_) -> false. + +sp_limit_offset() -> ?P_NSP_LIMIT. + +is_fixed(?ESP) -> true; +is_fixed(?PROC_POINTER) -> true; +is_fixed(?FCALLS) -> true; +is_fixed(?HEAP_LIMIT) -> true; +is_fixed(R) -> is_heap_pointer(R). + +%% fixed() -> +%% [?ESP, ?PROC_POINTER, ?FCALLS, ?HEAP_LIMIT | ?LIST_HP_FIXED]. + +allocatable() -> + [?EDX, ?ECX, ?EBX, ?EAX, ?EDI| ?LIST_ESI_ALLOCATABLE]. + +allocatable_x87() -> + [0,1,2,3,4,5,6]. + +nr_args() -> ?X86_NR_ARG_REGS. + +arg(N) -> + if N < ?X86_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + _ -> exit({?MODULE, arg, N}) + end; + true -> + exit({?MODULE, arg, N}) + end. + +is_arg(R) -> + case R of + ?ARG0 -> ?X86_NR_ARG_REGS > 0; + ?ARG1 -> ?X86_NR_ARG_REGS > 1; + ?ARG2 -> ?X86_NR_ARG_REGS > 2; + ?ARG3 -> ?X86_NR_ARG_REGS > 3; + ?ARG4 -> ?X86_NR_ARG_REGS > 4; + _ -> false + end. + +args(Arity) when is_integer(Arity), Arity >= 0 -> + N = erlang:min(Arity, ?X86_NR_ARG_REGS), + args(N-1, []). + +args(I, Rest) when I < 0 -> Rest; +args(I, Rest) -> args(I-1, [arg(I) | Rest]). + +nr_rets() -> ?X86_NR_RET_REGS. + +ret(N) -> + if N < ?X86_NR_RET_REGS -> + case N of + 0 -> ?RET0; + 1 -> ?RET1; + 2 -> ?RET2; + 3 -> ?RET3; + 4 -> ?RET4; + _ -> exit({?MODULE, ret, N}) + end; + true -> + exit({?MODULE, ret, N}) + end. + +call_clobbered() -> + [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? + {?EDX,tagged},{?EDX,untagged}, + {?ECX,tagged},{?ECX,untagged}, + {?EBX,tagged},{?EBX,untagged}, + {?EDI,tagged},{?EDI,untagged} + | ?LIST_ESI_CALL_CLOBBERED] ++ all_x87_pseudos(). + +tailcall_clobbered() -> % tailcall crapola needs two temps + [{?TEMP0,tagged},{?TEMP0,untagged}, + {?TEMP1,tagged},{?TEMP1,untagged}] ++ all_x87_pseudos(). + +all_x87_pseudos() -> + [{0,double}, {1,double}, {2,double}, {3,double}, + {4,double}, {5,double}, {6,double}]. + +live_at_return() -> + [{?ESP,untagged} + ,{?PROC_POINTER,untagged} + ,{?FCALLS,untagged} + ,{?HEAP_LIMIT,untagged} + | ?LIST_HP_LIVE_AT_RETURN + ]. + +alignment() -> 4. + +float_size() -> 8. + +wordsize() -> 4. diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl new file mode 100644 index 0000000000..e60c446e17 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -0,0 +1,345 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ==================================================================== +%% Authors : Dogan Yazar and Erdem Aksu (KT2 project of 2008) +%% ==================================================================== + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(X86STR, "amd64"). +-else. +-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(X86STR, "x86"). +-endif. + +-module(?HIPE_X86_SPILL_RESTORE). + +-export([spill_restore/2]). + +%% controls which set library is used to keep temp variables. +-define(SET_MODULE, ordsets). + +%% Turn on instrumentation. +-define(HIPE_INSTRUMENT_COMPILER, true). + +-include("../main/hipe.hrl"). +-include("../x86/hipe_x86.hrl"). % Added for the definition of #pseudo_call{} +-include("../flow/cfg.hrl"). % Added for the definition of #cfg{} + +%% Main function +spill_restore(Defun, Options) -> + CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), + CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), + hipe_x86_cfg:linearise(CFGFinal). + +%% Performs the first pass of the algorithm. +%% By working bottom up, introduce the pseudo_spills. +firstPass(Defun) -> + CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), + %% get the labels bottom up + Labels = hipe_x86_cfg:postorder(CFG0), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + %% spill around the function will be introduced below the move + %% formals, so get all labels except it. + LabelsExceptMoveFormals = lists:sublist(Labels, length(Labels)-1), + %% all work is done by the helper function firstPassHelper + %% saveTree keeps the all newly introduced spills. Keys are the labels. + {CFG1, SaveTree} = firstPassHelper(LabelsExceptMoveFormals, Liveness, CFG0), + case hipe_x86_cfg:reverse_postorder(CFG0) of + [Label1, Label2|_] -> + SaveTreeElement = saveTreeLookup(Label2, SaveTree), + %% FilteredSaveTreeElement is the to be spilled temps around the function call. + %% They are spilled just before move formals + FilteredSaveTreeElement = [Temp || Temp <- SaveTreeElement, temp_is_pseudo(Temp)], + Block = hipe_x86_cfg:bb(CFG1, Label1), + Code = hipe_bb:code(Block), + %% The following statements are tedious but work ok. + %% Put spills between move formals and the jump code. + %% This disgusting thing is done because spills should be + %% introduced after move formals. + %% Another solution may be to introduce another block. + MoveCodes = lists:sublist(Code, length(Code)-1), + JumpCode = lists:last(Code), + hipe_x86_cfg:bb_add(CFG1, Label1, hipe_bb:mk_bb(MoveCodes ++ [hipe_x86:mk_pseudo_spill(FilteredSaveTreeElement)] ++ [JumpCode])); + _ -> + CFG1 + end. + +%% helper function of firstPass + +%% processes all labels recursively and decides the spills to be put. +%% spills are introduced before each function call (pseudo_call) as well as +%% global spill is found +firstPassHelper(Labels, Liveness, CFG) -> + firstPassHelper(Labels, Liveness, CFG, gb_trees:empty()). + +firstPassHelper([Label|Labels], Liveness, CFG, SaveTree) -> + LiveOut = from_list(?HIPE_X86_LIVENESS:liveout(Liveness, Label)), + Block = hipe_x86_cfg:bb(CFG, Label), + Code = hipe_bb:code(Block), + Succ = hipe_x86_cfg:succ(CFG, Label), + IntersectedSaveList = findIntersectedSaveList(Succ,SaveTree), + %% call firstPassDoBlock which will give the updated block + %% code(including spills) as well as Intersected Save List which + %% should be passed above blocks + {_,NewIntersectedList,NewCode} = + firstPassDoBlock(Code, LiveOut,IntersectedSaveList), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), + SizeOfSet = setSize(NewIntersectedList), + + %% if the Intersected Save List is not empty, insert it in the save tree. + if SizeOfSet =/= 0 -> + UpdatedSaveTree = gb_trees:insert(Label,NewIntersectedList,SaveTree), + firstPassHelper(Labels, Liveness, NewCFG,UpdatedSaveTree); + true -> + firstPassHelper(Labels, Liveness, NewCFG,SaveTree) + end; +firstPassHelper([], _, CFG, SaveTree) -> + {CFG, SaveTree}. + +%% handle each instruction in the block bottom up +firstPassDoBlock(Insts, LiveOut, IntersectedSaveList) -> + lists:foldr(fun firstPassDoInsn/2, {LiveOut,IntersectedSaveList,[]}, Insts). + +firstPassDoInsn(I, {LiveOut,IntersectedSaveList,PrevInsts} ) -> + case I of + #pseudo_call{} -> + do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts}); + _ -> % other instructions + DefinedList = from_list( ?HIPE_X86_LIVENESS:defines(I)), + UsedList = from_list(?HIPE_X86_LIVENESS:uses(I)), + + NewLiveOut = subtract(union(LiveOut, UsedList), DefinedList), + NewIntersectedSaveList = subtract(IntersectedSaveList, DefinedList), + + {NewLiveOut, NewIntersectedSaveList, [I|PrevInsts]} + end. + +do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts}) -> + LiveTemps = [Temp || Temp <- to_list(LiveOut), temp_is_pseudo(Temp)], + NewIntersectedSaveList = union(IntersectedSaveList, LiveOut), + {LiveOut, NewIntersectedSaveList, [hipe_x86:mk_pseudo_spill(LiveTemps), I | PrevInsts]}. + +findIntersectedSaveList(LabelList, SaveTree) -> + findIntersectedSaveList([saveTreeLookup(Label,SaveTree) || Label <- LabelList]). + +findIntersectedSaveList([]) -> + []; +findIntersectedSaveList([List1]) -> + List1; +findIntersectedSaveList([List1,List2|Rest]) -> + findIntersectedSaveList([intersection(List1, List2)|Rest]). + +saveTreeLookup(Label, SaveTree) -> + case gb_trees:lookup(Label, SaveTree) of + {value, SaveList} -> + SaveList; + _ -> + [] + end. + +%% Performs the second pass of the algoritm. +%% It basically eliminates the unnecessary spills and introduces restores. +%% Works top down +secondPass(CFG0) -> + Labels = hipe_x86_cfg:reverse_postorder(CFG0), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + secondPassHelper(Labels,Liveness,CFG0). + +%% helper function of secondPass. + +%% recursively handle all labels given. +secondPassHelper(Labels, Liveness, CFG) -> + secondPassHelper(Labels, Liveness, CFG, gb_trees:empty(), CFG). + +%% AccumulatedCFG stands for the CFG that has restore edges incrementally. +%% UnmodifiedCFG is the CFG created after first pass. + +%% AccumulatedSaveTree is used to eliminate the unnecessary saves. The +%% saves (spills) in above blocks are traversed down (if still live +%% and not redefined) and redundant saves are eliminated in the lower +%% blocks. +%% For memory efficiency, it may be better not to maintain the +%% AccumulatedSaveTree but traverse the tree recursively and pass the +%% save lists to the childs individually. +%% But current approach may be faster even though it needs bigger memory. + +secondPassHelper([Label|RestOfLabels], Liveness, + AccumulatedCFG, AccumulatedSaveTree, UnmodifiedCFG) -> + LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), + Block = hipe_x86_cfg:bb(AccumulatedCFG, Label), + Code = hipe_bb:code(Block), + + %% UnmodifiedCFG is needed for getting the correct predecessors. + %% (i.e. not to get the restore edge blocks) + PredList = hipe_x86_cfg:pred(UnmodifiedCFG, Label), + %% find the spills coming from all the parents by intersecting + InitialAccumulatedSaveList = + findIntersectedSaveList(PredList, AccumulatedSaveTree), + AccumulatedSaveList = + keepLiveVarsInAccumSaveList(InitialAccumulatedSaveList, LiveOut), + + {NewCode, CFGUpdateWithRestores, NewAccumulatedSaveList} = + secondPassDoBlock(Label, Code, AccumulatedCFG, AccumulatedSaveList), + + UpdatedAccumulatedSaveTree = + gb_trees:insert(Label, NewAccumulatedSaveList, AccumulatedSaveTree), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_x86_cfg:bb_add(CFGUpdateWithRestores, Label, NewBlock), + secondPassHelper(RestOfLabels, Liveness, NewCFG, + UpdatedAccumulatedSaveTree, UnmodifiedCFG); +secondPassHelper([], _, AccumulatedCFG, _, _) -> + AccumulatedCFG. + +secondPassDoBlock(CurrentLabel, Insts, CFG, AccumulatedSaveList) -> + {NewAccumulatedSaveList,NewInsts,_,_,CFGUpdateWithRestores} = + lists:foldl(fun secondPassDoInsn/2, {AccumulatedSaveList,[],[],CurrentLabel,CFG}, Insts), + {NewInsts, CFGUpdateWithRestores, NewAccumulatedSaveList}. + +secondPassDoInsn(I, {AccumulatedSaveList,PrevInsts,SpillList,CurrentLabel,CFG}) -> + case I of + #pseudo_spill{} -> + %% spill variables that are not accumulated from top down + %% (which are not already saved) + VariablesAlreadySaved = [X || {X,_} <- to_list(AccumulatedSaveList)], + VariablesToBeSpilled = I#pseudo_spill.args -- VariablesAlreadySaved, + NewSpillList = [{Temp, hipe_x86:mk_new_temp(Temp#x86_temp.type)} || Temp <- VariablesToBeSpilled], + %% update accumulated saved list by adding the newly spilled variables. + NewAccumulatedSaveList = union(AccumulatedSaveList, from_list(NewSpillList)), + {NewAccumulatedSaveList, PrevInsts ++ secondPassDoPseudoSpill(NewSpillList), NewSpillList, CurrentLabel, CFG}; + #pseudo_call{} -> + {CFGUpdateWithRestores, NewPseudoCall} = + secondPassDoPseudoCall(I, AccumulatedSaveList, CFG), + %% spill list is emptied after use + {AccumulatedSaveList, PrevInsts ++ [NewPseudoCall], CurrentLabel, [], CFGUpdateWithRestores}; + _ -> + %% remove the defined variables from the accumulated save + %% list since they need to be saved again in later occasions. + DefinedList = from_list(?HIPE_X86_LIVENESS:defines(I)), + NewAccumulatedSaveList = removeRedefVarsFromAccumSaveList(AccumulatedSaveList, DefinedList), + {NewAccumulatedSaveList, PrevInsts ++ [I], SpillList, CurrentLabel, CFG} + end. + +%% remove dead vars from accumulated save list so that they are not restored. +keepLiveVarsInAccumSaveList([], _) -> + []; +keepLiveVarsInAccumSaveList([{Var,Temp}|Rest], DefinedList) -> + IsDefined = is_element(Var, DefinedList), + case IsDefined of + true -> [{Var,Temp}|keepLiveVarsInAccumSaveList(Rest, DefinedList)]; + false -> keepLiveVarsInAccumSaveList(Rest, DefinedList) + end. + +%% remove the redefined variables from accumulated save list since +%% they are changed. +removeRedefVarsFromAccumSaveList([], _) -> + []; +removeRedefVarsFromAccumSaveList([{Var,Temp}|Rest], DefinedList) -> + IsDefined = is_element(Var, DefinedList), + case IsDefined of + true -> removeRedefVarsFromAccumSaveList(Rest, DefinedList); + false -> [{Var,Temp}|removeRedefVarsFromAccumSaveList(Rest, DefinedList)] + end. + +%% convert pseudo_spills to move instructions. +secondPassDoPseudoSpill(SpillList) -> + lists:foldl(fun convertPseudoSpillToMov/2, [], SpillList). + +%% if there are variables to be restored, then call addRestoreBlockToEdge to +%% place them in a new block on the edge of the blocks. +secondPassDoPseudoCall(I, RestoreList, CFG) -> + ContLabel = I#pseudo_call.contlab, + SizeOfSet = setSize(RestoreList), + if SizeOfSet =/= 0 -> + addRestoreBlockToEdge(I, ContLabel, CFG, RestoreList); + true -> + {CFG, I} + end. + +%% prepares the moves for the spills. +convertPseudoSpillToMov({Temp, NewTemp}, OtherMoves) -> + OtherMoves ++ [mkMove(Temp, NewTemp)]. + +%% prepares the moves for the restores. +%% Called by addRestoreBlockToEdge while introducing the restores. +convertPseudoRestoreToMov({Temp, NewTemp}, OtherMoves) -> + OtherMoves ++ [mkMove(NewTemp, Temp)]. + +%% makes the move record, special care is taken for doubles. +mkMove(NewTemp,Temp) -> + if Temp#x86_temp.type =:= 'double' -> + hipe_x86:mk_fmove(NewTemp, Temp); + true -> + hipe_x86:mk_move(NewTemp, Temp) + end. + +%% adds a new block (on the edge) that includes introduced restore moves. +addRestoreBlockToEdge(PseudoCall, ContLabel, CFG, TempArgsList) -> + NextLabel = hipe_gensym:get_next_label(x86), + NewCode = lists:foldl(fun convertPseudoRestoreToMov/2, [], TempArgsList) ++ [hipe_x86:mk_jmp_label(ContLabel)], + NewBlock = hipe_bb:mk_bb(NewCode), + NewPseudoCall = redirect_pseudo_call(PseudoCall, ContLabel, NextLabel), + NewCFG = hipe_x86_cfg:bb_add(CFG, NextLabel, NewBlock), + {NewCFG, NewPseudoCall}. + +%% used instead of hipe_x86_cfg:redirect_jmp since it does not handle pseudo_call calls. +redirect_pseudo_call(I = #pseudo_call{contlab=ContLabel}, Old, New) -> + case Old =:= ContLabel of + true -> I#pseudo_call{contlab=New}; + false -> I + end. + +temp_is_pseudo(Temp) -> + case hipe_x86:is_temp(Temp) of + true -> not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))); + false -> false + end. + +%%--------------------------------------------------------------------- +%% Set operations where the module name is an easily changeable macro +%%--------------------------------------------------------------------- + +union(Set1,Set2) -> + ?SET_MODULE:union(Set1,Set2). + +setSize(Set) -> + ?SET_MODULE:size(Set). + +from_list(List) -> + ?SET_MODULE:from_list(List). + +to_list(Set) -> + ?SET_MODULE:to_list(Set). + +subtract(Set1, Set2) -> + ?SET_MODULE:subtract(Set1, Set2). + +intersection(Set1, Set2) -> + ?SET_MODULE:intersection(Set1, Set2). + +is_element(Element, Set) -> + ?SET_MODULE:is_element(Element, Set). diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl new file mode 100644 index 0000000000..6ef14abdbb --- /dev/null +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -0,0 +1,635 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Floating point handling. + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-else. +-define(HIPE_X86_X87, hipe_x86_x87). +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-endif. + +-module(?HIPE_X86_X87). + +-export([map/1]). + +-include("../x86/hipe_x86.hrl"). +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------- + +map(Defun) -> + CFG0 = hipe_x86_cfg:init(Defun), + %% hipe_x86_cfg:pp(CFG0), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + StartLabel = hipe_x86_cfg:start_label(CFG0), + {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), + hipe_x86_cfg:linearise(CFG1). + +do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> + case gb_trees:lookup(Lbl, BlockMap) of + none -> + %% This block has not been visited. + Block = hipe_x86_cfg:bb(CFG, Lbl), + Succ = hipe_x86_cfg:succ(CFG, Lbl), + NewBlockMap = gb_trees:insert(Lbl, Map, BlockMap), + LiveOut = [X || X <- ?HIPE_X86_LIVENESS:liveout(Liveness, Lbl), + is_fp(X)], + Code = hipe_bb:code(Block), + ReverseCode = lists:reverse(Code), + {NewCode0, NewMap, NewBlockMap1, Dirty} = + do_block(ReverseCode, LiveOut, Map, NewBlockMap), + NewCFG1 = + case Dirty of + true -> + NewBlock = hipe_bb:code_update(Block, NewCode0), + hipe_x86_cfg:bb_add(CFG, Lbl, NewBlock); + _ -> + CFG + end, + {NewCFG3, NewBlockMap2} = + do_blocks(Lbl, Succ, NewCFG1, Liveness, NewMap, NewBlockMap1), + do_blocks(Pred, Lbls, NewCFG3, Liveness, Map, NewBlockMap2); + {value, fail} -> + %% Don't have to follow this trace any longer. + do_blocks(Pred,Lbls, CFG, Liveness, Map, BlockMap); + {value, ExistingMap} -> + %% This block belongs to a trace already handled. + %% The Map coming in must be identical to the one used + %% when the block was processed. + if ExistingMap =:= Map -> + do_blocks(Pred, Lbls, CFG, Liveness, Map, BlockMap); + true -> + NewCFG = do_shuffle(Pred, Lbl, CFG, Map, ExistingMap), + do_blocks(Pred, Lbls, NewCFG, Liveness, Map, BlockMap) + end + end; +do_blocks(_Pred, [], CFG, _Liveness, _Map, BlockMap) -> + {CFG, BlockMap}. + +do_block(Ins, LiveOut, Map, BlockMap) -> + do_block(Ins, LiveOut, Map, BlockMap, false). + +do_block([I|Is], LiveOut, Map, BlockMap, Dirty) -> + case handle_insn(I) of + false -> + {NewCode, NewMap, NewBlockMap, NewDirty} = + do_block(Is, LiveOut, Map, BlockMap, Dirty), + {NewCode++[I], NewMap, NewBlockMap, NewDirty}; + true -> + Def = ordsets:from_list(?HIPE_X86_DEFUSE:insn_def(I)), + Use = ordsets:from_list(?HIPE_X86_DEFUSE:insn_use(I)), + NewLiveOut = + ordsets:filter(fun(X) -> is_fp(X) end, + ordsets:union(ordsets:subtract(LiveOut, Def), Use)), + {NewCode, NewMap, NewBlockMap, NewDirty} = + do_block(Is, NewLiveOut, Map, BlockMap, Dirty), + {NewI, NewMap1, NewBlockMap1} = + do_insn(I, LiveOut, NewMap, NewBlockMap), + NewDirty1 = + if NewDirty =:= true -> true; + NewI =:= [I] -> false; + true -> true + end, + {NewCode++NewI, NewMap1, NewBlockMap1, NewDirty1} + end; +do_block([], LiveOut, Map, BlockMap, Dirty) -> + case [X || X <- Map, not lists:member(X, LiveOut)] of + [] -> + {[], Map, BlockMap, Dirty}; + Pop -> + {PopIns, NewMap} = pop_dead(Pop, Map), + {PopIns, NewMap, BlockMap, true} + end. + +do_shuffle(Pred, Lbl, CFG, OldMap, NewMap) -> + %% First make sure both maps have the same members. + Push = NewMap -- OldMap, + Pop = OldMap -- NewMap, + {PopInsn, OldMap0} = pop_dead(Pop, OldMap), + {PushInsn, OldMap1} = + case Push of + []-> {[], OldMap0}; + _-> push_list(lists:reverse(Push), OldMap0) + end, + Code = + if OldMap1 =:= NewMap -> + %% It was enough to push and pop. + PopInsn ++ PushInsn ++ [hipe_x86:mk_jmp_label(Lbl)]; + true -> + %% Shuffle the positions so the maps match + Cycles = find_swap_cycles(OldMap1, NewMap), + SwitchInsns = do_switching(Cycles), + PopInsn ++ PushInsn ++ SwitchInsns ++ [hipe_x86:mk_jmp_label(Lbl)] + end, + %% Update the CFG. + NewLabel = hipe_gensym:get_next_label(x86), + NewCFG1 = hipe_x86_cfg:bb_add(CFG, NewLabel, hipe_bb:mk_bb(Code)), + OldPred = hipe_x86_cfg:bb(NewCFG1, Pred), + PredCode = hipe_bb:code(OldPred), + NewLast = redirect(lists:last(PredCode), Lbl,NewLabel), + NewPredCode = butlast(PredCode) ++ [NewLast], + NewPredBB = hipe_bb:code_update(OldPred, NewPredCode), + hipe_x86_cfg:bb_add(NewCFG1, Pred, NewPredBB). + +find_swap_cycles(OldMap, NewMap) -> + Moves = [get_pos(X, NewMap, 1) || X <- OldMap], + find_swap_cycles(OldMap, Moves, lists:seq(1, length(OldMap)), []). + +find_swap_cycles(OldMap, Moves, NotHandled, Cycles) -> + if NotHandled =:= [] -> Cycles; + true -> + Cycle = find_cycle(Moves, [hd(NotHandled)]), + NewNotHandled = NotHandled -- Cycle, + case lists:member(1, Cycle) of + true -> + %% The cycle that contains the first element on the stack + %% must be processed last. + NewCycle = format_cycle(Cycle), + find_swap_cycles(OldMap, Moves, NewNotHandled, Cycles ++ [NewCycle]); + _ -> + NewCycle = format_cycle(Cycle), + find_swap_cycles(OldMap, Moves, NewNotHandled, [NewCycle|Cycles]) + end + end. + +find_cycle(Moves, Cycle) -> + To = lists:nth(lists:last(Cycle), Moves), + if To =:= hd(Cycle) -> Cycle; + true -> find_cycle(Moves, Cycle ++ [To]) + end. + +format_cycle(C) -> + %% The position numbers start with 1 - should start with 0. + %% If position 0 is in the cycle it will be permuted until + %% the 0 is first and then remove it. + %% Otherwise the first element is also added last. + NewCycle = [X - 1 || X <- C], + case lists:member(0, NewCycle) of + true -> format_cycle(NewCycle, []); + _ -> NewCycle ++ [hd(NewCycle)] + end. + +format_cycle([H|T], NewCycle) -> + case H of + 0 -> T ++ NewCycle; + _ -> format_cycle(T, NewCycle ++ [H]) + end. + +do_switching(Cycles) -> + do_switching(Cycles, []). + +do_switching([C|Cycles], Insns) -> + NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X)) || X <- C], + do_switching(Cycles, NewInsns); +do_switching([], Insns) -> + Insns. + +redirect(Insn, OldLbl, NewLbl) -> + case Insn of + #pseudo_call{contlab = ContLab, sdesc = SDesc} -> + #x86_sdesc{exnlab = ExnLab} = SDesc, + if ContLab =:= OldLbl -> + Insn#pseudo_call{contlab = NewLbl}; + ExnLab =:= OldLbl -> + Insn#pseudo_call{sdesc = SDesc#x86_sdesc{exnlab = NewLbl}} + end; + _ -> + hipe_x86_cfg:redirect_jmp(Insn, OldLbl, NewLbl) + end. + +do_insn(I, LiveOut, Map, BlockMap) -> + case I of + #pseudo_call{'fun' = Fun, contlab = ContLab} -> + case Fun of + %% We don't want to spill anything if an exception has been thrown. + {_, 'handle_fp_exception'} -> + NewBlockMap = + case gb_trees:lookup(ContLab, BlockMap) of + {value, fail} -> + BlockMap; + {value, _} -> + gb_trees:update(ContLab, fail, BlockMap); + none -> + gb_trees:insert(ContLab, fail, BlockMap) + end, + {[I], [], NewBlockMap}; + _ -> + {pop_all(Map)++[I],[],BlockMap} + end; + #fp_unop{op = 'fwait'} -> + Store = pseudo_pop(Map), + {Store ++ [I], Map, BlockMap}; + #fp_unop{} -> + {NewI, NewMap} = do_fp_unop(I, LiveOut, Map), + {NewI, NewMap, BlockMap}; + #fp_binop{} -> + {NewI, NewMap} = do_fp_binop(I, LiveOut, Map), + {NewI, NewMap, BlockMap}; + #fmove{src = Src, dst = Dst} -> + if Src =:= Dst -> + %% Don't need to keep this instruction! + %% However, we may need to pop from the stack. + case is_liveOut(Src, LiveOut) of + true-> + {[], Map, BlockMap}; + false -> + {SwitchInsn, NewMap0} = switch_first(Dst, Map), + NewMap = pop(NewMap0), + {SwitchInsn++pop_insn(), NewMap, BlockMap} + end; + true -> + {NewI, NewMap} = do_fmove(Src, Dst, LiveOut, Map), + {NewI, NewMap, BlockMap} + end; + _ -> + {[I], Map, BlockMap} + end. + +do_fmove(Src, Dst = #x86_mem{}, LiveOut, Map) -> + %% Storing a float from the stack into memory. + {SwitchInsn, NewMap0} = switch_first(Src, Map), + case is_liveOut(Src, LiveOut) of + true -> + {SwitchInsn ++ [hipe_x86:mk_fp_unop(fst, Dst)], NewMap0}; + _ -> + NewMap1 = pop(NewMap0), + {SwitchInsn ++ [hipe_x86:mk_fp_unop(fstp, Dst)], NewMap1} + end; +do_fmove(Src = #x86_mem{}, Dst, _LiveOut, Map) -> + %% Pushing a float into the stack. + case in_map(Dst, Map) of + true -> ?EXIT({loadingExistingFpVariable,{Src,Dst}}); + _ -> ok + end, + {PushOp, [_|NewMap0]} = push(Src, Map), + %% We want Dst in the map rather than Src. + NewMap = [Dst|NewMap0], + {PushOp, NewMap}; +do_fmove(Src, Dst, LiveOut, Map) -> + %% Copying a float that either is spilled or is on the fp stack, + %% or converting a fixnum in a temp to a float on the fp stack. + case in_map(Dst, Map) of + true -> ?EXIT({copyingToExistingFpVariable,{Src,Dst}}); + _ -> ok + end, + IsConv = + case Src of + #x86_temp{type = Type} -> Type =/= 'double'; + _ -> false + end, + case IsConv of + true -> + do_conv(Src, Dst, Map); + _ -> + %% Copying. + case {is_liveOut(Src, LiveOut), in_map(Src, Map)} of + {false, true} -> + %% Just remap Dst to Src + {Head, [_|T]} = lists:splitwith(fun(X) -> X =/= Src end, Map), + {[], Head ++ [Dst|T]}; + _ -> + {PushOp, [_|NewMap0]} = push(Src, Map), + %% We want Dst in the map rather than Src. + NewMap = [Dst|NewMap0], + {PushOp, NewMap} + end + end. + +do_conv(Src = #x86_temp{reg = Reg}, Dst, Map) -> + %% Converting. Src must not be a register, so we + %% might have to put it into memory in between. + {Move, NewSrc} = + case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of + true -> + Temp = hipe_x86:mk_new_temp('untagged'), + {[hipe_x86:mk_move(Src,Temp)], Temp}; + _ -> + {[], Src} + end, + {PushOp, [_|NewMap0]} = push(NewSrc, Map), + %% We want Dst in the map rather than NewSrc. + NewMap = [Dst|NewMap0], + case length(PushOp) of + 1 -> %% No popping of memory object on fpstack + {Move ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap}; + _ -> %% H contains pop instructions. Must be kept! + Head = butlast(PushOp), + {Move ++ Head ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap} + end. + +do_fp_unop(I = #fp_unop{arg = Arg, op = fchs}, Liveout, Map) -> + %% This is fchs, the only operation without a + %% popping version. Needs special handling. + case is_liveOut(Arg, Liveout) of + true -> + {SwitchIns, NewMap} = switch_first(Arg, Map), + {SwitchIns ++ [I#fp_unop{arg = []}], NewMap}; + false -> + %% Don't need to keep this instruction! + %% However, we may need to pop Src from the stack. + case in_map(Arg, Map) of + true -> + {SwitchInsn, NewMap0} = switch_first(Arg, Map), + NewMap = pop(NewMap0), + {SwitchInsn ++ pop_insn(), NewMap}; + _ -> + {[],Map} + end + end. + +do_fp_binop(#fp_binop{src = Src, dst = Dst, op = Op}, LiveOut, Map) -> + case {is_liveOut(Src, LiveOut), is_liveOut(Dst, LiveOut)} of + {true, true} -> + keep_both(Op, Src, Dst, Map); + {true, false} -> + keep_src(Op, Src, Dst, Map); + {false, true} -> + keep_dst(Op, Src, Dst, Map); + {false, false} -> + %% Both Dst and Src are popped. + keep_none(Op, Src, Dst, Map) + end. + +keep_both(Op, Src, Dst, Map) -> + %% Keep both Dst and Src if it is there. + {SwitchInsn, NewMap} = switch_first(Dst, Map), + NewSrc = get_new_opnd(Src, NewMap), + Insn = format_fp_binop(Op, NewSrc, mk_st(0)), + {SwitchInsn++Insn, NewMap}. + +keep_src(Op, Src, Dst, Map) -> + %% Pop Dst but keep Src in stack if it is there. + {SwitchInsn, NewMap0} = switch_first(Dst, Map), + NewSrc = get_new_opnd(Src, NewMap0), + NewMap = pop(NewMap0), + Insn = format_fp_binop(Op, NewSrc, mk_st(0)), + {SwitchInsn ++ Insn ++ pop_insn(), NewMap}. + +keep_dst(Op, Src, Dst, Map) -> + %% Keep Dst but pop Src. + %% Dst must be in stack. + DstInMap = in_map(Dst, Map), + SrcInMap = in_map(Src, Map), + case SrcInMap of + true -> + case DstInMap of + true -> + %% Src must be popped. If Dst is on top of the stack we can + %% alter the operation rather than shuffle the stack. + {SwitchInsn, Insn, NewMap} = + if hd(Map) =:= Dst -> + NewOp = mk_op_pop(reverse_op(Op)), + NewDst = get_new_opnd(Src, Map), + TmpMap = lists:map(fun(X) -> + if X =:= Src -> Dst; true -> X end + end, Map), + {[], format_fp_binop(NewOp, mk_st(0), NewDst), pop(TmpMap)}; + true -> + {SwitchInsn1, NewMap0} = switch_first(Src, Map), + NewDst = get_new_opnd(Dst,NewMap0), + NewOp = mk_op_pop(Op), + {SwitchInsn1,format_fp_binop(NewOp, mk_st(0), NewDst), pop(NewMap0)} + end, + {SwitchInsn ++ Insn, NewMap}; + _ -> + %% Src is on the stack, but Dst isn't. Use memory command to avoid + %% unnecessary loading instructions. + {SwitchInsn, NewMap0} = switch_first(Src, Map), + NewOp = reverse_op(Op), + NewMap = [Dst] ++ tl(NewMap0), + Insn = format_fp_binop(NewOp, Dst, mk_st(0)), + {SwitchInsn ++ Insn, NewMap} + end; + _ -> + %% Src isn't in the map so it doesn't have to be popped. + {SwitchInsn, NewMap} = switch_first(Dst, Map), + {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}], NewMap} + end. + +keep_none(Op, Src, Dst, Map) -> + %% Dst must be on stack. + {PushInsn, NewMap0} = + case in_map(Dst, Map) of + true -> {[], Map}; + _ -> push(Dst, Map) + end, + case in_map(Src, NewMap0) of + true -> + %% Src must be popped. + {SwitchInsn1, NewMap1} = switch_first(Src, NewMap0), + NewOp = mk_op_pop(Op), + NewDst = get_new_opnd(Dst,NewMap1), + NewMap2 = pop(NewMap1), + %% Then Dst has to be popped. + {PopInsn, NewMap} = pop_member(Dst, NewMap2), + Insn = format_fp_binop(NewOp, mk_st(0), NewDst), + {PushInsn ++ SwitchInsn1 ++ Insn ++ PopInsn, NewMap}; + _ -> + %% Src isn't in the map so it doesn't have to be popped. + {SwitchInsn, NewMap1} = switch_first(Dst, NewMap0), + NewMap = pop(NewMap1), + {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}] ++ pop_insn(), NewMap} + end. + +format_fp_binop(Op, Src = #x86_temp{}, Dst = #x86_fpreg{reg = Reg}) -> + %% Handle that st(0) is sometimes implicit. + if Reg =:= 0 -> [hipe_x86:mk_fp_unop(Op, Src)]; + true -> [hipe_x86:mk_fp_binop(Op, Src, Dst)] + end; +format_fp_binop(Op, Src, Dst) -> + [hipe_x86:mk_fp_binop(Op, Src, Dst)]. + +in_map(X, Map) -> + lists:member(X, Map). + +push_list(L, Map) -> + push_list(L, Map, []). +push_list([H|T], Map, Acc) -> + {Insn, NewMap} = push(H,Map), + push_list(T, NewMap, Acc++Insn); +push_list([], Map, Acc) -> + {Acc, Map}. + +push(X, Map0) -> + {PopInsn, Map} = + if length(Map0) > 7 -> pop_a_temp(Map0); + true -> {[], Map0} + end, + NewX = get_new_opnd(X,Map), + NewMap = [X | Map], + PushOp = [hipe_x86:mk_fp_unop(fld, NewX)], + {PopInsn ++ PushOp, NewMap}. + +pop([_|Map]) -> + Map. + +pop_insn() -> + [hipe_x86:mk_fp_unop('fstp',mk_st(0))]. + +pop_dead(Dead, Map) -> + Dead0 = [X || X <- Map, lists:member(X,Dead)], + pop_dead(Dead0, Map, []). + +pop_dead([D|Dead], Map, Code) -> + {I, NewMap0} = switch_first(D, Map), + NewMap = pop(NewMap0), + Store = case D of + #x86_temp{} -> [hipe_x86:mk_fp_unop('fstp', D)]; + _ -> pop_insn() + end, + pop_dead(Dead, NewMap, Code++I++Store); +pop_dead([], Map, Code) -> + {Code,Map}. + +pop_all(Map) -> + {Code, _} = pop_dead(Map, Map), + Code. + +pop_member(Member, Map) -> + {Head,[_|T]} = lists:splitwith(fun(X)-> X =/= Member end, Map), + {[hipe_x86:mk_fp_unop('fstp', mk_st(get_pos(Member, Map, 0)))], + Head++T}. + +pop_a_temp(Map) -> + Temp = find_a_temp(Map), + {SwitchInsn, NewMap0} = switch_first(Temp, Map), + NewMap = pop(NewMap0), + {SwitchInsn ++ [hipe_x86:mk_fp_unop('fstp', Temp)], NewMap}. + +find_a_temp([H = #x86_temp{}|_]) -> + H; +find_a_temp([_|T]) -> + find_a_temp(T); +find_a_temp([]) -> + ?EXIT({noTempOnFPStack,{}}). + +switch_first(X, Map = [H|_]) -> + Pos = get_pos(X, Map, 0), + case Pos of + 0 -> + {[], Map}; + notFound -> + push(X, Map); + _ -> + {[_|Head], [_|Tail]} = lists:splitwith(fun(Y)-> Y =/= X end, Map), + NewMap = [X|Head] ++ [H|Tail], + Ins = hipe_x86:mk_fp_unop(fxch, mk_st(Pos)), + {[Ins], NewMap} + end; +switch_first(X, Map) -> + push(X, Map). + +get_pos(X, [H|T], Pos) -> + if X =:= H -> Pos; + true -> get_pos(X, T, Pos+1) + end; +get_pos(_, [], _) -> + notFound. + +get_new_opnd(X, Map) -> + I = get_pos(X, Map, 0), + case I of + notFound -> + %% The operand is probably a spilled float. + X; + _ -> + mk_st(I) + end. + +is_fp(#x86_fpreg{}) -> + true; +is_fp(#x86_mem{type = Type}) -> + Type =:= 'double'; +is_fp(#x86_temp{type = Type}) -> + Type =:= 'double'. + +handle_insn(I) -> + case I of + #fmove{} -> true; + #fp_unop{} -> true; + #fp_binop{} -> true; + #pseudo_call{} ->true; + %% #ret{} -> true; + _ -> false + end. + +is_liveOut(X, LiveOut) -> + ordsets:is_element(X, LiveOut). + +mk_st(X) -> + hipe_x86:mk_fpreg(X, false). + +reverse_op(Op) -> + case Op of + 'fsub' -> 'fsubr'; + 'fdiv' -> 'fdivr'; + 'fsubr'-> 'fsub'; + 'fdivr' -> 'fdiv'; + _ -> Op + end. + +mk_op_pop(Op) -> + case Op of + 'fadd'-> 'faddp'; + 'fdiv' -> 'fdivp'; + 'fdivr' -> 'fdivrp'; + 'fmul' -> 'fmulp'; + 'fsub' -> 'fsubp'; + 'fsubr' -> 'fsubrp'; + _ -> ?EXIT({operandHasNoPopVariant,{Op}}) + end. + +butlast([X|Xs]) -> butlast(Xs,X). + +butlast([],_) -> []; +butlast([X|Xs],Y) -> [Y|butlast(Xs,X)]. + +%%pp_insn(Op, Src, Dst) -> +%% pp([hipe_x86:mk_fp_binop(Op, Src, Dst)]). + +%%pp([I|Ins]) -> +%% hipe_x86_pp:pp_insn(I), +%% pp(Ins); +%%pp([]) -> +%% []. + +pseudo_pop(Map) when length(Map) > 0 -> + Dst = hipe_x86:mk_new_temp('double'), + pseudo_pop(Dst, length(Map), []); +pseudo_pop(_) -> + []. + +pseudo_pop(Dst, St, Acc) when St > 1 -> + %% Store all members of the stack to a single temporary to force + %% any floating point overflow exceptions to occur even though we + %% don't have overflow for the extended double precision in the x87. + pseudo_pop(Dst, St-1, + [hipe_x86:mk_fp_unop('fxch', mk_st(St-1)), + hipe_x86:mk_fp_unop('fst', Dst), + hipe_x86:mk_fp_unop('fxch', mk_st(St-1)) + |Acc]); +pseudo_pop(Dst, _St, Acc) -> + [hipe_x86:mk_fp_unop('fst', Dst)|Acc]. -- cgit v1.2.3