diff options
author | Björn Gustavsson <[email protected]> | 2017-10-09 11:31:48 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2017-10-09 11:31:48 +0200 |
commit | e38c688d3961bedcd85f842fd052836cb345a902 (patch) | |
tree | 05b1bbdbb0e17ab368e9ec016e24e46c02e4d6d4 | |
parent | 52449634868e38087d0c93a0145fcfd9d2de394b (diff) | |
parent | c2e746117b24235b8f2c508ef8f90e35ad422bf2 (diff) | |
download | otp-e38c688d3961bedcd85f842fd052836cb345a902.tar.gz otp-e38c688d3961bedcd85f842fd052836cb345a902.tar.bz2 otp-e38c688d3961bedcd85f842fd052836cb345a902.zip |
Merge branch 'bjorn/erts/pack-with-opcode/OTP-14325'
OTP-14327
OTP-14340
* bjorn/erts/pack-with-opcode/OTP-14325:
Pack operands for combined instructions into the instruction word
beam_makeops: Use named arguments for the code generation functions
Optimize packing for "optional use" operands
beam_makeops: Print the instruction name for fatal packing errors
Introduce a syntax for marking operands as "optional use"
beam_makeops: Refactor parsing of specific instructions
Optimize instruction prefetch
Pack operands into the instruction word
Use 32-bits pointers to C code
Move LD flags for hipe from Makefile.in to configure.in
beam_disasm: Correct printing of y registers
ops.tab: Slightly optimize badmatch on a Y register
macros.tab: Fix assertion in SET_I_REL()
-rw-r--r-- | erts/configure.in | 47 | ||||
-rw-r--r-- | erts/emulator/Makefile.in | 12 | ||||
-rw-r--r-- | erts/emulator/beam/beam_bp.c | 19 | ||||
-rw-r--r-- | erts/emulator/beam/beam_debug.c | 44 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 39 | ||||
-rw-r--r-- | erts/emulator/beam/beam_load.c | 43 | ||||
-rw-r--r-- | erts/emulator/beam/erl_vm.h | 11 | ||||
-rw-r--r-- | erts/emulator/beam/macros.tab | 2 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 265 | ||||
-rwxr-xr-x | erts/emulator/utils/beam_makeops | 486 |
10 files changed, 648 insertions, 320 deletions
diff --git a/erts/configure.in b/erts/configure.in index 9dec562f33..508e99a415 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -2766,6 +2766,20 @@ if test X${enable_hipe} = Xyes && test X$ARCH = Xamd64; then LDFLAGS=$saved_LDFLAGS])])]) fi +if test X${enable_hipe} = Xyes; then + case $OPSYS in + linux) + ppcBEAMLDFLAGS="-Wl,-m,elf32ppc" + ppc64BEAMLDFLAGS="-Wl,-m,elf64ppc,-T,hipe/elf64ppc.x" + ;; + darwin) + amd64BEAMLDFLAGS="-pagezero_size 0x10000000" + ;; + esac + archVarName="${ARCH}BEAMLDFLAGS" + eval HIPEBEAMLDFLAGS=\$$archVarName +fi +AC_SUBST(HIPEBEAMLDFLAGS) if test X${enable_fp_exceptions} = Xauto ; then case $host_os in @@ -3760,6 +3774,39 @@ dnl LM_FIND_EMU_CC dnl +dnl Test whether code pointers are always short (32 bits). +dnl + +AC_MSG_CHECKING([whether the code model is small]) +saved_LDFLAGS="$LDFLAGS" +LDFLAGS="$LDFLAGS $HIPEBEAMLDFLAGS" +AC_TRY_RUN([ + #include <stdlib.h> + int main() { + if ((unsigned long long)&main < (1ull << 32)) { + exit(0); + } + exit(1); + } +], +erl_code_model_small=yes, +erl_code_model_small=no, +erl_code_model_small=no) +AC_MSG_RESULT([$erl_code_model_small]) +LDFLAGS="$saved_LDFLAGS" +case $erl_code_model_small in + yes) + AC_DEFINE(CODE_MODEL_SMALL,[1], + [Define if the code model is small (code fits below 2Gb)]) + CODE_MODEL=small + ;; + no) + CODE_MODEL=unknown + ;; +esac +AC_SUBST(CODE_MODEL) + +dnl dnl DTrace & LTTNG dnl case $DYNAMIC_TRACE_FRAMEWORK in diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index eb39c5e4ec..297c64de49 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -240,16 +240,7 @@ ARCH=@ARCH@ ultrasparcCFLAGS=-Wa,-xarch=v8plusa ARCHCFLAGS=$($(ARCH)CFLAGS) -ifdef HIPE_ENABLED -ifeq ($(OPSYS),linux) -ppcBEAMLDFLAGS=-Wl,-m,elf32ppc -ppc64BEAMLDFLAGS=-Wl,-m,elf64ppc,-T,hipe/elf64ppc.x -endif -ifeq ($(OPSYS),darwin) -amd64BEAMLDFLAGS=-pagezero_size 0x10000000 -endif -HIPEBEAMLDFLAGS=$($(ARCH)BEAMLDFLAGS) -endif +HIPEBEAMLDFLAGS=@HIPEBEAMLDFLAGS@ ERTS_BUILD_FALLBACK_POLL=@ERTS_BUILD_FALLBACK_POLL@ @@ -577,6 +568,7 @@ $(TTF_DIR)/beam_tr_funcs.h \ $(TTF_DIR)/OPCODES-GENERATED: $(OPCODE_TABLES) utils/beam_makeops $(gen_verbose)LANG=C $(PERL) utils/beam_makeops \ -wordsize @EXTERNAL_WORD_SIZE@ \ + -code-model @CODE_MODEL@ \ -outdir $(TTF_DIR) \ -DUSE_VM_PROBES=$(if $(USE_VM_PROBES),1,0) \ -DNO_FPE_SIGNALS=$(if $filter(unreliable,$(FPE)),1,0) \ diff --git a/erts/emulator/beam/beam_bp.c b/erts/emulator/beam/beam_bp.c index c81380c14d..871670e8c3 100644 --- a/erts/emulator/beam/beam_bp.c +++ b/erts/emulator/beam/beam_bp.c @@ -419,9 +419,11 @@ erts_install_breakpoints(BpFunctions* f) for (i = 0; i < n; i++) { ErtsCodeInfo* ci = f->matching[i].ci; - BeamInstr *pc = erts_codeinfo_to_code(ci); GenericBp* g = ci->u.gen_bp; - if (*pc != br && g) { + BeamInstr volatile *pc = erts_codeinfo_to_code(ci); + BeamInstr instr = *pc; + + if (!BeamIsOpCode(instr, op_i_generic_breakpoint) && g) { Module* modp = f->matching[i].mod; /* @@ -435,11 +437,16 @@ erts_install_breakpoints(BpFunctions* f) /* * The following write is not protected by any lock. We * assume that the hardware guarantees that a write of an - * aligned word-size (or half-word) writes is atomic - * (i.e. that other processes executing this code will not - * see a half pointer). + * aligned word-size writes is atomic (i.e. that other + * processes executing this code will not see a half + * pointer). + * + * The contents of *pc is marked 'volatile' to ensure that + * the compiler will do a single full-word write, and not + * try any fancy optimizations to write a half word. */ - *pc = br; + instr = BeamSetCodeAddr(instr, br); + *pc = instr; modp->curr.num_breakpoints++; } } diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 0f332da63f..70078c8c59 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -201,7 +201,7 @@ void debug_dump_code(BeamInstr *I, int num) for (i = 0; i < NUM_SPECIFIC_OPS; i++) { if (BeamIsOpCode(instr, i) && opc[i].name[0] != '\0') { code_ptr += print_op(ERTS_PRINT_DSBUF, (void *) dsbufp, - i, opc[i].sz-1, code_ptr+1) + 1; + i, opc[i].sz-1, code_ptr) + 1; break; } } @@ -321,7 +321,7 @@ erts_debug_disassemble_1(BIF_ALIST_1) for (i = 0; i < NUM_SPECIFIC_OPS; i++) { if (BeamIsOpCode(instr, i) && opc[i].name[0] != '\0') { code_ptr += print_op(ERTS_PRINT_DSBUF, (void *) dsbufp, - i, opc[i].sz-1, code_ptr+1) + 1; + i, opc[i].sz-1, code_ptr) + 1; break; } } @@ -405,8 +405,11 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) * Avoid copying because instructions containing bignum operands * are bigger than actually declared. */ - ap = (BeamInstr *) addr; + addr++; + ap = addr; } else { + BeamInstr instr_word = addr++[0]; + /* * Copy all arguments to a local buffer for the unpacking. */ @@ -431,23 +434,22 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) case 'q': *ap++ = *--sp; break; - case 'i': /* Initialize packing accumulator. */ - *ap++ = packed; - break; - case 's': - *ap++ = packed & 0x3ff; - packed >>= 10; +#ifdef ARCH_64 + case '1': /* Tightest shift */ + *ap++ = (packed & BEAM_TIGHTEST_MASK) << 3; + packed >>= BEAM_TIGHTEST_SHIFT; break; - case '0': /* Tight shift */ +#endif + case '2': /* Tight shift */ *ap++ = packed & BEAM_TIGHT_MASK; packed >>= BEAM_TIGHT_SHIFT; break; - case '6': /* Shift 16 steps */ + case '3': /* Loose shift */ *ap++ = packed & BEAM_LOOSE_MASK; packed >>= BEAM_LOOSE_SHIFT; break; #ifdef ARCH_64 - case 'w': /* Shift 32 steps */ + case '4': /* Shift 32 steps */ *ap++ = packed & BEAM_WIDE_MASK; packed >>= BEAM_WIDE_SHIFT; break; @@ -458,8 +460,18 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) case 'P': packed = *--sp; break; +#if defined(ARCH_64) && defined(CODE_MODEL_SMALL) + case '#': /* -1 */ + case '$': /* -2 */ + case '%': /* -3 */ + case '&': /* -4 */ + case '\'': /* -5 */ + case '(': /* -6 */ + packed = (packed << BEAM_WIDE_SHIFT) | BeamExtraData(instr_word); + break; +#endif default: - ASSERT(0); + erts_exit(ERTS_ERROR_EXIT, "beam_debug: invalid packing op: %c\n", *prog); } } ap = args; @@ -744,7 +756,7 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) erts_print(to, to_arg, " x(%d)", loader_x_reg_index(ap[0])); break; case LOADER_Y_REG: - erts_print(to, to_arg, " x(%d)", loader_y_reg_index(ap[0])); + erts_print(to, to_arg, " y(%d)", loader_y_reg_index(ap[0]) - CP_SIZE); break; default: erts_print(to, to_arg, " %T", (Eterm) ap[0]); @@ -765,7 +777,7 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) erts_print(to, to_arg, " x(%d)", loader_x_reg_index(ap[0])); break; case LOADER_Y_REG: - erts_print(to, to_arg, " x(%d)", loader_y_reg_index(ap[0])); + erts_print(to, to_arg, " y(%d)", loader_y_reg_index(ap[0]) - CP_SIZE); break; default: erts_print(to, to_arg, " %T", (Eterm) ap[0]); @@ -788,7 +800,7 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) erts_print(to, to_arg, " x(%d)", loader_x_reg_index(ap[0])); break; case LOADER_Y_REG: - erts_print(to, to_arg, " y(%d)", loader_y_reg_index(ap[0])); + erts_print(to, to_arg, " y(%d)", loader_y_reg_index(ap[0]) - CP_SIZE); break; default: erts_print(to, to_arg, " %T", (Eterm) ap[0]); diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 9a77c63390..aa94fbf536 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -50,14 +50,16 @@ #if defined(NO_JUMP_TABLE) # define OpCase(OpCode) case op_##OpCode # define CountCase(OpCode) case op_count_##OpCode -# define IsOpCode(InstrWord, OpCode) ((InstrWord) == (BeamInstr)op_##OpCode) -# define Goto(Rel) {Go = (Rel); goto emulator_loop;} +# define IsOpCode(InstrWord, OpCode) (BeamCodeAddr(InstrWord) == (BeamInstr)op_##OpCode) +# define Goto(Rel) {Go = BeamCodeAddr(Rel); goto emulator_loop;} +# define GotoPF(Rel) Goto(Rel) #else # define OpCase(OpCode) lb_##OpCode # define CountCase(OpCode) lb_count_##OpCode -# define IsOpCode(InstrWord, OpCode) ((InstrWord) == (BeamInstr)&&lb_##OpCode) -# define Goto(Rel) goto *((void *)Rel) -# define LabelAddr(Label) &&Label +# define IsOpCode(InstrWord, OpCode) (BeamCodeAddr(InstrWord) == (BeamInstr)&&lb_##OpCode) +# define Goto(Rel) goto *((void *)BeamCodeAddr(Rel)) +# define GotoPF(Rel) goto *((void *)Rel) +# define LabelAddr(Label) &&Label #endif #ifdef ERTS_ENABLE_LOCK_CHECK @@ -131,11 +133,11 @@ do { \ /* We don't check the range if an ordinary switch is used */ #ifdef NO_JUMP_TABLE -#define VALID_INSTR(IP) ((UWord)(IP) < (NUMBER_OF_OPCODES*2+10)) +# define VALID_INSTR(IP) (BeamCodeAddr(IP) < (NUMBER_OF_OPCODES*2+10)) #else -#define VALID_INSTR(IP) \ - ((SWord)LabelAddr(emulator_loop) <= (SWord)(IP) && \ - (SWord)(IP) < (SWord)LabelAddr(end_emulator_loop)) +# define VALID_INSTR(IP) \ + ((BeamInstr)LabelAddr(emulator_loop) <= BeamCodeAddr(IP) && \ + BeamCodeAddr(IP) < (BeamInstr)LabelAddr(end_emulator_loop)) #endif /* NO_JUMP_TABLE */ #define SET_CP(p, ip) \ @@ -234,15 +236,18 @@ void** beam_ops; #define fb(N) ((Sint)(Sint32)(N)) #define jb(N) ((Sint)(Sint32)(N)) #define tb(N) (N) -#define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N))) -#define yb(N) (*(Eterm *) (((unsigned char *)E) + (N))) +#define xb(N) (*ADD_BYTE_OFFSET(reg, N)) +#define yb(N) (*ADD_BYTE_OFFSET(E, N)) #define Sb(N) (*REG_TARGET_PTR(N)) #define lb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N))) #define Qb(N) (N) #define Ib(N) (N) + #define x(N) reg[N] #define y(N) E[N] #define r(N) x(N) +#define Q(N) (N*sizeof(Eterm *)) +#define l(N) (freg[N].fd) /* * Check that we haven't used the reductions and jump to function pointed to by @@ -1006,6 +1011,18 @@ init_emulator_finish(void) int i; Export* ep; +#if defined(ARCH_64) && defined(CODE_MODEL_SMALL) + for (i = 0; i < NUMBER_OF_OPCODES; i++) { + BeamInstr instr = BeamOpCodeAddr(i); + if (instr >= (1ull << 32)) { + erts_exit(ERTS_ERROR_EXIT, + "This run-time was supposed be compiled with all code below 2Gb,\n" + "but the instruction '%s' is located at %016lx.\n", + opc[i].name, instr); + } + } +#endif + beam_apply[0] = BeamOpCodeAddr(op_i_apply); beam_apply[1] = BeamOpCodeAddr(op_normal_exit); beam_exit[0] = BeamOpCodeAddr(op_error_action_code); diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 9835b1c096..00dd28b26c 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2578,23 +2578,31 @@ load_code(LoaderState* stp) sp++; } break; - case 'i': /* Initialize packing accumulator. */ - packed = code[--ci]; +#ifdef ARCH_64 + case '1': /* Tightest shift (always 10 bits) */ + ci--; + ASSERT((code[ci] & ~0x1FF8ull) == 0); /* Fits in 10 bits */ + packed = (packed << BEAM_TIGHTEST_SHIFT); + packed |= code[ci] >> 3; + if (packed_label) { + packed_label->packed++; + } break; - case '0': /* Tight shift */ +#endif + case '2': /* Tight shift (10 or 16 bits) */ packed = (packed << BEAM_TIGHT_SHIFT) | code[--ci]; if (packed_label) { packed_label->packed++; } break; - case '6': /* Shift 16 steps */ + case '3': /* Loose shift (16 bits) */ packed = (packed << BEAM_LOOSE_SHIFT) | code[--ci]; if (packed_label) { packed_label->packed++; } break; #ifdef ARCH_64 - case 'w': /* Shift 32 steps */ + case '4': /* Wide shift (32 bits) */ { Uint w = code[--ci]; @@ -2646,8 +2654,31 @@ load_code(LoaderState* stp) sp++; packed = 0; break; +#if defined(ARCH_64) && defined(CODE_MODEL_SMALL) + case '#': /* -1 */ + case '$': /* -2 */ + case '%': /* -3 */ + case '&': /* -4 */ + case '\'': /* -5 */ + case '(': /* -6 */ + /* Pack accumulator contents into instruction word. */ + { + Sint pos = ci - (*prog - '#' + 1); + /* Are the high 32 bits of the instruction word zero? */ + ASSERT((code[pos] & ~((1ull << BEAM_WIDE_SHIFT)-1)) == 0); + code[pos] |= packed << BEAM_WIDE_SHIFT; + if (packed_label) { + ASSERT(packed_label->packed == 1); + packed_label->pos = pos; + packed_label->packed = 2; + packed_label = 0; + } + packed >>= BEAM_WIDE_SHIFT; + } + break; +#endif default: - ASSERT(0); + erts_exit(ERTS_ERROR_EXIT, "beam_load: invalid packing op: %c\n", *prog); } } ASSERT(sp == stack); /* Incorrect program? */ diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h index 661538eadd..76980b5871 100644 --- a/erts/emulator/beam/erl_vm.h +++ b/erts/emulator/beam/erl_vm.h @@ -209,6 +209,15 @@ extern void** beam_ops; # define BeamOpCodeAddr(OpCode) ((BeamInstr)beam_ops[(OpCode)]) #endif -#define BeamIsOpCode(InstrWord, OpCode) ((InstrWord) == BeamOpCodeAddr(OpCode)) +#if defined(ARCH_64) && defined(CODE_MODEL_SMALL) +# define BeamCodeAddr(InstrWord) ((BeamInstr)(Uint32)(InstrWord)) +# define BeamSetCodeAddr(InstrWord, Addr) (((InstrWord) & ~((1ull << 32)-1)) | (Addr)) +# define BeamExtraData(InstrWord) ((InstrWord) >> 32) +#else +# define BeamCodeAddr(InstrWord) ((BeamInstr)(InstrWord)) +# define BeamSetCodeAddr(InstrWord, Addr) (Addr) +#endif + +#define BeamIsOpCode(InstrWord, OpCode) (BeamCodeAddr(InstrWord) == BeamOpCodeAddr(OpCode)) #endif /* __ERL_VM_H__ */ diff --git a/erts/emulator/beam/macros.tab b/erts/emulator/beam/macros.tab index 0d175a7ec6..e0b5f56b53 100644 --- a/erts/emulator/beam/macros.tab +++ b/erts/emulator/beam/macros.tab @@ -38,7 +38,7 @@ REFRESH_GEN_DEST() { // zero, except in a few bit syntax instructions.) SET_I_REL(Offset) { - ASSERT(VALID_INSTR(*(I + ($Offset)))); + ASSERT(VALID_INSTR(*(I + ($Offset) + $IP_ADJUSTMENT))); I += $Offset + $IP_ADJUSTMENT; } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 75ff40606b..4a915c7762 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -99,21 +99,21 @@ line Loc | func_info M F A => func_info M F A | line Loc line I -allocate t t -allocate_heap t I t +allocate t t? +allocate_heap t I t? %cold deallocate Q %hot init y -allocate_zero t t -allocate_heap_zero t I t +allocate_zero t t? +allocate_heap_zero t I t? trim N Remaining => i_trim N i_trim t -test_heap I t +test_heap I t? allocate_heap S u==0 R => allocate S R allocate_heap_zero S u==0 R => allocate_zero S R @@ -158,19 +158,19 @@ is_tuple Fail=f S | select_tuple_arity S=d Fail=f Size=u Rest=* => \ select_tuple_arity S=d Fail=f Size=u Rest=* => \ gen_select_tuple_arity(S, Fail, Size, Rest) -i_select_val_bins xy f I +i_select_val_bins xy f? I -i_select_val_lins xy f I +i_select_val_lins xy f? I -i_select_val2 xy f c c +i_select_val2 xy f? c c -i_select_tuple_arity xy f I +i_select_tuple_arity xy f? I -i_select_tuple_arity2 xy f A A +i_select_tuple_arity2 xy f? A A -i_jump_on_val_zero xy f I +i_jump_on_val_zero xy f? I -i_jump_on_val xy f I W +i_jump_on_val xy f? I W get_list xy xy xy @@ -213,9 +213,9 @@ i_get_tuple_element2y x P y y i_get_tuple_element3 x P x %cold -is_number f x -is_number f y +is_number f? xy %hot + is_number Fail=f i => is_number Fail=f na => jump Fail is_number Fail Literal=q => move Literal x | is_number Fail x @@ -446,37 +446,37 @@ is_ne_exact Lbl C=c R=xy => is_ne_exact Lbl R C is_ne_exact Lbl R=xy C=ian => i_is_ne_exact_immed Lbl R C is_ne_exact Lbl R=xy C=q => i_is_ne_exact_literal Lbl R C -i_is_eq_exact_immed f rxy c +i_is_eq_exact_immed f? rxy c -i_is_eq_exact_literal f xy c +i_is_eq_exact_literal f? xy c -i_is_ne_exact_immed f xy c +i_is_ne_exact_immed f? xy c -i_is_ne_exact_literal f xy c +i_is_ne_exact_literal f? xy c is_eq_exact Lbl Y=y X=x => is_eq_exact Lbl X Y -is_eq_exact f x xy -is_eq_exact f y y +is_eq_exact f? x xy +is_eq_exact f? y y -is_ne_exact f S S +is_ne_exact f? S S -is_lt f x x -is_lt f x c -is_lt f c x +is_lt f? x x +is_lt f? x c +is_lt f? c x %cold -is_lt f s s +is_lt f? s s %hot -is_ge f x x -is_ge f x c -is_ge f c x +is_ge f? x x +is_ge f? x c +is_ge f? c x %cold -is_ge f s s +is_ge f? s s %hot -is_eq f s s +is_eq f? s s -is_ne f s s +is_ne f? s s # # Putting things. @@ -583,7 +583,7 @@ is_tagged_tuple Fail Literal=q Arity Atom => \ move Literal x | is_tagged_tuple Fail x Arity Atom is_tagged_tuple Fail=f c Arity Atom => jump Fail -is_tagged_tuple f rxy A a +is_tagged_tuple f? rxy A a # Test tuple & arity (head) @@ -591,14 +591,14 @@ is_tuple Fail Literal=q => move Literal x | is_tuple Fail x is_tuple Fail=f c => jump Fail is_tuple Fail=f S=xy | test_arity Fail=f S=xy Arity => is_tuple_of_arity Fail S Arity -is_tuple_of_arity f rxy A +is_tuple_of_arity f? rxy A -is_tuple f rxy +is_tuple f? rxy test_arity Fail Literal=q Arity => move Literal x | test_arity Fail x Arity test_arity Fail=f c Arity => jump Fail -test_arity f xy A +test_arity f? xy A get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ get_tuple_element Reg=x P3 D3=x | \ @@ -619,16 +619,16 @@ is_integer Fail Literal=q => move Literal x | is_integer Fail x is_integer Fail=f S=x | allocate Need Regs => is_integer_allocate Fail S Need Regs -is_integer_allocate f x t t +is_integer_allocate f? x t t -is_integer f xy +is_integer f? xy is_list Fail=f n => is_list Fail Literal=q => move Literal x | is_list Fail x is_list Fail=f c => jump Fail -is_list f x +is_list f? x %cold -is_list f y +is_list f? y %hot is_nonempty_list Fail=f S=x | allocate Need Rs => is_nonempty_list_allocate Fail S Need Rs @@ -638,21 +638,21 @@ is_nonempty_list F=f x==0 | test_heap I1 I2 => is_nonempty_list_test_heap F I1 I is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \ is_nonempty_list_get_list Fail S D1 D2 -is_nonempty_list_allocate f rx t t -is_nonempty_list_test_heap f I t -is_nonempty_list_get_list f rx x x -is_nonempty_list f xy +is_nonempty_list_allocate f? rx t t +is_nonempty_list_test_heap f? I t +is_nonempty_list_get_list f? rx x x +is_nonempty_list f? xy -is_atom f x +is_atom f? x %cold -is_atom f y +is_atom f? y %hot is_atom Fail=f a => is_atom Fail=f niq => jump Fail -is_float f x +is_float f? x %cold -is_float f y +is_float f? y %hot is_float Fail=f nai => jump Fail is_float Fail Literal=q => move Literal x | is_float Fail x @@ -660,13 +660,13 @@ is_float Fail Literal=q => move Literal x | is_float Fail x is_nil Fail=f n => is_nil Fail=f qia => jump Fail -is_nil f xy +is_nil f? xy is_binary Fail Literal=q => move Literal x | is_binary Fail x is_binary Fail=f c => jump Fail -is_binary f x +is_binary f? x %cold -is_binary f y +is_binary f? y %hot # XXX Deprecated. @@ -674,27 +674,27 @@ is_bitstr Fail Term => is_bitstring Fail Term is_bitstring Fail Literal=q => move Literal x | is_bitstring Fail x is_bitstring Fail=f c => jump Fail -is_bitstring f x +is_bitstring f? x %cold -is_bitstring f y +is_bitstring f? y %hot is_reference Fail=f cq => jump Fail -is_reference f x +is_reference f? x %cold -is_reference f y +is_reference f? y %hot is_pid Fail=f cq => jump Fail -is_pid f x +is_pid f? x %cold -is_pid f y +is_pid f? y %hot is_port Fail=f cq => jump Fail -is_port f x +is_port f? x %cold -is_port f y +is_port f? y %hot is_boolean Fail=f a==am_true => @@ -702,19 +702,19 @@ is_boolean Fail=f a==am_false => is_boolean Fail=f ac => jump Fail %cold -is_boolean f xy +is_boolean f? xy %hot is_function2 Fail=f acq Arity => jump Fail is_function2 Fail=f Fun a => jump Fail -is_function2 f S s +is_function2 f? S s # Allocating & initializing. allocate Need Regs | init Y => allocate_init Need Regs Y init Y1 | init Y2 => init2 Y1 Y2 -allocate_init t t y +allocate_init t t? y ################################################################# # External function and bif calls. @@ -1004,13 +1004,13 @@ node y # Note: 'I' is sufficient because this instruction will only be used # if the arity fits in 24 bits. -i_fast_element xy j I d +i_fast_element xy j? I d -i_element xy j s d +i_element xy j? s d -bif1 f b s d +bif1 f? b s d bif1_body b s d -i_bif2 f b s s d +i_bif2 f? b s s d i_bif2_body b s s d # @@ -1062,7 +1062,7 @@ make_fun2 OldIndex=u => gen_make_fun2(OldIndex) i_make_fun W t %hot -is_function f xy +is_function f? xy is_function Fail=f c => jump Fail func_info M F A => i_func_info u M F A @@ -1091,24 +1091,24 @@ i_bs_match_string x f W W bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_integer_small_imm x W f t x -i_bs_get_integer_imm x W t f t x -i_bs_get_integer f t t x s x -i_bs_get_integer_8 x f x -i_bs_get_integer_16 x f x +i_bs_get_integer_small_imm x W f? t x +i_bs_get_integer_imm x W t f? t x +i_bs_get_integer f? t t x s x +i_bs_get_integer_8 x f? x +i_bs_get_integer_16 x f? x %if ARCH_64 -i_bs_get_integer_32 x f x +i_bs_get_integer_32 x f? x %endif # Fetching binaries from binaries. bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_binary_imm2 f x t W t x -i_bs_get_binary2 f x t s t x -i_bs_get_binary_all2 f x t t x -i_bs_get_binary_all_reuse x f t +i_bs_get_binary_imm2 f? x t W t x +i_bs_get_binary2 f x t? s t x +i_bs_get_binary_all2 f? x t t x +i_bs_get_binary_all_reuse x f? t # Fetching float from binaries. bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \ @@ -1116,29 +1116,32 @@ bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \ bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail -i_bs_get_float2 f x t s t x +i_bs_get_float2 f? x t s t x # Miscellanous bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \ gen_skip_bits2(Fail, Ms, Sz, Unit, Flags) -i_bs_skip_bits_imm2 f x W -i_bs_skip_bits2 f x xy t -i_bs_skip_bits_all2 f x t +i_bs_skip_bits_imm2 f? x W +i_bs_skip_bits2 f? x xy t +i_bs_skip_bits_all2 f? x t bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits -bs_test_zero_tail2 f x -bs_test_tail_imm2 f x W +bs_test_zero_tail2 f? x +bs_test_tail_imm2 f? x W bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms -bs_test_unit f x t -bs_test_unit8 f x +bs_test_unit f? x t +bs_test_unit8 f? x # An y register operand for bs_context_to_binary is rare, # but can happen because of inlining. +bs_context_to_binary Y=y | line L | badmatch Y => \ + move Y x | bs_context_to_binary x | line L | badmatch x + bs_context_to_binary Y=y => move Y x | bs_context_to_binary x bs_context_to_binary x @@ -1147,14 +1150,14 @@ bs_context_to_binary x # Utf8/utf16/utf32 support. (R12B-5) # bs_get_utf8 Fail=f Ms=x u u Dst=d => i_bs_get_utf8 Ms Fail Dst -i_bs_get_utf8 x f x +i_bs_get_utf8 x f? x bs_skip_utf8 Fail=f Ms=x u u => i_bs_get_utf8 Ms Fail x bs_get_utf16 Fail=f Ms=x u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst bs_skip_utf16 Fail=f Ms=x u Flags=u => i_bs_get_utf16 Ms Fail Flags x -i_bs_get_utf16 x f t x +i_bs_get_utf16 x f? t x bs_get_utf32 Fail=f Ms=x Live=u Flags=u Dst=d => \ bs_get_integer2 Fail Ms Live i=32 u=1 Flags Dst | \ @@ -1183,13 +1186,13 @@ bs_init2 Fail Sz Words=u==0 Regs Flags Dst => \ bs_init2 Fail Sz Words Regs Flags Dst => \ i_bs_init_fail_heap Sz Words Fail Regs Dst -i_bs_init_fail xy j t x +i_bs_init_fail xy j? t? x -i_bs_init_fail_heap s I j t x +i_bs_init_fail_heap s I j? t? x -i_bs_init W t x +i_bs_init W t? x -i_bs_init_heap W I t x +i_bs_init_heap W I t? x bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail @@ -1202,16 +1205,16 @@ bs_init_bits Fail Sz Words=u==0 Regs Flags Dst => \ bs_init_bits Fail Sz Words Regs Flags Dst => \ i_bs_init_bits_fail_heap Sz Words Fail Regs Dst -i_bs_init_bits_fail xy j t x +i_bs_init_bits_fail xy j? t? x -i_bs_init_bits_fail_heap s I j t x +i_bs_init_bits_fail_heap s I j? t? x -i_bs_init_bits W t x -i_bs_init_bits_heap W I t x +i_bs_init_bits W t? x +i_bs_init_bits_heap W I t? x bs_add Fail S1=i==0 S2 Unit=u==1 D => move S2 D -bs_add j s s t x +bs_add j? s s t? x bs_append Fail Size Extra Live Unit Bin Flags Dst => \ move Bin x | i_bs_append Fail Extra Live Unit Size Dst @@ -1221,8 +1224,8 @@ bs_private_append Fail Size Unit Bin Flags Dst => \ bs_init_writable -i_bs_append j I t t s x -i_bs_private_append j t s S x +i_bs_append j? I t? t s x +i_bs_private_append j? t s S x # # Storing integers into binaries. @@ -1231,8 +1234,8 @@ i_bs_private_append j t s S x bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \ gen_put_integer(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_integer j s t s -i_new_bs_put_integer_imm j W t s +i_new_bs_put_integer j? s t s +i_new_bs_put_integer_imm j? W t s # # Utf8/utf16/utf32 support. (R12B-5) @@ -1248,14 +1251,14 @@ i_bs_utf16_size s x bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src -i_bs_put_utf8 j s +i_bs_put_utf8 j? s -bs_put_utf16 j t s +bs_put_utf16 j? t s bs_put_utf32 Fail=j Flags=u Src=s => \ i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src -i_bs_validate_unicode j s +i_bs_validate_unicode j? s # # Storing floats into binaries. @@ -1265,8 +1268,8 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_float(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_float j s t s -i_new_bs_put_float_imm j W t s +i_new_bs_put_float j? s t s +i_new_bs_put_float_imm j? W t s # # Storing binaries into binaries. @@ -1275,9 +1278,9 @@ i_new_bs_put_float_imm j W t s bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_binary(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_binary j s t s -i_new_bs_put_binary_imm j W s -i_new_bs_put_binary_all j s t +i_new_bs_put_binary j? s t s +i_new_bs_put_binary_imm j? W s +i_new_bs_put_binary_all j? s t # # Warning: The i_bs_put_string and i_new_bs_put_string instructions @@ -1391,12 +1394,12 @@ new_map Dst Live Size Rest=* | is_small_map_literal_keys(Size, Rest) => \ new_map d t I i_new_small_map_lit d t q update_map_assoc s d t I -update_map_exact j s d t I +update_map_exact j? s d t I is_map Fail Lit=q | literal_is_map(Lit) => is_map Fail cq => jump Fail -is_map f xy +is_map f? xy ## Transform has_map_fields #{ K1 := _, K2 := _ } to has_map_elements @@ -1410,14 +1413,14 @@ get_map_elements Fail Src=xy Size=u==2 Rest=* => \ get_map_elements Fail Src Size Rest=* | map_key_sort(Size, Rest) => \ gen_get_map_elements(Fail, Src, Size, Rest) -i_get_map_elements f s I +i_get_map_elements f? s I i_get_map_element Fail Src=xy Key=y Dst => \ move Key x | i_get_map_element Fail Src x Dst -i_get_map_element_hash f xy c I xy +i_get_map_element_hash f? xy c I xy -i_get_map_element f xy x xy +i_get_map_element f? xy x xy # # Convert the plus operations to a generic plus instruction. @@ -1485,32 +1488,32 @@ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst i_increment rxy W t d -i_plus x xy j t d -i_plus s s j t d +i_plus x xy j? t d +i_plus s s j? t d -i_minus x x j t d -i_minus s s j t d +i_minus x x j? t d +i_minus s s j? t d -i_times j t s s d +i_times j? t s s d -i_m_div j t s s d -i_int_div j t s s d +i_m_div j? t s s d +i_int_div j? t s s d -i_rem x x j t d -i_rem s s j t d +i_rem x x j? t d +i_rem s s j? t d -i_bsl s s j t d -i_bsr s s j t d +i_bsl s s j? t d +i_bsr s s j? t d -i_band x c j t d -i_band s s j t d +i_band x c j? t d +i_band s s j? t d -i_bor j I s s d -i_bxor j I s s d +i_bor j? I s s d +i_bxor j? I s s d i_int_bnot Fail Src=c Live Dst => move Src x | i_int_bnot Fail x Live Dst -i_int_bnot j S t d +i_int_bnot j? S t d # # Old guard BIFs that creates heap fragments are no longer allowed. @@ -1534,9 +1537,9 @@ gc_bif2 Fail I Bif S1 S2 Dst => \ gc_bif3 Fail I Bif S1 S2 S3 Dst => \ gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst) -i_gc_bif1 j W s t d +i_gc_bif1 j? W s t? d -i_gc_bif2 j W t s s d +i_gc_bif2 j? W t? s s d ii_gc_bif3/7 @@ -1545,7 +1548,7 @@ ii_gc_bif3/7 ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \ move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst -i_gc_bif3 j W t s s d +i_gc_bif3 j? W t? s s d # # The following instruction is specially handled in beam_load.c diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 64a9a49ac8..d7791d23fa 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -24,6 +24,17 @@ use constant COLD => 0; use constant WARM => 1; use constant HOT => 2; +# Instructions for packing +use constant PACK_JUMP => 1; +use constant PACK_IN_INSTR_WORD => 2; +use constant PACK_OPT_IN_INSTR_WORD => 4; + +# Packing commands +use constant PACK_CMD_TIGHTEST => '1'; +use constant PACK_CMD_TIGHT => '2'; +use constant PACK_CMD_LOOSE => '3'; +use constant PACK_CMD_WIDE => '4'; + $BEAM_FORMAT_NUMBER = undef; my $target = \&emulator_output; @@ -32,30 +43,15 @@ my $verbose = 0; my $hotness = 1; my $num_file_opcodes = 0; my $wordsize = 32; -my %defs; # Defines (from command line). +my $code_pointers_are_short = 0; # Whether code pointers (to C code) are short. +my $code_model = 'unknown'; +my %defs; # Defines (from command line). # This is shift counts and mask for the packer. my $WHOLE_WORD = ''; -my @pack_instr; -my @pack_shift; -my @pack_mask; - -$pack_instr[2] = ['6', 'i']; -$pack_instr[3] = ['0', '0', 'i']; -$pack_instr[4] = ['6', '6', '6', 'i']; # Only for 64 bit wordsize - -$pack_shift[2] = ['0', 'BEAM_LOOSE_SHIFT']; -$pack_shift[3] = ['0', 'BEAM_TIGHT_SHIFT', '(2*BEAM_TIGHT_SHIFT)']; -$pack_shift[4] = ['0', 'BEAM_LOOSE_SHIFT', # Only for 64 bit wordsize - '(2*BEAM_LOOSE_SHIFT)', - '(3*BEAM_LOOSE_SHIFT)']; - -$pack_mask[2] = ['BEAM_LOOSE_MASK', $WHOLE_WORD]; -$pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK']; -$pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize - 'BEAM_LOOSE_MASK', - 'BEAM_LOOSE_MASK', - $WHOLE_WORD]; + +my @basic_pack_options = (0); +my @extended_pack_options = @basic_pack_options; # There are two types of instructions: generic and specific. # The generic instructions are those generated by the Beam compiler. @@ -250,6 +246,7 @@ while (@ARGV && $ARGV[0] =~ /^-(.*)/) { ($target = \&compiler_output), next if /^compiler/; ($outdir = shift), next if /^outdir/; ($wordsize = shift), next if /^wordsize/; + ($code_model = shift), next if /^code-model/; ($verbose = 1), next if /^v/; ($defs{$1} = $2), next if /^D(\w+)=(\w+)/; die "$0: Bad option: -$_\n"; @@ -261,14 +258,21 @@ if ($wordsize == 32) { } elsif ($wordsize == 64) { $defs{'ARCH_32'} = 0; $defs{'ARCH_64'} = 1; + $code_pointers_are_short = $code_model eq 'small'; } # -# Initialize number of arguments per packed word. +# Initialize pack options. # if ($wordsize == 64) { - $pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD]; + @basic_pack_options = (0,PACK_JUMP); + @extended_pack_options = @basic_pack_options; + if ($code_pointers_are_short) { + foreach (@basic_pack_options) { + push @extended_pack_options, $_ | PACK_IN_INSTR_WORD; + } + } } # @@ -450,15 +454,7 @@ while (<>) { # Parse specific instructions (only present in emulator/loader): # Name Arg1 Arg2... # - my($name, @args) = split; - error("too many operands") - if @args > $max_spec_operands; - syntax_check($name, @args); - my $arity = @args; - if (defined $gen_opnum{$name,$arity} and $obsolete[$gen_opnum{$name,$arity}]) { - error("specific instructions may not be specified for obsolete instructions"); - } - save_specific_ops($name, $arity, $hotness, @args); + my($name,$arity) = parse_specific_op($_); if (defined $op_num) { error("specific instructions must not be numbered"); } elsif (!defined($gen_arity{$name}) && !defined($unnumbered{$name,$arity})) { @@ -531,10 +527,9 @@ sub emulator_output { foreach $key (keys %specific_op) { foreach (@{$specific_op{$key}}) { my($name, $hotness, @args) = @$_; - my $sign = join('', @args); my $print_name = print_name($name, @args); - my($size, $code, $pack_spec) = cg_basic($name, @args); + my($size, $code, $pack_spec) = cg_basic(name => $name, args => \@args); if (defined $code) { $code = "OpCase($print_name):\n$code"; push @generated_code, [$hotness,$code,($print_name)]; @@ -602,6 +597,7 @@ sub emulator_output { foreach (@{$specific_op{$key}}) { my($name, $hot, @args) = @{$_}; my($sign) = join('', @args); + $sign =~ s/[?]//g; # The primitive types should sort before other types. @@ -619,6 +615,7 @@ sub emulator_output { my $print_name = $items{$sort_key}; my $info = $spec_op_info{$print_name}; my(@args) = @{$info->{'args'}}; + @args = map { s/[?]$//; $_ } @args; my $arity = @args; # @@ -732,12 +729,19 @@ sub emulator_output { print "#if !defined(ARCH_64)\n"; print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n]; print "#endif\n"; + if ($code_pointers_are_short) { + print "#if !defined(CODE_MODEL_SMALL)\n"; + print qq[ #error "small code model assumed, but CODE_MODEL_SMALL not defined"\n]; + print "#endif\n"; + } print "#define BEAM_WIDE_MASK 0xFFFFFFFFull\n"; print "#define BEAM_LOOSE_MASK 0xFFFFull\n"; print "#define BEAM_TIGHT_MASK 0xFFFFull\n"; + print "#define BEAM_TIGHTEST_MASK 0x3FFull\n"; print "#define BEAM_WIDE_SHIFT 32\n"; print "#define BEAM_LOOSE_SHIFT 16\n"; print "#define BEAM_TIGHT_SHIFT 16\n"; + print "#define BEAM_TIGHTEST_SHIFT 10\n"; } print "\n"; @@ -852,6 +856,7 @@ sub emulator_output { sub print_name { my($name,@args) = @_; my $sign = join '', @args; + $sign =~ s/[?]//g; $sign ne '' ? "${name}_$sign" : $name; } @@ -971,40 +976,54 @@ sub compiler_output { } # -# Check an operation for validity. +# Parse and store a specific operation. # -sub syntax_check { - my($name, @args) = @_; - my($i); +sub parse_specific_op { + my($name, @args) = split " ", shift; + my $arity = @args; + # Check for various errors. error("Bad opcode name '$name'") unless $name =~ /^[a-z][\w\d_]*$/; - for ($i = 0; $i < @args; $i++) { - foreach my $type (split(//, $args[$i])) { + error("too many operands") + if @args > $max_spec_operands; + for (my $i = 0; $i < $arity; $i++) { + my $arg = $args[$i]; + $arg =~ s/[?]$//; + foreach my $type (split(//, $arg)) { error("Argument " . ($i+1) . ": invalid type '$type'") unless defined $arg_size{$type}; } } -} - -sub save_specific_ops { - my($name,$arity,$hot,@args) = @_; - my(@res) = (""); + if (defined $gen_opnum{$name,$arity} and $obsolete[$gen_opnum{$name,$arity}]) { + error("specific instructions may not be specified for obsolete instructions"); + } + # Expand operands with multiple types to multiple instructions. + # (For example, "get_list xy xy xy" will be expanded to six instructions.) + my @res = ([]); foreach my $arg (@args) { - my @new_res = (); + my @old_res = @res; + @res = (); + my $marker = ($arg =~ s/[?]$//) ? '?' : ''; foreach my $type (split(//, $arg)) { - foreach my $args (@res) { - push @new_res, "$args$type"; + foreach my $args_ref (@old_res) { + my @args = @$args_ref; + push @args, "$type$marker"; + push @res, \@args; } } - @res = @new_res; } + + # Store each specific instruction. my $key = "$name/$arity"; - foreach my $args (@res) { - @args = split //, $args; - push @{$specific_op{$key}}, [$name,$hot,@args]; + foreach my $args_ref (@res) { + @args = @$args_ref; + push @{$specific_op{$key}}, [$name,$hotness,@args]; } + + # Done. + ($name,$arity); } sub parse_c_args { @@ -1110,9 +1129,28 @@ sub combine_instruction_group { # Variables. my %offsets; my @instrs; - my %num_references; + my %num_references; # Number of references from other sub instructions. my $group_size = 999; + # + # Calculate the number of references from other sub instructions. + # This number is useful in several ways: + # + # * If this number is 0, it is only used as the entry point for a + # function, implying that it does not need a label and that operands + # can be packed into the instruction word. + # + # * We'll use this number in the sort key, as a tie breaker for sub instructions + # at the same instruction offset. + # + foreach my $ref_instr (@in_instrs) { + my(undef,undef,$first_sub,@other_subs) = @$ref_instr; + $num_references{$first_sub} += 0; # Make sure it is defined. + foreach my $sub (@other_subs) { + $num_references{$sub}++; + } + } + # Do basic error checking. Associate operands of instructions # with the correct micro instructions. Calculate offsets for micro # instructions. @@ -1138,12 +1176,13 @@ sub combine_instruction_group { foreach (0..$#c_args) { push @first, shift @rest; } - my $size = cg_combined_size($s, 1, @first); + my $size = cg_combined_size(name => $s, + first => $num_references{$s} == 0, + args => \@first); $offsets{$s} = $offset unless defined $offsets{$s} and $offsets{$s} < $offset; $offset += $size - 1; my $label = micro_label($s); - $num_references{$label} = 0; push @new_subs, [$opcase,$label,$s,$size-1,@first]; $opcase = ''; } @@ -1162,9 +1201,8 @@ sub combine_instruction_group { my($opcase,$label,$s,$size,@args) = @{$subs[$i]}; my $next = ''; (undef,$next) = @{$subs[$i+1]} if $i < $#subs; - $num_references{$next}++ if $next; my $instr_info = "$opcase:$label:$next:$s:$size:@args"; - push @all_instrs, [$label,$offsets{$s},$instr_info]; + push @all_instrs, [$label,$s,$offsets{$s},$instr_info]; } } @@ -1172,8 +1210,8 @@ sub combine_instruction_group { my %label_to_offset; my %order_to_offset; foreach my $instr (@all_instrs) { - my($label,$offset,$instr_info) = @$instr; - my $sort_key = sprintf("%02d.%02d", $offset, $num_references{$label}); + my($label,$s,$offset,$instr_info) = @$instr; + my $sort_key = sprintf("%02d.%02d", $offset, $num_references{$s}); push @{$order_to_instrs{$sort_key}}, $instr_info; $label_to_offset{$label} = $offset; $order_to_offset{$sort_key} = $offset; @@ -1212,7 +1250,7 @@ sub combine_instruction_group { $gcode .= "OpCase($opcase):\n"; push @opcase_labels, $opcase; } - if ($num_references{$label}) { + if ($num_references{$s}) { $gcode .= "$label:\n"; } @@ -1229,8 +1267,13 @@ sub combine_instruction_group { } my($gen_code,$down,$up) = - cg_combined_code($s, 1, $flags, $offset, - $group_size-$offset, $inc, @first); + cg_combined_code(name => $s, + first => $num_references{$s} == 0, + extra_comments => $flags, + offset => $offset, + comp_size => $group_size-$offset, + inc => $inc, + args =>\@first); my $spec_label = "$opcase$label"; $down{$spec_label} = $down; $up{$spec_label} = $up; @@ -1279,8 +1322,8 @@ sub micro_label { # sub cg_basic { - my($name,@args) = @_; - my($size,$code,$pack_spec) = code_gen($name, 1, '', 0, undef, undef, @args); + my %params = (@_, pack_options => \@extended_pack_options); + my($size,$code,$pack_spec) = code_gen(%params); $pack_spec = build_pack_spec($pack_spec); ($size,$code,$pack_spec); } @@ -1290,8 +1333,10 @@ sub cg_basic { # sub cg_combined_size { - my($name,$pack,@args) = @_; - my($size) = code_gen($name, $pack, '', 0, undef, undef, @args); + my %params = (@_, pack_options => \@basic_pack_options); + $params{pack_options} = \@extended_pack_options + if $params{first}; + my($size) = code_gen(%params); $size; } @@ -1300,8 +1345,10 @@ sub cg_combined_size { # sub cg_combined_code { - my($name,$pack,$extra_comments,$offset,$comp_size,$inc,@args) = @_; - my($size,$code,$pack_spec) = code_gen(@_); + my %params = (@_, pack_options => \@basic_pack_options); + $params{pack_options} = \@extended_pack_options + if $params{first}; + my($size,$code,$pack_spec) = code_gen(%params); if ($pack_spec eq '') { ($code,'',''); } else { @@ -1311,8 +1358,16 @@ sub cg_combined_code { } sub code_gen { - my($name,$pack,$extra_comments,$offset,$comp_size,$inc,@args) = @_; - my $group_size = defined $comp_size ? $comp_size + $inc : undef; + my %params = (extra_comments => '', + offset => 0, + inc => 0, + @_); + my $name = $params{name}; + my $extra_comments = $params{extra_comments}; + my $offset = $params{offset}; + my $inc = $params{inc}; + my @args = @{$params{args}}; + my $size = 0; my $flags = ''; my @f; @@ -1326,8 +1381,9 @@ sub code_gen { # my $c_code_ref = $c_code{$name}; - if ($pack and defined $c_code_ref and $name ne 'catch') { - ($var_decls, $pack_spec, @args) = do_pack($offset, @args); + if (defined $c_code_ref and $name ne 'catch') { + my $pack_options = $params{pack_options}; + ($var_decls, $pack_spec, @args) = do_pack($name, $offset, $pack_options, @args); } # @@ -1337,6 +1393,7 @@ sub code_gen { my $need_block = 0; my $arg_offset = $offset; + @args = map { s/[?]$//g; $_ } @args; foreach (@args) { my($this_size) = $arg_size{$_}; SWITCH: @@ -1399,7 +1456,7 @@ sub code_gen { return ($size+1, undef, ''); } - $group_size = $size unless defined $group_size; + my $group_size = ($params{comp_size} || $size) + $inc; # # Generate main body of the implementation. @@ -1418,7 +1475,7 @@ sub code_gen { $bindings{$var} = $f[$i]; } $bindings{'NEXT_INSTRUCTION'} = "I+" . ($group_size+$offset+1); - $bindings{'IP_ADJUSTMENT'} = defined $inc ? $inc : 0; + $bindings{'IP_ADJUSTMENT'} = $inc; $c_code = eval { expand_all($c_code, \%bindings) }; unless (defined $c_code) { warn $@; @@ -1441,10 +1498,10 @@ sub code_gen { "ASSERT(VALID_INSTR(*I));\n" . "Goto(*I);"; } else { - $var_decls .= "BeamInstr next_pf = I[$instr_offset];\n"; + $var_decls .= "BeamInstr next_pf = BeamCodeAddr(I[$instr_offset]);\n"; $dispatch_next = "\nI += $instr_offset;\n" . "ASSERT(VALID_INSTR(next_pf));\n" . - "Goto(next_pf);"; + "GotoPF(next_pf);"; } # @@ -1603,9 +1660,51 @@ sub needs_do_wrapper { } sub do_pack { - my($offset,@args) = @_; + my($name,$offset,$pack_opts_ref,@args) = @_; + my @pack_opts = @$pack_opts_ref; + my $opt_arg_pos = -1; + + # Look for an optional use operand not as the first argument. + if (@args and $args[0] !~ /[?]$/) { + for (my $pos = 0; $pos < @args; $pos++) { + if ($args[$pos] =~ /[?]$/) { + $opt_arg_pos = $pos; + last; + } + } + } + + @args = map { s/[?]$//; $_ } @args; # Remove any optional use marker. + + # If there is an optional operand, extend the array of pack options. + if ($opt_arg_pos >= 0) { + my @new_pack_opts = grep { $_ & PACK_IN_INSTR_WORD } @pack_opts; + @new_pack_opts = map { + ($_ & ~ PACK_IN_INSTR_WORD) | PACK_OPT_IN_INSTR_WORD; + } @new_pack_opts; + push @pack_opts, @new_pack_opts; + } + + my $ret = ['', ':', @args]; + my $score = 0; + + foreach my $options (@pack_opts) { + my $this_opt_arg_pos = ($options & PACK_OPT_IN_INSTR_WORD) ? $opt_arg_pos : -1; + my($this_score,$this_result) = + do_pack_one($name, $options, $this_opt_arg_pos, $offset, @args); + if ($this_score > $score) { + $ret = $this_result; + $score = $this_score; + } + } + return @$ret; +} + +sub do_pack_one { + my($name,$options,$opt_arg_pos,$offset,@args) = @_; my($packable_args) = 0; my @bits_needed; # Bits needed for each argument. + my $pack_in_iw = $options & PACK_IN_INSTR_WORD; # # Define the minimum number of bits needed for the packable argument types. @@ -1619,6 +1718,10 @@ sub do_pack { 't' => 16); if ($wordsize == 64) { $bits_needed{'I'} = 32; + if ($options & PACK_JUMP) { + $bits_needed{'f'} = 32; + $bits_needed{'j'} = 32; + } } # @@ -1631,51 +1734,48 @@ sub do_pack { } else { push @bits_needed, 0; } - } - - # - # Try to pack 'f' and 'j', but not at expense at worse packing - # for other operands. For example, given the arguments "f x x", we - # want the 'x' operands to be packed, not 'f' and 'x' packed and - # the final 'x' not packed. - # - - if ($wordsize == 64 and $packable_args == 1) { - for (my $i = 0; $i < @args; $i++) { - if ($args[$i] =~ /^[fj]$/) { - $bits_needed[$i] = 32; - $packable_args++; - last; - } + if ($arg =~ /^[fj]$/) { + # Only pack the first occurrence of 'f' or 'j'. + delete $bits_needed{'f'}; + delete $bits_needed{'j'}; } } # - # Nothing to pack unless there are at least 2 packable arguments. + # Return if there is nothing to pack. # - return ('', ':', @args) if $packable_args < 2; + if ($packable_args == 0) { + return (-1); + } elsif ($packable_args == 1 and $options == 0) { + return (-1); + } # # Determine how many arguments we should pack into each word. # my @args_per_word; my @need_wide_mask; - my $bits = 0; - my $word = 0; - $args_per_word[0] = 0; - $need_wide_mask[0] = 0; - for (my $i = 0; $i < @args; $i++) { - if ($bits_needed[$i]) { - my $needed = $bits_needed[$i]; - - my $next_word = sub { - $word++; - $args_per_word[$word] = 0; - $need_wide_mask[$word] = 0; - $bits = 0; - }; + my $bits; + my $this_wordsize; + my $word = -1; + + my $next_word = sub { + $word++; + $args_per_word[$word] = 0; + $need_wide_mask[$word] = 0; + $bits = 0; + $this_wordsize = $wordsize; + }; + + $next_word->(); + $this_wordsize = 32 if $pack_in_iw; + for (my $arg_num = 0; $arg_num < @args; $arg_num++) { + my $needed = $bits_needed[$arg_num]; + + next unless $needed; + next if $arg_num == $opt_arg_pos; - if ($bits+$needed > $wordsize) { # Does not fit. + if ($bits+$needed > $this_wordsize) { # Does not fit. $next_word->(); } if ($args_per_word[$word] == 4) { # Can't handle more than 4 args. @@ -1695,15 +1795,16 @@ sub do_pack { # Can only pack two things in a word where one # item is 32 bits. Force the next item into # the next word. - $bits = $wordsize; + $bits = $this_wordsize; } - } } # # Try to balance packing between words. # - if ($args_per_word[$#args_per_word] == 1) { + if (@args_per_word == 1 and $args_per_word[0] == 1 and $pack_in_iw) { + # Don't rebalance. + } elsif ($args_per_word[$#args_per_word] == 1) { if ($args_per_word[$#args_per_word-1] < 3) { pop @args_per_word; } else { @@ -1728,12 +1829,19 @@ sub do_pack { # beginning). my $up = ''; # Pack commands (storing back while # moving forward). + my $arg_num = 0; # Number of argument. - # Skip an unpackable argument. + # Skip an unpackable argument. Also handle packing of + # an single operand into the instruction word. my $skip_unpackable = sub { my($arg) = @_; - if ($arg_size{$arg}) { + if ($arg_num == $opt_arg_pos) { + my $pack = chr(ord('#') + $arg_num); + $down = PACK_CMD_WIDE . "$pack$down"; + my $unpack = "BeamExtraData(I[0])"; + $args[$arg_num] = "packed:$arg:0:${arg}b($unpack)"; + } elsif ($arg_size{$arg}) { # Save the argument on the pack engine's stack. my $push = 'g'; if ($type_bit{$arg} & $type_bit{'q'}) { @@ -1753,43 +1861,50 @@ sub do_pack { # the packing engine works from right-to-left, but we must generate # the instructions from left-to-right because we must calculate # instruction sizes from left-to-right. - - my $arg_num = 0; for (my $word = 0; $word < @args_per_word; $word++) { my $ap = 0; # Argument number within word. my $packed_var = "tmp_packed" . ($word+1); my $args_per_word = $args_per_word[$word]; - my @shift; - my @mask; - my @instr; - - if ($need_wide_mask[$word]) { - @shift = ('0', 'BEAM_WIDE_SHIFT'); - @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); - @instr = ('w', 'w'); - } else { - @shift = @{$pack_shift[$args_per_word]}; - @mask = @{$pack_mask[$args_per_word]}; - @instr = @{$pack_instr[$args_per_word]}; - } + my $pack_word_size = ($pack_in_iw && $word == 0) ? 32 : $wordsize; + + my($shref,$mref,$iref,$unpack_suffix) = + get_pack_parameters($name, $args_per_word, $pack_word_size, + $need_wide_mask[$word]); + my @shift = @$shref; + my @mask = @$mref; + my @instr = @$iref; while ($ap < $args_per_word) { my $reg = $args[$arg_num]; my $this_size = $arg_size{$reg}; + if ($bits_needed[$arg_num]) { $this_size = 0; if ($ap == 0) { - $pack_prefix .= "Eterm $packed_var = " . - arg_offset($size+$offset) . ";\n"; - $up .= "p"; - $down = "P$down"; - $this_size = 1; + my $packed_data; + if ($pack_in_iw and $word == 0) { + $packed_data = "BeamExtraData(I[0])"; + if ($args_per_word == 1) { + $packed_var = $packed_data; + } else { + $pack_prefix .= "Eterm $packed_var = $packed_data;\n"; + } + my $pack = chr(ord('#') + $size); + $down = "$pack$down"; + } else { + $packed_data = arg_offset($size + $offset); + $pack_prefix .= "Eterm $packed_var = $packed_data;\n"; + $down = "P$down"; + $up .= "p"; + $this_size = 1; + } } $down = "$instr[$ap]$down"; my $unpack = make_unpack($packed_var, $shift[$ap], $mask[$ap]); - $args[$arg_num] = "packed:$reg:$this_size:$reg" . "b($unpack)"; + my $macro = "$reg$unpack_suffix"; + $args[$arg_num] = "packed:$reg:$this_size:$macro($unpack)"; $ap++; } else { @@ -1804,12 +1919,107 @@ sub do_pack { # Skip any unpackable arguments at the end. # while ($arg_num < @args) { - $skip_unpackable->($args[$arg_num]); + my $arg = $args[$arg_num]; + $skip_unpackable->($arg); + $size += $arg_size{$arg}; $arg_num++; } my $pack_spec = "$down:$up"; - return ($pack_prefix, $pack_spec, @args); + my $score = pack_score($options, @args); + + return ($score, [$pack_prefix,$pack_spec,@args]); +} + +sub get_pack_parameters { + my($name,$args_per_word,$pack_word_size,$wide_mask) = @_; + my(@shift,@mask,@instr); + my $unpack_suffix = 'b'; + + if ($wide_mask and $args_per_word > 1) { + @shift = ('0', 'BEAM_WIDE_SHIFT'); + @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); + @instr = (PACK_CMD_WIDE) x 2; + } elsif ($args_per_word == 1) { + @shift = ('0'); + @mask = ($WHOLE_WORD); + @instr = (PACK_CMD_WIDE); + } elsif ($args_per_word == 2) { + if ($pack_word_size != $wordsize) { + # 64-bit word size, pack 32 bits into instruction word. + @shift = ('0', 'BEAM_TIGHT_SHIFT'); + @mask = ('BEAM_TIGHT_MASK', $WHOLE_WORD); + @instr = (PACK_CMD_TIGHT) x 2; + } else { + # 32/64 bit word size + @shift = ('0', 'BEAM_LOOSE_SHIFT'); + @mask = ('BEAM_LOOSE_MASK', $WHOLE_WORD); + @instr = (PACK_CMD_LOOSE) x 2; + } + } elsif ($args_per_word == 3) { + if ($pack_word_size != $wordsize) { + # 64-bit word size, pack 3 register numbers into instruction word. + @shift = ('0', 'BEAM_TIGHTEST_SHIFT', '(2*BEAM_TIGHTEST_SHIFT)'); + @mask = ('BEAM_TIGHTEST_MASK', 'BEAM_TIGHTEST_MASK', $WHOLE_WORD); + @instr = (PACK_CMD_TIGHTEST) x 3; + $unpack_suffix = ''; + } else { + # 32/64 bit word size. + @shift = ('0', 'BEAM_TIGHT_SHIFT', '(2*BEAM_TIGHT_SHIFT)'); + if ($wordsize == 32) { + @mask = ('BEAM_TIGHT_MASK') x 3; + } elsif ($wordsize == 64) { + @mask = ('BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD); + } + @instr = (PACK_CMD_TIGHT) x 3; + } + } elsif ($args_per_word == 4) { + # 64 bit word size only. + @shift = ('0', + 'BEAM_LOOSE_SHIFT', + '(2*BEAM_LOOSE_SHIFT)', + '(3*BEAM_LOOSE_SHIFT)'); + @mask = ('BEAM_LOOSE_MASK', 'BEAM_LOOSE_MASK', + 'BEAM_LOOSE_MASK', $WHOLE_WORD); + @instr = (PACK_CMD_LOOSE) x 4; + } + + unless (@shift) { + error("$name: internal packing error: args_per_word=$args_per_word, " . + "pack_word_size=$pack_word_size"); + } + + (\@shift,\@mask,\@instr,$unpack_suffix); +} + +sub pack_score { + my($options,@args) = @_; + my $size = 0; + + # Calculate the number of words. + foreach (@args) { + if (/^packed:[^:]*:(\d+)/) { + $size += $1; + } else { + $size += $arg_size{$_} + } + } + + # Less numbers of words give a higher score; for the same number of + # words, using PACK_JUMP or PACK_IN_INSTR_WORD gives a lower score. + my $score = 1 + 10*($max_spec_operands - $size); + if (($options & PACK_OPT_IN_INSTR_WORD) != 0) { + $score += 4; + } elsif ($options == PACK_IN_INSTR_WORD) { + $score += 0; + } elsif ($options == PACK_JUMP) { + $score += 1; + } elsif ($options == (PACK_JUMP|PACK_IN_INSTR_WORD)) { + $score += 2; + } elsif ($options == 0) { + $score += 3; + } + $score; } sub make_unpack { |