diff options
author | Björn Gustavsson <[email protected]> | 2017-08-24 10:21:39 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2017-08-24 10:21:39 +0200 |
commit | fbf740d68600b59dc5fa7bd76d0aa0d019e81a75 (patch) | |
tree | 08092ecd6b529989e888bf976d1370a1afacf8d0 | |
parent | 6c4b60d6b9208bdc5eef3f0f2da220fbce890938 (diff) | |
parent | 7b64965d7a22d2250d3c6582a6d1737ca325a8dc (diff) | |
download | otp-fbf740d68600b59dc5fa7bd76d0aa0d019e81a75.tar.gz otp-fbf740d68600b59dc5fa7bd76d0aa0d019e81a75.tar.bz2 otp-fbf740d68600b59dc5fa7bd76d0aa0d019e81a75.zip |
Merge branch 'bjorn/erts/improve-beam-ops'
* bjorn/erts/improve-beam-ops:
arith_instrs.tab: Clean up bsl/bsr
beam_makeops: Stop using the Arg() macro
Eliminate the beam_instrs.h file
Add the 'S' type for a register source
Introduce more packable types
Pack cold instructions too
Pack instructions using 'q', 'c', and 's'
beam_makeops: Rewrite the packer, fixing several bugs
Make map update instruction functions indepedent of instruction format
beam_makeops: Introduce the new type 'W' (machine word)
Use the wait_timeout_{un}locked_int instructions
beam_makeops: Remove the unused aliases 'N' and 'U'
beam_makeops: Add an additional sanity check
beam_makeops: Prevent truncation when packing 'I' values
Improve performance for bsl/bsr
arith_instrs.tab: Eliminate warning for uninitialized value
beam_emu: Remove unused macros
beam_makeops: Remove unused subroutine save_c_code
Add missing -no_next for badarg instruction
-rw-r--r-- | erts/emulator/Makefile.in | 1 | ||||
-rw-r--r-- | erts/emulator/beam/arith_instrs.tab | 80 | ||||
-rw-r--r-- | erts/emulator/beam/beam_debug.c | 23 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 71 | ||||
-rw-r--r-- | erts/emulator/beam/beam_load.c | 89 | ||||
-rw-r--r-- | erts/emulator/beam/instrs.tab | 1 | ||||
-rw-r--r-- | erts/emulator/beam/map_instrs.tab | 20 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 135 | ||||
-rw-r--r-- | erts/emulator/test/tuple_SUITE.erl | 7 | ||||
-rwxr-xr-x | erts/emulator/utils/beam_makeops | 372 |
10 files changed, 447 insertions, 352 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index c6511a4b49..bc7eb72221 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -546,7 +546,6 @@ endif $(TTF_DIR)/beam_cold.h \ $(TTF_DIR)/beam_hot.h \ -$(TTF_DIR)/beam_instrs.h \ $(TTF_DIR)/beam_opcodes.c \ $(TTF_DIR)/beam_opcodes.h \ $(TTF_DIR)/beam_pred_funcs.h \ diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 91fe21e161..67cd7c6a2a 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -226,16 +226,12 @@ i_bsr := shift.setup_bsr.execute; shift.head() { Eterm Op1, Op2; Sint shift_left_count; - Sint ires; - Eterm* bigp; - Eterm tmp_big[2]; - Uint BIF; } shift.setup_bsr(Src1, Src2) { Op1 = $Src1; Op2 = $Src2; - BIF = BIF_bsr_2; + shift_left_count = 0; if (is_small(Op2)) { shift_left_count = -signed_val(Op2); } else if (is_big(Op2)) { @@ -245,15 +241,13 @@ shift.setup_bsr(Src1, Src2) { */ shift_left_count = make_small(bignum_header_is_neg(*big_val(Op2)) ? MAX_SMALL : MIN_SMALL); - } else { - shift_left_count = 0; } } shift.setup_bsl(Src1, Src2) { Op1 = $Src1; Op2 = $Src2; - BIF = BIF_bsl_2; + shift_left_count = 0; if (is_small(Op2)) { shift_left_count = signed_val(Op2); } else if (is_big(Op2)) { @@ -271,66 +265,65 @@ shift.setup_bsl(Src1, Src2) { */ shift_left_count = MAX_SMALL; } - } else { - shift_left_count = 0; } } shift.execute(Fail, Live, Dst) { + Uint big_words_needed; + if (is_small(Op1)) { - ires = signed_val(Op1); - if (shift_left_count == 0 || ires == 0) { + Sint int_res = signed_val(Op1); + if (shift_left_count == 0 || int_res == 0) { if (is_not_integer(Op2)) { - c_p->freason = BADARITH; - $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2); + goto shift_error; } - if (ires == 0) { + if (int_res == 0) { $Dst = Op1; $NEXT0(); } } else if (shift_left_count < 0) { /* Right shift */ + Eterm bsr_res; shift_left_count = -shift_left_count; if (shift_left_count >= SMALL_BITS-1) { - $Dst = (ires < 0) ? SMALL_MINUS_ONE : SMALL_ZERO; + bsr_res = (int_res < 0) ? SMALL_MINUS_ONE : SMALL_ZERO; } else { - $Dst = make_small(ires >> shift_left_count); + bsr_res = make_small(int_res >> shift_left_count); } + $Dst = bsr_res; $NEXT0(); } else if (shift_left_count < SMALL_BITS-1) { /* Left shift */ - if ((ires > 0 && - ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & - ires) == 0) || - ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & - ~ires) == 0) { - $Dst = make_small(ires << shift_left_count); + if ((int_res > 0 && + ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & int_res) == 0) || + ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & ~int_res) == 0) { + $Dst = make_small(int_res << shift_left_count); $NEXT0(); } } - ires = 1; /* big_size(small_to_big(Op1)) */ + big_words_needed = 1; /* big_size(small_to_big(Op1)) */ goto big_shift; } else if (is_big(Op1)) { if (shift_left_count == 0) { if (is_not_integer(Op2)) { - c_p->freason = BADARITH; - $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2); + goto shift_error; } $Dst = Op1; $NEXT0(); } - ires = big_size(Op1); + big_words_needed = big_size(Op1); big_shift: if (shift_left_count > 0) { /* Left shift. */ - ires += (shift_left_count / D_EXP); + big_words_needed += (shift_left_count / D_EXP); } else { /* Right shift. */ - if (ires <= (-shift_left_count / D_EXP)) { - ires = 3; /* ??? */ + if (big_words_needed <= (-shift_left_count / D_EXP)) { + big_words_needed = 3; /* ??? */ } else { - ires -= (-shift_left_count / D_EXP); + big_words_needed -= (-shift_left_count / D_EXP); } } { - ires = BIG_NEED_SIZE(ires+1); + Eterm tmp_big[2]; + Sint big_need_size = BIG_NEED_SIZE(big_words_needed+1); /* * Slightly conservative check the size to avoid @@ -338,15 +331,14 @@ shift.execute(Fail, Live, Dst) { * clearly would overflow the arity in the header * word. */ - if (ires-8 > BIG_ARITY_MAX) { + if (big_need_size-8 > BIG_ARITY_MAX) { $SYSTEM_LIMIT($Fail); } - $GC_TEST_PRESERVE(ires+1, $Live, Op1); + $GC_TEST_PRESERVE(big_need_size+1, $Live, Op1); if (is_small(Op1)) { Op1 = small_to_big(signed_val(Op1), tmp_big); } - bigp = HTOP; - Op1 = big_lshift(Op1, shift_left_count, bigp); + Op1 = big_lshift(Op1, shift_left_count, HTOP); if (is_big(Op1)) { HTOP += bignum_header_arity(*HTOP) + 1; } @@ -369,8 +361,22 @@ shift.execute(Fail, Live, Dst) { /* * One or more non-integer arguments. */ + shift_error: c_p->freason = BADARITH; - $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2); + if ($Fail) { + $FAIL($Fail); + } else { + reg[0] = Op1; + reg[1] = Op2; + SWAPOUT; + if (I[0] == (BeamInstr) OpCode(i_bsl_ssjtd)) { + I = handle_error(c_p, I, reg, &bif_export[BIF_bsl_2]->info.mfa); + } else { + ASSERT(I[0] == (BeamInstr) OpCode(i_bsr_ssjtd)); + I = handle_error(c_p, I, reg, &bif_export[BIF_bsr_2]->info.mfa); + } + goto post_error_handling; + } } i_int_bnot(Fail, Src, Live, Dst) { diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index afe87288ce..49414ae8fc 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -522,12 +522,13 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } ap++; break; - case 'I': /* Untagged integer. */ - case 't': + case 't': /* Untagged integers */ + case 'I': + case 'W': switch (op) { - case op_i_gc_bif1_jIsId: - case op_i_gc_bif2_jIIssd: - case op_i_gc_bif3_jIIssd: + case op_i_gc_bif1_jWstd: + case op_i_gc_bif2_jWtssd: + case op_i_gc_bif3_jWtssd: { const ErtsGcBif* p; BifFunction gcf = (BifFunction) *ap; @@ -672,8 +673,8 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_jump_on_val_xfII: - case op_i_jump_on_val_yfII: + case op_i_jump_on_val_xfIW: + case op_i_jump_on_val_yfIW: { int n; for (n = ap[-2]; n > 0; n--) { @@ -696,9 +697,9 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) break; case op_i_put_tuple_xI: case op_i_put_tuple_yI: - case op_new_map_dII: - case op_update_map_assoc_sdII: - case op_update_map_exact_jsdII: + case op_new_map_dtI: + case op_update_map_assoc_sdtI: + case op_update_map_exact_jsdtI: { int n = unpacked[-1]; @@ -718,7 +719,7 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_new_small_map_lit_dIq: + case op_i_new_small_map_lit_dtq: { Eterm *tp = tuple_val(unpacked[-1]); int n = arityval(*tp); diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index e5935f5f02..2c37dc42b3 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -153,10 +153,7 @@ do { \ * Register target (X or Y register). */ -#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb(Target-1) : &xb(Target)) -#define REG_TARGET(Target) (*REG_TARGET_PTR(Target)) - -#define ISCATCHEND(instr) ((Eterm *) *(instr) == OpCode(catch_end_y)) +#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb((Target)-1) : &xb(Target)) /* * Special Beam instructions. @@ -241,9 +238,11 @@ void** beam_ops; PROCESS_MAIN_CHK_LOCKS((P)); \ ERTS_UNREQ_PROC_MAIN_LOCK((P)) +#define db(N) (N) #define tb(N) (N) #define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N))) #define yb(N) (*(Eterm *) (((unsigned char *)E) + (N))) +#define Sb(N) (*REG_TARGET_PTR(N)) #define lb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N))) #define Qb(N) (N) #define Ib(N) (N) @@ -318,10 +317,6 @@ void** beam_ops; #endif #define Arg(N) I[(N)+1] -#define Next(N) \ - I += (N) + 1; \ - ASSERT(VALID_INSTR(*I)); \ - Goto(*I) #define GetR(pos, tr) \ do { \ @@ -406,12 +401,13 @@ static BeamInstr* apply_fun(Process* p, Eterm fun, Eterm args, Eterm* reg) NOINLINE; static Eterm new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free) NOINLINE; -static Eterm new_map(Process* p, Eterm* reg, BeamInstr* I) NOINLINE; -static Eterm new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) NOINLINE; -static Eterm update_map_assoc(Process* p, Eterm* reg, - Eterm map, BeamInstr* I) NOINLINE; -static Eterm update_map_exact(Process* p, Eterm* reg, - Eterm map, BeamInstr* I) NOINLINE; +static Eterm new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) NOINLINE; +static Eterm new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, + Uint live, BeamInstr* ptr) NOINLINE; +static Eterm update_map_assoc(Process* p, Eterm* reg, Uint live, + Uint n, BeamInstr* new_p) NOINLINE; +static Eterm update_map_exact(Process* p, Eterm* reg, Uint live, + Uint n, Eterm* new_p) NOINLINE; static Eterm get_map_element(Eterm map, Eterm key); static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx); @@ -774,7 +770,6 @@ void process_main(Eterm * x_reg_array, FloatDef* f_reg_array) #endif #include "beam_hot.h" -#include "beam_instrs.h" #ifdef DEBUG /* @@ -2734,24 +2729,20 @@ do { \ static Eterm -new_map(Process* p, Eterm* reg, BeamInstr* I) +new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) { - Uint n = Arg(3); Uint i; Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */; Eterm keys; Eterm *mhp,*thp; Eterm *E; - BeamInstr *ptr; flatmap_t *mp; ErtsHeapFactory factory; - ptr = &Arg(4); - if (n > 2*MAP_SMALL_MAP_LIMIT) { Eterm res; if (HeapWordsLeft(p) < n) { - erts_garbage_collect(p, n, reg, Arg(2)); + erts_garbage_collect(p, n, reg, live); } mhp = p->htop; @@ -2772,7 +2763,7 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) } if (HeapWordsLeft(p) < need) { - erts_garbage_collect(p, need, reg, Arg(2)); + erts_garbage_collect(p, need, reg, live); } thp = p->htop; @@ -2795,24 +2786,20 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) } static Eterm -new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) +new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamInstr* ptr) { - Eterm* keys = tuple_val(Arg(3)); + Eterm* keys = tuple_val(keys_literal); Uint n = arityval(*keys); Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */; Uint i; - BeamInstr *ptr; flatmap_t *mp; Eterm *mhp; Eterm *E; - *n_exp = n; - ptr = &Arg(4); - ASSERT(n <= MAP_SMALL_MAP_LIMIT); if (HeapWordsLeft(p) < need) { - erts_garbage_collect(p, need, reg, Arg(2)); + erts_garbage_collect(p, need, reg, live); } mhp = p->htop; @@ -2821,7 +2808,7 @@ new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) mp = (flatmap_t *)mhp; mhp += MAP_HEADER_FLATMAP_SZ; mp->thing_word = MAP_HEADER_FLATMAP; mp->size = n; - mp->keys = Arg(3); + mp->keys = keys_literal; for (i = 0; i < n; i++) { GET_TERM(*ptr++, *mhp++); @@ -2833,9 +2820,8 @@ new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) } static Eterm -update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) +update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p) { - Uint n; Uint num_old; Uint num_updates; Uint need; @@ -2845,12 +2831,12 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Eterm* E; Eterm* old_keys; Eterm* old_vals; - BeamInstr* new_p; Eterm new_key; Eterm* kp; + Eterm map; - new_p = &Arg(4); - num_updates = Arg(3) / 2; + num_updates = n / 2; + map = reg[live]; if (is_not_flatmap(map)) { Uint32 hx; @@ -2880,7 +2866,7 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) */ if (num_old == 0) { - return new_map(p, reg, I); + return new_map(p, reg, live, n, new_p); } /* @@ -2890,8 +2876,6 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) need = 2*(num_old+num_updates) + 1 + MAP_HEADER_FLATMAP_SZ; if (HeapWordsLeft(p) < need) { - Uint live = Arg(2); - reg[live] = map; erts_garbage_collect(p, need, reg, live+1); map = reg[live]; old_mp = (flatmap_t *)flatmap_val(map); @@ -3038,9 +3022,8 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) */ static Eterm -update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) +update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p) { - Uint n; Uint i; Uint num_old; Uint need; @@ -3050,12 +3033,12 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Eterm* E; Eterm* old_keys; Eterm* old_vals; - BeamInstr* new_p; Eterm new_key; + Eterm map; - new_p = &Arg(5); - n = Arg(4) / 2; /* Number of values to be updated */ + n /= 2; /* Number of values to be updated */ ASSERT(n > 0); + map = reg[live]; if (is_not_flatmap(map)) { Uint32 hx; @@ -3109,8 +3092,6 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) need = num_old + MAP_HEADER_FLATMAP_SZ; if (HeapWordsLeft(p) < need) { - Uint live = Arg(3); - reg[live] = map; erts_garbage_collect(p, need, reg, live+1); map = reg[live]; old_mp = (flatmap_t *)flatmap_val(map); diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index dcd312f54f..9ff32e30f3 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2374,7 +2374,8 @@ load_code(LoaderState* stp) break; } break; - case 'd': /* Destination (x(0), x(N), y(N) */ + case 'd': /* Destination (x(N), y(N) */ + case 'S': /* Source (x(N), y(N)) */ switch (tag) { case TAG_x: code[ci++] = tmp_op->a[arg].val * sizeof(Eterm); @@ -2388,11 +2389,29 @@ load_code(LoaderState* stp) break; } break; - case 'I': /* Untagged integer (or pointer). */ - VerifyTag(stp, tag, TAG_u); - code[ci++] = tmp_op->a[arg].val; - break; - case 't': /* Small untagged integer -- can be packed. */ + case 't': /* Small untagged integer (16 bits) -- can be packed. */ + case 'I': /* Untagged integer (32 bits) -- can be packed. */ + case 'W': /* Untagged integer or pointer (machine word). */ +#ifdef DEBUG + switch (*sign) { + case 't': + if (tmp_op->a[arg].val >> 16 != 0) { + load_printf(__LINE__, stp, "value %lu of type 't' does not fit in 16 bits", + tmp_op->a[arg].val); + ASSERT(0); + } + break; +#ifdef ARCH_64 + case 'I': + if (tmp_op->a[arg].val >> 32 != 0) { + load_printf(__LINE__, stp, "value %lu of type 'I' does not fit in 32 bits", + tmp_op->a[arg].val); + ASSERT(0); + } + break; +#endif + } +#endif VerifyTag(stp, tag, TAG_u); code[ci++] = tmp_op->a[arg].val; break; @@ -2477,16 +2496,32 @@ load_code(LoaderState* stp) * The packing engine. */ if (opc[stp->specific_op].pack[0]) { - char* prog; /* Program for packing engine. */ - BeamInstr stack[8]; /* Stack. */ - BeamInstr* sp = stack; /* Points to next free position. */ - BeamInstr packed = 0; /* Accumulator for packed operations. */ + char* prog; /* Program for packing engine. */ + struct pack_stack { + BeamInstr instr; + LiteralPatch* patch; + } stack[8]; /* Stack. */ + struct pack_stack* sp = stack; /* Points to next free position. */ + BeamInstr packed = 0; /* Accumulator for packed operations. */ for (prog = opc[stp->specific_op].pack; *prog; prog++) { switch (*prog) { case 'g': /* Get instruction; push on stack. */ - *sp++ = code[--ci]; - break; + { + LiteralPatch* lp; + + ci--; + sp->instr = code[ci]; + sp->patch = 0; + for (lp = stp->literal_patches; lp && lp->pos > ci-MAX_OPARGS; lp = lp->next) { + if (lp->pos == ci) { + sp->patch = lp; + break; + } + } + sp++; + } + break; case 'i': /* Initialize packing accumulator. */ packed = code[--ci]; break; @@ -2502,10 +2537,17 @@ load_code(LoaderState* stp) break; #endif case 'p': /* Put instruction (from stack). */ - code[ci++] = *--sp; + --sp; + code[ci] = sp->instr; + if (sp->patch) { + sp->patch->pos = ci; + } + ci++; break; case 'P': /* Put packed operands. */ - *sp++ = packed; + sp->instr = packed; + sp->patch = 0; + sp++; packed = 0; break; default: @@ -2627,8 +2669,8 @@ load_code(LoaderState* stp) /* Remember offset for the on_load function. */ stp->on_load = ci; break; - case op_bs_put_string_II: - case op_i_bs_match_string_xfII: + case op_bs_put_string_WW: + case op_i_bs_match_string_xfWW: new_string_patch(stp, ci-1); break; @@ -2884,6 +2926,7 @@ gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index, op->next = NULL; if (Index.type == TAG_i && Index.val > 0 && + Index.val <= ERTS_MAX_TUPLE_SIZE && (Tuple.type == TAG_x || Tuple.type == TAG_y)) { op->op = genop_i_fast_element_4; op->a[0] = Tuple; @@ -3420,7 +3463,7 @@ gen_literal_timeout(LoaderState* stp, GenOpArg Fail, GenOpArg Time) Sint timeout; NEW_GENOP(stp, op); - op->op = genop_wait_timeout_unlocked_2; + op->op = genop_wait_timeout_unlocked_int_2; op->next = NULL; op->arity = 2; op->a[0].type = TAG_u; @@ -3467,12 +3510,12 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time) Sint timeout; NEW_GENOP(stp, op); - op->op = genop_wait_timeout_locked_2; + op->op = genop_wait_timeout_locked_int_2; op->next = NULL; op->arity = 2; - op->a[0] = Fail; - op->a[1].type = TAG_u; - + op->a[0].type = TAG_u; + op->a[1] = Fail; + if (Time.type == TAG_i && (timeout = Time.val) >= 0 && #if defined(ARCH_64) (timeout >> 32) == 0 @@ -3480,7 +3523,7 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time) 1 #endif ) { - op->a[1].val = timeout; + op->a[0].val = timeout; #if !defined(ARCH_64) } else if (Time.type == TAG_q) { Eterm big; @@ -3494,7 +3537,7 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time) } else { Uint u; (void) term_to_Uint(big, &u); - op->a[1].val = (BeamInstr) u; + op->a[0].val = (BeamInstr) u; } #endif } else { diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index d45da62d03..1af01e53bd 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -814,6 +814,7 @@ is_ge(Fail, X, Y) { badarg(Fail) { $BADARG($Fail); + //| -no_next; } badmatch(Src) { diff --git a/erts/emulator/beam/map_instrs.tab b/erts/emulator/beam/map_instrs.tab index 30c3d7743f..bbb2f49b66 100644 --- a/erts/emulator/beam/map_instrs.tab +++ b/erts/emulator/beam/map_instrs.tab @@ -31,22 +31,24 @@ new_map(Dst, Live, N) { Eterm res; HEAVY_SWAPOUT; - res = new_map(c_p, reg, I-1); + res = new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; $REFRESH_GEN_DEST(); $Dst = res; $NEXT($NEXT_INSTRUCTION+$N); } -i_new_small_map_lit(Dst, Live, Literal) { +i_new_small_map_lit(Dst, Live, Keys) { Eterm res; Uint n; + Eterm keys = $Keys; HEAVY_SWAPOUT; - res = new_small_map_lit(c_p, reg, &n, I-1); + res = new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION); HEAVY_SWAPIN; $REFRESH_GEN_DEST(); $Dst = res; + n = arityval(*tuple_val(keys)); $NEXT($NEXT_INSTRUCTION+n); } @@ -127,11 +129,11 @@ i_get_map_elements(Fail, Src, N) { update_map_assoc(Src, Dst, Live, N) { Eterm res; - Eterm map; + Uint live = $Live; - map = $Src; + reg[live] = $Src; HEAVY_SWAPOUT; - res = update_map_assoc(c_p, reg, map, I); + res = update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; ASSERT(is_value(res)); $REFRESH_GEN_DEST(); @@ -141,11 +143,11 @@ update_map_assoc(Src, Dst, Live, N) { update_map_exact(Fail, Src, Dst, Live, N) { Eterm res; - Eterm map; + Uint live = $Live; - map = $Src; + reg[live] = $Src; HEAVY_SWAPOUT; - res = update_map_exact(c_p, reg, map, I); + res = update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; if (is_value(res)) { $REFRESH_GEN_DEST(); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 92e67bb470..b6e995fdbe 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -108,7 +108,7 @@ allocate_zero t t allocate_heap_zero t I t trim N Remaining => i_trim N -i_trim I +i_trim t test_heap I t @@ -167,7 +167,7 @@ i_select_tuple_arity2 xy f A A f f i_jump_on_val_zero xy f I -i_jump_on_val xy f I I +i_jump_on_val xy f I W get_list xy xy xy @@ -192,7 +192,7 @@ try_case_end s # Destructive set tuple element -set_tuple_element s d P +set_tuple_element s S P # Get tuple element @@ -382,8 +382,8 @@ label L | wait_timeout Fail Src | smp_already_locked(L) => \ label L | wait_timeout_locked Src Fail wait_timeout Fail Src => wait_timeout_unlocked Src Fail -wait_timeout_unlocked Fail Src=aiq => gen_literal_timeout(Fail, Src) -wait_timeout_locked Fail Src=aiq => gen_literal_timeout_locked(Fail, Src) +wait_timeout_unlocked Src=aiq Fail => gen_literal_timeout(Fail, Src) +wait_timeout_locked Src=aiq Fail => gen_literal_timeout_locked(Fail, Src) label L | wait Fail | smp_already_locked(L) => label L | wait_locked Fail wait Fail => wait_unlocked Fail @@ -398,6 +398,7 @@ loop_rec_end f wait_locked f wait_unlocked f +# Note that a timeout value must fit in 32 bits. wait_timeout_unlocked_int I f wait_timeout_unlocked s f wait_timeout_locked_int I f @@ -589,7 +590,7 @@ is_integer Fail Literal=q => move Literal x | is_integer Fail x is_integer Fail=f S=x | allocate Need Regs => is_integer_allocate Fail S Need Regs -is_integer_allocate f x I I +is_integer_allocate f x t t is_integer f xy @@ -608,7 +609,7 @@ is_nonempty_list F=f x==0 | test_heap I1 I2 => is_nonempty_list_test_heap F I1 I is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \ is_nonempty_list_get_list Fail S D1 D2 -is_nonempty_list_allocate f rx I t +is_nonempty_list_allocate f rx t t is_nonempty_list_test_heap f I t is_nonempty_list_get_list f rx x x is_nonempty_list f xy @@ -684,7 +685,7 @@ is_function2 f s s allocate Need Regs | init Y => allocate_init Need Regs Y init Y1 | init Y2 => init2 Y1 Y2 -allocate_init t I y +allocate_init t t y ################################################################# # External function and bif calls. @@ -970,6 +971,8 @@ node x node y %hot +# Note: 'I' is sufficient because this instruction will only be used +# if the arity fits in 24 bits. i_fast_element xy j I d i_element xy j s d @@ -1003,15 +1006,15 @@ call_last Ar Func D => i_call_last Func D call_only Ar Func => i_call_only Func i_call f -i_call_last f P +i_call_last f Q i_call_only f i_call_ext e -i_call_ext_last e P +i_call_ext_last e Q i_call_ext_only e i_move_call_ext c e -i_move_call_ext_last e P c +i_move_call_ext_last e Q c i_move_call_ext_only e c # Fun calls. @@ -1019,13 +1022,13 @@ i_move_call_ext_only e c call_fun Arity | deallocate D | return => i_call_fun_last Arity D call_fun Arity => i_call_fun Arity -i_call_fun I -i_call_fun_last I P +i_call_fun t +i_call_fun_last t Q make_fun2 OldIndex=u => gen_make_fun2(OldIndex) %cold -i_make_fun I t +i_make_fun W t %hot is_function f xy @@ -1051,14 +1054,14 @@ i_bs_restore2 x I # Matching integers bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val -i_bs_match_string x f I I +i_bs_match_string x f W W # Fetching integers from binaries. bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_integer_small_imm x I f I x -i_bs_get_integer_imm x I I f I x +i_bs_get_integer_small_imm x W f t x +i_bs_get_integer_imm x W t f t x i_bs_get_integer f I I s s x i_bs_get_integer_8 x f x i_bs_get_integer_16 x f x @@ -1071,7 +1074,7 @@ i_bs_get_integer_32 x f x bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_binary_imm2 f x I I I x +i_bs_get_binary_imm2 f x t W t x i_bs_get_binary2 f x I s I x i_bs_get_binary_all2 f x I I x i_bs_get_binary_all_reuse x f I @@ -1089,14 +1092,14 @@ i_bs_get_float2 f x I s I x bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \ gen_skip_bits2(Fail, Ms, Sz, Unit, Flags) -i_bs_skip_bits_imm2 f x I +i_bs_skip_bits_imm2 f x W i_bs_skip_bits2 f x xy I i_bs_skip_bits_all2 f x I bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits bs_test_zero_tail2 f x -bs_test_tail_imm2 f x I +bs_test_tail_imm2 f x W bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms bs_test_unit f x I @@ -1149,13 +1152,13 @@ bs_init2 Fail Sz Words=u==0 Regs Flags Dst => \ bs_init2 Fail Sz Words Regs Flags Dst => \ i_bs_init_fail_heap Sz Words Fail Regs Dst -i_bs_init_fail xy j I x +i_bs_init_fail xy j t x -i_bs_init_fail_heap s I j I x +i_bs_init_fail_heap s I j t x -i_bs_init I I x +i_bs_init W t x -i_bs_init_heap I I I x +i_bs_init_heap W I t x bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail @@ -1168,16 +1171,16 @@ bs_init_bits Fail Sz Words=u==0 Regs Flags Dst => \ bs_init_bits Fail Sz Words Regs Flags Dst => \ i_bs_init_bits_fail_heap Sz Words Fail Regs Dst -i_bs_init_bits_fail xy j I x +i_bs_init_bits_fail xy j t x -i_bs_init_bits_fail_heap s I j I x +i_bs_init_bits_fail_heap s I j t x -i_bs_init_bits I I x -i_bs_init_bits_heap I I I x +i_bs_init_bits W t x +i_bs_init_bits_heap W I t x bs_add Fail S1=i==0 S2 Unit=u==1 D => move S2 D -bs_add j s s I x +bs_add j s s t x bs_append Fail Size Extra Live Unit Bin Flags Dst => \ move Bin x | i_bs_append Fail Extra Live Unit Size Dst @@ -1187,8 +1190,8 @@ bs_private_append Fail Size Unit Bin Flags Dst => \ bs_init_writable -i_bs_append j I I I s x -i_bs_private_append j I s s x +i_bs_append j I t t s x +i_bs_private_append j t s s x # # Storing integers into binaries. @@ -1197,8 +1200,8 @@ i_bs_private_append j I s s x bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \ gen_put_integer(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_integer j s I s -i_new_bs_put_integer_imm j I I s +i_new_bs_put_integer j s t s +i_new_bs_put_integer_imm j W t s # # Utf8/utf16/utf32 support. (R12B-5) @@ -1216,7 +1219,7 @@ bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src i_bs_put_utf8 j s -bs_put_utf16 j I s +bs_put_utf16 j t s bs_put_utf32 Fail=j Flags=u Src=s => \ i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src @@ -1231,8 +1234,8 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_float(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_float j s I s -i_new_bs_put_float_imm j I I s +i_new_bs_put_float j s t s +i_new_bs_put_float_imm j W t s # # Storing binaries into binaries. @@ -1241,9 +1244,9 @@ i_new_bs_put_float_imm j I I s bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_binary(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_binary j s I s -i_new_bs_put_binary_imm j I s -i_new_bs_put_binary_all j s I +i_new_bs_put_binary j s t s +i_new_bs_put_binary_imm j W s +i_new_bs_put_binary_all j s t # # Warning: The i_bs_put_string and i_new_bs_put_string instructions @@ -1251,7 +1254,7 @@ i_new_bs_put_binary_all j s I # Don't change the instruction format unless you change the loader too. # -bs_put_string I I +bs_put_string W W # # New floating point instructions (R8). @@ -1269,9 +1272,9 @@ fmove Arg=l Dst=d => fstore Arg Dst fmove Arg=dq Dst=l => fload Arg Dst fstore l d -fload dq l +fload Sq l -fconv d l +fconv S l i_fadd l l l i_fsub l l l @@ -1295,8 +1298,8 @@ fclearerror # New apply instructions in R10B. # -apply I -apply_last I P +apply t +apply_last t Q # # Handle compatibility with OTP 17 here. @@ -1354,10 +1357,10 @@ sorted_put_map_exact F Src Dst Live Size Rest=* => \ new_map Dst Live Size Rest=* | is_small_map_literal_keys(Size, Rest) => \ gen_new_small_map_lit(Dst, Live, Size, Rest) -new_map d I I -i_new_small_map_lit d I q -update_map_assoc s d I I -update_map_exact j s d I I +new_map d t I +i_new_small_map_lit d t q +update_map_assoc s d t I +update_map_exact j s d t I is_map Fail Lit=q | literal_is_map(Lit) => is_map Fail cq => jump Fail @@ -1449,32 +1452,32 @@ gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst -i_increment rxy I I d +i_increment rxy W t d -i_plus x xy j I d -i_plus s s j I d +i_plus x xy j t d +i_plus s s j t d -i_minus x x j I d -i_minus s s j I d +i_minus x x j t d +i_minus s s j t d -i_times j I s s d +i_times j t s s d -i_m_div j I s s d -i_int_div j I s s d +i_m_div j t s s d +i_int_div j t s s d -i_rem x x j I d -i_rem s s j I d +i_rem x x j t d +i_rem s s j t d -i_bsl s s j I d -i_bsr s s j I d +i_bsl s s j t d +i_bsr s s j t d -i_band x c j I d -i_band s s j I d +i_band x c j t d +i_band s s j t d i_bor j I s s d i_bxor j I s s d -i_int_bnot j s I d +i_int_bnot j s t d # # Old guard BIFs that creates heap fragments are no longer allowed. @@ -1498,9 +1501,9 @@ gc_bif2 Fail I Bif S1 S2 Dst => \ gc_bif3 Fail I Bif S1 S2 S3 Dst => \ gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst) -i_gc_bif1 j I s I d +i_gc_bif1 j W s t d -i_gc_bif2 j I I s s d +i_gc_bif2 j W t s s d ii_gc_bif3/7 @@ -1509,7 +1512,7 @@ ii_gc_bif3/7 ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \ move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst -i_gc_bif3 j I I s s d +i_gc_bif3 j W t s s d # # The following instruction is specially handled in beam_load.c diff --git a/erts/emulator/test/tuple_SUITE.erl b/erts/emulator/test/tuple_SUITE.erl index 79b681b4d1..baf41180e0 100644 --- a/erts/emulator/test/tuple_SUITE.erl +++ b/erts/emulator/test/tuple_SUITE.erl @@ -134,6 +134,13 @@ t_element(Config) when is_list(Config) -> {'EXIT', {badarg, _}} = (catch element(1, id(42))), {'EXIT', {badarg, _}} = (catch element(id(1.5), id({a,b}))), + %% Make sure that the loader does not reject the module when + %% huge literal index values are used. + {'EXIT', {badarg, _}} = (catch element((1 bsl 24)-1, id({a,b,c}))), + {'EXIT', {badarg, _}} = (catch element(1 bsl 24, id({a,b,c}))), + {'EXIT', {badarg, _}} = (catch element(1 bsl 32, id({a,b,c}))), + {'EXIT', {badarg, _}} = (catch element(1 bsl 64, id({a,b,c}))), + ok. get_elements([Element|Rest], Tuple, Pos) -> diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index da69b13e87..6c54ab3421 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -54,11 +54,6 @@ $pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize 'BEAM_LOOSE_MASK', $WHOLE_WORD]; -# Mapping from packagable arguments to number of packed arguments per -# word. Initialized after the wordsize is known. - -my @args_per_word; - # There are two types of instructions: generic and specific. # The generic instructions are those generated by the Beam compiler. # Corresponding to each generic instruction, there is generally a @@ -97,10 +92,9 @@ my %c_code_used; # Used or not. # Definitions for instructions combined from micro instructions. my %combined_instrs; -my %combined_code; # Combined micro instructions. -my %hot_code; -my %cold_code; +my @generated_code; # Generated code. +my %sort_order; my @unnumbered_generic; my %unnumbered; @@ -144,13 +138,15 @@ my %arg_size = ('r' => 0, # x(0) - x register zero 'n' => 0, # NIL (implicit) 'c' => 1, # tagged constant (integer, atom, nil) 's' => 1, # tagged source; any of the above + 'S' => 1, # tagged source register (x or y) 'd' => 1, # tagged destination register (r, x, y) 'f' => 1, # failure label 'j' => 1, # either 'f' or 'p' 'e' => 1, # pointer to export entry 'L' => 0, # label - 'I' => 1, # untagged integer - 't' => 1, # untagged integer -- can be packed + 't' => 1, # untagged integer (12 bits) -- can be packed + 'I' => 1, # untagged integer (32 bits) -- can be packed + 'W' => 1, # untagged integer/pointer (one word) 'b' => 1, # pointer to bif 'A' => 1, # arity value 'P' => 1, # byte offset into tuple or stack @@ -192,16 +188,16 @@ sub define_type_bit { define_type_bit('s', $type_bit{'d'} | $type_bit{'i'} | $type_bit{'a'} | $type_bit{'n'} | $type_bit{'q'}); + define_type_bit('S', $type_bit{'d'}); define_type_bit('j', $type_bit{'f'} | $type_bit{'p'}); # Aliases (for matching purposes). - define_type_bit('I', $type_bit{'u'}); define_type_bit('t', $type_bit{'u'}); + define_type_bit('I', $type_bit{'u'}); + define_type_bit('W', $type_bit{'u'}); define_type_bit('A', $type_bit{'u'}); define_type_bit('L', $type_bit{'u'}); define_type_bit('b', $type_bit{'u'}); - define_type_bit('N', $type_bit{'u'}); - define_type_bit('U', $type_bit{'u'}); define_type_bit('e', $type_bit{'u'}); define_type_bit('P', $type_bit{'u'}); define_type_bit('Q', $type_bit{'u'}); @@ -228,6 +224,12 @@ $match_engine_ops{'TOP_fail'} = 1; sanity("tag '$tag': primitive tags must be named with lowercase letters") unless $tag =~ /^[a-z]$/; } + + foreach my $tag (keys %arg_size) { + defined $type_bit{$tag} or + sanity("the tag '$tag' has a size in %arg_size, " . + "but has no defined bit pattern"); + } } # @@ -258,15 +260,8 @@ if ($wordsize == 32) { # Initialize number of arguments per packed word. # -$args_per_word[2] = 2; -$args_per_word[3] = 3; -$args_per_word[4] = 2; -$args_per_word[5] = 3; -$args_per_word[6] = 3; - if ($wordsize == 64) { $pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD]; - $args_per_word[4] = 4; } # @@ -278,11 +273,6 @@ my $c_code_block; my $c_code_loc; my @c_args; -sub save_c_code { - my($name,$block,$loc,@args) = @_; - -} - while (<>) { my($op_num); if ($in_c_code) { @@ -393,7 +383,7 @@ while (<>) { # micro instructions. # if (/^(\w+)\s*:=\s*([\w.]+)\s*;\s*$/) { - $combined_instrs{$1} = ["$ARGV($.)","beam_instrs.h",$2]; + $combined_instrs{$1} = ["$ARGV($.)",$2]; next; } @@ -589,17 +579,14 @@ sub emulator_output { # for the emulator. # my($size, $code, $pack) = - basic_generator($name, $hot, '', 0, undef, @args); + basic_generator($name, 1, '', 0, undef, @args); # # Save the generated $code for later. # if (defined $code) { - if ($hot) { - push(@{$hot_code{$code}}, $instr); - } else { - push(@{$cold_code{$code}}, $instr); - } + $code = "OpCase($instr):\n$code"; + push @generated_code, [$hot,$code,($instr)]; } # @@ -710,7 +697,7 @@ sub emulator_output { print "#if !defined(ARCH_64)\n"; print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n]; print "#endif\n"; - print "#define BEAM_WIDE_MASK 0xFFFFUL\n"; + print "#define BEAM_WIDE_MASK 0xFFFFFFFFUL\n"; print "#define BEAM_LOOSE_MASK 0xFFFFUL\n"; print "#define BEAM_TIGHT_MASK 0xFFFFUL\n"; print "#define BEAM_WIDE_SHIFT 32\n"; @@ -814,19 +801,12 @@ sub emulator_output { $name = "$outdir/beam_hot.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); - print_code(\%hot_code); + print_code(1); $name = "$outdir/beam_cold.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); - print_code(\%cold_code); - - foreach my $key (keys %combined_code) { - my $name = "$outdir/$key"; - open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; - comment('C'); - print_indented_code(@{$combined_code{$key}}); - } + print_code(0); } sub init_item { @@ -854,19 +834,14 @@ sub q { } sub print_code { - my($ref) = @_; - my(%sorted); - my($key, $label); # Loop variables. - - foreach $key (keys %$ref) { - my($sort_key); - my($code) = ''; - foreach $label (@{$ref->{$key}}) { - $code .= "OpCase($label):\n"; - $sort_key = $label; - } - $code .= "$key\n"; - $sorted{$sort_key} = $code; + my($include_hot) = @_; + my %sorted; + + foreach my $ref (@generated_code) { + my($hot,$code,@labels) = @$ref; + next unless $hot == $include_hot; + my($sort_key) = @labels; # Use the first label as sort key. + $sorted{$sort_key} = $code; } foreach (sort keys %sorted) { @@ -1043,12 +1018,11 @@ sub comment { # sub combine_micro_instructions { my %groups; - my %group_file; # Sanity check, normalize micro instructions. foreach my $instr (keys %combined_instrs) { my $ref = $combined_instrs{$instr}; - my($def_loc,$outfile,$def) = @$ref; + my($def_loc,$def) = @$ref; my($group,@subs) = split /[.]/, $def; my $arity = 0; @subs = map { "$group.$_" } @subs; @@ -1061,14 +1035,12 @@ sub combine_micro_instructions { $arity += scalar(@c_args); } push @{$groups{$group}}, [$instr,$arity,@subs]; - $group_file{$group} = $outfile; } # Now generate code for each group. foreach my $group (sort keys %groups) { - my $code = combine_instruction_group($group, @{$groups{$group}}); - my $outfile = $group_file{$group}; - push @{$combined_code{$outfile}}, $code; + my($code,@labels) = combine_instruction_group($group, @{$groups{$group}}); + push @generated_code, [1,$code,@labels]; } } @@ -1160,6 +1132,7 @@ sub combine_instruction_group { # Now generate the code for the entire group. my $offset = 0; + my @opcase_labels; for(my $i = 0; $i < @slots; $i++) { my $key = $slots[$i]; @@ -1183,6 +1156,7 @@ sub combine_instruction_group { if ($opcase ne '') { $gcode .= "OpCase($opcase):\n"; + push @opcase_labels, $opcase; } if ($num_references{$label}) { $gcode .= "$label:\n"; @@ -1208,7 +1182,7 @@ sub combine_instruction_group { $offset = $order_to_offset{$slots[$i+1]} if $i < $#slots; } - "{\n$gcode\n}\n\n"; + ("{\n$gcode\n}\n\n",@opcase_labels); } sub micro_label { @@ -1239,7 +1213,7 @@ sub basic_generator { my $c_code_ref = $c_code{$name}; if ($hot and defined $c_code_ref) { - ($prefix, $pack_spec, @args) = do_pack(@args); + ($var_decls, $pack_spec, @args) = do_pack(@args); } # @@ -1253,7 +1227,14 @@ sub basic_generator { my($this_size) = $arg_size{$_}; SWITCH: { - /^pack:(\d):(.*)/ and do { + /^packed:d:(\d):(.*)/ and do { + $var_decls .= "Eterm dst = $2;\n" . + "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; + push(@f, "*dst_ptr"); + $this_size = $1; + last SWITCH; + }; + /^packed:[a-zA-z]:(\d):(.*)/ and do { push(@f, $2); $this_size = $1; last SWITCH; @@ -1262,8 +1243,8 @@ sub basic_generator { push(@f, "r(0)"); last SWITCH; }; - /[lxy]/ and do { - push(@f, $_ . "b(Arg($arg_offset))"); + /[lxyS]/ and do { + push(@f, $_ . "b(" . arg_offset($arg_offset) . ")"); last SWITCH; }; /n/ and do { @@ -1280,13 +1261,13 @@ sub basic_generator { last SWITCH; }; /d/ and do { - $var_decls .= "Eterm dst = Arg($arg_offset);\n" . + $var_decls .= "Eterm dst = " . arg_offset($arg_offset) . ";\n" . "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; push(@f, "*dst_ptr"); last SWITCH; }; defined $arg_size{$_} and do { - push(@f, "Arg($arg_offset)"); + push @f, arg_offset($arg_offset); last SWITCH; }; @@ -1368,6 +1349,11 @@ sub basic_generator { ($size+1, $code, $pack_spec); } +sub arg_offset { + my $offset = shift; + "I[" . ($offset+1) . "]"; +} + sub expand_all { my($code,$bindings_ref) = @_; my %bindings = %{$bindings_ref}; @@ -1457,123 +1443,189 @@ sub expand_macro { sub do_pack { my(@args) = @_; my($packable_args) = 0; - my @is_packable; # Packability (boolean) for each argument. - my $wide_packing = 0; - my(@orig_args) = @args; + my @bits_needed; # Bits needed for each argument. + + # + # Define the minimum number of bits needed for the packable argument types. + # + my %bits_needed = ('x' => 10, + 'y' => 10, + 'Q' => 10, + 'l' => 10, + 'S' => 16, + 'd' => 16, + 't' => 16); + if ($wordsize == 64) { + $bits_needed{'I'} = 32; + } # - # Count the number of packable arguments. If we encounter any 's' or 'd' - # arguments, packing is not possible. + # Count the number of packable arguments. # - my $packable_types = "xytQ"; foreach my $arg (@args) { - if ($arg =~ /^[$packable_types]/) { + if (defined $bits_needed{$arg}) { $packable_args++; - push @is_packable, 1; - } elsif ($arg =~ /^I/ and $wordsize == 64 and $packable_args < 2) { - $wide_packing = 1; - push @is_packable, 1; - if (++$packable_args == 2) { - # We can only pack two arguments. Turn off packing - # for the rest of the arguments. - $packable_types = "\xFF"; - } - } elsif ($arg =~ /^[sd]/) { - return ('', '', @args); - } elsif ($arg =~ /^[scq]/ and $packable_args > 0) { - # When packing, this operand will be picked up from the - # code array, put onto the packing stack, and later put - # back into a different location in the code. The problem - # is that if this operand is a literal, the original - # location in the code would have been remembered in a - # literal patch. For packing to work, we would have to - # adjust the position in the literal patch. For the - # moment, adding additional instructions to the packing - # engine to handle this does not seem worth it, so we will - # just turn off packing. - return ('', '', @args); + push @bits_needed, $bits_needed{$arg}; } else { - push @is_packable, 0; + push @bits_needed, 0; } } # - # Get out of here if too few or too many arguments. + # Nothing to pack unless there are at least 2 packable arguments. # return ('', '', @args) if $packable_args < 2; - my($size) = 0; - my($pack_prefix) = ''; - my($down) = ''; # Pack commands (towards instruction + # + # Determine how many arguments we should pack into each word. + # + my @args_per_word; + my @need_wide_mask; + my $bits = 0; + my $word = 0; + $args_per_word[0] = 0; + $need_wide_mask[0] = 0; + for (my $i = 0; $i < @args; $i++) { + if ($bits_needed[$i]) { + my $needed = $bits_needed[$i]; + + my $next_word = sub { + $word++; + $args_per_word[$word] = 0; + $need_wide_mask[$word] = 0; + $bits = 0; + }; + + if ($bits+$needed > $wordsize) { # Does not fit. + $next_word->(); + } + if ($args_per_word[$word] == 4) { # Can't handle more than 4 args. + $next_word->(); + } + if ($needed == 32 and $args_per_word[$word] > 1) { + # Must only pack two arguments in this word, and there + # are already at least two arguments here. + $next_word->(); + } + $args_per_word[$word]++; + $bits += $needed; + if ($needed == 32) { + $need_wide_mask[$word]++; + } + if ($need_wide_mask[$word] and $bits > 32) { + # Can only pack two things in a word where one + # item is 32 bits. Force the next item into + # the next word. + $bits = $wordsize; + } + } + } + + # + # Try to balance packing between words. + # + if ($args_per_word[$#args_per_word] == 1) { + if ($args_per_word[$#args_per_word-1] < 3) { + pop @args_per_word; + } else { + $args_per_word[$#args_per_word-1]--; + $args_per_word[$#args_per_word]++; + } + } elsif (@args_per_word == 2 and + $args_per_word[0] == 4 and + $args_per_word[1] == 2) { + $args_per_word[0] = 3; + $args_per_word[1] = 3; + } elsif (@args_per_word == 2 and + $args_per_word[0] == 3 and + $args_per_word[1] == 1) { + $args_per_word[0] = 2; + $args_per_word[1] = 2; + } + + my $size = 0; + my $pack_prefix = ''; + my $down = ''; # Pack commands (towards instruction # beginning). - my($up) = ''; # Pack commands (storing back while + my $up = ''; # Pack commands (storing back while # moving forward). + my $did_some_packing = 0; # Nothing packed yet. - my $args_per_word = $args_per_word[$packable_args]; - my @shift; - my @mask; - my @instr; + # Skip an unpackable argument. + my $skip_unpackable = sub { + my($arg) = @_; - if ($wide_packing) { - @shift = ('0', 'BEAM_WIDE_SHIFT'); - @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); - @instr = ('w', 'i'); - } else { - @shift = @{$pack_shift[$args_per_word]}; - @mask = @{$pack_mask[$args_per_word]}; - @instr = @{$pack_instr[$args_per_word]}; - } + if ($arg_size{$arg} and $did_some_packing) { + # Save the argument on the pack engine's stack. + $down = "g${down}"; + $up = "${up}p"; + } else { + # The argument has either zero size (e.g. r(0)), + # or is to the left of the first packed argument + # and will never be accessed. No need to do + # anything. + } + }; # # Now generate the packing instructions. One complication is that # the packing engine works from right-to-left, but we must generate # the instructions from left-to-right because we must calculate # instruction sizes from left-to-right. - # - # XXX Packing 3 't's in one word won't work. Sorry. - my $did_some_packing = 0; # Nothing packed yet. - my($ap) = 0; # Argument number within word. - my($tmpnum) = 1; # Number of temporary variable. - my($expr) = ''; - for (my $i = 0; $i < @args; $i++) { - my($reg) = $args[$i]; - my($this_size) = $arg_size{$reg}; - if ($is_packable[$i]) { - $this_size = 0; - $did_some_packing = 1; - - if ($ap == 0) { - $pack_prefix .= "Eterm tmp_packed$tmpnum = Arg($size);\n"; - $up .= "p"; - $down = "P$down"; - $this_size = 1; - } + my $arg_num = 0; + for (my $word = 0; $word < @args_per_word; $word++) { + my $ap = 0; # Argument number within word. + my $packed_var = "tmp_packed" . ($word+1); + my $args_per_word = $args_per_word[$word]; + my @shift; + my @mask; + my @instr; + + if ($need_wide_mask[$word]) { + @shift = ('0', 'BEAM_WIDE_SHIFT'); + @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); + @instr = ('w', 'i'); + } else { + @shift = @{$pack_shift[$args_per_word]}; + @mask = @{$pack_mask[$args_per_word]}; + @instr = @{$pack_instr[$args_per_word]}; + } - $down = "$instr[$ap]$down"; - my($unpack) = make_unpack($tmpnum, $shift[$ap], $mask[$ap]); - $args[$i] = "pack:$this_size:$reg" . "b($unpack)"; + while ($ap < $args_per_word) { + my $reg = $args[$arg_num]; + my $this_size = $arg_size{$reg}; + if ($bits_needed[$arg_num]) { + $this_size = 0; + $did_some_packing = 1; + + if ($ap == 0) { + $pack_prefix .= "Eterm $packed_var = " . + arg_offset($size) . ";\n"; + $up .= "p"; + $down = "P$down"; + $this_size = 1; + } - if (++$ap == $args_per_word) { - $ap = 0; - $tmpnum++; - } - } elsif ($arg_size{$reg} && $did_some_packing) { - # - # This is an argument that can't be packed. Normally, we must - # save it on the pack engine's stack, unless: - # - # 1. The argument has zero size (e.g. r(0)). Such arguments - # will not be loaded. They disappear. - # 2. If the argument is on the left of the first packed argument, - # the packing engine will never access it (because the engine - # operates from right-to-left). - # + $down = "$instr[$ap]$down"; + my $unpack = make_unpack($packed_var, $shift[$ap], $mask[$ap]); + $args[$arg_num] = "packed:$reg:$this_size:$reg" . "b($unpack)"; - $down = "g${down}"; - $up = "${up}p"; - } - $size += $this_size; + $ap++; + } else { + $skip_unpackable->($reg); + } + $size += $this_size; + $arg_num++; + } + } + + # + # Skip any unpackable arguments at the end. + # + while ($arg_num < @args) { + $skip_unpackable->($args[$arg_num]); + $arg_num++; } my $pack_spec = $down . $up; @@ -1581,9 +1633,9 @@ sub do_pack { } sub make_unpack { - my($tmpnum, $shift, $mask) = @_; + my($packed_var, $shift, $mask) = @_; - my($e) = "tmp_packed$tmpnum"; + my $e = $packed_var; $e = "($e>>$shift)" if $shift; $e .= "&$mask" unless $mask eq $WHOLE_WORD; $e; |