aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-08-24 10:21:39 +0200
committerBjörn Gustavsson <[email protected]>2017-08-24 10:21:39 +0200
commitfbf740d68600b59dc5fa7bd76d0aa0d019e81a75 (patch)
tree08092ecd6b529989e888bf976d1370a1afacf8d0
parent6c4b60d6b9208bdc5eef3f0f2da220fbce890938 (diff)
parent7b64965d7a22d2250d3c6582a6d1737ca325a8dc (diff)
downloadotp-fbf740d68600b59dc5fa7bd76d0aa0d019e81a75.tar.gz
otp-fbf740d68600b59dc5fa7bd76d0aa0d019e81a75.tar.bz2
otp-fbf740d68600b59dc5fa7bd76d0aa0d019e81a75.zip
Merge branch 'bjorn/erts/improve-beam-ops'
* bjorn/erts/improve-beam-ops: arith_instrs.tab: Clean up bsl/bsr beam_makeops: Stop using the Arg() macro Eliminate the beam_instrs.h file Add the 'S' type for a register source Introduce more packable types Pack cold instructions too Pack instructions using 'q', 'c', and 's' beam_makeops: Rewrite the packer, fixing several bugs Make map update instruction functions indepedent of instruction format beam_makeops: Introduce the new type 'W' (machine word) Use the wait_timeout_{un}locked_int instructions beam_makeops: Remove the unused aliases 'N' and 'U' beam_makeops: Add an additional sanity check beam_makeops: Prevent truncation when packing 'I' values Improve performance for bsl/bsr arith_instrs.tab: Eliminate warning for uninitialized value beam_emu: Remove unused macros beam_makeops: Remove unused subroutine save_c_code Add missing -no_next for badarg instruction
-rw-r--r--erts/emulator/Makefile.in1
-rw-r--r--erts/emulator/beam/arith_instrs.tab80
-rw-r--r--erts/emulator/beam/beam_debug.c23
-rw-r--r--erts/emulator/beam/beam_emu.c71
-rw-r--r--erts/emulator/beam/beam_load.c89
-rw-r--r--erts/emulator/beam/instrs.tab1
-rw-r--r--erts/emulator/beam/map_instrs.tab20
-rw-r--r--erts/emulator/beam/ops.tab135
-rw-r--r--erts/emulator/test/tuple_SUITE.erl7
-rwxr-xr-xerts/emulator/utils/beam_makeops372
10 files changed, 447 insertions, 352 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index c6511a4b49..bc7eb72221 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -546,7 +546,6 @@ endif
$(TTF_DIR)/beam_cold.h \
$(TTF_DIR)/beam_hot.h \
-$(TTF_DIR)/beam_instrs.h \
$(TTF_DIR)/beam_opcodes.c \
$(TTF_DIR)/beam_opcodes.h \
$(TTF_DIR)/beam_pred_funcs.h \
diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab
index 91fe21e161..67cd7c6a2a 100644
--- a/erts/emulator/beam/arith_instrs.tab
+++ b/erts/emulator/beam/arith_instrs.tab
@@ -226,16 +226,12 @@ i_bsr := shift.setup_bsr.execute;
shift.head() {
Eterm Op1, Op2;
Sint shift_left_count;
- Sint ires;
- Eterm* bigp;
- Eterm tmp_big[2];
- Uint BIF;
}
shift.setup_bsr(Src1, Src2) {
Op1 = $Src1;
Op2 = $Src2;
- BIF = BIF_bsr_2;
+ shift_left_count = 0;
if (is_small(Op2)) {
shift_left_count = -signed_val(Op2);
} else if (is_big(Op2)) {
@@ -245,15 +241,13 @@ shift.setup_bsr(Src1, Src2) {
*/
shift_left_count = make_small(bignum_header_is_neg(*big_val(Op2)) ?
MAX_SMALL : MIN_SMALL);
- } else {
- shift_left_count = 0;
}
}
shift.setup_bsl(Src1, Src2) {
Op1 = $Src1;
Op2 = $Src2;
- BIF = BIF_bsl_2;
+ shift_left_count = 0;
if (is_small(Op2)) {
shift_left_count = signed_val(Op2);
} else if (is_big(Op2)) {
@@ -271,66 +265,65 @@ shift.setup_bsl(Src1, Src2) {
*/
shift_left_count = MAX_SMALL;
}
- } else {
- shift_left_count = 0;
}
}
shift.execute(Fail, Live, Dst) {
+ Uint big_words_needed;
+
if (is_small(Op1)) {
- ires = signed_val(Op1);
- if (shift_left_count == 0 || ires == 0) {
+ Sint int_res = signed_val(Op1);
+ if (shift_left_count == 0 || int_res == 0) {
if (is_not_integer(Op2)) {
- c_p->freason = BADARITH;
- $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2);
+ goto shift_error;
}
- if (ires == 0) {
+ if (int_res == 0) {
$Dst = Op1;
$NEXT0();
}
} else if (shift_left_count < 0) { /* Right shift */
+ Eterm bsr_res;
shift_left_count = -shift_left_count;
if (shift_left_count >= SMALL_BITS-1) {
- $Dst = (ires < 0) ? SMALL_MINUS_ONE : SMALL_ZERO;
+ bsr_res = (int_res < 0) ? SMALL_MINUS_ONE : SMALL_ZERO;
} else {
- $Dst = make_small(ires >> shift_left_count);
+ bsr_res = make_small(int_res >> shift_left_count);
}
+ $Dst = bsr_res;
$NEXT0();
} else if (shift_left_count < SMALL_BITS-1) { /* Left shift */
- if ((ires > 0 &&
- ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) &
- ires) == 0) ||
- ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) &
- ~ires) == 0) {
- $Dst = make_small(ires << shift_left_count);
+ if ((int_res > 0 &&
+ ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & int_res) == 0) ||
+ ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & ~int_res) == 0) {
+ $Dst = make_small(int_res << shift_left_count);
$NEXT0();
}
}
- ires = 1; /* big_size(small_to_big(Op1)) */
+ big_words_needed = 1; /* big_size(small_to_big(Op1)) */
goto big_shift;
} else if (is_big(Op1)) {
if (shift_left_count == 0) {
if (is_not_integer(Op2)) {
- c_p->freason = BADARITH;
- $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2);
+ goto shift_error;
}
$Dst = Op1;
$NEXT0();
}
- ires = big_size(Op1);
+ big_words_needed = big_size(Op1);
big_shift:
if (shift_left_count > 0) { /* Left shift. */
- ires += (shift_left_count / D_EXP);
+ big_words_needed += (shift_left_count / D_EXP);
} else { /* Right shift. */
- if (ires <= (-shift_left_count / D_EXP)) {
- ires = 3; /* ??? */
+ if (big_words_needed <= (-shift_left_count / D_EXP)) {
+ big_words_needed = 3; /* ??? */
} else {
- ires -= (-shift_left_count / D_EXP);
+ big_words_needed -= (-shift_left_count / D_EXP);
}
}
{
- ires = BIG_NEED_SIZE(ires+1);
+ Eterm tmp_big[2];
+ Sint big_need_size = BIG_NEED_SIZE(big_words_needed+1);
/*
* Slightly conservative check the size to avoid
@@ -338,15 +331,14 @@ shift.execute(Fail, Live, Dst) {
* clearly would overflow the arity in the header
* word.
*/
- if (ires-8 > BIG_ARITY_MAX) {
+ if (big_need_size-8 > BIG_ARITY_MAX) {
$SYSTEM_LIMIT($Fail);
}
- $GC_TEST_PRESERVE(ires+1, $Live, Op1);
+ $GC_TEST_PRESERVE(big_need_size+1, $Live, Op1);
if (is_small(Op1)) {
Op1 = small_to_big(signed_val(Op1), tmp_big);
}
- bigp = HTOP;
- Op1 = big_lshift(Op1, shift_left_count, bigp);
+ Op1 = big_lshift(Op1, shift_left_count, HTOP);
if (is_big(Op1)) {
HTOP += bignum_header_arity(*HTOP) + 1;
}
@@ -369,8 +361,22 @@ shift.execute(Fail, Live, Dst) {
/*
* One or more non-integer arguments.
*/
+ shift_error:
c_p->freason = BADARITH;
- $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2);
+ if ($Fail) {
+ $FAIL($Fail);
+ } else {
+ reg[0] = Op1;
+ reg[1] = Op2;
+ SWAPOUT;
+ if (I[0] == (BeamInstr) OpCode(i_bsl_ssjtd)) {
+ I = handle_error(c_p, I, reg, &bif_export[BIF_bsl_2]->info.mfa);
+ } else {
+ ASSERT(I[0] == (BeamInstr) OpCode(i_bsr_ssjtd));
+ I = handle_error(c_p, I, reg, &bif_export[BIF_bsr_2]->info.mfa);
+ }
+ goto post_error_handling;
+ }
}
i_int_bnot(Fail, Src, Live, Dst) {
diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c
index afe87288ce..49414ae8fc 100644
--- a/erts/emulator/beam/beam_debug.c
+++ b/erts/emulator/beam/beam_debug.c
@@ -522,12 +522,13 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr)
}
ap++;
break;
- case 'I': /* Untagged integer. */
- case 't':
+ case 't': /* Untagged integers */
+ case 'I':
+ case 'W':
switch (op) {
- case op_i_gc_bif1_jIsId:
- case op_i_gc_bif2_jIIssd:
- case op_i_gc_bif3_jIIssd:
+ case op_i_gc_bif1_jWstd:
+ case op_i_gc_bif2_jWtssd:
+ case op_i_gc_bif3_jWtssd:
{
const ErtsGcBif* p;
BifFunction gcf = (BifFunction) *ap;
@@ -672,8 +673,8 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr)
}
}
break;
- case op_i_jump_on_val_xfII:
- case op_i_jump_on_val_yfII:
+ case op_i_jump_on_val_xfIW:
+ case op_i_jump_on_val_yfIW:
{
int n;
for (n = ap[-2]; n > 0; n--) {
@@ -696,9 +697,9 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr)
break;
case op_i_put_tuple_xI:
case op_i_put_tuple_yI:
- case op_new_map_dII:
- case op_update_map_assoc_sdII:
- case op_update_map_exact_jsdII:
+ case op_new_map_dtI:
+ case op_update_map_assoc_sdtI:
+ case op_update_map_exact_jsdtI:
{
int n = unpacked[-1];
@@ -718,7 +719,7 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr)
}
}
break;
- case op_i_new_small_map_lit_dIq:
+ case op_i_new_small_map_lit_dtq:
{
Eterm *tp = tuple_val(unpacked[-1]);
int n = arityval(*tp);
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index e5935f5f02..2c37dc42b3 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -153,10 +153,7 @@ do { \
* Register target (X or Y register).
*/
-#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb(Target-1) : &xb(Target))
-#define REG_TARGET(Target) (*REG_TARGET_PTR(Target))
-
-#define ISCATCHEND(instr) ((Eterm *) *(instr) == OpCode(catch_end_y))
+#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb((Target)-1) : &xb(Target))
/*
* Special Beam instructions.
@@ -241,9 +238,11 @@ void** beam_ops;
PROCESS_MAIN_CHK_LOCKS((P)); \
ERTS_UNREQ_PROC_MAIN_LOCK((P))
+#define db(N) (N)
#define tb(N) (N)
#define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N)))
#define yb(N) (*(Eterm *) (((unsigned char *)E) + (N)))
+#define Sb(N) (*REG_TARGET_PTR(N))
#define lb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N)))
#define Qb(N) (N)
#define Ib(N) (N)
@@ -318,10 +317,6 @@ void** beam_ops;
#endif
#define Arg(N) I[(N)+1]
-#define Next(N) \
- I += (N) + 1; \
- ASSERT(VALID_INSTR(*I)); \
- Goto(*I)
#define GetR(pos, tr) \
do { \
@@ -406,12 +401,13 @@ static BeamInstr* apply_fun(Process* p, Eterm fun,
Eterm args, Eterm* reg) NOINLINE;
static Eterm new_fun(Process* p, Eterm* reg,
ErlFunEntry* fe, int num_free) NOINLINE;
-static Eterm new_map(Process* p, Eterm* reg, BeamInstr* I) NOINLINE;
-static Eterm new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) NOINLINE;
-static Eterm update_map_assoc(Process* p, Eterm* reg,
- Eterm map, BeamInstr* I) NOINLINE;
-static Eterm update_map_exact(Process* p, Eterm* reg,
- Eterm map, BeamInstr* I) NOINLINE;
+static Eterm new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) NOINLINE;
+static Eterm new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal,
+ Uint live, BeamInstr* ptr) NOINLINE;
+static Eterm update_map_assoc(Process* p, Eterm* reg, Uint live,
+ Uint n, BeamInstr* new_p) NOINLINE;
+static Eterm update_map_exact(Process* p, Eterm* reg, Uint live,
+ Uint n, Eterm* new_p) NOINLINE;
static Eterm get_map_element(Eterm map, Eterm key);
static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx);
@@ -774,7 +770,6 @@ void process_main(Eterm * x_reg_array, FloatDef* f_reg_array)
#endif
#include "beam_hot.h"
-#include "beam_instrs.h"
#ifdef DEBUG
/*
@@ -2734,24 +2729,20 @@ do { \
static Eterm
-new_map(Process* p, Eterm* reg, BeamInstr* I)
+new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr)
{
- Uint n = Arg(3);
Uint i;
Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */;
Eterm keys;
Eterm *mhp,*thp;
Eterm *E;
- BeamInstr *ptr;
flatmap_t *mp;
ErtsHeapFactory factory;
- ptr = &Arg(4);
-
if (n > 2*MAP_SMALL_MAP_LIMIT) {
Eterm res;
if (HeapWordsLeft(p) < n) {
- erts_garbage_collect(p, n, reg, Arg(2));
+ erts_garbage_collect(p, n, reg, live);
}
mhp = p->htop;
@@ -2772,7 +2763,7 @@ new_map(Process* p, Eterm* reg, BeamInstr* I)
}
if (HeapWordsLeft(p) < need) {
- erts_garbage_collect(p, need, reg, Arg(2));
+ erts_garbage_collect(p, need, reg, live);
}
thp = p->htop;
@@ -2795,24 +2786,20 @@ new_map(Process* p, Eterm* reg, BeamInstr* I)
}
static Eterm
-new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I)
+new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamInstr* ptr)
{
- Eterm* keys = tuple_val(Arg(3));
+ Eterm* keys = tuple_val(keys_literal);
Uint n = arityval(*keys);
Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */;
Uint i;
- BeamInstr *ptr;
flatmap_t *mp;
Eterm *mhp;
Eterm *E;
- *n_exp = n;
- ptr = &Arg(4);
-
ASSERT(n <= MAP_SMALL_MAP_LIMIT);
if (HeapWordsLeft(p) < need) {
- erts_garbage_collect(p, need, reg, Arg(2));
+ erts_garbage_collect(p, need, reg, live);
}
mhp = p->htop;
@@ -2821,7 +2808,7 @@ new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I)
mp = (flatmap_t *)mhp; mhp += MAP_HEADER_FLATMAP_SZ;
mp->thing_word = MAP_HEADER_FLATMAP;
mp->size = n;
- mp->keys = Arg(3);
+ mp->keys = keys_literal;
for (i = 0; i < n; i++) {
GET_TERM(*ptr++, *mhp++);
@@ -2833,9 +2820,8 @@ new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I)
}
static Eterm
-update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
+update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p)
{
- Uint n;
Uint num_old;
Uint num_updates;
Uint need;
@@ -2845,12 +2831,12 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
Eterm* E;
Eterm* old_keys;
Eterm* old_vals;
- BeamInstr* new_p;
Eterm new_key;
Eterm* kp;
+ Eterm map;
- new_p = &Arg(4);
- num_updates = Arg(3) / 2;
+ num_updates = n / 2;
+ map = reg[live];
if (is_not_flatmap(map)) {
Uint32 hx;
@@ -2880,7 +2866,7 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
*/
if (num_old == 0) {
- return new_map(p, reg, I);
+ return new_map(p, reg, live, n, new_p);
}
/*
@@ -2890,8 +2876,6 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
need = 2*(num_old+num_updates) + 1 + MAP_HEADER_FLATMAP_SZ;
if (HeapWordsLeft(p) < need) {
- Uint live = Arg(2);
- reg[live] = map;
erts_garbage_collect(p, need, reg, live+1);
map = reg[live];
old_mp = (flatmap_t *)flatmap_val(map);
@@ -3038,9 +3022,8 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
*/
static Eterm
-update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
+update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p)
{
- Uint n;
Uint i;
Uint num_old;
Uint need;
@@ -3050,12 +3033,12 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
Eterm* E;
Eterm* old_keys;
Eterm* old_vals;
- BeamInstr* new_p;
Eterm new_key;
+ Eterm map;
- new_p = &Arg(5);
- n = Arg(4) / 2; /* Number of values to be updated */
+ n /= 2; /* Number of values to be updated */
ASSERT(n > 0);
+ map = reg[live];
if (is_not_flatmap(map)) {
Uint32 hx;
@@ -3109,8 +3092,6 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I)
need = num_old + MAP_HEADER_FLATMAP_SZ;
if (HeapWordsLeft(p) < need) {
- Uint live = Arg(3);
- reg[live] = map;
erts_garbage_collect(p, need, reg, live+1);
map = reg[live];
old_mp = (flatmap_t *)flatmap_val(map);
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index dcd312f54f..9ff32e30f3 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2374,7 +2374,8 @@ load_code(LoaderState* stp)
break;
}
break;
- case 'd': /* Destination (x(0), x(N), y(N) */
+ case 'd': /* Destination (x(N), y(N) */
+ case 'S': /* Source (x(N), y(N)) */
switch (tag) {
case TAG_x:
code[ci++] = tmp_op->a[arg].val * sizeof(Eterm);
@@ -2388,11 +2389,29 @@ load_code(LoaderState* stp)
break;
}
break;
- case 'I': /* Untagged integer (or pointer). */
- VerifyTag(stp, tag, TAG_u);
- code[ci++] = tmp_op->a[arg].val;
- break;
- case 't': /* Small untagged integer -- can be packed. */
+ case 't': /* Small untagged integer (16 bits) -- can be packed. */
+ case 'I': /* Untagged integer (32 bits) -- can be packed. */
+ case 'W': /* Untagged integer or pointer (machine word). */
+#ifdef DEBUG
+ switch (*sign) {
+ case 't':
+ if (tmp_op->a[arg].val >> 16 != 0) {
+ load_printf(__LINE__, stp, "value %lu of type 't' does not fit in 16 bits",
+ tmp_op->a[arg].val);
+ ASSERT(0);
+ }
+ break;
+#ifdef ARCH_64
+ case 'I':
+ if (tmp_op->a[arg].val >> 32 != 0) {
+ load_printf(__LINE__, stp, "value %lu of type 'I' does not fit in 32 bits",
+ tmp_op->a[arg].val);
+ ASSERT(0);
+ }
+ break;
+#endif
+ }
+#endif
VerifyTag(stp, tag, TAG_u);
code[ci++] = tmp_op->a[arg].val;
break;
@@ -2477,16 +2496,32 @@ load_code(LoaderState* stp)
* The packing engine.
*/
if (opc[stp->specific_op].pack[0]) {
- char* prog; /* Program for packing engine. */
- BeamInstr stack[8]; /* Stack. */
- BeamInstr* sp = stack; /* Points to next free position. */
- BeamInstr packed = 0; /* Accumulator for packed operations. */
+ char* prog; /* Program for packing engine. */
+ struct pack_stack {
+ BeamInstr instr;
+ LiteralPatch* patch;
+ } stack[8]; /* Stack. */
+ struct pack_stack* sp = stack; /* Points to next free position. */
+ BeamInstr packed = 0; /* Accumulator for packed operations. */
for (prog = opc[stp->specific_op].pack; *prog; prog++) {
switch (*prog) {
case 'g': /* Get instruction; push on stack. */
- *sp++ = code[--ci];
- break;
+ {
+ LiteralPatch* lp;
+
+ ci--;
+ sp->instr = code[ci];
+ sp->patch = 0;
+ for (lp = stp->literal_patches; lp && lp->pos > ci-MAX_OPARGS; lp = lp->next) {
+ if (lp->pos == ci) {
+ sp->patch = lp;
+ break;
+ }
+ }
+ sp++;
+ }
+ break;
case 'i': /* Initialize packing accumulator. */
packed = code[--ci];
break;
@@ -2502,10 +2537,17 @@ load_code(LoaderState* stp)
break;
#endif
case 'p': /* Put instruction (from stack). */
- code[ci++] = *--sp;
+ --sp;
+ code[ci] = sp->instr;
+ if (sp->patch) {
+ sp->patch->pos = ci;
+ }
+ ci++;
break;
case 'P': /* Put packed operands. */
- *sp++ = packed;
+ sp->instr = packed;
+ sp->patch = 0;
+ sp++;
packed = 0;
break;
default:
@@ -2627,8 +2669,8 @@ load_code(LoaderState* stp)
/* Remember offset for the on_load function. */
stp->on_load = ci;
break;
- case op_bs_put_string_II:
- case op_i_bs_match_string_xfII:
+ case op_bs_put_string_WW:
+ case op_i_bs_match_string_xfWW:
new_string_patch(stp, ci-1);
break;
@@ -2884,6 +2926,7 @@ gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index,
op->next = NULL;
if (Index.type == TAG_i && Index.val > 0 &&
+ Index.val <= ERTS_MAX_TUPLE_SIZE &&
(Tuple.type == TAG_x || Tuple.type == TAG_y)) {
op->op = genop_i_fast_element_4;
op->a[0] = Tuple;
@@ -3420,7 +3463,7 @@ gen_literal_timeout(LoaderState* stp, GenOpArg Fail, GenOpArg Time)
Sint timeout;
NEW_GENOP(stp, op);
- op->op = genop_wait_timeout_unlocked_2;
+ op->op = genop_wait_timeout_unlocked_int_2;
op->next = NULL;
op->arity = 2;
op->a[0].type = TAG_u;
@@ -3467,12 +3510,12 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time)
Sint timeout;
NEW_GENOP(stp, op);
- op->op = genop_wait_timeout_locked_2;
+ op->op = genop_wait_timeout_locked_int_2;
op->next = NULL;
op->arity = 2;
- op->a[0] = Fail;
- op->a[1].type = TAG_u;
-
+ op->a[0].type = TAG_u;
+ op->a[1] = Fail;
+
if (Time.type == TAG_i && (timeout = Time.val) >= 0 &&
#if defined(ARCH_64)
(timeout >> 32) == 0
@@ -3480,7 +3523,7 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time)
1
#endif
) {
- op->a[1].val = timeout;
+ op->a[0].val = timeout;
#if !defined(ARCH_64)
} else if (Time.type == TAG_q) {
Eterm big;
@@ -3494,7 +3537,7 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time)
} else {
Uint u;
(void) term_to_Uint(big, &u);
- op->a[1].val = (BeamInstr) u;
+ op->a[0].val = (BeamInstr) u;
}
#endif
} else {
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index d45da62d03..1af01e53bd 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -814,6 +814,7 @@ is_ge(Fail, X, Y) {
badarg(Fail) {
$BADARG($Fail);
+ //| -no_next;
}
badmatch(Src) {
diff --git a/erts/emulator/beam/map_instrs.tab b/erts/emulator/beam/map_instrs.tab
index 30c3d7743f..bbb2f49b66 100644
--- a/erts/emulator/beam/map_instrs.tab
+++ b/erts/emulator/beam/map_instrs.tab
@@ -31,22 +31,24 @@ new_map(Dst, Live, N) {
Eterm res;
HEAVY_SWAPOUT;
- res = new_map(c_p, reg, I-1);
+ res = new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
$REFRESH_GEN_DEST();
$Dst = res;
$NEXT($NEXT_INSTRUCTION+$N);
}
-i_new_small_map_lit(Dst, Live, Literal) {
+i_new_small_map_lit(Dst, Live, Keys) {
Eterm res;
Uint n;
+ Eterm keys = $Keys;
HEAVY_SWAPOUT;
- res = new_small_map_lit(c_p, reg, &n, I-1);
+ res = new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
$REFRESH_GEN_DEST();
$Dst = res;
+ n = arityval(*tuple_val(keys));
$NEXT($NEXT_INSTRUCTION+n);
}
@@ -127,11 +129,11 @@ i_get_map_elements(Fail, Src, N) {
update_map_assoc(Src, Dst, Live, N) {
Eterm res;
- Eterm map;
+ Uint live = $Live;
- map = $Src;
+ reg[live] = $Src;
HEAVY_SWAPOUT;
- res = update_map_assoc(c_p, reg, map, I);
+ res = update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
ASSERT(is_value(res));
$REFRESH_GEN_DEST();
@@ -141,11 +143,11 @@ update_map_assoc(Src, Dst, Live, N) {
update_map_exact(Fail, Src, Dst, Live, N) {
Eterm res;
- Eterm map;
+ Uint live = $Live;
- map = $Src;
+ reg[live] = $Src;
HEAVY_SWAPOUT;
- res = update_map_exact(c_p, reg, map, I);
+ res = update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
if (is_value(res)) {
$REFRESH_GEN_DEST();
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 92e67bb470..b6e995fdbe 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -108,7 +108,7 @@ allocate_zero t t
allocate_heap_zero t I t
trim N Remaining => i_trim N
-i_trim I
+i_trim t
test_heap I t
@@ -167,7 +167,7 @@ i_select_tuple_arity2 xy f A A f f
i_jump_on_val_zero xy f I
-i_jump_on_val xy f I I
+i_jump_on_val xy f I W
get_list xy xy xy
@@ -192,7 +192,7 @@ try_case_end s
# Destructive set tuple element
-set_tuple_element s d P
+set_tuple_element s S P
# Get tuple element
@@ -382,8 +382,8 @@ label L | wait_timeout Fail Src | smp_already_locked(L) => \
label L | wait_timeout_locked Src Fail
wait_timeout Fail Src => wait_timeout_unlocked Src Fail
-wait_timeout_unlocked Fail Src=aiq => gen_literal_timeout(Fail, Src)
-wait_timeout_locked Fail Src=aiq => gen_literal_timeout_locked(Fail, Src)
+wait_timeout_unlocked Src=aiq Fail => gen_literal_timeout(Fail, Src)
+wait_timeout_locked Src=aiq Fail => gen_literal_timeout_locked(Fail, Src)
label L | wait Fail | smp_already_locked(L) => label L | wait_locked Fail
wait Fail => wait_unlocked Fail
@@ -398,6 +398,7 @@ loop_rec_end f
wait_locked f
wait_unlocked f
+# Note that a timeout value must fit in 32 bits.
wait_timeout_unlocked_int I f
wait_timeout_unlocked s f
wait_timeout_locked_int I f
@@ -589,7 +590,7 @@ is_integer Fail Literal=q => move Literal x | is_integer Fail x
is_integer Fail=f S=x | allocate Need Regs => is_integer_allocate Fail S Need Regs
-is_integer_allocate f x I I
+is_integer_allocate f x t t
is_integer f xy
@@ -608,7 +609,7 @@ is_nonempty_list F=f x==0 | test_heap I1 I2 => is_nonempty_list_test_heap F I1 I
is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \
is_nonempty_list_get_list Fail S D1 D2
-is_nonempty_list_allocate f rx I t
+is_nonempty_list_allocate f rx t t
is_nonempty_list_test_heap f I t
is_nonempty_list_get_list f rx x x
is_nonempty_list f xy
@@ -684,7 +685,7 @@ is_function2 f s s
allocate Need Regs | init Y => allocate_init Need Regs Y
init Y1 | init Y2 => init2 Y1 Y2
-allocate_init t I y
+allocate_init t t y
#################################################################
# External function and bif calls.
@@ -970,6 +971,8 @@ node x
node y
%hot
+# Note: 'I' is sufficient because this instruction will only be used
+# if the arity fits in 24 bits.
i_fast_element xy j I d
i_element xy j s d
@@ -1003,15 +1006,15 @@ call_last Ar Func D => i_call_last Func D
call_only Ar Func => i_call_only Func
i_call f
-i_call_last f P
+i_call_last f Q
i_call_only f
i_call_ext e
-i_call_ext_last e P
+i_call_ext_last e Q
i_call_ext_only e
i_move_call_ext c e
-i_move_call_ext_last e P c
+i_move_call_ext_last e Q c
i_move_call_ext_only e c
# Fun calls.
@@ -1019,13 +1022,13 @@ i_move_call_ext_only e c
call_fun Arity | deallocate D | return => i_call_fun_last Arity D
call_fun Arity => i_call_fun Arity
-i_call_fun I
-i_call_fun_last I P
+i_call_fun t
+i_call_fun_last t Q
make_fun2 OldIndex=u => gen_make_fun2(OldIndex)
%cold
-i_make_fun I t
+i_make_fun W t
%hot
is_function f xy
@@ -1051,14 +1054,14 @@ i_bs_restore2 x I
# Matching integers
bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val
-i_bs_match_string x f I I
+i_bs_match_string x f W W
# Fetching integers from binaries.
bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
-i_bs_get_integer_small_imm x I f I x
-i_bs_get_integer_imm x I I f I x
+i_bs_get_integer_small_imm x W f t x
+i_bs_get_integer_imm x W t f t x
i_bs_get_integer f I I s s x
i_bs_get_integer_8 x f x
i_bs_get_integer_16 x f x
@@ -1071,7 +1074,7 @@ i_bs_get_integer_32 x f x
bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
-i_bs_get_binary_imm2 f x I I I x
+i_bs_get_binary_imm2 f x t W t x
i_bs_get_binary2 f x I s I x
i_bs_get_binary_all2 f x I I x
i_bs_get_binary_all_reuse x f I
@@ -1089,14 +1092,14 @@ i_bs_get_float2 f x I s I x
bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \
gen_skip_bits2(Fail, Ms, Sz, Unit, Flags)
-i_bs_skip_bits_imm2 f x I
+i_bs_skip_bits_imm2 f x W
i_bs_skip_bits2 f x xy I
i_bs_skip_bits_all2 f x I
bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms
bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits
bs_test_zero_tail2 f x
-bs_test_tail_imm2 f x I
+bs_test_tail_imm2 f x W
bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms
bs_test_unit f x I
@@ -1149,13 +1152,13 @@ bs_init2 Fail Sz Words=u==0 Regs Flags Dst => \
bs_init2 Fail Sz Words Regs Flags Dst => \
i_bs_init_fail_heap Sz Words Fail Regs Dst
-i_bs_init_fail xy j I x
+i_bs_init_fail xy j t x
-i_bs_init_fail_heap s I j I x
+i_bs_init_fail_heap s I j t x
-i_bs_init I I x
+i_bs_init W t x
-i_bs_init_heap I I I x
+i_bs_init_heap W I t x
bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail
@@ -1168,16 +1171,16 @@ bs_init_bits Fail Sz Words=u==0 Regs Flags Dst => \
bs_init_bits Fail Sz Words Regs Flags Dst => \
i_bs_init_bits_fail_heap Sz Words Fail Regs Dst
-i_bs_init_bits_fail xy j I x
+i_bs_init_bits_fail xy j t x
-i_bs_init_bits_fail_heap s I j I x
+i_bs_init_bits_fail_heap s I j t x
-i_bs_init_bits I I x
-i_bs_init_bits_heap I I I x
+i_bs_init_bits W t x
+i_bs_init_bits_heap W I t x
bs_add Fail S1=i==0 S2 Unit=u==1 D => move S2 D
-bs_add j s s I x
+bs_add j s s t x
bs_append Fail Size Extra Live Unit Bin Flags Dst => \
move Bin x | i_bs_append Fail Extra Live Unit Size Dst
@@ -1187,8 +1190,8 @@ bs_private_append Fail Size Unit Bin Flags Dst => \
bs_init_writable
-i_bs_append j I I I s x
-i_bs_private_append j I s s x
+i_bs_append j I t t s x
+i_bs_private_append j t s s x
#
# Storing integers into binaries.
@@ -1197,8 +1200,8 @@ i_bs_private_append j I s s x
bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \
gen_put_integer(Fail, Sz, Unit, Flags, Src)
-i_new_bs_put_integer j s I s
-i_new_bs_put_integer_imm j I I s
+i_new_bs_put_integer j s t s
+i_new_bs_put_integer_imm j W t s
#
# Utf8/utf16/utf32 support. (R12B-5)
@@ -1216,7 +1219,7 @@ bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src
i_bs_put_utf8 j s
-bs_put_utf16 j I s
+bs_put_utf16 j t s
bs_put_utf32 Fail=j Flags=u Src=s => \
i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src
@@ -1231,8 +1234,8 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail
bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \
gen_put_float(Fail, Sz, Unit, Flags, Src)
-i_new_bs_put_float j s I s
-i_new_bs_put_float_imm j I I s
+i_new_bs_put_float j s t s
+i_new_bs_put_float_imm j W t s
#
# Storing binaries into binaries.
@@ -1241,9 +1244,9 @@ i_new_bs_put_float_imm j I I s
bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \
gen_put_binary(Fail, Sz, Unit, Flags, Src)
-i_new_bs_put_binary j s I s
-i_new_bs_put_binary_imm j I s
-i_new_bs_put_binary_all j s I
+i_new_bs_put_binary j s t s
+i_new_bs_put_binary_imm j W s
+i_new_bs_put_binary_all j s t
#
# Warning: The i_bs_put_string and i_new_bs_put_string instructions
@@ -1251,7 +1254,7 @@ i_new_bs_put_binary_all j s I
# Don't change the instruction format unless you change the loader too.
#
-bs_put_string I I
+bs_put_string W W
#
# New floating point instructions (R8).
@@ -1269,9 +1272,9 @@ fmove Arg=l Dst=d => fstore Arg Dst
fmove Arg=dq Dst=l => fload Arg Dst
fstore l d
-fload dq l
+fload Sq l
-fconv d l
+fconv S l
i_fadd l l l
i_fsub l l l
@@ -1295,8 +1298,8 @@ fclearerror
# New apply instructions in R10B.
#
-apply I
-apply_last I P
+apply t
+apply_last t Q
#
# Handle compatibility with OTP 17 here.
@@ -1354,10 +1357,10 @@ sorted_put_map_exact F Src Dst Live Size Rest=* => \
new_map Dst Live Size Rest=* | is_small_map_literal_keys(Size, Rest) => \
gen_new_small_map_lit(Dst, Live, Size, Rest)
-new_map d I I
-i_new_small_map_lit d I q
-update_map_assoc s d I I
-update_map_exact j s d I I
+new_map d t I
+i_new_small_map_lit d t q
+update_map_assoc s d t I
+update_map_exact j s d t I
is_map Fail Lit=q | literal_is_map(Lit) =>
is_map Fail cq => jump Fail
@@ -1449,32 +1452,32 @@ gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \
gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst
-i_increment rxy I I d
+i_increment rxy W t d
-i_plus x xy j I d
-i_plus s s j I d
+i_plus x xy j t d
+i_plus s s j t d
-i_minus x x j I d
-i_minus s s j I d
+i_minus x x j t d
+i_minus s s j t d
-i_times j I s s d
+i_times j t s s d
-i_m_div j I s s d
-i_int_div j I s s d
+i_m_div j t s s d
+i_int_div j t s s d
-i_rem x x j I d
-i_rem s s j I d
+i_rem x x j t d
+i_rem s s j t d
-i_bsl s s j I d
-i_bsr s s j I d
+i_bsl s s j t d
+i_bsr s s j t d
-i_band x c j I d
-i_band s s j I d
+i_band x c j t d
+i_band s s j t d
i_bor j I s s d
i_bxor j I s s d
-i_int_bnot j s I d
+i_int_bnot j s t d
#
# Old guard BIFs that creates heap fragments are no longer allowed.
@@ -1498,9 +1501,9 @@ gc_bif2 Fail I Bif S1 S2 Dst => \
gc_bif3 Fail I Bif S1 S2 S3 Dst => \
gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst)
-i_gc_bif1 j I s I d
+i_gc_bif1 j W s t d
-i_gc_bif2 j I I s s d
+i_gc_bif2 j W t s s d
ii_gc_bif3/7
@@ -1509,7 +1512,7 @@ ii_gc_bif3/7
ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \
move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst
-i_gc_bif3 j I I s s d
+i_gc_bif3 j W t s s d
#
# The following instruction is specially handled in beam_load.c
diff --git a/erts/emulator/test/tuple_SUITE.erl b/erts/emulator/test/tuple_SUITE.erl
index 79b681b4d1..baf41180e0 100644
--- a/erts/emulator/test/tuple_SUITE.erl
+++ b/erts/emulator/test/tuple_SUITE.erl
@@ -134,6 +134,13 @@ t_element(Config) when is_list(Config) ->
{'EXIT', {badarg, _}} = (catch element(1, id(42))),
{'EXIT', {badarg, _}} = (catch element(id(1.5), id({a,b}))),
+ %% Make sure that the loader does not reject the module when
+ %% huge literal index values are used.
+ {'EXIT', {badarg, _}} = (catch element((1 bsl 24)-1, id({a,b,c}))),
+ {'EXIT', {badarg, _}} = (catch element(1 bsl 24, id({a,b,c}))),
+ {'EXIT', {badarg, _}} = (catch element(1 bsl 32, id({a,b,c}))),
+ {'EXIT', {badarg, _}} = (catch element(1 bsl 64, id({a,b,c}))),
+
ok.
get_elements([Element|Rest], Tuple, Pos) ->
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index da69b13e87..6c54ab3421 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -54,11 +54,6 @@ $pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize
'BEAM_LOOSE_MASK',
$WHOLE_WORD];
-# Mapping from packagable arguments to number of packed arguments per
-# word. Initialized after the wordsize is known.
-
-my @args_per_word;
-
# There are two types of instructions: generic and specific.
# The generic instructions are those generated by the Beam compiler.
# Corresponding to each generic instruction, there is generally a
@@ -97,10 +92,9 @@ my %c_code_used; # Used or not.
# Definitions for instructions combined from micro instructions.
my %combined_instrs;
-my %combined_code; # Combined micro instructions.
-my %hot_code;
-my %cold_code;
+my @generated_code; # Generated code.
+my %sort_order;
my @unnumbered_generic;
my %unnumbered;
@@ -144,13 +138,15 @@ my %arg_size = ('r' => 0, # x(0) - x register zero
'n' => 0, # NIL (implicit)
'c' => 1, # tagged constant (integer, atom, nil)
's' => 1, # tagged source; any of the above
+ 'S' => 1, # tagged source register (x or y)
'd' => 1, # tagged destination register (r, x, y)
'f' => 1, # failure label
'j' => 1, # either 'f' or 'p'
'e' => 1, # pointer to export entry
'L' => 0, # label
- 'I' => 1, # untagged integer
- 't' => 1, # untagged integer -- can be packed
+ 't' => 1, # untagged integer (12 bits) -- can be packed
+ 'I' => 1, # untagged integer (32 bits) -- can be packed
+ 'W' => 1, # untagged integer/pointer (one word)
'b' => 1, # pointer to bif
'A' => 1, # arity value
'P' => 1, # byte offset into tuple or stack
@@ -192,16 +188,16 @@ sub define_type_bit {
define_type_bit('s', $type_bit{'d'} | $type_bit{'i'} |
$type_bit{'a'} | $type_bit{'n'} |
$type_bit{'q'});
+ define_type_bit('S', $type_bit{'d'});
define_type_bit('j', $type_bit{'f'} | $type_bit{'p'});
# Aliases (for matching purposes).
- define_type_bit('I', $type_bit{'u'});
define_type_bit('t', $type_bit{'u'});
+ define_type_bit('I', $type_bit{'u'});
+ define_type_bit('W', $type_bit{'u'});
define_type_bit('A', $type_bit{'u'});
define_type_bit('L', $type_bit{'u'});
define_type_bit('b', $type_bit{'u'});
- define_type_bit('N', $type_bit{'u'});
- define_type_bit('U', $type_bit{'u'});
define_type_bit('e', $type_bit{'u'});
define_type_bit('P', $type_bit{'u'});
define_type_bit('Q', $type_bit{'u'});
@@ -228,6 +224,12 @@ $match_engine_ops{'TOP_fail'} = 1;
sanity("tag '$tag': primitive tags must be named with lowercase letters")
unless $tag =~ /^[a-z]$/;
}
+
+ foreach my $tag (keys %arg_size) {
+ defined $type_bit{$tag} or
+ sanity("the tag '$tag' has a size in %arg_size, " .
+ "but has no defined bit pattern");
+ }
}
#
@@ -258,15 +260,8 @@ if ($wordsize == 32) {
# Initialize number of arguments per packed word.
#
-$args_per_word[2] = 2;
-$args_per_word[3] = 3;
-$args_per_word[4] = 2;
-$args_per_word[5] = 3;
-$args_per_word[6] = 3;
-
if ($wordsize == 64) {
$pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD];
- $args_per_word[4] = 4;
}
#
@@ -278,11 +273,6 @@ my $c_code_block;
my $c_code_loc;
my @c_args;
-sub save_c_code {
- my($name,$block,$loc,@args) = @_;
-
-}
-
while (<>) {
my($op_num);
if ($in_c_code) {
@@ -393,7 +383,7 @@ while (<>) {
# micro instructions.
#
if (/^(\w+)\s*:=\s*([\w.]+)\s*;\s*$/) {
- $combined_instrs{$1} = ["$ARGV($.)","beam_instrs.h",$2];
+ $combined_instrs{$1} = ["$ARGV($.)",$2];
next;
}
@@ -589,17 +579,14 @@ sub emulator_output {
# for the emulator.
#
my($size, $code, $pack) =
- basic_generator($name, $hot, '', 0, undef, @args);
+ basic_generator($name, 1, '', 0, undef, @args);
#
# Save the generated $code for later.
#
if (defined $code) {
- if ($hot) {
- push(@{$hot_code{$code}}, $instr);
- } else {
- push(@{$cold_code{$code}}, $instr);
- }
+ $code = "OpCase($instr):\n$code";
+ push @generated_code, [$hot,$code,($instr)];
}
#
@@ -710,7 +697,7 @@ sub emulator_output {
print "#if !defined(ARCH_64)\n";
print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n];
print "#endif\n";
- print "#define BEAM_WIDE_MASK 0xFFFFUL\n";
+ print "#define BEAM_WIDE_MASK 0xFFFFFFFFUL\n";
print "#define BEAM_LOOSE_MASK 0xFFFFUL\n";
print "#define BEAM_TIGHT_MASK 0xFFFFUL\n";
print "#define BEAM_WIDE_SHIFT 32\n";
@@ -814,19 +801,12 @@ sub emulator_output {
$name = "$outdir/beam_hot.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
- print_code(\%hot_code);
+ print_code(1);
$name = "$outdir/beam_cold.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
- print_code(\%cold_code);
-
- foreach my $key (keys %combined_code) {
- my $name = "$outdir/$key";
- open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
- comment('C');
- print_indented_code(@{$combined_code{$key}});
- }
+ print_code(0);
}
sub init_item {
@@ -854,19 +834,14 @@ sub q {
}
sub print_code {
- my($ref) = @_;
- my(%sorted);
- my($key, $label); # Loop variables.
-
- foreach $key (keys %$ref) {
- my($sort_key);
- my($code) = '';
- foreach $label (@{$ref->{$key}}) {
- $code .= "OpCase($label):\n";
- $sort_key = $label;
- }
- $code .= "$key\n";
- $sorted{$sort_key} = $code;
+ my($include_hot) = @_;
+ my %sorted;
+
+ foreach my $ref (@generated_code) {
+ my($hot,$code,@labels) = @$ref;
+ next unless $hot == $include_hot;
+ my($sort_key) = @labels; # Use the first label as sort key.
+ $sorted{$sort_key} = $code;
}
foreach (sort keys %sorted) {
@@ -1043,12 +1018,11 @@ sub comment {
#
sub combine_micro_instructions {
my %groups;
- my %group_file;
# Sanity check, normalize micro instructions.
foreach my $instr (keys %combined_instrs) {
my $ref = $combined_instrs{$instr};
- my($def_loc,$outfile,$def) = @$ref;
+ my($def_loc,$def) = @$ref;
my($group,@subs) = split /[.]/, $def;
my $arity = 0;
@subs = map { "$group.$_" } @subs;
@@ -1061,14 +1035,12 @@ sub combine_micro_instructions {
$arity += scalar(@c_args);
}
push @{$groups{$group}}, [$instr,$arity,@subs];
- $group_file{$group} = $outfile;
}
# Now generate code for each group.
foreach my $group (sort keys %groups) {
- my $code = combine_instruction_group($group, @{$groups{$group}});
- my $outfile = $group_file{$group};
- push @{$combined_code{$outfile}}, $code;
+ my($code,@labels) = combine_instruction_group($group, @{$groups{$group}});
+ push @generated_code, [1,$code,@labels];
}
}
@@ -1160,6 +1132,7 @@ sub combine_instruction_group {
# Now generate the code for the entire group.
my $offset = 0;
+ my @opcase_labels;
for(my $i = 0; $i < @slots; $i++) {
my $key = $slots[$i];
@@ -1183,6 +1156,7 @@ sub combine_instruction_group {
if ($opcase ne '') {
$gcode .= "OpCase($opcase):\n";
+ push @opcase_labels, $opcase;
}
if ($num_references{$label}) {
$gcode .= "$label:\n";
@@ -1208,7 +1182,7 @@ sub combine_instruction_group {
$offset = $order_to_offset{$slots[$i+1]} if $i < $#slots;
}
- "{\n$gcode\n}\n\n";
+ ("{\n$gcode\n}\n\n",@opcase_labels);
}
sub micro_label {
@@ -1239,7 +1213,7 @@ sub basic_generator {
my $c_code_ref = $c_code{$name};
if ($hot and defined $c_code_ref) {
- ($prefix, $pack_spec, @args) = do_pack(@args);
+ ($var_decls, $pack_spec, @args) = do_pack(@args);
}
#
@@ -1253,7 +1227,14 @@ sub basic_generator {
my($this_size) = $arg_size{$_};
SWITCH:
{
- /^pack:(\d):(.*)/ and do {
+ /^packed:d:(\d):(.*)/ and do {
+ $var_decls .= "Eterm dst = $2;\n" .
+ "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n";
+ push(@f, "*dst_ptr");
+ $this_size = $1;
+ last SWITCH;
+ };
+ /^packed:[a-zA-z]:(\d):(.*)/ and do {
push(@f, $2);
$this_size = $1;
last SWITCH;
@@ -1262,8 +1243,8 @@ sub basic_generator {
push(@f, "r(0)");
last SWITCH;
};
- /[lxy]/ and do {
- push(@f, $_ . "b(Arg($arg_offset))");
+ /[lxyS]/ and do {
+ push(@f, $_ . "b(" . arg_offset($arg_offset) . ")");
last SWITCH;
};
/n/ and do {
@@ -1280,13 +1261,13 @@ sub basic_generator {
last SWITCH;
};
/d/ and do {
- $var_decls .= "Eterm dst = Arg($arg_offset);\n" .
+ $var_decls .= "Eterm dst = " . arg_offset($arg_offset) . ";\n" .
"Eterm* dst_ptr = REG_TARGET_PTR(dst);\n";
push(@f, "*dst_ptr");
last SWITCH;
};
defined $arg_size{$_} and do {
- push(@f, "Arg($arg_offset)");
+ push @f, arg_offset($arg_offset);
last SWITCH;
};
@@ -1368,6 +1349,11 @@ sub basic_generator {
($size+1, $code, $pack_spec);
}
+sub arg_offset {
+ my $offset = shift;
+ "I[" . ($offset+1) . "]";
+}
+
sub expand_all {
my($code,$bindings_ref) = @_;
my %bindings = %{$bindings_ref};
@@ -1457,123 +1443,189 @@ sub expand_macro {
sub do_pack {
my(@args) = @_;
my($packable_args) = 0;
- my @is_packable; # Packability (boolean) for each argument.
- my $wide_packing = 0;
- my(@orig_args) = @args;
+ my @bits_needed; # Bits needed for each argument.
+
+ #
+ # Define the minimum number of bits needed for the packable argument types.
+ #
+ my %bits_needed = ('x' => 10,
+ 'y' => 10,
+ 'Q' => 10,
+ 'l' => 10,
+ 'S' => 16,
+ 'd' => 16,
+ 't' => 16);
+ if ($wordsize == 64) {
+ $bits_needed{'I'} = 32;
+ }
#
- # Count the number of packable arguments. If we encounter any 's' or 'd'
- # arguments, packing is not possible.
+ # Count the number of packable arguments.
#
- my $packable_types = "xytQ";
foreach my $arg (@args) {
- if ($arg =~ /^[$packable_types]/) {
+ if (defined $bits_needed{$arg}) {
$packable_args++;
- push @is_packable, 1;
- } elsif ($arg =~ /^I/ and $wordsize == 64 and $packable_args < 2) {
- $wide_packing = 1;
- push @is_packable, 1;
- if (++$packable_args == 2) {
- # We can only pack two arguments. Turn off packing
- # for the rest of the arguments.
- $packable_types = "\xFF";
- }
- } elsif ($arg =~ /^[sd]/) {
- return ('', '', @args);
- } elsif ($arg =~ /^[scq]/ and $packable_args > 0) {
- # When packing, this operand will be picked up from the
- # code array, put onto the packing stack, and later put
- # back into a different location in the code. The problem
- # is that if this operand is a literal, the original
- # location in the code would have been remembered in a
- # literal patch. For packing to work, we would have to
- # adjust the position in the literal patch. For the
- # moment, adding additional instructions to the packing
- # engine to handle this does not seem worth it, so we will
- # just turn off packing.
- return ('', '', @args);
+ push @bits_needed, $bits_needed{$arg};
} else {
- push @is_packable, 0;
+ push @bits_needed, 0;
}
}
#
- # Get out of here if too few or too many arguments.
+ # Nothing to pack unless there are at least 2 packable arguments.
#
return ('', '', @args) if $packable_args < 2;
- my($size) = 0;
- my($pack_prefix) = '';
- my($down) = ''; # Pack commands (towards instruction
+ #
+ # Determine how many arguments we should pack into each word.
+ #
+ my @args_per_word;
+ my @need_wide_mask;
+ my $bits = 0;
+ my $word = 0;
+ $args_per_word[0] = 0;
+ $need_wide_mask[0] = 0;
+ for (my $i = 0; $i < @args; $i++) {
+ if ($bits_needed[$i]) {
+ my $needed = $bits_needed[$i];
+
+ my $next_word = sub {
+ $word++;
+ $args_per_word[$word] = 0;
+ $need_wide_mask[$word] = 0;
+ $bits = 0;
+ };
+
+ if ($bits+$needed > $wordsize) { # Does not fit.
+ $next_word->();
+ }
+ if ($args_per_word[$word] == 4) { # Can't handle more than 4 args.
+ $next_word->();
+ }
+ if ($needed == 32 and $args_per_word[$word] > 1) {
+ # Must only pack two arguments in this word, and there
+ # are already at least two arguments here.
+ $next_word->();
+ }
+ $args_per_word[$word]++;
+ $bits += $needed;
+ if ($needed == 32) {
+ $need_wide_mask[$word]++;
+ }
+ if ($need_wide_mask[$word] and $bits > 32) {
+ # Can only pack two things in a word where one
+ # item is 32 bits. Force the next item into
+ # the next word.
+ $bits = $wordsize;
+ }
+ }
+ }
+
+ #
+ # Try to balance packing between words.
+ #
+ if ($args_per_word[$#args_per_word] == 1) {
+ if ($args_per_word[$#args_per_word-1] < 3) {
+ pop @args_per_word;
+ } else {
+ $args_per_word[$#args_per_word-1]--;
+ $args_per_word[$#args_per_word]++;
+ }
+ } elsif (@args_per_word == 2 and
+ $args_per_word[0] == 4 and
+ $args_per_word[1] == 2) {
+ $args_per_word[0] = 3;
+ $args_per_word[1] = 3;
+ } elsif (@args_per_word == 2 and
+ $args_per_word[0] == 3 and
+ $args_per_word[1] == 1) {
+ $args_per_word[0] = 2;
+ $args_per_word[1] = 2;
+ }
+
+ my $size = 0;
+ my $pack_prefix = '';
+ my $down = ''; # Pack commands (towards instruction
# beginning).
- my($up) = ''; # Pack commands (storing back while
+ my $up = ''; # Pack commands (storing back while
# moving forward).
+ my $did_some_packing = 0; # Nothing packed yet.
- my $args_per_word = $args_per_word[$packable_args];
- my @shift;
- my @mask;
- my @instr;
+ # Skip an unpackable argument.
+ my $skip_unpackable = sub {
+ my($arg) = @_;
- if ($wide_packing) {
- @shift = ('0', 'BEAM_WIDE_SHIFT');
- @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD);
- @instr = ('w', 'i');
- } else {
- @shift = @{$pack_shift[$args_per_word]};
- @mask = @{$pack_mask[$args_per_word]};
- @instr = @{$pack_instr[$args_per_word]};
- }
+ if ($arg_size{$arg} and $did_some_packing) {
+ # Save the argument on the pack engine's stack.
+ $down = "g${down}";
+ $up = "${up}p";
+ } else {
+ # The argument has either zero size (e.g. r(0)),
+ # or is to the left of the first packed argument
+ # and will never be accessed. No need to do
+ # anything.
+ }
+ };
#
# Now generate the packing instructions. One complication is that
# the packing engine works from right-to-left, but we must generate
# the instructions from left-to-right because we must calculate
# instruction sizes from left-to-right.
- #
- # XXX Packing 3 't's in one word won't work. Sorry.
- my $did_some_packing = 0; # Nothing packed yet.
- my($ap) = 0; # Argument number within word.
- my($tmpnum) = 1; # Number of temporary variable.
- my($expr) = '';
- for (my $i = 0; $i < @args; $i++) {
- my($reg) = $args[$i];
- my($this_size) = $arg_size{$reg};
- if ($is_packable[$i]) {
- $this_size = 0;
- $did_some_packing = 1;
-
- if ($ap == 0) {
- $pack_prefix .= "Eterm tmp_packed$tmpnum = Arg($size);\n";
- $up .= "p";
- $down = "P$down";
- $this_size = 1;
- }
+ my $arg_num = 0;
+ for (my $word = 0; $word < @args_per_word; $word++) {
+ my $ap = 0; # Argument number within word.
+ my $packed_var = "tmp_packed" . ($word+1);
+ my $args_per_word = $args_per_word[$word];
+ my @shift;
+ my @mask;
+ my @instr;
+
+ if ($need_wide_mask[$word]) {
+ @shift = ('0', 'BEAM_WIDE_SHIFT');
+ @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD);
+ @instr = ('w', 'i');
+ } else {
+ @shift = @{$pack_shift[$args_per_word]};
+ @mask = @{$pack_mask[$args_per_word]};
+ @instr = @{$pack_instr[$args_per_word]};
+ }
- $down = "$instr[$ap]$down";
- my($unpack) = make_unpack($tmpnum, $shift[$ap], $mask[$ap]);
- $args[$i] = "pack:$this_size:$reg" . "b($unpack)";
+ while ($ap < $args_per_word) {
+ my $reg = $args[$arg_num];
+ my $this_size = $arg_size{$reg};
+ if ($bits_needed[$arg_num]) {
+ $this_size = 0;
+ $did_some_packing = 1;
+
+ if ($ap == 0) {
+ $pack_prefix .= "Eterm $packed_var = " .
+ arg_offset($size) . ";\n";
+ $up .= "p";
+ $down = "P$down";
+ $this_size = 1;
+ }
- if (++$ap == $args_per_word) {
- $ap = 0;
- $tmpnum++;
- }
- } elsif ($arg_size{$reg} && $did_some_packing) {
- #
- # This is an argument that can't be packed. Normally, we must
- # save it on the pack engine's stack, unless:
- #
- # 1. The argument has zero size (e.g. r(0)). Such arguments
- # will not be loaded. They disappear.
- # 2. If the argument is on the left of the first packed argument,
- # the packing engine will never access it (because the engine
- # operates from right-to-left).
- #
+ $down = "$instr[$ap]$down";
+ my $unpack = make_unpack($packed_var, $shift[$ap], $mask[$ap]);
+ $args[$arg_num] = "packed:$reg:$this_size:$reg" . "b($unpack)";
- $down = "g${down}";
- $up = "${up}p";
- }
- $size += $this_size;
+ $ap++;
+ } else {
+ $skip_unpackable->($reg);
+ }
+ $size += $this_size;
+ $arg_num++;
+ }
+ }
+
+ #
+ # Skip any unpackable arguments at the end.
+ #
+ while ($arg_num < @args) {
+ $skip_unpackable->($args[$arg_num]);
+ $arg_num++;
}
my $pack_spec = $down . $up;
@@ -1581,9 +1633,9 @@ sub do_pack {
}
sub make_unpack {
- my($tmpnum, $shift, $mask) = @_;
+ my($packed_var, $shift, $mask) = @_;
- my($e) = "tmp_packed$tmpnum";
+ my $e = $packed_var;
$e = "($e>>$shift)" if $shift;
$e .= "&$mask" unless $mask eq $WHOLE_WORD;
$e;