aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/beam/beam_bp.c2
-rw-r--r--erts/emulator/beam/beam_emu.c21
-rw-r--r--erts/emulator/beam/beam_load.c14
-rw-r--r--erts/emulator/beam/erl_nif.c53
-rw-r--r--erts/emulator/beam/erl_process.c2
-rw-r--r--erts/emulator/beam/macros.tab9
-rw-r--r--erts/emulator/beam/map_instrs.tab8
-rw-r--r--erts/emulator/beam/ops.tab2
-rw-r--r--erts/emulator/hipe/hipe_amd64_bifs.m436
-rw-r--r--erts/emulator/hipe/hipe_bif0.tab1
-rw-r--r--erts/emulator/hipe/hipe_bif_list.m45
-rw-r--r--erts/emulator/hipe/hipe_debug.c8
-rw-r--r--erts/emulator/hipe/hipe_native_bif.c9
-rw-r--r--erts/emulator/hipe/hipe_native_bif.h2
-rw-r--r--erts/emulator/hipe/hipe_primops.h1
-rw-r--r--erts/emulator/internal_doc/beam_makeops.md1846
-rw-r--r--erts/emulator/nifs/common/zlib_nif.c30
-rw-r--r--erts/emulator/test/distribution_SUITE.erl64
-rw-r--r--erts/emulator/test/process_SUITE.erl13
-rwxr-xr-xerts/emulator/utils/beam_makeops174
20 files changed, 2122 insertions, 178 deletions
diff --git a/erts/emulator/beam/beam_bp.c b/erts/emulator/beam/beam_bp.c
index fe1e15701b..0832b3f374 100644
--- a/erts/emulator/beam/beam_bp.c
+++ b/erts/emulator/beam/beam_bp.c
@@ -998,7 +998,9 @@ do_call_trace(Process* c_p, ErtsCodeInfo* info, Eterm* reg,
fixup_cp_before_trace(c_p, &return_to_trace);
+ ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
flags = erts_call_trace(c_p, info, ms, reg, local, &tracer);
+ ERTS_REQ_PROC_MAIN_LOCK(c_p);
/* restore cp after potential fixup */
c_p->cp = cp_save;
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index 60d0008d8f..bc95ccec52 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -400,12 +400,13 @@ static BeamInstr* apply_fun(Process* p, Eterm fun,
Eterm args, Eterm* reg) NOINLINE;
static Eterm new_fun(Process* p, Eterm* reg,
ErlFunEntry* fe, int num_free) NOINLINE;
-static Eterm new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) NOINLINE;
-static Eterm new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal,
+static Eterm erts_gc_new_map(Process* p, Eterm* reg, Uint live,
+ Uint n, BeamInstr* ptr) NOINLINE;
+static Eterm erts_gc_new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal,
Uint live, BeamInstr* ptr) NOINLINE;
-static Eterm update_map_assoc(Process* p, Eterm* reg, Uint live,
+static Eterm erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live,
Uint n, BeamInstr* new_p) NOINLINE;
-static Eterm update_map_exact(Process* p, Eterm* reg, Uint live,
+static Eterm erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live,
Uint n, Eterm* new_p) NOINLINE;
static Eterm get_map_element(Eterm map, Eterm key);
static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx);
@@ -2755,7 +2756,7 @@ do { \
static Eterm
-new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr)
+erts_gc_new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr)
{
Uint i;
Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */;
@@ -2812,7 +2813,8 @@ new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr)
}
static Eterm
-new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamInstr* ptr)
+erts_gc_new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal,
+ Uint live, BeamInstr* ptr)
{
Eterm* keys = tuple_val(keys_literal);
Uint n = arityval(*keys);
@@ -2846,7 +2848,8 @@ new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamIns
}
static Eterm
-update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p)
+erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live,
+ Uint n, BeamInstr* new_p)
{
Uint num_old;
Uint num_updates;
@@ -2892,7 +2895,7 @@ update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p)
*/
if (num_old == 0) {
- return new_map(p, reg, live, n, new_p);
+ return erts_gc_new_map(p, reg, live, n, new_p);
}
/*
@@ -3048,7 +3051,7 @@ update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p)
*/
static Eterm
-update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p)
+erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p)
{
Uint i;
Uint num_old;
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 7331c331a6..4fcfea527c 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2384,8 +2384,18 @@ load_code(LoaderState* stp)
code[ci++] = NIL;
break;
case TAG_q:
- new_literal_patch(stp, ci);
- code[ci++] = tmp_op->a[arg].val;
+ {
+ BeamInstr val = tmp_op->a[arg].val;
+ Eterm term = stp->literals[val].term;
+ new_literal_patch(stp, ci);
+ code[ci++] = val;
+ switch (loader_tag(term)) {
+ case LOADER_X_REG:
+ case LOADER_Y_REG:
+ LoadError1(stp, "the term '%T' would be confused "
+ "with a register", term);
+ }
+ }
break;
default:
LoadError1(stp, "bad tag %d for general source",
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index f7f12efe28..c21b139cfa 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -3323,36 +3323,38 @@ static int examine_iovec_term(Eterm list, UWord max_length, iovec_slice_t *resul
size = binary_size(binary);
binary_header = binary_val(binary);
- /* If we're a sub-binary we'll need to check our underlying binary to
- * determine whether we're on-heap or not. */
- if(thing_subtag(*binary_header) == SUB_BINARY_SUBTAG) {
- ErlSubBin *sb = (ErlSubBin*)binary_header;
-
- /* Reject bitstrings */
- if((sb->bitoffs + sb->bitsize) > 0) {
- return 0;
+ if (size > 0) {
+ /* If we're a sub-binary we'll need to check our underlying binary
+ * to determine whether we're on-heap or not. */
+ if (thing_subtag(*binary_header) == SUB_BINARY_SUBTAG) {
+ ErlSubBin *sb = (ErlSubBin*)binary_header;
+
+ /* Reject bitstrings */
+ if((sb->bitoffs + sb->bitsize) > 0) {
+ return 0;
+ }
+
+ ASSERT(size <= binary_size(sb->orig));
+ binary_header = binary_val(sb->orig);
}
- ASSERT(size <= binary_size(sb->orig));
- binary_header = binary_val(sb->orig);
- }
-
- if(thing_subtag(*binary_header) == HEAP_BINARY_SUBTAG) {
- ASSERT(size <= ERL_ONHEAP_BIN_LIMIT);
+ if (thing_subtag(*binary_header) == HEAP_BINARY_SUBTAG) {
+ ASSERT(size <= ERL_ONHEAP_BIN_LIMIT);
- result->iovec_len += 1;
- result->onheap_size += size;
- } else {
- ASSERT(thing_subtag(*binary_header) == REFC_BINARY_SUBTAG);
+ result->iovec_len += 1;
+ result->onheap_size += size;
+ } else {
+ ASSERT(thing_subtag(*binary_header) == REFC_BINARY_SUBTAG);
- result->iovec_len += 1 + size / MAX_SYSIOVEC_IOVLEN;
- result->offheap_size += size;
+ result->iovec_len += 1 + size / MAX_SYSIOVEC_IOVLEN;
+ result->offheap_size += size;
+ }
}
result->sublist_length += 1;
lookahead = CDR(cell);
- if(result->sublist_length >= max_length) {
+ if (result->sublist_length >= max_length) {
break;
}
}
@@ -3385,6 +3387,10 @@ static void inspect_raw_binary_data(Eterm binary, ErlNifBinary *result) {
if (thing_subtag(*parent_header) == REFC_BINARY_SUBTAG) {
ProcBin *pb = (ProcBin*)parent_header;
+ if (pb->flags & (PB_IS_WRITABLE | PB_ACTIVE_WRITER)) {
+ erts_emasculate_writable_binary(pb);
+ }
+
ASSERT(pb->val != NULL);
ASSERT(byte_offset < pb->size);
ASSERT(&pb->bytes[byte_offset] >= (byte*)(pb->val)->orig_bytes);
@@ -3428,7 +3434,7 @@ static int fill_iovec_with_slice(ErlNifEnv *env,
/* If this isn't a refc binary, copy its contents to the onheap buffer
* and reference that instead. */
- if (raw_data.ref_bin == NULL) {
+ if (raw_data.size > 0 && raw_data.ref_bin == NULL) {
ASSERT(onheap_offset < onheap_data.size);
ASSERT(slice->onheap_size > 0);
@@ -3439,12 +3445,11 @@ static int fill_iovec_with_slice(ErlNifEnv *env,
raw_data.ref_bin = onheap_data.ref_bin;
}
- ASSERT(raw_data.ref_bin != NULL);
-
while (raw_data.size > 0) {
UWord chunk_len = MIN(raw_data.size, MAX_SYSIOVEC_IOVLEN);
ASSERT(iovec_idx < iovec->iovcnt);
+ ASSERT(raw_data.ref_bin != NULL);
iovec->iov[iovec_idx].iov_base = raw_data.data;
iovec->iov[iovec_idx].iov_len = chunk_len;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 6654468fb6..3c0a126fe2 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -10824,7 +10824,7 @@ request_system_task(Process *c_p, Eterm requester, Eterm target,
goto badarg;
req_type = tp[1];
req_id = tp[2];
- req_id_sz = is_immed(req_id) ? req_id : size_object(req_id);
+ req_id_sz = is_immed(req_id) ? 0 : size_object(req_id);
tot_sz = req_id_sz;
for (i = 0; i < ERTS_MAX_PROC_SYS_TASK_ARGS; i++) {
int tix = 3 + i;
diff --git a/erts/emulator/beam/macros.tab b/erts/emulator/beam/macros.tab
index e0b5f56b53..494fe8961e 100644
--- a/erts/emulator/beam/macros.tab
+++ b/erts/emulator/beam/macros.tab
@@ -20,13 +20,12 @@
//
//
-// Use if there is a garbage collection before storing to a
-// general destination (either X or Y register).
+// Define a regular expression that will match instructions that
+// perform GC. That will allow beam_makeops to check for instructions
+// that don't use $REFRESH_GEN_DEST() when they should.
//
-REFRESH_GEN_DEST() {
- dst_ptr = REG_TARGET_PTR(dst);
-}
+GC_REGEXP=erts_garbage_collect|erts_gc|GcBifFunction;
// $Offset is relative to the start of the instruction (not to the
// location of the failure label reference). Since combined
diff --git a/erts/emulator/beam/map_instrs.tab b/erts/emulator/beam/map_instrs.tab
index bbb2f49b66..c594a87298 100644
--- a/erts/emulator/beam/map_instrs.tab
+++ b/erts/emulator/beam/map_instrs.tab
@@ -31,7 +31,7 @@ new_map(Dst, Live, N) {
Eterm res;
HEAVY_SWAPOUT;
- res = new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION);
+ res = erts_gc_new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
$REFRESH_GEN_DEST();
$Dst = res;
@@ -44,7 +44,7 @@ i_new_small_map_lit(Dst, Live, Keys) {
Eterm keys = $Keys;
HEAVY_SWAPOUT;
- res = new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION);
+ res = erts_gc_new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
$REFRESH_GEN_DEST();
$Dst = res;
@@ -133,7 +133,7 @@ update_map_assoc(Src, Dst, Live, N) {
reg[live] = $Src;
HEAVY_SWAPOUT;
- res = update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION);
+ res = erts_gc_update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
ASSERT(is_value(res));
$REFRESH_GEN_DEST();
@@ -147,7 +147,7 @@ update_map_exact(Fail, Src, Dst, Live, N) {
reg[live] = $Src;
HEAVY_SWAPOUT;
- res = update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION);
+ res = erts_gc_update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION);
HEAVY_SWAPIN;
if (is_value(res)) {
$REFRESH_GEN_DEST();
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 7a2c39b3a8..a560bde920 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -511,8 +511,6 @@ put_list y x x
put_list y y x
put_list x y x
-put_list y x x
-
# put_list SrcReg Constant Dst
put_list x c x
diff --git a/erts/emulator/hipe/hipe_amd64_bifs.m4 b/erts/emulator/hipe/hipe_amd64_bifs.m4
index 6d998c4b55..cf4c59c9af 100644
--- a/erts/emulator/hipe/hipe_amd64_bifs.m4
+++ b/erts/emulator/hipe/hipe_amd64_bifs.m4
@@ -462,42 +462,6 @@ ASYM($1):
TYPE_FUNCTION(ASYM($1))
#endif')
-/*
- * nogc_bif_interface_1(nbif_name, cbif_name)
- *
- * Generate native interface for a bif with implicit P
- * The bif can fail but cannot do GC.
- */
-
-define(nogc_bif_interface_1,
-`
-#ifndef HAVE_$1
-#`define' HAVE_$1
- TEXT
- .align 4
- GLOBAL(ASYM($1))
-ASYM($1):
- /* set up the parameters */
- movq P, %rdi
- NBIF_ARG(%rsi,1,0)
-
- /* make the call on the C stack */
- SWITCH_ERLANG_TO_C
- pushq %rsi
- movq %rsp, %rsi /* Eterm* BIF__ARGS */
- sub $(8), %rsp /* stack frame 16-byte alignment */
- CALL_BIF($2)
- add $(1*8 + 8), %rsp
- SWITCH_C_TO_ERLANG
-
- /* throw exception if failure, otherwise return */
- TEST_GOT_EXN
- jz nbif_1_simple_exception
- NBIF_RET(1)
- SET_SIZE(ASYM($1))
- TYPE_FUNCTION(ASYM($1))
-#endif')
-
/*
* noproc_primop_interface_0(nbif_name, cbif_name)
diff --git a/erts/emulator/hipe/hipe_bif0.tab b/erts/emulator/hipe/hipe_bif0.tab
index 4f73770d24..0380e8c795 100644
--- a/erts/emulator/hipe/hipe_bif0.tab
+++ b/erts/emulator/hipe/hipe_bif0.tab
@@ -135,7 +135,6 @@ atom bs_utf16_size
atom bs_put_utf16be
atom bs_put_utf16le
atom bs_get_utf16
-atom bs_validate_unicode
atom bs_validate_unicode_retract
atom emulate_fpe
atom emasculate_binary
diff --git a/erts/emulator/hipe/hipe_bif_list.m4 b/erts/emulator/hipe/hipe_bif_list.m4
index 23b6709cd0..625d8486fd 100644
--- a/erts/emulator/hipe/hipe_bif_list.m4
+++ b/erts/emulator/hipe/hipe_bif_list.m4
@@ -248,11 +248,6 @@ nofail_primop_interface_3(nbif_bs_get_float_2, erts_bs_get_float_2)
standard_bif_interface_3(nbif_bs_put_utf8, hipe_bs_put_utf8)
standard_bif_interface_3(nbif_bs_put_utf16be, hipe_bs_put_utf16be)
standard_bif_interface_3(nbif_bs_put_utf16le, hipe_bs_put_utf16le)
-ifdef(`nogc_bif_interface_1',`
-nogc_bif_interface_1(nbif_bs_validate_unicode, hipe_bs_validate_unicode)
-',`
-standard_bif_interface_1(nbif_bs_validate_unicode, hipe_bs_validate_unicode)
-')
/*
* Bit-syntax primops without any P parameter.
diff --git a/erts/emulator/hipe/hipe_debug.c b/erts/emulator/hipe/hipe_debug.c
index 222a11db3d..cfe60b379e 100644
--- a/erts/emulator/hipe/hipe_debug.c
+++ b/erts/emulator/hipe/hipe_debug.c
@@ -135,7 +135,9 @@ static void print_heap(Eterm *pos, Eterm *end)
printf("From: 0x%0*lx to 0x%0*lx\n\r",
2*(int)sizeof(long), (unsigned long)pos,
2*(int)sizeof(long), (unsigned long)end);
- printf(" | H E A P |\r\n");
+ printf(" | %*s H E A P %*s |\r\n",
+ 2*(int)sizeof(long)-1, "",
+ 2*(int)sizeof(long)-1, "");
printf(" | %*s | %*s |\r\n",
2+2*(int)sizeof(long), "Address",
2+2*(int)sizeof(long), "Contents");
@@ -158,8 +160,10 @@ static void print_heap(Eterm *pos, Eterm *end)
++pos;
--ari;
}
- } else
+ } else {
+ fflush(stdout);
erts_printf("%.30T", val);
+ }
printf("\r\n");
}
printf(" |%s|%s|\r\n", dashes, dashes);
diff --git a/erts/emulator/hipe/hipe_native_bif.c b/erts/emulator/hipe/hipe_native_bif.c
index e444e3dc5d..99c34532b9 100644
--- a/erts/emulator/hipe/hipe_native_bif.c
+++ b/erts/emulator/hipe/hipe_native_bif.c
@@ -482,15 +482,6 @@ static int validate_unicode(Eterm arg)
return 1;
}
-BIF_RETTYPE nbif_impl_hipe_bs_validate_unicode(NBIF_ALIST_1)
-{
- Process *p = BIF_P;
- Eterm arg = BIF_ARG_1;
- if (!validate_unicode(arg))
- BIF_ERROR(p, BADARG);
- return NIL;
-}
-
Uint hipe_is_unicode(Eterm arg)
{
return (Uint) validate_unicode(arg);
diff --git a/erts/emulator/hipe/hipe_native_bif.h b/erts/emulator/hipe/hipe_native_bif.h
index 71f63875a4..d5081b8438 100644
--- a/erts/emulator/hipe/hipe_native_bif.h
+++ b/erts/emulator/hipe/hipe_native_bif.h
@@ -66,7 +66,6 @@ AEXTERN(Eterm,nbif_bs_utf16_size,(Eterm));
AEXTERN(Eterm,nbif_bs_put_utf16be,(Process*,Eterm,byte*,unsigned int));
AEXTERN(Eterm,nbif_bs_put_utf16le,(Process*,Eterm,byte*,unsigned int));
AEXTERN(Eterm,nbif_bs_get_utf16,(void));
-AEXTERN(Eterm,nbif_bs_validate_unicode,(Process*,Eterm));
AEXTERN(Uint,nbif_is_unicode,(Eterm));
AEXTERN(Eterm,nbif_bs_validate_unicode_retract,(void));
AEXTERN(Uint,nbif_is_divisible,(Uint,Uint));
@@ -92,7 +91,6 @@ BIF_RETTYPE nbif_impl_hipe_bs_put_utf8(NBIF_ALIST_3);
Eterm hipe_bs_utf16_size(Eterm);
BIF_RETTYPE nbif_impl_hipe_bs_put_utf16be(NBIF_ALIST_3);
BIF_RETTYPE nbif_impl_hipe_bs_put_utf16le(NBIF_ALIST_3);
-BIF_RETTYPE nbif_impl_hipe_bs_validate_unicode(NBIF_ALIST_1);
Uint hipe_is_unicode(Eterm);
struct erl_bin_match_buffer;
int hipe_bs_validate_unicode_retract(struct erl_bin_match_buffer*, Eterm);
diff --git a/erts/emulator/hipe/hipe_primops.h b/erts/emulator/hipe/hipe_primops.h
index 49e6bdb026..a6abd3e011 100644
--- a/erts/emulator/hipe/hipe_primops.h
+++ b/erts/emulator/hipe/hipe_primops.h
@@ -63,7 +63,6 @@ PRIMOP_LIST(am_bs_utf16_size, &nbif_bs_utf16_size)
PRIMOP_LIST(am_bs_put_utf16be, &nbif_bs_put_utf16be)
PRIMOP_LIST(am_bs_put_utf16le, &nbif_bs_put_utf16le)
PRIMOP_LIST(am_bs_get_utf16, &nbif_bs_get_utf16)
-PRIMOP_LIST(am_bs_validate_unicode, &nbif_bs_validate_unicode)
PRIMOP_LIST(am_is_unicode, &nbif_is_unicode)
PRIMOP_LIST(am_bs_validate_unicode_retract, &nbif_bs_validate_unicode_retract)
diff --git a/erts/emulator/internal_doc/beam_makeops.md b/erts/emulator/internal_doc/beam_makeops.md
new file mode 100644
index 0000000000..1da8d2ab05
--- /dev/null
+++ b/erts/emulator/internal_doc/beam_makeops.md
@@ -0,0 +1,1846 @@
+The beam\_makeops script
+=======================
+
+This document describes the **beam\_makeops** script.
+
+Introduction
+------------
+
+The **beam\_makeops** Perl script is used at build-time by both the
+compiler and runtime system. Given a number of input files (all with
+the extension `.tab`), it will generate source files used by the
+Erlang compiler and by the runtime system to load and execute BEAM
+instructions.
+
+Essentially those `.tab` files define:
+
+* External generic BEAM instructions. They are the instructions that
+are known to both the compiler and the runtime system. Generic
+instructions are stable between releases. New generic instructions
+with high numbers than previous instructions can be added in major
+releases. The OTP 20 release has 159 external generic instructions.
+
+* Internal generic instructions. They are known only to the runtime
+system and can be changed at any time without compatibility issues.
+They are created by transformation rules (described next).
+
+* Rules for transforming one or more generic instructions to other
+generic instructions. The transformation rules allow combining,
+splitting, and removal of instructions, as well as shuffling operands.
+Because of the transformation rules, the runtime can have many
+internal generic instructions that are only known to runtime system.
+
+* Specific BEAM instructions. The specific instructions are the
+instructions that are actually executed by the runtime system. They
+can be changed at any time without causing compatibility issues.
+The loader translates generic instructions to specific instructions.
+In general, for each generic instruction, there exists a family of
+specific instructions. The OTP 20 release has 389 specific
+instructions.
+
+* The implementation of specific instructions.
+
+Generic instructions have typed operands. Here are a few examples of
+operands for `move/2`:
+
+ {move,{atom,id},{x,5}}.
+ {move,{x,3},{x,0}}.
+ {move,{x,2},{y,1}}.
+
+When those instructions are loaded, the loader rewrites them
+to specific instructions:
+
+ move_cx id 5
+ move_xx 3 0
+ move_xy 2 1
+
+Corresponding to each generic instruction, there is a family of
+specific instructions. The types that an instance of a specific
+instruction can handle are encoded in the instruction names. For
+example, `move_xy` takes an X register number as the first operand and
+a Y register number as the second operand. `move_cx` takes a tagged
+Erlang term as the first operand and an X register number as the
+second operand.
+
+An example: the move instruction
+--------------------------------
+
+Using the `move` instruction as an example, we will give a quick
+tour to show the main features of **beam\_makeops**.
+
+In the `compiler` application, in the file `genop.tab`, there is the
+following line:
+
+ 64: move/2
+
+This is a definition of an external generic BEAM instruction. Most
+importantly it specifices that the opcode is 64. It also defines that
+it has two operands. The BEAM assembler will use the opcode when
+creating `.beam` files. The compiler does not really need the arity,
+but it will use it as an internal sanity check when assembling the
+BEAM code.
+
+Let's have a look at `ops.tab` in `erts/emulator/beam`, where the
+specific `move` instructions are defined. Here are a few of them:
+
+ move x x
+ move x y
+ move c x
+
+Each specific instructions is defined by following the name of the
+instruction with the types for each operand. An operand type is a
+single letter. For example, `x` means an X register, `y`
+means a Y register, and `c` is a "constant" (a tagged term such as
+an integer, an atom, or a literal).
+
+Now let's look at the implementation of the `move` instruction. There
+are multiple files containing implementations of instructions in the
+`erts/emulator/beam` directory. The `move` instruction is defined in
+`instrs.tab`. It looks like this:
+
+ move(Src, Dst) {
+ $Dst = $Src;
+ }
+
+The implementation for an instruction largely follows the C syntax,
+except that the variables in the function head don't have any types.
+The `$` before an identifier denotes a macro expansion. Thus,
+`$Src` will expand to the code to pick up the source operand for
+the instruction and `$Dst` to the code for the destination register.
+
+We will look at the code for each specific instruction in turn. To
+make the code easier to understand, let's first look at the memory
+layout for the instruction `{move,{atom,id},{x,5}}`:
+
+ +--------------------+--------------------+
+ I -> | 40 | &&lb_move_cx |
+ +--------------------+--------------------+
+ | Tagged atom 'id' |
+ +--------------------+--------------------+
+
+This example and all other examples in the document assumes a 64-bit
+archictecture, and furthermore that pointers to C code fit in 32 bits.
+
+`I` in the BEAM virtual machine is the instruction pointer. When BEAM
+executes an instruction, `I` points to the first word of the
+instruction.
+
+`&&lb_move_cx` is the address to C code that implements `move_cx`. It
+is stored in the lower 32 bits of the word. In the upper 32 bits is
+the byte offset to the X register; the register number 5 has been
+multiplied by the word size size 8.
+
+In the next word the tagged atom `id` is stored.
+
+With that background, we can look at the generated code for `move_cx`
+in `beam_hot.h`:
+
+ OpCase(move_cx):
+ {
+ BeamInstr next_pf = BeamCodeAddr(I[2]);
+ xb(BeamExtraData(I[0])) = I[1];
+ I += 2;
+ ASSERT(VALID_INSTR(next_pf));
+ GotoPF(next_pf);
+ }
+
+We will go through each line in turn.
+
+* `OpCase(move_cx):` defines a label for the instruction. The
+`OpCase()` macro is defined in `beam_emu.c`. It will expand this line
+to `lb_move_cx:`.
+
+* `BeamInstr next_pf = BeamCodeAddr(I[2]);` fetches the pointer to
+code for the next instruction to be executed. The `BeamCodeAddr()`
+macro extracts the pointer from the lower 32 bits of the instruction
+word.
+
+* `xb(BeamExtraData(I[0])) = I[1];` is the expansion of `$Dst = $Src`.
+`BeamExtraData()` is a macro that will extract the upper 32 bits from
+the instruction word. In this example, it will return 40 which is the
+byte offset for X register 5. The `xb()` macro will cast a byte
+pointer to an `Eterm` pointer and dereference it. The `I[1]` on
+the right side of the `=` fetches an Erlang term (the atom `id` in
+this case).
+
+* `I += 2` advances the instruction pointer to the next
+instruction.
+
+* In a debug-compiled emulator, `ASSERT(VALID_INSTR(next_pf));` makes
+sure that `next_pf` is a valid instruction (that is, that it points
+within the `process_main()` function in `beam_emu.c`).
+
+* `GotoPF(next_pf);` transfers control to the next instruction.
+
+Now let's look at the implementation of `move_xx`:
+
+ OpCase(move_xx):
+ {
+ Eterm tmp_packed1 = BeamExtraData(I[0]);
+ BeamInstr next_pf = BeamCodeAddr(I[1]);
+ xb((tmp_packed1>>BEAM_TIGHT_SHIFT)) = xb(tmp_packed1&BEAM_TIGHT_MASK);
+ I += 1;
+ ASSERT(VALID_INSTR(next_pf));
+ GotoPF(next_pf);
+ }
+
+We will go through the lines that are new or have changed compared to
+`move_cx`.
+
+* `Eterm tmp_packed1 = BeamExtraData(I[0]);` picks up both X register
+numbers packed into the upper 32 bits of the instruction word.
+
+* `BeamInstr next_pf = BeamCodeAddr(I[1]);` pre-fetches the address of
+the next instruction. Note that because both X registers operands fits
+into the instruction word, the next instruction is in the very next
+word.
+
+* `xb((tmp_packed1>>BEAM_TIGHT_SHIFT)) = xb(tmp_packed1&BEAM_TIGHT_MASK);`
+copies the source to the destination. (For a 64-bit architecture,
+`BEAM_TIGHT_SHIFT` is 16 and `BEAM_TIGHT_MASK` is `0xFFFF`.)
+
+* `I += 1;` advances the instruction pointer to the next instruction.
+
+`move_xy` is almost identical to `move_xx`. The only difference is
+the use of the `yb()` macro instead of `xb()` to reference the
+destination register:
+
+ OpCase(move_xy):
+ {
+ Eterm tmp_packed1 = BeamExtraData(I[0]);
+ BeamInstr next_pf = BeamCodeAddr(I[1]);
+ yb((tmp_packed1>>BEAM_TIGHT_SHIFT)) = xb(tmp_packed1&BEAM_TIGHT_MASK);
+ I += 1;
+ ASSERT(VALID_INSTR(next_pf));
+ GotoPF(next_pf);
+ }
+
+### Transformation rules ###
+
+Next let's look at how we can do some optimizations using transformation
+rules. For simple instructions such as `move/2`, the instruction dispatch
+overhead can be substantial. A simple optimization is to combine common
+instructions sequences to a single instruction. One such common sequence
+is multiple `move` instructions moving X registers to Y registers.
+
+Using the following rule we can combine two `move` instructions
+to a `move2` instruction:
+
+ move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2
+
+The left side of the arrow (`=>`) is a pattern. If the pattern
+matches, the matching instructions will be replaced by the
+instructions on the right side. Variables in a pattern must start
+with an uppercase letter just as in Erlang. A pattern variable may be
+followed `=` and one or more type letters to constrain the match to
+one of those types. The variables that are bound on the left side can
+be used on the right side.
+
+We will also need to define a specific instruction and an implementation:
+
+ # In ops.tab
+ move2 x y x y
+
+ // In instrs.tab
+ move2(S1, D1, S2, D2) {
+ Eterm V1, V2;
+ V1 = $S1;
+ V2 = $S2;
+ $D1 = V1;
+ $D2 = V2;
+ }
+
+When the loader has found a match and replaced the matched instructions,
+it will match the new instructions against the transformation rules.
+Because of that, we can define the rule for a `move3/6` instruction
+as follows:
+
+ move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => \
+ move3 X1 Y1 X2 Y2 X3 Y3
+
+(A `\` before a newline can be used to break a long line for readability.)
+
+It would also be possible to define it like this:
+
+ move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y => \
+ move3 X1 Y1 X2 Y2 X3 Y3
+
+but in that case it must be defined before the rule for `move2/4`
+because the first matching rule will be applied.
+
+One must be careful not to create infinite loops. For example, if we
+for some reason would want to reverse the operand order for the `move`
+instruction, we must not do like this:
+
+ move Src Dst => move Dst Src
+
+The loader would swap the operands forever. To avoid the loop, we must
+rename the instruction. For example:
+
+ move Src Dst => assign Dst Src
+
+This concludes the quick tour of the features of **beam\_makeops**.
+
+Short overview of instruction loading
+-------------------------------------
+
+To give some background to the rest of this document, here follows a
+quick overview of how instructions are loaded.
+
+* The loader reads and decodes one instruction at a time from the BEAM
+code and creates a generic instruction. Many transformation rules
+must look at multiple instructions, so the loader will
+keep multiple generic instructions in a linked list.
+
+* The loader tries to apply transformation rules against the
+generic instructions in the linked list. If a rule matches, the
+matched instructions will be removed and replaced with new
+generic instructions constructed from the right side of the
+transformation.
+
+* If a transformation rule matched, the loader applies the
+transformation rules again.
+
+* If no transformation rule match, the loader will begin rewriting
+the first of generic instructions to a specific instruction.
+
+* First the loader will search for a specific operation where the
+types for all operands match the type for the generic instruction.
+The first matching instruction will be selected. **beam\_makeops**
+has ordered the specific instructions so that instructions with more
+specific operands comes before instructions with less specific
+operands. For example, `move_nx` is more specific than `move_cx`. If
+the first operand is `[]` (NIL), `move_nx` will be selected.
+
+* Given the opcode for the selected specific instruction, the loader
+looks up the pointer to the C code for the instruction and stores
+in the code area for the module being loaded.
+
+* The loader translates each operand to a machine word and stores it
+in the code area. The operand type for the selected specific
+instruction guides the translation. For example, if the type is `e`,
+the value of the operand is an index into an arry of external
+functions and will be translated to a pointer to the export entry for
+the function to call. If the type is `x`, the number of the X
+register will be multiplied by the word size to produce a byte offset.
+
+* The loader runs the packing engine to pack multiple operands into a
+single word. The packing engine is controlled by a small program,
+which is a string where each character is an instruction. For
+example, the code to pack the operands for `move_xy` is `"22#"` (on a
+64-bit machine). That program will pack the byte offsets for both
+registers into the same word as the pointer to C code.
+
+Running beam_makeops
+--------------------
+
+**beam\_makeops** is found in `$ERL_TOP/erts/emulator/utils`. Options
+start with a hyphen (`-`). The options are followed by the name of
+the input files. By convention, all input files have the extension
+`.tab`, but is not enforced by **beam\_makeops**.
+
+### The -outdir option ###
+
+The option `-outdir Directory` specifies the output directory for
+the generated files. Default is the current working directory.
+
+### Running beam_makeops for the compiler ###
+
+Give the option `-compiler` to produce output files for the compiler.
+The following files will be written to the output directory:
+
+* `beam_opcodes.erl` - Used primarily by `beam_asm` and `beam_diasm`.
+
+* `beam_opcode.hrl` - Used by `beam_asm`. It contains tag definitions
+used for encoding instruction operands.
+
+The input file should only contain the definition of BEAM_FORMAT_NUMBER
+and external generic instructions. (Everything else would be ignored.)
+
+### Running beam_makeops for the emulator ###
+
+Give the option `-emulator` to produce output files for the emulator.
+The following output files will be generated in the output directory.
+
+* `beam_hot.h`, `beam_warm.h`, `beam_cold.`h - Implementation of
+instructions. Included inside the `process_main()` function in
+`beam_emu.c`.
+
+* `beam_opcodes.c` - Defines static data used by the loader
+(`beam_load.c`). Data about generic instructions, specific
+instructions (including how to pack their operands), and
+transformation rules are all part of this file.
+
+* `beam_opcodes.h` - Miscellanous preprocessor definitions, mainly
+used by `beam_load.c` but also by `beam_{hot,warm,cold}.h`.
+
+* `beam_pred_funcs.h` - Included by `beam_load.c`. Contains defines
+needed to call guard constraints in transformation rules.
+
+* `beam_tr_funcs.h` - Included by `beam_load.c`. Contains defines
+needed to call a C function to the right of a transformation rule.
+
+The following options can be given:
+
+* `wordsize 32|64` - Defines the word size. Default is 32.
+
+* `code-model Model` - The code model as given to `-mcmodel` option
+for GCC. Default is `unknown`. If the code model is `small` (and
+the word size is 64 bits), **beam\_makeops** will pack operands
+into the upper 32 bits of the instruction word.
+
+* `DSymbol=0|1` - Defines the value for a symbol. The symbol can be
+used in `%if` and `%unless` directives.
+
+Syntax of .tab files
+--------------------
+
+### Comments ###
+
+Any line starting with `#` is a comment and is ignored.
+
+A line with `//` is also a comment. It is recommended to only
+use this style of comments in files that define implementations of
+instructions.
+
+A long line can be broken into shorter lines by a placing a`\` before
+the newline.
+
+### Variable definitions ###
+
+A variable definition binds a variable to a Perl variable. It is only
+meaningful to add a new definition if **beam\_makeops** is updated
+at the same time to use the variable. A variable definition looks this:
+
+*name*=*value*[;]
+
+where *name* is the name of a Perl variable in **beam\_makeops**,
+and *value* is the value to be given to the variable. The line
+can optionally end with a `;` (to avoid messing up the
+C indentation mode in Emacs).
+
+Here follows a description of the variables that are defined.
+
+#### BEAM\_FORMAT\_NUMBER ####
+
+`genop.tab` has the following definition:
+
+ BEAM_FORMAT_NUMBER=0
+
+It defines the version of the instruction set (which will be
+included in the code header in the BEAM code). Theoretically,
+the version could be bumped, and all instructions changed.
+In practice, we would have two support two instruction sets
+in the runtime system for at least two releases, so it will
+probably never happen in practice.
+
+#### GC\_REGEXP ####
+
+In `macros.tab`, there is a definition of `GC_REGEXP`.
+It will be described in [a later section](#the-gc_regexp-definition).
+
+### Directives ###
+
+There are directives to classify specific instructions depending
+on how frequently used they are:
+
+* `%hot` - Implementation will be placed in `beam_hot.h`. Frequently
+executed instructions.
+
+* `%warm` - Implementation will be placed in `beam_warm.h`. Binary
+syntax instructions.
+
+* `%cold` - Implementation will be placed in `beam_cold.h`. Trace
+instructions and infrequently used instructions.
+
+Default is `%hot`. The directives will be applied to declarations
+of the specific instruction that follow. Here is an example:
+
+ %cold
+ is_number f? xy
+ %hot
+
+#### Conditional compilation directives ####
+
+The `%if` directive includes a range of lines if a condition is
+true. For example:
+
+ %if ARCH_64
+ i_bs_get_integer_32 x f? x
+ %endif
+
+The specific instruction `i_bs_get_integer_32` will only be defined
+on a 64-bit machine.
+
+The condition can be inverted by using `%unless` instead of `%if`:
+
+ %unless NO_FPE_SIGNALS
+ fcheckerror p => i_fcheckerror
+ i_fcheckerror
+ fclearerror
+ %endif
+
+It is also possible to add an `%else` clause:
+
+ %if ARCH_64
+ BS_SAFE_MUL(A, B, Fail, Dst) {
+ Uint64 res = ($A) * ($B);
+ if (res / $B != $A) {
+ $Fail;
+ }
+ $Dst = res;
+ }
+ %else
+ BS_SAFE_MUL(A, B, Fail, Dst) {
+ Uint64 res = (Uint64)($A) * (Uint64)($B);
+ if ((res >> (8*sizeof(Uint))) != 0) {
+ $Fail;
+ }
+ $Dst = res;
+ }
+ %endif
+
+#### Symbols that are defined in directives ####
+
+The following symbols are always defined.
+
+* `ARCH_64` - is 1 for a 64-bit machine, and 0 otherwise.
+* `ARCH_32` - is 1 for 32-bit machine, and 1 otherwise.
+
+The `Makefile` for building the emulator currently defines the
+following symbols by using the `-D` option on the command line for
+**beam\_makeops**.
+
+* `NO_FPE_SIGNALS` - 1 if FPE signals are not enable in runtime system,
+0 otherwise.
+* `USE_VM_PROBES` - 1 if the runtime system is compiled to use VM probes (support for dtrace or systemtap), 0 otherwise.
+
+### Defining external generic instructions ###
+
+External generic BEAM instructions are known to both the compiler and
+the runtime system. They remain stable between releases. A new major
+release may add more external generic instructions, but must not change
+the semantics for a previously defined instruction.
+
+The syntax for an external generic instruction is as follows:
+
+*opcode*: [-]*name*/*arity*
+
+*opcode* is an integer greater than or equal to 1.
+
+*name* is an identifier starting with a lowercase letter. *arity* is
+an integer denoting the number of operands.
+
+*name* can optionally be preceded by `-` to indicate that it has been
+obsoleted. The compiler is not allowed to generate BEAM files that
+use obsolete instructions and the loader will refuse to load BEAM
+files that use obsolete instructions.
+
+It only makes sense to define external generic instructions in the
+file `genop.tab` in `lib/compiler/src`, because the compiler must
+know about them in order to use them.
+
+New instructions must be added at the end of the file, with higher
+numbers than the previous instructions.
+
+### Defining internal generic instructions ###
+
+Internal generic instructions are known only to the runtime
+system and can be changed at any time without compatibility issues.
+
+There are two ways to define internal generic instructions:
+
+* Implicitly when a specific instruction is defined. This is by far
+the most common way. Whenever a specific instruction is created,
+**beam\_makeops** automatically creates an internal generic instruction
+if it does not previously exist.
+
+* Explicitly. This is necessary only when a generic instruction does
+not have any corresponding specific instruction.
+
+The syntax for an internal generic instruction is as follows:
+
+*name*/*arity*
+
+*name* is an identifier starting with a lowercase letter. *arity* is
+an integer denoting the number of operands.
+
+### About generic instructions in general ###
+
+Each generic instruction has an opcode. The opcode is an integer,
+greater than or equal to 1. For an external generic instruction, it
+must be explicitly given `genop.tab`, while internal generic
+instructions are automatically numbered by **beam\_makeops**.
+
+The identity of a generic instruction is its name combined with its
+arity. That means that it is allowed to define two distinct generic
+instructions having the same name but with different arities. For
+example:
+
+ move_window/5
+ move_window/6
+
+Each operand of a generic instruction is tagged with its type. A generic
+instruction can have one of the following types:
+
+* `x` - X register.
+
+* `y` - Y register.
+
+* `l` - Floating point register number.
+
+* `i` - Tagged literal integer.
+
+* `a` - Tagged literal atom.
+
+* `n` - NIL (`[]`, the empty list).
+
+* `q` - Literal that don't fit in a word, that is an object stored on
+the heap such as a list or tuple. Any heap object type is supported,
+even types that don't have real literals such as external references.
+
+* `f` - Non-zero failure label.
+
+* `p` - Zero failure label.
+
+* `u` - Untagged integer that fits in a machine word. It is used for many
+different purposes, such as the number of live registers in `test_heap/2`,
+as a reference to the export for `call_ext/2`, and as the flags operand for
+binary syntax instructions. When the generic instruction is translated to a
+specific instruction, the type for the operand in the specific operation will
+tell the loader how to treat the operand.
+
+* `o` - Overflow. If the value for an `u` operand does not fit in a machine
+word, the type of the operand will be changed to `o` (with no associated
+value). Currently only used internally in the loader in the guard constraint
+function `binary_too_big()`.
+
+* `v` - Arity value. Only used internally in the loader.
+
+
+### Defining specific instructions ###
+
+The specific instructions are known only to the runtime system and
+are the instructions that are actually executed. They can be changed
+at any time without causing compatibility issues.
+
+A specific instruction can have at most 6 operands.
+
+A specific instruction is defined by first giving its name followed by
+the types for each operand. For example:
+
+ move x y
+
+Internally, for example in the generated code and in the output from
+the BEAM disassembler, the instruction `move x y` will be called `move_xy`.
+
+The name for a specific instruction is an identifier starting with a
+lowercase letter. A type is an lowercase or uppercase letter.
+
+All specific instructions with a given name must have the same number
+of operands. That is, the following is **not** allowed:
+
+ move x x
+ move x y x y
+
+Here follows the type letters that more or less directly corresponds
+to the types for generic instructions.
+
+* `x` - X register. Will be loaded as a byte offset to the X register
+relative to the base of X register array. (Can be packed with other
+operands.)
+
+* `y` - Y register. Will be loaded as a byte offset to the Y register
+relative to the stack frame. (Can be packed with other operands.)
+
+* `r` - X register 0. An implicit operand that will not be stored in
+the loaded code.
+
+* `l` - Floating point register number. (Can be packed with other
+operands.)
+
+* `i` - Tagged literal integer (a SMALL that will fit in one word).
+
+* `a` - Tagged atom.
+
+* `n` - NIL or the empty list. (Will not be stored in the loaded code.)
+
+* `q` - Tagged CONS or BOXED pointer. That is, a term such as a list
+or tuple. Any heap object type is supported, even types that don't
+have real literals such as external references.
+
+* `f` - Failure label (non-zero). The target for a branch
+or call instruction.
+
+* `p` - The 0 failure label, meaning that an exception should be raised
+if the instruction fails. (Will not be stored in the loaded code.)
+
+* `c` - Any literal term; that is, immediate literals such as SMALL,
+and CONS or BOXED pointers to literals. (Can be used where the
+operand in the generic instruction has one of the types `i`, `a`, `n`,
+or `q`.)
+
+The types that follow do a type test of the operand at runtime; thus,
+they are generally more expensive in terms of runtime than the types
+described earlier. However, those operand types are needed to avoid a
+combinatorial explosion in the number of specific instructions and
+overall code size of `process_main()`.
+
+* `s` - Tagged source: X register, Y register, or a literal term. The
+tag will be tested at runtime to retrieve the value from an X
+register, a Y register, or simply use the value as a tagged Erlang
+term. (Implementation note: An X register is tagged as a pid, and a Y
+register as a port. Therefore the literal term must not contain a
+port or pid.)
+
+* `S` - Tagged source register (X or Y). The tag will be tested at
+runtime to retrieve the value from an X register or a Y register. Slighly
+cheaper than `s`.
+
+* `d` - Tagged destination register (X or Y). The tag will be tested
+at runtime to set up a pointer to the destination register. If the
+instrution performs a garbarge collection, it must use the
+`$REFRESH_GEN_DEST()` macro to refresh the pointer before storing to
+it (there are more details about that in a later section).
+
+* `j` - A failure label (combination of `f` and `p`). If the branch target 0,
+an exception will be raised if instruction fails, otherwise control will be
+transfered to the target address.
+
+The types that follows are all applied to an operand that has the `u`
+type.
+
+* `t` - An untagged integer that will fit in 12 bits (0-4096). It can be
+packed with other operands in a word. Most often used as the number
+of live registers in instructions such as `test_heap`.
+
+* `I` - An untagged integer that will fit in 32 bits. It can be
+packed with other operands in a word on a 64-bit system.
+
+* `W` - Untagged integer or pointer. Not possible to pack with other
+operands.
+
+* `e` - Pointer to an export entry. Use by call instructions that call
+other modules, such as `call_ext`.
+
+* `L` - A label. Only used by the `label/1` instruction.
+
+* `b` - Pointer to BIF. Used by instructions that BIFs, such as
+`call_bif`.
+
+* `A` - A tagged arityvalue. Used in instructions that test the arity
+of a tuple.
+
+* `P` - A byte offset into a tuple.
+
+* `Q` - A byte offset into the stack. Used for updating the frame
+pointer register. Can be packed with other operands.
+
+When the loader translates a generic instruction a specific
+instruction, it will choose the most specific instruction that will
+fit the types. Consider the following two instructions:
+
+ move c x
+ move n x
+
+The `c` operand can encode any literal value, including NIL. The
+`n` operand only works for NIL. If we have the generic instruction
+`{move,nil,{x,1}}`, the loader will translate it to `move_nx 1`
+because `move n x` is more specific. `move_nx` could be slightly
+faster or smaller (depending on the architecture), because the `[]`
+is not stored explicitly as an operand.
+
+#### Syntactic sugar for specific instructions ####
+
+It is possible to specify more than one type letter for each operand.
+Here is an example:
+
+ move cxy xy
+
+This is syntactic sugar for:
+
+ move c x
+ move c y
+ move x x
+ move x y
+ move y x
+ move y y
+
+Note the difference between `move c xy` and `move c d`. Note that `move c xy`
+is equivalent to the following two definitions:
+
+ move c x
+ move c y
+
+On the other hand, `move c d` is a single instruction. At runtime,
+the `d` operand will be tested to see whether it refers to an X
+register or a Y register, and a pointer to the register will be set
+up.
+
+#### The '?' type modifier ####
+
+The character `?` can be added to the end of an operand to indicate
+that the operand will not be used every time the instruction is executed.
+For example:
+
+ allocate_heap t I t?
+ is_eq_exact f? x xy
+
+In `allocate_heap`, the last operand is the number of live registers.
+It will only be used if there is not enough heap space and a garbage
+collection must be performed.
+
+In `is_eq_exact`, the failure address (the first operand) will only be
+used if the two register operands are not equal.
+
+Knowing that an operand is not always used can improve how packing
+is done for some instructions.
+
+For the `allocate_heap` instruction, without the `?` the packing would
+be done like this:
+
+ +--------------------+--------------------+
+ I -> | Stack needed | &&lb_allocate_heap +
+ +--------------------+--------------------+
+ | Heap needed | Live registers +
+ +--------------------+--------------------+
+
+"Stack needed" and "Heap needed" are always used, but they are in
+different words. Thus, at runtime the `allocate_heap` instruction
+must read both words from memory even though it will not always use
+"Live registers".
+
+With the `?`, the operands will be packed like this:
+
+ +--------------------+--------------------+
+ I -> | Live registers | &&lb_allocate_heap +
+ +--------------------+--------------------+
+ | Heap needed | Stack needed +
+ +--------------------+--------------------+
+
+Now "Stack needed" and "Heap needed" are in the same word.
+
+### Defining transformation rules ###
+
+Transformation rules are used to rewrite generic instructions to other
+generic instructions. The transformations rules are applied
+repeatedly until no rule match. At that point, the first instruction
+in the resulting instruction sequence will be converted to a specific
+instruction and added to the code for the module being loaded. Then
+the transformation rules for the remaining instructions are run in the
+same way.
+
+A rule is recognized by its right-pointer arrow: `=>`. To the left of
+the arrow is one or more instruction patterns, separated by `|`. To
+the right of the arrow is zero or more instructions, separated by `|`.
+If the instructions from the BEAM code matches the instruction
+patterns on the left side, they will be replaced with instructions on
+the right side (or removed if there are no instructions on the right).
+
+#### Defining instruction patterns ####
+
+We will start looking at the patterns on the left side of the arrow.
+
+A pattern for an instruction consists of its name, followed by a pattern
+for each of its operands. The operand patterns are separated by spaces.
+
+The simplest possible pattern is a variable. Just like in Erlang,
+a variable must begin with an uppercase letter. If the same variable is
+used in multiple operands, the pattern will only match if the operands
+are equal. For example:
+
+ move Same Same =>
+
+This pattern will match if the operands for `move` are the same. If
+the pattern match, the instruction will be removed. (That used to be an
+actual rule a long time ago when the compiler would occasionally produce
+instructions such as `{move,{x,2},{x,2}}`.)
+
+Variables that have been bound on the left side can be used on the
+right side. For example, this rule will rewrite all `move` instructions
+to `assign` instructions with the operands swapped:
+
+ move Src Dst => assign Dst Src
+
+If we only want to match operands of a certain type, we can
+use a type constraint. A type constraint consists of one or more
+lowercase letters, each specifying a type. For example:
+
+ is_integer Fail an => jump Fail
+
+The second operand pattern, `an`, will match if the second operand is
+either an atom or NIL (the empty list). In case of a match, the
+`is_integer/2` instruction will be replaced with a `jump/1`
+instruction.
+
+An operand pattern can bind a variable and constrain the type at the
+same time by following the variable with a `=` and the constraint.
+For example:
+
+ is_eq_exact Fail=f R=xy C=q => i_is_eq_exact_literal Fail R C
+
+Here the `is_eq_exact` instruction is replaced with a specialized instruction
+that only compares literals, but only if the first operand is a register and
+the second operand is a literal.
+
+#### Further constraining patterns ####
+
+In addition to specifying a type letter, the actual value for the type can
+be specified. For example:
+
+ move C=c x==1 => move_x1 C
+
+Here the second operand of `move` is constrained to be X register 1.
+
+When specifying an atom constraint, the atom is written as it would be
+in the C source code. That is, it needs an `am_` prefix, and it must
+be listed in `atom.names`. For example:
+
+ is_boolean Fail=f a==am_true =>
+ is_boolean Fail=f a==am_false =>
+
+There are several constraints available for testing whether a call is to a BIF
+or a function.
+
+The constraint `u$is_bif` will test whether the given operand refers to a BIF.
+For example:
+
+ call_ext u Bif=u$is_bif => call_bif Bif
+ call_ext u Func => i_call_ext Func
+
+The `call_ext` instruction can be used to call functions written in
+Erlang as well as BIFs (or more properly called SNIFs). The
+`u$is_bif` constraint will match if the operand refers to a BIF (that
+is, if it is listed in the file `bif.tab`). Note that `u$is_bif`
+should only be applied to operands that are known to contain an index
+to the import table chunk in the BEAM file (such operands have the
+type `b` or `e` in the corresponding specific instruction). If
+applied to other `u` operands, it will at best return a nonsense
+result.
+
+The `u$is_not_bif` constraint matches if the operand does not refer to
+a BIF (not listed in `bif.tab`). For example:
+
+ move S X0=x==0 | line Loc | call_ext_last Ar Func=u$is_not_bif D => \
+ move S X0 | call_ext_last Ar Func D
+
+The `u$bif:Module:Name/Arity` constraint tests whether the given
+operand refers to a specific BIF. Note that `Module:Name/Arity`
+**must** be an existing BIF defined in `bif.tab`, or there will
+be a compilation error. It is useful when a call to a specific BIF
+should be replaced with an instruction as in this example:
+
+ gc_bif2 Fail Live u$bif:erlang:splus/2 S1 S2 Dst => \
+ gen_plus Fail Live S1 S2 Dst
+
+Here the call to the GC BIF `'+'/2` will be replaced with the instruction
+`gen_plus/5`. Note that the same name as used in the C source code must be
+used for the BIF, which in this case is `splus`. It is defined like this
+in `bit.tab`:
+
+ ubif erlang:'+'/2 splus_2
+
+The `u$func:Module:Name/Arity` will test whether the given operand is a
+a specific function. Here is an example:
+
+ bif1 Fail u$func:erlang:is_constant/1 Src Dst => too_old_compiler
+
+`is_constant/1` used to be a BIF a long time ago. The transformation
+replaces the call with the `too_old_compiler` instruction which will produce
+a nicer error message than the default error would be for a missing guard BIF.
+
+#### Type constraints allowed in patterns ####
+
+Here are all type letters that are allowed on the left side of a transformation
+rule.
+
+* `u` - An untagged integer that fits in a machine word.
+
+* `x` - X register.
+
+* `y` - Y register.
+
+* `l` - Floating point register number.
+
+* `i` - Tagged literal integer.
+
+* `a` - Tagged literal atom.
+
+* `n` - NIL (`[]`, the empty list).
+
+* `q` - Literals that don't fit in a word, such as list or tuples.
+
+* `f` - Non-zero failure label.
+
+* `p` - The zero failure label.
+
+* `j` - Any label. Equivalent to `fp`.
+
+* `c` - Any literal term. Equivalent to `ainq`.
+
+* `s` - X register, Y register, or any literal term. Equivalent to `xyc`.
+
+* `d` - X or Y register. Equivalent to `xy`. (In a pattern `d` will
+match both source and destination registers. As an operand in a specific
+instruction, it must only be used for a destination register.)
+
+* `o` - Overflow. An untagged integer that does not fit in a machine word.
+
+#### Guard constraints ####
+
+If the constraints described so far is not enough, additional
+constraints can be written in C in `beam_load.c` and be called as a
+guard function on the left side of the transformation. If the guard
+function returns a non-zero value, the matching of the rule will
+continue, otherwise the match will fail. For example:
+
+ ensure_map Lit=q | literal_is_map(Lit) =>
+
+The guard test `literal_is_map/1` tests whether the given literal is a map.
+If the literal is a map, the instruction is unnecessary and can be removed.
+
+It is outside the scope for this document to describe in detail how such
+guard functions are written, but for the curious here is the implementation
+of `literal_is_map()`:
+
+ static int
+ literal_is_map(LoaderState* stp, GenOpArg Lit)
+ {
+ Eterm term;
+
+ ASSERT(Lit.type == TAG_q);
+ term = stp->literals[Lit.val].term;
+ return is_map(term);
+ }
+
+#### Handling instruction with variable number of operands ####
+
+Some instructions, such as `select_val/3`, essentially has a variable
+number of operands. Such instructions have a `{list,[...]}` operand
+as their last operand in the BEAM assembly code. For example:
+
+ {select_val,{x,0},
+ {f,1},
+ {list,[{atom,b},{f,4},{atom,a},{f,5}]}}.
+
+The loader will convert a `{list,[...]}` operand to an `u` operand whose
+value is the number of elements in the list, followed by each element in
+the list. The instruction above would be translated to the following
+generic instruction:
+
+ {select_val,{x,0},{f,1},{u,4},{atom,b},{f,4},{atom,a},{f,5}}
+
+To match a variable number of arguments we need to use the special
+operand type `*` like this:
+
+ select_val Src=aiq Fail=f Size=u List=* => \
+ i_const_select_val Src Fail Size List
+
+This transformation renames a `select_val/3` instruction
+with a constant source operand to `i_const_select_val/3`.
+
+#### Constructing new instructions on the right side ####
+
+The most common operand on the right side is a variable that was bound while
+matching the left side. For example:
+
+ trim N Remaining => i_trim N
+
+An operand can also be a type letter to construct an operand of that type.
+Each type has a default value. For example, the type `x` has the default
+value 1023, which is the highest X register. That makes `x` on the right
+side a convenient shortcut for a temporary X register. For example:
+
+ is_number Fail Literal=q => move Literal x | is_number Fail x
+
+If the second operand for `is_number/2` is a literal, it will be moved to
+X register 1023. Then `is_number/2` will test whether the value stored in
+X register 1023 is a number.
+
+This kind of transformation is useful when it is rare that an operand can
+be anything else but a register. In the case of `is_number/2`, the second
+operand is always a register unless the compiler optimizations have been
+disabled.
+
+If the default value is not suitable, the type letter can be followed
+by `=` and a value. Most types take an integer value. The value for
+an atom is written the same way as in the C source code. For example,
+the atom `false` is written as `am_false`. The atom must be listed in
+`atom.names`.
+
+Here is an example showing how values can be specified:
+
+ bs_put_utf32 Fail=j Flags=u Src=s => \
+ i_bs_validate_unicode Fail Src | \
+ bs_put_integer Fail i=32 u=1 Flags Src
+
+#### Type letters on the right side ####
+
+Here follows all types that are allowed to be used in operands for
+instructions being constructed on the right side of a transformation
+rule.
+
+* `u` - Construct an untagged integer. The default value is 0.
+
+* `x` - X register. The default value is 1023. That makes `x` convenient to
+use as a temporary X register.
+
+* `y` - Y register. The default value is 0.
+
+* `l` - Foating point register number. The default value is 0.
+
+* `i` - Tagged literal integer. The default value is 0.
+
+* `a` - Tagged atom. The default value is the empty atom (`am_Empty`).
+
+* `n` - NIL (`[]`, the empty list).
+
+#### Function call on the right side ####
+
+Transformations that are not possible to describe with the rule
+language as described here can be written as a C function in
+`beam_load.c` and called from the right side of a transformation. The
+left side of the transformation will perform the match and bind
+operands to variables. The variables can then be passed to a
+generator function on the right side. For example:
+
+ bif2 Fail=j u$bif:erlang:element/2 Index=s Tuple=xy Dst=d => \
+ gen_element(Jump, Index, Tuple, Dst)
+
+This transformation rule matches a call to the BIF `element/2`.
+The operands will be captured and the function `gen_element()` will
+be called.
+
+`gen_element()` will produce one of two instructions depending
+on `Index`. If `Index` is an integer in the range from 1 up to
+the maximum tuple size, the instruction `i_fast_element/2` will
+be produced, otherwise the instruction `i_element/4` will be
+produced. The corresponding specific instructions are:
+
+ i_fast_element xy j? I d
+ i_element xy j? s d
+
+The `i_fast_element/2` instruction is faster because the tuple is
+already an untagged integer. It also knows that the index is at least
+1, so it does not have to test for that. The `i_element/4`
+instruction will have to fetch the index from a register, test that it
+is an integer, and untag the integer.
+
+It is outside the scope of this document to describe in detail how
+generator functions are written, but for the curious, here is the
+implementation of `gen_element()`:
+
+ static GenOp*
+ gen_element(LoaderState* stp, GenOpArg Fail,
+ GenOpArg Index, GenOpArg Tuple, GenOpArg Dst)
+ {
+ GenOp* op;
+
+ NEW_GENOP(stp, op);
+ op->arity = 4;
+ op->next = NULL;
+
+ if (Index.type == TAG_i && Index.val > 0 &&
+ Index.val <= ERTS_MAX_TUPLE_SIZE &&
+ (Tuple.type == TAG_x || Tuple.type == TAG_y)) {
+ op->op = genop_i_fast_element_4;
+ op->a[0] = Tuple;
+ op->a[1] = Fail;
+ op->a[2].type = TAG_u;
+ op->a[2].val = Index.val;
+ op->a[3] = Dst;
+ } else {
+ op->op = genop_i_element_4;
+ op->a[0] = Tuple;
+ op->a[1] = Fail;
+ op->a[2] = Index;
+ op->a[3] = Dst;
+ }
+
+ return op;
+ }
+}
+
+### Defining the implementation ###
+
+The actual implementation of instructions are also defined in `.tab`
+files processed by **beam\_makeops**. For practical reasons,
+instruction definitions are stored in several files, at the time of
+writing in the following files:
+
+ bif_instrs.tab
+ arith_instrs.tab
+ bs_instrs.tab
+ float_instrs.tab
+ instrs.tab
+ map_instrs.tab
+ msg_instrs.tab
+ select_instrs.tab
+ trace_instrs.tab
+
+There is also a file that only contains macro definitions:
+
+ macros.tab
+
+The syntax of each file is similar to C code. In fact, most of
+the contents *is* C code, interspersed with macro invocations.
+
+To allow Emacs to auto-indent the code, each file starts with the
+following line:
+
+ // -*- c -*-
+
+To avoid messing up the indentation, all comments are written
+as C++ style comments (`//`) instead of `#`. Note that a comment
+must start at the beginning of a line.
+
+The meat of an instruction definition file are macro definitions.
+We have seen this macro definition before:
+
+ move(Src, Dst) {
+ $Dst = $Src;
+ }
+
+A macro definitions must start at the beginning of the line (no spaces
+allowed), the opening curly bracket must be on the same line, and the
+finishing curly bracket must be at the beginning of a line. It is
+recommended that the macro body is properly indented.
+
+As a convention, the macro arguments in the head all start with an
+uppercase letter. In the body, the macro arguments can be expanded
+by preceding them with `$`.
+
+A macro definition whose name and arity matches a family of
+specific instructions is assumed to be the implementation of that
+instruction.
+
+A macro can also be invoked from within another macro. For example,
+`move_deallocate_return/2` avoids repeating code by invoking
+`$deallocate_return()` as a macro:
+
+ move_deallocate_return(Src, Deallocate) {
+ x(0) = $Src;
+ $deallocate_return($Deallocate);
+ }
+
+Here is the definition of `deallocate_return/1`:
+
+ deallocate_return(Deallocate) {
+ //| -no_next
+ int words_to_pop = $Deallocate;
+ SET_I((BeamInstr *) cp_val(*E));
+ E = ADD_BYTE_OFFSET(E, words_to_pop);
+ CHECK_TERM(x(0));
+ DispatchReturn;
+ }
+
+The expanded code for `move_deallocate_return` will look this:
+
+ OpCase(move_deallocate_return_cQ):
+ {
+ x(0) = I[1];
+ do {
+ int words_to_pop = Qb(BeamExtraData(I[0]));
+ SET_I((BeamInstr *) cp_val(*E));
+ E = ADD_BYTE_OFFSET(E, words_to_pop);
+ CHECK_TERM(x(0));
+ DispatchReturn;
+ } while (0);
+ }
+
+When expanding macros, **beam\_makeops** wraps the expansion in a
+`do`/`while` wrapper unless **beam\_makeops** can clearly see that no
+wrapper is needed. In this case, the wrapper is needed.
+
+Note that arguments for macros cannot be complex expressions, because
+the arguments are split on `,`. For example, the following would
+not work because **beam\_makeops** would split the expression into
+two arguments:
+
+ $deallocate_return(get_deallocation(y, $Deallocate));
+
+#### Code generation directives ####
+
+Within macro definitions, `//` comments are in general not treated
+specially. They will be copied to the file with the generated code
+along with the rest of code in the body.
+
+However, there is an exception. Within a macro definition, a line that
+starts with whitespace followed by `//|` is treated specially. The
+rest of the line is assumed to contain directives to control code
+generation.
+
+Currently, two code generation directives are recognized:
+
+* `-no_prefetch`
+* `-no_next`
+
+##### The -no_prefetch directive #####
+
+To see what `-no_prefetch` does, let's first look at the default code
+generation. Here is the code generated for `move_cx`:
+
+ OpCase(move_cx):
+ {
+ BeamInstr next_pf = BeamCodeAddr(I[2]);
+ xb(BeamExtraData(I[0])) = I[1];
+ I += 2;
+ ASSERT(VALID_INSTR(next_pf));
+ GotoPF(next_pf);
+ }
+
+Note that the very first thing done is to fetch the address to the
+next instruction. The reason is that it usually improves performance.
+
+Just as a demonstration, we can add a `-no_prefetch` directive to
+the `move/2` instruction:
+
+ move(Src, Dst) {
+ //| -no_prefetch
+ $Dst = $Src;
+ }
+
+We can see that the prefetch is no longer done:
+
+ OpCase(move_cx):
+ {
+ xb(BeamExtraData(I[0])) = I[1];
+ I += 2;
+ ASSERT(VALID_INSTR(*I));
+ Goto(*I);
+ }
+
+When would we want to turn off the prefetch in practice?
+
+In instructions that will not always execute the next instruction.
+For example:
+
+ is_atom(Fail, Src) {
+ if (is_not_atom($Src)) {
+ $FAIL($Fail);
+ }
+ }
+
+ // From macros.tab
+ FAIL(Fail) {
+ //| -no_prefetch
+ $SET_I_REL($Fail);
+ Goto(*I);
+ }
+
+`is_atom/2` may either execute the next instruction (if the second
+operand is an atom) or branch to the failure label.
+
+The generated code looks like this:
+
+ OpCase(is_atom_fx):
+ {
+ if (is_not_atom(xb(I[1]))) {
+ ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0)));
+ I += fb(BeamExtraData(I[0])) + 0;;
+ Goto(*I);;
+ }
+ I += 2;
+ ASSERT(VALID_INSTR(*I));
+ Goto(*I);
+ }
+
+##### The -no_next directive #####
+
+Next we will look at when the `-no_next` directive can be used. Here
+is the `jump/1` instruction:
+
+ jump(Fail) {
+ $JUMP($Fail);
+ }
+
+ // From macros.tab
+ JUMP(Fail) {
+ //| -no_next
+ $SET_I_REL($Fail);
+ Goto(*I);
+ }
+
+The generated code looks like this:
+
+ OpCase(jump_f):
+ {
+ ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0)));
+ I += fb(BeamExtraData(I[0])) + 0;;
+ Goto(*I);;
+ }
+
+If we remove the `-no_next` directive, the code would look like this:
+
+ OpCase(jump_f):
+ {
+ BeamInstr next_pf = BeamCodeAddr(I[1]);
+ ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0)));
+ I += fb(BeamExtraData(I[0])) + 0;;
+ Goto(*I);;
+ I += 1;
+ ASSERT(VALID_INSTR(next_pf));
+ GotoPF(next_pf);
+ }
+
+In the end, the C compiler will probably optimize this code to the
+same native code as the first version, but the first version is certainly
+much easier to read for human readers.
+
+#### Macros in the macros.tab file ####
+
+The file `macros.tab` contains many useful macros. When implementing
+new instructions it is good practice to look through `macros.tab` to
+see if any of existing macros can be used rather than re-inventing
+the wheel.
+
+We will describe a few of the most useful macros here.
+
+##### The GC_REGEXP definition #####
+
+The following line defines a regular expression that will recognize
+a call to a function that does a garbage collection:
+
+ GC_REGEXP=erts_garbage_collect|erts_gc|GcBifFunction;
+
+The purpose is that **beam\_makeops** can verify that an instruction
+that does a garbage collection and has an `d` operand uses the
+`$REFRESH_GEN_DEST()` macro.
+
+If you need to define a new function that does garbage collection,
+you should give it the prefix `erts_gc_`. If that is not possible
+you should update the regular expression so that it will match your
+new function.
+
+##### FAIL(Fail) #####
+
+Branch to `$Fail`. Will suppress prefetch (`-no_prefetch`). Typical use:
+
+ is_nonempty_list(Fail, Src) {
+ if (is_not_list($Src)) {
+ $FAIL($Fail);
+ }
+ }
+
+##### JUMP(Fail) #####
+
+Branch to `$Fail`. Suppresses generation of dispatch of the next
+instruction (`-no_next`). Typical use:
+
+ jump(Fail) {
+ $JUMP($Fail);
+ }
+
+##### GC_TEST(NeedStack, NeedHeap, Live) #####
+
+`$GC_TEST(NeedStack, NeedHeap, Live)` tests that given amount of
+stack space and heap space is available. If not it will do a
+garbage collection. Typical use:
+
+ test_heap(Nh, Live) {
+ $GC_TEST(0, $Nh, $Live);
+ }
+
+##### AH(NeedStack, NeedHeap, Live) #####
+
+`AH(NeedStack, NeedHeap, Live)` allocates a stack frame and
+optionally additional heap space.
+
+#### Pre-defined macros and variables ####
+
+**beam\_makeops** defines several built-in macros and pre-bound variables.
+
+##### The NEXT_INSTRUCTION pre-bound variable #####
+
+The NEXT_INSTRUCTION is a pre-bound variable that is available in
+all instructions. It expands to the address of the next instruction.
+
+Here is an example:
+
+ i_call(CallDest) {
+ SET_CP(c_p, $NEXT_INSTRUCTION);
+ $DISPATCH_REL($CallDest);
+ }
+
+When calling a function, the return address is first stored in `c_p->cp`
+(using the `SET_CP()` macro defined in `beam_emu.c`), and then control is
+transferred to the callee. Here is the generated code:
+
+ OpCase(i_call_f):
+ {
+ SET_CP(c_p, I+1);
+ ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0)));
+ I += fb(BeamExtraData(I[0])) + 0;;
+ DTRACE_LOCAL_CALL(c_p, erts_code_to_codemfa(I));
+ Dispatch();;
+ }
+
+We can see that that `$NEXT_INSTRUCTION` has been expanded to `I+1`.
+That makes sense since the size of the `i_call_f/1` instruction is
+one word.
+
+##### The IP_ADJUSTMENT pre-bound variable #####
+
+`$IP_ADJUSTMENT` is usually 0. In a few combined instructions
+(described below) it can be non-zero. It is used like this
+in `macros.tab`:
+
+ SET_I_REL(Offset) {
+ ASSERT(VALID_INSTR(*(I + ($Offset) + $IP_ADJUSTMENT)));
+ I += $Offset + $IP_ADJUSTMENT;
+ }
+
+Avoid using `IP_ADJUSTMENT` directly. Use `SET_I_REL()` or
+one of the macros that invoke such as `FAIL()` or `JUMP()`
+defined in `macros.tab`.
+
+#### Pre-defined macro functions ####
+
+##### The IF() macro #####
+
+`$IF(Expr, IfTrue, IfFalse)` evaluates `Expr`, which must be a valid
+Perl expression (which for simple numeric expressions have the same
+syntax as C). If `Expr` evaluates to 0, the entire `IF()` expression will be
+replaced with `IfFalse`, otherwise it will be replaced with `IfTrue`.
+
+See the description of `OPERAND_POSITION()` for an example.
+
+##### The OPERAND\_POSITION() macro #####
+
+`$OPERAND_POSITION(Expr)` returns the position for `Expr`, if
+`Expr` is an operand that is not packed. The first operand is
+at position 1.
+
+Returns 0 otherwise.
+
+This macro could be used like this in order to share code:
+
+ FAIL(Fail) {
+ //| -no_prefetch
+ $IF($OPERAND_POSITION($Fail) == 1 && $IP_ADJUSTMENT == 0,
+ goto common_jump,
+ $DO_JUMP($Fail));
+ }
+
+ DO_JUMP(Fail) {
+ $SET_I_REL($Fail);
+ Goto(*I));
+ }
+
+ // In beam_emu.c:
+ common_jump:
+ I += I[1];
+ Goto(*I));
+
+
+#### The $REFRESH\_GEN\_DEST() macro ####
+
+When a specific instruction has a `d` operand, early during execution
+of the instruction, a pointer will be initialized to point to the X or
+Y register in question.
+
+If there is a garbage collection before the result is stored,
+the stack will move and if the `d` operand refered to a Y
+register, the pointer will no longer be valid. (Y registers are
+stored on the stack.)
+
+In those circumstances, `$REFRESH_GEN_DEST()` must be invoked
+to set up the pointer again. **beam\_makeops** will notice
+if there is a call to a function that does a garbage collection and
+`$REFRESH_GEN_DEST()` is not called.
+
+Here is a complete example. The `new_map` instruction is defined
+like this:
+
+ new_map d t I
+
+It is implemented like this:
+
+ new_map(Dst, Live, N) {
+ Eterm res;
+
+ HEAVY_SWAPOUT;
+ res = erts_gc_new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION);
+ HEAVY_SWAPIN;
+ $REFRESH_GEN_DEST();
+ $Dst = res;
+ $NEXT($NEXT_INSTRUCTION+$N);
+ }
+
+If we have forgotten the `$REFRESH_GEN_DEST()` there would be a message
+similar to this:
+
+ pointer to destination register is invalid after GC -- use $REFRESH_GEN_DEST()
+ ... from the body of new_map at beam/map_instrs.tab(30)
+
+#### Combined instructions ####
+
+**Problem**: For frequently executed instructions we want to use
+"fast" operands types such as `x` and `y`, as opposed to `s` or `S`.
+To avoid an explosion in code size, we want to share most of the
+implementation between the instructions. Here are the specific
+instructions for `i_increment/5`:
+
+ i_increment r W t d
+ i_increment x W t d
+ i_increment y W t d
+
+The `i_increment` instruction is implemented like this:
+
+ i_increment(Source, IncrementVal, Live, Dst) {
+ Eterm increment_reg_source = $Source;
+ Eterm increment_val = $IncrementVal;
+ Uint live;
+ Eterm result;
+
+ if (ERTS_LIKELY(is_small(increment_reg_val))) {
+ Sint i = signed_val(increment_reg_val) + increment_val;
+ if (ERTS_LIKELY(IS_SSMALL(i))) {
+ $Dst = make_small(i);
+ $NEXT0();
+ }
+ }
+ live = $Live;
+ HEAVY_SWAPOUT;
+ reg[live] = increment_reg_val;
+ reg[live+1] = make_small(increment_val);
+ result = erts_gc_mixed_plus(c_p, reg, live);
+ HEAVY_SWAPIN;
+ ERTS_HOLE_CHECK(c_p);
+ if (ERTS_LIKELY(is_value(result))) {
+ $REFRESH_GEN_DEST();
+ $Dst = result;
+ $NEXT0();
+ }
+ ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue));
+ goto find_func_info;
+ }
+
+There will be three almost identical copies of the code. Given the
+size of the code, that could be too high cost to pay.
+
+To avoid the three copies of the code, we could use only one specific
+instruction:
+
+ i_increment S W t d
+
+(The same implementation as above will work.)
+
+That reduces the code size, but is slower because `S` means that
+there will be extra code to test whether the operand refers to an X
+register or a Y register.
+
+**Solution**: We can use "combined instructions". Combined
+instructions are combined from instruction fragments. The
+bulk of the code can be shared.
+
+Here we will show how `i_increment` can be implemented as a combined
+instruction. We will show each individual fragment first, and then
+show how to connect them together. First we will need a variable that
+we can store the value fetched from the register in:
+
+ increment.head() {
+ Eterm increment_reg_val;
+ }
+
+The name `increment` is the name of the group that the fragment
+belongs to. Note that it does not need to have the same
+name as the instruction. The group name is followed by `.` and
+the name of the fragment. The name `head` is pre-defined.
+The code in it will be placed at the beginning of a block, so
+that all fragments in the group can access it.
+
+Next we define the fragment that will pick up the value from the
+register from the first operand:
+
+ increment.fetch(Src) {
+ increment_reg_val = $Src;
+ }
+
+We call this fragment `fetch`. This fragment will be duplicated three
+times, one for each value of the first operand (`r`, `x`, and `y`).
+
+Next we define the main part of the code that do the actual incrementing.
+
+ increment.execute(IncrementVal, Live, Dst) {
+ Eterm increment_val = $IncrementVal;
+ Uint live;
+ Eterm result;
+
+ if (ERTS_LIKELY(is_small(increment_reg_val))) {
+ Sint i = signed_val(increment_reg_val) + increment_val;
+ if (ERTS_LIKELY(IS_SSMALL(i))) {
+ $Dst = make_small(i);
+ $NEXT0();
+ }
+ }
+ live = $Live;
+ HEAVY_SWAPOUT;
+ reg[live] = increment_reg_val;
+ reg[live+1] = make_small(increment_val);
+ result = erts_gc_mixed_plus(c_p, reg, live);
+ HEAVY_SWAPIN;
+ ERTS_HOLE_CHECK(c_p);
+ if (ERTS_LIKELY(is_value(result))) {
+ $REFRESH_GEN_DEST();
+ $Dst = result;
+ $NEXT0();
+ }
+ ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue));
+ goto find_func_info;
+ }
+
+We call this fragment `execute`. It will handle the three remaining
+operands (`W t d`). There will only be one copy of this fragment.
+
+Now that we have defined the fragments, we need to inform
+**beam\_makeops** how they should be connected:
+
+ i_increment := increment.fetch.execute;
+
+To the left of the `:=` is the name of the specific instruction that
+should be implemented by the fragments, in this case `i_increment`.
+To the right of `:=` is the name of the group with the fragments,
+followed by a `.`. Then the name of the fragments in the group are
+listed in the order they should be executed. Note that the `head`
+fragment is not listed.
+
+The line ends in `;` (to avoid messing up the indentation in Emacs).
+
+(Note that in practice the `:=` line is usually placed before the
+fragments.)
+
+The generated code looks like this:
+
+ {
+ Eterm increment_reg_val;
+ OpCase(i_increment_rWtd):
+ {
+ increment_reg_val = r(0);
+ }
+ goto increment__execute;
+
+ OpCase(i_increment_xWtd):
+ {
+ increment_reg_val = xb(BeamExtraData(I[0]));
+ }
+ goto increment__execute;
+
+ OpCase(i_increment_yWtd):
+ {
+ increment_reg_val = yb(BeamExtraData(I[0]));
+ }
+ goto increment__execute;
+
+ increment__execute:
+ {
+ // Here follows the code from increment.execute()
+ .
+ .
+ .
+ }
+
+##### Some notes about combined instructions #####
+
+The operands that are different must be at
+the beginning of the instruction. All operands in the last
+fragment must have the same operands in all variants of
+the specific instruction.
+
+As an example, the following specific instructions cannot be
+implemented as a combined instruction:
+
+ i_times j? t x x d
+ i_times j? t x y d
+ i_times j? t s s d
+
+We would have to change the order of the operands so that the
+two operands that are different are placed first:
+
+ i_times x x j? t d
+ i_times x y j? t d
+ i_times s s j? t d
+
+We can then define:
+
+ i_times := times.fetch.execute;
+
+ times.head {
+ Eterm op1, op2;
+ }
+
+ times.fetch(Src1, Src2) {
+ op1 = $Src1;
+ op2 = $Src2;
+ }
+
+ times.execute(Fail, Live, Dst) {
+ // Multiply op1 and op2.
+ .
+ .
+ .
+ }
+
+Several instructions can share a group. As an example, the following
+instructions have different names, but in the end they all create a
+binary. The last two operands are common for all of them:
+
+ i_bs_init_fail xy j? t? x
+ i_bs_init_fail_heap s I j? t? x
+ i_bs_init W t? x
+ i_bs_init_heap W I t? x
+
+The instructions are defined like this (formatted with extra
+spaces for clarity):
+
+ i_bs_init_fail_heap := bs_init . fail_heap . verify . execute;
+ i_bs_init_fail := bs_init . fail . verify . execute;
+ i_bs_init := bs_init . . plain . execute;
+ i_bs_init_heap := bs_init . heap . execute;
+
+Note that the first two instruction have three fragments, while the
+other two only have two fragments. Here are the fragments:
+
+ bs_init_bits.head() {
+ Eterm num_bits_term;
+ Uint num_bits;
+ Uint alloc;
+ }
+
+ bs_init_bits.plain(NumBits) {
+ num_bits = $NumBits;
+ alloc = 0;
+ }
+
+ bs_init_bits.heap(NumBits, Alloc) {
+ num_bits = $NumBits;
+ alloc = $Alloc;
+ }
+
+ bs_init_bits.fail(NumBitsTerm) {
+ num_bits_term = $NumBitsTerm;
+ alloc = 0;
+ }
+
+ bs_init_bits.fail_heap(NumBitsTerm, Alloc) {
+ num_bits_term = $NumBitsTerm;
+ alloc = $Alloc;
+ }
+
+ bs_init_bits.verify(Fail) {
+ // Verify the num_bits_term, fail using $FAIL
+ // if there is a problem.
+ .
+ .
+ .
+ }
+
+ bs_init_bits.execute(Live, Dst) {
+ // Long complicated code to a create a binary.
+ .
+ .
+ .
+ }
+
+The full definitions of those instructions can be found in `bs_instrs.tab`.
+The generated code can be found in `beam_warm.h`.
diff --git a/erts/emulator/nifs/common/zlib_nif.c b/erts/emulator/nifs/common/zlib_nif.c
index fa29b4fb71..b709ed5a6f 100644
--- a/erts/emulator/nifs/common/zlib_nif.c
+++ b/erts/emulator/nifs/common/zlib_nif.c
@@ -717,7 +717,9 @@ static ERL_NIF_TERM zlib_deflateEnd(ErlNifEnv *env, int argc, const ERL_NIF_TERM
static ERL_NIF_TERM zlib_deflateParams(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
zlib_data_t *d;
+
int res, level, strategy;
+ Bytef dummy_buffer;
if(argc != 3 || !get_zlib_data(env, argv[0], &d)
|| !enif_get_int(env, argv[1], &level)
@@ -729,12 +731,27 @@ static ERL_NIF_TERM zlib_deflateParams(ErlNifEnv *env, int argc, const ERL_NIF_T
return enif_raise_exception(env, am_not_initialized);
}
- /* deflateParams will flush everything currently in the stream, corrupting
- * the heap unless it's empty. We therefore pretend to have a full output
- * buffer, forcing a Z_BUF_ERROR if there's anything left to be flushed. */
- d->s.avail_out = 0;
+ /* This is a bit of a hack; deflateParams flushes with Z_BLOCK which won't
+ * stop at a byte boundary, so we can't split this operation up, and we
+ * can't allocate a buffer large enough to fit it in one go since we have
+ * to support zlib versions that lack deflatePending.
+ *
+ * We therefore flush everything prior to this call to ensure that we are
+ * stopped on a byte boundary and have no pending data. We then hand it a
+ * dummy buffer to detect when this assumption doesn't hold (Hopefully
+ * never), and to smooth over an issue with zlib 1.2.11 which always
+ * returns Z_BUF_ERROR when d->s.avail_out is 0, regardless of whether
+ * there's any pending data or not. */
+
+ d->s.next_out = &dummy_buffer;
+ d->s.avail_out = 1;
+
res = deflateParams(&d->s, level, strategy);
+ if(d->s.avail_out == 0) {
+ return zlib_return(env, Z_STREAM_ERROR);
+ }
+
return zlib_return(env, res);
}
@@ -929,7 +946,7 @@ static ERL_NIF_TERM zlib_inflate(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
return enif_raise_exception(env, am_not_initialized);
}
- if(d->eos_seen) {
+ if(d->eos_seen && enif_ioq_size(d->input_queue) > 0) {
int res;
switch(d->eos_behavior) {
@@ -943,11 +960,10 @@ static ERL_NIF_TERM zlib_inflate(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
}
d->eos_seen = 0;
+
break;
case EOS_BEHAVIOR_CUT:
zlib_reset_input(d);
-
- return enif_make_tuple2(env, am_finished, enif_make_list(env, 0));
}
}
diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl
index 2d0ae9c83e..e2914cbc92 100644
--- a/erts/emulator/test/distribution_SUITE.erl
+++ b/erts/emulator/test/distribution_SUITE.erl
@@ -1365,81 +1365,59 @@ bad_dist_structure(Config) when is_list(Config) ->
start_monitor(Offender,P),
P ! one,
send_bad_structure(Offender, P,{?DOP_MONITOR_P_EXIT,'replace',P,normal},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_monitor(Offender,P),
send_bad_structure(Offender, P,{?DOP_MONITOR_P_EXIT,'replace',P,normal,normal},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_link(Offender,P),
send_bad_structure(Offender, P,{?DOP_LINK},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_link(Offender,P),
send_bad_structure(Offender, P,{?DOP_UNLINK,'replace'},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_link(Offender,P),
send_bad_structure(Offender, P,{?DOP_UNLINK,'replace',make_ref()},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_link(Offender,P),
send_bad_structure(Offender, P,{?DOP_UNLINK,make_ref(),P},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_link(Offender,P),
send_bad_structure(Offender, P,{?DOP_UNLINK,normal,normal},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_monitor(Offender,P),
send_bad_structure(Offender, P,{?DOP_MONITOR_P,'replace',P},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_monitor(Offender,P),
send_bad_structure(Offender, P,{?DOP_MONITOR_P,'replace',P,normal},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_monitor(Offender,P),
send_bad_structure(Offender, P,{?DOP_DEMONITOR_P,'replace',P},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
start_monitor(Offender,P),
send_bad_structure(Offender, P,{?DOP_DEMONITOR_P,'replace',P,normal},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
+
send_bad_structure(Offender, P,{?DOP_EXIT,'replace',P},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT,make_ref(),normal,normal},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT_TT,'replace',token,P},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT_TT,make_ref(),token,normal,normal},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT2,'replace',P},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT2,make_ref(),normal,normal},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT2_TT,'replace',token,P},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_EXIT2_TT,make_ref(),token,normal,normal},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_GROUP_LEADER,'replace'},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_GROUP_LEADER,'replace','atomic'},2),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_GROUP_LEADER,'replace',P},0),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_REG_SEND_TT,'replace','',name},2,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_REG_SEND_TT,'replace','',name,token},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace',''},2,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace','',P},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace','',name},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace','',name,{token}},2,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_SEND_TT,'',P},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_SEND_TT,'',name,token},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_SEND,''},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_SEND,'',name},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
send_bad_structure(Offender, P,{?DOP_SEND,'',P,{token}},0,{message}),
- pong = rpc:call(Victim, net_adm, ping, [Offender]),
P ! two,
P ! check_msgs,
receive
@@ -1685,13 +1663,16 @@ bad_dist_ext_size(Config) when is_list(Config) ->
start_node_monitors([Offender,Victim]),
Parent = self(),
- P = spawn_link(Victim,
+ P = spawn_opt(Victim,
fun () ->
Parent ! {self(), started},
receive check_msgs -> ok end, %% DID CRASH HERE
bad_dist_ext_check_msgs([one]),
Parent ! {self(), messages_checked}
- end),
+ end,
+ [link,
+ %% on_heap to force total_heap_size to inspect msg queue
+ {message_queue_data, on_heap}]),
receive {P, started} -> ok end,
P ! one,
@@ -1714,6 +1695,7 @@ bad_dist_ext_size(Config) when is_list(Config) ->
verify_still_up(Offender, Victim),
+ %% Let process_info(P, total_heap_size) find bad msg and disconnect
rpc:call(Victim, erlang, process_info, [P, total_heap_size]),
verify_down(Offender, connection_closed, Victim, killed),
@@ -1795,10 +1777,11 @@ send_bad_structure(Offender,Victim,Bad,WhereToPutSelf) ->
send_bad_structure(Offender,Victim,Bad,WhereToPutSelf,PayLoad) ->
Parent = self(),
Done = make_ref(),
- spawn(Offender,
+ spawn_link(Offender,
fun () ->
Node = node(Victim),
pong = net_adm:ping(Node),
+ erlang:monitor_node(Node, true),
DCtrl = dctrl(Node),
Bad1 = case WhereToPutSelf of
0 ->
@@ -1812,7 +1795,16 @@ send_bad_structure(Offender,Victim,Bad,WhereToPutSelf,PayLoad) ->
[] -> [];
_Other -> [dmsg_ext(PayLoad)]
end,
+
+ receive {nodedown, Node} -> exit("premature nodedown")
+ after 10 -> ok
+ end,
+
dctrl_send(DCtrl, DData),
+
+ receive {nodedown, Node} -> ok
+ after 5000 -> exit("missing nodedown")
+ end,
Parent ! {DData,Done}
end),
receive
diff --git a/erts/emulator/test/process_SUITE.erl b/erts/emulator/test/process_SUITE.erl
index a9f20f9928..a8bcfac84d 100644
--- a/erts/emulator/test/process_SUITE.erl
+++ b/erts/emulator/test/process_SUITE.erl
@@ -2532,8 +2532,13 @@ system_task_on_suspended(Config) when is_list(Config) ->
end.
gc_request_when_gc_disabled(Config) when is_list(Config) ->
- Master = self(),
AIS = erts_debug:set_internal_state(available_internal_state, true),
+ gc_request_when_gc_disabled_do(ref),
+ gc_request_when_gc_disabled_do(immed),
+ erts_debug:set_internal_state(available_internal_state, AIS).
+
+gc_request_when_gc_disabled_do(ReqIdType) ->
+ Master = self(),
{P, M} = spawn_opt(fun () ->
true = erts_debug:set_internal_state(gc_state,
false),
@@ -2545,7 +2550,10 @@ gc_request_when_gc_disabled(Config) when is_list(Config) ->
receive after 100 -> ok end
end, [monitor, link]),
receive {P, gc_state, false} -> ok end,
- ReqId = make_ref(),
+ ReqId = case ReqIdType of
+ ref -> make_ref();
+ immed -> immed
+ end,
async = garbage_collect(P, [{async, ReqId}]),
receive
{garbage_collect, ReqId, Result} ->
@@ -2554,7 +2562,6 @@ gc_request_when_gc_disabled(Config) when is_list(Config) ->
ok
end,
receive {garbage_collect, ReqId, true} -> ok end,
- erts_debug:set_internal_state(available_internal_state, AIS),
receive {'DOWN', M, process, P, _Reason} -> ok end,
ok.
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index d7791d23fa..da994fae3e 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -19,7 +19,7 @@
# %CopyrightEnd%
#
use strict;
-use vars qw($BEAM_FORMAT_NUMBER);
+use vars qw($BEAM_FORMAT_NUMBER $GC_REGEXP);
use constant COLD => 0;
use constant WARM => 1;
use constant HOT => 2;
@@ -36,6 +36,7 @@ use constant PACK_CMD_LOOSE => '3';
use constant PACK_CMD_WIDE => '4';
$BEAM_FORMAT_NUMBER = undef;
+$GC_REGEXP = undef;
my $target = \&emulator_output;
my $outdir = "."; # Directory for output files.
@@ -77,6 +78,10 @@ my %num_specific;
my %gen_to_spec;
my %specific_op;
+# The following hashes are used for error checking.
+my %print_name;
+my %specific_op_arity;
+
# Information about each specific operator. Key is the print name (e.g. get_list_xxy).
# Value is a hash.
my %spec_op_info;
@@ -131,7 +136,10 @@ my $loader_types = "nprvlqo";
my $genop_types = $compiler_types . $loader_types;
#
-# Defines the argument types and their loaded size assuming no packing.
+# Define the operand types and their loaded size assuming no packing.
+#
+# Those are the types that can be used in the definition of a specific
+# instruction.
#
my %arg_size = ('r' => 0, # x(0) - x register zero
'x' => 1, # x(N), N > 0 - x register
@@ -154,12 +162,35 @@ my %arg_size = ('r' => 0, # x(0) - x register zero
'A' => 1, # arity value
'P' => 1, # byte offset into tuple or stack
'Q' => 1, # like 'P', but packable
- 'h' => 1, # character
+ 'h' => 1, # character (not used)
'l' => 1, # float reg
'q' => 1, # literal term
);
#
+# Define the types that may be used in a transformation rule.
+#
+# %pattern_type defines the types that may be used in a pattern
+# on the left side.
+#
+# %construction_type defines the types that may be used when
+# constructing a new instruction on the right side (a subset of
+# the pattern types that are possible to construct).
+#
+my $pattern_types = "acdfjilnopqsuxy";
+my %pattern_type;
+@pattern_type{split("", $pattern_types)} = (1) x length($pattern_types);
+
+my %construction_type;
+foreach my $type (keys %pattern_type) {
+ $construction_type{$type} = 1
+ if index($genop_types, $type) >= 0;
+}
+foreach my $makes_no_sense ('f', 'j', 'o', 'p', 'q') {
+ delete $construction_type{$makes_no_sense};
+}
+
+#
# Generate bits.
#
my %type_bit;
@@ -194,7 +225,8 @@ sub define_type_bit {
define_type_bit('S', $type_bit{'d'});
define_type_bit('j', $type_bit{'f'} | $type_bit{'p'});
- # Aliases (for matching purposes).
+ # Aliases of 'u'. Those specify how to load the operand and
+ # what kind of packing can be done.
define_type_bit('t', $type_bit{'u'});
define_type_bit('I', $type_bit{'u'});
define_type_bit('W', $type_bit{'u'});
@@ -279,9 +311,15 @@ if ($wordsize == 64) {
# Add placeholders for built-in macros.
#
-$c_code{'IS_PACKED'} = ['$Expr',"built-in macro",('Expr')];
-$c_code{'ARG_POSITION'} = ['$Expr',"built-in macro",('Expr')];
-foreach my $name (keys %c_code) {
+my %predef_macros =
+ (OPERAND_POSITION => ['Expr'],
+ IF => ['Expr','IfTrue','IfFalse'],
+ REFRESH_GEN_DEST => [],
+ );
+foreach my $name (keys %predef_macros) {
+ my @args = @{$predef_macros{$name}};
+ my $body = join(':', map { '$' . $_ } @args);
+ $c_code{$name} = [$body,"built-in macro",@args],
$c_code_used{$name} = 1;
}
@@ -359,8 +397,10 @@ while (<>) {
#
if (/^([\w_][\w\d_]+)=(.*)/) {
no strict 'refs';
- my($name) = $1;
- $$name = $2;
+ my $name = $1;
+ my $value = $2;
+ $value =~ s/;\s*$//;
+ $$name = $value;
next;
}
@@ -1019,6 +1059,22 @@ sub parse_specific_op {
my $key = "$name/$arity";
foreach my $args_ref (@res) {
@args = @$args_ref;
+ my $arity = @args;
+ my $loc = "$ARGV($.)";
+ if (defined $specific_op_arity{$name}) {
+ my($prev_arity,$loc) = @{$specific_op_arity{$name}};
+ if ($arity != $prev_arity) {
+ error("$name defined with arity $arity, " .
+ "but previously defined with arity $prev_arity at $loc");
+ }
+ }
+ $specific_op_arity{$name} = [$arity,$loc];
+ my $print_name = print_name($name, @args);
+ if (defined $print_name{$print_name}) {
+ error("$name @args: already defined at " .
+ $print_name{$print_name});
+ }
+ $print_name{$print_name} = $loc;
push @{$specific_op{$key}}, [$name,$hotness,@args];
}
@@ -1333,7 +1389,9 @@ sub cg_basic {
#
sub cg_combined_size {
- my %params = (@_, pack_options => \@basic_pack_options);
+ my %params = (@_,
+ pack_options => \@basic_pack_options,
+ size_only => 1);
$params{pack_options} = \@extended_pack_options
if $params{first};
my($size) = code_gen(%params);
@@ -1361,6 +1419,7 @@ sub code_gen {
my %params = (extra_comments => '',
offset => 0,
inc => 0,
+ size_only => 0,
@_);
my $name = $params{name};
my $extra_comments = $params{extra_comments};
@@ -1393,6 +1452,7 @@ sub code_gen {
my $need_block = 0;
my $arg_offset = $offset;
+ my $has_gen_dest = 0;
@args = map { s/[?]$//g; $_ } @args;
foreach (@args) {
my($this_size) = $arg_size{$_};
@@ -1403,6 +1463,7 @@ sub code_gen {
"Eterm* dst_ptr = REG_TARGET_PTR(dst);\n";
push(@f, "*dst_ptr");
$this_size = $1;
+ $has_gen_dest = 1;
last SWITCH;
};
/^packed:[a-zA-z]:(\d):(.*)/ and do {
@@ -1435,6 +1496,7 @@ sub code_gen {
$var_decls .= "Eterm dst = " . arg_offset($arg_offset) . ";\n" .
"Eterm* dst_ptr = REG_TARGET_PTR(dst);\n";
push(@f, "*dst_ptr");
+ $has_gen_dest = 1;
last SWITCH;
};
defined $arg_size{$_} and do {
@@ -1449,10 +1511,10 @@ sub code_gen {
}
#
- # If the implementation is in beam_emu.c, there is nothing
- # more to do.
+ # If the implementation is in beam_emu.c or if
+ # the caller only wants the size, we are done.
#
- unless (defined $c_code_ref) {
+ if (not defined $c_code_ref or $params{size_only}) {
return ($size+1, undef, '');
}
@@ -1517,9 +1579,36 @@ sub code_gen {
"{",
"$var_decls$body",
"}", "");
+
+ # Make sure that $REFRESH_GEN_DEST() is used when a
+ # general destination ('d') may have been clobbered by
+ # a GC.
+ my $gc_error = verify_gc_code($code, $has_gen_dest);
+ if (defined $gc_error) {
+ warn $gc_error;
+ error("... from the body of $name at $where");
+ }
+
+ # Done.
($size+1, $code, $pack_spec);
}
+sub verify_gc_code {
+ my $code = shift;
+ my $has_gen_dest = shift;
+
+ return unless $has_gen_dest;
+
+ if ($code =~ /$GC_REGEXP/o) {
+ my $code_after_gc = substr($code, $+[0]);
+ unless ($code_after_gc =~ /dst_ptr = REG_TARGET_PTR/) {
+ return "pointer to destination register is invalid after GC -- " .
+ "use \$REFRESH_GEN_DEST()\n";
+ }
+ }
+ return undef;
+}
+
sub arg_offset {
my $offset = shift;
"I[" . ($offset+1) . "]";
@@ -1619,17 +1708,26 @@ sub expand_macro {
}
# Handle built-in macros.
- if ($name eq 'ARG_POSITION') {
+ if ($name eq 'OPERAND_POSITION') {
if ($body =~ /^I\[(\d+)\]$/) {
$body = $1;
} else {
$body = 0;
}
- } elsif ($name eq 'IS_PACKED') {
- $body = ($body =~ /^I\[\d+\]$/) ? 0 : 1;
+ } elsif ($name eq 'IF') {
+ my $expr = $new_bindings{Expr};
+ my $bool = eval $expr;
+ if ($@ ne '') {
+ &error("bad expression '$expr' in \$IF()");
+ }
+ my $part = $bool ? 'IfTrue' : 'IfFalse';
+ $body = $new_bindings{$part};
+ } elsif ($name eq 'REFRESH_GEN_DEST') {
+ $body = "dst_ptr = REG_TARGET_PTR(dst)";
}
- # Wrap body if needed and return resul.t
+
+ # Wrap body if needed and return result.
$body = "do {\n$body\n} while (0)"
if needs_do_wrapper($body);
($body,$rest);
@@ -2156,12 +2254,19 @@ sub tr_parse_op {
if (/^([a-z*]+)(.*)/) {
$type = $1;
$_ = $2;
+ error("$type: only a single type is allowed on right side of transformations")
+ if not $src and length($type) > 1;
foreach (split('', $type)) {
- error("bad type in $op")
- unless defined $type_bit{$_} or $type eq '*';
- $_ eq 'r' and
- error("$op: 'r' is not allowed in transformations")
- }
+ next if $src and $type eq '*';
+ error("$op: not a type")
+ unless defined $type_bit{$_};
+ error("$op: the type '$_' is not allowed in transformations")
+ unless defined $pattern_type{$_};
+ if (not $src) {
+ error("$op: type '$_' is not allowed on the right side of transformations")
+ unless defined $construction_type{$_};
+ }
+ }
}
# Get an optional condition. (In source.)
@@ -2194,10 +2299,18 @@ sub tr_parse_op {
}
# Get an optional value. (In destination.)
- $type_val = $type eq 'x' ? 1023 : 0;
+ if ($type eq 'x') {
+ $type_val = 1023;
+ } elsif ($type eq 'a') {
+ $type_val = 'am_Empty';
+ } else {
+ $type_val = 0;
+ }
if (/^=(.*)/) {
- error("value not allowed in source: $op")
+ error("$op: value not allowed in source")
if $src;
+ error("$op: the type 'n' must not be given a value")
+ if $type eq 'n';
$type_val = $1;
$_ = '';
}
@@ -2207,13 +2320,16 @@ sub tr_parse_op {
error("garbage '$_' after operand: $op")
unless /^\s*$/;
- # Test that destination has no conditions.
+ # Check the conditions.
- unless ($src) {
- error("condition not allowed in destination: $op")
+ if ($src) {
+ error("$op: the type '$type' is not allowed to be compared with a literal value")
+ if $cond and not $construction_type{$type};
+ } else {
+ error("$op: condition not allowed in destination")
if $cond;
- error("variable name and type cannot be combined in destination: $op")
- if $var && $type;
+ error("$op: variable name and type cannot be combined in destination")
+ if $var and $type;
}
($var,$type,$type_val,$cond,$cond_val);