diff options
Diffstat (limited to 'erts/emulator')
-rw-r--r-- | erts/emulator/beam/beam_bp.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 21 | ||||
-rw-r--r-- | erts/emulator/beam/beam_load.c | 14 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif.c | 53 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/macros.tab | 9 | ||||
-rw-r--r-- | erts/emulator/beam/map_instrs.tab | 8 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 2 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_amd64_bifs.m4 | 36 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_bif0.tab | 1 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_bif_list.m4 | 5 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_debug.c | 8 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_native_bif.c | 9 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_native_bif.h | 2 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_primops.h | 1 | ||||
-rw-r--r-- | erts/emulator/internal_doc/beam_makeops.md | 1846 | ||||
-rw-r--r-- | erts/emulator/nifs/common/zlib_nif.c | 30 | ||||
-rw-r--r-- | erts/emulator/test/distribution_SUITE.erl | 64 | ||||
-rw-r--r-- | erts/emulator/test/process_SUITE.erl | 13 | ||||
-rwxr-xr-x | erts/emulator/utils/beam_makeops | 174 |
20 files changed, 2122 insertions, 178 deletions
diff --git a/erts/emulator/beam/beam_bp.c b/erts/emulator/beam/beam_bp.c index fe1e15701b..0832b3f374 100644 --- a/erts/emulator/beam/beam_bp.c +++ b/erts/emulator/beam/beam_bp.c @@ -998,7 +998,9 @@ do_call_trace(Process* c_p, ErtsCodeInfo* info, Eterm* reg, fixup_cp_before_trace(c_p, &return_to_trace); + ERTS_UNREQ_PROC_MAIN_LOCK(c_p); flags = erts_call_trace(c_p, info, ms, reg, local, &tracer); + ERTS_REQ_PROC_MAIN_LOCK(c_p); /* restore cp after potential fixup */ c_p->cp = cp_save; diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 60d0008d8f..bc95ccec52 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -400,12 +400,13 @@ static BeamInstr* apply_fun(Process* p, Eterm fun, Eterm args, Eterm* reg) NOINLINE; static Eterm new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free) NOINLINE; -static Eterm new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) NOINLINE; -static Eterm new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, +static Eterm erts_gc_new_map(Process* p, Eterm* reg, Uint live, + Uint n, BeamInstr* ptr) NOINLINE; +static Eterm erts_gc_new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamInstr* ptr) NOINLINE; -static Eterm update_map_assoc(Process* p, Eterm* reg, Uint live, +static Eterm erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p) NOINLINE; -static Eterm update_map_exact(Process* p, Eterm* reg, Uint live, +static Eterm erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p) NOINLINE; static Eterm get_map_element(Eterm map, Eterm key); static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx); @@ -2755,7 +2756,7 @@ do { \ static Eterm -new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) +erts_gc_new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) { Uint i; Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */; @@ -2812,7 +2813,8 @@ new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) } static Eterm -new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamInstr* ptr) +erts_gc_new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, + Uint live, BeamInstr* ptr) { Eterm* keys = tuple_val(keys_literal); Uint n = arityval(*keys); @@ -2846,7 +2848,8 @@ new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamIns } static Eterm -update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p) +erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, + Uint n, BeamInstr* new_p) { Uint num_old; Uint num_updates; @@ -2892,7 +2895,7 @@ update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p) */ if (num_old == 0) { - return new_map(p, reg, live, n, new_p); + return erts_gc_new_map(p, reg, live, n, new_p); } /* @@ -3048,7 +3051,7 @@ update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p) */ static Eterm -update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p) +erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p) { Uint i; Uint num_old; diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 7331c331a6..4fcfea527c 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2384,8 +2384,18 @@ load_code(LoaderState* stp) code[ci++] = NIL; break; case TAG_q: - new_literal_patch(stp, ci); - code[ci++] = tmp_op->a[arg].val; + { + BeamInstr val = tmp_op->a[arg].val; + Eterm term = stp->literals[val].term; + new_literal_patch(stp, ci); + code[ci++] = val; + switch (loader_tag(term)) { + case LOADER_X_REG: + case LOADER_Y_REG: + LoadError1(stp, "the term '%T' would be confused " + "with a register", term); + } + } break; default: LoadError1(stp, "bad tag %d for general source", diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index f7f12efe28..c21b139cfa 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -3323,36 +3323,38 @@ static int examine_iovec_term(Eterm list, UWord max_length, iovec_slice_t *resul size = binary_size(binary); binary_header = binary_val(binary); - /* If we're a sub-binary we'll need to check our underlying binary to - * determine whether we're on-heap or not. */ - if(thing_subtag(*binary_header) == SUB_BINARY_SUBTAG) { - ErlSubBin *sb = (ErlSubBin*)binary_header; - - /* Reject bitstrings */ - if((sb->bitoffs + sb->bitsize) > 0) { - return 0; + if (size > 0) { + /* If we're a sub-binary we'll need to check our underlying binary + * to determine whether we're on-heap or not. */ + if (thing_subtag(*binary_header) == SUB_BINARY_SUBTAG) { + ErlSubBin *sb = (ErlSubBin*)binary_header; + + /* Reject bitstrings */ + if((sb->bitoffs + sb->bitsize) > 0) { + return 0; + } + + ASSERT(size <= binary_size(sb->orig)); + binary_header = binary_val(sb->orig); } - ASSERT(size <= binary_size(sb->orig)); - binary_header = binary_val(sb->orig); - } - - if(thing_subtag(*binary_header) == HEAP_BINARY_SUBTAG) { - ASSERT(size <= ERL_ONHEAP_BIN_LIMIT); + if (thing_subtag(*binary_header) == HEAP_BINARY_SUBTAG) { + ASSERT(size <= ERL_ONHEAP_BIN_LIMIT); - result->iovec_len += 1; - result->onheap_size += size; - } else { - ASSERT(thing_subtag(*binary_header) == REFC_BINARY_SUBTAG); + result->iovec_len += 1; + result->onheap_size += size; + } else { + ASSERT(thing_subtag(*binary_header) == REFC_BINARY_SUBTAG); - result->iovec_len += 1 + size / MAX_SYSIOVEC_IOVLEN; - result->offheap_size += size; + result->iovec_len += 1 + size / MAX_SYSIOVEC_IOVLEN; + result->offheap_size += size; + } } result->sublist_length += 1; lookahead = CDR(cell); - if(result->sublist_length >= max_length) { + if (result->sublist_length >= max_length) { break; } } @@ -3385,6 +3387,10 @@ static void inspect_raw_binary_data(Eterm binary, ErlNifBinary *result) { if (thing_subtag(*parent_header) == REFC_BINARY_SUBTAG) { ProcBin *pb = (ProcBin*)parent_header; + if (pb->flags & (PB_IS_WRITABLE | PB_ACTIVE_WRITER)) { + erts_emasculate_writable_binary(pb); + } + ASSERT(pb->val != NULL); ASSERT(byte_offset < pb->size); ASSERT(&pb->bytes[byte_offset] >= (byte*)(pb->val)->orig_bytes); @@ -3428,7 +3434,7 @@ static int fill_iovec_with_slice(ErlNifEnv *env, /* If this isn't a refc binary, copy its contents to the onheap buffer * and reference that instead. */ - if (raw_data.ref_bin == NULL) { + if (raw_data.size > 0 && raw_data.ref_bin == NULL) { ASSERT(onheap_offset < onheap_data.size); ASSERT(slice->onheap_size > 0); @@ -3439,12 +3445,11 @@ static int fill_iovec_with_slice(ErlNifEnv *env, raw_data.ref_bin = onheap_data.ref_bin; } - ASSERT(raw_data.ref_bin != NULL); - while (raw_data.size > 0) { UWord chunk_len = MIN(raw_data.size, MAX_SYSIOVEC_IOVLEN); ASSERT(iovec_idx < iovec->iovcnt); + ASSERT(raw_data.ref_bin != NULL); iovec->iov[iovec_idx].iov_base = raw_data.data; iovec->iov[iovec_idx].iov_len = chunk_len; diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 6654468fb6..3c0a126fe2 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -10824,7 +10824,7 @@ request_system_task(Process *c_p, Eterm requester, Eterm target, goto badarg; req_type = tp[1]; req_id = tp[2]; - req_id_sz = is_immed(req_id) ? req_id : size_object(req_id); + req_id_sz = is_immed(req_id) ? 0 : size_object(req_id); tot_sz = req_id_sz; for (i = 0; i < ERTS_MAX_PROC_SYS_TASK_ARGS; i++) { int tix = 3 + i; diff --git a/erts/emulator/beam/macros.tab b/erts/emulator/beam/macros.tab index e0b5f56b53..494fe8961e 100644 --- a/erts/emulator/beam/macros.tab +++ b/erts/emulator/beam/macros.tab @@ -20,13 +20,12 @@ // // -// Use if there is a garbage collection before storing to a -// general destination (either X or Y register). +// Define a regular expression that will match instructions that +// perform GC. That will allow beam_makeops to check for instructions +// that don't use $REFRESH_GEN_DEST() when they should. // -REFRESH_GEN_DEST() { - dst_ptr = REG_TARGET_PTR(dst); -} +GC_REGEXP=erts_garbage_collect|erts_gc|GcBifFunction; // $Offset is relative to the start of the instruction (not to the // location of the failure label reference). Since combined diff --git a/erts/emulator/beam/map_instrs.tab b/erts/emulator/beam/map_instrs.tab index bbb2f49b66..c594a87298 100644 --- a/erts/emulator/beam/map_instrs.tab +++ b/erts/emulator/beam/map_instrs.tab @@ -31,7 +31,7 @@ new_map(Dst, Live, N) { Eterm res; HEAVY_SWAPOUT; - res = new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION); + res = erts_gc_new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; $REFRESH_GEN_DEST(); $Dst = res; @@ -44,7 +44,7 @@ i_new_small_map_lit(Dst, Live, Keys) { Eterm keys = $Keys; HEAVY_SWAPOUT; - res = new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION); + res = erts_gc_new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION); HEAVY_SWAPIN; $REFRESH_GEN_DEST(); $Dst = res; @@ -133,7 +133,7 @@ update_map_assoc(Src, Dst, Live, N) { reg[live] = $Src; HEAVY_SWAPOUT; - res = update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION); + res = erts_gc_update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; ASSERT(is_value(res)); $REFRESH_GEN_DEST(); @@ -147,7 +147,7 @@ update_map_exact(Fail, Src, Dst, Live, N) { reg[live] = $Src; HEAVY_SWAPOUT; - res = update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION); + res = erts_gc_update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; if (is_value(res)) { $REFRESH_GEN_DEST(); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 7a2c39b3a8..a560bde920 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -511,8 +511,6 @@ put_list y x x put_list y y x put_list x y x -put_list y x x - # put_list SrcReg Constant Dst put_list x c x diff --git a/erts/emulator/hipe/hipe_amd64_bifs.m4 b/erts/emulator/hipe/hipe_amd64_bifs.m4 index 6d998c4b55..cf4c59c9af 100644 --- a/erts/emulator/hipe/hipe_amd64_bifs.m4 +++ b/erts/emulator/hipe/hipe_amd64_bifs.m4 @@ -462,42 +462,6 @@ ASYM($1): TYPE_FUNCTION(ASYM($1)) #endif') -/* - * nogc_bif_interface_1(nbif_name, cbif_name) - * - * Generate native interface for a bif with implicit P - * The bif can fail but cannot do GC. - */ - -define(nogc_bif_interface_1, -` -#ifndef HAVE_$1 -#`define' HAVE_$1 - TEXT - .align 4 - GLOBAL(ASYM($1)) -ASYM($1): - /* set up the parameters */ - movq P, %rdi - NBIF_ARG(%rsi,1,0) - - /* make the call on the C stack */ - SWITCH_ERLANG_TO_C - pushq %rsi - movq %rsp, %rsi /* Eterm* BIF__ARGS */ - sub $(8), %rsp /* stack frame 16-byte alignment */ - CALL_BIF($2) - add $(1*8 + 8), %rsp - SWITCH_C_TO_ERLANG - - /* throw exception if failure, otherwise return */ - TEST_GOT_EXN - jz nbif_1_simple_exception - NBIF_RET(1) - SET_SIZE(ASYM($1)) - TYPE_FUNCTION(ASYM($1)) -#endif') - /* * noproc_primop_interface_0(nbif_name, cbif_name) diff --git a/erts/emulator/hipe/hipe_bif0.tab b/erts/emulator/hipe/hipe_bif0.tab index 4f73770d24..0380e8c795 100644 --- a/erts/emulator/hipe/hipe_bif0.tab +++ b/erts/emulator/hipe/hipe_bif0.tab @@ -135,7 +135,6 @@ atom bs_utf16_size atom bs_put_utf16be atom bs_put_utf16le atom bs_get_utf16 -atom bs_validate_unicode atom bs_validate_unicode_retract atom emulate_fpe atom emasculate_binary diff --git a/erts/emulator/hipe/hipe_bif_list.m4 b/erts/emulator/hipe/hipe_bif_list.m4 index 23b6709cd0..625d8486fd 100644 --- a/erts/emulator/hipe/hipe_bif_list.m4 +++ b/erts/emulator/hipe/hipe_bif_list.m4 @@ -248,11 +248,6 @@ nofail_primop_interface_3(nbif_bs_get_float_2, erts_bs_get_float_2) standard_bif_interface_3(nbif_bs_put_utf8, hipe_bs_put_utf8) standard_bif_interface_3(nbif_bs_put_utf16be, hipe_bs_put_utf16be) standard_bif_interface_3(nbif_bs_put_utf16le, hipe_bs_put_utf16le) -ifdef(`nogc_bif_interface_1',` -nogc_bif_interface_1(nbif_bs_validate_unicode, hipe_bs_validate_unicode) -',` -standard_bif_interface_1(nbif_bs_validate_unicode, hipe_bs_validate_unicode) -') /* * Bit-syntax primops without any P parameter. diff --git a/erts/emulator/hipe/hipe_debug.c b/erts/emulator/hipe/hipe_debug.c index 222a11db3d..cfe60b379e 100644 --- a/erts/emulator/hipe/hipe_debug.c +++ b/erts/emulator/hipe/hipe_debug.c @@ -135,7 +135,9 @@ static void print_heap(Eterm *pos, Eterm *end) printf("From: 0x%0*lx to 0x%0*lx\n\r", 2*(int)sizeof(long), (unsigned long)pos, 2*(int)sizeof(long), (unsigned long)end); - printf(" | H E A P |\r\n"); + printf(" | %*s H E A P %*s |\r\n", + 2*(int)sizeof(long)-1, "", + 2*(int)sizeof(long)-1, ""); printf(" | %*s | %*s |\r\n", 2+2*(int)sizeof(long), "Address", 2+2*(int)sizeof(long), "Contents"); @@ -158,8 +160,10 @@ static void print_heap(Eterm *pos, Eterm *end) ++pos; --ari; } - } else + } else { + fflush(stdout); erts_printf("%.30T", val); + } printf("\r\n"); } printf(" |%s|%s|\r\n", dashes, dashes); diff --git a/erts/emulator/hipe/hipe_native_bif.c b/erts/emulator/hipe/hipe_native_bif.c index e444e3dc5d..99c34532b9 100644 --- a/erts/emulator/hipe/hipe_native_bif.c +++ b/erts/emulator/hipe/hipe_native_bif.c @@ -482,15 +482,6 @@ static int validate_unicode(Eterm arg) return 1; } -BIF_RETTYPE nbif_impl_hipe_bs_validate_unicode(NBIF_ALIST_1) -{ - Process *p = BIF_P; - Eterm arg = BIF_ARG_1; - if (!validate_unicode(arg)) - BIF_ERROR(p, BADARG); - return NIL; -} - Uint hipe_is_unicode(Eterm arg) { return (Uint) validate_unicode(arg); diff --git a/erts/emulator/hipe/hipe_native_bif.h b/erts/emulator/hipe/hipe_native_bif.h index 71f63875a4..d5081b8438 100644 --- a/erts/emulator/hipe/hipe_native_bif.h +++ b/erts/emulator/hipe/hipe_native_bif.h @@ -66,7 +66,6 @@ AEXTERN(Eterm,nbif_bs_utf16_size,(Eterm)); AEXTERN(Eterm,nbif_bs_put_utf16be,(Process*,Eterm,byte*,unsigned int)); AEXTERN(Eterm,nbif_bs_put_utf16le,(Process*,Eterm,byte*,unsigned int)); AEXTERN(Eterm,nbif_bs_get_utf16,(void)); -AEXTERN(Eterm,nbif_bs_validate_unicode,(Process*,Eterm)); AEXTERN(Uint,nbif_is_unicode,(Eterm)); AEXTERN(Eterm,nbif_bs_validate_unicode_retract,(void)); AEXTERN(Uint,nbif_is_divisible,(Uint,Uint)); @@ -92,7 +91,6 @@ BIF_RETTYPE nbif_impl_hipe_bs_put_utf8(NBIF_ALIST_3); Eterm hipe_bs_utf16_size(Eterm); BIF_RETTYPE nbif_impl_hipe_bs_put_utf16be(NBIF_ALIST_3); BIF_RETTYPE nbif_impl_hipe_bs_put_utf16le(NBIF_ALIST_3); -BIF_RETTYPE nbif_impl_hipe_bs_validate_unicode(NBIF_ALIST_1); Uint hipe_is_unicode(Eterm); struct erl_bin_match_buffer; int hipe_bs_validate_unicode_retract(struct erl_bin_match_buffer*, Eterm); diff --git a/erts/emulator/hipe/hipe_primops.h b/erts/emulator/hipe/hipe_primops.h index 49e6bdb026..a6abd3e011 100644 --- a/erts/emulator/hipe/hipe_primops.h +++ b/erts/emulator/hipe/hipe_primops.h @@ -63,7 +63,6 @@ PRIMOP_LIST(am_bs_utf16_size, &nbif_bs_utf16_size) PRIMOP_LIST(am_bs_put_utf16be, &nbif_bs_put_utf16be) PRIMOP_LIST(am_bs_put_utf16le, &nbif_bs_put_utf16le) PRIMOP_LIST(am_bs_get_utf16, &nbif_bs_get_utf16) -PRIMOP_LIST(am_bs_validate_unicode, &nbif_bs_validate_unicode) PRIMOP_LIST(am_is_unicode, &nbif_is_unicode) PRIMOP_LIST(am_bs_validate_unicode_retract, &nbif_bs_validate_unicode_retract) diff --git a/erts/emulator/internal_doc/beam_makeops.md b/erts/emulator/internal_doc/beam_makeops.md new file mode 100644 index 0000000000..1da8d2ab05 --- /dev/null +++ b/erts/emulator/internal_doc/beam_makeops.md @@ -0,0 +1,1846 @@ +The beam\_makeops script +======================= + +This document describes the **beam\_makeops** script. + +Introduction +------------ + +The **beam\_makeops** Perl script is used at build-time by both the +compiler and runtime system. Given a number of input files (all with +the extension `.tab`), it will generate source files used by the +Erlang compiler and by the runtime system to load and execute BEAM +instructions. + +Essentially those `.tab` files define: + +* External generic BEAM instructions. They are the instructions that +are known to both the compiler and the runtime system. Generic +instructions are stable between releases. New generic instructions +with high numbers than previous instructions can be added in major +releases. The OTP 20 release has 159 external generic instructions. + +* Internal generic instructions. They are known only to the runtime +system and can be changed at any time without compatibility issues. +They are created by transformation rules (described next). + +* Rules for transforming one or more generic instructions to other +generic instructions. The transformation rules allow combining, +splitting, and removal of instructions, as well as shuffling operands. +Because of the transformation rules, the runtime can have many +internal generic instructions that are only known to runtime system. + +* Specific BEAM instructions. The specific instructions are the +instructions that are actually executed by the runtime system. They +can be changed at any time without causing compatibility issues. +The loader translates generic instructions to specific instructions. +In general, for each generic instruction, there exists a family of +specific instructions. The OTP 20 release has 389 specific +instructions. + +* The implementation of specific instructions. + +Generic instructions have typed operands. Here are a few examples of +operands for `move/2`: + + {move,{atom,id},{x,5}}. + {move,{x,3},{x,0}}. + {move,{x,2},{y,1}}. + +When those instructions are loaded, the loader rewrites them +to specific instructions: + + move_cx id 5 + move_xx 3 0 + move_xy 2 1 + +Corresponding to each generic instruction, there is a family of +specific instructions. The types that an instance of a specific +instruction can handle are encoded in the instruction names. For +example, `move_xy` takes an X register number as the first operand and +a Y register number as the second operand. `move_cx` takes a tagged +Erlang term as the first operand and an X register number as the +second operand. + +An example: the move instruction +-------------------------------- + +Using the `move` instruction as an example, we will give a quick +tour to show the main features of **beam\_makeops**. + +In the `compiler` application, in the file `genop.tab`, there is the +following line: + + 64: move/2 + +This is a definition of an external generic BEAM instruction. Most +importantly it specifices that the opcode is 64. It also defines that +it has two operands. The BEAM assembler will use the opcode when +creating `.beam` files. The compiler does not really need the arity, +but it will use it as an internal sanity check when assembling the +BEAM code. + +Let's have a look at `ops.tab` in `erts/emulator/beam`, where the +specific `move` instructions are defined. Here are a few of them: + + move x x + move x y + move c x + +Each specific instructions is defined by following the name of the +instruction with the types for each operand. An operand type is a +single letter. For example, `x` means an X register, `y` +means a Y register, and `c` is a "constant" (a tagged term such as +an integer, an atom, or a literal). + +Now let's look at the implementation of the `move` instruction. There +are multiple files containing implementations of instructions in the +`erts/emulator/beam` directory. The `move` instruction is defined in +`instrs.tab`. It looks like this: + + move(Src, Dst) { + $Dst = $Src; + } + +The implementation for an instruction largely follows the C syntax, +except that the variables in the function head don't have any types. +The `$` before an identifier denotes a macro expansion. Thus, +`$Src` will expand to the code to pick up the source operand for +the instruction and `$Dst` to the code for the destination register. + +We will look at the code for each specific instruction in turn. To +make the code easier to understand, let's first look at the memory +layout for the instruction `{move,{atom,id},{x,5}}`: + + +--------------------+--------------------+ + I -> | 40 | &&lb_move_cx | + +--------------------+--------------------+ + | Tagged atom 'id' | + +--------------------+--------------------+ + +This example and all other examples in the document assumes a 64-bit +archictecture, and furthermore that pointers to C code fit in 32 bits. + +`I` in the BEAM virtual machine is the instruction pointer. When BEAM +executes an instruction, `I` points to the first word of the +instruction. + +`&&lb_move_cx` is the address to C code that implements `move_cx`. It +is stored in the lower 32 bits of the word. In the upper 32 bits is +the byte offset to the X register; the register number 5 has been +multiplied by the word size size 8. + +In the next word the tagged atom `id` is stored. + +With that background, we can look at the generated code for `move_cx` +in `beam_hot.h`: + + OpCase(move_cx): + { + BeamInstr next_pf = BeamCodeAddr(I[2]); + xb(BeamExtraData(I[0])) = I[1]; + I += 2; + ASSERT(VALID_INSTR(next_pf)); + GotoPF(next_pf); + } + +We will go through each line in turn. + +* `OpCase(move_cx):` defines a label for the instruction. The +`OpCase()` macro is defined in `beam_emu.c`. It will expand this line +to `lb_move_cx:`. + +* `BeamInstr next_pf = BeamCodeAddr(I[2]);` fetches the pointer to +code for the next instruction to be executed. The `BeamCodeAddr()` +macro extracts the pointer from the lower 32 bits of the instruction +word. + +* `xb(BeamExtraData(I[0])) = I[1];` is the expansion of `$Dst = $Src`. +`BeamExtraData()` is a macro that will extract the upper 32 bits from +the instruction word. In this example, it will return 40 which is the +byte offset for X register 5. The `xb()` macro will cast a byte +pointer to an `Eterm` pointer and dereference it. The `I[1]` on +the right side of the `=` fetches an Erlang term (the atom `id` in +this case). + +* `I += 2` advances the instruction pointer to the next +instruction. + +* In a debug-compiled emulator, `ASSERT(VALID_INSTR(next_pf));` makes +sure that `next_pf` is a valid instruction (that is, that it points +within the `process_main()` function in `beam_emu.c`). + +* `GotoPF(next_pf);` transfers control to the next instruction. + +Now let's look at the implementation of `move_xx`: + + OpCase(move_xx): + { + Eterm tmp_packed1 = BeamExtraData(I[0]); + BeamInstr next_pf = BeamCodeAddr(I[1]); + xb((tmp_packed1>>BEAM_TIGHT_SHIFT)) = xb(tmp_packed1&BEAM_TIGHT_MASK); + I += 1; + ASSERT(VALID_INSTR(next_pf)); + GotoPF(next_pf); + } + +We will go through the lines that are new or have changed compared to +`move_cx`. + +* `Eterm tmp_packed1 = BeamExtraData(I[0]);` picks up both X register +numbers packed into the upper 32 bits of the instruction word. + +* `BeamInstr next_pf = BeamCodeAddr(I[1]);` pre-fetches the address of +the next instruction. Note that because both X registers operands fits +into the instruction word, the next instruction is in the very next +word. + +* `xb((tmp_packed1>>BEAM_TIGHT_SHIFT)) = xb(tmp_packed1&BEAM_TIGHT_MASK);` +copies the source to the destination. (For a 64-bit architecture, +`BEAM_TIGHT_SHIFT` is 16 and `BEAM_TIGHT_MASK` is `0xFFFF`.) + +* `I += 1;` advances the instruction pointer to the next instruction. + +`move_xy` is almost identical to `move_xx`. The only difference is +the use of the `yb()` macro instead of `xb()` to reference the +destination register: + + OpCase(move_xy): + { + Eterm tmp_packed1 = BeamExtraData(I[0]); + BeamInstr next_pf = BeamCodeAddr(I[1]); + yb((tmp_packed1>>BEAM_TIGHT_SHIFT)) = xb(tmp_packed1&BEAM_TIGHT_MASK); + I += 1; + ASSERT(VALID_INSTR(next_pf)); + GotoPF(next_pf); + } + +### Transformation rules ### + +Next let's look at how we can do some optimizations using transformation +rules. For simple instructions such as `move/2`, the instruction dispatch +overhead can be substantial. A simple optimization is to combine common +instructions sequences to a single instruction. One such common sequence +is multiple `move` instructions moving X registers to Y registers. + +Using the following rule we can combine two `move` instructions +to a `move2` instruction: + + move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 + +The left side of the arrow (`=>`) is a pattern. If the pattern +matches, the matching instructions will be replaced by the +instructions on the right side. Variables in a pattern must start +with an uppercase letter just as in Erlang. A pattern variable may be +followed `=` and one or more type letters to constrain the match to +one of those types. The variables that are bound on the left side can +be used on the right side. + +We will also need to define a specific instruction and an implementation: + + # In ops.tab + move2 x y x y + + // In instrs.tab + move2(S1, D1, S2, D2) { + Eterm V1, V2; + V1 = $S1; + V2 = $S2; + $D1 = V1; + $D2 = V2; + } + +When the loader has found a match and replaced the matched instructions, +it will match the new instructions against the transformation rules. +Because of that, we can define the rule for a `move3/6` instruction +as follows: + + move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => \ + move3 X1 Y1 X2 Y2 X3 Y3 + +(A `\` before a newline can be used to break a long line for readability.) + +It would also be possible to define it like this: + + move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y => \ + move3 X1 Y1 X2 Y2 X3 Y3 + +but in that case it must be defined before the rule for `move2/4` +because the first matching rule will be applied. + +One must be careful not to create infinite loops. For example, if we +for some reason would want to reverse the operand order for the `move` +instruction, we must not do like this: + + move Src Dst => move Dst Src + +The loader would swap the operands forever. To avoid the loop, we must +rename the instruction. For example: + + move Src Dst => assign Dst Src + +This concludes the quick tour of the features of **beam\_makeops**. + +Short overview of instruction loading +------------------------------------- + +To give some background to the rest of this document, here follows a +quick overview of how instructions are loaded. + +* The loader reads and decodes one instruction at a time from the BEAM +code and creates a generic instruction. Many transformation rules +must look at multiple instructions, so the loader will +keep multiple generic instructions in a linked list. + +* The loader tries to apply transformation rules against the +generic instructions in the linked list. If a rule matches, the +matched instructions will be removed and replaced with new +generic instructions constructed from the right side of the +transformation. + +* If a transformation rule matched, the loader applies the +transformation rules again. + +* If no transformation rule match, the loader will begin rewriting +the first of generic instructions to a specific instruction. + +* First the loader will search for a specific operation where the +types for all operands match the type for the generic instruction. +The first matching instruction will be selected. **beam\_makeops** +has ordered the specific instructions so that instructions with more +specific operands comes before instructions with less specific +operands. For example, `move_nx` is more specific than `move_cx`. If +the first operand is `[]` (NIL), `move_nx` will be selected. + +* Given the opcode for the selected specific instruction, the loader +looks up the pointer to the C code for the instruction and stores +in the code area for the module being loaded. + +* The loader translates each operand to a machine word and stores it +in the code area. The operand type for the selected specific +instruction guides the translation. For example, if the type is `e`, +the value of the operand is an index into an arry of external +functions and will be translated to a pointer to the export entry for +the function to call. If the type is `x`, the number of the X +register will be multiplied by the word size to produce a byte offset. + +* The loader runs the packing engine to pack multiple operands into a +single word. The packing engine is controlled by a small program, +which is a string where each character is an instruction. For +example, the code to pack the operands for `move_xy` is `"22#"` (on a +64-bit machine). That program will pack the byte offsets for both +registers into the same word as the pointer to C code. + +Running beam_makeops +-------------------- + +**beam\_makeops** is found in `$ERL_TOP/erts/emulator/utils`. Options +start with a hyphen (`-`). The options are followed by the name of +the input files. By convention, all input files have the extension +`.tab`, but is not enforced by **beam\_makeops**. + +### The -outdir option ### + +The option `-outdir Directory` specifies the output directory for +the generated files. Default is the current working directory. + +### Running beam_makeops for the compiler ### + +Give the option `-compiler` to produce output files for the compiler. +The following files will be written to the output directory: + +* `beam_opcodes.erl` - Used primarily by `beam_asm` and `beam_diasm`. + +* `beam_opcode.hrl` - Used by `beam_asm`. It contains tag definitions +used for encoding instruction operands. + +The input file should only contain the definition of BEAM_FORMAT_NUMBER +and external generic instructions. (Everything else would be ignored.) + +### Running beam_makeops for the emulator ### + +Give the option `-emulator` to produce output files for the emulator. +The following output files will be generated in the output directory. + +* `beam_hot.h`, `beam_warm.h`, `beam_cold.`h - Implementation of +instructions. Included inside the `process_main()` function in +`beam_emu.c`. + +* `beam_opcodes.c` - Defines static data used by the loader +(`beam_load.c`). Data about generic instructions, specific +instructions (including how to pack their operands), and +transformation rules are all part of this file. + +* `beam_opcodes.h` - Miscellanous preprocessor definitions, mainly +used by `beam_load.c` but also by `beam_{hot,warm,cold}.h`. + +* `beam_pred_funcs.h` - Included by `beam_load.c`. Contains defines +needed to call guard constraints in transformation rules. + +* `beam_tr_funcs.h` - Included by `beam_load.c`. Contains defines +needed to call a C function to the right of a transformation rule. + +The following options can be given: + +* `wordsize 32|64` - Defines the word size. Default is 32. + +* `code-model Model` - The code model as given to `-mcmodel` option +for GCC. Default is `unknown`. If the code model is `small` (and +the word size is 64 bits), **beam\_makeops** will pack operands +into the upper 32 bits of the instruction word. + +* `DSymbol=0|1` - Defines the value for a symbol. The symbol can be +used in `%if` and `%unless` directives. + +Syntax of .tab files +-------------------- + +### Comments ### + +Any line starting with `#` is a comment and is ignored. + +A line with `//` is also a comment. It is recommended to only +use this style of comments in files that define implementations of +instructions. + +A long line can be broken into shorter lines by a placing a`\` before +the newline. + +### Variable definitions ### + +A variable definition binds a variable to a Perl variable. It is only +meaningful to add a new definition if **beam\_makeops** is updated +at the same time to use the variable. A variable definition looks this: + +*name*=*value*[;] + +where *name* is the name of a Perl variable in **beam\_makeops**, +and *value* is the value to be given to the variable. The line +can optionally end with a `;` (to avoid messing up the +C indentation mode in Emacs). + +Here follows a description of the variables that are defined. + +#### BEAM\_FORMAT\_NUMBER #### + +`genop.tab` has the following definition: + + BEAM_FORMAT_NUMBER=0 + +It defines the version of the instruction set (which will be +included in the code header in the BEAM code). Theoretically, +the version could be bumped, and all instructions changed. +In practice, we would have two support two instruction sets +in the runtime system for at least two releases, so it will +probably never happen in practice. + +#### GC\_REGEXP #### + +In `macros.tab`, there is a definition of `GC_REGEXP`. +It will be described in [a later section](#the-gc_regexp-definition). + +### Directives ### + +There are directives to classify specific instructions depending +on how frequently used they are: + +* `%hot` - Implementation will be placed in `beam_hot.h`. Frequently +executed instructions. + +* `%warm` - Implementation will be placed in `beam_warm.h`. Binary +syntax instructions. + +* `%cold` - Implementation will be placed in `beam_cold.h`. Trace +instructions and infrequently used instructions. + +Default is `%hot`. The directives will be applied to declarations +of the specific instruction that follow. Here is an example: + + %cold + is_number f? xy + %hot + +#### Conditional compilation directives #### + +The `%if` directive includes a range of lines if a condition is +true. For example: + + %if ARCH_64 + i_bs_get_integer_32 x f? x + %endif + +The specific instruction `i_bs_get_integer_32` will only be defined +on a 64-bit machine. + +The condition can be inverted by using `%unless` instead of `%if`: + + %unless NO_FPE_SIGNALS + fcheckerror p => i_fcheckerror + i_fcheckerror + fclearerror + %endif + +It is also possible to add an `%else` clause: + + %if ARCH_64 + BS_SAFE_MUL(A, B, Fail, Dst) { + Uint64 res = ($A) * ($B); + if (res / $B != $A) { + $Fail; + } + $Dst = res; + } + %else + BS_SAFE_MUL(A, B, Fail, Dst) { + Uint64 res = (Uint64)($A) * (Uint64)($B); + if ((res >> (8*sizeof(Uint))) != 0) { + $Fail; + } + $Dst = res; + } + %endif + +#### Symbols that are defined in directives #### + +The following symbols are always defined. + +* `ARCH_64` - is 1 for a 64-bit machine, and 0 otherwise. +* `ARCH_32` - is 1 for 32-bit machine, and 1 otherwise. + +The `Makefile` for building the emulator currently defines the +following symbols by using the `-D` option on the command line for +**beam\_makeops**. + +* `NO_FPE_SIGNALS` - 1 if FPE signals are not enable in runtime system, +0 otherwise. +* `USE_VM_PROBES` - 1 if the runtime system is compiled to use VM probes (support for dtrace or systemtap), 0 otherwise. + +### Defining external generic instructions ### + +External generic BEAM instructions are known to both the compiler and +the runtime system. They remain stable between releases. A new major +release may add more external generic instructions, but must not change +the semantics for a previously defined instruction. + +The syntax for an external generic instruction is as follows: + +*opcode*: [-]*name*/*arity* + +*opcode* is an integer greater than or equal to 1. + +*name* is an identifier starting with a lowercase letter. *arity* is +an integer denoting the number of operands. + +*name* can optionally be preceded by `-` to indicate that it has been +obsoleted. The compiler is not allowed to generate BEAM files that +use obsolete instructions and the loader will refuse to load BEAM +files that use obsolete instructions. + +It only makes sense to define external generic instructions in the +file `genop.tab` in `lib/compiler/src`, because the compiler must +know about them in order to use them. + +New instructions must be added at the end of the file, with higher +numbers than the previous instructions. + +### Defining internal generic instructions ### + +Internal generic instructions are known only to the runtime +system and can be changed at any time without compatibility issues. + +There are two ways to define internal generic instructions: + +* Implicitly when a specific instruction is defined. This is by far +the most common way. Whenever a specific instruction is created, +**beam\_makeops** automatically creates an internal generic instruction +if it does not previously exist. + +* Explicitly. This is necessary only when a generic instruction does +not have any corresponding specific instruction. + +The syntax for an internal generic instruction is as follows: + +*name*/*arity* + +*name* is an identifier starting with a lowercase letter. *arity* is +an integer denoting the number of operands. + +### About generic instructions in general ### + +Each generic instruction has an opcode. The opcode is an integer, +greater than or equal to 1. For an external generic instruction, it +must be explicitly given `genop.tab`, while internal generic +instructions are automatically numbered by **beam\_makeops**. + +The identity of a generic instruction is its name combined with its +arity. That means that it is allowed to define two distinct generic +instructions having the same name but with different arities. For +example: + + move_window/5 + move_window/6 + +Each operand of a generic instruction is tagged with its type. A generic +instruction can have one of the following types: + +* `x` - X register. + +* `y` - Y register. + +* `l` - Floating point register number. + +* `i` - Tagged literal integer. + +* `a` - Tagged literal atom. + +* `n` - NIL (`[]`, the empty list). + +* `q` - Literal that don't fit in a word, that is an object stored on +the heap such as a list or tuple. Any heap object type is supported, +even types that don't have real literals such as external references. + +* `f` - Non-zero failure label. + +* `p` - Zero failure label. + +* `u` - Untagged integer that fits in a machine word. It is used for many +different purposes, such as the number of live registers in `test_heap/2`, +as a reference to the export for `call_ext/2`, and as the flags operand for +binary syntax instructions. When the generic instruction is translated to a +specific instruction, the type for the operand in the specific operation will +tell the loader how to treat the operand. + +* `o` - Overflow. If the value for an `u` operand does not fit in a machine +word, the type of the operand will be changed to `o` (with no associated +value). Currently only used internally in the loader in the guard constraint +function `binary_too_big()`. + +* `v` - Arity value. Only used internally in the loader. + + +### Defining specific instructions ### + +The specific instructions are known only to the runtime system and +are the instructions that are actually executed. They can be changed +at any time without causing compatibility issues. + +A specific instruction can have at most 6 operands. + +A specific instruction is defined by first giving its name followed by +the types for each operand. For example: + + move x y + +Internally, for example in the generated code and in the output from +the BEAM disassembler, the instruction `move x y` will be called `move_xy`. + +The name for a specific instruction is an identifier starting with a +lowercase letter. A type is an lowercase or uppercase letter. + +All specific instructions with a given name must have the same number +of operands. That is, the following is **not** allowed: + + move x x + move x y x y + +Here follows the type letters that more or less directly corresponds +to the types for generic instructions. + +* `x` - X register. Will be loaded as a byte offset to the X register +relative to the base of X register array. (Can be packed with other +operands.) + +* `y` - Y register. Will be loaded as a byte offset to the Y register +relative to the stack frame. (Can be packed with other operands.) + +* `r` - X register 0. An implicit operand that will not be stored in +the loaded code. + +* `l` - Floating point register number. (Can be packed with other +operands.) + +* `i` - Tagged literal integer (a SMALL that will fit in one word). + +* `a` - Tagged atom. + +* `n` - NIL or the empty list. (Will not be stored in the loaded code.) + +* `q` - Tagged CONS or BOXED pointer. That is, a term such as a list +or tuple. Any heap object type is supported, even types that don't +have real literals such as external references. + +* `f` - Failure label (non-zero). The target for a branch +or call instruction. + +* `p` - The 0 failure label, meaning that an exception should be raised +if the instruction fails. (Will not be stored in the loaded code.) + +* `c` - Any literal term; that is, immediate literals such as SMALL, +and CONS or BOXED pointers to literals. (Can be used where the +operand in the generic instruction has one of the types `i`, `a`, `n`, +or `q`.) + +The types that follow do a type test of the operand at runtime; thus, +they are generally more expensive in terms of runtime than the types +described earlier. However, those operand types are needed to avoid a +combinatorial explosion in the number of specific instructions and +overall code size of `process_main()`. + +* `s` - Tagged source: X register, Y register, or a literal term. The +tag will be tested at runtime to retrieve the value from an X +register, a Y register, or simply use the value as a tagged Erlang +term. (Implementation note: An X register is tagged as a pid, and a Y +register as a port. Therefore the literal term must not contain a +port or pid.) + +* `S` - Tagged source register (X or Y). The tag will be tested at +runtime to retrieve the value from an X register or a Y register. Slighly +cheaper than `s`. + +* `d` - Tagged destination register (X or Y). The tag will be tested +at runtime to set up a pointer to the destination register. If the +instrution performs a garbarge collection, it must use the +`$REFRESH_GEN_DEST()` macro to refresh the pointer before storing to +it (there are more details about that in a later section). + +* `j` - A failure label (combination of `f` and `p`). If the branch target 0, +an exception will be raised if instruction fails, otherwise control will be +transfered to the target address. + +The types that follows are all applied to an operand that has the `u` +type. + +* `t` - An untagged integer that will fit in 12 bits (0-4096). It can be +packed with other operands in a word. Most often used as the number +of live registers in instructions such as `test_heap`. + +* `I` - An untagged integer that will fit in 32 bits. It can be +packed with other operands in a word on a 64-bit system. + +* `W` - Untagged integer or pointer. Not possible to pack with other +operands. + +* `e` - Pointer to an export entry. Use by call instructions that call +other modules, such as `call_ext`. + +* `L` - A label. Only used by the `label/1` instruction. + +* `b` - Pointer to BIF. Used by instructions that BIFs, such as +`call_bif`. + +* `A` - A tagged arityvalue. Used in instructions that test the arity +of a tuple. + +* `P` - A byte offset into a tuple. + +* `Q` - A byte offset into the stack. Used for updating the frame +pointer register. Can be packed with other operands. + +When the loader translates a generic instruction a specific +instruction, it will choose the most specific instruction that will +fit the types. Consider the following two instructions: + + move c x + move n x + +The `c` operand can encode any literal value, including NIL. The +`n` operand only works for NIL. If we have the generic instruction +`{move,nil,{x,1}}`, the loader will translate it to `move_nx 1` +because `move n x` is more specific. `move_nx` could be slightly +faster or smaller (depending on the architecture), because the `[]` +is not stored explicitly as an operand. + +#### Syntactic sugar for specific instructions #### + +It is possible to specify more than one type letter for each operand. +Here is an example: + + move cxy xy + +This is syntactic sugar for: + + move c x + move c y + move x x + move x y + move y x + move y y + +Note the difference between `move c xy` and `move c d`. Note that `move c xy` +is equivalent to the following two definitions: + + move c x + move c y + +On the other hand, `move c d` is a single instruction. At runtime, +the `d` operand will be tested to see whether it refers to an X +register or a Y register, and a pointer to the register will be set +up. + +#### The '?' type modifier #### + +The character `?` can be added to the end of an operand to indicate +that the operand will not be used every time the instruction is executed. +For example: + + allocate_heap t I t? + is_eq_exact f? x xy + +In `allocate_heap`, the last operand is the number of live registers. +It will only be used if there is not enough heap space and a garbage +collection must be performed. + +In `is_eq_exact`, the failure address (the first operand) will only be +used if the two register operands are not equal. + +Knowing that an operand is not always used can improve how packing +is done for some instructions. + +For the `allocate_heap` instruction, without the `?` the packing would +be done like this: + + +--------------------+--------------------+ + I -> | Stack needed | &&lb_allocate_heap + + +--------------------+--------------------+ + | Heap needed | Live registers + + +--------------------+--------------------+ + +"Stack needed" and "Heap needed" are always used, but they are in +different words. Thus, at runtime the `allocate_heap` instruction +must read both words from memory even though it will not always use +"Live registers". + +With the `?`, the operands will be packed like this: + + +--------------------+--------------------+ + I -> | Live registers | &&lb_allocate_heap + + +--------------------+--------------------+ + | Heap needed | Stack needed + + +--------------------+--------------------+ + +Now "Stack needed" and "Heap needed" are in the same word. + +### Defining transformation rules ### + +Transformation rules are used to rewrite generic instructions to other +generic instructions. The transformations rules are applied +repeatedly until no rule match. At that point, the first instruction +in the resulting instruction sequence will be converted to a specific +instruction and added to the code for the module being loaded. Then +the transformation rules for the remaining instructions are run in the +same way. + +A rule is recognized by its right-pointer arrow: `=>`. To the left of +the arrow is one or more instruction patterns, separated by `|`. To +the right of the arrow is zero or more instructions, separated by `|`. +If the instructions from the BEAM code matches the instruction +patterns on the left side, they will be replaced with instructions on +the right side (or removed if there are no instructions on the right). + +#### Defining instruction patterns #### + +We will start looking at the patterns on the left side of the arrow. + +A pattern for an instruction consists of its name, followed by a pattern +for each of its operands. The operand patterns are separated by spaces. + +The simplest possible pattern is a variable. Just like in Erlang, +a variable must begin with an uppercase letter. If the same variable is +used in multiple operands, the pattern will only match if the operands +are equal. For example: + + move Same Same => + +This pattern will match if the operands for `move` are the same. If +the pattern match, the instruction will be removed. (That used to be an +actual rule a long time ago when the compiler would occasionally produce +instructions such as `{move,{x,2},{x,2}}`.) + +Variables that have been bound on the left side can be used on the +right side. For example, this rule will rewrite all `move` instructions +to `assign` instructions with the operands swapped: + + move Src Dst => assign Dst Src + +If we only want to match operands of a certain type, we can +use a type constraint. A type constraint consists of one or more +lowercase letters, each specifying a type. For example: + + is_integer Fail an => jump Fail + +The second operand pattern, `an`, will match if the second operand is +either an atom or NIL (the empty list). In case of a match, the +`is_integer/2` instruction will be replaced with a `jump/1` +instruction. + +An operand pattern can bind a variable and constrain the type at the +same time by following the variable with a `=` and the constraint. +For example: + + is_eq_exact Fail=f R=xy C=q => i_is_eq_exact_literal Fail R C + +Here the `is_eq_exact` instruction is replaced with a specialized instruction +that only compares literals, but only if the first operand is a register and +the second operand is a literal. + +#### Further constraining patterns #### + +In addition to specifying a type letter, the actual value for the type can +be specified. For example: + + move C=c x==1 => move_x1 C + +Here the second operand of `move` is constrained to be X register 1. + +When specifying an atom constraint, the atom is written as it would be +in the C source code. That is, it needs an `am_` prefix, and it must +be listed in `atom.names`. For example: + + is_boolean Fail=f a==am_true => + is_boolean Fail=f a==am_false => + +There are several constraints available for testing whether a call is to a BIF +or a function. + +The constraint `u$is_bif` will test whether the given operand refers to a BIF. +For example: + + call_ext u Bif=u$is_bif => call_bif Bif + call_ext u Func => i_call_ext Func + +The `call_ext` instruction can be used to call functions written in +Erlang as well as BIFs (or more properly called SNIFs). The +`u$is_bif` constraint will match if the operand refers to a BIF (that +is, if it is listed in the file `bif.tab`). Note that `u$is_bif` +should only be applied to operands that are known to contain an index +to the import table chunk in the BEAM file (such operands have the +type `b` or `e` in the corresponding specific instruction). If +applied to other `u` operands, it will at best return a nonsense +result. + +The `u$is_not_bif` constraint matches if the operand does not refer to +a BIF (not listed in `bif.tab`). For example: + + move S X0=x==0 | line Loc | call_ext_last Ar Func=u$is_not_bif D => \ + move S X0 | call_ext_last Ar Func D + +The `u$bif:Module:Name/Arity` constraint tests whether the given +operand refers to a specific BIF. Note that `Module:Name/Arity` +**must** be an existing BIF defined in `bif.tab`, or there will +be a compilation error. It is useful when a call to a specific BIF +should be replaced with an instruction as in this example: + + gc_bif2 Fail Live u$bif:erlang:splus/2 S1 S2 Dst => \ + gen_plus Fail Live S1 S2 Dst + +Here the call to the GC BIF `'+'/2` will be replaced with the instruction +`gen_plus/5`. Note that the same name as used in the C source code must be +used for the BIF, which in this case is `splus`. It is defined like this +in `bit.tab`: + + ubif erlang:'+'/2 splus_2 + +The `u$func:Module:Name/Arity` will test whether the given operand is a +a specific function. Here is an example: + + bif1 Fail u$func:erlang:is_constant/1 Src Dst => too_old_compiler + +`is_constant/1` used to be a BIF a long time ago. The transformation +replaces the call with the `too_old_compiler` instruction which will produce +a nicer error message than the default error would be for a missing guard BIF. + +#### Type constraints allowed in patterns #### + +Here are all type letters that are allowed on the left side of a transformation +rule. + +* `u` - An untagged integer that fits in a machine word. + +* `x` - X register. + +* `y` - Y register. + +* `l` - Floating point register number. + +* `i` - Tagged literal integer. + +* `a` - Tagged literal atom. + +* `n` - NIL (`[]`, the empty list). + +* `q` - Literals that don't fit in a word, such as list or tuples. + +* `f` - Non-zero failure label. + +* `p` - The zero failure label. + +* `j` - Any label. Equivalent to `fp`. + +* `c` - Any literal term. Equivalent to `ainq`. + +* `s` - X register, Y register, or any literal term. Equivalent to `xyc`. + +* `d` - X or Y register. Equivalent to `xy`. (In a pattern `d` will +match both source and destination registers. As an operand in a specific +instruction, it must only be used for a destination register.) + +* `o` - Overflow. An untagged integer that does not fit in a machine word. + +#### Guard constraints #### + +If the constraints described so far is not enough, additional +constraints can be written in C in `beam_load.c` and be called as a +guard function on the left side of the transformation. If the guard +function returns a non-zero value, the matching of the rule will +continue, otherwise the match will fail. For example: + + ensure_map Lit=q | literal_is_map(Lit) => + +The guard test `literal_is_map/1` tests whether the given literal is a map. +If the literal is a map, the instruction is unnecessary and can be removed. + +It is outside the scope for this document to describe in detail how such +guard functions are written, but for the curious here is the implementation +of `literal_is_map()`: + + static int + literal_is_map(LoaderState* stp, GenOpArg Lit) + { + Eterm term; + + ASSERT(Lit.type == TAG_q); + term = stp->literals[Lit.val].term; + return is_map(term); + } + +#### Handling instruction with variable number of operands #### + +Some instructions, such as `select_val/3`, essentially has a variable +number of operands. Such instructions have a `{list,[...]}` operand +as their last operand in the BEAM assembly code. For example: + + {select_val,{x,0}, + {f,1}, + {list,[{atom,b},{f,4},{atom,a},{f,5}]}}. + +The loader will convert a `{list,[...]}` operand to an `u` operand whose +value is the number of elements in the list, followed by each element in +the list. The instruction above would be translated to the following +generic instruction: + + {select_val,{x,0},{f,1},{u,4},{atom,b},{f,4},{atom,a},{f,5}} + +To match a variable number of arguments we need to use the special +operand type `*` like this: + + select_val Src=aiq Fail=f Size=u List=* => \ + i_const_select_val Src Fail Size List + +This transformation renames a `select_val/3` instruction +with a constant source operand to `i_const_select_val/3`. + +#### Constructing new instructions on the right side #### + +The most common operand on the right side is a variable that was bound while +matching the left side. For example: + + trim N Remaining => i_trim N + +An operand can also be a type letter to construct an operand of that type. +Each type has a default value. For example, the type `x` has the default +value 1023, which is the highest X register. That makes `x` on the right +side a convenient shortcut for a temporary X register. For example: + + is_number Fail Literal=q => move Literal x | is_number Fail x + +If the second operand for `is_number/2` is a literal, it will be moved to +X register 1023. Then `is_number/2` will test whether the value stored in +X register 1023 is a number. + +This kind of transformation is useful when it is rare that an operand can +be anything else but a register. In the case of `is_number/2`, the second +operand is always a register unless the compiler optimizations have been +disabled. + +If the default value is not suitable, the type letter can be followed +by `=` and a value. Most types take an integer value. The value for +an atom is written the same way as in the C source code. For example, +the atom `false` is written as `am_false`. The atom must be listed in +`atom.names`. + +Here is an example showing how values can be specified: + + bs_put_utf32 Fail=j Flags=u Src=s => \ + i_bs_validate_unicode Fail Src | \ + bs_put_integer Fail i=32 u=1 Flags Src + +#### Type letters on the right side #### + +Here follows all types that are allowed to be used in operands for +instructions being constructed on the right side of a transformation +rule. + +* `u` - Construct an untagged integer. The default value is 0. + +* `x` - X register. The default value is 1023. That makes `x` convenient to +use as a temporary X register. + +* `y` - Y register. The default value is 0. + +* `l` - Foating point register number. The default value is 0. + +* `i` - Tagged literal integer. The default value is 0. + +* `a` - Tagged atom. The default value is the empty atom (`am_Empty`). + +* `n` - NIL (`[]`, the empty list). + +#### Function call on the right side #### + +Transformations that are not possible to describe with the rule +language as described here can be written as a C function in +`beam_load.c` and called from the right side of a transformation. The +left side of the transformation will perform the match and bind +operands to variables. The variables can then be passed to a +generator function on the right side. For example: + + bif2 Fail=j u$bif:erlang:element/2 Index=s Tuple=xy Dst=d => \ + gen_element(Jump, Index, Tuple, Dst) + +This transformation rule matches a call to the BIF `element/2`. +The operands will be captured and the function `gen_element()` will +be called. + +`gen_element()` will produce one of two instructions depending +on `Index`. If `Index` is an integer in the range from 1 up to +the maximum tuple size, the instruction `i_fast_element/2` will +be produced, otherwise the instruction `i_element/4` will be +produced. The corresponding specific instructions are: + + i_fast_element xy j? I d + i_element xy j? s d + +The `i_fast_element/2` instruction is faster because the tuple is +already an untagged integer. It also knows that the index is at least +1, so it does not have to test for that. The `i_element/4` +instruction will have to fetch the index from a register, test that it +is an integer, and untag the integer. + +It is outside the scope of this document to describe in detail how +generator functions are written, but for the curious, here is the +implementation of `gen_element()`: + + static GenOp* + gen_element(LoaderState* stp, GenOpArg Fail, + GenOpArg Index, GenOpArg Tuple, GenOpArg Dst) + { + GenOp* op; + + NEW_GENOP(stp, op); + op->arity = 4; + op->next = NULL; + + if (Index.type == TAG_i && Index.val > 0 && + Index.val <= ERTS_MAX_TUPLE_SIZE && + (Tuple.type == TAG_x || Tuple.type == TAG_y)) { + op->op = genop_i_fast_element_4; + op->a[0] = Tuple; + op->a[1] = Fail; + op->a[2].type = TAG_u; + op->a[2].val = Index.val; + op->a[3] = Dst; + } else { + op->op = genop_i_element_4; + op->a[0] = Tuple; + op->a[1] = Fail; + op->a[2] = Index; + op->a[3] = Dst; + } + + return op; + } +} + +### Defining the implementation ### + +The actual implementation of instructions are also defined in `.tab` +files processed by **beam\_makeops**. For practical reasons, +instruction definitions are stored in several files, at the time of +writing in the following files: + + bif_instrs.tab + arith_instrs.tab + bs_instrs.tab + float_instrs.tab + instrs.tab + map_instrs.tab + msg_instrs.tab + select_instrs.tab + trace_instrs.tab + +There is also a file that only contains macro definitions: + + macros.tab + +The syntax of each file is similar to C code. In fact, most of +the contents *is* C code, interspersed with macro invocations. + +To allow Emacs to auto-indent the code, each file starts with the +following line: + + // -*- c -*- + +To avoid messing up the indentation, all comments are written +as C++ style comments (`//`) instead of `#`. Note that a comment +must start at the beginning of a line. + +The meat of an instruction definition file are macro definitions. +We have seen this macro definition before: + + move(Src, Dst) { + $Dst = $Src; + } + +A macro definitions must start at the beginning of the line (no spaces +allowed), the opening curly bracket must be on the same line, and the +finishing curly bracket must be at the beginning of a line. It is +recommended that the macro body is properly indented. + +As a convention, the macro arguments in the head all start with an +uppercase letter. In the body, the macro arguments can be expanded +by preceding them with `$`. + +A macro definition whose name and arity matches a family of +specific instructions is assumed to be the implementation of that +instruction. + +A macro can also be invoked from within another macro. For example, +`move_deallocate_return/2` avoids repeating code by invoking +`$deallocate_return()` as a macro: + + move_deallocate_return(Src, Deallocate) { + x(0) = $Src; + $deallocate_return($Deallocate); + } + +Here is the definition of `deallocate_return/1`: + + deallocate_return(Deallocate) { + //| -no_next + int words_to_pop = $Deallocate; + SET_I((BeamInstr *) cp_val(*E)); + E = ADD_BYTE_OFFSET(E, words_to_pop); + CHECK_TERM(x(0)); + DispatchReturn; + } + +The expanded code for `move_deallocate_return` will look this: + + OpCase(move_deallocate_return_cQ): + { + x(0) = I[1]; + do { + int words_to_pop = Qb(BeamExtraData(I[0])); + SET_I((BeamInstr *) cp_val(*E)); + E = ADD_BYTE_OFFSET(E, words_to_pop); + CHECK_TERM(x(0)); + DispatchReturn; + } while (0); + } + +When expanding macros, **beam\_makeops** wraps the expansion in a +`do`/`while` wrapper unless **beam\_makeops** can clearly see that no +wrapper is needed. In this case, the wrapper is needed. + +Note that arguments for macros cannot be complex expressions, because +the arguments are split on `,`. For example, the following would +not work because **beam\_makeops** would split the expression into +two arguments: + + $deallocate_return(get_deallocation(y, $Deallocate)); + +#### Code generation directives #### + +Within macro definitions, `//` comments are in general not treated +specially. They will be copied to the file with the generated code +along with the rest of code in the body. + +However, there is an exception. Within a macro definition, a line that +starts with whitespace followed by `//|` is treated specially. The +rest of the line is assumed to contain directives to control code +generation. + +Currently, two code generation directives are recognized: + +* `-no_prefetch` +* `-no_next` + +##### The -no_prefetch directive ##### + +To see what `-no_prefetch` does, let's first look at the default code +generation. Here is the code generated for `move_cx`: + + OpCase(move_cx): + { + BeamInstr next_pf = BeamCodeAddr(I[2]); + xb(BeamExtraData(I[0])) = I[1]; + I += 2; + ASSERT(VALID_INSTR(next_pf)); + GotoPF(next_pf); + } + +Note that the very first thing done is to fetch the address to the +next instruction. The reason is that it usually improves performance. + +Just as a demonstration, we can add a `-no_prefetch` directive to +the `move/2` instruction: + + move(Src, Dst) { + //| -no_prefetch + $Dst = $Src; + } + +We can see that the prefetch is no longer done: + + OpCase(move_cx): + { + xb(BeamExtraData(I[0])) = I[1]; + I += 2; + ASSERT(VALID_INSTR(*I)); + Goto(*I); + } + +When would we want to turn off the prefetch in practice? + +In instructions that will not always execute the next instruction. +For example: + + is_atom(Fail, Src) { + if (is_not_atom($Src)) { + $FAIL($Fail); + } + } + + // From macros.tab + FAIL(Fail) { + //| -no_prefetch + $SET_I_REL($Fail); + Goto(*I); + } + +`is_atom/2` may either execute the next instruction (if the second +operand is an atom) or branch to the failure label. + +The generated code looks like this: + + OpCase(is_atom_fx): + { + if (is_not_atom(xb(I[1]))) { + ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0))); + I += fb(BeamExtraData(I[0])) + 0;; + Goto(*I);; + } + I += 2; + ASSERT(VALID_INSTR(*I)); + Goto(*I); + } + +##### The -no_next directive ##### + +Next we will look at when the `-no_next` directive can be used. Here +is the `jump/1` instruction: + + jump(Fail) { + $JUMP($Fail); + } + + // From macros.tab + JUMP(Fail) { + //| -no_next + $SET_I_REL($Fail); + Goto(*I); + } + +The generated code looks like this: + + OpCase(jump_f): + { + ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0))); + I += fb(BeamExtraData(I[0])) + 0;; + Goto(*I);; + } + +If we remove the `-no_next` directive, the code would look like this: + + OpCase(jump_f): + { + BeamInstr next_pf = BeamCodeAddr(I[1]); + ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0))); + I += fb(BeamExtraData(I[0])) + 0;; + Goto(*I);; + I += 1; + ASSERT(VALID_INSTR(next_pf)); + GotoPF(next_pf); + } + +In the end, the C compiler will probably optimize this code to the +same native code as the first version, but the first version is certainly +much easier to read for human readers. + +#### Macros in the macros.tab file #### + +The file `macros.tab` contains many useful macros. When implementing +new instructions it is good practice to look through `macros.tab` to +see if any of existing macros can be used rather than re-inventing +the wheel. + +We will describe a few of the most useful macros here. + +##### The GC_REGEXP definition ##### + +The following line defines a regular expression that will recognize +a call to a function that does a garbage collection: + + GC_REGEXP=erts_garbage_collect|erts_gc|GcBifFunction; + +The purpose is that **beam\_makeops** can verify that an instruction +that does a garbage collection and has an `d` operand uses the +`$REFRESH_GEN_DEST()` macro. + +If you need to define a new function that does garbage collection, +you should give it the prefix `erts_gc_`. If that is not possible +you should update the regular expression so that it will match your +new function. + +##### FAIL(Fail) ##### + +Branch to `$Fail`. Will suppress prefetch (`-no_prefetch`). Typical use: + + is_nonempty_list(Fail, Src) { + if (is_not_list($Src)) { + $FAIL($Fail); + } + } + +##### JUMP(Fail) ##### + +Branch to `$Fail`. Suppresses generation of dispatch of the next +instruction (`-no_next`). Typical use: + + jump(Fail) { + $JUMP($Fail); + } + +##### GC_TEST(NeedStack, NeedHeap, Live) ##### + +`$GC_TEST(NeedStack, NeedHeap, Live)` tests that given amount of +stack space and heap space is available. If not it will do a +garbage collection. Typical use: + + test_heap(Nh, Live) { + $GC_TEST(0, $Nh, $Live); + } + +##### AH(NeedStack, NeedHeap, Live) ##### + +`AH(NeedStack, NeedHeap, Live)` allocates a stack frame and +optionally additional heap space. + +#### Pre-defined macros and variables #### + +**beam\_makeops** defines several built-in macros and pre-bound variables. + +##### The NEXT_INSTRUCTION pre-bound variable ##### + +The NEXT_INSTRUCTION is a pre-bound variable that is available in +all instructions. It expands to the address of the next instruction. + +Here is an example: + + i_call(CallDest) { + SET_CP(c_p, $NEXT_INSTRUCTION); + $DISPATCH_REL($CallDest); + } + +When calling a function, the return address is first stored in `c_p->cp` +(using the `SET_CP()` macro defined in `beam_emu.c`), and then control is +transferred to the callee. Here is the generated code: + + OpCase(i_call_f): + { + SET_CP(c_p, I+1); + ASSERT(VALID_INSTR(*(I + (fb(BeamExtraData(I[0]))) + 0))); + I += fb(BeamExtraData(I[0])) + 0;; + DTRACE_LOCAL_CALL(c_p, erts_code_to_codemfa(I)); + Dispatch();; + } + +We can see that that `$NEXT_INSTRUCTION` has been expanded to `I+1`. +That makes sense since the size of the `i_call_f/1` instruction is +one word. + +##### The IP_ADJUSTMENT pre-bound variable ##### + +`$IP_ADJUSTMENT` is usually 0. In a few combined instructions +(described below) it can be non-zero. It is used like this +in `macros.tab`: + + SET_I_REL(Offset) { + ASSERT(VALID_INSTR(*(I + ($Offset) + $IP_ADJUSTMENT))); + I += $Offset + $IP_ADJUSTMENT; + } + +Avoid using `IP_ADJUSTMENT` directly. Use `SET_I_REL()` or +one of the macros that invoke such as `FAIL()` or `JUMP()` +defined in `macros.tab`. + +#### Pre-defined macro functions #### + +##### The IF() macro ##### + +`$IF(Expr, IfTrue, IfFalse)` evaluates `Expr`, which must be a valid +Perl expression (which for simple numeric expressions have the same +syntax as C). If `Expr` evaluates to 0, the entire `IF()` expression will be +replaced with `IfFalse`, otherwise it will be replaced with `IfTrue`. + +See the description of `OPERAND_POSITION()` for an example. + +##### The OPERAND\_POSITION() macro ##### + +`$OPERAND_POSITION(Expr)` returns the position for `Expr`, if +`Expr` is an operand that is not packed. The first operand is +at position 1. + +Returns 0 otherwise. + +This macro could be used like this in order to share code: + + FAIL(Fail) { + //| -no_prefetch + $IF($OPERAND_POSITION($Fail) == 1 && $IP_ADJUSTMENT == 0, + goto common_jump, + $DO_JUMP($Fail)); + } + + DO_JUMP(Fail) { + $SET_I_REL($Fail); + Goto(*I)); + } + + // In beam_emu.c: + common_jump: + I += I[1]; + Goto(*I)); + + +#### The $REFRESH\_GEN\_DEST() macro #### + +When a specific instruction has a `d` operand, early during execution +of the instruction, a pointer will be initialized to point to the X or +Y register in question. + +If there is a garbage collection before the result is stored, +the stack will move and if the `d` operand refered to a Y +register, the pointer will no longer be valid. (Y registers are +stored on the stack.) + +In those circumstances, `$REFRESH_GEN_DEST()` must be invoked +to set up the pointer again. **beam\_makeops** will notice +if there is a call to a function that does a garbage collection and +`$REFRESH_GEN_DEST()` is not called. + +Here is a complete example. The `new_map` instruction is defined +like this: + + new_map d t I + +It is implemented like this: + + new_map(Dst, Live, N) { + Eterm res; + + HEAVY_SWAPOUT; + res = erts_gc_new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION); + HEAVY_SWAPIN; + $REFRESH_GEN_DEST(); + $Dst = res; + $NEXT($NEXT_INSTRUCTION+$N); + } + +If we have forgotten the `$REFRESH_GEN_DEST()` there would be a message +similar to this: + + pointer to destination register is invalid after GC -- use $REFRESH_GEN_DEST() + ... from the body of new_map at beam/map_instrs.tab(30) + +#### Combined instructions #### + +**Problem**: For frequently executed instructions we want to use +"fast" operands types such as `x` and `y`, as opposed to `s` or `S`. +To avoid an explosion in code size, we want to share most of the +implementation between the instructions. Here are the specific +instructions for `i_increment/5`: + + i_increment r W t d + i_increment x W t d + i_increment y W t d + +The `i_increment` instruction is implemented like this: + + i_increment(Source, IncrementVal, Live, Dst) { + Eterm increment_reg_source = $Source; + Eterm increment_val = $IncrementVal; + Uint live; + Eterm result; + + if (ERTS_LIKELY(is_small(increment_reg_val))) { + Sint i = signed_val(increment_reg_val) + increment_val; + if (ERTS_LIKELY(IS_SSMALL(i))) { + $Dst = make_small(i); + $NEXT0(); + } + } + live = $Live; + HEAVY_SWAPOUT; + reg[live] = increment_reg_val; + reg[live+1] = make_small(increment_val); + result = erts_gc_mixed_plus(c_p, reg, live); + HEAVY_SWAPIN; + ERTS_HOLE_CHECK(c_p); + if (ERTS_LIKELY(is_value(result))) { + $REFRESH_GEN_DEST(); + $Dst = result; + $NEXT0(); + } + ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue)); + goto find_func_info; + } + +There will be three almost identical copies of the code. Given the +size of the code, that could be too high cost to pay. + +To avoid the three copies of the code, we could use only one specific +instruction: + + i_increment S W t d + +(The same implementation as above will work.) + +That reduces the code size, but is slower because `S` means that +there will be extra code to test whether the operand refers to an X +register or a Y register. + +**Solution**: We can use "combined instructions". Combined +instructions are combined from instruction fragments. The +bulk of the code can be shared. + +Here we will show how `i_increment` can be implemented as a combined +instruction. We will show each individual fragment first, and then +show how to connect them together. First we will need a variable that +we can store the value fetched from the register in: + + increment.head() { + Eterm increment_reg_val; + } + +The name `increment` is the name of the group that the fragment +belongs to. Note that it does not need to have the same +name as the instruction. The group name is followed by `.` and +the name of the fragment. The name `head` is pre-defined. +The code in it will be placed at the beginning of a block, so +that all fragments in the group can access it. + +Next we define the fragment that will pick up the value from the +register from the first operand: + + increment.fetch(Src) { + increment_reg_val = $Src; + } + +We call this fragment `fetch`. This fragment will be duplicated three +times, one for each value of the first operand (`r`, `x`, and `y`). + +Next we define the main part of the code that do the actual incrementing. + + increment.execute(IncrementVal, Live, Dst) { + Eterm increment_val = $IncrementVal; + Uint live; + Eterm result; + + if (ERTS_LIKELY(is_small(increment_reg_val))) { + Sint i = signed_val(increment_reg_val) + increment_val; + if (ERTS_LIKELY(IS_SSMALL(i))) { + $Dst = make_small(i); + $NEXT0(); + } + } + live = $Live; + HEAVY_SWAPOUT; + reg[live] = increment_reg_val; + reg[live+1] = make_small(increment_val); + result = erts_gc_mixed_plus(c_p, reg, live); + HEAVY_SWAPIN; + ERTS_HOLE_CHECK(c_p); + if (ERTS_LIKELY(is_value(result))) { + $REFRESH_GEN_DEST(); + $Dst = result; + $NEXT0(); + } + ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue)); + goto find_func_info; + } + +We call this fragment `execute`. It will handle the three remaining +operands (`W t d`). There will only be one copy of this fragment. + +Now that we have defined the fragments, we need to inform +**beam\_makeops** how they should be connected: + + i_increment := increment.fetch.execute; + +To the left of the `:=` is the name of the specific instruction that +should be implemented by the fragments, in this case `i_increment`. +To the right of `:=` is the name of the group with the fragments, +followed by a `.`. Then the name of the fragments in the group are +listed in the order they should be executed. Note that the `head` +fragment is not listed. + +The line ends in `;` (to avoid messing up the indentation in Emacs). + +(Note that in practice the `:=` line is usually placed before the +fragments.) + +The generated code looks like this: + + { + Eterm increment_reg_val; + OpCase(i_increment_rWtd): + { + increment_reg_val = r(0); + } + goto increment__execute; + + OpCase(i_increment_xWtd): + { + increment_reg_val = xb(BeamExtraData(I[0])); + } + goto increment__execute; + + OpCase(i_increment_yWtd): + { + increment_reg_val = yb(BeamExtraData(I[0])); + } + goto increment__execute; + + increment__execute: + { + // Here follows the code from increment.execute() + . + . + . + } + +##### Some notes about combined instructions ##### + +The operands that are different must be at +the beginning of the instruction. All operands in the last +fragment must have the same operands in all variants of +the specific instruction. + +As an example, the following specific instructions cannot be +implemented as a combined instruction: + + i_times j? t x x d + i_times j? t x y d + i_times j? t s s d + +We would have to change the order of the operands so that the +two operands that are different are placed first: + + i_times x x j? t d + i_times x y j? t d + i_times s s j? t d + +We can then define: + + i_times := times.fetch.execute; + + times.head { + Eterm op1, op2; + } + + times.fetch(Src1, Src2) { + op1 = $Src1; + op2 = $Src2; + } + + times.execute(Fail, Live, Dst) { + // Multiply op1 and op2. + . + . + . + } + +Several instructions can share a group. As an example, the following +instructions have different names, but in the end they all create a +binary. The last two operands are common for all of them: + + i_bs_init_fail xy j? t? x + i_bs_init_fail_heap s I j? t? x + i_bs_init W t? x + i_bs_init_heap W I t? x + +The instructions are defined like this (formatted with extra +spaces for clarity): + + i_bs_init_fail_heap := bs_init . fail_heap . verify . execute; + i_bs_init_fail := bs_init . fail . verify . execute; + i_bs_init := bs_init . . plain . execute; + i_bs_init_heap := bs_init . heap . execute; + +Note that the first two instruction have three fragments, while the +other two only have two fragments. Here are the fragments: + + bs_init_bits.head() { + Eterm num_bits_term; + Uint num_bits; + Uint alloc; + } + + bs_init_bits.plain(NumBits) { + num_bits = $NumBits; + alloc = 0; + } + + bs_init_bits.heap(NumBits, Alloc) { + num_bits = $NumBits; + alloc = $Alloc; + } + + bs_init_bits.fail(NumBitsTerm) { + num_bits_term = $NumBitsTerm; + alloc = 0; + } + + bs_init_bits.fail_heap(NumBitsTerm, Alloc) { + num_bits_term = $NumBitsTerm; + alloc = $Alloc; + } + + bs_init_bits.verify(Fail) { + // Verify the num_bits_term, fail using $FAIL + // if there is a problem. + . + . + . + } + + bs_init_bits.execute(Live, Dst) { + // Long complicated code to a create a binary. + . + . + . + } + +The full definitions of those instructions can be found in `bs_instrs.tab`. +The generated code can be found in `beam_warm.h`. diff --git a/erts/emulator/nifs/common/zlib_nif.c b/erts/emulator/nifs/common/zlib_nif.c index fa29b4fb71..b709ed5a6f 100644 --- a/erts/emulator/nifs/common/zlib_nif.c +++ b/erts/emulator/nifs/common/zlib_nif.c @@ -717,7 +717,9 @@ static ERL_NIF_TERM zlib_deflateEnd(ErlNifEnv *env, int argc, const ERL_NIF_TERM static ERL_NIF_TERM zlib_deflateParams(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { zlib_data_t *d; + int res, level, strategy; + Bytef dummy_buffer; if(argc != 3 || !get_zlib_data(env, argv[0], &d) || !enif_get_int(env, argv[1], &level) @@ -729,12 +731,27 @@ static ERL_NIF_TERM zlib_deflateParams(ErlNifEnv *env, int argc, const ERL_NIF_T return enif_raise_exception(env, am_not_initialized); } - /* deflateParams will flush everything currently in the stream, corrupting - * the heap unless it's empty. We therefore pretend to have a full output - * buffer, forcing a Z_BUF_ERROR if there's anything left to be flushed. */ - d->s.avail_out = 0; + /* This is a bit of a hack; deflateParams flushes with Z_BLOCK which won't + * stop at a byte boundary, so we can't split this operation up, and we + * can't allocate a buffer large enough to fit it in one go since we have + * to support zlib versions that lack deflatePending. + * + * We therefore flush everything prior to this call to ensure that we are + * stopped on a byte boundary and have no pending data. We then hand it a + * dummy buffer to detect when this assumption doesn't hold (Hopefully + * never), and to smooth over an issue with zlib 1.2.11 which always + * returns Z_BUF_ERROR when d->s.avail_out is 0, regardless of whether + * there's any pending data or not. */ + + d->s.next_out = &dummy_buffer; + d->s.avail_out = 1; + res = deflateParams(&d->s, level, strategy); + if(d->s.avail_out == 0) { + return zlib_return(env, Z_STREAM_ERROR); + } + return zlib_return(env, res); } @@ -929,7 +946,7 @@ static ERL_NIF_TERM zlib_inflate(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar return enif_raise_exception(env, am_not_initialized); } - if(d->eos_seen) { + if(d->eos_seen && enif_ioq_size(d->input_queue) > 0) { int res; switch(d->eos_behavior) { @@ -943,11 +960,10 @@ static ERL_NIF_TERM zlib_inflate(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar } d->eos_seen = 0; + break; case EOS_BEHAVIOR_CUT: zlib_reset_input(d); - - return enif_make_tuple2(env, am_finished, enif_make_list(env, 0)); } } diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index 2d0ae9c83e..e2914cbc92 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -1365,81 +1365,59 @@ bad_dist_structure(Config) when is_list(Config) -> start_monitor(Offender,P), P ! one, send_bad_structure(Offender, P,{?DOP_MONITOR_P_EXIT,'replace',P,normal},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_monitor(Offender,P), send_bad_structure(Offender, P,{?DOP_MONITOR_P_EXIT,'replace',P,normal,normal},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_link(Offender,P), send_bad_structure(Offender, P,{?DOP_LINK},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_link(Offender,P), send_bad_structure(Offender, P,{?DOP_UNLINK,'replace'},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_link(Offender,P), send_bad_structure(Offender, P,{?DOP_UNLINK,'replace',make_ref()},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_link(Offender,P), send_bad_structure(Offender, P,{?DOP_UNLINK,make_ref(),P},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_link(Offender,P), send_bad_structure(Offender, P,{?DOP_UNLINK,normal,normal},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_monitor(Offender,P), send_bad_structure(Offender, P,{?DOP_MONITOR_P,'replace',P},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_monitor(Offender,P), send_bad_structure(Offender, P,{?DOP_MONITOR_P,'replace',P,normal},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_monitor(Offender,P), send_bad_structure(Offender, P,{?DOP_DEMONITOR_P,'replace',P},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + start_monitor(Offender,P), send_bad_structure(Offender, P,{?DOP_DEMONITOR_P,'replace',P,normal},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), + send_bad_structure(Offender, P,{?DOP_EXIT,'replace',P},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT,make_ref(),normal,normal},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT_TT,'replace',token,P},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT_TT,make_ref(),token,normal,normal},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT2,'replace',P},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT2,make_ref(),normal,normal},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT2_TT,'replace',token,P},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_EXIT2_TT,make_ref(),token,normal,normal},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_GROUP_LEADER,'replace'},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_GROUP_LEADER,'replace','atomic'},2), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_GROUP_LEADER,'replace',P},0), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_REG_SEND_TT,'replace','',name},2,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_REG_SEND_TT,'replace','',name,token},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace',''},2,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace','',P},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace','',name},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_REG_SEND,'replace','',name,{token}},2,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_SEND_TT,'',P},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_SEND_TT,'',name,token},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_SEND,''},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_SEND,'',name},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), send_bad_structure(Offender, P,{?DOP_SEND,'',P,{token}},0,{message}), - pong = rpc:call(Victim, net_adm, ping, [Offender]), P ! two, P ! check_msgs, receive @@ -1685,13 +1663,16 @@ bad_dist_ext_size(Config) when is_list(Config) -> start_node_monitors([Offender,Victim]), Parent = self(), - P = spawn_link(Victim, + P = spawn_opt(Victim, fun () -> Parent ! {self(), started}, receive check_msgs -> ok end, %% DID CRASH HERE bad_dist_ext_check_msgs([one]), Parent ! {self(), messages_checked} - end), + end, + [link, + %% on_heap to force total_heap_size to inspect msg queue + {message_queue_data, on_heap}]), receive {P, started} -> ok end, P ! one, @@ -1714,6 +1695,7 @@ bad_dist_ext_size(Config) when is_list(Config) -> verify_still_up(Offender, Victim), + %% Let process_info(P, total_heap_size) find bad msg and disconnect rpc:call(Victim, erlang, process_info, [P, total_heap_size]), verify_down(Offender, connection_closed, Victim, killed), @@ -1795,10 +1777,11 @@ send_bad_structure(Offender,Victim,Bad,WhereToPutSelf) -> send_bad_structure(Offender,Victim,Bad,WhereToPutSelf,PayLoad) -> Parent = self(), Done = make_ref(), - spawn(Offender, + spawn_link(Offender, fun () -> Node = node(Victim), pong = net_adm:ping(Node), + erlang:monitor_node(Node, true), DCtrl = dctrl(Node), Bad1 = case WhereToPutSelf of 0 -> @@ -1812,7 +1795,16 @@ send_bad_structure(Offender,Victim,Bad,WhereToPutSelf,PayLoad) -> [] -> []; _Other -> [dmsg_ext(PayLoad)] end, + + receive {nodedown, Node} -> exit("premature nodedown") + after 10 -> ok + end, + dctrl_send(DCtrl, DData), + + receive {nodedown, Node} -> ok + after 5000 -> exit("missing nodedown") + end, Parent ! {DData,Done} end), receive diff --git a/erts/emulator/test/process_SUITE.erl b/erts/emulator/test/process_SUITE.erl index a9f20f9928..a8bcfac84d 100644 --- a/erts/emulator/test/process_SUITE.erl +++ b/erts/emulator/test/process_SUITE.erl @@ -2532,8 +2532,13 @@ system_task_on_suspended(Config) when is_list(Config) -> end. gc_request_when_gc_disabled(Config) when is_list(Config) -> - Master = self(), AIS = erts_debug:set_internal_state(available_internal_state, true), + gc_request_when_gc_disabled_do(ref), + gc_request_when_gc_disabled_do(immed), + erts_debug:set_internal_state(available_internal_state, AIS). + +gc_request_when_gc_disabled_do(ReqIdType) -> + Master = self(), {P, M} = spawn_opt(fun () -> true = erts_debug:set_internal_state(gc_state, false), @@ -2545,7 +2550,10 @@ gc_request_when_gc_disabled(Config) when is_list(Config) -> receive after 100 -> ok end end, [monitor, link]), receive {P, gc_state, false} -> ok end, - ReqId = make_ref(), + ReqId = case ReqIdType of + ref -> make_ref(); + immed -> immed + end, async = garbage_collect(P, [{async, ReqId}]), receive {garbage_collect, ReqId, Result} -> @@ -2554,7 +2562,6 @@ gc_request_when_gc_disabled(Config) when is_list(Config) -> ok end, receive {garbage_collect, ReqId, true} -> ok end, - erts_debug:set_internal_state(available_internal_state, AIS), receive {'DOWN', M, process, P, _Reason} -> ok end, ok. diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index d7791d23fa..da994fae3e 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -19,7 +19,7 @@ # %CopyrightEnd% # use strict; -use vars qw($BEAM_FORMAT_NUMBER); +use vars qw($BEAM_FORMAT_NUMBER $GC_REGEXP); use constant COLD => 0; use constant WARM => 1; use constant HOT => 2; @@ -36,6 +36,7 @@ use constant PACK_CMD_LOOSE => '3'; use constant PACK_CMD_WIDE => '4'; $BEAM_FORMAT_NUMBER = undef; +$GC_REGEXP = undef; my $target = \&emulator_output; my $outdir = "."; # Directory for output files. @@ -77,6 +78,10 @@ my %num_specific; my %gen_to_spec; my %specific_op; +# The following hashes are used for error checking. +my %print_name; +my %specific_op_arity; + # Information about each specific operator. Key is the print name (e.g. get_list_xxy). # Value is a hash. my %spec_op_info; @@ -131,7 +136,10 @@ my $loader_types = "nprvlqo"; my $genop_types = $compiler_types . $loader_types; # -# Defines the argument types and their loaded size assuming no packing. +# Define the operand types and their loaded size assuming no packing. +# +# Those are the types that can be used in the definition of a specific +# instruction. # my %arg_size = ('r' => 0, # x(0) - x register zero 'x' => 1, # x(N), N > 0 - x register @@ -154,12 +162,35 @@ my %arg_size = ('r' => 0, # x(0) - x register zero 'A' => 1, # arity value 'P' => 1, # byte offset into tuple or stack 'Q' => 1, # like 'P', but packable - 'h' => 1, # character + 'h' => 1, # character (not used) 'l' => 1, # float reg 'q' => 1, # literal term ); # +# Define the types that may be used in a transformation rule. +# +# %pattern_type defines the types that may be used in a pattern +# on the left side. +# +# %construction_type defines the types that may be used when +# constructing a new instruction on the right side (a subset of +# the pattern types that are possible to construct). +# +my $pattern_types = "acdfjilnopqsuxy"; +my %pattern_type; +@pattern_type{split("", $pattern_types)} = (1) x length($pattern_types); + +my %construction_type; +foreach my $type (keys %pattern_type) { + $construction_type{$type} = 1 + if index($genop_types, $type) >= 0; +} +foreach my $makes_no_sense ('f', 'j', 'o', 'p', 'q') { + delete $construction_type{$makes_no_sense}; +} + +# # Generate bits. # my %type_bit; @@ -194,7 +225,8 @@ sub define_type_bit { define_type_bit('S', $type_bit{'d'}); define_type_bit('j', $type_bit{'f'} | $type_bit{'p'}); - # Aliases (for matching purposes). + # Aliases of 'u'. Those specify how to load the operand and + # what kind of packing can be done. define_type_bit('t', $type_bit{'u'}); define_type_bit('I', $type_bit{'u'}); define_type_bit('W', $type_bit{'u'}); @@ -279,9 +311,15 @@ if ($wordsize == 64) { # Add placeholders for built-in macros. # -$c_code{'IS_PACKED'} = ['$Expr',"built-in macro",('Expr')]; -$c_code{'ARG_POSITION'} = ['$Expr',"built-in macro",('Expr')]; -foreach my $name (keys %c_code) { +my %predef_macros = + (OPERAND_POSITION => ['Expr'], + IF => ['Expr','IfTrue','IfFalse'], + REFRESH_GEN_DEST => [], + ); +foreach my $name (keys %predef_macros) { + my @args = @{$predef_macros{$name}}; + my $body = join(':', map { '$' . $_ } @args); + $c_code{$name} = [$body,"built-in macro",@args], $c_code_used{$name} = 1; } @@ -359,8 +397,10 @@ while (<>) { # if (/^([\w_][\w\d_]+)=(.*)/) { no strict 'refs'; - my($name) = $1; - $$name = $2; + my $name = $1; + my $value = $2; + $value =~ s/;\s*$//; + $$name = $value; next; } @@ -1019,6 +1059,22 @@ sub parse_specific_op { my $key = "$name/$arity"; foreach my $args_ref (@res) { @args = @$args_ref; + my $arity = @args; + my $loc = "$ARGV($.)"; + if (defined $specific_op_arity{$name}) { + my($prev_arity,$loc) = @{$specific_op_arity{$name}}; + if ($arity != $prev_arity) { + error("$name defined with arity $arity, " . + "but previously defined with arity $prev_arity at $loc"); + } + } + $specific_op_arity{$name} = [$arity,$loc]; + my $print_name = print_name($name, @args); + if (defined $print_name{$print_name}) { + error("$name @args: already defined at " . + $print_name{$print_name}); + } + $print_name{$print_name} = $loc; push @{$specific_op{$key}}, [$name,$hotness,@args]; } @@ -1333,7 +1389,9 @@ sub cg_basic { # sub cg_combined_size { - my %params = (@_, pack_options => \@basic_pack_options); + my %params = (@_, + pack_options => \@basic_pack_options, + size_only => 1); $params{pack_options} = \@extended_pack_options if $params{first}; my($size) = code_gen(%params); @@ -1361,6 +1419,7 @@ sub code_gen { my %params = (extra_comments => '', offset => 0, inc => 0, + size_only => 0, @_); my $name = $params{name}; my $extra_comments = $params{extra_comments}; @@ -1393,6 +1452,7 @@ sub code_gen { my $need_block = 0; my $arg_offset = $offset; + my $has_gen_dest = 0; @args = map { s/[?]$//g; $_ } @args; foreach (@args) { my($this_size) = $arg_size{$_}; @@ -1403,6 +1463,7 @@ sub code_gen { "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; push(@f, "*dst_ptr"); $this_size = $1; + $has_gen_dest = 1; last SWITCH; }; /^packed:[a-zA-z]:(\d):(.*)/ and do { @@ -1435,6 +1496,7 @@ sub code_gen { $var_decls .= "Eterm dst = " . arg_offset($arg_offset) . ";\n" . "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; push(@f, "*dst_ptr"); + $has_gen_dest = 1; last SWITCH; }; defined $arg_size{$_} and do { @@ -1449,10 +1511,10 @@ sub code_gen { } # - # If the implementation is in beam_emu.c, there is nothing - # more to do. + # If the implementation is in beam_emu.c or if + # the caller only wants the size, we are done. # - unless (defined $c_code_ref) { + if (not defined $c_code_ref or $params{size_only}) { return ($size+1, undef, ''); } @@ -1517,9 +1579,36 @@ sub code_gen { "{", "$var_decls$body", "}", ""); + + # Make sure that $REFRESH_GEN_DEST() is used when a + # general destination ('d') may have been clobbered by + # a GC. + my $gc_error = verify_gc_code($code, $has_gen_dest); + if (defined $gc_error) { + warn $gc_error; + error("... from the body of $name at $where"); + } + + # Done. ($size+1, $code, $pack_spec); } +sub verify_gc_code { + my $code = shift; + my $has_gen_dest = shift; + + return unless $has_gen_dest; + + if ($code =~ /$GC_REGEXP/o) { + my $code_after_gc = substr($code, $+[0]); + unless ($code_after_gc =~ /dst_ptr = REG_TARGET_PTR/) { + return "pointer to destination register is invalid after GC -- " . + "use \$REFRESH_GEN_DEST()\n"; + } + } + return undef; +} + sub arg_offset { my $offset = shift; "I[" . ($offset+1) . "]"; @@ -1619,17 +1708,26 @@ sub expand_macro { } # Handle built-in macros. - if ($name eq 'ARG_POSITION') { + if ($name eq 'OPERAND_POSITION') { if ($body =~ /^I\[(\d+)\]$/) { $body = $1; } else { $body = 0; } - } elsif ($name eq 'IS_PACKED') { - $body = ($body =~ /^I\[\d+\]$/) ? 0 : 1; + } elsif ($name eq 'IF') { + my $expr = $new_bindings{Expr}; + my $bool = eval $expr; + if ($@ ne '') { + &error("bad expression '$expr' in \$IF()"); + } + my $part = $bool ? 'IfTrue' : 'IfFalse'; + $body = $new_bindings{$part}; + } elsif ($name eq 'REFRESH_GEN_DEST') { + $body = "dst_ptr = REG_TARGET_PTR(dst)"; } - # Wrap body if needed and return resul.t + + # Wrap body if needed and return result. $body = "do {\n$body\n} while (0)" if needs_do_wrapper($body); ($body,$rest); @@ -2156,12 +2254,19 @@ sub tr_parse_op { if (/^([a-z*]+)(.*)/) { $type = $1; $_ = $2; + error("$type: only a single type is allowed on right side of transformations") + if not $src and length($type) > 1; foreach (split('', $type)) { - error("bad type in $op") - unless defined $type_bit{$_} or $type eq '*'; - $_ eq 'r' and - error("$op: 'r' is not allowed in transformations") - } + next if $src and $type eq '*'; + error("$op: not a type") + unless defined $type_bit{$_}; + error("$op: the type '$_' is not allowed in transformations") + unless defined $pattern_type{$_}; + if (not $src) { + error("$op: type '$_' is not allowed on the right side of transformations") + unless defined $construction_type{$_}; + } + } } # Get an optional condition. (In source.) @@ -2194,10 +2299,18 @@ sub tr_parse_op { } # Get an optional value. (In destination.) - $type_val = $type eq 'x' ? 1023 : 0; + if ($type eq 'x') { + $type_val = 1023; + } elsif ($type eq 'a') { + $type_val = 'am_Empty'; + } else { + $type_val = 0; + } if (/^=(.*)/) { - error("value not allowed in source: $op") + error("$op: value not allowed in source") if $src; + error("$op: the type 'n' must not be given a value") + if $type eq 'n'; $type_val = $1; $_ = ''; } @@ -2207,13 +2320,16 @@ sub tr_parse_op { error("garbage '$_' after operand: $op") unless /^\s*$/; - # Test that destination has no conditions. + # Check the conditions. - unless ($src) { - error("condition not allowed in destination: $op") + if ($src) { + error("$op: the type '$type' is not allowed to be compared with a literal value") + if $cond and not $construction_type{$type}; + } else { + error("$op: condition not allowed in destination") if $cond; - error("variable name and type cannot be combined in destination: $op") - if $var && $type; + error("$op: variable name and type cannot be combined in destination") + if $var and $type; } ($var,$type,$type_val,$cond,$cond_val); |