diff options
Diffstat (limited to 'erts/emulator/beam')
47 files changed, 2167 insertions, 514 deletions
diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 5f23b2c168..f14b376419 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -51,11 +51,50 @@ plus.fetch(Op1, Op2) { plus.execute(Fail, Dst) { if (ERTS_LIKELY(is_both_small(PlusOp1, PlusOp2))) { +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + Sint lhs_tagged, rhs_untagged, res; + + /* The value part of immediate integers start right after the tag and + * occupy the rest of the word, so if you squint a bit they look like + * fixed-point integers; as long as you mask the tag away you will get + * correct results from addition/subtraction since they share the same + * notion of zero. It's fairly easy to see that the following holds + * when (a + b) is in range: + * + * (a >> s) + (b >> s) == ((a & ~m) + (b & ~m)) >> s + * + * Where 's' is the tag size and 'm' is the tag mask. + * + * The left-hand side is our fallback in the #else clause and is the + * fastest way to do this safely in plain C. The actual addition will + * never overflow since `Sint` has a much greater range than our + * smalls, so we can use the IS_SSMALL macro to see if the result is + * within range. + * + * What we're doing below is an extension of the right-hand side. By + * treating `a` and `b` as fixed-point integers, all additions whose + * result is out of range will also overflow `Sint` and we can use the + * compiler's overflow intrinsics to check for this condition. + * + * In addition, since the tag lives in the lowest bits we can further + * optimize this by only stripping the tag from either side. The higher + * bits can't influence the tag bits since we bail on overflow, so the + * tag bits from the tagged side will simply appear in the result. */ + lhs_tagged = PlusOp1; + rhs_untagged = PlusOp2 & ~_TAG_IMMED1_MASK; + + if (ERTS_LIKELY(!__builtin_add_overflow(lhs_tagged, rhs_untagged, &res))) { + ASSERT(is_small(res)); + $Dst = res; + $NEXT0(); + } +#else Sint i = signed_val(PlusOp1) + signed_val(PlusOp2); if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } +#endif } $OUTLINED_ARITH_2($Fail, mixed_plus, BIF_splus_2, PlusOp1, PlusOp2, $Dst); } @@ -73,11 +112,26 @@ minus.fetch(Op1, Op2) { minus.execute(Fail, Dst) { if (ERTS_LIKELY(is_both_small(MinusOp1, MinusOp2))) { +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + Sint lhs_tagged, rhs_untagged, res; + + /* See plus.execute */ + lhs_tagged = MinusOp1; + rhs_untagged = MinusOp2 & ~_TAG_IMMED1_MASK; + + if (ERTS_LIKELY(!__builtin_sub_overflow(lhs_tagged, rhs_untagged, &res))) { + ASSERT(is_small(res)); + $Dst = res; + $NEXT0(); + } +#else Sint i = signed_val(MinusOp1) - signed_val(MinusOp2); + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } +#endif } $OUTLINED_ARITH_2($Fail, mixed_minus, BIF_sminus_2, MinusOp1, MinusOp2, $Dst); } @@ -97,12 +151,27 @@ increment.execute(IncrementVal, Dst) { Eterm result; if (ERTS_LIKELY(is_small(increment_reg_val))) { +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + Sint lhs_tagged, rhs_untagged, res; + + /* See plus.execute */ + lhs_tagged = increment_reg_val; + rhs_untagged = (Sint)increment_val << _TAG_IMMED1_SIZE; + + if (ERTS_LIKELY(!__builtin_add_overflow(lhs_tagged, rhs_untagged, &res))) { + ASSERT(is_small(res)); + $Dst = res; + $NEXT0(); + } +#else Sint i = signed_val(increment_reg_val) + increment_val; if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } +#endif } + result = erts_mixed_plus(c_p, increment_reg_val, make_small(increment_val)); ERTS_HOLE_CHECK(c_p); if (ERTS_LIKELY(is_value(result))) { @@ -118,11 +187,15 @@ i_times(Fail, Op1, Op2, Dst) { Eterm op2 = $Op2; #ifdef HAVE_OVERFLOW_CHECK_BUILTINS if (ERTS_LIKELY(is_both_small(op1, op2))) { - Sint a = signed_val(op1); - Sint b = signed_val(op2); - Sint res; - if (ERTS_LIKELY(!__builtin_mul_overflow(a, b, &res) && IS_SSMALL(res))) { - $Dst = make_small(res); + /* See plus.execute */ + Sint lhs_untagged, rhs_actual, res; + + lhs_untagged = op1 & ~_TAG_IMMED1_MASK; + rhs_actual = signed_val(op2); + + if (ERTS_LIKELY(!__builtin_mul_overflow(lhs_untagged, rhs_actual, &res))) { + ASSERT(!(res & _TAG_IMMED1_MASK)); + $Dst = res | _TAG_IMMED1_SMALL; $NEXT0(); } } diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 762c5da9be..4d52435139 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -353,6 +353,22 @@ erts_debug_disassemble_1(BIF_ALIST_1) return TUPLE3(hp, addr, bin, mfa); } +BIF_RETTYPE +erts_debug_interpreter_size_0(BIF_ALIST_0) +{ + int i; + BeamInstr low, high; + + low = high = (BeamInstr) process_main; + for (i = 0; i < NUM_SPECIFIC_OPS; i++) { + BeamInstr a = BeamOpCodeAddr(i); + if (a > high) { + high = a; + } + } + return erts_make_integer(high - low, BIF_P); +} + void dbg_bt(Process* p, Eterm* sp) { diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index ea01ce597d..bae64afb97 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -375,44 +375,33 @@ do { \ /* * process_main() is already huge, so we want to avoid inlining - * into it. Especially functions that are seldom used. + * seldom used functions into it. */ -#ifdef __GNUC__ -# define NOINLINE __attribute__((__noinline__)) -#else -# define NOINLINE -#endif - - -/* - * The following functions are called directly by process_main(). - * Don't inline them. - */ -static void init_emulator_finish(void) NOINLINE; -static ErtsCodeMFA *ubif2mfa(void* uf) NOINLINE; +static void init_emulator_finish(void) ERTS_NOINLINE; +static ErtsCodeMFA *ubif2mfa(void* uf) ERTS_NOINLINE; static BeamInstr* handle_error(Process* c_p, BeamInstr* pc, - Eterm* reg, ErtsCodeMFA* bif_mfa) NOINLINE; + Eterm* reg, ErtsCodeMFA* bif_mfa) ERTS_NOINLINE; static BeamInstr* call_error_handler(Process* p, ErtsCodeMFA* mfa, - Eterm* reg, Eterm func) NOINLINE; + Eterm* reg, Eterm func) ERTS_NOINLINE; static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity, - BeamInstr *I, Uint offs) NOINLINE; + BeamInstr *I, Uint offs) ERTS_NOINLINE; static BeamInstr* apply(Process* p, Eterm* reg, - BeamInstr *I, Uint offs) NOINLINE; + BeamInstr *I, Uint offs) ERTS_NOINLINE; static BeamInstr* call_fun(Process* p, int arity, - Eterm* reg, Eterm args) NOINLINE; + Eterm* reg, Eterm args) ERTS_NOINLINE; static BeamInstr* apply_fun(Process* p, Eterm fun, - Eterm args, Eterm* reg) NOINLINE; + Eterm args, Eterm* reg) ERTS_NOINLINE; static Eterm new_fun(Process* p, Eterm* reg, - ErlFunEntry* fe, int num_free) NOINLINE; + ErlFunEntry* fe, int num_free) ERTS_NOINLINE; static int is_function2(Eterm Term, Uint arity); static Eterm erts_gc_new_map(Process* p, Eterm* reg, Uint live, - Uint n, BeamInstr* ptr) NOINLINE; + Uint n, BeamInstr* ptr) ERTS_NOINLINE; static Eterm erts_gc_new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, - Uint live, BeamInstr* ptr) NOINLINE; + Uint live, BeamInstr* ptr) ERTS_NOINLINE; static Eterm erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, - Uint n, BeamInstr* new_p) NOINLINE; + Uint n, BeamInstr* new_p) ERTS_NOINLINE; static Eterm erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, - Uint n, Eterm* new_p) NOINLINE; + Uint n, Eterm* new_p) ERTS_NOINLINE; static Eterm get_map_element(Eterm map, Eterm key); static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx); @@ -3283,7 +3272,7 @@ erts_current_reductions(Process *c_p, Process *p) } else { reds_left = c_p->fcalls; } - return REDS_IN(c_p) - reds_left; + return REDS_IN(c_p) - reds_left - erts_proc_sched_data(p)->virtual_reds; } int diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index d0e2d9afc2..b81056c774 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -1915,7 +1915,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp, erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Discarding message %T from %T to %T in an old " - "incarnation (%u) of this node (%u)\n", + "incarnation (%d) of this node (%d)\n", msg, p->common.id, to, @@ -1959,7 +1959,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp, erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Discarding message %T from %T to %T in an old " - "incarnation (%u) of this node (%u)\n", + "incarnation (%d) of this node (%d)\n", msg, p->common.id, to, diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 34a0be4f2d..db9c258cb7 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -437,6 +437,7 @@ bif erts_debug:set_internal_state/2 bif erts_debug:display/1 bif erts_debug:dist_ext_to_term/2 bif erts_debug:instructions/0 +bif erts_debug:interpreter_size/0 bif erts_debug:dirty_cpu/2 bif erts_debug:dirty_io/2 bif erts_debug:dirty/3 diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 7666f23a4f..522f50287a 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -2176,24 +2176,6 @@ term_to_Uint64(Eterm term, Uint64 *up) #endif } -int -term_to_Uint32(Eterm term, Uint32 *up) -{ -#if ERTS_SIZEOF_ETERM == 4 - return term_to_Uint(term,up); -#else - if (is_small(term)) { - Sint i = signed_val(term); - if (i >= 0) { - *up = (Uint32) i; - return 1; - } - } - *up = BADARG; - return 0; -#endif -} - int term_to_Sint(Eterm term, Sint *sp) { diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 3fed076419..ad19cce395 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -168,8 +168,6 @@ Eterm erts_uint64_array_to_big(Uint **, int, int, Uint64 *); int term_to_Uint64(Eterm, Uint64*); int term_to_Sint64(Eterm, Sint64*); #endif -int term_to_Uint32(Eterm, Uint32*); - Uint32 big_to_uint32(Eterm b); int term_equals_2pow32(Eterm); diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 06d8c7cda8..80e871aaf6 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -394,8 +394,12 @@ print_process_info(fmtfn_t to, void *to_arg, Process *p, ErtsProcLocks orig_lock erts_print(to, to_arg, "OldBinVHeap: %b64u\n", BIN_OLD_VHEAP(p)); erts_print(to, to_arg, "BinVHeap unused: %b64u\n", BIN_VHEAP_SZ(p) - p->off_heap.overhead); - erts_print(to, to_arg, "OldBinVHeap unused: %b64u\n", - BIN_OLD_VHEAP_SZ(p) - BIN_OLD_VHEAP(p)); + if (BIN_OLD_VHEAP_SZ(p) >= BIN_OLD_VHEAP(p)) { + erts_print(to, to_arg, "OldBinVHeap unused: %b64u\n", + BIN_OLD_VHEAP_SZ(p) - BIN_OLD_VHEAP(p)); + } else { + erts_print(to, to_arg, "OldBinVHeap unused: overflow\n"); + } erts_print(to, to_arg, "Memory: %beu\n", erts_process_memory(p, !0)); if (garbing) { diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 9cad2b03c5..bd1ad91e45 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -125,7 +125,7 @@ BS_GET_UNCHECKED_FIELD_SIZE(Bits, Unit, Fail, Dst) { TEST_BIN_VHEAP(VNh, Nh, Live) { Uint need = $Nh; if (E - HTOP < need || MSO(c_p).overhead + $VNh >= BIN_VHEAP_SZ(c_p)) { - SWAPOUT; + $GC_SWAPOUT(); PROCESS_MAIN_CHK_LOCKS(c_p); FCALLS -= erts_garbage_collect_nobump(c_p, need, reg, $Live, FCALLS); ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 8bbe6450eb..ff19ef018e 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -55,7 +55,6 @@ */ #if 0 #define ERTS_DIST_MSG_DBG -FILE *dbg_file; #endif #if 0 /* Enable this to print the dist debug messages to a file instead */ @@ -67,6 +66,7 @@ FILE *dbg_file; #endif #if defined(ERTS_DIST_MSG_DBG) || defined(ERTS_RAW_DIST_MSG_DBG) +FILE *dbg_file; static void bw(byte *buf, ErlDrvSizeT sz) { bin_write(ERTS_PRINT_FILE, dbg_file, buf, sz); @@ -743,7 +743,7 @@ void init_dist(void) sprintf(buff, ERTS_DIST_MSG_DBG_FILE, getpid()); dbg_file = fopen(buff,"w+"); } -#elif defined (ERTS_DIST_MSG_DBG) +#elif defined(ERTS_DIST_MSG_DBG) || defined(ERTS_RAW_DIST_MSG_DBG) dbg_file = stderr; #endif @@ -775,19 +775,25 @@ void init_dist(void) static ERTS_INLINE ErtsDistOutputBuf * alloc_dist_obuf(Uint size, Uint headers) { - int i; + Uint obuf_size = sizeof(ErtsDistOutputBuf)*(headers); ErtsDistOutputBuf *obuf; - Uint obuf_size = sizeof(ErtsDistOutputBuf)*(headers) + - sizeof(byte)*size; - Binary *bin = erts_bin_drv_alloc(obuf_size); - obuf = (ErtsDistOutputBuf *) &bin->orig_bytes[size]; + Binary *bin; + byte *extp; + int i; + + bin = erts_bin_drv_alloc(obuf_size + size); erts_refc_add(&bin->intern.refc, headers - 1, 1); + + obuf = (ErtsDistOutputBuf *)&bin->orig_bytes[0]; + extp = (byte *)&bin->orig_bytes[obuf_size]; + for (i = 0; i < headers; i++) { obuf[i].bin = bin; - obuf[i].extp = (byte *)&bin->orig_bytes[0]; + obuf[i].extp = extp; #ifdef DEBUG obuf[i].dbg_pattern = ERTS_DIST_OUTPUT_BUF_DBG_PATTERN; - obuf[i].alloc_endp = obuf->extp + size; + obuf[i].ext_startp = extp; + obuf[i].alloc_endp = &extp[size]; ASSERT(bin == ErtsDistOutputBuf2Binary(obuf)); #endif } @@ -1360,7 +1366,7 @@ erts_dist_seq_tree_foreach_delete_yielding(DistSeqNode **root, limit); if (res > 0) { if (ysp != &ys) - erts_free(ERTS_ALC_T_ML_YIELD_STATE, ysp); + erts_free(ERTS_ALC_T_SEQ_YIELD_STATE, ysp); *vyspp = NULL; } else { @@ -1838,7 +1844,7 @@ int erts_net_message(Port *prt, if (locks) erts_proc_unlock(rp, locks); - } else if (ede_hfrag) { + } else if (ede_hfrag != NULL) { erts_free_dist_ext_copy(erts_get_dist_ext(ede_hfrag)); free_message_buffer(ede_hfrag); } @@ -1880,16 +1886,18 @@ int erts_net_message(Port *prt, goto invalid_message; } rp = erts_proc_lookup(to); + if (rp) { ErtsProcLocks locks = 0; erts_queue_dist_message(rp, locks, edep, ede_hfrag, token, am_Empty); if (locks) erts_proc_unlock(rp, locks); - } else if (ede_hfrag) { + } else if (ede_hfrag != NULL) { erts_free_dist_ext_copy(erts_get_dist_ext(ede_hfrag)); free_message_buffer(ede_hfrag); } + break; } @@ -1930,15 +1938,19 @@ int erts_net_message(Port *prt, goto invalid_message; } - if (!erts_proc_lookup(watcher)) break; /* Process not alive */ - - if (reason == THE_NON_VALUE) { + if (!erts_proc_lookup(watcher)) { + if (ede_hfrag != NULL) { + erts_free_dist_ext_copy(erts_get_dist_ext(ede_hfrag)); + free_message_buffer(ede_hfrag); + } + break; /* Process not alive */ + } #ifdef ERTS_DIST_MSG_DBG + if (reason == THE_NON_VALUE) { dist_msg_dbg(edep, "MSG", buf, orig_len); -#endif - } +#endif erts_proc_sig_send_dist_monitor_down( dep, ref, watched, watcher, edep, ede_hfrag, reason); @@ -1987,13 +1999,19 @@ int erts_net_message(Port *prt, goto invalid_message; } - if (!erts_proc_lookup(to)) break; /* Process not alive */ + if (!erts_proc_lookup(to)) { + if (ede_hfrag != NULL) { + erts_free_dist_ext_copy(erts_get_dist_ext(ede_hfrag)); + free_message_buffer(ede_hfrag); + } + break; /* Process not alive */ + } - if (reason == THE_NON_VALUE) { #ifdef ERTS_DIST_MSG_DBG + if (reason == THE_NON_VALUE) { dist_msg_dbg(edep, "MSG", buf, orig_len); -#endif } +#endif erts_proc_sig_send_dist_link_exit(dep, from, to, edep, ede_hfrag, @@ -2042,13 +2060,19 @@ int erts_net_message(Port *prt, goto invalid_message; } - if (!erts_proc_lookup(to)) break; /* Process not alive */ + if (!erts_proc_lookup(to)) { + if (ede_hfrag != NULL) { + erts_free_dist_ext_copy(erts_get_dist_ext(ede_hfrag)); + free_message_buffer(ede_hfrag); + } + break; /* Process not alive */ + } - if (reason == THE_NON_VALUE) { #ifdef ERTS_DIST_MSG_DBG + if (reason == THE_NON_VALUE) { dist_msg_dbg(edep, "MSG", buf, orig_len); -#endif } +#endif erts_proc_sig_send_dist_exit(dep, from, to, edep, ede_hfrag, reason, token); break; @@ -2301,8 +2325,18 @@ erts_dsig_send(ErtsDSigSendContext *ctx) ctx->data_size = ctx->max_finalize_prepend; erts_reset_atom_cache_map(ctx->acmp); - erts_encode_dist_ext_size(ctx->ctl, ctx->flags, ctx->acmp, &ctx->data_size); + switch (erts_encode_dist_ext_size(ctx->ctl, ctx->flags, + ctx->acmp, &ctx->data_size)) { + case ERTS_EXT_SZ_OK: + break; + case ERTS_EXT_SZ_SYSTEM_LIMIT: + retval = ERTS_DSIG_SEND_TOO_LRG; + goto done; + case ERTS_EXT_SZ_YIELD: + ERTS_INTERNAL_ERROR("Unexpected yield result"); + break; + } if (is_non_value(ctx->msg)) { ctx->phase = ERTS_DSIG_SEND_PHASE_ALLOC; break; @@ -2312,17 +2346,31 @@ erts_dsig_send(ErtsDSigSendContext *ctx) ctx->u.sc.level = 0; ctx->phase = ERTS_DSIG_SEND_PHASE_MSG_SIZE; - case ERTS_DSIG_SEND_PHASE_MSG_SIZE: - if (!ctx->no_trap) { - if (erts_encode_dist_ext_size_int(ctx->msg, ctx, &ctx->data_size)) { - retval = ERTS_DSIG_SEND_CONTINUE; - goto done; - } - } else { - erts_encode_dist_ext_size(ctx->msg, ctx->flags, ctx->acmp, &ctx->data_size); + case ERTS_DSIG_SEND_PHASE_MSG_SIZE: { + ErtsExtSzRes sz_res; + sz_res = (!ctx->no_trap + ? erts_encode_dist_ext_size_ctx(ctx->msg, + ctx, + &ctx->data_size) + : erts_encode_dist_ext_size(ctx->msg, + ctx->flags, + ctx->acmp, + &ctx->data_size)); + switch (sz_res) { + case ERTS_EXT_SZ_OK: + break; + case ERTS_EXT_SZ_SYSTEM_LIMIT: + retval = ERTS_DSIG_SEND_TOO_LRG; + goto done; + case ERTS_EXT_SZ_YIELD: + if (ctx->no_trap) + ERTS_INTERNAL_ERROR("Unexpected yield result"); + retval = ERTS_DSIG_SEND_CONTINUE; + goto done; } ctx->phase = ERTS_DSIG_SEND_PHASE_ALLOC; + } case ERTS_DSIG_SEND_PHASE_ALLOC: erts_finalize_atom_cache_map(ctx->acmp, ctx->flags); @@ -2341,7 +2389,8 @@ erts_dsig_send(ErtsDSigSendContext *ctx) (ctx->fragments-1) * ERTS_DIST_FRAGMENT_HEADER_SIZE, ctx->fragments); ctx->obuf->ext_start = &ctx->obuf->extp[0]; - ctx->obuf->ext_endp = &ctx->obuf->extp[0] + ctx->max_finalize_prepend + ctx->dhdr_ext_size; + ctx->obuf->ext_endp = &ctx->obuf->extp[0] + ctx->max_finalize_prepend + + ctx->dhdr_ext_size; /* Encode internal version of dist header */ ctx->obuf->extp = erts_encode_ext_dist_header_setup( @@ -2380,8 +2429,8 @@ erts_dsig_send(ErtsDSigSendContext *ctx) case ERTS_DSIG_SEND_PHASE_FIN: { ASSERT(ctx->obuf->extp < ctx->obuf->ext_endp); - ASSERT(((byte*)&ctx->obuf->bin->orig_bytes[0]) <= ctx->obuf->extp - ctx->max_finalize_prepend); - ASSERT(ctx->obuf->ext_endp <= ((byte*)ctx->obuf->bin->orig_bytes) + ctx->data_size + ctx->dhdr_ext_size); + ASSERT(ctx->obuf->ext_startp <= ctx->obuf->extp - ctx->max_finalize_prepend); + ASSERT(ctx->obuf->ext_endp <= (byte*)ctx->obuf->ext_startp + ctx->data_size + ctx->dhdr_ext_size); ctx->data_size = ctx->obuf->ext_endp - ctx->obuf->extp; @@ -3424,6 +3473,8 @@ dist_ctrl_get_data_1(BIF_ALIST_1) obufsize -= size_obuf(obuf); if (reds < 0) { erts_de_runlock(dep); + if (obufsize) + erts_atomic_add_nob(&dep->qsize, (erts_aint_t) -obufsize); ERTS_BIF_YIELD1(bif_export[BIF_dist_ctrl_get_data_1], BIF_P, BIF_ARG_1); } @@ -3457,6 +3508,7 @@ dist_ctrl_get_data_1(BIF_ALIST_1) pb->bytes = (byte*) obuf->extp; pb->flags = 0; res = make_binary(pb); + hp += PROC_BIN_SIZE; } else { hp = HAlloc(BIF_P, PROC_BIN_SIZE * 2 + 4 + hsz); pb = (ProcBin *) (char *) hp; @@ -3748,10 +3800,12 @@ int distribution_info(fmtfn_t to, void *arg) /* Called by break handler */ BIF_RETTYPE setnode_2(BIF_ALIST_2) { Process *net_kernel; - Uint32 creation; + Uint creation; /* valid creation ? */ - if(!term_to_Uint32(BIF_ARG_2, &creation)) + if(!term_to_Uint(BIF_ARG_2, &creation)) + goto error; + if(creation > 3) goto error; /* valid node name ? */ @@ -3795,7 +3849,7 @@ BIF_RETTYPE setnode_2(BIF_ALIST_2) erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_MAIN); erts_thr_progress_block(); inc_no_nodes(); - erts_set_this_node(BIF_ARG_1, creation); + erts_set_this_node(BIF_ARG_1, (Uint32) creation); erts_is_alive = 1; send_nodes_mon_msgs(NULL, am_nodeup, BIF_ARG_1, am_visible, NIL); erts_thr_progress_unblock(); diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index f953a2ab8c..067028634b 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -54,12 +54,11 @@ #define DFLAG_DIST_MANDATORY (DFLAG_EXTENDED_REFERENCES \ | DFLAG_EXTENDED_PIDS_PORTS \ | DFLAG_UTF8_ATOMS \ - | DFLAG_NEW_FUN_TAGS \ - | DFLAG_BIG_CREATION) + | DFLAG_NEW_FUN_TAGS) /* * Additional optimistic flags when encoding toward pending connection. - * If remote node (erl_interface) does not support these then we may need + * If remote node (erl_interface) does not supporting these then we may need * to transcode messages enqueued before connection setup was finished. */ #define DFLAG_DIST_HOPEFULLY (DFLAG_EXPORT_PTR_TAG \ @@ -76,6 +75,7 @@ | DFLAG_SMALL_ATOM_TAGS \ | DFLAG_UTF8_ATOMS \ | DFLAG_MAP_TAG \ + | DFLAG_BIG_CREATION \ | DFLAG_SEND_SENDER \ | DFLAG_BIG_SEQTRACE_LABELS \ | DFLAG_EXIT_PAYLOAD \ diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index e6169ebeaa..b9f0334172 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -3904,7 +3904,7 @@ check_memory_fence(void *ptr, Uint *size, ErtsAlcType_t n, int func) { Uint sz; Uint found_type; - UWord pre_pattern; + UWord pre_pattern, expected_pattern; UWord post_pattern; UWord *ui_ptr; #ifdef HARD_DEBUG @@ -3914,6 +3914,8 @@ check_memory_fence(void *ptr, Uint *size, ErtsAlcType_t n, int func) if (!ptr) return NULL; + expected_pattern = MK_PATTERN(n); + ui_ptr = (UWord *) ptr; pre_pattern = *(--ui_ptr); *size = sz = *(--ui_ptr); @@ -3922,7 +3924,13 @@ check_memory_fence(void *ptr, Uint *size, ErtsAlcType_t n, int func) #endif found_type = GET_TYPE_OF_PATTERN(pre_pattern); - if (pre_pattern != MK_PATTERN(n)) { + + if (found_type != n) { + erts_exit(ERTS_ABORT_EXIT, "ERROR: Miss matching allocator types" + " used in alloc and free\n"); + } + + if (pre_pattern != expected_pattern) { if ((FIXED_FENCE_PATTERN_MASK & pre_pattern) != FIXED_FENCE_PATTERN) erts_exit(ERTS_ABORT_EXIT, "ERROR: Fence at beginning of memory block (p=0x%u) " @@ -3932,8 +3940,7 @@ check_memory_fence(void *ptr, Uint *size, ErtsAlcType_t n, int func) sys_memcpy((void *) &post_pattern, (void *) (((char *)ptr)+sz), sizeof(UWord)); - if (post_pattern != MK_PATTERN(n) - || pre_pattern != post_pattern) { + if (post_pattern != expected_pattern || pre_pattern != post_pattern) { char fbuf[10]; char obuf[10]; char *ftype; diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index e7329daa2d..92e5069c71 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -282,6 +282,7 @@ type ENVIRONMENT SYSTEM SYSTEM environment type PERSISTENT_TERM LONG_LIVED CODE persisten_term type PERSISTENT_LOCK_Q SHORT_LIVED SYSTEM persistent_lock_q +type PERSISTENT_TERM_TMP SHORT_LIVED SYSTEM persistent_term_tmp_table # # Types used for special emulators diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 8d4464969a..25ac3bc5af 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -6567,6 +6567,14 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) __FILE__, __LINE__); } + /* The various fields packed into the header word must not overlap */ + ERTS_CT_ASSERT(!(MBC_ABLK_OFFSET_MASK & MBC_ABLK_SZ_MASK)); + ERTS_CT_ASSERT(!(MBC_ABLK_OFFSET_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(MBC_ABLK_SZ_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(MBC_FBLK_SZ_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(SBC_BLK_SZ_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(CRR_SZ_MASK & CRR_FLG_MASK)); + if (!initialized) goto error; diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h index ea1afe8f58..b46b311c59 100644 --- a/erts/emulator/beam/erl_alloc_util.h +++ b/erts/emulator/beam/erl_alloc_util.h @@ -334,8 +334,11 @@ void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp); #endif #if MBC_ABLK_OFFSET_BITS -# define MBC_ABLK_OFFSET_SHIFT (sizeof(UWord)*8 - MBC_ABLK_OFFSET_BITS) -# define MBC_ABLK_OFFSET_MASK ((~((UWord)0) << MBC_ABLK_OFFSET_SHIFT) & ~BLK_FLG_MASK) +/* The shift is reduced by 1 since the highest bit is used for a flag. */ +# define MBC_ABLK_OFFSET_SHIFT (sizeof(UWord)*8 - 1 - MBC_ABLK_OFFSET_BITS) +# define MBC_ABLK_OFFSET_MASK \ + (((UWORD_CONSTANT(1) << MBC_ABLK_OFFSET_BITS) - UWORD_CONSTANT(1)) \ + << MBC_ABLK_OFFSET_SHIFT) # define MBC_ABLK_SZ_MASK (~MBC_ABLK_OFFSET_MASK & ~BLK_FLG_MASK) #else # define MBC_ABLK_SZ_MASK (~BLK_FLG_MASK) diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index a7424bbcb8..2704b99aa4 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -768,7 +768,7 @@ static ErtsProcessInfoArgs pi_args[] = { {am_memory, 0, ERTS_PI_FLAG_NEED_MSGQ_LEN|ERTS_PI_FLAG_FORCE_SIG_SEND, ERTS_PROC_LOCK_MAIN}, {am_garbage_collection, 3+2 + 3+2 + 3+2 + 3+2 + 3+2 + ERTS_MAX_HEAP_SIZE_MAP_SZ, 0, ERTS_PROC_LOCK_MAIN}, {am_group_leader, 0, 0, ERTS_PROC_LOCK_MAIN}, - {am_reductions, 0, 0, ERTS_PROC_LOCK_MAIN}, + {am_reductions, 0, ERTS_PI_FLAG_FORCE_SIG_SEND, ERTS_PROC_LOCK_MAIN}, {am_priority, 0, 0, 0}, {am_trace, 0, 0, ERTS_PROC_LOCK_MAIN}, {am_binary, 0, ERTS_PI_FLAG_FORCE_SIG_SEND, ERTS_PROC_LOCK_MAIN}, @@ -2579,6 +2579,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) /* Need to be the only thread running... */ erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_MAIN); + BIF_P->scheduler_data->current_process = NULL; erts_thr_progress_block(); if (BIF_ARG_1 == am_info) @@ -2592,6 +2593,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) erts_thr_progress_unblock(); erts_proc_lock(BIF_P, ERTS_PROC_LOCK_MAIN); + BIF_P->scheduler_data->current_process = BIF_P; ASSERT(dsbufp && dsbufp->str); res = new_binary(BIF_P, (byte *) dsbufp->str, dsbufp->str_len); @@ -2797,10 +2799,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) } else if (BIF_ARG_1 == am_threads) { return am_true; } else if (BIF_ARG_1 == am_creation) { - Uint hsz = 0; - erts_bld_uint(NULL, &hsz, erts_this_node->creation); - hp = hsz ? HAlloc(BIF_P, hsz) : NULL; - BIF_RET(erts_bld_uint(&hp, NULL, erts_this_node->creation)); + return make_small(erts_this_node->creation); } else if (BIF_ARG_1 == am_break_ignored) { extern int ignore_break; if (ignore_break) @@ -3026,6 +3025,8 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(erts_nif_taints(BIF_P)); } else if (ERTS_IS_ATOM_STR("reader_groups_map", BIF_ARG_1)) { BIF_RET(erts_get_reader_groups_map(BIF_P)); + } else if (ERTS_IS_ATOM_STR("decentralized_counter_groups_map", BIF_ARG_1)) { + BIF_RET(erts_get_decentralized_counter_groups_map(BIF_P)); } else if (ERTS_IS_ATOM_STR("dist_buf_busy_limit", BIF_ARG_1)) { Uint hsz = 0; @@ -4235,7 +4236,10 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1) Uint dflags = (TERM_TO_BINARY_DFLAGS & ~DFLAG_EXPORT_PTR_TAG & ~DFLAG_BIT_BINARIES); - BIF_RET(erts_term_to_binary(BIF_P, tp[2], 0, dflags)); + Eterm res = erts_term_to_binary(BIF_P, tp[2], 0, dflags); + if (is_value(res)) + BIF_RET(res); + BIF_ERROR(BIF_P, SYSTEM_LIMIT); } else if (ERTS_IS_ATOM_STR("dist_ctrl", tp[1])) { Eterm res = am_undefined; @@ -4692,6 +4696,14 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2) BIF_RET(am_ok); } } + else if (ERTS_IS_ATOM_STR("ets_debug_random_split_join", BIF_ARG_1)) { + if (is_tuple(BIF_ARG_2)) { + Eterm* tpl = tuple_val(BIF_ARG_2); + + if (erts_ets_debug_random_split_join(tpl[1], tpl[2] == am_true)) + BIF_RET(am_ok); + } + } else if (ERTS_IS_ATOM_STR("mbuf", BIF_ARG_1)) { Uint sz = size_object(BIF_ARG_2); ErlHeapFragment* frag = new_message_buffer(sz); diff --git a/erts/emulator/beam/erl_bif_persistent.c b/erts/emulator/beam/erl_bif_persistent.c index 5a78a043ce..f38e0cc5cb 100644 --- a/erts/emulator/beam/erl_bif_persistent.c +++ b/erts/emulator/beam/erl_bif_persistent.c @@ -37,15 +37,6 @@ #include "erl_binary.h" /* - * The limit for the number of persistent terms before - * a warning is issued. - */ - -#define WARNING_LIMIT 20000 -#define XSTR(s) STR(s) -#define STR(s) #s - -/* * Parameters for the hash table. */ #define INITIAL_SIZE 8 @@ -73,14 +64,69 @@ typedef struct trap_data { Uint memory; /* Used by info/0 to count used memory */ } TrapData; +typedef enum { + ERTS_PERSISTENT_TERM_CPY_PLACE_START, + ERTS_PERSISTENT_TERM_CPY_PLACE_1, + ERTS_PERSISTENT_TERM_CPY_PLACE_2, + ERTS_PERSISTENT_TERM_CPY_PLACE_3 +} ErtsPersistentTermCpyTableLocation; + +typedef enum { + ERTS_PERSISTENT_TERM_CPY_NO_REHASH = 0, + ERTS_PERSISTENT_TERM_CPY_REHASH = 1, + ERTS_PERSISTENT_TERM_CPY_TEMP = 2 +} ErtsPersistentTermCpyTableType; + +typedef struct { + HashTable* old_table; /* in param */ + Uint new_size; /* in param */ + ErtsPersistentTermCpyTableType copy_type; /* in param */ + Uint max_iterations; /* in param */ + ErtsPersistentTermCpyTableLocation location; /* in/out param */ + Uint iterations_done; /* in/out param */ + Uint total_iterations_done; /* in/out param */ + HashTable* new_table; /* out param */ +} ErtsPersistentTermCpyTableCtx; + +typedef enum { + PUT2_TRAP_LOCATION_NEW_KEY, + PUT2_TRAP_LOCATION_REPLACE_VALUE +} ErtsPersistentTermPut2TrapLocation; + +typedef struct { + ErtsPersistentTermPut2TrapLocation trap_location; + Eterm key; + Eterm term; + Uint entry_index; + HashTable* hash_table; + Eterm heap[3]; + Eterm tuple; + ErtsPersistentTermCpyTableCtx cpy_ctx; +} ErtsPersistentTermPut2Context; + +typedef enum { + ERASE1_TRAP_LOCATION_TMP_COPY, + ERASE1_TRAP_LOCATION_FINAL_COPY +} ErtsPersistentTermErase1TrapLocation; + +typedef struct { + ErtsPersistentTermErase1TrapLocation trap_location; + Eterm key; + HashTable* old_table; + HashTable* new_table; + Uint entry_index; + Eterm old_term; + HashTable* tmp_table; + ErtsPersistentTermCpyTableCtx cpy_ctx; +} ErtsPersistentTermErase1Context; + /* * Declarations of local functions. */ static HashTable* create_initial_table(void); static Uint lookup(HashTable* hash_table, Eterm key); -static HashTable* copy_table(HashTable* old_table, Uint new_size, int rehash); -static HashTable* tmp_table_copy(HashTable* old_table); +static HashTable* copy_table(ErtsPersistentTermCpyTableCtx* ctx); static int try_seize_update_permission(Process* c_p); static void release_update_permission(int release_updater); static void table_updater(void* table); @@ -127,7 +173,6 @@ static Process* updater_process = NULL; /* Protected by update_table_permission_mtx */ static ErtsThrPrgrLaterOp thr_prog_op; -static int issued_warning = 0; /* * Queue of hash tables to be deleted. @@ -139,7 +184,7 @@ static HashTable** delete_queue_tail = &delete_queue_head; /* * The following variables are only used during crash dumping. They - * are intialized by erts_init_persistent_dumping(). + * are initialized by erts_init_persistent_dumping(). */ ErtsLiteralArea** erts_persistent_areas; @@ -188,101 +233,181 @@ void erts_init_bif_persistent_term(void) &persistent_term_info_trap); } +/* + * Macro used for trapping in persistent_term_put_2 and + * persistent_term_erase_1 + */ +#define TRAPPING_COPY_TABLE(TABLE_DEST, OLD_TABLE, NEW_SIZE, COPY_TYPE, LOC_NAME, TRAP_CODE) \ + do { \ + ctx->cpy_ctx = (ErtsPersistentTermCpyTableCtx){ \ + .old_table = OLD_TABLE, \ + .new_size = NEW_SIZE, \ + .copy_type = COPY_TYPE, \ + .location = ERTS_PERSISTENT_TERM_CPY_PLACE_START \ + }; \ + L_ ## LOC_NAME: \ + ctx->cpy_ctx.max_iterations = MAX(1, max_iterations); \ + TABLE_DEST = copy_table(&ctx->cpy_ctx); \ + iterations_until_trap -= ctx->cpy_ctx.total_iterations_done; \ + if (TABLE_DEST == NULL) { \ + ctx->trap_location = LOC_NAME; \ + erts_set_gc_state(BIF_P, 0); \ + BUMP_ALL_REDS(BIF_P); \ + TRAP_CODE; \ + } \ + } while (0) + +static int persistent_term_put_2_ctx_bin_dtor(Binary *context_bin) +{ + ErtsPersistentTermPut2Context* ctx = ERTS_MAGIC_BIN_DATA(context_bin); + if (ctx->cpy_ctx.new_table != NULL) { + erts_free(ERTS_ALC_T_PERSISTENT_TERM, ctx->cpy_ctx.new_table); + release_update_permission(0); + } + return 1; +} +/* + * A linear congruential generator that is used in the debug emulator + * to trap after a random number of iterations in + * persistent_term_put_2 and persistent_term_erase_1. + * + * https://en.wikipedia.org/wiki/Linear_congruential_generator + */ +#define GET_SMALL_RANDOM_INT(SEED) \ + (1103515245 * (SEED) + 12345) % 227 + BIF_RETTYPE persistent_term_put_2(BIF_ALIST_2) { - Eterm key; - Eterm term; - Eterm heap[3]; - Eterm tuple; - HashTable* hash_table; - Uint term_size; - Uint lit_area_size; - ErlOffHeap code_off_heap; - ErtsLiteralArea* literal_area; - erts_shcopy_t info; - Eterm* ptr; - Uint entry_index; + static const Uint ITERATIONS_PER_RED = 32; + ErtsPersistentTermPut2Context* ctx; + Eterm state_mref = THE_NON_VALUE; + long iterations_until_trap; + long max_iterations; +#define PUT_TRAP_CODE \ + BIF_TRAP2(bif_export[BIF_persistent_term_put_2], BIF_P, state_mref, BIF_ARG_2) +#define TRAPPING_COPY_TABLE_PUT(TABLE_DEST, OLD_TABLE, NEW_SIZE, COPY_TYPE, LOC_NAME) \ + TRAPPING_COPY_TABLE(TABLE_DEST, OLD_TABLE, NEW_SIZE, COPY_TYPE, LOC_NAME, PUT_TRAP_CODE) + +#ifdef DEBUG + (void)ITERATIONS_PER_RED; + iterations_until_trap = max_iterations = + GET_SMALL_RANDOM_INT(ERTS_BIF_REDS_LEFT(BIF_P) + (Uint)&ctx); +#else + iterations_until_trap = max_iterations = + ITERATIONS_PER_RED * ERTS_BIF_REDS_LEFT(BIF_P); +#endif + if (is_internal_magic_ref(BIF_ARG_1) && + (ERTS_MAGIC_BIN_DESTRUCTOR(erts_magic_ref2bin(BIF_ARG_1)) == + persistent_term_put_2_ctx_bin_dtor)) { + /* Restore state after a trap */ + Binary* state_bin; + state_mref = BIF_ARG_1; + state_bin = erts_magic_ref2bin(state_mref); + ctx = ERTS_MAGIC_BIN_DATA(state_bin); + ASSERT(BIF_P->flags & F_DISABLE_GC); + erts_set_gc_state(BIF_P, 1); + switch (ctx->trap_location) { + case PUT2_TRAP_LOCATION_NEW_KEY: + goto L_PUT2_TRAP_LOCATION_NEW_KEY; + case PUT2_TRAP_LOCATION_REPLACE_VALUE: + goto L_PUT2_TRAP_LOCATION_REPLACE_VALUE; + } + } else { + /* Save state in magic bin in case trapping is necessary */ + Eterm* hp; + Binary* state_bin = erts_create_magic_binary(sizeof(ErtsPersistentTermPut2Context), + persistent_term_put_2_ctx_bin_dtor); + hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE); + state_mref = erts_mk_magic_ref(&hp, &MSO(BIF_P), state_bin); + ctx = ERTS_MAGIC_BIN_DATA(state_bin); + /* + * IMPORTANT: The following field is used to detect if + * persistent_term_put_2_ctx_bin_dtor needs to free memory + */ + ctx->cpy_ctx.new_table = NULL; + } + if (!try_seize_update_permission(BIF_P)) { ERTS_BIF_YIELD2(bif_export[BIF_persistent_term_put_2], BIF_P, BIF_ARG_1, BIF_ARG_2); } + ctx->hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table); - hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table); + ctx->key = BIF_ARG_1; + ctx->term = BIF_ARG_2; - key = BIF_ARG_1; - term = BIF_ARG_2; + ctx->entry_index = lookup(ctx->hash_table, ctx->key); - entry_index = lookup(hash_table, key); - - heap[0] = make_arityval(2); - heap[1] = key; - heap[2] = term; - tuple = make_tuple(heap); + ctx->heap[0] = make_arityval(2); + ctx->heap[1] = ctx->key; + ctx->heap[2] = ctx->term; + ctx->tuple = make_tuple(ctx->heap); - if (is_nil(hash_table->term[entry_index])) { - Uint size = hash_table->allocated; - if (MUST_GROW(hash_table)) { - size *= 2; + if (is_nil(ctx->hash_table->term[ctx->entry_index])) { + Uint new_size = ctx->hash_table->allocated; + if (MUST_GROW(ctx->hash_table)) { + new_size *= 2; } - hash_table = copy_table(hash_table, size, 0); - entry_index = lookup(hash_table, key); - hash_table->num_entries++; + TRAPPING_COPY_TABLE_PUT(ctx->hash_table, + ctx->hash_table, + new_size, + ERTS_PERSISTENT_TERM_CPY_NO_REHASH, + PUT2_TRAP_LOCATION_NEW_KEY); + ctx->entry_index = lookup(ctx->hash_table, ctx->key); + ctx->hash_table->num_entries++; } else { - Eterm tuple = hash_table->term[entry_index]; + Eterm tuple = ctx->hash_table->term[ctx->entry_index]; Eterm old_term; ASSERT(is_tuple_arity(tuple, 2)); old_term = boxed_val(tuple)[2]; - if (EQ(term, old_term)) { + if (EQ(ctx->term, old_term)) { /* Same value. No need to update anything. */ release_update_permission(0); BIF_RET(am_ok); } else { /* Mark the old term for deletion. */ - mark_for_deletion(hash_table, entry_index); - hash_table = copy_table(hash_table, hash_table->allocated, 0); + mark_for_deletion(ctx->hash_table, ctx->entry_index); + TRAPPING_COPY_TABLE_PUT(ctx->hash_table, + ctx->hash_table, + ctx->hash_table->allocated, + ERTS_PERSISTENT_TERM_CPY_NO_REHASH, + PUT2_TRAP_LOCATION_REPLACE_VALUE); } } - /* - * Preserve internal sharing in the term by using the - * sharing-preserving functions. However, literals must - * be copied in case the module holding them are unloaded. - */ - INITIALIZE_SHCOPY(info); - info.copy_literals = 1; - term_size = copy_shared_calculate(tuple, &info); - ERTS_INIT_OFF_HEAP(&code_off_heap); - lit_area_size = ERTS_LITERAL_AREA_ALLOC_SIZE(term_size); - literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_area_size); - ptr = &literal_area->start[0]; - literal_area->end = ptr + term_size; - tuple = copy_shared_perform(tuple, term_size, &info, &ptr, &code_off_heap); - ASSERT(tuple_val(tuple) == literal_area->start); - literal_area->off_heap = code_off_heap.first; - DESTROY_SHCOPY(info); - erts_set_literal_tag(&tuple, literal_area->start, term_size); - hash_table->term[entry_index] = tuple; - - erts_schedule_thr_prgr_later_op(table_updater, hash_table, &thr_prog_op); - suspend_updater(BIF_P); - - /* - * Issue a warning once if the warning limit has been exceeded. - */ - - if (hash_table->num_entries > WARNING_LIMIT && issued_warning == 0) { - static char w[] = - "More than " XSTR(WARNING_LIMIT) " persistent terms " - "have been created.\n" - "It is recommended to avoid creating an excessive number of\n" - "persistent terms, as creation and deletion of persistent terms\n" - "will be slower as the number of persistent terms increases.\n"; - issued_warning = 1; - erts_send_warning_to_logger_str(BIF_P->group_leader, w); + { + Uint term_size; + Uint lit_area_size; + ErlOffHeap code_off_heap; + ErtsLiteralArea* literal_area; + erts_shcopy_t info; + Eterm* ptr; + /* + * Preserve internal sharing in the term by using the + * sharing-preserving functions. However, literals must + * be copied in case the module holding them are unloaded. + */ + INITIALIZE_SHCOPY(info); + info.copy_literals = 1; + term_size = copy_shared_calculate(ctx->tuple, &info); + ERTS_INIT_OFF_HEAP(&code_off_heap); + lit_area_size = ERTS_LITERAL_AREA_ALLOC_SIZE(term_size); + literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_area_size); + ptr = &literal_area->start[0]; + literal_area->end = ptr + term_size; + ctx->tuple = copy_shared_perform(ctx->tuple, term_size, &info, &ptr, &code_off_heap); + ASSERT(tuple_val(ctx->tuple) == literal_area->start); + literal_area->off_heap = code_off_heap.first; + DESTROY_SHCOPY(info); + erts_set_literal_tag(&ctx->tuple, literal_area->start, term_size); + ctx->hash_table->term[ctx->entry_index] = ctx->tuple; + + erts_schedule_thr_prgr_later_op(table_updater, ctx->hash_table, &thr_prog_op); + suspend_updater(BIF_P); } - + BUMP_REDS(BIF_P, (max_iterations - iterations_until_trap) / ITERATIONS_PER_RED); ERTS_BIF_YIELD_RETURN(BIF_P, am_ok); } @@ -349,26 +474,84 @@ BIF_RETTYPE persistent_term_get_2(BIF_ALIST_2) BIF_RET(result); } -BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1) +static int persistent_term_erase_1_ctx_bin_dtor(Binary *context_bin) { - Eterm key = BIF_ARG_1; - HashTable* old_table; - HashTable* new_table; - Uint entry_index; - Eterm old_term; + ErtsPersistentTermErase1Context* ctx = ERTS_MAGIC_BIN_DATA(context_bin); + if (ctx->cpy_ctx.new_table != NULL) { + if (ctx->cpy_ctx.copy_type == ERTS_PERSISTENT_TERM_CPY_TEMP) { + erts_free(ERTS_ALC_T_PERSISTENT_TERM_TMP, ctx->cpy_ctx.new_table); + } else { + erts_free(ERTS_ALC_T_PERSISTENT_TERM, ctx->cpy_ctx.new_table); + } + if (ctx->tmp_table != NULL) { + erts_free(ERTS_ALC_T_PERSISTENT_TERM_TMP, ctx->tmp_table); + } + release_update_permission(0); + } + return 1; +} +BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1) +{ + static const Uint ITERATIONS_PER_RED = 32; + ErtsPersistentTermErase1Context* ctx; + Eterm state_mref = THE_NON_VALUE; + long iterations_until_trap; + long max_iterations; +#ifdef DEBUG + (void)ITERATIONS_PER_RED; + iterations_until_trap = max_iterations = + GET_SMALL_RANDOM_INT(ERTS_BIF_REDS_LEFT(BIF_P) + (Uint)&ctx); +#else + iterations_until_trap = max_iterations = + ITERATIONS_PER_RED * ERTS_BIF_REDS_LEFT(BIF_P); +#endif +#define ERASE_TRAP_CODE \ + BIF_TRAP1(bif_export[BIF_persistent_term_erase_1], BIF_P, state_mref); +#define TRAPPING_COPY_TABLE_ERASE(TABLE_DEST, OLD_TABLE, NEW_SIZE, REHASH, LOC_NAME) \ + TRAPPING_COPY_TABLE(TABLE_DEST, OLD_TABLE, NEW_SIZE, REHASH, LOC_NAME, ERASE_TRAP_CODE) + if (is_internal_magic_ref(BIF_ARG_1) && + (ERTS_MAGIC_BIN_DESTRUCTOR(erts_magic_ref2bin(BIF_ARG_1)) == + persistent_term_erase_1_ctx_bin_dtor)) { + /* Restore the state after a trap */ + Binary* state_bin; + state_mref = BIF_ARG_1; + state_bin = erts_magic_ref2bin(state_mref); + ctx = ERTS_MAGIC_BIN_DATA(state_bin); + ASSERT(BIF_P->flags & F_DISABLE_GC); + erts_set_gc_state(BIF_P, 1); + switch (ctx->trap_location) { + case ERASE1_TRAP_LOCATION_TMP_COPY: + goto L_ERASE1_TRAP_LOCATION_TMP_COPY; + case ERASE1_TRAP_LOCATION_FINAL_COPY: + goto L_ERASE1_TRAP_LOCATION_FINAL_COPY; + } + } else { + /* Save state in magic bin in case trapping is necessary */ + Eterm* hp; + Binary* state_bin = erts_create_magic_binary(sizeof(ErtsPersistentTermErase1Context), + persistent_term_erase_1_ctx_bin_dtor); + hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE); + state_mref = erts_mk_magic_ref(&hp, &MSO(BIF_P), state_bin); + ctx = ERTS_MAGIC_BIN_DATA(state_bin); + /* + * IMPORTANT: The following two fields are used to detect if + * persistent_term_erase_1_ctx_bin_dtor needs to free memory + */ + ctx->cpy_ctx.new_table = NULL; + ctx->tmp_table = NULL; + } if (!try_seize_update_permission(BIF_P)) { ERTS_BIF_YIELD1(bif_export[BIF_persistent_term_erase_1], BIF_P, BIF_ARG_1); } - old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table); - entry_index = lookup(old_table, key); - old_term = old_table->term[entry_index]; - if (is_boxed(old_term)) { + ctx->key = BIF_ARG_1; + ctx->old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table); + ctx->entry_index = lookup(ctx->old_table, ctx->key); + ctx->old_term = ctx->old_table->term[ctx->entry_index]; + if (is_boxed(ctx->old_term)) { Uint new_size; - HashTable* tmp_table; - /* * Since we don't use any delete markers, we must rehash * the table when deleting terms to ensure that all terms @@ -378,8 +561,12 @@ BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1) * temporary table copy of the same size as the old one. */ - ASSERT(is_tuple_arity(old_term, 2)); - tmp_table = tmp_table_copy(old_table); + ASSERT(is_tuple_arity(ctx->old_term, 2)); + TRAPPING_COPY_TABLE_ERASE(ctx->tmp_table, + ctx->old_table, + ctx->old_table->allocated, + ERTS_PERSISTENT_TERM_CPY_TEMP, + ERASE1_TRAP_LOCATION_TMP_COPY); /* * Delete the term from the temporary table. Then copy the @@ -387,18 +574,28 @@ BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1) * while copying. */ - tmp_table->term[entry_index] = NIL; - tmp_table->num_entries--; - new_size = tmp_table->allocated; - if (MUST_SHRINK(tmp_table)) { + ctx->tmp_table->term[ctx->entry_index] = NIL; + ctx->tmp_table->num_entries--; + new_size = ctx->tmp_table->allocated; + if (MUST_SHRINK(ctx->tmp_table)) { new_size /= 2; } - new_table = copy_table(tmp_table, new_size, 1); - erts_free(ERTS_ALC_T_TMP, tmp_table); + TRAPPING_COPY_TABLE_ERASE(ctx->new_table, + ctx->tmp_table, + new_size, + ERTS_PERSISTENT_TERM_CPY_REHASH, + ERASE1_TRAP_LOCATION_FINAL_COPY); + erts_free(ERTS_ALC_T_PERSISTENT_TERM_TMP, ctx->tmp_table); + /* + * IMPORTANT: Memory management depends on that ctx->tmp_table + * is set to NULL on the line below + */ + ctx->tmp_table = NULL; - mark_for_deletion(old_table, entry_index); - erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op); + mark_for_deletion(ctx->old_table, ctx->entry_index); + erts_schedule_thr_prgr_later_op(table_updater, ctx->new_table, &thr_prog_op); suspend_updater(BIF_P); + BUMP_REDS(BIF_P, (max_iterations - iterations_until_trap) / ITERATIONS_PER_RED); ERTS_BIF_YIELD_RETURN(BIF_P, am_true); } @@ -406,7 +603,7 @@ BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1) * Key is not present. Nothing to do. */ - ASSERT(is_nil(old_term)); + ASSERT(is_nil(ctx->old_term)); release_update_permission(0); BIF_RET(am_false); } @@ -740,65 +937,104 @@ lookup(HashTable* hash_table, Eterm key) } static HashTable* -tmp_table_copy(HashTable* old_table) +copy_table(ErtsPersistentTermCpyTableCtx* ctx) { - Uint size = old_table->allocated; - HashTable* tmp_table; + Uint old_size = ctx->old_table->allocated; Uint i; - - tmp_table = (HashTable *) erts_alloc(ERTS_ALC_T_TMP, - sizeof(HashTable) + - sizeof(Eterm) * (size-1)); - *tmp_table = *old_table; - for (i = 0; i < size; i++) { - tmp_table->term[i] = old_table->term[i]; + ErtsAlcType_t alloc_type; + ctx->total_iterations_done = 0; + switch(ctx->location) { + case ERTS_PERSISTENT_TERM_CPY_PLACE_1: goto L_copy_table_place_1; + case ERTS_PERSISTENT_TERM_CPY_PLACE_2: goto L_copy_table_place_2; + case ERTS_PERSISTENT_TERM_CPY_PLACE_3: goto L_copy_table_place_3; + case ERTS_PERSISTENT_TERM_CPY_PLACE_START: + ctx->iterations_done = 0; } - return tmp_table; -} - -static HashTable* -copy_table(HashTable* old_table, Uint new_size, int rehash) -{ - HashTable* new_table; - Uint old_size = old_table->allocated; - Uint i; - - new_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM, - sizeof(HashTable) + - sizeof(Eterm) * (new_size-1)); - if (old_table->allocated == new_size && !rehash) { + if (ctx->copy_type == ERTS_PERSISTENT_TERM_CPY_TEMP) { + alloc_type = ERTS_ALC_T_PERSISTENT_TERM_TMP; + } else { + alloc_type = ERTS_ALC_T_PERSISTENT_TERM; + } + ctx->new_table = (HashTable *) erts_alloc(alloc_type, + sizeof(HashTable) + + sizeof(Eterm) * (ctx->new_size-1)); + if (ctx->old_table->allocated == ctx->new_size && + (ctx->copy_type == ERTS_PERSISTENT_TERM_CPY_NO_REHASH || + ctx->copy_type == ERTS_PERSISTENT_TERM_CPY_TEMP)) { /* * Same size and no key deleted. Make an exact copy of the table. */ - *new_table = *old_table; - for (i = 0; i < new_size; i++) { - new_table->term[i] = old_table->term[i]; + *ctx->new_table = *ctx->old_table; + L_copy_table_place_1: + for (i = ctx->iterations_done; + i < MIN(ctx->iterations_done + ctx->max_iterations, + ctx->new_size); + i++) { + ctx->new_table->term[i] = ctx->old_table->term[i]; } + ctx->total_iterations_done = (i - ctx->iterations_done); + if (i < ctx->new_size) { + ctx->iterations_done = i; + ctx->location = ERTS_PERSISTENT_TERM_CPY_PLACE_1; + return NULL; + } + ctx->iterations_done = 0; } else { /* * The size of the table has changed or an element has been * deleted. Must rehash, by inserting all old terms into the * new (empty) table. */ - new_table->allocated = new_size; - new_table->num_entries = old_table->num_entries; - new_table->mask = new_size - 1; - for (i = 0; i < new_size; i++) { - new_table->term[i] = NIL; + ctx->new_table->allocated = ctx->new_size; + ctx->new_table->num_entries = ctx->old_table->num_entries; + ctx->new_table->mask = ctx->new_size - 1; + L_copy_table_place_2: + for (i = ctx->iterations_done; + i < MIN(ctx->iterations_done + ctx->max_iterations, + ctx->new_size); + i++) { + ctx->new_table->term[i] = NIL; + } + ctx->total_iterations_done = (i - ctx->iterations_done); + ctx->max_iterations -= ctx->total_iterations_done; + if (i < ctx->new_size) { + ctx->iterations_done = i; + ctx->location = ERTS_PERSISTENT_TERM_CPY_PLACE_2; + return NULL; } - for (i = 0; i < old_size; i++) { - if (is_tuple(old_table->term[i])) { - Eterm key = tuple_val(old_table->term[i])[1]; - Uint entry_index = lookup(new_table, key); - ASSERT(is_nil(new_table->term[entry_index])); - new_table->term[entry_index] = old_table->term[i]; + ctx->iterations_done = 0; + L_copy_table_place_3: + for (i = ctx->iterations_done; + i < MIN(ctx->iterations_done + ctx->max_iterations, + old_size); + i++) { + if (is_tuple(ctx->old_table->term[i])) { + Eterm key = tuple_val(ctx->old_table->term[i])[1]; + Uint entry_index = lookup(ctx->new_table, key); + ASSERT(is_nil(ctx->new_table->term[entry_index])); + ctx->new_table->term[entry_index] = ctx->old_table->term[i]; } } + ctx->total_iterations_done += (i - ctx->iterations_done); + if (i < old_size) { + ctx->iterations_done = i; + ctx->location = ERTS_PERSISTENT_TERM_CPY_PLACE_3; + return NULL; + } + ctx->iterations_done = 0; + } + ctx->new_table->first_to_delete = 0; + ctx->new_table->num_to_delete = 0; + erts_atomic_init_nob(&ctx->new_table->refc, (erts_aint_t)1); + { + HashTable* new_table = ctx->new_table; + /* + * IMPORTANT: Memory management depends on that ctx->new_table is + * set to NULL on the line below + */ + ctx->new_table = NULL; + return new_table; } - new_table->first_to_delete = 0; - new_table->num_to_delete = 0; - erts_atomic_init_nob(&new_table->refc, (erts_aint_t)1); - return new_table; } static void diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c index 6f8d2f8c35..6a4f43297e 100644 --- a/erts/emulator/beam/erl_cpu_topology.c +++ b/erts/emulator/beam/erl_cpu_topology.c @@ -34,6 +34,7 @@ #include "error.h" #include "bif.h" #include "erl_cpu_topology.h" +#include "erl_flxctr.h" #define ERTS_MAX_READER_GROUPS 64 @@ -58,6 +59,7 @@ static erts_cpu_info_t *cpuinfo; static int max_main_threads; static int reader_groups; +static int decentralized_counter_groups; static ErtsCpuBindData *scheduler2cpu_map; static erts_rwmtx_t cpuinfo_rwmtx; @@ -127,6 +129,8 @@ static erts_cpu_groups_map_t *cpu_groups_maps; static erts_cpu_groups_map_t *reader_groups_map; +static erts_cpu_groups_map_t *decentralized_counter_groups_map; + #define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH #define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) @@ -138,6 +142,7 @@ static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); static void reader_groups_callback(int, ErtsSchedulerData *, int, void *); +static void flxctr_groups_callback(int, ErtsSchedulerData *, int, void *); static erts_cpu_groups_map_t *add_cpu_groups(int groups, erts_cpu_groups_callback_t callback, void *arg); @@ -1646,7 +1651,8 @@ erts_get_logical_processors(int *conf, int *onln, int *avail) } void -erts_pre_early_init_cpu_topology(int *max_rg_p, +erts_pre_early_init_cpu_topology(int *max_dcg_p, + int *max_rg_p, int *conf_p, int *onln_p, int *avail_p) @@ -1654,6 +1660,7 @@ erts_pre_early_init_cpu_topology(int *max_rg_p, cpu_groups_maps = NULL; no_cpu_groups_callbacks = 0; *max_rg_p = ERTS_MAX_READER_GROUPS; + *max_dcg_p = ERTS_MAX_FLXCTR_GROUPS; cpuinfo = erts_cpu_info_create(); get_logical_processors(conf_p, onln_p, avail_p); } @@ -1662,7 +1669,9 @@ void erts_early_init_cpu_topology(int no_schedulers, int *max_main_threads_p, int max_reader_groups, - int *reader_groups_p) + int *reader_groups_p, + int max_decentralized_counter_groups, + int *decentralized_counter_groups_p) { user_cpudata = NULL; user_cpudata_size = 0; @@ -1687,6 +1696,12 @@ erts_early_init_cpu_topology(int no_schedulers, max_main_threads = no_schedulers; *max_main_threads_p = max_main_threads; + decentralized_counter_groups = max_main_threads; + if (decentralized_counter_groups <= 1 || max_decentralized_counter_groups <= 1) + decentralized_counter_groups = 1; + if (decentralized_counter_groups > max_decentralized_counter_groups) + decentralized_counter_groups = max_decentralized_counter_groups; + *decentralized_counter_groups_p = decentralized_counter_groups; reader_groups = max_main_threads; if (reader_groups <= 1 || max_reader_groups <= 1) reader_groups = 0; @@ -1718,6 +1733,9 @@ erts_init_cpu_topology(void) reader_groups_map = add_cpu_groups(reader_groups, reader_groups_callback, NULL); + decentralized_counter_groups_map = add_cpu_groups(decentralized_counter_groups, + flxctr_groups_callback, + NULL); if (cpu_bind_order == ERTS_CPU_BIND_NONE) erts_rwmtx_rwunlock(&cpuinfo_rwmtx); @@ -1789,6 +1807,15 @@ reader_groups_callback(int suspending, erts_rwmtx_set_reader_group(suspending ? 0 : group+1); } +void +flxctr_groups_callback(int suspending, + ErtsSchedulerData *esdp, + int group, + void *unused) +{ + erts_flxctr_set_slot(suspending ? 0 : group+1); +} + static Eterm get_cpu_groups_map(Process *c_p, erts_cpu_groups_map_t *map, int offset); @@ -1821,6 +1848,16 @@ erts_get_reader_groups_map(Process *c_p) return res; } +Eterm +erts_get_decentralized_counter_groups_map(Process *c_p) +{ + Eterm res; + erts_rwmtx_rlock(&cpuinfo_rwmtx); + res = get_cpu_groups_map(c_p, decentralized_counter_groups_map, 1); + erts_rwmtx_runlock(&cpuinfo_rwmtx); + return res; +} + /* * CPU groups */ diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h index 88bcad79ab..4a428d7972 100644 --- a/erts/emulator/beam/erl_cpu_topology.h +++ b/erts/emulator/beam/erl_cpu_topology.h @@ -27,14 +27,19 @@ #ifndef ERL_CPU_TOPOLOGY_H__ #define ERL_CPU_TOPOLOGY_H__ -void erts_pre_early_init_cpu_topology(int *max_rg_p, - int *conf_p, - int *onln_p, - int *avail_p); -void erts_early_init_cpu_topology(int no_schedulers, - int *max_main_threads_p, - int max_reader_groups, - int *reader_groups_p); +void +erts_pre_early_init_cpu_topology(int *max_dcg_p, + int *max_rg_p, + int *conf_p, + int *onln_p, + int *avail_p); +void +erts_early_init_cpu_topology(int no_schedulers, + int *max_main_threads_p, + int max_reader_groups, + int *reader_groups_p, + int max_decentralized_counter_groups, + int *decentralized_counter_groups_p); void erts_init_cpu_topology(void); @@ -70,6 +75,7 @@ Eterm erts_bind_schedulers(Process *c_p, Eterm how); Eterm erts_get_schedulers_binds(Process *c_p); Eterm erts_get_reader_groups_map(Process *c_p); +Eterm erts_get_decentralized_counter_groups_map(Process *c_p); Eterm erts_set_cpu_topology(Process *c_p, Eterm term); Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which); diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c index 0a50af4d1a..d24f30f126 100644 --- a/erts/emulator/beam/erl_db.c +++ b/erts/emulator/beam/erl_db.c @@ -42,6 +42,7 @@ #include "bif.h" #include "big.h" #include "erl_binary.h" +#include "bif.h" erts_atomic_t erts_ets_misc_mem_size; @@ -64,6 +65,11 @@ do { \ } \ }while(0) +#define DB_GET_APPROX_NITEMS(DB) \ + erts_flxctr_read_approx(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define DB_GET_APPROX_MEM_CONSUMED(DB) \ + erts_flxctr_read_approx(&(DB)->common.counters, ERTS_DB_TABLE_MEM_COUNTER_ID) + static BIF_RETTYPE db_bif_fail(Process* p, Uint freason, Uint bif_ix, Export* bif_exp) { @@ -398,8 +404,9 @@ static void free_dbtable(void *vtb) { DbTable *tb = (DbTable *) vtb; - - ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable)); + ASSERT(erts_flxctr_is_snapshot_ongoing(&tb->common.counters) || + sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID)); erts_rwmtx_destroy(&tb->common.rwlock); erts_mtx_destroy(&tb->common.fixlock); @@ -408,7 +415,8 @@ free_dbtable(void *vtb) if (tb->common.btid) erts_bin_release(tb->common.btid); - erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable)); + erts_flxctr_destroy(&tb->common.counters, ERTS_ALC_T_DB_TABLE); + erts_free(ERTS_ALC_T_DB_TABLE, tb); } static void schedule_free_dbtable(DbTable* tb) @@ -1731,12 +1739,16 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) */ { DbTable init_tb; - - erts_atomic_init_nob(&init_tb.common.memory_size, 0); + erts_flxctr_init(&init_tb.common.counters, 0, 2, ERTS_ALC_T_DB_TABLE); tb = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE, &init_tb, sizeof(DbTable)); - erts_atomic_init_nob(&tb->common.memory_size, - erts_atomic_read_nob(&init_tb.common.memory_size)); + erts_flxctr_init(&tb->common.counters, + status & DB_CA_ORDERED_SET, + 2, + ERTS_ALC_T_DB_TABLE); + erts_flxctr_add(&tb->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID, + DB_GET_APPROX_MEM_CONSUMED(&init_tb)); } tb->common.meth = meth; @@ -1750,8 +1762,6 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) tb->common.owner = BIF_P->common.id; set_heir(BIF_P, tb, heir, heir_data); - erts_atomic_init_nob(&tb->common.nitems, 0); - tb->common.fixing_procs = NULL; tb->common.compress = is_compressed; #ifdef ETS_DBG_FORCE_TRAP @@ -2128,19 +2138,18 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) { SWord initial_reds = ERTS_BIF_REDS_LEFT(BIF_P); SWord reds = initial_reds; - Eterm nitems; + Eterm nitems_holder = THE_NON_VALUE; DbTable* tb; - CHECK_TABLES(); DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE, BIF_ets_internal_delete_all_2); if (BIF_ARG_2 == am_undefined) { - nitems = erts_make_integer(erts_atomic_read_nob(&tb->common.nitems), - BIF_P); - - reds = tb->common.meth->db_delete_all_objects(BIF_P, tb, reds); - + reds = tb->common.meth->db_delete_all_objects(BIF_P, + tb, + reds, + &nitems_holder); + ASSERT(nitems_holder != THE_NON_VALUE); ASSERT(!(tb->common.status & DB_BUSY)); if (reds < 0) { @@ -2159,7 +2168,7 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) db_unlock(tb, LCK_WRITE); BUMP_ALL_REDS(BIF_P); BIF_TRAP2(bif_export[BIF_ets_internal_delete_all_2], BIF_P, - BIF_ARG_1, nitems); + BIF_ARG_1, nitems_holder); } else { /* Done, no trapping needed */ @@ -2169,15 +2178,19 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) } else { /* - * The table lookup succeeded and second argument is nitems + * The table lookup succeeded and second argument is nitems_holder * and not 'undefined', which means we have trapped at least once * and are now done. */ - nitems = BIF_ARG_2; + nitems_holder = BIF_ARG_2; } - db_unlock(tb, LCK_WRITE); + { + Eterm nitems = + tb->common.meth->db_delete_all_objects_get_nitems_from_holder(BIF_P, + nitems_holder); BIF_RET(nitems); + } } static void delete_all_objects_continue(Process* p, DbTable* tb) @@ -2190,7 +2203,7 @@ static void delete_all_objects_continue(Process* p, DbTable* tb) if ((tb->common.status & (DB_DELETE|DB_BUSY)) != DB_BUSY) return; - reds = tb->common.meth->db_delete_all_objects(p, tb, reds); + reds = tb->common.meth->db_delete_all_objects(p, tb, reds, NULL); if (reds < 0) { BUMP_ALL_REDS(p); @@ -3277,13 +3290,29 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) int i; Eterm* hp; Uint freason; + Sint size = -1; + Sint memory = -1; + Eterm table; + int is_ctrs_read_result_set = 0; /*Process* rp = NULL;*/ /* If/when we implement lockless private tables: Eterm owner; */ - - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { - if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1))) + if(is_tuple(BIF_ARG_1) && + is_tuple_arity(BIF_ARG_1, 2) && + erts_flxctr_is_snapshot_result(tuple_val(BIF_ARG_1)[1])) { + Eterm counter_read_result = tuple_val(BIF_ARG_1)[1]; + table = tuple_val(BIF_ARG_1)[2]; + size = erts_flxctr_get_snapshot_result_after_trap(counter_read_result, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + memory = erts_flxctr_get_snapshot_result_after_trap(counter_read_result, + ERTS_DB_TABLE_MEM_COUNTER_ID); + is_ctrs_read_result_set = 1; + } else { + table = BIF_ARG_1; + } + if ((tb = db_get_table(BIF_P, table, DB_INFO, LCK_READ, &freason)) == NULL) { + if (freason == BADARG && (is_atom(table) || is_ref(table))) BIF_RET(am_undefined); else return db_bif_fail(BIF_P, freason, BIF_ets_info_1, NULL); @@ -3314,9 +3343,35 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) BIF_ERROR(BIF_P, BADARG); } }*/ + + if (!is_ctrs_read_result_set) { + ErtsFlxCtrSnapshotResult res = + erts_flxctr_snapshot(&tb->common.counters, ERTS_ALC_T_DB_TABLE, BIF_P); + if (ERTS_FLXCTR_GET_RESULT_AFTER_TRAP == res.type) { + Eterm tuple; + db_unlock(tb, LCK_READ); + hp = HAlloc(BIF_P, 3); + tuple = TUPLE2(hp, res.trap_resume_state, table); + BIF_TRAP1(bif_export[BIF_ets_info_1], BIF_P, tuple); + } else if (res.type == ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP) { + db_unlock(tb, LCK_READ); + BIF_TRAP1(bif_export[BIF_ets_info_1], BIF_P, table); + } else { + size = res.result[ERTS_DB_TABLE_NITEMS_COUNTER_ID]; + memory = res.result[ERTS_DB_TABLE_MEM_COUNTER_ID]; + is_ctrs_read_result_set = 1; + } + } for (i = 0; i < sizeof(fields)/sizeof(Eterm); i++) { - results[i] = table_info(BIF_P, tb, fields[i]); - ASSERT(is_value(results[i])); + if (is_ctrs_read_result_set && am_size == fields[i]) { + results[i] = erts_make_integer(size, BIF_P); + } else if (is_ctrs_read_result_set && am_memory == fields[i]) { + Sint words = (Sint) ((memory + sizeof(Sint) - 1) / sizeof(Sint)); + results[i] = erts_make_integer(words, BIF_P); + } else { + results[i] = table_info(BIF_P, tb, fields[i]); + ASSERT(is_value(results[i])); + } } db_unlock(tb, LCK_READ); @@ -3344,14 +3399,43 @@ BIF_RETTYPE ets_info_2(BIF_ALIST_2) DbTable* tb; Eterm ret = THE_NON_VALUE; Uint freason; - + if (erts_flxctr_is_snapshot_result(BIF_ARG_1)) { + Sint res; + if (am_memory == BIF_ARG_2) { + res = erts_flxctr_get_snapshot_result_after_trap(BIF_ARG_1, + ERTS_DB_TABLE_MEM_COUNTER_ID); + res = (Sint) ((res + sizeof(Sint) - 1) / sizeof(Sint)); + } else { + res = erts_flxctr_get_snapshot_result_after_trap(BIF_ARG_1, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + } + BIF_RET(erts_make_integer(res, BIF_P)); + } if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1))) BIF_RET(am_undefined); else return db_bif_fail(BIF_P, freason, BIF_ets_info_2, NULL); } - ret = table_info(BIF_P, tb, BIF_ARG_2); + if (BIF_ARG_2 == am_size || BIF_ARG_2 == am_memory) { + ErtsFlxCtrSnapshotResult res = + erts_flxctr_snapshot(&tb->common.counters, ERTS_ALC_T_DB_TABLE, BIF_P); + if (ERTS_FLXCTR_GET_RESULT_AFTER_TRAP == res.type) { + db_unlock(tb, LCK_READ); + BIF_TRAP2(bif_export[BIF_ets_info_2], BIF_P, res.trap_resume_state, BIF_ARG_2); + } else if (res.type == ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP) { + db_unlock(tb, LCK_READ); + BIF_TRAP2(bif_export[BIF_ets_info_2], BIF_P, BIF_ARG_1, BIF_ARG_2); + } else if (BIF_ARG_2 == am_size) { + ret = erts_make_integer(res.result[ERTS_DB_TABLE_NITEMS_COUNTER_ID], BIF_P); + } else { /* BIF_ARG_2 == am_memory */ + Sint r = res.result[ERTS_DB_TABLE_MEM_COUNTER_ID]; + r = (Sint) ((r + sizeof(Sint) - 1) / sizeof(Sint)); + ret = erts_make_integer(r, BIF_P); + } + } else { + ret = table_info(BIF_P, tb, BIF_ARG_2); + } db_unlock(tb, LCK_READ); if (is_non_value(ret)) { BIF_ERROR(BIF_P, BADARG); @@ -4121,7 +4205,8 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) int use_monotonic; if (What == am_size) { - ret = make_small(erts_atomic_read_nob(&tb->common.nitems)); + Uint size = (Uint) (DB_GET_APPROX_NITEMS(tb)); + ret = erts_make_integer(size, p); } else if (What == am_type) { if (tb->common.status & DB_SET) { ret = am_set; @@ -4136,7 +4221,7 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) ret = am_bag; } } else if (What == am_memory) { - Uint words = (Uint) ((erts_atomic_read_nob(&tb->common.memory_size) + Uint words = (Uint) ((DB_GET_APPROX_MEM_CONSUMED(tb) + sizeof(Uint) - 1) / sizeof(Uint)); @@ -4294,9 +4379,9 @@ static void print_table(fmtfn_t to, void *to_arg, int show, DbTable* tb) tb->common.meth->db_print(to, to_arg, show, tb); - erts_print(to, to_arg, "Objects: %d\n", (int)erts_atomic_read_nob(&tb->common.nitems)); + erts_print(to, to_arg, "Objects: %d\n", (int)DB_GET_APPROX_NITEMS(tb)); erts_print(to, to_arg, "Words: %bpu\n", - (Uint) ((erts_atomic_read_nob(&tb->common.memory_size) + (Uint) ((DB_GET_APPROX_MEM_CONSUMED(tb) + sizeof(Uint) - 1) / sizeof(Uint))); @@ -4488,3 +4573,16 @@ int erts_ets_force_split(Eterm tid, int on) db_unlock(tb, LCK_WRITE); return 1; } + +int erts_ets_debug_random_split_join(Eterm tid, int on) +{ + DbTable* tb = tid2tab(tid); + if (!tb || !IS_CATREE_TABLE(tb->common.type)) + return 0; + + db_lock(tb, LCK_WRITE); + if (!(tb->common.status & DB_DELETE)) + db_catree_debug_random_split_join(&tb->catree, on); + db_unlock(tb, LCK_WRITE); + return 1; +} diff --git a/erts/emulator/beam/erl_db.h b/erts/emulator/beam/erl_db.h index dc77fbb60c..b3dc1b9ba3 100644 --- a/erts/emulator/beam/erl_db.h +++ b/erts/emulator/beam/erl_db.h @@ -131,6 +131,7 @@ extern erts_atomic_t erts_ets_misc_mem_size; Eterm erts_ets_colliding_names(Process*, Eterm name, Uint cnt); int erts_ets_force_split(Eterm tid, int on); +int erts_ets_debug_random_split_join(Eterm tid, int on); Uint erts_db_get_max_tabs(void); Eterm erts_db_make_tid(Process *c_p, DbTableCommon *tb); @@ -160,7 +161,9 @@ do { \ erts_aint_t sz__ = (((erts_aint_t) (ALLOC_SZ)) \ - ((erts_aint_t) (FREE_SZ))); \ ASSERT((TAB)); \ - erts_atomic_add_nob(&(TAB)->common.memory_size, sz__); \ + erts_flxctr_add(&(TAB)->common.counters, \ + ERTS_DB_TABLE_MEM_COUNTER_ID, \ + sz__); \ } while (0) #define ERTS_ETS_MISC_MEM_ADD(SZ) \ @@ -305,10 +308,10 @@ erts_db_free(ErtsAlcType_t type, DbTable *tab, void *ptr, Uint size) ASSERT(ptr != 0); ASSERT(size == ERTS_ALC_DBG_BLK_SZ(ptr)); ERTS_DB_ALC_MEM_UPDATE_(tab, size, 0); - - ASSERT(((void *) tab) != ptr - || erts_atomic_read_nob(&tab->common.memory_size) == 0); - + ASSERT(((void *) tab) != ptr || + tab->common.counters.is_decentralized || + 0 == erts_flxctr_read_centralized(&tab->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID)); erts_free(type, ptr); } diff --git a/erts/emulator/beam/erl_db_catree.c b/erts/emulator/beam/erl_db_catree.c index 0402c6b7b4..e0d5e44f58 100644 --- a/erts/emulator/beam/erl_db_catree.c +++ b/erts/emulator/beam/erl_db_catree.c @@ -149,7 +149,12 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord); static void db_foreach_offheap_catree(DbTable *, void (*)(ErlOffHeap *, void *), void *); -static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds); +static SWord db_delete_all_objects_catree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb); +static Eterm db_delete_all_objects_get_nitems_from_holder_catree(Process* p, + Eterm nitems_holder); static int db_lookup_dbterm_catree(Process *, DbTable *, Eterm key, Eterm obj, DbUpdateHandle*); @@ -191,6 +196,7 @@ DbTableMethod db_catree = db_select_replace_continue_catree, db_take_catree, db_delete_all_objects_catree, + db_delete_all_objects_get_nitems_from_holder_catree, db_free_table_catree, db_free_table_continue_catree, db_print_catree, @@ -641,7 +647,8 @@ static int dbg_fastrand(void) static void dbg_provoke_random_splitjoin(DbTableCATree* tb, DbTableCATreeNode* base_node) { - if (tb->common.status & DB_CATREE_FORCE_SPLIT) + if (tb->common.status & DB_CATREE_FORCE_SPLIT || + !(tb->common.status & DB_CATREE_DEBUG_RANDOM_SPLIT_JOIN)) return; switch (dbg_fastrand() % 8) { @@ -1357,6 +1364,8 @@ static SWord do_free_base_node_cont(DbTableCATree *tb, SWord num_left) PUSH_NODE(&tb->free_stack_elems, root); root = p; } else { + DEC_NITEMS((DbTable*)tb); + tb->nr_of_deleted_items++; free_term((DbTable*)tb, root); if (--num_left >= 0) { break; @@ -1397,6 +1406,10 @@ int db_create_catree(Process *p, DbTable *tbl) root = create_base_node(tb, NULL); tb->deletion = 0; tb->base_nodes_to_free_list = NULL; + tb->nr_of_deleted_items = 0; +#ifdef DEBUG + tbl->common.status |= DB_CATREE_DEBUG_RANDOM_SPLIT_JOIN; +#endif erts_atomic_init_relb(&(tb->root), (erts_aint_t)root); return DB_ERROR_NONE; } @@ -2050,6 +2063,7 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds) PUSH_NODE(&tb->free_stack_rnodes, GET_ROOT(tb)); tb->is_routing_nodes_freed = 0; tb->base_nodes_to_free_list = NULL; + tb->nr_of_deleted_items = 0; } if ( ! tb->is_routing_nodes_freed ) { reds = do_free_routing_nodes_catree_cont(tb, reds); @@ -2079,13 +2093,57 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds) return 1; } -static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds) +static +int db_catree_nr_of_items_deleted_wb_dtor(Binary *context_bin) { + (void)context_bin; + return 1; +} + +typedef struct { + Uint nr_of_deleted_items; +} DbCATreeNrOfItemsDeletedWb; + +static Eterm +create_and_install_num_of_deleted_items_wb_bin(Process *p, DbTableCATree *tb) +{ + Binary* bin = + erts_create_magic_binary(sizeof(DbCATreeNrOfItemsDeletedWb), + db_catree_nr_of_items_deleted_wb_dtor); + DbCATreeNrOfItemsDeletedWb* data = ERTS_MAGIC_BIN_DATA(bin); + Eterm* hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + Eterm mref = erts_mk_magic_ref(&hp, &MSO(p), bin); + data->nr_of_deleted_items = 0; + tb->nr_of_deleted_items_wb = bin; + erts_refc_inctest(&bin->intern.refc, 2); + return mref; +} + +static Eterm db_delete_all_objects_get_nitems_from_holder_catree(Process* p, + Eterm mref) +{ + Binary* bin = erts_magic_ref2bin(mref); + DbCATreeNrOfItemsDeletedWb* data = ERTS_MAGIC_BIN_DATA(bin); + return erts_make_integer(data->nr_of_deleted_items, p); +} + +static SWord db_delete_all_objects_catree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb) { + DbTableCATree *tb = &tbl->catree; + DbCATreeNrOfItemsDeletedWb* data; + if (!tb->deletion) { + *nitems_holder_wb = + create_and_install_num_of_deleted_items_wb_bin(p, tb); + } reds = db_free_table_continue_catree(tbl, reds); if (reds < 0) return reds; + data = ERTS_MAGIC_BIN_DATA(tb->nr_of_deleted_items_wb); + data->nr_of_deleted_items = tb->nr_of_deleted_items; + erts_bin_release(tb->nr_of_deleted_items_wb); db_create_catree(p, tbl); - erts_atomic_set_nob(&tbl->catree.common.nitems, 0); return reds; } @@ -2203,6 +2261,14 @@ void db_catree_force_split(DbTableCATree* tb, int on) tb->common.status &= ~DB_CATREE_FORCE_SPLIT; } +void db_catree_debug_random_split_join(DbTableCATree* tb, int on) +{ + if (on) + tb->common.status |= DB_CATREE_DEBUG_RANDOM_SPLIT_JOIN; + else + tb->common.status &= ~DB_CATREE_DEBUG_RANDOM_SPLIT_JOIN; +} + void db_calc_stats_catree(DbTableCATree* tb, DbCATreeStats* stats) { DbTableCATreeNode* stack[ERL_DB_CATREE_MAX_ROUTE_NODE_LAYER_HEIGHT]; diff --git a/erts/emulator/beam/erl_db_catree.h b/erts/emulator/beam/erl_db_catree.h index 418837be8e..cf3498dabb 100644 --- a/erts/emulator/beam/erl_db_catree.h +++ b/erts/emulator/beam/erl_db_catree.h @@ -87,6 +87,10 @@ typedef struct db_table_catree { CATreeNodeStack free_stack_rnodes; DbTableCATreeNode *base_nodes_to_free_list; int is_routing_nodes_freed; + /* The fields below are used by delete_all_objects and + select_delete(DeleteAll)*/ + Uint nr_of_deleted_items; + Binary* nr_of_deleted_items_wb; } DbTableCATree; typedef struct { @@ -104,7 +108,6 @@ void db_initialize_catree(void); int db_create_catree(Process *p, DbTable *tbl); - TreeDbTerm** catree_find_root(Eterm key, CATreeRootIterator*); TreeDbTerm** catree_find_next_from_pb_key_root(Eterm key, CATreeRootIterator*); @@ -121,6 +124,7 @@ void erts_lcnt_enable_db_catree_lock_count(DbTableCATree *tb, int enable); #endif void db_catree_force_split(DbTableCATree*, int on); +void db_catree_debug_random_split_join(DbTableCATree*, int on); typedef struct { Uint route_nodes; diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index f225730029..ceaccf7e44 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -85,6 +85,14 @@ #include "erl_db_hash.h" +#define ADD_NITEMS(DB, TO_ADD) \ + erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD) +#define INC_NITEMS(DB) \ + erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define DEC_NITEMS(DB) \ + erts_flxctr_dec_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define RESET_NITEMS(DB) \ + erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) /* * The following symbols can be manipulated to "tune" the linear hash array */ @@ -121,7 +129,9 @@ : ((struct segment**) erts_atomic_read_nob(&(tb)->segtab))) #endif #define NACTIVE(tb) ((int)erts_atomic_read_nob(&(tb)->nactive)) -#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems)) +#define NITEMS(tb) \ + ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \ + ERTS_DB_TABLE_NITEMS_COUNTER_ID)) #define SLOT_IX_TO_SEG_IX(i) (((i)+(EXT_SEGSZ-FIRST_SEGSZ)) >> EXT_SEGSZ_EXP) @@ -444,7 +454,12 @@ static void db_foreach_offheap_hash(DbTable *, void (*)(ErlOffHeap *, void *), void *); -static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds); +static SWord db_delete_all_objects_hash(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb); +static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p, + Eterm nitems_holder); #ifdef HARDDEBUG static void db_check_table_hash(DbTableHash *tb); #endif @@ -548,6 +563,7 @@ DbTableMethod db_hash = db_select_replace_continue_hash, db_take_hash, db_delete_all_objects_hash, + db_delete_all_objects_get_nitems_from_holder_hash, db_free_empty_table_hash, db_free_table_continue_hash, db_print_hash, @@ -806,7 +822,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail) if (tb->common.status & DB_SET) { HashDbTerm* bnext = b->next; if (is_pseudo_deleted(b)) { - erts_atomic_inc_nob(&tb->common.nitems); + INC_NITEMS(tb); b->pseudo_deleted = 0; } else if (key_clash_fail) { @@ -835,7 +851,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail) do { if (db_eq(&tb->common,obj,&q->dbterm)) { if (is_pseudo_deleted(q)) { - erts_atomic_inc_nob(&tb->common.nitems); + INC_NITEMS(tb); q->pseudo_deleted = 0; ASSERT(q->hvalue == hval); if (q != b) { /* must move to preserve key insertion order */ @@ -858,7 +874,7 @@ Lnew: q->pseudo_deleted = 0; q->next = b; *bp = q; - nitems = erts_atomic_inc_read_nob(&tb->common.nitems); + nitems = INC_NITEMS(tb); WUNLOCK_HASH(lck); { int nactive = NACTIVE(tb); @@ -1056,7 +1072,7 @@ int db_erase_hash(DbTable *tbl, Eterm key, Eterm *ret) } WUNLOCK_HASH(lck); if (nitems_diff) { - erts_atomic_add_nob(&tb->common.nitems, nitems_diff); + ADD_NITEMS(tb, nitems_diff); try_shrink(tb); } free_term_list(tb, free_us); @@ -1117,7 +1133,7 @@ static int db_erase_object_hash(DbTable *tbl, Eterm object, Eterm *ret) } WUNLOCK_HASH(lck); if (nitems_diff) { - erts_atomic_add_nob(&tb->common.nitems, nitems_diff); + ADD_NITEMS(tb, nitems_diff); try_shrink(tb); } free_term_list(tb, free_us); @@ -2023,7 +2039,7 @@ static int select_delete_on_match_res(traverse_context_t* ctx_base, Sint slot_ix del->next = ctx->free_us; ctx->free_us = del; } - erts_atomic_dec_nob(&ctx->base.tb->common.nitems); + DEC_NITEMS(ctx->base.tb); return 1; } @@ -2300,7 +2316,7 @@ static int db_take_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret) } WUNLOCK_HASH(lck); if (nitems_diff) { - erts_atomic_add_nob(&tb->common.nitems, nitems_diff); + ADD_NITEMS(tb, nitems_diff); try_shrink(tb); } free_term_list(tb, free_us); @@ -2360,7 +2376,7 @@ static SWord db_mark_all_deleted_hash(DbTable *tbl, SWord reds) fixdel->slot = NACTIVE(tb) - 1; fixdel->all = 1; fixdel->trap = 0; - erts_atomic_set_nob(&tb->common.nitems, 0); + RESET_NITEMS(tb); return loops < 0 ? 0 : loops / LOOPS_PER_REDUCTION; } @@ -2468,7 +2484,8 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds) (void*)tb->locks, sizeof(DbTableHashFineLocks)); tb->locks = NULL; } - ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable)); + ASSERT(sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID)); return reds; /* Done */ } @@ -3080,7 +3097,7 @@ db_lookup_dbterm_hash(Process *p, DbTable *tbl, Eterm key, Eterm obj, ASSERT(q->hvalue == hval); q->pseudo_deleted = 0; *bp = b = q; - erts_atomic_inc_nob(&tb->common.nitems); + INC_NITEMS(tb); } HRelease(p, hend, htop); @@ -3123,7 +3140,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle) } WUNLOCK_HASH(lck); - erts_atomic_dec_nob(&tb->common.nitems); + DEC_NITEMS(tb); try_shrink(tb); } else { if (handle->flags & DB_MUST_RESIZE) { @@ -3132,7 +3149,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle) } if (handle->flags & DB_INC_TRY_GROW) { int nactive; - int nitems = erts_atomic_inc_read_nob(&tb->common.nitems); + int nitems = INC_NITEMS(tb); WUNLOCK_HASH(lck); nactive = NACTIVE(tb); @@ -3153,8 +3170,17 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle) return; } -static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds) +static SWord db_delete_all_objects_hash(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb) { + if (nitems_holder_wb != NULL) { + Uint nr_of_items = + erts_flxctr_read_centralized(&tbl->common.counters, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + *nitems_holder_wb = erts_make_integer(nr_of_items, p); + } if (IS_FIXED(tbl)) { reds = db_mark_all_deleted_hash(tbl, reds); } else { @@ -3163,11 +3189,16 @@ static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds) return reds; db_create_hash(p, tbl); - erts_atomic_set_nob(&tbl->hash.common.nitems, 0); + RESET_NITEMS(tbl); } return reds; } +static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p, + Eterm nitems_holder){ + return nitems_holder; +} + void db_foreach_offheap_hash(DbTable *tbl, void (*func)(ErlOffHeap *, void *), void * arg) diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c index f9ba04f399..492ea81b63 100644 --- a/erts/emulator/beam/erl_db_tree.c +++ b/erts/emulator/beam/erl_db_tree.c @@ -51,9 +51,20 @@ #include "erl_db_tree_util.h" #define GETKEY_WITH_POS(Keypos, Tplp) (*((Tplp) + Keypos)) -#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems)) -#define TREE_MAX_ELEMENTS 0xFFFFFFFFUL +#define NITEMS_CENTRALIZED(tb) \ + ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \ + ERTS_DB_TABLE_NITEMS_COUNTER_ID)) +#define ADD_NITEMS(DB, TO_ADD) \ + erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD) +#define INC_NITEMS(DB) \ + erts_flxctr_inc(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define INC_NITEMS_CENTRALIZED(DB) \ + erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define RESET_NITEMS(DB) \ + erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define IS_CENTRALIZED_CTR(tb) (!(tb)->common.counters.is_decentralized) +#define APPROX_MEM_CONSUMED(tb) erts_flxctr_read_approx(&(tb)->common.counters, ERTS_DB_TABLE_MEM_COUNTER_ID) #define TOPN_NODE(Dtt, Pos) \ (((Pos) < Dtt->pos) ? \ @@ -296,7 +307,7 @@ int tree_balance_right(TreeDbTerm **this); static int delsub(TreeDbTerm **this); static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, Sint slot, DbTable *tb, DbTableTree *stack_container, - CATreeRootIterator *iter); + CATreeRootIterator *iter, int* is_EOT); static TreeDbTerm *find_node(DbTableCommon *tb, TreeDbTerm *root, Eterm key, DbTableTree *stack_container); static TreeDbTerm **find_node2(DbTableCommon *tb, TreeDbTerm **root, Eterm key); @@ -433,8 +444,12 @@ static void db_foreach_offheap_tree(DbTable *, void (*)(ErlOffHeap *, void *), void *); -static SWord db_delete_all_objects_tree(Process* p, DbTable* tbl, SWord reds); - +static SWord db_delete_all_objects_tree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb); +static Eterm db_delete_all_objects_get_nitems_from_holder_tree(Process* p, + Eterm nitems_holder); #ifdef HARDDEBUG static void db_check_table_tree(DbTable *tbl); #endif @@ -478,6 +493,7 @@ DbTableMethod db_tree = db_select_replace_continue_tree, db_take_tree, db_delete_all_objects_tree, + db_delete_all_objects_get_nitems_from_holder_tree, db_free_empty_table_tree, db_free_table_continue_tree, db_print_tree, @@ -595,7 +611,8 @@ int db_last_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, } if (stack) { PUSH_NODE(stack, this); - stack->slot = NITEMS(tbl); + /* Always centralized counters when static stack is used */ + stack->slot = NITEMS_CENTRALIZED(tbl); release_stack(tbl,stack_container,stack); } *ret = db_copy_key(p, tbl, &this->dbterm); @@ -661,10 +678,7 @@ int db_put_tree_common(DbTableCommon *tb, TreeDbTerm **root, Eterm obj, for (;;) if (!*this) { /* Found our place */ state = 1; - if (erts_atomic_inc_read_nob(&tb->nitems) >= TREE_MAX_ELEMENTS) { - erts_atomic_dec_nob(&tb->nitems); - return DB_ERROR_SYSRES; - } + INC_NITEMS(((DbTable*)tb)); *this = new_dbterm(tb, obj); (*this)->balance = 0; (*this)->left = (*this)->right = NULL; @@ -888,7 +902,7 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, TreeDbTerm *st; Eterm *hp, *hend; Eterm copy; - + int is_EOT = 0; /* * The notion of a "slot" is not natural in a tree, but we try to * simulate it by giving the n'th node in the tree instead. @@ -899,10 +913,10 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, if (is_not_small(slot_term) || ((slot = signed_val(slot_term)) < 0) || - (slot > NITEMS(tbl))) + (IS_CENTRALIZED_CTR(tbl) && slot > NITEMS_CENTRALIZED(tbl))) return DB_ERROR_BADPARAM; - if (slot == NITEMS(tbl)) { + if (IS_CENTRALIZED_CTR(tbl) && slot == NITEMS_CENTRALIZED(tbl)) { *ret = am_EOT; return DB_ERROR_NONE; } @@ -912,7 +926,11 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, * are counted from 1 and up. */ ++slot; - st = slot_search(p, root, slot, tbl, stack_container, iter); + st = slot_search(p, root, slot, tbl, stack_container, iter, &is_EOT); + if (is_EOT) { + *ret = am_EOT; + return DB_ERROR_NONE; + } if (st == NULL) { *ret = am_false; return DB_ERROR_UNSPEC; @@ -2244,7 +2262,8 @@ void db_print_tree_common(fmtfn_t to, void *to_arg, erts_print(to, to_arg, "\n" "------------------------------------------------\n"); #else - erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", NITEMS(tbl)); + erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", + erts_flxctr_read_approx(&tbl->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)); #endif } @@ -2281,24 +2300,41 @@ static SWord db_free_table_continue_tree(DbTable *tbl, SWord reds) (DbTable *) tb, (void *) tb->static_stack.array, sizeof(TreeDbTerm *) * STACK_NEED); - ASSERT((erts_atomic_read_nob(&tb->common.memory_size) - == sizeof(DbTable)) || - (erts_atomic_read_nob(&tb->common.memory_size) - == (sizeof(DbTable) + sizeof(DbFixation)))); + ASSERT(erts_flxctr_is_snapshot_ongoing(&tb->common.counters) || + ((APPROX_MEM_CONSUMED(tb) + == sizeof(DbTable)) || + (APPROX_MEM_CONSUMED(tb) + == (sizeof(DbTable) + sizeof(DbFixation))))); } return reds; } -static SWord db_delete_all_objects_tree(Process* p, DbTable* tbl, SWord reds) +static SWord db_delete_all_objects_tree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb) { + if (nitems_holder_wb != NULL) { + Uint nr_of_items = + erts_flxctr_read_centralized(&tbl->common.counters, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + *nitems_holder_wb = erts_make_integer(nr_of_items, p); + } reds = db_free_table_continue_tree(tbl, reds); if (reds < 0) return reds; db_create_tree(p, tbl); - erts_atomic_set_nob(&tbl->tree.common.nitems, 0); + RESET_NITEMS(tbl); return reds; } +static Eterm db_delete_all_objects_get_nitems_from_holder_tree(Process* p, + Eterm holder) +{ + (void)p; + return holder; +} + static void do_db_tree_foreach_offheap(TreeDbTerm *, void (*)(ErlOffHeap *, void *), void *); @@ -2383,7 +2419,7 @@ static TreeDbTerm *linkout_tree(DbTableCommon *tb, TreeDbTerm **root, tstack[tpos++] = this; state = delsub(this); } - erts_atomic_dec_nob(&tb->nitems); + DEC_NITEMS(((DbTable*)tb)); break; } } @@ -2450,7 +2486,7 @@ static TreeDbTerm *linkout_object_tree(DbTableCommon *tb, TreeDbTerm **root, tstack[tpos++] = this; state = delsub(this); } - erts_atomic_dec_nob(&tb->nitems); + DEC_NITEMS(((DbTable*)tb)); break; } } @@ -2745,7 +2781,8 @@ static int delsub(TreeDbTerm **this) static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, Sint slot, DbTable *tb, DbTableTree *stack_container, - CATreeRootIterator *iter) + CATreeRootIterator *iter, + int* is_EOT) { TreeDbTerm *this; TreeDbTerm *tmp; @@ -2837,8 +2874,12 @@ static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, break; next_root: - if (!iter) + if (!iter) { + if (stack->slot == (slot-1)) { + *is_EOT = 1; + } break; /* EOT */ + } ASSERT(slot > stack->slot); if (lastobj) { @@ -2846,8 +2887,12 @@ next_root: lastobj = NULL; } pp = catree_find_next_root(iter, &lastkey); - if (!pp) + if (!pp) { + if (stack->slot == (slot-1)) { + *is_EOT = 1; + } break; /* EOT */ + } root = *pp; stack->pos = 0; find_next(&tb->common, root, stack, lastkey); diff --git a/erts/emulator/beam/erl_db_tree_util.h b/erts/emulator/beam/erl_db_tree_util.h index 02df74678d..ba4a8f79e5 100644 --- a/erts/emulator/beam/erl_db_tree_util.h +++ b/erts/emulator/beam/erl_db_tree_util.h @@ -25,6 +25,8 @@ ** Internal functions and macros used by both the CA tree and the AVL tree */ + +#if defined(ARCH_32) /* ** A stack of this size is enough for an AVL tree with more than ** 0xFFFFFFFF elements. May be subject to change if @@ -34,8 +36,19 @@ ** Where n denotes the number of nodes, h(n) the height of the tree ** with n nodes and log is the binary logarithm. */ - #define STACK_NEED 50 +#elif defined(ARCH_64) +/* +** A stack of this size is enough for an AVL tree with more than +** 2^61 elements. +** The Maximal height of an AVL tree is calculated as above. +*/ +#define STACK_NEED 90 +#else +#error "Unsported architecture" +#endif + + #define PUSH_NODE(Dtt, Tdt) \ ((Dtt)->array[(Dtt)->pos++] = Tdt) @@ -50,6 +63,9 @@ #define EMPTY_NODE(Dtt) (TOP_NODE(Dtt) == NULL) +#define DEC_NITEMS(DB) \ + erts_flxctr_dec(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) + static ERTS_INLINE void free_term(DbTable *tb, TreeDbTerm* p) { db_free_term(tb, p, offsetof(TreeDbTerm, dbterm)); diff --git a/erts/emulator/beam/erl_db_util.h b/erts/emulator/beam/erl_db_util.h index e3d3c0e804..02d4dd6c9a 100644 --- a/erts/emulator/beam/erl_db_util.h +++ b/erts/emulator/beam/erl_db_util.h @@ -21,6 +21,7 @@ #ifndef _DB_UTIL_H #define _DB_UTIL_H +#include "erl_flxctr.h" #include "global.h" #include "erl_message.h" #include "erl_bif_unique.h" @@ -207,8 +208,12 @@ typedef struct db_table_method enum DbIterSafety*); int (*db_take)(Process *, DbTable *, Eterm, Eterm *); - SWord (*db_delete_all_objects)(Process* p, DbTable* db, SWord reds); - + SWord (*db_delete_all_objects)(Process* p, + DbTable* db, + SWord reds, + Eterm* nitems_holder_wb); + Eterm (*db_delete_all_objects_get_nitems_from_holder)(Process* p, + Eterm nitems_holder); int (*db_free_empty_table)(DbTable* db); SWord (*db_free_table_continue)(DbTable* db, SWord reds); @@ -257,6 +262,9 @@ typedef struct { DbTable *prev; } DbTableList; +#define ERTS_DB_TABLE_NITEMS_COUNTER_ID 0 +#define ERTS_DB_TABLE_MEM_COUNTER_ID 1 + /* * This structure contains data for all different types of database * tables. Note that these fields must match the same fields @@ -281,8 +289,11 @@ typedef struct db_table_common { Eterm the_name; /* an atom */ Binary *btid; DbTableMethod* meth; /* table methods */ - erts_atomic_t nitems; /* Total number of items in table */ - erts_atomic_t memory_size;/* Total memory size. NOTE: in bytes! */ + /* The ErtsFlxCtr below contains: + * - Total number of items in table + * - Total memory size (NOTE: in bytes!) */ + ErtsFlxCtr counters; + char extra_for_flxctr[ERTS_FLXCTR_NR_OF_EXTRA_BYTES(2)]; struct { /* Last fixation time */ ErtsMonotonicTime monotonic; ErtsMonotonicTime offset; @@ -315,6 +326,7 @@ typedef struct db_table_common { #define DB_BUSY (1 << 12) #define DB_CATREE_FORCE_SPLIT (1 << 31) /* erts_debug */ +#define DB_CATREE_DEBUG_RANDOM_SPLIT_JOIN (1 << 30) /* erts_debug */ #define IS_HASH_TABLE(Status) (!!((Status) & \ (DB_BAG | DB_SET | DB_DUPLICATE_BAG))) diff --git a/erts/emulator/beam/erl_flxctr.c b/erts/emulator/beam/erl_flxctr.c new file mode 100644 index 0000000000..35f4a21508 --- /dev/null +++ b/erts/emulator/beam/erl_flxctr.c @@ -0,0 +1,370 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2019. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/* + * Author: Kjell Winblad + */ + +#include "erl_flxctr.h" + +static int reader_groups_array_size = 0; +#define ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS (reader_groups_array_size) + +static int erts_flxctr_read_ctx_bin_dtor(Binary *context_bin); +static int erts_flxctr_wait_dtor(Binary *context_bin); + +typedef struct { + ErtsThrPrgrLaterOp later_op; + Process* process; + ErtsFlxCtrDecentralizedCtrArray* array; + ErtsFlxCtrDecentralizedCtrArray* next_array; + ErtsAlcType_t alloc_type; + int nr_of_counters; + Sint result[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE]; +} DecentralizedReadSnapshotInfo; + +typedef enum { + ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING = 0, + ERTS_FLXCTR_SNAPSHOT_ONGOING = 1, + ERTS_FLXCTR_SNAPSHOT_ONGOING_TP_THREAD_DO_FREE = 2 +} erts_flxctr_snapshot_status; + +static void +thr_prg_wake_up_and_count(void* bin_p) +{ + Binary* bin = bin_p; + DecentralizedReadSnapshotInfo* info = ERTS_MAGIC_BIN_DATA(bin); + Process* p = info->process; + ErtsFlxCtrDecentralizedCtrArray* array = info->array; + ErtsFlxCtrDecentralizedCtrArray* next = info->next_array; + int i, sched; + /* Reset result array */ + for (i = 0; i < info->nr_of_counters; i++) { + info->result[i] = 0; + } + /* Read result from snapshot */ + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + for (i = 0; i < info->nr_of_counters; i++) { + info->result[i] = info->result[i] + + erts_atomic_read_nob(&array->array[sched].counters[i]); + } + } + /* Update the next decentralized counter array */ + for (i = 0; i < info->nr_of_counters; i++) { + erts_atomic_add_nob(&next->array[0].counters[i], info->result[i]); + } + /* Announce that the snapshot is done */ + { + Sint expected = ERTS_FLXCTR_SNAPSHOT_ONGOING; + if (expected != erts_atomic_cmpxchg_mb(&next->snapshot_status, + ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING, + expected)) { + /* The CAS failed which means that this thread need to free the next array. */ + erts_free(info->alloc_type, next->block_start); + } + } + /* Resume the process that requested the snapshot */ + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (!ERTS_PROC_IS_EXITING(p)) { + erts_resume(p, ERTS_PROC_LOCK_STATUS); + } + /* Free the memory that is no longer needed */ + erts_free(info->alloc_type, array->block_start); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_dec_refc(p); + erts_bin_release(bin); +} + +typedef struct { + ErtsThrPrgrLaterOp later_op; + Process* process; +} ErtsFlxCtrWakeUpLaterInfo; + +static void +thr_prg_wake_up_later(void* bin_p) +{ + Binary* bin = bin_p; + ErtsFlxCtrWakeUpLaterInfo* info = ERTS_MAGIC_BIN_DATA(bin); + Process* p = info->process; + /* Resume the requesting process */ + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (!ERTS_PROC_IS_EXITING(p)) { + erts_resume(p, ERTS_PROC_LOCK_STATUS); + } + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + /* Free data */ + erts_proc_dec_refc(p); + erts_bin_release(bin); +} + +static +int erts_flxctr_read_ctx_bin_dtor(Binary *context_bin) { + (void)context_bin; + return 1; +} + +static +int erts_flxctr_wait_dtor(Binary *context_bin) { + (void)context_bin; + return 1; +} + +static void suspend_until_thr_prg(Process* p) +{ + Binary* state_bin; + ErtsFlxCtrWakeUpLaterInfo* info; + state_bin = erts_create_magic_binary(sizeof(ErtsFlxCtrWakeUpLaterInfo), + erts_flxctr_wait_dtor); + info = ERTS_MAGIC_BIN_DATA(state_bin); + info->process = p; + erts_refc_inctest(&state_bin->intern.refc, 1); + erts_suspend(p, ERTS_PROC_LOCK_MAIN, NULL); + erts_proc_inc_refc(p); + ERTS_VBUMP_ALL_REDS(p); + erts_schedule_thr_prgr_later_op(thr_prg_wake_up_later, state_bin, &info->later_op); +} + + +static ErtsFlxCtrDecentralizedCtrArray* +create_decentralized_ctr_array(ErtsAlcType_t alloc_type, Uint nr_of_counters) { + /* Allocate an ErtsFlxCtrDecentralizedCtrArray and make sure that + the array field is located at the start of a cache line */ + char* bytes = + erts_alloc(alloc_type, + sizeof(ErtsFlxCtrDecentralizedCtrArray) + + (sizeof(ErtsFlxCtrDecentralizedCtrArrayElem) * + ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS) + + ERTS_CACHE_LINE_SIZE); + void* block_start = bytes; + int bytes_to_next_cacheline_border; + ErtsFlxCtrDecentralizedCtrArray* array; + int i, sched; + bytes = &bytes[offsetof(ErtsFlxCtrDecentralizedCtrArray, array)]; + bytes_to_next_cacheline_border = + ERTS_CACHE_LINE_SIZE - (((Uint)bytes) % ERTS_CACHE_LINE_SIZE); + array = (ErtsFlxCtrDecentralizedCtrArray*) + (&bytes[bytes_to_next_cacheline_border - + (int)offsetof(ErtsFlxCtrDecentralizedCtrArray, array)]); + ASSERT(((Uint)array->array) % ERTS_CACHE_LINE_SIZE == 0); + ASSERT(((Uint)array - (Uint)block_start) <= ERTS_CACHE_LINE_SIZE); + /* Initialize fields */ + erts_atomic_init_nob(&array->snapshot_status, ERTS_FLXCTR_SNAPSHOT_ONGOING); + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + for (i = 0; i < nr_of_counters; i++) { + erts_atomic_init_nob(&array->array[sched].counters[i], 0); + } + } + array->block_start = block_start; + return array; +} + +void erts_flxctr_setup(int decentralized_counter_groups) +{ + reader_groups_array_size = decentralized_counter_groups+1; +} + +void erts_flxctr_init(ErtsFlxCtr* c, + int is_decentralized, + Uint nr_of_counters, + ErtsAlcType_t alloc_type) +{ + ASSERT(nr_of_counters <= ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE); + c->is_decentralized = is_decentralized; + c->nr_of_counters = nr_of_counters; + if (c->is_decentralized) { + ErtsFlxCtrDecentralizedCtrArray* array = + create_decentralized_ctr_array(alloc_type, nr_of_counters); + erts_atomic_set_nob(&array->snapshot_status, + ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING); + erts_atomic_init_nob(&c->u.counters_ptr, (Sint)array); + ASSERT(((Uint)array->array) % ERTS_CACHE_LINE_SIZE == 0); + } else { + int i; + for (i = 0; i < nr_of_counters; i++) { + erts_atomic_init_nob(&c->u.counters[i], 0); + } + } +} + +void erts_flxctr_destroy(ErtsFlxCtr* c, ErtsAlcType_t type) +{ + if (c->is_decentralized) { + if (erts_flxctr_is_snapshot_ongoing(c)) { + ErtsFlxCtrDecentralizedCtrArray* array = + ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + /* Try to delegate the resposibilty of freeing to + thr_prg_wake_up_and_count */ + Sint expected = ERTS_FLXCTR_SNAPSHOT_ONGOING; + if (expected != + erts_atomic_cmpxchg_mb(&array->snapshot_status, + ERTS_FLXCTR_SNAPSHOT_ONGOING_TP_THREAD_DO_FREE, + expected)) { + /* The delegation was unsuccessful which means that no + snapshot is ongoing anymore and the freeing needs + to be done here */ + ERTS_ASSERT(!erts_flxctr_is_snapshot_ongoing(c)); + erts_free(type, array->block_start); + } + } else { + erts_free(type, ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c)->block_start); + } + } +} + +ErtsFlxCtrSnapshotResult +erts_flxctr_snapshot(ErtsFlxCtr* c, + ErtsAlcType_t alloc_type, + Process* p) +{ + if (c->is_decentralized) { + ErtsFlxCtrDecentralizedCtrArray* array = ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + if (erts_flxctr_is_snapshot_ongoing(c)) { + /* Let the caller try again later */ + ErtsFlxCtrSnapshotResult res = + {.type = ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP}; + suspend_until_thr_prg(p); + return res; + } else { + Eterm* hp; + Binary* state_bin; + Eterm state_mref; + DecentralizedReadSnapshotInfo* info; + ErtsFlxCtrDecentralizedCtrArray* new_array = + create_decentralized_ctr_array(alloc_type, c->nr_of_counters); + int success = + ((Sint)array) == erts_atomic_cmpxchg_mb(&c->u.counters_ptr, + (Sint)new_array, + (Sint)array); + if (!success) { + /* Let the caller try again later */ + ErtsFlxCtrSnapshotResult res = + {.type = ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP}; + suspend_until_thr_prg(p); + erts_free(alloc_type, new_array->block_start); + return res; + } + /* Create binary with info about the operation that can be + sent to the caller and to a thread progress function */ + state_bin = + erts_create_magic_binary(sizeof(DecentralizedReadSnapshotInfo), + erts_flxctr_read_ctx_bin_dtor); + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + state_mref = erts_mk_magic_ref(&hp, &MSO(p), state_bin); + info = ERTS_MAGIC_BIN_DATA(state_bin); + info->alloc_type = alloc_type; + info->array = array; + info->next_array = new_array; + info->process = p; + info->nr_of_counters = c->nr_of_counters; + erts_proc_inc_refc(p); + erts_refc_inctest(&state_bin->intern.refc, 2); + erts_suspend(p, ERTS_PROC_LOCK_MAIN, NULL); + ERTS_VBUMP_ALL_REDS(p); + erts_schedule_thr_prgr_later_op(thr_prg_wake_up_and_count, + state_bin, + &info->later_op); + { + ErtsFlxCtrSnapshotResult res = { + .type = ERTS_FLXCTR_GET_RESULT_AFTER_TRAP, + .trap_resume_state = state_mref}; + return res; + } + } + } else { + ErtsFlxCtrSnapshotResult res; + int i; + res.type = ERTS_FLXCTR_DONE; + for (i = 0; i < c->nr_of_counters; i++){ + res.result[i] = erts_flxctr_read_centralized(c, i); + } + return res; + } +} + + +Sint erts_flxctr_get_snapshot_result_after_trap(Eterm result_holder, + Uint counter_nr) +{ + Binary* bin = erts_magic_ref2bin(result_holder); + DecentralizedReadSnapshotInfo* data = ERTS_MAGIC_BIN_DATA(bin);; + return data->result[counter_nr]; +} + +int erts_flxctr_is_snapshot_result(Eterm term) +{ + if (is_internal_magic_ref(term)) { + Binary* bin = erts_magic_ref2bin(term); + return ERTS_MAGIC_BIN_DESTRUCTOR(bin) == erts_flxctr_read_ctx_bin_dtor; + } else return 0; +} + +Sint erts_flxctr_read_approx(ErtsFlxCtr* c, + Uint counter_nr) +{ + if (c->is_decentralized) { + ErtsFlxCtrDecentralizedCtrArray* counter = ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + Sint sum = 0; + int sched; + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + sum = sum + erts_atomic_read_nob(&counter->array[sched].counters[counter_nr]); + } + return sum; + } else { + return erts_flxctr_read_centralized(c, counter_nr); + } +} + +int erts_flxctr_is_snapshot_ongoing(ErtsFlxCtr* c) +{ + return c->is_decentralized && + (ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING != + erts_atomic_read_acqb(&ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c)->snapshot_status)); +} + +int erts_flxctr_suspend_until_thr_prg_if_snapshot_ongoing(ErtsFlxCtr* c, Process* p) +{ + if (erts_flxctr_is_snapshot_ongoing(c)) { + suspend_until_thr_prg(p); + return 1; + } else { + return 0; + } +} + +void erts_flxctr_reset(ErtsFlxCtr* c, + Uint counter_nr) +{ + if (c->is_decentralized) { + int sched; + ErtsFlxCtrDecentralizedCtrArray* counter = + ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + erts_atomic_set_nob(&counter->array[sched].counters[counter_nr], 0); + } + } else { + erts_atomic_set_nob(&c->u.counters[counter_nr], 0); + } +} + + +void erts_flxctr_set_slot(int group) { + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + esdp->flxctr_slot_no = group; +} diff --git a/erts/emulator/beam/erl_flxctr.h b/erts/emulator/beam/erl_flxctr.h new file mode 100644 index 0000000000..5cab02b9eb --- /dev/null +++ b/erts/emulator/beam/erl_flxctr.h @@ -0,0 +1,406 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2019. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/** + * @file erl_flxctr.h + * + * @brief This file contains the API of a flexible counter. The + * counter can be configured during its initialization to be + * centralized or decentralized. The centralized configuration makes + * it possible to read the counter value extremely efficiently, but + * updates of the counter value can easily cause contention. The + * decentralized configuration has the reverse trade-off (i.e., + * updates are efficient and scalable but reading the counter value is + * slow and may cause contention). + * + * @author Kjell Winblad + */ + +#ifndef ERL_FLXCTR_H__ +#define ERL_FLXCTR_H__ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "error.h" +#include "bif.h" +#include "big.h" +#include "erl_binary.h" +#include "bif.h" +#include <stddef.h> + +/* Public Interface */ + +#define ERTS_MAX_FLXCTR_GROUPS 256 +#define ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic_t)) + +typedef struct { + int nr_of_counters; + int is_decentralized; + union { + erts_atomic_t counters_ptr; + erts_atomic_t counters[1]; + } u; +} ErtsFlxCtr; + +#define ERTS_FLXCTR_NR_OF_EXTRA_BYTES(NR_OF_COUNTERS) \ + ((NR_OF_COUNTERS-1) * sizeof(erts_atomic_t)) + +/* Called by early_init */ +void erts_flxctr_setup(int decentralized_counter_groups); + +/** + * @brief Initializes an ErtsFlxCtr. The macro + * ERTS_FLXCTR_NR_OF_EXTRA_BYTES should be used to determine how much + * extra space that needs to be allocated directly after the + * ErtsFlxCtr when is_decentralized is set to zero. Each ErtsFlxCtr + * instance may contain up to ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE + * counters. These counters are numbered from zero to + * (ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE-1). Most of the functions in + * this module take a parameter named counter_nr that controls which + * of the ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE counters in the given + * ErtsFlxCtr that should be operated on. + * + * @param c The counter to initialize + * @param is_decentralized Non-zero value to make c decentralized + * @param nr_of_counters The number of counters included in c + * (max ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE) + * @param alloc_type + */ +void erts_flxctr_init(ErtsFlxCtr* c, + int is_decentralized, + Uint nr_of_counters, + ErtsAlcType_t alloc_type); + +/** + * @brief Destroys an initialized counter. + * + * @param c The counter that should be destroyed + * @param alloc_type The allocation type (needs to be the same as the + * one passed to erts_flxctr_init when c was + * initialized) + */ +void erts_flxctr_destroy(ErtsFlxCtr* c, ErtsAlcType_t alloc_type); + +/** + * @brief Adds to_add to the counter with counter_nr in c + * + * @param c the ErtsFlxCtr to operate on + * @param counter_nr The number of the counter in c to modify + * @param to_add The amount that should be added to the specified counter + */ +ERTS_GLB_INLINE +void erts_flxctr_add(ErtsFlxCtr* c, + Uint counter_nr, + int to_add); + +/** + * @brief Increases the specified counter by 1 + * + * @param c The ErtsFlxCtr instance to operate on + * @param counter_nr The number of the counter within c to operate on + */ +ERTS_GLB_INLINE +void erts_flxctr_inc(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief Decreases the specified counter by 1 + */ +ERTS_GLB_INLINE +void erts_flxctr_dec(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function tries to return the current value of the + * specified counter but may return an incorrect result if the counter + * is decentralized and other threads are accessing the counter + * concurrently. + * + * @param c The ErtsFlxCtr instance to operate on + * @param counter_nr The number of the counter within c to operate on + * + * @return A snapshot of the specifed counter if c is centralized or a + * possibly incorrect estimate of the counter value if c is + * decentralized + */ +Sint erts_flxctr_read_approx(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function can only be used together with an ErtsFlxCtr + * that is configured to be centralized. The function increments the + * specified counter by 1 and returns the value of the counter after + * the increment. + */ +ERTS_GLB_INLINE +Sint erts_flxctr_inc_read_centralized(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function can only be used together with a ErtsFlxCtr + * that is configured to be centralized. The function decrements the + * specified counter by 1 and returns the value of the counter after + * the operation. + */ +ERTS_GLB_INLINE +Sint erts_flxctr_dec_read_centralized(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function can only be used together with an ErtsFlxCtr + * that is configured to be centralized. The function returns the + * current value of the specified counter. + */ +ERTS_GLB_INLINE +Sint erts_flxctr_read_centralized(ErtsFlxCtr* c, + Uint counter_nr); + + +typedef enum { + ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP, + ERTS_FLXCTR_DONE, + ERTS_FLXCTR_GET_RESULT_AFTER_TRAP +} ErtsFlxctrSnapshotResultType; + +typedef struct { + ErtsFlxctrSnapshotResultType type; + Eterm trap_resume_state; + Sint result[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE]; +} ErtsFlxCtrSnapshotResult; + +/** + * @brief This function initiates an atomic snapshot of an ErtsFlxCtr + * to read out the values of one or more of the counters that are + * stored in the given ErtsFlxCtr. The caller needs to perform + * different actions after the return of this function depending on + * the value of the type field in the returned struct: + * + * - The caller needs to trap and try again after the trap if the + * return value has the type ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP. + * + * - The caller can get the result directly from the result field of + * the returned struct if the return value has the type + * ERTS_FLXCTR_DONE. The value at index i in the result field + * correspond to counter number i. + * + * - Finally, if the return value has the type + * ERTS_FLXCTR_GET_RESULT_AFTER_TRAP, then the caller needs to save + * the value of the field trap_resume_state from the returned struct + * and trap. After the trap, the values of the counters can be + * obtained by using the function + * erts_flxctr_get_snapshot_result_after_trap. Note that the + * function erts_flxctr_is_snapshot_result can be used to check if a + * value is obtained from the trap_resume_state field in the + * returned struct (this can be useful when the calling function + * wakes up again after the trap). + * + * The snapshot operation that is initiated by this function should be + * considered to be ongoing from the issuing of this function until a + * struct with the type field set to ERTS_FLXCTR_DONE has been + * returned from the function or until the caller of this function has + * woken up after trapping. + * + * @param c The ErtsFlxCtr that the snapshot shall be taken from + * @param alloc_type The allocation type (needs to be the same as the + * type passed to erts_flxctr_init when c was + * initialized) + * @param p The Erlang process that is doing the call + * + * @return See the description above + * + */ +ErtsFlxCtrSnapshotResult +erts_flxctr_snapshot(ErtsFlxCtr* c, + ErtsAlcType_t alloc_type, + Process* p); + +/** + * @brief Checks if the parameter term is a snapshot result (i.e., + * something obtained from the trap_resume_state field of an + * ErtsFlxCtrSnapshotResult struct that has been returned from + * erts_flxctr_snapshot). + * + * @param term The term to check + * + * @return A nonzero value iff the term is a snapshot result + */ +int erts_flxctr_is_snapshot_result(Eterm term); + +/** + * @brief Returns the result of a snapshot for a counter given a + * snapshot result returned by a call to erts_flxctr_snapshot (i.e., + * the value stored in the trap_resume_state field of a struct + * returned by erts_flxctr_snapshot). The caller needs to trap between + * the return of erts_flxctr_snapshot and the call to this function. + */ +Sint erts_flxctr_get_snapshot_result_after_trap(Eterm trap_resume_state, + Uint counter_nr); + +/** + * @brief Resets the specified counter to 0. This function is unsafe + * to call while a snapshot operation may be active (initiated with + * the erts_flxctr_snapshot function). + */ +void erts_flxctr_reset(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief Checks if a snapshot operation is active (snapshots are + * initiated with the erts_flxctr_snapshot function). + * + * @return nonzero value iff a snapshot was active at some point + * between the invocation and return of the function + */ +int erts_flxctr_is_snapshot_ongoing(ErtsFlxCtr* c); + +/** + * @brief This function checks if a snapshot operation is ongoing + * (snapshots are initiated with the erts_flxctr_snapshot function) + * and suspend the given process until thread progress has happened if + * it detected an ongoing snapshot operation. The caller needs to trap + * if a non-zero value is returned. + * + * @param c The ErtsFlxCtr to check + * @param p The calling process + * + * @return nonzero value if the given process has got suspended + */ +int erts_flxctr_suspend_until_thr_prg_if_snapshot_ongoing(ErtsFlxCtr* c, Process* p); + +/* End: Public Interface */ + +/* Internal Declarations */ + +#define ERTS_FLXCTR_GET_CTR_ARRAY_PTR(C) \ + ((ErtsFlxCtrDecentralizedCtrArray*) erts_atomic_read_acqb(&(C)->u.counters_ptr)) +#define ERTS_FLXCTR_GET_CTR_PTR(C, SCHEDULER_ID, COUNTER_ID) \ + &(ERTS_FLXCTR_GET_CTR_ARRAY_PTR(C))->array[SCHEDULER_ID].counters[COUNTER_ID] + + +typedef union { + erts_atomic_t counters[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE]; + char pad[ERTS_CACHE_LINE_SIZE]; +} ErtsFlxCtrDecentralizedCtrArrayElem; + +typedef struct ErtsFlxCtrDecentralizedCtrArray { + void* block_start; + erts_atomic_t snapshot_status; + ErtsFlxCtrDecentralizedCtrArrayElem array[]; +} ErtsFlxCtrDecentralizedCtrArray; + +void erts_flxctr_set_slot(int group); + +ERTS_GLB_INLINE +int erts_flxctr_get_slot_index(void); + +/* End: Internal Declarations */ + + +/* Implementation of inlined functions */ + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE +int erts_flxctr_get_slot_index(void) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); + ASSERT(esdp->flxctr_slot_no > 0); + return esdp->flxctr_slot_no; +} + +ERTS_GLB_INLINE +void erts_flxctr_add(ErtsFlxCtr* c, + Uint counter_nr, + int to_add) +{ + ASSERT(counter_nr < c->nr_of_counters); + if (c->is_decentralized) { + erts_atomic_add_nob(ERTS_FLXCTR_GET_CTR_PTR(c, + erts_flxctr_get_slot_index(), + counter_nr), + to_add); + } else { + erts_atomic_add_nob(&c->u.counters[counter_nr], to_add); + } +} + +ERTS_GLB_INLINE +void erts_flxctr_inc(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + if (c->is_decentralized) { + erts_atomic_inc_nob(ERTS_FLXCTR_GET_CTR_PTR(c, + erts_flxctr_get_slot_index(), + counter_nr)); + } else { + erts_atomic_inc_read_nob(&c->u.counters[counter_nr]); + } +} + +ERTS_GLB_INLINE +void erts_flxctr_dec(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + if (c->is_decentralized) { + erts_atomic_dec_nob(ERTS_FLXCTR_GET_CTR_PTR(c, + erts_flxctr_get_slot_index(), + counter_nr)); + } else { + erts_atomic_dec_nob(&c->u.counters[counter_nr]); + } +} + +ERTS_GLB_INLINE +Sint erts_flxctr_inc_read_centralized(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + ASSERT(!c->is_decentralized); + return erts_atomic_inc_read_nob(&c->u.counters[counter_nr]); +} + +ERTS_GLB_INLINE +Sint erts_flxctr_dec_read_centralized(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + ASSERT(!c->is_decentralized); + return erts_atomic_dec_read_nob(&c->u.counters[counter_nr]); +} + +ERTS_GLB_INLINE +Sint erts_flxctr_read_centralized(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + ASSERT(!c->is_decentralized); + return erts_atomic_read_nob(&((erts_atomic_t*)(c->u.counters))[counter_nr]); +} + +#endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */ + +#endif /* ERL_FLXCTR_H__ */ diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 12750b9aa6..547e4064a2 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -593,6 +593,7 @@ void erts_usage(void) erts_fprintf(stderr, " no_time_warp|single_time_warp|multi_time_warp\n"); erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n"); erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n"); + erts_fprintf(stderr, "-dcg set the limit for the number of decentralized counter groups\n"); erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n"); erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n", H_DEFAULT_SIZE); @@ -785,6 +786,8 @@ early_init(int *argc, char **argv) /* int dirty_io_scheds; int max_reader_groups; int reader_groups; + int max_decentralized_counter_groups; + int decentralized_counter_groups; char envbuf[21]; /* enough for any 64-bit integer */ size_t envbufsz; @@ -804,7 +807,8 @@ early_init(int *argc, char **argv) /* erts_initialized = 0; - erts_pre_early_init_cpu_topology(&max_reader_groups, + erts_pre_early_init_cpu_topology(&max_decentralized_counter_groups, + &max_reader_groups, &ncpu, &ncpuonln, &ncpuavail); @@ -865,6 +869,24 @@ early_init(int *argc, char **argv) /* } if (argv[i][0] == '-') { switch (argv[i][1]) { + case 'd': { + char *sub_param = argv[i]+2; + if (has_prefix("cg", sub_param)) { + char *arg = get_arg(sub_param+2, argv[i+1], &i); + if (sscanf(arg, "%d", &max_decentralized_counter_groups) != 1) { + erts_fprintf(stderr, + "bad decentralized counter groups limit: %s\n", arg); + erts_usage(); + } + if (max_decentralized_counter_groups < 0) { + erts_fprintf(stderr, + "bad decentralized counter groups limit: %d\n", + max_decentralized_counter_groups); + erts_usage(); + } + } + break; + } case 'r': { char *sub_param = argv[i]+2; if (has_prefix("g", sub_param)) { @@ -1186,8 +1208,10 @@ early_init(int *argc, char **argv) /* erts_early_init_cpu_topology(no_schedulers, &max_main_threads, max_reader_groups, - &reader_groups); - + &reader_groups, + max_decentralized_counter_groups, + &decentralized_counter_groups); + erts_flxctr_setup(decentralized_counter_groups); { erts_thr_late_init_data_t elid = ERTS_THR_LATE_INIT_DATA_DEF_INITER; elid.mem.std.alloc = ethr_std_alloc; diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c index 2a0fb9e2aa..6645341512 100644 --- a/erts/emulator/beam/erl_message.c +++ b/erts/emulator/beam/erl_message.c @@ -1590,6 +1590,9 @@ void erts_factory_undo(ErtsHeapFactory* factory) factory->message->hfrag.next = factory->heap_frags; else factory->message->data.heap_frag = factory->heap_frags; + /* Set the message to NIL in order for this message not to be + treated as a distributed message by the cleanup_messages logic */ + factory->message->m[0] = NIL; erts_cleanup_messages(factory->message); break; case FACTORY_TMP: diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index deaf35c2a1..1fbe362330 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1344,11 +1344,18 @@ unsigned char* enif_make_new_binary(ErlNifEnv* env, size_t size, int enif_term_to_binary(ErlNifEnv *dst_env, ERL_NIF_TERM term, ErlNifBinary *bin) { - Sint size; + Uint size; byte *bp; Binary* refbin; - size = erts_encode_ext_size(term); + switch (erts_encode_ext_size(term, &size)) { + case ERTS_EXT_SZ_SYSTEM_LIMIT: + return 0; /* system limit */ + case ERTS_EXT_SZ_YIELD: + ERTS_INTERNAL_ERROR("Unexpected yield"); + case ERTS_EXT_SZ_OK: + break; + } if (!enif_alloc_binary(size, bin)) return 0; diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index 49dea8919b..4eb6c3e214 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -805,8 +805,7 @@ static int node_table_cmp(void *venp1, void *venp2) { return ((((ErlNode *) venp1)->sysname == ((ErlNode *) venp2)->sysname) && - ((((ErlNode *) venp1)->creation == ((ErlNode *) venp2)->creation) || - (((ErlNode *) venp1)->creation == 0 || ((ErlNode *) venp2)->creation == 0)) + ((((ErlNode *) venp1)->creation == ((ErlNode *) venp2)->creation)) ? 0 : 1); } @@ -977,7 +976,7 @@ static void print_node(void *venp, void *vpndp) if(pndp->sysname == NIL) { erts_print(pndp->to, pndp->to_arg, "Name: %T ", enp->sysname); } - erts_print(pndp->to, pndp->to_arg, " %u", enp->creation); + erts_print(pndp->to, pndp->to_arg, " %d", enp->creation); #ifdef DEBUG erts_print(pndp->to, pndp->to_arg, " (refc=%ld)", erts_refc_read(&enp->refc, 0)); @@ -1020,7 +1019,7 @@ void erts_print_node_info(fmtfn_t to, /* ----------------------------------------------------------------------- */ void -erts_set_this_node(Eterm sysname, Uint32 creation) +erts_set_this_node(Eterm sysname, Uint creation) { ERTS_LC_ASSERT(erts_thr_progress_is_blocking()); ASSERT(2 <= de_refc_read(erts_this_dist_entry, 2)); diff --git a/erts/emulator/beam/erl_node_tables.h b/erts/emulator/beam/erl_node_tables.h index c434926142..aa8af12555 100644 --- a/erts/emulator/beam/erl_node_tables.h +++ b/erts/emulator/beam/erl_node_tables.h @@ -95,6 +95,7 @@ enum dist_entry_state { struct ErtsDistOutputBuf_ { #ifdef DEBUG Uint dbg_pattern; + byte *ext_startp; byte *alloc_endp; #endif ErtsDistOutputBuf *next; @@ -258,7 +259,7 @@ void erts_set_dist_entry_pending(DistEntry *); void erts_set_dist_entry_connected(DistEntry *, Eterm, Uint); ErlNode *erts_find_or_insert_node(Eterm, Uint32, Eterm); void erts_schedule_delete_node(ErlNode *); -void erts_set_this_node(Eterm, Uint32); +void erts_set_this_node(Eterm, Uint); Uint erts_node_table_size(void); void erts_init_node_tables(int); void erts_node_table_info(fmtfn_t, void *); diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c index 4e9f177e51..f58a606d57 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.c +++ b/erts/emulator/beam/erl_proc_sig_queue.c @@ -1019,6 +1019,8 @@ send_gen_exit_signal(Process *c_p, Eterm from_tag, ref_sz = size_object(ref); hsz += ref_sz; + reason_sz = 0; /* Set to silence gcc warning */ + /* The reason was part of the control message, just use copy it into the xsigd */ if (is_value(reason)) { diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 9e662632b4..1f6adb98ef 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -12095,6 +12095,7 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) Eterm watched; Uint watcher_sz, ref_sz; ErtsHeapFactory factory; + Sint reds_consumed = 0; ASSERT(erts_monitor_is_target(mon) && mon->type == ERTS_MON_TYPE_DIST_PROC); @@ -12139,16 +12140,18 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) watched, ref, reason); + reds_consumed = reds - (ctx.reds / TERM_TO_BINARY_LOOP_FACTOR); switch (code) { case ERTS_DSIG_SEND_CONTINUE: + case ERTS_DSIG_SEND_YIELD: erts_set_gc_state(c_p, 0); ctxt->dist_state = erts_dsend_export_trap_context(c_p, &ctx); - /* fall-through */ - case ERTS_DSIG_SEND_YIELD: + reds_consumed = reds; /* force yield */ break; case ERTS_DSIG_SEND_OK: break; case ERTS_DSIG_SEND_TOO_LRG: + erts_kill_dist_connection(dep, dist->connection_id); erts_set_gc_state(c_p, 1); break; default: @@ -12164,7 +12167,7 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) erts_monitor_release(mon); else erts_monitor_release_both(mdp); - return reds - (ctx.reds / TERM_TO_BINARY_LOOP_FACTOR); + return reds_consumed; } int @@ -12351,6 +12354,7 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) ErtsLink *dlnk; ErtsLinkData *ldp = NULL; ErtsHeapFactory factory; + Sint reds_consumed = 0; ASSERT(lnk->type == ERTS_LNK_TYPE_DIST_PROC); dlnk = erts_link_to_other(lnk, &ldp); @@ -12387,16 +12391,18 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) item, reason, SEQ_TRACE_TOKEN(c_p)); + reds_consumed = reds - (ctx.reds / TERM_TO_BINARY_LOOP_FACTOR); switch (code) { + case ERTS_DSIG_SEND_YIELD: case ERTS_DSIG_SEND_CONTINUE: erts_set_gc_state(c_p, 0); ctxt->dist_state = erts_dsend_export_trap_context(c_p, &ctx); - /* fall-through */ - case ERTS_DSIG_SEND_YIELD: + reds_consumed = reds; /* force yield */ break; case ERTS_DSIG_SEND_OK: break; case ERTS_DSIG_SEND_TOO_LRG: + erts_kill_dist_connection(dep, dist->connection_id); erts_set_gc_state(c_p, 1); break; default: @@ -12412,7 +12418,7 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) erts_link_release_both(ldp); else if (lnk) erts_link_release(lnk); - return reds - (ctx.reds / TERM_TO_BINARY_LOOP_FACTOR); + return reds_consumed; } int @@ -12893,7 +12899,9 @@ restart: switch (result) { case ERTS_DSIG_SEND_OK: + break; case ERTS_DSIG_SEND_TOO_LRG: /*SEND_SYSTEM_LIMIT*/ + erts_kill_dist_connection(ctx->dep, ctx->connection_id); break; case ERTS_DSIG_SEND_YIELD: /*SEND_YIELD_RETURN*/ case ERTS_DSIG_SEND_CONTINUE: { /*SEND_YIELD_CONTINUE*/ diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 711b73417d..6118c671ee 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -641,6 +641,7 @@ struct ErtsSchedulerData_ { ErtsSchedType type; Uint no; /* Scheduler number for normal schedulers */ Uint dirty_no; /* Scheduler number for dirty schedulers */ + int flxctr_slot_no; /* slot nr when a flxctr is used */ struct enif_environment_t *current_nif; Process *dirty_shadow_process; Port *current_port; @@ -1847,6 +1848,7 @@ int erts_resume_processes(ErtsProcList *); void erts_deep_process_dump(fmtfn_t, void *); Eterm erts_get_reader_groups_map(Process *c_p); +Eterm erts_get_decentralized_counter_groups_map(Process *c_p); Eterm erts_debug_reader_groups_map(Process *c_p, int groups); Uint erts_debug_nbalance(void); diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index ae7084b7f4..c85a7df5ec 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -635,9 +635,11 @@ write_sys_msg_to_port(Eterm unused_to, Eterm message) { byte *buffer; byte *ptr; - unsigned size; + Uint size; + + if (erts_encode_ext_size(message, &size) != ERTS_EXT_SZ_OK) + erts_exit(ERTS_ERROR_EXIT, "Internal error: System limit\n"); - size = erts_encode_ext_size(message); buffer = (byte *) erts_alloc(ERTS_ALC_T_TMP, size); ptr = buffer; diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 471c1c3938..ec67ab2aed 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -51,17 +51,18 @@ #define MAX_STRING_LEN 0xffff -/* - * MAX value for the creation field in pid, port and reference - * for the old PID_EXT, PORT_EXT, REFERENCE_EXT and NEW_REFERENCE_EXT. - * Older nodes (OTP 19-21) will send us these so we must be able to decode them. - * - * From OTP 22 DFLAG_BIG_CREATION is mandatory so this node will always - * encode with new big 32-bit creations using NEW_PID_EXT, NEW_PORT_EXT - * and NEWER_REFERENCE_EXT. +/* MAX value for the creation field in pid, port and reference + for the local node and for the current external format. + + Larger creation values than this are allowed in external pid, port and refs + encoded with NEW_PID_EXT, NEW_PORT_EXT and NEWER_REFERENCE_EXT. + The point here is to prepare for future upgrade to 32-bit creation. + OTP-19 (erts-8.0) can handle big creation values from other (newer) nodes, + but do not use big creation values for the local node yet, + as we still may have to communicate with older nodes. */ -#define ERTS_MAX_TINY_CREATION (3) -#define is_tiny_creation(Cre) ((unsigned)(Cre) <= ERTS_MAX_TINY_CREATION) +#define ERTS_MAX_LOCAL_CREATION (3) +#define is_valid_creation(Cre) ((unsigned)(Cre) <= ERTS_MAX_LOCAL_CREATION) #undef ERTS_DEBUG_USE_DIST_SEP #ifdef DEBUG @@ -111,13 +112,13 @@ static byte* dec_pid(ErtsDistExternal *, ErtsHeapFactory*, byte*, Eterm*, byte t static Sint decoded_size(byte *ep, byte* endp, int internal_tags, struct B2TContext_t*); static BIF_RETTYPE term_to_binary_trap_1(BIF_ALIST_1); -static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint flags, - Binary *context_b); +static Eterm erts_term_to_binary_int(Process* p, Eterm Term, Eterm opts, int level, + Uint flags, Binary *context_b); static Uint encode_size_struct2(ErtsAtomCacheMap *, Eterm, unsigned); struct TTBSizeContext_; -static int encode_size_struct_int(struct TTBSizeContext_*, ErtsAtomCacheMap *acmp, Eterm obj, - unsigned dflags, Sint *reds, Uint *res); +static ErtsExtSzRes encode_size_struct_int(struct TTBSizeContext_*, ErtsAtomCacheMap *acmp, + Eterm obj, unsigned dflags, Sint *reds, Uint *res); static Export binary_to_term_trap_export; static BIF_RETTYPE binary_to_term_trap_1(BIF_ALIST_1); @@ -602,49 +603,50 @@ done: return reds < 0 ? 0 : reds; } -int erts_encode_dist_ext_size(Eterm term, Uint32 flags, ErtsAtomCacheMap *acmp, - Uint* szp) +ErtsExtSzRes +erts_encode_dist_ext_size(Eterm term, Uint32 flags, ErtsAtomCacheMap *acmp, Uint* szp) { Uint sz; - if (encode_size_struct_int(NULL, acmp, term, flags, NULL, &sz)) { - return -1; - } else { + ErtsExtSzRes res = encode_size_struct_int(NULL, acmp, term, flags, NULL, &sz); + if (res == ERTS_EXT_SZ_OK) { #ifndef ERTS_DEBUG_USE_DIST_SEP if (!(flags & (DFLAG_DIST_HDR_ATOM_CACHE | DFLAG_NO_MAGIC))) #endif sz++ /* VERSION_MAGIC */; *szp += sz; - return 0; } + return res; } -int erts_encode_dist_ext_size_int(Eterm term, ErtsDSigSendContext *ctx, Uint* szp) +ErtsExtSzRes +erts_encode_dist_ext_size_ctx(Eterm term, ErtsDSigSendContext *ctx, Uint* szp) { Uint sz; - if (encode_size_struct_int(&ctx->u.sc, ctx->acmp, term, ctx->flags, &ctx->reds, &sz)) { - return -1; - } else { + ErtsExtSzRes res = encode_size_struct_int(&ctx->u.sc, ctx->acmp, term, + ctx->flags, &ctx->reds, &sz); + if (res == ERTS_EXT_SZ_OK) { #ifndef ERTS_DEBUG_USE_DIST_SEP if (!(ctx->flags & (DFLAG_DIST_HDR_ATOM_CACHE | DFLAG_NO_MAGIC))) #endif sz++ /* VERSION_MAGIC */; *szp += sz; - return 0; } + return res; } -Uint erts_encode_ext_size(Eterm term) +ErtsExtSzRes erts_encode_ext_size_2(Eterm term, unsigned dflags, Uint *szp) { - return encode_size_struct2(NULL, term, TERM_TO_BINARY_DFLAGS) - + 1 /* VERSION_MAGIC */; + ErtsExtSzRes res = encode_size_struct_int(NULL, NULL, term, dflags, + NULL, szp); + (*szp)++ /* VERSION_MAGIC */; + return res; } -Uint erts_encode_ext_size_2(Eterm term, unsigned dflags) +ErtsExtSzRes erts_encode_ext_size(Eterm term, Uint *szp) { - return encode_size_struct2(NULL, term, dflags) - + 1 /* VERSION_MAGIC */; + return erts_encode_ext_size_2(term, TERM_TO_BINARY_DFLAGS, szp); } Uint erts_encode_ext_size_ets(Eterm term) @@ -699,6 +701,7 @@ dist_ext_size(ErtsDistExternal *edep) } else { sz -= sizeof(ErtsAtomTranslationTable); } + ASSERT(sz % 4 == 0); return sz; } @@ -706,8 +709,9 @@ Uint erts_dist_ext_size(ErtsDistExternal *edep) { Uint sz = dist_ext_size(edep); + sz += 4; /* may need to pad to 8-byte-align ErtsDistExternalData */ sz += edep->data[0].frag_id * sizeof(ErtsDistExternalData); - return sz + ERTS_EXTRA_DATA_ALIGN_SZ(sz); + return sz; } Uint @@ -749,6 +753,8 @@ erts_make_dist_ext_copy(ErtsDistExternal *edep, ErtsDistExternal *new_edep) erts_ref_dist_entry(new_edep->dep); ep += dist_ext_sz; + ep += (UWord)ep & 4; /* 8-byte alignment for ErtsDistExternalData */ + ASSERT((UWord)ep % 8 == 0); new_edep->data = (ErtsDistExternalData*)ep; sys_memzero(new_edep->data, sizeof(ErtsDistExternalData) * edep->data->frag_id); @@ -1248,9 +1254,22 @@ static BIF_RETTYPE term_to_binary_trap_1(BIF_ALIST_1) { Eterm *tp = tuple_val(BIF_ARG_1); Eterm Term = tp[1]; - Eterm bt = tp[2]; + Eterm Opts = tp[2]; + Eterm bt = tp[3]; Binary *bin = erts_magic_ref2bin(bt); - Eterm res = erts_term_to_binary_int(BIF_P, Term, 0, 0,bin); + Eterm res = erts_term_to_binary_int(BIF_P, Term, Opts, 0, 0,bin); + if (is_non_value(res)) { + if (erts_set_gc_state(BIF_P, 1) + || MSO(BIF_P).overhead > BIN_VHEAP_SZ(BIF_P)) { + ERTS_VBUMP_ALL_REDS(BIF_P); + } + if (Opts == am_undefined) + ERTS_BIF_ERROR_TRAPPED1(BIF_P, SYSTEM_LIMIT, + bif_export[BIF_term_to_binary_1], Term); + else + ERTS_BIF_ERROR_TRAPPED2(BIF_P, SYSTEM_LIMIT, + bif_export[BIF_term_to_binary_2], Term, Opts); + } if (is_tuple(res)) { ASSERT(BIF_P->flags & F_DISABLE_GC); BIF_TRAP1(&term_to_binary_trap_export,BIF_P,res); @@ -1267,7 +1286,12 @@ HIPE_WRAPPER_BIF_DISABLE_GC(term_to_binary, 1) BIF_RETTYPE term_to_binary_1(BIF_ALIST_1) { - Eterm res = erts_term_to_binary_int(BIF_P, BIF_ARG_1, 0, TERM_TO_BINARY_DFLAGS, NULL); + Eterm res = erts_term_to_binary_int(BIF_P, BIF_ARG_1, am_undefined, + 0, TERM_TO_BINARY_DFLAGS, NULL); + if (is_non_value(res)) { + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + } if (is_tuple(res)) { erts_set_gc_state(BIF_P, 0); BIF_TRAP1(&term_to_binary_trap_export,BIF_P,res); @@ -1326,7 +1350,12 @@ BIF_RETTYPE term_to_binary_2(BIF_ALIST_2) goto error; } - res = erts_term_to_binary_int(p, Term, level, flags, NULL); + res = erts_term_to_binary_int(p, Term, BIF_ARG_2, + level, flags, NULL); + if (is_non_value(res)) { + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + } if (is_tuple(res)) { erts_set_gc_state(p, 0); BIF_TRAP1(&term_to_binary_trap_export,BIF_P,res); @@ -1875,8 +1904,17 @@ external_size_1(BIF_ALIST_1) { Process* p = BIF_P; Eterm Term = BIF_ARG_1; + Uint size; + + switch (erts_encode_ext_size(Term, &size)) { + case ERTS_EXT_SZ_SYSTEM_LIMIT: + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + case ERTS_EXT_SZ_YIELD: + ERTS_INTERNAL_ERROR("Unexpected yield"); + case ERTS_EXT_SZ_OK: + break; + } - Uint size = erts_encode_ext_size(Term); if (IS_USMALL(0, size)) { BIF_RET(make_small(size)); } else { @@ -1919,7 +1957,15 @@ external_size_2(BIF_ALIST_2) goto error; } - size = erts_encode_ext_size_2(BIF_ARG_1, flags); + switch (erts_encode_ext_size_2(BIF_ARG_1, flags, &size)) { + case ERTS_EXT_SZ_SYSTEM_LIMIT: + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + case ERTS_EXT_SZ_YIELD: + ERTS_INTERNAL_ERROR("Unexpected yield"); + case ERTS_EXT_SZ_OK: + break; + } + if (IS_USMALL(0, size)) { BIF_RET(make_small(size)); } else { @@ -2007,7 +2053,15 @@ erts_term_to_binary_simple(Process* p, Eterm Term, Uint size, int level, Uint fl Eterm erts_term_to_binary(Process* p, Eterm Term, int level, Uint flags) { Uint size; - size = encode_size_struct2(NULL, Term, flags) + 1 /* VERSION_MAGIC */; + switch (encode_size_struct_int(NULL, NULL, Term, flags, NULL, &size)) { + case ERTS_EXT_SZ_SYSTEM_LIMIT: + return THE_NON_VALUE; + case ERTS_EXT_SZ_YIELD: + ERTS_INTERNAL_ERROR("Unexpected yield"); + case ERTS_EXT_SZ_OK: + break; + } + size++; /* VERSION_MAGIC */; return erts_term_to_binary_simple(p, Term, size, level, flags); } @@ -2057,8 +2111,8 @@ static int ttb_context_destructor(Binary *context_bin) return 1; } -static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint flags, - Binary *context_b) +static Eterm erts_term_to_binary_int(Process* p, Eterm Term, Eterm opts, int level, + Uint flags, Binary *context_b) { Eterm *hp; Eterm res; @@ -2076,18 +2130,17 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla do { \ if (context_b == NULL) { \ context_b = erts_create_magic_binary(sizeof(TTBContext), \ - ttb_context_destructor); \ + ttb_context_destructor);\ context = ERTS_MAGIC_BIN_DATA(context_b); \ - sys_memcpy(context,&c_buff,sizeof(TTBContext)); \ + sys_memcpy(context,&c_buff,sizeof(TTBContext)); \ } \ } while (0) #define RETURN_STATE() \ do { \ - static const int TUPLE2_SIZE = 2 + 1; \ - hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + TUPLE2_SIZE); \ + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + 1 + 3); \ c_term = erts_mk_magic_ref(&hp, &MSO(p), context_b); \ - res = TUPLE2(hp, Term, c_term); \ + res = TUPLE3(hp, Term, opts, c_term); \ BUMP_ALL_REDS(p); \ return res; \ } while (0); @@ -2113,11 +2166,17 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla int level; Uint flags; /* Try for fast path */ - if (encode_size_struct_int(&context->s.sc, NULL, Term, - context->s.sc.flags, &reds, &size) < 0) { + switch (encode_size_struct_int(&context->s.sc, NULL, Term, + context->s.sc.flags, &reds, &size)) { + case ERTS_EXT_SZ_SYSTEM_LIMIT: + BUMP_REDS(p, (initial_reds - reds) / TERM_TO_BINARY_LOOP_FACTOR); + return THE_NON_VALUE; + case ERTS_EXT_SZ_YIELD: EXPORT_CONTEXT(); /* Same state */ RETURN_STATE(); + case ERTS_EXT_SZ_OK: + break; } ++size; /* VERSION_MAGIC */ /* Move these to next state */ @@ -2379,8 +2438,7 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags) Eterm sysname = ((is_internal_pid(pid) && (dflags & DFLAG_INTERNAL_TAGS)) ? INTERNAL_LOCAL_SYSNAME : pid_node_name(pid)); Uint32 creation = pid_creation(pid); - - *ep++ = NEW_PID_EXT; + byte* tagp = ep++; /* insert atom here containing host and sysname */ ep = enc_atom(acmp, sysname, ep, dflags); @@ -2392,8 +2450,15 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags) ep += 4; put_int32(os, ep); ep += 4; - put_int32(creation, ep); - ep += 4; + if (creation <= ERTS_MAX_LOCAL_CREATION) { + *tagp = PID_EXT; + *ep++ = creation; + } else { + ASSERT(is_external_pid(pid)); + *tagp = NEW_PID_EXT; + put_int32(creation, ep); + ep += 4; + } return ep; } @@ -2513,7 +2578,7 @@ dec_pid(ErtsDistExternal *edep, ErtsHeapFactory* factory, byte* ep, if (tag == PID_EXT) { cre = get_int8(ep); ep += 1; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { return NULL; } } else { @@ -2774,18 +2839,25 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Eterm sysname = (((dflags & DFLAG_INTERNAL_TAGS) && is_internal_ref(obj)) ? INTERNAL_LOCAL_SYSNAME : ref_node_name(obj)); Uint32 creation = ref_creation(obj); + byte* tagp = ep++; ASSERT(dflags & DFLAG_EXTENDED_REFERENCES); erts_magic_ref_save_bin(obj); - *ep++ = NEWER_REFERENCE_EXT; i = ref_no_numbers(obj); put_int16(i, ep); ep += 2; ep = enc_atom(acmp, sysname, ep, dflags); - put_int32(creation, ep); - ep += 4; + if (creation <= ERTS_MAX_LOCAL_CREATION) { + *tagp = NEW_REFERENCE_EXT; + *ep++ = creation; + } else { + ASSERT(is_external_ref(obj)); + *tagp = NEWER_REFERENCE_EXT; + put_int32(creation, ep); + ep += 4; + } ref_num = ref_numbers(obj); for (j = 0; j < i; j++) { put_int32(ref_num[j], ep); @@ -2798,14 +2870,21 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Eterm sysname = (((dflags & DFLAG_INTERNAL_TAGS) && is_internal_port(obj)) ? INTERNAL_LOCAL_SYSNAME : port_node_name(obj)); Uint32 creation = port_creation(obj); + byte* tagp = ep++; - *ep++ = NEW_PORT_EXT; ep = enc_atom(acmp, sysname, ep, dflags); j = port_number(obj); put_int32(j, ep); ep += 4; - put_int32(creation, ep); - ep += 4; + if (creation <= ERTS_MAX_LOCAL_CREATION) { + *tagp = PORT_EXT; + *ep++ = creation; + } else { + ASSERT(is_external_port(obj)); + *tagp = NEW_PORT_EXT; + put_int32(creation, ep); + ep += 4; + } break; } case LIST_DEF: @@ -3500,7 +3579,7 @@ dec_term_atom_common: if (tag == PORT_EXT) { cre = get_int8(ep); ep++; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { goto error; } } @@ -3547,7 +3626,7 @@ dec_term_atom_common: cre = get_int8(ep); ep += 1; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { goto error; } goto ref_ext_common; @@ -3561,7 +3640,7 @@ dec_term_atom_common: cre = get_int8(ep); ep += 1; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { goto error; } r0 = get_int32(ep); @@ -4159,13 +4238,21 @@ error_hamt: to a sequence of bytes N.B. That this must agree with to_external2() above!!! (except for cached atoms) */ -static Uint encode_size_struct2(ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags) { - Uint res; - (void) encode_size_struct_int(NULL, acmp, obj, dflags, NULL, &res); - return res; +static Uint encode_size_struct2(ErtsAtomCacheMap *acmp, + Eterm obj, + unsigned dflags) { + Uint size; + ErtsExtSzRes res = encode_size_struct_int(NULL, acmp, obj, + dflags, NULL, &size); + /* + * encode_size_struct2() only allowed when + * we know the result will always be OK! + */ + ASSERT(res == ERTS_EXT_SZ_OK); (void) res; + return (Uint) size; } -static int +static ErtsExtSzRes encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags, Sint *reds, Uint *res) { @@ -4198,7 +4285,7 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, ctx->obj = obj; ctx->result = result; WSTACK_SAVE(s, &ctx->wstack); - return -1; + return ERTS_EXT_SZ_YIELD; } switch (tag_val_def(obj)) { case NIL_DEF: @@ -4259,21 +4346,30 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, result += 1 + 4 + 1 + i; /* tag,size,sign,digits */ break; case EXTERNAL_PID_DEF: + if (external_pid_creation(obj) > ERTS_MAX_LOCAL_CREATION) + result += 3; + /*fall through*/ case PID_DEF: result += (1 + encode_size_struct2(acmp, pid_node_name(obj), dflags) + - 4 + 4 + 4); + 4 + 4 + 1); break; case EXTERNAL_REF_DEF: + if (external_ref_creation(obj) > ERTS_MAX_LOCAL_CREATION) + result += 3; + /*fall through*/ case REF_DEF: ASSERT(dflags & DFLAG_EXTENDED_REFERENCES); i = ref_no_numbers(obj); result += (1 + 2 + encode_size_struct2(acmp, ref_node_name(obj), dflags) + - 4 + 4*i); + 1 + 4*i); break; case EXTERNAL_PORT_DEF: + if (external_port_creation(obj) > ERTS_MAX_LOCAL_CREATION) + result += 3; + /*fall through*/ case PORT_DEF: result += (1 + encode_size_struct2(acmp, port_node_name(obj), dflags) + - 4 + 4); + 4 + 1); break; case LIST_DEF: { int is_str = is_external_string(obj, &m); @@ -4365,11 +4461,26 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, result += 32; /* Yes, including the tag */ } break; - case BINARY_DEF: - if (dflags & DFLAG_INTERNAL_TAGS) { + case BINARY_DEF: { + ProcBin* pb = (ProcBin*) binary_val(obj); + Uint tot_bytes = pb->size; + if (!(dflags & DFLAG_INTERNAL_TAGS)) { +#ifdef ARCH_64 + if (tot_bytes >= (Uint) 0xffffffff) { + if (pb->thing_word == HEADER_SUB_BIN) { + ErlSubBin* sub = (ErlSubBin*) pb; + tot_bytes += (sub->bitoffs + sub->bitsize+ 7) / 8; + } + if (tot_bytes > (Uint) 0xffffffff) { + WSTACK_DESTROY(s); + return ERTS_EXT_SZ_SYSTEM_LIMIT; + } + } +#endif + } + else { ProcBin* pb = (ProcBin*) binary_val(obj); Uint sub_extra = 0; - Uint tot_bytes = pb->size; if (pb->thing_word == HEADER_SUB_BIN) { ErlSubBin* sub = (ErlSubBin*) pb; pb = (ProcBin*) binary_val(sub->orig); @@ -4386,6 +4497,7 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, result += 1 + 4 + binary_size(obj) + 5; /* For unaligned binary */ break; + } case FUN_DEF: { ErlFunThing* funp = (ErlFunThing *) fun_val(obj); @@ -4418,7 +4530,7 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, break; default: - erts_exit(ERTS_ERROR_EXIT,"Internal data structure error (in encode_size_struct2)%x\n", + erts_exit(ERTS_ERROR_EXIT,"Internal data structure error (in encode_size_struct_int) %x\n", obj); } @@ -4458,7 +4570,7 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, *reds = r < 0 ? 0 : r; } *res = result; - return 0; + return ERTS_EXT_SZ_OK; } diff --git a/erts/emulator/beam/external.h b/erts/emulator/beam/external.h index 396cd9f802..b556c9076c 100644 --- a/erts/emulator/beam/external.h +++ b/erts/emulator/beam/external.h @@ -144,14 +144,6 @@ typedef struct erl_dist_external { ErtsAtomTranslationTable attab; } ErtsDistExternal; -#define ERTS_DIST_EXT_SIZE(EDEP) \ - (sizeof(ErtsDistExternal) \ - - (((EDEP)->flags & ERTS_DIST_EXT_ATOM_TRANS_TAB) \ - ? (ASSERT(0 <= (EDEP)->attab.size \ - && (EDEP)->attab.size <= ERTS_ATOM_CACHE_SIZE), \ - sizeof(Eterm)*(ERTS_ATOM_CACHE_SIZE - (EDEP)->attab.size)) \ - : sizeof(ErtsAtomTranslationTable))) - typedef struct { byte *extp; int exttmp; @@ -172,14 +164,21 @@ byte *erts_encode_ext_dist_header_setup(byte *, ErtsAtomCacheMap *, Uint, Eterm) byte *erts_encode_ext_dist_header_fragment(byte **, Uint, Eterm); Sint erts_encode_ext_dist_header_finalize(ErtsDistOutputBuf*, DistEntry *, Uint32 dflags, Sint reds); struct erts_dsig_send_context; -int erts_encode_dist_ext_size(Eterm, Uint32, ErtsAtomCacheMap*, Uint* szp); -int erts_encode_dist_ext_size_int(Eterm term, struct erts_dsig_send_context* ctx, Uint* szp); + +typedef enum { + ERTS_EXT_SZ_OK, + ERTS_EXT_SZ_YIELD, + ERTS_EXT_SZ_SYSTEM_LIMIT +} ErtsExtSzRes; + +ErtsExtSzRes erts_encode_dist_ext_size(Eterm, Uint32, ErtsAtomCacheMap*, Uint* szp); +ErtsExtSzRes erts_encode_dist_ext_size_ctx(Eterm term, struct erts_dsig_send_context* ctx, Uint* szp); struct TTBEncodeContext_; int erts_encode_dist_ext(Eterm, byte **, Uint32, ErtsAtomCacheMap *, struct TTBEncodeContext_ *, Sint* reds); -Uint erts_encode_ext_size(Eterm); -Uint erts_encode_ext_size_2(Eterm, unsigned); +ErtsExtSzRes erts_encode_ext_size(Eterm, Uint *szp); +ErtsExtSzRes erts_encode_ext_size_2(Eterm, unsigned, Uint *szp); Uint erts_encode_ext_size_ets(Eterm); void erts_encode_ext(Eterm, byte **); byte* erts_encode_ext_ets(Eterm, byte *, struct erl_off_heap_header** ext_off_heap); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index f9bbe4167f..4c8d3d3dbe 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1216,10 +1216,11 @@ Uint64 erts_timestamp_millis(void); Export* erts_find_function(Eterm, Eterm, unsigned int, ErtsCodeIndex); -void *erts_calc_stacklimit(char *prev_c, UWord stacksize); -int erts_check_below_limit(char *ptr, char *limit); -int erts_check_above_limit(char *ptr, char *limit); -void *erts_ptr_id(void *ptr); +/* ERTS_NOINLINE prevents link-time optimization across modules */ +void *erts_calc_stacklimit(char *prev_c, UWord stacksize) ERTS_NOINLINE; +int erts_check_below_limit(char *ptr, char *limit) ERTS_NOINLINE; +int erts_check_above_limit(char *ptr, char *limit) ERTS_NOINLINE; +void *erts_ptr_id(void *ptr) ERTS_NOINLINE; Eterm store_external_or_ref_in_proc_(Process *, Eterm); Eterm store_external_or_ref_(Uint **, ErlOffHeap*, Eterm); diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index 692408e212..462ee77e6f 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -1001,7 +1001,7 @@ catch_end(Y) { } /* only x(2) is included in the rootset here */ if (E - HTOP < 3) { - SWAPOUT; + $GC_SWAPOUT(); PROCESS_MAIN_CHK_LOCKS(c_p); FCALLS -= erts_garbage_collect_nobump(c_p, 3, reg+2, 1, FCALLS); ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index b961c639f5..45fef0c0e5 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -4450,6 +4450,7 @@ erts_port_call(Process* c_p, char input_buf[256]; char *bufp; byte *endp; + Uint uintsz; ErlDrvSizeT size; int try_call; erts_aint32_t sched_flags; @@ -4462,7 +4463,9 @@ erts_port_call(Process* c_p, try_call = !(sched_flags & ERTS_PTS_FLGS_FORCE_SCHEDULE_OP); - size = erts_encode_ext_size(data); + if (erts_encode_ext_size(data, &uintsz) != ERTS_EXT_SZ_OK) + return ERTS_PORT_OP_BADARG; + size = (ErlDrvSizeT) uintsz; if (!try_call) bufp = erts_alloc(ERTS_ALC_T_DRV_CALL_DATA, size); @@ -5295,44 +5298,31 @@ erts_get_port_names(Eterm id, ErlDrvPort drv_port) pnp->driver_name = NULL; } else { - int do_realloc = 1; - int len = -1; - size_t pnp_len = sizeof(ErtsPortNames); -#ifndef DEBUG - pnp_len += 100; /* In most cases 100 characters will be enough... */ - ASSERT(prt->common.id == id); -#endif - pnp = erts_alloc(ERTS_ALC_T_PORT_NAMES, pnp_len); - do { - int nlen; - char *name, *driver_name; - if (len > 0) { - erts_free(ERTS_ALC_T_PORT_NAMES, pnp); - pnp_len = sizeof(ErtsPortNames) + len; - pnp = erts_alloc(ERTS_ALC_T_PORT_NAMES, pnp_len); - } - name = prt->name; - len = nlen = name ? sys_strlen(name) + 1 : 0; - driver_name = (prt->drv_ptr ? prt->drv_ptr->name : NULL); - len += driver_name ? sys_strlen(driver_name) + 1 : 0; - if (len <= pnp_len - sizeof(ErtsPortNames)) { - if (!name) - pnp->name = NULL; - else { - pnp->name = ((char *) pnp) + sizeof(ErtsPortNames); - sys_strcpy(pnp->name, name); - } - if (!driver_name) - pnp->driver_name = NULL; - else { - pnp->driver_name = (((char *) pnp) - + sizeof(ErtsPortNames) - + nlen); - sys_strcpy(pnp->driver_name, driver_name); - } - do_realloc = 0; - } - } while (do_realloc); + int len; + int nlen; + char *driver_name; + + len = nlen = prt->name ? sys_strlen(prt->name) + 1 : 0; + driver_name = (prt->drv_ptr ? prt->drv_ptr->name : NULL); + len += driver_name ? sys_strlen(driver_name) + 1 : 0; + + pnp = erts_alloc(ERTS_ALC_T_PORT_NAMES, + sizeof(ErtsPortNames) + len); + + if (!prt->name) + pnp->name = NULL; + else { + pnp->name = ((char *) pnp) + sizeof(ErtsPortNames); + sys_strcpy(pnp->name, prt->name); + } + if (!driver_name) + pnp->driver_name = NULL; + else { + pnp->driver_name = (((char *) pnp) + + sizeof(ErtsPortNames) + + nlen); + sys_strcpy(pnp->driver_name, driver_name); + } } return pnp; } diff --git a/erts/emulator/beam/macros.tab b/erts/emulator/beam/macros.tab index 494fe8961e..1b5e5f66b0 100644 --- a/erts/emulator/beam/macros.tab +++ b/erts/emulator/beam/macros.tab @@ -63,10 +63,21 @@ JUMP(Fail) { Goto(*I); } +GC_SWAPOUT() { + // + // Since a garbage collection is expensive anyway, we can afford + // to save the instruction counter so that the correct function will + // be pointed in the crash dump if the garbage collection fails + // because of insufficient memory. + // + SWAPOUT; + c_p->i = I; +} + GC_TEST(Ns, Nh, Live) { Uint need = $Nh + $Ns; if (ERTS_UNLIKELY(E - HTOP < need)) { - SWAPOUT; + $GC_SWAPOUT(); PROCESS_MAIN_CHK_LOCKS(c_p); FCALLS -= erts_garbage_collect_nobump(c_p, need, reg, $Live, FCALLS); ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); @@ -79,7 +90,7 @@ GC_TEST(Ns, Nh, Live) { GC_TEST_PRESERVE(NeedHeap, Live, PreserveTerm) { Uint need = $NeedHeap; if (ERTS_UNLIKELY(E - HTOP < need)) { - SWAPOUT; + $GC_SWAPOUT(); reg[$Live] = $PreserveTerm; PROCESS_MAIN_CHK_LOCKS(c_p); FCALLS -= erts_garbage_collect_nobump(c_p, need, reg, $Live+1, FCALLS); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 7a125b0f67..10ca74cd60 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1690,9 +1690,14 @@ i_plus S1=c S2=c Fail Dst => move S1 x | i_plus x S2 Fail Dst i_plus xy xyc j? d -i_minus x x j? d -i_minus c x j? d -i_minus s s j? d +# A minus instruction with a constant right operand will be +# converted to an i_increment instruction, except in guards or +# when the negated value of the constant won't fit in a guard. +# Therefore, it very rare. +i_minus S1 S2=c Fail Dst => move S2 x | i_minus S1 x Fail Dst + +i_minus xy xy j? d +i_minus c xy j? d i_times j? s s d diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index a6312293cc..c261c8e117 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -63,6 +63,14 @@ # endif #endif +#ifndef ERTS_NOINLINE +# if ERTS_AT_LEAST_GCC_VSN__(3,1,1) +# define ERTS_NOINLINE __attribute__((__noinline__)) +# else +# define ERTS_NOINLINE +# endif +#endif + #if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) # undef ERTS_CAN_INLINE # define ERTS_CAN_INLINE 0 diff --git a/erts/emulator/beam/time.c b/erts/emulator/beam/time.c index a3069e419a..9eb020d070 100644 --- a/erts/emulator/beam/time.c +++ b/erts/emulator/beam/time.c @@ -316,7 +316,7 @@ struct ErtsTimerWheel_ { #define ERTS_TW_SLOT_AT_ONCE (-1) #define ERTS_TW_BUMP_LATER_WHEEL(TIW) \ - ((tiw)->pos + ERTS_TW_LATER_WHEEL_SLOT_SIZE >= (TIW)->later.pos) + ((TIW)->pos + ERTS_TW_LATER_WHEEL_SLOT_SIZE >= (TIW)->later.pos) static int bump_later_wheel(ErtsTimerWheel *tiw, int *yield_count_p); @@ -701,7 +701,8 @@ remove_timer(ErtsTimerWheel *tiw, ErtsTWheelTimer *p) if (slot < ERTS_TW_SOON_WHEEL_END_SLOT) { if (empty_slot && tiw->true_next_timeout_time - && p->timeout_pos == tiw->next_timeout_pos) { + && p->timeout_pos == tiw->next_timeout_pos + && tiw->yield_slot == ERTS_TW_SLOT_INACTIVE) { tiw->true_next_timeout_time = 0; } if (--tiw->soon.nto == 0) @@ -714,7 +715,8 @@ remove_timer(ErtsTimerWheel *tiw, ErtsTWheelTimer *p) ErtsMonotonicTime tpos = tiw->later.min_tpos; tpos &= ERTS_TW_LATER_WHEEL_POS_MASK; tpos -= ERTS_TW_LATER_WHEEL_SLOT_SIZE; - if (tpos == tiw->next_timeout_pos) + if (tpos == tiw->next_timeout_pos + && tiw->yield_slot == ERTS_TW_SLOT_INACTIVE) tiw->true_next_timeout_time = 0; } if (--tiw->later.nto == 0) { @@ -908,7 +910,6 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time) { ErtsMonotonicTime tmp_slots = bump_to - tiw->pos; - tmp_slots = (bump_to - tiw->pos); if (tmp_slots < ERTS_TW_SOON_WHEEL_SIZE) slots = (int) tmp_slots; else |