diff options
Diffstat (limited to 'erts/emulator/beam')
31 files changed, 1411 insertions, 226 deletions
diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 5f23b2c168..f14b376419 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -51,11 +51,50 @@ plus.fetch(Op1, Op2) { plus.execute(Fail, Dst) { if (ERTS_LIKELY(is_both_small(PlusOp1, PlusOp2))) { +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + Sint lhs_tagged, rhs_untagged, res; + + /* The value part of immediate integers start right after the tag and + * occupy the rest of the word, so if you squint a bit they look like + * fixed-point integers; as long as you mask the tag away you will get + * correct results from addition/subtraction since they share the same + * notion of zero. It's fairly easy to see that the following holds + * when (a + b) is in range: + * + * (a >> s) + (b >> s) == ((a & ~m) + (b & ~m)) >> s + * + * Where 's' is the tag size and 'm' is the tag mask. + * + * The left-hand side is our fallback in the #else clause and is the + * fastest way to do this safely in plain C. The actual addition will + * never overflow since `Sint` has a much greater range than our + * smalls, so we can use the IS_SSMALL macro to see if the result is + * within range. + * + * What we're doing below is an extension of the right-hand side. By + * treating `a` and `b` as fixed-point integers, all additions whose + * result is out of range will also overflow `Sint` and we can use the + * compiler's overflow intrinsics to check for this condition. + * + * In addition, since the tag lives in the lowest bits we can further + * optimize this by only stripping the tag from either side. The higher + * bits can't influence the tag bits since we bail on overflow, so the + * tag bits from the tagged side will simply appear in the result. */ + lhs_tagged = PlusOp1; + rhs_untagged = PlusOp2 & ~_TAG_IMMED1_MASK; + + if (ERTS_LIKELY(!__builtin_add_overflow(lhs_tagged, rhs_untagged, &res))) { + ASSERT(is_small(res)); + $Dst = res; + $NEXT0(); + } +#else Sint i = signed_val(PlusOp1) + signed_val(PlusOp2); if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } +#endif } $OUTLINED_ARITH_2($Fail, mixed_plus, BIF_splus_2, PlusOp1, PlusOp2, $Dst); } @@ -73,11 +112,26 @@ minus.fetch(Op1, Op2) { minus.execute(Fail, Dst) { if (ERTS_LIKELY(is_both_small(MinusOp1, MinusOp2))) { +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + Sint lhs_tagged, rhs_untagged, res; + + /* See plus.execute */ + lhs_tagged = MinusOp1; + rhs_untagged = MinusOp2 & ~_TAG_IMMED1_MASK; + + if (ERTS_LIKELY(!__builtin_sub_overflow(lhs_tagged, rhs_untagged, &res))) { + ASSERT(is_small(res)); + $Dst = res; + $NEXT0(); + } +#else Sint i = signed_val(MinusOp1) - signed_val(MinusOp2); + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } +#endif } $OUTLINED_ARITH_2($Fail, mixed_minus, BIF_sminus_2, MinusOp1, MinusOp2, $Dst); } @@ -97,12 +151,27 @@ increment.execute(IncrementVal, Dst) { Eterm result; if (ERTS_LIKELY(is_small(increment_reg_val))) { +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + Sint lhs_tagged, rhs_untagged, res; + + /* See plus.execute */ + lhs_tagged = increment_reg_val; + rhs_untagged = (Sint)increment_val << _TAG_IMMED1_SIZE; + + if (ERTS_LIKELY(!__builtin_add_overflow(lhs_tagged, rhs_untagged, &res))) { + ASSERT(is_small(res)); + $Dst = res; + $NEXT0(); + } +#else Sint i = signed_val(increment_reg_val) + increment_val; if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } +#endif } + result = erts_mixed_plus(c_p, increment_reg_val, make_small(increment_val)); ERTS_HOLE_CHECK(c_p); if (ERTS_LIKELY(is_value(result))) { @@ -118,11 +187,15 @@ i_times(Fail, Op1, Op2, Dst) { Eterm op2 = $Op2; #ifdef HAVE_OVERFLOW_CHECK_BUILTINS if (ERTS_LIKELY(is_both_small(op1, op2))) { - Sint a = signed_val(op1); - Sint b = signed_val(op2); - Sint res; - if (ERTS_LIKELY(!__builtin_mul_overflow(a, b, &res) && IS_SSMALL(res))) { - $Dst = make_small(res); + /* See plus.execute */ + Sint lhs_untagged, rhs_actual, res; + + lhs_untagged = op1 & ~_TAG_IMMED1_MASK; + rhs_actual = signed_val(op2); + + if (ERTS_LIKELY(!__builtin_mul_overflow(lhs_untagged, rhs_actual, &res))) { + ASSERT(!(res & _TAG_IMMED1_MASK)); + $Dst = res | _TAG_IMMED1_SMALL; $NEXT0(); } } diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index ea01ce597d..8e93e53003 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -375,44 +375,33 @@ do { \ /* * process_main() is already huge, so we want to avoid inlining - * into it. Especially functions that are seldom used. + * seldom used functions into it. */ -#ifdef __GNUC__ -# define NOINLINE __attribute__((__noinline__)) -#else -# define NOINLINE -#endif - - -/* - * The following functions are called directly by process_main(). - * Don't inline them. - */ -static void init_emulator_finish(void) NOINLINE; -static ErtsCodeMFA *ubif2mfa(void* uf) NOINLINE; +static void init_emulator_finish(void) ERTS_NOINLINE; +static ErtsCodeMFA *ubif2mfa(void* uf) ERTS_NOINLINE; static BeamInstr* handle_error(Process* c_p, BeamInstr* pc, - Eterm* reg, ErtsCodeMFA* bif_mfa) NOINLINE; + Eterm* reg, ErtsCodeMFA* bif_mfa) ERTS_NOINLINE; static BeamInstr* call_error_handler(Process* p, ErtsCodeMFA* mfa, - Eterm* reg, Eterm func) NOINLINE; + Eterm* reg, Eterm func) ERTS_NOINLINE; static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity, - BeamInstr *I, Uint offs) NOINLINE; + BeamInstr *I, Uint offs) ERTS_NOINLINE; static BeamInstr* apply(Process* p, Eterm* reg, - BeamInstr *I, Uint offs) NOINLINE; + BeamInstr *I, Uint offs) ERTS_NOINLINE; static BeamInstr* call_fun(Process* p, int arity, - Eterm* reg, Eterm args) NOINLINE; + Eterm* reg, Eterm args) ERTS_NOINLINE; static BeamInstr* apply_fun(Process* p, Eterm fun, - Eterm args, Eterm* reg) NOINLINE; + Eterm args, Eterm* reg) ERTS_NOINLINE; static Eterm new_fun(Process* p, Eterm* reg, - ErlFunEntry* fe, int num_free) NOINLINE; + ErlFunEntry* fe, int num_free) ERTS_NOINLINE; static int is_function2(Eterm Term, Uint arity); static Eterm erts_gc_new_map(Process* p, Eterm* reg, Uint live, - Uint n, BeamInstr* ptr) NOINLINE; + Uint n, BeamInstr* ptr) ERTS_NOINLINE; static Eterm erts_gc_new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, - Uint live, BeamInstr* ptr) NOINLINE; + Uint live, BeamInstr* ptr) ERTS_NOINLINE; static Eterm erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, - Uint n, BeamInstr* new_p) NOINLINE; + Uint n, BeamInstr* new_p) ERTS_NOINLINE; static Eterm erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, - Uint n, Eterm* new_p) NOINLINE; + Uint n, Eterm* new_p) ERTS_NOINLINE; static Eterm get_map_element(Eterm map, Eterm key); static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx); diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index d0e2d9afc2..b81056c774 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -1915,7 +1915,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp, erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Discarding message %T from %T to %T in an old " - "incarnation (%u) of this node (%u)\n", + "incarnation (%d) of this node (%d)\n", msg, p->common.id, to, @@ -1959,7 +1959,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp, erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Discarding message %T from %T to %T in an old " - "incarnation (%u) of this node (%u)\n", + "incarnation (%d) of this node (%d)\n", msg, p->common.id, to, diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 7666f23a4f..522f50287a 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -2176,24 +2176,6 @@ term_to_Uint64(Eterm term, Uint64 *up) #endif } -int -term_to_Uint32(Eterm term, Uint32 *up) -{ -#if ERTS_SIZEOF_ETERM == 4 - return term_to_Uint(term,up); -#else - if (is_small(term)) { - Sint i = signed_val(term); - if (i >= 0) { - *up = (Uint32) i; - return 1; - } - } - *up = BADARG; - return 0; -#endif -} - int term_to_Sint(Eterm term, Sint *sp) { diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 3fed076419..ad19cce395 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -168,8 +168,6 @@ Eterm erts_uint64_array_to_big(Uint **, int, int, Uint64 *); int term_to_Uint64(Eterm, Uint64*); int term_to_Sint64(Eterm, Sint64*); #endif -int term_to_Uint32(Eterm, Uint32*); - Uint32 big_to_uint32(Eterm b); int term_equals_2pow32(Eterm); diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 8bbe6450eb..30fe13fad3 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -775,19 +775,25 @@ void init_dist(void) static ERTS_INLINE ErtsDistOutputBuf * alloc_dist_obuf(Uint size, Uint headers) { - int i; + Uint obuf_size = sizeof(ErtsDistOutputBuf)*(headers); ErtsDistOutputBuf *obuf; - Uint obuf_size = sizeof(ErtsDistOutputBuf)*(headers) + - sizeof(byte)*size; - Binary *bin = erts_bin_drv_alloc(obuf_size); - obuf = (ErtsDistOutputBuf *) &bin->orig_bytes[size]; + Binary *bin; + byte *extp; + int i; + + bin = erts_bin_drv_alloc(obuf_size + size); erts_refc_add(&bin->intern.refc, headers - 1, 1); + + obuf = (ErtsDistOutputBuf *)&bin->orig_bytes[0]; + extp = (byte *)&bin->orig_bytes[obuf_size]; + for (i = 0; i < headers; i++) { obuf[i].bin = bin; - obuf[i].extp = (byte *)&bin->orig_bytes[0]; + obuf[i].extp = extp; #ifdef DEBUG obuf[i].dbg_pattern = ERTS_DIST_OUTPUT_BUF_DBG_PATTERN; - obuf[i].alloc_endp = obuf->extp + size; + obuf[i].ext_startp = extp; + obuf[i].alloc_endp = &extp[size]; ASSERT(bin == ErtsDistOutputBuf2Binary(obuf)); #endif } @@ -1360,7 +1366,7 @@ erts_dist_seq_tree_foreach_delete_yielding(DistSeqNode **root, limit); if (res > 0) { if (ysp != &ys) - erts_free(ERTS_ALC_T_ML_YIELD_STATE, ysp); + erts_free(ERTS_ALC_T_SEQ_YIELD_STATE, ysp); *vyspp = NULL; } else { @@ -2341,7 +2347,8 @@ erts_dsig_send(ErtsDSigSendContext *ctx) (ctx->fragments-1) * ERTS_DIST_FRAGMENT_HEADER_SIZE, ctx->fragments); ctx->obuf->ext_start = &ctx->obuf->extp[0]; - ctx->obuf->ext_endp = &ctx->obuf->extp[0] + ctx->max_finalize_prepend + ctx->dhdr_ext_size; + ctx->obuf->ext_endp = &ctx->obuf->extp[0] + ctx->max_finalize_prepend + + ctx->dhdr_ext_size; /* Encode internal version of dist header */ ctx->obuf->extp = erts_encode_ext_dist_header_setup( @@ -2380,8 +2387,8 @@ erts_dsig_send(ErtsDSigSendContext *ctx) case ERTS_DSIG_SEND_PHASE_FIN: { ASSERT(ctx->obuf->extp < ctx->obuf->ext_endp); - ASSERT(((byte*)&ctx->obuf->bin->orig_bytes[0]) <= ctx->obuf->extp - ctx->max_finalize_prepend); - ASSERT(ctx->obuf->ext_endp <= ((byte*)ctx->obuf->bin->orig_bytes) + ctx->data_size + ctx->dhdr_ext_size); + ASSERT(ctx->obuf->ext_startp <= ctx->obuf->extp - ctx->max_finalize_prepend); + ASSERT(ctx->obuf->ext_endp <= (byte*)ctx->obuf->ext_startp + ctx->data_size + ctx->dhdr_ext_size); ctx->data_size = ctx->obuf->ext_endp - ctx->obuf->extp; @@ -3457,6 +3464,7 @@ dist_ctrl_get_data_1(BIF_ALIST_1) pb->bytes = (byte*) obuf->extp; pb->flags = 0; res = make_binary(pb); + hp += PROC_BIN_SIZE; } else { hp = HAlloc(BIF_P, PROC_BIN_SIZE * 2 + 4 + hsz); pb = (ProcBin *) (char *) hp; @@ -3748,10 +3756,12 @@ int distribution_info(fmtfn_t to, void *arg) /* Called by break handler */ BIF_RETTYPE setnode_2(BIF_ALIST_2) { Process *net_kernel; - Uint32 creation; + Uint creation; /* valid creation ? */ - if(!term_to_Uint32(BIF_ARG_2, &creation)) + if(!term_to_Uint(BIF_ARG_2, &creation)) + goto error; + if(creation > 3) goto error; /* valid node name ? */ @@ -3795,7 +3805,7 @@ BIF_RETTYPE setnode_2(BIF_ALIST_2) erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_MAIN); erts_thr_progress_block(); inc_no_nodes(); - erts_set_this_node(BIF_ARG_1, creation); + erts_set_this_node(BIF_ARG_1, (Uint32) creation); erts_is_alive = 1; send_nodes_mon_msgs(NULL, am_nodeup, BIF_ARG_1, am_visible, NIL); erts_thr_progress_unblock(); diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index f953a2ab8c..067028634b 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -54,12 +54,11 @@ #define DFLAG_DIST_MANDATORY (DFLAG_EXTENDED_REFERENCES \ | DFLAG_EXTENDED_PIDS_PORTS \ | DFLAG_UTF8_ATOMS \ - | DFLAG_NEW_FUN_TAGS \ - | DFLAG_BIG_CREATION) + | DFLAG_NEW_FUN_TAGS) /* * Additional optimistic flags when encoding toward pending connection. - * If remote node (erl_interface) does not support these then we may need + * If remote node (erl_interface) does not supporting these then we may need * to transcode messages enqueued before connection setup was finished. */ #define DFLAG_DIST_HOPEFULLY (DFLAG_EXPORT_PTR_TAG \ @@ -76,6 +75,7 @@ | DFLAG_SMALL_ATOM_TAGS \ | DFLAG_UTF8_ATOMS \ | DFLAG_MAP_TAG \ + | DFLAG_BIG_CREATION \ | DFLAG_SEND_SENDER \ | DFLAG_BIG_SEQTRACE_LABELS \ | DFLAG_EXIT_PAYLOAD \ diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index a7424bbcb8..7ff345a54b 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -2579,6 +2579,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) /* Need to be the only thread running... */ erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_MAIN); + BIF_P->scheduler_data->current_process = NULL; erts_thr_progress_block(); if (BIF_ARG_1 == am_info) @@ -2592,6 +2593,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) erts_thr_progress_unblock(); erts_proc_lock(BIF_P, ERTS_PROC_LOCK_MAIN); + BIF_P->scheduler_data->current_process = BIF_P; ASSERT(dsbufp && dsbufp->str); res = new_binary(BIF_P, (byte *) dsbufp->str, dsbufp->str_len); @@ -2797,10 +2799,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) } else if (BIF_ARG_1 == am_threads) { return am_true; } else if (BIF_ARG_1 == am_creation) { - Uint hsz = 0; - erts_bld_uint(NULL, &hsz, erts_this_node->creation); - hp = hsz ? HAlloc(BIF_P, hsz) : NULL; - BIF_RET(erts_bld_uint(&hp, NULL, erts_this_node->creation)); + return make_small(erts_this_node->creation); } else if (BIF_ARG_1 == am_break_ignored) { extern int ignore_break; if (ignore_break) @@ -3026,6 +3025,8 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(erts_nif_taints(BIF_P)); } else if (ERTS_IS_ATOM_STR("reader_groups_map", BIF_ARG_1)) { BIF_RET(erts_get_reader_groups_map(BIF_P)); + } else if (ERTS_IS_ATOM_STR("decentralized_counter_groups_map", BIF_ARG_1)) { + BIF_RET(erts_get_decentralized_counter_groups_map(BIF_P)); } else if (ERTS_IS_ATOM_STR("dist_buf_busy_limit", BIF_ARG_1)) { Uint hsz = 0; diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c index 6f8d2f8c35..6a4f43297e 100644 --- a/erts/emulator/beam/erl_cpu_topology.c +++ b/erts/emulator/beam/erl_cpu_topology.c @@ -34,6 +34,7 @@ #include "error.h" #include "bif.h" #include "erl_cpu_topology.h" +#include "erl_flxctr.h" #define ERTS_MAX_READER_GROUPS 64 @@ -58,6 +59,7 @@ static erts_cpu_info_t *cpuinfo; static int max_main_threads; static int reader_groups; +static int decentralized_counter_groups; static ErtsCpuBindData *scheduler2cpu_map; static erts_rwmtx_t cpuinfo_rwmtx; @@ -127,6 +129,8 @@ static erts_cpu_groups_map_t *cpu_groups_maps; static erts_cpu_groups_map_t *reader_groups_map; +static erts_cpu_groups_map_t *decentralized_counter_groups_map; + #define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH #define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) @@ -138,6 +142,7 @@ static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); static void reader_groups_callback(int, ErtsSchedulerData *, int, void *); +static void flxctr_groups_callback(int, ErtsSchedulerData *, int, void *); static erts_cpu_groups_map_t *add_cpu_groups(int groups, erts_cpu_groups_callback_t callback, void *arg); @@ -1646,7 +1651,8 @@ erts_get_logical_processors(int *conf, int *onln, int *avail) } void -erts_pre_early_init_cpu_topology(int *max_rg_p, +erts_pre_early_init_cpu_topology(int *max_dcg_p, + int *max_rg_p, int *conf_p, int *onln_p, int *avail_p) @@ -1654,6 +1660,7 @@ erts_pre_early_init_cpu_topology(int *max_rg_p, cpu_groups_maps = NULL; no_cpu_groups_callbacks = 0; *max_rg_p = ERTS_MAX_READER_GROUPS; + *max_dcg_p = ERTS_MAX_FLXCTR_GROUPS; cpuinfo = erts_cpu_info_create(); get_logical_processors(conf_p, onln_p, avail_p); } @@ -1662,7 +1669,9 @@ void erts_early_init_cpu_topology(int no_schedulers, int *max_main_threads_p, int max_reader_groups, - int *reader_groups_p) + int *reader_groups_p, + int max_decentralized_counter_groups, + int *decentralized_counter_groups_p) { user_cpudata = NULL; user_cpudata_size = 0; @@ -1687,6 +1696,12 @@ erts_early_init_cpu_topology(int no_schedulers, max_main_threads = no_schedulers; *max_main_threads_p = max_main_threads; + decentralized_counter_groups = max_main_threads; + if (decentralized_counter_groups <= 1 || max_decentralized_counter_groups <= 1) + decentralized_counter_groups = 1; + if (decentralized_counter_groups > max_decentralized_counter_groups) + decentralized_counter_groups = max_decentralized_counter_groups; + *decentralized_counter_groups_p = decentralized_counter_groups; reader_groups = max_main_threads; if (reader_groups <= 1 || max_reader_groups <= 1) reader_groups = 0; @@ -1718,6 +1733,9 @@ erts_init_cpu_topology(void) reader_groups_map = add_cpu_groups(reader_groups, reader_groups_callback, NULL); + decentralized_counter_groups_map = add_cpu_groups(decentralized_counter_groups, + flxctr_groups_callback, + NULL); if (cpu_bind_order == ERTS_CPU_BIND_NONE) erts_rwmtx_rwunlock(&cpuinfo_rwmtx); @@ -1789,6 +1807,15 @@ reader_groups_callback(int suspending, erts_rwmtx_set_reader_group(suspending ? 0 : group+1); } +void +flxctr_groups_callback(int suspending, + ErtsSchedulerData *esdp, + int group, + void *unused) +{ + erts_flxctr_set_slot(suspending ? 0 : group+1); +} + static Eterm get_cpu_groups_map(Process *c_p, erts_cpu_groups_map_t *map, int offset); @@ -1821,6 +1848,16 @@ erts_get_reader_groups_map(Process *c_p) return res; } +Eterm +erts_get_decentralized_counter_groups_map(Process *c_p) +{ + Eterm res; + erts_rwmtx_rlock(&cpuinfo_rwmtx); + res = get_cpu_groups_map(c_p, decentralized_counter_groups_map, 1); + erts_rwmtx_runlock(&cpuinfo_rwmtx); + return res; +} + /* * CPU groups */ diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h index 88bcad79ab..4a428d7972 100644 --- a/erts/emulator/beam/erl_cpu_topology.h +++ b/erts/emulator/beam/erl_cpu_topology.h @@ -27,14 +27,19 @@ #ifndef ERL_CPU_TOPOLOGY_H__ #define ERL_CPU_TOPOLOGY_H__ -void erts_pre_early_init_cpu_topology(int *max_rg_p, - int *conf_p, - int *onln_p, - int *avail_p); -void erts_early_init_cpu_topology(int no_schedulers, - int *max_main_threads_p, - int max_reader_groups, - int *reader_groups_p); +void +erts_pre_early_init_cpu_topology(int *max_dcg_p, + int *max_rg_p, + int *conf_p, + int *onln_p, + int *avail_p); +void +erts_early_init_cpu_topology(int no_schedulers, + int *max_main_threads_p, + int max_reader_groups, + int *reader_groups_p, + int max_decentralized_counter_groups, + int *decentralized_counter_groups_p); void erts_init_cpu_topology(void); @@ -70,6 +75,7 @@ Eterm erts_bind_schedulers(Process *c_p, Eterm how); Eterm erts_get_schedulers_binds(Process *c_p); Eterm erts_get_reader_groups_map(Process *c_p); +Eterm erts_get_decentralized_counter_groups_map(Process *c_p); Eterm erts_set_cpu_topology(Process *c_p, Eterm term); Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which); diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c index 0a50af4d1a..c0f5c506f4 100644 --- a/erts/emulator/beam/erl_db.c +++ b/erts/emulator/beam/erl_db.c @@ -42,6 +42,7 @@ #include "bif.h" #include "big.h" #include "erl_binary.h" +#include "bif.h" erts_atomic_t erts_ets_misc_mem_size; @@ -64,6 +65,11 @@ do { \ } \ }while(0) +#define DB_GET_APPROX_NITEMS(DB) \ + erts_flxctr_read_approx(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define DB_GET_APPROX_MEM_CONSUMED(DB) \ + erts_flxctr_read_approx(&(DB)->common.counters, ERTS_DB_TABLE_MEM_COUNTER_ID) + static BIF_RETTYPE db_bif_fail(Process* p, Uint freason, Uint bif_ix, Export* bif_exp) { @@ -398,8 +404,9 @@ static void free_dbtable(void *vtb) { DbTable *tb = (DbTable *) vtb; - - ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable)); + ASSERT(erts_flxctr_is_snapshot_ongoing(&tb->common.counters) || + sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID)); erts_rwmtx_destroy(&tb->common.rwlock); erts_mtx_destroy(&tb->common.fixlock); @@ -408,7 +415,8 @@ free_dbtable(void *vtb) if (tb->common.btid) erts_bin_release(tb->common.btid); - erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable)); + erts_flxctr_destroy(&tb->common.counters, ERTS_ALC_T_DB_TABLE); + erts_free(ERTS_ALC_T_DB_TABLE, tb); } static void schedule_free_dbtable(DbTable* tb) @@ -1731,12 +1739,16 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) */ { DbTable init_tb; - - erts_atomic_init_nob(&init_tb.common.memory_size, 0); + erts_flxctr_init(&init_tb.common.counters, 0, 2, ERTS_ALC_T_DB_TABLE); tb = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE, &init_tb, sizeof(DbTable)); - erts_atomic_init_nob(&tb->common.memory_size, - erts_atomic_read_nob(&init_tb.common.memory_size)); + erts_flxctr_init(&tb->common.counters, + status & DB_CA_ORDERED_SET, + 2, + ERTS_ALC_T_DB_TABLE); + erts_flxctr_add(&tb->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID, + DB_GET_APPROX_MEM_CONSUMED(&init_tb)); } tb->common.meth = meth; @@ -1750,8 +1762,6 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) tb->common.owner = BIF_P->common.id; set_heir(BIF_P, tb, heir, heir_data); - erts_atomic_init_nob(&tb->common.nitems, 0); - tb->common.fixing_procs = NULL; tb->common.compress = is_compressed; #ifdef ETS_DBG_FORCE_TRAP @@ -2128,19 +2138,18 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) { SWord initial_reds = ERTS_BIF_REDS_LEFT(BIF_P); SWord reds = initial_reds; - Eterm nitems; + Eterm nitems_holder = THE_NON_VALUE; DbTable* tb; - CHECK_TABLES(); DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE, BIF_ets_internal_delete_all_2); if (BIF_ARG_2 == am_undefined) { - nitems = erts_make_integer(erts_atomic_read_nob(&tb->common.nitems), - BIF_P); - - reds = tb->common.meth->db_delete_all_objects(BIF_P, tb, reds); - + reds = tb->common.meth->db_delete_all_objects(BIF_P, + tb, + reds, + &nitems_holder); + ASSERT(nitems_holder != THE_NON_VALUE); ASSERT(!(tb->common.status & DB_BUSY)); if (reds < 0) { @@ -2159,7 +2168,7 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) db_unlock(tb, LCK_WRITE); BUMP_ALL_REDS(BIF_P); BIF_TRAP2(bif_export[BIF_ets_internal_delete_all_2], BIF_P, - BIF_ARG_1, nitems); + BIF_ARG_1, nitems_holder); } else { /* Done, no trapping needed */ @@ -2169,15 +2178,19 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) } else { /* - * The table lookup succeeded and second argument is nitems + * The table lookup succeeded and second argument is nitems_holder * and not 'undefined', which means we have trapped at least once * and are now done. */ - nitems = BIF_ARG_2; + nitems_holder = BIF_ARG_2; } - db_unlock(tb, LCK_WRITE); + { + Eterm nitems = + tb->common.meth->db_delete_all_objects_get_nitems_from_holder(BIF_P, + nitems_holder); BIF_RET(nitems); + } } static void delete_all_objects_continue(Process* p, DbTable* tb) @@ -2190,7 +2203,7 @@ static void delete_all_objects_continue(Process* p, DbTable* tb) if ((tb->common.status & (DB_DELETE|DB_BUSY)) != DB_BUSY) return; - reds = tb->common.meth->db_delete_all_objects(p, tb, reds); + reds = tb->common.meth->db_delete_all_objects(p, tb, reds, NULL); if (reds < 0) { BUMP_ALL_REDS(p); @@ -3277,13 +3290,29 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) int i; Eterm* hp; Uint freason; + Sint size = -1; + Sint memory = -1; + Eterm table; + int is_ctrs_read_result_set = 0; /*Process* rp = NULL;*/ /* If/when we implement lockless private tables: Eterm owner; */ - - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { - if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1))) + if(is_tuple(BIF_ARG_1) && + is_tuple_arity(BIF_ARG_1, 2) && + erts_flxctr_is_snapshot_result(tuple_val(BIF_ARG_1)[1])) { + Eterm counter_read_result = tuple_val(BIF_ARG_1)[1]; + table = tuple_val(BIF_ARG_1)[2]; + size = erts_flxctr_get_snapshot_result_after_trap(counter_read_result, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + memory = erts_flxctr_get_snapshot_result_after_trap(counter_read_result, + ERTS_DB_TABLE_MEM_COUNTER_ID); + is_ctrs_read_result_set = 1; + } else { + table = BIF_ARG_1; + } + if ((tb = db_get_table(BIF_P, table, DB_INFO, LCK_READ, &freason)) == NULL) { + if (freason == BADARG && (is_atom(table) || is_ref(table))) BIF_RET(am_undefined); else return db_bif_fail(BIF_P, freason, BIF_ets_info_1, NULL); @@ -3314,9 +3343,35 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) BIF_ERROR(BIF_P, BADARG); } }*/ + + if (!is_ctrs_read_result_set) { + ErtsFlxCtrSnapshotResult res = + erts_flxctr_snapshot(&tb->common.counters, ERTS_ALC_T_DB_TABLE, BIF_P); + if (ERTS_FLXCTR_GET_RESULT_AFTER_TRAP == res.type) { + Eterm tuple; + db_unlock(tb, LCK_READ); + hp = HAlloc(BIF_P, 3); + tuple = TUPLE2(hp, res.trap_resume_state, table); + BIF_TRAP1(bif_export[BIF_ets_info_1], BIF_P, tuple); + } else if (res.type == ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP) { + db_unlock(tb, LCK_READ); + BIF_TRAP1(bif_export[BIF_ets_info_1], BIF_P, table); + } else { + size = res.result[ERTS_DB_TABLE_NITEMS_COUNTER_ID]; + memory = res.result[ERTS_DB_TABLE_MEM_COUNTER_ID]; + is_ctrs_read_result_set = 1; + } + } for (i = 0; i < sizeof(fields)/sizeof(Eterm); i++) { - results[i] = table_info(BIF_P, tb, fields[i]); - ASSERT(is_value(results[i])); + if (is_ctrs_read_result_set && am_size == fields[i]) { + results[i] = erts_make_integer(size, BIF_P); + } else if (is_ctrs_read_result_set && am_memory == fields[i]) { + Sint words = (Sint) ((memory + sizeof(Sint) - 1) / sizeof(Sint)); + results[i] = erts_make_integer(words, BIF_P); + } else { + results[i] = table_info(BIF_P, tb, fields[i]); + ASSERT(is_value(results[i])); + } } db_unlock(tb, LCK_READ); @@ -3344,14 +3399,43 @@ BIF_RETTYPE ets_info_2(BIF_ALIST_2) DbTable* tb; Eterm ret = THE_NON_VALUE; Uint freason; - + if (erts_flxctr_is_snapshot_result(BIF_ARG_1)) { + Sint res; + if (am_memory == BIF_ARG_2) { + res = erts_flxctr_get_snapshot_result_after_trap(BIF_ARG_1, + ERTS_DB_TABLE_MEM_COUNTER_ID); + res = (Sint) ((res + sizeof(Sint) - 1) / sizeof(Sint)); + } else { + res = erts_flxctr_get_snapshot_result_after_trap(BIF_ARG_1, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + } + BIF_RET(erts_make_integer(res, BIF_P)); + } if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1))) BIF_RET(am_undefined); else return db_bif_fail(BIF_P, freason, BIF_ets_info_2, NULL); } - ret = table_info(BIF_P, tb, BIF_ARG_2); + if (BIF_ARG_2 == am_size || BIF_ARG_2 == am_memory) { + ErtsFlxCtrSnapshotResult res = + erts_flxctr_snapshot(&tb->common.counters, ERTS_ALC_T_DB_TABLE, BIF_P); + if (ERTS_FLXCTR_GET_RESULT_AFTER_TRAP == res.type) { + db_unlock(tb, LCK_READ); + BIF_TRAP2(bif_export[BIF_ets_info_2], BIF_P, res.trap_resume_state, BIF_ARG_2); + } else if (res.type == ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP) { + db_unlock(tb, LCK_READ); + BIF_TRAP2(bif_export[BIF_ets_info_2], BIF_P, BIF_ARG_1, BIF_ARG_2); + } else if (BIF_ARG_2 == am_size) { + ret = erts_make_integer(res.result[ERTS_DB_TABLE_NITEMS_COUNTER_ID], BIF_P); + } else { /* BIF_ARG_2 == am_memory */ + Sint r = res.result[ERTS_DB_TABLE_MEM_COUNTER_ID]; + r = (Sint) ((r + sizeof(Sint) - 1) / sizeof(Sint)); + ret = erts_make_integer(r, BIF_P); + } + } else { + ret = table_info(BIF_P, tb, BIF_ARG_2); + } db_unlock(tb, LCK_READ); if (is_non_value(ret)) { BIF_ERROR(BIF_P, BADARG); @@ -4121,7 +4205,8 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) int use_monotonic; if (What == am_size) { - ret = make_small(erts_atomic_read_nob(&tb->common.nitems)); + Uint size = (Uint) (DB_GET_APPROX_NITEMS(tb)); + ret = erts_make_integer(size, p); } else if (What == am_type) { if (tb->common.status & DB_SET) { ret = am_set; @@ -4136,7 +4221,7 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) ret = am_bag; } } else if (What == am_memory) { - Uint words = (Uint) ((erts_atomic_read_nob(&tb->common.memory_size) + Uint words = (Uint) ((DB_GET_APPROX_MEM_CONSUMED(tb) + sizeof(Uint) - 1) / sizeof(Uint)); @@ -4294,9 +4379,9 @@ static void print_table(fmtfn_t to, void *to_arg, int show, DbTable* tb) tb->common.meth->db_print(to, to_arg, show, tb); - erts_print(to, to_arg, "Objects: %d\n", (int)erts_atomic_read_nob(&tb->common.nitems)); + erts_print(to, to_arg, "Objects: %d\n", (int)DB_GET_APPROX_NITEMS(tb)); erts_print(to, to_arg, "Words: %bpu\n", - (Uint) ((erts_atomic_read_nob(&tb->common.memory_size) + (Uint) ((DB_GET_APPROX_MEM_CONSUMED(tb) + sizeof(Uint) - 1) / sizeof(Uint))); diff --git a/erts/emulator/beam/erl_db.h b/erts/emulator/beam/erl_db.h index dc77fbb60c..b22f35a5ef 100644 --- a/erts/emulator/beam/erl_db.h +++ b/erts/emulator/beam/erl_db.h @@ -160,7 +160,9 @@ do { \ erts_aint_t sz__ = (((erts_aint_t) (ALLOC_SZ)) \ - ((erts_aint_t) (FREE_SZ))); \ ASSERT((TAB)); \ - erts_atomic_add_nob(&(TAB)->common.memory_size, sz__); \ + erts_flxctr_add(&(TAB)->common.counters, \ + ERTS_DB_TABLE_MEM_COUNTER_ID, \ + sz__); \ } while (0) #define ERTS_ETS_MISC_MEM_ADD(SZ) \ @@ -305,10 +307,10 @@ erts_db_free(ErtsAlcType_t type, DbTable *tab, void *ptr, Uint size) ASSERT(ptr != 0); ASSERT(size == ERTS_ALC_DBG_BLK_SZ(ptr)); ERTS_DB_ALC_MEM_UPDATE_(tab, size, 0); - - ASSERT(((void *) tab) != ptr - || erts_atomic_read_nob(&tab->common.memory_size) == 0); - + ASSERT(((void *) tab) != ptr || + tab->common.counters.is_decentralized || + 0 == erts_flxctr_read_centralized(&tab->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID)); erts_free(type, ptr); } diff --git a/erts/emulator/beam/erl_db_catree.c b/erts/emulator/beam/erl_db_catree.c index 0402c6b7b4..962fe4c4f8 100644 --- a/erts/emulator/beam/erl_db_catree.c +++ b/erts/emulator/beam/erl_db_catree.c @@ -149,7 +149,12 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord); static void db_foreach_offheap_catree(DbTable *, void (*)(ErlOffHeap *, void *), void *); -static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds); +static SWord db_delete_all_objects_catree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb); +static Eterm db_delete_all_objects_get_nitems_from_holder_catree(Process* p, + Eterm nitems_holder); static int db_lookup_dbterm_catree(Process *, DbTable *, Eterm key, Eterm obj, DbUpdateHandle*); @@ -191,6 +196,7 @@ DbTableMethod db_catree = db_select_replace_continue_catree, db_take_catree, db_delete_all_objects_catree, + db_delete_all_objects_get_nitems_from_holder_catree, db_free_table_catree, db_free_table_continue_catree, db_print_catree, @@ -1357,6 +1363,8 @@ static SWord do_free_base_node_cont(DbTableCATree *tb, SWord num_left) PUSH_NODE(&tb->free_stack_elems, root); root = p; } else { + DEC_NITEMS((DbTable*)tb); + tb->nr_of_deleted_items++; free_term((DbTable*)tb, root); if (--num_left >= 0) { break; @@ -1397,6 +1405,7 @@ int db_create_catree(Process *p, DbTable *tbl) root = create_base_node(tb, NULL); tb->deletion = 0; tb->base_nodes_to_free_list = NULL; + tb->nr_of_deleted_items = 0; erts_atomic_init_relb(&(tb->root), (erts_aint_t)root); return DB_ERROR_NONE; } @@ -2050,6 +2059,7 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds) PUSH_NODE(&tb->free_stack_rnodes, GET_ROOT(tb)); tb->is_routing_nodes_freed = 0; tb->base_nodes_to_free_list = NULL; + tb->nr_of_deleted_items = 0; } if ( ! tb->is_routing_nodes_freed ) { reds = do_free_routing_nodes_catree_cont(tb, reds); @@ -2079,13 +2089,57 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds) return 1; } -static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds) +static +int db_catree_nr_of_items_deleted_wb_dtor(Binary *context_bin) { + (void)context_bin; + return 1; +} + +typedef struct { + Uint nr_of_deleted_items; +} DbCATreeNrOfItemsDeletedWb; + +static Eterm +create_and_install_num_of_deleted_items_wb_bin(Process *p, DbTableCATree *tb) +{ + Binary* bin = + erts_create_magic_binary(sizeof(DbCATreeNrOfItemsDeletedWb), + db_catree_nr_of_items_deleted_wb_dtor); + DbCATreeNrOfItemsDeletedWb* data = ERTS_MAGIC_BIN_DATA(bin); + Eterm* hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + Eterm mref = erts_mk_magic_ref(&hp, &MSO(p), bin); + data->nr_of_deleted_items = 0; + tb->nr_of_deleted_items_wb = bin; + erts_refc_inctest(&bin->intern.refc, 2); + return mref; +} + +static Eterm db_delete_all_objects_get_nitems_from_holder_catree(Process* p, + Eterm mref) { + Binary* bin = erts_magic_ref2bin(mref); + DbCATreeNrOfItemsDeletedWb* data = ERTS_MAGIC_BIN_DATA(bin); + return erts_make_integer(data->nr_of_deleted_items, p); +} + +static SWord db_delete_all_objects_catree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb) +{ + DbTableCATree *tb = &tbl->catree; + DbCATreeNrOfItemsDeletedWb* data; + if (!tb->deletion) { + *nitems_holder_wb = + create_and_install_num_of_deleted_items_wb_bin(p, tb); + } reds = db_free_table_continue_catree(tbl, reds); if (reds < 0) return reds; + data = ERTS_MAGIC_BIN_DATA(tb->nr_of_deleted_items_wb); + data->nr_of_deleted_items = tb->nr_of_deleted_items; + erts_bin_release(tb->nr_of_deleted_items_wb); db_create_catree(p, tbl); - erts_atomic_set_nob(&tbl->catree.common.nitems, 0); return reds; } diff --git a/erts/emulator/beam/erl_db_catree.h b/erts/emulator/beam/erl_db_catree.h index 418837be8e..fde442eaf5 100644 --- a/erts/emulator/beam/erl_db_catree.h +++ b/erts/emulator/beam/erl_db_catree.h @@ -87,6 +87,10 @@ typedef struct db_table_catree { CATreeNodeStack free_stack_rnodes; DbTableCATreeNode *base_nodes_to_free_list; int is_routing_nodes_freed; + /* The fields below are used by delete_all_objects and + select_delete(DeleteAll)*/ + Uint nr_of_deleted_items; + Binary* nr_of_deleted_items_wb; } DbTableCATree; typedef struct { @@ -104,7 +108,6 @@ void db_initialize_catree(void); int db_create_catree(Process *p, DbTable *tbl); - TreeDbTerm** catree_find_root(Eterm key, CATreeRootIterator*); TreeDbTerm** catree_find_next_from_pb_key_root(Eterm key, CATreeRootIterator*); diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index f225730029..ceaccf7e44 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -85,6 +85,14 @@ #include "erl_db_hash.h" +#define ADD_NITEMS(DB, TO_ADD) \ + erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD) +#define INC_NITEMS(DB) \ + erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define DEC_NITEMS(DB) \ + erts_flxctr_dec_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define RESET_NITEMS(DB) \ + erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) /* * The following symbols can be manipulated to "tune" the linear hash array */ @@ -121,7 +129,9 @@ : ((struct segment**) erts_atomic_read_nob(&(tb)->segtab))) #endif #define NACTIVE(tb) ((int)erts_atomic_read_nob(&(tb)->nactive)) -#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems)) +#define NITEMS(tb) \ + ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \ + ERTS_DB_TABLE_NITEMS_COUNTER_ID)) #define SLOT_IX_TO_SEG_IX(i) (((i)+(EXT_SEGSZ-FIRST_SEGSZ)) >> EXT_SEGSZ_EXP) @@ -444,7 +454,12 @@ static void db_foreach_offheap_hash(DbTable *, void (*)(ErlOffHeap *, void *), void *); -static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds); +static SWord db_delete_all_objects_hash(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb); +static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p, + Eterm nitems_holder); #ifdef HARDDEBUG static void db_check_table_hash(DbTableHash *tb); #endif @@ -548,6 +563,7 @@ DbTableMethod db_hash = db_select_replace_continue_hash, db_take_hash, db_delete_all_objects_hash, + db_delete_all_objects_get_nitems_from_holder_hash, db_free_empty_table_hash, db_free_table_continue_hash, db_print_hash, @@ -806,7 +822,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail) if (tb->common.status & DB_SET) { HashDbTerm* bnext = b->next; if (is_pseudo_deleted(b)) { - erts_atomic_inc_nob(&tb->common.nitems); + INC_NITEMS(tb); b->pseudo_deleted = 0; } else if (key_clash_fail) { @@ -835,7 +851,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail) do { if (db_eq(&tb->common,obj,&q->dbterm)) { if (is_pseudo_deleted(q)) { - erts_atomic_inc_nob(&tb->common.nitems); + INC_NITEMS(tb); q->pseudo_deleted = 0; ASSERT(q->hvalue == hval); if (q != b) { /* must move to preserve key insertion order */ @@ -858,7 +874,7 @@ Lnew: q->pseudo_deleted = 0; q->next = b; *bp = q; - nitems = erts_atomic_inc_read_nob(&tb->common.nitems); + nitems = INC_NITEMS(tb); WUNLOCK_HASH(lck); { int nactive = NACTIVE(tb); @@ -1056,7 +1072,7 @@ int db_erase_hash(DbTable *tbl, Eterm key, Eterm *ret) } WUNLOCK_HASH(lck); if (nitems_diff) { - erts_atomic_add_nob(&tb->common.nitems, nitems_diff); + ADD_NITEMS(tb, nitems_diff); try_shrink(tb); } free_term_list(tb, free_us); @@ -1117,7 +1133,7 @@ static int db_erase_object_hash(DbTable *tbl, Eterm object, Eterm *ret) } WUNLOCK_HASH(lck); if (nitems_diff) { - erts_atomic_add_nob(&tb->common.nitems, nitems_diff); + ADD_NITEMS(tb, nitems_diff); try_shrink(tb); } free_term_list(tb, free_us); @@ -2023,7 +2039,7 @@ static int select_delete_on_match_res(traverse_context_t* ctx_base, Sint slot_ix del->next = ctx->free_us; ctx->free_us = del; } - erts_atomic_dec_nob(&ctx->base.tb->common.nitems); + DEC_NITEMS(ctx->base.tb); return 1; } @@ -2300,7 +2316,7 @@ static int db_take_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret) } WUNLOCK_HASH(lck); if (nitems_diff) { - erts_atomic_add_nob(&tb->common.nitems, nitems_diff); + ADD_NITEMS(tb, nitems_diff); try_shrink(tb); } free_term_list(tb, free_us); @@ -2360,7 +2376,7 @@ static SWord db_mark_all_deleted_hash(DbTable *tbl, SWord reds) fixdel->slot = NACTIVE(tb) - 1; fixdel->all = 1; fixdel->trap = 0; - erts_atomic_set_nob(&tb->common.nitems, 0); + RESET_NITEMS(tb); return loops < 0 ? 0 : loops / LOOPS_PER_REDUCTION; } @@ -2468,7 +2484,8 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds) (void*)tb->locks, sizeof(DbTableHashFineLocks)); tb->locks = NULL; } - ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable)); + ASSERT(sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters, + ERTS_DB_TABLE_MEM_COUNTER_ID)); return reds; /* Done */ } @@ -3080,7 +3097,7 @@ db_lookup_dbterm_hash(Process *p, DbTable *tbl, Eterm key, Eterm obj, ASSERT(q->hvalue == hval); q->pseudo_deleted = 0; *bp = b = q; - erts_atomic_inc_nob(&tb->common.nitems); + INC_NITEMS(tb); } HRelease(p, hend, htop); @@ -3123,7 +3140,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle) } WUNLOCK_HASH(lck); - erts_atomic_dec_nob(&tb->common.nitems); + DEC_NITEMS(tb); try_shrink(tb); } else { if (handle->flags & DB_MUST_RESIZE) { @@ -3132,7 +3149,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle) } if (handle->flags & DB_INC_TRY_GROW) { int nactive; - int nitems = erts_atomic_inc_read_nob(&tb->common.nitems); + int nitems = INC_NITEMS(tb); WUNLOCK_HASH(lck); nactive = NACTIVE(tb); @@ -3153,8 +3170,17 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle) return; } -static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds) +static SWord db_delete_all_objects_hash(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb) { + if (nitems_holder_wb != NULL) { + Uint nr_of_items = + erts_flxctr_read_centralized(&tbl->common.counters, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + *nitems_holder_wb = erts_make_integer(nr_of_items, p); + } if (IS_FIXED(tbl)) { reds = db_mark_all_deleted_hash(tbl, reds); } else { @@ -3163,11 +3189,16 @@ static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds) return reds; db_create_hash(p, tbl); - erts_atomic_set_nob(&tbl->hash.common.nitems, 0); + RESET_NITEMS(tbl); } return reds; } +static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p, + Eterm nitems_holder){ + return nitems_holder; +} + void db_foreach_offheap_hash(DbTable *tbl, void (*func)(ErlOffHeap *, void *), void * arg) diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c index f9ba04f399..492ea81b63 100644 --- a/erts/emulator/beam/erl_db_tree.c +++ b/erts/emulator/beam/erl_db_tree.c @@ -51,9 +51,20 @@ #include "erl_db_tree_util.h" #define GETKEY_WITH_POS(Keypos, Tplp) (*((Tplp) + Keypos)) -#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems)) -#define TREE_MAX_ELEMENTS 0xFFFFFFFFUL +#define NITEMS_CENTRALIZED(tb) \ + ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \ + ERTS_DB_TABLE_NITEMS_COUNTER_ID)) +#define ADD_NITEMS(DB, TO_ADD) \ + erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD) +#define INC_NITEMS(DB) \ + erts_flxctr_inc(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define INC_NITEMS_CENTRALIZED(DB) \ + erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define RESET_NITEMS(DB) \ + erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) +#define IS_CENTRALIZED_CTR(tb) (!(tb)->common.counters.is_decentralized) +#define APPROX_MEM_CONSUMED(tb) erts_flxctr_read_approx(&(tb)->common.counters, ERTS_DB_TABLE_MEM_COUNTER_ID) #define TOPN_NODE(Dtt, Pos) \ (((Pos) < Dtt->pos) ? \ @@ -296,7 +307,7 @@ int tree_balance_right(TreeDbTerm **this); static int delsub(TreeDbTerm **this); static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, Sint slot, DbTable *tb, DbTableTree *stack_container, - CATreeRootIterator *iter); + CATreeRootIterator *iter, int* is_EOT); static TreeDbTerm *find_node(DbTableCommon *tb, TreeDbTerm *root, Eterm key, DbTableTree *stack_container); static TreeDbTerm **find_node2(DbTableCommon *tb, TreeDbTerm **root, Eterm key); @@ -433,8 +444,12 @@ static void db_foreach_offheap_tree(DbTable *, void (*)(ErlOffHeap *, void *), void *); -static SWord db_delete_all_objects_tree(Process* p, DbTable* tbl, SWord reds); - +static SWord db_delete_all_objects_tree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb); +static Eterm db_delete_all_objects_get_nitems_from_holder_tree(Process* p, + Eterm nitems_holder); #ifdef HARDDEBUG static void db_check_table_tree(DbTable *tbl); #endif @@ -478,6 +493,7 @@ DbTableMethod db_tree = db_select_replace_continue_tree, db_take_tree, db_delete_all_objects_tree, + db_delete_all_objects_get_nitems_from_holder_tree, db_free_empty_table_tree, db_free_table_continue_tree, db_print_tree, @@ -595,7 +611,8 @@ int db_last_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, } if (stack) { PUSH_NODE(stack, this); - stack->slot = NITEMS(tbl); + /* Always centralized counters when static stack is used */ + stack->slot = NITEMS_CENTRALIZED(tbl); release_stack(tbl,stack_container,stack); } *ret = db_copy_key(p, tbl, &this->dbterm); @@ -661,10 +678,7 @@ int db_put_tree_common(DbTableCommon *tb, TreeDbTerm **root, Eterm obj, for (;;) if (!*this) { /* Found our place */ state = 1; - if (erts_atomic_inc_read_nob(&tb->nitems) >= TREE_MAX_ELEMENTS) { - erts_atomic_dec_nob(&tb->nitems); - return DB_ERROR_SYSRES; - } + INC_NITEMS(((DbTable*)tb)); *this = new_dbterm(tb, obj); (*this)->balance = 0; (*this)->left = (*this)->right = NULL; @@ -888,7 +902,7 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, TreeDbTerm *st; Eterm *hp, *hend; Eterm copy; - + int is_EOT = 0; /* * The notion of a "slot" is not natural in a tree, but we try to * simulate it by giving the n'th node in the tree instead. @@ -899,10 +913,10 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, if (is_not_small(slot_term) || ((slot = signed_val(slot_term)) < 0) || - (slot > NITEMS(tbl))) + (IS_CENTRALIZED_CTR(tbl) && slot > NITEMS_CENTRALIZED(tbl))) return DB_ERROR_BADPARAM; - if (slot == NITEMS(tbl)) { + if (IS_CENTRALIZED_CTR(tbl) && slot == NITEMS_CENTRALIZED(tbl)) { *ret = am_EOT; return DB_ERROR_NONE; } @@ -912,7 +926,11 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, * are counted from 1 and up. */ ++slot; - st = slot_search(p, root, slot, tbl, stack_container, iter); + st = slot_search(p, root, slot, tbl, stack_container, iter, &is_EOT); + if (is_EOT) { + *ret = am_EOT; + return DB_ERROR_NONE; + } if (st == NULL) { *ret = am_false; return DB_ERROR_UNSPEC; @@ -2244,7 +2262,8 @@ void db_print_tree_common(fmtfn_t to, void *to_arg, erts_print(to, to_arg, "\n" "------------------------------------------------\n"); #else - erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", NITEMS(tbl)); + erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", + erts_flxctr_read_approx(&tbl->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)); #endif } @@ -2281,24 +2300,41 @@ static SWord db_free_table_continue_tree(DbTable *tbl, SWord reds) (DbTable *) tb, (void *) tb->static_stack.array, sizeof(TreeDbTerm *) * STACK_NEED); - ASSERT((erts_atomic_read_nob(&tb->common.memory_size) - == sizeof(DbTable)) || - (erts_atomic_read_nob(&tb->common.memory_size) - == (sizeof(DbTable) + sizeof(DbFixation)))); + ASSERT(erts_flxctr_is_snapshot_ongoing(&tb->common.counters) || + ((APPROX_MEM_CONSUMED(tb) + == sizeof(DbTable)) || + (APPROX_MEM_CONSUMED(tb) + == (sizeof(DbTable) + sizeof(DbFixation))))); } return reds; } -static SWord db_delete_all_objects_tree(Process* p, DbTable* tbl, SWord reds) +static SWord db_delete_all_objects_tree(Process* p, + DbTable* tbl, + SWord reds, + Eterm* nitems_holder_wb) { + if (nitems_holder_wb != NULL) { + Uint nr_of_items = + erts_flxctr_read_centralized(&tbl->common.counters, + ERTS_DB_TABLE_NITEMS_COUNTER_ID); + *nitems_holder_wb = erts_make_integer(nr_of_items, p); + } reds = db_free_table_continue_tree(tbl, reds); if (reds < 0) return reds; db_create_tree(p, tbl); - erts_atomic_set_nob(&tbl->tree.common.nitems, 0); + RESET_NITEMS(tbl); return reds; } +static Eterm db_delete_all_objects_get_nitems_from_holder_tree(Process* p, + Eterm holder) +{ + (void)p; + return holder; +} + static void do_db_tree_foreach_offheap(TreeDbTerm *, void (*)(ErlOffHeap *, void *), void *); @@ -2383,7 +2419,7 @@ static TreeDbTerm *linkout_tree(DbTableCommon *tb, TreeDbTerm **root, tstack[tpos++] = this; state = delsub(this); } - erts_atomic_dec_nob(&tb->nitems); + DEC_NITEMS(((DbTable*)tb)); break; } } @@ -2450,7 +2486,7 @@ static TreeDbTerm *linkout_object_tree(DbTableCommon *tb, TreeDbTerm **root, tstack[tpos++] = this; state = delsub(this); } - erts_atomic_dec_nob(&tb->nitems); + DEC_NITEMS(((DbTable*)tb)); break; } } @@ -2745,7 +2781,8 @@ static int delsub(TreeDbTerm **this) static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, Sint slot, DbTable *tb, DbTableTree *stack_container, - CATreeRootIterator *iter) + CATreeRootIterator *iter, + int* is_EOT) { TreeDbTerm *this; TreeDbTerm *tmp; @@ -2837,8 +2874,12 @@ static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, break; next_root: - if (!iter) + if (!iter) { + if (stack->slot == (slot-1)) { + *is_EOT = 1; + } break; /* EOT */ + } ASSERT(slot > stack->slot); if (lastobj) { @@ -2846,8 +2887,12 @@ next_root: lastobj = NULL; } pp = catree_find_next_root(iter, &lastkey); - if (!pp) + if (!pp) { + if (stack->slot == (slot-1)) { + *is_EOT = 1; + } break; /* EOT */ + } root = *pp; stack->pos = 0; find_next(&tb->common, root, stack, lastkey); diff --git a/erts/emulator/beam/erl_db_tree_util.h b/erts/emulator/beam/erl_db_tree_util.h index 02df74678d..ba4a8f79e5 100644 --- a/erts/emulator/beam/erl_db_tree_util.h +++ b/erts/emulator/beam/erl_db_tree_util.h @@ -25,6 +25,8 @@ ** Internal functions and macros used by both the CA tree and the AVL tree */ + +#if defined(ARCH_32) /* ** A stack of this size is enough for an AVL tree with more than ** 0xFFFFFFFF elements. May be subject to change if @@ -34,8 +36,19 @@ ** Where n denotes the number of nodes, h(n) the height of the tree ** with n nodes and log is the binary logarithm. */ - #define STACK_NEED 50 +#elif defined(ARCH_64) +/* +** A stack of this size is enough for an AVL tree with more than +** 2^61 elements. +** The Maximal height of an AVL tree is calculated as above. +*/ +#define STACK_NEED 90 +#else +#error "Unsported architecture" +#endif + + #define PUSH_NODE(Dtt, Tdt) \ ((Dtt)->array[(Dtt)->pos++] = Tdt) @@ -50,6 +63,9 @@ #define EMPTY_NODE(Dtt) (TOP_NODE(Dtt) == NULL) +#define DEC_NITEMS(DB) \ + erts_flxctr_dec(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) + static ERTS_INLINE void free_term(DbTable *tb, TreeDbTerm* p) { db_free_term(tb, p, offsetof(TreeDbTerm, dbterm)); diff --git a/erts/emulator/beam/erl_db_util.h b/erts/emulator/beam/erl_db_util.h index e3d3c0e804..97f2848679 100644 --- a/erts/emulator/beam/erl_db_util.h +++ b/erts/emulator/beam/erl_db_util.h @@ -21,6 +21,7 @@ #ifndef _DB_UTIL_H #define _DB_UTIL_H +#include "erl_flxctr.h" #include "global.h" #include "erl_message.h" #include "erl_bif_unique.h" @@ -207,8 +208,12 @@ typedef struct db_table_method enum DbIterSafety*); int (*db_take)(Process *, DbTable *, Eterm, Eterm *); - SWord (*db_delete_all_objects)(Process* p, DbTable* db, SWord reds); - + SWord (*db_delete_all_objects)(Process* p, + DbTable* db, + SWord reds, + Eterm* nitems_holder_wb); + Eterm (*db_delete_all_objects_get_nitems_from_holder)(Process* p, + Eterm nitems_holder); int (*db_free_empty_table)(DbTable* db); SWord (*db_free_table_continue)(DbTable* db, SWord reds); @@ -257,6 +262,9 @@ typedef struct { DbTable *prev; } DbTableList; +#define ERTS_DB_TABLE_NITEMS_COUNTER_ID 0 +#define ERTS_DB_TABLE_MEM_COUNTER_ID 1 + /* * This structure contains data for all different types of database * tables. Note that these fields must match the same fields @@ -281,8 +289,11 @@ typedef struct db_table_common { Eterm the_name; /* an atom */ Binary *btid; DbTableMethod* meth; /* table methods */ - erts_atomic_t nitems; /* Total number of items in table */ - erts_atomic_t memory_size;/* Total memory size. NOTE: in bytes! */ + /* The ErtsFlxCtr below contains: + * - Total number of items in table + * - Total memory size (NOTE: in bytes!) */ + ErtsFlxCtr counters; + char extra_for_flxctr[ERTS_FLXCTR_NR_OF_EXTRA_BYTES(2)]; struct { /* Last fixation time */ ErtsMonotonicTime monotonic; ErtsMonotonicTime offset; diff --git a/erts/emulator/beam/erl_flxctr.c b/erts/emulator/beam/erl_flxctr.c new file mode 100644 index 0000000000..35f4a21508 --- /dev/null +++ b/erts/emulator/beam/erl_flxctr.c @@ -0,0 +1,370 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2019. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/* + * Author: Kjell Winblad + */ + +#include "erl_flxctr.h" + +static int reader_groups_array_size = 0; +#define ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS (reader_groups_array_size) + +static int erts_flxctr_read_ctx_bin_dtor(Binary *context_bin); +static int erts_flxctr_wait_dtor(Binary *context_bin); + +typedef struct { + ErtsThrPrgrLaterOp later_op; + Process* process; + ErtsFlxCtrDecentralizedCtrArray* array; + ErtsFlxCtrDecentralizedCtrArray* next_array; + ErtsAlcType_t alloc_type; + int nr_of_counters; + Sint result[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE]; +} DecentralizedReadSnapshotInfo; + +typedef enum { + ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING = 0, + ERTS_FLXCTR_SNAPSHOT_ONGOING = 1, + ERTS_FLXCTR_SNAPSHOT_ONGOING_TP_THREAD_DO_FREE = 2 +} erts_flxctr_snapshot_status; + +static void +thr_prg_wake_up_and_count(void* bin_p) +{ + Binary* bin = bin_p; + DecentralizedReadSnapshotInfo* info = ERTS_MAGIC_BIN_DATA(bin); + Process* p = info->process; + ErtsFlxCtrDecentralizedCtrArray* array = info->array; + ErtsFlxCtrDecentralizedCtrArray* next = info->next_array; + int i, sched; + /* Reset result array */ + for (i = 0; i < info->nr_of_counters; i++) { + info->result[i] = 0; + } + /* Read result from snapshot */ + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + for (i = 0; i < info->nr_of_counters; i++) { + info->result[i] = info->result[i] + + erts_atomic_read_nob(&array->array[sched].counters[i]); + } + } + /* Update the next decentralized counter array */ + for (i = 0; i < info->nr_of_counters; i++) { + erts_atomic_add_nob(&next->array[0].counters[i], info->result[i]); + } + /* Announce that the snapshot is done */ + { + Sint expected = ERTS_FLXCTR_SNAPSHOT_ONGOING; + if (expected != erts_atomic_cmpxchg_mb(&next->snapshot_status, + ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING, + expected)) { + /* The CAS failed which means that this thread need to free the next array. */ + erts_free(info->alloc_type, next->block_start); + } + } + /* Resume the process that requested the snapshot */ + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (!ERTS_PROC_IS_EXITING(p)) { + erts_resume(p, ERTS_PROC_LOCK_STATUS); + } + /* Free the memory that is no longer needed */ + erts_free(info->alloc_type, array->block_start); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_dec_refc(p); + erts_bin_release(bin); +} + +typedef struct { + ErtsThrPrgrLaterOp later_op; + Process* process; +} ErtsFlxCtrWakeUpLaterInfo; + +static void +thr_prg_wake_up_later(void* bin_p) +{ + Binary* bin = bin_p; + ErtsFlxCtrWakeUpLaterInfo* info = ERTS_MAGIC_BIN_DATA(bin); + Process* p = info->process; + /* Resume the requesting process */ + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (!ERTS_PROC_IS_EXITING(p)) { + erts_resume(p, ERTS_PROC_LOCK_STATUS); + } + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + /* Free data */ + erts_proc_dec_refc(p); + erts_bin_release(bin); +} + +static +int erts_flxctr_read_ctx_bin_dtor(Binary *context_bin) { + (void)context_bin; + return 1; +} + +static +int erts_flxctr_wait_dtor(Binary *context_bin) { + (void)context_bin; + return 1; +} + +static void suspend_until_thr_prg(Process* p) +{ + Binary* state_bin; + ErtsFlxCtrWakeUpLaterInfo* info; + state_bin = erts_create_magic_binary(sizeof(ErtsFlxCtrWakeUpLaterInfo), + erts_flxctr_wait_dtor); + info = ERTS_MAGIC_BIN_DATA(state_bin); + info->process = p; + erts_refc_inctest(&state_bin->intern.refc, 1); + erts_suspend(p, ERTS_PROC_LOCK_MAIN, NULL); + erts_proc_inc_refc(p); + ERTS_VBUMP_ALL_REDS(p); + erts_schedule_thr_prgr_later_op(thr_prg_wake_up_later, state_bin, &info->later_op); +} + + +static ErtsFlxCtrDecentralizedCtrArray* +create_decentralized_ctr_array(ErtsAlcType_t alloc_type, Uint nr_of_counters) { + /* Allocate an ErtsFlxCtrDecentralizedCtrArray and make sure that + the array field is located at the start of a cache line */ + char* bytes = + erts_alloc(alloc_type, + sizeof(ErtsFlxCtrDecentralizedCtrArray) + + (sizeof(ErtsFlxCtrDecentralizedCtrArrayElem) * + ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS) + + ERTS_CACHE_LINE_SIZE); + void* block_start = bytes; + int bytes_to_next_cacheline_border; + ErtsFlxCtrDecentralizedCtrArray* array; + int i, sched; + bytes = &bytes[offsetof(ErtsFlxCtrDecentralizedCtrArray, array)]; + bytes_to_next_cacheline_border = + ERTS_CACHE_LINE_SIZE - (((Uint)bytes) % ERTS_CACHE_LINE_SIZE); + array = (ErtsFlxCtrDecentralizedCtrArray*) + (&bytes[bytes_to_next_cacheline_border - + (int)offsetof(ErtsFlxCtrDecentralizedCtrArray, array)]); + ASSERT(((Uint)array->array) % ERTS_CACHE_LINE_SIZE == 0); + ASSERT(((Uint)array - (Uint)block_start) <= ERTS_CACHE_LINE_SIZE); + /* Initialize fields */ + erts_atomic_init_nob(&array->snapshot_status, ERTS_FLXCTR_SNAPSHOT_ONGOING); + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + for (i = 0; i < nr_of_counters; i++) { + erts_atomic_init_nob(&array->array[sched].counters[i], 0); + } + } + array->block_start = block_start; + return array; +} + +void erts_flxctr_setup(int decentralized_counter_groups) +{ + reader_groups_array_size = decentralized_counter_groups+1; +} + +void erts_flxctr_init(ErtsFlxCtr* c, + int is_decentralized, + Uint nr_of_counters, + ErtsAlcType_t alloc_type) +{ + ASSERT(nr_of_counters <= ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE); + c->is_decentralized = is_decentralized; + c->nr_of_counters = nr_of_counters; + if (c->is_decentralized) { + ErtsFlxCtrDecentralizedCtrArray* array = + create_decentralized_ctr_array(alloc_type, nr_of_counters); + erts_atomic_set_nob(&array->snapshot_status, + ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING); + erts_atomic_init_nob(&c->u.counters_ptr, (Sint)array); + ASSERT(((Uint)array->array) % ERTS_CACHE_LINE_SIZE == 0); + } else { + int i; + for (i = 0; i < nr_of_counters; i++) { + erts_atomic_init_nob(&c->u.counters[i], 0); + } + } +} + +void erts_flxctr_destroy(ErtsFlxCtr* c, ErtsAlcType_t type) +{ + if (c->is_decentralized) { + if (erts_flxctr_is_snapshot_ongoing(c)) { + ErtsFlxCtrDecentralizedCtrArray* array = + ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + /* Try to delegate the resposibilty of freeing to + thr_prg_wake_up_and_count */ + Sint expected = ERTS_FLXCTR_SNAPSHOT_ONGOING; + if (expected != + erts_atomic_cmpxchg_mb(&array->snapshot_status, + ERTS_FLXCTR_SNAPSHOT_ONGOING_TP_THREAD_DO_FREE, + expected)) { + /* The delegation was unsuccessful which means that no + snapshot is ongoing anymore and the freeing needs + to be done here */ + ERTS_ASSERT(!erts_flxctr_is_snapshot_ongoing(c)); + erts_free(type, array->block_start); + } + } else { + erts_free(type, ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c)->block_start); + } + } +} + +ErtsFlxCtrSnapshotResult +erts_flxctr_snapshot(ErtsFlxCtr* c, + ErtsAlcType_t alloc_type, + Process* p) +{ + if (c->is_decentralized) { + ErtsFlxCtrDecentralizedCtrArray* array = ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + if (erts_flxctr_is_snapshot_ongoing(c)) { + /* Let the caller try again later */ + ErtsFlxCtrSnapshotResult res = + {.type = ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP}; + suspend_until_thr_prg(p); + return res; + } else { + Eterm* hp; + Binary* state_bin; + Eterm state_mref; + DecentralizedReadSnapshotInfo* info; + ErtsFlxCtrDecentralizedCtrArray* new_array = + create_decentralized_ctr_array(alloc_type, c->nr_of_counters); + int success = + ((Sint)array) == erts_atomic_cmpxchg_mb(&c->u.counters_ptr, + (Sint)new_array, + (Sint)array); + if (!success) { + /* Let the caller try again later */ + ErtsFlxCtrSnapshotResult res = + {.type = ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP}; + suspend_until_thr_prg(p); + erts_free(alloc_type, new_array->block_start); + return res; + } + /* Create binary with info about the operation that can be + sent to the caller and to a thread progress function */ + state_bin = + erts_create_magic_binary(sizeof(DecentralizedReadSnapshotInfo), + erts_flxctr_read_ctx_bin_dtor); + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + state_mref = erts_mk_magic_ref(&hp, &MSO(p), state_bin); + info = ERTS_MAGIC_BIN_DATA(state_bin); + info->alloc_type = alloc_type; + info->array = array; + info->next_array = new_array; + info->process = p; + info->nr_of_counters = c->nr_of_counters; + erts_proc_inc_refc(p); + erts_refc_inctest(&state_bin->intern.refc, 2); + erts_suspend(p, ERTS_PROC_LOCK_MAIN, NULL); + ERTS_VBUMP_ALL_REDS(p); + erts_schedule_thr_prgr_later_op(thr_prg_wake_up_and_count, + state_bin, + &info->later_op); + { + ErtsFlxCtrSnapshotResult res = { + .type = ERTS_FLXCTR_GET_RESULT_AFTER_TRAP, + .trap_resume_state = state_mref}; + return res; + } + } + } else { + ErtsFlxCtrSnapshotResult res; + int i; + res.type = ERTS_FLXCTR_DONE; + for (i = 0; i < c->nr_of_counters; i++){ + res.result[i] = erts_flxctr_read_centralized(c, i); + } + return res; + } +} + + +Sint erts_flxctr_get_snapshot_result_after_trap(Eterm result_holder, + Uint counter_nr) +{ + Binary* bin = erts_magic_ref2bin(result_holder); + DecentralizedReadSnapshotInfo* data = ERTS_MAGIC_BIN_DATA(bin);; + return data->result[counter_nr]; +} + +int erts_flxctr_is_snapshot_result(Eterm term) +{ + if (is_internal_magic_ref(term)) { + Binary* bin = erts_magic_ref2bin(term); + return ERTS_MAGIC_BIN_DESTRUCTOR(bin) == erts_flxctr_read_ctx_bin_dtor; + } else return 0; +} + +Sint erts_flxctr_read_approx(ErtsFlxCtr* c, + Uint counter_nr) +{ + if (c->is_decentralized) { + ErtsFlxCtrDecentralizedCtrArray* counter = ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + Sint sum = 0; + int sched; + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + sum = sum + erts_atomic_read_nob(&counter->array[sched].counters[counter_nr]); + } + return sum; + } else { + return erts_flxctr_read_centralized(c, counter_nr); + } +} + +int erts_flxctr_is_snapshot_ongoing(ErtsFlxCtr* c) +{ + return c->is_decentralized && + (ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING != + erts_atomic_read_acqb(&ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c)->snapshot_status)); +} + +int erts_flxctr_suspend_until_thr_prg_if_snapshot_ongoing(ErtsFlxCtr* c, Process* p) +{ + if (erts_flxctr_is_snapshot_ongoing(c)) { + suspend_until_thr_prg(p); + return 1; + } else { + return 0; + } +} + +void erts_flxctr_reset(ErtsFlxCtr* c, + Uint counter_nr) +{ + if (c->is_decentralized) { + int sched; + ErtsFlxCtrDecentralizedCtrArray* counter = + ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c); + for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) { + erts_atomic_set_nob(&counter->array[sched].counters[counter_nr], 0); + } + } else { + erts_atomic_set_nob(&c->u.counters[counter_nr], 0); + } +} + + +void erts_flxctr_set_slot(int group) { + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + esdp->flxctr_slot_no = group; +} diff --git a/erts/emulator/beam/erl_flxctr.h b/erts/emulator/beam/erl_flxctr.h new file mode 100644 index 0000000000..5cab02b9eb --- /dev/null +++ b/erts/emulator/beam/erl_flxctr.h @@ -0,0 +1,406 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2019. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/** + * @file erl_flxctr.h + * + * @brief This file contains the API of a flexible counter. The + * counter can be configured during its initialization to be + * centralized or decentralized. The centralized configuration makes + * it possible to read the counter value extremely efficiently, but + * updates of the counter value can easily cause contention. The + * decentralized configuration has the reverse trade-off (i.e., + * updates are efficient and scalable but reading the counter value is + * slow and may cause contention). + * + * @author Kjell Winblad + */ + +#ifndef ERL_FLXCTR_H__ +#define ERL_FLXCTR_H__ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "error.h" +#include "bif.h" +#include "big.h" +#include "erl_binary.h" +#include "bif.h" +#include <stddef.h> + +/* Public Interface */ + +#define ERTS_MAX_FLXCTR_GROUPS 256 +#define ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic_t)) + +typedef struct { + int nr_of_counters; + int is_decentralized; + union { + erts_atomic_t counters_ptr; + erts_atomic_t counters[1]; + } u; +} ErtsFlxCtr; + +#define ERTS_FLXCTR_NR_OF_EXTRA_BYTES(NR_OF_COUNTERS) \ + ((NR_OF_COUNTERS-1) * sizeof(erts_atomic_t)) + +/* Called by early_init */ +void erts_flxctr_setup(int decentralized_counter_groups); + +/** + * @brief Initializes an ErtsFlxCtr. The macro + * ERTS_FLXCTR_NR_OF_EXTRA_BYTES should be used to determine how much + * extra space that needs to be allocated directly after the + * ErtsFlxCtr when is_decentralized is set to zero. Each ErtsFlxCtr + * instance may contain up to ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE + * counters. These counters are numbered from zero to + * (ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE-1). Most of the functions in + * this module take a parameter named counter_nr that controls which + * of the ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE counters in the given + * ErtsFlxCtr that should be operated on. + * + * @param c The counter to initialize + * @param is_decentralized Non-zero value to make c decentralized + * @param nr_of_counters The number of counters included in c + * (max ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE) + * @param alloc_type + */ +void erts_flxctr_init(ErtsFlxCtr* c, + int is_decentralized, + Uint nr_of_counters, + ErtsAlcType_t alloc_type); + +/** + * @brief Destroys an initialized counter. + * + * @param c The counter that should be destroyed + * @param alloc_type The allocation type (needs to be the same as the + * one passed to erts_flxctr_init when c was + * initialized) + */ +void erts_flxctr_destroy(ErtsFlxCtr* c, ErtsAlcType_t alloc_type); + +/** + * @brief Adds to_add to the counter with counter_nr in c + * + * @param c the ErtsFlxCtr to operate on + * @param counter_nr The number of the counter in c to modify + * @param to_add The amount that should be added to the specified counter + */ +ERTS_GLB_INLINE +void erts_flxctr_add(ErtsFlxCtr* c, + Uint counter_nr, + int to_add); + +/** + * @brief Increases the specified counter by 1 + * + * @param c The ErtsFlxCtr instance to operate on + * @param counter_nr The number of the counter within c to operate on + */ +ERTS_GLB_INLINE +void erts_flxctr_inc(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief Decreases the specified counter by 1 + */ +ERTS_GLB_INLINE +void erts_flxctr_dec(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function tries to return the current value of the + * specified counter but may return an incorrect result if the counter + * is decentralized and other threads are accessing the counter + * concurrently. + * + * @param c The ErtsFlxCtr instance to operate on + * @param counter_nr The number of the counter within c to operate on + * + * @return A snapshot of the specifed counter if c is centralized or a + * possibly incorrect estimate of the counter value if c is + * decentralized + */ +Sint erts_flxctr_read_approx(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function can only be used together with an ErtsFlxCtr + * that is configured to be centralized. The function increments the + * specified counter by 1 and returns the value of the counter after + * the increment. + */ +ERTS_GLB_INLINE +Sint erts_flxctr_inc_read_centralized(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function can only be used together with a ErtsFlxCtr + * that is configured to be centralized. The function decrements the + * specified counter by 1 and returns the value of the counter after + * the operation. + */ +ERTS_GLB_INLINE +Sint erts_flxctr_dec_read_centralized(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief This function can only be used together with an ErtsFlxCtr + * that is configured to be centralized. The function returns the + * current value of the specified counter. + */ +ERTS_GLB_INLINE +Sint erts_flxctr_read_centralized(ErtsFlxCtr* c, + Uint counter_nr); + + +typedef enum { + ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP, + ERTS_FLXCTR_DONE, + ERTS_FLXCTR_GET_RESULT_AFTER_TRAP +} ErtsFlxctrSnapshotResultType; + +typedef struct { + ErtsFlxctrSnapshotResultType type; + Eterm trap_resume_state; + Sint result[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE]; +} ErtsFlxCtrSnapshotResult; + +/** + * @brief This function initiates an atomic snapshot of an ErtsFlxCtr + * to read out the values of one or more of the counters that are + * stored in the given ErtsFlxCtr. The caller needs to perform + * different actions after the return of this function depending on + * the value of the type field in the returned struct: + * + * - The caller needs to trap and try again after the trap if the + * return value has the type ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP. + * + * - The caller can get the result directly from the result field of + * the returned struct if the return value has the type + * ERTS_FLXCTR_DONE. The value at index i in the result field + * correspond to counter number i. + * + * - Finally, if the return value has the type + * ERTS_FLXCTR_GET_RESULT_AFTER_TRAP, then the caller needs to save + * the value of the field trap_resume_state from the returned struct + * and trap. After the trap, the values of the counters can be + * obtained by using the function + * erts_flxctr_get_snapshot_result_after_trap. Note that the + * function erts_flxctr_is_snapshot_result can be used to check if a + * value is obtained from the trap_resume_state field in the + * returned struct (this can be useful when the calling function + * wakes up again after the trap). + * + * The snapshot operation that is initiated by this function should be + * considered to be ongoing from the issuing of this function until a + * struct with the type field set to ERTS_FLXCTR_DONE has been + * returned from the function or until the caller of this function has + * woken up after trapping. + * + * @param c The ErtsFlxCtr that the snapshot shall be taken from + * @param alloc_type The allocation type (needs to be the same as the + * type passed to erts_flxctr_init when c was + * initialized) + * @param p The Erlang process that is doing the call + * + * @return See the description above + * + */ +ErtsFlxCtrSnapshotResult +erts_flxctr_snapshot(ErtsFlxCtr* c, + ErtsAlcType_t alloc_type, + Process* p); + +/** + * @brief Checks if the parameter term is a snapshot result (i.e., + * something obtained from the trap_resume_state field of an + * ErtsFlxCtrSnapshotResult struct that has been returned from + * erts_flxctr_snapshot). + * + * @param term The term to check + * + * @return A nonzero value iff the term is a snapshot result + */ +int erts_flxctr_is_snapshot_result(Eterm term); + +/** + * @brief Returns the result of a snapshot for a counter given a + * snapshot result returned by a call to erts_flxctr_snapshot (i.e., + * the value stored in the trap_resume_state field of a struct + * returned by erts_flxctr_snapshot). The caller needs to trap between + * the return of erts_flxctr_snapshot and the call to this function. + */ +Sint erts_flxctr_get_snapshot_result_after_trap(Eterm trap_resume_state, + Uint counter_nr); + +/** + * @brief Resets the specified counter to 0. This function is unsafe + * to call while a snapshot operation may be active (initiated with + * the erts_flxctr_snapshot function). + */ +void erts_flxctr_reset(ErtsFlxCtr* c, + Uint counter_nr); + +/** + * @brief Checks if a snapshot operation is active (snapshots are + * initiated with the erts_flxctr_snapshot function). + * + * @return nonzero value iff a snapshot was active at some point + * between the invocation and return of the function + */ +int erts_flxctr_is_snapshot_ongoing(ErtsFlxCtr* c); + +/** + * @brief This function checks if a snapshot operation is ongoing + * (snapshots are initiated with the erts_flxctr_snapshot function) + * and suspend the given process until thread progress has happened if + * it detected an ongoing snapshot operation. The caller needs to trap + * if a non-zero value is returned. + * + * @param c The ErtsFlxCtr to check + * @param p The calling process + * + * @return nonzero value if the given process has got suspended + */ +int erts_flxctr_suspend_until_thr_prg_if_snapshot_ongoing(ErtsFlxCtr* c, Process* p); + +/* End: Public Interface */ + +/* Internal Declarations */ + +#define ERTS_FLXCTR_GET_CTR_ARRAY_PTR(C) \ + ((ErtsFlxCtrDecentralizedCtrArray*) erts_atomic_read_acqb(&(C)->u.counters_ptr)) +#define ERTS_FLXCTR_GET_CTR_PTR(C, SCHEDULER_ID, COUNTER_ID) \ + &(ERTS_FLXCTR_GET_CTR_ARRAY_PTR(C))->array[SCHEDULER_ID].counters[COUNTER_ID] + + +typedef union { + erts_atomic_t counters[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE]; + char pad[ERTS_CACHE_LINE_SIZE]; +} ErtsFlxCtrDecentralizedCtrArrayElem; + +typedef struct ErtsFlxCtrDecentralizedCtrArray { + void* block_start; + erts_atomic_t snapshot_status; + ErtsFlxCtrDecentralizedCtrArrayElem array[]; +} ErtsFlxCtrDecentralizedCtrArray; + +void erts_flxctr_set_slot(int group); + +ERTS_GLB_INLINE +int erts_flxctr_get_slot_index(void); + +/* End: Internal Declarations */ + + +/* Implementation of inlined functions */ + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE +int erts_flxctr_get_slot_index(void) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); + ASSERT(esdp->flxctr_slot_no > 0); + return esdp->flxctr_slot_no; +} + +ERTS_GLB_INLINE +void erts_flxctr_add(ErtsFlxCtr* c, + Uint counter_nr, + int to_add) +{ + ASSERT(counter_nr < c->nr_of_counters); + if (c->is_decentralized) { + erts_atomic_add_nob(ERTS_FLXCTR_GET_CTR_PTR(c, + erts_flxctr_get_slot_index(), + counter_nr), + to_add); + } else { + erts_atomic_add_nob(&c->u.counters[counter_nr], to_add); + } +} + +ERTS_GLB_INLINE +void erts_flxctr_inc(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + if (c->is_decentralized) { + erts_atomic_inc_nob(ERTS_FLXCTR_GET_CTR_PTR(c, + erts_flxctr_get_slot_index(), + counter_nr)); + } else { + erts_atomic_inc_read_nob(&c->u.counters[counter_nr]); + } +} + +ERTS_GLB_INLINE +void erts_flxctr_dec(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + if (c->is_decentralized) { + erts_atomic_dec_nob(ERTS_FLXCTR_GET_CTR_PTR(c, + erts_flxctr_get_slot_index(), + counter_nr)); + } else { + erts_atomic_dec_nob(&c->u.counters[counter_nr]); + } +} + +ERTS_GLB_INLINE +Sint erts_flxctr_inc_read_centralized(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + ASSERT(!c->is_decentralized); + return erts_atomic_inc_read_nob(&c->u.counters[counter_nr]); +} + +ERTS_GLB_INLINE +Sint erts_flxctr_dec_read_centralized(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + ASSERT(!c->is_decentralized); + return erts_atomic_dec_read_nob(&c->u.counters[counter_nr]); +} + +ERTS_GLB_INLINE +Sint erts_flxctr_read_centralized(ErtsFlxCtr* c, + Uint counter_nr) +{ + ASSERT(counter_nr < c->nr_of_counters); + ASSERT(!c->is_decentralized); + return erts_atomic_read_nob(&((erts_atomic_t*)(c->u.counters))[counter_nr]); +} + +#endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */ + +#endif /* ERL_FLXCTR_H__ */ diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 12750b9aa6..547e4064a2 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -593,6 +593,7 @@ void erts_usage(void) erts_fprintf(stderr, " no_time_warp|single_time_warp|multi_time_warp\n"); erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n"); erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n"); + erts_fprintf(stderr, "-dcg set the limit for the number of decentralized counter groups\n"); erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n"); erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n", H_DEFAULT_SIZE); @@ -785,6 +786,8 @@ early_init(int *argc, char **argv) /* int dirty_io_scheds; int max_reader_groups; int reader_groups; + int max_decentralized_counter_groups; + int decentralized_counter_groups; char envbuf[21]; /* enough for any 64-bit integer */ size_t envbufsz; @@ -804,7 +807,8 @@ early_init(int *argc, char **argv) /* erts_initialized = 0; - erts_pre_early_init_cpu_topology(&max_reader_groups, + erts_pre_early_init_cpu_topology(&max_decentralized_counter_groups, + &max_reader_groups, &ncpu, &ncpuonln, &ncpuavail); @@ -865,6 +869,24 @@ early_init(int *argc, char **argv) /* } if (argv[i][0] == '-') { switch (argv[i][1]) { + case 'd': { + char *sub_param = argv[i]+2; + if (has_prefix("cg", sub_param)) { + char *arg = get_arg(sub_param+2, argv[i+1], &i); + if (sscanf(arg, "%d", &max_decentralized_counter_groups) != 1) { + erts_fprintf(stderr, + "bad decentralized counter groups limit: %s\n", arg); + erts_usage(); + } + if (max_decentralized_counter_groups < 0) { + erts_fprintf(stderr, + "bad decentralized counter groups limit: %d\n", + max_decentralized_counter_groups); + erts_usage(); + } + } + break; + } case 'r': { char *sub_param = argv[i]+2; if (has_prefix("g", sub_param)) { @@ -1186,8 +1208,10 @@ early_init(int *argc, char **argv) /* erts_early_init_cpu_topology(no_schedulers, &max_main_threads, max_reader_groups, - &reader_groups); - + &reader_groups, + max_decentralized_counter_groups, + &decentralized_counter_groups); + erts_flxctr_setup(decentralized_counter_groups); { erts_thr_late_init_data_t elid = ERTS_THR_LATE_INIT_DATA_DEF_INITER; elid.mem.std.alloc = ethr_std_alloc; diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index 49dea8919b..4eb6c3e214 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -805,8 +805,7 @@ static int node_table_cmp(void *venp1, void *venp2) { return ((((ErlNode *) venp1)->sysname == ((ErlNode *) venp2)->sysname) && - ((((ErlNode *) venp1)->creation == ((ErlNode *) venp2)->creation) || - (((ErlNode *) venp1)->creation == 0 || ((ErlNode *) venp2)->creation == 0)) + ((((ErlNode *) venp1)->creation == ((ErlNode *) venp2)->creation)) ? 0 : 1); } @@ -977,7 +976,7 @@ static void print_node(void *venp, void *vpndp) if(pndp->sysname == NIL) { erts_print(pndp->to, pndp->to_arg, "Name: %T ", enp->sysname); } - erts_print(pndp->to, pndp->to_arg, " %u", enp->creation); + erts_print(pndp->to, pndp->to_arg, " %d", enp->creation); #ifdef DEBUG erts_print(pndp->to, pndp->to_arg, " (refc=%ld)", erts_refc_read(&enp->refc, 0)); @@ -1020,7 +1019,7 @@ void erts_print_node_info(fmtfn_t to, /* ----------------------------------------------------------------------- */ void -erts_set_this_node(Eterm sysname, Uint32 creation) +erts_set_this_node(Eterm sysname, Uint creation) { ERTS_LC_ASSERT(erts_thr_progress_is_blocking()); ASSERT(2 <= de_refc_read(erts_this_dist_entry, 2)); diff --git a/erts/emulator/beam/erl_node_tables.h b/erts/emulator/beam/erl_node_tables.h index c434926142..aa8af12555 100644 --- a/erts/emulator/beam/erl_node_tables.h +++ b/erts/emulator/beam/erl_node_tables.h @@ -95,6 +95,7 @@ enum dist_entry_state { struct ErtsDistOutputBuf_ { #ifdef DEBUG Uint dbg_pattern; + byte *ext_startp; byte *alloc_endp; #endif ErtsDistOutputBuf *next; @@ -258,7 +259,7 @@ void erts_set_dist_entry_pending(DistEntry *); void erts_set_dist_entry_connected(DistEntry *, Eterm, Uint); ErlNode *erts_find_or_insert_node(Eterm, Uint32, Eterm); void erts_schedule_delete_node(ErlNode *); -void erts_set_this_node(Eterm, Uint32); +void erts_set_this_node(Eterm, Uint); Uint erts_node_table_size(void); void erts_init_node_tables(int); void erts_node_table_info(fmtfn_t, void *); diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c index 4e9f177e51..f58a606d57 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.c +++ b/erts/emulator/beam/erl_proc_sig_queue.c @@ -1019,6 +1019,8 @@ send_gen_exit_signal(Process *c_p, Eterm from_tag, ref_sz = size_object(ref); hsz += ref_sz; + reason_sz = 0; /* Set to silence gcc warning */ + /* The reason was part of the control message, just use copy it into the xsigd */ if (is_value(reason)) { diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 9e662632b4..2b45d2d353 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -12141,10 +12141,9 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) reason); switch (code) { case ERTS_DSIG_SEND_CONTINUE: + case ERTS_DSIG_SEND_YIELD: erts_set_gc_state(c_p, 0); ctxt->dist_state = erts_dsend_export_trap_context(c_p, &ctx); - /* fall-through */ - case ERTS_DSIG_SEND_YIELD: break; case ERTS_DSIG_SEND_OK: break; @@ -12388,11 +12387,10 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) reason, SEQ_TRACE_TOKEN(c_p)); switch (code) { + case ERTS_DSIG_SEND_YIELD: case ERTS_DSIG_SEND_CONTINUE: erts_set_gc_state(c_p, 0); ctxt->dist_state = erts_dsend_export_trap_context(c_p, &ctx); - /* fall-through */ - case ERTS_DSIG_SEND_YIELD: break; case ERTS_DSIG_SEND_OK: break; diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 711b73417d..6118c671ee 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -641,6 +641,7 @@ struct ErtsSchedulerData_ { ErtsSchedType type; Uint no; /* Scheduler number for normal schedulers */ Uint dirty_no; /* Scheduler number for dirty schedulers */ + int flxctr_slot_no; /* slot nr when a flxctr is used */ struct enif_environment_t *current_nif; Process *dirty_shadow_process; Port *current_port; @@ -1847,6 +1848,7 @@ int erts_resume_processes(ErtsProcList *); void erts_deep_process_dump(fmtfn_t, void *); Eterm erts_get_reader_groups_map(Process *c_p); +Eterm erts_get_decentralized_counter_groups_map(Process *c_p); Eterm erts_debug_reader_groups_map(Process *c_p, int groups); Uint erts_debug_nbalance(void); diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 471c1c3938..395ff51ad3 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -51,17 +51,18 @@ #define MAX_STRING_LEN 0xffff -/* - * MAX value for the creation field in pid, port and reference - * for the old PID_EXT, PORT_EXT, REFERENCE_EXT and NEW_REFERENCE_EXT. - * Older nodes (OTP 19-21) will send us these so we must be able to decode them. - * - * From OTP 22 DFLAG_BIG_CREATION is mandatory so this node will always - * encode with new big 32-bit creations using NEW_PID_EXT, NEW_PORT_EXT - * and NEWER_REFERENCE_EXT. +/* MAX value for the creation field in pid, port and reference + for the local node and for the current external format. + + Larger creation values than this are allowed in external pid, port and refs + encoded with NEW_PID_EXT, NEW_PORT_EXT and NEWER_REFERENCE_EXT. + The point here is to prepare for future upgrade to 32-bit creation. + OTP-19 (erts-8.0) can handle big creation values from other (newer) nodes, + but do not use big creation values for the local node yet, + as we still may have to communicate with older nodes. */ -#define ERTS_MAX_TINY_CREATION (3) -#define is_tiny_creation(Cre) ((unsigned)(Cre) <= ERTS_MAX_TINY_CREATION) +#define ERTS_MAX_LOCAL_CREATION (3) +#define is_valid_creation(Cre) ((unsigned)(Cre) <= ERTS_MAX_LOCAL_CREATION) #undef ERTS_DEBUG_USE_DIST_SEP #ifdef DEBUG @@ -699,6 +700,7 @@ dist_ext_size(ErtsDistExternal *edep) } else { sz -= sizeof(ErtsAtomTranslationTable); } + ASSERT(sz % 4 == 0); return sz; } @@ -706,8 +708,9 @@ Uint erts_dist_ext_size(ErtsDistExternal *edep) { Uint sz = dist_ext_size(edep); + sz += 4; /* may need to pad to 8-byte-align ErtsDistExternalData */ sz += edep->data[0].frag_id * sizeof(ErtsDistExternalData); - return sz + ERTS_EXTRA_DATA_ALIGN_SZ(sz); + return sz; } Uint @@ -749,6 +752,8 @@ erts_make_dist_ext_copy(ErtsDistExternal *edep, ErtsDistExternal *new_edep) erts_ref_dist_entry(new_edep->dep); ep += dist_ext_sz; + ep += (UWord)ep & 4; /* 8-byte alignment for ErtsDistExternalData */ + ASSERT((UWord)ep % 8 == 0); new_edep->data = (ErtsDistExternalData*)ep; sys_memzero(new_edep->data, sizeof(ErtsDistExternalData) * edep->data->frag_id); @@ -2379,8 +2384,7 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags) Eterm sysname = ((is_internal_pid(pid) && (dflags & DFLAG_INTERNAL_TAGS)) ? INTERNAL_LOCAL_SYSNAME : pid_node_name(pid)); Uint32 creation = pid_creation(pid); - - *ep++ = NEW_PID_EXT; + byte* tagp = ep++; /* insert atom here containing host and sysname */ ep = enc_atom(acmp, sysname, ep, dflags); @@ -2392,8 +2396,15 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags) ep += 4; put_int32(os, ep); ep += 4; - put_int32(creation, ep); - ep += 4; + if (creation <= ERTS_MAX_LOCAL_CREATION) { + *tagp = PID_EXT; + *ep++ = creation; + } else { + ASSERT(is_external_pid(pid)); + *tagp = NEW_PID_EXT; + put_int32(creation, ep); + ep += 4; + } return ep; } @@ -2513,7 +2524,7 @@ dec_pid(ErtsDistExternal *edep, ErtsHeapFactory* factory, byte* ep, if (tag == PID_EXT) { cre = get_int8(ep); ep += 1; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { return NULL; } } else { @@ -2774,18 +2785,25 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Eterm sysname = (((dflags & DFLAG_INTERNAL_TAGS) && is_internal_ref(obj)) ? INTERNAL_LOCAL_SYSNAME : ref_node_name(obj)); Uint32 creation = ref_creation(obj); + byte* tagp = ep++; ASSERT(dflags & DFLAG_EXTENDED_REFERENCES); erts_magic_ref_save_bin(obj); - *ep++ = NEWER_REFERENCE_EXT; i = ref_no_numbers(obj); put_int16(i, ep); ep += 2; ep = enc_atom(acmp, sysname, ep, dflags); - put_int32(creation, ep); - ep += 4; + if (creation <= ERTS_MAX_LOCAL_CREATION) { + *tagp = NEW_REFERENCE_EXT; + *ep++ = creation; + } else { + ASSERT(is_external_ref(obj)); + *tagp = NEWER_REFERENCE_EXT; + put_int32(creation, ep); + ep += 4; + } ref_num = ref_numbers(obj); for (j = 0; j < i; j++) { put_int32(ref_num[j], ep); @@ -2798,14 +2816,21 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Eterm sysname = (((dflags & DFLAG_INTERNAL_TAGS) && is_internal_port(obj)) ? INTERNAL_LOCAL_SYSNAME : port_node_name(obj)); Uint32 creation = port_creation(obj); + byte* tagp = ep++; - *ep++ = NEW_PORT_EXT; ep = enc_atom(acmp, sysname, ep, dflags); j = port_number(obj); put_int32(j, ep); ep += 4; - put_int32(creation, ep); - ep += 4; + if (creation <= ERTS_MAX_LOCAL_CREATION) { + *tagp = PORT_EXT; + *ep++ = creation; + } else { + ASSERT(is_external_port(obj)); + *tagp = NEW_PORT_EXT; + put_int32(creation, ep); + ep += 4; + } break; } case LIST_DEF: @@ -3500,7 +3525,7 @@ dec_term_atom_common: if (tag == PORT_EXT) { cre = get_int8(ep); ep++; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { goto error; } } @@ -3547,7 +3572,7 @@ dec_term_atom_common: cre = get_int8(ep); ep += 1; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { goto error; } goto ref_ext_common; @@ -3561,7 +3586,7 @@ dec_term_atom_common: cre = get_int8(ep); ep += 1; - if (!is_tiny_creation(cre)) { + if (!is_valid_creation(cre)) { goto error; } r0 = get_int32(ep); @@ -4259,21 +4284,30 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, result += 1 + 4 + 1 + i; /* tag,size,sign,digits */ break; case EXTERNAL_PID_DEF: + if (external_pid_creation(obj) > ERTS_MAX_LOCAL_CREATION) + result += 3; + /*fall through*/ case PID_DEF: result += (1 + encode_size_struct2(acmp, pid_node_name(obj), dflags) + - 4 + 4 + 4); + 4 + 4 + 1); break; case EXTERNAL_REF_DEF: + if (external_ref_creation(obj) > ERTS_MAX_LOCAL_CREATION) + result += 3; + /*fall through*/ case REF_DEF: ASSERT(dflags & DFLAG_EXTENDED_REFERENCES); i = ref_no_numbers(obj); result += (1 + 2 + encode_size_struct2(acmp, ref_node_name(obj), dflags) + - 4 + 4*i); + 1 + 4*i); break; case EXTERNAL_PORT_DEF: + if (external_port_creation(obj) > ERTS_MAX_LOCAL_CREATION) + result += 3; + /*fall through*/ case PORT_DEF: result += (1 + encode_size_struct2(acmp, port_node_name(obj), dflags) + - 4 + 4); + 4 + 1); break; case LIST_DEF: { int is_str = is_external_string(obj, &m); diff --git a/erts/emulator/beam/external.h b/erts/emulator/beam/external.h index 396cd9f802..f2cc9bf98f 100644 --- a/erts/emulator/beam/external.h +++ b/erts/emulator/beam/external.h @@ -144,14 +144,6 @@ typedef struct erl_dist_external { ErtsAtomTranslationTable attab; } ErtsDistExternal; -#define ERTS_DIST_EXT_SIZE(EDEP) \ - (sizeof(ErtsDistExternal) \ - - (((EDEP)->flags & ERTS_DIST_EXT_ATOM_TRANS_TAB) \ - ? (ASSERT(0 <= (EDEP)->attab.size \ - && (EDEP)->attab.size <= ERTS_ATOM_CACHE_SIZE), \ - sizeof(Eterm)*(ERTS_ATOM_CACHE_SIZE - (EDEP)->attab.size)) \ - : sizeof(ErtsAtomTranslationTable))) - typedef struct { byte *extp; int exttmp; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index f9bbe4167f..4c8d3d3dbe 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1216,10 +1216,11 @@ Uint64 erts_timestamp_millis(void); Export* erts_find_function(Eterm, Eterm, unsigned int, ErtsCodeIndex); -void *erts_calc_stacklimit(char *prev_c, UWord stacksize); -int erts_check_below_limit(char *ptr, char *limit); -int erts_check_above_limit(char *ptr, char *limit); -void *erts_ptr_id(void *ptr); +/* ERTS_NOINLINE prevents link-time optimization across modules */ +void *erts_calc_stacklimit(char *prev_c, UWord stacksize) ERTS_NOINLINE; +int erts_check_below_limit(char *ptr, char *limit) ERTS_NOINLINE; +int erts_check_above_limit(char *ptr, char *limit) ERTS_NOINLINE; +void *erts_ptr_id(void *ptr) ERTS_NOINLINE; Eterm store_external_or_ref_in_proc_(Process *, Eterm); Eterm store_external_or_ref_(Uint **, ErlOffHeap*, Eterm); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 7a125b0f67..10ca74cd60 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1690,9 +1690,14 @@ i_plus S1=c S2=c Fail Dst => move S1 x | i_plus x S2 Fail Dst i_plus xy xyc j? d -i_minus x x j? d -i_minus c x j? d -i_minus s s j? d +# A minus instruction with a constant right operand will be +# converted to an i_increment instruction, except in guards or +# when the negated value of the constant won't fit in a guard. +# Therefore, it very rare. +i_minus S1 S2=c Fail Dst => move S2 x | i_minus S1 x Fail Dst + +i_minus xy xy j? d +i_minus c xy j? d i_times j? s s d diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index a6312293cc..c261c8e117 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -63,6 +63,14 @@ # endif #endif +#ifndef ERTS_NOINLINE +# if ERTS_AT_LEAST_GCC_VSN__(3,1,1) +# define ERTS_NOINLINE __attribute__((__noinline__)) +# else +# define ERTS_NOINLINE +# endif +#endif + #if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) # undef ERTS_CAN_INLINE # define ERTS_CAN_INLINE 0 |