diff options
Diffstat (limited to 'erts/emulator')
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 1 | ||||
-rw-r--r-- | erts/emulator/beam/bif.tab | 1 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_counters.c | 58 | ||||
-rw-r--r-- | erts/emulator/beam/erl_gc.c | 12 | ||||
-rw-r--r-- | erts/emulator/beam/erl_node_tables.c | 21 | ||||
-rw-r--r-- | erts/emulator/test/counters_SUITE.erl | 138 |
6 files changed, 204 insertions, 27 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index ab5920a67e..e909a0b4da 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -579,6 +579,7 @@ init_emulator(void) * the instructions' C labels to the loader. * The second call starts execution of BEAM code. This call never returns. */ +ERTS_NO_RETPOLINE void process_main(Eterm * x_reg_array, FloatDef* f_reg_array) { static int init_done = 0; diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index aa3c3acd9f..d4ba90a61a 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -723,4 +723,5 @@ bif atomics:info/1 bif erts_internal:counters_new/1 bif erts_internal:counters_get/2 bif erts_internal:counters_add/3 +bif erts_internal:counters_put/3 bif erts_internal:counters_info/1 diff --git a/erts/emulator/beam/erl_bif_counters.c b/erts/emulator/beam/erl_bif_counters.c index a46b462225..7c8884ba32 100644 --- a/erts/emulator/beam/erl_bif_counters.c +++ b/erts/emulator/beam/erl_bif_counters.c @@ -19,7 +19,7 @@ */ /* - * Purpose: High performance atomics. + * Purpose: The implementation for 'counters' with 'write_concurrency'. */ #ifdef HAVE_CONFIG_H @@ -37,8 +37,17 @@ #include "erl_bif_unique.h" #include "erl_map.h" +/* + * Each logical counter consists of one 64-bit atomic instance per scheduler + * plus one instance for the "base value". + * + * get() reads all atomics for the counter and returns the sum. + * add() reads and writes only its own scheduler specific atomic instance. + * put() reads all scheduler specific atomics and writes a new base value. + */ +#define ATOMICS_PER_COUNTER (erts_no_schedulers + 1) -#define COUNTERS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic64_t)) +#define ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic64_t)) typedef struct { @@ -47,12 +56,12 @@ typedef struct UWord ulen; #endif union { - erts_atomic64_t v[COUNTERS_PER_CACHE_LINE]; + erts_atomic64_t v[ATOMICS_PER_CACHE_LINE]; byte cache_line__[ERTS_CACHE_LINE_SIZE]; } u[1]; }CountersRef; -static int counters_destructor(Binary *unused) +static int counters_destructor(Binary *mbin) { return 1; } @@ -76,10 +85,10 @@ BIF_RETTYPE erts_internal_counters_new_1(BIF_ALIST_1) BIF_ERROR(BIF_P, BADARG); } - if (cnt > (ERTS_UWORD_MAX / (sizeof(erts_atomic64_t)*2*erts_no_schedulers))) + if (cnt > (ERTS_UWORD_MAX / (sizeof(erts_atomic64_t)*2*ATOMICS_PER_COUNTER))) BIF_ERROR(BIF_P, SYSTEM_LIMIT); - cache_lines = erts_no_schedulers * div_ceil(cnt, COUNTERS_PER_CACHE_LINE); + cache_lines = ATOMICS_PER_COUNTER * div_ceil(cnt, ATOMICS_PER_CACHE_LINE); bytes = offsetof(CountersRef, u) + cache_lines * ERTS_CACHE_LINE_SIZE; mbin = erts_create_magic_binary_x(bytes, counters_destructor, @@ -87,12 +96,13 @@ BIF_RETTYPE erts_internal_counters_new_1(BIF_ALIST_1) 0); p = ERTS_MAGIC_BIN_DATA(mbin); p->arity = cnt; + #ifdef DEBUG p->ulen = cache_lines; #endif ASSERT((byte*)&p->u[cache_lines] <= ((byte*)p + bytes)); for (ui=0; ui < cache_lines; ui++) - for (vi=0; vi < COUNTERS_PER_CACHE_LINE; vi++) + for (vi=0; vi < ATOMICS_PER_CACHE_LINE; vi++) erts_atomic64_init_nob(&p->u[ui].v[vi], 0); hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE); return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin); @@ -120,8 +130,8 @@ static ERTS_INLINE int get_ref_cnt(Eterm ref, Eterm index, UWord ix, ui, vi; if (!get_ref(ref, &p) || !term_to_UWord(index, &ix) || --ix >= p->arity) return 0; - ui = (ix / COUNTERS_PER_CACHE_LINE) * erts_no_schedulers + sched_ix; - vi = ix % COUNTERS_PER_CACHE_LINE; + ui = (ix / ATOMICS_PER_CACHE_LINE) * ATOMICS_PER_COUNTER + sched_ix; + vi = ix % ATOMICS_PER_CACHE_LINE; ASSERT(ui < p->ulen); *pp = p; *app = &p->u[ui].v[vi]; @@ -134,7 +144,8 @@ static ERTS_INLINE int get_ref_my_cnt(Eterm ref, Eterm index, { ErtsSchedulerData *esdp = erts_get_scheduler_data(); ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); - return get_ref_cnt(ref, index, pp, app, esdp->no - 1); + ASSERT(esdp->no > 0 && esdp->no < ATOMICS_PER_COUNTER); + return get_ref_cnt(ref, index, pp, app, esdp->no); } static ERTS_INLINE int get_ref_first_cnt(Eterm ref, Eterm index, @@ -172,7 +183,7 @@ BIF_RETTYPE erts_internal_counters_get_2(BIF_ALIST_2) if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap)) { BIF_ERROR(BIF_P, BADARG); } - for (j = erts_no_schedulers; j ; --j) { + for (j = ATOMICS_PER_COUNTER; j ; --j) { acc += erts_atomic64_read_nob(ap); ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE); } @@ -194,6 +205,31 @@ BIF_RETTYPE erts_internal_counters_add_3(BIF_ALIST_3) return am_ok; } +BIF_RETTYPE erts_internal_counters_put_3(BIF_ALIST_3) +{ + CountersRef* p; + erts_atomic64_t* first_ap; + erts_atomic64_t* ap; + erts_aint64_t acc; + erts_aint64_t val; + int j; + + if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &first_ap) + || !term_to_Sint64(BIF_ARG_3, &val)) { + BIF_ERROR(BIF_P, BADARG); + } + + ap = first_ap; + acc = 0; + j = ATOMICS_PER_COUNTER - 1; + do { + ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE); + acc += erts_atomic64_read_nob(ap); + } while (--j); + erts_atomic64_set_nob(first_ap, val-acc); + + return am_ok; +} BIF_RETTYPE erts_internal_counters_info_1(BIF_ALIST_1) { diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 47dd115c82..b4df418cd5 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -681,7 +681,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end, ErtsMonotonicTime start_time; ErtsSchedulerData *esdp = erts_proc_sched_data(p); erts_aint32_t state; - ERTS_MSACC_PUSH_STATE_M(); + ERTS_MSACC_PUSH_STATE(); #ifdef USE_VM_PROBES DTRACE_CHARBUF(pidbuf, DTRACE_TERM_BUF_SIZE); #endif @@ -711,7 +711,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end, else if (p->live_hf_end != ERTS_INVALID_HFRAG_PTR) live_hf_end = p->live_hf_end; - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_GC); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_GC); erts_atomic32_read_bor_nob(&p->state, ERTS_PSFLG_GC); if (erts_system_monitor_long_gc != 0) @@ -759,7 +759,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end, gc_trace_end_tag = am_gc_minor_end; } else { do_major_collection: - ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC_FULL); + ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC_FULL); if (IS_TRACED_FL(p, F_TRACE_GC)) { trace_gc(p, am_gc_major_start, need, THE_NON_VALUE); } @@ -770,7 +770,7 @@ do_major_collection: p->flags &= ~(F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC); DTRACE2(gc_major_end, pidbuf, reclaimed_now); gc_trace_end_tag = am_gc_major_end; - ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC); + ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC); } reset_active_writer(p); @@ -800,7 +800,7 @@ do_major_collection: /* We have to make sure that we have space for need on the heap */ res = delay_garbage_collection(p, live_hf_end, need, fcalls); - ERTS_MSACC_POP_STATE_M(); + ERTS_MSACC_POP_STATE(); return res; } @@ -843,7 +843,7 @@ do_major_collection: FLAGS(p) &= ~(F_FORCE_GC|F_HIBERNATED); p->live_hf_end = ERTS_INVALID_HFRAG_PTR; - ERTS_MSACC_POP_STATE_M(); + ERTS_MSACC_POP_STATE(); #ifdef CHECK_FOR_HOLES /* diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index f4dc60941a..18ed782ae3 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -421,8 +421,25 @@ static void schedule_delete_dist_entry(DistEntry* dep) * * Note that timeouts do not guarantee thread progress. */ - erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry, - dep, &dep->later_op); + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { + erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry, + dep, &dep->later_op); + } else { + /* + * Since OTP 20, it's possible that destructor is executed on + * a dirty scheduler. Aux work cannot be done on a dirty + * scheduler, and scheduling any aux work on a dirty scheduler + * makes the scheduler to loop infinitely. + * To avoid this, make a spot jump: schedule this function again + * on a first normal scheduler. It is guaranteed to be always + * online. Since it's a rare event, this shall not pose a big + * utilisation hit. + */ + erts_schedule_misc_aux_work(1, + (void (*)(void *))schedule_delete_dist_entry, + (void *) dep); + } } static void diff --git a/erts/emulator/test/counters_SUITE.erl b/erts/emulator/test/counters_SUITE.erl index 7de164096b..b3f0358c1e 100644 --- a/erts/emulator/test/counters_SUITE.erl +++ b/erts/emulator/test/counters_SUITE.erl @@ -21,12 +21,13 @@ -include_lib("common_test/include/ct.hrl"). --compile(export_all). +-export([suite/0, all/0]). +-export([basic/1, bad/1, limits/1, indep/1, write_concurrency/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> - [basic, bad, limits]. + [basic, bad, limits, indep, write_concurrency]. basic(Config) when is_list(Config) -> Size = 10, @@ -53,15 +54,21 @@ basic_do(Ref, Ix) -> 77 = counters:get(Ref, Ix), ok = counters:sub(Ref, Ix, -10), 87 = counters:get(Ref, Ix), + ok = counters:put(Ref, Ix, 0), + 0 = counters:get(Ref, Ix), + ok = counters:put(Ref, Ix, 123), + 123 = counters:get(Ref, Ix), + ok = counters:put(Ref, Ix, -321), + -321 = counters:get(Ref, Ix), ok. check_memory(atomics, Memory, Size) -> {_,true} = {Memory, Memory > Size*8}, {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100}; check_memory(write_concurrency, Memory, Size) -> - NScheds = erlang:system_info(schedulers), - {_,true} = {Memory, Memory > NScheds*Size*8}, - {_,true} = {Memory, Memory < NScheds*(Size+7)*max_atomic_sz() + 100}. + NWords = erlang:system_info(schedulers) + 1, + {_,true} = {Memory, Memory > NWords*Size*8}, + {_,true} = {Memory, Memory < NWords*(Size+7)*max_atomic_sz() + 100}. max_atomic_sz() -> case erlang:system_info({wordsize, external}) of @@ -90,23 +97,138 @@ bad(Config) when is_list(Config) -> limits(Config) when is_list(Config) -> + limits_do(counters:new(1,[atomics])), + limits_do(counters:new(1,[write_concurrency])), + ok. + +limits_do(Ref) -> Bits = 64, Max = (1 bsl (Bits-1)) - 1, Min = -(1 bsl (Bits-1)), - Ref = counters:new(1,[]), 0 = counters:get(Ref, 1), - ok = counters:add(Ref, 1, Max), + ok = counters:put(Ref, 1, Max), + Max = counters:get(Ref, 1), ok = counters:add(Ref, 1, 1), Min = counters:get(Ref, 1), ok = counters:sub(Ref, 1, 1), Max = counters:get(Ref, 1), + ok = counters:put(Ref, 1, Min), + Min = counters:get(Ref, 1), IncrMax = (Max bsl 1) bor 1, - ok = counters:sub(Ref, 1, counters:get(Ref, 1)), + ok = counters:put(Ref, 1, 0), ok = counters:add(Ref, 1, IncrMax), -1 = counters:get(Ref, 1), {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, IncrMax+1)), {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)), + {'EXIT',{badarg,_}} = (catch counters:put(Ref, 1, Max+1)), + {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)), + ok. + +%% Verify that independent workers, using different counters +%% within the same array, do not interfere with each other. +indep(Config) when is_list(Config) -> + NScheds = erlang:system_info(schedulers), + Ref = counters:new(NScheds,[write_concurrency]), + Rounds = 100, + Papa = self(), + Pids = [spawn_opt(fun () -> + Val = I*197, + counters:put(Ref, I, Val), + indep_looper(Rounds, Ref, I, Val), + Papa ! {self(), done} + end, + [link, {scheduler, I}]) + || I <- lists:seq(1, NScheds)], + [receive {P,done} -> ok end || P <- Pids], ok. + +indep_looper(0, _, _ , _) -> + ok; +indep_looper(N, Ref, I, Val0) -> + %%io:format("Val0 = ~p\n", [Val0]), + Val0 = counters:get(Ref, I), + Val1 = indep_adder(Ref, I, Val0), + indep_subber(Ref, I, Val1), + Val2 = N*7 + I, + counters:put(Ref, I, Val2), + indep_looper(N-1, Ref, I, Val2). + +indep_adder(Ref, I, Val) when Val < (1 bsl 62) -> + %%io:format("adder Val = ~p\n", [Val]), + Incr = abs(Val div 2) + I + 984735, + counters:add(Ref, I, Incr), + Res = Val + Incr, + Res = counters:get(Ref, I), + indep_adder(Ref, I, Res); +indep_adder(_Ref, _I, Val) -> + Val. + +indep_subber(Ref, I, Val) when Val > -(1 bsl 62) -> + %%io:format("subber Val = ~p\n", [Val]), + Decr = (abs(Val div 2) + I + 725634), + counters:sub(Ref, I, Decr), + Res = Val - Decr, + Res = counters:get(Ref, I), + indep_subber(Ref, I, Res); +indep_subber(_Ref, _I, Val) -> + Val. + + + +%% Verify write_concurrency yields correct results. +write_concurrency(Config) when is_list(Config) -> + rand:seed(exs1024s), + io:format("*** SEED: ~p ***\n", [rand:export_seed()]), + NScheds = erlang:system_info(schedulers), + Size = 100, + Ref = counters:new(Size,[write_concurrency]), + Rounds = 1000, + Papa = self(), + Pids = [spawn_opt(fun Worker() -> + receive + {go, Ix, Incr} -> + wc_looper(Rounds, Ref, Ix, Incr), + Papa ! {self(), done, Rounds*Incr}, + Worker(); + stop -> + ok + end + end, + [link, {scheduler, N}]) + || N <- lists:seq(1, NScheds)], + [begin + Base = rand_log64(), + counters:put(Ref, Index, Base), + SendList = [{P,{go, Index, rand_log64()}} || P <- Pids], + [P ! Msg || {P,Msg} <- SendList], + Added = lists:sum([receive {P,done,Contrib} -> Contrib end || P <- Pids]), + Result = mask_sint64(Base+Added), + {_,Result} = {Result, counters:get(Ref, Index)} + end + || Index <- lists:seq(1, Size)], + + [begin unlink(P), P ! stop end || P <- Pids], + ok. + +wc_looper(0, _, _, _) -> + ok; +wc_looper(N, Ref, Ix, Incr) -> + counters:add(Ref, Ix, Incr), + wc_looper(N-1, Ref, Ix, Incr). + +mask_sint64(X) -> + SMask = 1 bsl 63, + UMask = SMask - 1, + (X band UMask) - (X band SMask). + +%% A random signed 64-bit integer +%% with a uniformly distributed number of significant bits. +rand_log64() -> + Uint = round(math:pow(2, rand:uniform()*63)), + case rand:uniform(2) of + 1 -> -Uint; + 2 -> Uint + end. |