diff options
author | Lukas Larsson <[email protected]> | 2019-06-28 10:48:43 +0200 |
---|---|---|
committer | Lukas Larsson <[email protected]> | 2019-06-28 10:48:43 +0200 |
commit | c29006892cdddd95f32a7b6fc41eb3d8065c0f39 (patch) | |
tree | 5795e9d5f0bec9333b6c8d458149420efdae4834 /erts | |
parent | 42ab20bfe6cfaf7e08b97ba0fbfffa86da6dc821 (diff) | |
parent | b942df8593b6295e61eb767008d6e93a2cc34665 (diff) | |
download | otp-c29006892cdddd95f32a7b6fc41eb3d8065c0f39.tar.gz otp-c29006892cdddd95f32a7b6fc41eb3d8065c0f39.tar.bz2 otp-c29006892cdddd95f32a7b6fc41eb3d8065c0f39.zip |
Merge branch 'lukas/erts/cleanup_scheduler_start' into maint
* lukas/erts/cleanup_scheduler_start:
erts: Reduce test time for multi_load in valgrind
erts: Randomize valgrind taskset CPU
erts: Make dump_SUITE:free_dump tc more robust to different systems
erts: Fix distr SUITE latency testcases
erts: Fix gc disable when terminating process
erts: Cleanup start of all erts threads to ABORT when failing
Diffstat (limited to 'erts')
-rw-r--r-- | erts/emulator/beam/erl_process.c | 67 | ||||
-rw-r--r-- | erts/emulator/test/distribution_SUITE.erl | 29 | ||||
-rw-r--r-- | erts/emulator/test/dump_SUITE.erl | 33 | ||||
-rw-r--r-- | erts/emulator/test/multi_load_SUITE.erl | 10 | ||||
-rw-r--r-- | erts/etc/unix/cerl.src | 5 |
5 files changed, 77 insertions, 67 deletions
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 1f6adb98ef..de0564292d 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -8568,9 +8568,6 @@ erts_start_schedulers(void) { ethr_tid tid; int res = 0; - Uint actual; - Uint wanted = erts_no_schedulers; - Uint wanted_no_schedulers = erts_no_schedulers; char name[16]; ethr_thr_opts opts = ETHR_THR_OPTS_DEFAULT_INITER; int ix; @@ -8584,40 +8581,34 @@ erts_start_schedulers(void) erts_snprintf(opts.name, 16, "runq_supervisor"); erts_atomic_init_nob(&runq_supervisor_sleeping, 0); if (0 != ethr_event_init(&runq_supervision_event)) - erts_exit(ERTS_ERROR_EXIT, "Failed to create run-queue supervision event\n"); + erts_exit(ERTS_ABORT_EXIT, "Failed to create run-queue supervision event\n"); res = ethr_thr_create(&runq_supervisor_tid, runq_supervisor, NULL, &opts); if (0 != res) - erts_exit(ERTS_ERROR_EXIT, "Failed to create run-queue supervision thread, " + erts_exit(ERTS_ABORT_EXIT, "Failed to create run-queue supervision thread, " "error = %d\n", res); } opts.suggested_stack_size = erts_sched_thread_suggested_stack_size; - if (wanted < 1) - wanted = 1; - if (wanted > ERTS_MAX_NO_OF_SCHEDULERS) { - wanted = ERTS_MAX_NO_OF_SCHEDULERS; - res = ENOTSUP; - } - - for (actual = 0; actual < wanted; actual++) { - ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(actual); - - ASSERT(actual == esdp->no - 1); - - erts_snprintf(opts.name, 16, "%lu_scheduler", actual + 1); + ASSERT(erts_no_schedulers > 0 && erts_no_schedulers <= ERTS_MAX_NO_OF_SCHEDULERS); + for (ix = 0; ix < erts_no_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); + ASSERT(ix == esdp->no - 1); + erts_snprintf(opts.name, 16, "%lu_scheduler", ix + 1); res = ethr_thr_create(&esdp->tid, sched_thread_func, (void*)esdp, &opts); - if (res != 0) { - break; + erts_exit(ERTS_ABORT_EXIT, "Failed to create scheduler thread %d, error = %d\n", ix, res); } } - erts_no_schedulers = actual; + + /* Probably not needed as thread create will imply a memory barrier, + but we do one just to be safe. */ + ERTS_THR_MEMORY_BARRIER; { for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { @@ -8626,7 +8617,7 @@ erts_start_schedulers(void) opts.suggested_stack_size = erts_dcpu_sched_thread_suggested_stack_size; res = ethr_thr_create(&esdp->tid,sched_dirty_cpu_thread_func,(void*)esdp,&opts); if (res != 0) - erts_exit(ERTS_ERROR_EXIT, "Failed to create dirty cpu scheduler thread %d, error = %d\n", ix, res); + erts_exit(ERTS_ABORT_EXIT, "Failed to create dirty cpu scheduler thread %d, error = %d\n", ix, res); } for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); @@ -8634,40 +8625,22 @@ erts_start_schedulers(void) opts.suggested_stack_size = erts_dio_sched_thread_suggested_stack_size; res = ethr_thr_create(&esdp->tid,sched_dirty_io_thread_func,(void*)esdp,&opts); if (res != 0) - erts_exit(ERTS_ERROR_EXIT, "Failed to create dirty io scheduler thread %d, error = %d\n", ix, res); + erts_exit(ERTS_ABORT_EXIT, "Failed to create dirty io scheduler thread %d, error = %d\n", ix, res); } } - ERTS_THR_MEMORY_BARRIER; - erts_snprintf(opts.name, 16, "aux"); res = ethr_thr_create(&tid, aux_thread, NULL, &opts); if (res != 0) - erts_exit(ERTS_ERROR_EXIT, "Failed to create aux thread, error = %d\n", res); + erts_exit(ERTS_ABORT_EXIT, "Failed to create aux thread, error = %d\n", res); for (ix = 0; ix < erts_no_poll_threads; ix++) { erts_snprintf(opts.name, 16, "%d_poller", ix); res = ethr_thr_create(&tid, poll_thread, (void*)(UWord)ix, &opts); if (res != 0) - erts_exit(ERTS_ERROR_EXIT, "Failed to create poll thread\n"); - } - - if (actual < 1) - erts_exit(ERTS_ERROR_EXIT, - "Failed to create any scheduler-threads: %s (%d)\n", - erl_errno_id(res), - res); - if (res != 0) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - ASSERT(actual != wanted_no_schedulers); - erts_dsprintf(dsbufp, - "Failed to create %beu scheduler-threads (%s:%d); " - "only %beu scheduler-thread%s created.\n", - wanted_no_schedulers, erl_errno_id(res), res, - actual, actual == 1 ? " was" : "s were"); - erts_send_error_to_logger_nogl(dsbufp); + erts_exit(ERTS_ABORT_EXIT, "Failed to create poll thread\n"); } } @@ -12097,6 +12070,7 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) ErtsHeapFactory factory; Sint reds_consumed = 0; + ASSERT(c_p->flags & F_DISABLE_GC); ASSERT(erts_monitor_is_target(mon) && mon->type == ERTS_MON_TYPE_DIST_PROC); mdp = erts_monitor_to_data(mon); @@ -12144,7 +12118,6 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) switch (code) { case ERTS_DSIG_SEND_CONTINUE: case ERTS_DSIG_SEND_YIELD: - erts_set_gc_state(c_p, 0); ctxt->dist_state = erts_dsend_export_trap_context(c_p, &ctx); reds_consumed = reds; /* force yield */ break; @@ -12152,7 +12125,6 @@ erts_proc_exit_handle_dist_monitor(ErtsMonitor *mon, void *vctxt, Sint reds) break; case ERTS_DSIG_SEND_TOO_LRG: erts_kill_dist_connection(dep, dist->connection_id); - erts_set_gc_state(c_p, 1); break; default: ASSERT(! "Invalid dsig send exit monitor result"); @@ -12356,6 +12328,7 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) ErtsHeapFactory factory; Sint reds_consumed = 0; + ASSERT(c_p->flags & F_DISABLE_GC); ASSERT(lnk->type == ERTS_LNK_TYPE_DIST_PROC); dlnk = erts_link_to_other(lnk, &ldp); dist = ((ErtsLinkDataExtended *) ldp)->dist; @@ -12395,7 +12368,6 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) switch (code) { case ERTS_DSIG_SEND_YIELD: case ERTS_DSIG_SEND_CONTINUE: - erts_set_gc_state(c_p, 0); ctxt->dist_state = erts_dsend_export_trap_context(c_p, &ctx); reds_consumed = reds; /* force yield */ break; @@ -12403,7 +12375,6 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) break; case ERTS_DSIG_SEND_TOO_LRG: erts_kill_dist_connection(dep, dist->connection_id); - erts_set_gc_state(c_p, 1); break; default: ASSERT(! "Invalid dsig send exit monitor result"); @@ -12951,6 +12922,8 @@ restart: yield_allowed = 0; #endif + /* Enable GC again, through strictly not needed it puts + the process in a consistent state. */ erts_set_gc_state(p, 1); /* Set state to not active as we don't want this process diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index 7885d35d9d..9dcdd60060 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -1400,6 +1400,10 @@ get_conflicting_unicode_atoms(CIX, N) -> %% The message_latency_large tests that small distribution messages are %% not blocked by other large distribution messages. Basically it tests %% that fragmentation of distribution messages works. +%% +%% Because of large problems to get reliable values from these testcases +%% they no longer fail when the latency is incorrect. However, they are +%% kept as they continue to find bugs in the distribution implementation. message_latency_large_message(Config) when is_list(Config) -> measure_latency_large_message(?FUNCTION_NAME, fun(Dropper, Payload) -> Dropper ! Payload end). @@ -1484,7 +1488,11 @@ measure_latency_large_message(Nodename, DataFun) -> case {lists:max(Times), lists:min(Times)} of {Max, Min} when Max * 0.25 > Min, BuildType =:= opt -> - ct:fail({incorrect_latency, IndexTimes}); + %% We only issue a comment for this failure as the + %% testcases proved very difficult to run successfully + %% on many platforms. + ct:comment({incorrect_latency, IndexTimes}), + ok; _ -> ok end. @@ -1503,10 +1511,7 @@ measure_latency(DataFun, Dropper, Echo, Payload) -> end end) || _ <- lists:seq(1,2)], - [receive - {monitor, _Sender, busy_dist_port, _Info} -> - ok - end || _ <- lists:seq(1,10)], + wait_for_busy_dist(2 * 60 * 1000, 10), {TS, Times} = timer:tc(fun() -> @@ -1530,6 +1535,18 @@ measure_latency(DataFun, Dropper, Echo, Payload) -> end || {Sender, Ref} <- Senders], TS. +wait_for_busy_dist(_Tmo, 0) -> + ok; +wait_for_busy_dist(Tmo, N) -> + T0 = erlang:monotonic_time(millisecond), + receive + {monitor, _Sender, busy_dist_port, _Info} -> + wait_for_busy_dist(Tmo - (erlang:monotonic_time(millisecond) - T0), N - 1) + after Tmo -> + ct:log("Timed out waiting for busy_dist, ~p left",[N]), + timeout + end. + flush() -> receive _ -> @@ -2600,7 +2617,7 @@ verify_nc(Node) -> demonitor(MonRef,[flush]), ok; {Ref, Error} -> - ct:log("~p",[Error]), + ct:log("~s",[Error]), ct:fail(failed_nc_refc_check); {'DOWN', MonRef, _, _, _} = Down -> ct:log("~p",[Down]), diff --git a/erts/emulator/test/dump_SUITE.erl b/erts/emulator/test/dump_SUITE.erl index 9f8ac42fa9..b7da69e556 100644 --- a/erts/emulator/test/dump_SUITE.erl +++ b/erts/emulator/test/dump_SUITE.erl @@ -140,13 +140,13 @@ free_dump(Config) when is_list(Config) -> {ok, NodeA} = start_node(Config), {ok, NodeB} = start_node(Config), - Self = self(), PidA = spawn_link( NodeA, fun() -> Self ! ready, + Reason = lists:duplicate(1000000,100), receive ok -> spawn(fun() -> @@ -154,24 +154,29 @@ free_dump(Config) when is_list(Config) -> timer:sleep(5), receive M -> - io:format("~p",[M]), - erlang:halt("dump") - end + io:format("~p",[M]) +%% We may want to add this timeout here in-case no busy condition is triggered +%% after 60 * 1000 -> +%% io:format("Timeout") + end, + erlang:halt("dump") end), - exit(lists:duplicate(1000000,100)) + exit(Reason) end end), - spawn_link(NodeB, - fun() -> - [erlang:monitor(process, PidA) || _ <- lists:seq(1,10000)], - Self ! done, - receive _ -> ok end - end), + PidB = spawn_link(NodeB, + fun() -> + [erlang:monitor(process, PidA) || _ <- lists:seq(1,10000)], + Self ! done, + receive _ -> ok end + end), receive done -> ok end, true = rpc:call(NodeA, os, putenv, ["ERL_CRASH_DUMP",Dump]), - ct:pal("~p",[rpc:call(NodeA, distribution_SUITE, make_busy, [NodeB, 1000])]), + %% Make the node busy towards NodeB for 10 seconds. + BusyPid = rpc:call(NodeA, distribution_SUITE, make_busy, [NodeB,10000]), + ct:pal("~p",[BusyPid]), receive ready -> unlink(PidA), PidA ! ok end, @@ -185,6 +190,10 @@ free_dump(Config) when is_list(Config) -> file:delete(Dump), + unlink(PidB), + + rpc:call(NodeB, erlang, halt, [0]), + ok. diff --git a/erts/emulator/test/multi_load_SUITE.erl b/erts/emulator/test/multi_load_SUITE.erl index edf3205812..c79e2b6dcd 100644 --- a/erts/emulator/test/multi_load_SUITE.erl +++ b/erts/emulator/test/multi_load_SUITE.erl @@ -30,7 +30,15 @@ all() -> [many,on_load,errors]. many(_Config) -> - Ms = make_modules(100, fun many_module/1), + + N = case erlang:system_info(build_type) of + valgrind -> + 10; + _ -> + 100 + end, + + Ms = make_modules(N, fun many_module/1), io:put_chars("Light load\n" "=========="), diff --git a/erts/etc/unix/cerl.src b/erts/etc/unix/cerl.src index 710a7a9ef6..59de9bdec8 100644 --- a/erts/etc/unix/cerl.src +++ b/erts/etc/unix/cerl.src @@ -312,8 +312,11 @@ if [ "x$GDB" = "x" ]; then # on multiple cores (especially with async threads). Valgrind only run one pthread # at a time anyway so there is no point letting it utilize more than one core. # Use $sched_arg to force all schedulers online to emulate multicore. - taskset1="taskset 1" ncpu=`cat /proc/cpuinfo | grep -w processor | wc -l` + # Choose a random core in order to not collide with any other valgrind + # run on the same machine. + taskset1=$((1 << (`shuf -i 1-$ncpu -n 1` - 1) )) + taskset1="taskset $taskset1" sched_arg="-S$ncpu:$ncpu" else taskset1= |