From c0ca8209570b42bfbb029eb7e9ae8750a43fb739 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Mon, 18 Mar 2019 13:33:30 +0100 Subject: erts: Add crash dumping of EXITING and FREE processes --- erts/emulator/beam/break.c | 32 ++++++++++-- erts/emulator/beam/dist.c | 19 +++++++ erts/emulator/beam/dist.h | 1 + erts/emulator/beam/erl_process_dump.c | 14 ++--- erts/emulator/test/dump_SUITE.erl | 98 +++++++++++++++++++++++++++++++---- 5 files changed, 145 insertions(+), 19 deletions(-) diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 27bf2187c2..303ab9d9b7 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -39,6 +39,7 @@ #include "erl_hl_timer.h" #include "erl_thr_progress.h" #include "erl_proc_sig_queue.h" +#include "dist.h" /* Forward declarations -- should really appear somewhere else */ static void process_killer(void); @@ -81,11 +82,32 @@ process_info(fmtfn_t to, void *to_arg) /* Do not include processes with no heap, * they are most likely just created and has invalid data */ - if (!ERTS_PROC_IS_EXITING(p) && p->heap != NULL) + if (p->heap != NULL) print_process_info(to, to_arg, p, 0); } } + /* Look for FREE processes in the run-queues and dist entries. + These have been removed from the ptab but we still want them + in the crash dump for debugging. */ + + /* First loop through all run-queues */ + for (i = 0; i < erts_no_schedulers + ERTS_NUM_DIRTY_RUNQS; i++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(i); + int j; + for (j = 0; j < ERTS_NO_PROC_PRIO_QUEUES; j++) { + Process *p = rq->procs.prio[j].first; + while (p) { + if (ERTS_PSFLG_FREE & erts_atomic32_read_acqb(&p->state)) + print_process_info(to, to_arg, p, 0); + p = p->next; + } + } + } + + /* Then check all dist entries */ + erts_dist_print_procs_suspended_on_de(to, to_arg); + port_info(to, to_arg); } @@ -206,6 +228,7 @@ print_process_info(fmtfn_t to, void *to_arg, Process *p, ErtsProcLocks orig_lock { int garbing = 0; int running = 0; + int exiting = 0; Sint len; struct saved_calls *scb; erts_aint32_t state; @@ -226,6 +249,9 @@ print_process_info(fmtfn_t to, void *to_arg, Process *p, ErtsProcLocks orig_lock | ERTS_PSFLG_DIRTY_RUNNING)) running = 1; + if (state & ERTS_PSFLG_EXITING) + exiting = 1; + if (!(locks & ERTS_PROC_LOCK_MAIN)) { locks |= ERTS_PROC_LOCK_MAIN; if (ERTS_IS_CRASH_DUMPING && running) { @@ -246,7 +272,7 @@ print_process_info(fmtfn_t to, void *to_arg, Process *p, ErtsProcLocks orig_lock * If the process is registered as a global process, display the * registered name */ - if (p->common.u.alive.reg) + if (!ERTS_PROC_IS_EXITING(p) && p->common.u.alive.reg) erts_print(to, to_arg, "Name: %T\n", p->common.u.alive.reg->name); /* @@ -332,7 +358,7 @@ print_process_info(fmtfn_t to, void *to_arg, Process *p, ErtsProcLocks orig_lock } /* display the links only if there are any*/ - if (ERTS_P_LINKS(p) || ERTS_P_MONITORS(p) || ERTS_P_LT_MONITORS(p)) { + if (!exiting && (ERTS_P_LINKS(p) || ERTS_P_MONITORS(p) || ERTS_P_LT_MONITORS(p))) { PrintMonitorContext context = {1, to, to_arg}; erts_print(to, to_arg,"Link list: ["); erts_link_tree_foreach(ERTS_P_LINKS(p), doit_print_link, &context); diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 2c2af4bdd3..4c200ecc83 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -5046,3 +5046,22 @@ erts_processes_monitoring_nodes(Process *c_p) return ctxt.res; } + +static void +print_suspended_on_de(fmtfn_t to, void *to_arg, DistEntry *dep) +{ + for (; dep; dep = dep->next) { + ErtsProcList *curr = erts_proclist_peek_first(dep->suspended); + while (curr) { + if (!is_internal_pid(curr->u.pid)) + print_process_info(to, to_arg, curr->u.p, 0); + curr = erts_proclist_peek_next(dep->suspended, curr); + } + } +} + +void +erts_dist_print_procs_suspended_on_de(fmtfn_t to, void *to_arg) { + print_suspended_on_de(to, to_arg, erts_hidden_dist_entries); + print_suspended_on_de(to, to_arg, erts_visible_dist_entries); +} diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index 9b5e62ab7e..f953a2ab8c 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -319,5 +319,6 @@ extern int erts_dsig_prepare(ErtsDSigSendContext *, int, int); +void erts_dist_print_procs_suspended_on_de(fmtfn_t to, void *to_arg); int erts_auto_connect(DistEntry* dep, Process *proc, ErtsProcLocks proc_locks); #endif diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c index a164ed543e..71262061dd 100644 --- a/erts/emulator/beam/erl_process_dump.c +++ b/erts/emulator/beam/erl_process_dump.c @@ -121,12 +121,14 @@ Uint erts_process_memory(Process *p, int include_sigs_in_transit) size += sizeof(Process); - erts_link_tree_foreach(ERTS_P_LINKS(p), - link_size, (void *) &size); - erts_monitor_tree_foreach(ERTS_P_MONITORS(p), - monitor_size, (void *) &size); - erts_monitor_list_foreach(ERTS_P_LT_MONITORS(p), - monitor_size, (void *) &size); + if ((erts_atomic32_read_nob(&p->state) & ERTS_PSFLG_EXITING) == 0) { + erts_link_tree_foreach(ERTS_P_LINKS(p), + link_size, (void *) &size); + erts_monitor_tree_foreach(ERTS_P_MONITORS(p), + monitor_size, (void *) &size); + erts_monitor_list_foreach(ERTS_P_LT_MONITORS(p), + monitor_size, (void *) &size); + } size += (p->heap_sz + p->mbuf_sz) * sizeof(Eterm); if (p->abandoned_heap) size += (p->hend - p->heap) * sizeof(Eterm); diff --git a/erts/emulator/test/dump_SUITE.erl b/erts/emulator/test/dump_SUITE.erl index d0237b78cc..3b860ebdf6 100644 --- a/erts/emulator/test/dump_SUITE.erl +++ b/erts/emulator/test/dump_SUITE.erl @@ -24,7 +24,7 @@ -export([all/0, suite/0, init_per_testcase/2, end_per_testcase/2]). --export([signal_abort/1]). +-export([signal_abort/1, exiting_dump/1, free_dump/1]). -export([load/0]). @@ -35,7 +35,7 @@ suite() -> {timetrap, {minutes, 2}}]. all() -> - [signal_abort]. + [signal_abort, exiting_dump, free_dump]. init_per_testcase(signal_abort, Config) -> SO = erlang:system_info(schedulers_online), @@ -48,7 +48,10 @@ init_per_testcase(signal_abort, Config) -> {skip, "the platform does not support scheduler dump"}; Dump -> Config - end. + end; +init_per_testcase(_, Config) -> + Config. + end_per_testcase(_, Config) -> Config. @@ -79,8 +82,6 @@ signal_abort(Config) -> {ok, Bin} = get_dump_when_done(Dump), - ct:log("~s",[Bin]), - {match, Matches} = re:run(Bin,"Current Process: <",[global]), ct:log("Found ~p",[Matches]), @@ -91,6 +92,85 @@ signal_abort(Config) -> ok. +load() -> + lists:seq(1,10000), + load(). + + +%% Test that crash dumping when a process is in the state EXITING works +exiting_dump(Config) when is_list(Config) -> + Dump = filename:join(proplists:get_value(priv_dir, Config),"signal_abort.dump"), + + {ok, Node} = start_node(Config), + + Self = self(), + + Pid = spawn_link(Node, + fun() -> + [begin + T = ets:new(hej,[]), + [ets:insert(T,{I,I}) || I <- lists:seq(1,1000)] + end || _ <- lists:seq(1,1000)], + Self ! ready, + receive ok -> ok end + end), + + true = rpc:call(Node, os, putenv, ["ERL_CRASH_DUMP",Dump]), + + receive ready -> unlink(Pid), Pid ! ok end, + + rpc:call(Node, erlang, halt, ["dump"]), + + {ok, Bin} = get_dump_when_done(Dump), + + {match, Matches} = re:run(Bin,"^State: Exiting", [global, multiline]), + + ct:log("Found ~p",[Matches]), + + true = length(Matches) == 1, + + file:delete(Dump), + + ok. + +%% Test that crash dumping when a process is in the state FREE works +free_dump(Config) when is_list(Config) -> + Dump = filename:join(proplists:get_value(priv_dir, Config),"signal_abort.dump"), + + {ok, Node} = start_node(Config), + + Self = self(), + + Pid = spawn_link(Node, + fun() -> + Self ! ready, + receive + ok -> + unlink(Self), + exit(lists:duplicate(1000,1000)) + end + end), + + true = rpc:call(Node, os, putenv, ["ERL_CRASH_DUMP",Dump]), + + [erlang:monitor(process, Pid) || _ <- lists:seq(1,10000)], + receive ready -> unlink(Pid), Pid ! ok end, + + rpc:call(Node, erlang, halt, ["dump"]), + + {ok, Bin} = get_dump_when_done(Dump), + + {match, Matches} = re:run(Bin,"^State: Non Existing", [global, multiline]), + + ct:log("Found ~p",[Matches]), + + true = length(Matches) == 1, + + file:delete(Dump), + + ok. + + get_dump_when_done(Dump) -> case file:read_file_info(Dump) of {ok, #file_info{ size = Sz }} -> @@ -104,15 +184,13 @@ get_dump_when_done(Dump, Sz) -> timer:sleep(1000), case file:read_file_info(Dump) of {ok, #file_info{ size = Sz }} -> - file:read_file(Dump); + {ok, Bin} = file:read_file(Dump), + ct:log("~s",[Bin]), + {ok, Bin}; {ok, #file_info{ size = NewSz }} -> get_dump_when_done(Dump, NewSz) end. -load() -> - lists:seq(1,10000), - load(). - start_node(Config) when is_list(Config) -> Pa = filename:dirname(code:which(?MODULE)), Name = list_to_atom(atom_to_list(?MODULE) -- cgit v1.2.3