aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2014-12-04 11:09:06 +0100
committerLukas Larsson <[email protected]>2014-12-19 10:39:52 +0100
commitffd0153e6dffcc29cf79d0191860047dba0438bb (patch)
tree31a0ced512b5b243caa5933c73aa4dc70d6c0c87 /erts/emulator
parente2a600341324d2ce3689119f8fd61b248702d79f (diff)
downloadotp-ffd0153e6dffcc29cf79d0191860047dba0438bb.tar.gz
otp-ffd0153e6dffcc29cf79d0191860047dba0438bb.tar.bz2
otp-ffd0153e6dffcc29cf79d0191860047dba0438bb.zip
erts: Improve crash dumps
This commit improves crash dumps in several ways: * Suspends schedulers to get a current snapshot * Dumps information about scheduler * Dumps stack trace of current running process (including Garbing processes)
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/beam/break.c128
-rw-r--r--erts/emulator/beam/erl_bif_info.c7
-rw-r--r--erts/emulator/beam/erl_process.c215
-rw-r--r--erts/emulator/beam/erl_process.h13
-rw-r--r--erts/emulator/beam/erl_process_dump.c174
-rw-r--r--erts/emulator/beam/erl_thr_progress.c42
-rw-r--r--erts/emulator/beam/erl_thr_progress.h4
-rw-r--r--erts/emulator/beam/sys.h2
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys.h26
-rw-r--r--erts/emulator/sys/unix/sys.c31
-rw-r--r--erts/emulator/sys/win32/erl_win_sys.h12
-rw-r--r--erts/emulator/sys/win32/sys.c21
-rw-r--r--erts/emulator/test/bif_SUITE.erl33
13 files changed, 647 insertions, 61 deletions
diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c
index 5aee85174f..41765dad12 100644
--- a/erts/emulator/beam/break.c
+++ b/erts/emulator/beam/break.c
@@ -210,25 +210,12 @@ print_process_info(int to, void *to_arg, Process *p)
erts_print(to, to_arg, "State: ");
state = erts_smp_atomic32_read_acqb(&p->state);
- if (state & ERTS_PSFLG_FREE)
- erts_print(to, to_arg, "Non Existing\n"); /* Should never happen */
- else if (state & ERTS_PSFLG_EXITING)
- erts_print(to, to_arg, "Exiting\n");
- else if (state & ERTS_PSFLG_GC) {
- garbing = 1;
- running = 1;
- erts_print(to, to_arg, "Garbing\n");
- }
- else if (state & ERTS_PSFLG_SUSPENDED)
- erts_print(to, to_arg, "Suspended\n");
- else if (state & ERTS_PSFLG_RUNNING) {
- running = 1;
- erts_print(to, to_arg, "Running\n");
- }
- else if (state & ERTS_PSFLG_ACTIVE)
- erts_print(to, to_arg, "Scheduled\n");
- else
- erts_print(to, to_arg, "Waiting\n");
+ erts_dump_process_state(to, to_arg, state);
+ if (state & ERTS_PSFLG_GC) {
+ garbing = 1;
+ running = 1;
+ } else if (state & ERTS_PSFLG_RUNNING)
+ running = 1;
/*
* If the process is registered as a global process, display the
@@ -352,6 +339,10 @@ print_process_info(int to, void *to_arg, Process *p)
#endif
erts_stack_dump(to, to_arg, p);
}
+
+ /* Display all states */
+ erts_print(to, to_arg, "Internal State: ");
+ erts_dump_extended_process_state(to, to_arg, state);
}
static void
@@ -671,6 +662,7 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args)
{
#ifdef ERTS_SMP
ErtsThrPrgrData tpd_buf; /* in case we aren't a managed thread... */
+ int bc;
#endif
int fd;
size_t envsz;
@@ -681,27 +673,39 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args)
char* dumpname;
int secs;
int env_erl_crash_dump_seconds_set = 1;
+ int i;
if (ERTS_SOMEONE_IS_CRASH_DUMPING)
return;
#ifdef ERTS_SMP
+ /* Order all managed threads to block, this has to be done
+ first to guarantee that this is the only thread to generate
+ crash dump. */
+ bc = erts_thr_progress_fatal_error_block(&tpd_buf);
+
+#ifdef ERTS_THR_HAVE_SIG_FUNCS
/*
- * Wait for all managed threads to block. If all threads haven't blocked
- * after a minute, we go anyway and hope for the best...
- *
- * We do not release system again. We expect an exit() or abort() after
- * dump has been written.
+ * We suspend all scheduler threads so that we can dump some
+ * data about the currently running processes and scheduler data.
+ * We have to be very very careful when doing this as the schedulers
+ * could be anywhere.
*/
- erts_thr_progress_fatal_error_block(60000, &tpd_buf);
- /* Either worked or not... */
+ for (i = 0; i < erts_no_schedulers; i++) {
+ erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid;
+ if (!erts_equal_tids(tid,erts_thr_self()))
+ sys_thr_suspend(tid);
+ }
+
+#endif
/* Allow us to pass certain places without locking... */
erts_smp_atomic32_set_mb(&erts_writing_erl_crash_dump, 1);
erts_smp_tsd_set(erts_is_crash_dumping_key, (void *) 1);
-#else
+
+#else /* !ERTS_SMP */
erts_writing_erl_crash_dump = 1;
-#endif
+#endif /* ERTS_SMP */
envsz = sizeof(env);
/* ERL_CRASH_DUMP_SECONDS not set
@@ -758,9 +762,8 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args)
erts_fprintf(stderr,"\nCrash dump is being written to: %s...", dumpname);
fd = open(dumpname,O_WRONLY | O_CREAT | O_TRUNC,0640);
- if (fd < 0)
+ if (fd < 0)
return; /* Can't create the crash dump, skip it */
-
time(&now);
erts_fdprintf(fd, "=erl_crash_dump:0.3\n%s", ctime(&now));
@@ -774,9 +777,74 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args)
erts_fdprintf(fd, "System version: ");
erts_print_system_version(fd, NULL, NULL);
erts_fdprintf(fd, "%s\n", "Compiled: " ERLANG_COMPILE_DATE);
+
erts_fdprintf(fd, "Taints: ");
erts_print_nif_taints(fd, NULL);
erts_fdprintf(fd, "Atoms: %d\n", atom_table_size());
+
+#ifdef USE_THREADS
+ /* We want to note which thread it was that called erl_exit */
+ if (erts_get_scheduler_data()) {
+ erts_fdprintf(fd, "Calling Thread: scheduler:%d\n",
+ erts_get_scheduler_data()->no);
+ } else {
+ if (!erts_thr_getname(erts_thr_self(), dumpnamebuf, MAXPATHLEN))
+ erts_fdprintf(fd, "Calling Thread: %s\n", dumpnamebuf);
+ else
+ erts_fdprintf(fd, "Calling Thread: %p\n", erts_thr_self());
+ }
+#else
+ erts_fdprintf(fd, "Calling Thread: scheduler:1\n");
+#endif
+
+#if defined(ERTS_HAVE_TRY_CATCH)
+
+ /*
+ * erts_print_scheduler_info is not guaranteed to be safe to call
+ * here for all schedulers as we may have suspended a scheduler
+ * in the middle of updating the STACK_TOP and STACK_START
+ * variables and thus when scanning the stack we could get
+ * segmentation faults. We protect against this very unlikely
+ * scenario by using the ERTS_SYS_TRY_CATCH.
+ */
+ for (i = 0; i < erts_no_schedulers; i++) {
+ ERTS_SYS_TRY_CATCH(
+ erts_print_scheduler_info(fd, NULL, ERTS_SCHEDULER_IX(i)),
+ erts_fdprintf(fd, "** crashed **\n"));
+ }
+#endif
+
+#ifdef ERTS_SMP
+
+#if defined(ERTS_THR_HAVE_SIG_FUNCS)
+
+ /* We resume all schedulers so that we are in a known safe state
+ when we write the rest of the crash dump */
+ for (i = 0; i < erts_no_schedulers; i++) {
+ erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid;
+ if (!erts_equal_tids(tid,erts_thr_self()))
+ sys_thr_resume(tid);
+ }
+#endif
+
+ /*
+ * Wait for all managed threads to block. If all threads haven't blocked
+ * after a minute, we go anyway and hope for the best...
+ *
+ * We do not release system again. We expect an exit() or abort() after
+ * dump has been written.
+ */
+ erts_thr_progress_fatal_error_wait(60000);
+ /* Either worked or not... */
+#endif
+
+#ifndef ERTS_HAVE_TRY_CATCH
+ /* This is safe to call here, as all schedulers are blocked */
+ for (i = 0; i < erts_no_schedulers; i++) {
+ erts_print_scheduler_info(fd, NULL, ERTS_SCHEDULER_IX(i));
+ }
+#endif
+
info(fd, NULL); /* General system info */
if (erts_ptab_initialized(&erts_proc))
process_info(fd, NULL); /* Info about each process and port */
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index d2ee5e4224..d750e34be3 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3900,6 +3900,13 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2)
}
}
}
+ else if (ERTS_IS_ATOM_STR("broken_halt", BIF_ARG_1)) {
+ /* Ugly ugly code used by bif_SUITE:erlang_halt/1 */
+#if defined(ERTS_HAVE_TRY_CATCH)
+ erts_get_scheduler_data()->run_queue = NULL;
+#endif
+ erl_exit(ERTS_DUMP_EXIT, "%T", BIF_ARG_2);
+ }
}
BIF_ERROR(BIF_P, BADARG);
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 31dda21b47..1ec931e312 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -457,8 +457,7 @@ do { \
static void exec_misc_ops(ErtsRunQueue *);
static void print_function_from_pc(int to, void *to_arg, BeamInstr* x);
-static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
- int yreg);
+static int stack_element_dump(int to, void *to_arg, Eterm* sp, int yreg);
static void aux_work_timeout(void *unused);
static void aux_work_timeout_early_init(int no_schedulers);
@@ -12186,7 +12185,7 @@ erts_stack_dump(int to, void *to_arg, Process *p)
}
erts_program_counter_info(to, to_arg, p);
for (sp = p->stop; sp < STACK_START(p); sp++) {
- yreg = stack_element_dump(to, to_arg, p, sp, yreg);
+ yreg = stack_element_dump(to, to_arg, sp, yreg);
}
}
@@ -12243,7 +12242,7 @@ print_function_from_pc(int to, void *to_arg, BeamInstr* x)
}
static int
-stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg)
+stack_element_dump(int to, void *to_arg, Eterm* sp, int yreg)
{
Eterm x = *sp;
@@ -12272,6 +12271,214 @@ stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg)
}
/*
+ * Print scheduler information
+ */
+void
+erts_print_scheduler_info(int to, void *to_arg, ErtsSchedulerData *esdp) {
+ int i;
+ erts_aint32_t flg;
+ Process *p;
+
+ erts_print(to, to_arg, "=scheduler:%u\n", esdp->no);
+
+#ifdef ERTS_SMP
+ flg = erts_smp_atomic32_read_dirty(&esdp->ssi->flags);
+ erts_print(to, to_arg, "Scheduler Sleep Info Flags: ");
+ for (i = 0; i < ERTS_SSI_FLGS_MAX && flg; i++) {
+ erts_aint32_t chk = (1 << i);
+ if (flg & chk) {
+ switch (chk) {
+ case ERTS_SSI_FLG_SLEEPING:
+ erts_print(to, to_arg, "SLEEPING"); break;
+ case ERTS_SSI_FLG_POLL_SLEEPING:
+ erts_print(to, to_arg, "POLL_SLEEPING"); break;
+ case ERTS_SSI_FLG_TSE_SLEEPING:
+ erts_print(to, to_arg, "TSE_SLEEPING"); break;
+ case ERTS_SSI_FLG_WAITING:
+ erts_print(to, to_arg, "WAITING"); break;
+ case ERTS_SSI_FLG_SUSPENDED:
+ erts_print(to, to_arg, "SUSPENDED"); break;
+ default:
+ erts_print(to, to_arg, "UNKNOWN(%d)", flg); break;
+ }
+ if (flg > chk)
+ erts_print(to, to_arg, " | ");
+ flg -= chk;
+ }
+ }
+ erts_print(to, to_arg, "\n");
+#endif
+
+ flg = erts_atomic32_read_dirty(&esdp->ssi->aux_work);
+ erts_print(to, to_arg, "Scheduler Sleep Info Aux Work: ");
+ for (i = 0; i < ERTS_SSI_AUX_WORK_MAX && flg; i++) {
+ erts_aint32_t chk = (1 << i);
+ if (flg & chk) {
+ switch (chk) {
+ case ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP:
+ erts_print(to, to_arg, "DELAYED_AW_WAKEUP"); break;
+ case ERTS_SSI_AUX_WORK_DD:
+ erts_print(to, to_arg, "DELAYED_DEALLOC"); break;
+ case ERTS_SSI_AUX_WORK_DD_THR_PRGR:
+ erts_print(to, to_arg, "DELAYED_DEALLOC_THR_PRGR"); break;
+ case ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC:
+ erts_print(to, to_arg, "FIX_ALLOC_DEALLOC"); break;
+ case ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM:
+ erts_print(to, to_arg, "FIX_ALLOC_LOWER_LIM"); break;
+ case ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP:
+ erts_print(to, to_arg, "THR_PRGR_LATER_OP"); break;
+ case ERTS_SSI_AUX_WORK_ASYNC_READY:
+ erts_print(to, to_arg, "ASYNC_READY"); break;
+ case ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN:
+ erts_print(to, to_arg, "ASYNC_READY_CLEAN"); break;
+ case ERTS_SSI_AUX_WORK_MISC_THR_PRGR:
+ erts_print(to, to_arg, "MISC_THR_PRGR"); break;
+ case ERTS_SSI_AUX_WORK_MISC:
+ erts_print(to, to_arg, "MISC"); break;
+ case ERTS_SSI_AUX_WORK_CHECK_CHILDREN:
+ erts_print(to, to_arg, "CHECK_CHILDREN"); break;
+ case ERTS_SSI_AUX_WORK_SET_TMO:
+ erts_print(to, to_arg, "SET_TMO"); break;
+ case ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK:
+ erts_print(to, to_arg, "MSEG_CACHE_CHECK"); break;
+ case ERTS_SSI_AUX_WORK_REAP_PORTS:
+ erts_print(to, to_arg, "REAP_PORTS"); break;
+ default:
+ erts_print(to, to_arg, "UNKNOWN(%d)", flg); break;
+ }
+ if (flg > chk)
+ erts_print(to, to_arg, " | ");
+ flg -= chk;
+ }
+ }
+ erts_print(to, to_arg, "\n");
+
+ erts_print(to, to_arg, "Current Port: ");
+ if (esdp->current_port)
+ erts_print(to, to_arg, "%T", esdp->current_port->common.id);
+ erts_print(to, to_arg, "\n");
+
+ p = esdp->current_process;
+ erts_print(to, to_arg, "Current Process: ");
+ if (esdp->current_process && !(ERTS_TRACE_FLAGS(p) & F_SENSITIVE)) {
+ flg = erts_smp_atomic32_read_dirty(&p->state);
+ erts_print(to, to_arg, "%T\n", p->common.id);
+
+ erts_print(to, to_arg, "Current Process State: ");
+ erts_dump_process_state(to, to_arg, flg);
+
+ erts_print(to, to_arg, "Current Process Internal State: ");
+ erts_dump_extended_process_state(to, to_arg, flg);
+
+ erts_print(to, to_arg, "Current Process Program counter: %p (", p->i);
+ print_function_from_pc(to, to_arg, p->i);
+ erts_print(to, to_arg, ")\n");
+ erts_print(to, to_arg, "Current Process CP: %p (", p->cp);
+ print_function_from_pc(to, to_arg, p->cp);
+ erts_print(to, to_arg, ")\n");
+
+ /* Getting this stacktrace can segfault if we are very very
+ unlucky if called while a process is being garbage collected.
+ Therefore we only call this on other schedulers if we either
+ have protection against segfaults, or we know that the process
+ is not garbage collecting. It *should* always be safe to call
+ on a process owned by us, even if it is currently being garbage
+ collected.
+ */
+ erts_print(to, to_arg, "Current Process Limited Stack Trace:\n");
+ erts_limited_stack_trace(to, to_arg, p);
+ } else
+ erts_print(to, to_arg, "\n");
+
+ for (i = 0; i < ERTS_NO_PROC_PRIO_LEVELS; i++) {
+ erts_print(to, to_arg, "Run Queue ");
+ switch (i) {
+ case PRIORITY_MAX:
+ erts_print(to, to_arg, "Max ");
+ break;
+ case PRIORITY_HIGH:
+ erts_print(to, to_arg, "High ");
+ break;
+ case PRIORITY_NORMAL:
+ erts_print(to, to_arg, "Normal ");
+ break;
+ case PRIORITY_LOW:
+ erts_print(to, to_arg, "Low ");
+ break;
+ default:
+ erts_print(to, to_arg, "Unknown ");
+ break;
+ }
+ erts_print(to, to_arg, "Length: %d\n",
+ erts_smp_atomic32_read_dirty(&esdp->run_queue->procs.prio_info[i].len));
+ }
+ erts_print(to, to_arg, "Run Queue Port Length: %d\n",
+ erts_smp_atomic32_read_dirty(&esdp->run_queue->ports.info.len));
+
+ flg = erts_smp_atomic32_read_dirty(&esdp->run_queue->flags);
+ erts_print(to, to_arg, "Run Queue Flags: ");
+ for (i = 0; i < ERTS_RUNQ_FLG_MAX && flg; i++) {
+ erts_aint32_t chk = (1 << i);
+ if (flg & chk) {
+ switch (chk) {
+ case (1 << PRIORITY_MAX):
+ erts_print(to, to_arg, "NONEMPTY_MAX"); break;
+ case (1 << PRIORITY_HIGH):
+ erts_print(to, to_arg, "NONEMPTY_HIGH"); break;
+ case (1 << PRIORITY_NORMAL):
+ erts_print(to, to_arg, "NONEMPTY_NORMAL"); break;
+ case (1 << PRIORITY_LOW):
+ erts_print(to, to_arg, "NONEMPTY_LOW"); break;
+ case (1 << (PRIORITY_MAX + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)):
+ erts_print(to, to_arg, "EMIGRATE_MAX"); break;
+ case (1 << (PRIORITY_HIGH + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)):
+ erts_print(to, to_arg, "EMIGRATE_HIGH"); break;
+ case (1 << (PRIORITY_NORMAL + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)):
+ erts_print(to, to_arg, "EMIGRATE_NORMAL"); break;
+ case (1 << (PRIORITY_LOW + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)):
+ erts_print(to, to_arg, "EMIGRATE_LOW"); break;
+ case (1 << (PRIORITY_MAX + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)):
+ erts_print(to, to_arg, "IMMIGRATE_MAX"); break;
+ case (1 << (PRIORITY_HIGH + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)):
+ erts_print(to, to_arg, "IMMIGRATE_HIGH"); break;
+ case (1 << (PRIORITY_NORMAL + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)):
+ erts_print(to, to_arg, "IMMIGRATE_NORMAL"); break;
+ case (1 << (PRIORITY_LOW + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)):
+ erts_print(to, to_arg, "IMMIGRATE_LOW"); break;
+ case (1 << (PRIORITY_MAX + ERTS_RUNQ_FLGS_EVACUATE_SHFT)):
+ erts_print(to, to_arg, "EVACUATE_MAX"); break;
+ case (1 << (PRIORITY_HIGH + ERTS_RUNQ_FLGS_EVACUATE_SHFT)):
+ erts_print(to, to_arg, "EVACUATE_HIGH"); break;
+ case (1 << (PRIORITY_NORMAL + ERTS_RUNQ_FLGS_EVACUATE_SHFT)):
+ erts_print(to, to_arg, "EVACUATE_NORMAL"); break;
+ case (1 << (PRIORITY_LOW + ERTS_RUNQ_FLGS_EVACUATE_SHFT)):
+ erts_print(to, to_arg, "EVACUATE_LOW"); break;
+ case ERTS_RUNQ_FLG_OUT_OF_WORK:
+ erts_print(to, to_arg, "OUT_OF_WORK"); break;
+ case ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK:
+ erts_print(to, to_arg, "HALFTIME_OUT_OF_WORK"); break;
+ case ERTS_RUNQ_FLG_SUSPENDED:
+ erts_print(to, to_arg, "SUSPENDED"); break;
+ case ERTS_RUNQ_FLG_CHK_CPU_BIND:
+ erts_print(to, to_arg, "CHK_CPU_BIND"); break;
+ case ERTS_RUNQ_FLG_INACTIVE:
+ erts_print(to, to_arg, "INACTIVE"); break;
+ case ERTS_RUNQ_FLG_NONEMPTY:
+ erts_print(to, to_arg, "NONEMPTY"); break;
+ case ERTS_RUNQ_FLG_PROTECTED:
+ erts_print(to, to_arg, "PROTECTED"); break;
+ default:
+ erts_print(to, to_arg, "UNKNOWN(%d)", flg); break;
+ }
+ if (flg > chk)
+ erts_print(to, to_arg, " | ");
+ flg -= chk;
+ }
+ }
+ erts_print(to, to_arg, "\n");
+}
+
+/*
* A nice system halt closing all open port goes as follows:
* 1) This function schedules the aux work ERTS_SSI_AUX_WORK_REAP_PORTS
* on all schedulers, then schedules itself out.
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 3d08be25ff..a12b577656 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -170,6 +170,8 @@ extern int erts_sched_thread_suggested_stack_size;
#define ERTS_RUNQ_FLG_PROTECTED \
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 6))
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 7)
+
#define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \
(ERTS_RUNQ_FLGS_EMIGRATE_QMASK \
| ERTS_RUNQ_FLGS_IMMIGRATE_QMASK \
@@ -252,6 +254,8 @@ typedef enum {
#define ERTS_SSI_FLG_WAITING (((erts_aint32_t) 1) << 3)
#define ERTS_SSI_FLG_SUSPENDED (((erts_aint32_t) 1) << 4)
+#define ERTS_SSI_FLGS_MAX 5
+
#define ERTS_SSI_FLGS_SLEEP_TYPE \
(ERTS_SSI_FLG_TSE_SLEEPING|ERTS_SSI_FLG_POLL_SLEEPING)
@@ -283,6 +287,8 @@ typedef enum {
#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 12)
#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 13)
+#define ERTS_SSI_AUX_WORK_MAX 14
+
typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo;
#ifdef ERTS_DIRTY_SCHEDULERS
@@ -1081,6 +1087,9 @@ void erts_check_for_holes(Process* p);
#define ERTS_PSFLG_DIRTY_IO_PROC ERTS_PSFLG_BIT(19)
#define ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q ERTS_PSFLG_BIT(20)
#define ERTS_PSFLG_DIRTY_IO_PROC_IN_Q ERTS_PSFLG_BIT(21)
+#define ERTS_PSFLG_MAX (ERTS_PSFLGS_ZERO_BIT_OFFSET + 22)
+#else
+#define ERTS_PSFLG_MAX (ERTS_PSFLGS_ZERO_BIT_OFFSET + 18)
#endif
#define ERTS_PSFLGS_IN_PRQ_MASK (ERTS_PSFLG_IN_PRQ_MAX \
@@ -1616,7 +1625,11 @@ void erts_cleanup_empty_process(Process* p);
void erts_debug_verify_clean_empty_process(Process* p);
#endif
void erts_stack_dump(int to, void *to_arg, Process *);
+void erts_limited_stack_trace(int to, void *to_arg, Process *);
void erts_program_counter_info(int to, void *to_arg, Process *);
+void erts_print_scheduler_info(int to, void *to_arg, ErtsSchedulerData *esdp);
+void erts_dump_extended_process_state(int to, void *to_arg, erts_aint32_t psflg);
+void erts_dump_process_state(int to, void *to_arg, erts_aint32_t psflg);
Eterm erts_get_process_priority(Process *p);
Eterm erts_set_process_priority(Process *p, Eterm prio);
diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c
index 2f3cf23b00..36bb6b2f0e 100644
--- a/erts/emulator/beam/erl_process_dump.c
+++ b/erts/emulator/beam/erl_process_dump.c
@@ -43,8 +43,9 @@ static void dump_process_info(int to, void *to_arg, Process *p);
static void dump_element(int to, void *to_arg, Eterm x);
static void dump_dist_ext(int to, void *to_arg, ErtsDistExternal *edep);
static void dump_element_nl(int to, void *to_arg, Eterm x);
-static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
+static int stack_element_dump(int to, void *to_arg, Eterm* sp,
int yreg);
+static void stack_trace_dump(int to, void *to_arg, Eterm* sp);
static void print_function_from_pc(int to, void *to_arg, BeamInstr* x);
static void heap_dump(int to, void *to_arg, Eterm x);
static void dump_binaries(int to, void *to_arg, Binary* root);
@@ -148,7 +149,7 @@ dump_process_info(int to, void *to_arg, Process *p)
if ((ERTS_TRACE_FLAGS(p) & F_SENSITIVE) == 0) {
erts_print(to, to_arg, "=proc_stack:%T\n", p->common.id);
for (sp = p->stop; sp < STACK_START(p); sp++) {
- yreg = stack_element_dump(to, to_arg, p, sp, yreg);
+ yreg = stack_element_dump(to, to_arg, sp, yreg);
}
erts_print(to, to_arg, "=proc_heap:%T\n", p->common.id);
@@ -243,9 +244,65 @@ dump_element_nl(int to, void *to_arg, Eterm x)
erts_putc(to, to_arg, '\n');
}
+static void
+stack_trace_dump(int to, void *to_arg, Eterm *sp) {
+ Eterm x = *sp;
+ if (is_CP(x)) {
+ erts_print(to, to_arg, "%p:", sp);
+ erts_print(to, to_arg, "SReturn addr 0x%X (", cp_val(x));
+ print_function_from_pc(to, to_arg, cp_val(x));
+ erts_print(to, to_arg, ")\n");
+ }
+}
+
+void
+erts_limited_stack_trace(int to, void *to_arg, Process *p)
+{
+ Eterm* sp;
+
+
+ if (ERTS_TRACE_FLAGS(p) & F_SENSITIVE) {
+ return;
+ }
+
+ if (STACK_START(p) < STACK_TOP(p)) {
+ return;
+ }
+
+ if ((STACK_START(p) - STACK_TOP(p)) < 512) {
+ if (erts_sys_is_area_readable((char*)STACK_TOP(p),
+ (char*)STACK_START(p)))
+ for (sp = STACK_TOP(p); sp < STACK_START(p); sp++)
+ stack_trace_dump(to, to_arg, sp);
+ else
+ erts_print(to, to_arg, "Could not read from stack memory: %p - %p\n",
+ STACK_TOP(p), STACK_START(p));
+ } else {
+ sp = STACK_TOP(p);
+ if (erts_sys_is_area_readable((char*)STACK_TOP(p),
+ (char*)(STACK_TOP(p) + 25)))
+ for (; sp < (STACK_TOP(p) + 256); sp++)
+ stack_trace_dump(to, to_arg, sp);
+ else
+ erts_print(to, to_arg, "Could not read from stack memory: %p - %p\n",
+ STACK_TOP(p), STACK_TOP(p) + 256);
+
+ erts_print(to, to_arg, "%p: skipping %d frames\n",
+ sp, STACK_START(p) - STACK_TOP(p) - 512);
+
+ if (erts_sys_is_area_readable((char*)(STACK_START(p) - 256),
+ (char*)STACK_START(p)))
+ for (sp = STACK_START(p) - 256; sp < STACK_START(p); sp++)
+ stack_trace_dump(to, to_arg, sp);
+ else
+ erts_print(to, to_arg, "Could not read from stack memory: %p - %p\n",
+ STACK_START(p) - 256, STACK_START(p));
+ }
+
+}
static int
-stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg)
+stack_element_dump(int to, void *to_arg, Eterm* sp, int yreg)
{
Eterm x = *sp;
@@ -508,3 +565,114 @@ dump_externally(int to, void *to_arg, Eterm term)
erts_print(to, to_arg, "%02X", *s++);
}
}
+
+void erts_dump_process_state(int to, void *to_arg, erts_aint32_t psflg) {
+ if (psflg & ERTS_PSFLG_FREE)
+ erts_print(to, to_arg, "Non Existing\n"); /* Should never happen */
+ else if (psflg & ERTS_PSFLG_EXITING)
+ erts_print(to, to_arg, "Exiting\n");
+ else if (psflg & ERTS_PSFLG_GC) {
+ erts_print(to, to_arg, "Garbing\n");
+ }
+ else if (psflg & ERTS_PSFLG_SUSPENDED)
+ erts_print(to, to_arg, "Suspended\n");
+ else if (psflg & ERTS_PSFLG_RUNNING) {
+ erts_print(to, to_arg, "Running\n");
+ }
+ else if (psflg & ERTS_PSFLG_ACTIVE)
+ erts_print(to, to_arg, "Scheduled\n");
+ else
+ erts_print(to, to_arg, "Waiting\n");
+}
+
+void
+erts_dump_extended_process_state(int to, void *to_arg, erts_aint32_t psflg) {
+
+ int i;
+
+ switch (ERTS_PSFLGS_GET_ACT_PRIO(psflg)) {
+ case PRIORITY_MAX: erts_print(to, to_arg, "ACT_PRIO_MAX | "); break;
+ case PRIORITY_HIGH: erts_print(to, to_arg, "ACT_PRIO_HIGH | "); break;
+ case PRIORITY_NORMAL: erts_print(to, to_arg, "ACT_PRIO_NORMAL | "); break;
+ case PRIORITY_LOW: erts_print(to, to_arg, "ACT_PRIO_LOW | "); break;
+ }
+ switch (ERTS_PSFLGS_GET_USR_PRIO(psflg)) {
+ case PRIORITY_MAX: erts_print(to, to_arg, "USR_PRIO_MAX | "); break;
+ case PRIORITY_HIGH: erts_print(to, to_arg, "USR_PRIO_HIGH | "); break;
+ case PRIORITY_NORMAL: erts_print(to, to_arg, "USR_PRIO_NORMAL | "); break;
+ case PRIORITY_LOW: erts_print(to, to_arg, "USR_PRIO_LOW | "); break;
+ }
+ switch (ERTS_PSFLGS_GET_PRQ_PRIO(psflg)) {
+ case PRIORITY_MAX: erts_print(to, to_arg, "PRQ_PRIO_MAX"); break;
+ case PRIORITY_HIGH: erts_print(to, to_arg, "PRQ_PRIO_HIGH"); break;
+ case PRIORITY_NORMAL: erts_print(to, to_arg, "PRQ_PRIO_NORMAL"); break;
+ case PRIORITY_LOW: erts_print(to, to_arg, "PRQ_PRIO_LOW"); break;
+ }
+
+ psflg &= ~(ERTS_PSFLGS_ACT_PRIO_MASK |
+ ERTS_PSFLGS_USR_PRIO_MASK |
+ ERTS_PSFLGS_PRQ_PRIO_MASK);
+
+ if (psflg)
+ erts_print(to, to_arg, " | ");
+
+ for (i = 0; i < ERTS_PSFLG_MAX && psflg; i++) {
+ erts_aint32_t chk = (1 << i);
+ if (psflg & chk) {
+ switch (chk) {
+ case ERTS_PSFLG_IN_PRQ_MAX:
+ erts_print(to, to_arg, "IN_PRQ_MAX"); break;
+ case ERTS_PSFLG_IN_PRQ_HIGH:
+ erts_print(to, to_arg, "IN_PRQ_HIGH"); break;
+ case ERTS_PSFLG_IN_PRQ_NORMAL:
+ erts_print(to, to_arg, "IN_PRQ_NORMAL"); break;
+ case ERTS_PSFLG_IN_PRQ_LOW:
+ erts_print(to, to_arg, "IN_PRQ_LOW"); break;
+ case ERTS_PSFLG_FREE:
+ erts_print(to, to_arg, "FREE"); break;
+ case ERTS_PSFLG_EXITING:
+ erts_print(to, to_arg, "EXITING"); break;
+ case ERTS_PSFLG_PENDING_EXIT:
+ erts_print(to, to_arg, "PENDING_EXIT"); break;
+ case ERTS_PSFLG_ACTIVE:
+ erts_print(to, to_arg, "ACTIVE"); break;
+ case ERTS_PSFLG_IN_RUNQ:
+ erts_print(to, to_arg, "IN_RUNQ"); break;
+ case ERTS_PSFLG_RUNNING:
+ erts_print(to, to_arg, "RUNNING"); break;
+ case ERTS_PSFLG_SUSPENDED:
+ erts_print(to, to_arg, "SUSPENDED"); break;
+ case ERTS_PSFLG_GC:
+ erts_print(to, to_arg, "GC"); break;
+ case ERTS_PSFLG_BOUND:
+ erts_print(to, to_arg, "BOUND"); break;
+ case ERTS_PSFLG_TRAP_EXIT:
+ erts_print(to, to_arg, "TRAP_EXIT"); break;
+ case ERTS_PSFLG_ACTIVE_SYS:
+ erts_print(to, to_arg, "ACTIVE_SYS"); break;
+ case ERTS_PSFLG_RUNNING_SYS:
+ erts_print(to, to_arg, "RUNNING_SYS"); break;
+ case ERTS_PSFLG_PROXY:
+ erts_print(to, to_arg, "PROXY"); break;
+ case ERTS_PSFLG_DELAYED_SYS:
+ erts_print(to, to_arg, "DELAYED_SYS"); break;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ case ERTS_PSFLG_DIRTY_CPU_PROC:
+ erts_print(to, to_arg, "DIRTY_CPU_PROC"); break;
+ case ERTS_PSFLG_DIRTY_IO_PROC:
+ erts_print(to, to_arg, "DIRTY_IO_PROC"); break;
+ case ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q:
+ erts_print(to, to_arg, "DIRTY_CPU_PROC_IN_Q"); break;
+ case ERTS_PSFLG_DIRTY_IO_PROC_IN_Q:
+ erts_print(to, to_arg, "DIRTY_IO_PROC_IN_Q"); break;
+#endif
+ default:
+ erts_print(to, to_arg, "UNKNOWN(%d)", chk); break;
+ }
+ if (psflg > chk)
+ erts_print(to, to_arg, " | ");
+ psflg -= chk;
+ }
+ }
+ erts_print(to, to_arg, "\n");
+}
diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c
index 545a0343d0..3dca2aba10 100644
--- a/erts/emulator/beam/erl_thr_progress.c
+++ b/erts/emulator/beam/erl_thr_progress.c
@@ -1381,25 +1381,10 @@ erts_thr_progress_block(void)
thr_progress_block(tmp_thr_prgr_data(NULL), 1);
}
-void
-erts_thr_progress_fatal_error_block(SWord timeout,
- ErtsThrPrgrData *tmp_tpd_bufp)
+int
+erts_thr_progress_fatal_error_block(ErtsThrPrgrData *tmp_tpd_bufp)
{
ErtsThrPrgrData *tpd = perhaps_thr_prgr_data(NULL);
- erts_aint32_t bc;
- SWord time_left = timeout;
- SysTimeval to;
-
- /*
- * Counting poll intervals may give us a too long timeout
- * if cpu is busy. If we got tolerant time of day we use it
- * to prevent this.
- */
- if (!erts_disable_tolerant_timeofday) {
- erts_get_timeval(&to);
- to.tv_sec += timeout / 1000;
- to.tv_sec += timeout % 1000;
- }
if (!tpd) {
/*
@@ -1412,9 +1397,26 @@ erts_thr_progress_fatal_error_block(SWord timeout,
init_tmp_thr_prgr_data(tpd);
}
- bc = thr_progress_block(tpd, 0);
- if (bc == 0)
- return; /* Succefully blocked all managed threads */
+ /* Returns number of threads that have not yes been blocked */
+ return thr_progress_block(tpd, 0);
+}
+
+void
+erts_thr_progress_fatal_error_wait(SWord timeout) {
+ erts_aint32_t bc;
+ SWord time_left = timeout;
+ SysTimeval to;
+
+ /*
+ * Counting poll intervals may give us a too long timeout
+ * if cpu is busy. If we got tolerant time of day we use it
+ * to prevent this.
+ */
+ if (!erts_disable_tolerant_timeofday) {
+ erts_get_timeval(&to);
+ to.tv_sec += timeout / 1000;
+ to.tv_sec += timeout % 1000;
+ }
while (1) {
if (erts_milli_sleep(ERTS_THR_PRGR_FTL_ERR_BLCK_POLL_INTERVAL) == 0)
diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h
index 5f392944c2..85b4eeb0eb 100644
--- a/erts/emulator/beam/erl_thr_progress.h
+++ b/erts/emulator/beam/erl_thr_progress.h
@@ -83,8 +83,8 @@ typedef struct {
ErtsThrPrgrLeaderState leader_state;
} ErtsThrPrgrData;
-void erts_thr_progress_fatal_error_block(SWord timeout,
- ErtsThrPrgrData *tmp_tpd_bufp);
+int erts_thr_progress_fatal_error_block(ErtsThrPrgrData *tmp_tpd_bufp);
+void erts_thr_progress_fatal_error_wait(SWord timeout);
#endif /* ERTS_SMP */
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index bdc5237815..9227ce74a2 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -756,6 +756,8 @@ typedef struct {
} ErtsCheckIoDebugInfo;
int erts_check_io_debug(ErtsCheckIoDebugInfo *ip);
+int erts_sys_is_area_readable(char *start, char *stop);
+
/* xxxP */
#define SYS_DEFAULT_FLOAT_DECIMALS 20
void init_sys_float(void);
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index bc55fab8fb..1932e97742 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -45,7 +45,7 @@
#include <fcntl.h>
#include "erl_errno.h"
#include <signal.h>
-
+#include <setjmp.h>
#if HAVE_SYS_SOCKETIO_H
# include <sys/socketio.h>
@@ -349,4 +349,28 @@ extern int exit_async(void);
#define ERTS_EXIT_AFTER_DUMP _exit
+#if !defined(__APPLE__) && !defined(__MACH__)
+/* Some OS X versions do not allow (ab)using signal handlers like this */
+#define ERTS_HAVE_TRY_CATCH 1
+
+/* We try to simulate a try catch in C with the help of signal handlers.
+ * Only use this as a very last resort, as it is not very portable and
+ * quite unstable. It is also not thread safe, so make sure that only
+ * one thread can call this at a time!
+ */
+extern void erts_sys_sigsegv_handler(int);
+extern jmp_buf erts_sys_sigsegv_jmp;
+#define ERTS_SYS_TRY_CATCH(EXPR,CATCH) \
+ do { \
+ SIGFUNC prev_handler = sys_signal(SIGSEGV, \
+ erts_sys_sigsegv_handler); \
+ if (!setjmp(erts_sys_sigsegv_jmp)) { \
+ EXPR; \
+ } else { \
+ CATCH; \
+ } \
+ sys_signal(SIGSEGV,prev_handler); \
+ } while(0)
+#endif
+
#endif /* #ifndef _ERL_UNIX_SYS_H */
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index 74d67cbd57..57f9fbef53 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -223,6 +223,8 @@ static int sig_suspend_fds[2] = {-1, -1};
static int async_fd[2];
#endif
+jmp_buf erts_sys_sigsegv_jmp;
+
#if CHLDWTHR || defined(ERTS_SMP)
erts_mtx_t chld_stat_mtx;
#endif
@@ -683,6 +685,35 @@ void sys_sigrelease(int sig)
sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)NULL);
}
+void erts_sys_sigsegv_handler(int signo) {
+ if (signo == SIGSEGV) {
+ longjmp(erts_sys_sigsegv_jmp, 1);
+ }
+}
+
+/*
+ * Function returns 1 if we can read from all values in between
+ * start and stop.
+ */
+int
+erts_sys_is_area_readable(char *start, char *stop) {
+ int fds[2];
+ if (!pipe(fds)) {
+ /* We let write try to figure out if the pointers are readable */
+ int res = write(fds[1], start, (char*)stop - (char*)start);
+ if (res == -1) {
+ close(fds[0]);
+ close(fds[1]);
+ return 0;
+ }
+ close(fds[0]);
+ close(fds[1]);
+ return 1;
+ }
+ return 0;
+
+}
+
static ERTS_INLINE int
prepare_crash_dump(int secs)
{
diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h
index 838f0c61eb..fde32c8684 100644
--- a/erts/emulator/sys/win32/erl_win_sys.h
+++ b/erts/emulator/sys/win32/erl_win_sys.h
@@ -236,4 +236,16 @@ typedef long ssize_t;
int init_async(int);
int exit_async(void);
#endif
+
+#define ERTS_HAVE_TRY_CATCH 1
+
+#define ERTS_SYS_TRY_CATCH(EXPR,CATCH) \
+ __try { \
+ EXPR; \
+ } \
+ __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) \
+ { \
+ CATCH; \
+ }
+
#endif
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 164ef95629..5d51659b4e 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -247,6 +247,27 @@ void erl_sys_args(int* argc, char** argv)
#endif
}
+/*
+ * Function returns 1 if we can read from all values in between
+ * start and stop.
+ */
+int
+erts_sys_is_area_readable(char *start, char *stop) {
+ volatile char tmp;
+ __try
+ {
+ while(start < stop) {
+ tmp = *start;
+ start++;
+ }
+ }
+ __except(EXCEPTION_EXECUTE_HANDLER)
+ {
+ return 0;
+ }
+ return 1;
+}
+
int erts_sys_prepare_crash_dump(int secs)
{
Port *heart_port;
diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl
index fbc229bc53..fc9bdae0a0 100644
--- a/erts/emulator/test/bif_SUITE.erl
+++ b/erts/emulator/test/bif_SUITE.erl
@@ -20,6 +20,7 @@
-module(bif_SUITE).
-include_lib("test_server/include/test_server.hrl").
+-include_lib("kernel/include/file.hrl").
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2,
@@ -681,8 +682,38 @@ erlang_halt(Config) when is_list(Config) ->
{badrpc,nodedown} = rpc:call(N2, erlang, halt, [0]),
{ok,N3} = slave:start(H, halt_node3),
{badrpc,nodedown} = rpc:call(N3, erlang, halt, [0,[]]),
- ok.
+ % This test triggers a segfault when dumping a crash dump
+ % to make sure that we can handle it properly.
+ {ok,N4} = slave:start(H, halt_node4),
+ CrashDump = filename:join(proplists:get_value(priv_dir,Config),
+ "segfault_erl_crash.dump"),
+ true = rpc:call(N4, os, putenv, ["ERL_CRASH_DUMP",CrashDump]),
+ false = rpc:call(N4, erts_debug, set_internal_state,
+ [available_internal_state, true]),
+ {badrpc,nodedown} = rpc:call(N4, erts_debug, set_internal_state,
+ [broken_halt, "Validate correct crash dump"]),
+ ok = wait_until_stable_size(CrashDump,-1),
+ {ok, Bin} = file:read_file(CrashDump),
+ case {string:str(binary_to_list(Bin),"\n=end\n"),
+ string:str(binary_to_list(Bin),"\r\n=end\r\n")} of
+ {0,0} -> ct:fail("Could not find end marker in crash dump");
+ _ -> ok
+ end.
+
+wait_until_stable_size(_File,-10) ->
+ {error,enoent};
+wait_until_stable_size(File,PrevSz) ->
+ timer:sleep(250),
+ case file:read_file_info(File) of
+ {error,enoent} ->
+ wait_until_stable_size(File,PrevSz-1);
+ {ok,#file_info{size = PrevSz }} when PrevSz /= -1 ->
+ io:format("Crashdump file size was: ~p (~s)~n",[PrevSz,File]),
+ ok;
+ {ok,#file_info{size = NewSz }} ->
+ wait_until_stable_size(File,NewSz)
+ end.
%% Helpers