From 6aa87d58b756ef65650ee793ad4ece8add7b70fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Tue, 2 Oct 2012 18:44:03 +0200
Subject: erts, heart: Ensure erl_crash.dump is written
When a crash dump is about to be written and we have
heartbeat enabled on a system. We need time to write it
before heart explicitly kills the beam.
---
erts/emulator/beam/atom.names | 1 +
erts/emulator/beam/global.h | 3 +
erts/emulator/beam/io.c | 21 +++++++
erts/emulator/sys/unix/sys.c | 46 ++++++++++++----
erts/emulator/sys/win32/sys.c | 16 +++++-
erts/etc/common/heart.c | 124 ++++++++++++++++++++++--------------------
6 files changed, 138 insertions(+), 73 deletions(-)
(limited to 'erts')
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 106fad030b..afcbd732df 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -252,6 +252,7 @@ atom heap_block_size
atom heap_size
atom heap_sizes
atom heap_type
+atom heart_port
atom heir
atom hidden
atom hide
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index dbf95f5bd7..2c20e3da3b 100755
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -983,6 +983,9 @@ Uint erts_port_ioq_size(Port *pp);
void erts_stale_drv_select(Eterm, ErlDrvEvent, int, int);
void erts_port_cleanup(Port *);
void erts_fire_port_monitor(Port *prt, Eterm ref);
+
+Port *erts_get_heart_port(void);
+
#ifdef ERTS_SMP
void erts_smp_xports_unlock(Port *);
#endif
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 35b194f927..502ded4eca 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -5235,3 +5235,24 @@ erl_drv_getenv(char *key, char *value, size_t *value_size)
{
return erts_sys_getenv_raw(key, value, value_size);
}
+
+/* get heart_port
+ * used by erl_crash_dump
+ * - uses the fact that heart_port is registered when starting heart
+ */
+
+Port *erts_get_heart_port() {
+
+ Port* port;
+ Uint ix;
+
+ for(ix = 0; ix < erts_max_ports; ix++) {
+ port = &erts_port[ix];
+ /* immediate compare */
+ if (port->reg && port->reg->name == am_heart_port) {
+ return port;
+ }
+ }
+
+ return NULL;
+}
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index c1fa00b4ea..37dfcb1dd4 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -58,7 +58,6 @@
#define __DARWIN__ 1
#endif
-
#ifdef USE_THREADS
#include "erl_threads.h"
#endif
@@ -71,7 +70,6 @@ static erts_smp_rwmtx_t environ_rwmtx;
#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O
* vector sock_sendv().
*/
-
/*
* Don't need global.h, but bif_table.h (included by bif.h),
* won't compile otherwise
@@ -123,6 +121,15 @@ struct ErtsSysReportExit_ {
#endif
};
+/* This data is shared by these drivers - initialized by spawn_init() */
+static struct driver_data {
+ int port_num, ofd, packet_bytes;
+ ErtsSysReportExit *report_exit;
+ int pid;
+ int alive;
+ int status;
+} *driver_data; /* indexed by fd */
+
static ErtsSysReportExit *report_exit_list;
#if CHLDWTHR && !defined(ERTS_SMP)
static ErtsSysReportExit *report_exit_transit_list;
@@ -685,12 +692,33 @@ prepare_crash_dump(void)
int i, max;
char env[21]; /* enough to hold any 64-bit integer */
size_t envsz;
+ Port *heart_port;
+ Eterm heap[3];
+ Eterm *hp = heap;
+ Eterm list = NIL;
+
+ int heart_fd[2] = {-1,-1};
if (ERTS_PREPARED_CRASH_DUMP)
return; /* We have already been called */
+ heart_port = erts_get_heart_port();
+ if (heart_port) {
+ /* hearts input fd
+ * We "know" drv_data is the in_fd since the port is started with read|write
+ */
+ heart_fd[0] = (int)heart_port->drv_data;
+ heart_fd[1] = (int)driver_data[heart_fd[0]].ofd;
+
+ list = CONS(hp, make_small(8), list); hp += 2;
+
+ /* send to heart port, CMD = 8, i.e. prepare crash dump =o */
+ erts_write_to_port(NIL, heart_port, list);
+ }
+
/* Make sure we unregister at epmd (unknown fd) and get at least
one free filedescriptor (for erl_crash.dump) */
+
max = max_files;
if (max < 1024)
max = 1024;
@@ -704,6 +732,10 @@ prepare_crash_dump(void)
if (i == async_fd[0] || i == async_fd[1])
continue;
#endif
+ /* We don't want to close our heart yet ... */
+ if (i == heart_fd[0] || i == heart_fd[1])
+ continue;
+
close(i);
}
@@ -725,7 +757,6 @@ prepare_crash_dump(void)
sec = (unsigned) i != 0 ? 0 : atoi(env);
alarm(sec);
}
-
}
void
@@ -1021,15 +1052,6 @@ void fini_getenv_state(GETENV_STATE *state)
#define ERTS_SYS_READ_BUF_SZ (64*1024)
-/* This data is shared by these drivers - initialized by spawn_init() */
-static struct driver_data {
- int port_num, ofd, packet_bytes;
- ErtsSysReportExit *report_exit;
- int pid;
- int alive;
- int status;
-} *driver_data; /* indexed by fd */
-
/* Driver interfaces */
static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*);
static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*);
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 6894d682e7..c5664d8e8a 100755
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -258,8 +258,22 @@ void erl_sys_args(int* argc, char** argv)
void
erts_sys_prepare_crash_dump(void)
{
+ Port *heart_port;
+ Eterm heap[3];
+ Eterm *hp = heap;
+ Eterm list = NIL;
+
+ heart_port = erts_get_heart_port();
+
+ if (heart_port) {
+
+ list = CONS(hp, make_small(8), list); hp += 2;
+
+ /* send to heart port, CMD = 8, i.e. prepare crash dump =o */
+ erts_write_to_port(NIL, heart_port, list);
+ }
+
/* Windows - free file descriptors are hopefully available */
- return;
}
static void
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 70c2b3bb23..c6ce017d8c 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -153,13 +153,14 @@ struct msg {
};
/* operations */
-#define HEART_ACK 1
-#define HEART_BEAT 2
-#define SHUT_DOWN 3
-#define SET_CMD 4
-#define CLEAR_CMD 5
-#define GET_CMD 6
-#define HEART_CMD 7
+#define HEART_ACK (1)
+#define HEART_BEAT (2)
+#define SHUT_DOWN (3)
+#define SET_CMD (4)
+#define CLEAR_CMD (5)
+#define GET_CMD (6)
+#define HEART_CMD (7)
+#define PREPARING_CRASH (8)
/* Maybe interesting to change */
@@ -187,10 +188,11 @@ unsigned long heart_beat_kill_pid = 0;
#define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay)
/* reasons for reboot */
-#define R_TIMEOUT 1
-#define R_CLOSED 2
-#define R_ERROR 3
-#define R_SHUT_DOWN 4
+#define R_TIMEOUT (1)
+#define R_CLOSED (2)
+#define R_ERROR (3)
+#define R_SHUT_DOWN (4)
+#define R_CRASHING (5) /* Doing a crash dump and we will wait for it */
/* macros */
@@ -200,8 +202,8 @@ unsigned long heart_beat_kill_pid = 0;
/* prototypes */
-static int message_loop(int,int);
-static void do_terminate(int);
+static int message_loop(int, int);
+static void do_terminate(int, int);
static int notify_ack(int);
static int heart_cmd_reply(int, char *);
static int write_message(int, struct msg *);
@@ -376,7 +378,7 @@ main(int argc, char **argv)
program_name[sizeof(program_name)-1] = '\0';
notify_ack(erlout_fd);
cmd[0] = '\0';
- do_terminate(message_loop(erlin_fd,erlout_fd));
+ do_terminate(erlin_fd,message_loop(erlin_fd,erlout_fd));
return 0;
}
@@ -410,6 +412,7 @@ message_loop(erlin_fd, erlout_fd)
#endif
while (1) {
+ /* REFACTOR: below to select/tmo function */
#ifdef __WIN32__
wresult = WaitForSingleObject(hevent_dataready,SELECT_TIMEOUT*1000+ 2);
if (wresult == WAIT_FAILED) {
@@ -504,6 +507,10 @@ message_loop(erlin_fd, erlout_fd)
free_env_val(env);
}
break;
+ case PREPARING_CRASH:
+ /* Erlang has reached a crushdump point (is crashing for sure) */
+ print_error("Erlang is crashing .. (waiting for crash dump file)");
+ return R_CRASHING;
default:
/* ignore all other messages */
break;
@@ -635,69 +642,66 @@ void win_system(char *command)
* do_terminate
*/
static void
-do_terminate(reason)
- int reason;
-{
+do_terminate(int erlin_fd, int reason) {
/*
When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish
(plus heart_beat_report_delay if under VxWorks), so we don't need
to call wd_reset().
*/
-
+ struct msg message;
+
switch (reason) {
case R_SHUT_DOWN:
break;
+ case R_CRASHING:
+ print_error("Waiting for dump");
+ read_message(erlin_fd, &message); /* read until we get something */
case R_TIMEOUT:
- case R_ERROR:
case R_CLOSED:
+ case R_ERROR:
default:
-#if defined(__WIN32__) /* Not VxWorks */
{
- if(!cmd[0]) {
- char *command = get_env(HEART_COMMAND_ENV);
- if(!command)
- print_error("Would reboot. Terminating.");
- else {
- kill_old_erlang();
- /* High prio combined with system() works badly indeed... */
- SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
- win_system(command);
- print_error("Executed \"%s\". Terminating.",command);
+#if defined(__WIN32__) /* Not VxWorks */
+ if(!cmd[0]) {
+ char *command = get_env(HEART_COMMAND_ENV);
+ if(!command)
+ print_error("Would reboot. Terminating.");
+ else {
+ kill_old_erlang();
+ /* High prio combined with system() works badly indeed... */
+ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
+ win_system(command);
+ print_error("Executed \"%s\". Terminating.",command);
+ }
+ free_env_val(command);
+ } else {
+ kill_old_erlang();
+ /* High prio combined with system() works badly indeed... */
+ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
+ win_system(&cmd[0]);
+ print_error("Executed \"%s\". Terminating.",cmd);
}
- free_env_val(command);
- }
- else {
- kill_old_erlang();
- /* High prio combined with system() works badly indeed... */
- SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
- win_system(&cmd[0]);
- print_error("Executed \"%s\". Terminating.",cmd);
- }
- }
-
#else
- {
- if(!cmd[0]) {
- char *command = get_env(HEART_COMMAND_ENV);
- if(!command)
- print_error("Would reboot. Terminating.");
- else {
- kill_old_erlang();
- /* suppress gcc warning with 'if' */
- if(system(command));
- print_error("Executed \"%s\". Terminating.",command);
+ if(!cmd[0]) {
+ char *command = get_env(HEART_COMMAND_ENV);
+ if(!command)
+ print_error("Would reboot. Terminating.");
+ else {
+ kill_old_erlang();
+ /* suppress gcc warning with 'if' */
+ if(system(command));
+ print_error("Executed \"%s\". Terminating.",command);
+ }
+ free_env_val(command);
+ } else {
+ kill_old_erlang();
+ /* suppress gcc warning with 'if' */
+ if(system((char*)&cmd[0]));
+ print_error("Executed \"%s\". Terminating.",cmd);
}
- free_env_val(command);
- }
- else {
- kill_old_erlang();
- /* suppress gcc warning with 'if' */
- if(system((char*)&cmd[0]));
- print_error("Executed \"%s\". Terminating.",cmd);
- }
+#endif
}
break;
-#endif
} /* switch(reason) */
}
--
cgit v1.2.3
From 70c9312c4bd8832c31094dd076364077f0a684c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Wed, 10 Oct 2012 15:09:30 +0200
Subject: heart: Refactor heart debugging
---
erts/etc/common/heart.c | 35 ++++++++++++++++++++---------------
1 file changed, 20 insertions(+), 15 deletions(-)
(limited to 'erts')
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index c6ce017d8c..dcb83c33ac 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -352,12 +352,14 @@ static void get_arguments(int argc, char** argv) {
debugf("arguments -ht %d -wt %d -pid %lu\n",h,w,p);
}
-int
-main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
+
+ if (is_env_set("HEART_DEBUG")) {
+ fprintf(stderr, "heart: debug is ON!\r\n");
+ debug_on = 1;
+ }
+
get_arguments(argc,argv);
- if (is_env_set("HEART_DEBUG"))
- debug_on=1;
#ifdef __WIN32__
if (debug_on) {
if(!is_env_set("ERLSRV_SERVICE_NAME")) {
@@ -649,6 +651,7 @@ do_terminate(int erlin_fd, int reason) {
to call wd_reset().
*/
struct msg message;
+ int ret = 0;
switch (reason) {
case R_SHUT_DOWN:
@@ -656,6 +659,7 @@ do_terminate(int erlin_fd, int reason) {
case R_CRASHING:
print_error("Waiting for dump");
read_message(erlin_fd, &message); /* read until we get something */
+ /* fall through */
case R_TIMEOUT:
case R_CLOSED:
case R_ERROR:
@@ -689,15 +693,15 @@ do_terminate(int erlin_fd, int reason) {
else {
kill_old_erlang();
/* suppress gcc warning with 'if' */
- if(system(command));
- print_error("Executed \"%s\". Terminating.",command);
+ ret = system(command);
+ print_error("Executed \"%s\" -> %d. Terminating.",command, ret);
}
free_env_val(command);
} else {
kill_old_erlang();
/* suppress gcc warning with 'if' */
- if(system((char*)&cmd[0]));
- print_error("Executed \"%s\". Terminating.",cmd);
+ ret = system((char*)&cmd[0]);
+ print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret);
}
#endif
}
@@ -895,12 +899,13 @@ debugf(const char *format,...)
{
va_list args;
- if (!debug_on) return;
- va_start(args, format);
- fprintf(stderr, "Heart: ");
- vfprintf(stderr, format, args);
- va_end(args);
- fprintf(stderr, "\r\n");
+ if (debug_on) {
+ va_start(args, format);
+ fprintf(stderr, "Heart: ");
+ vfprintf(stderr, format, args);
+ va_end(args);
+ fprintf(stderr, "\r\n");
+ }
}
#ifdef __WIN32__
--
cgit v1.2.3
From 310354406aecba66ac0acbfd7d8ed8abb8bfcd5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Thu, 11 Oct 2012 15:16:08 +0200
Subject: erts: Search for heart in ports that are alive
---
erts/emulator/beam/io.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'erts')
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 502ded4eca..88e9215a9c 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -5248,7 +5248,10 @@ Port *erts_get_heart_port() {
for(ix = 0; ix < erts_max_ports; ix++) {
port = &erts_port[ix];
- /* immediate compare */
+ /* only examine undead or alive ports */
+ if (port->status & ERTS_PORT_SFLGS_DEAD)
+ continue;
+ /* immediate atom compare */
if (port->reg && port->reg->name == am_heart_port) {
return port;
}
--
cgit v1.2.3
From d4172bde993275f26b62b53af85a5ac092846dad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Thu, 11 Oct 2012 16:38:04 +0200
Subject: erts: Change ERL_CRASH_DUMP_SECONDS behaviour
Not setting ERL_CRASH_DUMP_SECONDS will now terminate beam
immediately on a crash without writing a crash dump file.
Setting ERL_CRASH_DUMP_SECONDS to 0 will also terminate beam
immediately on a crash without writing a crash dump file, i.e. same as not
setting ERL_CRASH_DUMP_SECONDS environment variable.
Setting ERL_CRASH_DUMP_SECONDS to a negative value will let the beam wait
indefinitely on the crash dump file being written.
Setting ERL_CRASH_DUMP_SECONDS to a positive value will let the beam wait
that many seconds on the crash dump file being written.
A positive value will set both an alarm in beam AND a heart timeout for restart
if heart is running.
This is due to the change of 'heart' behavior when 'heart' is
listening for a crash.
---
erts/emulator/beam/break.c | 39 ++++++++++++++++++++++--
erts/emulator/beam/index.c | 3 +-
erts/emulator/beam/sys.h | 2 +-
erts/emulator/sys/unix/sys.c | 40 +++++++++++++++---------
erts/emulator/sys/win32/sys.c | 3 +-
erts/etc/common/heart.c | 71 ++++++++++++++++++++++++++++++++++++++++---
6 files changed, 134 insertions(+), 24 deletions(-)
(limited to 'erts')
diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c
index 93aa2fb8d0..cf66f4e6b6 100644
--- a/erts/emulator/beam/break.c
+++ b/erts/emulator/beam/break.c
@@ -650,10 +650,13 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args)
ErtsThrPrgrData tpd_buf; /* in case we aren't a managed thread... */
#endif
int fd;
+ size_t envsz;
time_t now;
+ char env[21]; /* enough to hold any 64-bit integer */
size_t dumpnamebufsize = MAXPATHLEN;
char dumpnamebuf[MAXPATHLEN];
char* dumpname;
+ int secs;
if (ERTS_SOMEONE_IS_CRASH_DUMPING)
return;
@@ -676,9 +679,41 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args)
erts_writing_erl_crash_dump = 1;
#endif
- erts_sys_prepare_crash_dump();
+ envsz = sizeof(env);
+ /* ERL_CRASH_DUMP_SECONDS not set
+ * same as ERL_CRASH_DUMP_SECONDS = 0
+ * - do not write dump
+ * - do not set an alarm
+ * - break immediately
+ *
+ * ERL_CRASH_DUMP_SECONDS = 0
+ * - do not write dump
+ * - do not set an alarm
+ * - break immediately
+ *
+ * ERL_CRASH_DUMP_SECONDS < 0
+ * - do not set alarm
+ * - write dump until done
+ *
+ * ERL_CRASH_DUMP_SECONDS = S (and S positive)
+ * - Don't dump file forever
+ * - set alarm (set in sys)
+ * - write dump until alarm or file is written completely
+ */
+
+ if (erts_sys_getenv__("ERL_CRASH_DUMP_SECONDS", env, &envsz) != 0) {
+ return; /* break immediately */
+ } else {
+ secs = atoi(env);
+ }
+
+ if (secs == 0) {
+ return;
+ }
+
+ erts_sys_prepare_crash_dump(secs);
- if (erts_sys_getenv_raw("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0)
+ if (erts_sys_getenv__("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0)
dumpname = "erl_crash.dump";
else
dumpname = &dumpnamebuf[0];
diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c
index a4a3007f93..ad4672c3de 100644
--- a/erts/emulator/beam/index.c
+++ b/erts/emulator/beam/index.c
@@ -82,7 +82,8 @@ index_put(IndexTable* t, void* tmpl)
if (ix >= t->size) {
Uint sz;
if (ix >= t->limit) {
- erl_exit(1, "no more index entries in %s (max=%d)\n",
+ /* A core dump is unnecessary */
+ erl_exit(ERTS_DUMP_EXIT, "no more index entries in %s (max=%d)\n",
t->htable.name, t->limit);
}
sz = INDEX_PAGE_SIZE*sizeof(IndexSlot*);
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 2406c52f14..9dd8341520 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -652,7 +652,7 @@ void erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec);
void erts_sys_main_thread(void);
#endif
-extern void erts_sys_prepare_crash_dump(void);
+extern void erts_sys_prepare_crash_dump(int secs);
extern void erts_sys_pre_init(void);
extern void erl_sys_init(void);
extern void erl_sys_args(int *argc, char **argv);
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index 37dfcb1dd4..282ecb345f 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -687,18 +687,20 @@ static RETSIGTYPE break_handler(int sig)
#endif /* 0 */
static ERTS_INLINE void
-prepare_crash_dump(void)
+prepare_crash_dump(int secs)
{
+#define NUFBUF (3)
int i, max;
char env[21]; /* enough to hold any 64-bit integer */
size_t envsz;
+ DeclareTmpHeapNoproc(heap,NUFBUF);
Port *heart_port;
- Eterm heap[3];
Eterm *hp = heap;
Eterm list = NIL;
-
int heart_fd[2] = {-1,-1};
+ UseTmpHeapNoproc(NUFBUF);
+
if (ERTS_PREPARED_CRASH_DUMP)
return; /* We have already been called */
@@ -740,7 +742,7 @@ prepare_crash_dump(void)
}
envsz = sizeof(env);
- i = erts_sys_getenv_raw("ERL_CRASH_DUMP_NICE", env, &envsz);
+ i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz);
if (i >= 0) {
int nice_val;
nice_val = i != 0 ? 0 : atoi(env);
@@ -749,20 +751,21 @@ prepare_crash_dump(void)
}
erts_silence_warn_unused_result(nice(nice_val));
}
-
- envsz = sizeof(env);
- i = erts_sys_getenv_raw("ERL_CRASH_DUMP_SECONDS", env, &envsz);
- if (i >= 0) {
- unsigned sec;
- sec = (unsigned) i != 0 ? 0 : atoi(env);
- alarm(sec);
+
+ /* Positive secs means an alarm must be set
+ * 0 or negative means no alarm
+ */
+ if (secs > 0) {
+ alarm((unsigned int)secs);
}
+ UnUseTmpHeapNoproc(NUFBUF);
+#undef NUFBUF
}
void
-erts_sys_prepare_crash_dump(void)
+erts_sys_prepare_crash_dump(int secs)
{
- prepare_crash_dump();
+ prepare_crash_dump(secs);
}
static ERTS_INLINE void
@@ -804,7 +807,7 @@ sigusr1_exit(void)
is hung somewhere, so it won't be able to poll any flag we set here.
*/
ERTS_SET_GOT_SIGUSR1;
- prepare_crash_dump();
+ prepare_crash_dump((int)0);
erl_exit(1, "Received SIGUSR1\n");
}
@@ -2439,6 +2442,15 @@ erts_sys_getenv_raw(char *key, char *value, size_t *size) {
return erts_sys_getenv(key, value, size);
}
+/*
+ * erts_sys_getenv
+ * returns:
+ * -1, if environment key is not set with a value
+ * 0, if environment key is set and value fits into buffer size
+ * 1, if environment key is set but does not fit into buffer size
+ * size is set with the needed buffer size value
+ */
+
int
erts_sys_getenv(char *key, char *value, size_t *size)
{
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index c5664d8e8a..9d45023ceb 100755
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -256,7 +256,7 @@ void erl_sys_args(int* argc, char** argv)
}
void
-erts_sys_prepare_crash_dump(void)
+erts_sys_prepare_crash_dump(int secs)
{
Port *heart_port;
Eterm heap[3];
@@ -274,6 +274,7 @@ erts_sys_prepare_crash_dump(void)
}
/* Windows - free file descriptors are hopefully available */
+ /* Alarm not used on windows */
}
static void
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index dcb83c33ac..ed75a8f256 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -137,7 +137,8 @@
# endif
#endif
-#define HEART_COMMAND_ENV "HEART_COMMAND"
+#define HEART_COMMAND_ENV "HEART_COMMAND"
+#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS"
#define MSG_HDR_SIZE 2
#define MSG_HDR_PLUS_OP_SIZE 3
@@ -214,6 +215,7 @@ static void print_error(const char *,...);
static void debugf(const char *,...);
static void init_timestamp(void);
static time_t timestamp(time_t *);
+static int wait_until_close_write_or_env_tmo(int);
#ifdef __WIN32__
static BOOL enable_privilege(void);
@@ -650,15 +652,20 @@ do_terminate(int erlin_fd, int reason) {
(plus heart_beat_report_delay if under VxWorks), so we don't need
to call wd_reset().
*/
- struct msg message;
- int ret = 0;
+ int ret = 0, tmo=0;
+ char *tmo_env;
switch (reason) {
case R_SHUT_DOWN:
break;
case R_CRASHING:
- print_error("Waiting for dump");
- read_message(erlin_fd, &message); /* read until we get something */
+ if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) {
+ tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV);
+ tmo = atoi(tmo_env);
+ print_error("Waiting for dump - timeout set to %d seconds.", tmo);
+ wait_until_close_write_or_env_tmo(tmo);
+ free_env_val(tmo_env);
+ }
/* fall through */
case R_TIMEOUT:
case R_CLOSED:
@@ -709,6 +716,60 @@ do_terminate(int erlin_fd, int reason) {
} /* switch(reason) */
}
+
+/* Waits until something happens on socket or handle
+ *
+ * Uses global variables erlin_fd or hevent_dataready
+ */
+int wait_until_close_write_or_env_tmo(int tmo) {
+ int i = 0;
+
+#ifdef __WIN32__
+ DWORD wresult;
+ DWORD wtmo = INFINITE;
+
+ if (tmo >= 0) {
+ wtmo = tmo*1000 + 2;
+ }
+
+ wresult = WaitForSingleObject(hevent_dataready, wtmo);
+ if (wresult == WAIT_FAILED) {
+ print_last_error();
+ return -1;
+ }
+
+ if (wresult == WAIT_TIMEOUT) {
+ debugf("wait timed out\n");
+ i = 0;
+ } else {
+ debugf("wait ok\n");
+ i = 1;
+ }
+#else
+ fd_set read_fds;
+ int max_fd;
+ struct timeval timeout;
+ struct timeval *tptr = NULL;
+
+ max_fd = erlin_fd; /* global */
+
+ if (tmo >= 0) {
+ timeout.tv_sec = tmo; /* On Linux timeout is modified by select */
+ timeout.tv_usec = 0;
+ tptr = &timeout;
+ }
+
+ FD_ZERO(&read_fds);
+ FD_SET(erlin_fd, &read_fds);
+ if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) {
+ print_error("error in select.");
+ return -1;
+ }
+#endif
+ return i;
+}
+
+
/*
* notify_ack
*
--
cgit v1.2.3
From e6ea3b07e25b09481b010558f8bfdfb57dcbb85e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Mon, 15 Oct 2012 16:22:47 +0200
Subject: doc: Document ERL_CRASH_DUMP_SECONDS behaviour
* heart reboot behaviour
* erl_crash.dump file write behaviour
---
erts/doc/src/erl.xml | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
(limited to 'erts')
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml
index 2dea477d99..f931445a3e 100644
--- a/erts/doc/src/erl.xml
+++ b/erts/doc/src/erl.xml
@@ -1040,6 +1040,37 @@
the emulator will be allowed to spend writing a crash dump. When
the given number of seconds have elapsed, the emulator will be
terminated by a SIGALRM signal.
+
+ If the environment variable is not set or it is set to zero seconds, ,
+ the runtime system will not even attempt to write the crash dump file. It will just terminate.
+
+ If the environment variable is set to negative valie, e.g. ,
+ the runtime system will wait indefinitely for the crash dump file to be written.
+
+ This environment variable is used in conjuction with
+ heart if heart is running:
+
+
+
+
+ Suppresses the writing a crash dump file entirely,
+ thus rebooting the runtime system immediately.
+ This is the same as not setting the environment variable.
+
+
+
+ Setting the environment variable to a negative value will cause the
+ termination of the runtime system to wait until the crash dump file
+ has been completly written.
+
+
+
+
+ Will wait for S seconds to complete the crash dump file and
+ then terminate the runtime system.
+
+
+
-
--
cgit v1.2.3
From 014c784a73e56769c7045d8f7515d57d9d8a2caf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Fri, 19 Oct 2012 17:22:25 +0200
Subject: erts: Fix lock check assertion
---
erts/emulator/beam/erl_lock_check.c | 2 +-
erts/emulator/beam/erl_process_lock.c | 2 +-
erts/emulator/beam/io.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
(limited to 'erts')
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index b545ec07c0..1f388c1796 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -443,7 +443,7 @@ print_lock2(char *prefix, Sint16 id, Wterm extra, Uint16 flags, char *suffix)
"%s'%s:%p%s'%s%s",
prefix,
lname,
- boxed_val(extra),
+ _unchecked_boxed_val(extra),
lock_type(flags),
rw_op_str(flags),
suffix);
diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c
index f28ef6a6d7..f7900317cc 100644
--- a/erts/emulator/beam/erl_process_lock.c
+++ b/erts/emulator/beam/erl_process_lock.c
@@ -1349,7 +1349,7 @@ erts_proc_lc_chk_no_proc_locks(char *file, int line)
lc_id.proc_lock_msgq,
lc_id.proc_lock_status};
erts_lc_have_lock_ids(resv, ids, 4);
- if (resv[0] || resv[1] || resv[2] || resv[3]) {
+ if (!ERTS_IS_CRASH_DUMPING && (resv[0] || resv[1] || resv[2] || resv[3])) {
erts_lc_fail("%s:%d: Thread has process locks locked when expected "
"not to have any process locks locked",
file, line);
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 88e9215a9c..609fe9f5fb 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -1132,7 +1132,7 @@ int erts_write_to_port(Eterm caller_id, Port *p, Eterm list)
Uint size;
int fpe_was_unmasked;
- ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p));
+ ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p) || ERTS_IS_CRASH_DUMPING);
ERTS_SMP_CHK_NO_PROC_LOCKS;
p->caller = caller_id;
--
cgit v1.2.3
From a986416859aa6065e44d7288b37152756ac1b0ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?=
Date: Fri, 19 Oct 2012 17:32:18 +0200
Subject: erts: Fix crash dump write to port hack
More future proof with R16
---
erts/emulator/sys/unix/sys.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'erts')
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index 282ecb345f..c485a4eece 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -715,7 +715,7 @@ prepare_crash_dump(int secs)
list = CONS(hp, make_small(8), list); hp += 2;
/* send to heart port, CMD = 8, i.e. prepare crash dump =o */
- erts_write_to_port(NIL, heart_port, list);
+ erts_write_to_port(ERTS_INVALID_PID, heart_port, list);
}
/* Make sure we unregister at epmd (unknown fd) and get at least
--
cgit v1.2.3