aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorBjörn-Egil Dahlberg <[email protected]>2012-10-02 18:44:03 +0200
committerBjörn-Egil Dahlberg <[email protected]>2012-10-15 20:49:43 +0200
commit6aa87d58b756ef65650ee793ad4ece8add7b70fb (patch)
treee40745a989254631a2f921942ea74ffb76a4337a /erts
parent952db27ba0a5b87a2a47f3a7034a9bf92e3651e5 (diff)
downloadotp-6aa87d58b756ef65650ee793ad4ece8add7b70fb.tar.gz
otp-6aa87d58b756ef65650ee793ad4ece8add7b70fb.tar.bz2
otp-6aa87d58b756ef65650ee793ad4ece8add7b70fb.zip
erts, heart: Ensure erl_crash.dump is written
When a crash dump is about to be written and we have heartbeat enabled on a system. We need time to write it before heart explicitly kills the beam.
Diffstat (limited to 'erts')
-rw-r--r--erts/emulator/beam/atom.names1
-rwxr-xr-xerts/emulator/beam/global.h3
-rw-r--r--erts/emulator/beam/io.c21
-rw-r--r--erts/emulator/sys/unix/sys.c46
-rwxr-xr-xerts/emulator/sys/win32/sys.c16
-rw-r--r--erts/etc/common/heart.c124
6 files changed, 138 insertions, 73 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 106fad030b..afcbd732df 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -252,6 +252,7 @@ atom heap_block_size
atom heap_size
atom heap_sizes
atom heap_type
+atom heart_port
atom heir
atom hidden
atom hide
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index dbf95f5bd7..2c20e3da3b 100755
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -983,6 +983,9 @@ Uint erts_port_ioq_size(Port *pp);
void erts_stale_drv_select(Eterm, ErlDrvEvent, int, int);
void erts_port_cleanup(Port *);
void erts_fire_port_monitor(Port *prt, Eterm ref);
+
+Port *erts_get_heart_port(void);
+
#ifdef ERTS_SMP
void erts_smp_xports_unlock(Port *);
#endif
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 35b194f927..502ded4eca 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -5235,3 +5235,24 @@ erl_drv_getenv(char *key, char *value, size_t *value_size)
{
return erts_sys_getenv_raw(key, value, value_size);
}
+
+/* get heart_port
+ * used by erl_crash_dump
+ * - uses the fact that heart_port is registered when starting heart
+ */
+
+Port *erts_get_heart_port() {
+
+ Port* port;
+ Uint ix;
+
+ for(ix = 0; ix < erts_max_ports; ix++) {
+ port = &erts_port[ix];
+ /* immediate compare */
+ if (port->reg && port->reg->name == am_heart_port) {
+ return port;
+ }
+ }
+
+ return NULL;
+}
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index c1fa00b4ea..37dfcb1dd4 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -58,7 +58,6 @@
#define __DARWIN__ 1
#endif
-
#ifdef USE_THREADS
#include "erl_threads.h"
#endif
@@ -71,7 +70,6 @@ static erts_smp_rwmtx_t environ_rwmtx;
#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O
* vector sock_sendv().
*/
-
/*
* Don't need global.h, but bif_table.h (included by bif.h),
* won't compile otherwise
@@ -123,6 +121,15 @@ struct ErtsSysReportExit_ {
#endif
};
+/* This data is shared by these drivers - initialized by spawn_init() */
+static struct driver_data {
+ int port_num, ofd, packet_bytes;
+ ErtsSysReportExit *report_exit;
+ int pid;
+ int alive;
+ int status;
+} *driver_data; /* indexed by fd */
+
static ErtsSysReportExit *report_exit_list;
#if CHLDWTHR && !defined(ERTS_SMP)
static ErtsSysReportExit *report_exit_transit_list;
@@ -685,12 +692,33 @@ prepare_crash_dump(void)
int i, max;
char env[21]; /* enough to hold any 64-bit integer */
size_t envsz;
+ Port *heart_port;
+ Eterm heap[3];
+ Eterm *hp = heap;
+ Eterm list = NIL;
+
+ int heart_fd[2] = {-1,-1};
if (ERTS_PREPARED_CRASH_DUMP)
return; /* We have already been called */
+ heart_port = erts_get_heart_port();
+ if (heart_port) {
+ /* hearts input fd
+ * We "know" drv_data is the in_fd since the port is started with read|write
+ */
+ heart_fd[0] = (int)heart_port->drv_data;
+ heart_fd[1] = (int)driver_data[heart_fd[0]].ofd;
+
+ list = CONS(hp, make_small(8), list); hp += 2;
+
+ /* send to heart port, CMD = 8, i.e. prepare crash dump =o */
+ erts_write_to_port(NIL, heart_port, list);
+ }
+
/* Make sure we unregister at epmd (unknown fd) and get at least
one free filedescriptor (for erl_crash.dump) */
+
max = max_files;
if (max < 1024)
max = 1024;
@@ -704,6 +732,10 @@ prepare_crash_dump(void)
if (i == async_fd[0] || i == async_fd[1])
continue;
#endif
+ /* We don't want to close our heart yet ... */
+ if (i == heart_fd[0] || i == heart_fd[1])
+ continue;
+
close(i);
}
@@ -725,7 +757,6 @@ prepare_crash_dump(void)
sec = (unsigned) i != 0 ? 0 : atoi(env);
alarm(sec);
}
-
}
void
@@ -1021,15 +1052,6 @@ void fini_getenv_state(GETENV_STATE *state)
#define ERTS_SYS_READ_BUF_SZ (64*1024)
-/* This data is shared by these drivers - initialized by spawn_init() */
-static struct driver_data {
- int port_num, ofd, packet_bytes;
- ErtsSysReportExit *report_exit;
- int pid;
- int alive;
- int status;
-} *driver_data; /* indexed by fd */
-
/* Driver interfaces */
static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*);
static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*);
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 6894d682e7..c5664d8e8a 100755
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -258,8 +258,22 @@ void erl_sys_args(int* argc, char** argv)
void
erts_sys_prepare_crash_dump(void)
{
+ Port *heart_port;
+ Eterm heap[3];
+ Eterm *hp = heap;
+ Eterm list = NIL;
+
+ heart_port = erts_get_heart_port();
+
+ if (heart_port) {
+
+ list = CONS(hp, make_small(8), list); hp += 2;
+
+ /* send to heart port, CMD = 8, i.e. prepare crash dump =o */
+ erts_write_to_port(NIL, heart_port, list);
+ }
+
/* Windows - free file descriptors are hopefully available */
- return;
}
static void
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 70c2b3bb23..c6ce017d8c 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -153,13 +153,14 @@ struct msg {
};
/* operations */
-#define HEART_ACK 1
-#define HEART_BEAT 2
-#define SHUT_DOWN 3
-#define SET_CMD 4
-#define CLEAR_CMD 5
-#define GET_CMD 6
-#define HEART_CMD 7
+#define HEART_ACK (1)
+#define HEART_BEAT (2)
+#define SHUT_DOWN (3)
+#define SET_CMD (4)
+#define CLEAR_CMD (5)
+#define GET_CMD (6)
+#define HEART_CMD (7)
+#define PREPARING_CRASH (8)
/* Maybe interesting to change */
@@ -187,10 +188,11 @@ unsigned long heart_beat_kill_pid = 0;
#define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay)
/* reasons for reboot */
-#define R_TIMEOUT 1
-#define R_CLOSED 2
-#define R_ERROR 3
-#define R_SHUT_DOWN 4
+#define R_TIMEOUT (1)
+#define R_CLOSED (2)
+#define R_ERROR (3)
+#define R_SHUT_DOWN (4)
+#define R_CRASHING (5) /* Doing a crash dump and we will wait for it */
/* macros */
@@ -200,8 +202,8 @@ unsigned long heart_beat_kill_pid = 0;
/* prototypes */
-static int message_loop(int,int);
-static void do_terminate(int);
+static int message_loop(int, int);
+static void do_terminate(int, int);
static int notify_ack(int);
static int heart_cmd_reply(int, char *);
static int write_message(int, struct msg *);
@@ -376,7 +378,7 @@ main(int argc, char **argv)
program_name[sizeof(program_name)-1] = '\0';
notify_ack(erlout_fd);
cmd[0] = '\0';
- do_terminate(message_loop(erlin_fd,erlout_fd));
+ do_terminate(erlin_fd,message_loop(erlin_fd,erlout_fd));
return 0;
}
@@ -410,6 +412,7 @@ message_loop(erlin_fd, erlout_fd)
#endif
while (1) {
+ /* REFACTOR: below to select/tmo function */
#ifdef __WIN32__
wresult = WaitForSingleObject(hevent_dataready,SELECT_TIMEOUT*1000+ 2);
if (wresult == WAIT_FAILED) {
@@ -504,6 +507,10 @@ message_loop(erlin_fd, erlout_fd)
free_env_val(env);
}
break;
+ case PREPARING_CRASH:
+ /* Erlang has reached a crushdump point (is crashing for sure) */
+ print_error("Erlang is crashing .. (waiting for crash dump file)");
+ return R_CRASHING;
default:
/* ignore all other messages */
break;
@@ -635,69 +642,66 @@ void win_system(char *command)
* do_terminate
*/
static void
-do_terminate(reason)
- int reason;
-{
+do_terminate(int erlin_fd, int reason) {
/*
When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish
(plus heart_beat_report_delay if under VxWorks), so we don't need
to call wd_reset().
*/
-
+ struct msg message;
+
switch (reason) {
case R_SHUT_DOWN:
break;
+ case R_CRASHING:
+ print_error("Waiting for dump");
+ read_message(erlin_fd, &message); /* read until we get something */
case R_TIMEOUT:
- case R_ERROR:
case R_CLOSED:
+ case R_ERROR:
default:
-#if defined(__WIN32__) /* Not VxWorks */
{
- if(!cmd[0]) {
- char *command = get_env(HEART_COMMAND_ENV);
- if(!command)
- print_error("Would reboot. Terminating.");
- else {
- kill_old_erlang();
- /* High prio combined with system() works badly indeed... */
- SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
- win_system(command);
- print_error("Executed \"%s\". Terminating.",command);
+#if defined(__WIN32__) /* Not VxWorks */
+ if(!cmd[0]) {
+ char *command = get_env(HEART_COMMAND_ENV);
+ if(!command)
+ print_error("Would reboot. Terminating.");
+ else {
+ kill_old_erlang();
+ /* High prio combined with system() works badly indeed... */
+ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
+ win_system(command);
+ print_error("Executed \"%s\". Terminating.",command);
+ }
+ free_env_val(command);
+ } else {
+ kill_old_erlang();
+ /* High prio combined with system() works badly indeed... */
+ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
+ win_system(&cmd[0]);
+ print_error("Executed \"%s\". Terminating.",cmd);
}
- free_env_val(command);
- }
- else {
- kill_old_erlang();
- /* High prio combined with system() works badly indeed... */
- SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
- win_system(&cmd[0]);
- print_error("Executed \"%s\". Terminating.",cmd);
- }
- }
-
#else
- {
- if(!cmd[0]) {
- char *command = get_env(HEART_COMMAND_ENV);
- if(!command)
- print_error("Would reboot. Terminating.");
- else {
- kill_old_erlang();
- /* suppress gcc warning with 'if' */
- if(system(command));
- print_error("Executed \"%s\". Terminating.",command);
+ if(!cmd[0]) {
+ char *command = get_env(HEART_COMMAND_ENV);
+ if(!command)
+ print_error("Would reboot. Terminating.");
+ else {
+ kill_old_erlang();
+ /* suppress gcc warning with 'if' */
+ if(system(command));
+ print_error("Executed \"%s\". Terminating.",command);
+ }
+ free_env_val(command);
+ } else {
+ kill_old_erlang();
+ /* suppress gcc warning with 'if' */
+ if(system((char*)&cmd[0]));
+ print_error("Executed \"%s\". Terminating.",cmd);
}
- free_env_val(command);
- }
- else {
- kill_old_erlang();
- /* suppress gcc warning with 'if' */
- if(system((char*)&cmd[0]));
- print_error("Executed \"%s\". Terminating.",cmd);
- }
+#endif
}
break;
-#endif
} /* switch(reason) */
}