From 6aa87d58b756ef65650ee793ad4ece8add7b70fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Tue, 2 Oct 2012 18:44:03 +0200 Subject: erts, heart: Ensure erl_crash.dump is written When a crash dump is about to be written and we have heartbeat enabled on a system. We need time to write it before heart explicitly kills the beam. --- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/global.h | 3 + erts/emulator/beam/io.c | 21 +++++++ erts/emulator/sys/unix/sys.c | 46 ++++++++++++---- erts/emulator/sys/win32/sys.c | 16 +++++- erts/etc/common/heart.c | 124 ++++++++++++++++++++++-------------------- 6 files changed, 138 insertions(+), 73 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 106fad030b..afcbd732df 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -252,6 +252,7 @@ atom heap_block_size atom heap_size atom heap_sizes atom heap_type +atom heart_port atom heir atom hidden atom hide diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index dbf95f5bd7..2c20e3da3b 100755 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -983,6 +983,9 @@ Uint erts_port_ioq_size(Port *pp); void erts_stale_drv_select(Eterm, ErlDrvEvent, int, int); void erts_port_cleanup(Port *); void erts_fire_port_monitor(Port *prt, Eterm ref); + +Port *erts_get_heart_port(void); + #ifdef ERTS_SMP void erts_smp_xports_unlock(Port *); #endif diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 35b194f927..502ded4eca 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -5235,3 +5235,24 @@ erl_drv_getenv(char *key, char *value, size_t *value_size) { return erts_sys_getenv_raw(key, value, value_size); } + +/* get heart_port + * used by erl_crash_dump + * - uses the fact that heart_port is registered when starting heart + */ + +Port *erts_get_heart_port() { + + Port* port; + Uint ix; + + for(ix = 0; ix < erts_max_ports; ix++) { + port = &erts_port[ix]; + /* immediate compare */ + if (port->reg && port->reg->name == am_heart_port) { + return port; + } + } + + return NULL; +} diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index c1fa00b4ea..37dfcb1dd4 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -58,7 +58,6 @@ #define __DARWIN__ 1 #endif - #ifdef USE_THREADS #include "erl_threads.h" #endif @@ -71,7 +70,6 @@ static erts_smp_rwmtx_t environ_rwmtx; #define MAX_VSIZE 16 /* Max number of entries allowed in an I/O * vector sock_sendv(). */ - /* * Don't need global.h, but bif_table.h (included by bif.h), * won't compile otherwise @@ -123,6 +121,15 @@ struct ErtsSysReportExit_ { #endif }; +/* This data is shared by these drivers - initialized by spawn_init() */ +static struct driver_data { + int port_num, ofd, packet_bytes; + ErtsSysReportExit *report_exit; + int pid; + int alive; + int status; +} *driver_data; /* indexed by fd */ + static ErtsSysReportExit *report_exit_list; #if CHLDWTHR && !defined(ERTS_SMP) static ErtsSysReportExit *report_exit_transit_list; @@ -685,12 +692,33 @@ prepare_crash_dump(void) int i, max; char env[21]; /* enough to hold any 64-bit integer */ size_t envsz; + Port *heart_port; + Eterm heap[3]; + Eterm *hp = heap; + Eterm list = NIL; + + int heart_fd[2] = {-1,-1}; if (ERTS_PREPARED_CRASH_DUMP) return; /* We have already been called */ + heart_port = erts_get_heart_port(); + if (heart_port) { + /* hearts input fd + * We "know" drv_data is the in_fd since the port is started with read|write + */ + heart_fd[0] = (int)heart_port->drv_data; + heart_fd[1] = (int)driver_data[heart_fd[0]].ofd; + + list = CONS(hp, make_small(8), list); hp += 2; + + /* send to heart port, CMD = 8, i.e. prepare crash dump =o */ + erts_write_to_port(NIL, heart_port, list); + } + /* Make sure we unregister at epmd (unknown fd) and get at least one free filedescriptor (for erl_crash.dump) */ + max = max_files; if (max < 1024) max = 1024; @@ -704,6 +732,10 @@ prepare_crash_dump(void) if (i == async_fd[0] || i == async_fd[1]) continue; #endif + /* We don't want to close our heart yet ... */ + if (i == heart_fd[0] || i == heart_fd[1]) + continue; + close(i); } @@ -725,7 +757,6 @@ prepare_crash_dump(void) sec = (unsigned) i != 0 ? 0 : atoi(env); alarm(sec); } - } void @@ -1021,15 +1052,6 @@ void fini_getenv_state(GETENV_STATE *state) #define ERTS_SYS_READ_BUF_SZ (64*1024) -/* This data is shared by these drivers - initialized by spawn_init() */ -static struct driver_data { - int port_num, ofd, packet_bytes; - ErtsSysReportExit *report_exit; - int pid; - int alive; - int status; -} *driver_data; /* indexed by fd */ - /* Driver interfaces */ static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index 6894d682e7..c5664d8e8a 100755 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -258,8 +258,22 @@ void erl_sys_args(int* argc, char** argv) void erts_sys_prepare_crash_dump(void) { + Port *heart_port; + Eterm heap[3]; + Eterm *hp = heap; + Eterm list = NIL; + + heart_port = erts_get_heart_port(); + + if (heart_port) { + + list = CONS(hp, make_small(8), list); hp += 2; + + /* send to heart port, CMD = 8, i.e. prepare crash dump =o */ + erts_write_to_port(NIL, heart_port, list); + } + /* Windows - free file descriptors are hopefully available */ - return; } static void diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index 70c2b3bb23..c6ce017d8c 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -153,13 +153,14 @@ struct msg { }; /* operations */ -#define HEART_ACK 1 -#define HEART_BEAT 2 -#define SHUT_DOWN 3 -#define SET_CMD 4 -#define CLEAR_CMD 5 -#define GET_CMD 6 -#define HEART_CMD 7 +#define HEART_ACK (1) +#define HEART_BEAT (2) +#define SHUT_DOWN (3) +#define SET_CMD (4) +#define CLEAR_CMD (5) +#define GET_CMD (6) +#define HEART_CMD (7) +#define PREPARING_CRASH (8) /* Maybe interesting to change */ @@ -187,10 +188,11 @@ unsigned long heart_beat_kill_pid = 0; #define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay) /* reasons for reboot */ -#define R_TIMEOUT 1 -#define R_CLOSED 2 -#define R_ERROR 3 -#define R_SHUT_DOWN 4 +#define R_TIMEOUT (1) +#define R_CLOSED (2) +#define R_ERROR (3) +#define R_SHUT_DOWN (4) +#define R_CRASHING (5) /* Doing a crash dump and we will wait for it */ /* macros */ @@ -200,8 +202,8 @@ unsigned long heart_beat_kill_pid = 0; /* prototypes */ -static int message_loop(int,int); -static void do_terminate(int); +static int message_loop(int, int); +static void do_terminate(int, int); static int notify_ack(int); static int heart_cmd_reply(int, char *); static int write_message(int, struct msg *); @@ -376,7 +378,7 @@ main(int argc, char **argv) program_name[sizeof(program_name)-1] = '\0'; notify_ack(erlout_fd); cmd[0] = '\0'; - do_terminate(message_loop(erlin_fd,erlout_fd)); + do_terminate(erlin_fd,message_loop(erlin_fd,erlout_fd)); return 0; } @@ -410,6 +412,7 @@ message_loop(erlin_fd, erlout_fd) #endif while (1) { + /* REFACTOR: below to select/tmo function */ #ifdef __WIN32__ wresult = WaitForSingleObject(hevent_dataready,SELECT_TIMEOUT*1000+ 2); if (wresult == WAIT_FAILED) { @@ -504,6 +507,10 @@ message_loop(erlin_fd, erlout_fd) free_env_val(env); } break; + case PREPARING_CRASH: + /* Erlang has reached a crushdump point (is crashing for sure) */ + print_error("Erlang is crashing .. (waiting for crash dump file)"); + return R_CRASHING; default: /* ignore all other messages */ break; @@ -635,69 +642,66 @@ void win_system(char *command) * do_terminate */ static void -do_terminate(reason) - int reason; -{ +do_terminate(int erlin_fd, int reason) { /* When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish (plus heart_beat_report_delay if under VxWorks), so we don't need to call wd_reset(). */ - + struct msg message; + switch (reason) { case R_SHUT_DOWN: break; + case R_CRASHING: + print_error("Waiting for dump"); + read_message(erlin_fd, &message); /* read until we get something */ case R_TIMEOUT: - case R_ERROR: case R_CLOSED: + case R_ERROR: default: -#if defined(__WIN32__) /* Not VxWorks */ { - if(!cmd[0]) { - char *command = get_env(HEART_COMMAND_ENV); - if(!command) - print_error("Would reboot. Terminating."); - else { - kill_old_erlang(); - /* High prio combined with system() works badly indeed... */ - SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); - win_system(command); - print_error("Executed \"%s\". Terminating.",command); +#if defined(__WIN32__) /* Not VxWorks */ + if(!cmd[0]) { + char *command = get_env(HEART_COMMAND_ENV); + if(!command) + print_error("Would reboot. Terminating."); + else { + kill_old_erlang(); + /* High prio combined with system() works badly indeed... */ + SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); + win_system(command); + print_error("Executed \"%s\". Terminating.",command); + } + free_env_val(command); + } else { + kill_old_erlang(); + /* High prio combined with system() works badly indeed... */ + SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); + win_system(&cmd[0]); + print_error("Executed \"%s\". Terminating.",cmd); } - free_env_val(command); - } - else { - kill_old_erlang(); - /* High prio combined with system() works badly indeed... */ - SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); - win_system(&cmd[0]); - print_error("Executed \"%s\". Terminating.",cmd); - } - } - #else - { - if(!cmd[0]) { - char *command = get_env(HEART_COMMAND_ENV); - if(!command) - print_error("Would reboot. Terminating."); - else { - kill_old_erlang(); - /* suppress gcc warning with 'if' */ - if(system(command)); - print_error("Executed \"%s\". Terminating.",command); + if(!cmd[0]) { + char *command = get_env(HEART_COMMAND_ENV); + if(!command) + print_error("Would reboot. Terminating."); + else { + kill_old_erlang(); + /* suppress gcc warning with 'if' */ + if(system(command)); + print_error("Executed \"%s\". Terminating.",command); + } + free_env_val(command); + } else { + kill_old_erlang(); + /* suppress gcc warning with 'if' */ + if(system((char*)&cmd[0])); + print_error("Executed \"%s\". Terminating.",cmd); } - free_env_val(command); - } - else { - kill_old_erlang(); - /* suppress gcc warning with 'if' */ - if(system((char*)&cmd[0])); - print_error("Executed \"%s\". Terminating.",cmd); - } +#endif } break; -#endif } /* switch(reason) */ } -- cgit v1.2.3 From 70c9312c4bd8832c31094dd076364077f0a684c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Wed, 10 Oct 2012 15:09:30 +0200 Subject: heart: Refactor heart debugging --- erts/etc/common/heart.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'erts') diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index c6ce017d8c..dcb83c33ac 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -352,12 +352,14 @@ static void get_arguments(int argc, char** argv) { debugf("arguments -ht %d -wt %d -pid %lu\n",h,w,p); } -int -main(int argc, char **argv) -{ +int main(int argc, char **argv) { + + if (is_env_set("HEART_DEBUG")) { + fprintf(stderr, "heart: debug is ON!\r\n"); + debug_on = 1; + } + get_arguments(argc,argv); - if (is_env_set("HEART_DEBUG")) - debug_on=1; #ifdef __WIN32__ if (debug_on) { if(!is_env_set("ERLSRV_SERVICE_NAME")) { @@ -649,6 +651,7 @@ do_terminate(int erlin_fd, int reason) { to call wd_reset(). */ struct msg message; + int ret = 0; switch (reason) { case R_SHUT_DOWN: @@ -656,6 +659,7 @@ do_terminate(int erlin_fd, int reason) { case R_CRASHING: print_error("Waiting for dump"); read_message(erlin_fd, &message); /* read until we get something */ + /* fall through */ case R_TIMEOUT: case R_CLOSED: case R_ERROR: @@ -689,15 +693,15 @@ do_terminate(int erlin_fd, int reason) { else { kill_old_erlang(); /* suppress gcc warning with 'if' */ - if(system(command)); - print_error("Executed \"%s\". Terminating.",command); + ret = system(command); + print_error("Executed \"%s\" -> %d. Terminating.",command, ret); } free_env_val(command); } else { kill_old_erlang(); /* suppress gcc warning with 'if' */ - if(system((char*)&cmd[0])); - print_error("Executed \"%s\". Terminating.",cmd); + ret = system((char*)&cmd[0]); + print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret); } #endif } @@ -895,12 +899,13 @@ debugf(const char *format,...) { va_list args; - if (!debug_on) return; - va_start(args, format); - fprintf(stderr, "Heart: "); - vfprintf(stderr, format, args); - va_end(args); - fprintf(stderr, "\r\n"); + if (debug_on) { + va_start(args, format); + fprintf(stderr, "Heart: "); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\r\n"); + } } #ifdef __WIN32__ -- cgit v1.2.3 From 310354406aecba66ac0acbfd7d8ed8abb8bfcd5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 11 Oct 2012 15:16:08 +0200 Subject: erts: Search for heart in ports that are alive --- erts/emulator/beam/io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 502ded4eca..88e9215a9c 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -5248,7 +5248,10 @@ Port *erts_get_heart_port() { for(ix = 0; ix < erts_max_ports; ix++) { port = &erts_port[ix]; - /* immediate compare */ + /* only examine undead or alive ports */ + if (port->status & ERTS_PORT_SFLGS_DEAD) + continue; + /* immediate atom compare */ if (port->reg && port->reg->name == am_heart_port) { return port; } -- cgit v1.2.3 From d4172bde993275f26b62b53af85a5ac092846dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 11 Oct 2012 16:38:04 +0200 Subject: erts: Change ERL_CRASH_DUMP_SECONDS behaviour Not setting ERL_CRASH_DUMP_SECONDS will now terminate beam immediately on a crash without writing a crash dump file. Setting ERL_CRASH_DUMP_SECONDS to 0 will also terminate beam immediately on a crash without writing a crash dump file, i.e. same as not setting ERL_CRASH_DUMP_SECONDS environment variable. Setting ERL_CRASH_DUMP_SECONDS to a negative value will let the beam wait indefinitely on the crash dump file being written. Setting ERL_CRASH_DUMP_SECONDS to a positive value will let the beam wait that many seconds on the crash dump file being written. A positive value will set both an alarm in beam AND a heart timeout for restart if heart is running. This is due to the change of 'heart' behavior when 'heart' is listening for a crash. --- erts/emulator/beam/break.c | 39 ++++++++++++++++++++++-- erts/emulator/beam/index.c | 3 +- erts/emulator/beam/sys.h | 2 +- erts/emulator/sys/unix/sys.c | 40 +++++++++++++++--------- erts/emulator/sys/win32/sys.c | 3 +- erts/etc/common/heart.c | 71 ++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 134 insertions(+), 24 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 93aa2fb8d0..cf66f4e6b6 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -650,10 +650,13 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) ErtsThrPrgrData tpd_buf; /* in case we aren't a managed thread... */ #endif int fd; + size_t envsz; time_t now; + char env[21]; /* enough to hold any 64-bit integer */ size_t dumpnamebufsize = MAXPATHLEN; char dumpnamebuf[MAXPATHLEN]; char* dumpname; + int secs; if (ERTS_SOMEONE_IS_CRASH_DUMPING) return; @@ -676,9 +679,41 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) erts_writing_erl_crash_dump = 1; #endif - erts_sys_prepare_crash_dump(); + envsz = sizeof(env); + /* ERL_CRASH_DUMP_SECONDS not set + * same as ERL_CRASH_DUMP_SECONDS = 0 + * - do not write dump + * - do not set an alarm + * - break immediately + * + * ERL_CRASH_DUMP_SECONDS = 0 + * - do not write dump + * - do not set an alarm + * - break immediately + * + * ERL_CRASH_DUMP_SECONDS < 0 + * - do not set alarm + * - write dump until done + * + * ERL_CRASH_DUMP_SECONDS = S (and S positive) + * - Don't dump file forever + * - set alarm (set in sys) + * - write dump until alarm or file is written completely + */ + + if (erts_sys_getenv__("ERL_CRASH_DUMP_SECONDS", env, &envsz) != 0) { + return; /* break immediately */ + } else { + secs = atoi(env); + } + + if (secs == 0) { + return; + } + + erts_sys_prepare_crash_dump(secs); - if (erts_sys_getenv_raw("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0) + if (erts_sys_getenv__("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0) dumpname = "erl_crash.dump"; else dumpname = &dumpnamebuf[0]; diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c index a4a3007f93..ad4672c3de 100644 --- a/erts/emulator/beam/index.c +++ b/erts/emulator/beam/index.c @@ -82,7 +82,8 @@ index_put(IndexTable* t, void* tmpl) if (ix >= t->size) { Uint sz; if (ix >= t->limit) { - erl_exit(1, "no more index entries in %s (max=%d)\n", + /* A core dump is unnecessary */ + erl_exit(ERTS_DUMP_EXIT, "no more index entries in %s (max=%d)\n", t->htable.name, t->limit); } sz = INDEX_PAGE_SIZE*sizeof(IndexSlot*); diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 2406c52f14..9dd8341520 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -652,7 +652,7 @@ void erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec); void erts_sys_main_thread(void); #endif -extern void erts_sys_prepare_crash_dump(void); +extern void erts_sys_prepare_crash_dump(int secs); extern void erts_sys_pre_init(void); extern void erl_sys_init(void); extern void erl_sys_args(int *argc, char **argv); diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 37dfcb1dd4..282ecb345f 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -687,18 +687,20 @@ static RETSIGTYPE break_handler(int sig) #endif /* 0 */ static ERTS_INLINE void -prepare_crash_dump(void) +prepare_crash_dump(int secs) { +#define NUFBUF (3) int i, max; char env[21]; /* enough to hold any 64-bit integer */ size_t envsz; + DeclareTmpHeapNoproc(heap,NUFBUF); Port *heart_port; - Eterm heap[3]; Eterm *hp = heap; Eterm list = NIL; - int heart_fd[2] = {-1,-1}; + UseTmpHeapNoproc(NUFBUF); + if (ERTS_PREPARED_CRASH_DUMP) return; /* We have already been called */ @@ -740,7 +742,7 @@ prepare_crash_dump(void) } envsz = sizeof(env); - i = erts_sys_getenv_raw("ERL_CRASH_DUMP_NICE", env, &envsz); + i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz); if (i >= 0) { int nice_val; nice_val = i != 0 ? 0 : atoi(env); @@ -749,20 +751,21 @@ prepare_crash_dump(void) } erts_silence_warn_unused_result(nice(nice_val)); } - - envsz = sizeof(env); - i = erts_sys_getenv_raw("ERL_CRASH_DUMP_SECONDS", env, &envsz); - if (i >= 0) { - unsigned sec; - sec = (unsigned) i != 0 ? 0 : atoi(env); - alarm(sec); + + /* Positive secs means an alarm must be set + * 0 or negative means no alarm + */ + if (secs > 0) { + alarm((unsigned int)secs); } + UnUseTmpHeapNoproc(NUFBUF); +#undef NUFBUF } void -erts_sys_prepare_crash_dump(void) +erts_sys_prepare_crash_dump(int secs) { - prepare_crash_dump(); + prepare_crash_dump(secs); } static ERTS_INLINE void @@ -804,7 +807,7 @@ sigusr1_exit(void) is hung somewhere, so it won't be able to poll any flag we set here. */ ERTS_SET_GOT_SIGUSR1; - prepare_crash_dump(); + prepare_crash_dump((int)0); erl_exit(1, "Received SIGUSR1\n"); } @@ -2439,6 +2442,15 @@ erts_sys_getenv_raw(char *key, char *value, size_t *size) { return erts_sys_getenv(key, value, size); } +/* + * erts_sys_getenv + * returns: + * -1, if environment key is not set with a value + * 0, if environment key is set and value fits into buffer size + * 1, if environment key is set but does not fit into buffer size + * size is set with the needed buffer size value + */ + int erts_sys_getenv(char *key, char *value, size_t *size) { diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index c5664d8e8a..9d45023ceb 100755 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -256,7 +256,7 @@ void erl_sys_args(int* argc, char** argv) } void -erts_sys_prepare_crash_dump(void) +erts_sys_prepare_crash_dump(int secs) { Port *heart_port; Eterm heap[3]; @@ -274,6 +274,7 @@ erts_sys_prepare_crash_dump(void) } /* Windows - free file descriptors are hopefully available */ + /* Alarm not used on windows */ } static void diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index dcb83c33ac..ed75a8f256 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -137,7 +137,8 @@ # endif #endif -#define HEART_COMMAND_ENV "HEART_COMMAND" +#define HEART_COMMAND_ENV "HEART_COMMAND" +#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS" #define MSG_HDR_SIZE 2 #define MSG_HDR_PLUS_OP_SIZE 3 @@ -214,6 +215,7 @@ static void print_error(const char *,...); static void debugf(const char *,...); static void init_timestamp(void); static time_t timestamp(time_t *); +static int wait_until_close_write_or_env_tmo(int); #ifdef __WIN32__ static BOOL enable_privilege(void); @@ -650,15 +652,20 @@ do_terminate(int erlin_fd, int reason) { (plus heart_beat_report_delay if under VxWorks), so we don't need to call wd_reset(). */ - struct msg message; - int ret = 0; + int ret = 0, tmo=0; + char *tmo_env; switch (reason) { case R_SHUT_DOWN: break; case R_CRASHING: - print_error("Waiting for dump"); - read_message(erlin_fd, &message); /* read until we get something */ + if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) { + tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV); + tmo = atoi(tmo_env); + print_error("Waiting for dump - timeout set to %d seconds.", tmo); + wait_until_close_write_or_env_tmo(tmo); + free_env_val(tmo_env); + } /* fall through */ case R_TIMEOUT: case R_CLOSED: @@ -709,6 +716,60 @@ do_terminate(int erlin_fd, int reason) { } /* switch(reason) */ } + +/* Waits until something happens on socket or handle + * + * Uses global variables erlin_fd or hevent_dataready + */ +int wait_until_close_write_or_env_tmo(int tmo) { + int i = 0; + +#ifdef __WIN32__ + DWORD wresult; + DWORD wtmo = INFINITE; + + if (tmo >= 0) { + wtmo = tmo*1000 + 2; + } + + wresult = WaitForSingleObject(hevent_dataready, wtmo); + if (wresult == WAIT_FAILED) { + print_last_error(); + return -1; + } + + if (wresult == WAIT_TIMEOUT) { + debugf("wait timed out\n"); + i = 0; + } else { + debugf("wait ok\n"); + i = 1; + } +#else + fd_set read_fds; + int max_fd; + struct timeval timeout; + struct timeval *tptr = NULL; + + max_fd = erlin_fd; /* global */ + + if (tmo >= 0) { + timeout.tv_sec = tmo; /* On Linux timeout is modified by select */ + timeout.tv_usec = 0; + tptr = &timeout; + } + + FD_ZERO(&read_fds); + FD_SET(erlin_fd, &read_fds); + if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) { + print_error("error in select."); + return -1; + } +#endif + return i; +} + + /* * notify_ack * -- cgit v1.2.3 From e6ea3b07e25b09481b010558f8bfdfb57dcbb85e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Mon, 15 Oct 2012 16:22:47 +0200 Subject: doc: Document ERL_CRASH_DUMP_SECONDS behaviour * heart reboot behaviour * erl_crash.dump file write behaviour --- erts/doc/src/erl.xml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'erts') diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index 2dea477d99..f931445a3e 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -1040,6 +1040,37 @@ the emulator will be allowed to spend writing a crash dump. When the given number of seconds have elapsed, the emulator will be terminated by a SIGALRM signal.

+ +

If the environment variable is not set or it is set to zero seconds, , + the runtime system will not even attempt to write the crash dump file. It will just terminate. +

+

If the environment variable is set to negative valie, e.g. , + the runtime system will wait indefinitely for the crash dump file to be written. +

+

This environment variable is used in conjuction with + heart if heart is running: +

+ + +

+ Suppresses the writing a crash dump file entirely, + thus rebooting the runtime system immediately. + This is the same as not setting the environment variable. +

+
+ +

Setting the environment variable to a negative value will cause the + termination of the runtime system to wait until the crash dump file + has been completly written. +

+
+ +

+ Will wait for S seconds to complete the crash dump file and + then terminate the runtime system. +

+
+
-- cgit v1.2.3 From 014c784a73e56769c7045d8f7515d57d9d8a2caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Fri, 19 Oct 2012 17:22:25 +0200 Subject: erts: Fix lock check assertion --- erts/emulator/beam/erl_lock_check.c | 2 +- erts/emulator/beam/erl_process_lock.c | 2 +- erts/emulator/beam/io.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index b545ec07c0..1f388c1796 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -443,7 +443,7 @@ print_lock2(char *prefix, Sint16 id, Wterm extra, Uint16 flags, char *suffix) "%s'%s:%p%s'%s%s", prefix, lname, - boxed_val(extra), + _unchecked_boxed_val(extra), lock_type(flags), rw_op_str(flags), suffix); diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c index f28ef6a6d7..f7900317cc 100644 --- a/erts/emulator/beam/erl_process_lock.c +++ b/erts/emulator/beam/erl_process_lock.c @@ -1349,7 +1349,7 @@ erts_proc_lc_chk_no_proc_locks(char *file, int line) lc_id.proc_lock_msgq, lc_id.proc_lock_status}; erts_lc_have_lock_ids(resv, ids, 4); - if (resv[0] || resv[1] || resv[2] || resv[3]) { + if (!ERTS_IS_CRASH_DUMPING && (resv[0] || resv[1] || resv[2] || resv[3])) { erts_lc_fail("%s:%d: Thread has process locks locked when expected " "not to have any process locks locked", file, line); diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 88e9215a9c..609fe9f5fb 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -1132,7 +1132,7 @@ int erts_write_to_port(Eterm caller_id, Port *p, Eterm list) Uint size; int fpe_was_unmasked; - ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p)); + ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p) || ERTS_IS_CRASH_DUMPING); ERTS_SMP_CHK_NO_PROC_LOCKS; p->caller = caller_id; -- cgit v1.2.3 From a986416859aa6065e44d7288b37152756ac1b0ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Fri, 19 Oct 2012 17:32:18 +0200 Subject: erts: Fix crash dump write to port hack More future proof with R16 --- erts/emulator/sys/unix/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 282ecb345f..c485a4eece 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -715,7 +715,7 @@ prepare_crash_dump(int secs) list = CONS(hp, make_small(8), list); hp += 2; /* send to heart port, CMD = 8, i.e. prepare crash dump =o */ - erts_write_to_port(NIL, heart_port, list); + erts_write_to_port(ERTS_INVALID_PID, heart_port, list); } /* Make sure we unregister at epmd (unknown fd) and get at least -- cgit v1.2.3