From d4172bde993275f26b62b53af85a5ac092846dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 11 Oct 2012 16:38:04 +0200 Subject: erts: Change ERL_CRASH_DUMP_SECONDS behaviour Not setting ERL_CRASH_DUMP_SECONDS will now terminate beam immediately on a crash without writing a crash dump file. Setting ERL_CRASH_DUMP_SECONDS to 0 will also terminate beam immediately on a crash without writing a crash dump file, i.e. same as not setting ERL_CRASH_DUMP_SECONDS environment variable. Setting ERL_CRASH_DUMP_SECONDS to a negative value will let the beam wait indefinitely on the crash dump file being written. Setting ERL_CRASH_DUMP_SECONDS to a positive value will let the beam wait that many seconds on the crash dump file being written. A positive value will set both an alarm in beam AND a heart timeout for restart if heart is running. This is due to the change of 'heart' behavior when 'heart' is listening for a crash. --- erts/emulator/beam/break.c | 39 ++++++++++++++++++++++-- erts/emulator/beam/index.c | 3 +- erts/emulator/beam/sys.h | 2 +- erts/emulator/sys/unix/sys.c | 40 +++++++++++++++--------- erts/emulator/sys/win32/sys.c | 3 +- erts/etc/common/heart.c | 71 ++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 134 insertions(+), 24 deletions(-) diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 93aa2fb8d0..cf66f4e6b6 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -650,10 +650,13 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) ErtsThrPrgrData tpd_buf; /* in case we aren't a managed thread... */ #endif int fd; + size_t envsz; time_t now; + char env[21]; /* enough to hold any 64-bit integer */ size_t dumpnamebufsize = MAXPATHLEN; char dumpnamebuf[MAXPATHLEN]; char* dumpname; + int secs; if (ERTS_SOMEONE_IS_CRASH_DUMPING) return; @@ -676,9 +679,41 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) erts_writing_erl_crash_dump = 1; #endif - erts_sys_prepare_crash_dump(); + envsz = sizeof(env); + /* ERL_CRASH_DUMP_SECONDS not set + * same as ERL_CRASH_DUMP_SECONDS = 0 + * - do not write dump + * - do not set an alarm + * - break immediately + * + * ERL_CRASH_DUMP_SECONDS = 0 + * - do not write dump + * - do not set an alarm + * - break immediately + * + * ERL_CRASH_DUMP_SECONDS < 0 + * - do not set alarm + * - write dump until done + * + * ERL_CRASH_DUMP_SECONDS = S (and S positive) + * - Don't dump file forever + * - set alarm (set in sys) + * - write dump until alarm or file is written completely + */ + + if (erts_sys_getenv__("ERL_CRASH_DUMP_SECONDS", env, &envsz) != 0) { + return; /* break immediately */ + } else { + secs = atoi(env); + } + + if (secs == 0) { + return; + } + + erts_sys_prepare_crash_dump(secs); - if (erts_sys_getenv_raw("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0) + if (erts_sys_getenv__("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0) dumpname = "erl_crash.dump"; else dumpname = &dumpnamebuf[0]; diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c index a4a3007f93..ad4672c3de 100644 --- a/erts/emulator/beam/index.c +++ b/erts/emulator/beam/index.c @@ -82,7 +82,8 @@ index_put(IndexTable* t, void* tmpl) if (ix >= t->size) { Uint sz; if (ix >= t->limit) { - erl_exit(1, "no more index entries in %s (max=%d)\n", + /* A core dump is unnecessary */ + erl_exit(ERTS_DUMP_EXIT, "no more index entries in %s (max=%d)\n", t->htable.name, t->limit); } sz = INDEX_PAGE_SIZE*sizeof(IndexSlot*); diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 2406c52f14..9dd8341520 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -652,7 +652,7 @@ void erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec); void erts_sys_main_thread(void); #endif -extern void erts_sys_prepare_crash_dump(void); +extern void erts_sys_prepare_crash_dump(int secs); extern void erts_sys_pre_init(void); extern void erl_sys_init(void); extern void erl_sys_args(int *argc, char **argv); diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 37dfcb1dd4..282ecb345f 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -687,18 +687,20 @@ static RETSIGTYPE break_handler(int sig) #endif /* 0 */ static ERTS_INLINE void -prepare_crash_dump(void) +prepare_crash_dump(int secs) { +#define NUFBUF (3) int i, max; char env[21]; /* enough to hold any 64-bit integer */ size_t envsz; + DeclareTmpHeapNoproc(heap,NUFBUF); Port *heart_port; - Eterm heap[3]; Eterm *hp = heap; Eterm list = NIL; - int heart_fd[2] = {-1,-1}; + UseTmpHeapNoproc(NUFBUF); + if (ERTS_PREPARED_CRASH_DUMP) return; /* We have already been called */ @@ -740,7 +742,7 @@ prepare_crash_dump(void) } envsz = sizeof(env); - i = erts_sys_getenv_raw("ERL_CRASH_DUMP_NICE", env, &envsz); + i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz); if (i >= 0) { int nice_val; nice_val = i != 0 ? 0 : atoi(env); @@ -749,20 +751,21 @@ prepare_crash_dump(void) } erts_silence_warn_unused_result(nice(nice_val)); } - - envsz = sizeof(env); - i = erts_sys_getenv_raw("ERL_CRASH_DUMP_SECONDS", env, &envsz); - if (i >= 0) { - unsigned sec; - sec = (unsigned) i != 0 ? 0 : atoi(env); - alarm(sec); + + /* Positive secs means an alarm must be set + * 0 or negative means no alarm + */ + if (secs > 0) { + alarm((unsigned int)secs); } + UnUseTmpHeapNoproc(NUFBUF); +#undef NUFBUF } void -erts_sys_prepare_crash_dump(void) +erts_sys_prepare_crash_dump(int secs) { - prepare_crash_dump(); + prepare_crash_dump(secs); } static ERTS_INLINE void @@ -804,7 +807,7 @@ sigusr1_exit(void) is hung somewhere, so it won't be able to poll any flag we set here. */ ERTS_SET_GOT_SIGUSR1; - prepare_crash_dump(); + prepare_crash_dump((int)0); erl_exit(1, "Received SIGUSR1\n"); } @@ -2439,6 +2442,15 @@ erts_sys_getenv_raw(char *key, char *value, size_t *size) { return erts_sys_getenv(key, value, size); } +/* + * erts_sys_getenv + * returns: + * -1, if environment key is not set with a value + * 0, if environment key is set and value fits into buffer size + * 1, if environment key is set but does not fit into buffer size + * size is set with the needed buffer size value + */ + int erts_sys_getenv(char *key, char *value, size_t *size) { diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index c5664d8e8a..9d45023ceb 100755 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -256,7 +256,7 @@ void erl_sys_args(int* argc, char** argv) } void -erts_sys_prepare_crash_dump(void) +erts_sys_prepare_crash_dump(int secs) { Port *heart_port; Eterm heap[3]; @@ -274,6 +274,7 @@ erts_sys_prepare_crash_dump(void) } /* Windows - free file descriptors are hopefully available */ + /* Alarm not used on windows */ } static void diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index dcb83c33ac..ed75a8f256 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -137,7 +137,8 @@ # endif #endif -#define HEART_COMMAND_ENV "HEART_COMMAND" +#define HEART_COMMAND_ENV "HEART_COMMAND" +#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS" #define MSG_HDR_SIZE 2 #define MSG_HDR_PLUS_OP_SIZE 3 @@ -214,6 +215,7 @@ static void print_error(const char *,...); static void debugf(const char *,...); static void init_timestamp(void); static time_t timestamp(time_t *); +static int wait_until_close_write_or_env_tmo(int); #ifdef __WIN32__ static BOOL enable_privilege(void); @@ -650,15 +652,20 @@ do_terminate(int erlin_fd, int reason) { (plus heart_beat_report_delay if under VxWorks), so we don't need to call wd_reset(). */ - struct msg message; - int ret = 0; + int ret = 0, tmo=0; + char *tmo_env; switch (reason) { case R_SHUT_DOWN: break; case R_CRASHING: - print_error("Waiting for dump"); - read_message(erlin_fd, &message); /* read until we get something */ + if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) { + tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV); + tmo = atoi(tmo_env); + print_error("Waiting for dump - timeout set to %d seconds.", tmo); + wait_until_close_write_or_env_tmo(tmo); + free_env_val(tmo_env); + } /* fall through */ case R_TIMEOUT: case R_CLOSED: @@ -709,6 +716,60 @@ do_terminate(int erlin_fd, int reason) { } /* switch(reason) */ } + +/* Waits until something happens on socket or handle + * + * Uses global variables erlin_fd or hevent_dataready + */ +int wait_until_close_write_or_env_tmo(int tmo) { + int i = 0; + +#ifdef __WIN32__ + DWORD wresult; + DWORD wtmo = INFINITE; + + if (tmo >= 0) { + wtmo = tmo*1000 + 2; + } + + wresult = WaitForSingleObject(hevent_dataready, wtmo); + if (wresult == WAIT_FAILED) { + print_last_error(); + return -1; + } + + if (wresult == WAIT_TIMEOUT) { + debugf("wait timed out\n"); + i = 0; + } else { + debugf("wait ok\n"); + i = 1; + } +#else + fd_set read_fds; + int max_fd; + struct timeval timeout; + struct timeval *tptr = NULL; + + max_fd = erlin_fd; /* global */ + + if (tmo >= 0) { + timeout.tv_sec = tmo; /* On Linux timeout is modified by select */ + timeout.tv_usec = 0; + tptr = &timeout; + } + + FD_ZERO(&read_fds); + FD_SET(erlin_fd, &read_fds); + if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) { + print_error("error in select."); + return -1; + } +#endif + return i; +} + + /* * notify_ack * -- cgit v1.2.3