From d4172bde993275f26b62b53af85a5ac092846dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 11 Oct 2012 16:38:04 +0200 Subject: erts: Change ERL_CRASH_DUMP_SECONDS behaviour Not setting ERL_CRASH_DUMP_SECONDS will now terminate beam immediately on a crash without writing a crash dump file. Setting ERL_CRASH_DUMP_SECONDS to 0 will also terminate beam immediately on a crash without writing a crash dump file, i.e. same as not setting ERL_CRASH_DUMP_SECONDS environment variable. Setting ERL_CRASH_DUMP_SECONDS to a negative value will let the beam wait indefinitely on the crash dump file being written. Setting ERL_CRASH_DUMP_SECONDS to a positive value will let the beam wait that many seconds on the crash dump file being written. A positive value will set both an alarm in beam AND a heart timeout for restart if heart is running. This is due to the change of 'heart' behavior when 'heart' is listening for a crash. --- erts/etc/common/heart.c | 71 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 5 deletions(-) (limited to 'erts/etc/common') diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index dcb83c33ac..ed75a8f256 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -137,7 +137,8 @@ # endif #endif -#define HEART_COMMAND_ENV "HEART_COMMAND" +#define HEART_COMMAND_ENV "HEART_COMMAND" +#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS" #define MSG_HDR_SIZE 2 #define MSG_HDR_PLUS_OP_SIZE 3 @@ -214,6 +215,7 @@ static void print_error(const char *,...); static void debugf(const char *,...); static void init_timestamp(void); static time_t timestamp(time_t *); +static int wait_until_close_write_or_env_tmo(int); #ifdef __WIN32__ static BOOL enable_privilege(void); @@ -650,15 +652,20 @@ do_terminate(int erlin_fd, int reason) { (plus heart_beat_report_delay if under VxWorks), so we don't need to call wd_reset(). */ - struct msg message; - int ret = 0; + int ret = 0, tmo=0; + char *tmo_env; switch (reason) { case R_SHUT_DOWN: break; case R_CRASHING: - print_error("Waiting for dump"); - read_message(erlin_fd, &message); /* read until we get something */ + if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) { + tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV); + tmo = atoi(tmo_env); + print_error("Waiting for dump - timeout set to %d seconds.", tmo); + wait_until_close_write_or_env_tmo(tmo); + free_env_val(tmo_env); + } /* fall through */ case R_TIMEOUT: case R_CLOSED: @@ -709,6 +716,60 @@ do_terminate(int erlin_fd, int reason) { } /* switch(reason) */ } + +/* Waits until something happens on socket or handle + * + * Uses global variables erlin_fd or hevent_dataready + */ +int wait_until_close_write_or_env_tmo(int tmo) { + int i = 0; + +#ifdef __WIN32__ + DWORD wresult; + DWORD wtmo = INFINITE; + + if (tmo >= 0) { + wtmo = tmo*1000 + 2; + } + + wresult = WaitForSingleObject(hevent_dataready, wtmo); + if (wresult == WAIT_FAILED) { + print_last_error(); + return -1; + } + + if (wresult == WAIT_TIMEOUT) { + debugf("wait timed out\n"); + i = 0; + } else { + debugf("wait ok\n"); + i = 1; + } +#else + fd_set read_fds; + int max_fd; + struct timeval timeout; + struct timeval *tptr = NULL; + + max_fd = erlin_fd; /* global */ + + if (tmo >= 0) { + timeout.tv_sec = tmo; /* On Linux timeout is modified by select */ + timeout.tv_usec = 0; + tptr = &timeout; + } + + FD_ZERO(&read_fds); + FD_SET(erlin_fd, &read_fds); + if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) { + print_error("error in select."); + return -1; + } +#endif + return i; +} + + /* * notify_ack * -- cgit v1.2.3