diff options
Diffstat (limited to 'erts/etc/common/heart.c')
-rw-r--r-- | erts/etc/common/heart.c | 299 |
1 files changed, 146 insertions, 153 deletions
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index 70c2b3bb23..81d797dc7e 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -66,8 +66,7 @@ * input and output file descriptors (0 and 1). These descriptors * (and the standard error descriptor 2) must NOT be closed * explicitely by this program at termination (in UNIX it is - * taken care of by the operating system itself; in VxWorks - * it is taken care of by the spawn driver part of the Emulator). + * taken care of by the operating system itself). * * END OF FILE * @@ -75,12 +74,6 @@ * that there is no process at the other end of the connection * having the connection open for writing (end-of-file). * - * HARDWARE WATCHDOG - * - * When used with VxWorks(with CPU40), the hardware - * watchdog is enabled, making sure that the system reboots - * even if the heart port program malfunctions or the system - * is completely overloaded. */ #ifdef HAVE_CONFIG_H @@ -93,9 +86,6 @@ #include <fcntl.h> #include <process.h> #endif -#ifdef VXWORKS -#include "sys.h" -#endif /* * Implement time correction using times() call even on Linuxes @@ -113,19 +103,7 @@ #include <time.h> #include <errno.h> -#ifdef VXWORKS -# include <vxWorks.h> -# include <ioLib.h> -# include <selectLib.h> -# include <netinet/in.h> -# include <rebootLib.h> -# include <sysLib.h> -# include <taskLib.h> -# include <wdLib.h> -# include <taskHookLib.h> -# include <selectLib.h> -#endif -#if !defined(__WIN32__) && !defined(VXWORKS) +#if !defined(__WIN32__) # include <sys/types.h> # include <netinet/in.h> # include <sys/time.h> @@ -137,7 +115,8 @@ # endif #endif -#define HEART_COMMAND_ENV "HEART_COMMAND" +#define HEART_COMMAND_ENV "HEART_COMMAND" +#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS" #define MSG_HDR_SIZE 2 #define MSG_HDR_PLUS_OP_SIZE 3 @@ -153,13 +132,14 @@ struct msg { }; /* operations */ -#define HEART_ACK 1 -#define HEART_BEAT 2 -#define SHUT_DOWN 3 -#define SET_CMD 4 -#define CLEAR_CMD 5 -#define GET_CMD 6 -#define HEART_CMD 7 +#define HEART_ACK (1) +#define HEART_BEAT (2) +#define SHUT_DOWN (3) +#define SET_CMD (4) +#define CLEAR_CMD (5) +#define GET_CMD (6) +#define HEART_CMD (7) +#define PREPARING_CRASH (8) /* Maybe interesting to change */ @@ -187,10 +167,11 @@ unsigned long heart_beat_kill_pid = 0; #define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay) /* reasons for reboot */ -#define R_TIMEOUT 1 -#define R_CLOSED 2 -#define R_ERROR 3 -#define R_SHUT_DOWN 4 +#define R_TIMEOUT (1) +#define R_CLOSED (2) +#define R_ERROR (3) +#define R_SHUT_DOWN (4) +#define R_CRASHING (5) /* Doing a crash dump and we will wait for it */ /* macros */ @@ -200,8 +181,8 @@ unsigned long heart_beat_kill_pid = 0; /* prototypes */ -static int message_loop(int,int); -static void do_terminate(int); +static int message_loop(int, int); +static void do_terminate(int, int); static int notify_ack(int); static int heart_cmd_reply(int, char *); static int write_message(int, struct msg *); @@ -212,6 +193,7 @@ static void print_error(const char *,...); static void debugf(const char *,...); static void init_timestamp(void); static time_t timestamp(time_t *); +static int wait_until_close_write_or_env_tmo(int); #ifdef __WIN32__ static BOOL enable_privilege(void); @@ -350,12 +332,14 @@ static void get_arguments(int argc, char** argv) { debugf("arguments -ht %d -wt %d -pid %lu\n",h,w,p); } -int -main(int argc, char **argv) -{ +int main(int argc, char **argv) { + + if (is_env_set("HEART_DEBUG")) { + fprintf(stderr, "heart: debug is ON!\r\n"); + debug_on = 1; + } + get_arguments(argc,argv); - if (is_env_set("HEART_DEBUG")) - debug_on=1; #ifdef __WIN32__ if (debug_on) { if(!is_env_set("ERLSRV_SERVICE_NAME")) { @@ -376,7 +360,7 @@ main(int argc, char **argv) program_name[sizeof(program_name)-1] = '\0'; notify_ack(erlout_fd); cmd[0] = '\0'; - do_terminate(message_loop(erlin_fd,erlout_fd)); + do_terminate(erlin_fd,message_loop(erlin_fd,erlout_fd)); return 0; } @@ -410,6 +394,7 @@ message_loop(erlin_fd, erlout_fd) #endif while (1) { + /* REFACTOR: below to select/tmo function */ #ifdef __WIN32__ wresult = WaitForSingleObject(hevent_dataready,SELECT_TIMEOUT*1000+ 2); if (wresult == WAIT_FAILED) { @@ -504,6 +489,10 @@ message_loop(erlin_fd, erlout_fd) free_env_val(env); } break; + case PREPARING_CRASH: + /* Erlang has reached a crushdump point (is crashing for sure) */ + print_error("Erlang is crashing .. (waiting for crash dump file)"); + return R_CRASHING; default: /* ignore all other messages */ break; @@ -548,8 +537,7 @@ kill_old_erlang(void){ CloseHandle(erlh); } } -#elif !defined(VXWORKS) -/* Unix eh? */ +#else static void kill_old_erlang(void){ pid_t pid; @@ -568,7 +556,7 @@ kill_old_erlang(void){ } } } -#endif /* Not on VxWorks */ +#endif #ifdef __WIN32__ void win_system(char *command) @@ -635,72 +623,130 @@ void win_system(char *command) * do_terminate */ static void -do_terminate(reason) - int reason; -{ +do_terminate(int erlin_fd, int reason) { /* When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish (plus heart_beat_report_delay if under VxWorks), so we don't need to call wd_reset(). */ - + int ret = 0, tmo=0; + char *tmo_env; + switch (reason) { case R_SHUT_DOWN: break; + case R_CRASHING: + if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) { + tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV); + tmo = atoi(tmo_env); + print_error("Waiting for dump - timeout set to %d seconds.", tmo); + wait_until_close_write_or_env_tmo(tmo); + free_env_val(tmo_env); + } + /* fall through */ case R_TIMEOUT: - case R_ERROR: case R_CLOSED: + case R_ERROR: default: -#if defined(__WIN32__) /* Not VxWorks */ { - if(!cmd[0]) { - char *command = get_env(HEART_COMMAND_ENV); - if(!command) - print_error("Would reboot. Terminating."); - else { - kill_old_erlang(); - /* High prio combined with system() works badly indeed... */ - SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); - win_system(command); - print_error("Executed \"%s\". Terminating.",command); +#if defined(__WIN32__) /* Not VxWorks */ + if(!cmd[0]) { + char *command = get_env(HEART_COMMAND_ENV); + if(!command) + print_error("Would reboot. Terminating."); + else { + kill_old_erlang(); + /* High prio combined with system() works badly indeed... */ + SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); + win_system(command); + print_error("Executed \"%s\". Terminating.",command); + } + free_env_val(command); + } else { + kill_old_erlang(); + /* High prio combined with system() works badly indeed... */ + SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); + win_system(&cmd[0]); + print_error("Executed \"%s\". Terminating.",cmd); } - free_env_val(command); - } - else { - kill_old_erlang(); - /* High prio combined with system() works badly indeed... */ - SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); - win_system(&cmd[0]); - print_error("Executed \"%s\". Terminating.",cmd); - } - } - #else - { - if(!cmd[0]) { - char *command = get_env(HEART_COMMAND_ENV); - if(!command) - print_error("Would reboot. Terminating."); - else { - kill_old_erlang(); - /* suppress gcc warning with 'if' */ - if(system(command)); - print_error("Executed \"%s\". Terminating.",command); + if(!cmd[0]) { + char *command = get_env(HEART_COMMAND_ENV); + if(!command) + print_error("Would reboot. Terminating."); + else { + kill_old_erlang(); + /* suppress gcc warning with 'if' */ + ret = system(command); + print_error("Executed \"%s\" -> %d. Terminating.",command, ret); + } + free_env_val(command); + } else { + kill_old_erlang(); + /* suppress gcc warning with 'if' */ + ret = system((char*)&cmd[0]); + print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret); } - free_env_val(command); - } - else { - kill_old_erlang(); - /* suppress gcc warning with 'if' */ - if(system((char*)&cmd[0])); - print_error("Executed \"%s\". Terminating.",cmd); - } +#endif } break; -#endif } /* switch(reason) */ } + +/* Waits until something happens on socket or handle + * + * Uses global variables erlin_fd or hevent_dataready + */ +int wait_until_close_write_or_env_tmo(int tmo) { + int i = 0; + +#ifdef __WIN32__ + DWORD wresult; + DWORD wtmo = INFINITE; + + if (tmo >= 0) { + wtmo = tmo*1000 + 2; + } + + wresult = WaitForSingleObject(hevent_dataready, wtmo); + if (wresult == WAIT_FAILED) { + print_last_error(); + return -1; + } + + if (wresult == WAIT_TIMEOUT) { + debugf("wait timed out\n"); + i = 0; + } else { + debugf("wait ok\n"); + i = 1; + } +#else + fd_set read_fds; + int max_fd; + struct timeval timeout; + struct timeval *tptr = NULL; + + max_fd = erlin_fd; /* global */ + + if (tmo >= 0) { + timeout.tv_sec = tmo; /* On Linux timeout is modified by select */ + timeout.tv_usec = 0; + tptr = &timeout; + } + + FD_ZERO(&read_fds); + FD_SET(erlin_fd, &read_fds); + if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) { + print_error("error in select."); + return -1; + } +#endif + return i; +} + + /* * notify_ack * @@ -891,12 +937,13 @@ debugf(const char *format,...) { va_list args; - if (!debug_on) return; - va_start(args, format); - fprintf(stderr, "Heart: "); - vfprintf(stderr, format, args); - va_end(args); - fprintf(stderr, "\r\n"); + if (debug_on) { + va_start(args, format); + fprintf(stderr, "Heart: "); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\r\n"); + } } #ifdef __WIN32__ @@ -1024,59 +1071,6 @@ time_t timestamp(time_t *res) return r; } -#elif defined(VXWORKS) - -static WDOG_ID watchdog_id; -static volatile unsigned elapsed; -static WIND_TCB *this_task; -/* A simple variable is enough to lock the time update, as the - watchdog is run at interrupt level and never preempted. */ -static volatile int lock_time; - -static void my_delete_hook(WIND_TCB *tcb) -{ - if (tcb == this_task) { - wdDelete(watchdog_id); - watchdog_id = NULL; - taskDeleteHookDelete((FUNCPTR) &my_delete_hook); - } -} - -static void my_wd_routine(int count) -{ - if (watchdog_id != NULL) { - ++count; - if (!lock_time) { - elapsed += count; - count = 0; - } - wdStart(watchdog_id, sysClkRateGet(), - (FUNCPTR) &my_wd_routine, count); - } -} - -void init_timestamp(void) -{ - lock_time = 0; - elapsed = 0; - watchdog_id = wdCreate(); - this_task = (WIND_TCB *) taskIdSelf(); - taskDeleteHookAdd((FUNCPTR) &my_delete_hook); - wdStart(watchdog_id, sysClkRateGet(), - (FUNCPTR) &my_wd_routine, 0); -} - -time_t timestamp(time_t *res) -{ - time_t r; - ++lock_time; - r = (time_t) elapsed; - --lock_time; - if (res != NULL) - *res = r; - return r; -} - #elif defined(HAVE_GETHRTIME) || defined(GETHRTIME_WITH_CLOCK_GETTIME) #if defined(GETHRTIME_WITH_CLOCK_GETTIME) @@ -1102,7 +1096,6 @@ typedef hrtime_t SysHrTime; #define sys_gethrtime() gethrtime() #endif - void init_timestamp(void) { } |