aboutsummaryrefslogtreecommitdiffstats
path: root/erts/etc/common/heart.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/etc/common/heart.c')
-rw-r--r--erts/etc/common/heart.c325
1 files changed, 170 insertions, 155 deletions
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 755e308219..81d797dc7e 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -66,8 +66,7 @@
* input and output file descriptors (0 and 1). These descriptors
* (and the standard error descriptor 2) must NOT be closed
* explicitely by this program at termination (in UNIX it is
- * taken care of by the operating system itself; in VxWorks
- * it is taken care of by the spawn driver part of the Emulator).
+ * taken care of by the operating system itself).
*
* END OF FILE
*
@@ -75,12 +74,6 @@
* that there is no process at the other end of the connection
* having the connection open for writing (end-of-file).
*
- * HARDWARE WATCHDOG
- *
- * When used with VxWorks(with CPU40), the hardware
- * watchdog is enabled, making sure that the system reboots
- * even if the heart port program malfunctions or the system
- * is completely overloaded.
*/
#ifdef HAVE_CONFIG_H
@@ -93,18 +86,12 @@
#include <fcntl.h>
#include <process.h>
#endif
-#ifdef VXWORKS
-#include "sys.h"
-#endif
/*
* Implement time correction using times() call even on Linuxes
* that can simulate gethrtime with clock_gettime, no use implementing
* a phony gethrtime in this file as the time questions are so infrequent.
*/
-#if defined(CORRET_USING_TIMES) || defined(GETHRTIME_WITH_CLOCK_GETTIME)
-# define HEART_CORRECT_USING_TIMES 1
-#endif
#include <stdio.h>
#include <stddef.h>
@@ -116,31 +103,20 @@
#include <time.h>
#include <errno.h>
-#ifdef VXWORKS
-# include <vxWorks.h>
-# include <ioLib.h>
-# include <selectLib.h>
-# include <netinet/in.h>
-# include <rebootLib.h>
-# include <sysLib.h>
-# include <taskLib.h>
-# include <wdLib.h>
-# include <taskHookLib.h>
-# include <selectLib.h>
-#endif
-#if !defined(__WIN32__) && !defined(VXWORKS)
+#if !defined(__WIN32__)
# include <sys/types.h>
# include <netinet/in.h>
# include <sys/time.h>
# include <unistd.h>
# include <signal.h>
-# if defined(HEART_CORRECT_USING_TIMES)
+# if defined(CORRECT_USING_TIMES)
# include <sys/times.h>
# include <limits.h>
# endif
#endif
-#define HEART_COMMAND_ENV "HEART_COMMAND"
+#define HEART_COMMAND_ENV "HEART_COMMAND"
+#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS"
#define MSG_HDR_SIZE 2
#define MSG_HDR_PLUS_OP_SIZE 3
@@ -156,13 +132,14 @@ struct msg {
};
/* operations */
-#define HEART_ACK 1
-#define HEART_BEAT 2
-#define SHUT_DOWN 3
-#define SET_CMD 4
-#define CLEAR_CMD 5
-#define GET_CMD 6
-#define HEART_CMD 7
+#define HEART_ACK (1)
+#define HEART_BEAT (2)
+#define SHUT_DOWN (3)
+#define SET_CMD (4)
+#define CLEAR_CMD (5)
+#define GET_CMD (6)
+#define HEART_CMD (7)
+#define PREPARING_CRASH (8)
/* Maybe interesting to change */
@@ -190,10 +167,11 @@ unsigned long heart_beat_kill_pid = 0;
#define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay)
/* reasons for reboot */
-#define R_TIMEOUT 1
-#define R_CLOSED 2
-#define R_ERROR 3
-#define R_SHUT_DOWN 4
+#define R_TIMEOUT (1)
+#define R_CLOSED (2)
+#define R_ERROR (3)
+#define R_SHUT_DOWN (4)
+#define R_CRASHING (5) /* Doing a crash dump and we will wait for it */
/* macros */
@@ -203,8 +181,8 @@ unsigned long heart_beat_kill_pid = 0;
/* prototypes */
-static int message_loop(int,int);
-static void do_terminate(int);
+static int message_loop(int, int);
+static void do_terminate(int, int);
static int notify_ack(int);
static int heart_cmd_reply(int, char *);
static int write_message(int, struct msg *);
@@ -215,6 +193,7 @@ static void print_error(const char *,...);
static void debugf(const char *,...);
static void init_timestamp(void);
static time_t timestamp(time_t *);
+static int wait_until_close_write_or_env_tmo(int);
#ifdef __WIN32__
static BOOL enable_privilege(void);
@@ -353,12 +332,14 @@ static void get_arguments(int argc, char** argv) {
debugf("arguments -ht %d -wt %d -pid %lu\n",h,w,p);
}
-int
-main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
+
+ if (is_env_set("HEART_DEBUG")) {
+ fprintf(stderr, "heart: debug is ON!\r\n");
+ debug_on = 1;
+ }
+
get_arguments(argc,argv);
- if (is_env_set("HEART_DEBUG"))
- debug_on=1;
#ifdef __WIN32__
if (debug_on) {
if(!is_env_set("ERLSRV_SERVICE_NAME")) {
@@ -379,7 +360,7 @@ main(int argc, char **argv)
program_name[sizeof(program_name)-1] = '\0';
notify_ack(erlout_fd);
cmd[0] = '\0';
- do_terminate(message_loop(erlin_fd,erlout_fd));
+ do_terminate(erlin_fd,message_loop(erlin_fd,erlout_fd));
return 0;
}
@@ -413,6 +394,7 @@ message_loop(erlin_fd, erlout_fd)
#endif
while (1) {
+ /* REFACTOR: below to select/tmo function */
#ifdef __WIN32__
wresult = WaitForSingleObject(hevent_dataready,SELECT_TIMEOUT*1000+ 2);
if (wresult == WAIT_FAILED) {
@@ -447,7 +429,8 @@ message_loop(erlin_fd, erlout_fd)
*/
timestamp(&now);
if (now > last_received + heart_beat_timeout) {
- print_error("heart-beat time-out.");
+ print_error("heart-beat time-out, no activity for %lu seconds",
+ (unsigned long) (now - last_received));
return R_TIMEOUT;
}
/*
@@ -506,6 +489,10 @@ message_loop(erlin_fd, erlout_fd)
free_env_val(env);
}
break;
+ case PREPARING_CRASH:
+ /* Erlang has reached a crushdump point (is crashing for sure) */
+ print_error("Erlang is crashing .. (waiting for crash dump file)");
+ return R_CRASHING;
default:
/* ignore all other messages */
break;
@@ -550,8 +537,7 @@ kill_old_erlang(void){
CloseHandle(erlh);
}
}
-#elif !defined(VXWORKS)
-/* Unix eh? */
+#else
static void
kill_old_erlang(void){
pid_t pid;
@@ -570,7 +556,7 @@ kill_old_erlang(void){
}
}
}
-#endif /* Not on VxWorks */
+#endif
#ifdef __WIN32__
void win_system(char *command)
@@ -637,72 +623,130 @@ void win_system(char *command)
* do_terminate
*/
static void
-do_terminate(reason)
- int reason;
-{
+do_terminate(int erlin_fd, int reason) {
/*
When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish
(plus heart_beat_report_delay if under VxWorks), so we don't need
to call wd_reset().
*/
-
+ int ret = 0, tmo=0;
+ char *tmo_env;
+
switch (reason) {
case R_SHUT_DOWN:
break;
+ case R_CRASHING:
+ if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) {
+ tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV);
+ tmo = atoi(tmo_env);
+ print_error("Waiting for dump - timeout set to %d seconds.", tmo);
+ wait_until_close_write_or_env_tmo(tmo);
+ free_env_val(tmo_env);
+ }
+ /* fall through */
case R_TIMEOUT:
- case R_ERROR:
case R_CLOSED:
+ case R_ERROR:
default:
-#if defined(__WIN32__) /* Not VxWorks */
{
- if(!cmd[0]) {
- char *command = get_env(HEART_COMMAND_ENV);
- if(!command)
- print_error("Would reboot. Terminating.");
- else {
- kill_old_erlang();
- /* High prio combined with system() works badly indeed... */
- SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
- win_system(command);
- print_error("Executed \"%s\". Terminating.",command);
+#if defined(__WIN32__) /* Not VxWorks */
+ if(!cmd[0]) {
+ char *command = get_env(HEART_COMMAND_ENV);
+ if(!command)
+ print_error("Would reboot. Terminating.");
+ else {
+ kill_old_erlang();
+ /* High prio combined with system() works badly indeed... */
+ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
+ win_system(command);
+ print_error("Executed \"%s\". Terminating.",command);
+ }
+ free_env_val(command);
+ } else {
+ kill_old_erlang();
+ /* High prio combined with system() works badly indeed... */
+ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
+ win_system(&cmd[0]);
+ print_error("Executed \"%s\". Terminating.",cmd);
}
- free_env_val(command);
- }
- else {
- kill_old_erlang();
- /* High prio combined with system() works badly indeed... */
- SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
- win_system(&cmd[0]);
- print_error("Executed \"%s\". Terminating.",cmd);
- }
- }
-
#else
- {
- if(!cmd[0]) {
- char *command = get_env(HEART_COMMAND_ENV);
- if(!command)
- print_error("Would reboot. Terminating.");
- else {
- kill_old_erlang();
- /* suppress gcc warning with 'if' */
- if(system(command));
- print_error("Executed \"%s\". Terminating.",command);
+ if(!cmd[0]) {
+ char *command = get_env(HEART_COMMAND_ENV);
+ if(!command)
+ print_error("Would reboot. Terminating.");
+ else {
+ kill_old_erlang();
+ /* suppress gcc warning with 'if' */
+ ret = system(command);
+ print_error("Executed \"%s\" -> %d. Terminating.",command, ret);
+ }
+ free_env_val(command);
+ } else {
+ kill_old_erlang();
+ /* suppress gcc warning with 'if' */
+ ret = system((char*)&cmd[0]);
+ print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret);
}
- free_env_val(command);
- }
- else {
- kill_old_erlang();
- /* suppress gcc warning with 'if' */
- if(system((char*)&cmd[0]));
- print_error("Executed \"%s\". Terminating.",cmd);
- }
+#endif
}
break;
-#endif
} /* switch(reason) */
}
+
+/* Waits until something happens on socket or handle
+ *
+ * Uses global variables erlin_fd or hevent_dataready
+ */
+int wait_until_close_write_or_env_tmo(int tmo) {
+ int i = 0;
+
+#ifdef __WIN32__
+ DWORD wresult;
+ DWORD wtmo = INFINITE;
+
+ if (tmo >= 0) {
+ wtmo = tmo*1000 + 2;
+ }
+
+ wresult = WaitForSingleObject(hevent_dataready, wtmo);
+ if (wresult == WAIT_FAILED) {
+ print_last_error();
+ return -1;
+ }
+
+ if (wresult == WAIT_TIMEOUT) {
+ debugf("wait timed out\n");
+ i = 0;
+ } else {
+ debugf("wait ok\n");
+ i = 1;
+ }
+#else
+ fd_set read_fds;
+ int max_fd;
+ struct timeval timeout;
+ struct timeval *tptr = NULL;
+
+ max_fd = erlin_fd; /* global */
+
+ if (tmo >= 0) {
+ timeout.tv_sec = tmo; /* On Linux timeout is modified by select */
+ timeout.tv_usec = 0;
+ tptr = &timeout;
+ }
+
+ FD_ZERO(&read_fds);
+ FD_SET(erlin_fd, &read_fds);
+ if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) {
+ print_error("error in select.");
+ return -1;
+ }
+#endif
+ return i;
+}
+
+
/*
* notify_ack
*
@@ -893,12 +937,13 @@ debugf(const char *format,...)
{
va_list args;
- if (!debug_on) return;
- va_start(args, format);
- fprintf(stderr, "Heart: ");
- vfprintf(stderr, format, args);
- va_end(args);
- fprintf(stderr, "\r\n");
+ if (debug_on) {
+ va_start(args, format);
+ fprintf(stderr, "Heart: ");
+ vfprintf(stderr, format, args);
+ va_end(args);
+ fprintf(stderr, "\r\n");
+ }
}
#ifdef __WIN32__
@@ -1026,60 +1071,30 @@ time_t timestamp(time_t *res)
return r;
}
-#elif defined(VXWORKS)
-
-static WDOG_ID watchdog_id;
-static volatile unsigned elapsed;
-static WIND_TCB *this_task;
-/* A simple variable is enough to lock the time update, as the
- watchdog is run at interrupt level and never preempted. */
-static volatile int lock_time;
-
-static void my_delete_hook(WIND_TCB *tcb)
-{
- if (tcb == this_task) {
- wdDelete(watchdog_id);
- watchdog_id = NULL;
- taskDeleteHookDelete((FUNCPTR) &my_delete_hook);
- }
-}
+#elif defined(HAVE_GETHRTIME) || defined(GETHRTIME_WITH_CLOCK_GETTIME)
-static void my_wd_routine(int count)
-{
- if (watchdog_id != NULL) {
- ++count;
- if (!lock_time) {
- elapsed += count;
- count = 0;
- }
- wdStart(watchdog_id, sysClkRateGet(),
- (FUNCPTR) &my_wd_routine, count);
- }
-}
+#if defined(GETHRTIME_WITH_CLOCK_GETTIME)
+typedef long long SysHrTime;
-void init_timestamp(void)
-{
- lock_time = 0;
- elapsed = 0;
- watchdog_id = wdCreate();
- this_task = (WIND_TCB *) taskIdSelf();
- taskDeleteHookAdd((FUNCPTR) &my_delete_hook);
- wdStart(watchdog_id, sysClkRateGet(),
- (FUNCPTR) &my_wd_routine, 0);
-}
+SysHrTime sys_gethrtime(void);
-time_t timestamp(time_t *res)
+SysHrTime sys_gethrtime(void)
{
- time_t r;
- ++lock_time;
- r = (time_t) elapsed;
- --lock_time;
- if (res != NULL)
- *res = r;
- return r;
+ struct timespec ts;
+ long long result;
+ if (clock_gettime(CLOCK_MONOTONIC,&ts) != 0) {
+ print_error("Fatal, could not get clock_monotonic value, terminating! "
+ "errno = %d\n", errno);
+ exit(1);
+ }
+ result = ((long long) ts.tv_sec) * 1000000000LL +
+ ((long long) ts.tv_nsec);
+ return (SysHrTime) result;
}
-
-#elif defined(HAVE_GETHRTIME)
+#else
+typedef hrtime_t SysHrTime;
+#define sys_gethrtime() gethrtime()
+#endif
void init_timestamp(void)
{
@@ -1087,14 +1102,14 @@ void init_timestamp(void)
time_t timestamp(time_t *res)
{
- hrtime_t ht = gethrtime();
+ SysHrTime ht = sys_gethrtime();
time_t r = (time_t) (ht / 1000000000);
if (res != NULL)
*res = r;
return r;
}
-#elif defined(HEART_CORRECT_USING_TIMES)
+#elif defined(CORRECT_USING_TIMES)
# ifdef NO_SYSCONF
# include <sys/param.h>