about | summary | refs | log | tree | commit | diff | stats
path: root/erts/etc/common/heart.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/etc/common/heart.c')
-rw-r--r-- erts/etc/common/heart.c 116
1 files changed, 52 insertions, 64 deletions
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 2830641802..bc353e384e 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1996-2013. All Rights Reserved.
+ * Copyright Ericsson AB 1996-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -47,13 +48,10 @@
*
* HEART_BEATING
*
- * This program expects a heart beat messages. If it does not receive a
- * heart beat message from Erlang within heart_beat_timeout seconds, it
- * reboots the system. The variable heart_beat_timeout is exported (so
- * that it can be set from the shell in VxWorks, as is the variable
- * heart_beat_report_delay). When using Solaris, the system is rebooted
- * by executing the command stored in the environment variable
- * HEART_COMMAND.
+ * This program expects a heart beat message. If it does not receive a
+ * heart beat message from Erlang within heart_beat_timeout seconds, it
+ * reboots the system. The system is rebooted by executing the command
+ * stored in the environment variable HEART_COMMAND.
*
* BLOCKING DESCRIPTORS
*
@@ -109,7 +107,7 @@
# include <sys/time.h>
# include <unistd.h>
# include <signal.h>
-# if defined(CORRECT_USING_TIMES)
+# if defined(OS_MONOTONIC_TIME_USING_TIMES)
# include <sys/times.h>
# include <limits.h>
# endif
@@ -117,11 +115,14 @@
#define HEART_COMMAND_ENV "HEART_COMMAND"
#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS"
+#define HEART_KILL_SIGNAL "HEART_KILL_SIGNAL"
+#define HEART_NO_KILL "HEART_NO_KILL"
+
-#define MSG_HDR_SIZE 2
-#define MSG_HDR_PLUS_OP_SIZE 3
-#define MSG_BODY_SIZE 2048
-#define MSG_TOTAL_SIZE 2050
+#define MSG_HDR_SIZE (2)
+#define MSG_HDR_PLUS_OP_SIZE (3)
+#define MSG_BODY_SIZE (2048)
+#define MSG_TOTAL_SIZE (2050)
unsigned char cmd[MSG_BODY_SIZE];
@@ -145,27 +146,17 @@ struct msg {
/* Maybe interesting to change */
/* Times in seconds */
-#define HEART_BEAT_BOOT_DELAY 60 /* 1 minute */
#define SELECT_TIMEOUT 5 /* Every 5 seconds we reset the
watchdog timer */
/* heart_beat_timeout is the maximum gap in seconds between two
- consecutive heart beat messages from Erlang, and HEART_BEAT_BOOT_DELAY
- is the the extra delay that wd_keeper allows for, to give heart a
- chance to reboot in the "normal" way before the hardware watchdog
- enters the scene. heart_beat_report_delay is the time allowed for reporting
- before rebooting under VxWorks. */
+ consecutive heart beat messages from Erlang. */
int heart_beat_timeout = 60;
-int heart_beat_report_delay = 30;
-int heart_beat_boot_delay = HEART_BEAT_BOOT_DELAY;
/* All current platforms have a process identifier that
fits in an unsigned long and where 0 is an impossible or invalid value */
unsigned long heart_beat_kill_pid = 0;
-#define VW_WD_TIMEOUT (heart_beat_timeout+heart_beat_report_delay+heart_beat_boot_delay)
-#define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay)
-
/* reasons for reboot */
#define R_TIMEOUT (1)
#define R_CLOSED (2)
@@ -293,7 +284,6 @@ free_env_val(char *value)
static void get_arguments(int argc, char** argv) {
int i = 1;
int h;
- int w;
unsigned long p;
while (i < argc) {
@@ -309,15 +299,6 @@ static void get_arguments(int argc, char** argv) {
i++;
}
break;
- case 'w':
- if (strcmp(argv[i], "-wt") == 0)
- if (sscanf(argv[i+1],"%i",&w) ==1)
- if ((w > 10) && (w <= 65535)) {
- heart_beat_boot_delay = w;
- fprintf(stderr,"heart_beat_boot_delay = %d\n",w);
- i++;
- }
- break;
case 'p':
if (strcmp(argv[i], "-pid") == 0)
if (sscanf(argv[i+1],"%lu",&p) ==1){
@@ -343,7 +324,7 @@ static void get_arguments(int argc, char** argv) {
}
i++;
}
- debugf("arguments -ht %d -wt %d -pid %lu\n",h,w,p);
+ debugf("arguments -ht %d -pid %lu\n",h,p);
}
int main(int argc, char **argv) {
@@ -470,10 +451,6 @@ message_loop(erlin_fd, erlout_fd)
switch (mp->op) {
case HEART_BEAT:
timestamp(&last_received);
-#ifdef USE_WATCHDOG
- /* reset the hardware watchdog timer */
- wd_reset();
-#endif
break;
case SHUT_DOWN:
return R_SHUT_DOWN;
@@ -526,6 +503,12 @@ static void
kill_old_erlang(void){
HANDLE erlh;
DWORD exit_code;
+ char* envvar = NULL;
+
+ envvar = get_env(HEART_NO_KILL);
+ if (envvar && strcmp(envvar, "TRUE") == 0)
+ return;
+
if(heart_beat_kill_pid != 0){
if((erlh = OpenProcess(PROCESS_TERMINATE |
SYNCHRONIZE |
@@ -555,14 +538,26 @@ kill_old_erlang(void){
static void
kill_old_erlang(void){
pid_t pid;
- int i;
- int res;
+ int i, res;
+ int sig = SIGKILL;
+ char *envvar = NULL;
+
+ envvar = get_env(HEART_NO_KILL);
+ if (envvar && strcmp(envvar, "TRUE") == 0)
+ return;
+
+ envvar = get_env(HEART_KILL_SIGNAL);
+ if (envvar && strcmp(envvar, "SIGABRT") == 0) {
+ print_error("kill signal SIGABRT requested");
+ sig = SIGABRT;
+ }
+
if(heart_beat_kill_pid != 0){
pid = (pid_t) heart_beat_kill_pid;
- res = kill(pid,SIGKILL);
+ res = kill(pid,sig);
for(i=0; i < 5 && res == 0; ++i){
sleep(1);
- res = kill(pid,SIGKILL);
+ res = kill(pid,sig);
}
if(errno != ESRCH){
print_error("Unable to kill old process, "
@@ -656,11 +651,6 @@ void win_system(char *command)
*/
static void
do_terminate(int erlin_fd, int reason) {
- /*
- When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish
- (plus heart_beat_report_delay if under VxWorks), so we don't need
- to call wd_reset().
- */
int ret = 0, tmo=0;
char *tmo_env;
@@ -708,14 +698,12 @@ do_terminate(int erlin_fd, int reason) {
print_error("Would reboot. Terminating.");
else {
kill_old_erlang();
- /* suppress gcc warning with 'if' */
ret = system(command);
print_error("Executed \"%s\" -> %d. Terminating.",command, ret);
}
free_env_val(command);
} else {
kill_old_erlang();
- /* suppress gcc warning with 'if' */
ret = system((char*)&cmd[0]);
print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret);
}
@@ -1084,9 +1072,9 @@ time_t timestamp(time_t *res)
return r;
}
-#elif defined(HAVE_GETHRTIME) || defined(GETHRTIME_WITH_CLOCK_GETTIME)
+#elif defined(OS_MONOTONIC_TIME_USING_GETHRTIME) || defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
-#if defined(GETHRTIME_WITH_CLOCK_GETTIME)
+#if defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
typedef long long SysHrTime;
SysHrTime sys_gethrtime(void);
@@ -1095,7 +1083,7 @@ SysHrTime sys_gethrtime(void)
{
struct timespec ts;
long long result;
- if (clock_gettime(CLOCK_MONOTONIC,&ts) != 0) {
+ if (clock_gettime(MONOTONIC_CLOCK_ID,&ts) != 0) {
print_error("Fatal, could not get clock_monotonic value, terminating! "
"errno = %d\n", errno);
exit(1);
@@ -1122,7 +1110,7 @@ time_t timestamp(time_t *res)
return r;
}
-#elif defined(CORRECT_USING_TIMES)
+#elif defined(OS_MONOTONIC_TIME_USING_TIMES)
# ifdef NO_SYSCONF
# include <sys/param.h>