From 750e9be58c7c245d49d6a7227b972bf0fd8c09ab Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Thu, 14 Feb 2019 18:07:15 +0100 Subject: erts: Avoid heart killing a nicely exiting emulator Symptom: Heart kills exiting emulator before is has flushed all ports and with HEART_KILL_SIGNAL=SIGABRT it may also produce unnecessary core dumps from doing init:reboot() for example. Problem: Heart port is closed together with all the others in handle_reap_ports() which is detected by heart OS process. Solution 1: Leave the heart port alone in handle_reap_ports() and let it be closed by OS when emulator exists. It doesn't need to be flushed anyway. Solution 2: When heart OS process gets EOF on connection let it wait max 5 seconds for emulator process to self terminate before trying to kill it. --- erts/emulator/beam/erl_process.c | 7 +++++++ erts/etc/common/heart.c | 38 +++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index ffbfbc4e56..a4f312f101 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -2625,6 +2625,13 @@ handle_reap_ports(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) erts_smp_port_lock(prt); + if (prt->common.u.alive.reg && + prt->common.u.alive.reg->name == am_heart_port) { + /* Leave heart port to not get killed before flushing is done*/ + erts_port_release(prt); + continue; + } + state = erts_atomic32_read_nob(&prt->state); if (!(state & (ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP | ERTS_PORT_SFLG_HALT))) { diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index 8f1e89b638..5967fa2543 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -500,7 +500,7 @@ message_loop(erlin_fd, erlout_fd) #if defined(__WIN32__) static void -kill_old_erlang(void){ +kill_old_erlang(int reason){ HANDLE erlh; DWORD exit_code; char* envvar = NULL; @@ -536,7 +536,8 @@ kill_old_erlang(void){ } #else static void -kill_old_erlang(void){ +kill_old_erlang(int reason) +{ pid_t pid; int i, res; int sig = SIGKILL; @@ -546,14 +547,25 @@ kill_old_erlang(void){ if (envvar && strcmp(envvar, "TRUE") == 0) return; - envvar = get_env(HEART_KILL_SIGNAL); - if (envvar && strcmp(envvar, "SIGABRT") == 0) { - print_error("kill signal SIGABRT requested"); - sig = SIGABRT; - } - if(heart_beat_kill_pid != 0){ - pid = (pid_t) heart_beat_kill_pid; + pid = (pid_t) heart_beat_kill_pid; + if (reason == R_CLOSED) { + print_error("Wait 5 seconds for Erlang to terminate nicely"); + for (i=0; i < 5; ++i) { + res = kill(pid, 0); /* check if alive */ + if (res < 0 && errno == ESRCH) + return; + sleep(1); + } + print_error("Erlang still alive, kill it"); + } + + envvar = get_env(HEART_KILL_SIGNAL); + if (envvar && strcmp(envvar, "SIGABRT") == 0) { + print_error("kill signal SIGABRT requested"); + sig = SIGABRT; + } + res = kill(pid,sig); for(i=0; i < 5 && res == 0; ++i){ sleep(1); @@ -677,7 +689,7 @@ do_terminate(int erlin_fd, int reason) { if(!command) print_error("Would reboot. Terminating."); else { - kill_old_erlang(); + kill_old_erlang(reason); /* High prio combined with system() works badly indeed... */ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); win_system(command); @@ -685,7 +697,7 @@ do_terminate(int erlin_fd, int reason) { } free_env_val(command); } else { - kill_old_erlang(); + kill_old_erlang(reason); /* High prio combined with system() works badly indeed... */ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); win_system(&cmd[0]); @@ -697,13 +709,13 @@ do_terminate(int erlin_fd, int reason) { if(!command) print_error("Would reboot. Terminating."); else { - kill_old_erlang(); + kill_old_erlang(reason); ret = system(command); print_error("Executed \"%s\" -> %d. Terminating.",command, ret); } free_env_val(command); } else { - kill_old_erlang(); + kill_old_erlang(reason); ret = system((char*)&cmd[0]); print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret); } -- cgit v1.2.3