diff options
29 files changed, 692 insertions, 448 deletions
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index 2dea477d99..f931445a3e 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -1040,6 +1040,37 @@ the emulator will be allowed to spend writing a crash dump. When the given number of seconds have elapsed, the emulator will be terminated by a SIGALRM signal.</p> + + <p> If the environment variable is <em>not</em> set or it is set to zero seconds, <c><![CDATA[ERL_CRASH_DUMP_SECONDS=0]]></c>, + the runtime system will not even attempt to write the crash dump file. It will just terminate. + </p> + <p> If the environment variable is set to negative valie, e.g. <c><![CDATA[ERL_CRASH_DUMP_SECONDS=-1]]></c>, + the runtime system will wait indefinitely for the crash dump file to be written. + </p> + <p> This environment variable is used in conjuction with + <seealso marker="kernel:heart"><c>heart</c></seealso> if <c>heart</c> is running: + </p> + <taglist> + <tag><c><![CDATA[ERL_CRASH_DUMP_SECONDS=0]]></c></tag> + <item><p> + Suppresses the writing a crash dump file entirely, + thus rebooting the runtime system immediately. + This is the same as not setting the environment variable. + </p> + </item> + <tag><c><![CDATA[ERL_CRASH_DUMP_SECONDS=-1]]></c></tag> + <item><p>Setting the environment variable to a negative value will cause the + termination of the runtime system to wait until the crash dump file + has been completly written. + </p> + </item> + <tag><c><![CDATA[ERL_CRASH_DUMP_SECONDS=S]]></c></tag> + <item><p> + Will wait for <c>S</c> seconds to complete the crash dump file and + then terminate the runtime system. + </p> + </item> + </taglist> </item> <tag><c><![CDATA[ERL_AFLAGS]]></c></tag> <item> diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 106fad030b..afcbd732df 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -252,6 +252,7 @@ atom heap_block_size atom heap_size atom heap_sizes atom heap_type +atom heart_port atom heir atom hidden atom hide diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 93aa2fb8d0..cf66f4e6b6 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -650,10 +650,13 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) ErtsThrPrgrData tpd_buf; /* in case we aren't a managed thread... */ #endif int fd; + size_t envsz; time_t now; + char env[21]; /* enough to hold any 64-bit integer */ size_t dumpnamebufsize = MAXPATHLEN; char dumpnamebuf[MAXPATHLEN]; char* dumpname; + int secs; if (ERTS_SOMEONE_IS_CRASH_DUMPING) return; @@ -676,9 +679,41 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) erts_writing_erl_crash_dump = 1; #endif - erts_sys_prepare_crash_dump(); + envsz = sizeof(env); + /* ERL_CRASH_DUMP_SECONDS not set + * same as ERL_CRASH_DUMP_SECONDS = 0 + * - do not write dump + * - do not set an alarm + * - break immediately + * + * ERL_CRASH_DUMP_SECONDS = 0 + * - do not write dump + * - do not set an alarm + * - break immediately + * + * ERL_CRASH_DUMP_SECONDS < 0 + * - do not set alarm + * - write dump until done + * + * ERL_CRASH_DUMP_SECONDS = S (and S positive) + * - Don't dump file forever + * - set alarm (set in sys) + * - write dump until alarm or file is written completely + */ + + if (erts_sys_getenv__("ERL_CRASH_DUMP_SECONDS", env, &envsz) != 0) { + return; /* break immediately */ + } else { + secs = atoi(env); + } + + if (secs == 0) { + return; + } + + erts_sys_prepare_crash_dump(secs); - if (erts_sys_getenv_raw("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0) + if (erts_sys_getenv__("ERL_CRASH_DUMP",&dumpnamebuf[0],&dumpnamebufsize) != 0) dumpname = "erl_crash.dump"; else dumpname = &dumpnamebuf[0]; diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index b545ec07c0..1f388c1796 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -443,7 +443,7 @@ print_lock2(char *prefix, Sint16 id, Wterm extra, Uint16 flags, char *suffix) "%s'%s:%p%s'%s%s", prefix, lname, - boxed_val(extra), + _unchecked_boxed_val(extra), lock_type(flags), rw_op_str(flags), suffix); diff --git a/erts/emulator/beam/erl_posix_str.c b/erts/emulator/beam/erl_posix_str.c index 02db10905b..deb7e3e173 100644 --- a/erts/emulator/beam/erl_posix_str.c +++ b/erts/emulator/beam/erl_posix_str.c @@ -619,6 +619,7 @@ erl_errno_id(error) case WSAEINVALIDPROVIDER: return "einvalidprovider"; #endif #ifdef WSAEPROVIDERFAILEDINIT + /* You could get this if SYSTEMROOT env variable is set incorrectly */ case WSAEPROVIDERFAILEDINIT: return "eproviderfailedinit"; #endif #ifdef WSASYSCALLFAILURE diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c index f28ef6a6d7..f7900317cc 100644 --- a/erts/emulator/beam/erl_process_lock.c +++ b/erts/emulator/beam/erl_process_lock.c @@ -1349,7 +1349,7 @@ erts_proc_lc_chk_no_proc_locks(char *file, int line) lc_id.proc_lock_msgq, lc_id.proc_lock_status}; erts_lc_have_lock_ids(resv, ids, 4); - if (resv[0] || resv[1] || resv[2] || resv[3]) { + if (!ERTS_IS_CRASH_DUMPING && (resv[0] || resv[1] || resv[2] || resv[3])) { erts_lc_fail("%s:%d: Thread has process locks locked when expected " "not to have any process locks locked", file, line); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index dbf95f5bd7..2c20e3da3b 100755 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -983,6 +983,9 @@ Uint erts_port_ioq_size(Port *pp); void erts_stale_drv_select(Eterm, ErlDrvEvent, int, int); void erts_port_cleanup(Port *); void erts_fire_port_monitor(Port *prt, Eterm ref); + +Port *erts_get_heart_port(void); + #ifdef ERTS_SMP void erts_smp_xports_unlock(Port *); #endif diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c index a4a3007f93..ad4672c3de 100644 --- a/erts/emulator/beam/index.c +++ b/erts/emulator/beam/index.c @@ -82,7 +82,8 @@ index_put(IndexTable* t, void* tmpl) if (ix >= t->size) { Uint sz; if (ix >= t->limit) { - erl_exit(1, "no more index entries in %s (max=%d)\n", + /* A core dump is unnecessary */ + erl_exit(ERTS_DUMP_EXIT, "no more index entries in %s (max=%d)\n", t->htable.name, t->limit); } sz = INDEX_PAGE_SIZE*sizeof(IndexSlot*); diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 35b194f927..609fe9f5fb 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -1132,7 +1132,7 @@ int erts_write_to_port(Eterm caller_id, Port *p, Eterm list) Uint size; int fpe_was_unmasked; - ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p)); + ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p) || ERTS_IS_CRASH_DUMPING); ERTS_SMP_CHK_NO_PROC_LOCKS; p->caller = caller_id; @@ -5235,3 +5235,27 @@ erl_drv_getenv(char *key, char *value, size_t *value_size) { return erts_sys_getenv_raw(key, value, value_size); } + +/* get heart_port + * used by erl_crash_dump + * - uses the fact that heart_port is registered when starting heart + */ + +Port *erts_get_heart_port() { + + Port* port; + Uint ix; + + for(ix = 0; ix < erts_max_ports; ix++) { + port = &erts_port[ix]; + /* only examine undead or alive ports */ + if (port->status & ERTS_PORT_SFLGS_DEAD) + continue; + /* immediate atom compare */ + if (port->reg && port->reg->name == am_heart_port) { + return port; + } + } + + return NULL; +} diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 2406c52f14..9dd8341520 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -652,7 +652,7 @@ void erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec); void erts_sys_main_thread(void); #endif -extern void erts_sys_prepare_crash_dump(void); +extern void erts_sys_prepare_crash_dump(int secs); extern void erts_sys_pre_init(void); extern void erl_sys_init(void); extern void erl_sys_args(int *argc, char **argv); diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index c1fa00b4ea..c485a4eece 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -58,7 +58,6 @@ #define __DARWIN__ 1 #endif - #ifdef USE_THREADS #include "erl_threads.h" #endif @@ -71,7 +70,6 @@ static erts_smp_rwmtx_t environ_rwmtx; #define MAX_VSIZE 16 /* Max number of entries allowed in an I/O * vector sock_sendv(). */ - /* * Don't need global.h, but bif_table.h (included by bif.h), * won't compile otherwise @@ -123,6 +121,15 @@ struct ErtsSysReportExit_ { #endif }; +/* This data is shared by these drivers - initialized by spawn_init() */ +static struct driver_data { + int port_num, ofd, packet_bytes; + ErtsSysReportExit *report_exit; + int pid; + int alive; + int status; +} *driver_data; /* indexed by fd */ + static ErtsSysReportExit *report_exit_list; #if CHLDWTHR && !defined(ERTS_SMP) static ErtsSysReportExit *report_exit_transit_list; @@ -680,17 +687,40 @@ static RETSIGTYPE break_handler(int sig) #endif /* 0 */ static ERTS_INLINE void -prepare_crash_dump(void) +prepare_crash_dump(int secs) { +#define NUFBUF (3) int i, max; char env[21]; /* enough to hold any 64-bit integer */ size_t envsz; + DeclareTmpHeapNoproc(heap,NUFBUF); + Port *heart_port; + Eterm *hp = heap; + Eterm list = NIL; + int heart_fd[2] = {-1,-1}; + + UseTmpHeapNoproc(NUFBUF); if (ERTS_PREPARED_CRASH_DUMP) return; /* We have already been called */ + heart_port = erts_get_heart_port(); + if (heart_port) { + /* hearts input fd + * We "know" drv_data is the in_fd since the port is started with read|write + */ + heart_fd[0] = (int)heart_port->drv_data; + heart_fd[1] = (int)driver_data[heart_fd[0]].ofd; + + list = CONS(hp, make_small(8), list); hp += 2; + + /* send to heart port, CMD = 8, i.e. prepare crash dump =o */ + erts_write_to_port(ERTS_INVALID_PID, heart_port, list); + } + /* Make sure we unregister at epmd (unknown fd) and get at least one free filedescriptor (for erl_crash.dump) */ + max = max_files; if (max < 1024) max = 1024; @@ -704,11 +734,15 @@ prepare_crash_dump(void) if (i == async_fd[0] || i == async_fd[1]) continue; #endif + /* We don't want to close our heart yet ... */ + if (i == heart_fd[0] || i == heart_fd[1]) + continue; + close(i); } envsz = sizeof(env); - i = erts_sys_getenv_raw("ERL_CRASH_DUMP_NICE", env, &envsz); + i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz); if (i >= 0) { int nice_val; nice_val = i != 0 ? 0 : atoi(env); @@ -717,21 +751,21 @@ prepare_crash_dump(void) } erts_silence_warn_unused_result(nice(nice_val)); } - - envsz = sizeof(env); - i = erts_sys_getenv_raw("ERL_CRASH_DUMP_SECONDS", env, &envsz); - if (i >= 0) { - unsigned sec; - sec = (unsigned) i != 0 ? 0 : atoi(env); - alarm(sec); - } + /* Positive secs means an alarm must be set + * 0 or negative means no alarm + */ + if (secs > 0) { + alarm((unsigned int)secs); + } + UnUseTmpHeapNoproc(NUFBUF); +#undef NUFBUF } void -erts_sys_prepare_crash_dump(void) +erts_sys_prepare_crash_dump(int secs) { - prepare_crash_dump(); + prepare_crash_dump(secs); } static ERTS_INLINE void @@ -773,7 +807,7 @@ sigusr1_exit(void) is hung somewhere, so it won't be able to poll any flag we set here. */ ERTS_SET_GOT_SIGUSR1; - prepare_crash_dump(); + prepare_crash_dump((int)0); erl_exit(1, "Received SIGUSR1\n"); } @@ -1021,15 +1055,6 @@ void fini_getenv_state(GETENV_STATE *state) #define ERTS_SYS_READ_BUF_SZ (64*1024) -/* This data is shared by these drivers - initialized by spawn_init() */ -static struct driver_data { - int port_num, ofd, packet_bytes; - ErtsSysReportExit *report_exit; - int pid; - int alive; - int status; -} *driver_data; /* indexed by fd */ - /* Driver interfaces */ static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); @@ -2417,6 +2442,15 @@ erts_sys_getenv_raw(char *key, char *value, size_t *size) { return erts_sys_getenv(key, value, size); } +/* + * erts_sys_getenv + * returns: + * -1, if environment key is not set with a value + * 0, if environment key is set and value fits into buffer size + * 1, if environment key is set but does not fit into buffer size + * size is set with the needed buffer size value + */ + int erts_sys_getenv(char *key, char *value, size_t *size) { diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index c4e748ed3c..50f6219374 100755 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -256,10 +256,25 @@ void erl_sys_args(int* argc, char** argv) } void -erts_sys_prepare_crash_dump(void) +erts_sys_prepare_crash_dump(int secs) { + Port *heart_port; + Eterm heap[3]; + Eterm *hp = heap; + Eterm list = NIL; + + heart_port = erts_get_heart_port(); + + if (heart_port) { + + list = CONS(hp, make_small(8), list); hp += 2; + + /* send to heart port, CMD = 8, i.e. prepare crash dump =o */ + erts_write_to_port(NIL, heart_port, list); + } + /* Windows - free file descriptors are hopefully available */ - return; + /* Alarm not used on windows */ } static void diff --git a/erts/emulator/test/alloc_SUITE_data/bucket_mask.c b/erts/emulator/test/alloc_SUITE_data/bucket_mask.c index 13af7d861a..b214f87e4a 100644 --- a/erts/emulator/test/alloc_SUITE_data/bucket_mask.c +++ b/erts/emulator/test/alloc_SUITE_data/bucket_mask.c @@ -16,11 +16,19 @@ * $Id$ */ +#include "erl_int_sizes_config.h" + #include "testcase_driver.h" #include "allocator_test.h" #include <stdio.h> -#define SBCT (512*1024) +#ifdef __WIN32__ && SIZEOF_VOID_P == 8 +/* Use larger threashold for win64 as block alignment + is 16 bytes and not 8 */ +#define SBCT ((1024*1024)) +#else +#define SBCT ((512*1024)) +#endif char * testcase_name(void) diff --git a/erts/emulator/test/smoke_test_SUITE.erl b/erts/emulator/test/smoke_test_SUITE.erl index 98f1cf1ad5..71186d4942 100644 --- a/erts/emulator/test/smoke_test_SUITE.erl +++ b/erts/emulator/test/smoke_test_SUITE.erl @@ -51,7 +51,17 @@ end_per_group(_GroupName, Config) -> Config. +init_per_testcase(boot_combo = Case, Config) when is_list(Config) -> + case erlang:system_info(build_type) of + opt -> + init_per_tc(Case, Config); + _ -> + {skip,"Cannot test boot_combo in special builds since beam.* may not exist"} + end; init_per_testcase(Case, Config) when is_list(Config) -> + init_per_tc(Case, Config). + +init_per_tc(Case, Config) -> Dog = ?t:timetrap(?DEFAULT_TIMEOUT), [{testcase, Case},{watchdog, Dog}|Config]. @@ -121,18 +131,19 @@ start_node(Config) -> start_node(Config, ""). start_node(Config, Args) when is_list(Config) -> - ?line Pa = filename:dirname(code:which(?MODULE)), - ?line {A, B, C} = now(), - ?line Name = list_to_atom(atom_to_list(?MODULE) - ++ "-" - ++ atom_to_list(?config(testcase, Config)) - ++ "-" - ++ integer_to_list(A) - ++ "-" - ++ integer_to_list(B) - ++ "-" - ++ integer_to_list(C)), - ?line ?t:start_node(Name, slave, [{args, "-pa "++Pa++" "++Args}]). + Pa = filename:dirname(code:which(?MODULE)), + {A, B, C} = now(), + Name = list_to_atom(atom_to_list(?MODULE) + ++ "-" + ++ atom_to_list(?config(testcase, Config)) + ++ "-" + ++ integer_to_list(A) + ++ "-" + ++ integer_to_list(B) + ++ "-" + ++ integer_to_list(C)), + Opts = [{args, "-pa "++Pa++" "++Args}], + ?t:start_node(Name, slave, Opts). stop_node(Node) -> ?t:stop_node(Node). diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index 70c2b3bb23..ed75a8f256 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -137,7 +137,8 @@ # endif #endif -#define HEART_COMMAND_ENV "HEART_COMMAND" +#define HEART_COMMAND_ENV "HEART_COMMAND" +#define ERL_CRASH_DUMP_SECONDS_ENV "ERL_CRASH_DUMP_SECONDS" #define MSG_HDR_SIZE 2 #define MSG_HDR_PLUS_OP_SIZE 3 @@ -153,13 +154,14 @@ struct msg { }; /* operations */ -#define HEART_ACK 1 -#define HEART_BEAT 2 -#define SHUT_DOWN 3 -#define SET_CMD 4 -#define CLEAR_CMD 5 -#define GET_CMD 6 -#define HEART_CMD 7 +#define HEART_ACK (1) +#define HEART_BEAT (2) +#define SHUT_DOWN (3) +#define SET_CMD (4) +#define CLEAR_CMD (5) +#define GET_CMD (6) +#define HEART_CMD (7) +#define PREPARING_CRASH (8) /* Maybe interesting to change */ @@ -187,10 +189,11 @@ unsigned long heart_beat_kill_pid = 0; #define SOL_WD_TIMEOUT (heart_beat_timeout+heart_beat_boot_delay) /* reasons for reboot */ -#define R_TIMEOUT 1 -#define R_CLOSED 2 -#define R_ERROR 3 -#define R_SHUT_DOWN 4 +#define R_TIMEOUT (1) +#define R_CLOSED (2) +#define R_ERROR (3) +#define R_SHUT_DOWN (4) +#define R_CRASHING (5) /* Doing a crash dump and we will wait for it */ /* macros */ @@ -200,8 +203,8 @@ unsigned long heart_beat_kill_pid = 0; /* prototypes */ -static int message_loop(int,int); -static void do_terminate(int); +static int message_loop(int, int); +static void do_terminate(int, int); static int notify_ack(int); static int heart_cmd_reply(int, char *); static int write_message(int, struct msg *); @@ -212,6 +215,7 @@ static void print_error(const char *,...); static void debugf(const char *,...); static void init_timestamp(void); static time_t timestamp(time_t *); +static int wait_until_close_write_or_env_tmo(int); #ifdef __WIN32__ static BOOL enable_privilege(void); @@ -350,12 +354,14 @@ static void get_arguments(int argc, char** argv) { debugf("arguments -ht %d -wt %d -pid %lu\n",h,w,p); } -int -main(int argc, char **argv) -{ +int main(int argc, char **argv) { + + if (is_env_set("HEART_DEBUG")) { + fprintf(stderr, "heart: debug is ON!\r\n"); + debug_on = 1; + } + get_arguments(argc,argv); - if (is_env_set("HEART_DEBUG")) - debug_on=1; #ifdef __WIN32__ if (debug_on) { if(!is_env_set("ERLSRV_SERVICE_NAME")) { @@ -376,7 +382,7 @@ main(int argc, char **argv) program_name[sizeof(program_name)-1] = '\0'; notify_ack(erlout_fd); cmd[0] = '\0'; - do_terminate(message_loop(erlin_fd,erlout_fd)); + do_terminate(erlin_fd,message_loop(erlin_fd,erlout_fd)); return 0; } @@ -410,6 +416,7 @@ message_loop(erlin_fd, erlout_fd) #endif while (1) { + /* REFACTOR: below to select/tmo function */ #ifdef __WIN32__ wresult = WaitForSingleObject(hevent_dataready,SELECT_TIMEOUT*1000+ 2); if (wresult == WAIT_FAILED) { @@ -504,6 +511,10 @@ message_loop(erlin_fd, erlout_fd) free_env_val(env); } break; + case PREPARING_CRASH: + /* Erlang has reached a crushdump point (is crashing for sure) */ + print_error("Erlang is crashing .. (waiting for crash dump file)"); + return R_CRASHING; default: /* ignore all other messages */ break; @@ -635,72 +646,130 @@ void win_system(char *command) * do_terminate */ static void -do_terminate(reason) - int reason; -{ +do_terminate(int erlin_fd, int reason) { /* When we get here, we have HEART_BEAT_BOOT_DELAY secs to finish (plus heart_beat_report_delay if under VxWorks), so we don't need to call wd_reset(). */ - + int ret = 0, tmo=0; + char *tmo_env; + switch (reason) { case R_SHUT_DOWN: break; + case R_CRASHING: + if (is_env_set(ERL_CRASH_DUMP_SECONDS_ENV)) { + tmo_env = get_env(ERL_CRASH_DUMP_SECONDS_ENV); + tmo = atoi(tmo_env); + print_error("Waiting for dump - timeout set to %d seconds.", tmo); + wait_until_close_write_or_env_tmo(tmo); + free_env_val(tmo_env); + } + /* fall through */ case R_TIMEOUT: - case R_ERROR: case R_CLOSED: + case R_ERROR: default: -#if defined(__WIN32__) /* Not VxWorks */ { - if(!cmd[0]) { - char *command = get_env(HEART_COMMAND_ENV); - if(!command) - print_error("Would reboot. Terminating."); - else { - kill_old_erlang(); - /* High prio combined with system() works badly indeed... */ - SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); - win_system(command); - print_error("Executed \"%s\". Terminating.",command); +#if defined(__WIN32__) /* Not VxWorks */ + if(!cmd[0]) { + char *command = get_env(HEART_COMMAND_ENV); + if(!command) + print_error("Would reboot. Terminating."); + else { + kill_old_erlang(); + /* High prio combined with system() works badly indeed... */ + SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); + win_system(command); + print_error("Executed \"%s\". Terminating.",command); + } + free_env_val(command); + } else { + kill_old_erlang(); + /* High prio combined with system() works badly indeed... */ + SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); + win_system(&cmd[0]); + print_error("Executed \"%s\". Terminating.",cmd); } - free_env_val(command); - } - else { - kill_old_erlang(); - /* High prio combined with system() works badly indeed... */ - SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); - win_system(&cmd[0]); - print_error("Executed \"%s\". Terminating.",cmd); - } - } - #else - { - if(!cmd[0]) { - char *command = get_env(HEART_COMMAND_ENV); - if(!command) - print_error("Would reboot. Terminating."); - else { - kill_old_erlang(); - /* suppress gcc warning with 'if' */ - if(system(command)); - print_error("Executed \"%s\". Terminating.",command); + if(!cmd[0]) { + char *command = get_env(HEART_COMMAND_ENV); + if(!command) + print_error("Would reboot. Terminating."); + else { + kill_old_erlang(); + /* suppress gcc warning with 'if' */ + ret = system(command); + print_error("Executed \"%s\" -> %d. Terminating.",command, ret); + } + free_env_val(command); + } else { + kill_old_erlang(); + /* suppress gcc warning with 'if' */ + ret = system((char*)&cmd[0]); + print_error("Executed \"%s\" -> %d. Terminating.",cmd, ret); } - free_env_val(command); - } - else { - kill_old_erlang(); - /* suppress gcc warning with 'if' */ - if(system((char*)&cmd[0])); - print_error("Executed \"%s\". Terminating.",cmd); - } +#endif } break; -#endif } /* switch(reason) */ } + +/* Waits until something happens on socket or handle + * + * Uses global variables erlin_fd or hevent_dataready + */ +int wait_until_close_write_or_env_tmo(int tmo) { + int i = 0; + +#ifdef __WIN32__ + DWORD wresult; + DWORD wtmo = INFINITE; + + if (tmo >= 0) { + wtmo = tmo*1000 + 2; + } + + wresult = WaitForSingleObject(hevent_dataready, wtmo); + if (wresult == WAIT_FAILED) { + print_last_error(); + return -1; + } + + if (wresult == WAIT_TIMEOUT) { + debugf("wait timed out\n"); + i = 0; + } else { + debugf("wait ok\n"); + i = 1; + } +#else + fd_set read_fds; + int max_fd; + struct timeval timeout; + struct timeval *tptr = NULL; + + max_fd = erlin_fd; /* global */ + + if (tmo >= 0) { + timeout.tv_sec = tmo; /* On Linux timeout is modified by select */ + timeout.tv_usec = 0; + tptr = &timeout; + } + + FD_ZERO(&read_fds); + FD_SET(erlin_fd, &read_fds); + if ((i = select(max_fd + 1, &read_fds, NULLFDS, NULLFDS, tptr)) < 0) { + print_error("error in select."); + return -1; + } +#endif + return i; +} + + /* * notify_ack * @@ -891,12 +960,13 @@ debugf(const char *format,...) { va_list args; - if (!debug_on) return; - va_start(args, format); - fprintf(stderr, "Heart: "); - vfprintf(stderr, format, args); - va_end(args); - fprintf(stderr, "\r\n"); + if (debug_on) { + va_start(args, format); + fprintf(stderr, "Heart: "); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\r\n"); + } } #ifdef __WIN32__ diff --git a/erts/etc/win32/nsis/Makefile b/erts/etc/win32/nsis/Makefile index f377a68c69..a53ac708f8 100644 --- a/erts/etc/win32/nsis/Makefile +++ b/erts/etc/win32/nsis/Makefile @@ -91,7 +91,7 @@ release_spec: fi;\ if [ '!' -z "$(REDIST_FILE)" -a '!' -z "$(REDIST_DLL_VERSION)" ];\ then \ - cp $(REDIST_FILE) "$(RELEASE_PATH)/$(REDIST_TARGET);"\ + cp $(REDIST_FILE) "$(RELEASE_PATH)/$(REDIST_TARGET)";\ echo '!define HAVE_REDIST_FILE 1' >> $(VERSION_HEADER); \ echo '!define REDIST_DLL_VERSION "$(REDIST_DLL_VERSION)"' >> $(VERSION_HEADER);\ echo '!define REDIST_DLL_NAME "$(REDIST_DLL_NAME)"' >> $(VERSION_HEADER);\ diff --git a/lib/dialyzer/src/dialyzer.erl b/lib/dialyzer/src/dialyzer.erl index 3e3c12405f..99388438b1 100644 --- a/lib/dialyzer/src/dialyzer.erl +++ b/lib/dialyzer/src/dialyzer.erl @@ -162,14 +162,17 @@ run(Opts) -> {error, Msg} -> throw({dialyzer_error, Msg}); OptsRecord -> - case cl_check_init(OptsRecord) of - {ok, ?RET_NOTHING_SUSPICIOUS} -> - case dialyzer_cl:start(OptsRecord) of - {?RET_DISCREPANCIES, Warnings} -> Warnings; - {?RET_NOTHING_SUSPICIOUS, []} -> [] - end; - {error, ErrorMsg1} -> - throw({dialyzer_error, ErrorMsg1}) + case OptsRecord#options.check_plt of + true -> + case cl_check_init(OptsRecord) of + {ok, ?RET_NOTHING_SUSPICIOUS} -> ok; + {error, ErrorMsg1} -> throw({dialyzer_error, ErrorMsg1}) + end; + false -> ok + end, + case dialyzer_cl:start(OptsRecord) of + {?RET_DISCREPANCIES, Warnings} -> Warnings; + {?RET_NOTHING_SUSPICIOUS, []} -> [] end catch throw:{dialyzer_error, ErrorMsg} -> diff --git a/lib/dialyzer/src/dialyzer_analysis_callgraph.erl b/lib/dialyzer/src/dialyzer_analysis_callgraph.erl index c237d4e0e9..86618a4915 100644 --- a/lib/dialyzer/src/dialyzer_analysis_callgraph.erl +++ b/lib/dialyzer/src/dialyzer_analysis_callgraph.erl @@ -326,13 +326,6 @@ cleanup_callgraph(#analysis_state{plt = InitPlt, parent = Parent, ModuleDeps = dialyzer_callgraph:module_deps(Callgraph), send_mod_deps(Parent, ModuleDeps), {Callgraph1, ExtCalls} = dialyzer_callgraph:remove_external(Callgraph), - RelevantAPICalls = - dialyzer_behaviours:get_behaviour_apis([gen_server]), - BehaviourAPICalls = [Call || {_From, To} = Call <- ExtCalls, - lists:member(To, RelevantAPICalls)], - Callgraph2 = - dialyzer_callgraph:put_behaviour_api_calls(BehaviourAPICalls, - Callgraph1), ExtCalls1 = [Call || Call = {_From, To} <- ExtCalls, not dialyzer_plt:contains_mfa(InitPlt, To)], {BadCalls1, RealExtCalls} = @@ -355,7 +348,7 @@ cleanup_callgraph(#analysis_state{plt = InitPlt, parent = Parent, true -> send_ext_calls(Parent, lists:usort([To || {_From, To} <- RealExtCalls])) end, - Callgraph2. + Callgraph1. compile_src(File, Includes, Defines, Callgraph, CServer, UseContracts) -> DefaultIncludes = default_includes(filename:dirname(File)), diff --git a/lib/dialyzer/src/dialyzer_behaviours.erl b/lib/dialyzer/src/dialyzer_behaviours.erl index b84071b95c..36aef2a37f 100644 --- a/lib/dialyzer/src/dialyzer_behaviours.erl +++ b/lib/dialyzer/src/dialyzer_behaviours.erl @@ -30,11 +30,9 @@ -module(dialyzer_behaviours). --export([check_callbacks/5, get_behaviour_apis/1, - translate_behaviour_api_call/5, translatable_behaviours/1, - translate_callgraph/3]). +-export([check_callbacks/5]). --export_type([behaviour/0, behaviour_api_dict/0]). +-export_type([behaviour/0]). %%-------------------------------------------------------------------- @@ -224,103 +222,3 @@ get_line([]) -> -1. get_file([{file, File}|_]) -> File; get_file([_|Tail]) -> get_file(Tail). - -%%----------------------------------------------------------------------------- - --spec translatable_behaviours(cerl:c_module()) -> behaviour_api_dict(). - -translatable_behaviours(Tree) -> - Attrs = cerl:module_attrs(Tree), - {Behaviours, _BehLines} = get_behaviours(Attrs), - [{B, Calls} || B <- Behaviours, (Calls = behaviour_api_calls(B)) =/= []]. - --spec get_behaviour_apis([behaviour()]) -> [mfa()]. - -get_behaviour_apis(Behaviours) -> - get_behaviour_apis(Behaviours, []). - --spec translate_behaviour_api_call(dialyzer_callgraph:mfa_or_funlbl(), - [erl_types:erl_type()], - [dialyzer_races:core_vars()], - module(), - behaviour_api_dict()) -> - {dialyzer_callgraph:mfa_or_funlbl(), - [erl_types:erl_type()], - [dialyzer_races:core_vars()]} - | 'plain_call'. - -translate_behaviour_api_call(_Fun, _ArgTypes, _Args, _Module, []) -> - plain_call; -translate_behaviour_api_call({Module, Fun, Arity}, ArgTypes, Args, - CallbackModule, BehApiInfo) -> - case lists:keyfind(Module, 1, BehApiInfo) of - false -> plain_call; - {Module, Calls} -> - case lists:keyfind({Fun, Arity}, 1, Calls) of - false -> plain_call; - {{Fun, Arity}, {CFun, CArity, COrder}} -> - {{CallbackModule, CFun, CArity}, - [nth_or_0(N, ArgTypes, erl_types:t_any()) || N <-COrder], - [nth_or_0(N, Args, bypassed) || N <-COrder]} - end - end; -translate_behaviour_api_call(_Fun, _ArgTypes, _Args, _Module, _BehApiInfo) -> - plain_call. - --spec translate_callgraph(behaviour_api_dict(), atom(), - dialyzer_callgraph:callgraph()) -> - dialyzer_callgraph:callgraph(). - -translate_callgraph([{Behaviour,_}|Behaviours], Module, Callgraph) -> - UsedCalls = [Call || {_From, {M, _F, _A}} = Call <- - dialyzer_callgraph:get_behaviour_api_calls(Callgraph), - M =:= Behaviour], - Calls = [{{Behaviour, API, Arity}, Callback} || - {{API, Arity}, Callback} <- behaviour_api_calls(Behaviour)], - DirectCalls = [{From, {Module, Fun, Arity}} || - {From, To} <- UsedCalls,{API, {Fun, Arity, _Ord}} <- Calls, - To =:= API], - dialyzer_callgraph:add_edges(DirectCalls, Callgraph), - translate_callgraph(Behaviours, Module, Callgraph); -translate_callgraph([], _Module, Callgraph) -> - Callgraph. - -get_behaviour_apis([], Acc) -> - Acc; -get_behaviour_apis([Behaviour | Rest], Acc) -> - MFAs = [{Behaviour, Fun, Arity} || - {{Fun, Arity}, _} <- behaviour_api_calls(Behaviour)], - get_behaviour_apis(Rest, MFAs ++ Acc). - -%------------------------------------------------------------------------------ - -nth_or_0(0, _List, Zero) -> - Zero; -nth_or_0(N, List, _Zero) -> - lists:nth(N, List). - -%------------------------------------------------------------------------------ - --type behaviour_api_dict()::[{behaviour(), behaviour_api_info()}]. --type behaviour_api_info()::[{original_fun(), replacement_fun()}]. --type original_fun()::{atom(), arity()}. --type replacement_fun()::{atom(), arity(), arg_list()}. --type arg_list()::[byte()]. - --spec behaviour_api_calls(behaviour()) -> behaviour_api_info(). - -behaviour_api_calls(gen_server) -> - [{{start_link, 3}, {init, 1, [2]}}, - {{start_link, 4}, {init, 1, [3]}}, - {{start, 3}, {init, 1, [2]}}, - {{start, 4}, {init, 1, [3]}}, - {{call, 2}, {handle_call, 3, [2, 0, 0]}}, - {{call, 3}, {handle_call, 3, [2, 0, 0]}}, - {{multi_call, 2}, {handle_call, 3, [2, 0, 0]}}, - {{multi_call, 3}, {handle_call, 3, [3, 0, 0]}}, - {{multi_call, 4}, {handle_call, 3, [3, 0, 0]}}, - {{cast, 2}, {handle_cast, 2, [2, 0]}}, - {{abcast, 2}, {handle_cast, 2, [2, 0]}}, - {{abcast, 3}, {handle_cast, 2, [3, 0]}}]; -behaviour_api_calls(_Other) -> - []. diff --git a/lib/dialyzer/src/dialyzer_dataflow.erl b/lib/dialyzer/src/dialyzer_dataflow.erl index 7131633da1..0ef008bc58 100644 --- a/lib/dialyzer/src/dialyzer_dataflow.erl +++ b/lib/dialyzer/src/dialyzer_dataflow.erl @@ -91,9 +91,8 @@ warning_mode = false :: boolean(), warnings = [] :: [dial_warning()], work :: {[_], [_], set()}, - module :: module(), - behaviour_api_dict = [] :: - dialyzer_behaviours:behaviour_api_dict()}). + module :: module() + }). -record(map, {dict = dict:new() :: dict(), subst = dict:new() :: dict(), @@ -135,38 +134,15 @@ get_fun_types(Tree, Plt, Callgraph, Records) -> analyze_module(Tree, Plt, Callgraph, Records, GetWarnings) -> debug_pp(Tree, false), Module = cerl:atom_val(cerl:module_name(Tree)), - RaceDetection = dialyzer_callgraph:get_race_detection(Callgraph), - BehaviourTranslations = - case RaceDetection of - true -> dialyzer_behaviours:translatable_behaviours(Tree); - false -> [] - end, TopFun = cerl:ann_c_fun([{label, top}], [], Tree), - State = - state__new(Callgraph, TopFun, Plt, Module, Records, BehaviourTranslations), + State = state__new(Callgraph, TopFun, Plt, Module, Records), State1 = state__race_analysis(not GetWarnings, State), State2 = analyze_loop(State1), case GetWarnings of true -> State3 = state__set_warning_mode(State2), State4 = analyze_loop(State3), - - %% EXPERIMENTAL: Turn all behaviour API calls into calls to the - %% respective callback module's functions. - - case BehaviourTranslations of - [] -> dialyzer_races:race(State4); - Behaviours -> - Digraph = dialyzer_callgraph:get_digraph(State4#state.callgraph), - TranslatedCallgraph = - dialyzer_behaviours:translate_callgraph(Behaviours, Module, - Callgraph), - St = - dialyzer_races:race(State4#state{callgraph = TranslatedCallgraph}), - FinalCallgraph = dialyzer_callgraph:put_digraph(Digraph, - St#state.callgraph), - St#state{callgraph = FinalCallgraph} - end; + dialyzer_races:race(State4); false -> State2 end. @@ -530,21 +506,8 @@ handle_apply_or_call([{TypeOfApply, {Fun, Sig, Contr, LocalRet}}|Left], Ann = cerl:get_ann(Tree), File = get_file(Ann), Line = abs(get_line(Ann)), - - %% EXPERIMENTAL: Turn a behaviour's API call into a call to the - %% respective callback module's function. - - Module = State#state.module, - BehApiDict = State#state.behaviour_api_dict, - {RealFun, RealArgTypes, RealArgs} = - case dialyzer_behaviours:translate_behaviour_api_call(Fun, ArgTypes, - Args, Module, - BehApiDict) of - plain_call -> {Fun, ArgTypes, Args}; - BehaviourAPI -> BehaviourAPI - end, - dialyzer_races:store_race_call(RealFun, RealArgTypes, RealArgs, - {File, Line}, State); + dialyzer_races:store_race_call(Fun, ArgTypes, Args, + {File, Line}, State); false -> State end, FailedConj = any_none([RetWithoutLocal|NewArgTypes]), @@ -2711,7 +2674,7 @@ determine_mode(Type, Opaques) -> %%% %%% =========================================================================== -state__new(Callgraph, Tree, Plt, Module, Records, BehaviourTranslations) -> +state__new(Callgraph, Tree, Plt, Module, Records) -> Opaques = erl_types:module_builtin_opaques(Module) ++ erl_types:t_opaque_from_records(Records), TreeMap = build_tree_map(Tree), @@ -2725,7 +2688,7 @@ state__new(Callgraph, Tree, Plt, Module, Records, BehaviourTranslations) -> #state{callgraph = Callgraph, envs = Env, fun_tab = FunTab, opaques = Opaques, plt = Plt, races = dialyzer_races:new(), records = Records, warning_mode = false, warnings = [], work = Work, tree_map = TreeMap, - module = Module, behaviour_api_dict = BehaviourTranslations}. + module = Module}. state__warning_mode(#state{warning_mode = WM}) -> WM. diff --git a/lib/dialyzer/src/dialyzer_races.erl b/lib/dialyzer/src/dialyzer_races.erl index cdb9f25999..2aa8343bce 100644 --- a/lib/dialyzer/src/dialyzer_races.erl +++ b/lib/dialyzer/src/dialyzer_races.erl @@ -1758,7 +1758,10 @@ compare_var_list(Var, VarList, RaceVarMap) -> ets_list_args(MaybeList) -> case is_list(MaybeList) of - true -> [ets_tuple_args(T) || T <- MaybeList]; + true -> + try [ets_tuple_args(T) || T <- MaybeList] + catch _:_ -> [?no_label] + end; false -> [ets_tuple_args(MaybeList)] end. diff --git a/lib/dialyzer/test/race_SUITE_data/results/ets_insert_args10 b/lib/dialyzer/test/race_SUITE_data/results/ets_insert_args10 new file mode 100644 index 0000000000..c3c9b12bdd --- /dev/null +++ b/lib/dialyzer/test/race_SUITE_data/results/ets_insert_args10 @@ -0,0 +1,2 @@ + +ets_insert_args10.erl:9: The call ets:insert(T::'foo',[{'counter',number()},...]) might have an unintended effect due to a possible race condition caused by its combination with the ets:lookup(T::'foo','counter') call in ets_insert_args10.erl on line 8 diff --git a/lib/dialyzer/test/race_SUITE_data/src/ets_insert_args10.erl b/lib/dialyzer/test/race_SUITE_data/src/ets_insert_args10.erl new file mode 100644 index 0000000000..c897a34af0 --- /dev/null +++ b/lib/dialyzer/test/race_SUITE_data/src/ets_insert_args10.erl @@ -0,0 +1,19 @@ +%% This tests the presence of possible races due to an ets:lookup/ets:insert +%% combination. It takes into account the argument types of the calls. + +-module(ets_insert_args10). +-export([start/0]). + +start() -> + F = fun(T) -> [{_, N}] = ets:lookup(T, counter), + ets:insert(T, [{counter, N+1}]) + end, + io:format("Created ~w\n", [ets:new(foo, [named_table, public])]), + A = {counter, 0}, + B = [], + ets:insert(foo, [A|B]), + io:format("Inserted ~w\n", [{counter, 0}]), + F(foo), + io:format("Update complete\n", []), + ObjectList = ets:lookup(foo, counter), + io:format("Counter: ~w\n", [ObjectList]). diff --git a/lib/dialyzer/test/small_SUITE_data/results/contracts_with_subtypes b/lib/dialyzer/test/small_SUITE_data/results/contracts_with_subtypes index 8dc0361b0d..4850f3ff0c 100644 --- a/lib/dialyzer/test/small_SUITE_data/results/contracts_with_subtypes +++ b/lib/dialyzer/test/small_SUITE_data/results/contracts_with_subtypes @@ -6,7 +6,7 @@ contracts_with_subtypes.erl:109: The call contracts_with_subtypes:rec_arg({'b',{ contracts_with_subtypes.erl:110: The call contracts_with_subtypes:rec_arg({'a',{'b',{'a','b'}}}) breaks the contract (Arg) -> 'ok' when is_subtype(Arg,{'a',A} | {'b',B}), is_subtype(A,'a' | {'b',B}), is_subtype(B,'b' | {'a',A}) contracts_with_subtypes.erl:111: The call contracts_with_subtypes:rec_arg({'b',{'a',{'b','a'}}}) breaks the contract (Arg) -> 'ok' when is_subtype(Arg,{'a',A} | {'b',B}), is_subtype(A,'a' | {'b',B}), is_subtype(B,'b' | {'a',A}) contracts_with_subtypes.erl:142: The pattern 1 can never match the type binary() | string() -contracts_with_subtypes.erl:145: The pattern 'alpha' can never match the type {'ok',X} | {'ok',X,binary() | string()} +contracts_with_subtypes.erl:145: The pattern 'alpha' can never match the type {'ok',_} | {'ok',_,binary() | string()} contracts_with_subtypes.erl:147: The pattern 42 can never match the type {'ok',_} | {'ok',_,binary() | string()} contracts_with_subtypes.erl:163: The pattern 'alpha' can never match the type {'ok',X} contracts_with_subtypes.erl:165: The pattern 42 can never match the type {'ok',X} diff --git a/lib/dialyzer/test/small_SUITE_data/src/remote_tuple_set.erl b/lib/dialyzer/test/small_SUITE_data/src/remote_tuple_set.erl new file mode 100644 index 0000000000..6c440ed04c --- /dev/null +++ b/lib/dialyzer/test/small_SUITE_data/src/remote_tuple_set.erl @@ -0,0 +1,8 @@ +-module(remote_tuple_set). + +-export([parse_cidr/0]). + +-spec parse_cidr() -> {inet:address_family(),1,2} | {error}. + +parse_cidr() -> + {inet,1,2}. diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index 1579735773..bc7ea17077 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -687,8 +687,8 @@ t_solve_remote(?tuple(Types, _Arity, _Tag), ET, R, C) -> {RL, RR} = list_solve_remote(Types, ET, R, C), {t_tuple(RL), RR}; t_solve_remote(?tuple_set(Set), ET, R, C) -> - {NewSet, RR} = tuples_solve_remote(Set, ET, R, C), - {?tuple_set(NewSet), RR}; + {NewTuples, RR} = tuples_solve_remote(Set, ET, R, C), + {t_sup(NewTuples), RR}; t_solve_remote(?remote(Set), ET, R, C) -> RemoteList = ordsets:to_list(Set), {RL, RR} = list_solve_remote_type(RemoteList, ET, R, C), @@ -788,10 +788,10 @@ opaques_solve_remote([#opaque{struct = Struct} = Remote|Tail], ET, R, C) -> tuples_solve_remote([], _ET, _R, _C) -> {[], []}; -tuples_solve_remote([{Sz, Tuples}|Tail], ET, R, C) -> +tuples_solve_remote([{_Sz, Tuples}|Tail], ET, R, C) -> {RL, RR1} = list_solve_remote(Tuples, ET, R, C), {LSzTpls, RR2} = tuples_solve_remote(Tail, ET, R, C), - {[{Sz, RL}|LSzTpls], RR1 ++ RR2}. + {RL ++ LSzTpls, RR1 ++ RR2}. %%----------------------------------------------------------------------------- %% Unit type. Signals non termination. diff --git a/lib/kernel/doc/src/heart.xml b/lib/kernel/doc/src/heart.xml index 26d1e27822..2826d3d00a 100644 --- a/lib/kernel/doc/src/heart.xml +++ b/lib/kernel/doc/src/heart.xml @@ -71,6 +71,39 @@ timeout and try to reboot the system. This can happen, for example, if the system clock is adjusted automatically by use of NTP (Network Time Protocol).</p> + + <p> If a crash occurs, an <c><![CDATA[erl_crash.dump]]></c> will <em>not</em> be written + unless the environment variable <c><![CDATA[ERL_CRASH_DUMP_SECONDS]]></c> is set. + </p> + + <pre> +% <input>erl -heart -env ERL_CRASH_DUMP_SECONDS 10 ...</input></pre> + <p> + Furthermore, <c><![CDATA[ERL_CRASH_DUMP_SECONDS]]></c> has the following behaviour on + <c>heart</c>: + </p> + <taglist> + <tag><c><![CDATA[ERL_CRASH_DUMP_SECONDS=0]]></c></tag> + <item><p> + Suppresses the writing a crash dump file entirely, + thus rebooting the runtime system immediately. + This is the same as not setting the environment variable. + </p> + </item> + <tag><c><![CDATA[ERL_CRASH_DUMP_SECONDS=-1]]></c></tag> + <item><p> Setting the environment variable to a negative value will not reboot + the runtime system until the crash dump file has been completly written. + </p> + </item> + <tag><c><![CDATA[ERL_CRASH_DUMP_SECONDS=S]]></c></tag> + <item><p> + Heart will wait for <c>S</c> seconds to let the crash dump file be written. + After <c>S</c> seconds <c>heart</c> will reboot the runtime system regardless of + the crash dump file has been written or not. + </p> + </item> + </taglist> + <p>In the following descriptions, all function fails with reason <c>badarg</c> if <c>heart</c> is not started.</p> </description> diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl index 28452a377e..de287bfa43 100644 --- a/lib/kernel/src/heart.erl +++ b/lib/kernel/src/heart.erl @@ -42,6 +42,7 @@ -define(CLEAR_CMD, 5). -define(GET_CMD, 6). -define(HEART_CMD, 7). +-define(PREPARING_CRASH, 8). % Used in beam vm -define(TIMEOUT, 5000). -define(CYCLE_TIMEOUT, 10000). @@ -130,6 +131,8 @@ start_portprogram() -> Port when is_port(Port) -> case wait_ack(Port) of ok -> + %% register port so the vm can find it if need be + register(heart_port, Port), {ok, Port}; {error, Reason} -> report_problem({{port_problem, Reason}, diff --git a/lib/kernel/test/heart_SUITE.erl b/lib/kernel/test/heart_SUITE.erl index 233e438dc9..e64d2914c4 100644 --- a/lib/kernel/test/heart_SUITE.erl +++ b/lib/kernel/test/heart_SUITE.erl @@ -22,7 +22,10 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, start/1, restart/1, - reboot/1, set_cmd/1, clear_cmd/1, get_cmd/1, + reboot/1, + node_start_immediately_after_crash/1, + node_start_soon_after_crash/1, + set_cmd/1, clear_cmd/1, get_cmd/1, dont_drop/1, kill_pid/1]). -export([init_per_testcase/2, end_per_testcase/2]). @@ -38,15 +41,15 @@ init_per_testcase(_Func, Config) -> end_per_testcase(_Func, Config) -> Nodes = nodes(), lists:foreach(fun(X) -> - NNam = list_to_atom(hd(string:tokens(atom_to_list(X),"@"))), - case NNam of - heart_test -> - ?t:format(1, "WARNING: Killed ~p~n", [X]), - rpc:cast(X, erlang, halt, []); - _ -> - ok - end - end, Nodes), + NNam = list_to_atom(hd(string:tokens(atom_to_list(X),"@"))), + case NNam of + heart_test -> + ?t:format(1, "WARNING: Killed ~p~n", [X]), + rpc:cast(X, erlang, halt, []); + _ -> + ok + end + end, Nodes), Dog=?config(watchdog, Config), test_server:timetrap_cancel(Dog). @@ -57,8 +60,13 @@ end_per_testcase(_Func, Config) -> %%----------------------------------------------------------------- suite() -> [{ct_hooks,[ts_install_cth]}]. -all() -> - [start, restart, reboot, set_cmd, clear_cmd, get_cmd, kill_pid]. +all() -> [ + start, restart, reboot, + node_start_immediately_after_crash, + node_start_soon_after_crash, + set_cmd, clear_cmd, get_cmd, + kill_pid + ]. groups() -> []. @@ -80,17 +88,22 @@ init_per_suite(Config) when is_list(Config) -> end_per_suite(Config) when is_list(Config) -> Config. + start_check(Type, Name) -> + start_check(Type, Name, []). +start_check(Type, Name, Envs) -> Args = case ?t:os_type() of - {win32,_} -> "-heart -env HEART_COMMAND no_reboot"; - _ -> "-heart" - end, + {win32,_} -> + "-heart " ++ env_encode([{"HEART_COMMAND", no_reboot}|Envs]); + _ -> + "-heart " ++ env_encode(Envs) + end, {ok, Node} = case Type of - loose -> - loose_node:start(Name, Args, ?DEFAULT_TIMEOUT_SECS); - _ -> - ?t:start_node(Name, Type, [{args, Args}]) - end, + loose -> + loose_node:start(Name, Args, ?DEFAULT_TIMEOUT_SECS); + _ -> + ?t:start_node(Name, Type, [{args, Args}]) + end, erlang:monitor_node(Node, true), case rpc:call(Node, erlang, whereis, [heart]) of Pid when is_pid(Pid) -> @@ -103,21 +116,19 @@ start_check(Type, Name) -> start(doc) -> []; start(suite) -> {req, [{time, 10}]}; start(Config) when is_list(Config) -> - ?line {ok, Node} = start_check(slave, heart_test), - ?line rpc:call(Node, init, reboot, []), + {ok, Node} = start_check(slave, heart_test), + rpc:call(Node, init, reboot, []), receive - {nodedown, Node} -> - ok - after 2000 -> - test_server:fail(node_not_closed) + {nodedown, Node} -> ok + after 2000 -> test_server:fail(node_not_closed) end, test_server:sleep(5000), - ?line case net_adm:ping(Node) of - pang -> - ok; - _ -> - test_server:fail(node_rebooted) - end, + case net_adm:ping(Node) of + pang -> + ok; + _ -> + test_server:fail(node_rebooted) + end, test_server:stop_node(Node). %% Also test fixed bug in R1B (it was not possible to @@ -125,6 +136,10 @@ start(Config) when is_list(Config) -> %% Slave executes erlang:halt() on master nodedown. %% Therefore the slave process has to be killed %% before restart. + +%% restart +%% Purpose: +%% Check that a node is up and running after a init:restart/0 restart(doc) -> []; restart(suite) -> case ?t:os_type() of @@ -134,8 +149,8 @@ restart(suite) -> {skip, "Only run on unix and win32"} end; restart(Config) when is_list(Config) -> - ?line {ok, Node} = start_check(loose, heart_test), - ?line rpc:call(Node, init, restart, []), + {ok, Node} = start_check(loose, heart_test), + rpc:call(Node, init, restart, []), receive {nodedown, Node} -> ok @@ -143,32 +158,21 @@ restart(Config) when is_list(Config) -> test_server:fail(node_not_closed) end, test_server:sleep(5000), - - ?line case net_adm:ping(Node) of - pong -> - erlang:monitor_node(Node, true), - ?line rpc:call(Node, init, stop, []), - receive - {nodedown, Node} -> - ok - after 2000 -> - test_server:fail(node_not_closed2) - end, - ok; - _ -> - test_server:fail(node_not_restarted) - end, + node_check_up_down(Node, 2000), loose_node:stop(Node). +%% reboot +%% Purpose: +%% Check that a node is up and running after a init:reboot/0 reboot(doc) -> []; reboot(suite) -> {req, [{time, 10}]}; reboot(Config) when is_list(Config) -> {ok, Node} = start_check(slave, heart_test), - ?line ok = rpc:call(Node, heart, set_cmd, + ok = rpc:call(Node, heart, set_cmd, [atom_to_list(lib:progname()) ++ " -noshell -heart " ++ name(Node) ++ "&"]), - ?line rpc:call(Node, init, reboot, []), + rpc:call(Node, init, reboot, []), receive {nodedown, Node} -> ok @@ -176,44 +180,119 @@ reboot(Config) when is_list(Config) -> test_server:fail(node_not_closed) end, test_server:sleep(5000), - ?line case net_adm:ping(Node) of - pong -> - erlang:monitor_node(Node, true), - ?line rpc:call(Node, init, reboot, []), - receive - {nodedown, Node} -> - ok - after 2000 -> - test_server:fail(node_not_closed2) - end, - ok; - _ -> - test_server:fail(node_not_rebooted) - end, + node_check_up_down(Node, 2000), ok. +%% node_start_immediately_after_crash +%% Purpose: +%% Check that a node is up and running after a crash. +%% This test exhausts the atom table on the remote node. +%% ERL_CRASH_DUMP_SECONDS=0 will force beam not to dump an erl_crash.dump. +node_start_immediately_after_crash(suite) -> {req, [{time, 10}]}; +node_start_immediately_after_crash(Config) when is_list(Config) -> + {ok, Node} = start_check(loose, heart_test_imm, [{"ERL_CRASH_DUMP_SECONDS", "0"}]), + + ok = rpc:call(Node, heart, set_cmd, + [atom_to_list(lib:progname()) ++ + " -noshell -heart " ++ name(Node) ++ "&"]), + + Mod = exhaust_atoms, + + Code = generate(Mod, [], [ + "do() -> " + " Set = lists:seq($a,$z), " + " [ list_to_atom([A,B,C,D,E]) || " + " A <- Set, B <- Set, C <- Set, E <- Set, D <- Set ]." + ]), + + %% crash it with atom exhaustion + rpc:call(Node, erlang, load_module, [Mod, Code]), + rpc:cast(Node, Mod, do, []), + + T0 = now(), + + receive {nodedown, Node} -> + test_server:format("Took ~.2f s. for node to go down~n", [timer:now_diff(now(), T0)/1000000]), + ok + %% timeout is very liberal here. nodedown is received in about 1 s. on linux (palantir) + %% and in about 10 s. on solaris (carcharoth) + after (15000*test_server:timetrap_scale_factor()) -> test_server:fail(node_not_closed) + end, + test_server:sleep(3000), + node_check_up_down(Node, 2000), + loose_node:stop(Node). + +%% node_start_soon_after_crash +%% Purpose: +%% Check that a node is up and running after a crash. +%% This test exhausts the atom table on the remote node. +%% ERL_CRASH_DUMP_SECONDS=10 will force beam +%% to only dump an erl_crash.dump for 10 seconds. +node_start_soon_after_crash(suite) -> {req, [{time, 10}]}; +node_start_soon_after_crash(Config) when is_list(Config) -> + {ok, Node} = start_check(loose, heart_test_soon, [{"ERL_CRASH_DUMP_SECONDS", "10"}]), + + ok = rpc:call(Node, heart, set_cmd, + [atom_to_list(lib:progname()) ++ + " -noshell -heart " ++ name(Node) ++ "&"]), + + Mod = exhaust_atoms, + + Code = generate(Mod, [], [ + "do() -> " + " Set = lists:seq($a,$z), " + " [ list_to_atom([A,B,C,D,E]) || " + " A <- Set, B <- Set, C <- Set, E <- Set, D <- Set ]." + ]), + + %% crash it with atom exhaustion + rpc:call(Node, erlang, load_module, [Mod, Code]), + rpc:cast(Node, Mod, do, []), + + receive {nodedown, Node} -> ok + after (15000*test_server:timetrap_scale_factor()) -> test_server:fail(node_not_closed) + end, + test_server:sleep(20000), + node_check_up_down(Node, 15000), + loose_node:stop(Node). + + +node_check_up_down(Node, Tmo) -> + case net_adm:ping(Node) of + pong -> + erlang:monitor_node(Node, true), + rpc:call(Node, init, reboot, []), + receive + {nodedown, Node} -> ok + after Tmo -> + test_server:fail(node_not_closed2) + end; + _ -> + test_server:fail(node_not_rebooted) + end. + %% Only tests bad command, correct behaviour is tested in reboot/1. set_cmd(suite) -> []; set_cmd(Config) when is_list(Config) -> - ?line {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, heart_test), Cmd = wrong_atom, - ?line {error, {bad_cmd, Cmd}} = rpc:call(Node, heart, set_cmd, [Cmd]), + {error, {bad_cmd, Cmd}} = rpc:call(Node, heart, set_cmd, [Cmd]), Cmd1 = lists:duplicate(2047, $a), - ?line {error, {bad_cmd, Cmd1}} = rpc:call(Node, heart, set_cmd, [Cmd1]), + {error, {bad_cmd, Cmd1}} = rpc:call(Node, heart, set_cmd, [Cmd1]), Cmd2 = lists:duplicate(28, $a), - ?line ok = rpc:call(Node, heart, set_cmd, [Cmd2]), + ok = rpc:call(Node, heart, set_cmd, [Cmd2]), Cmd3 = lists:duplicate(2000, $a), - ?line ok = rpc:call(Node, heart, set_cmd, [Cmd3]), + ok = rpc:call(Node, heart, set_cmd, [Cmd3]), stop_node(Node), ok. clear_cmd(suite) -> {req,[{time,15}]}; clear_cmd(Config) when is_list(Config) -> - ?line {ok, Node} = start_check(slave, heart_test), - ?line ok = rpc:call(Node, heart, set_cmd, + {ok, Node} = start_check(slave, heart_test), + ok = rpc:call(Node, heart, set_cmd, [atom_to_list(lib:progname()) ++ " -noshell -heart " ++ name(Node) ++ "&"]), - ?line rpc:call(Node, init, reboot, []), + rpc:call(Node, init, reboot, []), receive {nodedown, Node} -> ok @@ -221,16 +300,16 @@ clear_cmd(Config) when is_list(Config) -> test_server:fail(node_not_closed) end, test_server:sleep(5000), - ?line case net_adm:ping(Node) of - pong -> - erlang:monitor_node(Node, true); - _ -> - test_server:fail(node_not_rebooted) - end, - ?line ok = rpc:call(Node, heart, set_cmd, + case net_adm:ping(Node) of + pong -> + erlang:monitor_node(Node, true); + _ -> + test_server:fail(node_not_rebooted) + end, + ok = rpc:call(Node, heart, set_cmd, ["erl -noshell -heart " ++ name(Node) ++ "&"]), - ?line ok = rpc:call(Node, heart, clear_cmd, []), - ?line rpc:call(Node, init, reboot, []), + ok = rpc:call(Node, heart, clear_cmd, []), + rpc:call(Node, init, reboot, []), receive {nodedown, Node} -> ok @@ -238,20 +317,20 @@ clear_cmd(Config) when is_list(Config) -> test_server:fail(node_not_closed) end, test_server:sleep(5000), - ?line case net_adm:ping(Node) of - pang -> - ok; - _ -> - test_server:fail(node_rebooted) - end, + case net_adm:ping(Node) of + pang -> + ok; + _ -> + test_server:fail(node_rebooted) + end, ok. get_cmd(suite) -> []; get_cmd(Config) when is_list(Config) -> - ?line {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, heart_test), Cmd = "test", - ?line ok = rpc:call(Node, heart, set_cmd, [Cmd]), - ?line {ok, Cmd} = rpc:call(Node, heart, get_cmd, []), + ok = rpc:call(Node, heart, set_cmd, [Cmd]), + {ok, Cmd} = rpc:call(Node, heart, get_cmd, []), stop_node(Node), ok. @@ -274,57 +353,53 @@ dont_drop(Config) when is_list(Config) -> ok end. -do_dont_drop(_,0) -> - []; +do_dont_drop(_,0) -> []; do_dont_drop(Config,N) -> %% Name of first slave node - ?line NN1 = atom_to_list(?MODULE) ++ "slave_1", + NN1 = atom_to_list(?MODULE) ++ "slave_1", %% Name of node started by heart on failure - ?line NN2 = atom_to_list(?MODULE) ++ "slave_2", + NN2 = atom_to_list(?MODULE) ++ "slave_2", %% Name of node started by heart on success - ?line NN3 = atom_to_list(?MODULE) ++ "slave_3", - ?line Host = hd(tl(string:tokens(atom_to_list(node()),"@"))), + NN3 = atom_to_list(?MODULE) ++ "slave_3", + Host = hd(tl(string:tokens(atom_to_list(node()),"@"))), %% The initial heart command - ?line FirstCmd = erl() ++ name(NN2 ++ "@" ++ Host), + FirstCmd = erl() ++ name(NN2 ++ "@" ++ Host), %% Separated the parameters to start_node_run for clarity... - ?line Name = list_to_atom(NN1), - ?line Env = [{"HEART_COMMAND", FirstCmd}], - ?line Func = "start_heart_stress", - ?line Arg = NN3 ++ "@" ++ Host ++ " " ++ + Name = list_to_atom(NN1), + Env = [{"HEART_COMMAND", FirstCmd}], + Func = "start_heart_stress", + Arg = NN3 ++ "@" ++ Host ++ " " ++ filename:join(?config(data_dir, Config), "simple_echo"), - ?line start_node_run(Name,Env,Func,Arg), - ?line case wait_for_any_of(list_to_atom(NN2 ++ "@" ++ Host), - list_to_atom(NN3 ++ "@" ++ Host)) of - 2 -> - ?line [ok | do_dont_drop(Config,N-1)]; - _ -> - ?line false - end. + start_node_run(Name,Env,Func,Arg), + case wait_for_any_of(list_to_atom(NN2 ++ "@" ++ Host), + list_to_atom(NN3 ++ "@" ++ Host)) of + 2 -> + [ok | do_dont_drop(Config,N-1)]; + _ -> + false + end. wait_for_any_of(N1,N2) -> - ?line wait_for_any_of(N1,N2,45). + wait_for_any_of(N1,N2,45). wait_for_any_of(_N1,_N2,0) -> - ?line false; + false; wait_for_any_of(N1,N2,Times) -> - ?line receive - after 1000 -> - ?line ok - end, - ?line case net_adm:ping(N1) of - pang -> - ?line case net_adm:ping(N2) of - pang -> - ?line wait_for_any_of(N1,N2,Times - 1); - pong -> - ?line rpc:call(N2,init,stop,[]), - ?line 2 - end; - pong -> - ?line rpc:call(N1,init,stop,[]), - ?line 1 - end. + receive after 1000 -> ok end, + case net_adm:ping(N1) of + pang -> + case net_adm:ping(N2) of + pang -> + wait_for_any_of(N1,N2,Times - 1); + pong -> + rpc:call(N2,init,stop,[]), + 2 + end; + pong -> + rpc:call(N1,init,stop,[]), + 1 + end. kill_pid(suite) -> @@ -347,9 +422,7 @@ do_kill_pid(_Config) -> {ok,Node} = start_node_run(Name,Env,suicide_by_heart,[]), ok = wait_for_node(Node,15), erlang:monitor_node(Node, true), - receive - {nodedown,Node} -> - ok + receive {nodedown,Node} -> ok after 30000 -> false end. @@ -357,23 +430,16 @@ do_kill_pid(_Config) -> wait_for_node(_,0) -> false; wait_for_node(Node,N) -> - receive - after 1000 -> - ok - end, + receive after 1000 -> ok end, case net_adm:ping(Node) of - pong -> - ok; - pang -> - wait_for_node(Node,N-1) + pong -> ok; + pang -> wait_for_node(Node,N-1) end. erl() -> case os:type() of - {win32,_} -> - "werl "; - _ -> - "erl " + {win32,_} -> "werl "; + _ -> "erl " end. name(Node) when is_list(Node) -> name(Node,[]); @@ -390,15 +456,13 @@ name([H|T], Name) -> name(T, [H|Name]). -atom_conv(A) when is_atom(A) -> - atom_to_list(A); -atom_conv(A) when is_list(A) -> - A. +enc(A) when is_atom(A) -> atom_to_list(A); +enc(A) when is_binary(A) -> binary_to_list(A); +enc(A) when is_list(A) -> A. -env_conv([]) -> - []; -env_conv([{X,Y}|T]) -> - atom_conv(X) ++ " \"" ++ atom_conv(Y) ++ "\" " ++ env_conv(T). +env_encode([]) -> []; +env_encode([{X,Y}|T]) -> + "-env " ++ enc(X) ++ " \"" ++ enc(Y) ++ "\" " ++ env_encode(T). %%% %%% Starts a node and runs a function in this @@ -409,12 +473,12 @@ env_conv([{X,Y}|T]) -> %%% Argument is the argument(s) to send through erl -s %%% start_node_run(Name, Env, Function, Argument) -> - ?line PA = filename:dirname(code:which(?MODULE)), - ?line Params = "-heart -env " ++ env_conv(Env) ++ " -pa " ++ PA ++ - " -s " ++ - atom_conv(?MODULE) ++ " " ++ atom_conv(Function) ++ " " ++ - atom_conv(Argument), - ?line start_node(Name, Params). + PA = filename:dirname(code:which(?MODULE)), + Params = "-heart " ++ env_encode(Env) ++ " -pa " ++ PA ++ + " -s " ++ + enc(?MODULE) ++ " " ++ enc(Function) ++ " " ++ + enc(Argument), + start_node(Name, Params). start_node(Name, Param) -> test_server:start_node(Name, slave, [{args, Param}]). @@ -480,3 +544,24 @@ suicide_by_heart() -> {makaronipudding} -> sallad end. + + +%% generate a module from binary +generate(Module, Attributes, FunStrings) -> + FunForms = function_forms(FunStrings), + Forms = [ + {attribute,1,module,Module}, + {attribute,2,export,[FA || {FA,_} <- FunForms]} + ] ++ [{attribute, 3, A, V}|| {A, V} <- Attributes] ++ + [ Function || {_, Function} <- FunForms], + {ok, Module, Bin} = compile:forms(Forms), + Bin. + + +function_forms([]) -> []; +function_forms([S|Ss]) -> + {ok, Ts,_} = erl_scan:string(S), + {ok, Form} = erl_parse:parse_form(Ts), + Fun = element(3, Form), + Arity = element(4, Form), + [{{Fun,Arity}, Form}|function_forms(Ss)]. |