From 93ad08d8c6d6a9875a10b33633aca52de5d3c59b Mon Sep 17 00:00:00 2001 From: Maxim Fedorov Date: Tue, 28 Aug 2018 16:11:09 -0700 Subject: Fix an endless rescheduling loop when a process is executing process_info(self(), ...) It is possible that a process has to yield before completing process_info BIF when it runs out of reductions. If this BIF is called by the process itself, it does not send a signal but executes in the context of a process. If it has to yield, it turns F_LOCAL_SIGS_ONLY flag on, which means new signals won't be fetched from the outer message queue. When the same process needs to execute dirty system code (e.g. dirty GC) it has to be run on a dirty scheduler. However signals enqueued into outer queue cause it to be rescheduled on a normal scheduler. F_LOCAL_SIGS_ONLY prevent outer queue signals delivery, creating an endless rescheduling loop. This commit disengages F_LOCAL_SIG_ONLY if process needs to execute dirty code in order to complete signal delivery and allow process to be moved to dirty run queue. --- erts/emulator/beam/erl_process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 7a9ef3c1de..d5bd17ff3e 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -9641,7 +9641,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) if (state & ERTS_PSFLG_RUNNING_SYS) { if (state & (ERTS_PSFLG_SIG_Q|ERTS_PSFLG_SIG_IN_Q)) { int local_only = (!!(p->flags & F_LOCAL_SIGS_ONLY) - & !(state & ERTS_PSFLG_SUSPENDED)); + & !(state & (ERTS_PSFLG_SUSPENDED|ERTS_PSFLGS_DIRTY_WORK))); if (!local_only | !!(state & ERTS_PSFLG_SIG_Q)) { int sig_reds; /* -- cgit v1.2.3 From a31216200bdee2c04b3fb3ae5e26607674715c8a Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 5 Sep 2018 16:04:51 +0200 Subject: Prevent inconsistent node lists If net_kernel "forgets" to abort a connection (as it currently might), the garbage collection of a distribution entry could cause node lists to enter an inconsistent state. --- erts/emulator/beam/dist.c | 6 ++++++ erts/emulator/beam/dist.h | 2 ++ erts/emulator/beam/erl_node_tables.c | 38 ++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 70474898b2..16c4d689a5 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -3628,6 +3628,12 @@ static Sint abort_connection(DistEntry* dep, Uint32 conn_id) return 0; } +Sint +erts_abort_connection(DistEntry *dep, Uint32 conn_id) +{ + return abort_connection(dep, conn_id); +} + BIF_RETTYPE erts_internal_abort_connection_2(BIF_ALIST_2) { DistEntry* dep; diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index dda2029a4c..30b4b35c20 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -399,5 +399,7 @@ extern void erts_kill_dist_connection(DistEntry *dep, Uint32); extern Uint erts_dist_cache_size(void); +extern Sint erts_abort_connection(DistEntry *dep, Uint32 conn_id); + #endif diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index 1f147011a8..9b34af1480 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -412,6 +412,44 @@ static void schedule_delete_dist_entry(DistEntry* dep) static void start_timer_delete_dist_entry(void *vdep) { + DistEntry *dep = vdep; + Eterm sysname; + enum dist_entry_state state; + Uint32 connection_id; + + erts_de_rlock(dep); + state = dep->state; + connection_id = dep->connection_id; + sysname = dep->sysname; + erts_de_runlock(dep); + + if (state != ERTS_DE_STATE_IDLE) { + char *state_str; + erts_dsprintf_buf_t *dsbuf = erts_create_logger_dsbuf(); + switch (state) { + case ERTS_DE_STATE_CONNECTED: + state_str = "connected"; + break; + case ERTS_DE_STATE_PENDING: + state_str = "pending connect"; + break; + case ERTS_DE_STATE_EXITING: + state_str = "exiting"; + break; + case ERTS_DE_STATE_IDLE: + state_str = "idle"; + break; + default: + state_str = "unknown"; + break; + } + erts_dsprintf(dsbuf, "Garbage collecting distribution " + "entry for node %T in state: %s", + sysname, state_str); + erts_send_error_to_logger_nogl(dsbuf); + erts_abort_connection(dep, connection_id); + } + if (node_tab_delete_delay == 0) { prepare_try_delete_dist_entry(vdep); } -- cgit v1.2.3