From 93ad08d8c6d6a9875a10b33633aca52de5d3c59b Mon Sep 17 00:00:00 2001 From: Maxim Fedorov Date: Tue, 28 Aug 2018 16:11:09 -0700 Subject: Fix an endless rescheduling loop when a process is executing process_info(self(), ...) It is possible that a process has to yield before completing the process_info BIF when it runs out of reductions. If this BIF is called by the process itself, it does not send a signal but executes in the context of the process. If it has to yield, it turns the F_LOCAL_SIGS_ONLY flag on, which means new signals won't be fetched from the outer message queue. When the same process needs to execute dirty system code (e.g. dirty GC) it has to be run on a dirty scheduler. However, signals enqueued into the outer queue cause it to be rescheduled on a normal scheduler. F_LOCAL_SIGS_ONLY prevents delivery of outer queue signals, creating an endless rescheduling loop. This commit disengages F_LOCAL_SIGS_ONLY if the process needs to execute dirty code, in order to complete signal delivery and allow the process to be moved to the dirty run queue. 
--- erts/emulator/beam/erl_process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 7a9ef3c1de..d5bd17ff3e 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -9641,7 +9641,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) if (state & ERTS_PSFLG_RUNNING_SYS) { if (state & (ERTS_PSFLG_SIG_Q|ERTS_PSFLG_SIG_IN_Q)) { int local_only = (!!(p->flags & F_LOCAL_SIGS_ONLY) - & !(state & ERTS_PSFLG_SUSPENDED)); + & !(state & (ERTS_PSFLG_SUSPENDED|ERTS_PSFLGS_DIRTY_WORK))); if (!local_only | !!(state & ERTS_PSFLG_SIG_Q)) { int sig_reds; /* -- cgit v1.2.3 From a31216200bdee2c04b3fb3ae5e26607674715c8a Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 5 Sep 2018 16:04:51 +0200 Subject: Prevent inconsistent node lists If net_kernel "forgets" to abort a connection (as it currently might), the garbage collection of a distribution entry could cause node lists to enter an inconsistent state. 
--- erts/emulator/beam/dist.c | 6 +++++ erts/emulator/beam/dist.h | 2 ++ erts/emulator/beam/erl_node_tables.c | 38 +++++++++++++++++++++++++++++ erts/emulator/test/node_container_SUITE.erl | 28 +++++++++++++++++++-- 4 files changed, 72 insertions(+), 2 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 70474898b2..16c4d689a5 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -3628,6 +3628,12 @@ static Sint abort_connection(DistEntry* dep, Uint32 conn_id) return 0; } +Sint +erts_abort_connection(DistEntry *dep, Uint32 conn_id) +{ + return abort_connection(dep, conn_id); +} + BIF_RETTYPE erts_internal_abort_connection_2(BIF_ALIST_2) { DistEntry* dep; diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index dda2029a4c..30b4b35c20 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -399,5 +399,7 @@ extern void erts_kill_dist_connection(DistEntry *dep, Uint32); extern Uint erts_dist_cache_size(void); +extern Sint erts_abort_connection(DistEntry *dep, Uint32 conn_id); + #endif diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index 1f147011a8..9b34af1480 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -412,6 +412,44 @@ static void schedule_delete_dist_entry(DistEntry* dep) static void start_timer_delete_dist_entry(void *vdep) { + DistEntry *dep = vdep; + Eterm sysname; + enum dist_entry_state state; + Uint32 connection_id; + + erts_de_rlock(dep); + state = dep->state; + connection_id = dep->connection_id; + sysname = dep->sysname; + erts_de_runlock(dep); + + if (state != ERTS_DE_STATE_IDLE) { + char *state_str; + erts_dsprintf_buf_t *dsbuf = erts_create_logger_dsbuf(); + switch (state) { + case ERTS_DE_STATE_CONNECTED: + state_str = "connected"; + break; + case ERTS_DE_STATE_PENDING: + state_str = "pending connect"; + break; + case ERTS_DE_STATE_EXITING: + state_str = 
"exiting"; + break; + case ERTS_DE_STATE_IDLE: + state_str = "idle"; + break; + default: + state_str = "unknown"; + break; + } + erts_dsprintf(dsbuf, "Garbage collecting distribution " + "entry for node %T in state: %s", + sysname, state_str); + erts_send_error_to_logger_nogl(dsbuf); + erts_abort_connection(dep, connection_id); + } + if (node_tab_delete_delay == 0) { prepare_try_delete_dist_entry(vdep); } diff --git a/erts/emulator/test/node_container_SUITE.erl b/erts/emulator/test/node_container_SUITE.erl index 7df001fec5..55135fbcbc 100644 --- a/erts/emulator/test/node_container_SUITE.erl +++ b/erts/emulator/test/node_container_SUITE.erl @@ -50,7 +50,8 @@ bad_nc/1, unique_pid/1, iter_max_procs/1, - magic_ref/1]). + magic_ref/1, + dist_entry_gc/1]). suite() -> [{ct_hooks,[ts_install_cth]}, @@ -58,7 +59,7 @@ suite() -> all() -> - [term_to_binary_to_term_eq, round_trip_eq, cmp, ref_eq, + [dist_entry_gc, term_to_binary_to_term_eq, round_trip_eq, cmp, ref_eq, node_table_gc, dist_link_refc, dist_monitor_refc, node_controller_refc, ets_refc, match_spec_refc, timer_refc, pid_wrap, port_wrap, bad_nc, @@ -894,6 +895,29 @@ magic_ref(Config) when is_list(Config) -> true = is_reference(MRef2), true = erts_debug:get_internal_state({magic_ref,MRef2}), ok. + + +lost_pending_connection(Node) -> + _ = (catch erts_internal:new_connection(Node)), + ok. + +dist_entry_gc(Config) when is_list(Config) -> + Me = self(), + {ok, Node} = start_node(get_nodefirstname(), "+zdntgc 0"), + P = spawn_link(Node, + fun () -> + LostNode = list_to_atom("lost_pending_connection@" ++ hostname()), + lost_pending_connection(LostNode), + garbage_collect(), %% Could crash... + Me ! {self(), ok} + end), + receive + {P, ok} -> ok + end, + unlink(P), + stop_node(Node), + ok. 
+ %% %% -- Internal utils --------------------------------------------------------- %% -- cgit v1.2.3 From 245a3e53b3d8f324b82ab56f06f8df3cf580f860 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Wed, 5 Sep 2018 20:53:24 +0200 Subject: Update version numbers --- erts/vsn.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/vsn.mk b/erts/vsn.mk index feb51e42d2..293b555b18 100644 --- a/erts/vsn.mk +++ b/erts/vsn.mk @@ -18,7 +18,7 @@ # %CopyrightEnd% # -VSN = 10.0.6 +VSN = 10.0.7 # Port number 4365 in 4.2 # Port number 4366 in 4.3 -- cgit v1.2.3 From 12d2c65ed477e9fde9a411727de4cc67c53b1a1c Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Wed, 5 Sep 2018 20:53:36 +0200 Subject: Update release notes --- erts/doc/src/notes.xml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'erts') diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml index 5862318ab7..c7491e2741 100644 --- a/erts/doc/src/notes.xml +++ b/erts/doc/src/notes.xml @@ -31,6 +31,34 @@

This document describes the changes made to the ERTS application.

+
Erts 10.0.7 + +
Fixed Bugs and Malfunctions + + +

+ A process could get stuck in an infinite rescheduling + loop between normal and dirty schedulers. This bug was + introduced in ERTS version 10.0.

+

+ Thanks to Maxim Fedorov for finding and fixing this + issue.

+

+ Own Id: OTP-15275 Aux Id: PR-1943

+
+ +

+ Garbage collection of a distribution entry could cause an + emulator crash if net_kernel had not properly brought + down previous connection attempts on it.

+

+ Own Id: OTP-15279 Aux Id: ERIERL-226

+
+
+
+ +
+
Erts 10.0.6
Fixed Bugs and Malfunctions -- cgit v1.2.3