diff options
author | Rickard Green <[email protected]> | 2018-09-05 16:04:51 +0200 |
---|---|---|
committer | Rickard Green <[email protected]> | 2018-09-05 16:30:19 +0200 |
commit | a31216200bdee2c04b3fb3ae5e26607674715c8a (patch) | |
tree | 26ad8d59b3af89be1f907d08bd605641b2f7037e | |
parent | f7012b7a731924193cf05fc77b103e89bd0fcbfb (diff) | |
download | otp-a31216200bdee2c04b3fb3ae5e26607674715c8a.tar.gz otp-a31216200bdee2c04b3fb3ae5e26607674715c8a.tar.bz2 otp-a31216200bdee2c04b3fb3ae5e26607674715c8a.zip |
Prevent inconsistent node lists
If net_kernel "forgets" to abort a connection (as it currently might),
the garbage collection of a distribution entry could cause node lists
to enter an inconsistent state.
-rw-r--r-- | erts/emulator/beam/dist.c | 6 | ||||
-rw-r--r-- | erts/emulator/beam/dist.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_node_tables.c | 38 | ||||
-rw-r--r-- | erts/emulator/test/node_container_SUITE.erl | 28 |
4 files changed, 72 insertions, 2 deletions
```diff
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index 70474898b2..16c4d689a5 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -3628,6 +3628,12 @@ static Sint abort_connection(DistEntry* dep, Uint32 conn_id)
     return 0;
 }
+Sint
+erts_abort_connection(DistEntry *dep, Uint32 conn_id)
+{
+    return abort_connection(dep, conn_id);
+}
+
 BIF_RETTYPE erts_internal_abort_connection_2(BIF_ALIST_2)
 {
     DistEntry* dep;
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index dda2029a4c..30b4b35c20 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -399,5 +399,7 @@ extern void erts_kill_dist_connection(DistEntry *dep, Uint32);
 extern Uint erts_dist_cache_size(void);
+extern Sint erts_abort_connection(DistEntry *dep, Uint32 conn_id);
+
 #endif
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index 1f147011a8..9b34af1480 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -412,6 +412,44 @@ static void schedule_delete_dist_entry(DistEntry* dep)
 static void start_timer_delete_dist_entry(void *vdep)
 {
+    DistEntry *dep = vdep;
+    Eterm sysname;
+    enum dist_entry_state state;
+    Uint32 connection_id;
+
+    erts_de_rlock(dep);
+    state = dep->state;
+    connection_id = dep->connection_id;
+    sysname = dep->sysname;
+    erts_de_runlock(dep);
+
+    if (state != ERTS_DE_STATE_IDLE) {
+        char *state_str;
+        erts_dsprintf_buf_t *dsbuf = erts_create_logger_dsbuf();
+        switch (state) {
+        case ERTS_DE_STATE_CONNECTED:
+            state_str = "connected";
+            break;
+        case ERTS_DE_STATE_PENDING:
+            state_str = "pending connect";
+            break;
+        case ERTS_DE_STATE_EXITING:
+            state_str = "exiting";
+            break;
+        case ERTS_DE_STATE_IDLE:
+            state_str = "idle";
+            break;
+        default:
+            state_str = "unknown";
+            break;
+        }
+        erts_dsprintf(dsbuf, "Garbage collecting distribution "
+                      "entry for node %T in state: %s",
+                      sysname, state_str);
+        erts_send_error_to_logger_nogl(dsbuf);
+        erts_abort_connection(dep, connection_id);
+    }
+
+    if (node_tab_delete_delay == 0) {
         prepare_try_delete_dist_entry(vdep);
     }
diff --git a/erts/emulator/test/node_container_SUITE.erl b/erts/emulator/test/node_container_SUITE.erl
index 7df001fec5..55135fbcbc 100644
--- a/erts/emulator/test/node_container_SUITE.erl
+++ b/erts/emulator/test/node_container_SUITE.erl
@@ -50,7 +50,8 @@
          bad_nc/1,
          unique_pid/1,
          iter_max_procs/1,
-         magic_ref/1]).
+         magic_ref/1,
+         dist_entry_gc/1]).
 suite() ->
     [{ct_hooks,[ts_install_cth]},
@@ -58,7 +59,7 @@ suite() ->
 all() ->
-    [term_to_binary_to_term_eq, round_trip_eq, cmp, ref_eq,
+    [dist_entry_gc, term_to_binary_to_term_eq, round_trip_eq, cmp, ref_eq,
      node_table_gc, dist_link_refc, dist_monitor_refc,
      node_controller_refc, ets_refc, match_spec_refc,
      timer_refc, pid_wrap, port_wrap, bad_nc,
@@ -894,6 +895,29 @@ magic_ref(Config) when is_list(Config) ->
     true = is_reference(MRef2),
     true = erts_debug:get_internal_state({magic_ref,MRef2}),
     ok.
+
+
+lost_pending_connection(Node) ->
+    _ = (catch erts_internal:new_connection(Node)),
+    ok.
+
+dist_entry_gc(Config) when is_list(Config) ->
+    Me = self(),
+    {ok, Node} = start_node(get_nodefirstname(), "+zdntgc 0"),
+    P = spawn_link(Node,
+                   fun () ->
+                           LostNode = list_to_atom("lost_pending_connection@" ++ hostname()),
+                           lost_pending_connection(LostNode),
+                           garbage_collect(), %% Could crash...
+                           Me ! {self(), ok}
+                   end),
+    receive
+        {P, ok} -> ok
+    end,
+    unlink(P),
+    stop_node(Node),
+    ok.
+
 %%
 %% -- Internal utils ---------------------------------------------------------
 %%
```