aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2018-09-05 16:04:51 +0200
committerRickard Green <[email protected]>2018-09-05 16:30:19 +0200
commita31216200bdee2c04b3fb3ae5e26607674715c8a (patch)
tree26ad8d59b3af89be1f907d08bd605641b2f7037e
parentf7012b7a731924193cf05fc77b103e89bd0fcbfb (diff)
downloadotp-a31216200bdee2c04b3fb3ae5e26607674715c8a.tar.gz
otp-a31216200bdee2c04b3fb3ae5e26607674715c8a.tar.bz2
otp-a31216200bdee2c04b3fb3ae5e26607674715c8a.zip
Prevent inconsistent node lists
If net_kernel "forgets" to abort a connection (as it currently might), the garbage collection of a distribution entry could cause node lists to enter an inconsistent state.
-rw-r--r--erts/emulator/beam/dist.c6
-rw-r--r--erts/emulator/beam/dist.h2
-rw-r--r--erts/emulator/beam/erl_node_tables.c38
-rw-r--r--erts/emulator/test/node_container_SUITE.erl28
4 files changed, 72 insertions, 2 deletions
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index 70474898b2..16c4d689a5 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -3628,6 +3628,12 @@ static Sint abort_connection(DistEntry* dep, Uint32 conn_id)
return 0;
}
+Sint
+erts_abort_connection(DistEntry *dep, Uint32 conn_id)
+{
+ return abort_connection(dep, conn_id);
+}
+
BIF_RETTYPE erts_internal_abort_connection_2(BIF_ALIST_2)
{
DistEntry* dep;
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index dda2029a4c..30b4b35c20 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -399,5 +399,7 @@ extern void erts_kill_dist_connection(DistEntry *dep, Uint32);
extern Uint erts_dist_cache_size(void);
+extern Sint erts_abort_connection(DistEntry *dep, Uint32 conn_id);
+
#endif
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index 1f147011a8..9b34af1480 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -412,6 +412,44 @@ static void schedule_delete_dist_entry(DistEntry* dep)
static void
start_timer_delete_dist_entry(void *vdep)
{
+ DistEntry *dep = vdep;
+ Eterm sysname;
+ enum dist_entry_state state;
+ Uint32 connection_id;
+
+ erts_de_rlock(dep);
+ state = dep->state;
+ connection_id = dep->connection_id;
+ sysname = dep->sysname;
+ erts_de_runlock(dep);
+
+ if (state != ERTS_DE_STATE_IDLE) {
+ char *state_str;
+ erts_dsprintf_buf_t *dsbuf = erts_create_logger_dsbuf();
+ switch (state) {
+ case ERTS_DE_STATE_CONNECTED:
+ state_str = "connected";
+ break;
+ case ERTS_DE_STATE_PENDING:
+ state_str = "pending connect";
+ break;
+ case ERTS_DE_STATE_EXITING:
+ state_str = "exiting";
+ break;
+ case ERTS_DE_STATE_IDLE:
+ state_str = "idle";
+ break;
+ default:
+ state_str = "unknown";
+ break;
+ }
+ erts_dsprintf(dsbuf, "Garbage collecting distribution "
+ "entry for node %T in state: %s",
+ sysname, state_str);
+ erts_send_error_to_logger_nogl(dsbuf);
+ erts_abort_connection(dep, connection_id);
+ }
+
if (node_tab_delete_delay == 0) {
prepare_try_delete_dist_entry(vdep);
}
diff --git a/erts/emulator/test/node_container_SUITE.erl b/erts/emulator/test/node_container_SUITE.erl
index 7df001fec5..55135fbcbc 100644
--- a/erts/emulator/test/node_container_SUITE.erl
+++ b/erts/emulator/test/node_container_SUITE.erl
@@ -50,7 +50,8 @@
bad_nc/1,
unique_pid/1,
iter_max_procs/1,
- magic_ref/1]).
+ magic_ref/1,
+ dist_entry_gc/1]).
suite() ->
[{ct_hooks,[ts_install_cth]},
@@ -58,7 +59,7 @@ suite() ->
all() ->
- [term_to_binary_to_term_eq, round_trip_eq, cmp, ref_eq,
+ [dist_entry_gc, term_to_binary_to_term_eq, round_trip_eq, cmp, ref_eq,
node_table_gc, dist_link_refc, dist_monitor_refc,
node_controller_refc, ets_refc, match_spec_refc,
timer_refc, pid_wrap, port_wrap, bad_nc,
@@ -894,6 +895,29 @@ magic_ref(Config) when is_list(Config) ->
true = is_reference(MRef2),
true = erts_debug:get_internal_state({magic_ref,MRef2}),
ok.
+
+
+lost_pending_connection(Node) ->
+ _ = (catch erts_internal:new_connection(Node)),
+ ok.
+
+dist_entry_gc(Config) when is_list(Config) ->
+ Me = self(),
+ {ok, Node} = start_node(get_nodefirstname(), "+zdntgc 0"),
+ P = spawn_link(Node,
+ fun () ->
+ LostNode = list_to_atom("lost_pending_connection@" ++ hostname()),
+ lost_pending_connection(LostNode),
+ garbage_collect(), %% Could crash...
+ Me ! {self(), ok}
+ end),
+ receive
+ {P, ok} -> ok
+ end,
+ unlink(P),
+ stop_node(Node),
+ ok.
+
%%
%% -- Internal utils ---------------------------------------------------------
%%