From 3dda5298322a76f2787c6c4dd1cce78416c20dc3 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 16 Aug 2017 16:56:12 +0200 Subject: erts: Async auto-connect for monitor_node Removed distribution_SUITE:applied_monitor_node as it seems to test apply of trapping BIF and monitor_node does not trap anymore. --- erts/emulator/beam/dist.c | 124 ++++++++++++++++++++---------- erts/emulator/test/distribution_SUITE.erl | 24 +----- erts/emulator/test/erl_link_SUITE.erl | 13 ++-- erts/preloaded/src/erlang.erl | 10 +-- 4 files changed, 98 insertions(+), 73 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 0fa399cf53..724cb6b24d 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -3789,11 +3789,17 @@ monitor_node(Process* p, Eterm Node, Eterm Bool, Eterm Options) DistEntry *dep; ErtsLink *lnk; Eterm l; + int async_connect = 1; for (l = Options; l != NIL && is_list(l); l = CDR(list_val(l))) { Eterm t = CAR(list_val(l)); - /* allow_passive_connect the only available option right now */ - if (t != am_allow_passive_connect) { + if (t == am_allow_passive_connect) { + /* + * Handle this horrible feature by falling back on old synchronous + * auto-connect (if needed) + */ + async_connect = 0; + } else { BIF_ERROR(p, BADARG); } } @@ -3807,47 +3813,85 @@ monitor_node(Process* p, Eterm Node, Eterm Bool, Eterm Options) && (Node != erts_this_node->sysname))) { BIF_ERROR(p, BADARG); } - dep = erts_sysname_to_connected_dist_entry(Node); - if (!dep) { - do_trap: - BIF_TRAP3(dmonitor_node_trap, p, Node, Bool, Options); - } - if (dep == erts_this_dist_entry) - goto done; - - erts_proc_lock(p, ERTS_PROC_LOCK_LINK); - erts_de_rlock(dep); - if (ERTS_DE_IS_NOT_CONNECTED(dep)) { - erts_proc_unlock(p, ERTS_PROC_LOCK_LINK); - erts_de_runlock(dep); - goto do_trap; - } - erts_de_links_lock(dep); - erts_de_runlock(dep); if (Bool == am_true) { - ASSERT(dep->cid != NIL); - lnk = erts_add_or_lookup_link(&(dep->node_links), LINK_NODE, - p->common.id); - ++ERTS_LINK_REFC(lnk); - lnk = erts_add_or_lookup_link(&ERTS_P_LINKS(p), LINK_NODE, Node); - ++ERTS_LINK_REFC(lnk); - } - else { - lnk = erts_lookup_link(dep->node_links, p->common.id); - if (lnk != NULL) { - if ((--ERTS_LINK_REFC(lnk)) == 0) { - erts_destroy_link(erts_remove_link(&(dep->node_links), - p->common.id)); - } - } - lnk = erts_lookup_link(ERTS_P_LINKS(p), Node); - if (lnk != NULL) { - if ((--ERTS_LINK_REFC(lnk)) == 0) { - erts_destroy_link(erts_remove_link(&ERTS_P_LINKS(p), - Node)); + ErtsDSigData dsd; + dsd.node = Node; + dep = erts_find_or_insert_dist_entry(Node); + if (dep == erts_this_dist_entry) + goto done; + + erts_proc_lock(p, ERTS_PROC_LOCK_LINK); + + switch (erts_dsig_prepare(&dsd, &dep, p, + (ERTS_PROC_LOCK_MAIN | ERTS_PROC_LOCK_LINK), + ERTS_DSP_RLOCK, 0, async_connect)) { + case ERTS_DSIG_PREP_NOT_ALIVE: + case ERTS_DSIG_PREP_NOT_CONNECTED: + /* Trap to either send 'nodedown' or do passive connection attempt */ + trap: + erts_proc_unlock(p, ERTS_PROC_LOCK_LINK); + erts_deref_dist_entry(dep); + BIF_TRAP3(dmonitor_node_trap, p, Node, Bool, Options); + case ERTS_DSIG_PREP_PENDING: + if (!async_connect) { + /* + * Pending connection may fail, so we must trap + * to ensure passive connection attempt + */ + erts_de_runlock(dep); + goto trap; } - } + /*fall through*/ + case ERTS_DSIG_PREP_CONNECTED: + erts_de_links_lock(dep); + erts_de_runlock(dep); + lnk = erts_add_or_lookup_link(&(dep->node_links), LINK_NODE, + p->common.id); + ++ERTS_LINK_REFC(lnk); + lnk = erts_add_or_lookup_link(&ERTS_P_LINKS(p), LINK_NODE, Node); + ++ERTS_LINK_REFC(lnk); + break; + default: + ERTS_ASSERT(! "Invalid dsig prepare result"); + } + } + else { /* Bool == false */ + dep = erts_sysname_to_connected_dist_entry(Node); + if (!dep) { + /* + * Before OTP-21 this case triggered auto-connect + * and a 'nodedown' message if that failed. + * Now it's a simple no-op which feels more reasonable. + */ + BIF_RET(am_true); + } + if (dep == erts_this_dist_entry) + goto done; + + erts_proc_lock(p, ERTS_PROC_LOCK_LINK); + erts_de_rlock(dep); + if (!(dep->status & (ERTS_DE_SFLG_PENDING | ERTS_DE_SFLG_CONNECTED))) { + erts_proc_unlock(p, ERTS_PROC_LOCK_LINK); + erts_de_runlock(dep); + goto done; + } + erts_de_links_lock(dep); + erts_de_runlock(dep); + lnk = erts_lookup_link(dep->node_links, p->common.id); + if (lnk != NULL) { + if ((--ERTS_LINK_REFC(lnk)) == 0) { + erts_destroy_link(erts_remove_link(&(dep->node_links), + p->common.id)); + } + } + lnk = erts_lookup_link(ERTS_P_LINKS(p), Node); + if (lnk != NULL) { + if ((--ERTS_LINK_REFC(lnk)) == 0) { + erts_destroy_link(erts_remove_link(&ERTS_P_LINKS(p), + Node)); + } + } } erts_de_links_unlock(dep); diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index c7d63f9feb..98c39dc6dd 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -41,7 +41,7 @@ local_send_small/1, local_send_big/1, local_send_legal/1, link_to_busy/1, exit_to_busy/1, lost_exit/1, link_to_dead/1, link_to_dead_new_node/1, - applied_monitor_node/1, ref_port_roundtrip/1, nil_roundtrip/1, + ref_port_roundtrip/1, nil_roundtrip/1, trap_bif_1/1, trap_bif_2/1, trap_bif_3/1, stop_dist/1, dist_auto_connect_never/1, dist_auto_connect_once/1, @@ -75,7 +75,7 @@ suite() -> all() -> [ping, {group, bulk_send}, {group, local_send}, link_to_busy, exit_to_busy, lost_exit, link_to_dead, - link_to_dead_new_node, applied_monitor_node, + link_to_dead_new_node, ref_port_roundtrip, nil_roundtrip, stop_dist, {group, trap_bif}, {group, dist_auto_connect}, dist_parallel_send, atom_roundtrip, unicode_atom_roundtrip, @@ -639,26 +639,6 @@ link_to_dead_new_node(Config) when is_list(Config) -> end, ok. -%% Test that monitor_node/2 works when applied. -applied_monitor_node(Config) when is_list(Config) -> - NonExisting = list_to_atom("__non_existing__@" ++ hostname()), - - %% Tail-recursive call to apply (since the node is non-existing, - %% there will be a trap). - - true = tail_apply(erlang, monitor_node, [NonExisting, true]), - [{nodedown, NonExisting}] = test_server:messages_get(), - - %% Ordinary call (with trap). - - true = apply(erlang, monitor_node, [NonExisting, true]), - [{nodedown, NonExisting}] = test_server:messages_get(), - - ok. - -tail_apply(M, F, A) -> - apply(M, F, A). - %% Test that sending a port or reference to another node and back again %% doesn't correct them in any way. ref_port_roundtrip(Config) when is_list(Config) -> diff --git a/erts/emulator/test/erl_link_SUITE.erl b/erts/emulator/test/erl_link_SUITE.erl index d8c5b663e3..a66ca7a57d 100644 --- a/erts/emulator/test/erl_link_SUITE.erl +++ b/erts/emulator/test/erl_link_SUITE.erl @@ -200,13 +200,16 @@ monitor_nodes(Config) when is_list(Config) -> monitor_node(A, true), check_monitor_node(self(), A, 1), check_monitor_node(self(), B, 3), + ok = receive {nodedown, C} -> ok after 1000 -> timeout end, + %%OTP-21: monitor_node(_,false) does not trigger auto-connect anymore + %% and therefore no nodedown if it fails. + %%ok = receive {nodedown, C} -> ok after 1000 -> timeout end, + ok = receive {nodedown, C} -> ok after 1000 -> timeout end, + ok = receive {nodedown, D} -> ok after 1000 -> timeout end, + ok = receive {nodedown, D} -> ok after 1000 -> timeout end, check_monitor_node(self(), C, 0), check_monitor_node(self(), D, 0), - receive {nodedown, C} -> ok end, - receive {nodedown, C} -> ok end, - receive {nodedown, C} -> ok end, - receive {nodedown, D} -> ok end, - receive {nodedown, D} -> ok end, + stop_node(A), receive {nodedown, A} -> ok end, check_monitor_node(self(), A, 0), diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index 28625c400f..9630c0c934 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -3304,12 +3304,10 @@ dunlink(Pid) -> false -> true end. -dmonitor_node(Node, Flag, []) -> - case net_kernel:connect(Node) of - true -> erlang:monitor_node(Node, Flag, []); - false -> erlang:self() ! {nodedown, Node}, true - end; - +dmonitor_node(Node, _Flag, []) -> + %% Only called when auto-connect attempt failed early in VM + erlang:self() ! {nodedown, Node}, + true; dmonitor_node(Node, Flag, Opts) -> case lists:member(allow_passive_connect, Opts) of true -> -- cgit v1.2.3