From 66d67762be1cf0a3b9ac068d597c6d8bdaf2e3d7 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 00:12:58 +0100 Subject: Fix ignored connect timer There are two timers governing the establishment of peer connections: connect_timer and watchdog_timer. The former is the RFC 6733 Tc timer and is used by diameter_service to establish an initial connection. The latter is RFC 3539 TwInit and is used by diameter_watchdog for connection reestablishment after the watchdog leaves state INITIAL. A connecting transport ignored the connect timer since the watchdog process never died, regardless of the watchdog state, causing the watchdog timer to handle reconnection. This seems to have been broken for some time. --- lib/diameter/src/base/diameter_watchdog.erl | 36 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'lib/diameter/src') diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index eff5096745..b7f2d24941 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -255,11 +255,15 @@ close({'DOWN', _, process, TPid, {shutdown, Reason}}, close(_, _) -> ok. -event(_, #watchdog{status = T}, #watchdog{status = T}) -> - ok; - -event(_, #watchdog{transport = undefined}, #watchdog{transport = undefined}) -> +event(_, + #watchdog{status = From, transport = F}, + #watchdog{status = To, transport = T}) + when F == undefined, T == undefined; %% transport not started + From == initial, To == down; %% never really left INITIAL + From == To -> %% no state transition ok; +%% Note that there is no INITIAL -> DOWN transition in RFC 3539: ours +%% is just a consequence of stop. event(Msg, #watchdog{status = From, transport = F, parent = Pid}, @@ -411,7 +415,7 @@ transition({'DOWN', _, process, TPid, _Reason}, stop; %% ... or not. -transition({'DOWN', _, process, TPid, _Reason}, +transition({'DOWN', _, process, TPid, _Reason} = D, #watchdog{transport = TPid, status = T, restrict = {_,R}} @@ -422,20 +426,14 @@ transition({'DOWN', _, process, TPid, _Reason}, %% Close an accepting watchdog immediately if there's no %% restriction on the number of connections to the same peer: the - %% state machine never enters state REOPEN in this case. The - %% 'close' message (instead of stop) is so as not to bypass the - %% sending of messages to the service process in handle_info/2. - - if T /= initial, M == accept, not R -> - send(self(), close), - S#watchdog{status = down}; - T /= initial -> - set_watchdog(S#watchdog{status = down}); - M == connect -> - set_watchdog(S); - M == accept -> - send(self(), close), - S + %% state machine never enters state REOPEN in this case. + + if T == initial; + M == accept, not R -> + close(D, S0), + stop; + true -> + set_watchdog(S#watchdog{status = down}) end; %% Incoming message. -- cgit v1.2.3 From 2b89e8bd5a8258c4259ed53cc0331d4fbe1f1aa3 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 01:47:27 +0100 Subject: Tweak reason in closed event From {error, Reason} to {no_connection, Reason} when a connection can't be established. The exit reason of a diameter_peer_fsm process is turned into a message from the corresponding diameter_watchdog process to the relevant diameter_service process, the latter sending a 'closed' event including the reason to any subscribers. Reason = [] when none of the configured transport modules succeeds in establishing a connection, which admittedly isn't terribly descriptive. (The lists is of error reasons from transport start functions, which is empty as long as transport processes start successfully.) Note that this form of the closed event is undocumented, aside from the documentation saying that one should expect undocumented events. The explicitly documented forms are currently specific to CER/CEA failures. --- lib/diameter/src/base/diameter_peer_fsm.erl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'lib/diameter/src') diff --git a/lib/diameter/src/base/diameter_peer_fsm.erl b/lib/diameter/src/base/diameter_peer_fsm.erl index 86fc43cdc5..ee6e7dd89e 100644 --- a/lib/diameter/src/base/diameter_peer_fsm.erl +++ b/lib/diameter/src/base/diameter_peer_fsm.erl @@ -225,8 +225,8 @@ start_transport(Addrs0, T) -> erlang:monitor(process, TPid), q_next(TPid, Addrs0, Tmo, Data), {TPid, Addrs}; - No -> - exit({shutdown, No}) + {error, No} -> + exit({shutdown, {no_connection, No}}) end. svc(#diameter_service{capabilities = LCaps0} = Svc, Addrs) -> @@ -368,11 +368,8 @@ transition({diameter, {TPid, connected}}, %% message. This may be followed by an incoming message which arrived %% before the transport was killed and this can't be distinguished %% from one from the transport that's been started to replace it. -transition({diameter, {_, connected}}, _) -> - {stop, connection_timeout}; -transition({diameter, {_, connected, _}}, _) -> - {stop, connection_timeout}; -transition({diameter, {_, connected, _, _}}, _) -> +transition({diameter, T}, _) + when tuple_size(T) < 5, connected == element(2,T) -> {stop, connection_timeout}; %% Connection has timed out: start an alternate. -- cgit v1.2.3