From c3b9ebc6cf9619c220b75601b8f02737052901b3 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 02:07:58 +0100 Subject: Rename reconnect_timer to connect_timer in examples and suites The timer was renamed in commit abea7186. --- lib/diameter/examples/code/peer.erl | 2 +- lib/diameter/test/diameter_config_SUITE.erl | 2 +- lib/diameter/test/diameter_event_SUITE.erl | 6 +++--- lib/diameter/test/diameter_transport_SUITE.erl | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/examples/code/peer.erl b/lib/diameter/examples/code/peer.erl index b4ee17e4b7..7519abfb2c 100644 --- a/lib/diameter/examples/code/peer.erl +++ b/lib/diameter/examples/code/peer.erl @@ -74,7 +74,7 @@ start(Name, Opts) | {error, term()}. connect(Name, T) -> - diameter:add_transport(Name, {connect, [{reconnect_timer, 5000} + diameter:add_transport(Name, {connect, [{connect_timer, 5000} | client(T)]}). %% listen/2 diff --git a/lib/diameter/test/diameter_config_SUITE.erl b/lib/diameter/test/diameter_config_SUITE.erl index d10ee83ba4..ad5b3f9420 100644 --- a/lib/diameter/test/diameter_config_SUITE.erl +++ b/lib/diameter/test/diameter_config_SUITE.erl @@ -157,7 +157,7 @@ {length_errors, [[exit], [handle], [discard]], [[x]]}, - {reconnect_timer, + {connect_timer, [[3000]], [[infinity]]}, {watchdog_timer, diff --git a/lib/diameter/test/diameter_event_SUITE.erl b/lib/diameter/test/diameter_event_SUITE.erl index 94b4967921..64199895be 100644 --- a/lib/diameter/test/diameter_event_SUITE.erl +++ b/lib/diameter/test/diameter_event_SUITE.erl @@ -114,12 +114,12 @@ up(Config) -> %% Connect with non-matching capabilities and expect CEA from the peer %% to indicate as much and then for the transport to be restarted -%% (after reconnect_timer). +%% (after connect_timer). down(Config) -> {Svc, Ref} = connect(Config, [{capabilities, [{'Acct-Application-Id', [?DICT_ACCT:id()]}]}, {applications, [?DICT_ACCT]}, - {reconnect_timer, 5000}]), + {connect_timer, 5000}]), start = event(Svc), {closed, Ref, {'CEA', ?NO_COMMON_APP, _, #diameter_packet{}}, _} = event(Svc), @@ -129,7 +129,7 @@ down(Config) -> %% CEA and cause the client to timeout. cea_timeout(Config) -> {Svc, Ref} = connect(Config, [{capx_timeout, ?SERVER_CAPX_TMO div 2}, - {reconnect_timer, 2*?SERVER_CAPX_TMO}]), + {connect_timer, 2*?SERVER_CAPX_TMO}]), start = event(Svc), {closed, Ref, {'CEA', timeout}, _} = event(Svc). diff --git a/lib/diameter/test/diameter_transport_SUITE.erl b/lib/diameter/test/diameter_transport_SUITE.erl index 9408fae62c..fcffa69c24 100644 --- a/lib/diameter/test/diameter_transport_SUITE.erl +++ b/lib/diameter/test/diameter_transport_SUITE.erl @@ -194,7 +194,7 @@ reconnect({connect, Ref}) -> true = diameter:subscribe(SvcName), ok = start_service(SvcName), [{{_, _, LRef}, Pid}] = diameter_reg:wait({?MODULE, Ref, '_'}), - CRef = ?util:connect(SvcName, tcp, LRef, [{reconnect_timer, 2000}, + CRef = ?util:connect(SvcName, tcp, LRef, [{connect_timer, 2000}, {watchdog_timer, 6000}]), %% Tell partner to kill transport after seeing that there are no -- cgit v1.2.3 From 3b51661f1f9f70cf820026cb345523c05aad2dfa Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 09:36:09 +0100 Subject: Check {connect,watchdog}_timer distinction in event testcases The connect timer is currently ignored by a connecting transport, so the check causes one testcase to fail. --- lib/diameter/test/diameter_event_SUITE.erl | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/test/diameter_event_SUITE.erl b/lib/diameter/test/diameter_event_SUITE.erl index 64199895be..f43f111d20 100644 --- a/lib/diameter/test/diameter_event_SUITE.erl +++ b/lib/diameter/test/diameter_event_SUITE.erl @@ -107,10 +107,18 @@ start_server(Config) -> %% Connect with matching capabilities and expect the connection to %% come up. up(Config) -> - {Svc, Ref} = connect(Config, []), + {Svc, Ref} = connect(Config, [{connect_timer, 5000}, + {watchdog_timer, 15000}]), start = event(Svc), - {up, Ref, {_,_Caps}, _Config, #diameter_packet{}} = event(Svc), - {watchdog, Ref, _, {initial, okay}, _} = event(Svc). + {up, Ref, {TPid, Caps}, Cfg, #diameter_packet{}} = event(Svc), + {watchdog, Ref, _, {initial, okay}, _} = event(Svc), + %% Kill the transport process and see that the connection is + %% reestablished after a watchdog timeout, not after connect_timer + %% expiry. + exit(TPid, kill), + {down, Ref, {TPid, Caps}, Cfg} = event(Svc), + {watchdog, Ref, _, {okay, down}, _} = event(Svc), + {reconnect, Ref, _} = event(Svc, 10000, 20000). %% Connect with non-matching capabilities and expect CEA from the peer %% to indicate as much and then for the transport to be restarted @@ -119,11 +127,12 @@ down(Config) -> {Svc, Ref} = connect(Config, [{capabilities, [{'Acct-Application-Id', [?DICT_ACCT:id()]}]}, {applications, [?DICT_ACCT]}, - {connect_timer, 5000}]), + {connect_timer, 5000}, + {watchdog_timer, 20000}]), start = event(Svc), {closed, Ref, {'CEA', ?NO_COMMON_APP, _, #diameter_packet{}}, _} = event(Svc), - {reconnect, Ref, _} = event(Svc). + {reconnect, Ref, _} = event(Svc, 4000, 10000). %% Connect with matching capabilities but have the server delay its %% CEA and cause the client to timeout. @@ -165,6 +174,13 @@ uniq() -> event(Name) -> receive #diameter_event{service = Name, info = T} -> T end. +event(Name, TL, TH) -> + T0 = now(), + Event = event(Name), + DT = timer:now_diff(now(), T0) div 1000, + {true, true, DT, Event} = {TL < DT, DT < TH, DT, Event}, + Event. + start_service(Name, Opts) -> diameter:start_service(Name, [{monitor, self()} | Opts]). -- cgit v1.2.3 From 66d67762be1cf0a3b9ac068d597c6d8bdaf2e3d7 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 00:12:58 +0100 Subject: Fix ignored connect timer There are two timers governing the establishment of peer connections: connect_timer and watchdog_timer. The former is the RFC 6733 Tc timer and is used by diameter_service to establish an initial connection. The latter is RFC 3539 TwInit and is used by diameter_watchdog for connection reestablishment after the watchdog leaves state INITIAL. A connecting transport ignored the connect timer since the watchdog process never died, regardless of the watchdog state, causing the watchdog timer to handle reconnection. This seems to have been broken for some time. --- lib/diameter/src/base/diameter_watchdog.erl | 36 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index eff5096745..b7f2d24941 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -255,11 +255,15 @@ close({'DOWN', _, process, TPid, {shutdown, Reason}}, close(_, _) -> ok. -event(_, #watchdog{status = T}, #watchdog{status = T}) -> - ok; - -event(_, #watchdog{transport = undefined}, #watchdog{transport = undefined}) -> +event(_, + #watchdog{status = From, transport = F}, + #watchdog{status = To, transport = T}) + when F == undefined, T == undefined; %% transport not started + From == initial, To == down; %% never really left INITIAL + From == To -> %% no state transition ok; +%% Note that there is no INITIAL -> DOWN transition in RFC 3539: ours +%% is just a consequence of stop. event(Msg, #watchdog{status = From, transport = F, parent = Pid}, @@ -411,7 +415,7 @@ transition({'DOWN', _, process, TPid, _Reason}, stop; %% ... or not. -transition({'DOWN', _, process, TPid, _Reason}, +transition({'DOWN', _, process, TPid, _Reason} = D, #watchdog{transport = TPid, status = T, restrict = {_,R}} @@ -422,20 +426,14 @@ transition({'DOWN', _, process, TPid, _Reason}, %% Close an accepting watchdog immediately if there's no %% restriction on the number of connections to the same peer: the - %% state machine never enters state REOPEN in this case. The - %% 'close' message (instead of stop) is so as not to bypass the - %% sending of messages to the service process in handle_info/2. - - if T /= initial, M == accept, not R -> - send(self(), close), - S#watchdog{status = down}; - T /= initial -> - set_watchdog(S#watchdog{status = down}); - M == connect -> - set_watchdog(S); - M == accept -> - send(self(), close), - S + %% state machine never enters state REOPEN in this case. + + if T == initial; + M == accept, not R -> + close(D, S0), + stop; + true -> + set_watchdog(S#watchdog{status = down}) end; %% Incoming message. -- cgit v1.2.3 From 2b89e8bd5a8258c4259ed53cc0331d4fbe1f1aa3 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 01:47:27 +0100 Subject: Tweak reason in closed event From {error, Reason} to {no_connection, Reason} when a connection can't be established. The exit reason of a diameter_peer_fsm process is turned into a message from the corresponding diameter_watchdog process to the relevant diameter_service process, the latter sending a 'closed' event including the reason to any subscribers. Reason = [] when none of the configured transport modules succeeds in establishing a connection, which admittedly isn't terribly descriptive. (The lists is of error reasons from transport start functions, which is empty as long as transport processes start successfully.) Note that this form of the closed event is undocumented, aside from the documentation saying that one should expect undocumented events. The explicitly documented forms are currently specific to CER/CEA failures. --- lib/diameter/src/base/diameter_peer_fsm.erl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/src/base/diameter_peer_fsm.erl b/lib/diameter/src/base/diameter_peer_fsm.erl index 86fc43cdc5..ee6e7dd89e 100644 --- a/lib/diameter/src/base/diameter_peer_fsm.erl +++ b/lib/diameter/src/base/diameter_peer_fsm.erl @@ -225,8 +225,8 @@ start_transport(Addrs0, T) -> erlang:monitor(process, TPid), q_next(TPid, Addrs0, Tmo, Data), {TPid, Addrs}; - No -> - exit({shutdown, No}) + {error, No} -> + exit({shutdown, {no_connection, No}}) end. svc(#diameter_service{capabilities = LCaps0} = Svc, Addrs) -> @@ -368,11 +368,8 @@ transition({diameter, {TPid, connected}}, %% message. This may be followed by an incoming message which arrived %% before the transport was killed and this can't be distinguished %% from one from the transport that's been started to replace it. -transition({diameter, {_, connected}}, _) -> - {stop, connection_timeout}; -transition({diameter, {_, connected, _}}, _) -> - {stop, connection_timeout}; -transition({diameter, {_, connected, _, _}}, _) -> +transition({diameter, T}, _) + when tuple_size(T) < 5, connected == element(2,T) -> {stop, connection_timeout}; %% Connection has timed out: start an alternate. -- cgit v1.2.3