about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Anders Svensson <[email protected]> 2013-03-04 15:56:42 +0100
committer: Anders Svensson <[email protected]> 2013-03-12 18:18:11 +0100
commit: 924716b9bcecbda07ec630c705ede820363e92bc (patch)
tree: 065f9e5469f79e1e750d502fbc3250d33e18fa71
parent: 8b947bef92e7ace97c90769fe51a643db7739c3d (diff)
download: otp-924716b9bcecbda07ec630c705ede820363e92bc.tar.gz
otp-924716b9bcecbda07ec630c705ede820363e92bc.tar.bz2
otp-924716b9bcecbda07ec630c705ede820363e92bc.zip
Tweak okay -> suspect config
Make it just a number of timeouts, without a new DWR being sent.
-rw-r--r-- lib/diameter/doc/src/diameter.xml 8
-rw-r--r-- lib/diameter/src/base/diameter_watchdog.erl 18
-rw-r--r-- lib/diameter/test/diameter_watchdog_SUITE.erl 55
3 files changed, 42 insertions, 39 deletions
diff --git a/lib/diameter/doc/src/diameter.xml b/lib/diameter/doc/src/diameter.xml
index 75e95a9a3d..8e9ec06ff9 100644
--- a/lib/diameter/doc/src/diameter.xml
+++ b/lib/diameter/doc/src/diameter.xml
@@ -1132,10 +1132,10 @@ corresponding timeout (see below) or all fail.</p>
Specifies configuration that alters the behaviour of the watchdog
state machine.
On key <c>okay</c>, the non-negative number of answered DWR
-messages required before transitioning from REOPEN to OKAY.
-On key <c>suspect</c>, the positive number of unanswered DWR messages
-before transitioning from OKAY to SUSPECT, or 0 to never make this
-transition.</p>
+messages before transitioning from REOPEN to OKAY.
+On key <c>suspect</c>, the number of watchdog timeouts before
+transitioning from OKAY to SUSPECT when DWR is unanswered, or 0 to
+not make the transition.</p>
<p>
Defaults to <c>[{okay, 3}, {suspect, 1}]</c>.
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl
index 0b32974efe..82ca603cf3 100644
--- a/lib/diameter/src/base/diameter_watchdog.erl
+++ b/lib/diameter/src/base/diameter_watchdog.erl
@@ -49,6 +49,8 @@
-define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)).
+-define(CHOOSE(B,T,F), if (B) -> T; true -> F end).
+
-record(config,
{suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT
okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY
@@ -61,7 +63,7 @@
%% {M,F,A} -> integer() >= 0
num_dwa = 0 :: -1 | non_neg_integer(),
%% number of DWAs received in reopen,
- %% or to send in okay before moving to suspect
+ %% or number of timeouts before okay -> suspect
%% end PCB
parent = self() :: pid(), %% service process
transport :: pid() | undefined, %% peer_fsm process
@@ -424,7 +426,7 @@ transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
status = T}
= S) ->
- set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end,
+ set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down),
pending = false,
transport = undefined});
@@ -668,9 +670,10 @@ timeout(#watchdog{status = okay,
case N of
1 ->
S#watchdog{status = suspect};
- _ -> %% non-standard
- send_watchdog(S#watchdog{pending = false,
- num_dwa = decr(N)})
+ 0 -> %% non-standard: never move to suspect
+ S;
+ N -> %% non-standard: more timeouts before moving
+ S#watchdog{num_dwa = N-1}
end;
%% SUSPECT Timer expires CloseConnection()
@@ -725,11 +728,6 @@ timeout(#watchdog{status = T} = S)
T == down ->
restart(S).
-decr(0 = N) ->
- N;
-decr(N) ->
- N-1.
-
%% restart/1
restart(#watchdog{transport = undefined} = S) ->
diff --git a/lib/diameter/test/diameter_watchdog_SUITE.erl b/lib/diameter/test/diameter_watchdog_SUITE.erl
index 82244a1c7f..704bf110c7 100644
--- a/lib/diameter/test/diameter_watchdog_SUITE.erl
+++ b/lib/diameter/test/diameter_watchdog_SUITE.erl
@@ -89,16 +89,23 @@
-define(INFO(T), #diameter_event{info = T}).
%% Receive an event message from diameter.
--define(EVENT(T),
- apply(fun() -> %% apply to not bind T_
- receive #diameter_event{info = T = T_} ->
- log_event(T_)
- end
+-define(EVENT(T), %% apply to not bind T_
+ apply(fun() ->
+ receive ?INFO(T = T_) -> log_event(T_) end
end,
[])).
%% Receive a watchdog event.
-define(WD_EVENT(Ref), log_wd(element(4, ?EVENT({watchdog, Ref, _, _, _})))).
+-define(WD_EVENT(Ref, Ms),
+ apply(fun() ->
+ receive ?INFO({watchdog, Ref, _, T_, _}) ->
+ log_wd(T_)
+ after Ms ->
+ false
+ end
+ end,
+ [])).
%% Log to make failures identifiable.
-define(LOG(T), ?LOG("~p", [T])).
@@ -376,8 +383,8 @@ tpid(Ref, [[{ref, Ref},
%% # suspect/1
%% ===========================================================================
-%% Configure transports to require a set number of watchdogs before
-%% moving from OKAY to SUSPECT.
+%% Configure transports to require a set number of watchdog timeouts
+%% before moving from OKAY to SUSPECT.
suspect(_) ->
[] = run([[abuse, [suspect, N]] || N <- [0,1,3]]).
@@ -394,19 +401,21 @@ suspect(TRef, true, SvcName, _) ->
{okay, _} = ?WD_EVENT(TRef);
suspect(TRef, false, SvcName, 0) -> %% SUSPECT disabled
- %% Wait 2+ watchdogs and see that two unanswered watchdogs have
- %% been sent.
- [2,0,0,0] = receive
- ?INFO({watchdog, TRef, _, _, _} = T) -> T
- after 28000 ->
- wd_counts(SvcName)
- end;
+ %% Wait 2+ watchdogs and see that only one watchdog has been sent.
+ false = ?WD_EVENT(TRef, 28000),
+ [1,0,0,0] = wd_counts(SvcName);
suspect(TRef, false, SvcName, N) ->
- {okay, suspect} = ?WD_EVENT(TRef),
- [N,0,0,0] = wd_counts(SvcName),
- {suspect, down} = ?WD_EVENT(TRef),
- [N,0,0,0] = wd_counts(SvcName).
+ %% Check that no watchdog transition takes place within N+
+ %% watchdogs ...
+ false = ?WD_EVENT(TRef, N*10000+8000),
+ [1,0,0,0] = wd_counts(SvcName),
+ %% ... but that the connection then becomes suspect ...
+ {okay, suspect} = ?WD_EVENT(TRef, 10000),
+ [1,0,0,0] = wd_counts(SvcName),
+ %% ... and goes down.
+ {suspect, down} = ?WD_EVENT(TRef, 18000),
+ [1,0,0,0] = wd_counts(SvcName).
%% abuse/1
@@ -470,13 +479,9 @@ ok(TRef, SvcName, Down, 0) ->
%% Connection comes up without watchdog exchange.
{Down, okay} = ?WD_EVENT(TRef),
[1,0,0,0] = wd_counts(SvcName),
- %% Wait 2+ watchdog timeout to see that the connection stays up and
- %% two watchdogs are exchanged.
- ok = receive ?INFO({watchdog, TRef, _, _, _} = T) ->
- T
- after 28000 ->
- ok
- end,
+ %% Wait 2+ watchdog timeouts to see that the connection stays up
+ %% and two watchdogs are exchanged.
+ false = ?WD_EVENT(TRef, 28000),
[3,0,0,2] = wd_counts(SvcName);
ok(TRef, SvcName, Down, N) ->