From 924716b9bcecbda07ec630c705ede820363e92bc Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 4 Mar 2013 15:56:42 +0100 Subject: Tweak okay -> suspect config Make it just a number of timeouts, without a new DWR being sent. --- lib/diameter/doc/src/diameter.xml | 8 ++-- lib/diameter/src/base/diameter_watchdog.erl | 18 ++++----- lib/diameter/test/diameter_watchdog_SUITE.erl | 55 +++++++++++++++------------ 3 files changed, 42 insertions(+), 39 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/doc/src/diameter.xml b/lib/diameter/doc/src/diameter.xml index 75e95a9a3d..8e9ec06ff9 100644 --- a/lib/diameter/doc/src/diameter.xml +++ b/lib/diameter/doc/src/diameter.xml @@ -1132,10 +1132,10 @@ corresponding timeout (see below) or all fail.

Specifies configuration that alters the behaviour of the watchdog state machine. On key okay, the non-negative number of answered DWR -messages required before transitioning from REOPEN to OKAY. -On key suspect, the positive number of unanswered DWR messages -before transitioning from OKAY to SUSPECT, or 0 to never make this -transition.

+messages before transitioning from REOPEN to OKAY. +On key suspect, the number of watchdog timeouts before +transitioning from OKAY to SUSPECT when DWR is unanswered, or 0 to +not make the transition.

Defaults to [{okay, 3}, {suspect, 1}]. diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index 0b32974efe..82ca603cf3 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -49,6 +49,8 @@ -define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)). +-define(CHOOSE(B,T,F), if (B) -> T; true -> F end). + -record(config, {suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY @@ -61,7 +63,7 @@ %% {M,F,A} -> integer() >= 0 num_dwa = 0 :: -1 | non_neg_integer(), %% number of DWAs received in reopen, - %% or to send in okay before moving to suspect + %% or number of timeouts before okay -> suspect %% end PCB parent = self() :: pid(), %% service process transport :: pid() | undefined, %% peer_fsm process @@ -424,7 +426,7 @@ transition({'DOWN', _, process, TPid, _Reason}, #watchdog{transport = TPid, status = T} = S) -> - set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end, + set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down), pending = false, transport = undefined}); @@ -668,9 +670,10 @@ timeout(#watchdog{status = okay, case N of 1 -> S#watchdog{status = suspect}; - _ -> %% non-standard - send_watchdog(S#watchdog{pending = false, - num_dwa = decr(N)}) + 0 -> %% non-standard: never move to suspect + S; + N -> %% non-standard: more timeouts before moving + S#watchdog{num_dwa = N-1} end; %% SUSPECT Timer expires CloseConnection() @@ -725,11 +728,6 @@ timeout(#watchdog{status = T} = S) T == down -> restart(S). -decr(0 = N) -> - N; -decr(N) -> - N-1. - %% restart/1 restart(#watchdog{transport = undefined} = S) -> diff --git a/lib/diameter/test/diameter_watchdog_SUITE.erl b/lib/diameter/test/diameter_watchdog_SUITE.erl index 82244a1c7f..704bf110c7 100644 --- a/lib/diameter/test/diameter_watchdog_SUITE.erl +++ b/lib/diameter/test/diameter_watchdog_SUITE.erl @@ -89,16 +89,23 @@ -define(INFO(T), #diameter_event{info = T}). %% Receive an event message from diameter. --define(EVENT(T), - apply(fun() -> %% apply to not bind T_ - receive #diameter_event{info = T = T_} -> - log_event(T_) - end +-define(EVENT(T), %% apply to not bind T_ + apply(fun() -> + receive ?INFO(T = T_) -> log_event(T_) end end, [])). %% Receive a watchdog event. -define(WD_EVENT(Ref), log_wd(element(4, ?EVENT({watchdog, Ref, _, _, _})))). +-define(WD_EVENT(Ref, Ms), + apply(fun() -> + receive ?INFO({watchdog, Ref, _, T_, _}) -> + log_wd(T_) + after Ms -> + false + end + end, + [])). %% Log to make failures identifiable. -define(LOG(T), ?LOG("~p", [T])). @@ -376,8 +383,8 @@ tpid(Ref, [[{ref, Ref}, %% # suspect/1 %% =========================================================================== -%% Configure transports to require a set number of watchdogs before -%% moving from OKAY to SUSPECT. +%% Configure transports to require a set number of watchdog timeouts +%% before moving from OKAY to SUSPECT. suspect(_) -> [] = run([[abuse, [suspect, N]] || N <- [0,1,3]]). @@ -394,19 +401,21 @@ suspect(TRef, true, SvcName, _) -> {okay, _} = ?WD_EVENT(TRef); suspect(TRef, false, SvcName, 0) -> %% SUSPECT disabled - %% Wait 2+ watchdogs and see that two unanswered watchdogs have - %% been sent. - [2,0,0,0] = receive - ?INFO({watchdog, TRef, _, _, _} = T) -> T - after 28000 -> - wd_counts(SvcName) - end; + %% Wait 2+ watchdogs and see that only one watchdog has been sent. + false = ?WD_EVENT(TRef, 28000), + [1,0,0,0] = wd_counts(SvcName); suspect(TRef, false, SvcName, N) -> - {okay, suspect} = ?WD_EVENT(TRef), - [N,0,0,0] = wd_counts(SvcName), - {suspect, down} = ?WD_EVENT(TRef), - [N,0,0,0] = wd_counts(SvcName). + %% Check that no watchdog transition takes place within N+ + %% watchdogs ... + false = ?WD_EVENT(TRef, N*10000+8000), + [1,0,0,0] = wd_counts(SvcName), + %% ... but that the connection then becomes suspect ... + {okay, suspect} = ?WD_EVENT(TRef, 10000), + [1,0,0,0] = wd_counts(SvcName), + %% ... and goes down. + {suspect, down} = ?WD_EVENT(TRef, 18000), + [1,0,0,0] = wd_counts(SvcName). %% abuse/1 @@ -470,13 +479,9 @@ ok(TRef, SvcName, Down, 0) -> %% Connection comes up without watchdog exchange. {Down, okay} = ?WD_EVENT(TRef), [1,0,0,0] = wd_counts(SvcName), - %% Wait 2+ watchdog timeout to see that the connection stays up and - %% two watchdogs are exchanged. - ok = receive ?INFO({watchdog, TRef, _, _, _} = T) -> - T - after 28000 -> - ok - end, + %% Wait 2+ watchdog timeouts to see that the connection stays up + %% and two watchdogs are exchanged. + false = ?WD_EVENT(TRef, 28000), [3,0,0,2] = wd_counts(SvcName); ok(TRef, SvcName, Down, N) -> -- cgit v1.2.3