aboutsummaryrefslogtreecommitdiffstats
path: root/lib/diameter/src/base/diameter_watchdog.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/diameter/src/base/diameter_watchdog.erl')
-rw-r--r--lib/diameter/src/base/diameter_watchdog.erl111
1 files changed, 88 insertions, 23 deletions
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl
index 073a415d10..82ca603cf3 100644
--- a/lib/diameter/src/base/diameter_watchdog.erl
+++ b/lib/diameter/src/base/diameter_watchdog.erl
@@ -47,6 +47,14 @@
-define(BASE, ?DIAMETER_DICT_COMMON).
+-define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)).
+
+-define(CHOOSE(B,T,F), if (B) -> T; true -> F end).
+
+-record(config,
+ {suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT
+ okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY
+
-record(watchdog,
{%% PCB - Peer Control Block; see RFC 3539, Appendix A
status = initial :: initial | okay | suspect | down | reopen,
@@ -54,7 +62,8 @@
tw :: 6000..16#FFFFFFFF | {module(), atom(), list()},
%% {M,F,A} -> integer() >= 0
num_dwa = 0 :: -1 | non_neg_integer(),
- %% number of DWAs received during reopen
+ %% number of DWAs received in reopen,
+ %% or number of timeouts before okay -> suspect
%% end PCB
parent = self() :: pid(), %% service process
transport :: pid() | undefined, %% peer_fsm process
@@ -64,7 +73,8 @@
%% term passed into diameter_service with incoming message
sequence :: diameter:sequence(), %% mask
restrict :: {diameter:restriction(), boolean()},
- shutdown = false :: boolean()}).
+ shutdown = false :: boolean(),
+ config :: #config{}}).
%% ---------------------------------------------------------------------------
%% start/2
@@ -129,7 +139,8 @@ i({Ack, T, Pid, {RecvData,
receive_data = RecvData,
dictionary = Dict0,
sequence = Mask,
- restrict = {Restrict, lists:member(node(), Nodes)}}.
+ restrict = {Restrict, lists:member(node(), Nodes)},
+ config = config(Opts)}.
wait(Ref, Pid) ->
receive
@@ -139,6 +150,27 @@ wait(Ref, Pid) ->
exit({shutdown, D})
end.
+%% config/1
+%%
+%% Could also configure counts for SUSPECT to DOWN and REOPEN to DOWN,
+%% but don't.
+
+config(Opts) ->
+ Config = proplists:get_value(watchdog_config, Opts, []),
+ is_list(Config) orelse config_error({watchdog_config, Config}),
+ lists:foldl(fun config/2, #config{}, Config).
+
+config({suspect, N}, Rec)
+ when ?IS_NATURAL(N) ->
+ Rec#config{suspect = N};
+
+config({okay, N}, Rec)
+ when ?IS_NATURAL(N) ->
+ Rec#config{okay = N};
+
+config(T, _) ->
+ config_error(T).
+
%% start/5
start(T, Opts, Mask, Nodes, Dict0, Svc) ->
@@ -219,6 +251,17 @@ handle_info(T, #watchdog{} = State) ->
?LOG(stop, T),
event(T, State, State#watchdog{status = down}),
{stop, {shutdown, T}, State}
+ end;
+
+handle_info(T, State) -> %% started in old code
+ handle_info(T, upgrade(State)).
+
+upgrade(State) ->
+ case erlang:append_element(State, #config{}) of
+ #watchdog{status = okay, config = #config{suspect = OS}} = S ->
+ S#watchdog{num_dwa = OS};
+ #watchdog{} = S ->
+ S
end.
close({'DOWN', _, process, TPid, {shutdown, Reason}},
@@ -331,11 +374,13 @@ transition({accepted = T, TPid}, #watchdog{transport = TPid,
transition({open, TPid, Hosts, _} = Open,
#watchdog{transport = TPid,
status = initial,
- restrict = {_, R}}
+ restrict = {_,R},
+ config = #config{suspect = OS}}
= S) ->
case okay(getr(restart), Hosts, R) of
okay ->
- set_watchdog(S#watchdog{status = okay});
+ set_watchdog(S#watchdog{status = okay,
+ num_dwa = OS});
reopen ->
transition(Open, S#watchdog{status = down})
end;
@@ -347,15 +392,22 @@ transition({open, TPid, Hosts, _} = Open,
transition({open = Key, TPid, _Hosts, T},
#watchdog{transport = TPid,
- status = down}
+ status = down,
+ config = #config{suspect = OS,
+ okay = RO}}
= S) ->
- %% Store the info we need to notify the parent to reopen the
- %% connection after the requisite DWA's are received, at which
- %% time we eraser(open). The reopen message is a later addition,
- %% to communicate the new capabilities as soon as they're known.
- putr(Key, {TPid, T}),
- set_watchdog(send_watchdog(S#watchdog{status = reopen,
- num_dwa = 0}));
+ case RO of
+ 0 -> %% non-standard: skip REOPEN
+ set_watchdog(S#watchdog{status = okay,
+ num_dwa = OS});
+ _ ->
+ %% Store the info we need to notify the parent to reopen
+ %% the connection after the requisite DWA's are received,
+ %% at which time we eraser(open).
+ putr(Key, {TPid, T}),
+ set_watchdog(send_watchdog(S#watchdog{status = reopen,
+ num_dwa = 0}))
+ end;
%% OKAY Connection down CloseConnection()
%% Failover()
@@ -374,7 +426,7 @@ transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
status = T}
= S) ->
- set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end,
+ set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down),
pending = false,
transport = undefined});
@@ -553,22 +605,27 @@ rcv(_, #watchdog{status = okay} = S) ->
%% SUSPECT Receive non-DWA Failback()
%% SetWatchdog() OKAY
-rcv('DWA', #watchdog{status = suspect} = S) ->
+rcv('DWA', #watchdog{status = suspect, config = #config{suspect = OS}} = S) ->
set_watchdog(S#watchdog{status = okay,
+ num_dwa = OS,
pending = false});
-rcv(_, #watchdog{status = suspect} = S) ->
- set_watchdog(S#watchdog{status = okay});
+rcv(_, #watchdog{status = suspect, config = #config{suspect = OS}} = S) ->
+ set_watchdog(S#watchdog{status = okay,
+ num_dwa = OS});
%% REOPEN Receive DWA & Pending = FALSE
%% NumDWA == 2 NumDWA++
%% Failback() OKAY
rcv('DWA', #watchdog{status = reopen,
- num_dwa = 2 = N}
- = S) ->
+ num_dwa = N,
+ config = #config{suspect = OS,
+ okay = RO}}
+ = S)
+ when N+1 == RO ->
S#watchdog{status = okay,
- num_dwa = N+1,
+ num_dwa = OS,
pending = false};
%% REOPEN Receive DWA & Pending = FALSE
@@ -607,9 +664,17 @@ timeout(#watchdog{status = T,
%% Pending SetWatchdog() SUSPECT
timeout(#watchdog{status = okay,
- pending = true}
- = S) ->
- S#watchdog{status = suspect};
+ pending = true,
+ num_dwa = N}
+ = S) ->
+ case N of
+ 1 ->
+ S#watchdog{status = suspect};
+ 0 -> %% non-standard: never move to suspect
+ S;
+ N -> %% non-standard: more timeouts before moving
+ S#watchdog{num_dwa = N-1}
+ end;
%% SUSPECT Timer expires CloseConnection()
%% SetWatchdog() DOWN