aboutsummaryrefslogtreecommitdiffstats
path: root/lib/diameter/src/base/diameter_watchdog.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/diameter/src/base/diameter_watchdog.erl')
-rw-r--r--lib/diameter/src/base/diameter_watchdog.erl206
1 files changed, 88 insertions, 118 deletions
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl
index a5429c967c..10ab246b88 100644
--- a/lib/diameter/src/base/diameter_watchdog.erl
+++ b/lib/diameter/src/base/diameter_watchdog.erl
@@ -62,11 +62,13 @@
restrict :: {diameter:restriction(), boolean()},
shutdown = false :: boolean()}).
+%% ---------------------------------------------------------------------------
%% start/2
%%
%% Start a monitor before the watchdog is allowed to proceed to ensure
%% that a failed capabilities exchange produces the desired exit
%% reason.
+%% ---------------------------------------------------------------------------
-spec start(Type, {RecvData, [Opt], SvcName, SvcOpts, #diameter_service{}})
-> {reference(), pid()}
@@ -77,12 +79,12 @@
SvcName :: diameter:service_name().
start({_,_} = Type, T) ->
- Ref = make_ref(),
- {ok, Pid} = diameter_watchdog_sup:start_child({Ref, {Type, self(), T}}),
+ Ack = make_ref(),
+ {ok, Pid} = diameter_watchdog_sup:start_child({Ack, Type, self(), T}),
try
{erlang:monitor(process, Pid), Pid}
after
- Pid ! Ref
+ send(Pid, Ack)
end.
start_link(T) ->
@@ -101,22 +103,15 @@ init(T) ->
proc_lib:init_ack({ok, self()}),
gen_server:enter_loop(?MODULE, [], i(T)).
-i({Ref, {_, Pid, _} = T}) ->
- MRef = erlang:monitor(process, Pid),
- receive
- Ref ->
- make_state(T);
- {'DOWN', MRef, process, _, _} = D ->
- exit({shutdown, D})
- end.
-
-make_state({T, Pid, {RecvData,
- Opts,
- SvcName,
- SvcOpts,
- #diameter_service{applications = Apps,
- capabilities = Caps}
- = Svc}}) ->
+i({Ack, T, Pid, {RecvData,
+ Opts,
+ SvcName,
+ SvcOpts,
+ #diameter_service{applications = Apps,
+ capabilities = Caps}
+ = Svc}}) ->
+ erlang:monitor(process, Pid),
+ wait(Ack, Pid),
random:seed(now()),
putr(restart, {T, Opts, Svc}), %% save seeing it in trace
putr(dwr, dwr(Caps)), %%
@@ -124,9 +119,7 @@ make_state({T, Pid, {RecvData,
Restrict = proplists:get_value(restrict_connections, SvcOpts),
Nodes = restrict_nodes(Restrict),
#watchdog{parent = Pid,
- transport = monitor(diameter_peer_fsm:start(T,
- Opts,
- {Mask, Nodes, Svc})),
+ transport = start(T, Opts, Mask, Nodes, Svc),
tw = proplists:get_value(watchdog_timer,
Opts,
?DEFAULT_TW_INIT),
@@ -134,6 +127,21 @@ make_state({T, Pid, {RecvData,
sequence = Mask,
restrict = {Restrict, lists:member(node(), Nodes)}}.
+wait(Ref, Pid) ->
+ receive
+ Ref ->
+ ok;
+ {'DOWN', _, process, Pid, _} = D ->
+ exit({shutdown, D})
+ end.
+
+%% start/5
+
+start(T, Opts, Mask, Nodes, Svc) ->
+ {_MRef, Pid}
+ = diameter_peer_fsm:start(T, Opts, {Mask, Nodes, Svc}),
+ Pid.
+
%% handle_call/3
handle_call(_, _, State) ->
@@ -151,41 +159,59 @@ handle_info(T, #watchdog{} = State) ->
ok ->
{noreply, State};
#watchdog{} = S ->
- event(State, S),
+ close(T, State), %% service expects 'close' message
+ event(T, State, S), %% before 'watchdog'
{noreply, S};
stop ->
?LOG(stop, T),
- event(State, State#watchdog{status = down}),
+ event(T, State, State#watchdog{status = down}),
{stop, {shutdown, T}, State}
- end;
+ end.
-handle_info(T, S) ->
- handle_info(T, upgrade(S)).
+close({'DOWN', _, process, TPid, {shutdown, Reason}},
+ #watchdog{transport = TPid,
+ parent = Pid}) ->
+ send(Pid, {close, self(), Reason});
-upgrade(S) ->
- #watchdog{} = list_to_tuple(tuple_to_list(S)
- ++ [?NOMASK, {nodes, true}, false]).
+close(_, _) ->
+ ok.
-event(#watchdog{status = T}, #watchdog{status = T}) ->
+event(_, #watchdog{status = T}, #watchdog{status = T}) ->
ok;
-event(#watchdog{transport = undefined}, #watchdog{transport = undefined}) ->
+event(_, #watchdog{transport = undefined}, #watchdog{transport = undefined}) ->
ok;
-event(#watchdog{status = From, transport = F, parent = Pid},
+event(Msg,
+ #watchdog{status = From, transport = F, parent = Pid},
#watchdog{status = To, transport = T}) ->
- E = {tpid(F,T), From, To},
- notify(Pid, E),
+ TPid = tpid(F,T),
+ E = {[TPid | data(Msg, TPid, From, To)], From, To},
+ send(Pid, {watchdog, self(), E}),
?LOG(transition, {self(), E}).
+data(Msg, TPid, reopen, okay) ->
+ {recv, TPid, 'DWA', _Pkt} = Msg, %% assert
+ {TPid, T} = eraser(open),
+ [T];
+
+data({open, TPid, _Hosts, T}, TPid, _From, To)
+ when To == okay;
+ To == reopen ->
+ [T];
+
+data(_, _, _, _) ->
+ [].
+
tpid(_, Pid)
when is_pid(Pid) ->
Pid;
+
tpid(Pid, _) ->
Pid.
-notify(Pid, E) ->
- Pid ! {watchdog, self(), E}.
+send(Pid, T) ->
+ Pid ! T.
%% terminate/2
@@ -215,15 +241,13 @@ transition(close, #watchdog{}) ->
ok;
%% Service is asking for the peer to be taken down gracefully.
-transition({shutdown, Pid}, #watchdog{parent = Pid,
- transport = undefined,
- status = S}) ->
- down = S, %% sanity check
+transition({shutdown, Pid, _}, #watchdog{parent = Pid,
+ transport = undefined}) ->
stop;
-transition({shutdown = T, Pid}, #watchdog{parent = Pid,
- transport = TPid}
- = S) ->
- TPid ! {T, self()},
+transition({shutdown = T, Pid, Reason}, #watchdog{parent = Pid,
+ transport = TPid}
+ = S) ->
+ send(TPid, {T, self(), Reason}),
S#watchdog{shutdown = true};
%% Parent process has died,
@@ -234,13 +258,9 @@ transition({'DOWN', _, process, Pid, _Reason},
%% Transport has accepted a connection.
transition({accepted = T, TPid}, #watchdog{transport = TPid,
parent = Pid}) ->
- Pid ! {T, self(), TPid},
+ send(Pid, {T, self(), TPid}),
ok;
-%% Transport is telling us that its impending death isn't failure.
-transition({close, TPid, _Reason}, #watchdog{transport = TPid}) ->
- stop;
-
%% STATE Event Actions New State
%% ===== ------ ------- ----------
%% INITIAL Connection up SetWatchdog() OKAY
@@ -255,15 +275,13 @@ transition({close, TPid, _Reason}, #watchdog{transport = TPid}) ->
%% know the identity of the peer (ie. now) that we know that we're in
%% state down rather than initial.
-transition({open, TPid, Hosts, T} = Open,
+transition({open, TPid, Hosts, _} = Open,
#watchdog{transport = TPid,
status = initial,
- parent = Pid,
restrict = {_, R}}
= S) ->
case okay(getr(restart), Hosts, R) of
okay ->
- open(Pid, {TPid, T}),
set_watchdog(S#watchdog{status = okay});
reopen ->
transition(Open, S#watchdog{status = down})
@@ -274,17 +292,15 @@ transition({open, TPid, Hosts, T} = Open,
%% SetWatchdog()
%% Pending = TRUE REOPEN
-transition({open = P, TPid, _Hosts, T},
+transition({open = Key, TPid, _Hosts, T},
#watchdog{transport = TPid,
- parent = Pid,
status = down}
= S) ->
%% Store the info we need to notify the parent to reopen the
%% connection after the requisite DWA's are received, at which
%% time we eraser(open). The reopen message is a later addition,
%% to communicate the new capabilities as soon as they're known.
- putr(P, {TPid, T}),
- Pid ! {reopen, self(), {TPid, T}},
+ putr(Key, {TPid, T}),
set_watchdog(send_watchdog(S#watchdog{status = reopen,
num_dwa = 0}));
@@ -296,26 +312,18 @@ transition({open = P, TPid, _Hosts, T},
%% REOPEN Connection down CloseConnection()
%% SetWatchdog() DOWN
-transition({'DOWN', _, process, TPid, _},
+transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
- status = S,
- shutdown = D})
- when S == initial;
- D ->
+ shutdown = true}) ->
stop;
-transition({'DOWN', _, process, TPid, _},
- #watchdog{transport = TPid}
+transition({'DOWN', _, process, TPid, _Reason},
+ #watchdog{transport = TPid,
+ status = T}
= S) ->
- failover(S),
- close(S),
- set_watchdog(S#watchdog{status = down,
+ set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end,
pending = false,
transport = undefined});
-%% Any outstanding pending (or other messages from the transport) will
-%% have arrived before 'DOWN' since the message comes from the same
-%% process. Note that we could also get this message in the initial
-%% state.
%% Incoming message.
transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) ->
@@ -331,15 +339,11 @@ transition({timeout, _, tw}, #watchdog{}) ->
%% State query.
transition({state, Pid}, #watchdog{status = S}) ->
- Pid ! {self(), S},
+ send(Pid, {self(), S}),
ok.
%% ===========================================================================
-monitor(Pid) ->
- erlang:monitor(process, Pid),
- Pid.
-
putr(Key, Val) ->
put({?MODULE, Key}, Val).
@@ -386,7 +390,7 @@ okay([{_,P}]) ->
%% ... or it has.
okay(C) ->
- [_|_] = [P ! close || {_,P} <- C, self() /= P],
+ [_|_] = [send(P, close) || {_,P} <- C, self() /= P],
reopen.
%% set_watchdog/1
@@ -408,36 +412,13 @@ tw(T)
tw({M,F,A}) ->
apply(M,F,A).
-%% open/2
-
-open(Pid, {_,_} = T) ->
- Pid ! {connection_up, self(), T}.
-
-%% failover/1
-
-failover(#watchdog{status = okay,
- parent = Pid}) ->
- Pid ! {connection_down, self()};
-
-failover(_) ->
- ok.
-
-%% close/1
-
-close(#watchdog{status = down}) ->
- ok;
-
-close(#watchdog{parent = Pid}) ->
- {{T, _}, _, _} = getr(restart),
- T == accept andalso (Pid ! {close, self()}).
-
%% send_watchdog/1
send_watchdog(#watchdog{pending = false,
transport = TPid,
sequence = Mask}
= S) ->
- TPid ! {send, encode(getr(dwr), Mask)},
+ send(TPid, {send, encode(getr(dwr), Mask)}),
?LOG(send, 'DWR'),
S#watchdog{pending = true}.
@@ -518,12 +499,10 @@ rcv(_, #watchdog{status = okay} = S) ->
%% SetWatchdog() OKAY
rcv('DWA', #watchdog{status = suspect} = S) ->
- failback(S),
set_watchdog(S#watchdog{status = okay,
pending = false});
rcv(_, #watchdog{status = suspect} = S) ->
- failback(S),
set_watchdog(S#watchdog{status = okay});
%% REOPEN Receive DWA & Pending = FALSE
@@ -531,10 +510,8 @@ rcv(_, #watchdog{status = suspect} = S) ->
%% Failback() OKAY
rcv('DWA', #watchdog{status = reopen,
- num_dwa = 2 = N,
- parent = Pid}
+ num_dwa = 2 = N}
= S) ->
- open(Pid, eraser(open)),
S#watchdog{status = okay,
num_dwa = N+1,
pending = false};
@@ -553,11 +530,6 @@ rcv('DWA', #watchdog{status = reopen,
rcv(_, #watchdog{status = reopen} = S) ->
throwaway(S).
-%% failback/1
-
-failback(#watchdog{parent = Pid}) ->
- Pid ! {connection_up, self()}.
-
%% timeout/1
%%
%% The caller sets the watchdog on the return value.
@@ -582,7 +554,6 @@ timeout(#watchdog{status = T,
timeout(#watchdog{status = okay,
pending = true}
= S) ->
- failover(S),
S#watchdog{status = suspect};
%% SUSPECT Timer expires CloseConnection()
@@ -599,7 +570,6 @@ timeout(#watchdog{status = T,
when T == suspect;
T == reopen, P, N < 0 ->
exit(TPid, {shutdown, watchdog_timeout}),
- close(S),
S#watchdog{status = down};
%% REOPEN Timer expires & NumDWA = -1
@@ -633,7 +603,9 @@ timeout(#watchdog{status = reopen,
%% process has died. We only need to handle state down since we start
%% the first watchdog when transitioning out of initial.
-timeout(#watchdog{status = down} = S) ->
+timeout(#watchdog{status = T} = S)
+ when T == initial;
+ T == down ->
restart(S).
%% restart/1
@@ -659,11 +631,9 @@ restart({{connect, _} = T, Opts, Svc}, #watchdog{parent = Pid,
sequence = Mask,
restrict = {R,_}}
= S) ->
- Pid ! {reconnect, self()},
+ send(Pid, {reconnect, self()}),
Nodes = restrict_nodes(R),
- S#watchdog{transport = monitor(diameter_peer_fsm:start(T,
- Opts,
- {Mask, Nodes, Svc})),
+ S#watchdog{transport = start(T, Opts, Mask, Nodes, Svc),
restrict = {R, lists:member(node(), Nodes)}};
%% No restriction on the number of connections to the same peer: just