diff options
author | Anders Svensson <[email protected]> | 2013-02-12 12:51:56 +0100 |
---|---|---|
committer | Anders Svensson <[email protected]> | 2013-02-12 12:51:56 +0100 |
commit | 1e7b3534eeea4761bb45ff31bcdaf57cd66a7cb7 (patch) | |
tree | 0bb4c01bc6c303154814848e4a52b07a16f161bd /lib/diameter/src/base/diameter_watchdog.erl | |
parent | 5bb22560c350baa834196835640a5aab21e97f8a (diff) | |
parent | 551bdf0da65bcb9838ddc613e0507e2871dcd5dc (diff) | |
download | otp-1e7b3534eeea4761bb45ff31bcdaf57cd66a7cb7.tar.gz otp-1e7b3534eeea4761bb45ff31bcdaf57cd66a7cb7.tar.bz2 otp-1e7b3534eeea4761bb45ff31bcdaf57cd66a7cb7.zip |
Merge branch 'anders/diameter/reopen/OTP-10692'
* anders/diameter/reopen/OTP-10692:
Fix testcases in event suite
Fix faulty watchdog transition INITIAL -> DOWN
Fix faulty watchdog transition DOWN -> INITIAL
Comment fixes
Rename records peer/conn -> watchdog/peer in diameter_service
Add testcases to traffic suite
Simplify watchdog transitions in service process
Simplify transport shutdown
Remove upgrade code not needed after application restart
Tweak (make) silent rules support
Diffstat (limited to 'lib/diameter/src/base/diameter_watchdog.erl')
-rw-r--r-- | lib/diameter/src/base/diameter_watchdog.erl | 206 |
1 files changed, 88 insertions, 118 deletions
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index a5429c967c..10ab246b88 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -62,11 +62,13 @@ restrict :: {diameter:restriction(), boolean()}, shutdown = false :: boolean()}). +%% --------------------------------------------------------------------------- %% start/2 %% %% Start a monitor before the watchdog is allowed to proceed to ensure %% that a failed capabilities exchange produces the desired exit %% reason. +%% --------------------------------------------------------------------------- -spec start(Type, {RecvData, [Opt], SvcName, SvcOpts, #diameter_service{}}) -> {reference(), pid()} @@ -77,12 +79,12 @@ SvcName :: diameter:service_name(). start({_,_} = Type, T) -> - Ref = make_ref(), - {ok, Pid} = diameter_watchdog_sup:start_child({Ref, {Type, self(), T}}), + Ack = make_ref(), + {ok, Pid} = diameter_watchdog_sup:start_child({Ack, Type, self(), T}), try {erlang:monitor(process, Pid), Pid} after - Pid ! Ref + send(Pid, Ack) end. start_link(T) -> @@ -101,22 +103,15 @@ init(T) -> proc_lib:init_ack({ok, self()}), gen_server:enter_loop(?MODULE, [], i(T)). -i({Ref, {_, Pid, _} = T}) -> - MRef = erlang:monitor(process, Pid), - receive - Ref -> - make_state(T); - {'DOWN', MRef, process, _, _} = D -> - exit({shutdown, D}) - end. - -make_state({T, Pid, {RecvData, - Opts, - SvcName, - SvcOpts, - #diameter_service{applications = Apps, - capabilities = Caps} - = Svc}}) -> +i({Ack, T, Pid, {RecvData, + Opts, + SvcName, + SvcOpts, + #diameter_service{applications = Apps, + capabilities = Caps} + = Svc}}) -> + erlang:monitor(process, Pid), + wait(Ack, Pid), random:seed(now()), putr(restart, {T, Opts, Svc}), %% save seeing it in trace putr(dwr, dwr(Caps)), %% @@ -124,9 +119,7 @@ make_state({T, Pid, {RecvData, Restrict = proplists:get_value(restrict_connections, SvcOpts), Nodes = restrict_nodes(Restrict), #watchdog{parent = Pid, - transport = monitor(diameter_peer_fsm:start(T, - Opts, - {Mask, Nodes, Svc})), + transport = start(T, Opts, Mask, Nodes, Svc), tw = proplists:get_value(watchdog_timer, Opts, ?DEFAULT_TW_INIT), @@ -134,6 +127,21 @@ make_state({T, Pid, {RecvData, sequence = Mask, restrict = {Restrict, lists:member(node(), Nodes)}}. +wait(Ref, Pid) -> + receive + Ref -> + ok; + {'DOWN', _, process, Pid, _} = D -> + exit({shutdown, D}) + end. + +%% start/5 + +start(T, Opts, Mask, Nodes, Svc) -> + {_MRef, Pid} + = diameter_peer_fsm:start(T, Opts, {Mask, Nodes, Svc}), + Pid. + %% handle_call/3 handle_call(_, _, State) -> @@ -151,41 +159,59 @@ handle_info(T, #watchdog{} = State) -> ok -> {noreply, State}; #watchdog{} = S -> - event(State, S), + close(T, State), %% service expects 'close' message + event(T, State, S), %% before 'watchdog' {noreply, S}; stop -> ?LOG(stop, T), - event(State, State#watchdog{status = down}), + event(T, State, State#watchdog{status = down}), {stop, {shutdown, T}, State} - end; + end. -handle_info(T, S) -> - handle_info(T, upgrade(S)). +close({'DOWN', _, process, TPid, {shutdown, Reason}}, + #watchdog{transport = TPid, + parent = Pid}) -> + send(Pid, {close, self(), Reason}); -upgrade(S) -> - #watchdog{} = list_to_tuple(tuple_to_list(S) - ++ [?NOMASK, {nodes, true}, false]). +close(_, _) -> + ok. -event(#watchdog{status = T}, #watchdog{status = T}) -> +event(_, #watchdog{status = T}, #watchdog{status = T}) -> ok; -event(#watchdog{transport = undefined}, #watchdog{transport = undefined}) -> +event(_, #watchdog{transport = undefined}, #watchdog{transport = undefined}) -> ok; -event(#watchdog{status = From, transport = F, parent = Pid}, +event(Msg, + #watchdog{status = From, transport = F, parent = Pid}, #watchdog{status = To, transport = T}) -> - E = {tpid(F,T), From, To}, - notify(Pid, E), + TPid = tpid(F,T), + E = {[TPid | data(Msg, TPid, From, To)], From, To}, + send(Pid, {watchdog, self(), E}), ?LOG(transition, {self(), E}). +data(Msg, TPid, reopen, okay) -> + {recv, TPid, 'DWA', _Pkt} = Msg, %% assert + {TPid, T} = eraser(open), + [T]; + +data({open, TPid, _Hosts, T}, TPid, _From, To) + when To == okay; + To == reopen -> + [T]; + +data(_, _, _, _) -> + []. + tpid(_, Pid) when is_pid(Pid) -> Pid; + tpid(Pid, _) -> Pid. -notify(Pid, E) -> - Pid ! {watchdog, self(), E}. +send(Pid, T) -> + Pid ! T. %% terminate/2 @@ -215,15 +241,13 @@ transition(close, #watchdog{}) -> ok; %% Service is asking for the peer to be taken down gracefully. -transition({shutdown, Pid}, #watchdog{parent = Pid, - transport = undefined, - status = S}) -> - down = S, %% sanity check +transition({shutdown, Pid, _}, #watchdog{parent = Pid, + transport = undefined}) -> stop; -transition({shutdown = T, Pid}, #watchdog{parent = Pid, - transport = TPid} - = S) -> - TPid ! {T, self()}, +transition({shutdown = T, Pid, Reason}, #watchdog{parent = Pid, + transport = TPid} + = S) -> + send(TPid, {T, self(), Reason}), S#watchdog{shutdown = true}; %% Parent process has died, @@ -234,13 +258,9 @@ transition({'DOWN', _, process, Pid, _Reason}, %% Transport has accepted a connection. transition({accepted = T, TPid}, #watchdog{transport = TPid, parent = Pid}) -> - Pid ! {T, self(), TPid}, + send(Pid, {T, self(), TPid}), ok; -%% Transport is telling us that its impending death isn't failure. -transition({close, TPid, _Reason}, #watchdog{transport = TPid}) -> - stop; - %% STATE Event Actions New State %% ===== ------ ------- ---------- %% INITIAL Connection up SetWatchdog() OKAY @@ -255,15 +275,13 @@ transition({close, TPid, _Reason}, #watchdog{transport = TPid}) -> %% know the identity of the peer (ie. now) that we know that we're in %% state down rather than initial. -transition({open, TPid, Hosts, T} = Open, +transition({open, TPid, Hosts, _} = Open, #watchdog{transport = TPid, status = initial, - parent = Pid, restrict = {_, R}} = S) -> case okay(getr(restart), Hosts, R) of okay -> - open(Pid, {TPid, T}), set_watchdog(S#watchdog{status = okay}); reopen -> transition(Open, S#watchdog{status = down}) @@ -274,17 +292,15 @@ transition({open, TPid, Hosts, T} = Open, %% SetWatchdog() %% Pending = TRUE REOPEN -transition({open = P, TPid, _Hosts, T}, +transition({open = Key, TPid, _Hosts, T}, #watchdog{transport = TPid, - parent = Pid, status = down} = S) -> %% Store the info we need to notify the parent to reopen the %% connection after the requisite DWA's are received, at which %% time we eraser(open). The reopen message is a later addition, %% to communicate the new capabilities as soon as they're known. - putr(P, {TPid, T}), - Pid ! {reopen, self(), {TPid, T}}, + putr(Key, {TPid, T}), set_watchdog(send_watchdog(S#watchdog{status = reopen, num_dwa = 0})); @@ -296,26 +312,18 @@ transition({open = P, TPid, _Hosts, T}, %% REOPEN Connection down CloseConnection() %% SetWatchdog() DOWN -transition({'DOWN', _, process, TPid, _}, +transition({'DOWN', _, process, TPid, _Reason}, #watchdog{transport = TPid, - status = S, - shutdown = D}) - when S == initial; - D -> + shutdown = true}) -> stop; -transition({'DOWN', _, process, TPid, _}, - #watchdog{transport = TPid} +transition({'DOWN', _, process, TPid, _Reason}, + #watchdog{transport = TPid, + status = T} = S) -> - failover(S), - close(S), - set_watchdog(S#watchdog{status = down, + set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end, pending = false, transport = undefined}); -%% Any outstanding pending (or other messages from the transport) will -%% have arrived before 'DOWN' since the message comes from the same -%% process. Note that we could also get this message in the initial -%% state. %% Incoming message. transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) -> @@ -331,15 +339,11 @@ transition({timeout, _, tw}, #watchdog{}) -> %% State query. transition({state, Pid}, #watchdog{status = S}) -> - Pid ! {self(), S}, + send(Pid, {self(), S}), ok. %% =========================================================================== -monitor(Pid) -> - erlang:monitor(process, Pid), - Pid. - putr(Key, Val) -> put({?MODULE, Key}, Val). @@ -386,7 +390,7 @@ okay([{_,P}]) -> %% ... or it has. okay(C) -> - [_|_] = [P ! close || {_,P} <- C, self() /= P], + [_|_] = [send(P, close) || {_,P} <- C, self() /= P], reopen. %% set_watchdog/1 @@ -408,36 +412,13 @@ tw(T) tw({M,F,A}) -> apply(M,F,A). -%% open/2 - -open(Pid, {_,_} = T) -> - Pid ! {connection_up, self(), T}. - -%% failover/1 - -failover(#watchdog{status = okay, - parent = Pid}) -> - Pid ! {connection_down, self()}; - -failover(_) -> - ok. - -%% close/1 - -close(#watchdog{status = down}) -> - ok; - -close(#watchdog{parent = Pid}) -> - {{T, _}, _, _} = getr(restart), - T == accept andalso (Pid ! {close, self()}). - %% send_watchdog/1 send_watchdog(#watchdog{pending = false, transport = TPid, sequence = Mask} = S) -> - TPid ! {send, encode(getr(dwr), Mask)}, + send(TPid, {send, encode(getr(dwr), Mask)}), ?LOG(send, 'DWR'), S#watchdog{pending = true}. @@ -518,12 +499,10 @@ rcv(_, #watchdog{status = okay} = S) -> %% SetWatchdog() OKAY rcv('DWA', #watchdog{status = suspect} = S) -> - failback(S), set_watchdog(S#watchdog{status = okay, pending = false}); rcv(_, #watchdog{status = suspect} = S) -> - failback(S), set_watchdog(S#watchdog{status = okay}); %% REOPEN Receive DWA & Pending = FALSE @@ -531,10 +510,8 @@ rcv(_, #watchdog{status = suspect} = S) -> %% Failback() OKAY rcv('DWA', #watchdog{status = reopen, - num_dwa = 2 = N, - parent = Pid} + num_dwa = 2 = N} = S) -> - open(Pid, eraser(open)), S#watchdog{status = okay, num_dwa = N+1, pending = false}; @@ -553,11 +530,6 @@ rcv('DWA', #watchdog{status = reopen, rcv(_, #watchdog{status = reopen} = S) -> throwaway(S). -%% failback/1 - -failback(#watchdog{parent = Pid}) -> - Pid ! {connection_up, self()}. - %% timeout/1 %% %% The caller sets the watchdog on the return value. @@ -582,7 +554,6 @@ timeout(#watchdog{status = T, timeout(#watchdog{status = okay, pending = true} = S) -> - failover(S), S#watchdog{status = suspect}; %% SUSPECT Timer expires CloseConnection() @@ -599,7 +570,6 @@ timeout(#watchdog{status = T, when T == suspect; T == reopen, P, N < 0 -> exit(TPid, {shutdown, watchdog_timeout}), - close(S), S#watchdog{status = down}; %% REOPEN Timer expires & NumDWA = -1 @@ -633,7 +603,9 @@ timeout(#watchdog{status = reopen, %% process has died. We only need to handle state down since we start %% the first watchdog when transitioning out of initial. -timeout(#watchdog{status = down} = S) -> +timeout(#watchdog{status = T} = S) + when T == initial; + T == down -> restart(S). %% restart/1 @@ -659,11 +631,9 @@ restart({{connect, _} = T, Opts, Svc}, #watchdog{parent = Pid, sequence = Mask, restrict = {R,_}} = S) -> - Pid ! {reconnect, self()}, + send(Pid, {reconnect, self()}), Nodes = restrict_nodes(R), - S#watchdog{transport = monitor(diameter_peer_fsm:start(T, - Opts, - {Mask, Nodes, Svc})), + S#watchdog{transport = start(T, Opts, Mask, Nodes, Svc), restrict = {R, lists:member(node(), Nodes)}}; %% No restriction on the number of connections to the same peer: just |