diff options
Diffstat (limited to 'lib/diameter/src/base/diameter_watchdog.erl')
-rw-r--r-- | lib/diameter/src/base/diameter_watchdog.erl | 89 |
1 files changed, 65 insertions, 24 deletions
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index 53e659e3f6..eff5096745 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -49,8 +49,6 @@ -define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)). --define(CHOOSE(B,T,F), if (B) -> T; true -> F end). - -record(config, {suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY @@ -221,7 +219,6 @@ dict0(_, _, Acc) -> Acc. config_error(T) -> - diameter_lib:error_report(configuration_error, T), exit({shutdown, {configuration_error, T}}). %% handle_call/3 @@ -270,7 +267,7 @@ event(Msg, TPid = tpid(F,T), E = {[TPid | data(Msg, TPid, From, To)], From, To}, send(Pid, {watchdog, self(), E}), - ?LOG(transition, {self(), E}). + ?LOG(transition, {From, To}). data(Msg, TPid, reopen, okay) -> {recv, TPid, 'DWA', _Pkt} = Msg, %% assert @@ -313,14 +310,13 @@ code_change(_, State, _) -> %% The state transitions documented here are extracted from RFC 3539, %% the commentary is ours. -%% Service or watchdog is telling the watchdog of an accepting -%% transport to die after connect_timer expiry or reestablished -%% connection (in another transport process) respectively. -transition(close, #watchdog{status = down}) -> +%% Service is telling the watchdog of an accepting transport to die +%% following transport death in state INITIAL, or after connect_timer +%% expiry; or another watchdog is saying the same after reestablishing +%% a connection previously had by this one. +transition(close, #watchdog{}) -> {{accept, _}, _, _} = getr(restart), %% assert stop; -transition(close, #watchdog{}) -> - ok; %% Service is asking for the peer to be taken down gracefully. transition({shutdown, Pid, _}, #watchdog{parent = Pid, @@ -332,6 +328,11 @@ transition({shutdown = T, Pid, Reason}, #watchdog{parent = Pid, send(TPid, {T, self(), Reason}), S#watchdog{shutdown = true}; +%% Transport is telling us that DPA has been sent in response to DPR: +%% its death should lead to ours. +transition({'DPR', TPid}, #watchdog{transport = TPid} = S) -> + S#watchdog{shutdown = true}; + %% Parent process has died, transition({'DOWN', _, process, Pid, _Reason}, #watchdog{parent = Pid}) -> @@ -403,18 +404,39 @@ transition({open = Key, TPid, _Hosts, T}, %% REOPEN Connection down CloseConnection() %% SetWatchdog() DOWN +%% Transport has died after DPA or service requested termination ... transition({'DOWN', _, process, TPid, _Reason}, #watchdog{transport = TPid, shutdown = true}) -> stop; +%% ... or not. transition({'DOWN', _, process, TPid, _Reason}, #watchdog{transport = TPid, - status = T} - = S) -> - set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down), - pending = false, - transport = undefined}); + status = T, + restrict = {_,R}} + = S0) -> + S = S0#watchdog{pending = false, + transport = undefined}, + {{M,_}, _, _} = getr(restart), + + %% Close an accepting watchdog immediately if there's no + %% restriction on the number of connections to the same peer: the + %% state machine never enters state REOPEN in this case. The + %% 'close' message (instead of stop) is so as not to bypass the + %% sending of messages to the service process in handle_info/2. + + if T /= initial, M == accept, not R -> + send(self(), close), + S#watchdog{status = down}; + T /= initial -> + set_watchdog(S#watchdog{status = down}); + M == connect -> + set_watchdog(S); + M == accept -> + send(self(), close), + S + end; %% Incoming message. transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) -> @@ -454,9 +476,7 @@ encode(dwr = M, Dict0, Mask) -> hop_by_hop_id = Seq}, Pkt = #diameter_packet{header = Hdr, msg = Msg}, - #diameter_packet{bin = Bin} = diameter_codec:encode(Dict0, Pkt), - Bin; - + diameter_codec:encode(Dict0, Pkt); encode(dwa, Dict0, #diameter_packet{header = H, transport_data = TD} = ReqPkt) -> @@ -525,10 +545,14 @@ send_watchdog(#watchdog{pending = false, dictionary = Dict0, sequence = Mask} = S) -> - send(TPid, {send, encode(dwr, Dict0, Mask)}), + #diameter_packet{bin = Bin} = EPkt = encode(dwr, Dict0, Mask), + diameter_traffic:incr(send, EPkt, TPid, Dict0), + send(TPid, {send, Bin}), ?LOG(send, 'DWR'), S#watchdog{pending = true}. +%% Dont' count encode errors since we don't expect any on DWR/DWA. + %% recv/3 recv(Name, Pkt, S) -> @@ -545,13 +569,29 @@ recv(Name, Pkt, S) -> rcv('DWR', Pkt, #watchdog{transport = TPid, dictionary = Dict0}) -> - send(TPid, {send, encode(dwa, Dict0, Pkt)}), + ?LOG(recv, 'DWR'), + DPkt = diameter_codec:decode(Dict0, Pkt), + diameter_traffic:incr(recv, DPkt, TPid, Dict0), + diameter_traffic:incr_error(recv, DPkt, TPid, Dict0), + EPkt = encode(dwa, Dict0, Pkt), + diameter_traffic:incr(send, EPkt, TPid, Dict0), + diameter_traffic:incr_rc(send, EPkt, TPid, Dict0), + + send(TPid, {send, EPkt}), ?LOG(send, 'DWA'); +rcv('DWA', Pkt, #watchdog{transport = TPid, + dictionary = Dict0}) -> + ?LOG(recv, 'DWA'), + diameter_traffic:incr(recv, Pkt, TPid, Dict0), + diameter_traffic:incr_rc(recv, + diameter_codec:decode(Dict0, Pkt), + TPid, + Dict0); + rcv(N, _, _) when N == 'CER'; N == 'CEA'; - N == 'DWA'; N == 'DPR'; N == 'DPA' -> false; @@ -740,7 +780,7 @@ timeout(#watchdog{status = T} = S) restart(#watchdog{transport = undefined} = S) -> restart(getr(restart), S); -restart(S) -> +restart(S) -> %% reconnect has won race with timeout S. %% restart/2 @@ -770,9 +810,10 @@ restart({{connect, _} = T, Opts, Svc}, %% die. Note that a state machine never enters state REOPEN in this %% case. restart({{accept, _}, _, _}, #watchdog{restrict = {_, false}}) -> - stop; + stop; %% 'DOWN' was in old code: 'close' was not sent -%% Otherwise hang around until told to die. +%% Otherwise hang around until told to die, either by the service or +%% by another watchdog. restart({{accept, _}, _, _}, S) -> S. |