aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnders Svensson <anders@erlang.org>2014-05-25 09:28:44 +0200
committerAnders Svensson <anders@erlang.org>2014-05-25 09:28:44 +0200
commitd9ca987b0440662816589895bb5adf6f27322a25 (patch)
tree941ce8a088359dd8f279322ac20bf5564a46ceb4
parenta9ee1f31d669143303f38b3f8d4568fad568d841 (diff)
parent53aa56b26eeb97c99bd125b53121f5a3f4767949 (diff)
downloadotp-d9ca987b0440662816589895bb5adf6f27322a25.tar.gz
otp-d9ca987b0440662816589895bb5adf6f27322a25.tar.bz2
otp-d9ca987b0440662816589895bb5adf6f27322a25.zip
Merge branch 'anders/diameter/watchdog_leak/OTP-11934' into maint
* anders/diameter/watchdog_leak/OTP-11934: Simplify sending of 'close' to watchdog Fix watchdog table leak
-rw-r--r--lib/diameter/src/base/diameter_service.erl22
-rw-r--r--lib/diameter/src/base/diameter_watchdog.erl51
2 files changed, 41 insertions, 32 deletions
diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl
index 0faf52c1ec..0dc3eb7123 100644
--- a/lib/diameter/src/base/diameter_service.erl
+++ b/lib/diameter/src/base/diameter_service.erl
@@ -881,7 +881,7 @@ watchdog(TPid, [], ?WD_OKAY, ?WD_SUSPECT = To, Wd, State) ->
%% Watchdog has lost its connection.
watchdog(TPid, [], _, ?WD_DOWN = To, Wd, #state{peerT = PeerT} = S) ->
- close(Wd, S),
+ close(Wd),
watchdog_down(Wd, To, S),
ets:delete(PeerT, TPid);
@@ -1199,26 +1199,16 @@ tc(false = No, _, _) -> %% removed
%% another watchdog to be able to detect that it should transition
%% from initial into reopen rather than okay. That someone is either
%% the accepting watchdog upon reception of a CER from the previously
-%% connected peer, or us after connect_timer timeout.
+%% connected peer, or us after connect_timer timeout or immediately.
-close(#watchdog{type = connect}, _) ->
+close(#watchdog{type = connect}) ->
ok;
+
close(#watchdog{type = accept,
pid = Pid,
- ref = Ref,
- options = Opts},
- #state{service_name = SvcName}) ->
- c(Pid, diameter_config:have_transport(SvcName, Ref), Opts).
-
-%% Tell watchdog to (maybe) die later ...
-c(Pid, true, Opts) ->
+ options = Opts}) ->
Tc = connect_timer(Opts, 2*?DEFAULT_TC),
- erlang:send_after(Tc, Pid, close);
-
-%% ... or now.
-c(Pid, false, _Opts) ->
- Pid ! close.
-
+ erlang:send_after(Tc, Pid, close).
%% The RFC's only document the behaviour of Tc, our connect_timer,
%% for the establishment of connections but we also give
%% connect_timer semantics for a listener, being the time within
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl
index 53e659e3f6..017a520467 100644
--- a/lib/diameter/src/base/diameter_watchdog.erl
+++ b/lib/diameter/src/base/diameter_watchdog.erl
@@ -49,8 +49,6 @@
-define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)).
--define(CHOOSE(B,T,F), if (B) -> T; true -> F end).
-
-record(config,
{suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT
okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY
@@ -313,14 +311,13 @@ code_change(_, State, _) ->
%% The state transitions documented here are extracted from RFC 3539,
%% the commentary is ours.
-%% Service or watchdog is telling the watchdog of an accepting
-%% transport to die after connect_timer expiry or reestablished
-%% connection (in another transport process) respectively.
-transition(close, #watchdog{status = down}) ->
+%% Service is telling the watchdog of an accepting transport to die
+%% following transport death in state INITIAL, or after connect_timer
+%% expiry; or another watchdog is saying the same after reestablishing
+%% a connection previously had by this one.
+transition(close, #watchdog{}) ->
{{accept, _}, _, _} = getr(restart), %% assert
stop;
-transition(close, #watchdog{}) ->
- ok;
%% Service is asking for the peer to be taken down gracefully.
transition({shutdown, Pid, _}, #watchdog{parent = Pid,
@@ -403,18 +400,39 @@ transition({open = Key, TPid, _Hosts, T},
%% REOPEN Connection down CloseConnection()
%% SetWatchdog() DOWN
+%% Transport has died after service requested termination ...
transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
shutdown = true}) ->
stop;
+%% ... or not.
transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
- status = T}
- = S) ->
- set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down),
- pending = false,
- transport = undefined});
+ status = T,
+ restrict = {_,R}}
+ = S0) ->
+ S = S0#watchdog{pending = false,
+ transport = undefined},
+ {{M,_}, _, _} = getr(restart),
+
+ %% Close an accepting watchdog immediately if there's no
+ %% restriction on the number of connections to the same peer: the
+ %% state machine never enters state REOPEN in this case. The
+ %% 'close' message (instead of stop) is so as not to bypass the
+ %% sending of messages to the service process in handle_info/2.
+
+ if T /= initial, M == accept, not R ->
+ send(self(), close),
+ S#watchdog{status = down};
+ T /= initial ->
+ set_watchdog(S#watchdog{status = down});
+ M == connect ->
+ set_watchdog(S);
+ M == accept ->
+ send(self(), close),
+ S
+ end;
%% Incoming message.
transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) ->
@@ -740,7 +758,7 @@ timeout(#watchdog{status = T} = S)
restart(#watchdog{transport = undefined} = S) ->
restart(getr(restart), S);
-restart(S) ->
+restart(S) -> %% reconnect has won race with timeout
S.
%% restart/2
@@ -770,9 +788,10 @@ restart({{connect, _} = T, Opts, Svc},
%% die. Note that a state machine never enters state REOPEN in this
%% case.
restart({{accept, _}, _, _}, #watchdog{restrict = {_, false}}) ->
- stop;
+ stop; %% 'DOWN' was in old code: 'close' was not sent
-%% Otherwise hang around until told to die.
+%% Otherwise hang around until told to die, either by the service or
+%% by another watchdog.
restart({{accept, _}, _, _}, S) ->
S.