diff options
author | Anders Svensson <[email protected]> | 2014-05-19 09:10:00 +0200 |
---|---|---|
committer | Anders Svensson <[email protected]> | 2014-05-20 02:43:03 +0200 |
commit | 6d087921e7608691ed5cd554484a13b961d3620f (patch) | |
tree | c2000cd0d34479bbd56584bb91098ade1fda5ed2 /lib/diameter/src | |
parent | 25237481ccccd3ddfa74582dc267632ad618ba30 (diff) | |
download | otp-6d087921e7608691ed5cd554484a13b961d3620f.tar.gz otp-6d087921e7608691ed5cd554484a13b961d3620f.tar.bz2 otp-6d087921e7608691ed5cd554484a13b961d3620f.zip |
Fix watchdog table leak
Commit ef5fddcb (diameter-1.4.1, R16B) caused the leak in the case of an
accepting watchdog with restrict_connections = false. It (correctly)
ensured the state remained at INITIAL but a subsequent 'close' message
to terminate the process was ignored since the state was not DOWN. In
fact, no 'close' was sent since there was no state transition or
previous connection: the former triggers the message from
diameter_service, the latter from diameter_watchdog. The message is now
sent to self() from the watchdog itself.
Send 'close' in the same way when multiple connections to the same peer
are allowed, to avoid waiting for a watchdog timer expiry for the
process to terminate in this case.
Diffstat (limited to 'lib/diameter/src')
-rw-r--r-- | lib/diameter/src/base/diameter_service.erl | 6 | ||||
-rw-r--r-- | lib/diameter/src/base/diameter_watchdog.erl | 51 |
2 files changed, 37 insertions, 20 deletions
diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl index 70e66537ed..4b2e5b674c 100644 --- a/lib/diameter/src/base/diameter_service.erl +++ b/lib/diameter/src/base/diameter_service.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2013. All Rights Reserved. +%% Copyright Ericsson AB 2010-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -1188,7 +1188,7 @@ tc(false = No, _, _) -> %% removed %% another watchdog to be able to detect that it should transition %% from initial into reopen rather than okay. That someone is either %% the accepting watchdog upon reception of a CER from the previously -%% connected peer, or us after connect_timer timeout. +%% connected peer, or us after connect_timer timeout or immediately. close(#watchdog{type = connect}, _) -> ok; @@ -1199,7 +1199,7 @@ close(#watchdog{type = accept, #state{service_name = SvcName}) -> c(Pid, diameter_config:have_transport(SvcName, Ref), Opts). -%% Tell watchdog to (maybe) die later ... +%% Tell watchdog to die later ... c(Pid, true, Opts) -> Tc = connect_timer(Opts, 2*?DEFAULT_TC), erlang:send_after(Tc, Pid, close); diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index 9a1c8b6585..62dc32af69 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2013. All Rights Reserved. +%% Copyright Ericsson AB 2010-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -49,8 +49,6 @@ -define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)). --define(CHOOSE(B,T,F), if (B) -> T; true -> F end). - -record(config, {suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY @@ -328,14 +326,13 @@ code_change(_, State, _) -> %% The state transitions documented here are extracted from RFC 3539, %% the commentary is ours. -%% Service or watchdog is telling the watchdog of an accepting -%% transport to die after connect_timer expiry or reestablished -%% connection (in another transport process) respectively. -transition(close, #watchdog{status = down}) -> +%% Service is telling the watchdog of an accepting transport to die +%% following transport death in state INITIAL, or after connect_timer +%% expiry; or another watchdog is saying the same after reestablishing +%% a connection previously had by this one. +transition(close, #watchdog{}) -> {{accept, _}, _, _} = getr(restart), %% assert stop; -transition(close, #watchdog{}) -> - ok; %% Service is asking for the peer to be taken down gracefully. transition({shutdown, Pid, _}, #watchdog{parent = Pid, @@ -425,11 +422,30 @@ transition({'DOWN', _, process, TPid, _Reason}, transition({'DOWN', _, process, TPid, _Reason}, #watchdog{transport = TPid, - status = T} - = S) -> - set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down), - pending = false, - transport = undefined}); + status = T, + restrict = {_,R}} + = S0) -> + S = S0#watchdog{pending = false, + transport = undefined}, + {{M,_}, _, _} = getr(restart), + + %% Close an accepting watchdog immediately if there's no + %% restriction on the number of connections to the same peer: the + %% state machine never enters state REOPEN in this case. The + %% 'close' message (instead of stop) is so as not to bypass the + %% sending of messages to the service process in handle_info/2. + + if T /= initial, M == accept, not R -> + send(self(), close), + S#watchdog{status = down}; + T /= initial -> + set_watchdog(S#watchdog{status = down}); + M == connect -> + set_watchdog(S); + M == accept -> + send(self(), close), + S + end; %% Incoming message. transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) -> @@ -755,7 +771,7 @@ timeout(#watchdog{status = T} = S) restart(#watchdog{transport = undefined} = S) -> restart(getr(restart), S); -restart(S) -> +restart(S) -> %% reconnect has won race with timeout S. %% restart/2 @@ -785,9 +801,10 @@ restart({{connect, _} = T, Opts, Svc}, %% die. Note that a state machine never enters state REOPEN in this %% case. restart({{accept, _}, _, _}, #watchdog{restrict = {_, false}}) -> - stop; + stop; %% 'DOWN' was in old code: 'close' was not sent -%% Otherwise hang around until told to die. +%% Otherwise hang around until told to die, either by the service or +%% by another watchdog. restart({{accept, _}, _, _}, S) -> S. |