From c609108ce017069a77708f80dae9e89c45ff222d Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Tue, 9 Apr 2013 01:38:34 +0200 Subject: Fix watchdog table leak A service process maintains a table keyed on watchdog process pids. When a watchdog process dies the corresponding entry should be removed but this was broken in commit f115a9f7, causing entries with watchdog state DOWN to accumulate. Watchdog processes die as a result of diameter:remove_transport/2, or when a peer reestablishes a connection in the listening case. Neither is typically a frequent occurrence. The fault manifests itself in the return value of diameter:service_info(SvcName, transport), which displays entries for watchdog processes that are no longer alive. --- lib/diameter/src/base/diameter_service.erl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl index e4d1c60727..112e83476d 100644 --- a/lib/diameter/src/base/diameter_service.erl +++ b/lib/diameter/src/base/diameter_service.erl @@ -861,17 +861,21 @@ watchdog(TPid, [], ?WD_SUSPECT, ?WD_OKAY, Wd, State) -> %% Watchdog has an unresponsive connection. watchdog(TPid, [], ?WD_OKAY, ?WD_SUSPECT = To, Wd, State) -> #watchdog{peer = TPid} = Wd, %% assert - connection_down(Wd, To, State); + watchdog_down(Wd, To, State); %% Watchdog has lost its connection. watchdog(TPid, [], _, ?WD_DOWN = To, Wd, #state{peerT = PeerT} = S) -> close(Wd, S), - connection_down(Wd, To, S), + watchdog_down(Wd, To, S), ets:delete(PeerT, TPid); watchdog(_, [], _, _, _, _) -> ok. +watchdog_down(Wd, To, #state{watchdogT = WatchdogT} = S) -> + insert(WatchdogT, Wd#watchdog{state = To}), + connection_down(Wd, To, S). + %% --------------------------------------------------------------------------- %% # connection_up/3 %% --------------------------------------------------------------------------- @@ -1029,21 +1033,17 @@ connection_down(#watchdog{state = ?WD_OKAY, remove_local_peer(SApps, {{TPid, Caps}, {SvcName, Apps}}, LDict), diameter_traffic:peer_down(TPid); -connection_down(#watchdog{}, #peer{}, _) -> - ok; - -connection_down(#watchdog{state = WS, +connection_down(#watchdog{state = ?WD_OKAY, peer = TPid} = Wd, To, - #state{watchdogT = WatchdogT, - peerT = PeerT} + #state{peerT = PeerT} = S) when is_atom(To) -> - insert(WatchdogT, Wd#watchdog{state = To}), - ?WD_OKAY == WS - andalso - connection_down(Wd, fetch(PeerT, TPid), S). + connection_down(Wd, #peer{} = fetch(PeerT, TPid), S); + +connection_down(#watchdog{}, _, _) -> + ok. remove_local_peer(SApps, T, LDict) -> lists:foldl(fun(A,D) -> rlp(A, T, D) end, LDict, SApps). -- cgit v1.2.3 From d2bd3f357446d6d580db0fafd16377855cc81492 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 8 Apr 2013 17:06:58 +0200 Subject: Add testcase to exercise reconnect behaviour --- lib/diameter/test/diameter_transport_SUITE.erl | 90 +++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/test/diameter_transport_SUITE.erl b/lib/diameter/test/diameter_transport_SUITE.erl index 893b7ba2f9..de097b886e 100644 --- a/lib/diameter/test/diameter_transport_SUITE.erl +++ b/lib/diameter/test/diameter_transport_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2011. All Rights Reserved. +%% Copyright Ericsson AB 2010-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -36,6 +36,7 @@ tcp_connect/1, sctp_accept/1, sctp_connect/1, + reconnect/1, reconnect/0, stop/1]). -export([accept/1, @@ -102,7 +103,8 @@ tc() -> [tcp_accept, tcp_connect, sctp_accept, - sctp_connect]. + sctp_connect, + reconnect]. init_per_suite(Config) -> [{sctp, have_sctp()} | Config]. @@ -164,6 +166,90 @@ connect(Prot) -> T = {Prot, make_ref()}, [] = ?util:run([{?MODULE, [init, X, T]} || X <- [gen_accept, connect]]). +%% =========================================================================== +%% reconnect/1 +%% +%% Exercise reconnection behaviour: that a connecting transport +%% doesn't try to establish a new connection until the old one is +%% broken. + +reconnect() -> + [{timetrap, {minutes, 4}}]. + +reconnect({listen, Ref}) -> + SvcName = make_ref(), + ok = start_service(SvcName), + LRef = ?util:listen(SvcName, tcp, [{watchdog_timer, 6000}]), + [_] = diameter_reg:wait({diameter_tcp, listener, {LRef, '_'}}), + true = diameter_reg:add_new({?MODULE, Ref, LRef}), + + %% Wait for partner to request transport death: kill to force the + %% peer to reconnect. + TPid = abort(SvcName, LRef, Ref), + + exit(TPid, kill), + + abort(SvcName, LRef, Ref); + +reconnect({connect, Ref}) -> + SvcName = make_ref(), + true = diameter:subscribe(SvcName), + ok = start_service(SvcName), + [{{_, _, LRef}, Pid}] = diameter_reg:wait({?MODULE, Ref, '_'}), + CRef = ?util:connect(SvcName, tcp, LRef, [{reconnect_timer, 2000}, + {watchdog_timer, 6000}]), + + %% Tell partner to kill transport after seeing that there are no + %% reconnection attempts. + abort(SvcName, Pid, Ref), + + %% Transport does down and is reestablished. + ?RECV(#diameter_event{service = SvcName, info = {down, CRef, _, _}}), + ?RECV(#diameter_event{service = SvcName, info = {reconnect, CRef, _}}), + ?RECV(#diameter_event{service = SvcName, info = {up, CRef, _, _, _}}), + + %% Kill again. + abort(SvcName, Pid, Ref), + + %% Wait for partner to die. + MRef = erlang:monitor(process, Pid), + ?RECV({'DOWN', MRef, process, _, _}); + +reconnect(_) -> + Ref = make_ref(), + [] = ?util:run([{?MODULE, [reconnect, {T, Ref}]} + || T <- [listen, connect]]). + +start_service(SvcName) -> + OH = io_lib:format("~p-~p-~p", tuple_to_list(now())), + Opts = [{application, [{dictionary, diameter_gen_base_rfc6733}, + {module, diameter_callback}]}, + {'Origin-Host', OH}, + {'Origin-Realm', OH ++ ".org"}, + {'Vendor-Id', 0}, + {'Product-Name', "x"}, + {'Auth-Application-Id', [0]}], + diameter:start_service(SvcName, Opts). + +abort(SvcName, Pid, Ref) + when is_pid(Pid) -> + receive + #diameter_event{service = SvcName, info = {reconnect, _, _}} = E -> + erlang:error(E) + after 45000 -> + ok + end, + Pid ! {abort, Ref}; + +abort(SvcName, LRef, Ref) + when is_reference(LRef) -> + ?RECV({abort, Ref}), + [[{ref, LRef}, {type, listen}, {options, _}, {accept, [_,_] = Ts} | _]] + %% assert on two accepting + = diameter:service_info(SvcName, transport), + [TPid] = [P || [{watchdog, {_,_,okay}}, {peer, {P,_}} | _] <- Ts], + TPid. + %% =========================================================================== %% =========================================================================== -- cgit v1.2.3 From b59386f5684250b823c40b7482df73afaf632bd9 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Tue, 9 Apr 2013 01:56:13 +0200 Subject: Minor doc fix --- lib/diameter/doc/src/diameter.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/diameter') diff --git a/lib/diameter/doc/src/diameter.xml b/lib/diameter/doc/src/diameter.xml index 7ea93d480b..48914abaa2 100644 --- a/lib/diameter/doc/src/diameter.xml +++ b/lib/diameter/doc/src/diameter.xml @@ -780,10 +780,10 @@ connections to the same peer.

If type [node()] then a connection is rejected if another already exists on any of the specified nodes. -Values of type false, node, nodes or +Types false, node, nodes and &evaluable; are equivalent to -values [], [node()], [node()|nodes()] and the -evaluated value, respectively, evaluation of each expression taking +[], [node()], [node()|nodes()] and the +evaluated value respectively, evaluation of each expression taking place whenever a new connection is to be established. Note that false allows an unlimited number of connections to be established with the same peer.

-- cgit v1.2.3