From c609108ce017069a77708f80dae9e89c45ff222d Mon Sep 17 00:00:00 2001
From: Anders Svensson
Date: Tue, 9 Apr 2013 01:38:34 +0200
Subject: Fix watchdog table leak
A service process maintains a table keyed on watchdog process pids. When
a watchdog process dies, the corresponding entry should be removed, but
this was broken in commit f115a9f7, causing entries with watchdog state
DOWN to accumulate.
Watchdog processes die as a result of diameter:remove_transport/2, or
when a peer reestablishes a connection in the listening case. Neither is
typically a frequent occurrence.
The fault manifests itself in the return value of
diameter:service_info(SvcName, transport), which displays entries for
watchdog processes that are no longer alive.
---
lib/diameter/src/base/diameter_service.erl | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
(limited to 'lib/diameter')
diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl
index e4d1c60727..112e83476d 100644
--- a/lib/diameter/src/base/diameter_service.erl
+++ b/lib/diameter/src/base/diameter_service.erl
@@ -861,17 +861,21 @@ watchdog(TPid, [], ?WD_SUSPECT, ?WD_OKAY, Wd, State) ->
%% Watchdog has an unresponsive connection.
watchdog(TPid, [], ?WD_OKAY, ?WD_SUSPECT = To, Wd, State) ->
#watchdog{peer = TPid} = Wd, %% assert
- connection_down(Wd, To, State);
+ watchdog_down(Wd, To, State);
%% Watchdog has lost its connection.
watchdog(TPid, [], _, ?WD_DOWN = To, Wd, #state{peerT = PeerT} = S) ->
close(Wd, S),
- connection_down(Wd, To, S),
+ watchdog_down(Wd, To, S),
ets:delete(PeerT, TPid);
watchdog(_, [], _, _, _, _) ->
ok.
+watchdog_down(Wd, To, #state{watchdogT = WatchdogT} = S) ->
+ insert(WatchdogT, Wd#watchdog{state = To}),
+ connection_down(Wd, To, S).
+
%% ---------------------------------------------------------------------------
%% # connection_up/3
%% ---------------------------------------------------------------------------
@@ -1029,21 +1033,17 @@ connection_down(#watchdog{state = ?WD_OKAY,
remove_local_peer(SApps, {{TPid, Caps}, {SvcName, Apps}}, LDict),
diameter_traffic:peer_down(TPid);
-connection_down(#watchdog{}, #peer{}, _) ->
- ok;
-
-connection_down(#watchdog{state = WS,
+connection_down(#watchdog{state = ?WD_OKAY,
peer = TPid}
= Wd,
To,
- #state{watchdogT = WatchdogT,
- peerT = PeerT}
+ #state{peerT = PeerT}
= S)
when is_atom(To) ->
- insert(WatchdogT, Wd#watchdog{state = To}),
- ?WD_OKAY == WS
- andalso
- connection_down(Wd, fetch(PeerT, TPid), S).
+ connection_down(Wd, #peer{} = fetch(PeerT, TPid), S);
+
+connection_down(#watchdog{}, _, _) ->
+ ok.
remove_local_peer(SApps, T, LDict) ->
lists:foldl(fun(A,D) -> rlp(A, T, D) end, LDict, SApps).
--
cgit v1.2.3
From d2bd3f357446d6d580db0fafd16377855cc81492 Mon Sep 17 00:00:00 2001
From: Anders Svensson
Date: Mon, 8 Apr 2013 17:06:58 +0200
Subject: Add testcase to exercise reconnect behaviour
---
lib/diameter/test/diameter_transport_SUITE.erl | 90 +++++++++++++++++++++++++-
1 file changed, 88 insertions(+), 2 deletions(-)
(limited to 'lib/diameter')
diff --git a/lib/diameter/test/diameter_transport_SUITE.erl b/lib/diameter/test/diameter_transport_SUITE.erl
index 893b7ba2f9..de097b886e 100644
--- a/lib/diameter/test/diameter_transport_SUITE.erl
+++ b/lib/diameter/test/diameter_transport_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2010-2013. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -36,6 +36,7 @@
tcp_connect/1,
sctp_accept/1,
sctp_connect/1,
+ reconnect/1, reconnect/0,
stop/1]).
-export([accept/1,
@@ -102,7 +103,8 @@ tc() ->
[tcp_accept,
tcp_connect,
sctp_accept,
- sctp_connect].
+ sctp_connect,
+ reconnect].
init_per_suite(Config) ->
[{sctp, have_sctp()} | Config].
@@ -164,6 +166,90 @@ connect(Prot) ->
T = {Prot, make_ref()},
[] = ?util:run([{?MODULE, [init, X, T]} || X <- [gen_accept, connect]]).
+%% ===========================================================================
+%% reconnect/1
+%%
+%% Exercise reconnection behaviour: that a connecting transport
+%% doesn't try to establish a new connection until the old one is
+%% broken.
+
+reconnect() ->
+ [{timetrap, {minutes, 4}}].
+
+reconnect({listen, Ref}) ->
+ SvcName = make_ref(),
+ ok = start_service(SvcName),
+ LRef = ?util:listen(SvcName, tcp, [{watchdog_timer, 6000}]),
+ [_] = diameter_reg:wait({diameter_tcp, listener, {LRef, '_'}}),
+ true = diameter_reg:add_new({?MODULE, Ref, LRef}),
+
+ %% Wait for partner to request transport death: kill to force the
+ %% peer to reconnect.
+ TPid = abort(SvcName, LRef, Ref),
+
+ exit(TPid, kill),
+
+ abort(SvcName, LRef, Ref);
+
+reconnect({connect, Ref}) ->
+ SvcName = make_ref(),
+ true = diameter:subscribe(SvcName),
+ ok = start_service(SvcName),
+ [{{_, _, LRef}, Pid}] = diameter_reg:wait({?MODULE, Ref, '_'}),
+ CRef = ?util:connect(SvcName, tcp, LRef, [{reconnect_timer, 2000},
+ {watchdog_timer, 6000}]),
+
+ %% Tell partner to kill transport after seeing that there are no
+ %% reconnection attempts.
+ abort(SvcName, Pid, Ref),
+
+ %% Transport goes down and is reestablished.
+ ?RECV(#diameter_event{service = SvcName, info = {down, CRef, _, _}}),
+ ?RECV(#diameter_event{service = SvcName, info = {reconnect, CRef, _}}),
+ ?RECV(#diameter_event{service = SvcName, info = {up, CRef, _, _, _}}),
+
+ %% Kill again.
+ abort(SvcName, Pid, Ref),
+
+ %% Wait for partner to die.
+ MRef = erlang:monitor(process, Pid),
+ ?RECV({'DOWN', MRef, process, _, _});
+
+reconnect(_) ->
+ Ref = make_ref(),
+ [] = ?util:run([{?MODULE, [reconnect, {T, Ref}]}
+ || T <- [listen, connect]]).
+
+start_service(SvcName) ->
+ OH = io_lib:format("~p-~p-~p", tuple_to_list(now())),
+ Opts = [{application, [{dictionary, diameter_gen_base_rfc6733},
+ {module, diameter_callback}]},
+ {'Origin-Host', OH},
+ {'Origin-Realm', OH ++ ".org"},
+ {'Vendor-Id', 0},
+ {'Product-Name', "x"},
+ {'Auth-Application-Id', [0]}],
+ diameter:start_service(SvcName, Opts).
+
+abort(SvcName, Pid, Ref)
+ when is_pid(Pid) ->
+ receive
+ #diameter_event{service = SvcName, info = {reconnect, _, _}} = E ->
+ erlang:error(E)
+ after 45000 ->
+ ok
+ end,
+ Pid ! {abort, Ref};
+
+abort(SvcName, LRef, Ref)
+ when is_reference(LRef) ->
+ ?RECV({abort, Ref}),
+ [[{ref, LRef}, {type, listen}, {options, _}, {accept, [_,_] = Ts} | _]]
+ %% assert on two accepting
+ = diameter:service_info(SvcName, transport),
+ [TPid] = [P || [{watchdog, {_,_,okay}}, {peer, {P,_}} | _] <- Ts],
+ TPid.
+
%% ===========================================================================
%% ===========================================================================
--
cgit v1.2.3
From b59386f5684250b823c40b7482df73afaf632bd9 Mon Sep 17 00:00:00 2001
From: Anders Svensson
Date: Tue, 9 Apr 2013 01:56:13 +0200
Subject: Minor doc fix
---
lib/diameter/doc/src/diameter.xml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'lib/diameter')
diff --git a/lib/diameter/doc/src/diameter.xml b/lib/diameter/doc/src/diameter.xml
index 7ea93d480b..48914abaa2 100644
--- a/lib/diameter/doc/src/diameter.xml
+++ b/lib/diameter/doc/src/diameter.xml
@@ -780,10 +780,10 @@ connections to the same peer.
If type [node()] then a connection is rejected if another already
exists on any of the specified nodes.
-Values of type false, node, nodes or
+Types false, node, nodes and
&evaluable; are equivalent to
-values [], [node()], [node()|nodes()] and the
-evaluated value, respectively, evaluation of each expression taking
+[], [node()], [node()|nodes()] and the
+evaluated value respectively, evaluation of each expression taking
place whenever a new connection is to be established.
Note that false allows an unlimited number of connections to be
established with the same peer.
--
cgit v1.2.3