From ae92510847831609b12673be1d70106b395a2f36 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Thu, 25 Sep 2014 00:45:06 +0200 Subject: Fix remote diameter_request table leak An outgoing request whose pick_peer callback selected a transport on another node resulted in an orphaned diameter_request entry on that node. --- lib/diameter/src/base/diameter_traffic.erl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/diameter/src/base/diameter_traffic.erl b/lib/diameter/src/base/diameter_traffic.erl index 280d09d7e8..e4812f3dc9 100644 --- a/lib/diameter/src/base/diameter_traffic.erl +++ b/lib/diameter/src/base/diameter_traffic.erl @@ -1632,12 +1632,23 @@ send_request(TPid, #diameter_packet{} = Pkt, Req, SvcName, Timeout) -> %% send/1 -send({TPid, Pkt, #request{handler = Pid} = Req, SvcName, Timeout, TRef}) -> - Ref = send_request(TPid, - Pkt, - Req#request{handler = self()}, - SvcName, - Timeout), +send({TPid, Pkt, #request{handler = Pid} = Req0, SvcName, Timeout, TRef}) -> + Seqs = diameter_codec:sequence_numbers(Pkt), + Req = Req0#request{handler = self()}, + Ref = send_request(TPid, Pkt, Req, SvcName, Timeout), + + try + recv(TPid, Pid, TRef, Ref) + after + %% Remove only the entry for this specific send since a resend + %% from the originating node can pick another transport on + %% this one. + ets:delete_object(?REQUEST_TABLE, {Seqs, Req, Ref}) + end. + +%% recv/4 + +recv(TPid, Pid, TRef, Ref) -> receive {answer, _, _, _, _} = A -> Pid ! A; -- cgit v1.2.3 From e12f7043f05431c4278f1ed506e0f76b1c95152d Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Fri, 10 Oct 2014 17:18:04 +0200 Subject: Fix handling of 3xxx Result-Code without E-bit Commit 00584303 broke the population of the errors field of the diameter_packet record when an incoming request with an E-bit/Result-Code mismatch was decoded. Instead of the intended {5004, #diameter_avp{value = integer()}}, the value was a 4-tuple containing the integer Result-Code. --- lib/diameter/src/base/diameter_traffic.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/diameter/src/base/diameter_traffic.erl b/lib/diameter/src/base/diameter_traffic.erl index 280d09d7e8..8becf7bd5f 100644 --- a/lib/diameter/src/base/diameter_traffic.erl +++ b/lib/diameter/src/base/diameter_traffic.erl @@ -1484,7 +1484,7 @@ handle_A(Pkt, SvcName, Dict, Dict0, App, #request{transport = TPid} = Req) -> %% a missing AVP. If both are optional in the dictionary %% then this isn't a decode error: just continue on. answer(Pkt, SvcName, App, Req); - exit: {invalid_error_bit, RC} -> + exit: {invalid_error_bit, {_, _, _, RC}} -> #diameter_packet{errors = Es} = Pkt, E = {5004, #diameter_avp{name = 'Result-Code', value = RC}}, -- cgit v1.2.3 From c3b9ebc6cf9619c220b75601b8f02737052901b3 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 02:07:58 +0100 Subject: Rename reconnect_timer to connect_timer in examples and suites The timer was renamed in commit abea7186. --- lib/diameter/examples/code/peer.erl | 2 +- lib/diameter/test/diameter_config_SUITE.erl | 2 +- lib/diameter/test/diameter_event_SUITE.erl | 6 +++--- lib/diameter/test/diameter_transport_SUITE.erl | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/diameter/examples/code/peer.erl b/lib/diameter/examples/code/peer.erl index b4ee17e4b7..7519abfb2c 100644 --- a/lib/diameter/examples/code/peer.erl +++ b/lib/diameter/examples/code/peer.erl @@ -74,7 +74,7 @@ start(Name, Opts) | {error, term()}. connect(Name, T) -> - diameter:add_transport(Name, {connect, [{reconnect_timer, 5000} + diameter:add_transport(Name, {connect, [{connect_timer, 5000} | client(T)]}). %% listen/2 diff --git a/lib/diameter/test/diameter_config_SUITE.erl b/lib/diameter/test/diameter_config_SUITE.erl index d10ee83ba4..ad5b3f9420 100644 --- a/lib/diameter/test/diameter_config_SUITE.erl +++ b/lib/diameter/test/diameter_config_SUITE.erl @@ -157,7 +157,7 @@ {length_errors, [[exit], [handle], [discard]], [[x]]}, - {reconnect_timer, + {connect_timer, [[3000]], [[infinity]]}, {watchdog_timer, diff --git a/lib/diameter/test/diameter_event_SUITE.erl b/lib/diameter/test/diameter_event_SUITE.erl index 94b4967921..64199895be 100644 --- a/lib/diameter/test/diameter_event_SUITE.erl +++ b/lib/diameter/test/diameter_event_SUITE.erl @@ -114,12 +114,12 @@ up(Config) -> %% Connect with non-matching capabilities and expect CEA from the peer %% to indicate as much and then for the transport to be restarted -%% (after reconnect_timer). +%% (after connect_timer). down(Config) -> {Svc, Ref} = connect(Config, [{capabilities, [{'Acct-Application-Id', [?DICT_ACCT:id()]}]}, {applications, [?DICT_ACCT]}, - {reconnect_timer, 5000}]), + {connect_timer, 5000}]), start = event(Svc), {closed, Ref, {'CEA', ?NO_COMMON_APP, _, #diameter_packet{}}, _} = event(Svc), @@ -129,7 +129,7 @@ down(Config) -> %% CEA and cause the client to timeout. cea_timeout(Config) -> {Svc, Ref} = connect(Config, [{capx_timeout, ?SERVER_CAPX_TMO div 2}, - {reconnect_timer, 2*?SERVER_CAPX_TMO}]), + {connect_timer, 2*?SERVER_CAPX_TMO}]), start = event(Svc), {closed, Ref, {'CEA', timeout}, _} = event(Svc). diff --git a/lib/diameter/test/diameter_transport_SUITE.erl b/lib/diameter/test/diameter_transport_SUITE.erl index 9408fae62c..fcffa69c24 100644 --- a/lib/diameter/test/diameter_transport_SUITE.erl +++ b/lib/diameter/test/diameter_transport_SUITE.erl @@ -194,7 +194,7 @@ reconnect({connect, Ref}) -> true = diameter:subscribe(SvcName), ok = start_service(SvcName), [{{_, _, LRef}, Pid}] = diameter_reg:wait({?MODULE, Ref, '_'}), - CRef = ?util:connect(SvcName, tcp, LRef, [{reconnect_timer, 2000}, + CRef = ?util:connect(SvcName, tcp, LRef, [{connect_timer, 2000}, {watchdog_timer, 6000}]), %% Tell partner to kill transport after seeing that there are no -- cgit v1.2.3 From 3b51661f1f9f70cf820026cb345523c05aad2dfa Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 09:36:09 +0100 Subject: Check {connect,watchdog}_timer distinction in event testcases The connect timer is currently ignored by a connecting transport, so the check causes one testcase to fail. --- lib/diameter/test/diameter_event_SUITE.erl | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/diameter/test/diameter_event_SUITE.erl b/lib/diameter/test/diameter_event_SUITE.erl index 64199895be..f43f111d20 100644 --- a/lib/diameter/test/diameter_event_SUITE.erl +++ b/lib/diameter/test/diameter_event_SUITE.erl @@ -107,10 +107,18 @@ start_server(Config) -> %% Connect with matching capabilities and expect the connection to %% come up. up(Config) -> - {Svc, Ref} = connect(Config, []), + {Svc, Ref} = connect(Config, [{connect_timer, 5000}, + {watchdog_timer, 15000}]), start = event(Svc), - {up, Ref, {_,_Caps}, _Config, #diameter_packet{}} = event(Svc), - {watchdog, Ref, _, {initial, okay}, _} = event(Svc). + {up, Ref, {TPid, Caps}, Cfg, #diameter_packet{}} = event(Svc), + {watchdog, Ref, _, {initial, okay}, _} = event(Svc), + %% Kill the transport process and see that the connection is + %% reestablished after a watchdog timeout, not after connect_timer + %% expiry. + exit(TPid, kill), + {down, Ref, {TPid, Caps}, Cfg} = event(Svc), + {watchdog, Ref, _, {okay, down}, _} = event(Svc), + {reconnect, Ref, _} = event(Svc, 10000, 20000). %% Connect with non-matching capabilities and expect CEA from the peer %% to indicate as much and then for the transport to be restarted @@ -119,11 +127,12 @@ down(Config) -> {Svc, Ref} = connect(Config, [{capabilities, [{'Acct-Application-Id', [?DICT_ACCT:id()]}]}, {applications, [?DICT_ACCT]}, - {connect_timer, 5000}]), + {connect_timer, 5000}, + {watchdog_timer, 20000}]), start = event(Svc), {closed, Ref, {'CEA', ?NO_COMMON_APP, _, #diameter_packet{}}, _} = event(Svc), - {reconnect, Ref, _} = event(Svc). + {reconnect, Ref, _} = event(Svc, 4000, 10000). %% Connect with matching capabilities but have the server delay its %% CEA and cause the client to timeout. @@ -165,6 +174,13 @@ uniq() -> event(Name) -> receive #diameter_event{service = Name, info = T} -> T end. +event(Name, TL, TH) -> + T0 = now(), + Event = event(Name), + DT = timer:now_diff(now(), T0) div 1000, + {true, true, DT, Event} = {TL < DT, DT < TH, DT, Event}, + Event. + start_service(Name, Opts) -> diameter:start_service(Name, [{monitor, self()} | Opts]). -- cgit v1.2.3 From 66d67762be1cf0a3b9ac068d597c6d8bdaf2e3d7 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 00:12:58 +0100 Subject: Fix ignored connect timer There are two timers governing the establishment of peer connections: connect_timer and watchdog_timer. The former is the RFC 6733 Tc timer and is used by diameter_service to establish an initial connection. The latter is RFC 3539 TwInit and is used by diameter_watchdog for connection reestablishment after the watchdog leaves state INITIAL. A connecting transport ignored the connect timer since the watchdog process never died, regardless of the watchdog state, causing the watchdog timer to handle reconnection. This seems to have been broken for some time. --- lib/diameter/src/base/diameter_watchdog.erl | 36 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index eff5096745..b7f2d24941 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -255,11 +255,15 @@ close({'DOWN', _, process, TPid, {shutdown, Reason}}, close(_, _) -> ok. -event(_, #watchdog{status = T}, #watchdog{status = T}) -> - ok; - -event(_, #watchdog{transport = undefined}, #watchdog{transport = undefined}) -> +event(_, + #watchdog{status = From, transport = F}, + #watchdog{status = To, transport = T}) + when F == undefined, T == undefined; %% transport not started + From == initial, To == down; %% never really left INITIAL + From == To -> %% no state transition ok; +%% Note that there is no INITIAL -> DOWN transition in RFC 3539: ours +%% is just a consequence of stop. event(Msg, #watchdog{status = From, transport = F, parent = Pid}, @@ -411,7 +415,7 @@ transition({'DOWN', _, process, TPid, _Reason}, stop; %% ... or not. -transition({'DOWN', _, process, TPid, _Reason}, +transition({'DOWN', _, process, TPid, _Reason} = D, #watchdog{transport = TPid, status = T, restrict = {_,R}} @@ -422,20 +426,14 @@ transition({'DOWN', _, process, TPid, _Reason}, %% Close an accepting watchdog immediately if there's no %% restriction on the number of connections to the same peer: the - %% state machine never enters state REOPEN in this case. The - %% 'close' message (instead of stop) is so as not to bypass the - %% sending of messages to the service process in handle_info/2. - - if T /= initial, M == accept, not R -> - send(self(), close), - S#watchdog{status = down}; - T /= initial -> - set_watchdog(S#watchdog{status = down}); - M == connect -> - set_watchdog(S); - M == accept -> - send(self(), close), - S + %% state machine never enters state REOPEN in this case. + + if T == initial; + M == accept, not R -> + close(D, S0), + stop; + true -> + set_watchdog(S#watchdog{status = down}) end; %% Incoming message. -- cgit v1.2.3 From 2b89e8bd5a8258c4259ed53cc0331d4fbe1f1aa3 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 3 Nov 2014 01:47:27 +0100 Subject: Tweak reason in closed event From {error, Reason} to {no_connection, Reason} when a connection can't be established. The exit reason of a diameter_peer_fsm process is turned into a message from the corresponding diameter_watchdog process to the relevant diameter_service process, the latter sending a 'closed' event including the reason to any subscribers. Reason = [] when none of the configured transport modules succeeds in establishing a connection, which admittedly isn't terribly descriptive. (The lists is of error reasons from transport start functions, which is empty as long as transport processes start successfully.) Note that this form of the closed event is undocumented, aside from the documentation saying that one should expect undocumented events. The explicitly documented forms are currently specific to CER/CEA failures. --- lib/diameter/src/base/diameter_peer_fsm.erl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/diameter/src/base/diameter_peer_fsm.erl b/lib/diameter/src/base/diameter_peer_fsm.erl index 86fc43cdc5..ee6e7dd89e 100644 --- a/lib/diameter/src/base/diameter_peer_fsm.erl +++ b/lib/diameter/src/base/diameter_peer_fsm.erl @@ -225,8 +225,8 @@ start_transport(Addrs0, T) -> erlang:monitor(process, TPid), q_next(TPid, Addrs0, Tmo, Data), {TPid, Addrs}; - No -> - exit({shutdown, No}) + {error, No} -> + exit({shutdown, {no_connection, No}}) end. svc(#diameter_service{capabilities = LCaps0} = Svc, Addrs) -> @@ -368,11 +368,8 @@ transition({diameter, {TPid, connected}}, %% message. This may be followed by an incoming message which arrived %% before the transport was killed and this can't be distinguished %% from one from the transport that's been started to replace it. -transition({diameter, {_, connected}}, _) -> - {stop, connection_timeout}; -transition({diameter, {_, connected, _}}, _) -> - {stop, connection_timeout}; -transition({diameter, {_, connected, _, _}}, _) -> +transition({diameter, T}, _) + when tuple_size(T) < 5, connected == element(2,T) -> {stop, connection_timeout}; %% Connection has timed out: start an alternate. -- cgit v1.2.3 From c29dd0129b35390334bb6d2bbed5500b7089a532 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Thu, 13 Nov 2014 21:56:03 +0100 Subject: Order peers in pick_peer callbacks The order of peers presented to a diameter_app(3) pick_peer callback has previously not been documented, but there are use cases that are simplified by an ordering. For example, consider preferring a direct connection to a specified Destination-Host/Realm to any host in the realm. The implementation previously treated this as a special case by placing matching hosts at the head of the peers list, but the documentation made no guarantees. Now present peers in match-order, so that the desired sorting is the result of the following filter. {any, [{all, [host, realm]}, realm]} The implementation is not backwards compatible in the sense that a realm filter alone is no longer equivalent in this case. However, as stated, the documentation never made any guarantees regarding the sorting. --- lib/diameter/doc/src/diameter.xml | 12 ++++++ lib/diameter/src/base/diameter_service.erl | 65 +++++++++++++++++------------- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/lib/diameter/doc/src/diameter.xml b/lib/diameter/doc/src/diameter.xml index ab9ad25a3a..00b54ffbc4 100644 --- a/lib/diameter/doc/src/diameter.xml +++ b/lib/diameter/doc/src/diameter.xml @@ -500,6 +500,18 @@ Matches only those peers matched by each filter in the specified list.

Matches only those peers matched by at least one filter in the specified list.

+ +

+The resulting peer list will be in match order, peers matching the +first filter of the list sorting before those matched by the second, +and so on. +For example, the following filter causes peers matching both the host +and realm filters to be presented before those matching only the realm +filter.

+ +
+{any, [{all, [host, realm]}, realm]}
+
diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl index ab56ca9cef..76b05a2ad4 100644 --- a/lib/diameter/src/base/diameter_service.erl +++ b/lib/diameter/src/base/diameter_service.erl @@ -1460,42 +1460,52 @@ pick_peer(Local, peers(Alias, RH, Filter, Peers) -> case ?Dict:find(Alias, Peers) of {ok, L} -> - ps(L, RH, Filter, {[],[]}); + filter(L, RH, Filter); error -> [] end. -%% Place a peer whose Destination-Host/Realm matches those of the -%% request at the front of the result list. Could add some sort of -%% 'sort' option to allow more control. - -ps([], _, _, {Ys, Ns}) -> - lists:reverse(Ys, Ns); -ps([{_TPid, #diameter_caps{} = Caps} = TC | Rest], RH, Filter, Acc) -> - ps(Rest, RH, Filter, pacc(caps_filter(Caps, RH, Filter), - caps_filter(Caps, RH, {all, [host, realm]}), - TC, - Acc)). - -pacc(true, true, Peer, {Ts, Fs}) -> - {[Peer|Ts], Fs}; -pacc(true, false, Peer, {Ts, Fs}) -> - {Ts, [Peer|Fs]}; -pacc(_, _, _, Acc) -> - Acc. +%% filter/3 +%% +%% Return peers in match order. -%% caps_filter/3 +filter(Peers, RH, Filter) -> + {Ts, _} = fltr(Peers, RH, Filter), + Ts. + +%% fltr/4 -caps_filter(C, RH, {neg, F}) -> - not caps_filter(C, RH, F); +fltr(Peers, _, none) -> + {Peers, []}; -caps_filter(C, RH, {all, L}) +fltr(Peers, RH, {neg, F}) -> + {Ts, Fs} = fltr(Peers, RH, F), + {Fs, Ts}; + +fltr(Peers, RH, {all, L}) when is_list(L) -> - lists:all(fun(F) -> caps_filter(C, RH, F) end, L); + lists:foldl(fun(F,A) -> fltr_all(F, A, RH) end, + {Peers, []}, + L); -caps_filter(C, RH, {any, L}) +fltr(Peers, RH, {any, L}) when is_list(L) -> - lists:any(fun(F) -> caps_filter(C, RH, F) end, L); + lists:foldl(fun(F,A) -> fltr_any(F, A, RH) end, + {[], Peers}, + L); + +fltr(Peers, RH, F) -> + lists:partition(fun({_,C}) -> caps_filter(C, RH, F) end, Peers). + +fltr_all(F, {Ts0, Fs0}, RH) -> + {Ts1, Fs1} = fltr(Ts0, RH, F), + {Ts1, Fs0 ++ Fs1}. + +fltr_any(F, {Ts0, Fs0}, RH) -> + {Ts1, Fs1} = fltr(Fs0, RH, F), + {Ts0 ++ Ts1, Fs1}. + +%% caps_filter/3 caps_filter(#diameter_caps{origin_host = {_,OH}}, [_,DH], host) -> eq(undefined, DH, OH); @@ -1508,9 +1518,6 @@ caps_filter(C, _, Filter) -> %% caps_filter/2 -caps_filter(_, none) -> - true; - caps_filter(#diameter_caps{origin_host = {_,OH}}, {host, H}) -> eq(any, H, OH); -- cgit v1.2.3