From f8fa795ac4885af5c9f396fbcf26143a67fbdf49 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Tue, 1 Dec 2015 08:11:25 +0100 Subject: Fix request table leak at exit signal The storing of request records in the ets table diameter_request was wrapped in a try/after so that the latter would unconditionally remove written entries. The problem is that it didn't deal with the process exiting as a result of an exit signal, since this doesn't raise in an exception. Since the process in question applies callbacks to user code, we can potentially be linked to other process and exit as a result. Trapping exits changes the current behaviour of the process, so spawn a monitoring process that cleans up upon reception of 'DOWN'. --- lib/diameter/src/base/diameter_traffic.erl | 32 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/diameter/src/base/diameter_traffic.erl b/lib/diameter/src/base/diameter_traffic.erl index eb4bbae931..d1adb084ce 100644 --- a/lib/diameter/src/base/diameter_traffic.erl +++ b/lib/diameter/src/base/diameter_traffic.erl @@ -1490,16 +1490,20 @@ send_R(Pkt0, caps = Caps, packet = Pkt0}, - try - incr(send, Pkt, TPid, AppDict), - TRef = send_request(TPid, Pkt, Req, SvcName, Timeout), - Pid ! Ref, %% tell caller a send has been attempted - handle_answer(SvcName, - App, - recv_A(Timeout, SvcName, App, Opts, {TRef, Req})) - after - erase_requests(Pkt) - end. + %% Ensure that request table is cleaned even if we receive an exit + %% signal. An alternative would be to simply trap exits, but + %% callbacks are applied in this process, and these could possibly + %% be expecting the prevailing behaviour. + Self = self(), + Seqs = diameter_codec:sequence_numbers(Pkt), + spawn(fun() -> diameter_lib:wait([Self]), erase_requests(Seqs) end), + + incr(send, Pkt, TPid, AppDict), + TRef = send_request(TPid, Pkt, Req, SvcName, Timeout), + Pid ! Ref, %% tell caller a send has been attempted + handle_answer(SvcName, + App, + recv_A(Timeout, SvcName, App, Opts, {TRef, Req})). %% recv_A/5 @@ -1831,6 +1835,10 @@ store_request(TPid, Bin, Req, Timeout) -> TRef. %% lookup_request/2 +%% +%% Note the match on both the key and transport pid. The latter is +%% necessary since the same Hop-by-Hop and End-to-End identifiers are +%% reused in the case of retransmission. lookup_request(Msg, TPid) -> Seqs = diameter_codec:sequence_numbers(Msg), @@ -1846,8 +1854,8 @@ lookup_request(Msg, TPid) -> %% erase_requests/1 -erase_requests(Pkt) -> - ets:delete(?REQUEST_TABLE, diameter_codec:sequence_numbers(Pkt)). +erase_requests(Seqs) -> + ets:delete(?REQUEST_TABLE, Seqs). %% match_requests/1 -- cgit v1.2.3 From 6c9cbd96d01da3194715d3caf8aa23350dfaa53a Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Thu, 3 Dec 2015 13:09:11 +0100 Subject: Fix request table leak at retransmission In the case of retranmission, a prepare_retransmit callback could modify End-to-End and/or Hop-by-Hop identifiers so that the resulting diameter_request entry was not removed, since the removal was of entries with the identifiers of the original request. The chances someone doing this in practice are probably minimal. --- lib/diameter/src/base/diameter_traffic.erl | 65 +++++++++++++----------------- 1 file changed, 28 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/diameter/src/base/diameter_traffic.erl b/lib/diameter/src/base/diameter_traffic.erl index d1adb084ce..61ea5e69ba 100644 --- a/lib/diameter/src/base/diameter_traffic.erl +++ b/lib/diameter/src/base/diameter_traffic.erl @@ -169,7 +169,7 @@ incr_error(Dir, Id, TPid, _) -> incr_error(Dir, Id, TPid) -> incr(TPid, {Id, Dir, error}). - + %% --------------------------------------------------------------------------- %% incr_rc/4 %% --------------------------------------------------------------------------- @@ -1490,14 +1490,6 @@ send_R(Pkt0, caps = Caps, packet = Pkt0}, - %% Ensure that request table is cleaned even if we receive an exit - %% signal. An alternative would be to simply trap exits, but - %% callbacks are applied in this process, and these could possibly - %% be expecting the prevailing behaviour. - Self = self(), - Seqs = diameter_codec:sequence_numbers(Pkt), - spawn(fun() -> diameter_lib:wait([Self]), erase_requests(Seqs) end), - incr(send, Pkt, TPid, AppDict), TRef = send_request(TPid, Pkt, Req, SvcName, Timeout), Pid ! Ref, %% tell caller a send has been attempted @@ -1706,9 +1698,18 @@ encode(_, _, #diameter_packet{} = Pkt) -> send_request(TPid, #diameter_packet{bin = Bin} = Pkt, Req, _SvcName, Timeout) when node() == node(TPid) -> - %% Store the outgoing request before sending to avoid a race with - %% reply reception. - TRef = store_request(TPid, Bin, Req, Timeout), + Seqs = diameter_codec:sequence_numbers(Bin), + TRef = erlang:start_timer(Timeout, self(), TPid), + Entry = {Seqs, Req, TRef}, + + %% Ensure that request table is cleaned even if we receive an exit + %% signal. An alternative would be to simply trap exits, but + %% callbacks are applied in this process, and these could possibly + %% be expecting the prevailing behaviour. + Self = self(), + spawn(fun() -> diameter_lib:wait([Self]), erase_request(Entry) end), + + store_request(Entry, TPid), send(TPid, Pkt), TRef; @@ -1723,31 +1724,21 @@ send_request(TPid, #diameter_packet{} = Pkt, Req, SvcName, Timeout) -> %% send/1 send({TPid, Pkt, #request{handler = Pid} = Req0, SvcName, Timeout, TRef}) -> - Seqs = diameter_codec:sequence_numbers(Pkt), Req = Req0#request{handler = self()}, - Ref = send_request(TPid, Pkt, Req, SvcName, Timeout), - - try - recv(TPid, Pid, TRef, Ref) - after - %% Remove only the entry for this specific send since a resend - %% from the originating node can pick another transport on - %% this one. - ets:delete_object(?REQUEST_TABLE, {Seqs, Req, Ref}) - end. + recv(TPid, Pid, TRef, send_request(TPid, Pkt, Req, SvcName, Timeout)). %% recv/4 %% %% Relay an answer from a remote node. -recv(TPid, Pid, TRef, Ref) -> +recv(TPid, Pid, TRef, LocalTRef) -> receive {answer, _, _, _, _} = A -> Pid ! A; - {failover = T, Ref} -> + {failover = T, LocalTRef} -> Pid ! {T, TRef}; T -> - exit({timeout, Ref, TPid} = T) + exit({timeout, LocalTRef, TPid} = T) end. %% send/2 @@ -1824,15 +1815,15 @@ resend_request(Pkt0, TRef = send_request(TPid, Pkt, Req, SvcName, Tmo), {TRef, Req}. -%% store_request/4 +%% store_request/2 -store_request(TPid, Bin, Req, Timeout) -> - Seqs = diameter_codec:sequence_numbers(Bin), - TRef = erlang:start_timer(Timeout, self(), TPid), - ets:insert(?REQUEST_TABLE, {Seqs, Req, TRef}), +store_request(T, TPid) -> + ets:insert(?REQUEST_TABLE, T), ets:member(?REQUEST_TABLE, TPid) - orelse (self() ! {failover, TRef}), %% failover/1 may have missed - TRef. + orelse begin + {_Seqs, _Req, TRef} = T, + (self() ! {failover, TRef}) %% failover/1 may have missed + end. %% lookup_request/2 %% @@ -1852,10 +1843,10 @@ lookup_request(Msg, TPid) -> false end. -%% erase_requests/1 +%% erase_request/1 -erase_requests(Seqs) -> - ets:delete(?REQUEST_TABLE, Seqs). +erase_request(T) -> + ets:delete_object(?REQUEST_TABLE, T). %% match_requests/1 @@ -1878,7 +1869,7 @@ failover(TPid) when is_pid(TPid) -> lists:foreach(fun failover/1, match_requests(TPid)); %% Note that a request process can store its request after failover -%% notifications are sent here: store_request/4 sends the notification +%% notifications are sent here: store_request/2 sends the notification %% in that case. %% Failover as a consequence of request_peer_down/1: inform the -- cgit v1.2.3