From 9a878743009952849984fd663ffb60261b9d2965 Mon Sep 17 00:00:00 2001
From: Anders Svensson <anders@erlang.org>
Date: Tue, 15 Mar 2016 12:10:58 +0100
Subject: Make peer failover more efficient

Failover caused the entire request table to be scanned in search of
entries with the transport process in question. With many entries
(possibly as a result of the leak fixed in commit 6c9cbd96), this can
lead to the service process hanging in ets:select_trap/1, with memory
growth when many request processes write concurrently. Now write entries
keyed on the transport pid, so that finding request processes at
failover is a lookup rather than a select scanning the entire table.

There is no upgrade handling in that new code doesn't consider that old
code didn't write entries on the transport pid. Thus, a request whose
table entries were written in old code will timeout rather than failover
in new code. That is, there is a small window for failover to be missed
(since request processes are short-lived), but it requires that it take
place during the upgrade.

As a minor aside, don't ignore failovers when sending binaries (which
isn't officially supported), let prepare_retransmit callbacks deal with
modifying the binary as required.
---
 lib/diameter/src/base/diameter_traffic.erl | 86 +++++++++++++-----------------
 1 file changed, 36 insertions(+), 50 deletions(-)

diff --git a/lib/diameter/src/base/diameter_traffic.erl b/lib/diameter/src/base/diameter_traffic.erl
index 5d39c08213..ba446dacbb 100644
--- a/lib/diameter/src/base/diameter_traffic.erl
+++ b/lib/diameter/src/base/diameter_traffic.erl
@@ -125,8 +125,16 @@ peer_up(TPid) ->
 %% ---------------------------------------------------------------------------
 
 peer_down(TPid) ->
-    ets:delete(?REQUEST_TABLE, TPid),
-    failover(TPid).
+    ets:delete_object(?REQUEST_TABLE, {TPid}),
+    lists:foreach(fun failover/1, ets:lookup(?REQUEST_TABLE, TPid)).
+%% Note that a request process can store its request after failover
+%% notifications are sent here: insert_request/2 sends the notification
+%% in that case.
+
+%% failover/1
+
+failover({_TPid, {Pid, TRef}}) ->
+    Pid ! {failover, TRef}.
 
 %% ---------------------------------------------------------------------------
 %% incr/4
@@ -1450,7 +1458,7 @@ make_request_packet(#diameter_packet{header = Hdr} = Pkt,
 make_request_packet(Msg, Pkt) ->
     Pkt#diameter_packet{msg = Msg}.
 
-%% make_retransmit_packet/2
+%% make_retransmit_packet/1
 
 make_retransmit_packet(#diameter_packet{msg = [#diameter_header{} = Hdr
                                                | Avps]}
@@ -1704,16 +1712,13 @@ send_request(TPid, #diameter_packet{bin = Bin} = Pkt, Req, _SvcName, Timeout)
   when node() == node(TPid) ->
     Seqs = diameter_codec:sequence_numbers(Bin),
     TRef = erlang:start_timer(Timeout, self(), TPid),
-    Entry = {Seqs, Req, TRef},
+    Entry = {Seqs, #request{handler = Pid} = Req, TRef},
 
-    %% Ensure that request table is cleaned even if we receive an exit
-    %% signal. An alternative would be to simply trap exits, but
-    %% callbacks are applied in this process, and these could possibly
-    %% be expecting the prevailing behaviour.
-    Self = self(),
-    spawn(fun() -> diameter_lib:wait([Self]), erase_request(Entry) end),
+    %% Ensure that request table is cleaned even if the process is
+    %% killed.
+    spawn(fun() -> diameter_lib:wait([Pid]), delete_request(Entry) end),
 
-    store_request(Entry, TPid),
+    insert_request(Entry),
     send(TPid, Pkt),
     TRef;
 
@@ -1775,6 +1780,8 @@ retransmit({TPid, Caps, App}
                SvcName,
                Timeout,
                []).
+%% When sending a binary, it's up to prepare_retransmit to modify it
+%% accordingly.
 
 retransmit({send, Msg},
            Transport,
@@ -1819,15 +1826,21 @@ resend_request(Pkt0,
     TRef = send_request(TPid, Pkt, Req, SvcName, Tmo),
     {TRef, Req}.
 
-%% store_request/2
+%% insert_request/1
 
-store_request(T, TPid) ->
-    ets:insert(?REQUEST_TABLE, T),
-    ets:member(?REQUEST_TABLE, TPid)
-        orelse begin
-                   {_Seqs, _Req, TRef} = T,
-                   self() ! {failover, TRef}  %% failover/1 may have missed
-               end.
+insert_request({_Seqs, #request{transport = TPid}, TRef} = T) ->
+    ets:insert(?REQUEST_TABLE, [T, {TPid, {self(), TRef}}]),
+    is_peer_up(TPid)
+        orelse (self() ! {failover, TRef}).  %% failover/1 may have missed
+
+%% is_peer_up/1
+%%
+%% Is the entry written by peer_up/1 and deleted by peer_down/1 still
+%% in the request table?
+
+is_peer_up(TPid) ->
+    Spec = [{{TPid}, [], ['$_']}],
+    '$end_of_table' /= ets:select(?REQUEST_TABLE, Spec, 1).
 
 %% lookup_request/2
 %%
@@ -1847,16 +1860,11 @@ lookup_request(Msg, TPid) ->
             false
     end.
 
-%% erase_request/1
-
-erase_request(T) ->
-    ets:delete_object(?REQUEST_TABLE, T).
-
-%% match_requests/1
+%% delete_request/1
 
-match_requests(TPid) ->
-    Pat = {'_', #request{transport = TPid, _ = '_'}, '_'},
-    ets:select(?REQUEST_TABLE, [{Pat, [], ['$_']}]).
+delete_request({_Seqs, #request{handler = Pid, transport = TPid}, TRef} = T) ->
+    Spec = [{R, [], [true]} || R <- [T, {TPid, {Pid, TRef}}]],
+    ets:select_delete(?REQUEST_TABLE, Spec).
 
 %% have_request/2
 
@@ -1865,28 +1873,6 @@ have_request(Pkt, TPid) ->
     Pat = {Seqs, #request{transport = TPid, _ = '_'}, '_'},
     '$end_of_table' /= ets:select(?REQUEST_TABLE, [{Pat, [], ['$_']}], 1).
 
-%% ---------------------------------------------------------------------------
-%% # failover/1-2
-%% ---------------------------------------------------------------------------
-
-failover(TPid)
-  when is_pid(TPid) ->
-    lists:foreach(fun failover/1, match_requests(TPid));
-%% Note that a request process can store its request after failover
-%% notifications are sent here: store_request/2 sends the notification
-%% in that case.
-
-%% Failover as a consequence of peer_down/1: inform the
-%% request process.
-failover({_, Req, TRef}) ->
-    #request{handler = Pid,
-             packet = #diameter_packet{msg = M}}
-        = Req,
-    M /= undefined andalso (Pid ! {failover, TRef}).
-%% Failover is not performed when msg = binary() since sending
-%% pre-encoded binaries is only partially supported. (Mostly for
-%% test.)
-
 %% get_destination/2
 
 get_destination(Dict, Msg) ->
-- 
cgit v1.2.3