aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnders Svensson <[email protected]>2015-07-18 14:04:28 +0200
committerAnders Svensson <[email protected]>2015-07-19 11:08:21 +0200
commit4f365c072b6df771004b388dd7e66f08e37ac5e7 (patch)
treec3fe3f7437c0a91c1ee4126aaf4608507ee39835
parent862af31d8a91b56711e3da554bb08247b7ee43cd (diff)
downloadotp-4f365c072b6df771004b388dd7e66f08e37ac5e7.tar.gz
otp-4f365c072b6df771004b388dd7e66f08e37ac5e7.tar.bz2
otp-4f365c072b6df771004b388dd7e66f08e37ac5e7.zip
Don't start watchdog timers unnecessarily
In particular, don't restart the timer with each incoming Diameter message; restart it only when the previous timer has expired. Restarting on every message has been seen to result in high lock contention at load, as in the example below:

(diameter@test)9> lcnt:conflicts([{print, [name, tries, ratio, time]}]).

            lock    #tries  collisions [%]   time [us]
           -----   -------  --------------  ----------
      bif_timers   7844528         99.4729  1394434884
          db_tab  17240988          1.7947     6286664
       timeofday   7358692          5.6729     1399624
       proc_link   4814938          2.2736      482985
    drv_ev_state   2324012          0.5951       98920
       run_queue  21768213          0.2091       63516
         pollset   1190174          1.7170       42499
        pix_lock      1956          2.5562       39770
        make_ref   4697067          0.3669       20211
       proc_msgq   9475944          0.0295        5200
     timer_wheel   5325966          0.0568        2654
       proc_main  10005332          2.8190        1079
 pollset_rm_list     59768          1.7752         480
-rw-r--r--lib/diameter/src/base/diameter_watchdog.erl41
1 files changed, 27 insertions, 14 deletions
diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl
index 9844f58ab2..0607c72818 100644
--- a/lib/diameter/src/base/diameter_watchdog.erl
+++ b/lib/diameter/src/base/diameter_watchdog.erl
@@ -65,7 +65,10 @@
%% end PCB
parent = self() :: pid(), %% service process
transport :: pid() | undefined, %% peer_fsm process
- tref :: reference(), %% reference for current watchdog timer
+ tref :: reference() %% reference for current watchdog timer
+ | integer() %% monotonic time
+ | tuple() %% now()
+ | undefined,
dictionary :: module(), %% common dictionary
receive_data :: term(),
%% term passed into diameter_service with incoming message
@@ -446,11 +449,12 @@ transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) ->
%% Current watchdog has timed out.
transition({timeout, TRef, tw}, #watchdog{tref = TRef} = S) ->
- set_watchdog(timeout(S));
+ set_watchdog(0, timeout(S));
-%% Timer was canceled after message was already sent.
-transition({timeout, _, tw}, #watchdog{}) ->
- ok;
+%% Message has arrived since the timer was started: subtract time
+%% already elapsed from new timer.
+transition({timeout, _, tw}, #watchdog{tref = T0} = S) ->
+ set_watchdog(diameter_lib:micro_diff(T0) div 1000, S);
%% State query.
transition({state, Pid}, #watchdog{status = S}) ->
@@ -526,18 +530,27 @@ role() ->
%% set_watchdog/1
-set_watchdog(#watchdog{tw = TwInit,
- tref = TRef}
- = S) ->
- cancel(TRef),
- S#watchdog{tref = erlang:start_timer(tw(TwInit), self(), tw)};
+%% Timer not yet set.
+set_watchdog(#watchdog{tref = undefined} = S) ->
+ set_watchdog(0, S);
+
+%% Timer already set: start a new one only at expiry.
+set_watchdog(#watchdog{} = S) ->
+ S#watchdog{tref = diameter_lib:now()};
+
set_watchdog(stop = No) ->
No.
-cancel(undefined) ->
- ok;
-cancel(TRef) ->
- erlang:cancel_timer(TRef).
+%% set_watchdog/2
+
+set_watchdog(Ms, #watchdog{tw = TwInit} = S) ->
+ S#watchdog{tref = erlang:start_timer(tw(TwInit, Ms), self(), tw)}.
+
+%% A callback could return anything, so ensure the result isn't
+%% negative. Don't prevent abuse, even though the smallest valid
+%% timeout is 4000.
+tw(TwInit, Ms) ->
+ max(tw(TwInit) - Ms, 0).
tw(T)
when is_integer(T), T >= 6000 ->