From b1cdee0b11c6fde89ba586b1c04724daf4a96a07 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Wed, 22 Aug 2012 16:45:31 +0200 Subject: Add events for watchdog state transitions --- lib/diameter/doc/src/diameter.xml | 13 ++++++++ lib/diameter/src/base/diameter_service.erl | 8 +++++ lib/diameter/src/base/diameter_watchdog.erl | 48 +++++++++++++++++++++++------ 3 files changed, 60 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/diameter/doc/src/diameter.xml b/lib/diameter/doc/src/diameter.xml index 93e2603c10..4dcbac4889 100644 --- a/lib/diameter/doc/src/diameter.xml +++ b/lib/diameter/doc/src/diameter.xml @@ -692,6 +692,19 @@ The packet record contains the CEA in question.

+{watchdog, Ref, PeerRef, {From, To}, Config} + + +Ref = transport_ref() +PeerRef = diameter_app:peer_ref() +From, To = initial | okay | suspect | down | reopen +Config = {connect|listen, [transport_opt()]} + + +

+An RFC 3539 watchdog state machine has changed state.

+
+

diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl index 3dfdcee2b2..c9c3179630 100644 --- a/lib/diameter/src/base/diameter_service.erl +++ b/lib/diameter/src/base/diameter_service.erl @@ -516,6 +516,14 @@ transition({reconnect, Pid}, S) -> reconnect(Pid, S), ok; +%% Watchdog is sending notification of a state transition. +transition({watchdog, Pid, {TPid, From, To}}, #state{service_name = SvcName, + peerT = PeerT}) -> + #peer{ref = Ref, type = T, options = Opts} + = fetch(PeerT, Pid), + send_event(SvcName, {watchdog, Ref, TPid, {From, To}, {T, Opts}}), + ok; + %% Monitor process has died. Just die with a reason that tells %% diameter_config about the happening. If a cleaner shutdown is %% required then someone should stop us. diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index fb22fd8275..b615bed860 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2011. All Rights Reserved. +%% Copyright Ericsson AB 2010-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -54,7 +54,7 @@ %% number of DWAs received during reopen %% end PCB parent = self() :: pid(), - transport :: pid(), + transport :: pid() | undefined, tref :: reference(), %% reference for current watchdog timer message_data}). %% term passed into diameter_service with message @@ -64,6 +64,14 @@ %% that a failed capabilities exchange produces the desired exit %% reason. +-spec start(Type, {RecvData, Opts, SvcName, Svc}) + -> pid() + when Type :: {connect|accept, reference()}, + RecvData :: term(), + Opts :: list(), + SvcName :: term(), + Svc :: #diameter_service{}. + start({_,_} = Type, T) -> Ref = make_ref(), {ok, Pid} = diameter_watchdog_sup:start_child({Ref, {Type, self(), T}}), @@ -102,7 +110,7 @@ i({_, Pid, _} = T) -> %% from old code erlang:monitor(process, Pid), make_state(T). -make_state({T, Pid, {ConnT, +make_state({T, Pid, {RecvData, Opts, SvcName, #diameter_service{applications = Apps, @@ -116,7 +124,7 @@ make_state({T, Pid, {ConnT, tw = proplists:get_value(watchdog_timer, Opts, ?DEFAULT_TW_INIT), - message_data = {ConnT, SvcName, Apps}}. + message_data = {RecvData, SvcName, Apps}}. %% handle_call/3 @@ -134,14 +142,36 @@ handle_info(T, State) -> case transition(T, State) of ok -> {noreply, State}; - #watchdog{status = X} = S -> - ?LOGC(X =/= State#watchdog.status, transition, X), + #watchdog{} = S -> + event(State, S), {noreply, S}; stop -> ?LOG(stop, T), + event(State, State#watchdog{status = down}), {stop, {shutdown, T}, State} end. +event(#watchdog{status = T}, #watchdog{status = T}) -> + ok; + +event(#watchdog{transport = undefined}, #watchdog{transport = undefined}) -> + ok; + +event(#watchdog{status = From, transport = F, parent = Pid}, + #watchdog{status = To, transport = T}) -> + E = {tpid(F,T), From, To}, + notify(Pid, E), + ?LOG(transition, {self(), E}). + +tpid(_, Pid) + when is_pid(Pid) -> + Pid; +tpid(Pid, _) -> + Pid. + +notify(Pid, E) -> + Pid ! {watchdog, self(), E}. + %% terminate/2 terminate(_, _) -> @@ -251,8 +281,8 @@ transition({'DOWN', _, process, TPid, _}, status = initial}) -> stop; -transition({'DOWN', _, process, Pid, _}, - #watchdog{transport = Pid} +transition({'DOWN', _, process, TPid, _}, + #watchdog{transport = TPid} = S) -> failover(S), close(S), @@ -385,7 +415,7 @@ recv(Name, Pkt, S) -> rcv(Name, Pkt, S), NS catch - throw: {?MODULE, throwaway, #watchdog{} = NS} -> + {?MODULE, throwaway, #watchdog{} = NS} -> NS end. -- cgit v1.2.3 From 26721653adec1e9d4a1032e766f8243085687bf3 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Fri, 24 Aug 2012 23:59:19 +0200 Subject: Maintain watchdog states in service_info --- lib/diameter/src/base/diameter_service.erl | 89 ++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl index c9c3179630..0bba8117a5 100644 --- a/lib/diameter/src/base/diameter_service.erl +++ b/lib/diameter/src/base/diameter_service.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2011. All Rights Reserved. +%% Copyright Ericsson AB 2010-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -65,9 +65,27 @@ -include_lib("diameter/include/diameter.hrl"). -include("diameter_internal.hrl"). +%% The "old" states maintained in this module historically. -define(STATE_UP, up). -define(STATE_DOWN, down). +-type op_state() :: ?STATE_UP + | ?STATE_DOWN. + +%% The RFC 3539 watchdog states that are now maintained, albeit +%% along with the old up/down. okay = up, else down. +-define(WD_INITIAL, initial). +-define(WD_OKAY, okay). +-define(WD_SUSPECT, suspect). +-define(WD_DOWN, down). +-define(WD_REOPEN, reopen). + +-type wd_state() :: ?WD_INITIAL + | ?WD_OKAY + | ?WD_SUSPECT + | ?WD_DOWN + | ?WD_REOPEN. + -define(DEFAULT_TC, 30000). %% RFC 3588 ch 2.1 -define(DEFAULT_TIMEOUT, 5000). %% for outgoing requests -define(RESTART_TC, 1000). %% if restart was this recent @@ -117,7 +135,8 @@ type :: match(connect | accept), ref :: match(reference()), %% key into diameter_config options :: match([diameter:transport_opt()]),%% from start_transport - op_state = ?STATE_DOWN :: match(?STATE_DOWN | ?STATE_UP), + op_state = {?STATE_DOWN, ?WD_INITIAL} + :: match(op_state() | {op_state(), wd_state()}), started = now(), %% at process start conn = false :: match(boolean() | pid())}). %% true at accept, pid() at connection_up (connT key) @@ -516,13 +535,33 @@ transition({reconnect, Pid}, S) -> reconnect(Pid, S), ok; -%% Watchdog is sending notification of a state transition. +%% Watchdog is sending notification of a state transition. Note that +%% the connection_up/down messages are pre-date this message and are +%% still used. A 'watchdog' message will follow these and communicate +%% the same state as was set in handling connection_up/down. transition({watchdog, Pid, {TPid, From, To}}, #state{service_name = SvcName, peerT = PeerT}) -> - #peer{ref = Ref, type = T, options = Opts} + #peer{ref = Ref, type = T, options = Opts, op_state = {OS,_}} + = P = fetch(PeerT, Pid), + insert(PeerT, P#peer{op_state = {OS, To}}), send_event(SvcName, {watchdog, Ref, TPid, {From, To}, {T, Opts}}), ok; +%% Death of a peer process results in the removal of it's peer and any +%% associated conn record when 'DOWN' is received (after this) but the +%% states will be {?STATE_UP, ?WD_DOWN} for a short time. (No real +%% problem since ?WD_* is only used in service_info.) We set ?WD_OKAY +%% as a consequence of connection_up since we know a watchdog is +%% coming. We can't set anything at connection_down since we don't +%% know if the subsequent watchdog message will be ?WD_DOWN or +%% ?WD_SUSPECT. We don't (yet) set ?STATE_* as a consequence of a +%% watchdog message since this requires changing some of the matching +%% on ?STATE_*. +%% +%% Death of a conn process results in connection_down followed by +%% watchdog ?WD_DOWN. The latter doesn't result in the conn record +%% being deleted since 'DOWN' from death of its peer doesn't (yet) +%% deal with the record having been removed. %% Monitor process has died. Just die with a reason that tells %% diameter_config about the happening. If a cleaner shutdown is @@ -887,7 +926,14 @@ accepted(Pid, _TPid, #state{peerT = PeerT} = S) -> fetch(Tid, Key) -> [T] = ets:lookup(Tid, Key), - T. + case T of + #peer{op_state = ?STATE_UP} = P -> + P#peer{op_state = {?STATE_UP, ?WD_OKAY}}; + #peer{op_state = ?STATE_DOWN} = P -> + P#peer{op_state = {?STATE_DOWN, ?WD_DOWN}}; + _ -> + T + end. %%% --------------------------------------------------------------------------- %%% # connection_up/3 @@ -933,12 +979,12 @@ connection_up(T, P, C, #state{peerT = PeerT, service = #diameter_service{applications = Apps}} = S) -> - #peer{conn = TPid, op_state = ?STATE_DOWN} + #peer{conn = TPid, op_state = {?STATE_DOWN, _}} = P, #conn{apps = SApps, caps = Caps} = C, - insert(PeerT, P#peer{op_state = ?STATE_UP}), + insert(PeerT, P#peer{op_state = {?STATE_UP, ?WD_OKAY}}), request_peer_up(TPid), report_status(up, P, C, S, T), @@ -987,22 +1033,22 @@ peer_cb(MFA, Alias) -> connection_down(Pid, #state{peerT = PeerT, connT = ConnT} = S) -> - #peer{op_state = ?STATE_UP, %% assert + #peer{op_state = {?STATE_UP, WS}, %% assert conn = TPid} = P = fetch(PeerT, Pid), C = fetch(ConnT, TPid), - insert(PeerT, P#peer{op_state = ?STATE_DOWN}), + insert(PeerT, P#peer{op_state = {?STATE_DOWN, WS}}), connection_down(P,C,S). %% connection_down/3 -connection_down(#peer{op_state = ?STATE_DOWN}, _, S) -> +connection_down(#peer{op_state = {?STATE_DOWN, _}}, _, S) -> S; connection_down(#peer{conn = TPid, - op_state = ?STATE_UP} + op_state = {?STATE_UP, _}} = P, #conn{caps = Caps, apps = SApps} @@ -1051,7 +1097,7 @@ peer_down(Pid, Reason, #state{peerT = PeerT} = S) -> %% Send an event at connection establishment failure. closed({shutdown, {close, _TPid, Reason}}, - #peer{op_state = ?STATE_DOWN, + #peer{op_state = {?STATE_DOWN, _}, ref = Ref, type = Type, options = Opts}, @@ -2858,15 +2904,26 @@ it_acc(ConnT, Acc, #peer{pid = Pid, op_state = OS, started = T, conn = TPid}) -> + WS = wd_state(OS), dict:append(Ref, [{type, Type}, {options, Opts}, - {watchdog, {Pid, T, OS}} - | info_conn(ConnT, TPid)], + {watchdog, {Pid, T, WS}} + | info_conn(ConnT, TPid, WS /= ?WD_DOWN)], Acc). -info_conn(ConnT, TPid) -> - info_conn(ets:lookup(ConnT, TPid)). +info_conn(ConnT, TPid, true) + when is_pid(TPid) -> + info_conn(ets:lookup(ConnT, TPid)); +info_conn(_, _, _) -> + []. + +wd_state({_,S}) -> + S; +wd_state(?STATE_UP) -> + ?WD_OKAY; +wd_state(?STATE_DOWN) -> + ?WD_DOWN. info_conn([#conn{pid = Pid, apps = SApps, caps = Caps, started = T}]) -> [{peer, {Pid, T}}, -- cgit v1.2.3 From 92813e780248755e720eda0749aa6232bae8724e Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Sun, 26 Aug 2012 21:32:11 +0200 Subject: Minor spec fix --- lib/diameter/src/base/diameter_watchdog.erl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/diameter/src/base/diameter_watchdog.erl b/lib/diameter/src/base/diameter_watchdog.erl index b615bed860..d7474e5c56 100644 --- a/lib/diameter/src/base/diameter_watchdog.erl +++ b/lib/diameter/src/base/diameter_watchdog.erl @@ -64,13 +64,12 @@ %% that a failed capabilities exchange produces the desired exit %% reason. --spec start(Type, {RecvData, Opts, SvcName, Svc}) - -> pid() - when Type :: {connect|accept, reference()}, +-spec start(Type, {RecvData, [Opt], SvcName, #diameter_service{}}) + -> {reference(), pid()} + when Type :: {connect|accept, diameter:transport_ref()}, RecvData :: term(), - Opts :: list(), - SvcName :: term(), - Svc :: #diameter_service{}. + Opt :: diameter:transport_opt(), + SvcName :: diameter:service_name(). start({_,_} = Type, T) -> Ref = make_ref(), -- cgit v1.2.3