From 32a34109c6547c16a787e1788954b4dc2736976c Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 28 Jan 2013 18:29:43 +0100 Subject: Fix faulty watchdog transition DOWN -> INITIAL This was the result of the watchdog process exiting as a consequence of peer death in some casesi, causing a restarted transport to enter INITIAL when it should enter REOPEN. The watchdog now remains alive as long as peer shutdown isn't requested and a 'close' message to the service process (instead of watchdog death) generates 'closed' events from the service. --- lib/diameter/src/base/diameter_peer_fsm.erl | 57 ++++++++--------------------- 1 file changed, 16 insertions(+), 41 deletions(-) (limited to 'lib/diameter/src/base/diameter_peer_fsm.erl') diff --git a/lib/diameter/src/base/diameter_peer_fsm.erl b/lib/diameter/src/base/diameter_peer_fsm.erl index b515a599ed..d593d0ab84 100644 --- a/lib/diameter/src/base/diameter_peer_fsm.erl +++ b/lib/diameter/src/base/diameter_peer_fsm.erl @@ -126,12 +126,7 @@ %% State Machine rather than closer to the transport. This is what we %% now do below: connect/accept call diameter_watchdog and return the %% pid of the watchdog process, and the watchdog in turn calls start/3 -%% below to start the process implementing the Peer State Machine. The -%% former is a "peer" in diameter_service while the latter is a -%% "conn". In a sense, diameter_service sees the watchdog as -%% implementing the Peer State Machine and the process implemented -%% here as being the transport, not being aware of the watchdog at -%% all. +%% below to start the process implementing the Peer State Machine. %% %%% --------------------------------------------------------------------------- @@ -274,13 +269,12 @@ handle_info(T, #state{} = State) -> {noreply, S}; {stop, Reason} -> ?LOG(stop, Reason), - x(Reason, State); + {stop, {shutdown, Reason}, State}; stop -> ?LOG(stop, T), - x(T, State) + {stop, {shutdown, T}, State} catch exit: {diameter_codec, encode, _} = Reason -> - close_wd(Reason, State#state.parent), ?LOG(stop, Reason), %% diameter_codec:encode/2 emits an error report. Only %% indicate the probable reason here. @@ -300,10 +294,6 @@ handle_info(T, #state{} = State) -> %% succesfully encoded. It's not checked at diameter:add_transport/2 %% since this can be called before creating the service. -x(Reason, #state{} = S) -> - close_wd(Reason, S), - {stop, {shutdown, Reason}, S}. - %% terminate/2 terminate(_, _) -> @@ -378,9 +368,8 @@ transition({diameter, {recv, Pkt}}, S) -> recv(Pkt, S); %% Timeout when still in the same state ... -transition({timeout = T, PS}, #state{state = PS} = S) -> - close({capx(PS), T}, S), - stop; +transition({timeout = T, PS}, #state{state = PS}) -> + {stop, {capx(PS), T}}; %% ... or not. transition({timeout, _}, _) -> @@ -457,7 +446,7 @@ send_CER(#state{state = {'Wait-Conn-Ack', Tmo}, OH = LCaps#diameter_caps.origin_host, req_send_CER(OH, Remote) orelse - close({already_connected, Remote, LCaps}, S), + close({already_connected, Remote, LCaps}), CER = build_CER(S), ?LOG(send, 'CER'), #diameter_packet{header = #diameter_header{end_to_end_id = Eid, @@ -690,17 +679,17 @@ cea(CEA, RC) -> CEA#diameter_base_CEA{'Result-Code' = RC}. post('CER' = T, RC, Pkt, S) -> - [fun close/2, {T, caps(S), {RC, Pkt}}]; + [fun(_) -> close({T, caps(S), {RC, Pkt}}) end]; post(_, _, _, _) -> ok. rejected({capabilities_cb, _F, Reason}, T, S) -> rejected(Reason, T, S); -rejected(discard, T, S) -> - close(T, S); +rejected(discard, T, _) -> + close(T); rejected({N, Es}, T, S) -> - {answer('CER', N, Es, S), [fun close/2, T]}; + {answer('CER', N, Es, S), [fun(_) -> close(T) end]}; rejected(N, T, S) -> rejected({N, []}, T, S). @@ -860,7 +849,7 @@ handle_CEA(#diameter_packet{bin = Bin} of _ -> open(DPkt, SApps, Caps, {connect, hd([_] = IS)}, S) catch - ?FAILURE(Reason) -> close({'CEA', Reason, Caps, DPkt}, S) + ?FAILURE(Reason) -> close({'CEA', Reason, Caps, DPkt}) end. %% Check more than the result code since the peer could send success %% regardless. If not 2001 then a peer_up callback could do anything @@ -880,7 +869,7 @@ recv_CEA(#diameter_packet{header = #diameter_header{version T; recv_CEA(Pkt, S) -> - close({'CEA', caps(S), Pkt}, S). + close({'CEA', caps(S), Pkt}). caps(#diameter_service{capabilities = Caps}) -> Caps; @@ -933,14 +922,14 @@ open(Pkt, SupportedApps, Caps, {Type, IS}, #state{parent = Pid, %% We've advertised TLS support: tell the transport the result %% and expect a reply when the handshake is complete. -tls_ack(true, Caps, Type, IS, #state{transport = TPid} = S) -> +tls_ack(true, Caps, Type, IS, #state{transport = TPid}) -> Ref = make_ref(), TPid ! {diameter, {tls, Ref, Type, IS == ?TLS}}, receive {diameter, {tls, Ref}} -> ok; {'DOWN', _, process, TPid, Reason} -> - close({tls_ack, Reason, Caps}, S) + close({tls_ack, Reason, Caps}) end; %% Or not. Don't send anything to the transport so that transports @@ -953,25 +942,11 @@ capz(#diameter_caps{} = L, #diameter_caps{} = R) -> = list_to_tuple([diameter_caps | lists:zip(tl(tuple_to_list(L)), tl(tuple_to_list(R)))]). -%% close/2 +%% close/1 -%% Tell the watchdog that our death isn't due to transport failure. -close(Reason, #state{parent = Pid}) -> - close_wd(Reason, Pid), +close(Reason) -> throw({?MODULE, close, Reason}). -%% close_wd/2 - -%% Ensure the watchdog dies if DPR has been sent ... -close_wd(_, #state{dpr = false}) -> - ok; -close_wd(Reason, #state{parent = Pid}) -> - close_wd(Reason, Pid); - -%% ... or otherwise -close_wd(Reason, Pid) -> - Pid ! {close, self(), Reason}. - %% dwa/1 dwa(#diameter_caps{origin_host = OH, -- cgit v1.2.3