diff options
Diffstat (limited to 'lib/diameter/src/base/diameter_service.erl')
-rw-r--r-- | lib/diameter/src/base/diameter_service.erl | 151 |
1 files changed, 113 insertions, 38 deletions
diff --git a/lib/diameter/src/base/diameter_service.erl b/lib/diameter/src/base/diameter_service.erl index 1274e0fc48..ab56ca9cef 100644 --- a/lib/diameter/src/base/diameter_service.erl +++ b/lib/diameter/src/base/diameter_service.erl @@ -499,9 +499,21 @@ transition(Req, S) -> %% # terminate/2 %% --------------------------------------------------------------------------- -terminate(Reason, #state{service_name = Name} = S) -> +terminate(Reason, #state{service_name = Name, peerT = PeerT} = S) -> send_event(Name, stop), ets:delete(?STATE_TABLE, Name), + + %% Communicate pending loss of any peers that connection_down/3 + %% won't. This is needed when stopping a service since we don't + %% wait for watchdog state changes to take care of if. That this + %% takes place after deleting the state entry ensures that the + %% resulting failover by request processes accomplishes nothing. + ets:foldl(fun(#peer{pid = TPid}, _) -> + diameter_traffic:peer_down(TPid) + end, + ok, + PeerT), + shutdown == Reason %% application shutdown andalso shutdown(application, S). @@ -719,14 +731,27 @@ remotes(F) -> L when is_list(L) -> L; T -> - diameter_lib:error_report({invalid_return, T}, F), + ?LOG(invalid_return, {F,T}), + error_report(invalid_return, share_peers, F), [] catch E:R -> - diameter_lib:error_report({failure, {E, R, ?STACK}}, F), + ?LOG(failure, {E, R, F, diameter_lib:get_stacktrace()}), + error_report(failure, share_peers, F), [] end. +%% error_report/3 + +error_report(T, What, F) -> + Reason = io_lib:format("~s from ~p callback", [reason(T), What]), + diameter_lib:error_report(Reason, diameter_lib:eval_name(F)). + +reason(invalid_return) -> + "invalid return"; +reason(failure) -> + "failure". + %% --------------------------------------------------------------------------- %% # start/3 %% --------------------------------------------------------------------------- @@ -869,7 +894,7 @@ watchdog(TPid, [], ?WD_OKAY, ?WD_SUSPECT = To, Wd, State) -> %% Watchdog has lost its connection. watchdog(TPid, [], _, ?WD_DOWN = To, Wd, #state{peerT = PeerT} = S) -> - close(Wd, S), + close(Wd), watchdog_down(Wd, To, S), ets:delete(PeerT, TPid); @@ -1026,8 +1051,11 @@ peer_cb(App, F, A) -> true catch E:R -> - diameter_lib:error_report({failure, {E, R, ?STACK}}, - {App, F, A}), + %% Don't include arguments since a #diameter_caps{} strings + %% from the peer, which could be anything (especially, large). + [Mod|X] = App#diameter_app.module, + ?LOG(failure, {E, R, Mod, F, diameter_lib:get_stacktrace()}), + error_report(failure, F, {Mod, F, A ++ X}), false end. @@ -1187,26 +1215,16 @@ tc(false = No, _, _) -> %% removed %% another watchdog to be able to detect that it should transition %% from initial into reopen rather than okay. That someone is either %% the accepting watchdog upon reception of a CER from the previously -%% connected peer, or us after connect_timer timeout. +%% connected peer, or us after connect_timer timeout or immediately. -close(#watchdog{type = connect}, _) -> +close(#watchdog{type = connect}) -> ok; + close(#watchdog{type = accept, pid = Pid, - ref = Ref, - options = Opts}, - #state{service_name = SvcName}) -> - c(Pid, diameter_config:have_transport(SvcName, Ref), Opts). - -%% Tell watchdog to (maybe) die later ... -c(Pid, true, Opts) -> + options = Opts}) -> Tc = connect_timer(Opts, 2*?DEFAULT_TC), - erlang:send_after(Tc, Pid, close); - -%% ... or now. -c(Pid, false, _Opts) -> - Pid ! close. - + erlang:send_after(Tc, Pid, close). %% The RFC's only document the behaviour of Tc, our connect_timer, %% for the establishment of connections but we also give %% connect_timer semantics for a listener, being the time within @@ -1260,13 +1278,14 @@ cm([#diameter_app{alias = Alias} = App], Req, From, Svc) -> mod_state(Alias, ModS), {T, RC}; T -> - diameter_lib:error_report({invalid, T}, - {App, handle_call, Args}), + ModX = App#diameter_app.module, + ?LOG(invalid_return, {ModX, handle_call, Args, T}), invalid catch E: Reason -> - diameter_lib:error_report({failure, {E, Reason, ?STACK}}, - {App, handle_call, Args}), + ModX = App#diameter_app.module, + Stack = diameter_lib:get_stacktrace(), + ?LOG(failure, {E, Reason, ModX, handle_call, Stack}), failure end; @@ -1389,6 +1408,8 @@ pick_peer(Local, Remote, Pid, _SvcName, #diameter_app{mutable = true} = App) case call_service(Pid, {pick_peer, Local, Remote, App}) of {TPid, _} = T when is_pid(TPid) -> T; + false = No -> + No; {error, _} -> false end; @@ -1422,13 +1443,16 @@ pick_peer(Local, T; %% Accept returned state in the immutable {false = No, S} -> %% case as long it isn't changed. No; - T -> - diameter_lib:error_report({invalid, T, App}, - {App, pick_peer, Args}) + T when M -> + ModX = App#diameter_app.module, + ?LOG(invalid_return, {ModX, pick_peer, T}), + false catch - E: Reason -> - diameter_lib:error_report({failure, {E, Reason, ?STACK}}, - {App, pick_peer, Args}) + E: Reason when M -> + ModX = App#diameter_app.module, + Stack = diameter_lib:get_stacktrace(), + ?LOG(failure, {E, Reason, ModX, pick_peer, Stack}), + false end. %% peers/4 @@ -1549,7 +1573,8 @@ transports(#state{watchdogT = WatchdogT}) -> -define(OTHER_INFO, [connections, name, peers, - statistics]). + statistics, + info]). service_info(Item, S) when is_atom(Item) -> @@ -1639,6 +1664,7 @@ complete_info(Item, #state{service = Svc} = S) -> keys -> ?ALL_INFO ++ ?CAP_INFO ++ ?OTHER_INFO; all -> service_info(?ALL_INFO, S); statistics -> info_stats(S); + info -> info_info(S); connections -> info_connections(S); peers -> info_peers(S) end. @@ -1721,12 +1747,11 @@ peer_acc(PeerT, Acc, #watchdog{pid = Pid, state = WS, started = At, peer = TPid}) -> - dict:append(Ref, - [{type, Type}, - {options, Opts}, - {watchdog, {Pid, At, WS}} - | info_peer(PeerT, TPid, WS)], - Acc). + Info = [{type, Type}, + {options, Opts}, + {watchdog, {Pid, At, WS}} + | info_peer(PeerT, TPid, WS)], + dict:append(Ref, Info ++ [{info, info_process_info(Info)}], Acc). info_peer(PeerT, TPid, WS) when is_pid(TPid), WS /= ?WD_DOWN -> @@ -1738,6 +1763,49 @@ info_peer(PeerT, TPid, WS) info_peer(_, _, _) -> []. +info_process_info(Info) -> + lists:flatmap(fun ipi/1, Info). + +ipi({watchdog, {Pid, _, _}}) -> + info_pid(Pid); + +ipi({peer, {Pid, _}}) -> + info_pid(Pid); + +ipi({port, [{owner, Pid} | _]}) -> + info_pid(Pid); + +ipi(_) -> + []. + +info_pid(Pid) -> + case process_info(Pid, [message_queue_len, memory, binary]) of + undefined -> + []; + L -> + [{Pid, lists:map(fun({K,V}) -> {K, map_info(K,V)} end, L)}] + end. + +%% The binary list consists of 3-tuples {Ptr, Size, Count}, where Ptr +%% is a C pointer value, Size is the size of a referenced binary in +%% bytes, and Count is a global reference count. The same Ptr can +%% occur multiple times, once for each reference on the process heap. +%% In this case, the corresponding tuples will have Size in common but +%% Count may differ just because no global lock is taken when the +%% value is retrieved. +%% +%% The list can be quite large, and we aren't often interested in the +%% pointers or counts, so whittle this down to the number of binaries +%% referenced and their total byte count. +map_info(binary, L) -> + SzD = lists:foldl(fun({P,S,_}, D) -> dict:store(P,S,D) end, + dict:new(), + L), + {dict:size(SzD), dict:fold(fun(_,S,N) -> S + N end, 0, SzD)}; + +map_info(_, T) -> + T. + %% The point of extracting the config here is so that 'transport' info %% has one entry for each transport ref, the peer table only %% containing entries that have a living watchdog. @@ -1795,6 +1863,13 @@ mk_app(#diameter_app{} = A) -> info_pending(#state{} = S) -> diameter_traffic:pending(transports(S)). +%% info_info/1 +%% +%% Extract process_info from connections info. + +info_info(S) -> + [I || L <- conn_list(S), {info, I} <- L]. + %% info_connections/1 %% %% One entry per transport connection. Statistics for each entry are |