diff options
author | Christopher Faulet <[email protected]> | 2011-09-05 12:42:33 +0200 |
---|---|---|
committer | Henrik Nord <[email protected]> | 2011-09-16 14:54:44 +0200 |
commit | 47759479146ca11ad81eca0bb3236b265e20601d (patch) | |
tree | debf184d6518c4737a9d91867d1923d78c38d977 /lib/stdlib/src | |
parent | 6a113a60dba9fd6c4d736b9e56c52f3494a15027 (diff) | |
download | otp-47759479146ca11ad81eca0bb3236b265e20601d.tar.gz otp-47759479146ca11ad81eca0bb3236b265e20601d.tar.bz2 otp-47759479146ca11ad81eca0bb3236b265e20601d.zip |
Explicitly kill dynamic children in supervisors
According to the supervisor's documentation:
"Important note on simple-one-for-one supervisors: The dynamically
created child processes of a simple-one-for-one supervisor are not
explicitly killed, regardless of shutdown strategy, but are expected
to terminate when the supervisor does (that is, when an exit signal
from the parent process is received)."
All is fine as long as we stop a simple_one_for_one supervisor manually.
Dynamic children catch the exit signal from the supervisor and leave.
But, if this happens when we stop an application, after the top
supervisor has stopped, the application master kills all remaining
processes associated with this application. So, dynamic children that trap
exit signals can be killed during their cleanup (here we mean inside
terminate/2). This is unpredictable and highly time-dependent.
In this commit, the supervisor module is patched to explicitly terminate
dynamic children according to the shutdown strategy.
NOTE: The order in which dynamic children are stopped is not defined. In
fact, they are all stopped at "almost" the same time.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 116 |
1 files changed, 112 insertions, 4 deletions
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index dc31647eb5..8e1ac1bb5c 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -519,9 +519,12 @@ handle_info(Msg, State) -> %% -spec terminate(term(), state()) -> 'ok'. +terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) -> + terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type, + State#state.dynamics), + State#state.name); terminate(_Reason, State) -> - terminate_children(State#state.children, State#state.name), - ok. + terminate_children(State#state.children, State#state.name). %% %% Change code for the supervisor. @@ -831,8 +834,113 @@ monitor_child(Pid) -> %% that will be handled in shutdown/2. ok end. - - + + +%%----------------------------------------------------------------- +%% Func: terminate_dynamic_children/3 +%% Args: Child = child_rec() +%% Dynamics = ?DICT() | ?SET() +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: ok +%% +%% +%% Shutdown all dynamic children. This happens when the supervisor is +%% stopped. Because the supervisor can have millions of dynamic children, we +%% can have an significative overhead here. +%%----------------------------------------------------------------- +terminate_dynamic_children(Child, Dynamics, SupName) -> + Pids = monitor_dynamic_children(Child, Dynamics, SupName), + Sz = ?SETS:size(Pids), + case Child#child.shutdown of + brutal_kill -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, SupName, Sz, undefined); + infinity -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + wait_dynamic_children(Child, Pids, SupName, Sz, undefined); + Time -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + TRef = erlang:start_timer(Time, self(), kill), + wait_dynamic_children(Child, Pids, SupName, Sz, TRef) + end. 
+ + +monitor_dynamic_children(#child{restart_type=temporary} = Child, + Dynamics, SupName) -> + ?SETS:fold(fun(P, Acc) -> + case monitor_child(P) of + ok -> + ?SETS:add_element(P, Acc); + {error, normal} -> + Acc; + {error, OtherReason} -> + report_error(shutdown_error, OtherReason, + Child#child{pid=P}, SupName), + Acc + end + end, ?SETS:new(), Dynamics); +monitor_dynamic_children(#child{restart_type=RType} = Child, + Dynamics, SupName) -> + ?DICT:fold(fun(P, _, Acc) -> + case monitor_child(P) of + ok -> + ?SETS:add_element(P, Acc); + {error, normal} when RType =/= permanent -> + Acc; + {error, OtherReason} -> + report_error(shutdown_error, OtherReason, + Child#child{pid=P}, SupName), + Acc + end + end, ?SETS:new(), Dynamics). + + + +wait_dynamic_children(_Child, _Pids, _SupName, 0, undefined) -> + ok; +wait_dynamic_children(_Child, _Pids, _SupName, 0, TRef) -> + %% If the timer has expired before its cancellation, we must empty the + %% mail-box of the 'timeout'-message. + erlang:cancel_timer(TRef), + receive + {timeout, TRef, kill} -> + ok + after 0 -> + ok + end; +wait_dynamic_children(#child{shutdown=brutal_kill} = Child, + Pids, SupName, Sz, TRef) -> + receive + {'DOWN', _MRef, process, Pid, killed} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {'DOWN', _MRef, process, Pid, Reason} -> + report_error(shutdown_error, Reason, Child#child{pid=Pid}, SupName), + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef) + end; +wait_dynamic_children(#child{restart_type=RType} = Child, Pids, + SupName, Sz, TRef) -> + receive + {'DOWN', _MRef, process, Pid, shutdown} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {'DOWN', _MRef, process, Pid, Reason} -> + report_error(shutdown_error, Reason, 
Child#child{pid=Pid}, SupName), + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {timeout, TRef, kill} -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, SupName, Sz, undefined) + end. + %%----------------------------------------------------------------- %% Child/State manipulating functions. %%----------------------------------------------------------------- |