diff options
author | Christopher Faulet <[email protected]> | 2011-09-05 12:42:33 +0200 |
---|---|---|
committer | Henrik Nord <[email protected]> | 2011-09-16 14:54:44 +0200 |
commit | 47759479146ca11ad81eca0bb3236b265e20601d (patch) | |
tree | debf184d6518c4737a9d91867d1923d78c38d977 /lib | |
parent | 6a113a60dba9fd6c4d736b9e56c52f3494a15027 (diff) | |
download | otp-47759479146ca11ad81eca0bb3236b265e20601d.tar.gz otp-47759479146ca11ad81eca0bb3236b265e20601d.tar.bz2 otp-47759479146ca11ad81eca0bb3236b265e20601d.zip |
Explicitly kill dynamic children in supervisors
According to the supervisor's documentation:
"Important note on simple-one-for-one supervisors: The dynamically
created child processes of a simple-one-for-one supervisor are not
explicitly killed, regardless of shutdown strategy, but are expected
to terminate when the supervisor does (that is, when an exit signal
from the parent process is received)."
All is fine as long as we stop simple_one_for_one supervisor manually.
Dynamic children catch the exit signal from the supervisor and leave.
But, if this happens when we stop an application, after the top
supervisor has stopped, the application master kills all remaining
processes associated to this application. So, dynamic children that trap
exit signals can be killed during their cleanup (here we mean inside
terminate/2). This is unpredictable and highly time-dependent.
In this commit, supervisor module is patched to explicitly terminate
dynamic children accordingly to the shutdown strategy.
NOTE: Order in which dynamic children are stopped is not defined. In
fact, this is "almost" done at the same time.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/stdlib/doc/src/supervisor.xml | 6 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 116 | ||||
-rw-r--r-- | lib/stdlib/test/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_2.erl | 42 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 38 |
5 files changed, 190 insertions, 13 deletions
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index edd119d37a..b4e81aba1f 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -171,12 +171,6 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules} <p>If the child process is another supervisor, <c>Shutdown</c> should be set to <c>infinity</c> to give the subtree ample time to shutdown.</p> - <p><em>Important note on simple-one-for-one supervisors:</em> - The dynamically created child processes of a - simple-one-for-one supervisor are not explicitly killed, - regardless of shutdown strategy, but are expected to terminate - when the supervisor does (that is, when an exit signal from - the parent process is received).</p> <p>Note that all child processes implemented using the standard OTP behavior modules automatically adhere to the shutdown protocol.</p> diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index dc31647eb5..8e1ac1bb5c 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -519,9 +519,12 @@ handle_info(Msg, State) -> %% -spec terminate(term(), state()) -> 'ok'. +terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) -> + terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type, + State#state.dynamics), + State#state.name); terminate(_Reason, State) -> - terminate_children(State#state.children, State#state.name), - ok. + terminate_children(State#state.children, State#state.name). %% %% Change code for the supervisor. @@ -831,8 +834,113 @@ monitor_child(Pid) -> %% that will be handled in shutdown/2. ok end. - - + + +%%----------------------------------------------------------------- +%% Func: terminate_dynamic_children/3 +%% Args: Child = child_rec() +%% Dynamics = ?DICT() | ?SET() +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: ok +%% +%% +%% Shutdown all dynamic children. This happens when the supervisor is +%% stopped. Because the supervisor can have millions of dynamic children, we +%% can have an significative overhead here. +%%----------------------------------------------------------------- +terminate_dynamic_children(Child, Dynamics, SupName) -> + Pids = monitor_dynamic_children(Child, Dynamics, SupName), + Sz = ?SETS:size(Pids), + case Child#child.shutdown of + brutal_kill -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, SupName, Sz, undefined); + infinity -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + wait_dynamic_children(Child, Pids, SupName, Sz, undefined); + Time -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + TRef = erlang:start_timer(Time, self(), kill), + wait_dynamic_children(Child, Pids, SupName, Sz, TRef) + end. + + +monitor_dynamic_children(#child{restart_type=temporary} = Child, + Dynamics, SupName) -> + ?SETS:fold(fun(P, Acc) -> + case monitor_child(P) of + ok -> + ?SETS:add_element(P, Acc); + {error, normal} -> + Acc; + {error, OtherReason} -> + report_error(shutdown_error, OtherReason, + Child#child{pid=P}, SupName), + Acc + end + end, ?SETS:new(), Dynamics); +monitor_dynamic_children(#child{restart_type=RType} = Child, + Dynamics, SupName) -> + ?DICT:fold(fun(P, _, Acc) -> + case monitor_child(P) of + ok -> + ?SETS:add_element(P, Acc); + {error, normal} when RType =/= permanent -> + Acc; + {error, OtherReason} -> + report_error(shutdown_error, OtherReason, + Child#child{pid=P}, SupName), + Acc + end + end, ?SETS:new(), Dynamics). + + + +wait_dynamic_children(_Child, _Pids, _SupName, 0, undefined) -> + ok; +wait_dynamic_children(_Child, _Pids, _SupName, 0, TRef) -> + %% If the timer has expired before its cancellation, we must empty the + %% mail-box of the 'timeout'-message. + erlang:cancel_timer(TRef), + receive + {timeout, TRef, kill} -> + ok + after 0 -> + ok + end; +wait_dynamic_children(#child{shutdown=brutal_kill} = Child, + Pids, SupName, Sz, TRef) -> + receive + {'DOWN', _MRef, process, Pid, killed} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {'DOWN', _MRef, process, Pid, Reason} -> + report_error(shutdown_error, Reason, Child#child{pid=Pid}, SupName), + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef) + end; +wait_dynamic_children(#child{restart_type=RType} = Child, Pids, + SupName, Sz, TRef) -> + receive + {'DOWN', _MRef, process, Pid, shutdown} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {'DOWN', _MRef, process, Pid, Reason} -> + report_error(shutdown_error, Reason, Child#child{pid=Pid}, SupName), + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName, + Sz-1, TRef); + + {timeout, TRef, kill} -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, SupName, Sz, undefined) + end. + %%----------------------------------------------------------------- %% Child/State manipulating functions. %%----------------------------------------------------------------- diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 5502c69fa5..aa6a660c34 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -65,6 +65,7 @@ MODULES= \ stdlib_SUITE \ string_SUITE \ supervisor_1 \ + supervisor_2 \ naughty_child \ shell_SUITE \ supervisor_SUITE \ diff --git a/lib/stdlib/test/supervisor_2.erl b/lib/stdlib/test/supervisor_2.erl new file mode 100644 index 0000000000..67aacf5a9c --- /dev/null +++ b/lib/stdlib/test/supervisor_2.erl @@ -0,0 +1,42 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Description: Simulates the behaviour that a child process may have. +%% Is used by the supervisor_SUITE test suite. +-module(supervisor_2). + +-export([start_child/1, init/1]). + +-export([handle_call/3, handle_info/2, terminate/2]). + +start_child(Time) when is_integer(Time), Time > 0 -> + gen_server:start_link(?MODULE, Time, []). + +init(Time) -> + process_flag(trap_exit, true), + {ok, Time}. + +handle_call(Req, _From, State) -> + {reply, Req, State}. + +handle_info(_, State) -> + {noreply, State}. + +terminate(_Reason, Time) -> + timer:sleep(Time), + ok. diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index b48450c151..a6358e7063 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -50,7 +50,7 @@ one_for_all_escalation/1, simple_one_for_one/1, simple_one_for_one_escalation/1, rest_for_one/1, rest_for_one_escalation/1, - simple_one_for_one_extra/1]). + simple_one_for_one_extra/1, simple_one_for_one_shutdown/1]). %% Misc tests -export([child_unlink/1, tree/1, count_children_memory/1, @@ -94,8 +94,8 @@ groups() -> {restart_one_for_all, [], [one_for_all, one_for_all_escalation]}, {restart_simple_one_for_one, [], - [simple_one_for_one, simple_one_for_one_extra, - simple_one_for_one_escalation]}, + [simple_one_for_one, simple_one_for_one_shutdown, + simple_one_for_one_extra, simple_one_for_one_escalation]}, {restart_rest_for_one, [], [rest_for_one, rest_for_one_escalation]}]. @@ -782,6 +782,38 @@ simple_one_for_one(Config) when is_list(Config) -> terminate(SupPid, Pid4, Id4, abnormal), check_exit([SupPid]). + +%%------------------------------------------------------------------------- +simple_one_for_one_shutdown(doc) -> + ["Test simple_one_for_one children shutdown accordingly to the " + "supervisor's shutdown strategy."]; +simple_one_for_one_shutdown(suite) -> []; +simple_one_for_one_shutdown(Config) when is_list(Config) -> + process_flag(trap_exit, true), + ShutdownTime = 1000, + Child = {child, {supervisor_2, start_child, []}, + permanent, 2*ShutdownTime, worker, []}, + {ok, SupPid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + + %% Will be gracefully shutdown + {ok, _CPid1} = supervisor:start_child(sup_test, [ShutdownTime]), + {ok, _CPid2} = supervisor:start_child(sup_test, [ShutdownTime]), + + %% Will be killed after 2*ShutdownTime milliseconds + {ok, _CPid3} = supervisor:start_child(sup_test, [5*ShutdownTime]), + + {T, ok} = timer:tc(fun terminate/2, [SupPid, shutdown]), + if T < 1000*ShutdownTime -> + %% Because supervisor's children wait before exiting, it can't + %% terminate quickly + test_server:fail({shutdown_too_short, T}); + T >= 1000*5*ShutdownTime -> + test_server:fail({shutdown_too_long, T}); + true -> + check_exit([SupPid]) + end. + + %%------------------------------------------------------------------------- simple_one_for_one_extra(doc) -> ["Tests automatic restart of children " |