aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristopher Faulet <[email protected]>2011-09-05 12:42:33 +0200
committerHenrik Nord <[email protected]>2011-09-16 14:54:44 +0200
commit47759479146ca11ad81eca0bb3236b265e20601d (patch)
treedebf184d6518c4737a9d91867d1923d78c38d977
parent6a113a60dba9fd6c4d736b9e56c52f3494a15027 (diff)
downloadotp-47759479146ca11ad81eca0bb3236b265e20601d.tar.gz
otp-47759479146ca11ad81eca0bb3236b265e20601d.tar.bz2
otp-47759479146ca11ad81eca0bb3236b265e20601d.zip
Explicitly kill dynamic children in supervisors
According to the supervisor's documentation: "Important note on simple-one-for-one supervisors: The dynamically created child processes of a simple-one-for-one supervisor are not explicitly killed, regardless of shutdown strategy, but are expected to terminate when the supervisor does (that is, when an exit signal from the parent process is received)." All is fine as long as we stop simple_one_for_one supervisor manually. Dynamic children catch the exit signal from the supervisor and leave. But, if this happens when we stop an application, after the top supervisor has stopped, the application master kills all remaining processes associated to this application. So, dynamic children that trap exit signals can be killed during their cleanup (here we mean inside terminate/2). This is unpredictable and highly time-dependent. In this commit, supervisor module is patched to explicitly terminate dynamic children accordingly to the shutdown strategy. NOTE: Order in which dynamic children are stopped is not defined. In fact, this is "almost" done at the same time.
-rw-r--r--lib/stdlib/doc/src/supervisor.xml6
-rw-r--r--lib/stdlib/src/supervisor.erl116
-rw-r--r--lib/stdlib/test/Makefile1
-rw-r--r--lib/stdlib/test/supervisor_2.erl42
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl38
5 files changed, 190 insertions, 13 deletions
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml
index edd119d37a..b4e81aba1f 100644
--- a/lib/stdlib/doc/src/supervisor.xml
+++ b/lib/stdlib/doc/src/supervisor.xml
@@ -171,12 +171,6 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules}
<p>If the child process is another supervisor, <c>Shutdown</c>
should be set to <c>infinity</c> to give the subtree ample
time to shutdown.</p>
- <p><em>Important note on simple-one-for-one supervisors:</em>
- The dynamically created child processes of a
- simple-one-for-one supervisor are not explicitly killed,
- regardless of shutdown strategy, but are expected to terminate
- when the supervisor does (that is, when an exit signal from
- the parent process is received).</p>
<p>Note that all child processes implemented using the standard
OTP behavior modules automatically adhere to the shutdown
protocol.</p>
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index dc31647eb5..8e1ac1bb5c 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -519,9 +519,12 @@ handle_info(Msg, State) ->
%%
-spec terminate(term(), state()) -> 'ok'.
+terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) ->
+ terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type,
+ State#state.dynamics),
+ State#state.name);
terminate(_Reason, State) ->
- terminate_children(State#state.children, State#state.name),
- ok.
+ terminate_children(State#state.children, State#state.name).
%%
%% Change code for the supervisor.
@@ -831,8 +834,113 @@ monitor_child(Pid) ->
%% that will be handled in shutdown/2.
ok
end.
-
-
+
+
+%%-----------------------------------------------------------------
+%% Func: terminate_dynamic_children/3
+%% Args: Child = child_rec()
+%% Dynamics = ?DICT() | ?SET()
+%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
+%% Returns: ok
+%%
+%%
+%% Shutdown all dynamic children. This happens when the supervisor is
+%% stopped. Because the supervisor can have millions of dynamic children, we
+%% can have an significative overhead here.
+%%-----------------------------------------------------------------
+terminate_dynamic_children(Child, Dynamics, SupName) ->
+ Pids = monitor_dynamic_children(Child, Dynamics, SupName),
+ Sz = ?SETS:size(Pids),
+ case Child#child.shutdown of
+ brutal_kill ->
+ ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
+ wait_dynamic_children(Child, Pids, SupName, Sz, undefined);
+ infinity ->
+ ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
+ wait_dynamic_children(Child, Pids, SupName, Sz, undefined);
+ Time ->
+ ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
+ TRef = erlang:start_timer(Time, self(), kill),
+ wait_dynamic_children(Child, Pids, SupName, Sz, TRef)
+ end.
+
+
+monitor_dynamic_children(#child{restart_type=temporary} = Child,
+ Dynamics, SupName) ->
+ ?SETS:fold(fun(P, Acc) ->
+ case monitor_child(P) of
+ ok ->
+ ?SETS:add_element(P, Acc);
+ {error, normal} ->
+ Acc;
+ {error, OtherReason} ->
+ report_error(shutdown_error, OtherReason,
+ Child#child{pid=P}, SupName),
+ Acc
+ end
+ end, ?SETS:new(), Dynamics);
+monitor_dynamic_children(#child{restart_type=RType} = Child,
+ Dynamics, SupName) ->
+ ?DICT:fold(fun(P, _, Acc) ->
+ case monitor_child(P) of
+ ok ->
+ ?SETS:add_element(P, Acc);
+ {error, normal} when RType =/= permanent ->
+ Acc;
+ {error, OtherReason} ->
+ report_error(shutdown_error, OtherReason,
+ Child#child{pid=P}, SupName),
+ Acc
+ end
+ end, ?SETS:new(), Dynamics).
+
+
+
+wait_dynamic_children(_Child, _Pids, _SupName, 0, undefined) ->
+ ok;
+wait_dynamic_children(_Child, _Pids, _SupName, 0, TRef) ->
+ %% If the timer has expired before its cancellation, we must empty the
+ %% mail-box of the 'timeout'-message.
+ erlang:cancel_timer(TRef),
+ receive
+ {timeout, TRef, kill} ->
+ ok
+ after 0 ->
+ ok
+ end;
+wait_dynamic_children(#child{shutdown=brutal_kill} = Child,
+ Pids, SupName, Sz, TRef) ->
+ receive
+ {'DOWN', _MRef, process, Pid, killed} ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName,
+ Sz-1, TRef);
+
+ {'DOWN', _MRef, process, Pid, Reason} ->
+ report_error(shutdown_error, Reason, Child#child{pid=Pid}, SupName),
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName,
+ Sz-1, TRef)
+ end;
+wait_dynamic_children(#child{restart_type=RType} = Child, Pids,
+ SupName, Sz, TRef) ->
+ receive
+ {'DOWN', _MRef, process, Pid, shutdown} ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName,
+ Sz-1, TRef);
+
+ {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName,
+ Sz-1, TRef);
+
+ {'DOWN', _MRef, process, Pid, Reason} ->
+ report_error(shutdown_error, Reason, Child#child{pid=Pid}, SupName),
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), SupName,
+ Sz-1, TRef);
+
+ {timeout, TRef, kill} ->
+ ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
+ wait_dynamic_children(Child, Pids, SupName, Sz, undefined)
+ end.
+
%%-----------------------------------------------------------------
%% Child/State manipulating functions.
%%-----------------------------------------------------------------
diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile
index 5502c69fa5..aa6a660c34 100644
--- a/lib/stdlib/test/Makefile
+++ b/lib/stdlib/test/Makefile
@@ -65,6 +65,7 @@ MODULES= \
stdlib_SUITE \
string_SUITE \
supervisor_1 \
+ supervisor_2 \
naughty_child \
shell_SUITE \
supervisor_SUITE \
diff --git a/lib/stdlib/test/supervisor_2.erl b/lib/stdlib/test/supervisor_2.erl
new file mode 100644
index 0000000000..67aacf5a9c
--- /dev/null
+++ b/lib/stdlib/test/supervisor_2.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2010. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Description: Simulates the behaviour that a child process may have.
+%% Is used by the supervisor_SUITE test suite.
+-module(supervisor_2).
+
+-export([start_child/1, init/1]).
+
+-export([handle_call/3, handle_info/2, terminate/2]).
+
+start_child(Time) when is_integer(Time), Time > 0 ->
+ gen_server:start_link(?MODULE, Time, []).
+
+init(Time) ->
+ process_flag(trap_exit, true),
+ {ok, Time}.
+
+handle_call(Req, _From, State) ->
+ {reply, Req, State}.
+
+handle_info(_, State) ->
+ {noreply, State}.
+
+terminate(_Reason, Time) ->
+ timer:sleep(Time),
+ ok.
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index b48450c151..a6358e7063 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -50,7 +50,7 @@
one_for_all_escalation/1,
simple_one_for_one/1, simple_one_for_one_escalation/1,
rest_for_one/1, rest_for_one_escalation/1,
- simple_one_for_one_extra/1]).
+ simple_one_for_one_extra/1, simple_one_for_one_shutdown/1]).
%% Misc tests
-export([child_unlink/1, tree/1, count_children_memory/1,
@@ -94,8 +94,8 @@ groups() ->
{restart_one_for_all, [],
[one_for_all, one_for_all_escalation]},
{restart_simple_one_for_one, [],
- [simple_one_for_one, simple_one_for_one_extra,
- simple_one_for_one_escalation]},
+ [simple_one_for_one, simple_one_for_one_shutdown,
+ simple_one_for_one_extra, simple_one_for_one_escalation]},
{restart_rest_for_one, [],
[rest_for_one, rest_for_one_escalation]}].
@@ -782,6 +782,38 @@ simple_one_for_one(Config) when is_list(Config) ->
terminate(SupPid, Pid4, Id4, abnormal),
check_exit([SupPid]).
+
+%%-------------------------------------------------------------------------
+simple_one_for_one_shutdown(doc) ->
+ ["Test simple_one_for_one children shutdown accordingly to the "
+ "supervisor's shutdown strategy."];
+simple_one_for_one_shutdown(suite) -> [];
+simple_one_for_one_shutdown(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ ShutdownTime = 1000,
+ Child = {child, {supervisor_2, start_child, []},
+ permanent, 2*ShutdownTime, worker, []},
+ {ok, SupPid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
+
+ %% Will be gracefully shutdown
+ {ok, _CPid1} = supervisor:start_child(sup_test, [ShutdownTime]),
+ {ok, _CPid2} = supervisor:start_child(sup_test, [ShutdownTime]),
+
+ %% Will be killed after 2*ShutdownTime milliseconds
+ {ok, _CPid3} = supervisor:start_child(sup_test, [5*ShutdownTime]),
+
+ {T, ok} = timer:tc(fun terminate/2, [SupPid, shutdown]),
+ if T < 1000*ShutdownTime ->
+ %% Because supervisor's children wait before exiting, it can't
+ %% terminate quickly
+ test_server:fail({shutdown_too_short, T});
+ T >= 1000*5*ShutdownTime ->
+ test_server:fail({shutdown_too_long, T});
+ true ->
+ check_exit([SupPid])
+ end.
+
+
%%-------------------------------------------------------------------------
simple_one_for_one_extra(doc) ->
["Tests automatic restart of children "