aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenrik Nord <[email protected]>2011-10-20 14:39:42 +0200
committerHenrik Nord <[email protected]>2011-10-20 14:39:42 +0200
commit45b4d5309e0686cc5fa28506de76f75b598bbd95 (patch)
treebd4d54a6de38654f49908b38c311718b74593a33
parente21bc712b95b878afd9ab3d93ac27ded0bce7ccf (diff)
parent04731323678eff58f709b36f864d12aa08cfe6d9 (diff)
downloadotp-45b4d5309e0686cc5fa28506de76f75b598bbd95.tar.gz
otp-45b4d5309e0686cc5fa28506de76f75b598bbd95.tar.bz2
otp-45b4d5309e0686cc5fa28506de76f75b598bbd95.zip
Merge branch 'cf/simple_one_for_one_shutdown'
* cf/simple_one_for_one_shutdown: Explain how dynamic child processes are stopped Stack errors when dynamic children are stopped Explicitly kill dynamic children in supervisors Conflicts: lib/stdlib/doc/src/supervisor.xml OTP-9647
-rw-r--r--lib/stdlib/doc/src/supervisor.xml4
-rw-r--r--lib/stdlib/src/supervisor.erl121
-rw-r--r--lib/stdlib/test/Makefile1
-rw-r--r--lib/stdlib/test/supervisor_2.erl42
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl38
-rw-r--r--system/doc/design_principles/sup_princ.xml4
6 files changed, 202 insertions, 8 deletions
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml
index 54e7cab884..30514dfee9 100644
--- a/lib/stdlib/doc/src/supervisor.xml
+++ b/lib/stdlib/doc/src/supervisor.xml
@@ -93,6 +93,10 @@
instead the child specification identifier is used,
<c>terminate_child/2</c> will return
<c>{error,simple_one_for_one}</c>.</p>
+ <p>Because a <c>simple_one_for_one</c> supervisor could have many
+ children, it shuts them all down at same time. So, order in which they
+ are stopped is not defined. For the same reason, it could have an
+ overhead with regards to the <c>Shutdown</c> strategy.</p>
</item>
</list>
<p>To prevent a supervisor from getting into an infinite loop of
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index 051dca462b..f20ea18fd0 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -515,9 +515,12 @@ handle_info(Msg, State) ->
%%
-spec terminate(term(), state()) -> 'ok'.
+terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) ->
+ terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type,
+ State#state.dynamics),
+ State#state.name);
terminate(_Reason, State) ->
- terminate_children(State#state.children, State#state.name),
- ok.
+ terminate_children(State#state.children, State#state.name).
%%
%% Change code for the supervisor.
@@ -830,8 +833,109 @@ monitor_child(Pid) ->
%% that will be handled in shutdown/2.
ok
end.
-
-
+
+
+%%-----------------------------------------------------------------
+%% Func: terminate_dynamic_children/3
+%% Args: Child = child_rec()
+%% Dynamics = ?DICT() | ?SET()
+%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
+%% Returns: ok
+%%
+%%
+%% Shutdown all dynamic children. This happens when the supervisor is
+%% stopped. Because the supervisor can have millions of dynamic children, we
+%% can have an significative overhead here.
+%%-----------------------------------------------------------------
+terminate_dynamic_children(Child, Dynamics, SupName) ->
+ {Pids, EStack0} = monitor_dynamic_children(Child, Dynamics),
+ Sz = ?SETS:size(Pids),
+ EStack = case Child#child.shutdown of
+ brutal_kill ->
+ ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
+ wait_dynamic_children(Child, Pids, Sz, undefined, EStack0);
+ infinity ->
+ ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
+ wait_dynamic_children(Child, Pids, Sz, undefined, EStack0);
+ Time ->
+ ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
+ TRef = erlang:start_timer(Time, self(), kill),
+ wait_dynamic_children(Child, Pids, Sz, TRef, EStack0)
+ end,
+ %% Unrool stacked errors and report them
+ ?DICT:fold(fun(Reason, Ls, _) ->
+ report_error(shutdown_error, Reason,
+ Child#child{pid=Ls}, SupName)
+ end, ok, EStack).
+
+
+monitor_dynamic_children(#child{restart_type=temporary}, Dynamics) ->
+ ?SETS:fold(fun(P, {Pids, EStack}) ->
+ case monitor_child(P) of
+ ok ->
+ {?SETS:add_element(P, Pids), EStack};
+ {error, normal} ->
+ {Pids, EStack};
+ {error, Reason} ->
+ {Pids, ?DICT:append(Reason, P, EStack)}
+ end
+ end, {?SETS:new(), ?DICT:new()}, Dynamics);
+monitor_dynamic_children(#child{restart_type=RType}, Dynamics) ->
+ ?DICT:fold(fun(P, _, {Pids, EStack}) ->
+ case monitor_child(P) of
+ ok ->
+ {?SETS:add_element(P, Pids), EStack};
+ {error, normal} when RType =/= permanent ->
+ {Pids, EStack};
+ {error, Reason} ->
+ {Pids, ?DICT:append(Reason, P, EStack)}
+ end
+ end, {?SETS:new(), ?DICT:new()}, Dynamics).
+
+
+wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) ->
+ EStack;
+wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) ->
+ %% If the timer has expired before its cancellation, we must empty the
+ %% mail-box of the 'timeout'-message.
+ erlang:cancel_timer(TRef),
+ receive
+ {timeout, TRef, kill} ->
+ EStack
+ after 0 ->
+ EStack
+ end;
+wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz,
+ TRef, EStack) ->
+ receive
+ {'DOWN', _MRef, process, Pid, killed} ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ TRef, EStack);
+
+ {'DOWN', _MRef, process, Pid, Reason} ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ TRef, ?DICT:append(Reason, Pid, EStack))
+ end;
+wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz,
+ TRef, EStack) ->
+ receive
+ {'DOWN', _MRef, process, Pid, shutdown} ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ TRef, EStack);
+
+ {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ TRef, EStack);
+
+ {'DOWN', _MRef, process, Pid, Reason} ->
+ wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ TRef, ?DICT:append(Reason, Pid, EStack));
+
+ {timeout, TRef, kill} ->
+ ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
+ wait_dynamic_children(Child, Pids, Sz-1, undefined, EStack)
+ end.
+
%%-----------------------------------------------------------------
%% Child/State manipulating functions.
%%-----------------------------------------------------------------
@@ -1134,12 +1238,19 @@ report_error(Error, Reason, Child, SupName) ->
error_logger:error_report(supervisor_report, ErrorMsg).
-extract_child(Child) ->
+extract_child(Child) when is_pid(Child#child.pid) ->
[{pid, Child#child.pid},
{name, Child#child.name},
{mfargs, Child#child.mfargs},
{restart_type, Child#child.restart_type},
{shutdown, Child#child.shutdown},
+ {child_type, Child#child.child_type}];
+extract_child(Child) ->
+ [{nb_children, length(Child#child.pid)},
+ {name, Child#child.name},
+ {mfargs, Child#child.mfargs},
+ {restart_type, Child#child.restart_type},
+ {shutdown, Child#child.shutdown},
{child_type, Child#child.child_type}].
report_progress(Child, SupName) ->
diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile
index 5502c69fa5..aa6a660c34 100644
--- a/lib/stdlib/test/Makefile
+++ b/lib/stdlib/test/Makefile
@@ -65,6 +65,7 @@ MODULES= \
stdlib_SUITE \
string_SUITE \
supervisor_1 \
+ supervisor_2 \
naughty_child \
shell_SUITE \
supervisor_SUITE \
diff --git a/lib/stdlib/test/supervisor_2.erl b/lib/stdlib/test/supervisor_2.erl
new file mode 100644
index 0000000000..67aacf5a9c
--- /dev/null
+++ b/lib/stdlib/test/supervisor_2.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2010. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Description: Simulates the behaviour that a child process may have.
+%% Is used by the supervisor_SUITE test suite.
+-module(supervisor_2).
+
+-export([start_child/1, init/1]).
+
+-export([handle_call/3, handle_info/2, terminate/2]).
+
+start_child(Time) when is_integer(Time), Time > 0 ->
+ gen_server:start_link(?MODULE, Time, []).
+
+init(Time) ->
+ process_flag(trap_exit, true),
+ {ok, Time}.
+
+handle_call(Req, _From, State) ->
+ {reply, Req, State}.
+
+handle_info(_, State) ->
+ {noreply, State}.
+
+terminate(_Reason, Time) ->
+ timer:sleep(Time),
+ ok.
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index 9d47233422..e709cf62ba 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -52,7 +52,7 @@
one_for_all_escalation/1,
simple_one_for_one/1, simple_one_for_one_escalation/1,
rest_for_one/1, rest_for_one_escalation/1,
- simple_one_for_one_extra/1]).
+ simple_one_for_one_extra/1, simple_one_for_one_shutdown/1]).
%% Misc tests
-export([child_unlink/1, tree/1, count_children_memory/1,
@@ -99,8 +99,8 @@ groups() ->
{restart_one_for_all, [],
[one_for_all, one_for_all_escalation]},
{restart_simple_one_for_one, [],
- [simple_one_for_one, simple_one_for_one_extra,
- simple_one_for_one_escalation]},
+ [simple_one_for_one, simple_one_for_one_shutdown,
+ simple_one_for_one_extra, simple_one_for_one_escalation]},
{restart_rest_for_one, [],
[rest_for_one, rest_for_one_escalation]}].
@@ -868,6 +868,38 @@ simple_one_for_one(Config) when is_list(Config) ->
terminate(SupPid, Pid4, Id4, abnormal),
check_exit([SupPid]).
+
+%%-------------------------------------------------------------------------
+simple_one_for_one_shutdown(doc) ->
+ ["Test simple_one_for_one children shutdown accordingly to the "
+ "supervisor's shutdown strategy."];
+simple_one_for_one_shutdown(suite) -> [];
+simple_one_for_one_shutdown(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ ShutdownTime = 1000,
+ Child = {child, {supervisor_2, start_child, []},
+ permanent, 2*ShutdownTime, worker, []},
+ {ok, SupPid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
+
+ %% Will be gracefully shutdown
+ {ok, _CPid1} = supervisor:start_child(sup_test, [ShutdownTime]),
+ {ok, _CPid2} = supervisor:start_child(sup_test, [ShutdownTime]),
+
+ %% Will be killed after 2*ShutdownTime milliseconds
+ {ok, _CPid3} = supervisor:start_child(sup_test, [5*ShutdownTime]),
+
+ {T, ok} = timer:tc(fun terminate/2, [SupPid, shutdown]),
+ if T < 1000*ShutdownTime ->
+ %% Because supervisor's children wait before exiting, it can't
+ %% terminate quickly
+ test_server:fail({shutdown_too_short, T});
+ T >= 1000*5*ShutdownTime ->
+ test_server:fail({shutdown_too_long, T});
+ true ->
+ check_exit([SupPid])
+ end.
+
+
%%-------------------------------------------------------------------------
simple_one_for_one_extra(doc) ->
["Tests automatic restart of children "
diff --git a/system/doc/design_principles/sup_princ.xml b/system/doc/design_principles/sup_princ.xml
index 9e7cc2f543..a432f9458b 100644
--- a/system/doc/design_principles/sup_princ.xml
+++ b/system/doc/design_principles/sup_princ.xml
@@ -349,6 +349,10 @@ call:start_link(id1)</code>
supervisor:terminate_child(Sup, Pid)</code>
<p>where <c>Sup</c> is the pid, or name, of the supervisor and
<c>Pid</c> is the pid of the child.</p>
+ <p>Because a <c>simple_one_for_one</c> supervisor could have many children,
+ it shuts them all down at same time. So, order in which they are stopped is
+ not defined. For the same reason, it could have an overhead with regards to
+ the <c>Shutdown</c> strategy.</p>
</section>
<section>