aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFred Hebert <[email protected]>2011-08-03 10:47:36 -0400
committerFred Hebert <[email protected]>2011-08-23 14:31:09 -0400
commit4f9b938112a80f1c1e210b1a6af40a42f833cfa2 (patch)
tree9fb8fd96f584107c9592c0be3e603d9020a40b69
parent62f194499810270b488cc46a5e611f0be77f36a7 (diff)
downloadotp-4f9b938112a80f1c1e210b1a6af40a42f833cfa2.tar.gz
otp-4f9b938112a80f1c1e210b1a6af40a42f833cfa2.tar.bz2
otp-4f9b938112a80f1c1e210b1a6af40a42f833cfa2.zip
fix supervisors restarting temporary children
In the current implementation of supervisors, temporary children should never be restarted. However, when a temporary child is restarted as part of a one_for_all or rest_for_one strategy where the failing process is not the temporary child, the supervisor still tries to restart it. Because the supervisor doesn't keep some of the MFA information of temporary children, this causes the supervisor to hit its restart limit and crash. This patch fixes the behaviour by inserting a clause in terminate_children/2-3 (private function) that will omit temporary children when building a list of killed processes, to avoid having the supervisor trying to restart them again. Only supervisors in need of restarting children used the list, so the change should be of no impact for the functions that called terminate_children/2-3 only to kill all children. The documentation has been modified to make this behaviour more explicit.
-rw-r--r--lib/stdlib/doc/src/supervisor.xml9
-rw-r--r--lib/stdlib/src/supervisor.erl7
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl35
3 files changed, 46 insertions, 5 deletions
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml
index 009aa60faa..edd119d37a 100644
--- a/lib/stdlib/doc/src/supervisor.xml
+++ b/lib/stdlib/doc/src/supervisor.xml
@@ -150,9 +150,12 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules}
<p><c>Restart</c> defines when a terminated child process
should be restarted. A <c>permanent</c> child process should
always be restarted, a <c>temporary</c> child process should
- never be restarted and a <c>transient</c> child process
- should be restarted only if it terminates abnormally, i.e.
- with another exit reason than <c>normal</c>.</p>
+ never be restarted (even when the supervisor's restart strategy
+ is <c>rest_for_one</c> or <c>one_for_all</c> and a sibling's
+ death causes the temporary process to be terminated) and a
+ <c>transient</c> child process should be restarted only if
+ it terminates abnormally, i.e. with another exit reason
+ than <c>normal</c>.</p>
</item>
<item>
<p><c>Shutdown</c> defines how a child process should be
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index e60706ed05..dc31647eb5 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -735,6 +735,13 @@ restart(one_for_all, Child, State) ->
terminate_children(Children, SupName) ->
terminate_children(Children, SupName, []).
+%% Temporary children should not be restarted and thus should
+%% be skipped when building the list of terminated children, although
+%% we do want them to be shut down as many functions from this module
+%% use this function to just clear everything.
+terminate_children([Child = #child{restart_type=temporary} | Children], SupName, Res) ->
+ do_terminate(Child, SupName),
+ terminate_children(Children, SupName, Res);
terminate_children([Child | Children], SupName, Res) ->
NChild = do_terminate(Child, SupName),
terminate_children(Children, SupName, [NChild | Res]);
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index ff5e4c629a..b48450c151 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -42,7 +42,7 @@
-export([ permanent_normal/1, transient_normal/1,
temporary_normal/1,
permanent_abnormal/1, transient_abnormal/1,
- temporary_abnormal/1]).
+ temporary_abnormal/1, temporary_bystander/1]).
%% Restart strategy tests
-export([ one_for_one/1,
@@ -74,7 +74,7 @@ all() ->
{group, abnormal_termination}, child_unlink, tree,
count_children_memory, do_not_save_start_parameters_for_temporary_children,
do_not_save_child_specs_for_temporary_children,
- simple_one_for_one_scale_many_temporary_children].
+ simple_one_for_one_scale_many_temporary_children, temporary_bystander].
groups() ->
[{sup_start, [],
@@ -607,6 +607,37 @@ temporary_abnormal(Config) when is_list(Config) ->
[0,0,0,0] = get_child_counts(sup_test).
%%-------------------------------------------------------------------------
+temporary_bystander(doc) ->
+ ["A temporary process killed as part of a rest_for_one or one_for_all "
+ "restart strategy should not be restarted given its args are not "
+ " saved. Otherwise the supervisor hits its limit and crashes."];
+temporary_bystander(suite) -> [];
+temporary_bystander(_Config) ->
+ Child1 = {child1, {supervisor_1, start_child, []}, permanent, 100,
+ worker, []},
+ Child2 = {child2, {supervisor_1, start_child, []}, temporary, 100,
+ worker, []},
+ {ok, SupPid1} = supervisor:start_link(?MODULE, {ok, {{one_for_all, 2, 300}, []}}),
+ {ok, SupPid2} = supervisor:start_link(?MODULE, {ok, {{rest_for_one, 2, 300}, []}}),
+ unlink(SupPid1), % otherwise we crash with it
+ unlink(SupPid2), % otherwise we crash with it
+ {ok, CPid1} = supervisor:start_child(SupPid1, Child1),
+ {ok, _CPid2} = supervisor:start_child(SupPid1, Child2),
+ {ok, CPid3} = supervisor:start_child(SupPid2, Child1),
+ {ok, _CPid4} = supervisor:start_child(SupPid2, Child2),
+ terminate(SupPid1, CPid1, child1, normal),
+ terminate(SupPid2, CPid3, child1, normal),
+ timer:sleep(350),
+ catch link(SupPid1),
+ catch link(SupPid2),
+ %% The supervisor would die attempting to restart child2
+ true = erlang:is_process_alive(SupPid1),
+ true = erlang:is_process_alive(SupPid2),
+ %% Child2 has not been restarted
+ [{child1, _, _, _}] = supervisor:which_children(SupPid1),
+ [{child1, _, _, _}] = supervisor:which_children(SupPid2).
+
+%%-------------------------------------------------------------------------
one_for_one(doc) ->
["Test the one_for_one base case."];
one_for_one(suite) -> [];