diff options
| author | Fred Hebert <[email protected]> | 2011-08-03 10:47:36 -0400 | 
|---|---|---|
| committer | Fred Hebert <[email protected]> | 2011-08-23 14:31:09 -0400 | 
| commit | 4f9b938112a80f1c1e210b1a6af40a42f833cfa2 (patch) | |
| tree | 9fb8fd96f584107c9592c0be3e603d9020a40b69 /lib | |
| parent | 62f194499810270b488cc46a5e611f0be77f36a7 (diff) | |
| download | otp-4f9b938112a80f1c1e210b1a6af40a42f833cfa2.tar.gz otp-4f9b938112a80f1c1e210b1a6af40a42f833cfa2.tar.bz2 otp-4f9b938112a80f1c1e210b1a6af40a42f833cfa2.zip | |
fix supervisors restarting temporary children
In the current implementation of supervisors, temporary children
should never be restarted. However, when a temporary child is
restarted as part of a one_for_all or rest_for_one strategy where
the failing process is not the temporary child, the supervisor
still tries to restart it.
Because the supervisor doesn't keep some of the MFA information
of temporary children, this causes the supervisor to hit its
restart limit and crash.
This patch fixes the behaviour by inserting a clause in
terminate_children/2-3 (private function) that will omit temporary
children when building a list of killed processes, to avoid having
the supervisor trying to restart them again.
Only supervisors in need of restarting children used the list, so
the change should be of no impact for the functions that called
terminate_children/2-3 only to kill all children.
The documentation has been modified to make this behaviour
more explicit.
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/stdlib/doc/src/supervisor.xml | 9 | ||||
| -rw-r--r-- | lib/stdlib/src/supervisor.erl | 7 | ||||
| -rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 35 | 
3 files changed, 46 insertions, 5 deletions
| diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index 009aa60faa..edd119d37a 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -150,9 +150,12 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules}          <p><c>Restart</c> defines when a terminated child process            should be restarted. A <c>permanent</c> child process should            always be restarted, a <c>temporary</c> child process should -          never be restarted and a <c>transient</c> child process -          should be restarted only if it terminates abnormally, i.e. -          with another exit reason than <c>normal</c>.</p> +          never be restarted (even when the supervisor's restart strategy +          is <c>rest_for_one</c> or <c>one_for_all</c> and a sibling's +          death causes the temporary process to be terminated) and a +          <c>transient</c> child process should be restarted only if +          it terminates abnormally, i.e. with another exit reason +          than <c>normal</c>.</p>        </item>        <item>          <p><c>Shutdown</c> defines how a child process should be diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index e60706ed05..dc31647eb5 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -735,6 +735,13 @@ restart(one_for_all, Child, State) ->  terminate_children(Children, SupName) ->      terminate_children(Children, SupName, []). +%% Temporary children should not be restarted and thus should +%% be skipped when building the list of terminated children, although +%% we do want them to be shut down as many functions from this module +%% use this function to just clear everything. +terminate_children([Child = #child{restart_type=temporary} | Children], SupName, Res) -> +    do_terminate(Child, SupName), +    terminate_children(Children, SupName, Res);  terminate_children([Child | Children], SupName, Res) ->      NChild = do_terminate(Child, SupName),      terminate_children(Children, SupName, [NChild | Res]); diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index ff5e4c629a..b48450c151 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -42,7 +42,7 @@  -export([ permanent_normal/1, transient_normal/1,  	  temporary_normal/1,  	  permanent_abnormal/1, transient_abnormal/1, -	  temporary_abnormal/1]). +	  temporary_abnormal/1, temporary_bystander/1]).  %% Restart strategy tests   -export([ one_for_one/1, @@ -74,7 +74,7 @@ all() ->       {group, abnormal_termination}, child_unlink, tree,       count_children_memory, do_not_save_start_parameters_for_temporary_children,       do_not_save_child_specs_for_temporary_children, -     simple_one_for_one_scale_many_temporary_children]. +     simple_one_for_one_scale_many_temporary_children, temporary_bystander].  groups() ->       [{sup_start, [], @@ -607,6 +607,37 @@ temporary_abnormal(Config) when is_list(Config) ->      [0,0,0,0] = get_child_counts(sup_test).  %%------------------------------------------------------------------------- +temporary_bystander(doc) -> +    ["A temporary process killed as part of a rest_for_one or one_for_all " +     "restart strategy should not be restarted given its args are not " +     " saved. Otherwise the supervisor hits its limit and crashes."]; +temporary_bystander(suite) -> []; +temporary_bystander(_Config) -> +    Child1 = {child1, {supervisor_1, start_child, []}, permanent, 100, +	      worker, []}, +    Child2 = {child2, {supervisor_1, start_child, []}, temporary, 100, +	      worker, []}, +    {ok, SupPid1} = supervisor:start_link(?MODULE, {ok, {{one_for_all, 2, 300}, []}}), +    {ok, SupPid2} = supervisor:start_link(?MODULE, {ok, {{rest_for_one, 2, 300}, []}}), +    unlink(SupPid1), % otherwise we crash with it +    unlink(SupPid2), % otherwise we crash with it +    {ok, CPid1} = supervisor:start_child(SupPid1, Child1), +    {ok, _CPid2} = supervisor:start_child(SupPid1, Child2), +    {ok, CPid3} = supervisor:start_child(SupPid2, Child1), +    {ok, _CPid4} = supervisor:start_child(SupPid2, Child2), +    terminate(SupPid1, CPid1, child1, normal), +    terminate(SupPid2, CPid3, child1, normal), +    timer:sleep(350), +    catch link(SupPid1), +    catch link(SupPid2), +    %% The supervisor would die attempting to restart child2 +    true = erlang:is_process_alive(SupPid1), +    true = erlang:is_process_alive(SupPid2), +    %% Child2 has not been restarted +    [{child1, _, _, _}] = supervisor:which_children(SupPid1), +    [{child1, _, _, _}] = supervisor:which_children(SupPid2). + +%%-------------------------------------------------------------------------  one_for_one(doc) ->      ["Test the one_for_one base case."];  one_for_one(suite) -> []; | 
