From 4f9b938112a80f1c1e210b1a6af40a42f833cfa2 Mon Sep 17 00:00:00 2001 From: Fred Hebert Date: Wed, 3 Aug 2011 10:47:36 -0400 Subject: fix supervisors restarting temporary children In the current implementation of supervisors, temporary children should never be restarted. However, when a temporary child is restarted as part of a one_for_all or rest_for_one strategy where the failing process is not the temporary child, the supervisor still tries to restart it. Because the supervisor doesn't keep some of the MFA information of temporary children, this causes the supervisor to hit its restart limit and crash. This patch fixes the behaviour by inserting a clause in terminate_children/2-3 (private function) that will omit temporary children when building a list of killed processes, to avoid having the supervisor trying to restart them again. Only supervisors in need of restarting children used the list, so the change should be of no impact for the functions that called terminate_children/2-3 only to kill all children. The documentation has been modified to make this behaviour more explicit. --- lib/stdlib/doc/src/supervisor.xml | 9 ++++++--- lib/stdlib/src/supervisor.erl | 7 +++++++ lib/stdlib/test/supervisor_SUITE.erl | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index 009aa60faa..edd119d37a 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -150,9 +150,12 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules}

Restart defines when a terminated child process should be restarted. A permanent child process should always be restarted, a temporary child process should - never be restarted and a transient child process - should be restarted only if it terminates abnormally, i.e. - with another exit reason than normal.

+ never be restarted (even when the supervisor's restart strategy + is rest_for_one or one_for_all and a sibling's + death causes the temporary process to be terminated) and a + transient child process should be restarted only if + it terminates abnormally, i.e. with another exit reason + than normal.

Shutdown defines how a child process should be diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index e60706ed05..dc31647eb5 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -735,6 +735,13 @@ restart(one_for_all, Child, State) -> terminate_children(Children, SupName) -> terminate_children(Children, SupName, []). +%% Temporary children should not be restarted and thus should +%% be skipped when building the list of terminated children, although +%% we do want them to be shut down as many functions from this module +%% use this function to just clear everything. +terminate_children([Child = #child{restart_type=temporary} | Children], SupName, Res) -> + do_terminate(Child, SupName), + terminate_children(Children, SupName, Res); terminate_children([Child | Children], SupName, Res) -> NChild = do_terminate(Child, SupName), terminate_children(Children, SupName, [NChild | Res]); diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index ff5e4c629a..b48450c151 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -42,7 +42,7 @@ -export([ permanent_normal/1, transient_normal/1, temporary_normal/1, permanent_abnormal/1, transient_abnormal/1, - temporary_abnormal/1]). + temporary_abnormal/1, temporary_bystander/1]). %% Restart strategy tests -export([ one_for_one/1, @@ -74,7 +74,7 @@ all() -> {group, abnormal_termination}, child_unlink, tree, count_children_memory, do_not_save_start_parameters_for_temporary_children, do_not_save_child_specs_for_temporary_children, - simple_one_for_one_scale_many_temporary_children]. + simple_one_for_one_scale_many_temporary_children, temporary_bystander]. groups() -> [{sup_start, [], @@ -606,6 +606,37 @@ temporary_abnormal(Config) when is_list(Config) -> [] = supervisor:which_children(sup_test), [0,0,0,0] = get_child_counts(sup_test). +%%------------------------------------------------------------------------- +temporary_bystander(doc) -> + ["A temporary process killed as part of a rest_for_one or one_for_all " + "restart strategy should not be restarted given its args are not " + " saved. Otherwise the supervisor hits its limit and crashes."]; +temporary_bystander(suite) -> []; +temporary_bystander(_Config) -> + Child1 = {child1, {supervisor_1, start_child, []}, permanent, 100, + worker, []}, + Child2 = {child2, {supervisor_1, start_child, []}, temporary, 100, + worker, []}, + {ok, SupPid1} = supervisor:start_link(?MODULE, {ok, {{one_for_all, 2, 300}, []}}), + {ok, SupPid2} = supervisor:start_link(?MODULE, {ok, {{rest_for_one, 2, 300}, []}}), + unlink(SupPid1), % otherwise we crash with it + unlink(SupPid2), % otherwise we crash with it + {ok, CPid1} = supervisor:start_child(SupPid1, Child1), + {ok, _CPid2} = supervisor:start_child(SupPid1, Child2), + {ok, CPid3} = supervisor:start_child(SupPid2, Child1), + {ok, _CPid4} = supervisor:start_child(SupPid2, Child2), + terminate(SupPid1, CPid1, child1, normal), + terminate(SupPid2, CPid3, child1, normal), + timer:sleep(350), + catch link(SupPid1), + catch link(SupPid2), + %% The supervisor would die attempting to restart child2 + true = erlang:is_process_alive(SupPid1), + true = erlang:is_process_alive(SupPid2), + %% Child2 has not been restarted + [{child1, _, _, _}] = supervisor:which_children(SupPid1), + [{child1, _, _, _}] = supervisor:which_children(SupPid2). + %%------------------------------------------------------------------------- one_for_one(doc) -> ["Test the one_for_one base case."]; -- cgit v1.2.3