From 4f9b938112a80f1c1e210b1a6af40a42f833cfa2 Mon Sep 17 00:00:00 2001 From: Fred Hebert Date: Wed, 3 Aug 2011 10:47:36 -0400 Subject: fix supervisors restarting temporary children In the current implementation of supervisors, temporary children should never be restarted. However, when a temporary child is restarted as part of a one_for_all or rest_for_one strategy where the failing process is not the temporary child, the supervisor still tries to restart it. Because the supervisor doesn't keep some of the MFA information of temporary children, this causes the supervisor to hit its restart limit and crash. This patch fixes the behaviour by inserting a clause in terminate_children/2-3 (private function) that will omit temporary children when building a list of killed processes, to avoid having the supervisor trying to restart them again. Only supervisors in need of restarting children used the list, so the change should be of no impact for the functions that called terminate_children/2-3 only to kill all children. The documentation has been modified to make this behaviour more explicit. --- lib/stdlib/test/supervisor_SUITE.erl | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'lib/stdlib/test/supervisor_SUITE.erl') diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index ff5e4c629a..b48450c151 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -42,7 +42,7 @@ -export([ permanent_normal/1, transient_normal/1, temporary_normal/1, permanent_abnormal/1, transient_abnormal/1, - temporary_abnormal/1]). + temporary_abnormal/1, temporary_bystander/1]). %% Restart strategy tests -export([ one_for_one/1, @@ -74,7 +74,7 @@ all() -> {group, abnormal_termination}, child_unlink, tree, count_children_memory, do_not_save_start_parameters_for_temporary_children, do_not_save_child_specs_for_temporary_children, - simple_one_for_one_scale_many_temporary_children]. + simple_one_for_one_scale_many_temporary_children, temporary_bystander]. groups() -> [{sup_start, [], @@ -606,6 +606,37 @@ temporary_abnormal(Config) when is_list(Config) -> [] = supervisor:which_children(sup_test), [0,0,0,0] = get_child_counts(sup_test). +%%------------------------------------------------------------------------- +temporary_bystander(doc) -> + ["A temporary process killed as part of a rest_for_one or one_for_all " + "restart strategy should not be restarted given its args are not " + " saved. Otherwise the supervisor hits its limit and crashes."]; +temporary_bystander(suite) -> []; +temporary_bystander(_Config) -> + Child1 = {child1, {supervisor_1, start_child, []}, permanent, 100, + worker, []}, + Child2 = {child2, {supervisor_1, start_child, []}, temporary, 100, + worker, []}, + {ok, SupPid1} = supervisor:start_link(?MODULE, {ok, {{one_for_all, 2, 300}, []}}), + {ok, SupPid2} = supervisor:start_link(?MODULE, {ok, {{rest_for_one, 2, 300}, []}}), + unlink(SupPid1), % otherwise we crash with it + unlink(SupPid2), % otherwise we crash with it + {ok, CPid1} = supervisor:start_child(SupPid1, Child1), + {ok, _CPid2} = supervisor:start_child(SupPid1, Child2), + {ok, CPid3} = supervisor:start_child(SupPid2, Child1), + {ok, _CPid4} = supervisor:start_child(SupPid2, Child2), + terminate(SupPid1, CPid1, child1, normal), + terminate(SupPid2, CPid3, child1, normal), + timer:sleep(350), + catch link(SupPid1), + catch link(SupPid2), + %% The supervisor would die attempting to restart child2 + true = erlang:is_process_alive(SupPid1), + true = erlang:is_process_alive(SupPid2), + %% Child2 has not been restarted + [{child1, _, _, _}] = supervisor:which_children(SupPid1), + [{child1, _, _, _}] = supervisor:which_children(SupPid2). + %%------------------------------------------------------------------------- one_for_one(doc) -> ["Test the one_for_one base case."]; -- cgit v1.2.3