diff options
-rw-r--r-- | lib/stdlib/doc/src/supervisor.xml | 4 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 121 | ||||
-rw-r--r-- | lib/stdlib/test/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_2.erl | 42 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 38 | ||||
-rw-r--r-- | system/doc/design_principles/sup_princ.xml | 4 |
6 files changed, 202 insertions, 8 deletions
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index 54e7cab884..30514dfee9 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -93,6 +93,10 @@ instead the child specification identifier is used, <c>terminate_child/2</c> will return <c>{error,simple_one_for_one}</c>.</p> + <p>Because a <c>simple_one_for_one</c> supervisor could have many + children, it shuts them all down at same time. So, order in which they + are stopped is not defined. For the same reason, it could have an + overhead with regards to the <c>Shutdown</c> strategy.</p> </item> </list> <p>To prevent a supervisor from getting into an infinite loop of diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index 051dca462b..f20ea18fd0 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -515,9 +515,12 @@ handle_info(Msg, State) -> %% -spec terminate(term(), state()) -> 'ok'. +terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) -> + terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type, + State#state.dynamics), + State#state.name); terminate(_Reason, State) -> - terminate_children(State#state.children, State#state.name), - ok. + terminate_children(State#state.children, State#state.name). %% %% Change code for the supervisor. @@ -830,8 +833,109 @@ monitor_child(Pid) -> %% that will be handled in shutdown/2. ok end. - - + + +%%----------------------------------------------------------------- +%% Func: terminate_dynamic_children/3 +%% Args: Child = child_rec() +%% Dynamics = ?DICT() | ?SET() +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: ok +%% +%% +%% Shutdown all dynamic children. This happens when the supervisor is +%% stopped. Because the supervisor can have millions of dynamic children, we +%% can have an significative overhead here. +%%----------------------------------------------------------------- +terminate_dynamic_children(Child, Dynamics, SupName) -> + {Pids, EStack0} = monitor_dynamic_children(Child, Dynamics), + Sz = ?SETS:size(Pids), + EStack = case Child#child.shutdown of + brutal_kill -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); + infinity -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); + Time -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + TRef = erlang:start_timer(Time, self(), kill), + wait_dynamic_children(Child, Pids, Sz, TRef, EStack0) + end, + %% Unrool stacked errors and report them + ?DICT:fold(fun(Reason, Ls, _) -> + report_error(shutdown_error, Reason, + Child#child{pid=Ls}, SupName) + end, ok, EStack). + + +monitor_dynamic_children(#child{restart_type=temporary}, Dynamics) -> + ?SETS:fold(fun(P, {Pids, EStack}) -> + case monitor_child(P) of + ok -> + {?SETS:add_element(P, Pids), EStack}; + {error, normal} -> + {Pids, EStack}; + {error, Reason} -> + {Pids, ?DICT:append(Reason, P, EStack)} + end + end, {?SETS:new(), ?DICT:new()}, Dynamics); +monitor_dynamic_children(#child{restart_type=RType}, Dynamics) -> + ?DICT:fold(fun(P, _, {Pids, EStack}) -> + case monitor_child(P) of + ok -> + {?SETS:add_element(P, Pids), EStack}; + {error, normal} when RType =/= permanent -> + {Pids, EStack}; + {error, Reason} -> + {Pids, ?DICT:append(Reason, P, EStack)} + end + end, {?SETS:new(), ?DICT:new()}, Dynamics). + + +wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) -> + EStack; +wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) -> + %% If the timer has expired before its cancellation, we must empty the + %% mail-box of the 'timeout'-message. + erlang:cancel_timer(TRef), + receive + {timeout, TRef, kill} -> + EStack + after 0 -> + EStack + end; +wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz, + TRef, EStack) -> + receive + {'DOWN', _MRef, process, Pid, killed} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, EStack); + + {'DOWN', _MRef, process, Pid, Reason} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, ?DICT:append(Reason, Pid, EStack)) + end; +wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz, + TRef, EStack) -> + receive + {'DOWN', _MRef, process, Pid, shutdown} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, EStack); + + {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, EStack); + + {'DOWN', _MRef, process, Pid, Reason} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, ?DICT:append(Reason, Pid, EStack)); + + {timeout, TRef, kill} -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, Sz-1, undefined, EStack) + end. + %%----------------------------------------------------------------- %% Child/State manipulating functions. %%----------------------------------------------------------------- @@ -1134,12 +1238,19 @@ report_error(Error, Reason, Child, SupName) -> error_logger:error_report(supervisor_report, ErrorMsg). -extract_child(Child) -> +extract_child(Child) when is_pid(Child#child.pid) -> [{pid, Child#child.pid}, {name, Child#child.name}, {mfargs, Child#child.mfargs}, {restart_type, Child#child.restart_type}, {shutdown, Child#child.shutdown}, + {child_type, Child#child.child_type}]; +extract_child(Child) -> + [{nb_children, length(Child#child.pid)}, + {name, Child#child.name}, + {mfargs, Child#child.mfargs}, + {restart_type, Child#child.restart_type}, + {shutdown, Child#child.shutdown}, {child_type, Child#child.child_type}]. report_progress(Child, SupName) -> diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 5502c69fa5..aa6a660c34 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -65,6 +65,7 @@ MODULES= \ stdlib_SUITE \ string_SUITE \ supervisor_1 \ + supervisor_2 \ naughty_child \ shell_SUITE \ supervisor_SUITE \ diff --git a/lib/stdlib/test/supervisor_2.erl b/lib/stdlib/test/supervisor_2.erl new file mode 100644 index 0000000000..67aacf5a9c --- /dev/null +++ b/lib/stdlib/test/supervisor_2.erl @@ -0,0 +1,42 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Description: Simulates the behaviour that a child process may have. +%% Is used by the supervisor_SUITE test suite. +-module(supervisor_2). + +-export([start_child/1, init/1]). + +-export([handle_call/3, handle_info/2, terminate/2]). + +start_child(Time) when is_integer(Time), Time > 0 -> + gen_server:start_link(?MODULE, Time, []). + +init(Time) -> + process_flag(trap_exit, true), + {ok, Time}. + +handle_call(Req, _From, State) -> + {reply, Req, State}. + +handle_info(_, State) -> + {noreply, State}. + +terminate(_Reason, Time) -> + timer:sleep(Time), + ok. diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index 9d47233422..e709cf62ba 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -52,7 +52,7 @@ one_for_all_escalation/1, simple_one_for_one/1, simple_one_for_one_escalation/1, rest_for_one/1, rest_for_one_escalation/1, - simple_one_for_one_extra/1]). + simple_one_for_one_extra/1, simple_one_for_one_shutdown/1]). %% Misc tests -export([child_unlink/1, tree/1, count_children_memory/1, @@ -99,8 +99,8 @@ groups() -> {restart_one_for_all, [], [one_for_all, one_for_all_escalation]}, {restart_simple_one_for_one, [], - [simple_one_for_one, simple_one_for_one_extra, - simple_one_for_one_escalation]}, + [simple_one_for_one, simple_one_for_one_shutdown, + simple_one_for_one_extra, simple_one_for_one_escalation]}, {restart_rest_for_one, [], [rest_for_one, rest_for_one_escalation]}]. @@ -868,6 +868,38 @@ simple_one_for_one(Config) when is_list(Config) -> terminate(SupPid, Pid4, Id4, abnormal), check_exit([SupPid]). + +%%------------------------------------------------------------------------- +simple_one_for_one_shutdown(doc) -> + ["Test simple_one_for_one children shutdown accordingly to the " + "supervisor's shutdown strategy."]; +simple_one_for_one_shutdown(suite) -> []; +simple_one_for_one_shutdown(Config) when is_list(Config) -> + process_flag(trap_exit, true), + ShutdownTime = 1000, + Child = {child, {supervisor_2, start_child, []}, + permanent, 2*ShutdownTime, worker, []}, + {ok, SupPid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + + %% Will be gracefully shutdown + {ok, _CPid1} = supervisor:start_child(sup_test, [ShutdownTime]), + {ok, _CPid2} = supervisor:start_child(sup_test, [ShutdownTime]), + + %% Will be killed after 2*ShutdownTime milliseconds + {ok, _CPid3} = supervisor:start_child(sup_test, [5*ShutdownTime]), + + {T, ok} = timer:tc(fun terminate/2, [SupPid, shutdown]), + if T < 1000*ShutdownTime -> + %% Because supervisor's children wait before exiting, it can't + %% terminate quickly + test_server:fail({shutdown_too_short, T}); + T >= 1000*5*ShutdownTime -> + test_server:fail({shutdown_too_long, T}); + true -> + check_exit([SupPid]) + end. + + %%------------------------------------------------------------------------- simple_one_for_one_extra(doc) -> ["Tests automatic restart of children " diff --git a/system/doc/design_principles/sup_princ.xml b/system/doc/design_principles/sup_princ.xml index 9e7cc2f543..a432f9458b 100644 --- a/system/doc/design_principles/sup_princ.xml +++ b/system/doc/design_principles/sup_princ.xml @@ -349,6 +349,10 @@ call:start_link(id1)</code> supervisor:terminate_child(Sup, Pid)</code> <p>where <c>Sup</c> is the pid, or name, of the supervisor and <c>Pid</c> is the pid of the child.</p> + <p>Because a <c>simple_one_for_one</c> supervisor could have many children, + it shuts them all down at same time. So, order in which they are stopped is + not defined. For the same reason, it could have an overhead with regards to + the <c>Shutdown</c> strategy.</p> </section> <section> |