diff options
author | Siri Hansen <siri@erlang.org> | 2016-02-10 11:10:18 +0100 |
---|---|---|
committer | Siri Hansen <siri@erlang.org> | 2016-02-10 11:13:35 +0100 |
commit | 8fba155bdc0b45f0504756d1ea695d9c53a44f80 (patch) | |
tree | 20743b3da8707882935c995c77e05b2da9ed7c29 | |
parent | 439292ad4afe73243852fe79d5d467c325f382bf (diff) | |
parent | 264a5e804d6e23bf6c6916887669760cbf243caa (diff) | |
download | otp-8fba155bdc0b45f0504756d1ea695d9c53a44f80.tar.gz otp-8fba155bdc0b45f0504756d1ea695d9c53a44f80.tar.bz2 otp-8fba155bdc0b45f0504756d1ea695d9c53a44f80.zip |
Merge branch 'nybek/speed_up_supervisor_count_children' into maint
* nybek/speed_up_supervisor_count_children:
Speed up supervisor:count_children/1; simple_one_for_one
Add supervisor:get_callback_module/1
OTP-13290
-rw-r--r-- | lib/sasl/src/release_handler_1.erl | 33 | ||||
-rw-r--r-- | lib/sasl/src/sasl.app.src | 2 | ||||
-rw-r--r-- | lib/sasl/test/release_handler_SUITE.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/doc/src/supervisor.xml | 5 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 63 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 52 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_deadlock.erl | 14 |
7 files changed, 107 insertions, 64 deletions
diff --git a/lib/sasl/src/release_handler_1.erl b/lib/sasl/src/release_handler_1.erl index 5e9a35ab4c..a6325270a5 100644 --- a/lib/sasl/src/release_handler_1.erl +++ b/lib/sasl/src/release_handler_1.erl @@ -676,48 +676,19 @@ maybe_get_dynamic_mods(Name, Pid) -> error(get_modules_failed) end. -%% XXXX -%% Note: The following is a terrible hack done in order to resolve the -%% problem stated in ticket OTP-3452. - -%% XXXX NOTE WELL: This record is from supervisor.erl. Also the record -%% name is really `state'. --record(supervisor_state, {name, - strategy, - children = [], - dynamics = [], - intensity, - period, - restarts = [], - module, - args}). - %% Return the name of the call-back module that implements the %% (top) supervisor SupPid. %% Returns: {ok, Module} | {error,undefined} %% get_supervisor_module(SupPid) -> - case catch get_supervisor_module1(SupPid) of - {ok, Module} when is_atom(Module) -> + case catch supervisor:get_callback_module(SupPid) of + Module when is_atom(Module) -> {ok, Module}; _Other -> io:format("~w: reason: ~w~n", [SupPid, _Other]), {error, undefined} end. -get_supervisor_module1(SupPid) -> - {status, _Pid, {module, _Mod}, - [_PDict, _SysState, _Parent, _Dbg, Misc]} = sys:get_status(SupPid), - %% supervisor Misc field changed at R13B04, handle old and new variants here - State = case Misc of - [_Name, State1, _Type, _Time] -> - State1; - [_Header, _Data, {data, [{"State", State2}]}] -> - State2 - end, - %% Cannot use #supervisor_state{module = Module} = State. - {ok, element(#supervisor_state.module, State)}. - %%----------------------------------------------------------------- %% Func: do_soft_purge/3 %% Args: Mod = atom() diff --git a/lib/sasl/src/sasl.app.src b/lib/sasl/src/sasl.app.src index 705bb73fc5..507e2dc229 100644 --- a/lib/sasl/src/sasl.app.src +++ b/lib/sasl/src/sasl.app.src @@ -46,6 +46,6 @@ {env, [{sasl_error_logger, tty}, {errlog_type, all}]}, {mod, {sasl, []}}, - {runtime_dependencies, ["tools-2.6.14","stdlib-2.6","kernel-4.1", + {runtime_dependencies, ["tools-2.6.14","stdlib-2.8","kernel-4.1", "erts-6.0"]}]}. diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl index d57de2593a..ee620dcdb4 100644 --- a/lib/sasl/test/release_handler_SUITE.erl +++ b/lib/sasl/test/release_handler_SUITE.erl @@ -1363,7 +1363,7 @@ upgrade_supervisor(Conf) when is_list(Conf) -> %% Check that the restart strategy and child spec is updated {status, _, {module, _}, [_, _, _, _, [_,_,{data,[{"State",State}]}]]} = rpc:call(Node,sys,get_status,[a_sup]), - {state,_,RestartStrategy,[Child],_,_,_,_,_,_} = State, + {state,_,RestartStrategy,[Child],_,_,_,_,_,_,_} = State, one_for_all = RestartStrategy, % changed from one_for_one {child,_,_,_,_,brutal_kill,_,_} = Child, % changed from timeout 2000 diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index 24ff251ce3..815bf4a489 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -543,7 +543,10 @@ </item> <item> <p><c>active</c> - the count of all actively running child processes - managed by this supervisor.</p> + managed by this supervisor. In the case of <c>simple_one_for_one</c> + supervisors, no check is carried out to ensure that each child process + is still alive, though the result provided here is likely to be very + accurate unless the supervisor is heavily overloaded.</p> </item> <item> <p><c>supervisors</c> - the count of all children marked as diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index 92a0c29011..cecdebd0c8 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -33,6 +33,9 @@ terminate/2, code_change/3]). -export([try_again_restart/2]). +%% For release_handler only +-export([get_callback_module/1]). + %%-------------------------------------------------------------------------- -export_type([sup_flags/0, child_spec/0, startchild_ret/0, strategy/0]). @@ -113,6 +116,7 @@ intensity :: non_neg_integer(), period :: pos_integer(), restarts = [], + dynamic_restarts = 0 :: non_neg_integer(), module, args}). -type state() :: #state{}. @@ -250,6 +254,17 @@ try_again_restart(Supervisor, Child) -> cast(Supervisor, Req) -> gen_server:cast(Supervisor, Req). +%%%----------------------------------------------------------------- +%%% Called by release_handler during upgrade +-spec get_callback_module(Pid) -> Module when + Pid :: pid(), + Module :: atom(). +get_callback_module(Pid) -> + {status, _Pid, {module, _Mod}, + [_PDict, _SysState, _Parent, _Dbg, Misc]} = sys:get_status(Pid), + [_Header, _Data, {data, [{"State", State}]}] = Misc, + State#state.module. + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -504,39 +519,26 @@ handle_call(which_children, _From, State) -> handle_call(count_children, _From, #state{children = [#child{restart_type = temporary, child_type = CT}]} = State) when ?is_simple(State) -> - {Active, Count} = - ?SETS:fold(fun(Pid, {Alive, Tot}) -> - case is_pid(Pid) andalso is_process_alive(Pid) of - true ->{Alive+1, Tot +1}; - false -> - {Alive, Tot + 1} - end - end, {0, 0}, dynamics_db(temporary, State#state.dynamics)), + Sz = ?SETS:size(dynamics_db(temporary, State#state.dynamics)), Reply = case CT of - supervisor -> [{specs, 1}, {active, Active}, - {supervisors, Count}, {workers, 0}]; - worker -> [{specs, 1}, {active, Active}, - {supervisors, 0}, {workers, Count}] + supervisor -> [{specs, 1}, {active, Sz}, + {supervisors, Sz}, {workers, 0}]; + worker -> [{specs, 1}, {active, Sz}, + {supervisors, 0}, {workers, Sz}] end, {reply, Reply, State}; -handle_call(count_children, _From, #state{children = [#child{restart_type = RType, +handle_call(count_children, _From, #state{dynamic_restarts = Restarts, + children = [#child{restart_type = RType, child_type = CT}]} = State) when ?is_simple(State) -> - {Active, Count} = - ?DICTS:fold(fun(Pid, _Val, {Alive, Tot}) -> - case is_pid(Pid) andalso is_process_alive(Pid) of - true -> - {Alive+1, Tot +1}; - false -> - {Alive, Tot + 1} - end - end, {0, 0}, dynamics_db(RType, State#state.dynamics)), + Sz = ?DICTS:size(dynamics_db(RType, State#state.dynamics)), + Active = Sz - Restarts, Reply = case CT of supervisor -> [{specs, 1}, {active, Active}, - {supervisors, Count}, {workers, 0}]; + {supervisors, Sz}, {workers, 0}]; worker -> [{specs, 1}, {active, Active}, - {supervisors, 0}, {workers, Count}] + {supervisors, 0}, {workers, Sz}] end, {reply, Reply, State}; @@ -806,8 +808,15 @@ restart(Child, State) -> {shutdown, remove_child(Child, NState)} end. -restart(simple_one_for_one, Child, State) -> +restart(simple_one_for_one, Child, State0) -> #child{pid = OldPid, mfargs = {M, F, A}} = Child, + State = case OldPid of + ?restarting(_) -> + NRes = State0#state.dynamic_restarts - 1, + State0#state{dynamic_restarts = NRes}; + _ -> + State0 + end, Dynamics = ?DICTS:erase(OldPid, dynamics_db(Child#child.restart_type, State#state.dynamics)), case do_start_child_i(M, F, A) of @@ -818,7 +827,9 @@ restart(simple_one_for_one, Child, State) -> NState = State#state{dynamics = ?DICTS:store(Pid, A, Dynamics)}, {ok, NState}; {error, Error} -> - NState = State#state{dynamics = ?DICTS:store(restarting(OldPid), A, + NRestarts = State#state.dynamic_restarts + 1, + NState = State#state{dynamic_restarts = NRestarts, + dynamics = ?DICTS:store(restarting(OldPid), A, Dynamics)}, report_error(start_error, Error, Child, State#state.name), {try_again, NState} diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index 8cb2a5194a..903ca76575 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -67,6 +67,7 @@ %% Misc tests -export([child_unlink/1, tree/1, count_children/1, + count_restarting_children/1, do_not_save_start_parameters_for_temporary_children/1, do_not_save_child_specs_for_temporary_children/1, simple_one_for_one_scale_many_temporary_children/1, @@ -90,7 +91,8 @@ all() -> {group, normal_termination}, {group, shutdown_termination}, {group, abnormal_termination}, child_unlink, tree, - count_children, do_not_save_start_parameters_for_temporary_children, + count_children, count_restarting_children, + do_not_save_start_parameters_for_temporary_children, do_not_save_child_specs_for_temporary_children, simple_one_for_one_scale_many_temporary_children, temporary_bystander, simple_global_supervisor, hanging_restart_loop, hanging_restart_loop_simple, @@ -1459,6 +1461,54 @@ count_children(Config) when is_list(Config) -> [1,0,0,0] = get_child_counts(sup_test). %%------------------------------------------------------------------------- +%% Test count_children when some children are restarting +count_restarting_children(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_deadlock, start_child_noreg, []}, + permanent, brutal_kill, worker, []}, + %% 2 sek delay on failing restart (see supervisor_deadlock.erl) -> + %% MaxR=20, MaxT=10 should ensure a restart loop when starting and + %% restarting 3 instances of the child (as below) + {ok, SupPid} = start_link({ok, {{simple_one_for_one, 20, 10}, [Child]}}), + + %% Ets table with state read by supervisor_deadlock.erl + ets:new(supervisor_deadlock,[set,named_table,public]), + ets:insert(supervisor_deadlock,{fail_start,false}), + + [1,0,0,0] = get_child_counts(SupPid), + {ok, Ch1_1} = supervisor:start_child(SupPid, []), + [1,1,0,1] = get_child_counts(SupPid), + {ok, Ch1_2} = supervisor:start_child(SupPid, []), + [1,2,0,2] = get_child_counts(SupPid), + {ok, Ch1_3} = supervisor:start_child(SupPid, []), + [1,3,0,3] = get_child_counts(SupPid), + + supervisor_deadlock:restart_child(Ch1_1), + supervisor_deadlock:restart_child(Ch1_2), + supervisor_deadlock:restart_child(Ch1_3), + test_server:sleep(400), + [1,3,0,3] = get_child_counts(SupPid), + [Ch2_1, Ch2_2, Ch2_3] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)], + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(Ch2_1), + supervisor_deadlock:restart_child(Ch2_2), + test_server:sleep(4000), % allow restart to happen before proceeding + [1,1,0,3] = get_child_counts(SupPid), + + ets:insert(supervisor_deadlock,{fail_start,false}), + test_server:sleep(4000), % allow restart to happen before proceeding + [1,3,0,3] = get_child_counts(SupPid), + + ok = supervisor:terminate_child(SupPid, Ch2_3), + [1,2,0,2] = get_child_counts(SupPid), + [Ch3_1, Ch3_2] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)], + ok = supervisor:terminate_child(SupPid, Ch3_1), + [1,1,0,1] = get_child_counts(SupPid), + ok = supervisor:terminate_child(SupPid, Ch3_2), + [1,0,0,0] = get_child_counts(SupPid). + +%%------------------------------------------------------------------------- %% Temporary children shall not be restarted so they should not save %% start parameters, as it potentially can take up a huge amount of %% memory for no purpose. diff --git a/lib/stdlib/test/supervisor_deadlock.erl b/lib/stdlib/test/supervisor_deadlock.erl index 288547a972..8d3d1c6f30 100644 --- a/lib/stdlib/test/supervisor_deadlock.erl +++ b/lib/stdlib/test/supervisor_deadlock.erl @@ -11,9 +11,11 @@ init([child]) -> %% terminates immediately {ok, []}; [{fail_start, true}] -> - %% Restart frequency is MaxR=8, MaxT=10, so this will - %% ensure that restart intensity is not reached -> restart - %% loop + %% A restart frequency of MaxR=8, MaxT=10 should ensure + %% that restart intensity is not reached -> restart loop. + %% (Note that if we use simple_one_for_one, and start + %% 'many' child instances, the restart frequency must be + %% ajusted accordingly.) timer:sleep(2000), % NOTE: this could be a gen_server call timeout {stop, error} @@ -41,5 +43,11 @@ code_change(_OldVsn, State, _Extra) -> start_child() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [child], []). +start_child_noreg() -> + gen_server:start_link(?MODULE, [child], []). + restart_child() -> gen_server:cast(supervisor_deadlock, restart). + +restart_child(Pid) -> + gen_server:cast(Pid, restart). |