aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSiri Hansen <siri@erlang.org>2016-02-10 11:10:18 +0100
committerSiri Hansen <siri@erlang.org>2016-02-10 11:13:35 +0100
commit8fba155bdc0b45f0504756d1ea695d9c53a44f80 (patch)
tree20743b3da8707882935c995c77e05b2da9ed7c29
parent439292ad4afe73243852fe79d5d467c325f382bf (diff)
parent264a5e804d6e23bf6c6916887669760cbf243caa (diff)
downloadotp-8fba155bdc0b45f0504756d1ea695d9c53a44f80.tar.gz
otp-8fba155bdc0b45f0504756d1ea695d9c53a44f80.tar.bz2
otp-8fba155bdc0b45f0504756d1ea695d9c53a44f80.zip
Merge branch 'nybek/speed_up_supervisor_count_children' into maint
* nybek/speed_up_supervisor_count_children: Speed up supervisor:count_children/1; simple_one_for_one Add supervisor:get_callback_module/1 OTP-13290
-rw-r--r--lib/sasl/src/release_handler_1.erl33
-rw-r--r--lib/sasl/src/sasl.app.src2
-rw-r--r--lib/sasl/test/release_handler_SUITE.erl2
-rw-r--r--lib/stdlib/doc/src/supervisor.xml5
-rw-r--r--lib/stdlib/src/supervisor.erl63
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl52
-rw-r--r--lib/stdlib/test/supervisor_deadlock.erl14
7 files changed, 107 insertions, 64 deletions
diff --git a/lib/sasl/src/release_handler_1.erl b/lib/sasl/src/release_handler_1.erl
index 5e9a35ab4c..a6325270a5 100644
--- a/lib/sasl/src/release_handler_1.erl
+++ b/lib/sasl/src/release_handler_1.erl
@@ -676,48 +676,19 @@ maybe_get_dynamic_mods(Name, Pid) ->
error(get_modules_failed)
end.
-%% XXXX
-%% Note: The following is a terrible hack done in order to resolve the
-%% problem stated in ticket OTP-3452.
-
-%% XXXX NOTE WELL: This record is from supervisor.erl. Also the record
-%% name is really `state'.
--record(supervisor_state, {name,
- strategy,
- children = [],
- dynamics = [],
- intensity,
- period,
- restarts = [],
- module,
- args}).
-
%% Return the name of the call-back module that implements the
%% (top) supervisor SupPid.
%% Returns: {ok, Module} | {error,undefined}
%%
get_supervisor_module(SupPid) ->
- case catch get_supervisor_module1(SupPid) of
- {ok, Module} when is_atom(Module) ->
+ case catch supervisor:get_callback_module(SupPid) of
+ Module when is_atom(Module) ->
{ok, Module};
_Other ->
io:format("~w: reason: ~w~n", [SupPid, _Other]),
{error, undefined}
end.
-get_supervisor_module1(SupPid) ->
- {status, _Pid, {module, _Mod},
- [_PDict, _SysState, _Parent, _Dbg, Misc]} = sys:get_status(SupPid),
- %% supervisor Misc field changed at R13B04, handle old and new variants here
- State = case Misc of
- [_Name, State1, _Type, _Time] ->
- State1;
- [_Header, _Data, {data, [{"State", State2}]}] ->
- State2
- end,
- %% Cannot use #supervisor_state{module = Module} = State.
- {ok, element(#supervisor_state.module, State)}.
-
%%-----------------------------------------------------------------
%% Func: do_soft_purge/3
%% Args: Mod = atom()
diff --git a/lib/sasl/src/sasl.app.src b/lib/sasl/src/sasl.app.src
index 705bb73fc5..507e2dc229 100644
--- a/lib/sasl/src/sasl.app.src
+++ b/lib/sasl/src/sasl.app.src
@@ -46,6 +46,6 @@
{env, [{sasl_error_logger, tty},
{errlog_type, all}]},
{mod, {sasl, []}},
- {runtime_dependencies, ["tools-2.6.14","stdlib-2.6","kernel-4.1",
+ {runtime_dependencies, ["tools-2.6.14","stdlib-2.8","kernel-4.1",
"erts-6.0"]}]}.
diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl
index d57de2593a..ee620dcdb4 100644
--- a/lib/sasl/test/release_handler_SUITE.erl
+++ b/lib/sasl/test/release_handler_SUITE.erl
@@ -1363,7 +1363,7 @@ upgrade_supervisor(Conf) when is_list(Conf) ->
%% Check that the restart strategy and child spec is updated
{status, _, {module, _}, [_, _, _, _, [_,_,{data,[{"State",State}]}]]} =
rpc:call(Node,sys,get_status,[a_sup]),
- {state,_,RestartStrategy,[Child],_,_,_,_,_,_} = State,
+ {state,_,RestartStrategy,[Child],_,_,_,_,_,_,_} = State,
one_for_all = RestartStrategy, % changed from one_for_one
{child,_,_,_,_,brutal_kill,_,_} = Child, % changed from timeout 2000
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml
index 24ff251ce3..815bf4a489 100644
--- a/lib/stdlib/doc/src/supervisor.xml
+++ b/lib/stdlib/doc/src/supervisor.xml
@@ -543,7 +543,10 @@
</item>
<item>
<p><c>active</c> - the count of all actively running child processes
- managed by this supervisor.</p>
+ managed by this supervisor. In the case of <c>simple_one_for_one</c>
+ supervisors, no check is carried out to ensure that each child process
+ is still alive, though the result provided here is likely to be very
+ accurate unless the supervisor is heavily overloaded.</p>
</item>
<item>
<p><c>supervisors</c> - the count of all children marked as
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index 92a0c29011..cecdebd0c8 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -33,6 +33,9 @@
terminate/2, code_change/3]).
-export([try_again_restart/2]).
+%% For release_handler only
+-export([get_callback_module/1]).
+
%%--------------------------------------------------------------------------
-export_type([sup_flags/0, child_spec/0, startchild_ret/0, strategy/0]).
@@ -113,6 +116,7 @@
intensity :: non_neg_integer(),
period :: pos_integer(),
restarts = [],
+ dynamic_restarts = 0 :: non_neg_integer(),
module,
args}).
-type state() :: #state{}.
@@ -250,6 +254,17 @@ try_again_restart(Supervisor, Child) ->
cast(Supervisor, Req) ->
gen_server:cast(Supervisor, Req).
+%%%-----------------------------------------------------------------
+%%% Called by release_handler during upgrade
+-spec get_callback_module(Pid) -> Module when
+ Pid :: pid(),
+ Module :: atom().
+get_callback_module(Pid) ->
+ {status, _Pid, {module, _Mod},
+ [_PDict, _SysState, _Parent, _Dbg, Misc]} = sys:get_status(Pid),
+ [_Header, _Data, {data, [{"State", State}]}] = Misc,
+ State#state.module.
+
%%% ---------------------------------------------------
%%%
%%% Initialize the supervisor.
@@ -504,39 +519,26 @@ handle_call(which_children, _From, State) ->
handle_call(count_children, _From, #state{children = [#child{restart_type = temporary,
child_type = CT}]} = State)
when ?is_simple(State) ->
- {Active, Count} =
- ?SETS:fold(fun(Pid, {Alive, Tot}) ->
- case is_pid(Pid) andalso is_process_alive(Pid) of
- true ->{Alive+1, Tot +1};
- false ->
- {Alive, Tot + 1}
- end
- end, {0, 0}, dynamics_db(temporary, State#state.dynamics)),
+ Sz = ?SETS:size(dynamics_db(temporary, State#state.dynamics)),
Reply = case CT of
- supervisor -> [{specs, 1}, {active, Active},
- {supervisors, Count}, {workers, 0}];
- worker -> [{specs, 1}, {active, Active},
- {supervisors, 0}, {workers, Count}]
+ supervisor -> [{specs, 1}, {active, Sz},
+ {supervisors, Sz}, {workers, 0}];
+ worker -> [{specs, 1}, {active, Sz},
+ {supervisors, 0}, {workers, Sz}]
end,
{reply, Reply, State};
-handle_call(count_children, _From, #state{children = [#child{restart_type = RType,
+handle_call(count_children, _From, #state{dynamic_restarts = Restarts,
+ children = [#child{restart_type = RType,
child_type = CT}]} = State)
when ?is_simple(State) ->
- {Active, Count} =
- ?DICTS:fold(fun(Pid, _Val, {Alive, Tot}) ->
- case is_pid(Pid) andalso is_process_alive(Pid) of
- true ->
- {Alive+1, Tot +1};
- false ->
- {Alive, Tot + 1}
- end
- end, {0, 0}, dynamics_db(RType, State#state.dynamics)),
+ Sz = ?DICTS:size(dynamics_db(RType, State#state.dynamics)),
+ Active = Sz - Restarts,
Reply = case CT of
supervisor -> [{specs, 1}, {active, Active},
- {supervisors, Count}, {workers, 0}];
+ {supervisors, Sz}, {workers, 0}];
worker -> [{specs, 1}, {active, Active},
- {supervisors, 0}, {workers, Count}]
+ {supervisors, 0}, {workers, Sz}]
end,
{reply, Reply, State};
@@ -806,8 +808,15 @@ restart(Child, State) ->
{shutdown, remove_child(Child, NState)}
end.
-restart(simple_one_for_one, Child, State) ->
+restart(simple_one_for_one, Child, State0) ->
#child{pid = OldPid, mfargs = {M, F, A}} = Child,
+ State = case OldPid of
+ ?restarting(_) ->
+ NRes = State0#state.dynamic_restarts - 1,
+ State0#state{dynamic_restarts = NRes};
+ _ ->
+ State0
+ end,
Dynamics = ?DICTS:erase(OldPid, dynamics_db(Child#child.restart_type,
State#state.dynamics)),
case do_start_child_i(M, F, A) of
@@ -818,7 +827,9 @@ restart(simple_one_for_one, Child, State) ->
NState = State#state{dynamics = ?DICTS:store(Pid, A, Dynamics)},
{ok, NState};
{error, Error} ->
- NState = State#state{dynamics = ?DICTS:store(restarting(OldPid), A,
+ NRestarts = State#state.dynamic_restarts + 1,
+ NState = State#state{dynamic_restarts = NRestarts,
+ dynamics = ?DICTS:store(restarting(OldPid), A,
Dynamics)},
report_error(start_error, Error, Child, State#state.name),
{try_again, NState}
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index 8cb2a5194a..903ca76575 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -67,6 +67,7 @@
%% Misc tests
-export([child_unlink/1, tree/1, count_children/1,
+ count_restarting_children/1,
do_not_save_start_parameters_for_temporary_children/1,
do_not_save_child_specs_for_temporary_children/1,
simple_one_for_one_scale_many_temporary_children/1,
@@ -90,7 +91,8 @@ all() ->
{group, normal_termination},
{group, shutdown_termination},
{group, abnormal_termination}, child_unlink, tree,
- count_children, do_not_save_start_parameters_for_temporary_children,
+ count_children, count_restarting_children,
+ do_not_save_start_parameters_for_temporary_children,
do_not_save_child_specs_for_temporary_children,
simple_one_for_one_scale_many_temporary_children, temporary_bystander,
simple_global_supervisor, hanging_restart_loop, hanging_restart_loop_simple,
@@ -1459,6 +1461,54 @@ count_children(Config) when is_list(Config) ->
[1,0,0,0] = get_child_counts(sup_test).
%%-------------------------------------------------------------------------
+%% Test count_children when some children are restarting
+count_restarting_children(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ Child = {child, {supervisor_deadlock, start_child_noreg, []},
+ permanent, brutal_kill, worker, []},
+ %% 2 sek delay on failing restart (see supervisor_deadlock.erl) ->
+ %% MaxR=20, MaxT=10 should ensure a restart loop when starting and
+ %% restarting 3 instances of the child (as below)
+ {ok, SupPid} = start_link({ok, {{simple_one_for_one, 20, 10}, [Child]}}),
+
+ %% Ets table with state read by supervisor_deadlock.erl
+ ets:new(supervisor_deadlock,[set,named_table,public]),
+ ets:insert(supervisor_deadlock,{fail_start,false}),
+
+ [1,0,0,0] = get_child_counts(SupPid),
+ {ok, Ch1_1} = supervisor:start_child(SupPid, []),
+ [1,1,0,1] = get_child_counts(SupPid),
+ {ok, Ch1_2} = supervisor:start_child(SupPid, []),
+ [1,2,0,2] = get_child_counts(SupPid),
+ {ok, Ch1_3} = supervisor:start_child(SupPid, []),
+ [1,3,0,3] = get_child_counts(SupPid),
+
+ supervisor_deadlock:restart_child(Ch1_1),
+ supervisor_deadlock:restart_child(Ch1_2),
+ supervisor_deadlock:restart_child(Ch1_3),
+ test_server:sleep(400),
+ [1,3,0,3] = get_child_counts(SupPid),
+ [Ch2_1, Ch2_2, Ch2_3] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)],
+
+ ets:insert(supervisor_deadlock,{fail_start,true}),
+ supervisor_deadlock:restart_child(Ch2_1),
+ supervisor_deadlock:restart_child(Ch2_2),
+ test_server:sleep(4000), % allow restart to happen before proceeding
+ [1,1,0,3] = get_child_counts(SupPid),
+
+ ets:insert(supervisor_deadlock,{fail_start,false}),
+ test_server:sleep(4000), % allow restart to happen before proceeding
+ [1,3,0,3] = get_child_counts(SupPid),
+
+ ok = supervisor:terminate_child(SupPid, Ch2_3),
+ [1,2,0,2] = get_child_counts(SupPid),
+ [Ch3_1, Ch3_2] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)],
+ ok = supervisor:terminate_child(SupPid, Ch3_1),
+ [1,1,0,1] = get_child_counts(SupPid),
+ ok = supervisor:terminate_child(SupPid, Ch3_2),
+ [1,0,0,0] = get_child_counts(SupPid).
+
+%%-------------------------------------------------------------------------
%% Temporary children shall not be restarted so they should not save
%% start parameters, as it potentially can take up a huge amount of
%% memory for no purpose.
diff --git a/lib/stdlib/test/supervisor_deadlock.erl b/lib/stdlib/test/supervisor_deadlock.erl
index 288547a972..8d3d1c6f30 100644
--- a/lib/stdlib/test/supervisor_deadlock.erl
+++ b/lib/stdlib/test/supervisor_deadlock.erl
@@ -11,9 +11,11 @@ init([child]) ->
%% terminates immediately
{ok, []};
[{fail_start, true}] ->
- %% Restart frequency is MaxR=8, MaxT=10, so this will
- %% ensure that restart intensity is not reached -> restart
- %% loop
+ %% A restart frequency of MaxR=8, MaxT=10 should ensure
+ %% that restart intensity is not reached -> restart loop.
+ %% (Note that if we use simple_one_for_one, and start
+ %% 'many' child instances, the restart frequency must be
+ %% ajusted accordingly.)
timer:sleep(2000), % NOTE: this could be a gen_server call timeout
{stop, error}
@@ -41,5 +43,11 @@ code_change(_OldVsn, State, _Extra) ->
start_child() ->
gen_server:start_link({local, ?MODULE}, ?MODULE, [child], []).
+start_child_noreg() ->
+ gen_server:start_link(?MODULE, [child], []).
+
restart_child() ->
gen_server:cast(supervisor_deadlock, restart).
+
+restart_child(Pid) ->
+ gen_server:cast(Pid, restart).