diff options
author | Siri Hansen <[email protected]> | 2017-11-15 16:16:50 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2017-11-15 16:16:50 +0100 |
commit | 41e6eaa01031284c6d0f89eee031593f77953c1d (patch) | |
tree | 0205f54e2cb29420e07fcfcc50211c898483a310 | |
parent | 355e7855919e2a82856b27d45cb692a087da0385 (diff) | |
parent | a03895dcfb558dbb27a8a15afbb199f17231184b (diff) | |
download | otp-41e6eaa01031284c6d0f89eee031593f77953c1d.tar.gz otp-41e6eaa01031284c6d0f89eee031593f77953c1d.tar.bz2 otp-41e6eaa01031284c6d0f89eee031593f77953c1d.zip |
Merge pull request #1602 from sirihansen/siri/supervisor/store-children-in-map
Refactor supervisor, and store children in a map instead of a list
OTP-14586
-rw-r--r-- | lib/sasl/test/release_handler_SUITE.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 946 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_1.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 364 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_deadlock.erl | 2 |
5 files changed, 816 insertions, 502 deletions
diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl index 63b48e7a4e..4935782cf2 100644 --- a/lib/sasl/test/release_handler_SUITE.erl +++ b/lib/sasl/test/release_handler_SUITE.erl @@ -1384,9 +1384,9 @@ upgrade_supervisor(Conf) when is_list(Conf) -> %% Check that the restart strategy and child spec is updated {status, _, {module, _}, [_, _, _, _, [_,_,{data,[{"State",State}]}|_]]} = rpc:call(Node,sys,get_status,[a_sup]), - {state,_,RestartStrategy,[Child],_,_,_,_,_,_,_} = State, + {state,_,RestartStrategy,{[a],Db},_,_,_,_,_,_,_} = State, one_for_all = RestartStrategy, % changed from one_for_one - {child,_,_,_,_,brutal_kill,_,_} = Child, % changed from timeout 2000 + {child,_,_,_,_,brutal_kill,_,_} = maps:get(a,Db), % changed from timeout 2000 ok. diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index 7920e55930..e56415650f 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -31,7 +31,6 @@ %% Internal exports -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, format_status/2]). --export([try_again_restart/2]). %% For release_handler only -export([get_callback_module/1]). @@ -79,6 +78,7 @@ | {RestartStrategy :: strategy(), Intensity :: non_neg_integer(), Period :: pos_integer()}. +-type children() :: {Ids :: [child_id()], Db :: #{child_id() => child_rec()}}. %%-------------------------------------------------------------------------- %% Defaults @@ -96,7 +96,7 @@ pid = undefined :: child() | {restarting, pid() | undefined} | [pid()], - name :: child_id(), + id :: child_id(), mfargs :: mfargs(), restart_type :: restart(), shutdown :: shutdown(), @@ -104,16 +104,11 @@ modules = [] :: modules()}). -type child_rec() :: #child{}. --define(DICTS, dict). --define(DICT, dict:dict). --define(SETS, sets). --define(SET, sets:set). - -record(state, {name, strategy :: strategy() | 'undefined', - children = [] :: [child_rec()], - dynamics :: {'dict', ?DICT(pid(), list())} - | {'set', ?SET(pid())} + children = {[],#{}} :: children(), % Ids in start order + dynamics :: {'maps', #{pid() => list()}} + | {'sets', sets:set(pid())} | 'undefined', intensity :: non_neg_integer() | 'undefined', period :: pos_integer() | 'undefined', @@ -124,6 +119,9 @@ -type state() :: #state{}. -define(is_simple(State), State#state.strategy =:= simple_one_for_one). +-define(is_temporary(_Child_), _Child_#child.restart_type=:=temporary). +-define(is_transient(_Child_), _Child_#child.restart_type=:=transient). +-define(is_permanent(_Child_), _Child_#child.restart_type=:=permanent). -callback init(Args :: term()) -> {ok, {SupFlags :: sup_flags(), [ChildSpec :: child_spec()]}} @@ -179,16 +177,16 @@ start_child(Supervisor, ChildSpec) -> | {'error', Error}, Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one' | term(). -restart_child(Supervisor, Name) -> - call(Supervisor, {restart_child, Name}). +restart_child(Supervisor, Id) -> + call(Supervisor, {restart_child, Id}). -spec delete_child(SupRef, Id) -> Result when SupRef :: sup_ref(), Id :: child_id(), Result :: 'ok' | {'error', Error}, Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one'. -delete_child(Supervisor, Name) -> - call(Supervisor, {delete_child, Name}). +delete_child(Supervisor, Id) -> + call(Supervisor, {delete_child, Id}). %%----------------------------------------------------------------- %% Func: terminate_child/2 @@ -202,16 +200,16 @@ delete_child(Supervisor, Name) -> Id :: pid() | child_id(), Result :: 'ok' | {'error', Error}, Error :: 'not_found' | 'simple_one_for_one'. -terminate_child(Supervisor, Name) -> - call(Supervisor, {terminate_child, Name}). +terminate_child(Supervisor, Id) -> + call(Supervisor, {terminate_child, Id}). -spec get_childspec(SupRef, Id) -> Result when SupRef :: sup_ref(), Id :: pid() | child_id(), Result :: {'ok', child_spec()} | {'error', Error}, Error :: 'not_found'. -get_childspec(Supervisor, Name) -> - call(Supervisor, {get_childspec, Name}). +get_childspec(Supervisor, Id) -> + call(Supervisor, {get_childspec, Id}). -spec which_children(SupRef) -> [{Id,Child,Type,Modules}] when SupRef :: sup_ref(), @@ -246,17 +244,6 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> check_childspecs(X) -> {error, {badarg, X}}. %%%----------------------------------------------------------------- -%%% Called by restart/2 --spec try_again_restart(SupRef, Child) -> ok when - SupRef :: sup_ref(), - Child :: child_id() | pid(). -try_again_restart(Supervisor, Child) -> - cast(Supervisor, {try_again_restart, Child}). - -cast(Supervisor, Req) -> - gen_server:cast(Supervisor, Req). - -%%%----------------------------------------------------------------- %%% Called by release_handler during upgrade -spec get_callback_module(Pid) -> Module when Pid :: pid(), @@ -325,7 +312,7 @@ init_children(State, StartSpec) -> init_dynamic(State, [StartSpec]) -> case check_startspec([StartSpec]) of {ok, Children} -> - {ok, State#state{children = Children}}; + {ok, dyn_init(State#state{children = Children})}; Error -> {stop, {start_spec, Error}} end; @@ -334,35 +321,34 @@ init_dynamic(_State, StartSpec) -> %%----------------------------------------------------------------- %% Func: start_children/2 -%% Args: Children = [child_rec()] in start order +%% Args: Children = children() % Ids in start order %% SupName = {local, atom()} | {global, atom()} | {pid(), Mod} -%% Purpose: Start all children. The new list contains #child's +%% Purpose: Start all children. The new map contains #child's %% with pids. %% Returns: {ok, NChildren} | {error, NChildren, Reason} -%% NChildren = [child_rec()] in termination order (reversed -%% start order) +%% NChildren = children() % Ids in termination order +%% (reversed start order) %%----------------------------------------------------------------- -start_children(Children, SupName) -> start_children(Children, [], SupName). - -start_children([Child|Chs], NChildren, SupName) -> - case do_start_child(SupName, Child) of - {ok, undefined} when Child#child.restart_type =:= temporary -> - start_children(Chs, NChildren, SupName); - {ok, Pid} -> - start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); - {ok, Pid, _Extra} -> - start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); - {error, Reason} -> - report_error(start_error, Reason, Child, SupName), - {error, lists:reverse(Chs) ++ [Child | NChildren], - {failed_to_start_child,Child#child.name,Reason}} - end; -start_children([], NChildren, _SupName) -> - {ok, NChildren}. +start_children(Children, SupName) -> + Start = + fun(Id,Child) -> + case do_start_child(SupName, Child) of + {ok, undefined} when ?is_temporary(Child) -> + remove; + {ok, Pid} -> + {update,Child#child{pid = Pid}}; + {ok, Pid, _Extra} -> + {update,Child#child{pid = Pid}}; + {error, Reason} -> + report_error(start_error, Reason, Child, SupName), + {abort,{failed_to_start_child,Id,Reason}} + end + end, + children_map(Start,Children). do_start_child(SupName, Child) -> #child{mfargs = {M, F, Args}} = Child, - case catch apply(M, F, Args) of + case do_start_child_i(M, F, Args) of {ok, Pid} when is_pid(Pid) -> NChild = Child#child{pid = Pid}, report_progress(NChild, SupName), @@ -371,10 +357,8 @@ do_start_child(SupName, Child) -> NChild = Child#child{pid = Pid}, report_progress(NChild, SupName), {ok, Pid, Extra}; - ignore -> - {ok, undefined}; - {error, What} -> {error, What}; - What -> {error, What} + Other -> + Other end. do_start_child_i(M, F, A) -> @@ -400,17 +384,17 @@ do_start_child_i(M, F, A) -> -spec handle_call(call(), term(), state()) -> {'reply', term(), state()}. handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> - Child = hd(State#state.children), + Child = get_dynamic_child(State), #child{mfargs = {M, F, A}} = Child, Args = A ++ EArgs, case do_start_child_i(M, F, Args) of {ok, undefined} -> {reply, {ok, undefined}, State}; {ok, Pid} -> - NState = save_dynamic_child(Child#child.restart_type, Pid, Args, State), + NState = dyn_store(Pid, Args, State), {reply, {ok, Pid}, NState}; {ok, Pid, Extra} -> - NState = save_dynamic_child(Child#child.restart_type, Pid, Args, State), + NState = dyn_store(Pid, Args, State), {reply, {ok, Pid, Extra}, NState}; What -> {reply, What, State} @@ -426,121 +410,94 @@ handle_call({start_child, ChildSpec}, _From, State) -> end; %% terminate_child for simple_one_for_one can only be done with pid -handle_call({terminate_child, Name}, _From, State) when not is_pid(Name), - ?is_simple(State) -> +handle_call({terminate_child, Id}, _From, State) when not is_pid(Id), + ?is_simple(State) -> {reply, {error, simple_one_for_one}, State}; -handle_call({terminate_child, Name}, _From, State) -> - case get_child(Name, State, ?is_simple(State)) of - {value, Child} -> - case do_terminate(Child, State#state.name) of - #child{restart_type=RT} when RT=:=temporary; ?is_simple(State) -> - {reply, ok, state_del_child(Child, State)}; - NChild -> - {reply, ok, replace_child(NChild, State)} - end; - false -> +handle_call({terminate_child, Id}, _From, State) -> + case find_child(Id, State) of + {ok, Child} -> + do_terminate(Child, State#state.name), + {reply, ok, del_child(Child, State)}; + error -> {reply, {error, not_found}, State} end; %% restart_child request is invalid for simple_one_for_one supervisors -handle_call({restart_child, _Name}, _From, State) when ?is_simple(State) -> +handle_call({restart_child, _Id}, _From, State) when ?is_simple(State) -> {reply, {error, simple_one_for_one}, State}; -handle_call({restart_child, Name}, _From, State) -> - case get_child(Name, State) of - {value, Child} when Child#child.pid =:= undefined -> +handle_call({restart_child, Id}, _From, State) -> + case find_child(Id, State) of + {ok, Child} when Child#child.pid =:= undefined -> case do_start_child(State#state.name, Child) of {ok, Pid} -> - NState = replace_child(Child#child{pid = Pid}, State), + NState = set_pid(Pid, Id, State), {reply, {ok, Pid}, NState}; {ok, Pid, Extra} -> - NState = replace_child(Child#child{pid = Pid}, State), + NState = set_pid(Pid, Id, State), {reply, {ok, Pid, Extra}, NState}; Error -> {reply, Error, State} end; - {value, #child{pid=?restarting(_)}} -> + {ok, #child{pid=?restarting(_)}} -> {reply, {error, restarting}, State}; - {value, _} -> + {ok, _} -> {reply, {error, running}, State}; _ -> {reply, {error, not_found}, State} end; %% delete_child request is invalid for simple_one_for_one supervisors -handle_call({delete_child, _Name}, _From, State) when ?is_simple(State) -> +handle_call({delete_child, _Id}, _From, State) when ?is_simple(State) -> {reply, {error, simple_one_for_one}, State}; -handle_call({delete_child, Name}, _From, State) -> - case get_child(Name, State) of - {value, Child} when Child#child.pid =:= undefined -> - NState = remove_child(Child, State), +handle_call({delete_child, Id}, _From, State) -> + case find_child(Id, State) of + {ok, Child} when Child#child.pid =:= undefined -> + NState = remove_child(Id, State), {reply, ok, NState}; - {value, #child{pid=?restarting(_)}} -> + {ok, #child{pid=?restarting(_)}} -> {reply, {error, restarting}, State}; - {value, _} -> + {ok, _} -> {reply, {error, running}, State}; _ -> {reply, {error, not_found}, State} end; -handle_call({get_childspec, Name}, _From, State) -> - case get_child(Name, State, ?is_simple(State)) of - {value, Child} -> +handle_call({get_childspec, Id}, _From, State) -> + case find_child(Id, State) of + {ok, Child} -> {reply, {ok, child_to_spec(Child)}, State}; - false -> + error -> {reply, {error, not_found}, State} end; -handle_call(which_children, _From, #state{children = [#child{restart_type = temporary, - child_type = CT, - modules = Mods}]} = - State) when ?is_simple(State) -> - Reply = lists:map(fun(Pid) -> {undefined, Pid, CT, Mods} end, - ?SETS:to_list(dynamics_db(temporary, State#state.dynamics))), - {reply, Reply, State}; - -handle_call(which_children, _From, #state{children = [#child{restart_type = RType, - child_type = CT, - modules = Mods}]} = - State) when ?is_simple(State) -> - Reply = lists:map(fun({?restarting(_),_}) -> {undefined,restarting,CT,Mods}; - ({Pid, _}) -> {undefined, Pid, CT, Mods} end, - ?DICTS:to_list(dynamics_db(RType, State#state.dynamics))), +handle_call(which_children, _From, State) when ?is_simple(State) -> + #child{child_type = CT,modules = Mods} = get_dynamic_child(State), + Reply = dyn_map(fun(?restarting(_)) -> {undefined, restarting, CT, Mods}; + (Pid) -> {undefined, Pid, CT, Mods} + end, State), {reply, Reply, State}; handle_call(which_children, _From, State) -> Resp = - lists:map(fun(#child{pid = ?restarting(_), name = Name, - child_type = ChildType, modules = Mods}) -> - {Name, restarting, ChildType, Mods}; - (#child{pid = Pid, name = Name, - child_type = ChildType, modules = Mods}) -> - {Name, Pid, ChildType, Mods} - end, - State#state.children), + children_to_list( + fun(Id,#child{pid = ?restarting(_), + child_type = ChildType, modules = Mods}) -> + {Id, restarting, ChildType, Mods}; + (Id,#child{pid = Pid, + child_type = ChildType, modules = Mods}) -> + {Id, Pid, ChildType, Mods} + end, + State#state.children), {reply, Resp, State}; - -handle_call(count_children, _From, #state{children = [#child{restart_type = temporary, - child_type = CT}]} = State) - when ?is_simple(State) -> - Sz = ?SETS:size(dynamics_db(temporary, State#state.dynamics)), - Reply = case CT of - supervisor -> [{specs, 1}, {active, Sz}, - {supervisors, Sz}, {workers, 0}]; - worker -> [{specs, 1}, {active, Sz}, - {supervisors, 0}, {workers, Sz}] - end, - {reply, Reply, State}; - -handle_call(count_children, _From, #state{dynamic_restarts = Restarts, - children = [#child{restart_type = RType, - child_type = CT}]} = State) +handle_call(count_children, _From, #state{dynamic_restarts = Restarts} = State) when ?is_simple(State) -> - Sz = ?DICTS:size(dynamics_db(RType, State#state.dynamics)), - Active = Sz - Restarts, + #child{child_type = CT} = get_dynamic_child(State), + Sz = dyn_size(State), + Active = Sz - Restarts, % Restarts is always 0 for temporary children Reply = case CT of supervisor -> [{specs, 1}, {active, Active}, {supervisors, Sz}, {workers, 0}]; @@ -552,16 +509,15 @@ handle_call(count_children, _From, #state{dynamic_restarts = Restarts, handle_call(count_children, _From, State) -> %% Specs and children are together on the children list... {Specs, Active, Supers, Workers} = - lists:foldl(fun(Child, Counts) -> - count_child(Child, Counts) - end, {0,0,0,0}, State#state.children), + children_fold(fun(_Id, Child, Counts) -> + count_child(Child, Counts) + end, {0,0,0,0}, State#state.children), %% Reformat counts to a property list. Reply = [{specs, Specs}, {active, Active}, {supervisors, Supers}, {workers, Workers}], {reply, Reply, State}. - count_child(#child{pid = Pid, child_type = worker}, {Specs, Active, Supers, Workers}) -> case is_pid(Pid) andalso is_process_alive(Pid) of @@ -575,34 +531,15 @@ count_child(#child{pid = Pid, child_type = supervisor}, false -> {Specs+1, Active, Supers+1, Workers} end. - %%% If a restart attempt failed, this message is cast %%% from restart/2 in order to give gen_server the chance to %%% check it's inbox before trying again. --spec handle_cast({try_again_restart, child_id() | pid()}, state()) -> +-spec handle_cast({try_again_restart, child_id() | {'restarting',pid()}}, state()) -> {'noreply', state()} | {stop, shutdown, state()}. -handle_cast({try_again_restart,Pid}, #state{children=[Child]}=State) - when ?is_simple(State) -> - RT = Child#child.restart_type, - RPid = restarting(Pid), - case dynamic_child_args(RPid, RT, State#state.dynamics) of - {ok, Args} -> - {M, F, _} = Child#child.mfargs, - NChild = Child#child{pid = RPid, mfargs = {M, F, Args}}, - case restart(NChild,State) of - {ok, State1} -> - {noreply, State1}; - {shutdown, State1} -> - {stop, shutdown, State1} - end; - error -> - {noreply, State} - end; - -handle_cast({try_again_restart,Name}, State) -> - case lists:keyfind(Name,#child.name,State#state.children) of - Child = #child{pid=?restarting(_)} -> +handle_cast({try_again_restart,TryAgainId}, State) -> + case find_child_and_args(TryAgainId, State) of + {ok, Child = #child{pid=?restarting(_)}} -> case restart(Child,State) of {ok, State1} -> {noreply, State1}; @@ -637,10 +574,8 @@ handle_info(Msg, State) -> %% -spec terminate(term(), state()) -> 'ok'. -terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) -> - terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type, - State#state.dynamics), - State#state.name); +terminate(_Reason, State) when ?is_simple(State) -> + terminate_dynamic_children(State); terminate(_Reason, State) -> terminate_children(State#state.children, State#state.name). @@ -675,8 +610,8 @@ code_change(_, State, _) -> update_childspec(State, StartSpec) when ?is_simple(State) -> case check_startspec(StartSpec) of - {ok, [Child]} -> - {ok, State#state{children = [Child]}}; + {ok, {[_],_}=Children} -> + {ok, State#state{children = Children}}; Error -> {error, Error} end; @@ -690,39 +625,36 @@ update_childspec(State, StartSpec) -> {error, Error} end. -update_childspec1([Child|OldC], Children, KeepOld) -> - case update_chsp(Child, Children) of - {ok,NewChildren} -> - update_childspec1(OldC, NewChildren, KeepOld); +update_childspec1({[Id|OldIds], OldDb}, {Ids,Db}, KeepOld) -> + case update_chsp(maps:get(Id,OldDb), Db) of + {ok,NewDb} -> + update_childspec1({OldIds,OldDb}, {Ids,NewDb}, KeepOld); false -> - update_childspec1(OldC, Children, [Child|KeepOld]) + update_childspec1({OldIds,OldDb}, {Ids,Db}, [Id|KeepOld]) end; -update_childspec1([], Children, KeepOld) -> +update_childspec1({[],OldDb}, {Ids,Db}, KeepOld) -> + KeepOldDb = maps:with(KeepOld,OldDb), %% Return them in (kept) reverse start order. - lists:reverse(Children ++ KeepOld). - -update_chsp(OldCh, Children) -> - case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name -> - Ch#child{pid = OldCh#child.pid}; - (Ch) -> - Ch - end, - Children) of - Children -> - false; % OldCh not found in new spec. - NewC -> - {ok, NewC} + {lists:reverse(Ids ++ KeepOld),maps:merge(KeepOldDb,Db)}. + +update_chsp(#child{id=Id}=OldChild, NewDb) -> + case maps:find(Id, NewDb) of + {ok,Child} -> + {ok,NewDb#{Id => Child#child{pid = OldChild#child.pid}}}; + error -> % Id not found in new spec. + false end. + %%% --------------------------------------------------- %%% Start a new child. %%% --------------------------------------------------- handle_start_child(Child, State) -> - case get_child(Child#child.name, State) of - false -> + case find_child(Child#child.id, State) of + error -> case do_start_child(State#state.name, Child) of - {ok, undefined} when Child#child.restart_type =:= temporary -> + {ok, undefined} when ?is_temporary(Child) -> {{ok, undefined}, State}; {ok, Pid} -> {{ok, Pid}, save_child(Child#child{pid = Pid}, State)}; @@ -731,9 +663,9 @@ handle_start_child(Child, State) -> {error, What} -> {{error, {What, Child}}, State} end; - {value, OldChild} when is_pid(OldChild#child.pid) -> + {ok, OldChild} when is_pid(OldChild#child.pid) -> {{error, {already_started, OldChild#child.pid}}, State}; - {value, _OldChild} -> + {ok, _OldChild} -> {{error, already_present}, State} end. @@ -742,63 +674,45 @@ handle_start_child(Child, State) -> %%% Returns: {ok, state()} | {shutdown, state()} %%% --------------------------------------------------- -restart_child(Pid, Reason, #state{children = [Child]} = State) when ?is_simple(State) -> - RestartType = Child#child.restart_type, - case dynamic_child_args(Pid, RestartType, State#state.dynamics) of - {ok, Args} -> - {M, F, _} = Child#child.mfargs, - NChild = Child#child{pid = Pid, mfargs = {M, F, Args}}, - do_restart(RestartType, Reason, NChild, State); - error -> - {ok, State} - end; - restart_child(Pid, Reason, State) -> - Children = State#state.children, - case lists:keyfind(Pid, #child.pid, Children) of - #child{restart_type = RestartType} = Child -> - do_restart(RestartType, Reason, Child, State); - false -> + case find_child_and_args(Pid, State) of + {ok, Child} -> + do_restart(Reason, Child, State); + error -> {ok, State} end. -do_restart(permanent, Reason, Child, State) -> +do_restart(Reason, Child, State) when ?is_permanent(Child) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); -do_restart(_, normal, Child, State) -> - NState = state_del_child(Child, State), +do_restart(normal, Child, State) -> + NState = del_child(Child, State), {ok, NState}; -do_restart(_, shutdown, Child, State) -> - NState = state_del_child(Child, State), +do_restart(shutdown, Child, State) -> + NState = del_child(Child, State), {ok, NState}; -do_restart(_, {shutdown, _Term}, Child, State) -> - NState = state_del_child(Child, State), +do_restart({shutdown, _Term}, Child, State) -> + NState = del_child(Child, State), {ok, NState}; -do_restart(transient, Reason, Child, State) -> +do_restart(Reason, Child, State) when ?is_transient(Child) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); -do_restart(temporary, Reason, Child, State) -> +do_restart(Reason, Child, State) when ?is_temporary(Child) -> report_error(child_terminated, Reason, Child, State#state.name), - NState = state_del_child(Child, State), + NState = del_child(Child, State), {ok, NState}. restart(Child, State) -> case add_restart(State) of {ok, NState} -> case restart(NState#state.strategy, Child, NState) of - {try_again,NState2} -> + {{try_again, TryAgainId}, NState2} -> %% Leaving control back to gen_server before %% trying again. This way other incoming requsts %% for the supervisor can be handled - e.g. a %% shutdown request for the supervisor or the %% child. - Id = if ?is_simple(State) -> Child#child.pid; - true -> Child#child.name - end, - ok = try_again_restart(self(), Id), - {ok,NState2}; - {try_again, NState2, #child{name=ChName}} -> - ok = try_again_restart(self(), ChName), + try_again_restart(TryAgainId), {ok,NState2}; Other -> Other @@ -806,124 +720,111 @@ restart(Child, State) -> {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), - {shutdown, remove_child(Child, NState)} + {shutdown, del_child(Child, NState)} end. restart(simple_one_for_one, Child, State0) -> #child{pid = OldPid, mfargs = {M, F, A}} = Child, - State = case OldPid of + State1 = case OldPid of ?restarting(_) -> NRes = State0#state.dynamic_restarts - 1, State0#state{dynamic_restarts = NRes}; _ -> State0 end, - Dynamics = ?DICTS:erase(OldPid, dynamics_db(Child#child.restart_type, - State#state.dynamics)), + State2 = dyn_erase(OldPid, State1), case do_start_child_i(M, F, A) of {ok, Pid} -> - DynamicsDb = {dict, ?DICTS:store(Pid, A, Dynamics)}, - NState = State#state{dynamics = DynamicsDb}, + NState = dyn_store(Pid, A, State2), {ok, NState}; {ok, Pid, _Extra} -> - DynamicsDb = {dict, ?DICTS:store(Pid, A, Dynamics)}, - NState = State#state{dynamics = DynamicsDb}, + NState = dyn_store(Pid, A, State2), {ok, NState}; {error, Error} -> - NRestarts = State#state.dynamic_restarts + 1, - DynamicsDb = {dict, ?DICTS:store(restarting(OldPid), A, Dynamics)}, - NState = State#state{dynamic_restarts = NRestarts, - dynamics = DynamicsDb}, - report_error(start_error, Error, Child, State#state.name), - {try_again, NState} + ROldPid = restarting(OldPid), + NRestarts = State2#state.dynamic_restarts + 1, + State3 = State2#state{dynamic_restarts = NRestarts}, + NState = dyn_store(ROldPid, A, State3), + report_error(start_error, Error, Child, NState#state.name), + {{try_again, ROldPid}, NState} end; -restart(one_for_one, Child, State) -> +restart(one_for_one, #child{id=Id} = Child, State) -> OldPid = Child#child.pid, case do_start_child(State#state.name, Child) of {ok, Pid} -> - NState = replace_child(Child#child{pid = Pid}, State), + NState = set_pid(Pid, Id, State), {ok, NState}; {ok, Pid, _Extra} -> - NState = replace_child(Child#child{pid = Pid}, State), + NState = set_pid(Pid, Id, State), {ok, NState}; {error, Reason} -> - NState = replace_child(Child#child{pid = restarting(OldPid)}, State), + NState = set_pid(restarting(OldPid), Id, State), report_error(start_error, Reason, Child, State#state.name), - {try_again, NState} - end; -restart(rest_for_one, Child, State) -> - {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), - ChAfter2 = terminate_children(ChAfter, State#state.name), - case start_children(ChAfter2, State#state.name) of - {ok, ChAfter3} -> - {ok, State#state{children = ChAfter3 ++ ChBefore}}; - {error, ChAfter3, {failed_to_start_child, ChName, _Reason}} - when ChName =:= Child#child.name -> - NChild = Child#child{pid=restarting(Child#child.pid)}, - NState = State#state{children = ChAfter3 ++ ChBefore}, - {try_again, replace_child(NChild,NState)}; - {error, ChAfter3, {failed_to_start_child, ChName, _Reason}} -> - NChild = lists:keyfind(ChName, #child.name, ChAfter3), - NChild2 = NChild#child{pid=?restarting(undefined)}, - NState = State#state{children = ChAfter3 ++ ChBefore}, - {try_again, replace_child(NChild2,NState), NChild2} + {{try_again,Id}, NState} end; -restart(one_for_all, Child, State) -> - Children1 = del_child(Child#child.pid, State#state.children), - Children2 = terminate_children(Children1, State#state.name), - case start_children(Children2, State#state.name) of - {ok, NChs} -> - {ok, State#state{children = NChs}}; - {error, NChs, {failed_to_start_child, ChName, _Reason}} - when ChName =:= Child#child.name -> - NChild = Child#child{pid=restarting(Child#child.pid)}, - NState = State#state{children = NChs}, - {try_again, replace_child(NChild,NState)}; - {error, NChs, {failed_to_start_child, ChName, _Reason}} -> - NChild = lists:keyfind(ChName, #child.name, NChs), - NChild2 = NChild#child{pid=?restarting(undefined)}, - NState = State#state{children = NChs}, - {try_again, replace_child(NChild2,NState), NChild2} +restart(rest_for_one, #child{id=Id} = Child, #state{name=SupName} = State) -> + {ChAfter, ChBefore} = split_child(Id, State#state.children), + {Return, ChAfter2} = restart_multiple_children(Child, ChAfter, SupName), + {Return, State#state{children = append(ChAfter2,ChBefore)}}; +restart(one_for_all, Child, #state{name=SupName} = State) -> + Children1 = del_child(Child#child.id, State#state.children), + {Return, NChildren} = restart_multiple_children(Child, Children1, SupName), + {Return, State#state{children = NChildren}}. + +restart_multiple_children(Child, Children, SupName) -> + Children1 = terminate_children(Children, SupName), + case start_children(Children1, SupName) of + {ok, NChildren} -> + {ok, NChildren}; + {error, NChildren, {failed_to_start_child, FailedId, _Reason}} -> + NewPid = if FailedId =:= Child#child.id -> + restarting(Child#child.pid); + true -> + ?restarting(undefined) + end, + {{try_again, FailedId}, set_pid(NewPid,FailedId,NChildren)} end. restarting(Pid) when is_pid(Pid) -> ?restarting(Pid); restarting(RPid) -> RPid. +-spec try_again_restart(child_id() | {'restarting',pid()}) -> 'ok'. +try_again_restart(TryAgainId) -> + gen_server:cast(self(), {try_again_restart, TryAgainId}). + %%----------------------------------------------------------------- %% Func: terminate_children/2 -%% Args: Children = [child_rec()] in termination order +%% Args: Children = children() % Ids in termination order %% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Returns: NChildren = [child_rec()] in -%% startup order (reversed termination order) +%% Returns: NChildren = children() % Ids in startup order +%% % (reversed termination order) %%----------------------------------------------------------------- terminate_children(Children, SupName) -> - terminate_children(Children, SupName, []). - -%% Temporary children should not be restarted and thus should -%% be skipped when building the list of terminated children, although -%% we do want them to be shut down as many functions from this module -%% use this function to just clear everything. -terminate_children([Child = #child{restart_type=temporary} | Children], SupName, Res) -> - _ = do_terminate(Child, SupName), - terminate_children(Children, SupName, Res); -terminate_children([Child | Children], SupName, Res) -> - NChild = do_terminate(Child, SupName), - terminate_children(Children, SupName, [NChild | Res]); -terminate_children([], _SupName, Res) -> - Res. + Terminate = + fun(_Id,Child) when ?is_temporary(Child) -> + %% Temporary children should not be restarted and thus should + %% be skipped when building the list of terminated children. + do_terminate(Child, SupName), + remove; + (_Id,Child) -> + do_terminate(Child, SupName), + {update,Child#child{pid=undefined}} + end, + {ok,NChildren} = children_map(Terminate, Children), + NChildren. do_terminate(Child, SupName) when is_pid(Child#child.pid) -> case shutdown(Child#child.pid, Child#child.shutdown) of ok -> ok; - {error, normal} when Child#child.restart_type =/= permanent -> + {error, normal} when not (?is_permanent(Child)) -> ok; {error, OtherReason} -> report_error(shutdown_error, OtherReason, Child, SupName) end, - Child#child{pid = undefined}; -do_terminate(Child, _SupName) -> - Child#child{pid = undefined}. + ok; +do_terminate(_Child, _SupName) -> + ok. %%----------------------------------------------------------------- %% Shutdowns a child. We must check the EXIT value @@ -996,66 +897,50 @@ monitor_child(Pid) -> ok end. - %%----------------------------------------------------------------- -%% Func: terminate_dynamic_children/3 -%% Args: Child = child_rec() -%% Dynamics = ?DICT() | ?SET() -%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Func: terminate_dynamic_children/1 +%% Args: State %% Returns: ok %% -%% %% Shutdown all dynamic children. This happens when the supervisor is %% stopped. Because the supervisor can have millions of dynamic children, we -%% can have an significative overhead here. +%% can have a significative overhead here. %%----------------------------------------------------------------- -terminate_dynamic_children(Child, Dynamics, SupName) -> - {Pids, EStack0} = monitor_dynamic_children(Child, Dynamics), - Sz = ?SETS:size(Pids), +terminate_dynamic_children(State) -> + Child = get_dynamic_child(State), + {Pids, EStack0} = monitor_dynamic_children(Child,State), + Sz = sets:size(Pids), EStack = case Child#child.shutdown of brutal_kill -> - ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + sets:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); infinity -> - ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + sets:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); Time -> - ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + sets:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), TRef = erlang:start_timer(Time, self(), kill), wait_dynamic_children(Child, Pids, Sz, TRef, EStack0) end, %% Unroll stacked errors and report them - ?DICTS:fold(fun(Reason, Ls, _) -> - report_error(shutdown_error, Reason, - Child#child{pid=Ls}, SupName) - end, ok, EStack). - - -monitor_dynamic_children(#child{restart_type=temporary}, Dynamics) -> - ?SETS:fold(fun(P, {Pids, EStack}) -> - case monitor_child(P) of - ok -> - {?SETS:add_element(P, Pids), EStack}; - {error, normal} -> - {Pids, EStack}; - {error, Reason} -> - {Pids, ?DICTS:append(Reason, P, EStack)} - end - end, {?SETS:new(), ?DICTS:new()}, Dynamics); -monitor_dynamic_children(#child{restart_type=RType}, Dynamics) -> - ?DICTS:fold(fun(P, _, {Pids, EStack}) when is_pid(P) -> - case monitor_child(P) of - ok -> - {?SETS:add_element(P, Pids), EStack}; - {error, normal} when RType =/= permanent -> - {Pids, EStack}; - {error, Reason} -> - {Pids, ?DICTS:append(Reason, P, EStack)} - end; - (?restarting(_), _, {Pids, EStack}) -> - {Pids, EStack} - end, {?SETS:new(), ?DICTS:new()}, Dynamics). - + dict:fold(fun(Reason, Ls, _) -> + report_error(shutdown_error, Reason, + Child#child{pid=Ls}, State#state.name) + end, ok, EStack). + +monitor_dynamic_children(Child,State) -> + dyn_fold(fun(P,{Pids, EStack}) when is_pid(P) -> + case monitor_child(P) of + ok -> + {sets:add_element(P, Pids), EStack}; + {error, normal} when not (?is_permanent(Child)) -> + {Pids, EStack}; + {error, Reason} -> + {Pids, dict:append(Reason, P, EStack)} + end; + (?restarting(_), {Pids, EStack}) -> + {Pids, EStack} + end, {sets:new(), dict:new()}, State). wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) -> EStack; @@ -1073,39 +958,38 @@ wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz, TRef, EStack) -> receive {'DOWN', _MRef, process, Pid, killed} -> - wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1, TRef, EStack); {'DOWN', _MRef, process, Pid, Reason} -> - wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, - TRef, ?DICTS:append(Reason, Pid, EStack)) + wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1, + TRef, dict:append(Reason, Pid, EStack)) end; -wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz, - TRef, EStack) -> +wait_dynamic_children(Child, Pids, Sz, TRef, EStack) -> receive {'DOWN', _MRef, process, Pid, shutdown} -> - wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1, TRef, EStack); {'DOWN', _MRef, process, Pid, {shutdown, _}} -> - wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1, TRef, EStack); - {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent -> - wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + {'DOWN', _MRef, process, Pid, normal} when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1, TRef, EStack); {'DOWN', _MRef, process, Pid, Reason} -> - wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, - TRef, ?DICTS:append(Reason, Pid, EStack)); + wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1, + TRef, dict:append(Reason, Pid, EStack)); {timeout, TRef, kill} -> - ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + sets:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), wait_dynamic_children(Child, Pids, Sz, undefined, EStack) end. %%----------------------------------------------------------------- -%% Child/State manipulating functions. +%% Access #state.children %%----------------------------------------------------------------- %% Note we do not want to save the parameter list for temporary processes as @@ -1113,114 +997,184 @@ wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz, %% Especially for dynamic children to simple_one_for_one supervisors %% it could become very costly as it is not uncommon to spawn %% very many such processes. -save_child(#child{restart_type = temporary, - mfargs = {M, F, _}} = Child, #state{children = Children} = State) -> - State#state{children = [Child#child{mfargs = {M, F, undefined}} |Children]}; -save_child(Child, #state{children = Children} = State) -> - State#state{children = [Child |Children]}. - -save_dynamic_child(temporary, Pid, _, #state{dynamics = Dynamics} = State) -> - DynamicsDb = dynamics_db(temporary, Dynamics), - State#state{dynamics = {set, ?SETS:add_element(Pid, DynamicsDb)}}; -save_dynamic_child(RestartType, Pid, Args, #state{dynamics = Dynamics} = State) -> - DynamicsDb = dynamics_db(RestartType, Dynamics), - State#state{dynamics = {dict, ?DICTS:store(Pid, Args, DynamicsDb)}}. - -dynamics_db(temporary, undefined) -> - ?SETS:new(); -dynamics_db(_, undefined) -> - ?DICTS:new(); -dynamics_db(_, {_Tag, DynamicsDb}) -> - DynamicsDb. - -dynamic_child_args(_Pid, temporary, _DynamicsDb) -> - {ok, undefined}; -dynamic_child_args(Pid, _RT, {dict, DynamicsDb}) -> - ?DICTS:find(Pid, DynamicsDb); -dynamic_child_args(_Pid, _RT, undefined) -> - error. - -state_del_child(#child{pid = Pid, restart_type = temporary}, State) when ?is_simple(State) -> - NDynamics = ?SETS:del_element(Pid, dynamics_db(temporary, State#state.dynamics)), - State#state{dynamics = {set, NDynamics}}; -state_del_child(#child{pid = Pid, restart_type = RType}, State) when ?is_simple(State) -> - NDynamics = ?DICTS:erase(Pid, dynamics_db(RType, State#state.dynamics)), - State#state{dynamics = {dict, NDynamics}}; -state_del_child(Child, State) -> - NChildren = del_child(Child#child.name, State#state.children), - State#state{children = NChildren}. - -del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name, Ch#child.restart_type =:= temporary -> - Chs; -del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name -> - [Ch#child{pid = undefined} | Chs]; -del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid, Ch#child.restart_type =:= temporary -> - Chs; -del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid -> - [Ch#child{pid = undefined} | Chs]; -del_child(Name, [Ch|Chs]) -> - [Ch|del_child(Name, Chs)]; -del_child(_, []) -> - []. +-spec save_child(child_rec(), state()) -> state(). +save_child(#child{mfargs = {M, F, _}} = Child, State) when ?is_temporary(Child) -> + do_save_child(Child#child{mfargs = {M, F, undefined}}, State); +save_child(Child, State) -> + do_save_child(Child, State). + +-spec do_save_child(child_rec(), state()) -> state(). +do_save_child(#child{id = Id} = Child, #state{children = {Ids,Db}} = State) -> + State#state{children = {[Id|Ids],Db#{Id => Child}}}. + +-spec del_child(child_rec(), state()) -> state(); + (child_id(), children()) -> children(). +del_child(#child{pid = Pid}, State) when ?is_simple(State) -> + dyn_erase(Pid,State); +del_child(Child, State) when is_record(Child,child), is_record(State,state) -> + NChildren = del_child(Child#child.id, State#state.children), + State#state{children = NChildren}; +del_child(Id, {Ids,Db}) -> + case maps:get(Id, Db) of + Child when Child#child.restart_type =:= temporary -> + {lists:delete(Id, Ids), maps:remove(Id, Db)}; + Child -> + {Ids, Db#{Id=>Child#child{pid=undefined}}} + end. -%% Chs = [S4, S3, Ch, S1, S0] -%% Ret: {[S4, S3, Ch], [S1, S0]} -split_child(Name, Chs) -> - split_child(Name, Chs, []). - -split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name -> - {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; -split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid -> - {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; -split_child(Name, [Ch|Chs], After) -> - split_child(Name, Chs, [Ch | After]); -split_child(_, [], After) -> - {lists:reverse(After), []}. - -get_child(Name, State) -> - get_child(Name, State, false). - -get_child(Pid, State, AllowPid) when AllowPid, is_pid(Pid) -> - get_dynamic_child(Pid, State); -get_child(Name, State, _) -> - lists:keysearch(Name, #child.name, State#state.children). - -get_dynamic_child(Pid, #state{children=[Child], dynamics=Dynamics}) -> - case is_dynamic_pid(Pid, Dynamics) of - true -> - {value, Child#child{pid=Pid}}; - false -> - RPid = restarting(Pid), - case is_dynamic_pid(RPid, Dynamics) of - true -> - {value, Child#child{pid=RPid}}; - false -> +%% In: {[S4, S3, Ch, S1, S0],Db} +%% Ret: {{[S4, S3, Ch],Db1}, {[S1, S0],Db2}} +%% Db1 and Db2 contain the keys in the lists they are associated with. +-spec split_child(child_id(), children()) -> {children(), children()}. +split_child(Id, {Ids,Db}) -> + {IdsAfter,IdsBefore} = split_ids(Id, Ids, []), + DbBefore = maps:with(IdsBefore,Db), + #{Id:=Ch} = DbAfter = maps:with(IdsAfter,Db), + {{IdsAfter,DbAfter#{Id=>Ch#child{pid=undefined}}},{IdsBefore,DbBefore}}. + +split_ids(Id, [Id|Ids], After) -> + {lists:reverse([Id|After]), Ids}; +split_ids(Id, [Other|Ids], After) -> + split_ids(Id, Ids, [Other | After]). + +%% Find the child record for a given Pid (dynamic child) or Id +%% (non-dynamic child). This is called from the API functions. +-spec find_child(pid() | child_id(), state()) -> {ok,child_rec()} | error. +find_child(Pid, State) when is_pid(Pid), ?is_simple(State) -> + case find_dynamic_child(Pid, State) of + error -> + case find_dynamic_child(restarting(Pid), State) of + error -> case erlang:is_process_alive(Pid) of - true -> false; - false -> {value, Child} - end - end + true -> error; + false -> {ok, get_dynamic_child(State)} + end; + Other -> + Other + end; + Other -> + Other + end; +find_child(Id, #state{children = {_Ids,Db}}) -> + maps:find(Id, Db). + +%% Get the child record - either by child id or by pid. If +%% simple_one_for_one, then insert the pid and args into the returned +%% child record. This is called when trying to restart the child. +-spec find_child_and_args(IdOrPid, state()) -> {ok, child_rec()} | error when + IdOrPid :: pid() | {restarting,pid()} | child_id(). +find_child_and_args(Pid, State) when ?is_simple(State) -> + case find_dynamic_child(Pid, State) of + {ok,#child{mfargs={M,F,_}} = Child} -> + {ok, Args} = dyn_args(Pid, State), + {ok, Child#child{mfargs = {M, F, Args}}}; + error -> + error + end; +find_child_and_args(Pid, State) when is_pid(Pid) -> + find_child_by_pid(Pid, State); +find_child_and_args(Id, #state{children={_Ids,Db}}) -> + maps:find(Id, Db). + +%% Given the pid, find the child record for a dynamic child, and +%% include the pid in the returned record. +-spec find_dynamic_child(IdOrPid, state()) -> {ok, child_rec()} | error when + IdOrPid :: pid() | {restarting,pid()} | child_id(). +find_dynamic_child(Pid, State) -> + case dyn_exists(Pid, State) of + true -> + Child = get_dynamic_child(State), + {ok, Child#child{pid=Pid}}; + false -> + error end. -is_dynamic_pid(Pid, {dict, Dynamics}) -> - ?DICTS:is_key(Pid, Dynamics); -is_dynamic_pid(Pid, {set, Dynamics}) -> - ?SETS:is_element(Pid, Dynamics); -is_dynamic_pid(_Pid, undefined) -> - false. - -replace_child(Child, State) -> - Chs = do_replace_child(Child, State#state.children), - State#state{children = Chs}. - -do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name -> - [Child | Chs]; -do_replace_child(Child, [Ch|Chs]) -> - [Ch|do_replace_child(Child, Chs)]. +%% Given the pid, find the child record for a non-dyanamic child. +-spec find_child_by_pid(IdOrPid, state()) -> {ok,child_rec()} | error when + IdOrPid :: pid() | {restarting,pid()}. +find_child_by_pid(Pid,#state{children={_Ids,Db}}) -> + Fun = fun(_Id,#child{pid=P}=Ch,_) when P =:= Pid -> + throw(Ch); + (_,_,error) -> + error + end, + try maps:fold(Fun,error,Db) + catch throw:Child -> {ok,Child} + end. -remove_child(Child, State) -> - Chs = lists:keydelete(Child#child.name, #child.name, State#state.children), - State#state{children = Chs}. +%% Get the child record from a simple_one_for_one supervisor - no pid +%% It is assumed that the child can always be found +-spec get_dynamic_child(state()) -> child_rec(). +get_dynamic_child(#state{children={[Id],Db}}) -> + #{Id := Child} = Db, + Child. + +%% Update pid in the given child record and store it in the process state +-spec set_pid(term(), child_id(), state()) -> state(); + (term(), child_id(), children()) -> children(). +set_pid(Pid, Id, #state{children=Children} = State) -> + State#state{children = set_pid(Pid, Id, Children)}; +set_pid(Pid, Id, {Ids, Db}) -> + NewDb = maps:update_with(Id, fun(Child) -> Child#child{pid=Pid} end, Db), + {Ids,NewDb}. + +%% Remove the Id and the child record from the process state +-spec remove_child(child_id(), state()) -> state(). +remove_child(Id, #state{children={Ids,Db}} = State) -> + NewIds = lists:delete(Id,Ids), + NewDb = maps:remove(Id,Db), + State#state{children = {NewIds,NewDb}}. + +%% In the order of Ids, traverse the children and update each child +%% according to the return value of the Fun. +%% On error, abort and return the merge of the old and the updated map. +%% NOTE: The returned list of Ids is reverted compared to the input. +-spec children_map(Fun, children()) -> {ok, children()} | + {error,children(),Reason} when + Fun :: fun((child_id(),child_rec()) -> {update,child_rec()} | + remove | + {abort, Reason}), + Reason :: term(). +children_map(Fun,{Ids,Db}) -> + children_map(Fun, Ids, Db, []). + +children_map(Fun,[Id|Ids],Db,Acc) -> + case Fun(Id,maps:get(Id,Db)) of + {update,Child} -> + children_map(Fun,Ids,Db#{Id => Child},[Id|Acc]); + remove -> + children_map(Fun,Ids,maps:remove(Id,Db),Acc); + {abort,Reason} -> + {error,{lists:reverse(Ids)++[Id|Acc],Db},Reason} + end; +children_map(_Fun,[],Db,Acc) -> + {ok,{Acc,Db}}. + +%% In the order of Ids, map over all children and return the list +-spec children_to_list(Fun, children()) -> List when + Fun :: fun((child_id(), child_rec()) -> Elem), + List :: list(Elem), + Elem :: term(). +children_to_list(Fun,{Ids,Db}) -> + children_to_list(Fun, Ids, Db, []). +children_to_list(Fun,[Id|Ids],Db,Acc) -> + children_to_list(Fun,Ids,Db,[Fun(Id,maps:get(Id,Db))|Acc]); +children_to_list(_Fun,[],_Db,Acc) -> + lists:reverse(Acc). + +%% The order is not important - so ignore Ids +-spec children_fold(Fun, Acc0, children()) -> Acc1 when + Fun :: fun((child_id(), child_rec(), AccIn) -> AccOut), + Acc0 :: term(), + Acc1 :: term(), + AccIn :: term(), + AccOut :: term(). +children_fold(Fun,Init,{_Ids,Db}) -> + maps:fold(Fun, Init, Db). + +-spec append(children(), children()) -> children(). +append({Ids1,Db1},{Ids2,Db2}) -> + {Ids1++Ids2,maps:merge(Db1,Db2)}. %%----------------------------------------------------------------- %% Func: init_state/4 @@ -1290,27 +1244,27 @@ supname(N, _) -> N. %%% Returns: {ok, [child_rec()]} | Error %%% ------------------------------------------------------ -check_startspec(Children) -> check_startspec(Children, []). +check_startspec(Children) -> check_startspec(Children, [], #{}). -check_startspec([ChildSpec|T], Res) -> +check_startspec([ChildSpec|T], Ids, Db) -> case check_childspec(ChildSpec) of - {ok, Child} -> - case lists:keymember(Child#child.name, #child.name, Res) of + {ok, #child{id=Id}=Child} -> + case maps:is_key(Id, Db) of %% The error message duplicate_child_name is kept for %% backwards compatibility, although %% duplicate_child_id would be more correct. - true -> {duplicate_child_name, Child#child.name}; - false -> check_startspec(T, [Child | Res]) + true -> {duplicate_child_name, Id}; + false -> check_startspec(T, [Id | Ids], Db#{Id=>Child}) end; Error -> Error end; -check_startspec([], Res) -> - {ok, lists:reverse(Res)}. +check_startspec([], Ids, Db) -> + {ok, {lists:reverse(Ids),Db}}. check_childspec(ChildSpec) when is_map(ChildSpec) -> catch do_check_childspec(maps:merge(?default_child_spec,ChildSpec)); -check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) -> - check_childspec(#{id => Name, +check_childspec({Id, Func, RestartType, Shutdown, ChildType, Mods}) -> + check_childspec(#{id => Id, start => Func, restart => RestartType, shutdown => Shutdown, @@ -1320,15 +1274,15 @@ check_childspec(X) -> {invalid_child_spec, X}. do_check_childspec(#{restart := RestartType, type := ChildType} = ChildSpec)-> - Name = case ChildSpec of - #{id := N} -> N; + Id = case ChildSpec of + #{id := I} -> I; _ -> throw(missing_id) end, Func = case ChildSpec of #{start := F} -> F; _ -> throw(missing_start) end, - validName(Name), + validId(Id), validFunc(Func), validRestartType(RestartType), validChildType(ChildType), @@ -1343,14 +1297,14 @@ do_check_childspec(#{restart := RestartType, _ -> {M,_,_} = Func, [M] end, validMods(Mods), - {ok, #child{name = Name, mfargs = Func, restart_type = RestartType, + {ok, #child{id = Id, mfargs = Func, restart_type = RestartType, shutdown = Shutdown, child_type = ChildType, modules = Mods}}. validChildType(supervisor) -> true; validChildType(worker) -> true; validChildType(What) -> throw({invalid_child_type, What}). -validName(_Name) -> true. +validId(_Id) -> true. validFunc({M, F, A}) when is_atom(M), is_atom(F), @@ -1379,13 +1333,13 @@ validMods(Mods) when is_list(Mods) -> Mods); validMods(Mods) -> throw({invalid_modules, Mods}). -child_to_spec(#child{name = Name, +child_to_spec(#child{id = Id, mfargs = Func, restart_type = RestartType, shutdown = Shutdown, child_type = ChildType, modules = Mods}) -> - #{id => Name, + #{id => Id, start => Func, restart => RestartType, shutdown => Shutdown, @@ -1439,17 +1393,16 @@ report_error(Error, Reason, Child, SupName) -> {offender, extract_child(Child)}], error_logger:error_report(supervisor_report, ErrorMsg). - extract_child(Child) when is_list(Child#child.pid) -> [{nb_children, length(Child#child.pid)}, - {id, Child#child.name}, + {id, Child#child.id}, {mfargs, Child#child.mfargs}, {restart_type, Child#child.restart_type}, {shutdown, Child#child.shutdown}, {child_type, Child#child.child_type}]; extract_child(Child) -> [{pid, Child#child.pid}, - {id, Child#child.name}, + {id, Child#child.id}, {mfargs, Child#child.mfargs}, {restart_type, Child#child.restart_type}, {shutdown, Child#child.shutdown}, @@ -1465,3 +1418,46 @@ format_status(terminate, [_PDict, State]) -> format_status(_, [_PDict, State]) -> [{data, [{"State", State}]}, {supervisor, [{"Callback", State#state.module}]}]. + +%%%----------------------------------------------------------------- +%%% Dynamics database access +dyn_size(#state{dynamics = {Mod,Db}}) -> + Mod:size(Db). + +dyn_erase(Pid,#state{dynamics={sets,Db}}=State) -> + State#state{dynamics={sets,sets:del_element(Pid,Db)}}; +dyn_erase(Pid,#state{dynamics={maps,Db}}=State) -> + State#state{dynamics={maps,maps:remove(Pid,Db)}}. + +dyn_store(Pid,_,#state{dynamics={sets,Db}}=State) -> + State#state{dynamics={sets,sets:add_element(Pid,Db)}}; +dyn_store(Pid,Args,#state{dynamics={maps,Db}}=State) -> + State#state{dynamics={maps,Db#{Pid => Args}}}. + +dyn_fold(Fun,Init,#state{dynamics={sets,Db}}) -> + sets:fold(Fun,Init,Db); +dyn_fold(Fun,Init,#state{dynamics={maps,Db}}) -> + maps:fold(fun(Pid,_,Acc) -> Fun(Pid,Acc) end, Init, Db). + +dyn_map(Fun, #state{dynamics={sets,Db}}) -> + lists:map(Fun, sets:to_list(Db)); +dyn_map(Fun, #state{dynamics={maps,Db}}) -> + lists:map(Fun, maps:keys(Db)). + +dyn_exists(Pid, #state{dynamics={sets, Db}}) -> + sets:is_element(Pid, Db); +dyn_exists(Pid, #state{dynamics={maps, Db}}) -> + maps:is_key(Pid, Db). + +dyn_args(_Pid, #state{dynamics={sets, _Db}}) -> + {ok,undefined}; +dyn_args(Pid, #state{dynamics={maps, Db}}) -> + maps:find(Pid, Db). + +dyn_init(State) -> + dyn_init(get_dynamic_child(State),State). + +dyn_init(Child,State) when ?is_temporary(Child) -> + State#state{dynamics={sets,sets:new()}}; +dyn_init(_Child,State) -> + State#state{dynamics={maps,maps:new()}}. diff --git a/lib/stdlib/test/supervisor_1.erl b/lib/stdlib/test/supervisor_1.erl index 419026749b..c3ccacc587 100644 --- a/lib/stdlib/test/supervisor_1.erl +++ b/lib/stdlib/test/supervisor_1.erl @@ -42,6 +42,8 @@ start_child(error) -> set -> gen_server:start_link(?MODULE, error, []) end; +start_child({return, Term}) -> + Term; start_child(Extra) -> {ok, Pid} = gen_server:start_link(?MODULE, normal, []), diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index cd2c6b0cbb..761df8eb40 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -39,6 +39,9 @@ sup_start_ignore_temporary_child_start_child_simple/1, sup_start_ignore_permanent_child_start_child_simple/1, sup_start_error_return/1, sup_start_fail/1, + sup_start_child_returns_error/1, + sup_start_restart_child_returns_error/1, + sup_start_child_returns_error_simple/1, sup_start_map/1, sup_start_map_simple/1, sup_start_map_faulty_specs/1, sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_brutal_kill/1, @@ -65,14 +68,16 @@ simple_one_for_one_extra/1, simple_one_for_one_shutdown/1]). %% Misc tests --export([child_unlink/1, tree/1, count_children/1, +-export([child_unlink/1, tree/1, count_children/1, count_children_supervisor/1, count_restarting_children/1, get_callback_module/1, do_not_save_start_parameters_for_temporary_children/1, do_not_save_child_specs_for_temporary_children/1, simple_one_for_one_scale_many_temporary_children/1, simple_global_supervisor/1, hanging_restart_loop/1, + hanging_restart_loop_rest_for_one/1, hanging_restart_loop_simple/1, code_change/1, code_change_map/1, - code_change_simple/1, code_change_simple_map/1]). + code_change_simple/1, code_change_simple_map/1, + order_of_children/1, scale_start_stop_many_children/1]). %%------------------------------------------------------------------------- @@ -91,12 +96,15 @@ all() -> {group, normal_termination}, {group, shutdown_termination}, {group, abnormal_termination}, child_unlink, tree, - count_children, count_restarting_children, get_callback_module, + count_children, count_children_supervisor, count_restarting_children, + get_callback_module, do_not_save_start_parameters_for_temporary_children, do_not_save_child_specs_for_temporary_children, simple_one_for_one_scale_many_temporary_children, temporary_bystander, - simple_global_supervisor, hanging_restart_loop, hanging_restart_loop_simple, - code_change, code_change_map, code_change_simple, code_change_simple_map]. + simple_global_supervisor, hanging_restart_loop, + hanging_restart_loop_rest_for_one, hanging_restart_loop_simple, + code_change, code_change_map, code_change_simple, code_change_simple_map, + order_of_children, scale_start_stop_many_children]. groups() -> [{sup_start, [], @@ -105,7 +113,10 @@ groups() -> sup_start_ignore_temporary_child_start_child, sup_start_ignore_temporary_child_start_child_simple, sup_start_ignore_permanent_child_start_child_simple, - sup_start_error_return, sup_start_fail]}, + sup_start_error_return, sup_start_fail, + sup_start_child_returns_error, sup_start_restart_child_returns_error, + sup_start_child_returns_error_simple + ]}, {sup_start_map, [], [sup_start_map, sup_start_map_simple, sup_start_map_faulty_specs]}, {sup_stop, [], @@ -147,6 +158,15 @@ init_per_testcase(_Case, Config) -> Config. end_per_testcase(_Case, _Config) -> + %% Clean up to avoid unnecessary error reports in the shell + case whereis(sup_test) of + SupPid when is_pid(SupPid) -> + unlink(SupPid), + exit(SupPid,shutdown), + ok; + _ -> + error + end, ok. start_link(InitResult) -> @@ -274,6 +294,7 @@ sup_start_ignore_permanent_child_start_child_simple(Config) %% Regression test: check that the supervisor terminates without error. exit(Pid, shutdown), check_exit_reason(Pid, shutdown). + %%------------------------------------------------------------------------- %% Tests what happens if init-callback returns a invalid value. sup_start_error_return(Config) when is_list(Config) -> @@ -289,6 +310,53 @@ sup_start_fail(Config) when is_list(Config) -> check_exit_reason(Term). %%------------------------------------------------------------------------- +%% Test what happens when the start function for a child returns +%% {error,Reason} or some other term(). +sup_start_restart_child_returns_error(_Config) -> + process_flag(trap_exit, true), + Child = {child1, {supervisor_1, start_child, [error]}, + permanent, 1000, worker, []}, + {ok, _Pid} = start_link({ok, {{one_for_one, 2, 3600}, [Child]}}), + + ok = supervisor:terminate_child(sup_test, child1), + {error,{function_clause,_}} = supervisor:restart_child(sup_test,child1), + + [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + ok. + +%%------------------------------------------------------------------------- +%% Test what happens when the start function for a child returns +%% {error,Reason} or some other term(). +sup_start_child_returns_error(_Config) -> + process_flag(trap_exit, true), + Child1 = {child1, {supervisor_1, start_child, [{return,{error,reason}}]}, + permanent, 1000, worker, []}, + Child2 = {child2, {supervisor_1, start_child, [{return,error_reason}]}, + permanent, 1000, worker, []}, + {ok, _Pid} = start_link({ok, {{one_for_one, 2, 3600}, []}}), + + {error,{reason,_}} = supervisor:start_child(sup_test,Child1), + {error,{error_reason,_}} = supervisor:start_child(sup_test,Child2), + + [] = supervisor:which_children(sup_test), + ok. + +%%------------------------------------------------------------------------- +%% Test what happens when the start function for a child returns +%% {error,Reason} - simple_one_for_one +sup_start_child_returns_error_simple(_Config) -> + process_flag(trap_exit, true), + Child = {child1, {supervisor_1, start_child, []}, + permanent, 1000, worker, []}, + {ok, _Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + + {error,reason} = supervisor:start_child(sup_test,[{return,{error,reason}}]), + {error,error_reason} = supervisor:start_child(sup_test,[{return,error_reason}]), + + [] = supervisor:which_children(sup_test), + ok. + +%%------------------------------------------------------------------------- %% Tests that the supervisor process starts correctly with map %% startspec, and that the full childspec can be read. sup_start_map(Config) when is_list(Config) -> @@ -468,7 +536,16 @@ extra_return(Config) when is_list(Config) -> [{child1, CPid3, worker, []}] = supervisor:which_children(sup_test), [1,1,0,1] = get_child_counts(sup_test), - ok. + %% Check that it can be automatically restarted + terminate(CPid3, abnormal), + [{child1, CPid4, worker, []}] = supervisor:which_children(sup_test), + [1,1,0,1] = get_child_counts(sup_test), + if (not is_pid(CPid4)) orelse CPid4=:=CPid3 -> + ct:fail({not_restarted,CPid3,CPid4}); + true -> + ok + end. + %%------------------------------------------------------------------------- %% Test API functions start_child/2, terminate_child/2, delete_child/2 %% restart_child/2, which_children/1, count_children/1. Only correct @@ -1140,7 +1217,7 @@ simple_one_for_one(Config) when is_list(Config) -> [{Id4, Pid4, _, _}|_] = supervisor:which_children(sup_test), terminate(SupPid, Pid4, Id4, abnormal), - check_exit([SupPid]). + check_exit_reason(SupPid,shutdown). %%------------------------------------------------------------------------- @@ -1378,6 +1455,11 @@ tree(Config) when is_list(Config) -> [?MODULE, {ok, {{one_for_one, 4, 3600}, []}}]}, permanent, infinity, supervisor, []}, + ChildSup3 = {supchild3, + {supervisor, start_link, + [?MODULE, {ok, {{one_for_one, 4, 3600}, []}}]}, + transient, infinity, + supervisor, []}, %% Top supervisor {ok, SupPid} = start_link({ok, {{one_for_all, 4, 3600}, []}}), @@ -1385,7 +1467,9 @@ tree(Config) when is_list(Config) -> %% Child supervisors {ok, Sup1} = supervisor:start_child(SupPid, ChildSup1), {ok, Sup2} = supervisor:start_child(SupPid, ChildSup2), - [2,2,2,0] = get_child_counts(SupPid), + {ok, _Sup3} = supervisor:start_child(SupPid, ChildSup3), + ok = supervisor:terminate_child(SupPid, supchild3), + [3,2,3,0] = get_child_counts(SupPid), %% Workers [{_, CPid2, _, _},{_, CPid1, _, _}] = @@ -1417,16 +1501,21 @@ tree(Config) when is_list(Config) -> timer:sleep(1000), - [{supchild2, NewSup2, _, _},{supchild1, NewSup1, _, _}] = + [{supchild3, NewSup3, _, _}, + {supchild2, NewSup2, _, _}, + {supchild1, NewSup1, _, _}] = supervisor:which_children(SupPid), - [2,2,2,0] = get_child_counts(SupPid), + [3,3,3,0] = get_child_counts(SupPid), [{child2, _, _, _},{child1, _, _, _}] = supervisor:which_children(NewSup1), [2,2,0,2] = get_child_counts(NewSup1), [] = supervisor:which_children(NewSup2), - [0,0,0,0] = get_child_counts(NewSup2). + [0,0,0,0] = get_child_counts(NewSup2), + + [] = supervisor:which_children(NewSup3), + [0,0,0,0] = get_child_counts(NewSup3). %%------------------------------------------------------------------------- %% Test count_children @@ -1459,6 +1548,36 @@ count_children(Config) when is_list(Config) -> [1,0,0,0] = get_child_counts(sup_test). %%------------------------------------------------------------------------- +%% Test count_children for simple_one_for_one, when children are supervisors +count_children_supervisor(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_1, start_child, []}, temporary, infinity, + supervisor, []}, + {ok, SupPid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + [supervisor:start_child(sup_test, []) || _Ignore <- lists:seq(1,1000)], + + Children = supervisor:which_children(sup_test), + ChildCount = get_child_counts(sup_test), + + [supervisor:start_child(sup_test, []) || _Ignore2 <- lists:seq(1,1000)], + + ChildCount2 = get_child_counts(sup_test), + Children2 = supervisor:which_children(sup_test), + + ChildCount3 = get_child_counts(sup_test), + Children3 = supervisor:which_children(sup_test), + + 1000 = length(Children), + [1,1000,1000,0] = ChildCount, + 2000 = length(Children2), + [1,2000,2000,0] = ChildCount2, + Children3 = Children2, + ChildCount3 = ChildCount2, + + [terminate(SupPid, Pid, child, kill) || {undefined, Pid, supervisor, _Modules} <- Children3], + [1,0,0,0] = get_child_counts(sup_test). + +%%------------------------------------------------------------------------- %% Test count_children when some children are restarting count_restarting_children(Config) when is_list(Config) -> process_flag(trap_exit, true), @@ -1577,11 +1696,11 @@ dont_save_start_parameters_for_temporary_children(simple_one_for_one = Type) -> start_children(Sup2, [LargeList], 100), start_children(Sup3, [LargeList], 100), - [{memory,Mem1}] = process_info(Sup1, [memory]), - [{memory,Mem2}] = process_info(Sup2, [memory]), - [{memory,Mem3}] = process_info(Sup3, [memory]), + Size1 = erts_debug:flat_size(sys:get_status(Sup1)), + Size2 = erts_debug:flat_size(sys:get_status(Sup2)), + Size3 = erts_debug:flat_size(sys:get_status(Sup3)), - true = (Mem3 < Mem1) and (Mem3 < Mem2), + true = (Size3 < Size1) and (Size3 < Size2), terminate(Sup1, shutdown), terminate(Sup2, shutdown), @@ -1605,11 +1724,11 @@ dont_save_start_parameters_for_temporary_children(Type) -> start_children(Sup2, Transient, 100), start_children(Sup3, Temporary, 100), - [{memory,Mem1}] = process_info(Sup1, [memory]), - [{memory,Mem2}] = process_info(Sup2, [memory]), - [{memory,Mem3}] = process_info(Sup3, [memory]), + Size1 = erts_debug:flat_size(sys:get_status(Sup1)), + Size2 = erts_debug:flat_size(sys:get_status(Sup2)), + Size3 = erts_debug:flat_size(sys:get_status(Sup3)), - true = (Mem3 < Mem1) and (Mem3 < Mem2), + true = (Size3 < Size1) and (Size3 < Size2), terminate(Sup1, shutdown), terminate(Sup2, shutdown), @@ -1847,6 +1966,61 @@ hanging_restart_loop(Config) when is_list(Config) -> undefined = whereis(sup_test), ok. +hanging_restart_loop_rest_for_one(Config) when is_list(Config) -> + process_flag(trap_exit, true), + {ok, Pid} = start_link({ok, {{rest_for_one, 8, 10}, []}}), + Child1 = {child1, {supervisor_1, start_child, []}, + permanent, brutal_kill, worker, []}, + Child2 = {child2, {supervisor_deadlock, start_child, []}, + permanent, brutal_kill, worker, []}, + Child3 = {child3, {supervisor_1, start_child, []}, + permanent, brutal_kill, worker, []}, + + %% Ets table with state read by supervisor_deadlock.erl + ets:new(supervisor_deadlock,[set,named_table,public]), + ets:insert(supervisor_deadlock,{fail_start,false}), + + {ok, CPid1} = supervisor:start_child(sup_test, Child1), + {ok, CPid2} = supervisor:start_child(sup_test, Child2), + link(CPid2), + {ok, _CPid3} = supervisor:start_child(sup_test, Child3), + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(), + timer:sleep(2000), % allow restart to happen before proceeding + + {error, already_present} = supervisor:start_child(sup_test, Child2), + {error, restarting} = supervisor:restart_child(sup_test, child2), + {error, restarting} = supervisor:delete_child(sup_test, child2), + [{child3,undefined,worker,[]}, + {child2,restarting,worker,[]}, + {child1,CPid1,worker,[]}] = supervisor:which_children(sup_test), + [3,1,0,3] = get_child_counts(sup_test), + + ok = supervisor:terminate_child(sup_test, child2), + check_exit_reason(CPid2, error), + [{child3,undefined,worker,[]}, + {child2,undefined,worker,[]}, + {child1,CPid1,worker,[]}] = supervisor:which_children(sup_test), + + ets:insert(supervisor_deadlock,{fail_start,false}), + {ok, CPid22} = supervisor:restart_child(sup_test, child2), + link(CPid22), + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(), + timer:sleep(2000), % allow restart to happen before proceeding + + %% Terminating supervisor. + %% OTP-9549 fixes so this does not give a timetrap timeout - + %% i.e. that supervisor does not hang in restart loop. + terminate(Pid,shutdown), + + %% Check that child died with reason from 'restart' request above + check_exit_reason(CPid22, error), + undefined = whereis(sup_test), + ok. + %%------------------------------------------------------------------------- %% Test that child and supervisor can be shutdown while hanging in %% restart loop, simple_one_for_one. @@ -2022,11 +2196,11 @@ code_change_simple(_Config) -> SimpleChild2 = {child2,{supervisor_1, start_child, []}, permanent, brutal_kill, worker, []}, - {error, {error, {ok,[_,_]}}} = + {error, {error, {ok,{[_,_],_}}}} = fake_upgrade(SimplePid,{ok,{SimpleFlags,[SimpleChild1,SimpleChild2]}}), %% Attempt to remove child - {error, {error, {ok,[]}}} = fake_upgrade(SimplePid,{ok,{SimpleFlags,[]}}), + {error, {error, {ok,{[],_}}}} = fake_upgrade(SimplePid,{ok,{SimpleFlags,[]}}), terminate(SimplePid,shutdown), ok. @@ -2047,11 +2221,11 @@ code_change_simple_map(_Config) -> %% Attempt to add child SimpleChild2 = #{id=>child2, start=>{supervisor_1, start_child, []}}, - {error, {error, {ok, [_,_]}}} = + {error, {error, {ok, {[_,_],_}}}} = fake_upgrade(SimplePid,{ok,{SimpleFlags,[SimpleChild1,SimpleChild2]}}), %% Attempt to remove child - {error, {error, {ok, []}}} = + {error, {error, {ok, {[],_}}}} = fake_upgrade(SimplePid,{ok,{SimpleFlags,[]}}), terminate(SimplePid,shutdown), @@ -2075,6 +2249,148 @@ fake_upgrade(Pid,NewInitReturn) -> ok = sys:resume(Pid), R. +%% Test that children are started in the order they are given, and +%% terminated in the opposite order +order_of_children(_Config) -> + process_flag(trap_exit, true), + %% Use child ids that are not alphabetically storted + Id1 = ch7, + Id2 = ch3, + Id3 = ch10, + Id4 = ch2, + Id5 = ch5, + Children = + [{Id, {supervisor_1, start_child, []}, permanent, 1000, worker, []} || + Id <- [Id1,Id2,Id3,Id4,Id5]], + + {ok, SupPid} = start_link({ok, {{rest_for_one, 2, 3600}, Children}}), + + + %% Check start order (pids are growing) + Which1 = supervisor:which_children(sup_test), + IsPid = fun({_,P,_,_}) when is_pid(P) -> true; (_) -> false end, + true = lists:all(IsPid,Which1), + SortedOnPid1 = lists:keysort(2,Which1), + [{Id1,Pid1,_,_}, + {Id2,Pid2,_,_}, + {Id3,Pid3,_,_}, + {Id4,Pid4,_,_}, + {Id5,Pid5,_,_}] = SortedOnPid1, + + TPid = self(), + TraceHandler = fun({trace,P,exit,_},{Last,Ps}) when P=:=Last -> + TPid ! {exited,lists:reverse([P|Ps])}, + {Last,Ps}; + ({trace,P,exit,_},{Last,Ps}) -> + {Last,[P|Ps]}; + (_T,Acc) -> + Acc + end, + + %% Terminate Pid3 and check that Pid4 and Pid5 are terminated in + %% expected order. + Expected1 = [Pid5,Pid4], + {ok,_} = dbg:tracer(process,{TraceHandler,{Pid4,[]}}), + [{ok,[_]} = dbg:p(P,procs) || P <- Expected1], + terminate(Pid3, abnormal), + receive {exited,ExitedPids1} -> + dbg:stop_clear(), + case ExitedPids1 of + Expected1 -> ok; + _ -> ct:fail({faulty_termination_order, + {expected,Expected1}, + {got,ExitedPids1}}) + end + after 3000 -> + dbg:stop_clear(), + ct:fail({shutdown_fail,timeout}) + end, + + %% Then check that Id3-5 are started again in correct order + Which2 = supervisor:which_children(sup_test), + true = lists:all(IsPid,Which2), + SortedOnPid2 = lists:keysort(2,Which2), + [{Id1,Pid1,_,_}, + {Id2,Pid2,_,_}, + {Id3,Pid32,_,_}, + {Id4,Pid42,_,_}, + {Id5,Pid52,_,_}] = SortedOnPid2, + + %% Terminate supervisor and check that all children are terminated + %% in opposite start order + Expected2 = [Pid52,Pid42,Pid32,Pid2,Pid1], + {ok,_} = dbg:tracer(process,{TraceHandler,{Pid1,[]}}), + [{ok,[_]} = dbg:p(P,procs) || P <- Expected2], + exit(SupPid,shutdown), + receive {exited,ExitedPids2} -> + dbg:stop_clear(), + case ExitedPids2 of + Expected2 -> ok; + _ -> ct:fail({faulty_termination_order, + {expected,Expected2}, + {got,ExitedPids2}}) + end + after 3000 -> + dbg:stop_clear(), + ct:fail({shutdown_fail,timeout}) + end, + ok. + +%% Test that a non-simple supervisor scales well for starting and +%% stopping many children. +scale_start_stop_many_children(_Config) -> + process_flag(trap_exit, true), + {ok, _Pid} = start_link({ok, {{one_for_one, 2, 3600}, []}}), + N1 = 1000, + N2 = 100000, + Ids1 = lists:seq(1,N1), + Ids2 = lists:seq(1,N2), + Children1 = [{Id,{supervisor_1,start_child,[]},permanent,1000,worker,[]} || + Id <- Ids1], + Children2 = [{Id,{supervisor_1,start_child,[]},permanent,1000,worker,[]} || + Id <- Ids2], + + {StartT1,_} = + timer:tc(fun() -> + [supervisor:start_child(sup_test,C) || C <- Children1] + end), + {StopT1,_} = + timer:tc(fun() -> + [supervisor:terminate_child(sup_test,I) || I <- Ids1] + end), + ct:log("~w children, start time: ~w ms, stop time: ~w ms", + [N1, StartT1 div 1000, StopT1 div 1000]), + + {StartT2,_} = + timer:tc(fun() -> + [supervisor:start_child(sup_test,C) || C <- Children2] + end), + {StopT2,_} = + timer:tc(fun() -> + [supervisor:terminate_child(sup_test,I) || I <- Ids2] + end), + ct:log("~w children, start time: ~w ms, stop time: ~w ms", + [N2, StartT2 div 1000, StopT2 div 1000]), + + %% Scaling should be more or less linear, but allowing a bit more + %% to avoid false alarms + ScaleLimit = (N2 div N1) * 10, + StartScale = StartT2 div StartT1, + StopScale = StopT2 div StopT1, + + ct:log("Scale limit: ~w~nStart scale: ~w~nStop scale: ~w", + [ScaleLimit, StartScale, StopScale]), + + if StartScale > ScaleLimit -> + ct:fail({bad_start_scale,StartScale}); + StopScale > ScaleLimit -> + ct:fail({bad_stop_scale,StopScale}); + true -> + ok + end, + + ok. + %%------------------------------------------------------------------------- terminate(Pid, Reason) when Reason =/= supervisor -> terminate(dummy, Pid, dummy, Reason). diff --git a/lib/stdlib/test/supervisor_deadlock.erl b/lib/stdlib/test/supervisor_deadlock.erl index 8d3d1c6f30..f51aecccb2 100644 --- a/lib/stdlib/test/supervisor_deadlock.erl +++ b/lib/stdlib/test/supervisor_deadlock.erl @@ -1,5 +1,5 @@ -module(supervisor_deadlock). --compile(export_all). +-compile([export_all,nowarn_export_all]). %%%----------------------------------------------------------------- |