diff options
-rw-r--r-- | erts/etc/unix/etp-commands | 32 | ||||
-rw-r--r-- | lib/kernel/src/auth.erl | 12 | ||||
-rw-r--r-- | lib/stdlib/doc/src/supervisor.xml | 12 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 170 | ||||
-rw-r--r-- | lib/stdlib/test/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 122 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_deadlock.erl | 45 |
7 files changed, 329 insertions, 65 deletions
diff --git a/erts/etc/unix/etp-commands b/erts/etc/unix/etp-commands index 6a01e0b7e0..79e5d6b1d8 100644 --- a/erts/etc/unix/etp-commands +++ b/erts/etc/unix/etp-commands @@ -1883,6 +1883,28 @@ document etp-ets-tables %--------------------------------------------------------------------------- end +define etp-ets-obj +# Args: DbTerm* +# + set $etp_ets_obj_i = 1 + while $etp_ets_obj_i <= (($arg0)->tpl[0] >> 6) + if $etp_ets_obj_i == 1 + printf "{" + else + printf ", " + end + set $etp_ets_elem = ($arg0)->tpl[$etp_ets_obj_i] + if ($etp_ets_elem & 3) == 0 + printf "<compressed>" + else + etp-1 $etp_ets_elem 0 + end + set $etp_ets_obj_i++ + end + printf "}" +end + + define etp-ets-tabledump # Args: int tableindex # @@ -1896,10 +1918,10 @@ define etp-ets-tabledump if $etp_ets_tabledump_t->common.status & 0x130 # Hash table set $etp_ets_tabledump_h = $etp_ets_tabledump_t->hash - printf "%% nitems=%d\n", $etp_ets_tabledump_t->common.nitems - while $etp_ets_tabledump_i < $etp_ets_tabledump_h->nactive - set $etp_ets_tabledump_l = $etp_ets_tabledump_h->seg \ - [$etp_ets_tabledump_i>>8][$etp_ets_tabledump_i&0xFF] + printf "%% nitems=%d\n", (long) $etp_ets_tabledump_t->common.nitems + while $etp_ets_tabledump_i < (long) $etp_ets_tabledump_h->nactive + set $etp_ets_tabledump_seg = ((struct segment**)$etp_ets_tabledump_h->segtab)[$etp_ets_tabledump_i>>8] + set $etp_ets_tabledump_l = $etp_ets_tabledump_seg->buckets[$etp_ets_tabledump_i&0xFF] if $etp_ets_tabledump_l printf "%% Slot %d:\n", $etp_ets_tabledump_i while $etp_ets_tabledump_l @@ -1909,7 +1931,7 @@ define etp-ets-tabledump printf "[" end set $etp_ets_tabledump_n++ - etp-1 ((Eterm)($etp_ets_tabledump_l->dbterm.tpl)|0x2) 0 + etp-ets-obj &($etp_ets_tabledump_l->dbterm) if $etp_ets_tabledump_l->hvalue == ((unsigned long)-1) printf "% *\n" else diff --git a/lib/kernel/src/auth.erl b/lib/kernel/src/auth.erl index c329a5652a..252c20ac4e 100644 --- a/lib/kernel/src/auth.erl +++ b/lib/kernel/src/auth.erl @@ -381,13 +381,17 @@ create_cookie(Name) -> case {R1, R2} of {ok, ok} -> ok; - {{error,_Reason}, _} -> - {error, "Failed to create cookie file"}; + {{error,Reason}, _} -> + {error, + lists:flatten( + io_lib:format("Failed to write to cookie file '~s': ~p", [Name, Reason]))}; {ok, {error, Reason}} -> {error, "Failed to change mode: " ++ atom_to_list(Reason)} end; - {error,_Reason} -> - {error, "Failed to create cookie file"} + {error,Reason} -> + {error, + lists:flatten( + io_lib:format("Failed to create cookie file '~s': ~p", [Name, Reason]))} end. random_cookie(0, _, Result) -> diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index d1e62230bc..35f4f82264 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -399,10 +399,11 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules} <c>SupRef</c>.</p> <p>If successful, the function returns <c>ok</c>. If the child specification identified by <c><anno>Id</anno></c> exists but - the corresponding child process is running, the function - returns <c>{error,running}</c>. If the child specification - identified by <c><anno>Id</anno></c> does not exist, the function returns - <c>{error,not_found}</c>.</p> + the corresponding child process is running or about to be restarted, + the function returns <c>{error,running}</c> or + <c>{error,restarting}</c> respectively. If the child specification + identified by <c><anno>Id</anno></c> does not exist, the function + returns <c>{error,not_found}</c>.</p> </desc> </func> <func> @@ -462,7 +463,8 @@ child_spec() = {Id,StartFunc,Restart,Shutdown,Type,Modules} </item> <item> <p><c><anno>Child</anno></c> - the pid of the corresponding child - process, or <c>undefined</c> if there is no such process.</p> + process, the atom <c>restarting</c> if the process is about to be + restarted or <c>undefined</c> if there is no such process.</p> </item> <item> <p><c><anno>Type</anno></c> - as defined in the child specification.</p> diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index ac5b078c29..f315064b03 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -28,8 +28,9 @@ check_childspecs/1]). %% Internal exports --export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). --export([handle_cast/2]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). +-export([try_again_restart/2]). %%-------------------------------------------------------------------------- @@ -37,7 +38,7 @@ %%-------------------------------------------------------------------------- --type child() :: 'undefined' | pid() | [pid()]. +-type child() :: 'undefined' | pid(). -type child_id() :: term(). -type mfargs() :: {M :: module(), F :: atom(), A :: [term()] | undefined}. -type modules() :: [module()] | 'dynamic'. @@ -62,8 +63,8 @@ %%-------------------------------------------------------------------------- -record(child, {% pid is undefined when child is not running - pid = undefined :: child(), - name, + pid = undefined :: child() | {restarting,pid()} | [pid()], + name :: child_id(), mfargs :: mfargs(), restart_type :: restart(), shutdown :: shutdown(), @@ -95,6 +96,8 @@ [ChildSpec :: child_spec()]}} | ignore. +-define(restarting(_Pid_), {restarting,_Pid_}). + %%% --------------------------------------------------- %%% This is a general process supervisor built upon gen_server.erl. %%% Servers/processes should/could also be built using gen_server.erl. @@ -139,7 +142,8 @@ start_child(Supervisor, ChildSpec) -> Result :: {'ok', Child :: child()} | {'ok', Child :: child(), Info :: term()} | {'error', Error}, - Error :: 'running' | 'not_found' | 'simple_one_for_one' | term(). + Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one' | + term(). restart_child(Supervisor, Name) -> call(Supervisor, {restart_child, Name}). @@ -147,7 +151,7 @@ restart_child(Supervisor, Name) -> SupRef :: sup_ref(), Id :: child_id(), Result :: 'ok' | {'error', Error}, - Error :: 'running' | 'not_found' | 'simple_one_for_one'. + Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one'. delete_child(Supervisor, Name) -> call(Supervisor, {delete_child, Name}). @@ -169,7 +173,7 @@ terminate_child(Supervisor, Name) -> -spec which_children(SupRef) -> [{Id,Child,Type,Modules}] when SupRef :: sup_ref(), Id :: child_id() | undefined, - Child :: child(), + Child :: child() | 'restarting', Type :: worker(), Modules :: modules(). which_children(Supervisor) -> @@ -198,6 +202,17 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +%%%----------------------------------------------------------------- +%%% Called by timer:apply_after from restart/2 +-spec try_again_restart(SupRef, Child) -> ok when + SupRef :: sup_ref(), + Child :: child_id() | pid(). +try_again_restart(Supervisor, Child) -> + cast(Supervisor, {try_again_restart, Child}). + +cast(Supervisor, Req) -> + gen_server:cast(Supervisor, Req). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -384,6 +399,8 @@ handle_call({restart_child, Name}, _From, State) -> Error -> {reply, Error, State} end; + {value, #child{pid=?restarting(_)}} -> + {reply, {error, restarting}, State}; {value, _} -> {reply, {error, running}, State}; _ -> @@ -395,6 +412,8 @@ handle_call({delete_child, Name}, _From, State) -> {value, Child} when Child#child.pid =:= undefined -> NState = remove_child(Child, State), {reply, ok, NState}; + {value, #child{pid=?restarting(_)}} -> + {reply, {error, restarting}, State}; {value, _} -> {reply, {error, running}, State}; _ -> @@ -413,13 +432,17 @@ handle_call(which_children, _From, #state{children = [#child{restart_type = RTyp child_type = CT, modules = Mods}]} = State) when ?is_simple(State) -> - Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end, + Reply = lists:map(fun({?restarting(_),_}) -> {undefined,restarting,CT,Mods}; + ({Pid, _}) -> {undefined, Pid, CT, Mods} end, ?DICT:to_list(dynamics_db(RType, State#state.dynamics))), {reply, Reply, State}; handle_call(which_children, _From, State) -> Resp = - lists:map(fun(#child{pid = Pid, name = Name, + lists:map(fun(#child{pid = ?restarting(_), name = Name, + child_type = ChildType, modules = Mods}) -> + {Name, restarting, ChildType, Mods}; + (#child{pid = Pid, name = Name, child_type = ChildType, modules = Mods}) -> {Name, Pid, ChildType, Mods} end, @@ -432,8 +455,11 @@ handle_call(count_children, _From, #state{children = [#child{restart_type = temp when ?is_simple(State) -> {Active, Count} = ?SETS:fold(fun(Pid, {Alive, Tot}) -> - if is_pid(Pid) -> {Alive+1, Tot +1}; - true -> {Alive, Tot + 1} end + case is_pid(Pid) andalso is_process_alive(Pid) of + true ->{Alive+1, Tot +1}; + false -> + {Alive, Tot + 1} + end end, {0, 0}, dynamics_db(temporary, State#state.dynamics)), Reply = case CT of supervisor -> [{specs, 1}, {active, Active}, @@ -448,8 +474,12 @@ handle_call(count_children, _From, #state{children = [#child{restart_type = RTy when ?is_simple(State) -> {Active, Count} = ?DICT:fold(fun(Pid, _Val, {Alive, Tot}) -> - if is_pid(Pid) -> {Alive+1, Tot +1}; - true -> {Alive, Tot + 1} end + case is_pid(Pid) andalso is_process_alive(Pid) of + true -> + {Alive+1, Tot +1}; + false -> + {Alive, Tot + 1} + end end, {0, 0}, dynamics_db(RType, State#state.dynamics)), Reply = case CT of supervisor -> [{specs, 1}, {active, Active}, @@ -486,14 +516,42 @@ count_child(#child{pid = Pid, child_type = supervisor}, end. -%%% Hopefully cause a function-clause as there is no API function -%%% that utilizes cast. --spec handle_cast('null', state()) -> {'noreply', state()}. +%%% If a restart attempt failed, this message is sent via +%%% timer:apply_after(0,...) in order to give gen_server the chance to +%%% check it's inbox before trying again. +-spec handle_cast({try_again_restart, child_id() | pid()}, state()) -> + {'noreply', state()} | {stop, shutdown, state()}. -handle_cast(null, State) -> - error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", - []), - {noreply, State}. +handle_cast({try_again_restart,Pid}, #state{children=[Child]}=State) + when ?is_simple(State) -> + RT = Child#child.restart_type, + RPid = restarting(Pid), + case dynamic_child_args(RPid, dynamics_db(RT, State#state.dynamics)) of + {ok, Args} -> + {M, F, _} = Child#child.mfargs, + NChild = Child#child{pid = RPid, mfargs = {M, F, Args}}, + case restart(NChild,State) of + {ok, State1} -> + {noreply, State1}; + {shutdown, State1} -> + {stop, shutdown, State1} + end; + error -> + {noreply, State} + end; + +handle_cast({try_again_restart,Name}, State) -> + case lists:keyfind(Name,#child.name,State#state.children) of + Child = #child{pid=?restarting(_)} -> + case restart(Child,State) of + {ok, State1} -> + {noreply, State1}; + {shutdown, State1} -> + {stop, shutdown, State1} + end; + _ -> + {noreply,State} + end. %% %% Take care of terminated children. @@ -624,7 +682,7 @@ handle_start_child(Child, State) -> {error, What} -> {{error, {What, Child}}, State} end; - {value, OldChild} when OldChild#child.pid =/= undefined -> + {value, OldChild} when is_pid(OldChild#child.pid) -> {{error, {already_started, OldChild#child.pid}}, State}; {value, _OldChild} -> {{error, already_present}, State} @@ -678,7 +736,21 @@ do_restart(temporary, Reason, Child, State) -> restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState); + case restart(NState#state.strategy, Child, NState) of + {try_again,NState2} -> + %% Leaving control back to gen_server before + %% trying again. This way other incoming requsts + %% for the supervisor can be handled - e.g. a + %% shutdown request for the supervisor or the + %% child. + Id = if ?is_simple(State) -> Child#child.pid; + true -> Child#child.name + end, + timer:apply_after(0,?MODULE,try_again_restart,[self(),Id]), + {ok,NState2}; + Other -> + Other + end; {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), @@ -686,9 +758,9 @@ restart(Child, State) -> end. restart(simple_one_for_one, Child, State) -> - #child{mfargs = {M, F, A}} = Child, - Dynamics = ?DICT:erase(Child#child.pid, dynamics_db(Child#child.restart_type, - State#state.dynamics)), + #child{pid = OldPid, mfargs = {M, F, A}} = Child, + Dynamics = ?DICT:erase(OldPid, dynamics_db(Child#child.restart_type, + State#state.dynamics)), case do_start_child_i(M, F, A) of {ok, Pid} -> NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, @@ -697,10 +769,13 @@ restart(simple_one_for_one, Child, State) -> NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, {ok, NState}; {error, Error} -> + NState = State#state{dynamics = ?DICT:store(restarting(OldPid), A, + Dynamics)}, report_error(start_error, Error, Child, State#state.name), - restart(Child, State) + {try_again, NState} end; restart(one_for_one, Child, State) -> + OldPid = Child#child.pid, case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -709,8 +784,9 @@ restart(one_for_one, Child, State) -> NState = replace_child(Child#child{pid = Pid}, State), {ok, NState}; {error, Reason} -> + NState = replace_child(Child#child{pid = restarting(OldPid)}, State), report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) + {try_again, NState} end; restart(rest_for_one, Child, State) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), @@ -719,7 +795,9 @@ restart(rest_for_one, Child, State) -> {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + NChild = Child#child{pid=restarting(Child#child.pid)}, + NState = State#state{children = ChAfter3 ++ ChBefore}, + {try_again, replace_child(NChild,NState)} end; restart(one_for_all, Child, State) -> Children1 = del_child(Child#child.pid, State#state.children), @@ -728,9 +806,14 @@ restart(one_for_all, Child, State) -> {ok, NChs} -> {ok, State#state{children = NChs}}; {error, NChs} -> - restart(Child, State#state{children = NChs}) + NChild = Child#child{pid=restarting(Child#child.pid)}, + NState = State#state{children = NChs}, + {try_again, replace_child(NChild,NState)} end. +restarting(Pid) when is_pid(Pid) -> ?restarting(Pid); +restarting(RPid) -> RPid. + %%----------------------------------------------------------------- %% Func: terminate_children/2 %% Args: Children = [child_rec()] in termination order @@ -754,7 +837,7 @@ terminate_children([Child | Children], SupName, Res) -> terminate_children([], _SupName, Res) -> Res. -do_terminate(Child, SupName) when Child#child.pid =/= undefined -> +do_terminate(Child, SupName) when is_pid(Child#child.pid) -> case shutdown(Child#child.pid, Child#child.shutdown) of ok -> ok; @@ -765,7 +848,7 @@ do_terminate(Child, SupName) when Child#child.pid =/= undefined -> end, Child#child{pid = undefined}; do_terminate(Child, _SupName) -> - Child. + Child#child{pid = undefined}. %%----------------------------------------------------------------- %% Shutdowns a child. We must check the EXIT value @@ -866,7 +949,7 @@ terminate_dynamic_children(Child, Dynamics, SupName) -> TRef = erlang:start_timer(Time, self(), kill), wait_dynamic_children(Child, Pids, Sz, TRef, EStack0) end, - %% Unrool stacked errors and report them + %% Unroll stacked errors and report them ?DICT:fold(fun(Reason, Ls, _) -> report_error(shutdown_error, Reason, Child#child{pid=Ls}, SupName) @@ -885,7 +968,7 @@ monitor_dynamic_children(#child{restart_type=temporary}, Dynamics) -> end end, {?SETS:new(), ?DICT:new()}, Dynamics); monitor_dynamic_children(#child{restart_type=RType}, Dynamics) -> - ?DICT:fold(fun(P, _, {Pids, EStack}) -> + ?DICT:fold(fun(P, _, {Pids, EStack}) when is_pid(P) -> case monitor_child(P) of ok -> {?SETS:add_element(P, Pids), EStack}; @@ -893,7 +976,9 @@ monitor_dynamic_children(#child{restart_type=RType}, Dynamics) -> {Pids, EStack}; {error, Reason} -> {Pids, ?DICT:append(Reason, P, EStack)} - end + end; + (?restarting(_), _, {Pids, EStack}) -> + {Pids, EStack} end, {?SETS:new(), ?DICT:new()}, Dynamics). @@ -1020,13 +1105,20 @@ get_child(Name, State, _) -> lists:keysearch(Name, #child.name, State#state.children). get_dynamic_child(Pid, #state{children=[Child], dynamics=Dynamics}) -> - case is_dynamic_pid(Pid, dynamics_db(Child#child.restart_type, Dynamics)) of + DynamicsDb = dynamics_db(Child#child.restart_type, Dynamics), + case is_dynamic_pid(Pid, DynamicsDb) of true -> {value, Child#child{pid=Pid}}; false -> - case erlang:is_process_alive(Pid) of - true -> false; - false -> {value, Child} + RPid = restarting(Pid), + case is_dynamic_pid(RPid, DynamicsDb) of + true -> + {value, Child#child{pid=RPid}}; + false -> + case erlang:is_process_alive(Pid) of + true -> false; + false -> {value, Child} + end end end. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index b36265302c..4de6ea3ee7 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -67,6 +67,7 @@ MODULES= \ string_SUITE \ supervisor_1 \ supervisor_2 \ + supervisor_deadlock \ naughty_child \ shell_SUITE \ supervisor_SUITE \ diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index 71b76c093f..767ae3d62c 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -21,7 +21,7 @@ -module(supervisor_SUITE). -include_lib("common_test/include/ct.hrl"). --define(TIMEOUT, 1000). +-define(TIMEOUT, ?t:minutes(1)). %% Testserver specific export -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, @@ -62,7 +62,8 @@ do_not_save_start_parameters_for_temporary_children/1, do_not_save_child_specs_for_temporary_children/1, simple_one_for_one_scale_many_temporary_children/1, - simple_global_supervisor/1]). + simple_global_supervisor/1, hanging_restart_loop/1, + hanging_restart_loop_simple/1]). %%------------------------------------------------------------------------- @@ -82,7 +83,7 @@ all() -> count_children_memory, do_not_save_start_parameters_for_temporary_children, do_not_save_child_specs_for_temporary_children, simple_one_for_one_scale_many_temporary_children, temporary_bystander, - simple_global_supervisor]. + simple_global_supervisor, hanging_restart_loop, hanging_restart_loop_simple]. groups() -> [{sup_start, [], @@ -111,10 +112,8 @@ groups() -> {restart_rest_for_one, [], [rest_for_one, rest_for_one_escalation]}]. -init_per_suite(Config0) -> - Config = lists:keydelete(watchdog, 1, Config0), - Dog = test_server:timetrap(?TIMEOUT), - [{watchdog, Dog} | Config]. +init_per_suite(Config) -> + Config. end_per_suite(_Config) -> ok. @@ -129,18 +128,21 @@ init_per_testcase(count_children_memory, Config) -> try erlang:memory() of _ -> erts_debug:set_internal_state(available_internal_state, true), - Config + Dog = ?t:timetrap(?TIMEOUT), + [{watchdog,Dog}|Config] catch error:notsup -> {skip, "+Meamin used during test; erlang:memory/1 not available"} end; init_per_testcase(_Case, Config) -> - erlang:display(_Case), - Config. + Dog = ?t:timetrap(?TIMEOUT), + [{watchdog,Dog}|Config]. -end_per_testcase(count_children_memory, _Config) -> +end_per_testcase(count_children_memory, Config) -> catch erts_debug:set_internal_state(available_internal_state, false), + ?t:timetrap_cancel(?config(watchdog,Config)), ok; -end_per_testcase(_Case, _Config) -> +end_per_testcase(_Case, Config) -> + ?t:timetrap_cancel(?config(watchdog,Config)), ok. start_link(InitResult) -> @@ -1455,6 +1457,102 @@ gen_server9212() -> %%------------------------------------------------------------------------- +%% Test that child and supervisor can be shutdown while hanging in restart loop. +%% See OTP-9549. +hanging_restart_loop(Config) when is_list(Config) -> + process_flag(trap_exit, true), + {ok, Pid} = start_link({ok, {{one_for_one, 8, 10}, []}}), + Child1 = {child1, {supervisor_deadlock, start_child, []}, + permanent, brutal_kill, worker, []}, + + %% Ets table with state read by supervisor_deadlock.erl + ets:new(supervisor_deadlock,[set,named_table,public]), + ets:insert(supervisor_deadlock,{fail_start,false}), + + {ok, CPid1} = supervisor:start_child(sup_test, Child1), + link(CPid1), + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(), + timer:sleep(2000), % allow restart to happen before proceeding + + {error, already_present} = supervisor:start_child(sup_test, Child1), + {error, restarting} = supervisor:restart_child(sup_test, child1), + {error, restarting} = supervisor:delete_child(sup_test, child1), + [{child1,restarting,worker,[]}] = supervisor:which_children(sup_test), + [1,0,0,1] = get_child_counts(sup_test), + + ok = supervisor:terminate_child(sup_test, child1), + check_exit_reason(CPid1, error), + [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + + ets:insert(supervisor_deadlock,{fail_start,false}), + {ok, CPid2} = supervisor:restart_child(sup_test, child1), + link(CPid2), + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(), + timer:sleep(2000), % allow restart to happen before proceeding + + %% Terminating supervisor. + %% OTP-9549 fixes so this does not give a timetrap timeout - + %% i.e. that supervisor does not hang in restart loop. + terminate(Pid,shutdown), + + %% Check that child died with reason from 'restart' request above + check_exit_reason(CPid2, error), + undefined = whereis(sup_test), + ok. + +%%------------------------------------------------------------------------- +%% Test that child and supervisor can be shutdown while hanging in +%% restart loop, simple_one_for_one. +%% See OTP-9549. +hanging_restart_loop_simple(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child1 = {child1, {supervisor_deadlock, start_child, []}, + permanent, brutal_kill, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 8, 10}, [Child1]}}), + + %% Ets table with state read by supervisor_deadlock.erl + ets:new(supervisor_deadlock,[set,named_table,public]), + ets:insert(supervisor_deadlock,{fail_start,false}), + + {ok, CPid1} = supervisor:start_child(sup_test, []), + link(CPid1), + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(), + timer:sleep(2000), % allow restart to happen before proceeding + + {error, simple_one_for_one} = supervisor:restart_child(sup_test, child1), + {error, simple_one_for_one} = supervisor:delete_child(sup_test, child1), + [{undefined,restarting,worker,[]}] = supervisor:which_children(sup_test), + [1,0,0,1] = get_child_counts(sup_test), + + ok = supervisor:terminate_child(sup_test, CPid1), + check_exit_reason(CPid1, error), + [] = supervisor:which_children(sup_test), + + ets:insert(supervisor_deadlock,{fail_start,false}), + {ok, CPid2} = supervisor:start_child(sup_test, []), + link(CPid2), + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(), + timer:sleep(2000), % allow restart to happen before proceeding + + %% Terminating supervisor. + %% OTP-9549 fixes so this does not give a timetrap timeout - + %% i.e. that supervisor does not hang in restart loop. + terminate(Pid,shutdown), + + %% Check that child died with reason from 'restart' request above + check_exit_reason(CPid2, error), + undefined = whereis(sup_test), + ok. + +%%------------------------------------------------------------------------- terminate(Pid, Reason) when Reason =/= supervisor -> terminate(dummy, Pid, dummy, Reason). diff --git a/lib/stdlib/test/supervisor_deadlock.erl b/lib/stdlib/test/supervisor_deadlock.erl new file mode 100644 index 0000000000..288547a972 --- /dev/null +++ b/lib/stdlib/test/supervisor_deadlock.erl @@ -0,0 +1,45 @@ +-module(supervisor_deadlock). +-compile(export_all). + + +%%%----------------------------------------------------------------- +%%% gen_server callbacks +init([child]) -> + case ets:lookup(supervisor_deadlock,fail_start) of + [{fail_start, false}] -> + %% we must not fail on the first init, otherwise supervisor + %% terminates immediately + {ok, []}; + [{fail_start, true}] -> + %% Restart frequency is MaxR=8, MaxT=10, so this will + %% ensure that restart intensity is not reached -> restart + %% loop + timer:sleep(2000), % NOTE: this could be a gen_server call timeout + + {stop, error} + end. + +handle_call(_Req, _From, State) -> + {reply, ok, State}. + +%% Force a restart +handle_cast(restart, State) -> + {stop, error, State}. + +handle_info(_Msg, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +%%%----------------------------------------------------------------- +%%% Start child +start_child() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [child], []). + +restart_child() -> + gen_server:cast(supervisor_deadlock, restart). |