diff options
8 files changed, 217 insertions, 33 deletions
diff --git a/lib/sasl/src/release_handler_1.erl b/lib/sasl/src/release_handler_1.erl index ef95606bb5..8d0baf3ab1 100644 --- a/lib/sasl/src/release_handler_1.erl +++ b/lib/sasl/src/release_handler_1.erl @@ -21,7 +21,7 @@ %% External exports -export([eval_script/1, eval_script/5, check_script/2, check_old_processes/2]). --export([get_current_vsn/1]). %% exported because used in a test case +-export([get_current_vsn/1, get_supervised_procs/0]). %% exported because used in a test case -record(eval_state, {bins = [], stopped = [], suspended = [], apps = [], libdirs, unpurged = [], vsns = [], newlibs = [], @@ -493,6 +493,19 @@ start(Procs) -> %% supervisor module, we should load the new version, and then %% delete the old. Then we should perform the start changes %% manually, by adding/deleting children. +%% +%% Recent changes to this code cause the upgrade to error out and +%% log the case where a suspended supervisor has which_children +%% called against it. This retains the behavior of causing a VM +%% restart to the *old* version of a release but has the +%% advantage of logging the pid and supervisor that had the +%% issue. +%% +%% A second case where this can occur is if a child spec is +%% incorrect and get_modules is called against a process that +%% can't respond to the gen:call. Again an error is logged, +%% an error returned and a VM restart is issued. +%% %% Returns: [{SuperPid, ChildName, ChildPid, Mods}] %%----------------------------------------------------------------- %% OTP-3452. 
For each application the first item contains the pid @@ -502,49 +515,81 @@ start(Procs) -> get_supervised_procs() -> lists:foldl( fun(Application, Procs) -> - case application_controller:get_master(Application) of - Pid when is_pid(Pid) -> - {Root, _AppMod} = application_master:get_child(Pid), - case get_supervisor_module(Root) of - {ok, SupMod} -> - get_procs(supervisor:which_children(Root), - Root) ++ - [{undefined, undefined, Root, [SupMod]} | - Procs]; - {error, _} -> - error_logger:error_msg("release_handler: " - "cannot find top " - "supervisor for " - "application ~w~n", - [Application]), - get_procs(supervisor:which_children(Root), - Root) ++ Procs - end; - _ -> Procs - end + get_master_procs(Application, + Procs, + application_controller:get_master(Application)) end, [], - lists:map(fun({Application, _Name, _Vsn}) -> - Application - end, - application:which_applications())). + get_application_names()). + +get_supervised_procs(_, Root, Procs, {ok, SupMod}) -> + get_procs(maybe_supervisor_which_children(get_proc_state(Root), SupMod, Root), Root) ++ + [{undefined, undefined, Root, [SupMod]} | Procs]; +get_supervised_procs(Application, Root, Procs, {error, _}) -> + error_logger:error_msg("release_handler: cannot find top supervisor for " + "application ~w~n", [Application]), + get_procs(maybe_supervisor_which_children(get_proc_state(Root), Application, Root), Root) ++ Procs. + +get_application_names() -> + lists:map(fun({Application, _Name, _Vsn}) -> + Application + end, + application:which_applications()). + +get_master_procs(Application, Procs, Pid) when is_pid(Pid) -> + {Root, _AppMod} = application_master:get_child(Pid), + get_supervised_procs(Application, Root, Procs, get_supervisor_module(Root)); +get_master_procs(_, Procs, _) -> + Procs. 
get_procs([{Name, Pid, worker, dynamic} | T], Sup) when is_pid(Pid) -> - Mods = get_dynamic_mods(Pid), + Mods = maybe_get_dynamic_mods(Name, Pid), [{Sup, Name, Pid, Mods} | get_procs(T, Sup)]; get_procs([{Name, Pid, worker, Mods} | T], Sup) when is_pid(Pid), is_list(Mods) -> [{Sup, Name, Pid, Mods} | get_procs(T, Sup)]; get_procs([{Name, Pid, supervisor, Mods} | T], Sup) when is_pid(Pid) -> - [{Sup, Name, Pid, Mods} | get_procs(T, Sup)] ++ - get_procs(supervisor:which_children(Pid), Pid); + [{Sup, Name, Pid, Mods} | get_procs(T, Sup)] ++ + get_procs(maybe_supervisor_which_children(get_proc_state(Pid), Name, Pid), Pid); get_procs([_H | T], Sup) -> get_procs(T, Sup); get_procs(_, _Sup) -> []. -get_dynamic_mods(Pid) -> - {ok,Res} = gen:call(Pid, self(), get_modules), - Res. +get_proc_state(Proc) -> + {status, _, {module, _}, [_, State, _, _, _]} = sys:get_status(Proc), + State. + +maybe_supervisor_which_children(suspended, Name, Pid) -> + error_logger:error_msg("release_handler: a which_children call" + " to ~p (~p) was avoided. This supervisor" + " is suspended and should likely be upgraded" + " differently. Exiting ...~n", [Name, Pid]), + error(suspended_supervisor); + +maybe_supervisor_which_children(State, Name, Pid) -> + case catch supervisor:which_children(Pid) of + Res when is_list(Res) -> + Res; + Other -> + error_logger:error_msg("release_handler: ~p~nerror during" + " a which_children call to ~p (~p)." + " [State: ~p] Exiting ... ~n", + [Other, Name, Pid, State]), + error(which_children_failed) + end. + +maybe_get_dynamic_mods(Name, Pid) -> + case catch gen:call(Pid, self(), get_modules) of + {ok, Res} -> + Res; + Other -> + error_logger:error_msg("release_handler: ~p~nerror during a" + " get_modules call to ~p (~p)," + " there may be an error in it's" + " childspec. Exiting ...~n", + [Other, Name, Pid]), + error(get_modules_failed) + end. 
%% XXXX %% Note: The following is a terrible hack done in order to resolve the diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl index b44da72d35..af2183bfff 100644 --- a/lib/sasl/test/release_handler_SUITE.erl +++ b/lib/sasl/test/release_handler_SUITE.erl @@ -58,7 +58,7 @@ cases() -> [otp_2740, otp_2760, otp_5761, otp_9402, otp_9417, otp_9395_check_old_code, otp_9395_check_and_purge, otp_9395_update_many_mods, otp_9395_rm_many_mods, - instructions, eval_appup]. + instructions, eval_appup, supervisor_which_children_timeout]. groups() -> [{release,[], @@ -523,6 +523,29 @@ no_cc() -> %%%----------------------------------------------------------------- %%----------------------------------------------------------------- +%% release_handler_1:get_supervised_procs/0 test +%%----------------------------------------------------------------- +supervisor_which_children_timeout(Conf) -> + PrivDir = priv_dir(Conf), + Dir = filename:join(PrivDir,"supervisor_which_children_timeout"), + DataDir = ?config(data_dir,Conf), + LibDir = filename:join([DataDir,release_handler_timeouts]), + + Rel1 = create_and_install_fake_first_release(Dir,[{dummy,"0.1",LibDir}]), + + {ok, Node} = t_start_node(supervisor_which_children_timeout, Rel1, []), + Proc = rpc:call(Node, erlang, whereis, [dummy_sup_2]), + ok = rpc:call(Node, sys, suspend, [Proc]), + Result = {badrpc, {'EXIT', {suspended_supervisor, _}}} = + rpc:call(Node, release_handler_1, get_supervised_procs, []), + ?t:format("release_handler_1:get_supervised_procs/0: ~p~n", [Result]), + + ok. + +supervisor_which_children_timeout(cleanup, Conf) -> + stop_node(node_name(supervisor_which_children_timeout)). 
+ +%%----------------------------------------------------------------- %% Ticket: OTP-2740 %% Slogan: vsn not numeric doesn't work so good in release_handling %%----------------------------------------------------------------- diff --git a/lib/sasl/test/release_handler_SUITE_data/Makefile.src b/lib/sasl/test/release_handler_SUITE_data/Makefile.src index 9b07e7ce0a..edb446413d 100644 --- a/lib/sasl/test/release_handler_SUITE_data/Makefile.src +++ b/lib/sasl/test/release_handler_SUITE_data/Makefile.src @@ -64,8 +64,13 @@ C= \ c/b.@EMULATOR@ \ c/c_sup.@EMULATOR@ +SUP= \ + release_handler_timeouts/dummy-0.1/ebin/dummy_app.@EMULATOR@ \ + release_handler_timeouts/dummy-0.1/ebin/dummy_server.@EMULATOR@ \ + release_handler_timeouts/dummy-0.1/ebin/dummy_sup.@EMULATOR@ \ + release_handler_timeouts/dummy-0.1/ebin/dummy_sup_2.@EMULATOR@ -all: $(P2B) $(LIB) $(APP) $(OTP2740) $(C) +all: $(P2B) $(LIB) $(APP) $(OTP2740) $(C) $(SUP) P2B/a-2.0/ebin/a.@EMULATOR@: P2B/a-2.0/src/a.erl erlc $(EFLAGS) -oP2B/a-2.0/ebin P2B/a-2.0/src/a.erl @@ -195,3 +200,12 @@ c/b.@EMULATOR@: c/b.erl erlc $(EFLAGS) -oc c/b.erl c/c_sup.@EMULATOR@: c/c_sup.erl erlc $(EFLAGS) -oc c/c_sup.erl + +release_handler_timeouts/dummy-0.1/ebin/dummy_app.@EMULATOR@: release_handler_timeouts/dummy-0.1/src/dummy_app.erl + erlc $(EFLAGS) -orelease_handler_timeouts/dummy-0.1/ebin release_handler_timeouts/dummy-0.1/src/dummy_app.erl +release_handler_timeouts/dummy-0.1/ebin/dummy_server.@EMULATOR@: release_handler_timeouts/dummy-0.1/src/dummy_server.erl + erlc $(EFLAGS) -orelease_handler_timeouts/dummy-0.1/ebin release_handler_timeouts/dummy-0.1/src/dummy_server.erl +release_handler_timeouts/dummy-0.1/ebin/dummy_sup.@EMULATOR@: release_handler_timeouts/dummy-0.1/src/dummy_sup.erl + erlc $(EFLAGS) -orelease_handler_timeouts/dummy-0.1/ebin release_handler_timeouts/dummy-0.1/src/dummy_sup.erl +release_handler_timeouts/dummy-0.1/ebin/dummy_sup_2.@EMULATOR@: release_handler_timeouts/dummy-0.1/src/dummy_sup_2.erl + erlc $(EFLAGS) 
-orelease_handler_timeouts/dummy-0.1/ebin release_handler_timeouts/dummy-0.1/src/dummy_sup_2.erl diff --git a/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/ebin/dummy.app b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/ebin/dummy.app new file mode 100644 index 0000000000..9efdc2e5da --- /dev/null +++ b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/ebin/dummy.app @@ -0,0 +1,7 @@ +{application,dummy, + [{description,"a dummy app"}, + {vsn,"0.1"}, + {registered,[dummy_app]}, + {mod,{dummy_app,[]}}, + {applications,[kernel,stdlib,sasl]}, + {modules,[dummy_app,dummy_server,dummy_sup,dummy_sup_2]}]}.
\ No newline at end of file diff --git a/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_app.erl b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_app.erl new file mode 100644 index 0000000000..51363b3630 --- /dev/null +++ b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_app.erl @@ -0,0 +1,9 @@ +-module(dummy_app). +-behaviour(application). + +-export([start/2, stop/1]). + +start(_,_) -> + dummy_sup:start_link(). + +stop(_) -> ok. diff --git a/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_server.erl b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_server.erl new file mode 100644 index 0000000000..382251eba7 --- /dev/null +++ b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_server.erl @@ -0,0 +1,56 @@ +-module(dummy_server). +-behaviour(gen_server). + +-export([start_link/0, set_state/1, get_state/0]). + +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +%% + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +set_state(What) -> + gen_server:call(?MODULE, {set_state, What}). + +get_state() -> + gen_server:call(?MODULE, get_state). + + +%% + +init([]) -> + say("init, setting state to 0", []), + {ok, 0}. + + +handle_call({set_state, NewState}, _From, _State) -> + {reply, {ok, NewState}, NewState}; + +handle_call(get_state, _From, State) -> + {reply, State, State}. + +handle_cast('__not_implemented', State) -> + {noreply, State}. + +handle_info(_Info, State) -> + say("info ~p, ~p.", [_Info, State]), + {noreply, State}. + +terminate(_Reason, _State) -> + say("terminate ~p, ~p", [_Reason, _State]), + ok. + +code_change(_OldVsn, State, _Extra) -> + say("code_change ~p, ~p, ~p", [_OldVsn, State, _Extra]), + {ok, State}. 
+ +%% Internal + +say(Format, Data) -> + io:format("~p:~p: ~s~n", [?MODULE, self(), io_lib:format(Format, Data)]). diff --git a/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_sup.erl b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_sup.erl new file mode 100644 index 0000000000..3d7b5060df --- /dev/null +++ b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_sup.erl @@ -0,0 +1,15 @@ +-module(dummy_sup). +-behaviour(supervisor). + +-export([start_link/0]). +-export([init/1]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + DummySup2 = {dummy_sup_2, + {dummy_sup_2, start_link, []}, + permanent, 5000, supervisor, [dummy_sup_2]}, + + {ok, {{one_for_one, 10, 10}, [DummySup2]}}. diff --git a/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_sup_2.erl b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_sup_2.erl new file mode 100644 index 0000000000..d936cbcbd6 --- /dev/null +++ b/lib/sasl/test/release_handler_SUITE_data/release_handler_timeouts/dummy-0.1/src/dummy_sup_2.erl @@ -0,0 +1,15 @@ +-module(dummy_sup_2). +-behaviour(supervisor). + +-export([start_link/0]). +-export([init/1]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + Dummy = {dummy_server, + {dummy_server, start_link, []}, + permanent, 5000, worker, [dummy_server]}, + + {ok, {{one_for_one, 10, 10}, [Dummy]}}. |