From feecf39417a1a5f2114a5fc18c7e0207fb642eee Mon Sep 17 00:00:00 2001 From: joewilliams Date: Thu, 9 Jun 2011 07:58:01 -0700 Subject: General improvements to release_handler_1:get_supervised_procs The purpose of this patch is twofold: 1) have release_handler_1 act slightly differently in two corner cases, and 2) clean up the code in get_supervised_procs/0 to remove nested cases, etc. Regarding #1, get_supervised_procs/0 will now call functions to test whether the supervisor is suspended before attempting to ask it for a list of children. It will now print an error message regarding the suspended supervisor and produce an error that will cause the VM to restart. Previously it would time out attempting the call to which_children and the VM would restart without any details regarding the reason. The second corner case arises when a child specification incorrectly states that a supervisor is a worker and get_modules is called against it: a timeout occurs, causing a VM restart. As with the first corner case, with this patch an error message is printed and an error is emitted, causing a VM restart. When first looking into the issue it was hard to discover why my upgrades were failing. All I received during the upgrade process was a timeout and a VM restart, with no other information. This patch should help users track down issues like these. Regarding #2, due to the above confusion in trying to figure out what had happened, I dug into the code, started tracing it through, and found that the nested case statements, etc. made it confusing. So I started to rework and clean up, hopefully making this code path clearer to future readers. 
--- lib/sasl/test/release_handler_SUITE.erl | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'lib/sasl/test/release_handler_SUITE.erl') diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl index efa775f344..c850e339bc 100644 --- a/lib/sasl/test/release_handler_SUITE.erl +++ b/lib/sasl/test/release_handler_SUITE.erl @@ -55,7 +55,8 @@ win32_cases() -> %% Cases that can be run on all platforms cases() -> - [otp_2740, otp_2760, otp_5761, instructions, eval_appup]. + [otp_2740, otp_2760, otp_5761, instructions, eval_appup, + supervisor_which_children_timeout]. groups() -> [{release,[], @@ -512,6 +513,29 @@ no_cc() -> %%% Testing of reported bugs and other tickets. %%%----------------------------------------------------------------- +%%----------------------------------------------------------------- +%% release_handler_1:get_supervised_procs/0 test +%%----------------------------------------------------------------- +supervisor_which_children_timeout(Conf) -> + PrivDir = priv_dir(Conf), + Dir = filename:join(PrivDir,"supervisor_which_children_timeout"), + DataDir = ?config(data_dir,Conf), + LibDir = filename:join([DataDir,release_handler_timeouts]), + + Rel1 = create_and_install_fake_first_release(Dir,[{dummy,"0.1",LibDir}]), + + {ok, Node} = t_start_node(supervisor_which_children_timeout, Rel1, []), + Proc = rpc:call(Node, erlang, whereis, [dummy_sup_2]), + ok = rpc:call(Node, sys, suspend, [Proc]), + Result = {badrpc, {'EXIT', {suspended_supervisor, _}}} = + rpc:call(Node, release_handler_1, get_supervised_procs, []), + ?t:format("release_handler_1:get_supervised_procs/0: ~p~n", [Result]), + + ok. + +supervisor_which_children_timeout(cleanup, Conf) -> + stop_node(node_name(supervisor_which_children_timeout)). + %%----------------------------------------------------------------- %% Ticket: OTP-2740 %% Slogan: vsn not numeric doesn't work so good in release_handling -- cgit v1.2.3