From 397e69fa95457f9660831ba1e551c784eb61e93e Mon Sep 17 00:00:00 2001 From: Dan Gudmundsson Date: Tue, 18 Feb 2014 17:21:55 +0100 Subject: kernel: Fix an application terminate deadlock possibility Could happen if get_child was called during terminate. io (since it is a group_leader) also causes problems after get_child was called. Split up and do it async. --- lib/kernel/src/application_master.erl | 38 +++++++++++++--------- lib/kernel/test/application_SUITE.erl | 33 ++++++++++++++++--- .../application_SUITE_data/deadlock/deadlock.app | 2 +- .../application_SUITE_data/deadlock/deadlock.erl | 12 ++++++- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/lib/kernel/src/application_master.erl b/lib/kernel/src/application_master.erl index 68f78c6eb8..bc15b5a7de 100644 --- a/lib/kernel/src/application_master.erl +++ b/lib/kernel/src/application_master.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2013. All Rights Reserved. +%% Copyright Ericsson AB 1996-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -27,7 +27,7 @@ -include("application_master.hrl"). --record(state, {child, appl_data, children = [], procs = 0, gleader}). +-record(state, {child, appl_data, children = [], procs = 0, gleader, req=[]}). %%----------------------------------------------------------------- %% Func: start_link/1 @@ -205,22 +205,25 @@ terminate_loop(Child, State) -> %%----------------------------------------------------------------- %% The Application Master is linked to *all* processes in the group -%% (application). +%% (application). %%----------------------------------------------------------------- handle_msg({get_child, Tag, From}, State) -> - From ! {Tag, get_child_i(State#state.child)}, - State; + get_child_i(State, Tag, From); handle_msg({stop, Tag, From}, State) -> catch terminate(normal, State), From ! {Tag, ok}, exit(normal); +handle_msg({child, Ref, GrandChild, Mod}, #state{req=Reqs0}=State) -> + {value, {_, Tag, From}, Reqs} = lists:keytake(Ref, 1, Reqs0), + From ! {Tag, {GrandChild, Mod}}, + State#state{req=Reqs}; handle_msg(_, State) -> State. - -terminate(Reason, State) -> - terminate_child(State#state.child, State), - kill_children(State#state.children), +terminate(Reason, State = #state{child=Child, children=Children, req=Reqs}) -> + _ = [From ! {Tag, error} || {_, Tag, From} <- Reqs], + terminate_child(Child, State), + kill_children(Children), exit(Reason). @@ -342,8 +345,8 @@ start_supervisor(Type, M, A) -> loop_it(Parent, Child, Mod, AppState) -> receive - {Parent, get_child} -> - Parent ! {self(), Child, Mod}, + {Parent, get_child, Ref} -> + Parent ! {child, Ref, Child, Mod}, loop_it(Parent, Child, Mod, AppState); {Parent, terminate} -> NewAppState = prep_stop(Mod, AppState), @@ -382,10 +385,15 @@ prep_stop(Mod, AppState) -> NewAppState end. -get_child_i(Child) -> - Child ! {self(), get_child}, - receive - {Child, GrandChild, Mod} -> {GrandChild, Mod} +get_child_i(#state{child=Child, req=Reqs}=State, Tag, From) -> + Ref = erlang:make_ref(), + case erlang:is_process_alive(Child) of + true -> + Child ! {self(), get_child, Ref}, + State#state{req=[{Ref, Tag, From}|Reqs]}; + false -> + From ! {Tag, error}, + State end. terminate_child_i(Child, State) -> diff --git a/lib/kernel/test/application_SUITE.erl b/lib/kernel/test/application_SUITE.erl index ff62297f2d..ccb3760309 100644 --- a/lib/kernel/test/application_SUITE.erl +++ b/lib/kernel/test/application_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2013. All Rights Reserved. +%% Copyright Ericsson AB 1996-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -36,7 +36,7 @@ -export([config_change/1, persistent_env/1, distr_changed_tc1/1, distr_changed_tc2/1, ensure_started/1, ensure_all_started/1, - shutdown_func/1, do_shutdown/1, shutdown_timeout/1]). + shutdown_func/1, do_shutdown/1, shutdown_timeout/1, shutdown_deadlock/1]). -define(TESTCASE, testcase_name). -define(testcase, ?config(?TESTCASE, Config)). @@ -54,6 +54,7 @@ all() -> script_start, nodedown_start, permit_false_start_local, permit_false_start_dist, get_key, get_env, ensure_all_started, {group, distr_changed}, config_change, shutdown_func, shutdown_timeout, + shutdown_deadlock, persistent_env]. groups() -> @@ -961,7 +962,7 @@ nodedown_start(Conf) when is_list(Conf) -> ensure_started(suite) -> []; ensure_started(doc) -> ["Test application:ensure_started/1."]; -ensure_started(Conf) -> +ensure_started(_Conf) -> {ok, Fd} = file:open("app1.app", [write]), w_app1(Fd), @@ -981,7 +982,7 @@ ensure_started(Conf) -> ensure_all_started(suite) -> []; ensure_all_started(doc) -> ["Test application:ensure_all_started/1-2."]; -ensure_all_started(Conf) -> +ensure_all_started(_Conf) -> {ok, Fd1} = file:open("app1.app", [write]), w_app1(Fd1), @@ -2096,7 +2097,31 @@ shutdown_timeout(Config) when is_list(Config) -> end, ok. +%%%----------------------------------------------------------------- +%%% Provokes a (previous) application shutdown deadlock +%%%----------------------------------------------------------------- +shutdown_deadlock(Config) when is_list(Config) -> + DataDir = ?config(data_dir,Config), + code:add_path(filename:join([DataDir,deadlock])), + %% ok = rpc:call(Cp1, application, start, [sasl]), + ok = application:start(deadlock), + Tester = self(), + application:set_env(deadlock, fail_stop, Tester), + spawn(fun() -> Tester ! {stop, application:stop(deadlock)} end), + receive + {deadlock, Server} -> + spawn(fun() -> + Master = application_controller:get_master(deadlock), + Child = application_master:get_child(Master), + Tester ! {child, Child} + end), + timer:sleep(100), + erlang:display({self(), "Sending Continue", Server}), + Server ! continue + end, + [_|_] = application:which_applications(), + ok. %%----------------------------------------------------------------- diff --git a/lib/kernel/test/application_SUITE_data/deadlock/deadlock.app b/lib/kernel/test/application_SUITE_data/deadlock/deadlock.app index 0c1001bed6..233c7a3f76 100644 --- a/lib/kernel/test/application_SUITE_data/deadlock/deadlock.app +++ b/lib/kernel/test/application_SUITE_data/deadlock/deadlock.app @@ -4,5 +4,5 @@ {applications, [kernel, stdlib, sasl]}, {modules, [deadlock]}, {mod, {deadlock, []}}, - {env, [{fail_start, false}]} + {env, [{fail_start, false}, {fail_stop, false}]} ]}. diff --git a/lib/kernel/test/application_SUITE_data/deadlock/deadlock.erl b/lib/kernel/test/application_SUITE_data/deadlock/deadlock.erl index 5f68bf9078..3ef6105371 100644 --- a/lib/kernel/test/application_SUITE_data/deadlock/deadlock.erl +++ b/lib/kernel/test/application_SUITE_data/deadlock/deadlock.erl @@ -21,7 +21,7 @@ init([sup]) -> {ok, {{one_for_one, 5, 10}, [ { sasl_syslog_dm, {?MODULE, start_link, []}, - permanent, brutal_kill, worker, + permanent, 25000, worker, [deadlock] } ]}}; @@ -32,6 +32,8 @@ init([sup]) -> init([child]) -> case application:get_env(deadlock, fail_start) of {ok, false} -> + process_flag(trap_exit, true), + io:format("~p: Traps exit~n",[?MODULE]), %% we must not fail on the first init, otherwise supervisor %% terminates immediately {ok, []}; @@ -50,6 +52,14 @@ handle_info(_Msg, State) -> {noreply, State}. terminate(_Reason, _State) -> + case application:get_env(deadlock, fail_stop) of + {ok, false} -> ok; + {ok, Tester} -> + Tester ! {deadlock, self()}, + io:format("~p: Waiting in terminate (~p)~n",[?MODULE,Tester]), + receive continue -> ok end + end, + io:format("~p: terminates~n", [?MODULE]), ok. code_change(_OldVsn, State, _Extra) -> -- cgit v1.2.3