aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorDan Gudmundsson <[email protected]>2015-02-10 14:43:19 +0100
committerDan Gudmundsson <[email protected]>2015-02-10 14:52:16 +0100
commita0f9bb79004eeacc327d0bfaa891f0cbd71b3f04 (patch)
tree9374618f03b0e64e68a8cae0230974a9a6a61135 /lib
parent33156218401ba0ba489d0d787367af82f56c5f3c (diff)
parent70c4db1d0b57363e33a04c935b653092f68cb91a (diff)
downloadotp-a0f9bb79004eeacc327d0bfaa891f0cbd71b3f04.tar.gz
otp-a0f9bb79004eeacc327d0bfaa891f0cbd71b3f04.tar.bz2
otp-a0f9bb79004eeacc327d0bfaa891f0cbd71b3f04.zip
Merge branch 'dumbbell/mnesia-hang-if-remote-stopped-after-proto-negotiation' into maint
* dumbbell/mnesia-hang-if-remote-stopped-after-proto-negotiation: mnesia: Check nodes after protocol negotiation OTP-12473
Diffstat (limited to 'lib')
-rw-r--r--lib/mnesia/src/mnesia_recover.erl31
1 files changed, 28 insertions, 3 deletions
diff --git a/lib/mnesia/src/mnesia_recover.erl b/lib/mnesia/src/mnesia_recover.erl
index b6492707e2..eeb4fa0ced 100644
--- a/lib/mnesia/src/mnesia_recover.erl
+++ b/lib/mnesia/src/mnesia_recover.erl
@@ -689,12 +689,29 @@ handle_call({connect_nodes, Ns}, From, State) ->
%% called from handle_info
gen_server:reply(From, {[], AlreadyConnected}),
{noreply, State};
- GoodNodes ->
+ ProbablyGoodNodes ->
%% Now we have agreed upon a protocol with some new nodes
- %% and we may use them when we recover transactions
+ %% and we may use them when we recover transactions.
+ %%
+ %% Just in case Mnesia was stopped on some of those nodes
+ %% between the protocol negotiation and now, we check one
+ %% more time the state of Mnesia.
+ %%
+ %% Of course, there is still a chance that mnesia_down
+ %% events occur during this check and we miss them. To
+ %% prevent it, handle_cast({mnesia_down, ...}, ...) removes
+ %% the down node again, in addition to mnesia_down/1.
+ %%
+ %% See a comment in handle_cast({mnesia_down, ...}, ...).
+ Verify = fun(N) ->
+ Run = mnesia_lib:is_running(N),
+ Run =:= yes orelse Run =:= starting
+ end,
+ GoodNodes = [N || N <- ProbablyGoodNodes, Verify(N)],
+
mnesia_lib:add_list(recover_nodes, GoodNodes),
cast({announce_all, GoodNodes}),
- case get_master_nodes(schema) of
+ case get_master_nodes(schema) of
[] ->
Context = starting_partitioned_network,
mnesia_monitor:detect_inconcistency(GoodNodes, Context);
@@ -842,6 +859,14 @@ handle_cast({what_decision, Node, OtherD}, State) ->
{noreply, State};
handle_cast({mnesia_down, Node}, State) ->
+ %% The node was already removed from recover_nodes in mnesia_down/1,
+ %% but we do it again here in the mnesia_recover process, in case
+ %% another event incorrectly added it back. This can happen during
+ %% Mnesia startup which takes time betweenthe connection, the
+ %% protocol negotiation and the merge of the schema.
+ %%
+ %% See a comment in handle_call({connect_nodes, ...), ...).
+ mnesia_lib:del(recover_nodes, Node),
case State#state.unclear_decision of
undefined ->
{noreply, State};