aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Gudmundsson <[email protected]>2013-10-10 10:55:46 +0200
committerDan Gudmundsson <[email protected]>2013-11-25 12:33:16 +0100
commit9237801d22a38d2643ffe94ab626c4d2815012dd (patch)
tree7ffb6c1af05db1c45d30f6ce72f8548934f52b60
parent3abf1b5ef82478b152581152ad3ec749e8b7edaa (diff)
downloadotp-9237801d22a38d2643ffe94ab626c4d2815012dd.tar.gz
otp-9237801d22a38d2643ffe94ab626c4d2815012dd.tar.bz2
otp-9237801d22a38d2643ffe94ab626c4d2815012dd.zip
mnesia: Synchronize lock cleanup after mnesia down
Bad timing could lead to hanging transactions after a mnesia down from a node with sticky locks. Excellent bug report from janchochol Situation: * node A and B have copies of table T * node A ows sticky of table T * node A goes down (e.g. crash) * node B tries to perform transactional operation on table T (e.g. mnesia:select) In this situation there is possibility that first (and maybe other) transaction on node B will hang indefinitely. This is caused by race condition, when transaction process send lock request operation to node A and waits for reply. When node A is down it will never send reply, so process on node B will be stuck forever. Reason is that message sent to mnesia_locker gen_server from mnesia_locker:mnesia_down can be received after mnesia_locker gen_server already replies to transaction processes with {switch, N, Req} and node N is down. Monitoring remote process when sending request to other node should be safe solution.
-rw-r--r--lib/mnesia/src/mnesia.appup.src14
-rw-r--r--lib/mnesia/src/mnesia_locker.erl32
-rw-r--r--lib/mnesia/src/mnesia_monitor.erl6
-rw-r--r--lib/mnesia/src/mnesia_tm.erl6
4 files changed, 24 insertions, 34 deletions
diff --git a/lib/mnesia/src/mnesia.appup.src b/lib/mnesia/src/mnesia.appup.src
index 355aafb215..c245299740 100644
--- a/lib/mnesia/src/mnesia.appup.src
+++ b/lib/mnesia/src/mnesia.appup.src
@@ -1,22 +1,16 @@
%% -*- erlang -*-
{"%VSN%",
[
- {"4.7.1", [{restart_application, mnesia}]},
- {"4.7", [{restart_application, mnesia}]},
- {"4.6", [{restart_application, mnesia}]},
- {"4.5.1", [{restart_application, mnesia}]},
- {"4.5", [{restart_application, mnesia}]},
+ {<<"4\\.1[0-9].*">>, [{restart_application, mnesia}]},
+ {<<"4\\.[5-9].*">>, [{restart_application, mnesia}]},
{"4.4.19", [{restart_application, mnesia}]},
{"4.4.18", [{restart_application, mnesia}]},
{"4.4.17", [{restart_application, mnesia}]},
{"4.4.16", [{restart_application, mnesia}]}
],
[
- {"4.7.1", [{restart_application, mnesia}]},
- {"4.7", [{restart_application, mnesia}]},
- {"4.6", [{restart_application, mnesia}]},
- {"4.5.1", [{restart_application, mnesia}]},
- {"4.5", [{restart_application, mnesia}]},
+ {<<"4\\.1[0-9].*">>, [{restart_application, mnesia}]},
+ {<<"4\\.[5-9].*">>, [{restart_application, mnesia}]},
{"4.4.19", [{restart_application, mnesia}]},
{"4.4.18", [{restart_application, mnesia}]},
{"4.4.17", [{restart_application, mnesia}]},
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
index 14011003d3..c4fe370ec1 100644
--- a/lib/mnesia/src/mnesia_locker.erl
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2013. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -26,8 +26,8 @@
global_lock/5,
ixrlock/5,
init/1,
- mnesia_down/2,
release_tid/1,
+ mnesia_down/2,
async_release_tid/2,
send_release_tid/2,
receive_release_tid_acc/2,
@@ -137,6 +137,17 @@ receive_release_tid_acc([Node | Nodes], Tid) ->
receive_release_tid_acc([], _Tid) ->
ok.
+mnesia_down(Node, Pending) ->
+ case whereis(?MODULE) of
+ undefined -> {error, node_not_running};
+ Pid ->
+ Ref = make_ref(),
+ Pid ! {{self(), Ref}, {release_remote_non_pending, Node, Pending}},
+ receive %% No need to wait for anything else if process dies we die soon
+ {Ref,ok} -> ok
+ end
+ end.
+
loop(State) ->
receive
{From, {write, Tid, Oid}} ->
@@ -213,9 +224,9 @@ loop(State) ->
reply(From, {tid_released, Tid}),
loop(State);
- {release_remote_non_pending, Node, Pending} ->
+ {{From, Ref},{release_remote_non_pending, Node, Pending}} ->
release_remote_non_pending(Node, Pending),
- mnesia_monitor:mnesia_down(?MODULE, Node),
+ From ! {Ref, ok},
loop(State);
{'EXIT', Pid, _} when Pid == State#state.supervisor ->
@@ -653,19 +664,6 @@ ix_read_res(Tab,IxKey,Pos) ->
%% ********************* end server code ********************
%% The following code executes at the client side of a transactions
-mnesia_down(N, Pending) ->
- case whereis(?MODULE) of
- undefined ->
- %% Takes care of mnesia_down's in early startup
- mnesia_monitor:mnesia_down(?MODULE, N);
- Pid ->
- %% Syncronously call needed in order to avoid
- %% race with mnesia_tm's coordinator processes
- %% that may restart and acquire new locks.
- %% mnesia_monitor ensures the sync.
- Pid ! {release_remote_non_pending, N, Pending}
- end.
-
%% Aquire a write lock, but do a read, used by
%% mnesia:wread/1
diff --git a/lib/mnesia/src/mnesia_monitor.erl b/lib/mnesia/src/mnesia_monitor.erl
index 7a788238fc..438da65158 100644
--- a/lib/mnesia/src/mnesia_monitor.erl
+++ b/lib/mnesia/src/mnesia_monitor.erl
@@ -482,11 +482,7 @@ handle_cast({mnesia_down, mnesia_controller, Node}, State) ->
mnesia_tm:mnesia_down(Node),
{noreply, State};
-handle_cast({mnesia_down, mnesia_tm, {Node, Pending}}, State) ->
- mnesia_locker:mnesia_down(Node, Pending),
- {noreply, State};
-
-handle_cast({mnesia_down, mnesia_locker, Node}, State) ->
+handle_cast({mnesia_down, mnesia_tm, Node}, State) ->
Down = {mnesia_down, Node},
mnesia_lib:report_system_event(Down),
GoingDown = lists:delete(Node, State#state.going_down),
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index e54e5c4e88..17af0cad44 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -181,7 +181,7 @@ mnesia_down(Node) ->
%% mnesia_monitor takes care of the sync
case whereis(?MODULE) of
undefined ->
- mnesia_monitor:mnesia_down(?MODULE, {Node, []});
+ mnesia_monitor:mnesia_down(?MODULE, Node);
Pid ->
Pid ! {mnesia_down, Node}
end.
@@ -403,7 +403,9 @@ doit_loop(#state{coordinators=Coordinators,participants=Participants,supervisor=
Tids = gb_trees:keys(Participants),
reconfigure_participants(N, gb_trees:values(Participants)),
NewState = clear_fixtable(N, State),
- mnesia_monitor:mnesia_down(?MODULE, {N, Tids}),
+
+ mnesia_locker:mnesia_down(N, Tids),
+ mnesia_monitor:mnesia_down(?MODULE, N),
doit_loop(NewState);
{From, {unblock_me, Tab}} ->