From 9237801d22a38d2643ffe94ab626c4d2815012dd Mon Sep 17 00:00:00 2001 From: Dan Gudmundsson Date: Thu, 10 Oct 2013 10:55:46 +0200 Subject: mnesia: Synchronize lock cleanup after mnesia down Bad timing could lead to hanging transactions after a mnesia down from a node with sticky locks. Excellent bug report from janchochol Situation: * node A and B have copies of table T * node A ows sticky of table T * node A goes down (e.g. crash) * node B tries to perform transactional operation on table T (e.g. mnesia:select) In this situation there is possibility that first (and maybe other) transaction on node B will hang indefinitely. This is caused by race condition, when transaction process send lock request operation to node A and waits for reply. When node A is down it will never send reply, so process on node B will be stuck forever. Reason is that message sent to mnesia_locker gen_server from mnesia_locker:mnesia_down can be received after mnesia_locker gen_server already replies to transaction processes with {switch, N, Req} and node N is down. Monitoring remote process when sending request to other node should be safe solution. --- lib/mnesia/src/mnesia.appup.src | 14 ++++---------- lib/mnesia/src/mnesia_locker.erl | 32 +++++++++++++++----------------- lib/mnesia/src/mnesia_monitor.erl | 6 +----- lib/mnesia/src/mnesia_tm.erl | 6 ++++-- 4 files changed, 24 insertions(+), 34 deletions(-) (limited to 'lib/mnesia/src') diff --git a/lib/mnesia/src/mnesia.appup.src b/lib/mnesia/src/mnesia.appup.src index 355aafb215..c245299740 100644 --- a/lib/mnesia/src/mnesia.appup.src +++ b/lib/mnesia/src/mnesia.appup.src @@ -1,22 +1,16 @@ %% -*- erlang -*- {"%VSN%", [ - {"4.7.1", [{restart_application, mnesia}]}, - {"4.7", [{restart_application, mnesia}]}, - {"4.6", [{restart_application, mnesia}]}, - {"4.5.1", [{restart_application, mnesia}]}, - {"4.5", [{restart_application, mnesia}]}, + {<<"4\\.1[0-9].*">>, [{restart_application, mnesia}]}, + {<<"4\\.[5-9].*">>, [{restart_application, mnesia}]}, {"4.4.19", [{restart_application, mnesia}]}, {"4.4.18", [{restart_application, mnesia}]}, {"4.4.17", [{restart_application, mnesia}]}, {"4.4.16", [{restart_application, mnesia}]} ], [ - {"4.7.1", [{restart_application, mnesia}]}, - {"4.7", [{restart_application, mnesia}]}, - {"4.6", [{restart_application, mnesia}]}, - {"4.5.1", [{restart_application, mnesia}]}, - {"4.5", [{restart_application, mnesia}]}, + {<<"4\\.1[0-9].*">>, [{restart_application, mnesia}]}, + {<<"4\\.[5-9].*">>, [{restart_application, mnesia}]}, {"4.4.19", [{restart_application, mnesia}]}, {"4.4.18", [{restart_application, mnesia}]}, {"4.4.17", [{restart_application, mnesia}]}, diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl index 14011003d3..c4fe370ec1 100644 --- a/lib/mnesia/src/mnesia_locker.erl +++ b/lib/mnesia/src/mnesia_locker.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -26,8 +26,8 @@ global_lock/5, ixrlock/5, init/1, - mnesia_down/2, release_tid/1, + mnesia_down/2, async_release_tid/2, send_release_tid/2, receive_release_tid_acc/2, @@ -137,6 +137,17 @@ receive_release_tid_acc([Node | Nodes], Tid) -> receive_release_tid_acc([], _Tid) -> ok. +mnesia_down(Node, Pending) -> + case whereis(?MODULE) of + undefined -> {error, node_not_running}; + Pid -> + Ref = make_ref(), + Pid ! {{self(), Ref}, {release_remote_non_pending, Node, Pending}}, + receive %% No need to wait for anything else if process dies we die soon + {Ref,ok} -> ok + end + end. + loop(State) -> receive {From, {write, Tid, Oid}} -> @@ -213,9 +224,9 @@ loop(State) -> reply(From, {tid_released, Tid}), loop(State); - {release_remote_non_pending, Node, Pending} -> + {{From, Ref},{release_remote_non_pending, Node, Pending}} -> release_remote_non_pending(Node, Pending), - mnesia_monitor:mnesia_down(?MODULE, Node), + From ! {Ref, ok}, loop(State); {'EXIT', Pid, _} when Pid == State#state.supervisor -> @@ -653,19 +664,6 @@ ix_read_res(Tab,IxKey,Pos) -> %% ********************* end server code ******************** %% The following code executes at the client side of a transactions -mnesia_down(N, Pending) -> - case whereis(?MODULE) of - undefined -> - %% Takes care of mnesia_down's in early startup - mnesia_monitor:mnesia_down(?MODULE, N); - Pid -> - %% Syncronously call needed in order to avoid - %% race with mnesia_tm's coordinator processes - %% that may restart and acquire new locks. - %% mnesia_monitor ensures the sync. - Pid ! {release_remote_non_pending, N, Pending} - end. - %% Aquire a write lock, but do a read, used by %% mnesia:wread/1 diff --git a/lib/mnesia/src/mnesia_monitor.erl b/lib/mnesia/src/mnesia_monitor.erl index 7a788238fc..438da65158 100644 --- a/lib/mnesia/src/mnesia_monitor.erl +++ b/lib/mnesia/src/mnesia_monitor.erl @@ -482,11 +482,7 @@ handle_cast({mnesia_down, mnesia_controller, Node}, State) -> mnesia_tm:mnesia_down(Node), {noreply, State}; -handle_cast({mnesia_down, mnesia_tm, {Node, Pending}}, State) -> - mnesia_locker:mnesia_down(Node, Pending), - {noreply, State}; - -handle_cast({mnesia_down, mnesia_locker, Node}, State) -> +handle_cast({mnesia_down, mnesia_tm, Node}, State) -> Down = {mnesia_down, Node}, mnesia_lib:report_system_event(Down), GoingDown = lists:delete(Node, State#state.going_down), diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl index e54e5c4e88..17af0cad44 100644 --- a/lib/mnesia/src/mnesia_tm.erl +++ b/lib/mnesia/src/mnesia_tm.erl @@ -181,7 +181,7 @@ mnesia_down(Node) -> %% mnesia_monitor takes care of the sync case whereis(?MODULE) of undefined -> - mnesia_monitor:mnesia_down(?MODULE, {Node, []}); + mnesia_monitor:mnesia_down(?MODULE, Node); Pid -> Pid ! {mnesia_down, Node} end. @@ -403,7 +403,9 @@ doit_loop(#state{coordinators=Coordinators,participants=Participants,supervisor= Tids = gb_trees:keys(Participants), reconfigure_participants(N, gb_trees:values(Participants)), NewState = clear_fixtable(N, State), - mnesia_monitor:mnesia_down(?MODULE, {N, Tids}), + + mnesia_locker:mnesia_down(N, Tids), + mnesia_monitor:mnesia_down(?MODULE, N), doit_loop(NewState); {From, {unblock_me, Tab}} -> -- cgit v1.2.3