From 6233c5db89867fae35f69bd7b8ef94b32cec605d Mon Sep 17 00:00:00 2001 From: Dan Gudmundsson Date: Wed, 3 Feb 2016 15:22:04 +0100 Subject: mnesia: Avoid deadlock possibility in mnesia:del_table_copy schema del_table_copy grabs a write lock in a new process in prepare_op/3 to change 'where_to_read' when a table copy is updated. When del_table_copy(schema, Node) is called all copies located on Node are deleted, and thus many locks are taken. Since this was done outside of the schema-transaction, mnesia's deadlock prevention algorithms was sidestepped and a deadlock could occur. Fix by always grabbing write-locks for all changed tabs early and in the same transaction, this might slow done the operation some but it must be done and it also cleans up the code. --- lib/mnesia/src/mnesia_schema.erl | 82 +++++++------------------------ lib/mnesia/test/mnesia_isolation_test.erl | 71 +++++++++++++++++++++----- 2 files changed, 77 insertions(+), 76 deletions(-) diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl index 593360415d..2acddccf6e 100644 --- a/lib/mnesia/src/mnesia_schema.erl +++ b/lib/mnesia/src/mnesia_schema.erl @@ -1297,6 +1297,7 @@ make_del_table_copy(Tab, Node) -> _ -> ensure_active(Cs), verify_cstruct(Cs2), + get_tid_ts_and_lock(Tab, write), [{op, del_table_copy, Storage, Node, vsn_cs2list(Cs2)}] end. @@ -1324,6 +1325,7 @@ remove_node_from_tabs([Tab|Rest], Node) -> remove_node_from_tabs(Rest, Node)]; _Ns -> verify_cstruct(Cs2), + get_tid_ts_and_lock(Tab, write), [{op, del_table_copy, ram_copies, Node, vsn_cs2list(Cs2)}| remove_node_from_tabs(Rest, Node)] end @@ -1355,6 +1357,7 @@ do_move_table(schema, _FromNode, _ToNode) -> mnesia:abort({bad_type, schema}); do_move_table(Tab, FromNode, ToNode) when is_atom(FromNode), is_atom(ToNode) -> TidTs = get_tid_ts_and_lock(schema, write), + %% get_tid_ts_and_lock(Tab, write), write locked by load_table in mnesia_loader insert_schema_ops(TidTs, make_move_table(Tab, FromNode, ToNode)); do_move_table(Tab, FromNode, ToNode) -> mnesia:abort({badarg, Tab, FromNode, ToNode}). @@ -1993,28 +1996,11 @@ prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}, _WaitFor) -> {true, optional} end; -prepare_op(Tid, {op, del_table_copy, _Storage, Node, TabDef}, _WaitFor) -> +prepare_op(_Tid, {op, del_table_copy, _Storage, Node, TabDef}, _WaitFor) -> Cs = list2cs(TabDef), Tab = Cs#cstruct.name, - - if - %% Schema table lock is always required to run a schema op. - %% No need to look it. - node(Tid#tid.pid) == node(), Tab /= schema -> - Self = self(), - Pid = spawn_link(fun() -> lock_del_table(Tab, Node, Cs, Self) end), - put(mnesia_lock, Pid), - receive - {Pid, updated} -> - {true, optional}; - {Pid, FailReason} -> - mnesia:abort(FailReason); - {'EXIT', Pid, Reason} -> - mnesia:abort(Reason) - end; - true -> - {true, optional} - end; + set_where_to_read(Tab, Node, Cs), + {true, optional}; prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef}, _WaitFor) when N == node() -> @@ -2228,46 +2214,6 @@ receive_sync(Nodes, Pids) -> {abort, Else} end. -lock_del_table(Tab, NewNode, Cs0, Father) -> - Ns = val({schema, active_replicas}), - process_flag(trap_exit,true), - Lock = fun() -> - mnesia:write_lock_table(Tab), - %% Sigh using cs record - Set = fun(Node) -> - [Cs] = normalize_cs([Cs0], Node), - rpc:call(Node, ?MODULE, set_where_to_read, [Tab, NewNode, Cs]) - end, - Res = [Set(Node) || Node <- Ns], - Filter = fun(ok) -> - false; - ({badrpc, {'EXIT', {undef, _}}}) -> - %% This will be the case we talks with elder nodes - %% than 3.8.2, they will set where_to_read without - %% getting a lock. - false; - (_) -> - true - end, - case lists:filter(Filter, Res) of - [] -> - Father ! {self(), updated}, - %% When transaction is commited the process dies - %% and the lock is released. - receive _ -> ok end; - Err -> - Father ! {self(), {bad_commit, Err}} - end, - ok - end, - case mnesia:transaction(Lock) of - {atomic, ok} -> ok; - {aborted, R} -> Father ! {self(), R} - end, - unlink(Father), - unlink(whereis(mnesia_tm)), - exit(normal). - set_where_to_read(Tab, Node, Cs) -> case mnesia_lib:val({Tab, where_to_read}) of Node -> @@ -2418,13 +2364,19 @@ undo_prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}) -> insert_cstruct(Tid, Cs2, true) % Don't care about the version end; -undo_prepare_op(_Tid, {op, del_table_copy, _, Node, TabDef}) - when Node == node() -> - WriteLocker = get(mnesia_lock), - WriteLocker =/= undefined andalso (WriteLocker ! die), +undo_prepare_op(_Tid, {op, del_table_copy, _, Node, TabDef}) -> Cs = list2cs(TabDef), Tab = Cs#cstruct.name, - mnesia_lib:set({Tab, where_to_read}, Node); + if node() =:= Node -> + mnesia_lib:set({Tab, where_to_read}, Node); + true -> + case mnesia_lib:val({Tab, where_to_read}) of + nowhere -> + mnesia_lib:set_remote_where_to_read(Tab); + true -> + ignore + end + end; undo_prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef}) when N == node() -> diff --git a/lib/mnesia/test/mnesia_isolation_test.erl b/lib/mnesia/test/mnesia_isolation_test.erl index b66da6e390..4dd0c01d05 100644 --- a/lib/mnesia/test/mnesia_isolation_test.erl +++ b/lib/mnesia/test/mnesia_isolation_test.erl @@ -39,7 +39,7 @@ groups() -> [{locking, [], [no_conflict, simple_queue_conflict, advanced_queue_conflict, simple_deadlock_conflict, - advanced_deadlock_conflict, lock_burst, + advanced_deadlock_conflict, schema_deadlock, lock_burst, {group, sticky_locks}, {group, unbound_locking}, {group, admin_conflict}, nasty]}, {sticky_locks, [], [basic_sticky_functionality]}, @@ -148,20 +148,32 @@ simple_queue_conflict(Config) when is_list(Config) -> fun_loop(Fun, AllSharedLocks, OneExclusiveLocks), ok. -wait_for_lock(Pid, _Nodes, 0) -> +wait_for_lock(Pid, Nodes, Retry) -> + wait_for_lock(Pid, Nodes, Retry, queue). + +wait_for_lock(Pid, _Nodes, 0, queue) -> Queue = mnesia:system_info(lock_queue), ?error("Timeout while waiting for lock on Pid ~p in queue ~p~n", [Pid, Queue]); -wait_for_lock(Pid, Nodes, N) -> - rpc:multicall(Nodes, sys, get_status, [mnesia_locker]), - List = [rpc:call(Node, mnesia, system_info, [lock_queue]) || Node <- Nodes], +wait_for_lock(Pid, _Nodes, 0, held) -> + Held = mnesia:system_info(held_locks), + ?error("Timeout while waiting for lock on Pid ~p (held) ~p~n", [Pid, Held]); +wait_for_lock(Pid, Nodes, N, Where) -> + rpc:multicall(Nodes, sys, get_status, [mnesia_locker]), + List = case Where of + queue -> + [rpc:call(Node, mnesia, system_info, [lock_queue]) || Node <- Nodes]; + held -> + [rpc:call(Node, mnesia, system_info, [held_locks]) || Node <- Nodes] + end, Q = lists:append(List), - check_q(Pid, Q, Nodes, N). + check_q(Pid, Q, Nodes, N, Where). -check_q(Pid, [{_Oid, _Op, Pid, _Tid, _WFT} | _Tail], _N, _Count) -> ok; -check_q(Pid, [_ | Tail], N, Count) -> check_q(Pid, Tail, N, Count); -check_q(Pid, [], N, Count) -> - timer:sleep(500), - wait_for_lock(Pid, N, Count - 1). +check_q(Pid, [{_Oid, _Op, Pid, _Tid, _WFT} | _Tail], _N, _Count, _Where) -> ok; +check_q(Pid, [{_Oid, _Op, {tid,_,Pid}} | _Tail], _N, _Count, _Where) -> ok; +check_q(Pid, [_ | Tail], N, Count, Where) -> check_q(Pid, Tail, N, Count, Where); +check_q(Pid, [], N, Count, Where) -> + timer:sleep(200), + wait_for_lock(Pid, N, Count - 1, Where). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -270,6 +282,43 @@ advanced_deadlock_conflict(Config) when is_list(Config) -> ?match([], mnesia:system_info(lock_queue)), ok. +%% Verify (and regression test) deadlock in del_table_copy(schema, Node) +schema_deadlock(Config) when is_list(Config) -> + Ns = [Node1, Node2] = ?acquire_nodes(2, Config), + ?match({atomic, ok}, mnesia:create_table(a, [{disc_copies, Ns}])), + ?match({atomic, ok}, mnesia:create_table(b, [{disc_copies, Ns}])), + + Tester = self(), + + Deadlocker = fun() -> + mnesia:write({a,1,1}), %% grab write lock on A + receive + continue -> + mnesia:write({b,1,1}), %% grab write lock on B + end_trans + end + end, + + ?match(stopped, rpc:call(Node2, mnesia, stop, [])), + timer:sleep(500), %% Let Node1 reconfigure + sys:get_status(mnesia_monitor), + + DoingTrans = spawn_link(fun() -> Tester ! {self(),mnesia:transaction(Deadlocker)} end), + wait_for_lock(DoingTrans, [Node1], 10, held), + %% Will grab write locks on schema, a, and b + DoingSchema = spawn_link(fun() -> Tester ! {self(), mnesia:del_table_copy(schema, Node2)} end), + timer:sleep(500), %% Let schema trans start, and try to grab locks + DoingTrans ! continue, + + ?match(ok, receive {DoingTrans, {atomic, end_trans}} -> ok after 5000 -> timeout end), + ?match(ok, receive {DoingSchema, {atomic, ok}} -> ok after 5000 -> timeout end), + + sys:get_status(whereis(mnesia_locker)), % Explicit sync, release locks is async + ?match([], mnesia:system_info(held_locks)), + ?match([], mnesia:system_info(lock_queue)), + ok. + + one_oid(Tab) -> {Tab, 1}. other_oid(Tab) -> {Tab, 2}. -- cgit v1.2.3