aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Gudmundsson <[email protected]>2019-08-09 12:46:06 +0200
committerDan Gudmundsson <[email protected]>2019-08-09 12:46:06 +0200
commit29a73cf252143a09457a22dca433c4486d1c497b (patch)
tree90a7cad2e270010c0d46c0176ab9a4cae8e048f1
parentaac25aa13f773f62319ec019261dcb90dfe46cbc (diff)
parent2bf7b5b5abbc1097a0b30bea2d390a2a0bec004c (diff)
downloadotp-29a73cf252143a09457a22dca433c4486d1c497b.tar.gz
otp-29a73cf252143a09457a22dca433c4486d1c497b.tar.bz2
otp-29a73cf252143a09457a22dca433c4486d1c497b.zip
Merge branch 'dgud/mnesia/add_table_copy_deadlock/ERL-872/OTP-15933' into maint
* dgud/mnesia/add_table_copy_deadlock/ERL-872/OTP-15933: mnesia: Fix deadlock caused by add_table_copy
-rw-r--r--lib/mnesia/src/mnesia_controller.erl81
-rw-r--r--lib/mnesia/src/mnesia_schema.erl2
-rw-r--r--lib/mnesia/test/mnesia_isolation_test.erl44
3 files changed, 82 insertions, 45 deletions
diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
index 882de0d613..0f221b0c1f 100644
--- a/lib/mnesia/src/mnesia_controller.erl
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -331,35 +331,39 @@ release_schema_commit_lock() ->
%% Special for preparation of add table copy
get_network_copy(Tid, Tab, Cs) ->
-% We can't let the controller queue this one
-% because that may cause a deadlock between schema_operations
-% and initial tableloadings which both takes schema locks.
-% But we have to get copier_done msgs when the other side
-% goes down.
- call({add_other, self()}),
- Reason = {dumper,{add_table_copy, Tid}},
- Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
- %% I'll need this cause it's linked trough the subscriber
- %% might be solved by using monitor in subscr instead.
- process_flag(trap_exit, true),
- Load = load_table_fun(Work),
- Res = ?CATCH(Load()),
- process_flag(trap_exit, false),
- call({del_other, self()}),
- case Res of
- #loader_done{is_loaded = true} ->
- Tab = Res#loader_done.table_name,
- case Res#loader_done.needs_announce of
- true ->
- i_have_tab(Tab);
- false ->
- ignore
- end,
- Res#loader_done.reply;
- #loader_done{} ->
- Res#loader_done.reply;
- Else ->
- {not_loaded, Else}
+ %% We can't let the controller queue this one
+ %% because that may cause a deadlock between schema_operations
+ %% and initial tableloadings which both takes schema locks.
+ %% But we have to get copier_done msgs when the other side
+ %% goes down.
+ case call({add_other, self()}) of
+ ok ->
+ Reason = {dumper,{add_table_copy, Tid}},
+ Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
+ %% I'll need this cause it's linked trough the subscriber
+ %% might be solved by using monitor in subscr instead.
+ process_flag(trap_exit, true),
+ Load = load_table_fun(Work),
+ Res = ?CATCH(Load()),
+ process_flag(trap_exit, false),
+ call({del_other, self()}),
+ case Res of
+ #loader_done{is_loaded = true} ->
+ Tab = Res#loader_done.table_name,
+ case Res#loader_done.needs_announce of
+ true ->
+ i_have_tab(Tab);
+ false ->
+ ignore
+ end,
+ Res#loader_done.reply;
+ #loader_done{} ->
+ Res#loader_done.reply;
+ Else ->
+ {not_loaded, Else}
+ end;
+ {error, Else} ->
+ {not_loaded, Else}
end.
%% This functions is invoked from the dumper
@@ -772,6 +776,18 @@ handle_call({unannounce_add_table_copy, [Tab, Node], From}, ReplyTo, State) ->
noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
end;
+handle_call({add_other, Who}, _From, State = #state{others=Others0, schema_is_merged=SM}) ->
+ case SM of
+ true ->
+ Others = [Who|Others0],
+ {reply, ok, State#state{others=Others}};
+ false ->
+ {reply, {error, {not_active,schema,node()}}, State}
+ end;
+handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
+ Others = lists:delete(Who, Others0),
+ {reply, ok, State#state{others=Others}};
+
handle_call(Msg, From, State) when State#state.schema_is_merged /= true ->
%% Buffer early messages
Msgs = State#state.early_msgs,
@@ -803,13 +819,6 @@ handle_call({block_table, [Tab], From}, _Dummy, State) ->
handle_call({check_w2r, _Node, Tab}, _From, State) ->
{reply, val({Tab, where_to_read}), State};
-handle_call({add_other, Who}, _From, State = #state{others=Others0}) ->
- Others = [Who|Others0],
- {reply, ok, State#state{others=Others}};
-handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
- Others = lists:delete(Who, Others0),
- {reply, ok, State#state{others=Others}};
-
handle_call(Msg, _From, State) ->
error("~p got unexpected call: ~tp~n", [?SERVER_NAME, Msg]),
noreply(State).
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index ef38adca1e..4a86aeb375 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -697,7 +697,7 @@ schema_coordinator(Client, _Fun, undefined) ->
schema_coordinator(Client, Fun, Controller) when is_pid(Controller) ->
%% Do not trap exit in order to automatically die
%% when the controller dies
-
+ put(transaction_client, Client), %% debug
link(Controller),
unlink(Client),
diff --git a/lib/mnesia/test/mnesia_isolation_test.erl b/lib/mnesia/test/mnesia_isolation_test.erl
index 49bcec14af..98a7840b72 100644
--- a/lib/mnesia/test/mnesia_isolation_test.erl
+++ b/lib/mnesia/test/mnesia_isolation_test.erl
@@ -1036,29 +1036,57 @@ add_table_copy(Config) when is_list(Config) ->
Def = [{ram_copies, [ThisNode]}, {attributes, [key, attr1, attr2]}],
?match({atomic, ok}, mnesia:create_table(Tab, Def)),
insert(Tab, 50),
- {success, [A]} = ?start_activities([ThisNode]),
+ {success, [A]} = ?start_activities([ThisNode]),
mnesia_test_lib:start_sync_transactions([A], 0),
A ! fun() -> mnesia:write({Tab, 1, 1, updated}) end,
?match_receive({A, ok}), %% A is executed
- Pid = spawn_link(?MODULE, op, [self(), mnesia, add_table_copy,
+ Pid = spawn_link(?MODULE, op, [self(), mnesia, add_table_copy,
[Tab, Node2, ram_copies]]),
-
+
?match_receive(timeout), %% op waits for locks occupied by A
A ! end_trans, %% Kill A, locks should be released
- ?match_receive({A,{atomic,end_trans}}),
-
- receive
+ ?match_receive({A,{atomic,end_trans}}),
+
+ receive
Msg -> ?match({Pid, {atomic, ok}}, Msg)
after
timer:seconds(20) -> ?error("Operation timed out", [])
end,
+ ?match_receive({'EXIT', Pid, normal}),
sys:get_status(whereis(mnesia_locker)), % Explicit sync, release locks is async
- ?match([], mnesia:system_info(held_locks)),
- ?match([], mnesia:system_info(lock_queue)),
+ ?match([], mnesia:system_info(held_locks)),
+ ?match([], mnesia:system_info(lock_queue)),
+
+ {atomic, ok} = mnesia:del_table_copy(Tab, Node2),
+ Self = self(),
+ New = spawn_link(Node2,
+ fun () ->
+ application:stop(mnesia),
+ Self ! {self(), ok},
+ io:format(user, "restart mnesia~n", []),
+ Self ! {self(), catch application:start(mnesia)}
+ end),
+ receive {New,ok} -> ok end,
+
+ Add = fun Add() ->
+ case mnesia:add_table_copy(Tab, Node2, disc_copies) of
+ {atomic, ok} -> ok;
+ _R -> io:format(user, "aborted with reason ~p~n", [_R]),
+ timer:sleep(10),
+ Add()
+ end
+ end,
+
+ ?match(ok, Add()),
+ ?match_receive({New,ok}),
+
+ sys:get_status(whereis(mnesia_locker)), % Explicit sync, release locks is async
+ ?match([], mnesia:system_info(held_locks)),
+ ?match([], mnesia:system_info(lock_queue)),
ok.
del_table_copy(suite) -> [];