aboutsummaryrefslogtreecommitdiffstats
path: root/lib/mnesia/src/mnesia_controller.erl
diff options
context:
space:
mode:
authorUlf Wiger <[email protected]>2010-05-09 11:34:18 +0200
committerRaimo Niskanen <[email protected]>2010-06-02 09:28:54 +0200
commitf58fcc6a8698b693ac0d4ff5c90d6b83d24e3c6a (patch)
tree3225bc2f5863454f7536a8a6305cb77e560caf1c /lib/mnesia/src/mnesia_controller.erl
parenta21a9dac550bcb976fa074d7005499a4d6b55791 (diff)
downloadotp-f58fcc6a8698b693ac0d4ff5c90d6b83d24e3c6a.tar.gz
otp-f58fcc6a8698b693ac0d4ff5c90d6b83d24e3c6a.tar.bz2
otp-f58fcc6a8698b693ac0d4ff5c90d6b83d24e3c6a.zip
Allow a user_defined function to wrap mnesia_schema:merge_schema()
Mnesia currently notifies the user if it detects a partitioned network, but the options for resolving the situation are limited. In practice, the only safe options are: - set master_nodes and restart one of the affected 'islands' - restart the entire system from backup This patch introduces a way to resolve the situation without restarting any nodes. The key to doing this safely is to lock affected tables and run the merge function inside the same transaction that merges the schema. Otherwise, one transaction will merge the schema, after which writes to the database will be replicated across the (potentially) inconsistent copies; the transaction triggered by the asynchronous inconsistency event will have to race to be the first to access the tables. The normal call to merge the schema is done from mnesia_controller. Previously, this was mnesia_schema:merge_schema(). The new function is merge_schema(UserFun), with the following behaviour: merge_schema(UserFun) -> schema_transaction( fun() -> UserFun(fun(Arg) -> do_merge_schema(Arg) end) end). Where do_merge_schema(LockTabs) will execute the schema merge as before, but also lock all tables in the list LockTabs which have copies on the affected nodes (that is, everywhere the schema table is locked). The effect of this is to allow a wrapper function that calls the merge and, if successful, continues to resolve the inconsistency on the tables, knowing that they have now been locked on all affected nodes. The function that is actually called by the deconflict function is mnesia_controller:connect_nodes(Nodes, UserFun), as in: Tables = tables_to_deconflict(Node), mnesia_controller:connect_nodes( [Node], fun(MergeF) -> case MergeF(Tables) of {merged,_,_} -> deconflict(Tables, Node); Other -> Other end). In the case where the merge fails, it is probably wise to restart from a backup... I have not run the mnesia test suite, as it is not available. I have not updated documentation, as these functions are not documented in the first place.
Diffstat (limited to 'lib/mnesia/src/mnesia_controller.erl')
-rw-r--r--lib/mnesia/src/mnesia_controller.erl27
1 files changed, 17 insertions, 10 deletions
diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
index 9bc480e619..9971953c1f 100644
--- a/lib/mnesia/src/mnesia_controller.erl
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -52,6 +52,7 @@
async_dump_log/1,
sync_dump_log/1,
connect_nodes/1,
+ connect_nodes/2,
wait_for_schema_commit_lock/0,
release_schema_commit_lock/0,
create_table/1,
@@ -94,7 +95,7 @@
load_and_reply/2,
send_and_reply/2,
wait_for_tables_init/2,
- connect_nodes2/2
+ connect_nodes2/3
]).
-import(mnesia_lib, [set/2, add/2]).
@@ -420,12 +421,15 @@ try_schedule_late_disc_load(Tabs, Reason, MsgTag) ->
[[Tabs, Reason, MsgTag], AbortReason])
end.
-connect_nodes(Ns) ->
+connect_nodes(Ns) ->
+ connect_nodes(Ns, fun default_merge/1).
+
+connect_nodes(Ns, UserFun) ->
case mnesia:system_info(is_running) of
no ->
{error, {node_not_running, node()}};
yes ->
- Pid = spawn_link(?MODULE,connect_nodes2,[self(),Ns]),
+ Pid = spawn_link(?MODULE,connect_nodes2,[self(),Ns, UserFun]),
receive
{?MODULE, Pid, Res, New} ->
case Res of
@@ -443,7 +447,7 @@ connect_nodes(Ns) ->
end
end.
-connect_nodes2(Father, Ns) ->
+connect_nodes2(Father, Ns, UserFun) ->
Current = val({current, db_nodes}),
abcast([node()|Ns], {merging_schema, node()}),
{NewC, OldC} = mnesia_recover:connect_nodes(Ns),
@@ -451,7 +455,7 @@ connect_nodes2(Father, Ns) ->
New1 = mnesia_lib:intersect(Ns, Connected),
New = New1 -- Current,
process_flag(trap_exit, true),
- Res = try_merge_schema(New),
+ Res = try_merge_schema(New, UserFun),
Msg = {schema_is_merged, [], late_merge, []},
multicall([node()|Ns], Msg),
After = val({current, db_nodes}),
@@ -465,7 +469,7 @@ connect_nodes2(Father, Ns) ->
merge_schema() ->
AllNodes = mnesia_lib:all_nodes(),
- case try_merge_schema(AllNodes) of
+ case try_merge_schema(AllNodes, fun default_merge/1) of
ok ->
schema_is_merged();
{aborted, {throw, Str}} when is_list(Str) ->
@@ -474,8 +478,11 @@ merge_schema() ->
fatal("Failed to merge schema: ~p~n", [Else])
end.
-try_merge_schema(Nodes) ->
- case mnesia_schema:merge_schema() of
+default_merge(F) ->
+ F([]).
+
+try_merge_schema(Nodes, UserFun) ->
+ case mnesia_schema:merge_schema(UserFun) of
{atomic, not_merged} ->
%% No more nodes that we need to merge the schema with
ok;
@@ -488,11 +495,11 @@ try_merge_schema(Nodes) ->
im_running(OldFriends, NewFriends),
im_running(NewFriends, OldFriends),
- try_merge_schema(Nodes);
+ try_merge_schema(Nodes, UserFun);
{atomic, {"Cannot get cstructs", Node, Reason}} ->
dbg_out("Cannot get cstructs, Node ~p ~p~n", [Node, Reason]),
timer:sleep(1000), % Avoid a endless loop look alike
- try_merge_schema(Nodes);
+ try_merge_schema(Nodes, UserFun);
Other ->
Other
end.