From 6d446fc5d08d56174a79ab546d5aa2e79277af06 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Thu, 9 Dec 2010 15:08:52 +0100
Subject: Add {majority, boolean()} per-table option.
With {majority, true} set for a table, write transactions will
abort if they cannot commit to a majority of the nodes that
have a copy of the table. Currently, the implementation hooks
into the prepare_commit, and forces an asymmetric transaction
if the commit set affects any table with the majority flag set.
In the commit itself, the transaction will abort if it cannot
satisfy the majority requirement for all tables involved in the
transaction.
A future optimization might be to abort already when a write
lock is attempted on such a table (/-object) and the lock cannot
be set on enough nodes.
This functionality introduces the possibility to automatically
"fence off" a table in the presence of failures.
This is a first implementation. Only basic tests have been
performed.
---
lib/mnesia/src/mnesia.hrl | 1 +
lib/mnesia/src/mnesia_schema.erl | 15 +++++++-
lib/mnesia/src/mnesia_tm.erl | 76 ++++++++++++++++++++++++++++++++++------
3 files changed, 81 insertions(+), 11 deletions(-)
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl
index d488d9364a..26537815a3 100644
--- a/lib/mnesia/src/mnesia.hrl
+++ b/lib/mnesia/src/mnesia.hrl
@@ -62,6 +62,7 @@
disc_only_copies = [], % [Node]
load_order = 0, % Integer
access_mode = read_write, % read_write | read_only
+ majority = false, % true | false
index = [], % [Integer]
snmp = [], % Snmp Ustruct
local_content = false, % true | false
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index d1d892a387..a6c8ffec01 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -178,6 +178,7 @@ do_set_schema(Tab, Cs) ->
set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies),
set({Tab, load_order}, Cs#cstruct.load_order),
set({Tab, access_mode}, Cs#cstruct.access_mode),
+ set({Tab, majority}, Cs#cstruct.majority),
set({Tab, snmp}, Cs#cstruct.snmp),
set({Tab, user_properties}, Cs#cstruct.user_properties),
[set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties],
@@ -651,6 +652,7 @@ list2cs(List) when is_list(List) ->
Snmp = pick(Name, snmp, List, []),
LoadOrder = pick(Name, load_order, List, 0),
AccessMode = pick(Name, access_mode, List, read_write),
+ Majority = pick(Name, majority, List, false),
UserProps = pick(Name, user_properties, List, []),
verify({alt, [nil, list]}, mnesia_lib:etype(UserProps),
{bad_type, Name, {user_properties, UserProps}}),
@@ -676,6 +678,7 @@ list2cs(List) when is_list(List) ->
snmp = Snmp,
load_order = LoadOrder,
access_mode = AccessMode,
+ majority = Majority,
local_content = LC,
record_name = RecName,
attributes = Attrs,
@@ -809,7 +812,16 @@ verify_cstruct(Cs) when is_record(Cs, cstruct) ->
Access = Cs#cstruct.access_mode,
verify({alt, [read_write, read_only]}, Access,
{bad_type, Tab, {access_mode, Access}}),
-
+ Majority = Cs#cstruct.majority,
+ verify({alt, [true, false]}, Majority,
+ {bad_type, Tab, {majority, Majority}}),
+ case Majority of
+ true ->
+ verify(false, LC,
+ {combine_error, Tab, [{local_content,true},{majority,true}]});
+ false ->
+ ok
+ end,
Snmp = Cs#cstruct.snmp,
verify(true, mnesia_snmp_hook:check_ustruct(Snmp),
{badarg, Tab, {snmp, Snmp}}),
@@ -2971,6 +2983,7 @@ merge_versions(AnythingNew, Cs, RemoteCs, Force) ->
Cs#cstruct.index == RemoteCs#cstruct.index,
Cs#cstruct.snmp == RemoteCs#cstruct.snmp,
Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode,
+ Cs#cstruct.majority == RemoteCs#cstruct.majority,
Cs#cstruct.load_order == RemoteCs#cstruct.load_order,
Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties ->
do_merge_versions(AnythingNew, Cs, RemoteCs);
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index bb8e788b40..c7a0c28589 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -64,7 +64,8 @@
prev_tab = [], % initiate to a non valid table name
prev_types,
prev_snmp,
- types
+ types,
+ majority = []
}).
-record(participant, {tid, pid, commit, disc_nodes = [],
@@ -1100,9 +1101,12 @@ t_commit(Type) ->
case arrange(Tid, Store, Type) of
{N, Prep} when N > 0 ->
multi_commit(Prep#prep.protocol,
+ majority_attr(Prep),
Tid, Prep#prep.records, Store);
{0, Prep} ->
- multi_commit(read_only, Tid, Prep#prep.records, Store)
+ multi_commit(read_only,
+ majority_attr(Prep),
+ Tid, Prep#prep.records, Store)
end;
true ->
%% nested commit
@@ -1117,6 +1121,12 @@ t_commit(Type) ->
do_commit_nested
end.
+majority_attr(#prep{majority = M}) ->
+ M;
+majority_attr(_) ->
+ [].
+
+
%% This function arranges for all objects we shall write in S to be
%% in a list of {Node, CommitRecord}
%% Important function for the performance of mnesia.
@@ -1222,11 +1232,13 @@ prepare_items(Tid, Tab, Key, Items, Prep) ->
{blocked, _} ->
unblocked = req({unblock_me, Tab}),
prepare_items(Tid, Tab, Key, Items, Prep);
- _ ->
+ _ ->
+ Majority = needs_majority(Tab, Prep),
Snmp = val({Tab, snmp}),
Recs2 = do_prepare_items(Tid, Tab, Key, Types,
Snmp, Items, Prep#prep.records),
Prep2 = Prep#prep{records = Recs2, prev_tab = Tab,
+ majority = Majority,
prev_types = Types, prev_snmp = Snmp},
check_prep(Prep2, Types)
end.
@@ -1235,6 +1247,41 @@ do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) ->
Recs2 = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit
prepare_nodes(Tid, Types, Items, Recs2, normal).
+
+needs_majority(Tab, #prep{majority = M}) ->
+ case lists:keymember(Tab, 1, M) of
+ true ->
+ M;
+ false ->
+ case ?catch_val({Tab, majority}) of
+ {'EXIT', _} ->
+ M;
+ false ->
+ [{Tab, []} | M];
+ true ->
+ CopyHolders = all_copy_holders(Tab),
+ [{Tab, CopyHolders} | M]
+ end
+ end.
+
+all_copy_holders(Tab) ->
+ DC = val({Tab, disc_copies}),
+ DO = val({Tab, disc_only_copies}),
+ RC = val({Tab, ram_copies}),
+ DC ++ DO ++ RC.
+
+have_majority([], _) ->
+ ok;
+have_majority([{Tab, AllNodes} | Rest], Nodes) ->
+ Missing = AllNodes -- Nodes,
+ Present = AllNodes -- Missing,
+ case length(Present) > length(Missing) of
+ true ->
+ have_majority(Rest, Nodes);
+ false ->
+ {error, Tab}
+ end.
+
prepare_snmp(Tab, Key, Items) ->
case val({Tab, snmp}) of
[] ->
@@ -1261,10 +1308,15 @@ prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) ->
prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp)
end.
-check_prep(Prep, Types) when Prep#prep.types == Types ->
+check_prep(#prep{majority = [], types = Types} = Prep, Types) ->
Prep;
-check_prep(Prep, Types) when Prep#prep.types == undefined ->
- Prep#prep{types = Types};
+check_prep(#prep{majority = M, types = undefined} = Prep, Types) ->
+ Protocol = if M == [] ->
+ Prep#prep.protocol;
+ true ->
+ asym_trans
+ end,
+ Prep#prep{protocol = Protocol, types = Types};
check_prep(Prep, _Types) ->
Prep#prep{protocol = asym_trans}.
@@ -1311,7 +1363,7 @@ prepare_node(_Node, _Storage, [], Rec, _Kind) ->
%% multi_commit((Protocol, Tid, CommitRecords, Store)
%% Local work is always performed in users process
-multi_commit(read_only, Tid, CR, _Store) ->
+multi_commit(read_only, _Maj = [], Tid, CR, _Store) ->
%% This featherweight commit protocol is used when no
%% updates has been performed in the transaction.
@@ -1324,7 +1376,7 @@ multi_commit(read_only, Tid, CR, _Store) ->
?MODULE ! {delete_transaction, Tid},
do_commit;
-multi_commit(sym_trans, Tid, CR, Store) ->
+multi_commit(sym_trans, _Maj = [], Tid, CR, Store) ->
%% This lightweight commit protocol is used when all
%% the involved tables are replicated symetrically.
%% Their storage types must match on each node.
@@ -1376,7 +1428,7 @@ multi_commit(sym_trans, Tid, CR, Store) ->
[{tid, Tid}, {outcome, Outcome}]),
Outcome;
-multi_commit(sync_sym_trans, Tid, CR, Store) ->
+multi_commit(sync_sym_trans, _Maj = [], Tid, CR, Store) ->
%% This protocol is the same as sym_trans except that it
%% uses syncronized calls to disk_log and syncronized commits
%% when several nodes are involved.
@@ -1408,7 +1460,7 @@ multi_commit(sync_sym_trans, Tid, CR, Store) ->
[{tid, Tid}, {outcome, Outcome}]),
Outcome;
-multi_commit(asym_trans, Tid, CR, Store) ->
+multi_commit(asym_trans, Majority, Tid, CR, Store) ->
%% This more expensive commit protocol is used when
%% table definitions are changed (schema transactions).
%% It is also used when the involved tables are
@@ -1469,6 +1521,10 @@ multi_commit(asym_trans, Tid, CR, Store) ->
{D2, CR2} = commit_decision(D, CR, [], []),
DiscNs = D2#decision.disc_nodes,
RamNs = D2#decision.ram_nodes,
+ case have_majority(Majority, DiscNs ++ RamNs) of
+ ok -> ok;
+ {error, Tab} -> mnesia:abort({no_majority, Tab})
+ end,
Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
?ets_insert(Store, Pending),
{WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs),
--
cgit v1.2.3
From 38eef7af0f8bc1d2d152cefb8df1e1303ddcac45 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Thu, 9 Dec 2010 18:14:15 +0100
Subject: Write locks now check majority when needed.
Since the table loader also sets (table) write locks, a special
lock type, 'load', was needed. Unfortunately, this affects mnesia
activity callbacks that redefine the lock operation.
---
lib/mnesia/src/mnesia.erl | 6 ++++++
lib/mnesia/src/mnesia_lib.erl | 10 ++++++++++
lib/mnesia/src/mnesia_loader.erl | 2 +-
lib/mnesia/src/mnesia_locker.erl | 33 ++++++++++++++++++++++++++++++++-
lib/mnesia/src/mnesia_schema.erl | 1 +
lib/mnesia/src/mnesia_tm.erl | 12 ++----------
6 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl
index 025b32f506..43953f9ad4 100644
--- a/lib/mnesia/src/mnesia.erl
+++ b/lib/mnesia/src/mnesia.erl
@@ -39,6 +39,7 @@
%% Access within an activity - Lock acquisition
lock/2, lock/4,
+ lock_table/2,
read_lock_table/1,
write_lock_table/1,
@@ -415,6 +416,9 @@ lock(LockItem, LockKind) ->
abort(no_transaction)
end.
+lock_table(Tab, LockKind) ->
+ lock({table, Tab}, LockKind).
+
lock(Tid, Ts, LockItem, LockKind) ->
case element(1, Tid) of
tid ->
@@ -467,6 +471,8 @@ lock_table(Tid, Ts, Tab, LockKind) when is_atom(Tab) ->
mnesia_locker:rlock_table(Tid, Store, Tab);
write ->
mnesia_locker:wlock_table(Tid, Store, Tab);
+ load ->
+ mnesia_locker:load_lock_table(Tid, Store, Tab);
sticky_write ->
mnesia_locker:sticky_wlock_table(Tid, Store, Tab);
none ->
diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl
index 36bcfe8de9..7e926a6258 100644
--- a/lib/mnesia/src/mnesia_lib.erl
+++ b/lib/mnesia/src/mnesia_lib.erl
@@ -96,6 +96,8 @@
exists/1,
fatal/2,
get_node_number/0,
+ have_majority/2,
+ have_majority/3,
fix_error/1,
important/2,
incr_counter/1,
@@ -660,6 +662,14 @@ proc_info(_) -> false.
get_node_number() ->
{node(), self()}.
+have_majority(Tab, HaveNodes) ->
+ have_majority(Tab, val({Tab, all_nodes}), HaveNodes).
+
+have_majority(_Tab, AllNodes, HaveNodes) ->
+ Missing = AllNodes -- HaveNodes,
+ Present = AllNodes -- Missing,
+ length(Present) > length(Missing).
+
read_log_files() ->
[{F, catch file:read_file(F)} || F <- mnesia_log:log_files()].
diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl
index 3de329503e..e785b795d1 100644
--- a/lib/mnesia/src/mnesia_loader.erl
+++ b/lib/mnesia/src/mnesia_loader.erl
@@ -702,7 +702,7 @@ send_table(Pid, Tab, RemoteS) ->
prepare_copy(Pid, Tab, Storage) ->
Trans =
fun() ->
- mnesia:write_lock_table(Tab),
+ mnesia:lock_table(Tab, load),
mnesia_subscr:subscribe(Pid, {table, Tab}),
update_where_to_write(Tab, node(Pid)),
mnesia_lib:db_fixtable(Storage, Tab, true),
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
index ca0cc79c45..9822dfd116 100644
--- a/lib/mnesia/src/mnesia_locker.erl
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -40,7 +40,8 @@
sticky_wlock_table/3,
wlock/3,
wlock_no_exist/4,
- wlock_table/3
+ wlock_table/3,
+ load_lock_table/3
]).
%% sys callback functions
@@ -657,6 +658,7 @@ rwlock(Tid, Store, Oid) ->
case need_lock(Store, Tab, Key, Lock) of
yes ->
Ns = w_nodes(Tab),
+ check_majority(Tab, Ns),
Res = get_rwlocks_on_nodes(Ns, rwlock, Node, Store, Tid, Oid),
?ets_insert(Store, {{locks, Tab, Key}, Lock}),
Res;
@@ -683,6 +685,28 @@ w_nodes(Tab) ->
_ -> mnesia:abort({no_exists, Tab})
end.
+%% If the table has the 'majority' flag set, we can
+%% only take a write lock if we see a majority of the
+%% nodes.
+
+check_majority(true, Tab, HaveNs) ->
+ check_majority(Tab, HaveNs);
+check_majority(false, _, _) ->
+ ok.
+
+check_majority(Tab, HaveNs) ->
+ case ?catch_val({Tab, majority}) of
+ true ->
+ case mnesia_lib:have_majority(Tab, HaveNs) of
+ true ->
+ ok;
+ false ->
+ mnesia:abort({no_majority, Tab})
+ end;
+ _ ->
+ ok
+ end.
+
%% aquire a sticky wlock, a sticky lock is a lock
%% which remains at this node after the termination of the
%% transaction.
@@ -773,10 +797,14 @@ sticky_wlock_table(Tid, Store, Tab) ->
%% local store when we have aquired the lock.
%%
wlock(Tid, Store, Oid) ->
+ wlock(Tid, Store, Oid, _CheckMajority = true).
+
+wlock(Tid, Store, Oid, CheckMajority) ->
{Tab, Key} = Oid,
case need_lock(Store, Tab, Key, write) of
yes ->
Ns = w_nodes(Tab),
+ check_majority(CheckMajority, Tab, Ns),
Op = {self(), {write, Tid, Oid}},
?ets_insert(Store, {{locks, Tab, Key}, write}),
get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid);
@@ -789,6 +817,9 @@ wlock(Tid, Store, Oid) ->
wlock_table(Tid, Store, Tab) ->
wlock(Tid, Store, {Tab, ?ALL}).
+load_lock_table(Tid, Store, Tab) ->
+ wlock(Tid, Store, {Tab, ?ALL}, _CheckMajority = false).
+
%% Write lock even if the table does not exist
wlock_no_exist(Tid, Store, Tab, Ns) ->
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index a6c8ffec01..b3f06322d9 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -179,6 +179,7 @@ do_set_schema(Tab, Cs) ->
set({Tab, load_order}, Cs#cstruct.load_order),
set({Tab, access_mode}, Cs#cstruct.access_mode),
set({Tab, majority}, Cs#cstruct.majority),
+ set({Tab, all_nodes}, mnesia_lib:cs_to_nodes(Cs)),
set({Tab, snmp}, Cs#cstruct.snmp),
set({Tab, user_properties}, Cs#cstruct.user_properties),
[set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties],
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index c7a0c28589..0b87c40add 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -1259,23 +1259,15 @@ needs_majority(Tab, #prep{majority = M}) ->
false ->
[{Tab, []} | M];
true ->
- CopyHolders = all_copy_holders(Tab),
+ CopyHolders = val({Tab, all_nodes}),
[{Tab, CopyHolders} | M]
end
end.
-all_copy_holders(Tab) ->
- DC = val({Tab, disc_copies}),
- DO = val({Tab, disc_only_copies}),
- RC = val({Tab, ram_copies}),
- DC ++ DO ++ RC.
-
have_majority([], _) ->
ok;
have_majority([{Tab, AllNodes} | Rest], Nodes) ->
- Missing = AllNodes -- Nodes,
- Present = AllNodes -- Missing,
- case length(Present) > length(Missing) of
+ case mnesia_lib:have_majority(Tab, AllNodes, Nodes) of
true ->
have_majority(Rest, Nodes);
false ->
--
cgit v1.2.3
From 5309d521c394aa7ca1feb679dbda6333a3cb4873 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Mon, 13 Dec 2010 08:08:46 +0100
Subject: check majority for sticky locks
---
lib/mnesia/src/mnesia_locker.erl | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
index 9822dfd116..2025a2ab37 100644
--- a/lib/mnesia/src/mnesia_locker.erl
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -689,6 +689,7 @@ w_nodes(Tab) ->
%% only take a write lock if we see a majority of the
%% nodes.
+
check_majority(true, Tab, HaveNs) ->
check_majority(Tab, HaveNs);
check_majority(false, _, _) ->
@@ -732,6 +733,7 @@ sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
end.
do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
+ sticky_check_majority(Lock, Tab),
?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
N = node(),
receive
@@ -761,6 +763,22 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
dirty_sticky_lock(Tab, Key, [N], Lock)
end.
+sticky_check_majority(read, _) ->
+ ok;
+sticky_check_majority(write, Tab) ->
+ case ?catch_val({Tab, majority}) of
+ true ->
+ HaveNodes = val({Tab, where_to_write}),
+ case mnesia_lib:have_majority(Tab, HaveNodes) of
+ true ->
+ ok;
+ false ->
+ mnesia:abort({no_majority, Tab})
+ end;
+ _ ->
+ ok
+ end.
+
not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) ->
rlock(Tid, Store, {Tab, ?ALL}), %% needed?
wlock(Tid, Store, Oid), %% perfect sync
--
cgit v1.2.3
From b10094bc2f120ceda0e927d31067670e26a0a4d7 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Tue, 14 Dec 2010 19:27:26 +0100
Subject: optimize sticky_lock maj. check
---
lib/mnesia/src/mnesia_locker.erl | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
index 2025a2ab37..635668ff59 100644
--- a/lib/mnesia/src/mnesia_locker.erl
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -733,13 +733,14 @@ sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
end.
do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
- sticky_check_majority(Lock, Tab),
+ WNodes = w_nodes(Tab),
+ sticky_check_majority(Lock, Tab, WNodes),
?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
N = node(),
receive
{?MODULE, N, granted} ->
?ets_insert(Store, {{locks, Tab, Key}, write}),
- [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ [?ets_insert(Store, {nodes, Node}) || Node <- WNodes],
granted;
{?MODULE, N, {granted, Val}} -> %% for rwlocks
case opt_lookup_in_client(Val, Oid, write) of
@@ -747,7 +748,7 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
exit({aborted, C});
Val2 ->
?ets_insert(Store, {{locks, Tab, Key}, write}),
- [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ [?ets_insert(Store, {nodes, Node}) || Node <- WNodes],
Val2
end;
{?MODULE, N, {not_granted, Reason}} ->
@@ -763,13 +764,12 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
dirty_sticky_lock(Tab, Key, [N], Lock)
end.
-sticky_check_majority(read, _) ->
+sticky_check_majority(read, _, _) ->
ok;
-sticky_check_majority(write, Tab) ->
+sticky_check_majority(write, Tab, WNodes) ->
case ?catch_val({Tab, majority}) of
true ->
- HaveNodes = val({Tab, where_to_write}),
- case mnesia_lib:have_majority(Tab, HaveNodes) of
+ case mnesia_lib:have_majority(Tab, WNodes) of
true ->
ok;
false ->
--
cgit v1.2.3
From ca1412a4f614942fcfe6601c3e8b5d6d2df153f7 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Thu, 27 Jan 2011 19:33:34 +0100
Subject: bug in mnesia_tm:needs_majority/2
---
lib/mnesia/src/mnesia_tm.erl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index 0b87c40add..01abc14a81 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -1257,7 +1257,7 @@ needs_majority(Tab, #prep{majority = M}) ->
{'EXIT', _} ->
M;
false ->
- [{Tab, []} | M];
+ M;
true ->
CopyHolders = val({Tab, all_nodes}),
[{Tab, CopyHolders} | M]
--
cgit v1.2.3
From 54235590360d5f6d9a390ec3f0ac7e3f4e603c42 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Sun, 30 Jan 2011 19:11:21 +0100
Subject: where_to_wlock optimization + change_table_majority/2
---
lib/mnesia/src/mnesia.erl | 5 +++-
lib/mnesia/src/mnesia_controller.erl | 26 ++++++++++++++++---
lib/mnesia/src/mnesia_dumper.erl | 8 ++++++
lib/mnesia/src/mnesia_locker.erl | 48 +++++++++++++++++-------------------
lib/mnesia/src/mnesia_schema.erl | 38 ++++++++++++++++++++++++++++
5 files changed, 96 insertions(+), 29 deletions(-)
diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl
index 43953f9ad4..980a9c6213 100644
--- a/lib/mnesia/src/mnesia.erl
+++ b/lib/mnesia/src/mnesia.erl
@@ -93,7 +93,7 @@
add_table_copy/3, del_table_copy/2, move_table_copy/3,
add_table_index/2, del_table_index/2,
transform_table/3, transform_table/4,
- change_table_copy_type/3,
+ change_table_copy_type/3, change_table_majority/2,
read_table_property/2, write_table_property/2, delete_table_property/2,
change_table_frag/2,
clear_table/1, clear_table/4,
@@ -2461,6 +2461,9 @@ change_table_access_mode(T, Access) ->
change_table_load_order(T, O) ->
mnesia_schema:change_table_load_order(T, O).
+change_table_majority(T, M) ->
+ mnesia_schema:change_table_majority(T, M).
+
set_master_nodes(Nodes) when is_list(Nodes) ->
UseDir = system_info(use_dir),
IsRunning = system_info(is_running),
diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
index 0254769758..d4b2c7b5cc 100644
--- a/lib/mnesia/src/mnesia_controller.erl
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -72,6 +72,7 @@
add_active_replica/4,
update/1,
change_table_access_mode/1,
+ change_table_majority/1,
del_active_replica/2,
wait_for_tables/2,
get_network_copy/2,
@@ -690,7 +691,8 @@ handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State)
case lists:member(AddNode, Current) and
(State#state.schema_is_merged == true) of
true ->
- mnesia_lib:add_lsort({Tab, where_to_write}, AddNode);
+ mnesia_lib:add_lsort({Tab, where_to_write}, AddNode),
+ update_where_to_wlock(Tab);
false ->
ignore
end,
@@ -1690,6 +1692,8 @@ add_active_replica(Tab, Node, Storage, AccessMode) ->
set(Var, mark_blocked_tab(Blocked, Del)),
mnesia_lib:del({Tab, where_to_write}, Node)
end,
+
+ update_where_to_wlock(Tab),
add({Tab, active_replicas}, Node).
del_active_replica(Tab, Node) ->
@@ -1699,7 +1703,8 @@ del_active_replica(Tab, Node) ->
New = lists:sort(Del),
set(Var, mark_blocked_tab(Blocked, New)), % where_to_commit
mnesia_lib:del({Tab, active_replicas}, Node),
- mnesia_lib:del({Tab, where_to_write}, Node).
+ mnesia_lib:del({Tab, where_to_write}, Node),
+ update_where_to_wlock(Tab).
change_table_access_mode(Cs) ->
W = fun() ->
@@ -1708,7 +1713,22 @@ change_table_access_mode(Cs) ->
val({Tab, active_replicas}))
end,
update(W).
-
+
+change_table_majority(Cs) ->
+ W = fun() ->
+ Tab = Cs#cstruct.name,
+ set({Tab, majority}, Cs#cstruct.majority),
+ update_where_to_wlock(Tab)
+ end,
+ update(W).
+
+update_where_to_wlock(Tab) ->
+ WNodes = val({Tab, where_to_write}),
+ Majority = case catch val({Tab, majority}) of
+ true -> true;
+ _ -> false
+ end,
+ set({Tab, where_to_wlock}, {WNodes, Majority}).
%% node To now has tab loaded, but this must be undone
%% This code is rpc:call'ed from the tab_copier process
diff --git a/lib/mnesia/src/mnesia_dumper.erl b/lib/mnesia/src/mnesia_dumper.erl
index 644133cf5d..9a0a2c4dcc 100644
--- a/lib/mnesia/src/mnesia_dumper.erl
+++ b/lib/mnesia/src/mnesia_dumper.erl
@@ -896,6 +896,14 @@ insert_op(Tid, _, {op, change_table_access_mode,TabDef, _OldAccess, _Access}, In
end,
insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+insert_op(Tid, _, {op, change_table_majority, TabDef, _OldMajority, _Majority}, InPlace, InitBy) ->
+    Cs = mnesia_schema:list2cs(TabDef), % rebuild the cstruct carried by the schema op
+    case InitBy of
+        startup -> ignore;
+        _ -> mnesia_controller:change_table_majority(Cs) % push the new majority flag to the controller
+    end,
+    insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
insert_op(Tid, _, {op, change_table_load_order, TabDef, _OldLevel, _Level}, InPlace, InitBy) ->
Cs = mnesia_schema:list2cs(TabDef),
insert_cstruct(Tid, Cs, true, InPlace, InitBy);
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
index 635668ff59..0492d794f3 100644
--- a/lib/mnesia/src/mnesia_locker.erl
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -657,17 +657,17 @@ rwlock(Tid, Store, Oid) ->
Lock = write,
case need_lock(Store, Tab, Key, Lock) of
yes ->
- Ns = w_nodes(Tab),
- check_majority(Tab, Ns),
+ {Ns, Majority} = w_nodes(Tab),
+ check_majority(Majority, Tab, Ns),
Res = get_rwlocks_on_nodes(Ns, rwlock, Node, Store, Tid, Oid),
?ets_insert(Store, {{locks, Tab, Key}, Lock}),
Res;
no ->
if
Key == ?ALL ->
- w_nodes(Tab);
+ element(1, w_nodes(Tab)); % w_nodes/1 now returns {Nodes, Majority}; keep returning Nodes
Tab == ?GLOBAL ->
- w_nodes(Tab);
+ element(1, w_nodes(Tab)); % element 2 is the majority flag, not the node list
true ->
dirty_rpc(Node, Tab, Key, Lock)
end
@@ -679,9 +679,8 @@ rwlock(Tid, Store, Oid) ->
%% in the local store under the key == nodes
w_nodes(Tab) ->
- Nodes = ?catch_val({Tab, where_to_write}),
- case Nodes of
- [_ | _] -> Nodes;
+ case ?catch_val({Tab, where_to_wlock}) of
+ {[_ | _], _} = Where -> Where;
_ -> mnesia:abort({no_exists, Tab})
end.
@@ -733,8 +732,8 @@ sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
end.
do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
- WNodes = w_nodes(Tab),
- sticky_check_majority(Lock, Tab, WNodes),
+ {WNodes, Majority} = w_nodes(Tab),
+ sticky_check_majority(Lock, Tab, Majority, WNodes),
?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
N = node(),
receive
@@ -764,20 +763,15 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
dirty_sticky_lock(Tab, Key, [N], Lock)
end.
-sticky_check_majority(read, _, _) ->
- ok;
-sticky_check_majority(write, Tab, WNodes) ->
- case ?catch_val({Tab, majority}) of
+sticky_check_majority(W, Tab, true, WNodes) when W==write; W==read_write ->
+ case mnesia_lib:have_majority(Tab, WNodes) of
true ->
- case mnesia_lib:have_majority(Tab, WNodes) of
- true ->
- ok;
- false ->
- mnesia:abort({no_majority, Tab})
- end;
- _ ->
- ok
- end.
+ ok;
+ false ->
+ mnesia:abort({no_majority, Tab})
+ end;
+sticky_check_majority(_, _, _, _) ->
+ ok.
not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) ->
rlock(Tid, Store, {Tab, ?ALL}), %% needed?
@@ -821,15 +815,19 @@ wlock(Tid, Store, Oid, CheckMajority) ->
{Tab, Key} = Oid,
case need_lock(Store, Tab, Key, write) of
yes ->
- Ns = w_nodes(Tab),
- check_majority(CheckMajority, Tab, Ns),
+ {Ns, Majority} = w_nodes(Tab),
+ if CheckMajority ->
+ check_majority(Majority, Tab, Ns);
+ true ->
+ ignore
+ end,
Op = {self(), {write, Tid, Oid}},
?ets_insert(Store, {{locks, Tab, Key}, write}),
get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid);
no when Key /= ?ALL, Tab /= ?GLOBAL ->
[];
no ->
- w_nodes(Tab)
+ element(1, w_nodes(Tab)) % w_nodes/1 now returns {Nodes, Majority}; return the node list as before
end.
wlock_table(Tid, Store, Tab) ->
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index b3f06322d9..360da8a17d 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -37,6 +37,7 @@
change_table_copy_type/3,
change_table_access_mode/2,
change_table_load_order/2,
+ change_table_majority/2,
change_table_frag/2,
clear_table/1,
create_table/1,
@@ -1508,6 +1509,43 @@ make_change_table_load_order(Tab, LoadOrder) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+change_table_majority(Tab, Majority) when is_boolean(Majority) ->
+ schema_transaction(fun() -> do_change_table_majority(Tab, Majority) end).
+
+do_change_table_majority(schema, _Majority) ->
+ mnesia:abort({bad_type, schema});
+do_change_table_majority(Tab, Majority) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, none),
+ insert_schema_ops(TidTs, make_change_table_majority(Tab, Majority)).
+
+make_change_table_majority(Tab, Majority) -> % returns the schema ops for Tab and, if it is a base table, its fragments
+    ensure_writable(schema),
+    Cs = incr_version(val({Tab, cstruct})),
+    ensure_active(Cs),
+    OldMajority = Cs#cstruct.majority,
+    Cs2 = Cs#cstruct{majority = Majority},
+    FragOps = case lists:keyfind(base_table, 1, Cs#cstruct.frag_properties) of
+                  {_, Tab} -> % Tab is its own base table: change every other fragment too
+                      FragNames = mnesia_frag:frag_names(Tab) -- [Tab],
+                      lists:map(
+                        fun(T) ->
+                                get_tid_ts_and_lock(T, none), % the fragment itself, not the base table again
+                                CsT = incr_version(val({T, cstruct})),
+                                ensure_active(CsT),
+                                CsT2 = CsT#cstruct{majority = Majority},
+                                verify_cstruct(CsT2),
+                                {op, change_table_majority, cs2list(CsT2),
+                                 CsT#cstruct.majority, Majority} % record the fragment's own old value
+                        end, FragNames);
+                  false -> []; % no base_table entry in frag_properties
+                  {_, _} -> mnesia:abort({bad_type, Tab}) % base_table names another table (presumably Tab is a fragment)
+              end,
+    verify_cstruct(Cs2),
+    [{op, change_table_majority, cs2list(Cs2), OldMajority, Majority} | FragOps].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
write_table_property(Tab, Prop) when is_tuple(Prop), size(Prop) >= 1 ->
schema_transaction(fun() -> do_write_table_property(Tab, Prop) end);
write_table_property(Tab, Prop) ->
--
cgit v1.2.3
From 4eacd6dcc0ffd28c4d76507212b87b75b249daca Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Sun, 30 Jan 2011 20:18:35 +0100
Subject: add mnesia_majority_test suite
---
lib/mnesia/test/Makefile | 1 +
lib/mnesia/test/mnesia_SUITE.erl | 99 ++++++++++++++++
lib/mnesia/test/mnesia_majority_test.erl | 188 +++++++++++++++++++++++++++++++
3 files changed, 288 insertions(+)
create mode 100644 lib/mnesia/test/mnesia_majority_test.erl
diff --git a/lib/mnesia/test/Makefile b/lib/mnesia/test/Makefile
index b165924ef2..ae4c9626c7 100644
--- a/lib/mnesia/test/Makefile
+++ b/lib/mnesia/test/Makefile
@@ -42,6 +42,7 @@ MODULES= \
mnesia_dirty_access_test \
mnesia_atomicity_test \
mnesia_consistency_test \
+ mnesia_majority_test \
mnesia_isolation_test \
mnesia_durability_test \
mnesia_recovery_test \
diff --git a/lib/mnesia/test/mnesia_SUITE.erl b/lib/mnesia/test/mnesia_SUITE.erl
index 8ba8427213..dc8f216c1c 100644
--- a/lib/mnesia/test/mnesia_SUITE.erl
+++ b/lib/mnesia/test/mnesia_SUITE.erl
@@ -142,6 +142,105 @@ silly() ->
mnesia_install_test:silly().
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+light(doc) ->
+ ["The 'light' test suite runs a selected set of test suites and is",
+ "intended to be the smallest test suite that is meaningful",
+ "to run. It starts with an installation test (which in essence is the",
+ "'silly' test case) and then it covers all functions in the API in",
+ "various depths. All configuration parameters and examples are also",
+ "covered."];
+light(suite) ->
+ [
+ install,
+ nice,
+ evil,
+ {mnesia_frag_test, light},
+ qlc,
+ registry,
+ config,
+ examples
+ ].
+
+install(suite) ->
+ [{mnesia_install_test, all}].
+
+nice(suite) ->
+ [{mnesia_nice_coverage_test, all}].
+
+evil(suite) ->
+ [{mnesia_evil_coverage_test, all}].
+
+qlc(suite) ->
+ [{mnesia_qlc_test, all}].
+
+registry(suite) ->
+ [{mnesia_registry_test, all}].
+
+config(suite) ->
+ [{mnesia_config_test, all}].
+
+examples(suite) ->
+ [{mnesia_examples_test, all}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+medium(doc) ->
+ ["The 'medium' test suite verfies the ACID (atomicity, consistency",
+ "isolation and durability) properties and various recovery scenarios",
+ "These tests may take quite while to run."];
+medium(suite) ->
+ [
+ install,
+ atomicity,
+ isolation,
+ durability,
+ recovery,
+ consistency,
+ majority,
+ {mnesia_frag_test, medium}
+ ].
+
+atomicity(suite) ->
+ [{mnesia_atomicity_test, all}].
+
+isolation(suite) ->
+ [{mnesia_isolation_test, all}].
+
+durability(suite) ->
+ [{mnesia_durability_test, all}].
+
+recovery(suite) ->
+ [{mnesia_recovery_test, all}].
+
+consistency(suite) ->
+ [{mnesia_consistency_test, all}].
+
+majority(suite) ->
+ [{mnesia_majority_test, all}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+heavy(doc) ->
+ ["The 'heavy' test suite runs some resource consuming tests and",
+ "benchmarks"];
+heavy(suite) ->
+ [measure].
+
+measure(suite) ->
+ [{mnesia_measure_test, all}].
+
+prediction(suite) ->
+ [{mnesia_measure_test, prediction}].
+
+fairness(suite) ->
+ [{mnesia_measure_test, fairness}].
+
+benchmarks(suite) ->
+ [{mnesia_measure_test, benchmarks}].
+
+consumption(suite) ->
+ [{mnesia_measure_test, consumption}].
+
+scalability(suite) ->
+ [{mnesia_measure_test, scalability}].
clean_up_suite(doc) -> ["Not a test case only kills mnesia and nodes, that where"
"started during the tests"];
diff --git a/lib/mnesia/test/mnesia_majority_test.erl b/lib/mnesia/test/mnesia_majority_test.erl
new file mode 100644
index 0000000000..17d1d8bcdd
--- /dev/null
+++ b/lib/mnesia/test/mnesia_majority_test.erl
@@ -0,0 +1,188 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_majority_test).
+-author('ulf.wiger@erlang-solutions.com').
+-compile(export_all).
+-include("mnesia_test_lib.hrl").
+
+init_per_testcase(Func, Conf) -> % per-case setup: passes both arguments straight to mnesia_test_lib
+ mnesia_test_lib:init_per_testcase(Func, Conf).
+
+fin_per_testcase(Func, Conf) -> % per-case teardown: passes both arguments straight to mnesia_test_lib
+ mnesia_test_lib:fin_per_testcase(Func, Conf).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+all(doc) -> % one-line description of this suite
+    ["Verify that majority checking works"];
+all(suite) -> % the test cases defined in this module
+    [
+     write,
+     wread,
+     delete,
+     clear_table,
+     frag,
+     change_majority,
+     frag_change_majority
+    ].
+
+write(suite) -> [];
+write(Config) when is_list(Config) -> % write on a 3-replica majority table as replicas are killed one by one
+    [N1, N2, N3] = ?acquire_nodes(3, Config),
+    Tab = t,
+    Schema = [{name, Tab}, {ram_copies, [N1,N2,N3]}, {majority,true}],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match({[ok,ok,ok],[]},
+           rpc:multicall([N1,N2,N3], mnesia, wait_for_tables, [[Tab], 3000])),
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+    mnesia_test_lib:kill_mnesia([N3]), % 2 of 3 replicas remain: the next write must still commit
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+    mnesia_test_lib:kill_mnesia([N2]), % 1 of 3 replicas remains: the next write must abort
+    ?match({aborted,{no_majority,Tab}},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)).
+
+wread(suite) -> [];
+wread(Config) when is_list(Config) -> % read with a write lock on a 2-replica majority table
+    [N1, N2] = ?acquire_nodes(2, Config),
+    Tab = t,
+    Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match({[ok,ok],[]},
+           rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])),
+    ?match({atomic,[]},
+           mnesia:transaction(fun() -> mnesia:read(t,1,write) end)),
+    mnesia_test_lib:kill_mnesia([N2]), % 1 of 2 replicas remains: the write-locked read must abort
+    ?match({aborted,{no_majority,Tab}},
+           mnesia:transaction(fun() -> mnesia:read(t,1,write) end)).
+
+delete(suite) -> [];
+delete(Config) when is_list(Config) -> % delete on a 2-replica majority table, before and after losing a replica
+    [N1, N2] = ?acquire_nodes(2, Config),
+    Tab = t,
+    Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match({[ok,ok],[]},
+           rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])),
+    %% works as expected with majority of nodes present
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:delete({t,1}) end)),
+    ?match({atomic,[]},
+           mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+    %% put the record back
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+    ?match({atomic,[{t,1,a}]},
+           mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+    mnesia_test_lib:kill_mnesia([N2]), % 1 of 2 replicas remains: the delete must abort
+    ?match({aborted,{no_majority,Tab}},
+           mnesia:transaction(fun() -> mnesia:delete({t,1}) end)).
+
+clear_table(suite) -> [];
+clear_table(Config) when is_list(Config) -> % mnesia:clear_table/1 on a 2-replica majority table, before and after losing a replica
+    [N1, N2] = ?acquire_nodes(2, Config),
+    Tab = t,
+    Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match({[ok,ok],[]},
+           rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])),
+    %% works as expected with majority of nodes present
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+    ?match({atomic,ok}, mnesia:clear_table(t)),
+    ?match({atomic,[]},
+           mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+    %% put the record back
+    ?match({atomic,ok},
+           mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+    ?match({atomic,[{t,1,a}]},
+           mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+    mnesia_test_lib:kill_mnesia([N2]), % 1 of 2 replicas remains: clearing the table must abort
+    ?match({aborted,{no_majority,Tab}}, mnesia:clear_table(t)).
+
+frag(suite) -> [];
+frag(Config) when is_list(Config) -> % the majority flag given at creation must show up on every fragment
+    [N1] = ?acquire_nodes(1, Config),
+    Tab = t,
+    Schema = [
+              {name, Tab}, {ram_copies, [N1]},
+              {majority,true},
+              {frag_properties, [{n_fragments, 2}]}
+             ],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match(true, mnesia:table_info(t, majority)), % base table
+    ?match(true, mnesia:table_info(t_frag2, majority)). % second fragment
+
+change_majority(suite) -> [];
+change_majority(Config) when is_list(Config) -> % switch majority on after creation, then lose a replica
+    [N1,N2] = ?acquire_nodes(2, Config),
+    Tab = t,
+    Schema = [
+              {name, Tab}, {ram_copies, [N1,N2]},
+              {majority,false}
+             ],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match(false, mnesia:table_info(t, majority)),
+    ?match({atomic, ok},
+           mnesia:change_table_majority(t, true)),
+    ?match(true, mnesia:table_info(t, majority)),
+    ?match(ok,
+           mnesia:activity(transaction, fun() ->
+                                                mnesia:write({t,1,a})
+                                        end)),
+    mnesia_test_lib:kill_mnesia([N2]), % 1 of 2 replicas remains: the activity below is expected to exit
+    ?match({'EXIT',{aborted,{no_majority,_}}},
+           mnesia:activity(transaction, fun() ->
+                                                mnesia:write({t,1,a})
+                                        end)).
+
+frag_change_majority(suite) -> [];
+frag_change_majority(Config) when is_list(Config) -> % change majority on a fragmented table via its base table
+    [N1,N2] = ?acquire_nodes(2, Config),
+    Tab = t,
+    Schema = [
+              {name, Tab}, {ram_copies, [N1,N2]},
+              {majority,false},
+              {frag_properties,
+               [{n_fragments, 2},
+                {n_ram_copies, 2},
+                {node_pool, [N1,N2]}]}
+             ],
+    ?match({atomic, ok}, mnesia:create_table(Schema)),
+    ?match(false, mnesia:table_info(t, majority)),
+    ?match(false, mnesia:table_info(t_frag2, majority)),
+    ?match({aborted,{bad_type,t_frag2}}, % changing a single fragment directly is rejected
+           mnesia:change_table_majority(t_frag2, true)),
+    ?match({atomic, ok}, % changing the base table succeeds; both checks below expect true
+           mnesia:change_table_majority(t, true)),
+    ?match(true, mnesia:table_info(t, majority)),
+    ?match(true, mnesia:table_info(t_frag2, majority)),
+    ?match(ok,
+           mnesia:activity(transaction, fun() ->
+                                                mnesia:write({t,1,a})
+                                        end, mnesia_frag)),
+    mnesia_test_lib:kill_mnesia([N2]), % 1 of 2 replicas remains: the activity below is expected to exit
+    ?match({'EXIT',{aborted,{no_majority,_}}},
+           mnesia:activity(transaction, fun() ->
+                                                mnesia:write({t,1,a})
+                                        end, mnesia_frag)).
--
cgit v1.2.3
From 497f4b2215e2fc1b6a10c3f96c71b90dbb529eae Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Sun, 30 Jan 2011 20:19:48 +0100
Subject: Add documentation text about majority checking
---
lib/mnesia/doc/src/Mnesia_chap7.xmlsrc | 7 +++++++
lib/mnesia/doc/src/mnesia.xml | 32 +++++++++++++++++++++++++++++++-
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
index 7078499fbf..21174340d1 100644
--- a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
+++ b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
@@ -473,6 +473,13 @@ dets:close(N).
mnesia:table_info(Tab, master_nodes) may be used to
obtain information about the potential master nodes.
+ Determining which data to keep after communication failure is outside
+ the scope of Mnesia. One approach would be to determine which "island"
+ contains a majority of the nodes. Using the {majority,true} option
+ for critical tables can be a way of ensuring that nodes that are not part
+ of a "majority island" are not able to update those tables. Note that this
+ constitutes a reduction in service on the minority nodes. This would be
+ a tradeoff in favour of higher consistency guarantees.
The function mnesia:force_load_table(Tab) may be used to
force load the table regardless of which table load mechanism
is activated.
diff --git a/lib/mnesia/doc/src/mnesia.xml b/lib/mnesia/doc/src/mnesia.xml
index 16e78ea0af..2a2c7d3a9f 100644
--- a/lib/mnesia/doc/src/mnesia.xml
+++ b/lib/mnesia/doc/src/mnesia.xml
@@ -160,6 +160,14 @@ If a new item is inserted with the same key as
behavior. The default is false.
+ -
+
majority This attribute can be either true or
+ false (default is false). When true, a majority
+ of the table replicas must be available for an update to succeed.
+ Majority checking can be enabled on tables with mission-critical data,
+ where it is vital to avoid inconsistencies due to network splits.
+
+
-
snmp Each (set based) Mnesia table can be
automatically turned into an SNMP ordered table as well.
@@ -649,6 +657,17 @@ mnesia:change_table_copy_type(person, node(), disc_copies)
LoadOrder priority will be loaded first at startup.
+
+ change_table_majority(Tab, Majority) -> {aborted, R} | {atomic, ok}
+ Change the majority check setting for the table.
+
+ Majority must be a boolean; the default is false.
+ When true, a majority of the table's replicas must be available
+ for an update to succeed. When used on fragmented tables, Tab
must be the name of the base table. Directly changing the majority setting on
+ individual fragments is not allowed.
+
+
clear_table(Tab) -> {aborted, R} | {atomic, ok}
Deletes all entries in a table.
@@ -753,6 +772,14 @@ mnesia:change_table_copy_type(person, node(), disc_copies)
priority will be loaded first at startup.
+ -
+
{majority, Flag}, where Flag must be a boolean.
+ If true, any (non-dirty) update to the table will abort unless
+ a majority of the table's replicas are available for the commit.
+ When used on a fragmented table, all fragments will be given
+ the same majority setting.
+
+
-
{ram_copies, Nodelist}, where
Nodelist is a list of the nodes where this table
@@ -1737,7 +1764,10 @@ mnesia:create_table(person,
write and sticky_write are supported.
If the user wants to update the record it is more efficient to
- use write/sticky_write as the LockKind.
+ use write/sticky_write as the LockKind. If majority checking
+ is active on the table, it will be checked as soon as a write lock is
+ attempted. This can be used to quickly abort if the majority condition
+ isn't met.
--
cgit v1.2.3
From 8d7963d76fb651f6cb250033a0a5a643abd01389 Mon Sep 17 00:00:00 2001
From: Ulf Wiger
Date: Fri, 13 May 2011 18:39:41 +0200
Subject: dialyzer warning on mnesia_tm
---
lib/mnesia/src/mnesia_tm.erl | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index 01abc14a81..f62f7cb7c8 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -1122,9 +1122,7 @@ t_commit(Type) ->
end.
majority_attr(#prep{majority = M}) ->
- M;
-majority_attr(_) ->
- [].
+ M.
%% This function arranges for all objects we shall write in S to be
--
cgit v1.2.3