aboutsummaryrefslogtreecommitdiffstats
path: root/lib/mnesia
diff options
context:
space:
mode:
authorHenrik Nord <[email protected]>2011-05-17 11:57:11 +0200
committerHenrik Nord <[email protected]>2011-05-17 11:57:25 +0200
commit0d2d95e4e3605d31d7032914ed623a91ef01ff25 (patch)
tree5dfe3d43ef6a24df423834fc864e834660c03b71 /lib/mnesia
parent23db3a53e9c77eee4b1b94bd26c13a9c68718189 (diff)
parent8d7963d76fb651f6cb250033a0a5a643abd01389 (diff)
downloadotp-0d2d95e4e3605d31d7032914ed623a91ef01ff25.tar.gz
otp-0d2d95e4e3605d31d7032914ed623a91ef01ff25.tar.bz2
otp-0d2d95e4e3605d31d7032914ed623a91ef01ff25.zip
Merge branch 'uw/mnesia-majority' into dev
* uw/mnesia-majority: dialyzer warning on mnesia_tm Add documentation text about majority checking add mnesia_majority_test suite where_to_wlock optimization + change_table_majority/2 bug in mnesia_tm:needs_majority/2 optimize sticky_lock maj. check check majority for sticky locks Write locks now check majority when needed. Add {majority, boolean()} per-table option. OTP-9304
Diffstat (limited to 'lib/mnesia')
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap7.xmlsrc7
-rw-r--r--lib/mnesia/doc/src/mnesia.xml32
-rw-r--r--lib/mnesia/src/mnesia.erl11
-rw-r--r--lib/mnesia/src/mnesia.hrl1
-rw-r--r--lib/mnesia/src/mnesia_controller.erl26
-rw-r--r--lib/mnesia/src/mnesia_dumper.erl8
-rw-r--r--lib/mnesia/src/mnesia_lib.erl10
-rw-r--r--lib/mnesia/src/mnesia_loader.erl2
-rw-r--r--lib/mnesia/src/mnesia_locker.erl69
-rw-r--r--lib/mnesia/src/mnesia_schema.erl54
-rw-r--r--lib/mnesia/src/mnesia_tm.erl66
-rw-r--r--lib/mnesia/test/Makefile1
-rw-r--r--lib/mnesia/test/mnesia_SUITE.erl99
-rw-r--r--lib/mnesia/test/mnesia_majority_test.erl188
14 files changed, 546 insertions, 28 deletions
diff --git a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
index 7078499fbf..21174340d1 100644
--- a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
+++ b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
@@ -473,6 +473,13 @@ dets:close(N). </pre>
<c>mnesia:table_info(Tab, master_nodes)</c> may be used to
obtain information about the potential master nodes.
</p>
+ <p>Determining which data to keep after communication failure is outside
+ the scope of Mnesia. One approach would be to determine which "island"
+ contains a majority of the nodes. Using the <c>{majority,true}</c> option
+ for critical tables can be a way of ensuring that nodes that are not part
+ of a "majority island" are not able to update those tables. Note that this
+ constitutes a reduction in service on the minority nodes. This would be
+ a tradeoff in favour of higher consistency guarantees.</p>
<p>The function <c>mnesia:force_load_table(Tab)</c> may be used to
force load the table regardless of which table load mechanism
is activated.
diff --git a/lib/mnesia/doc/src/mnesia.xml b/lib/mnesia/doc/src/mnesia.xml
index 16e78ea0af..2a2c7d3a9f 100644
--- a/lib/mnesia/doc/src/mnesia.xml
+++ b/lib/mnesia/doc/src/mnesia.xml
@@ -161,6 +161,14 @@ If a new item is inserted with the same key as
</p>
</item>
<item>
+ <p><c>majority</c> This attribute can be either <c>true</c> or
+ <c>false</c> (default is <c>false</c>). When <c>true</c>, a majority
+ of the table replicas must be available for an update to succeed.
+ Majority checking can be enabled on tables with mission-critical data,
+ where it is vital to avoid inconsistencies due to network splits.
+ </p>
+ </item>
+ <item>
<p><c>snmp</c> Each (set based) Mnesia table can be
automatically turned into an SNMP ordered table as well.
This property specifies the types of the SNMP keys.
@@ -650,6 +658,17 @@ mnesia:change_table_copy_type(person, node(), disc_copies)
</desc>
</func>
<func>
+ <name>change_table_majority(Tab, Majority) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Change the majority check setting for the table.</fsummary>
+ <desc>
+ <p><c>Majority</c> must be a boolean; the default is <c>false</c>.
+ When <c>true</c>, a majority of the table's replicas must be available
+ for an update to succeed. When used on fragmented tables, <c>Tab</c>
+ must be the name base table. Directly changing the majority setting on
+ individual fragments is not allowed.</p>
+ </desc>
+ </func>
+ <func>
<name>clear_table(Tab) -> {aborted, R} | {atomic, ok}</name>
<fsummary>Deletes all entries in a table.</fsummary>
<desc>
@@ -753,6 +772,14 @@ mnesia:change_table_copy_type(person, node(), disc_copies)
priority will be loaded first at startup.
</p>
</item>
+ <item>
+ <p><c>{majority, Flag}</c>, where <c>Flag</c> must be a boolean.
+ If <c>true</c>, any (non-dirty) update to the table will abort unless
+ a majority of the table's replicas are available for the commit.
+ When used on a fragmented table, all fragments will be given
+ the same majority setting.
+ </p>
+ </item>
<item>
<p><c>{ram_copies, Nodelist}</c>, where
<c>Nodelist</c> is a list of the nodes where this table
@@ -1737,7 +1764,10 @@ mnesia:create_table(person,
<c>write</c> and <c>sticky_write</c> are supported.
</p>
<p>If the user wants to update the record it is more efficient to
- use <c>write/sticky_write</c> as the LockKind.
+ use <c>write/sticky_write</c> as the LockKind. If majority checking
+ is active on the table, it will be checked as soon as a write lock is
+ attempted. This can be used to quickly abort if the majority condition
+ isn't met.
</p>
</desc>
</func>
diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl
index 025b32f506..980a9c6213 100644
--- a/lib/mnesia/src/mnesia.erl
+++ b/lib/mnesia/src/mnesia.erl
@@ -39,6 +39,7 @@
%% Access within an activity - Lock acquisition
lock/2, lock/4,
+ lock_table/2,
read_lock_table/1,
write_lock_table/1,
@@ -92,7 +93,7 @@
add_table_copy/3, del_table_copy/2, move_table_copy/3,
add_table_index/2, del_table_index/2,
transform_table/3, transform_table/4,
- change_table_copy_type/3,
+ change_table_copy_type/3, change_table_majority/2,
read_table_property/2, write_table_property/2, delete_table_property/2,
change_table_frag/2,
clear_table/1, clear_table/4,
@@ -415,6 +416,9 @@ lock(LockItem, LockKind) ->
abort(no_transaction)
end.
+lock_table(Tab, LockKind) ->
+ lock({table, Tab}, LockKind).
+
lock(Tid, Ts, LockItem, LockKind) ->
case element(1, Tid) of
tid ->
@@ -467,6 +471,8 @@ lock_table(Tid, Ts, Tab, LockKind) when is_atom(Tab) ->
mnesia_locker:rlock_table(Tid, Store, Tab);
write ->
mnesia_locker:wlock_table(Tid, Store, Tab);
+ load ->
+ mnesia_locker:load_lock_table(Tid, Store, Tab);
sticky_write ->
mnesia_locker:sticky_wlock_table(Tid, Store, Tab);
none ->
@@ -2455,6 +2461,9 @@ change_table_access_mode(T, Access) ->
change_table_load_order(T, O) ->
mnesia_schema:change_table_load_order(T, O).
+change_table_majority(T, M) ->
+ mnesia_schema:change_table_majority(T, M).
+
set_master_nodes(Nodes) when is_list(Nodes) ->
UseDir = system_info(use_dir),
IsRunning = system_info(is_running),
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl
index d488d9364a..26537815a3 100644
--- a/lib/mnesia/src/mnesia.hrl
+++ b/lib/mnesia/src/mnesia.hrl
@@ -62,6 +62,7 @@
disc_only_copies = [], % [Node]
load_order = 0, % Integer
access_mode = read_write, % read_write | read_only
+ majority = false, % true | false
index = [], % [Integer]
snmp = [], % Snmp Ustruct
local_content = false, % true | false
diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
index 0254769758..d4b2c7b5cc 100644
--- a/lib/mnesia/src/mnesia_controller.erl
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -72,6 +72,7 @@
add_active_replica/4,
update/1,
change_table_access_mode/1,
+ change_table_majority/1,
del_active_replica/2,
wait_for_tables/2,
get_network_copy/2,
@@ -690,7 +691,8 @@ handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State)
case lists:member(AddNode, Current) and
(State#state.schema_is_merged == true) of
true ->
- mnesia_lib:add_lsort({Tab, where_to_write}, AddNode);
+ mnesia_lib:add_lsort({Tab, where_to_write}, AddNode),
+ update_where_to_wlock(Tab);
false ->
ignore
end,
@@ -1690,6 +1692,8 @@ add_active_replica(Tab, Node, Storage, AccessMode) ->
set(Var, mark_blocked_tab(Blocked, Del)),
mnesia_lib:del({Tab, where_to_write}, Node)
end,
+
+ update_where_to_wlock(Tab),
add({Tab, active_replicas}, Node).
del_active_replica(Tab, Node) ->
@@ -1699,7 +1703,8 @@ del_active_replica(Tab, Node) ->
New = lists:sort(Del),
set(Var, mark_blocked_tab(Blocked, New)), % where_to_commit
mnesia_lib:del({Tab, active_replicas}, Node),
- mnesia_lib:del({Tab, where_to_write}, Node).
+ mnesia_lib:del({Tab, where_to_write}, Node),
+ update_where_to_wlock(Tab).
change_table_access_mode(Cs) ->
W = fun() ->
@@ -1708,7 +1713,22 @@ change_table_access_mode(Cs) ->
val({Tab, active_replicas}))
end,
update(W).
-
+
+change_table_majority(Cs) ->
+ W = fun() ->
+ Tab = Cs#cstruct.name,
+ set({Tab, majority}, Cs#cstruct.majority),
+ update_where_to_wlock(Tab)
+ end,
+ update(W).
+
+update_where_to_wlock(Tab) ->
+ WNodes = val({Tab, where_to_write}),
+ Majority = case catch val({Tab, majority}) of
+ true -> true;
+ _ -> false
+ end,
+ set({Tab, where_to_wlock}, {WNodes, Majority}).
%% node To now has tab loaded, but this must be undone
%% This code is rpc:call'ed from the tab_copier process
diff --git a/lib/mnesia/src/mnesia_dumper.erl b/lib/mnesia/src/mnesia_dumper.erl
index 55b9946ae9..92fd9dfade 100644
--- a/lib/mnesia/src/mnesia_dumper.erl
+++ b/lib/mnesia/src/mnesia_dumper.erl
@@ -900,6 +900,14 @@ insert_op(Tid, _, {op, change_table_access_mode,TabDef, _OldAccess, _Access}, In
end,
insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+insert_op(Tid, _, {op, change_table_majority,TabDef, _OldAccess, _Access}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case InitBy of
+ startup -> ignore;
+ _ -> mnesia_controller:change_table_majority(Cs)
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
insert_op(Tid, _, {op, change_table_load_order, TabDef, _OldLevel, _Level}, InPlace, InitBy) ->
Cs = mnesia_schema:list2cs(TabDef),
insert_cstruct(Tid, Cs, true, InPlace, InitBy);
diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl
index 36bcfe8de9..7e926a6258 100644
--- a/lib/mnesia/src/mnesia_lib.erl
+++ b/lib/mnesia/src/mnesia_lib.erl
@@ -96,6 +96,8 @@
exists/1,
fatal/2,
get_node_number/0,
+ have_majority/2,
+ have_majority/3,
fix_error/1,
important/2,
incr_counter/1,
@@ -660,6 +662,14 @@ proc_info(_) -> false.
get_node_number() ->
{node(), self()}.
+have_majority(Tab, HaveNodes) ->
+ have_majority(Tab, val({Tab, all_nodes}), HaveNodes).
+
+have_majority(_Tab, AllNodes, HaveNodes) ->
+ Missing = AllNodes -- HaveNodes,
+ Present = AllNodes -- Missing,
+ length(Present) > length(Missing).
+
read_log_files() ->
[{F, catch file:read_file(F)} || F <- mnesia_log:log_files()].
diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl
index 3de329503e..e785b795d1 100644
--- a/lib/mnesia/src/mnesia_loader.erl
+++ b/lib/mnesia/src/mnesia_loader.erl
@@ -702,7 +702,7 @@ send_table(Pid, Tab, RemoteS) ->
prepare_copy(Pid, Tab, Storage) ->
Trans =
fun() ->
- mnesia:write_lock_table(Tab),
+ mnesia:lock_table(Tab, load),
mnesia_subscr:subscribe(Pid, {table, Tab}),
update_where_to_write(Tab, node(Pid)),
mnesia_lib:db_fixtable(Storage, Tab, true),
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
index ca0cc79c45..0492d794f3 100644
--- a/lib/mnesia/src/mnesia_locker.erl
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -40,7 +40,8 @@
sticky_wlock_table/3,
wlock/3,
wlock_no_exist/4,
- wlock_table/3
+ wlock_table/3,
+ load_lock_table/3
]).
%% sys callback functions
@@ -656,16 +657,17 @@ rwlock(Tid, Store, Oid) ->
Lock = write,
case need_lock(Store, Tab, Key, Lock) of
yes ->
- Ns = w_nodes(Tab),
+ {Ns, Majority} = w_nodes(Tab),
+ check_majority(Majority, Tab, Ns),
Res = get_rwlocks_on_nodes(Ns, rwlock, Node, Store, Tid, Oid),
?ets_insert(Store, {{locks, Tab, Key}, Lock}),
Res;
no ->
if
Key == ?ALL ->
- w_nodes(Tab);
+ element(2, w_nodes(Tab));
Tab == ?GLOBAL ->
- w_nodes(Tab);
+ element(2, w_nodes(Tab));
true ->
dirty_rpc(Node, Tab, Key, Lock)
end
@@ -677,12 +679,34 @@ rwlock(Tid, Store, Oid) ->
%% in the local store under the key == nodes
w_nodes(Tab) ->
- Nodes = ?catch_val({Tab, where_to_write}),
- case Nodes of
- [_ | _] -> Nodes;
+ case ?catch_val({Tab, where_to_wlock}) of
+ {[_ | _], _} = Where -> Where;
_ -> mnesia:abort({no_exists, Tab})
end.
+%% If the table has the 'majority' flag set, we can
+%% only take a write lock if we see a majority of the
+%% nodes.
+
+
+check_majority(true, Tab, HaveNs) ->
+ check_majority(Tab, HaveNs);
+check_majority(false, _, _) ->
+ ok.
+
+check_majority(Tab, HaveNs) ->
+ case ?catch_val({Tab, majority}) of
+ true ->
+ case mnesia_lib:have_majority(Tab, HaveNs) of
+ true ->
+ ok;
+ false ->
+ mnesia:abort({no_majority, Tab})
+ end;
+ _ ->
+ ok
+ end.
+
%% aquire a sticky wlock, a sticky lock is a lock
%% which remains at this node after the termination of the
%% transaction.
@@ -708,12 +732,14 @@ sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
end.
do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
+ {WNodes, Majority} = w_nodes(Tab),
+ sticky_check_majority(Lock, Tab, Majority, WNodes),
?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
N = node(),
receive
{?MODULE, N, granted} ->
?ets_insert(Store, {{locks, Tab, Key}, write}),
- [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ [?ets_insert(Store, {nodes, Node}) || Node <- WNodes],
granted;
{?MODULE, N, {granted, Val}} -> %% for rwlocks
case opt_lookup_in_client(Val, Oid, write) of
@@ -721,7 +747,7 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
exit({aborted, C});
Val2 ->
?ets_insert(Store, {{locks, Tab, Key}, write}),
- [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ [?ets_insert(Store, {nodes, Node}) || Node <- WNodes],
Val2
end;
{?MODULE, N, {not_granted, Reason}} ->
@@ -737,6 +763,16 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
dirty_sticky_lock(Tab, Key, [N], Lock)
end.
+sticky_check_majority(W, Tab, true, WNodes) when W==write; W==read_write ->
+ case mnesia_lib:have_majority(Tab, WNodes) of
+ true ->
+ ok;
+ false ->
+ mnesia:abort({no_majority, Tab})
+ end;
+sticky_check_majority(_, _, _, _) ->
+ ok.
+
not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) ->
rlock(Tid, Store, {Tab, ?ALL}), %% needed?
wlock(Tid, Store, Oid), %% perfect sync
@@ -773,22 +809,33 @@ sticky_wlock_table(Tid, Store, Tab) ->
%% local store when we have aquired the lock.
%%
wlock(Tid, Store, Oid) ->
+ wlock(Tid, Store, Oid, _CheckMajority = true).
+
+wlock(Tid, Store, Oid, CheckMajority) ->
{Tab, Key} = Oid,
case need_lock(Store, Tab, Key, write) of
yes ->
- Ns = w_nodes(Tab),
+ {Ns, Majority} = w_nodes(Tab),
+ if CheckMajority ->
+ check_majority(Majority, Tab, Ns);
+ true ->
+ ignore
+ end,
Op = {self(), {write, Tid, Oid}},
?ets_insert(Store, {{locks, Tab, Key}, write}),
get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid);
no when Key /= ?ALL, Tab /= ?GLOBAL ->
[];
no ->
- w_nodes(Tab)
+ element(2, w_nodes(Tab))
end.
wlock_table(Tid, Store, Tab) ->
wlock(Tid, Store, {Tab, ?ALL}).
+load_lock_table(Tid, Store, Tab) ->
+ wlock(Tid, Store, {Tab, ?ALL}, _CheckMajority = false).
+
%% Write lock even if the table does not exist
wlock_no_exist(Tid, Store, Tab, Ns) ->
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index f33a6c7a84..fef72ad39c 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -37,6 +37,7 @@
change_table_copy_type/3,
change_table_access_mode/2,
change_table_load_order/2,
+ change_table_majority/2,
change_table_frag/2,
clear_table/1,
create_table/1,
@@ -178,6 +179,8 @@ do_set_schema(Tab, Cs) ->
set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies),
set({Tab, load_order}, Cs#cstruct.load_order),
set({Tab, access_mode}, Cs#cstruct.access_mode),
+ set({Tab, majority}, Cs#cstruct.majority),
+ set({Tab, all_nodes}, mnesia_lib:cs_to_nodes(Cs)),
set({Tab, snmp}, Cs#cstruct.snmp),
set({Tab, user_properties}, Cs#cstruct.user_properties),
[set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties],
@@ -651,6 +654,7 @@ list2cs(List) when is_list(List) ->
Snmp = pick(Name, snmp, List, []),
LoadOrder = pick(Name, load_order, List, 0),
AccessMode = pick(Name, access_mode, List, read_write),
+ Majority = pick(Name, majority, List, false),
UserProps = pick(Name, user_properties, List, []),
verify({alt, [nil, list]}, mnesia_lib:etype(UserProps),
{bad_type, Name, {user_properties, UserProps}}),
@@ -676,6 +680,7 @@ list2cs(List) when is_list(List) ->
snmp = Snmp,
load_order = LoadOrder,
access_mode = AccessMode,
+ majority = Majority,
local_content = LC,
record_name = RecName,
attributes = Attrs,
@@ -809,7 +814,16 @@ verify_cstruct(Cs) when is_record(Cs, cstruct) ->
Access = Cs#cstruct.access_mode,
verify({alt, [read_write, read_only]}, Access,
{bad_type, Tab, {access_mode, Access}}),
-
+ Majority = Cs#cstruct.majority,
+ verify({alt, [true, false]}, Majority,
+ {bad_type, Tab, {majority, Majority}}),
+ case Majority of
+ true ->
+ verify(false, LC,
+ {combine_error, Tab, [{local_content,true},{majority,true}]});
+ false ->
+ ok
+ end,
Snmp = Cs#cstruct.snmp,
verify(true, mnesia_snmp_hook:check_ustruct(Snmp),
{badarg, Tab, {snmp, Snmp}}),
@@ -1495,6 +1509,43 @@ make_change_table_load_order(Tab, LoadOrder) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+change_table_majority(Tab, Majority) when is_boolean(Majority) ->
+ schema_transaction(fun() -> do_change_table_majority(Tab, Majority) end).
+
+do_change_table_majority(schema, _Majority) ->
+ mnesia:abort({bad_type, schema});
+do_change_table_majority(Tab, Majority) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, none),
+ insert_schema_ops(TidTs, make_change_table_majority(Tab, Majority)).
+
+make_change_table_majority(Tab, Majority) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ OldMajority = Cs#cstruct.majority,
+ Cs2 = Cs#cstruct{majority = Majority},
+ FragOps = case lists:keyfind(base_table, 1, Cs#cstruct.frag_properties) of
+ {_, Tab} ->
+ FragNames = mnesia_frag:frag_names(Tab) -- [Tab],
+ lists:map(
+ fun(T) ->
+ get_tid_ts_and_lock(Tab, none),
+ CsT = incr_version(val({T, cstruct})),
+ ensure_active(CsT),
+ CsT2 = CsT#cstruct{majority = Majority},
+ verify_cstruct(CsT2),
+ {op, change_table_majority, cs2list(CsT2),
+ OldMajority, Majority}
+ end, FragNames);
+ false -> [];
+ {_, _} -> mnesia:abort({bad_type, Tab})
+ end,
+ verify_cstruct(Cs2),
+ [{op, change_table_majority, cs2list(Cs2), OldMajority, Majority} | FragOps].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
write_table_property(Tab, Prop) when is_tuple(Prop), size(Prop) >= 1 ->
schema_transaction(fun() -> do_write_table_property(Tab, Prop) end);
write_table_property(Tab, Prop) ->
@@ -2974,6 +3025,7 @@ merge_versions(AnythingNew, Cs, RemoteCs, Force) ->
Cs#cstruct.index == RemoteCs#cstruct.index,
Cs#cstruct.snmp == RemoteCs#cstruct.snmp,
Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode,
+ Cs#cstruct.majority == RemoteCs#cstruct.majority,
Cs#cstruct.load_order == RemoteCs#cstruct.load_order,
Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties ->
do_merge_versions(AnythingNew, Cs, RemoteCs);
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index bb8e788b40..f62f7cb7c8 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -64,7 +64,8 @@
prev_tab = [], % initiate to a non valid table name
prev_types,
prev_snmp,
- types
+ types,
+ majority = []
}).
-record(participant, {tid, pid, commit, disc_nodes = [],
@@ -1100,9 +1101,12 @@ t_commit(Type) ->
case arrange(Tid, Store, Type) of
{N, Prep} when N > 0 ->
multi_commit(Prep#prep.protocol,
+ majority_attr(Prep),
Tid, Prep#prep.records, Store);
{0, Prep} ->
- multi_commit(read_only, Tid, Prep#prep.records, Store)
+ multi_commit(read_only,
+ majority_attr(Prep),
+ Tid, Prep#prep.records, Store)
end;
true ->
%% nested commit
@@ -1117,6 +1121,10 @@ t_commit(Type) ->
do_commit_nested
end.
+majority_attr(#prep{majority = M}) ->
+ M.
+
+
%% This function arranges for all objects we shall write in S to be
%% in a list of {Node, CommitRecord}
%% Important function for the performance of mnesia.
@@ -1222,11 +1230,13 @@ prepare_items(Tid, Tab, Key, Items, Prep) ->
{blocked, _} ->
unblocked = req({unblock_me, Tab}),
prepare_items(Tid, Tab, Key, Items, Prep);
- _ ->
+ _ ->
+ Majority = needs_majority(Tab, Prep),
Snmp = val({Tab, snmp}),
Recs2 = do_prepare_items(Tid, Tab, Key, Types,
Snmp, Items, Prep#prep.records),
Prep2 = Prep#prep{records = Recs2, prev_tab = Tab,
+ majority = Majority,
prev_types = Types, prev_snmp = Snmp},
check_prep(Prep2, Types)
end.
@@ -1235,6 +1245,33 @@ do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) ->
Recs2 = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit
prepare_nodes(Tid, Types, Items, Recs2, normal).
+
+needs_majority(Tab, #prep{majority = M}) ->
+ case lists:keymember(Tab, 1, M) of
+ true ->
+ M;
+ false ->
+ case ?catch_val({Tab, majority}) of
+ {'EXIT', _} ->
+ M;
+ false ->
+ M;
+ true ->
+ CopyHolders = val({Tab, all_nodes}),
+ [{Tab, CopyHolders} | M]
+ end
+ end.
+
+have_majority([], _) ->
+ ok;
+have_majority([{Tab, AllNodes} | Rest], Nodes) ->
+ case mnesia_lib:have_majority(Tab, AllNodes, Nodes) of
+ true ->
+ have_majority(Rest, Nodes);
+ false ->
+ {error, Tab}
+ end.
+
prepare_snmp(Tab, Key, Items) ->
case val({Tab, snmp}) of
[] ->
@@ -1261,10 +1298,15 @@ prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) ->
prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp)
end.
-check_prep(Prep, Types) when Prep#prep.types == Types ->
+check_prep(#prep{majority = [], types = Types} = Prep, Types) ->
Prep;
-check_prep(Prep, Types) when Prep#prep.types == undefined ->
- Prep#prep{types = Types};
+check_prep(#prep{majority = M, types = undefined} = Prep, Types) ->
+ Protocol = if M == [] ->
+ Prep#prep.protocol;
+ true ->
+ asym_trans
+ end,
+ Prep#prep{protocol = Protocol, types = Types};
check_prep(Prep, _Types) ->
Prep#prep{protocol = asym_trans}.
@@ -1311,7 +1353,7 @@ prepare_node(_Node, _Storage, [], Rec, _Kind) ->
%% multi_commit((Protocol, Tid, CommitRecords, Store)
%% Local work is always performed in users process
-multi_commit(read_only, Tid, CR, _Store) ->
+multi_commit(read_only, _Maj = [], Tid, CR, _Store) ->
%% This featherweight commit protocol is used when no
%% updates has been performed in the transaction.
@@ -1324,7 +1366,7 @@ multi_commit(read_only, Tid, CR, _Store) ->
?MODULE ! {delete_transaction, Tid},
do_commit;
-multi_commit(sym_trans, Tid, CR, Store) ->
+multi_commit(sym_trans, _Maj = [], Tid, CR, Store) ->
%% This lightweight commit protocol is used when all
%% the involved tables are replicated symetrically.
%% Their storage types must match on each node.
@@ -1376,7 +1418,7 @@ multi_commit(sym_trans, Tid, CR, Store) ->
[{tid, Tid}, {outcome, Outcome}]),
Outcome;
-multi_commit(sync_sym_trans, Tid, CR, Store) ->
+multi_commit(sync_sym_trans, _Maj = [], Tid, CR, Store) ->
%% This protocol is the same as sym_trans except that it
%% uses syncronized calls to disk_log and syncronized commits
%% when several nodes are involved.
@@ -1408,7 +1450,7 @@ multi_commit(sync_sym_trans, Tid, CR, Store) ->
[{tid, Tid}, {outcome, Outcome}]),
Outcome;
-multi_commit(asym_trans, Tid, CR, Store) ->
+multi_commit(asym_trans, Majority, Tid, CR, Store) ->
%% This more expensive commit protocol is used when
%% table definitions are changed (schema transactions).
%% It is also used when the involved tables are
@@ -1469,6 +1511,10 @@ multi_commit(asym_trans, Tid, CR, Store) ->
{D2, CR2} = commit_decision(D, CR, [], []),
DiscNs = D2#decision.disc_nodes,
RamNs = D2#decision.ram_nodes,
+ case have_majority(Majority, DiscNs ++ RamNs) of
+ ok -> ok;
+ {error, Tab} -> mnesia:abort({no_majority, Tab})
+ end,
Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
?ets_insert(Store, Pending),
{WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs),
diff --git a/lib/mnesia/test/Makefile b/lib/mnesia/test/Makefile
index b165924ef2..ae4c9626c7 100644
--- a/lib/mnesia/test/Makefile
+++ b/lib/mnesia/test/Makefile
@@ -42,6 +42,7 @@ MODULES= \
mnesia_dirty_access_test \
mnesia_atomicity_test \
mnesia_consistency_test \
+ mnesia_majority_test \
mnesia_isolation_test \
mnesia_durability_test \
mnesia_recovery_test \
diff --git a/lib/mnesia/test/mnesia_SUITE.erl b/lib/mnesia/test/mnesia_SUITE.erl
index 8ba8427213..dc8f216c1c 100644
--- a/lib/mnesia/test/mnesia_SUITE.erl
+++ b/lib/mnesia/test/mnesia_SUITE.erl
@@ -142,6 +142,105 @@ silly() ->
mnesia_install_test:silly().
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+light(doc) ->
+ ["The 'light' test suite runs a selected set of test suites and is",
+ "intended to be the smallest test suite that is meaningful",
+ "to run. It starts with an installation test (which in essence is the",
+ "'silly' test case) and then it covers all functions in the API in",
+ "various depths. All configuration parameters and examples are also",
+ "covered."];
+light(suite) ->
+ [
+ install,
+ nice,
+ evil,
+ {mnesia_frag_test, light},
+ qlc,
+ registry,
+ config,
+ examples
+ ].
+
+install(suite) ->
+ [{mnesia_install_test, all}].
+
+nice(suite) ->
+ [{mnesia_nice_coverage_test, all}].
+
+evil(suite) ->
+ [{mnesia_evil_coverage_test, all}].
+
+qlc(suite) ->
+ [{mnesia_qlc_test, all}].
+
+registry(suite) ->
+ [{mnesia_registry_test, all}].
+
+config(suite) ->
+ [{mnesia_config_test, all}].
+
+examples(suite) ->
+ [{mnesia_examples_test, all}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+medium(doc) ->
+ ["The 'medium' test suite verfies the ACID (atomicity, consistency",
+ "isolation and durability) properties and various recovery scenarios",
+ "These tests may take quite while to run."];
+medium(suite) ->
+ [
+ install,
+ atomicity,
+ isolation,
+ durability,
+ recovery,
+ consistency,
+ majority,
+ {mnesia_frag_test, medium}
+ ].
+
+atomicity(suite) ->
+ [{mnesia_atomicity_test, all}].
+
+isolation(suite) ->
+ [{mnesia_isolation_test, all}].
+
+durability(suite) ->
+ [{mnesia_durability_test, all}].
+
+recovery(suite) ->
+ [{mnesia_recovery_test, all}].
+
+consistency(suite) ->
+ [{mnesia_consistency_test, all}].
+
+majority(suite) ->
+ [{mnesia_majority_test, all}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+heavy(doc) ->
+ ["The 'heavy' test suite runs some resource consuming tests and",
+ "benchmarks"];
+heavy(suite) ->
+ [measure].
+
+measure(suite) ->
+ [{mnesia_measure_test, all}].
+
+prediction(suite) ->
+ [{mnesia_measure_test, prediction}].
+
+fairness(suite) ->
+ [{mnesia_measure_test, fairness}].
+
+benchmarks(suite) ->
+ [{mnesia_measure_test, benchmarks}].
+
+consumption(suite) ->
+ [{mnesia_measure_test, consumption}].
+
+scalability(suite) ->
+ [{mnesia_measure_test, scalability}].
clean_up_suite(doc) -> ["Not a test case only kills mnesia and nodes, that where"
"started during the tests"];
diff --git a/lib/mnesia/test/mnesia_majority_test.erl b/lib/mnesia/test/mnesia_majority_test.erl
new file mode 100644
index 0000000000..17d1d8bcdd
--- /dev/null
+++ b/lib/mnesia/test/mnesia_majority_test.erl
@@ -0,0 +1,188 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_majority_test).
+-author('[email protected]').
+-compile(export_all).
+-include("mnesia_test_lib.hrl").
+
+init_per_testcase(Func, Conf) ->
+ mnesia_test_lib:init_per_testcase(Func, Conf).
+
+fin_per_testcase(Func, Conf) ->
+ mnesia_test_lib:fin_per_testcase(Func, Conf).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+all(doc) ->
+ ["Verify that majority checking works"];
+all(suite) ->
+ [
+ write
+ , wread
+ , delete
+ , clear_table
+ , frag
+ , change_majority
+ , frag_change_majority
+ ].
+
+write(suite) -> [];
+write(Config) when is_list(Config) ->
+ [N1, N2, N3] = Nodes = ?acquire_nodes(3, Config),
+ Tab = t,
+ Schema = [{name, Tab}, {ram_copies, [N1,N2,N3]}, {majority,true}],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match({[ok,ok,ok],[]},
+ rpc:multicall([N1,N2,N3], mnesia, wait_for_tables, [[Tab], 3000])),
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+ mnesia_test_lib:kill_mnesia([N3]),
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+ mnesia_test_lib:kill_mnesia([N2]),
+ ?match({aborted,{no_majority,Tab}},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)).
+
+wread(suite) -> [];
+wread(Config) when is_list(Config) ->
+ [N1, N2] = Nodes = ?acquire_nodes(2, Config),
+ Tab = t,
+ Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match({[ok,ok],[]},
+ rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])),
+ ?match({atomic,[]},
+ mnesia:transaction(fun() -> mnesia:read(t,1,write) end)),
+ mnesia_test_lib:kill_mnesia([N2]),
+ ?match({aborted,{no_majority,Tab}},
+ mnesia:transaction(fun() -> mnesia:read(t,1,write) end)).
+
+delete(suite) -> [];
+delete(Config) when is_list(Config) ->
+ [N1, N2] = Nodes = ?acquire_nodes(2, Config),
+ Tab = t,
+ Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match({[ok,ok],[]},
+ rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])),
+ %% works as expected with majority of nodes present
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:delete({t,1}) end)),
+ ?match({atomic,[]},
+ mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+ %% put the record back
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+ ?match({atomic,[{t,1,a}]},
+ mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+ mnesia_test_lib:kill_mnesia([N2]),
+ ?match({aborted,{no_majority,Tab}},
+ mnesia:transaction(fun() -> mnesia:delete({t,1}) end)).
+
+clear_table(suite) -> [];
+clear_table(Config) when is_list(Config) ->
+ [N1, N2] = Nodes = ?acquire_nodes(2, Config),
+ Tab = t,
+ Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match({[ok,ok],[]},
+ rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])),
+ %% works as expected with majority of nodes present
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+ ?match({atomic,ok}, mnesia:clear_table(t)),
+ ?match({atomic,[]},
+ mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+ %% put the record back
+ ?match({atomic,ok},
+ mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)),
+ ?match({atomic,[{t,1,a}]},
+ mnesia:transaction(fun() -> mnesia:read({t,1}) end)),
+ mnesia_test_lib:kill_mnesia([N2]),
+ ?match({aborted,{no_majority,Tab}}, mnesia:clear_table(t)).
+
+frag(suite) -> [];
+frag(Config) when is_list(Config) ->
+ [N1] = Nodes = ?acquire_nodes(1, Config),
+ Tab = t,
+ Schema = [
+ {name, Tab}, {ram_copies, [N1]},
+ {majority,true},
+ {frag_properties, [{n_fragments, 2}]}
+ ],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match(true, mnesia:table_info(t, majority)),
+ ?match(true, mnesia:table_info(t_frag2, majority)).
+
+change_majority(suite) -> [];
+change_majority(Config) when is_list(Config) ->
+ [N1,N2] = Nodes = ?acquire_nodes(2, Config),
+ Tab = t,
+ Schema = [
+ {name, Tab}, {ram_copies, [N1,N2]},
+ {majority,false}
+ ],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match(false, mnesia:table_info(t, majority)),
+ ?match({atomic, ok},
+ mnesia:change_table_majority(t, true)),
+ ?match(true, mnesia:table_info(t, majority)),
+ ?match(ok,
+ mnesia:activity(transaction, fun() ->
+ mnesia:write({t,1,a})
+ end)),
+ mnesia_test_lib:kill_mnesia([N2]),
+ ?match({'EXIT',{aborted,{no_majority,_}}},
+ mnesia:activity(transaction, fun() ->
+ mnesia:write({t,1,a})
+ end)).
+
+frag_change_majority(suite) -> [];
+frag_change_majority(Config) when is_list(Config) ->
+ [N1,N2] = Nodes = ?acquire_nodes(2, Config),
+ Tab = t,
+ Schema = [
+ {name, Tab}, {ram_copies, [N1,N2]},
+ {majority,false},
+ {frag_properties,
+ [{n_fragments, 2},
+ {n_ram_copies, 2},
+ {node_pool, [N1,N2]}]}
+ ],
+ ?match({atomic, ok}, mnesia:create_table(Schema)),
+ ?match(false, mnesia:table_info(t, majority)),
+ ?match(false, mnesia:table_info(t_frag2, majority)),
+ ?match({aborted,{bad_type,t_frag2}},
+ mnesia:change_table_majority(t_frag2, true)),
+ ?match({atomic, ok},
+ mnesia:change_table_majority(t, true)),
+ ?match(true, mnesia:table_info(t, majority)),
+ ?match(true, mnesia:table_info(t_frag2, majority)),
+ ?match(ok,
+ mnesia:activity(transaction, fun() ->
+ mnesia:write({t,1,a})
+ end, mnesia_frag)),
+ mnesia_test_lib:kill_mnesia([N2]),
+ ?match({'EXIT',{aborted,{no_majority,_}}},
+ mnesia:activity(transaction, fun() ->
+ mnesia:write({t,1,a})
+ end, mnesia_frag)).