aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/mnesia/src/mnesia.hrl1
-rw-r--r--lib/mnesia/src/mnesia_schema.erl15
-rw-r--r--lib/mnesia/src/mnesia_tm.erl76
3 files changed, 81 insertions, 11 deletions
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl
index d488d9364a..26537815a3 100644
--- a/lib/mnesia/src/mnesia.hrl
+++ b/lib/mnesia/src/mnesia.hrl
@@ -62,6 +62,7 @@
disc_only_copies = [], % [Node]
load_order = 0, % Integer
access_mode = read_write, % read_write | read_only
+ majority = false, % true | false
index = [], % [Integer]
snmp = [], % Snmp Ustruct
local_content = false, % true | false
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index d1d892a387..a6c8ffec01 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -178,6 +178,7 @@ do_set_schema(Tab, Cs) ->
set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies),
set({Tab, load_order}, Cs#cstruct.load_order),
set({Tab, access_mode}, Cs#cstruct.access_mode),
+ set({Tab, majority}, Cs#cstruct.majority),
set({Tab, snmp}, Cs#cstruct.snmp),
set({Tab, user_properties}, Cs#cstruct.user_properties),
[set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties],
@@ -651,6 +652,7 @@ list2cs(List) when is_list(List) ->
Snmp = pick(Name, snmp, List, []),
LoadOrder = pick(Name, load_order, List, 0),
AccessMode = pick(Name, access_mode, List, read_write),
+ Majority = pick(Name, majority, List, false),
UserProps = pick(Name, user_properties, List, []),
verify({alt, [nil, list]}, mnesia_lib:etype(UserProps),
{bad_type, Name, {user_properties, UserProps}}),
@@ -676,6 +678,7 @@ list2cs(List) when is_list(List) ->
snmp = Snmp,
load_order = LoadOrder,
access_mode = AccessMode,
+ majority = Majority,
local_content = LC,
record_name = RecName,
attributes = Attrs,
@@ -809,7 +812,16 @@ verify_cstruct(Cs) when is_record(Cs, cstruct) ->
Access = Cs#cstruct.access_mode,
verify({alt, [read_write, read_only]}, Access,
{bad_type, Tab, {access_mode, Access}}),
-
+ Majority = Cs#cstruct.majority,
+ verify({alt, [true, false]}, Majority,
+ {bad_type, Tab, {majority, Majority}}),
+ case Majority of
+ true ->
+ verify(false, LC,
+ {combine_error, Tab, [{local_content,true},{majority,true}]});
+ false ->
+ ok
+ end,
Snmp = Cs#cstruct.snmp,
verify(true, mnesia_snmp_hook:check_ustruct(Snmp),
{badarg, Tab, {snmp, Snmp}}),
@@ -2971,6 +2983,7 @@ merge_versions(AnythingNew, Cs, RemoteCs, Force) ->
Cs#cstruct.index == RemoteCs#cstruct.index,
Cs#cstruct.snmp == RemoteCs#cstruct.snmp,
Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode,
+ Cs#cstruct.majority == RemoteCs#cstruct.majority,
Cs#cstruct.load_order == RemoteCs#cstruct.load_order,
Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties ->
do_merge_versions(AnythingNew, Cs, RemoteCs);
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index bb8e788b40..c7a0c28589 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -64,7 +64,8 @@
prev_tab = [], % initiate to a non valid table name
prev_types,
prev_snmp,
- types
+ types,
+ majority = []
}).
-record(participant, {tid, pid, commit, disc_nodes = [],
@@ -1100,9 +1101,12 @@ t_commit(Type) ->
case arrange(Tid, Store, Type) of
{N, Prep} when N > 0 ->
multi_commit(Prep#prep.protocol,
+ majority_attr(Prep),
Tid, Prep#prep.records, Store);
{0, Prep} ->
- multi_commit(read_only, Tid, Prep#prep.records, Store)
+ multi_commit(read_only,
+ majority_attr(Prep),
+ Tid, Prep#prep.records, Store)
end;
true ->
%% nested commit
@@ -1117,6 +1121,12 @@ t_commit(Type) ->
do_commit_nested
end.
+%% Return the majority list carried by a #prep{} record.
+%% Any term that is not a #prep{} record yields the empty list.
+majority_attr(Prep) when is_record(Prep, prep) ->
+    Prep#prep.majority;
+majority_attr(_NotPrep) ->
+    [].
+
+
%% This function arranges for all objects we shall write in S to be
%% in a list of {Node, CommitRecord}
%% Important function for the performance of mnesia.
@@ -1222,11 +1232,13 @@ prepare_items(Tid, Tab, Key, Items, Prep) ->
{blocked, _} ->
unblocked = req({unblock_me, Tab}),
prepare_items(Tid, Tab, Key, Items, Prep);
- _ ->
+ _ ->
+ Majority = needs_majority(Tab, Prep),
Snmp = val({Tab, snmp}),
Recs2 = do_prepare_items(Tid, Tab, Key, Types,
Snmp, Items, Prep#prep.records),
Prep2 = Prep#prep{records = Recs2, prev_tab = Tab,
+ majority = Majority,
prev_types = Types, prev_snmp = Snmp},
check_prep(Prep2, Types)
end.
@@ -1235,6 +1247,41 @@ do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) ->
Recs2 = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit
prepare_nodes(Tid, Types, Items, Recs2, normal).
+
+%% needs_majority(Tab, Prep) -> MajorityList
+%%
+%% Returns the majority list of Prep, extended with {Tab, CopyHolders}
+%% when Tab has the majority attribute set and is not already listed.
+%%
+%% Tables that do not require majority (attribute is false, or cannot be
+%% read) are deliberately NOT recorded: a non-empty majority list makes
+%% check_prep/2 switch the commit protocol to asym_trans, so an entry
+%% for an ordinary table would change how every transaction on it
+%% is committed.
+needs_majority(Tab, #prep{majority = M}) ->
+    case lists:keymember(Tab, 1, M) of
+        true ->
+            %% Already recorded for this transaction.
+            M;
+        false ->
+            case ?catch_val({Tab, majority}) of
+                {'EXIT', _} ->
+                    M;
+                false ->
+                    M;
+                true ->
+                    CopyHolders = all_copy_holders(Tab),
+                    [{Tab, CopyHolders} | M]
+            end
+    end.
+
+%% Concatenate the node lists stored under {Tab, disc_copies},
+%% {Tab, disc_only_copies} and {Tab, ram_copies}, in that order.
+all_copy_holders(Tab) ->
+    lists:append([val({Tab, Storage})
+                  || Storage <- [disc_copies, disc_only_copies, ram_copies]]).
+
+%% have_majority(MajorityList, Nodes) -> ok | {error, Tab}
+%%
+%% MajorityList is a list of {Tab, CopyHolders}. For each entry, strictly
+%% more than half of CopyHolders must be members of Nodes. Returns ok when
+%% every entry is satisfied, otherwise {error, Tab} for the first table
+%% that lacks a majority.
+have_majority([], _Nodes) ->
+    ok;
+have_majority([{_Tab, []} | Rest], Nodes) ->
+    %% An entry without copy holders imposes no majority requirement.
+    %% Without this clause the comparison below would be 0 > 0 and the
+    %% entry could never be satisfied.
+    have_majority(Rest, Nodes);
+have_majority([{Tab, AllNodes} | Rest], Nodes) ->
+    Missing = AllNodes -- Nodes,
+    Present = AllNodes -- Missing,
+    case length(Present) > length(Missing) of
+        true ->
+            have_majority(Rest, Nodes);
+        false ->
+            {error, Tab}
+    end.
+
prepare_snmp(Tab, Key, Items) ->
case val({Tab, snmp}) of
[] ->
@@ -1261,10 +1308,15 @@ prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) ->
prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp)
end.
-check_prep(Prep, Types) when Prep#prep.types == Types ->
+check_prep(#prep{majority = [], types = Types} = Prep, Types) ->
Prep;
-check_prep(Prep, Types) when Prep#prep.types == undefined ->
- Prep#prep{types = Types};
+check_prep(#prep{majority = M, types = undefined} = Prep, Types) ->
+ Protocol = if M == [] ->
+ Prep#prep.protocol;
+ true ->
+ asym_trans
+ end,
+ Prep#prep{protocol = Protocol, types = Types};
check_prep(Prep, _Types) ->
Prep#prep{protocol = asym_trans}.
@@ -1311,7 +1363,7 @@ prepare_node(_Node, _Storage, [], Rec, _Kind) ->
%% multi_commit((Protocol, Tid, CommitRecords, Store)
%% Local work is always performed in users process
-multi_commit(read_only, Tid, CR, _Store) ->
+multi_commit(read_only, _Maj = [], Tid, CR, _Store) ->
%% This featherweight commit protocol is used when no
%% updates has been performed in the transaction.
@@ -1324,7 +1376,7 @@ multi_commit(read_only, Tid, CR, _Store) ->
?MODULE ! {delete_transaction, Tid},
do_commit;
-multi_commit(sym_trans, Tid, CR, Store) ->
+multi_commit(sym_trans, _Maj = [], Tid, CR, Store) ->
%% This lightweight commit protocol is used when all
%% the involved tables are replicated symetrically.
%% Their storage types must match on each node.
@@ -1376,7 +1428,7 @@ multi_commit(sym_trans, Tid, CR, Store) ->
[{tid, Tid}, {outcome, Outcome}]),
Outcome;
-multi_commit(sync_sym_trans, Tid, CR, Store) ->
+multi_commit(sync_sym_trans, _Maj = [], Tid, CR, Store) ->
%% This protocol is the same as sym_trans except that it
%% uses syncronized calls to disk_log and syncronized commits
%% when several nodes are involved.
@@ -1408,7 +1460,7 @@ multi_commit(sync_sym_trans, Tid, CR, Store) ->
[{tid, Tid}, {outcome, Outcome}]),
Outcome;
-multi_commit(asym_trans, Tid, CR, Store) ->
+multi_commit(asym_trans, Majority, Tid, CR, Store) ->
%% This more expensive commit protocol is used when
%% table definitions are changed (schema transactions).
%% It is also used when the involved tables are
@@ -1469,6 +1521,10 @@ multi_commit(asym_trans, Tid, CR, Store) ->
{D2, CR2} = commit_decision(D, CR, [], []),
DiscNs = D2#decision.disc_nodes,
RamNs = D2#decision.ram_nodes,
+ case have_majority(Majority, DiscNs ++ RamNs) of
+ ok -> ok;
+ {error, Tab} -> mnesia:abort({no_majority, Tab})
+ end,
Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
?ets_insert(Store, Pending),
{WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs),