diff options
-rw-r--r-- | lib/mnesia/src/mnesia.hrl | 1 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_schema.erl | 15 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_tm.erl | 76 |
3 files changed, 81 insertions, 11 deletions
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl index d488d9364a..26537815a3 100644 --- a/lib/mnesia/src/mnesia.hrl +++ b/lib/mnesia/src/mnesia.hrl @@ -62,6 +62,7 @@ disc_only_copies = [], % [Node] load_order = 0, % Integer access_mode = read_write, % read_write | read_only + majority = false, % true | false index = [], % [Integer] snmp = [], % Snmp Ustruct local_content = false, % true | false diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl index d1d892a387..a6c8ffec01 100644 --- a/lib/mnesia/src/mnesia_schema.erl +++ b/lib/mnesia/src/mnesia_schema.erl @@ -178,6 +178,7 @@ do_set_schema(Tab, Cs) -> set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies), set({Tab, load_order}, Cs#cstruct.load_order), set({Tab, access_mode}, Cs#cstruct.access_mode), + set({Tab, majority}, Cs#cstruct.majority), set({Tab, snmp}, Cs#cstruct.snmp), set({Tab, user_properties}, Cs#cstruct.user_properties), [set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties], @@ -651,6 +652,7 @@ list2cs(List) when is_list(List) -> Snmp = pick(Name, snmp, List, []), LoadOrder = pick(Name, load_order, List, 0), AccessMode = pick(Name, access_mode, List, read_write), + Majority = pick(Name, majority, List, false), UserProps = pick(Name, user_properties, List, []), verify({alt, [nil, list]}, mnesia_lib:etype(UserProps), {bad_type, Name, {user_properties, UserProps}}), @@ -676,6 +678,7 @@ list2cs(List) when is_list(List) -> snmp = Snmp, load_order = LoadOrder, access_mode = AccessMode, + majority = Majority, local_content = LC, record_name = RecName, attributes = Attrs, @@ -809,7 +812,16 @@ verify_cstruct(Cs) when is_record(Cs, cstruct) -> Access = Cs#cstruct.access_mode, verify({alt, [read_write, read_only]}, Access, {bad_type, Tab, {access_mode, Access}}), - + Majority = Cs#cstruct.majority, + verify({alt, [true, false]}, Majority, + {bad_type, Tab, {majority, Majority}}), + case Majority of + true -> + verify(false, LC, + {combine_error, Tab, [{local_content,true},{majority,true}]}); + false -> + ok + end, Snmp = Cs#cstruct.snmp, verify(true, mnesia_snmp_hook:check_ustruct(Snmp), {badarg, Tab, {snmp, Snmp}}), @@ -2971,6 +2983,7 @@ merge_versions(AnythingNew, Cs, RemoteCs, Force) -> Cs#cstruct.index == RemoteCs#cstruct.index, Cs#cstruct.snmp == RemoteCs#cstruct.snmp, Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode, + Cs#cstruct.majority == RemoteCs#cstruct.majority, Cs#cstruct.load_order == RemoteCs#cstruct.load_order, Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties -> do_merge_versions(AnythingNew, Cs, RemoteCs); diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl index bb8e788b40..c7a0c28589 100644 --- a/lib/mnesia/src/mnesia_tm.erl +++ b/lib/mnesia/src/mnesia_tm.erl @@ -64,7 +64,8 @@ prev_tab = [], % initiate to a non valid table name prev_types, prev_snmp, - types + types, + majority = [] }). -record(participant, {tid, pid, commit, disc_nodes = [], @@ -1100,9 +1101,12 @@ t_commit(Type) -> case arrange(Tid, Store, Type) of {N, Prep} when N > 0 -> multi_commit(Prep#prep.protocol, + majority_attr(Prep), Tid, Prep#prep.records, Store); {0, Prep} -> - multi_commit(read_only, Tid, Prep#prep.records, Store) + multi_commit(read_only, + majority_attr(Prep), + Tid, Prep#prep.records, Store) end; true -> %% nested commit @@ -1117,6 +1121,12 @@ t_commit(Type) -> do_commit_nested end. +majority_attr(#prep{majority = M}) -> + M; +majority_attr(_) -> + []. + + %% This function arranges for all objects we shall write in S to be %% in a list of {Node, CommitRecord} %% Important function for the performance of mnesia. @@ -1222,11 +1232,13 @@ prepare_items(Tid, Tab, Key, Items, Prep) -> {blocked, _} -> unblocked = req({unblock_me, Tab}), prepare_items(Tid, Tab, Key, Items, Prep); - _ -> + _ -> + Majority = needs_majority(Tab, Prep), Snmp = val({Tab, snmp}), Recs2 = do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Prep#prep.records), Prep2 = Prep#prep{records = Recs2, prev_tab = Tab, + majority = Majority, prev_types = Types, prev_snmp = Snmp}, check_prep(Prep2, Types) end. @@ -1235,6 +1247,41 @@ do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) -> Recs2 = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit prepare_nodes(Tid, Types, Items, Recs2, normal). + +needs_majority(Tab, #prep{majority = M}) -> + case lists:keymember(Tab, 1, M) of + true -> + M; + false -> + case ?catch_val({Tab, majority}) of + {'EXIT', _} -> + M; + false -> + [{Tab, []} | M]; + true -> + CopyHolders = all_copy_holders(Tab), + [{Tab, CopyHolders} | M] + end + end. + +all_copy_holders(Tab) -> + DC = val({Tab, disc_copies}), + DO = val({Tab, disc_only_copies}), + RC = val({Tab, ram_copies}), + DC ++ DO ++ RC. + +have_majority([], _) -> + ok; +have_majority([{Tab, AllNodes} | Rest], Nodes) -> + Missing = AllNodes -- Nodes, + Present = AllNodes -- Missing, + case length(Present) > length(Missing) of + true -> + have_majority(Rest, Nodes); + false -> + {error, Tab} + end. + prepare_snmp(Tab, Key, Items) -> case val({Tab, snmp}) of [] -> @@ -1261,10 +1308,15 @@ prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) -> prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp) end. -check_prep(Prep, Types) when Prep#prep.types == Types -> +check_prep(#prep{majority = [], types = Types} = Prep, Types) -> Prep; -check_prep(Prep, Types) when Prep#prep.types == undefined -> - Prep#prep{types = Types}; +check_prep(#prep{majority = M, types = undefined} = Prep, Types) -> + Protocol = if M == [] -> + Prep#prep.protocol; + true -> + asym_trans + end, + Prep#prep{protocol = Protocol, types = Types}; check_prep(Prep, _Types) -> Prep#prep{protocol = asym_trans}. @@ -1311,7 +1363,7 @@ prepare_node(_Node, _Storage, [], Rec, _Kind) -> %% multi_commit((Protocol, Tid, CommitRecords, Store) %% Local work is always performed in users process -multi_commit(read_only, Tid, CR, _Store) -> +multi_commit(read_only, _Maj = [], Tid, CR, _Store) -> %% This featherweight commit protocol is used when no %% updates has been performed in the transaction. @@ -1324,7 +1376,7 @@ multi_commit(read_only, Tid, CR, _Store) -> ?MODULE ! {delete_transaction, Tid}, do_commit; -multi_commit(sym_trans, Tid, CR, Store) -> +multi_commit(sym_trans, _Maj = [], Tid, CR, Store) -> %% This lightweight commit protocol is used when all %% the involved tables are replicated symetrically. %% Their storage types must match on each node. @@ -1376,7 +1428,7 @@ multi_commit(sym_trans, Tid, CR, Store) -> [{tid, Tid}, {outcome, Outcome}]), Outcome; -multi_commit(sync_sym_trans, Tid, CR, Store) -> +multi_commit(sync_sym_trans, _Maj = [], Tid, CR, Store) -> %% This protocol is the same as sym_trans except that it %% uses syncronized calls to disk_log and syncronized commits %% when several nodes are involved. @@ -1408,7 +1460,7 @@ multi_commit(sync_sym_trans, Tid, CR, Store) -> [{tid, Tid}, {outcome, Outcome}]), Outcome; -multi_commit(asym_trans, Tid, CR, Store) -> +multi_commit(asym_trans, Majority, Tid, CR, Store) -> %% This more expensive commit protocol is used when %% table definitions are changed (schema transactions). %% It is also used when the involved tables are @@ -1469,6 +1521,10 @@ multi_commit(asym_trans, Tid, CR, Store) -> {D2, CR2} = commit_decision(D, CR, [], []), DiscNs = D2#decision.disc_nodes, RamNs = D2#decision.ram_nodes, + case have_majority(Majority, DiscNs ++ RamNs) of + ok -> ok; + {error, Tab} -> mnesia:abort({no_majority, Tab}) + end, Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs), ?ets_insert(Store, Pending), {WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs), |