diff options
-rw-r--r-- | lib/mnesia/doc/src/Mnesia_chap7.xmlsrc | 7 | ||||
-rw-r--r-- | lib/mnesia/doc/src/mnesia.xml | 32 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia.erl | 11 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia.hrl | 1 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_controller.erl | 26 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_dumper.erl | 8 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_lib.erl | 10 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_loader.erl | 2 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_locker.erl | 69 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_schema.erl | 54 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia_tm.erl | 66 | ||||
-rw-r--r-- | lib/mnesia/test/Makefile | 1 | ||||
-rw-r--r-- | lib/mnesia/test/mnesia_SUITE.erl | 99 | ||||
-rw-r--r-- | lib/mnesia/test/mnesia_majority_test.erl | 188 |
14 files changed, 546 insertions, 28 deletions
diff --git a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc index 7078499fbf..21174340d1 100644 --- a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc +++ b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc @@ -473,6 +473,13 @@ dets:close(N). </pre> <c>mnesia:table_info(Tab, master_nodes)</c> may be used to obtain information about the potential master nodes. </p> + <p>Determining which data to keep after communication failure is outside + the scope of Mnesia. One approach would be to determine which "island" + contains a majority of the nodes. Using the <c>{majority,true}</c> option + for critical tables can be a way of ensuring that nodes that are not part + of a "majority island" are not able to update those tables. Note that this + constitutes a reduction in service on the minority nodes. This would be + a tradeoff in favour of higher consistency guarantees.</p> <p>The function <c>mnesia:force_load_table(Tab)</c> may be used to force load the table regardless of which table load mechanism is activated. diff --git a/lib/mnesia/doc/src/mnesia.xml b/lib/mnesia/doc/src/mnesia.xml index 16e78ea0af..2a2c7d3a9f 100644 --- a/lib/mnesia/doc/src/mnesia.xml +++ b/lib/mnesia/doc/src/mnesia.xml @@ -161,6 +161,14 @@ If a new item is inserted with the same key as </p> </item> <item> + <p><c>majority</c> This attribute can be either <c>true</c> or + <c>false</c> (default is <c>false</c>). When <c>true</c>, a majority + of the table replicas must be available for an update to succeed. + Majority checking can be enabled on tables with mission-critical data, + where it is vital to avoid inconsistencies due to network splits. + </p> + </item> + <item> <p><c>snmp</c> Each (set based) Mnesia table can be automatically turned into an SNMP ordered table as well. This property specifies the types of the SNMP keys. @@ -650,6 +658,17 @@ mnesia:change_table_copy_type(person, node(), disc_copies) </desc> </func> <func> + <name>change_table_majority(Tab, Majority) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Change the majority check setting for the table.</fsummary> + <desc> + <p><c>Majority</c> must be a boolean; the default is <c>false</c>. + When <c>true</c>, a majority of the table's replicas must be available + for an update to succeed. When used on fragmented tables, <c>Tab</c> + must be the name base table. Directly changing the majority setting on + individual fragments is not allowed.</p> + </desc> + </func> + <func> <name>clear_table(Tab) -> {aborted, R} | {atomic, ok}</name> <fsummary>Deletes all entries in a table.</fsummary> <desc> @@ -753,6 +772,14 @@ mnesia:change_table_copy_type(person, node(), disc_copies) priority will be loaded first at startup. </p> </item> + <item> + <p><c>{majority, Flag}</c>, where <c>Flag</c> must be a boolean. + If <c>true</c>, any (non-dirty) update to the table will abort unless + a majority of the table's replicas are available for the commit. + When used on a fragmented table, all fragments will be given + the same majority setting. + </p> + </item> <item> <p><c>{ram_copies, Nodelist}</c>, where <c>Nodelist</c> is a list of the nodes where this table @@ -1737,7 +1764,10 @@ mnesia:create_table(person, <c>write</c> and <c>sticky_write</c> are supported. </p> <p>If the user wants to update the record it is more efficient to - use <c>write/sticky_write</c> as the LockKind. + use <c>write/sticky_write</c> as the LockKind. If majority checking + is active on the table, it will be checked as soon as a write lock is + attempted. This can be used to quickly abort if the majority condition + isn't met. </p> </desc> </func> diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl index 025b32f506..980a9c6213 100644 --- a/lib/mnesia/src/mnesia.erl +++ b/lib/mnesia/src/mnesia.erl @@ -39,6 +39,7 @@ %% Access within an activity - Lock acquisition lock/2, lock/4, + lock_table/2, read_lock_table/1, write_lock_table/1, @@ -92,7 +93,7 @@ add_table_copy/3, del_table_copy/2, move_table_copy/3, add_table_index/2, del_table_index/2, transform_table/3, transform_table/4, - change_table_copy_type/3, + change_table_copy_type/3, change_table_majority/2, read_table_property/2, write_table_property/2, delete_table_property/2, change_table_frag/2, clear_table/1, clear_table/4, @@ -415,6 +416,9 @@ lock(LockItem, LockKind) -> abort(no_transaction) end. +lock_table(Tab, LockKind) -> + lock({table, Tab}, LockKind). + lock(Tid, Ts, LockItem, LockKind) -> case element(1, Tid) of tid -> @@ -467,6 +471,8 @@ lock_table(Tid, Ts, Tab, LockKind) when is_atom(Tab) -> mnesia_locker:rlock_table(Tid, Store, Tab); write -> mnesia_locker:wlock_table(Tid, Store, Tab); + load -> + mnesia_locker:load_lock_table(Tid, Store, Tab); sticky_write -> mnesia_locker:sticky_wlock_table(Tid, Store, Tab); none -> @@ -2455,6 +2461,9 @@ change_table_access_mode(T, Access) -> change_table_load_order(T, O) -> mnesia_schema:change_table_load_order(T, O). +change_table_majority(T, M) -> + mnesia_schema:change_table_majority(T, M). + set_master_nodes(Nodes) when is_list(Nodes) -> UseDir = system_info(use_dir), IsRunning = system_info(is_running), diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl index d488d9364a..26537815a3 100644 --- a/lib/mnesia/src/mnesia.hrl +++ b/lib/mnesia/src/mnesia.hrl @@ -62,6 +62,7 @@ disc_only_copies = [], % [Node] load_order = 0, % Integer access_mode = read_write, % read_write | read_only + majority = false, % true | false index = [], % [Integer] snmp = [], % Snmp Ustruct local_content = false, % true | false diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl index 0254769758..d4b2c7b5cc 100644 --- a/lib/mnesia/src/mnesia_controller.erl +++ b/lib/mnesia/src/mnesia_controller.erl @@ -72,6 +72,7 @@ add_active_replica/4, update/1, change_table_access_mode/1, + change_table_majority/1, del_active_replica/2, wait_for_tables/2, get_network_copy/2, @@ -690,7 +691,8 @@ handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State) case lists:member(AddNode, Current) and (State#state.schema_is_merged == true) of true -> - mnesia_lib:add_lsort({Tab, where_to_write}, AddNode); + mnesia_lib:add_lsort({Tab, where_to_write}, AddNode), + update_where_to_wlock(Tab); false -> ignore end, @@ -1690,6 +1692,8 @@ add_active_replica(Tab, Node, Storage, AccessMode) -> set(Var, mark_blocked_tab(Blocked, Del)), mnesia_lib:del({Tab, where_to_write}, Node) end, + + update_where_to_wlock(Tab), add({Tab, active_replicas}, Node). del_active_replica(Tab, Node) -> @@ -1699,7 +1703,8 @@ del_active_replica(Tab, Node) -> New = lists:sort(Del), set(Var, mark_blocked_tab(Blocked, New)), % where_to_commit mnesia_lib:del({Tab, active_replicas}, Node), - mnesia_lib:del({Tab, where_to_write}, Node). + mnesia_lib:del({Tab, where_to_write}, Node), + update_where_to_wlock(Tab). change_table_access_mode(Cs) -> W = fun() -> @@ -1708,7 +1713,22 @@ change_table_access_mode(Cs) -> val({Tab, active_replicas})) end, update(W). - + +change_table_majority(Cs) -> + W = fun() -> + Tab = Cs#cstruct.name, + set({Tab, majority}, Cs#cstruct.majority), + update_where_to_wlock(Tab) + end, + update(W). + +update_where_to_wlock(Tab) -> + WNodes = val({Tab, where_to_write}), + Majority = case catch val({Tab, majority}) of + true -> true; + _ -> false + end, + set({Tab, where_to_wlock}, {WNodes, Majority}). %% node To now has tab loaded, but this must be undone %% This code is rpc:call'ed from the tab_copier process diff --git a/lib/mnesia/src/mnesia_dumper.erl b/lib/mnesia/src/mnesia_dumper.erl index 55b9946ae9..92fd9dfade 100644 --- a/lib/mnesia/src/mnesia_dumper.erl +++ b/lib/mnesia/src/mnesia_dumper.erl @@ -900,6 +900,14 @@ insert_op(Tid, _, {op, change_table_access_mode,TabDef, _OldAccess, _Access}, In end, insert_cstruct(Tid, Cs, true, InPlace, InitBy); +insert_op(Tid, _, {op, change_table_majority,TabDef, _OldAccess, _Access}, InPlace, InitBy) -> + Cs = mnesia_schema:list2cs(TabDef), + case InitBy of + startup -> ignore; + _ -> mnesia_controller:change_table_majority(Cs) + end, + insert_cstruct(Tid, Cs, true, InPlace, InitBy); + insert_op(Tid, _, {op, change_table_load_order, TabDef, _OldLevel, _Level}, InPlace, InitBy) -> Cs = mnesia_schema:list2cs(TabDef), insert_cstruct(Tid, Cs, true, InPlace, InitBy); diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl index 36bcfe8de9..7e926a6258 100644 --- a/lib/mnesia/src/mnesia_lib.erl +++ b/lib/mnesia/src/mnesia_lib.erl @@ -96,6 +96,8 @@ exists/1, fatal/2, get_node_number/0, + have_majority/2, + have_majority/3, fix_error/1, important/2, incr_counter/1, @@ -660,6 +662,14 @@ proc_info(_) -> false. get_node_number() -> {node(), self()}. +have_majority(Tab, HaveNodes) -> + have_majority(Tab, val({Tab, all_nodes}), HaveNodes). + +have_majority(_Tab, AllNodes, HaveNodes) -> + Missing = AllNodes -- HaveNodes, + Present = AllNodes -- Missing, + length(Present) > length(Missing). + read_log_files() -> [{F, catch file:read_file(F)} || F <- mnesia_log:log_files()]. diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl index 3de329503e..e785b795d1 100644 --- a/lib/mnesia/src/mnesia_loader.erl +++ b/lib/mnesia/src/mnesia_loader.erl @@ -702,7 +702,7 @@ send_table(Pid, Tab, RemoteS) -> prepare_copy(Pid, Tab, Storage) -> Trans = fun() -> - mnesia:write_lock_table(Tab), + mnesia:lock_table(Tab, load), mnesia_subscr:subscribe(Pid, {table, Tab}), update_where_to_write(Tab, node(Pid)), mnesia_lib:db_fixtable(Storage, Tab, true), diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl index ca0cc79c45..0492d794f3 100644 --- a/lib/mnesia/src/mnesia_locker.erl +++ b/lib/mnesia/src/mnesia_locker.erl @@ -40,7 +40,8 @@ sticky_wlock_table/3, wlock/3, wlock_no_exist/4, - wlock_table/3 + wlock_table/3, + load_lock_table/3 ]). %% sys callback functions @@ -656,16 +657,17 @@ rwlock(Tid, Store, Oid) -> Lock = write, case need_lock(Store, Tab, Key, Lock) of yes -> - Ns = w_nodes(Tab), + {Ns, Majority} = w_nodes(Tab), + check_majority(Majority, Tab, Ns), Res = get_rwlocks_on_nodes(Ns, rwlock, Node, Store, Tid, Oid), ?ets_insert(Store, {{locks, Tab, Key}, Lock}), Res; no -> if Key == ?ALL -> - w_nodes(Tab); + element(2, w_nodes(Tab)); Tab == ?GLOBAL -> - w_nodes(Tab); + element(2, w_nodes(Tab)); true -> dirty_rpc(Node, Tab, Key, Lock) end @@ -677,12 +679,34 @@ rwlock(Tid, Store, Oid) -> %% in the local store under the key == nodes w_nodes(Tab) -> - Nodes = ?catch_val({Tab, where_to_write}), - case Nodes of - [_ | _] -> Nodes; + case ?catch_val({Tab, where_to_wlock}) of + {[_ | _], _} = Where -> Where; _ -> mnesia:abort({no_exists, Tab}) end. +%% If the table has the 'majority' flag set, we can +%% only take a write lock if we see a majority of the +%% nodes. + + +check_majority(true, Tab, HaveNs) -> + check_majority(Tab, HaveNs); +check_majority(false, _, _) -> + ok. + +check_majority(Tab, HaveNs) -> + case ?catch_val({Tab, majority}) of + true -> + case mnesia_lib:have_majority(Tab, HaveNs) of + true -> + ok; + false -> + mnesia:abort({no_majority, Tab}) + end; + _ -> + ok + end. + %% aquire a sticky wlock, a sticky lock is a lock %% which remains at this node after the termination of the %% transaction. @@ -708,12 +732,14 @@ sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) -> end. do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) -> + {WNodes, Majority} = w_nodes(Tab), + sticky_check_majority(Lock, Tab, Majority, WNodes), ?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}}, N = node(), receive {?MODULE, N, granted} -> ?ets_insert(Store, {{locks, Tab, Key}, write}), - [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)], + [?ets_insert(Store, {nodes, Node}) || Node <- WNodes], granted; {?MODULE, N, {granted, Val}} -> %% for rwlocks case opt_lookup_in_client(Val, Oid, write) of @@ -721,7 +747,7 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) -> exit({aborted, C}); Val2 -> ?ets_insert(Store, {{locks, Tab, Key}, write}), - [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)], + [?ets_insert(Store, {nodes, Node}) || Node <- WNodes], Val2 end; {?MODULE, N, {not_granted, Reason}} -> @@ -737,6 +763,16 @@ do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) -> dirty_sticky_lock(Tab, Key, [N], Lock) end. +sticky_check_majority(W, Tab, true, WNodes) when W==write; W==read_write -> + case mnesia_lib:have_majority(Tab, WNodes) of + true -> + ok; + false -> + mnesia:abort({no_majority, Tab}) + end; +sticky_check_majority(_, _, _, _) -> + ok. + not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) -> rlock(Tid, Store, {Tab, ?ALL}), %% needed? wlock(Tid, Store, Oid), %% perfect sync @@ -773,22 +809,33 @@ sticky_wlock_table(Tid, Store, Tab) -> %% local store when we have aquired the lock. %% wlock(Tid, Store, Oid) -> + wlock(Tid, Store, Oid, _CheckMajority = true). + +wlock(Tid, Store, Oid, CheckMajority) -> {Tab, Key} = Oid, case need_lock(Store, Tab, Key, write) of yes -> - Ns = w_nodes(Tab), + {Ns, Majority} = w_nodes(Tab), + if CheckMajority -> + check_majority(Majority, Tab, Ns); + true -> + ignore + end, Op = {self(), {write, Tid, Oid}}, ?ets_insert(Store, {{locks, Tab, Key}, write}), get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid); no when Key /= ?ALL, Tab /= ?GLOBAL -> []; no -> - w_nodes(Tab) + element(2, w_nodes(Tab)) end. wlock_table(Tid, Store, Tab) -> wlock(Tid, Store, {Tab, ?ALL}). +load_lock_table(Tid, Store, Tab) -> + wlock(Tid, Store, {Tab, ?ALL}, _CheckMajority = false). + %% Write lock even if the table does not exist wlock_no_exist(Tid, Store, Tab, Ns) -> diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl index f33a6c7a84..fef72ad39c 100644 --- a/lib/mnesia/src/mnesia_schema.erl +++ b/lib/mnesia/src/mnesia_schema.erl @@ -37,6 +37,7 @@ change_table_copy_type/3, change_table_access_mode/2, change_table_load_order/2, + change_table_majority/2, change_table_frag/2, clear_table/1, create_table/1, @@ -178,6 +179,8 @@ do_set_schema(Tab, Cs) -> set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies), set({Tab, load_order}, Cs#cstruct.load_order), set({Tab, access_mode}, Cs#cstruct.access_mode), + set({Tab, majority}, Cs#cstruct.majority), + set({Tab, all_nodes}, mnesia_lib:cs_to_nodes(Cs)), set({Tab, snmp}, Cs#cstruct.snmp), set({Tab, user_properties}, Cs#cstruct.user_properties), [set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties], @@ -651,6 +654,7 @@ list2cs(List) when is_list(List) -> Snmp = pick(Name, snmp, List, []), LoadOrder = pick(Name, load_order, List, 0), AccessMode = pick(Name, access_mode, List, read_write), + Majority = pick(Name, majority, List, false), UserProps = pick(Name, user_properties, List, []), verify({alt, [nil, list]}, mnesia_lib:etype(UserProps), {bad_type, Name, {user_properties, UserProps}}), @@ -676,6 +680,7 @@ list2cs(List) when is_list(List) -> snmp = Snmp, load_order = LoadOrder, access_mode = AccessMode, + majority = Majority, local_content = LC, record_name = RecName, attributes = Attrs, @@ -809,7 +814,16 @@ verify_cstruct(Cs) when is_record(Cs, cstruct) -> Access = Cs#cstruct.access_mode, verify({alt, [read_write, read_only]}, Access, {bad_type, Tab, {access_mode, Access}}), - + Majority = Cs#cstruct.majority, + verify({alt, [true, false]}, Majority, + {bad_type, Tab, {majority, Majority}}), + case Majority of + true -> + verify(false, LC, + {combine_error, Tab, [{local_content,true},{majority,true}]}); + false -> + ok + end, Snmp = Cs#cstruct.snmp, verify(true, mnesia_snmp_hook:check_ustruct(Snmp), {badarg, Tab, {snmp, Snmp}}), @@ -1495,6 +1509,43 @@ make_change_table_load_order(Tab, LoadOrder) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +change_table_majority(Tab, Majority) when is_boolean(Majority) -> + schema_transaction(fun() -> do_change_table_majority(Tab, Majority) end). + +do_change_table_majority(schema, _Majority) -> + mnesia:abort({bad_type, schema}); +do_change_table_majority(Tab, Majority) -> + TidTs = get_tid_ts_and_lock(schema, write), + get_tid_ts_and_lock(Tab, none), + insert_schema_ops(TidTs, make_change_table_majority(Tab, Majority)). + +make_change_table_majority(Tab, Majority) -> + ensure_writable(schema), + Cs = incr_version(val({Tab, cstruct})), + ensure_active(Cs), + OldMajority = Cs#cstruct.majority, + Cs2 = Cs#cstruct{majority = Majority}, + FragOps = case lists:keyfind(base_table, 1, Cs#cstruct.frag_properties) of + {_, Tab} -> + FragNames = mnesia_frag:frag_names(Tab) -- [Tab], + lists:map( + fun(T) -> + get_tid_ts_and_lock(Tab, none), + CsT = incr_version(val({T, cstruct})), + ensure_active(CsT), + CsT2 = CsT#cstruct{majority = Majority}, + verify_cstruct(CsT2), + {op, change_table_majority, cs2list(CsT2), + OldMajority, Majority} + end, FragNames); + false -> []; + {_, _} -> mnesia:abort({bad_type, Tab}) + end, + verify_cstruct(Cs2), + [{op, change_table_majority, cs2list(Cs2), OldMajority, Majority} | FragOps]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + write_table_property(Tab, Prop) when is_tuple(Prop), size(Prop) >= 1 -> schema_transaction(fun() -> do_write_table_property(Tab, Prop) end); write_table_property(Tab, Prop) -> @@ -2974,6 +3025,7 @@ merge_versions(AnythingNew, Cs, RemoteCs, Force) -> Cs#cstruct.index == RemoteCs#cstruct.index, Cs#cstruct.snmp == RemoteCs#cstruct.snmp, Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode, + Cs#cstruct.majority == RemoteCs#cstruct.majority, Cs#cstruct.load_order == RemoteCs#cstruct.load_order, Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties -> do_merge_versions(AnythingNew, Cs, RemoteCs); diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl index bb8e788b40..f62f7cb7c8 100644 --- a/lib/mnesia/src/mnesia_tm.erl +++ b/lib/mnesia/src/mnesia_tm.erl @@ -64,7 +64,8 @@ prev_tab = [], % initiate to a non valid table name prev_types, prev_snmp, - types + types, + majority = [] }). -record(participant, {tid, pid, commit, disc_nodes = [], @@ -1100,9 +1101,12 @@ t_commit(Type) -> case arrange(Tid, Store, Type) of {N, Prep} when N > 0 -> multi_commit(Prep#prep.protocol, + majority_attr(Prep), Tid, Prep#prep.records, Store); {0, Prep} -> - multi_commit(read_only, Tid, Prep#prep.records, Store) + multi_commit(read_only, + majority_attr(Prep), + Tid, Prep#prep.records, Store) end; true -> %% nested commit @@ -1117,6 +1121,10 @@ t_commit(Type) -> do_commit_nested end. +majority_attr(#prep{majority = M}) -> + M. + + %% This function arranges for all objects we shall write in S to be %% in a list of {Node, CommitRecord} %% Important function for the performance of mnesia. @@ -1222,11 +1230,13 @@ prepare_items(Tid, Tab, Key, Items, Prep) -> {blocked, _} -> unblocked = req({unblock_me, Tab}), prepare_items(Tid, Tab, Key, Items, Prep); - _ -> + _ -> + Majority = needs_majority(Tab, Prep), Snmp = val({Tab, snmp}), Recs2 = do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Prep#prep.records), Prep2 = Prep#prep{records = Recs2, prev_tab = Tab, + majority = Majority, prev_types = Types, prev_snmp = Snmp}, check_prep(Prep2, Types) end. @@ -1235,6 +1245,33 @@ do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) -> Recs2 = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit prepare_nodes(Tid, Types, Items, Recs2, normal). + +needs_majority(Tab, #prep{majority = M}) -> + case lists:keymember(Tab, 1, M) of + true -> + M; + false -> + case ?catch_val({Tab, majority}) of + {'EXIT', _} -> + M; + false -> + M; + true -> + CopyHolders = val({Tab, all_nodes}), + [{Tab, CopyHolders} | M] + end + end. + +have_majority([], _) -> + ok; +have_majority([{Tab, AllNodes} | Rest], Nodes) -> + case mnesia_lib:have_majority(Tab, AllNodes, Nodes) of + true -> + have_majority(Rest, Nodes); + false -> + {error, Tab} + end. + prepare_snmp(Tab, Key, Items) -> case val({Tab, snmp}) of [] -> @@ -1261,10 +1298,15 @@ prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) -> prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp) end. -check_prep(Prep, Types) when Prep#prep.types == Types -> +check_prep(#prep{majority = [], types = Types} = Prep, Types) -> Prep; -check_prep(Prep, Types) when Prep#prep.types == undefined -> - Prep#prep{types = Types}; +check_prep(#prep{majority = M, types = undefined} = Prep, Types) -> + Protocol = if M == [] -> + Prep#prep.protocol; + true -> + asym_trans + end, + Prep#prep{protocol = Protocol, types = Types}; check_prep(Prep, _Types) -> Prep#prep{protocol = asym_trans}. @@ -1311,7 +1353,7 @@ prepare_node(_Node, _Storage, [], Rec, _Kind) -> %% multi_commit((Protocol, Tid, CommitRecords, Store) %% Local work is always performed in users process -multi_commit(read_only, Tid, CR, _Store) -> +multi_commit(read_only, _Maj = [], Tid, CR, _Store) -> %% This featherweight commit protocol is used when no %% updates has been performed in the transaction. @@ -1324,7 +1366,7 @@ multi_commit(read_only, Tid, CR, _Store) -> ?MODULE ! {delete_transaction, Tid}, do_commit; -multi_commit(sym_trans, Tid, CR, Store) -> +multi_commit(sym_trans, _Maj = [], Tid, CR, Store) -> %% This lightweight commit protocol is used when all %% the involved tables are replicated symetrically. %% Their storage types must match on each node. @@ -1376,7 +1418,7 @@ multi_commit(sym_trans, Tid, CR, Store) -> [{tid, Tid}, {outcome, Outcome}]), Outcome; -multi_commit(sync_sym_trans, Tid, CR, Store) -> +multi_commit(sync_sym_trans, _Maj = [], Tid, CR, Store) -> %% This protocol is the same as sym_trans except that it %% uses syncronized calls to disk_log and syncronized commits %% when several nodes are involved. @@ -1408,7 +1450,7 @@ multi_commit(sync_sym_trans, Tid, CR, Store) -> [{tid, Tid}, {outcome, Outcome}]), Outcome; -multi_commit(asym_trans, Tid, CR, Store) -> +multi_commit(asym_trans, Majority, Tid, CR, Store) -> %% This more expensive commit protocol is used when %% table definitions are changed (schema transactions). %% It is also used when the involved tables are @@ -1469,6 +1511,10 @@ multi_commit(asym_trans, Tid, CR, Store) -> {D2, CR2} = commit_decision(D, CR, [], []), DiscNs = D2#decision.disc_nodes, RamNs = D2#decision.ram_nodes, + case have_majority(Majority, DiscNs ++ RamNs) of + ok -> ok; + {error, Tab} -> mnesia:abort({no_majority, Tab}) + end, Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs), ?ets_insert(Store, Pending), {WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs), diff --git a/lib/mnesia/test/Makefile b/lib/mnesia/test/Makefile index b165924ef2..ae4c9626c7 100644 --- a/lib/mnesia/test/Makefile +++ b/lib/mnesia/test/Makefile @@ -42,6 +42,7 @@ MODULES= \ mnesia_dirty_access_test \ mnesia_atomicity_test \ mnesia_consistency_test \ + mnesia_majority_test \ mnesia_isolation_test \ mnesia_durability_test \ mnesia_recovery_test \ diff --git a/lib/mnesia/test/mnesia_SUITE.erl b/lib/mnesia/test/mnesia_SUITE.erl index 8ba8427213..dc8f216c1c 100644 --- a/lib/mnesia/test/mnesia_SUITE.erl +++ b/lib/mnesia/test/mnesia_SUITE.erl @@ -142,6 +142,105 @@ silly() -> mnesia_install_test:silly(). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +light(doc) -> + ["The 'light' test suite runs a selected set of test suites and is", + "intended to be the smallest test suite that is meaningful", + "to run. It starts with an installation test (which in essence is the", + "'silly' test case) and then it covers all functions in the API in", + "various depths. All configuration parameters and examples are also", + "covered."]; +light(suite) -> + [ + install, + nice, + evil, + {mnesia_frag_test, light}, + qlc, + registry, + config, + examples + ]. + +install(suite) -> + [{mnesia_install_test, all}]. + +nice(suite) -> + [{mnesia_nice_coverage_test, all}]. + +evil(suite) -> + [{mnesia_evil_coverage_test, all}]. + +qlc(suite) -> + [{mnesia_qlc_test, all}]. + +registry(suite) -> + [{mnesia_registry_test, all}]. + +config(suite) -> + [{mnesia_config_test, all}]. + +examples(suite) -> + [{mnesia_examples_test, all}]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +medium(doc) -> + ["The 'medium' test suite verfies the ACID (atomicity, consistency", + "isolation and durability) properties and various recovery scenarios", + "These tests may take quite while to run."]; +medium(suite) -> + [ + install, + atomicity, + isolation, + durability, + recovery, + consistency, + majority, + {mnesia_frag_test, medium} + ]. + +atomicity(suite) -> + [{mnesia_atomicity_test, all}]. + +isolation(suite) -> + [{mnesia_isolation_test, all}]. + +durability(suite) -> + [{mnesia_durability_test, all}]. + +recovery(suite) -> + [{mnesia_recovery_test, all}]. + +consistency(suite) -> + [{mnesia_consistency_test, all}]. + +majority(suite) -> + [{mnesia_majority_test, all}]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +heavy(doc) -> + ["The 'heavy' test suite runs some resource consuming tests and", + "benchmarks"]; +heavy(suite) -> + [measure]. + +measure(suite) -> + [{mnesia_measure_test, all}]. + +prediction(suite) -> + [{mnesia_measure_test, prediction}]. + +fairness(suite) -> + [{mnesia_measure_test, fairness}]. + +benchmarks(suite) -> + [{mnesia_measure_test, benchmarks}]. + +consumption(suite) -> + [{mnesia_measure_test, consumption}]. + +scalability(suite) -> + [{mnesia_measure_test, scalability}]. clean_up_suite(doc) -> ["Not a test case only kills mnesia and nodes, that where" "started during the tests"]; diff --git a/lib/mnesia/test/mnesia_majority_test.erl b/lib/mnesia/test/mnesia_majority_test.erl new file mode 100644 index 0000000000..17d1d8bcdd --- /dev/null +++ b/lib/mnesia/test/mnesia_majority_test.erl @@ -0,0 +1,188 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_majority_test). +-author('[email protected]'). +-compile(export_all). +-include("mnesia_test_lib.hrl"). + +init_per_testcase(Func, Conf) -> + mnesia_test_lib:init_per_testcase(Func, Conf). + +fin_per_testcase(Func, Conf) -> + mnesia_test_lib:fin_per_testcase(Func, Conf). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +all(doc) -> + ["Verify that majority checking works"]; +all(suite) -> + [ + write + , wread + , delete + , clear_table + , frag + , change_majority + , frag_change_majority + ]. + +write(suite) -> []; +write(Config) when is_list(Config) -> + [N1, N2, N3] = Nodes = ?acquire_nodes(3, Config), + Tab = t, + Schema = [{name, Tab}, {ram_copies, [N1,N2,N3]}, {majority,true}], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match({[ok,ok,ok],[]}, + rpc:multicall([N1,N2,N3], mnesia, wait_for_tables, [[Tab], 3000])), + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)), + mnesia_test_lib:kill_mnesia([N3]), + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)), + mnesia_test_lib:kill_mnesia([N2]), + ?match({aborted,{no_majority,Tab}}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)). + +wread(suite) -> []; +wread(Config) when is_list(Config) -> + [N1, N2] = Nodes = ?acquire_nodes(2, Config), + Tab = t, + Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match({[ok,ok],[]}, + rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])), + ?match({atomic,[]}, + mnesia:transaction(fun() -> mnesia:read(t,1,write) end)), + mnesia_test_lib:kill_mnesia([N2]), + ?match({aborted,{no_majority,Tab}}, + mnesia:transaction(fun() -> mnesia:read(t,1,write) end)). + +delete(suite) -> []; +delete(Config) when is_list(Config) -> + [N1, N2] = Nodes = ?acquire_nodes(2, Config), + Tab = t, + Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match({[ok,ok],[]}, + rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])), + %% works as expected with majority of nodes present + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)), + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:delete({t,1}) end)), + ?match({atomic,[]}, + mnesia:transaction(fun() -> mnesia:read({t,1}) end)), + %% put the record back + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)), + ?match({atomic,[{t,1,a}]}, + mnesia:transaction(fun() -> mnesia:read({t,1}) end)), + mnesia_test_lib:kill_mnesia([N2]), + ?match({aborted,{no_majority,Tab}}, + mnesia:transaction(fun() -> mnesia:delete({t,1}) end)). + +clear_table(suite) -> []; +clear_table(Config) when is_list(Config) -> + [N1, N2] = Nodes = ?acquire_nodes(2, Config), + Tab = t, + Schema = [{name, Tab}, {ram_copies, [N1,N2]}, {majority,true}], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match({[ok,ok],[]}, + rpc:multicall([N1,N2], mnesia, wait_for_tables, [[Tab], 3000])), + %% works as expected with majority of nodes present + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)), + ?match({atomic,ok}, mnesia:clear_table(t)), + ?match({atomic,[]}, + mnesia:transaction(fun() -> mnesia:read({t,1}) end)), + %% put the record back + ?match({atomic,ok}, + mnesia:transaction(fun() -> mnesia:write({t,1,a}) end)), + ?match({atomic,[{t,1,a}]}, + mnesia:transaction(fun() -> mnesia:read({t,1}) end)), + mnesia_test_lib:kill_mnesia([N2]), + ?match({aborted,{no_majority,Tab}}, mnesia:clear_table(t)). + +frag(suite) -> []; +frag(Config) when is_list(Config) -> + [N1] = Nodes = ?acquire_nodes(1, Config), + Tab = t, + Schema = [ + {name, Tab}, {ram_copies, [N1]}, + {majority,true}, + {frag_properties, [{n_fragments, 2}]} + ], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match(true, mnesia:table_info(t, majority)), + ?match(true, mnesia:table_info(t_frag2, majority)). + +change_majority(suite) -> []; +change_majority(Config) when is_list(Config) -> + [N1,N2] = Nodes = ?acquire_nodes(2, Config), + Tab = t, + Schema = [ + {name, Tab}, {ram_copies, [N1,N2]}, + {majority,false} + ], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match(false, mnesia:table_info(t, majority)), + ?match({atomic, ok}, + mnesia:change_table_majority(t, true)), + ?match(true, mnesia:table_info(t, majority)), + ?match(ok, + mnesia:activity(transaction, fun() -> + mnesia:write({t,1,a}) + end)), + mnesia_test_lib:kill_mnesia([N2]), + ?match({'EXIT',{aborted,{no_majority,_}}}, + mnesia:activity(transaction, fun() -> + mnesia:write({t,1,a}) + end)). + +frag_change_majority(suite) -> []; +frag_change_majority(Config) when is_list(Config) -> + [N1,N2] = Nodes = ?acquire_nodes(2, Config), + Tab = t, + Schema = [ + {name, Tab}, {ram_copies, [N1,N2]}, + {majority,false}, + {frag_properties, + [{n_fragments, 2}, + {n_ram_copies, 2}, + {node_pool, [N1,N2]}]} + ], + ?match({atomic, ok}, mnesia:create_table(Schema)), + ?match(false, mnesia:table_info(t, majority)), + ?match(false, mnesia:table_info(t_frag2, majority)), + ?match({aborted,{bad_type,t_frag2}}, + mnesia:change_table_majority(t_frag2, true)), + ?match({atomic, ok}, + mnesia:change_table_majority(t, true)), + ?match(true, mnesia:table_info(t, majority)), + ?match(true, mnesia:table_info(t_frag2, majority)), + ?match(ok, + mnesia:activity(transaction, fun() -> + mnesia:write({t,1,a}) + end, mnesia_frag)), + mnesia_test_lib:kill_mnesia([N2]), + ?match({'EXIT',{aborted,{no_majority,_}}}, + mnesia:activity(transaction, fun() -> + mnesia:write({t,1,a}) + end, mnesia_frag)). |