aboutsummaryrefslogtreecommitdiffstats
path: root/lib/mnesia/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mnesia/src')
-rw-r--r--lib/mnesia/src/Makefile139
-rw-r--r--lib/mnesia/src/mnesia.app.src52
-rw-r--r--lib/mnesia/src/mnesia.appup.src37
-rw-r--r--lib/mnesia/src/mnesia.erl2883
-rw-r--r--lib/mnesia/src/mnesia.hrl121
-rw-r--r--lib/mnesia/src/mnesia_backup.erl201
-rw-r--r--lib/mnesia/src/mnesia_bup.erl1186
-rw-r--r--lib/mnesia/src/mnesia_checkpoint.erl1295
-rw-r--r--lib/mnesia/src/mnesia_checkpoint_sup.erl42
-rw-r--r--lib/mnesia/src/mnesia_controller.erl2182
-rw-r--r--lib/mnesia/src/mnesia_dumper.erl1218
-rw-r--r--lib/mnesia/src/mnesia_event.erl260
-rw-r--r--lib/mnesia/src/mnesia_frag.erl1361
-rw-r--r--lib/mnesia/src/mnesia_frag_hash.erl151
-rw-r--r--lib/mnesia/src/mnesia_frag_old_hash.erl132
-rw-r--r--lib/mnesia/src/mnesia_index.erl384
-rw-r--r--lib/mnesia/src/mnesia_kernel_sup.erl65
-rw-r--r--lib/mnesia/src/mnesia_late_loader.erl108
-rw-r--r--lib/mnesia/src/mnesia_lib.erl1306
-rw-r--r--lib/mnesia/src/mnesia_loader.erl828
-rw-r--r--lib/mnesia/src/mnesia_locker.erl1196
-rw-r--r--lib/mnesia/src/mnesia_log.erl1025
-rw-r--r--lib/mnesia/src/mnesia_monitor.erl823
-rw-r--r--lib/mnesia/src/mnesia_recover.erl1196
-rw-r--r--lib/mnesia/src/mnesia_registry.erl280
-rw-r--r--lib/mnesia/src/mnesia_schema.erl3027
-rw-r--r--lib/mnesia/src/mnesia_snmp_hook.erl259
-rw-r--r--lib/mnesia/src/mnesia_snmp_sup.erl42
-rw-r--r--lib/mnesia/src/mnesia_sp.erl42
-rw-r--r--lib/mnesia/src/mnesia_subscr.erl494
-rw-r--r--lib/mnesia/src/mnesia_sup.erl131
-rw-r--r--lib/mnesia/src/mnesia_text.erl194
-rw-r--r--lib/mnesia/src/mnesia_tm.erl2301
33 files changed, 24961 insertions, 0 deletions
diff --git a/lib/mnesia/src/Makefile b/lib/mnesia/src/Makefile
new file mode 100644
index 0000000000..e032f563fa
--- /dev/null
+++ b/lib/mnesia/src/Makefile
@@ -0,0 +1,139 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1996-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+include $(ERL_TOP)/make/target.mk
+
+ifeq ($(TYPE),debug)
+ERL_COMPILE_FLAGS += -Ddebug -W
+endif
+
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(MNESIA_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+MODULES= \
+ mnesia \
+ mnesia_backup \
+ mnesia_bup \
+ mnesia_checkpoint \
+ mnesia_checkpoint_sup \
+ mnesia_controller \
+ mnesia_dumper\
+ mnesia_event \
+ mnesia_frag \
+ mnesia_frag_hash \
+ mnesia_frag_old_hash \
+ mnesia_index \
+ mnesia_kernel_sup \
+ mnesia_late_loader \
+ mnesia_lib\
+ mnesia_loader \
+ mnesia_locker \
+ mnesia_log \
+ mnesia_monitor \
+ mnesia_recover \
+ mnesia_registry \
+ mnesia_schema\
+ mnesia_snmp_hook \
+ mnesia_snmp_sup \
+ mnesia_subscr \
+ mnesia_sup \
+ mnesia_sp \
+ mnesia_text \
+ mnesia_tm
+
+HRL_FILES= mnesia.hrl
+
+ERL_FILES= $(MODULES:%=%.erl)
+
+TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET)
+
+APP_FILE= mnesia.app
+
+APP_SRC= $(APP_FILE).src
+APP_TARGET= $(EBIN)/$(APP_FILE)
+
+APPUP_FILE= mnesia.appup
+
+APPUP_SRC= $(APPUP_FILE).src
+APPUP_TARGET= $(EBIN)/$(APPUP_FILE)
+
+
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+ERL_COMPILE_FLAGS += \
+ +warn_unused_vars \
+ +'{parse_transform,sys_pre_attributes}' \
+ +'{attribute,insert,vsn,"mnesia_$(MNESIA_VSN)"}' \
+ -W
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+
+# Default build: every file listed in TARGET_FILES (the compiled modules
+# plus the generated .app and .appup files).
+opt: $(TARGET_FILES)
+
+# Re-invoke make with TYPE=debug; the ifeq block near the top of this
+# file then appends "-Ddebug -W" to ERL_COMPILE_FLAGS.
+debug:
+ @${MAKE} TYPE=debug
+
+# Remove everything listed in TARGET_FILES and any file named "core".
+clean:
+ rm -f $(TARGET_FILES)
+ rm -f core
+
+# No prerequisites and no recipe: this target does nothing here.
+docs:
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+
+# Generate ebin/mnesia.app from mnesia.app.src by replacing the %VSN%
+# placeholder with $(VSN). The sed expression has no "g" flag, so only
+# the first occurrence on each line is replaced. Rebuilt when ../vsn.mk
+# changes.
+$(APP_TARGET): $(APP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+# Same substitution for ebin/mnesia.appup from mnesia.appup.src.
+$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+# Install the release: the header and source files go to
+# $(RELSYSDIR)/src, the built targets to $(RELSYSDIR)/ebin.
+# Depends on "opt" so that everything is built first.
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/src
+ $(INSTALL_DATA) $(HRL_FILES) $(ERL_FILES) $(RELSYSDIR)/src
+ $(INSTALL_DIR) $(RELSYSDIR)/ebin
+ $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+
+# No recipe: nothing is installed for documentation from this directory.
+release_docs_spec:
+
diff --git a/lib/mnesia/src/mnesia.app.src b/lib/mnesia/src/mnesia.app.src
new file mode 100644
index 0000000000..3715488ec2
--- /dev/null
+++ b/lib/mnesia/src/mnesia.app.src
@@ -0,0 +1,52 @@
+%% Application resource file template for Mnesia.
+%% The %VSN% placeholder is replaced with the real version by the
+%% $(APP_TARGET) rule in the Makefile.
+{application, mnesia,
+ [{description, "MNESIA CXC 138 12"},
+  {vsn, "%VSN%"},
+  %% All modules that make up the application.
+  {modules, [
+             mnesia,
+             mnesia_backup,
+             mnesia_bup,
+             mnesia_checkpoint,
+             mnesia_checkpoint_sup,
+             mnesia_controller,
+             mnesia_dumper,
+             mnesia_event,
+             mnesia_frag,
+             mnesia_frag_hash,
+             mnesia_frag_old_hash,
+             mnesia_index,
+             mnesia_kernel_sup,
+             mnesia_late_loader,
+             mnesia_lib,
+             mnesia_loader,
+             mnesia_locker,
+             mnesia_log,
+             mnesia_monitor,
+             mnesia_recover,
+             mnesia_registry,
+             mnesia_schema,
+             mnesia_snmp_hook,
+             mnesia_snmp_sup,
+             mnesia_subscr,
+             mnesia_sup,
+             mnesia_sp,
+             mnesia_text,
+             mnesia_tm
+            ]},
+  %% Names of processes registered by the application.
+  %% mnesia_subscr was previously misspelled as mnesia_substr; the
+  %% module of that name is listed under 'modules' above.
+  {registered, [
+                mnesia_dumper_load_regulator,
+                mnesia_event,
+                mnesia_fallback,
+                mnesia_controller,
+                mnesia_kernel_sup,
+                mnesia_late_loader,
+                mnesia_locker,
+                mnesia_monitor,
+                mnesia_recover,
+                mnesia_subscr,
+                mnesia_sup,
+                mnesia_tm
+               ]},
+  {applications, [kernel, stdlib]},
+  %% Application callback module and its start argument.
+  {mod, {mnesia_sup, []}}]}.
+
+
diff --git a/lib/mnesia/src/mnesia.appup.src b/lib/mnesia/src/mnesia.appup.src
new file mode 100644
index 0000000000..cad63bf8df
--- /dev/null
+++ b/lib/mnesia/src/mnesia.appup.src
@@ -0,0 +1,37 @@
+%% -*- erlang -*-
+%% Application upgrade file template. The %VSN% placeholder is replaced
+%% by the $(APPUP_TARGET) rule in the Makefile.
+%% The term has the shape {Vsn, UpFromList, DownToList}; both lists
+%% below contain the same instructions for the same versions.
+{"%VSN%",
+ %% First list: instructions keyed by the version being upgraded from.
+ [
+ {"4.4.11",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.10",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ %% These versions are handled by restarting the whole application.
+ {"4.4.9", [{restart_application, mnesia}]},
+ {"4.4.8", [{restart_application, mnesia}]},
+ {"4.4.7", [{restart_application, mnesia}]}
+ ],
+ %% Second list: instructions keyed by the version being downgraded to.
+ [
+ {"4.4.11",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.10",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.9", [{restart_application, mnesia}]},
+ {"4.4.8", [{restart_application, mnesia}]},
+ {"4.4.7", [{restart_application, mnesia}]}
+ ]
+}.
diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl
new file mode 100644
index 0000000000..9a630f18eb
--- /dev/null
+++ b/lib/mnesia/src/mnesia.erl
@@ -0,0 +1,2883 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% This module exports the public interface of the Mnesia DBMS engine
+
+-module(mnesia).
+%-behaviour(mnesia_access).
+
+-export([
+ %% Start, stop and debugging
+ start/0, start/1, stop/0, % Not for public use
+ set_debug_level/1, lkill/0, kill/0, % Not for public use
+ ms/0,
+ change_config/2,
+
+ %% Activity mgt
+ abort/1, transaction/1, transaction/2, transaction/3,
+ sync_transaction/1, sync_transaction/2, sync_transaction/3,
+ async_dirty/1, async_dirty/2, sync_dirty/1, sync_dirty/2, ets/1, ets/2,
+ activity/2, activity/3, activity/4, % Not for public use
+ is_transaction/0,
+
+ %% Access within an activity - Lock acquisition
+ lock/2, lock/4,
+ read_lock_table/1,
+ write_lock_table/1,
+
+ %% Access within an activity - Updates
+ write/1, s_write/1, write/3, write/5,
+ delete/1, s_delete/1, delete/3, delete/5,
+ delete_object/1, s_delete_object/1, delete_object/3, delete_object/5,
+
+ %% Access within an activity - Reads
+ read/1, read/2, wread/1, read/3, read/5,
+ match_object/1, match_object/3, match_object/5,
+ select/1,select/2,select/3,select/4,select/5,select/6,
+ all_keys/1, all_keys/4,
+ index_match_object/2, index_match_object/4, index_match_object/6,
+ index_read/3, index_read/6,
+ first/1, next/2, last/1, prev/2,
+ first/3, next/4, last/3, prev/4,
+
+ %% Iterators within an activity
+ foldl/3, foldl/4, foldr/3, foldr/4,
+
+ %% Dirty access regardless of activities - Updates
+ dirty_write/1, dirty_write/2,
+ dirty_delete/1, dirty_delete/2,
+ dirty_delete_object/1, dirty_delete_object/2,
+ dirty_update_counter/2, dirty_update_counter/3,
+
+ %% Dirty access regardless of activities - Read
+ dirty_read/1, dirty_read/2,
+ dirty_select/2,
+ dirty_match_object/1, dirty_match_object/2, dirty_all_keys/1,
+ dirty_index_match_object/2, dirty_index_match_object/3,
+ dirty_index_read/3, dirty_slot/2,
+ dirty_first/1, dirty_next/2, dirty_last/1, dirty_prev/2,
+
+ %% Info
+ table_info/2, table_info/4, schema/0, schema/1,
+ error_description/1, info/0, system_info/1,
+ system_info/0, % Not for public use
+
+ %% Database mgt
+ create_schema/1, delete_schema/1,
+ backup/1, backup/2, traverse_backup/4, traverse_backup/6,
+ install_fallback/1, install_fallback/2,
+ uninstall_fallback/0, uninstall_fallback/1,
+ activate_checkpoint/1, deactivate_checkpoint/1,
+ backup_checkpoint/2, backup_checkpoint/3, restore/2,
+
+ %% Table mgt
+ create_table/1, create_table/2, delete_table/1,
+ add_table_copy/3, del_table_copy/2, move_table_copy/3,
+ add_table_index/2, del_table_index/2,
+ transform_table/3, transform_table/4,
+ change_table_copy_type/3,
+ read_table_property/2, write_table_property/2, delete_table_property/2,
+ change_table_frag/2,
+ clear_table/1, clear_table/4,
+
+ %% Table load
+ dump_tables/1, wait_for_tables/2, force_load_table/1,
+ change_table_access_mode/2, change_table_load_order/2,
+ set_master_nodes/1, set_master_nodes/2,
+
+ %% Misc admin
+ dump_log/0, subscribe/1, unsubscribe/1, report_event/1,
+
+ %% Snmp
+ snmp_open_table/2, snmp_close_table/1,
+ snmp_get_row/2, snmp_get_next_index/2, snmp_get_mnesia_key/2,
+
+ %% Textfile access
+ load_textfile/1, dump_to_textfile/1,
+
+ %% QLC functions
+ table/1, table/2,
+
+ %% Mnemosyne exclusive
+ get_activity_id/0, put_activity_id/1, % Not for public use
+
+ %% Mnesia internal functions
+ dirty_rpc/4, % Not for public use
+ has_var/1, fun_select/7, fun_select/10, select_cont/3, dirty_sel_init/5,
+ foldl/6, foldr/6,
+
+ %% Module internal callback functions
+ raw_table_info/2, % Not for public use
+ remote_dirty_match_object/2, % Not for public use
+ remote_dirty_select/2 % Not for public use
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [verbose/2]).
+
+-define(DEFAULT_ACCESS, ?MODULE).
+
+%% Select
+-define(PATTERN_TO_OBJECT_MATCH_SPEC(Pat), [{Pat,[],['$_']}]).
+-define(PATTERN_TO_BINDINGS_MATCH_SPEC(Pat), [{Pat,[],['$$']}]).
+
+%% Local lookup helper, kept in this module in order to avoid an
+%% external function call. Evaluates ?catch_val(Var); an 'EXIT' tuple
+%% is handed to mnesia_lib:other_val/2 together with Var, any other
+%% result is returned unchanged.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Why} ->
+            mnesia_lib:other_val(Var, Why);
+        Found ->
+            Found
+    end.
+
+%% True when the atom Var is spelled as a dollar sign followed only by
+%% decimal digits (for example '$1'). Because is_digits([]) is true,
+%% the bare atom '$' also yields true.
+is_dollar_digits(Var) ->
+    case atom_to_list(Var) of
+        [$$ | Rest] -> is_digits(Rest);
+        _NoDollar   -> false
+    end.
+
+%% True when every element of the list lies in the range $0..$9.
+%% The empty list yields true.
+is_digits([]) ->
+    true;
+is_digits([Ch | Rest]) when Ch >= $0, Ch =< $9 ->
+    is_digits(Rest);
+is_digits([_ | _]) ->
+    false.
+
+%% Returns true when the term contains a match variable, i.e. the
+%% atom '_' or an atom accepted by is_dollar_digits/1, anywhere inside
+%% nested tuples and lists; false otherwise.
+%%
+%% The atom clause used to re-test is_atom(X) inside an 'if', which
+%% made its trailing 'true -> false' branch unreachable. The clauses
+%% below express the same decisions directly.
+has_var('_') ->
+    true;
+has_var(X) when is_atom(X) ->
+    is_dollar_digits(X);
+has_var(X) when is_tuple(X) ->
+    e_has_var(X, tuple_size(X));
+has_var([H|T]) ->
+    %% Every clause returns a boolean, so short-circuit evaluation
+    %% gives the same result as inspecting the head before the tail.
+    has_var(H) orelse has_var(T);
+has_var(_) ->
+    false.
+
+%% Scans the elements of Tuple from position Pos down to 1 and returns
+%% true as soon as has_var/1 accepts one of them; false when position
+%% 0 is reached.
+e_has_var(_Tuple, 0) ->
+    false;
+e_has_var(Tuple, Pos) ->
+    has_var(element(Pos, Tuple)) orelse e_has_var(Tuple, Pos - 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Start and stop
+
+%% Starts the application as 'temporary', timing the call with
+%% timer:tc/3 and reporting the elapsed whole seconds via verbose/2.
+%% Returns ok when the application started or was already started,
+%% otherwise {error, Reason}.
+start() ->
+    {Micros, Outcome} =
+        timer:tc(application, start, [?APPLICATION, temporary]),
+    Secs = Micros div 1000000,
+    case Outcome of
+        ok ->
+            verbose("Mnesia started, ~p seconds~n",[ Secs]),
+            ok;
+        {error, {already_started, mnesia}} ->
+            verbose("Mnesia already started, ~p seconds~n",[ Secs]),
+            ok;
+        {error, Why} ->
+            verbose("Mnesia failed to start, ~p seconds: ~p~n",[ Secs, Why]),
+            {error, Why}
+    end.
+
+%% Starts the application after applying the {Env, Val} pairs in
+%% ExtraEnv. A non-list argument gives {error, {badarg, ExtraEnv}};
+%% any result other than ok from mnesia_lib:ensure_loaded/1 is
+%% returned as is.
+start(ExtraEnv) when not is_list(ExtraEnv) ->
+    {error, {badarg, ExtraEnv}};
+start(ExtraEnv) ->
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        ok        -> patched_start(ExtraEnv);
+        NotLoaded -> NotLoaded
+    end.
+
+%% Applies each {Env, Val} pair through mnesia_monitor:patch_env/2 and
+%% calls start/0 once the list is exhausted. Stops at the first
+%% {error, Reason} from patch_env/2, or with {error, {bad_type, Head}}
+%% at the first element that is not a pair with an atom key.
+patched_start([]) ->
+    start();
+patched_start([{Env, Val} | Rest]) when is_atom(Env) ->
+    case mnesia_monitor:patch_env(Env, Val) of
+        {error, Why} -> {error, Why};
+        _Patched     -> patched_start(Rest)
+    end;
+patched_start([Bad | _]) ->
+    {error, {bad_type, Bad}}.
+
+%% Stops the application. Returns 'stopped' both when it was stopped
+%% and when it was not started; any other result of application:stop/1
+%% is returned unchanged.
+stop() ->
+    case application:stop(?APPLICATION) of
+        ok ->
+            stopped;
+        {error, {not_started, ?APPLICATION}} ->
+            stopped;
+        Unexpected ->
+            Unexpected
+    end.
+
+%% Changes a configuration parameter.
+%%   extra_db_nodes, list   - delegated to mnesia_controller:connect_nodes/1
+%%   dc_dump_limit, N > 0   - stored with mnesia_lib:set/2 when
+%%                            mnesia_lib:is_running() is 'yes', otherwise
+%%                            {error, {not_started, ?APPLICATION}}
+%% Anything else returns {error, {badarg, Key}}.
+change_config(extra_db_nodes, Nodes) when is_list(Nodes) ->
+    mnesia_controller:connect_nodes(Nodes);
+change_config(dc_dump_limit, Limit) when is_number(Limit), Limit > 0 ->
+    case mnesia_lib:is_running() of
+        yes ->
+            mnesia_lib:set(dc_dump_limit, Limit),
+            {ok, Limit};
+        _NotRunning ->
+            {error, {not_started, ?APPLICATION}}
+    end;
+change_config(UnknownKey, _Val) ->
+    {error, {badarg, UnknownKey}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Debugging
+
+%% Delegates to mnesia_subscr:set_debug_level/1.
+set_debug_level(Level) ->
+    mnesia_subscr:set_debug_level(Level).
+
+%% Local kill: delegates to mnesia_sup:kill/0.
+lkill() ->
+    mnesia_sup:kill().
+
+%% Calls mnesia_sup:kill/0 through rpc:multicall/3.
+kill() ->
+    rpc:multicall(mnesia_sup, kill, []).
+
+%% Returns a fixed list of Mnesia module names. The order matters:
+%% mnesia_monitor and mnesia_event are deliberately placed last (see the
+%% comment inside the list).
+%% NOTE(review): mnesia_sp appears in the Makefile MODULES variable and
+%% in the 'modules' list of mnesia.app.src but not here - confirm
+%% whether that omission is intended.
+ms() ->
+ [
+ mnesia,
+ mnesia_backup,
+ mnesia_bup,
+ mnesia_checkpoint,
+ mnesia_checkpoint_sup,
+ mnesia_controller,
+ mnesia_dumper,
+ mnesia_loader,
+ mnesia_frag,
+ mnesia_frag_hash,
+ mnesia_frag_old_hash,
+ mnesia_index,
+ mnesia_kernel_sup,
+ mnesia_late_loader,
+ mnesia_lib,
+ mnesia_log,
+ mnesia_registry,
+ mnesia_schema,
+ mnesia_snmp_hook,
+ mnesia_snmp_sup,
+ mnesia_subscr,
+ mnesia_sup,
+ mnesia_text,
+ mnesia_tm,
+ mnesia_recover,
+ mnesia_locker,
+
+ %% Keep these last in the list, so
+ %% mnesia_sup kills these last
+ mnesia_monitor,
+ mnesia_event
+ ].
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Activity mgt
+
+-spec(abort/1 :: (_) -> no_return()).
+
+%% Terminates the calling process with exit reason {aborted, Reason}.
+%% Never returns.
+abort(Reason) ->
+    exit({aborted, Reason}).
+
+%% True when the process dictionary entry mnesia_activity_state holds a
+%% 3-tuple whose second element is a tuple tagged 'tid'; false in every
+%% other case.
+is_transaction() ->
+    case get(mnesia_activity_state) of
+        {_Mod, Tid, _Ts} when element(1,Tid) == tid -> true;
+        _NoTransaction -> false
+    end.
+
+%% transaction/1,2,3: asynchronous transactions using the default
+%% access module. All of them forward to transaction/6 with the current
+%% mnesia_activity_state from the process dictionary.
+%% For transaction/2 the second argument is taken as a retry count when
+%% it is a non-negative integer or 'infinity'; otherwise it is taken as
+%% the argument list and retries default to 'infinity'.
+transaction(Fun) ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, [], infinity, ?DEFAULT_ACCESS, async).
+
+transaction(Fun, Retries)
+  when is_integer(Retries), Retries >= 0;
+       Retries == infinity ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, [], Retries, ?DEFAULT_ACCESS, async);
+transaction(Fun, Args) ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, Args, infinity, ?DEFAULT_ACCESS, async).
+
+transaction(Fun, Args, Retries) ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, Args, Retries, ?DEFAULT_ACCESS, async).
+
+%% sync_transaction/1,2,3: same argument handling as transaction/1,2,3
+%% but forwarding the kind 'sync' to transaction/6.
+sync_transaction(Fun) ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, [], infinity, ?DEFAULT_ACCESS, sync).
+
+sync_transaction(Fun, Retries)
+  when is_integer(Retries), Retries >= 0;
+       Retries == infinity ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, [], Retries, ?DEFAULT_ACCESS, sync);
+sync_transaction(Fun, Args) ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, Args, infinity, ?DEFAULT_ACCESS, sync).
+
+sync_transaction(Fun, Args, Retries) ->
+    State = get(mnesia_activity_state),
+    transaction(State, Fun, Args, Retries, ?DEFAULT_ACCESS, sync).
+
+
+%% Validates the arguments and hands the transaction to
+%% mnesia_tm:transaction/6. Fun must be a function, Args a list, Mod an
+%% atom and Retries either 'infinity' or a non-negative integer;
+%% otherwise {aborted, {badarg, Fun, Args, Retries, Mod}} is returned.
+transaction(State, Fun, Args, Retries, Mod, Kind)
+  when is_function(Fun), is_list(Args), is_atom(Mod),
+       Retries == infinity;
+       is_function(Fun), is_list(Args), is_atom(Mod),
+       is_integer(Retries), Retries >= 0 ->
+    mnesia_tm:transaction(State, Fun, Args, Retries, Mod, Kind);
+transaction(_State, Fun, Args, Retries, Mod, _Kind) ->
+    {aborted, {badarg, Fun, Args, Retries, Mod}}.
+
+%% Validates the arguments and hands the activity to
+%% mnesia_tm:non_transaction/5. Returns {aborted, {badarg, Fun, Args}}
+%% unless Fun is a function, Args a list and Mod an atom.
+non_transaction(State, Fun, Args, ActivityKind, Mod)
+  when is_function(Fun), is_list(Args), is_atom(Mod) ->
+    mnesia_tm:non_transaction(State, Fun, Args, ActivityKind, Mod);
+non_transaction(_State, BadFun, BadArgs, _ActivityKind, _Mod) ->
+    {aborted, {badarg, BadFun, BadArgs}}.
+
+%% async_dirty/1,2, sync_dirty/1,2 and ets/1,2 run Fun (with Args,
+%% default []) through non_transaction/5 with the activity kind
+%% async_dirty, sync_dirty or ets respectively, using the default
+%% access module and the current mnesia_activity_state.
+async_dirty(Fun) ->
+    async_dirty(Fun, []).
+async_dirty(Fun, Args) ->
+    State = get(mnesia_activity_state),
+    non_transaction(State, Fun, Args, async_dirty, ?DEFAULT_ACCESS).
+
+sync_dirty(Fun) ->
+    sync_dirty(Fun, []).
+sync_dirty(Fun, Args) ->
+    State = get(mnesia_activity_state),
+    non_transaction(State, Fun, Args, sync_dirty, ?DEFAULT_ACCESS).
+
+ets(Fun) ->
+    ets(Fun, []).
+ets(Fun, Args) ->
+    State = get(mnesia_activity_state),
+    non_transaction(State, Fun, Args, ets, ?DEFAULT_ACCESS).
+
+%% activity/2 runs Fun with no arguments. For activity/3 a list as the
+%% third argument is taken as the argument list, and the access module
+%% is read with mnesia_monitor:get_env(access_module); any other third
+%% argument is taken as the access module, with no arguments for Fun.
+activity(Kind, Fun) ->
+    activity(Kind, Fun, []).
+
+activity(Kind, Fun, Args) when is_list(Args) ->
+    AccessMod = mnesia_monitor:get_env(access_module),
+    activity(Kind, Fun, Args, AccessMod);
+activity(Kind, Fun, Mod) ->
+    activity(Kind, Fun, [], Mod).
+
+%% Runs Fun with Args in the activity context selected by Kind, using
+%% access module Mod.
+%%   ets | async_dirty | sync_dirty       - via non_transaction/5
+%%   transaction | {transaction, N}       - via wrap_trans/6, async
+%%   sync_transaction | {sync_transaction, N} - via wrap_trans/6, sync
+%% Any other Kind returns {aborted, {bad_type, Kind}}.
+activity(Kind, Fun, Args, Mod) ->
+    State = get(mnesia_activity_state),
+    case Kind of
+        Dirty when Dirty == ets; Dirty == async_dirty; Dirty == sync_dirty ->
+            non_transaction(State, Fun, Args, Kind, Mod);
+        transaction ->
+            wrap_trans(State, Fun, Args, infinity, Mod, async);
+        {transaction, Retries} ->
+            wrap_trans(State, Fun, Args, Retries, Mod, async);
+        sync_transaction ->
+            wrap_trans(State, Fun, Args, infinity, Mod, sync);
+        {sync_transaction, Retries} ->
+            wrap_trans(State, Fun, Args, Retries, Mod, sync);
+        _Unknown ->
+            {aborted, {bad_type, Kind}}
+    end.
+
+%% Runs transaction/6 and unwraps its result: {atomic, Res} becomes
+%% Res, anything else becomes the exit reason of the calling process.
+wrap_trans(State, Fun, Args, Retries, Mod, Kind) ->
+    case transaction(State, Fun, Args, Retries, Mod, Kind) of
+        {atomic, Value} ->
+            Value;
+        Failure ->
+            exit(Failure)
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access within an activity - lock acquisition
+
+%% Grab a lock on an item in the global lock table
+%% Item may be any term. Lock may be write or read.
+%% write lock is set on all the given nodes
+%% read lock is only set on the first node
+%% Nodes may either be a list of nodes or one node as an atom
+%% Mnesia on all Nodes must be connected to each other, but
+%% it is not necessary that they are up and running.
+
+%% Acquires a lock within the current activity. Dispatches on the
+%% mnesia_activity_state entry: the default access module is served by
+%% lock/4 in this module, any other module by its own lock/4. Aborts
+%% with no_transaction when there is no activity state.
+lock(LockItem, LockKind) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> lock(Tid, Ts, LockItem, LockKind);
+        {Mod, Tid, Ts}             -> Mod:lock(Tid, Ts, LockItem, LockKind);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% Lock implementation for the default access module. Inside a
+%% transaction (Tid tagged 'tid') the lock item selects the helper:
+%%   {record, Tab, Key}      - lock_record/5
+%%   {table, Tab}            - lock_table/4
+%%   {global, Key, Nodes}    - global_lock/5
+%% and any other item aborts with {bad_type, LockItem}. Outside a
+%% transaction no lock is taken and [] is returned.
+lock(Tid, Ts, LockItem, LockKind) ->
+    case {element(1, Tid), LockItem} of
+        {tid, {record, Tab, Key}} ->
+            lock_record(Tid, Ts, Tab, Key, LockKind);
+        {tid, {table, Tab}} ->
+            lock_table(Tid, Ts, Tab, LockKind);
+        {tid, {global, GlobalKey, Nodes}} ->
+            global_lock(Tid, Ts, GlobalKey, LockKind, Nodes);
+        {tid, _BadItem} ->
+            abort({bad_type, LockItem});
+        {_Protocol, _} ->
+            []
+    end.
+
+%% Takes a read lock on the whole table Tab via lock/2; returns ok.
+read_lock_table(Tab) ->
+    _ = lock({table, Tab}, read),
+    ok.
+
+%% Takes a write lock on the whole table Tab via lock/2; returns ok.
+write_lock_table(Tab) ->
+    _ = lock({table, Tab}, write),
+    ok.
+
+%% Locks the record {Tab, Key} in the transaction store of Ts.
+%% LockKind selects the mnesia_locker call: read -> rlock/3,
+%% write -> wlock/3, sticky_write -> sticky_wlock/3. 'none' takes no
+%% lock and returns []. Any other kind aborts with
+%% {bad_type, Tab, LockKind}; a non-atom Tab aborts with {bad_type, Tab}.
+lock_record(Tid, Ts, Tab, Key, LockKind) when is_atom(Tab) ->
+    Store = Ts#tidstore.store,
+    Oid = {Tab, Key},
+    case LockKind of
+        read         -> mnesia_locker:rlock(Tid, Store, Oid);
+        write        -> mnesia_locker:wlock(Tid, Store, Oid);
+        sticky_write -> mnesia_locker:sticky_wlock(Tid, Store, Oid);
+        none         -> [];
+        _BadKind     -> abort({bad_type, Tab, LockKind})
+    end;
+lock_record(_Tid, _Ts, BadTab, _Key, _LockKind) ->
+    abort({bad_type, BadTab}).
+
+%% Locks the whole table Tab in the transaction store of Ts.
+%% LockKind selects the mnesia_locker call: read -> rlock_table/3,
+%% write -> wlock_table/3, sticky_write -> sticky_wlock_table/3.
+%% 'none' takes no lock and returns []. Any other kind aborts with
+%% {bad_type, Tab, LockKind}; a non-atom Tab aborts with {bad_type, Tab}.
+lock_table(Tid, Ts, Tab, LockKind) when is_atom(Tab) ->
+    Store = Ts#tidstore.store,
+    case LockKind of
+        read         -> mnesia_locker:rlock_table(Tid, Store, Tab);
+        write        -> mnesia_locker:wlock_table(Tid, Store, Tab);
+        sticky_write -> mnesia_locker:sticky_wlock_table(Tid, Store, Tab);
+        none         -> [];
+        _BadKind     -> abort({bad_type, Tab, LockKind})
+    end;
+lock_table(_Tid, _Ts, BadTab, _LockKind) ->
+    abort({bad_type, BadTab}).
+
+%% Takes a global lock on Item. Nodes must be a list, otherwise the
+%% call aborts with {bad_type, Nodes}. Inside a transaction the node
+%% list is first reduced by good_global_nodes/1, then Kind is checked:
+%% only read and write are accepted, anything else aborts with
+%% {bad_type, Kind}. Outside a transaction [] is returned.
+global_lock(Tid, Ts, Item, Kind, Nodes) when is_list(Nodes) ->
+    case element(1, Tid) of
+        tid ->
+            Store = Ts#tidstore.store,
+            Usable = good_global_nodes(Nodes),
+            case Kind of
+                _ when Kind /= read, Kind /= write ->
+                    abort({bad_type, Kind});
+                _ ->
+                    mnesia_locker:global_lock(Tid, Store, Item, Kind, Usable)
+            end;
+        _Protocol ->
+            []
+    end;
+global_lock(_Tid, _Ts, _Item, _Kind, BadNodes) ->
+    abort({bad_type, BadNodes}).
+
+%% Intersects Nodes (via mnesia_lib:intersect/2) with the local node
+%% plus the nodes stored under recover_nodes.
+good_global_nodes(Nodes) ->
+    Candidates = [node() | val(recover_nodes)],
+    mnesia_lib:intersect(Nodes, Candidates).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access within an activity - updates
+
+%% Writes the record Val with a write lock. The table name is the
+%% first element of the record. Val must be a tuple with more than two
+%% elements, otherwise the call aborts with {bad_type, Val}.
+write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+    write(element(1, Val), Val, write);
+write(BadVal) ->
+    abort({bad_type, BadVal}).
+
+%% Writes the record Val with a sticky write lock. The table name is
+%% the first element of the record. Val must be a tuple with more than
+%% two elements; otherwise the call aborts with {bad_type, Val}, in the
+%% same way as write/1, s_delete/1 and s_delete_object/1. (Previously
+%% a bad argument raised function_clause instead.)
+s_write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+    Tab = element(1, Val),
+    write(Tab, Val, sticky_write);
+s_write(Val) ->
+    abort({bad_type, Val}).
+
+%% Writes Val to Tab within the current activity. Dispatches on the
+%% mnesia_activity_state entry: the default access module is served by
+%% write/5 in this module, any other module by its own write/5. Aborts
+%% with no_transaction when there is no activity state.
+write(Tab, Val, LockKind) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> write(Tid, Ts, Tab, Val, LockKind);
+        {Mod, Tid, Ts}             -> Mod:write(Tid, Ts, Tab, Val, LockKind);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% Write implementation for the default access module. Tab must be an
+%% atom other than 'schema' and Val a tuple with more than two
+%% elements; otherwise the call aborts with
+%% {bad_type, Tab, Val, LockKind}.
+%%   ets activity - inserts directly with ?ets_insert and returns ok
+%%   transaction  - takes a write or sticky_write lock on
+%%                  {Tab, element(2, Val)} (other kinds abort), then
+%%                  records the write via write_to_store/4
+%%   otherwise    - do_dirty_write/3 with the activity tag as protocol
+write(Tid, Ts, Tab, Val, LockKind)
+  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_insert(Tab, Val),
+            ok;
+        tid ->
+            Store = Ts#tidstore.store,
+            Oid = {Tab, element(2, Val)},
+            case LockKind of
+                write        -> mnesia_locker:wlock(Tid, Store, Oid);
+                sticky_write -> mnesia_locker:sticky_wlock(Tid, Store, Oid);
+                _BadKind     -> abort({bad_type, Tab, LockKind})
+            end,
+            write_to_store(Tab, Store, Oid, Val);
+        Protocol ->
+            do_dirty_write(Protocol, Tab, Val)
+    end;
+write(_Tid, _Ts, Tab, Val, LockKind) ->
+    abort({bad_type, Tab, Val, LockKind}).
+
+%% Records a pending write of Val in the transaction store.
+%% Val is checked against the {Tab, record_validation} entry, which
+%% holds {RecName, Arity, Type}: the record name and tuple size must
+%% match, otherwise the call aborts with {bad_type, Val}. If the entry
+%% cannot be read the call aborts with {no_exists, Tab}.
+%% For tables of type bag the write is added next to existing entries
+%% for Oid; for every other type earlier entries for Oid are removed
+%% first.
+write_to_store(Tab, Store, Oid, Val) ->
+    case ?catch_val({Tab, record_validation}) of
+        {RecName, Arity, Type}
+          when tuple_size(Val) == Arity, RecName == element(1, Val) ->
+            case Type of
+                bag -> ok;
+                _   -> ?ets_delete(Store, Oid)
+            end,
+            ?ets_insert(Store, {Oid, Val, write}),
+            ok;
+        {'EXIT', _} ->
+            abort({no_exists, Tab});
+        _Mismatch ->
+            abort({bad_type, Val})
+    end.
+
+%% delete/1 deletes the records identified by {Tab, Key} with a write
+%% lock, s_delete/1 with a sticky write lock. Any argument that is not
+%% a 2-tuple aborts with {bad_type, Oid}.
+delete({Tab, Key}) ->
+    delete(Tab, Key, write);
+delete(BadOid) ->
+    abort({bad_type, BadOid}).
+
+s_delete({Tab, Key}) ->
+    delete(Tab, Key, sticky_write);
+s_delete(BadOid) ->
+    abort({bad_type, BadOid}).
+
+%% Deletes {Tab, Key} within the current activity. Dispatches on the
+%% mnesia_activity_state entry: the default access module is served by
+%% delete/5 in this module, any other module by its own delete/5.
+%% Aborts with no_transaction when there is no activity state.
+delete(Tab, Key, LockKind) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> delete(Tid, Ts, Tab, Key, LockKind);
+        {Mod, Tid, Ts}             -> Mod:delete(Tid, Ts, Tab, Key, LockKind);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% Delete implementation for the default access module. Tab must be an
+%% atom other than 'schema', otherwise the call aborts with
+%% {bad_type, Tab}.
+%%   ets activity - deletes directly with ?ets_delete and returns ok
+%%   transaction  - takes a write or sticky_write lock on {Tab, Key}
+%%                  (other kinds abort), then replaces whatever the
+%%                  store holds for that Oid with a single
+%%                  {Oid, Oid, delete} entry
+%%   otherwise    - do_dirty_delete/3 with the activity tag as protocol
+delete(Tid, Ts, Tab, Key, LockKind)
+  when is_atom(Tab), Tab /= schema ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_delete(Tab, Key),
+            ok;
+        tid ->
+            Store = Ts#tidstore.store,
+            Oid = {Tab, Key},
+            case LockKind of
+                write        -> mnesia_locker:wlock(Tid, Store, Oid);
+                sticky_write -> mnesia_locker:sticky_wlock(Tid, Store, Oid);
+                _BadKind     -> abort({bad_type, Tab, LockKind})
+            end,
+            ?ets_delete(Store, Oid),
+            ?ets_insert(Store, {Oid, Oid, delete}),
+            ok;
+        Protocol ->
+            do_dirty_delete(Protocol, Tab, Key)
+    end;
+delete(_Tid, _Ts, BadTab, _Key, _LockKind) ->
+    abort({bad_type, BadTab}).
+
+%% delete_object/1 deletes exactly the record Val with a write lock,
+%% s_delete_object/1 with a sticky write lock. The table name is the
+%% first element of the record. Val must be a tuple with more than two
+%% elements, otherwise the call aborts with {bad_type, Val}.
+delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+    delete_object(element(1, Val), Val, write);
+delete_object(BadVal) ->
+    abort({bad_type, BadVal}).
+
+s_delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+    delete_object(element(1, Val), Val, sticky_write);
+s_delete_object(BadVal) ->
+    abort({bad_type, BadVal}).
+
+%% Deletes the record Val from Tab within the current activity.
+%% Dispatches on the mnesia_activity_state entry: the default access
+%% module is served by delete_object/5 in this module, any other module
+%% by its own delete_object/5. Aborts with no_transaction when there is
+%% no activity state.
+delete_object(Tab, Val, LockKind) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            delete_object(Tid, Ts, Tab, Val, LockKind);
+        {Mod, Tid, Ts} ->
+            Mod:delete_object(Tid, Ts, Tab, Val, LockKind);
+        _NoActivity ->
+            abort(no_transaction)
+    end.
+
+%% delete_object implementation for the default access module.
+%% Tab must be an atom other than 'schema' and Val a tuple with more
+%% than two elements, otherwise the call aborts with {bad_type, Tab}.
+%% A record containing match variables (see has_var/1) is rejected
+%% with {bad_type, Tab, Val}; otherwise do_delete_object/5 does the work.
+delete_object(Tid, Ts, Tab, Val, LockKind)
+  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+    case has_var(Val) of
+        true ->
+            abort({bad_type, Tab, Val});
+        false ->
+            do_delete_object(Tid, Ts, Tab, Val, LockKind)
+    end;
+delete_object(_Tid, _Ts, BadTab, _Val, _LockKind) ->
+    abort({bad_type, BadTab}).
+
+%% Performs the actual delete_object once the arguments are validated.
+%%   ets activity - removes matching objects directly from Tab
+%%   transaction  - locks {Tab, element(2, Val)} and records the
+%%                  deletion in the transaction store (details below)
+%%   otherwise    - do_dirty_delete_object/3 with the activity tag as
+%%                  protocol
+%% Returns ok in the ets and transaction cases.
+do_delete_object(Tid, Ts, Tab, Val, LockKind) ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_match_delete(Tab, Val),
+ ok;
+ tid ->
+ Store = Ts#tidstore.store,
+ Oid = {Tab, element(2, Val)},
+ %% Only write and sticky_write are accepted lock kinds here.
+ case LockKind of
+ write ->
+ mnesia_locker:wlock(Tid, Store, Oid);
+ sticky_write ->
+ mnesia_locker:sticky_wlock(Tid, Store, Oid);
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end,
+ case val({Tab, setorbag}) of
+ bag ->
+ %% Drop any store entry for this exact object, then record
+ %% that this object is to be deleted.
+ ?ets_match_delete(Store, {Oid, Val, '_'}),
+ ?ets_insert(Store, {Oid, Val, delete_object});
+ _ ->
+ case ?ets_match_object(Store, {Oid, '_', write}) of
+ [] ->
+ %% No pending write for this key in the store.
+ ?ets_match_delete(Store, {Oid, Val, '_'}),
+ ?ets_insert(Store, {Oid, Val, delete_object});
+ _ ->
+ %% A write for this key is pending in the store: replace
+ %% everything stored for the key with a delete of the key.
+ ?ets_delete(Store, Oid),
+ ?ets_insert(Store, {Oid, Oid, delete})
+ end
+ end,
+ ok;
+ Protocol ->
+ do_dirty_delete_object(Protocol, Tab, Val)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access within an activity - read
+
+%% read/2 and read/1 read the records with key Key from Tab using a
+%% read lock; wread/1 does the same with a write lock. read/1 and
+%% wread/1 take the object id as a {Tab, Key} tuple and abort with
+%% {bad_type, Oid} for any other argument.
+read(Tab, Key) ->
+    read(Tab, Key, read).
+
+read({Tab, Key}) ->
+    read(Tab, Key, read);
+read(BadOid) ->
+    abort({bad_type, BadOid}).
+
+wread({Tab, Key}) ->
+    read(Tab, Key, write);
+wread(BadOid) ->
+    abort({bad_type, BadOid}).
+
+%% Reads {Tab, Key} within the current activity. Dispatches on the
+%% mnesia_activity_state entry: the default access module is served by
+%% read/5 in this module, any other module by its own read/5. Aborts
+%% with no_transaction when there is no activity state.
+read(Tab, Key, LockKind) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> read(Tid, Ts, Tab, Key, LockKind);
+        {Mod, Tid, Ts}             -> Mod:read(Tid, Ts, Tab, Key, LockKind);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% Read implementation for the default access module. Tab must be an
+%% atom other than 'schema', otherwise the call aborts with
+%% {bad_type, Tab}.
+%%   ets activity - plain ?ets_lookup on Tab
+%%   transaction  - the lock call (rlock, rwlock or sticky_rwlock,
+%%                  depending on LockKind; other kinds abort) supplies
+%%                  the objects, which add_written/3 then combines with
+%%                  what the transaction store holds for the Oid
+%%   otherwise    - dirty_read/2
+read(Tid, Ts, Tab, Key, LockKind)
+  when is_atom(Tab), Tab /= schema ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_lookup(Tab, Key);
+        tid ->
+            Store = Ts#tidstore.store,
+            Oid = {Tab, Key},
+            Locked =
+                case LockKind of
+                    read         -> mnesia_locker:rlock(Tid, Store, Oid);
+                    write        -> mnesia_locker:rwlock(Tid, Store, Oid);
+                    sticky_write -> mnesia_locker:sticky_rwlock(Tid, Store, Oid);
+                    _BadKind     -> abort({bad_type, Tab, LockKind})
+                end,
+            add_written(?ets_lookup(Store, Oid), Tab, Locked);
+        _Protocol ->
+            dirty_read(Tab, Key)
+    end;
+read(_Tid, _Ts, BadTab, _Key, _LockKind) ->
+    abort({bad_type, BadTab}).
+
+%% first/1 returns the first key of Tab within the current activity.
+%% It dispatches on the mnesia_activity_state entry and aborts with
+%% no_transaction when there is none.
+first(Tab) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> first(Tid, Ts, Tab);
+        {Mod, Tid, Ts}             -> Mod:first(Tid, Ts, Tab);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% first/3 is the implementation for the default access module.
+%% Tab must be an atom other than 'schema', otherwise the call aborts
+%% with {bad_type, Tab}. In a transaction the table is read-locked and
+%% fixed, and the dirty result is adjusted by stored_keys/6 for keys
+%% written or deleted in the transaction.
+first(Tid, Ts, Tab)
+  when is_atom(Tab), Tab /= schema ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_first(Tab);
+        tid ->
+            lock_table(Tid, Ts, Tab, read),
+            do_fixtable(Tab,Ts),
+            FirstKey = dirty_first(Tab),
+            stored_keys(Tab, FirstKey, '$end_of_table', Ts, next,
+                        val({Tab, setorbag}));
+        _Protocol ->
+            dirty_first(Tab)
+    end;
+first(_Tid, _Ts, BadTab) ->
+    abort({bad_type, BadTab}).
+
+%% last/1 returns the last key of Tab within the current activity.
+%% It dispatches on the mnesia_activity_state entry and aborts with
+%% no_transaction when there is none.
+last(Tab) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> last(Tid, Ts, Tab);
+        {Mod, Tid, Ts}             -> Mod:last(Tid, Ts, Tab);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% last/3 is the implementation for the default access module.
+%% Tab must be an atom other than 'schema', otherwise the call aborts
+%% with {bad_type, Tab}. In a transaction the table is read-locked and
+%% fixed, and the dirty result is adjusted by stored_keys/6 for keys
+%% written or deleted in the transaction.
+last(Tid, Ts, Tab)
+  when is_atom(Tab), Tab /= schema ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_last(Tab);
+        tid ->
+            lock_table(Tid, Ts, Tab, read),
+            do_fixtable(Tab,Ts),
+            LastKey = dirty_last(Tab),
+            stored_keys(Tab, LastKey, '$end_of_table', Ts, prev,
+                        val({Tab, setorbag}));
+        _Protocol ->
+            dirty_last(Tab)
+    end;
+last(_Tid, _Ts, BadTab) ->
+    abort({bad_type, BadTab}).
+
+%% next/2 returns the key following Key in Tab within the current
+%% activity. It dispatches on the mnesia_activity_state entry and
+%% aborts with no_transaction when there is none.
+next(Tab,Key) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> next(Tid, Ts, Tab, Key);
+        {Mod, Tid, Ts}             -> Mod:next(Tid, Ts, Tab, Key);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% next/4 is the implementation for the default access module.
+%% Tab must be an atom other than 'schema', otherwise the call aborts
+%% with {bad_type, Tab}. In a transaction the dirty lookup is wrapped
+%% in a catch because stored_keys/6 inspects a possible 'EXIT' result
+%% when compensating for keys touched by the transaction.
+next(Tid,Ts,Tab,Key)
+  when is_atom(Tab), Tab /= schema ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_next(Tab,Key);
+        tid ->
+            lock_table(Tid, Ts, Tab, read),
+            do_fixtable(Tab,Ts),
+            Following = (catch dirty_next(Tab,Key)),
+            stored_keys(Tab, Following, Key, Ts, next,
+                        val({Tab, setorbag}));
+        _Protocol ->
+            dirty_next(Tab,Key)
+    end;
+next(_Tid, _Ts, BadTab, _Key) ->
+    abort({bad_type, BadTab}).
+
+%% prev/2 returns the key preceding Key in Tab within the current
+%% activity. It dispatches on the mnesia_activity_state entry and
+%% aborts with no_transaction when there is none.
+prev(Tab,Key) ->
+    case get(mnesia_activity_state) of
+        {?DEFAULT_ACCESS, Tid, Ts} -> prev(Tid, Ts, Tab, Key);
+        {Mod, Tid, Ts}             -> Mod:prev(Tid, Ts, Tab, Key);
+        _NoActivity                -> abort(no_transaction)
+    end.
+
+%% prev/4 is the implementation for the default access module.
+%% Tab must be an atom other than 'schema', otherwise the call aborts
+%% with {bad_type, Tab}. In a transaction the dirty lookup is wrapped
+%% in a catch because stored_keys/6 inspects a possible 'EXIT' result
+%% when compensating for keys touched by the transaction.
+prev(Tid,Ts,Tab,Key)
+  when is_atom(Tab), Tab /= schema ->
+    case element(1, Tid) of
+        ets ->
+            ?ets_prev(Tab,Key);
+        tid ->
+            lock_table(Tid, Ts, Tab, read),
+            do_fixtable(Tab,Ts),
+            Preceding = (catch dirty_prev(Tab,Key)),
+            stored_keys(Tab, Preceding, Key, Ts, prev,
+                        val({Tab, setorbag}));
+        _Protocol ->
+            dirty_prev(Tab,Key)
+    end;
+prev(_Tid, _Ts, BadTab, _Key) ->
+    abort({bad_type, BadTab}).
+
+%% Compensate for transaction written and/or deleted records
+%%
+%% stored_keys(Tab, Result, Prev, Ts, Op, Type) adjusts Result, the
+%% outcome of a dirty first/last/next/prev lookup, using the operations
+%% recorded for Tab in the transaction store of Ts.
+%%   Prev - the key the iteration step started from, or
+%%          '$end_of_table' when called from first/3 or last/3
+%%   Op   - next | prev
+%%   Type - the table type read from {Tab, setorbag}
+%% The clauses are order dependent.
+
+%% The table itself has no further key: continue among the keys the
+%% transaction has stored, if any.
+stored_keys(Tab,'$end_of_table',Prev,Ts,Op,Type) ->
+ case ts_keys(Ts#tidstore.store,Tab,Op,Type,[]) of
+ [] -> '$end_of_table';
+ Keys when Type == ordered_set->
+ get_ordered_tskey(Prev,Keys,Op);
+ Keys ->
+ get_next_tskey(Prev,Keys,Tab)
+ end;
+%% The dirty lookup failed with badarg for exactly this Tab and Key.
+%% If the store has no entry for the key, or its last entry is a
+%% delete, the abort is re-raised; otherwise iteration continues among
+%% the stored keys.
+stored_keys(Tab,{'EXIT',{aborted,R={badarg,[Tab,Key]}}},
+ Key,#tidstore{store=Store},Op,Type) ->
+ %% Had to match on error, ouch..
+ case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
+ [] -> abort(R);
+ Ops ->
+ case lists:last(Ops) of
+ [delete] -> abort(R);
+ _ ->
+ case ts_keys(Store,Tab,Op,Type,[]) of
+ [] -> '$end_of_table';
+ Keys -> get_next_tskey(Key,Keys,Tab)
+ end
+ end
+ end;
+%% Any other aborted lookup is re-raised unchanged.
+stored_keys(_,{'EXIT',{aborted,R}},_,_,_,_) ->
+ abort(R);
+%% ordered_set: the found key is merged with the stored keys, unless
+%% the last stored operation on it is a delete, in which case the
+%% iteration steps on from that key.
+stored_keys(Tab,Key,Prev,#tidstore{store=Store},Op,ordered_set) ->
+ case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
+ [] ->
+ Keys = ts_keys(Store,Tab,Op,ordered_set,[Key]),
+ get_ordered_tskey(Prev,Keys,Op);
+ Ops ->
+ case lists:last(Ops) of
+ [delete] ->
+ mnesia:Op(Tab,Key);
+ _ ->
+ Keys = ts_keys(Store,Tab,Op,ordered_set,[Key]),
+ get_ordered_tskey(Prev,Keys,Op)
+ end
+ end;
+%% Other table types: the found key is returned as is, unless the last
+%% stored operation on it is a delete, in which case the iteration
+%% steps on from that key.
+stored_keys(Tab,Key,_,#tidstore{store=Store},Op,_) ->
+ case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
+ [] -> Key;
+ Ops ->
+ case lists:last(Ops) of
+ [delete] -> mnesia:Op(Tab,Key);
+ _ -> Key
+ end
+ end.
+
+%% Picks the first key in Keys that lies beyond Prev in the direction
+%% given by Op: greater than Prev for 'next', smaller for 'prev'.
+%% When Prev is '$end_of_table' the head of Keys is returned. Returns
+%% '$end_of_table' when no key qualifies.
+get_ordered_tskey(_Prev, [], _Op) ->
+    '$end_of_table';
+get_ordered_tskey('$end_of_table', [First | _], _Op) ->
+    First;
+get_ordered_tskey(Prev, [First | Rest], Op) ->
+    case Op of
+        next when Prev < First -> First;
+        prev when Prev > First -> First;
+        _ -> get_ordered_tskey(Prev, Rest, Op)
+    end.
+
+%% Returns the stored key that follows Key in Keys and that has no
+%% record in Tab according to dirty_read/2, or '$end_of_table' when
+%% the list is exhausted. Keys must be non-empty (hd/1 is applied
+%% to it).
+get_next_tskey(Key,Keys,Tab) ->
+ Next =
+ if Key == '$end_of_table' -> hd(Keys);
+ true ->
+ %% Skip ahead to Key's position in the list of stored keys.
+ case lists:dropwhile(fun(A) -> A /= Key end, Keys) of
+ [] -> hd(Keys); %% First stored key
+ [Key] -> '$end_of_table';
+ [Key,Next2|_] -> Next2
+ end
+ end,
+ case Next of
+ '$end_of_table' -> '$end_of_table';
+ _ -> %% Really slow anybody got another solution??
+ case dirty_read(Tab, Next) of
+ [] -> Next;
+ _ ->
+ %% Updated value we already returned this key
+ get_next_tskey(Next,Keys,Tab)
+ end
+ end.
+
+%% Collect the keys of Tab found in the transaction store (starting from
+%% Def) and order them according to the table type and traversal direction.
+ts_keys(Store, Tab, Op, Type, Def) ->
+    Entries = ?ets_match(Store, {{Tab, '$1'}, '_', '$2'}),
+    Collected = ts_keys_1(Entries, Def),
+    case {Type, Op} of
+        {ordered_set, prev} -> lists:reverse(lists:sort(Collected));
+        {ordered_set, _} -> lists:sort(Collected);
+        {_, next} -> lists:reverse(Collected);
+        _ -> Collected
+    end.
+
+%% Walk the [Key, Op] pairs and maintain a key accumulator: a write pushes
+%% the key unless it is already on top, a delete pops the key when it is on
+%% top, and every other entry leaves the accumulator untouched.
+ts_keys_1(Entries, Def) ->
+    Step = fun([Key, write], [Key | _] = Acc) -> Acc;
+              ([Key, write], Acc) -> [Key | Acc];
+              ([Key, delete], [Key | Below]) -> Below;
+              (_Other, Acc) -> Acc
+           end,
+    lists:foldl(Step, Def, Entries).
+
+
+%%%%%%%%%%%%%%%%%%%%%
+%% Iterators
+
+%% Left fold over Tab using a read lock.
+foldl(Fun, Acc, Tab) ->
+    foldl(Fun, Acc, Tab, read).
+
+%% Dispatch the fold to the access module of the current activity; aborts
+%% with no_transaction when no activity state is found.
+foldl(Fun, Acc, Tab, LockKind) when is_function(Fun) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            foldl(Tid, Ts, Fun, Acc, Tab, LockKind);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:foldl(Tid, Ts, Fun, Acc, Tab, LockKind);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Run the fold from the first key; the catch makes sure close_iteration/2
+%% always runs so the table fixation is released.
+foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+    {Type, Stored} = init_iteration(ActivityId, Opaque, Tab, LockKind),
+    Outcome = (catch do_foldl(ActivityId, Opaque, Tab, dirty_first(Tab),
+                              Fun, Acc, Type, Stored)),
+    close_iteration(Outcome, Tab).
+
+%% Fold loop. Stored holds the keys written by this activity that have not
+%% been visited yet; they are folded in once the table is exhausted.
+do_foldl(A, O, Tab, '$end_of_table', Fun, Acc0, _Type, Stored) ->
+    FoldKey = fun(Key, Acc) ->
+                      lists:foldl(Fun, Acc, read(A, O, Tab, Key, read))
+              end,
+    lists:foldl(FoldKey, Acc0, Stored);
+do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [S | Rest]) when S == Key ->
+    %% Table key and stored key coincide: visit once, drop the stored key.
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, Acc1, ordered_set, Rest);
+do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [S | Rest]) when S < Key ->
+    %% The stored key sorts first: visit it and stay on the same table key.
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, S, read)),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldl(Tid, Ts, Tab, Key, Fun, Acc1, ordered_set, Rest);
+do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [S | Rest]) when S > Key ->
+    %% The table key sorts first: visit it and keep the stored key pending.
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, Acc1, ordered_set, [S | Rest]);
+do_foldl(A, O, Tab, Key, Fun, Acc, Type, Stored) -> %% Type is set or bag
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+    Remaining = ordsets:del_element(Key, Stored),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, Acc1, Type, Remaining).
+
+%% Right fold over Tab using a read lock.
+foldr(Fun, Acc, Tab) ->
+    foldr(Fun, Acc, Tab, read).
+
+%% Dispatch the fold to the access module of the current activity; aborts
+%% with no_transaction when no activity state is found.
+foldr(Fun, Acc, Tab, LockKind) when is_function(Fun) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            foldr(Tid, Ts, Fun, Acc, Tab, LockKind);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:foldr(Tid, Ts, Fun, Acc, Tab, LockKind);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Run the fold from the last key; the catch makes sure close_iteration/2
+%% always runs so the table fixation is released.
+foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+    {Type, Sorted} = init_iteration(ActivityId, Opaque, Tab, LockKind),
+    Stored =
+        case Type of
+            ordered_set ->
+                %% Walked backwards, so the stored keys must be descending.
+                lists:reverse(Sorted);
+            _ ->
+                %% Order doesn't matter for set and bag; keeping it sorted
+                %% lets do_foldr use ordsets:del_element.
+                Sorted
+        end,
+    Outcome = (catch do_foldr(ActivityId, Opaque, Tab, dirty_last(Tab),
+                              Fun, Acc, Type, Stored)),
+    close_iteration(Outcome, Tab).
+
+%% Backward fold loop. Stored holds the keys written by this activity that
+%% have not been visited yet; they are folded in once the table is exhausted.
+do_foldr(A, O, Tab, '$end_of_table', Fun, Acc0, _Type, Stored) ->
+    FoldKey = fun(Key, Acc) ->
+                      lists:foldl(Fun, Acc, read(A, O, Tab, Key, read))
+              end,
+    lists:foldl(FoldKey, Acc0, Stored);
+do_foldr(A, O, Tab, Key, Fun, Acc, ordered_set, [S | Rest]) when S == Key ->
+    %% Table key and stored key coincide: visit once, drop the stored key.
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, Acc1, ordered_set, Rest);
+do_foldr(A, O, Tab, Key, Fun, Acc, ordered_set, [S | Rest]) when S > Key ->
+    %% The stored key comes first going backwards: visit it, keep the table key.
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, S, read)),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldr(Tid, Ts, Tab, Key, Fun, Acc1, ordered_set, Rest);
+do_foldr(A, O, Tab, Key, Fun, Acc, ordered_set, [S | Rest]) when S < Key ->
+    %% The table key comes first going backwards: visit it, keep the stored key.
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, Acc1, ordered_set, [S | Rest]);
+do_foldr(A, O, Tab, Key, Fun, Acc, Type, Stored) -> %% Type is set or bag
+    Acc1 = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+    Remaining = ordsets:del_element(Key, Stored),
+    {_, Tid, Ts} = get(mnesia_activity_state),
+    do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, Acc1, Type, Remaining).
+
+%% Prepare a fold: lock the table, collect the keys already written by the
+%% activity and fix the table unless its storage type is unknown.
+%% Returns {TableType, WrittenKeys}.
+init_iteration(ActivityId, Opaque, Tab, LockKind) ->
+    lock(ActivityId, Opaque, {table, Tab}, LockKind),
+    Type = val({Tab, setorbag}),
+    Written = add_previous(ActivityId, Opaque, Type, Tab),
+    case val({Tab, storage_type}) of
+        unknown ->
+            ignore;
+        Storage ->
+            mnesia_lib:db_fixtable(Storage, Tab, true)
+    end,
+    {Type, Written}.
+
+%% Finish a fold: release the table fixation (unless the storage type is
+%% unknown) and turn a caught exit into an abort; otherwise return Res.
+close_iteration(Res, Tab) ->
+    Storage = val({Tab, storage_type}),
+    if
+        Storage == unknown ->
+            ignore;
+        true ->
+            mnesia_lib:db_fixtable(Storage, Tab, false)
+    end,
+    case Res of
+        {'EXIT', {aborted, Reason}} ->
+            abort(Reason);
+        {'EXIT', Reason} ->
+            abort(Reason);
+        _ ->
+            Res
+    end.
+
+%% Collect the keys this activity has written to Tab, as an ordered set
+%% (sorted, no duplicates). A non_transaction activity has no store to
+%% consult and yields [].
+add_previous(_ActivityId, non_transaction, _Type, _Tab) ->
+    [];
+add_previous(_Tid, Ts, _Type, Tab) ->
+    Previous = ?ets_match(Ts#tidstore.store, {{Tab, '$1'}, '_', write}),
+    %% usort rather than sort: the store may hold several write entries for
+    %% one key (presumably only for bag tables). do_foldl/do_foldr remove a
+    %% visited key with ordsets:del_element/2, which drops a single
+    %% occurrence, so a duplicated key would be left behind and its records
+    %% folded a second time when the end of the table is reached.
+    lists:usort(lists:append(Previous)).
+
+%% Adjust the result of a read so that it reflects what this particular
+%% transaction has already written or deleted for the key.
+
+add_written([], _Tab, Objs) ->
+    Objs;  % nothing written: the standard fast case
+add_written(Written, Tab, Objs) ->
+    TabType = val({Tab, setorbag}),
+    case TabType of
+        bag -> add_written_to_bag(Written, Objs, []);
+        _ -> add_written_to_set(Written)
+    end.
+
+%% Only the last operation on the key decides the visible value:
+%% a write yields that value, any kind of delete yields nothing.
+add_written_to_set(Ws) ->
+    case lists:last(Ws) of
+        {_, Val, write} ->
+            [Val];
+        {_, _, Del} when Del =:= delete; Del =:= delete_object ->
+            []
+    end.
+
+%% Replay the operations in order over the objects read from the table
+%% (Objs) and the objects written so far (Ack, newest first).
+add_written_to_bag(Written, Objs, Ack) ->
+    Replay =
+        fun({_, Val, write}, {FromTab, Added}) ->
+                {lists:delete(Val, FromTab), [Val | Added]};
+           ({_, _, delete}, _State) ->
+                %% This transaction just deleted all objects with this key
+                {[], []};
+           ({_, Val, delete_object}, {FromTab, Added}) ->
+                {lists:delete(Val, FromTab), lists:delete(Val, Added)}
+        end,
+    {Remaining, NewestFirst} = lists:foldl(Replay, {Objs, Ack}, Written),
+    Remaining ++ lists:reverse(NewestFirst). %% Oldest write first as in ets
+
+%% Match on a pattern whose first element names the table; read lock.
+match_object(Pat) ->
+    case is_tuple(Pat) andalso tuple_size(Pat) > 2 of
+        true -> match_object(element(1, Pat), Pat, read);
+        false -> abort({bad_type, Pat})
+    end.
+
+%% Dispatch to the access module of the current activity; aborts with
+%% no_transaction when no activity state is found.
+match_object(Tab, Pat, LockKind) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            match_object(Tid, Ts, Tab, Pat, LockKind);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:match_object(Tid, Ts, Tab, Pat, LockKind);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Match objects inside an activity. In a transaction (tid) the record is
+%% locked when the key is fully bound, otherwise the whole table, and the
+%% result is merged with what the transaction has written.
+match_object(Tid, Ts, Tab, Pat, LockKind)
+  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+    ActivityKind = element(1, Tid),
+    case ActivityKind of
+        ets ->
+            mnesia_lib:db_match_object(ram_copies, Tab, Pat);
+        tid ->
+            KeyPat = element(2, Pat),
+            case has_var(KeyPat) of
+                true -> lock_table(Tid, Ts, Tab, LockKind);
+                false -> lock_record(Tid, Ts, Tab, KeyPat, LockKind)
+            end,
+            FromTab = dirty_match_object(Tab, Pat),
+            add_written_match(Ts#tidstore.store, Pat, Tab, FromTab);
+        _Protocol ->
+            dirty_match_object(Tab, Pat)
+    end;
+match_object(_Tid, _Ts, Tab, Pat, _LockKind) ->
+    abort({bad_type, Tab, Pat}).
+
+%% Merge the store operations that concern Pat into the objects read from Tab.
+add_written_match(Store, Pat, Tab, Objs) ->
+    Pending = find_ops(Store, Tab, Pat),
+    TabType = val({Tab, setorbag}),
+    add_match(Pending, Objs, TabType).
+
+%% Select from the store: writes and delete_objects whose value matches Pat,
+%% plus every delete on the table.
+find_ops(Store, Tab, Pat) ->
+    AnyOid = {Tab, '_'},
+    MatchSpec = [{{AnyOid, Pat, write}, [], ['$_']},
+                 {{AnyOid, '_', delete}, [], ['$_']},
+                 {{AnyOid, Pat, delete_object}, [], ['$_']}],
+    ets:select(Store, MatchSpec).
+
+%% Apply the store operations, in order, to the objects read from the table.
+add_match([], Objs, _Type) ->
+    Objs;
+add_match(Written, Objs, ordered_set) ->
+    %% keysort is stable, so operations on one key keep their order
+    add_ordered_match(lists:keysort(1, Written), Objs, []);
+add_match([{Oid, Val, Op} | Rest], Objs, Type) ->
+    Updated =
+        case {Op, Type} of
+            {delete, _} -> deloid(Oid, Objs);
+            {delete_object, _} -> lists:delete(Val, Objs);
+            {write, bag} -> [Val | lists:delete(Val, Objs)];
+            {write, set} -> [Val | deloid(Oid, Objs)]
+        end,
+    add_match(Rest, Updated, Type).
+
+%% For ordered_set only !!
+%% Merge store operations into the object list; both are assumed to be
+%% sorted by key. Done collects passed objects in reverse order.
+add_ordered_match(Ops = [{{_, OpKey}, _, _} | _], [Rec | Recs], Done)
+  when OpKey > element(2, Rec) ->
+    %% The object sorts before the operation: pass it through
+    add_ordered_match(Ops, Recs, [Rec | Done]);
+add_ordered_match([{{_, OpKey}, NewRec, write} | Ops], Recs = [Rec | _], Done)
+  when OpKey < element(2, Rec) ->
+    %% Write of a key the table lacks: insert it in front
+    add_ordered_match(Ops, [NewRec | Recs], Done);
+add_ordered_match([{{_, OpKey}, _, _DelOp} | Ops], Recs = [Rec | _], Done)
+  when OpKey < element(2, Rec) ->
+    %% Delete of a key the table lacks: nothing to do
+    add_ordered_match(Ops, Recs, Done);
+%% The operation key is greater than the last object
+add_ordered_match([{_, NewRec, write} | Ops], [], Done) ->
+    add_ordered_match(Ops, [NewRec], Done);
+add_ordered_match([_ | Ops], [], Done) ->
+    add_ordered_match(Ops, [], Done);
+%% From here on the keys are equal
+add_ordered_match([{_, NewRec, write} | Ops], [_Old | Recs], Done) ->
+    add_ordered_match(Ops, [NewRec | Recs], Done);
+add_ordered_match([{_, _Val, delete} | Ops], [_Old | Recs], Done) ->
+    add_ordered_match(Ops, Recs, Done);
+add_ordered_match([{_, Gone, delete_object} | Ops], [Gone | Recs], Done) ->
+    add_ordered_match(Ops, Recs, Done);
+add_ordered_match([{_, _, delete_object} | Ops], Recs, Done) ->
+    add_ordered_match(Ops, Recs, Done);
+add_ordered_match([], Recs, Done) ->
+    lists:reverse(Done, Recs).
+
+%% For select chunk: merge the store operations into one chunk of objects.
+%% ordered_set tables have their own merge routine.
+add_sel_match(Written, Objs, Type) ->
+    case Type of
+        ordered_set -> add_sel_ordered_match(Written, Objs, []);
+        _ -> add_sel_match(Written, Objs, Type, [])
+    end.
+
+%% Apply the operations to a chunk of objects. Returns {Objects, Left}
+%% where Left holds the operations to carry over to the following chunk.
+add_sel_match([], Objs, _Type, Left) ->
+    {Objs, lists:reverse(Left)};
+add_sel_match([Op = {Oid, _, delete} | Rest], Objs, Type, Left) ->
+    Pruned = deloid(Oid, Objs),
+    %% The operation is spent only when it removed something from a set;
+    %% for a bag more objects may show up in the next chunk.
+    Left1 =
+        case Pruned =/= Objs andalso Type == set of
+            true -> Left;
+            false -> [Op | Left]
+        end,
+    add_sel_match(Rest, Pruned, Type, Left1);
+add_sel_match([Op = {_Oid, Val, delete_object} | Rest], Objs, Type, Left) ->
+    Pruned = lists:delete(Val, Objs),
+    Left1 =
+        case Pruned =/= Objs andalso Type == set of
+            true -> Left;
+            false -> [Op | Left]
+        end,
+    add_sel_match(Rest, Pruned, Type, Left1);
+add_sel_match([Op = {Oid = {_, Key}, Val, write} | Rest], Objs, bag, Left) ->
+    case lists:keymember(Key, 2, Objs) of
+        false ->
+            add_sel_match(Rest, Objs, bag, [Op | Left]);
+        true ->
+            %% Key is in this chunk: add the value here and carry a
+            %% delete_object for it over to the following chunks.
+            add_sel_match(Rest, [Val | lists:delete(Val, Objs)], bag,
+                          [{Oid, Val, delete_object} | Left])
+    end;
+add_sel_match([Op = {Oid, Val, write} | Rest], Objs, set, Left) ->
+    case deloid(Oid, Objs) of
+        Objs ->
+            %% Key not in this chunk: keep the write for later
+            add_sel_match(Rest, Objs, set, [Op | Left]);
+        Pruned ->
+            add_sel_match(Rest, [Val | Pruned], set, Left)
+    end.
+
+%% For ordered_set only !!
+%% Chunked variant of the ordered merge. Returns {Objects, Left} where Left
+%% holds the operations whose keys lie beyond this chunk.
+add_sel_ordered_match(Ops = [{{_, OpKey}, _, _} | _], [Rec | Recs], Done)
+  when OpKey > element(2, Rec) ->
+    add_sel_ordered_match(Ops, Recs, [Rec | Done]);
+add_sel_ordered_match([{{_, OpKey}, NewRec, write} | Ops], Recs = [Rec | _], Done)
+  when OpKey < element(2, Rec) ->
+    add_sel_ordered_match(Ops, [NewRec | Recs], Done);
+add_sel_ordered_match([{{_, OpKey}, _, _DelOp} | Ops], Recs = [Rec | _], Done)
+  when OpKey < element(2, Rec) ->
+    add_sel_ordered_match(Ops, Recs, Done);
+%% Chunk exhausted: the remaining operations are handed back
+add_sel_ordered_match(Remaining, [], Done) ->
+    {lists:reverse(Done), Remaining};
+%% From here on the keys are equal
+add_sel_ordered_match([{_, NewRec, write} | Ops], [_Old | Recs], Done) ->
+    add_sel_ordered_match(Ops, [NewRec | Recs], Done);
+add_sel_ordered_match([{_, _Val, delete} | Ops], [_Old | Recs], Done) ->
+    add_sel_ordered_match(Ops, Recs, Done);
+add_sel_ordered_match([{_, Gone, delete_object} | Ops], [Gone | Recs], Done) ->
+    add_sel_ordered_match(Ops, Recs, Done);
+add_sel_ordered_match([{_, _, delete_object} | Ops], Recs, Done) ->
+    add_sel_ordered_match(Ops, Recs, Done);
+add_sel_ordered_match([], Recs, Done) ->
+    {lists:reverse(Done, Recs), []}.
+
+
+%% Drop every object whose second element compares equal to the key of Oid.
+deloid({_Tab, Key}, Objs) ->
+    [Obj || Obj <- Objs,
+            not (is_tuple(Obj) andalso tuple_size(Obj) >= 2
+                 andalso element(2, Obj) == Key)];
+deloid(_Oid, Objs) ->
+    %% Oid is not a {Tab, Key} pair: nothing can match
+    [Obj || Obj <- Objs].
+
+%%%%%%%%%%%%%%%%%%
+% select
+
+%% Select with a match specification, using a read lock.
+select(Tab, Pat) ->
+    select(Tab, Pat, read).
+
+%% Dispatch to the access module of the current activity; aborts with
+%% no_transaction when no activity state is found.
+select(Tab, Pat, LockKind)
+  when is_atom(Tab), Tab /= schema, is_list(Pat) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            select(Tid, Ts, Tab, Pat, LockKind);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:select(Tid, Ts, Tab, Pat, LockKind);
+        _ ->
+            abort(no_transaction)
+    end;
+select(Tab, Pat, _Lock) ->
+    abort({badarg, Tab, Pat}).
+
+%% Select inside an activity, reading the table through dirty_select/2.
+select(Tid, Ts, Tab, Spec, LockKind) ->
+    Reader = fun(MatchSpec) -> dirty_select(Tab, MatchSpec) end,
+    fun_select(Tid, Ts, Tab, Spec, LockKind, Tab, Reader).
+
+%% Run a select through SelectFun. In a transaction the result is merged
+%% with the operations found in the store under TabPat.
+fun_select(Tid, Ts, Tab, Spec, LockKind, TabPat, SelectFun) ->
+    case element(1, Tid) of
+        ets ->
+            mnesia_lib:db_select(ram_copies, Tab, Spec);
+        tid ->
+            select_lock(Tid, Ts, LockKind, Spec, Tab),
+            Store = Ts#tidstore.store,
+            case ?ets_match_object(Store, {{TabPat, '_'}, '_', '_'}) of
+                [] ->
+                    %% The transaction has not touched the table:
+                    %% the result comes straight from [d]ets.
+                    SelectFun(Spec);
+                Pending ->
+                    %% Slow case: records were added or deleted earlier in
+                    %% the transaction. Fetch whole records, apply the
+                    %% pending operations, then run the original spec.
+                    TabType = val({Tab, setorbag}),
+                    WholeRecSpec = get_record_pattern(Spec),
+                    FromTab = SelectFun(WholeRecSpec),
+                    Merged = add_match(Pending, FromTab, TabType),
+                    Compiled = ets:match_spec_compile(Spec),
+                    ets:match_spec_run(Merged, Compiled)
+            end;
+        _Protocol ->
+            SelectFun(Spec)
+    end.
+
+%% Lock a single record when the spec has exactly one clause with a fully
+%% bound key; otherwise lock the whole table.
+select_lock(Tid, Ts, LockKind, Spec, Tab) ->
+    case Spec of
+        [{Head, _, _}] when is_tuple(Head), tuple_size(Head) > 2 ->
+            KeyPat = element(2, Head),
+            case has_var(KeyPat) of
+                true -> lock_table(Tid, Ts, Tab, LockKind);
+                false -> lock_record(Tid, Ts, Tab, KeyPat, LockKind)
+            end;
+        _ ->
+            lock_table(Tid, Ts, Tab, LockKind)
+    end.
+
+%% Breakable select: results are delivered in chunks of NObjects.
+select(Tab, Pat, NObjects, LockKind)
+  when is_atom(Tab), Tab /= schema, is_list(Pat), is_integer(NObjects) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            select(Tid, Ts, Tab, Pat, NObjects, LockKind);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:select(Tid, Ts, Tab, Pat, NObjects, LockKind);
+        _ ->
+            abort(no_transaction)
+    end;
+select(Tab, Pat, NObjects, _Lock) ->
+    abort({badarg, Tab, Pat, NObjects}).
+
+%% Start a chunked select on the node the table is read from.
+select(Tid, Ts, Tab, Spec, NObjects, LockKind) ->
+    Node = val({Tab, where_to_read}),
+    Storage = mnesia_lib:storage_type_at_node(Node, Tab),
+    Init = fun(MatchSpec) ->
+                   dirty_sel_init(Node, Tab, MatchSpec, NObjects, Storage)
+           end,
+    fun_select(Tid, Ts, Tab, Spec, LockKind, Tab, Init, NObjects, Node, Storage).
+
+-record(mnesia_select, {tab,tid,node,storage,cont,written=[],spec,type,orig}).
+
+%% Start a chunked select through Init. The returned continuation is a
+%% #mnesia_select{} record; in a transaction it also carries the store
+%% operations that still have to be merged into later chunks.
+fun_select(Tid, Ts, Tab, Spec, LockKind, TabPat, Init, NObjects, Node, Storage) ->
+    State0 = #mnesia_select{tid=Tid,node=Node,storage=Storage,tab=Tab,orig=Spec},
+    case element(1, Tid) of
+        ets ->
+            First = mnesia_lib:db_select_init(ram_copies, Tab, Spec, NObjects),
+            select_state(First, State0);
+        tid ->
+            select_lock(Tid, Ts, LockKind, Spec, Tab),
+            Store = Ts#tidstore.store,
+            do_fixtable(Tab, Store),
+
+            case ?ets_match_object(Store, {{TabPat, '_'}, '_', '_'}) of
+                [] ->
+                    %% The transaction has not touched the table:
+                    %% the result comes straight from [d]ets.
+                    select_state(Init(Spec), State0);
+                Pending0 ->
+                    %% Slow case: records were added or deleted earlier in
+                    %% the transaction and must be merged into each chunk.
+                    TabType = val({Tab, setorbag}),
+                    Pending =
+                        case TabType of
+                            ordered_set ->
+                                %% keysort is stable
+                                lists:keysort(1, Pending0);
+                            _ ->
+                                Pending0
+                        end,
+                    WholeRecSpec = get_record_pattern(Spec),
+                    Compiled = ets:match_spec_compile(Spec),
+                    trans_select(Init(WholeRecSpec),
+                                 State0#mnesia_select{written=Pending,
+                                                      spec=Compiled,
+                                                      type=TabType,
+                                                      orig=WholeRecSpec})
+            end;
+        _Protocol ->
+            select_state(Init(Spec), State0)
+    end.
+
+%% Continue a chunked select from the continuation of an earlier call.
+select(Cont) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            select_cont(Tid, Ts, Cont);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:select_cont(Tid, Ts, Cont);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Fetch the next chunk. The continuation must have been created by the
+%% same activity (same Tid), otherwise the call aborts.
+select_cont(_Tid, _Ts, '$end_of_table') ->
+    '$end_of_table';
+select_cont(Tid, _Ts, Sel = #mnesia_select{tid=Tid, cont=Cont, orig=MatchSpec})
+  when element(1, Tid) == ets ->
+    case Cont of
+        '$end_of_table' ->
+            '$end_of_table';
+        _ ->
+            Next = mnesia_lib:db_select_cont(ram_copies, Cont, MatchSpec),
+            select_state(Next, Sel)
+    end;
+select_cont(Tid, _Ts, Sel = #mnesia_select{tid=Tid, written=[]}) ->
+    %% No pending store operations: chunks are passed on as they are
+    select_state(dirty_sel_cont(Sel), Sel);
+select_cont(Tid, _Ts, Sel = #mnesia_select{tid=Tid}) ->
+    trans_select(dirty_sel_cont(Sel), Sel);
+select_cont(_OtherTid, _Ts, #mnesia_select{tid=_OwnerTid}) -> % Mismatching tids
+    abort(wrong_transaction);
+select_cont(_Tid, _Ts, Cont) ->
+    abort({badarg, Cont}).
+
+%% Merge the pending store operations into a chunk. At the end of the table
+%% the remaining operations are applied to an empty object list.
+trans_select('$end_of_table',
+             #mnesia_select{written=Pending, spec=Compiled, type=TabType}) ->
+    Leftovers = add_match(Pending, [], TabType),
+    {ets:match_spec_run(Leftovers, Compiled), '$end_of_table'};
+trans_select({Chunk, Cont},
+             Sel = #mnesia_select{written=Pending, spec=Compiled, type=TabType}) ->
+    {Merged, StillPending} = add_sel_match(Pending, Chunk, TabType),
+    Matches = ets:match_spec_run(Merged, Compiled),
+    select_state({Matches, Cont}, Sel#mnesia_select{written=StillPending}).
+
+%% Pair a chunk with the select state updated to the new continuation.
+select_state('$end_of_table', _Sel) ->
+    '$end_of_table';
+select_state({Matches, Cont}, Sel) ->
+    {Matches, Sel#mnesia_select{cont=Cont}}.
+
+%% Rewrite a match specification so that every clause returns the whole
+%% object ('$_') instead of its original body.
+get_record_pattern(Spec) ->
+    lists:map(fun({Head, Conds, _Body}) -> {Head, Conds, ['$_']} end, Spec).
+
+%% Return all keys of Tab; a read lock is requested.
+all_keys(Tab) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            all_keys(Tid, Ts, Tab, read);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:all_keys(Tid, Ts, Tab, read);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Select the key position of every record; duplicates are removed for bags.
+all_keys(Tid, Ts, Tab, LockKind)
+  when is_atom(Tab), Tab /= schema ->
+    Wild = val({Tab, wild_pattern}),
+    KeyPat = setelement(2, Wild, '$1'),
+    Found = select(Tid, Ts, Tab, [{KeyPat, [], ['$1']}], LockKind),
+    TabType = val({Tab, setorbag}),
+    if
+        TabType == bag -> mnesia_lib:uniq(Found);
+        true -> Found
+    end;
+all_keys(_Tid, _Ts, Tab, _LockKind) ->
+    abort({bad_type, Tab}).
+
+%% Match via an index on a pattern whose first element names the table.
+index_match_object(Pat, Attr) ->
+    case is_tuple(Pat) andalso tuple_size(Pat) > 2 of
+        true -> index_match_object(element(1, Pat), Pat, Attr, read);
+        false -> abort({bad_type, Pat})
+    end.
+
+%% Dispatch to the access module of the current activity; aborts with
+%% no_transaction when no activity state is found.
+index_match_object(Tab, Pat, Attr, LockKind) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Index match inside an activity. In a transaction only the read lock
+%% kind is accepted: the table is read-locked and the result is merged with
+%% what the transaction has written.
+index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind)
+  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+    ActivityKind = element(1, Tid),
+    case ActivityKind of
+        ets ->
+            dirty_index_match_object(Tab, Pat, Attr); % Should be optimized?
+        tid ->
+            case mnesia_schema:attr_tab_to_pos(Tab, Attr) of
+                Pos when Pos =< tuple_size(Pat) ->
+                    case LockKind of
+                        read ->
+                            Store = Ts#tidstore.store,
+                            mnesia_locker:rlock_table(Tid, Store, Tab),
+                            FromTab = dirty_index_match_object(Tab, Pat, Attr),
+                            add_written_match(Store, Pat, Tab, FromTab);
+                        _ ->
+                            abort({bad_type, Tab, LockKind})
+                    end;
+                OutOfRange ->
+                    abort({bad_type, Tab, OutOfRange})
+            end;
+        _Protocol ->
+            dirty_index_match_object(Tab, Pat, Attr)
+    end;
+index_match_object(_Tid, _Ts, Tab, Pat, _Attr, _LockKind) ->
+    abort({bad_type, Tab, Pat}).
+
+%% Read records through the index on Attr; a read lock is requested.
+index_read(Tab, Key, Attr) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            index_read(Tid, Ts, Tab, Key, Attr, read);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:index_read(Tid, Ts, Tab, Key, Attr, read);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Index read inside an activity. In a transaction only the read lock kind
+%% and a key without variables are accepted; the result is merged with what
+%% the transaction has written.
+index_read(Tid, Ts, Tab, Key, Attr, LockKind)
+  when is_atom(Tab), Tab /= schema ->
+    ActivityKind = element(1, Tid),
+    case ActivityKind of
+        ets ->
+            dirty_index_read(Tab, Key, Attr); % Should be optimized?
+        tid ->
+            Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
+            case LockKind of
+                read ->
+                    case has_var(Key) of
+                        true ->
+                            abort({bad_type, Tab, Attr, Key});
+                        false ->
+                            Store = Ts#tidstore.store,
+                            FromIndex = mnesia_index:read(Tid, Store, Tab, Key, Pos),
+                            %% Pattern selecting the store operations that
+                            %% carry Key at the indexed position
+                            Pat = setelement(Pos, val({Tab, wild_pattern}), Key),
+                            add_written_match(Store, Pat, Tab, FromIndex)
+                    end;
+                _ ->
+                    abort({bad_type, Tab, LockKind})
+            end;
+        _Protocol ->
+            dirty_index_read(Tab, Key, Attr)
+    end;
+index_read(_Tid, _Ts, Tab, _Key, _Attr, _LockKind) ->
+    abort({bad_type, Tab}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Dirty access regardless of activities - updates
+
+%% Dirty write of a record whose first element names the table.
+dirty_write(Val) ->
+    case is_tuple(Val) andalso tuple_size(Val) > 2 of
+        true -> dirty_write(element(1, Val), Val);
+        false -> abort({bad_type, Val})
+    end.
+
+%% Dirty write in async_dirty mode.
+dirty_write(Tab, Val) ->
+    do_dirty_write(async_dirty, Tab, Val).
+
+%% Check Val against the record name and arity registered for Tab, then
+%% hand the write to mnesia_tm:dirty/2.
+do_dirty_write(SyncMode, Tab, Val)
+  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+    Validation = ?catch_val({Tab, record_validation}),
+    case Validation of
+        {RecName, Arity, _Type}
+          when tuple_size(Val) == Arity, RecName == element(1, Val) ->
+            Key = element(2, Val),
+            mnesia_tm:dirty(SyncMode, {{Tab, Key}, Val, write});
+        {'EXIT', _} ->
+            abort({no_exists, Tab});
+        _ ->
+            abort({bad_type, Val})
+    end;
+do_dirty_write(_SyncMode, Tab, Val) ->
+    abort({bad_type, Tab, Val}).
+
+%% Dirty delete of the record(s) identified by the oid {Tab, Key}.
+dirty_delete(Oid) ->
+    case Oid of
+        {Tab, Key} -> dirty_delete(Tab, Key);
+        _ -> abort({bad_type, Oid})
+    end.
+
+%% Dirty delete in async_dirty mode.
+dirty_delete(Tab, Key) ->
+    do_dirty_delete(async_dirty, Tab, Key).
+
+%% Hand a delete of {Tab, Key} to mnesia_tm:dirty/2; the schema table and
+%% non-atom table names are rejected.
+do_dirty_delete(SyncMode, Tab, Key) ->
+    case is_atom(Tab) andalso Tab /= schema of
+        true ->
+            Oid = {Tab, Key},
+            mnesia_tm:dirty(SyncMode, {Oid, Oid, delete});
+        false ->
+            abort({bad_type, Tab})
+    end.
+
+%% Dirty delete of one exact record whose first element names the table.
+dirty_delete_object(Val) ->
+    case is_tuple(Val) andalso tuple_size(Val) > 2 of
+        true -> dirty_delete_object(element(1, Val), Val);
+        false -> abort({bad_type, Val})
+    end.
+
+%% Dirty delete_object in async_dirty mode.
+dirty_delete_object(Tab, Val) ->
+    do_dirty_delete_object(async_dirty, Tab, Val).
+
+%% Hand a delete_object to mnesia_tm:dirty/2. The record must not contain
+%% match variables.
+do_dirty_delete_object(SyncMode, Tab, Val)
+  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+    case has_var(Val) of
+        true ->
+            abort({bad_type, Tab, Val});
+        false ->
+            Key = element(2, Val),
+            mnesia_tm:dirty(SyncMode, {{Tab, Key}, Val, delete_object})
+    end;
+do_dirty_delete_object(_SyncMode, Tab, Val) ->
+    abort({bad_type, Tab, Val}).
+
+%% A counter is addressed by an oid {CounterTab, CounterName}.
+
+dirty_update_counter(Counter, Incr) ->
+    case Counter of
+        {Tab, Key} -> dirty_update_counter(Tab, Key, Incr);
+        _ -> abort({bad_type, Counter})
+    end.
+
+%% Counter update in async_dirty mode.
+dirty_update_counter(Tab, Key, Incr) ->
+    do_dirty_update_counter(async_dirty, Tab, Key, Incr).
+
+%% Counter updates are only accepted for set tables whose records have
+%% arity 3; anything else aborts with combine_error.
+do_dirty_update_counter(SyncMode, Tab, Key, Incr)
+  when is_atom(Tab), Tab /= schema, is_integer(Incr) ->
+    Validation = ?catch_val({Tab, record_validation}),
+    case Validation of
+        {RecName, 3, set} ->
+            mnesia_tm:dirty(SyncMode, {{Tab, Key}, {RecName, Incr}, update_counter});
+        _ ->
+            abort({combine_error, Tab, update_counter})
+    end;
+do_dirty_update_counter(_SyncMode, Tab, _Key, Incr) ->
+    abort({bad_type, Tab, Incr}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Dirty access regardless of activities - read
+
+%% Dirty read of the record(s) identified by the oid {Tab, Key}.
+dirty_read(Oid) ->
+    case Oid of
+        {Tab, Key} -> dirty_read(Tab, Key);
+        _ -> abort({bad_type, Oid})
+    end.
+
+%% The lookup always goes through dirty_rpc/4 to the node the table is
+%% read from; no direct local ets lookup is attempted.
+dirty_read(Tab, Key) ->
+    case is_atom(Tab) andalso Tab /= schema of
+        true -> dirty_rpc(Tab, mnesia_lib, db_get, [Tab, Key]);
+        false -> abort({bad_type, Tab})
+    end.
+
+%% Dirty match on a pattern whose first element names the table.
+dirty_match_object(Pat) ->
+    case is_tuple(Pat) andalso tuple_size(Pat) > 2 of
+        true -> dirty_match_object(element(1, Pat), Pat);
+        false -> abort({bad_type, Pat})
+    end.
+
+%% Run the match on the node the table is read from.
+dirty_match_object(Tab, Pat) ->
+    Valid = is_atom(Tab) andalso Tab /= schema
+        andalso is_tuple(Pat) andalso tuple_size(Pat) > 2,
+    case Valid of
+        true -> dirty_rpc(Tab, ?MODULE, remote_dirty_match_object, [Tab, Pat]);
+        false -> abort({bad_type, Tab, Pat})
+    end.
+
+%% With a fully bound key the table is matched directly; otherwise the
+%% index positions of the table are tried.
+remote_dirty_match_object(Tab, Pat) ->
+    KeyPat = element(2, Pat),
+    case has_var(KeyPat) of
+        true ->
+            IndexPositions = val({Tab, index}),
+            remote_dirty_match_object(Tab, Pat, IndexPositions);
+        false ->
+            mnesia_lib:db_match_object(Tab, Pat)
+    end.
+
+%% Use the first index position at which the pattern is fully bound; fall
+%% back to a plain table match when none qualifies.
+remote_dirty_match_object(Tab, Pat, [Pos | Rest]) when Pos =< tuple_size(Pat) ->
+    IndexKey = element(Pos, Pat),
+    case has_var(IndexKey) of
+        true ->
+            remote_dirty_match_object(Tab, Pat, Rest);
+        false ->
+            mnesia_index:dirty_match_object(Tab, Pat, Pos)
+    end;
+remote_dirty_match_object(Tab, Pat, []) ->
+    mnesia_lib:db_match_object(Tab, Pat);
+remote_dirty_match_object(Tab, Pat, _Positions) ->
+    %% An index position lies outside the pattern
+    abort({bad_type, Tab, Pat}).
+
+%% Run a match specification on the node the table is read from.
+dirty_select(Tab, Spec) ->
+    case is_atom(Tab) andalso Tab /= schema andalso is_list(Spec) of
+        true -> dirty_rpc(Tab, ?MODULE, remote_dirty_select, [Tab, Spec]);
+        false -> abort({bad_type, Tab, Spec})
+    end.
+
+%% An index is only considered for a single-clause spec whose key is not
+%% fully bound; every other spec is selected directly from the table.
+remote_dirty_select(Tab, Spec) ->
+    case Spec of
+        [{Head, _, _}] when is_tuple(Head), tuple_size(Head) > 2 ->
+            KeyPat = element(2, Head),
+            case has_var(KeyPat) of
+                true ->
+                    IndexPositions = val({Tab, index}),
+                    remote_dirty_select(Tab, Spec, IndexPositions);
+                false ->
+                    mnesia_lib:db_select(Tab, Spec)
+            end;
+        _ ->
+            mnesia_lib:db_select(Tab, Spec)
+    end.
+
+%% Use the first index position at which the head pattern is fully bound;
+%% fall back to a plain table select when none qualifies.
+remote_dirty_select(Tab, [{Head, _, _}] = Spec, [Pos | Rest])
+  when is_tuple(Head), tuple_size(Head) > 2, Pos =< tuple_size(Head) ->
+    IndexKey = element(Pos, Head),
+    case has_var(IndexKey) of
+        true ->
+            remote_dirty_select(Tab, Spec, Rest);
+        false ->
+            Candidates = mnesia_index:dirty_select(Tab, Head, Pos),
+            %% The index lookup result has not been through the match
+            %% spec yet; the spec is compiled and run on it here.
+            Compiled = ets:match_spec_compile(Spec),
+            Ordered =
+                case val({Tab, setorbag}) of
+                    ordered_set -> lists:sort(Candidates);
+                    _ -> Candidates
+                end,
+            ets:match_spec_run(Ordered, Compiled)
+    end;
+remote_dirty_select(Tab, Spec, _Positions) ->
+    mnesia_lib:db_select(Tab, Spec).
+
+%% Start a chunked select on Node.
+dirty_sel_init(Node, Tab, Spec, NObjects, Type) ->
+    InitArgs = [Type, Tab, Spec, NObjects],
+    do_dirty_rpc(Tab, Node, mnesia_lib, db_select_init, InitArgs).
+
+%% Fetch the next chunk of a chunked select from the node it was started on.
+dirty_sel_cont(Sel = #mnesia_select{cont=Cont}) ->
+    case Cont of
+        '$end_of_table' ->
+            '$end_of_table';
+        _ ->
+            #mnesia_select{node=Node, tab=Tab, storage=Type, orig=MatchSpec} = Sel,
+            do_dirty_rpc(Tab, Node, mnesia_lib, db_select_cont, [Type, Cont, MatchSpec])
+    end.
+
+%% Dirty variant of all_keys: select the key position of every record and
+%% remove duplicates for bag tables.
+dirty_all_keys(Tab) when is_atom(Tab), Tab /= schema ->
+    case ?catch_val({Tab, wild_pattern}) of
+        {'EXIT', _} ->
+            abort({no_exists, Tab});
+        Wild ->
+            KeyPat = setelement(2, Wild, '$1'),
+            Found = dirty_select(Tab, [{KeyPat, [], ['$1']}]),
+            TabType = val({Tab, setorbag}),
+            if
+                TabType == bag -> mnesia_lib:uniq(Found);
+                true -> Found
+            end
+    end;
+dirty_all_keys(Tab) ->
+    abort({bad_type, Tab}).
+
+%% Dirty index match on a pattern whose first element names the table.
+dirty_index_match_object(Pat, Attr) ->
+    case is_tuple(Pat) andalso tuple_size(Pat) > 2 of
+        true -> dirty_index_match_object(element(1, Pat), Pat, Attr);
+        false -> abort({bad_type, Pat})
+    end.
+
+%% With a fully bound key the ordinary match is used. Otherwise the pattern
+%% must be fully bound at the indexed position, and the index is consulted
+%% on the node the table is read from.
+dirty_index_match_object(Tab, Pat, Attr)
+  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+    case mnesia_schema:attr_tab_to_pos(Tab, Attr) of
+        Pos when Pos =< tuple_size(Pat) ->
+            KeyPat = element(2, Pat),
+            case has_var(KeyPat) of
+                true ->
+                    IndexKey = element(Pos, Pat),
+                    case has_var(IndexKey) of
+                        true ->
+                            abort({bad_type, Tab, Attr, IndexKey});
+                        false ->
+                            dirty_rpc(Tab, mnesia_index, dirty_match_object,
+                                      [Tab, Pat, Pos])
+                    end;
+                false ->
+                    dirty_match_object(Tab, Pat)
+            end;
+        OutOfRange ->
+            abort({bad_type, Tab, OutOfRange})
+    end;
+dirty_index_match_object(Tab, Pat, _Attr) ->
+    abort({bad_type, Tab, Pat}).
+
+%% Dirty read through the index on Attr; Key must not contain variables.
+dirty_index_read(Tab, Key, Attr) when is_atom(Tab), Tab /= schema ->
+    Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
+    case has_var(Key) of
+        true ->
+            abort({bad_type, Tab, Attr, Key});
+        false ->
+            mnesia_index:dirty_read(Tab, Key, Pos)
+    end;
+dirty_index_read(Tab, _Key, _Attr) ->
+    abort({bad_type, Tab}).
+
+%% Call mnesia_lib:db_slot/2 for Slot on the node the table is read from.
+dirty_slot(Tab, Slot) ->
+    case is_atom(Tab) andalso Tab /= schema andalso is_integer(Slot) of
+        true -> dirty_rpc(Tab, mnesia_lib, db_slot, [Tab, Slot]);
+        false -> abort({bad_type, Tab, Slot})
+    end.
+
+%% First key of Tab, fetched from the node the table is read from.
+dirty_first(Tab) ->
+    case is_atom(Tab) andalso Tab /= schema of
+        true -> dirty_rpc(Tab, mnesia_lib, db_first, [Tab]);
+        false -> abort({bad_type, Tab})
+    end.
+
+%% Last key of Tab, fetched from the node the table is read from.
+dirty_last(Tab) ->
+    case is_atom(Tab) andalso Tab /= schema of
+        true -> dirty_rpc(Tab, mnesia_lib, db_last, [Tab]);
+        false -> abort({bad_type, Tab})
+    end.
+
+%% Key following Key in Tab, fetched from the node the table is read from.
+dirty_next(Tab, Key) ->
+    case is_atom(Tab) andalso Tab /= schema of
+        true -> dirty_rpc(Tab, mnesia_lib, db_next_key, [Tab, Key]);
+        false -> abort({bad_type, Tab})
+    end.
+
+%% Key preceding Key in Tab, fetched from the node the table is read from.
+dirty_prev(Tab, Key) ->
+    case is_atom(Tab) andalso Tab /= schema of
+        true -> dirty_rpc(Tab, mnesia_lib, db_prev_key, [Tab, Key]);
+        false -> abort({bad_type, Tab})
+    end.
+
+
+%% Apply M:F(Args) on the node recorded as where_to_read for Tab.
+dirty_rpc(Tab, M, F, Args) ->
+    ReadNode = val({Tab, where_to_read}),
+    do_dirty_rpc(Tab, ReadNode, M, F, Args).
+
+%% Apply M:F(Args) on Node. After a failed rpc the read node is checked
+%% again: a dirty activity retries on the new node, a transaction aborts.
+do_dirty_rpc(_Tab, nowhere, _M, _F, Args) ->
+    mnesia:abort({no_exists, Args});
+do_dirty_rpc(Tab, Node, M, F, Args) ->
+    case rpc:call(Node, M, F, Args) of
+        {badrpc, Reason} ->
+            timer:sleep(20), %% Do not be too eager, and can't use yield on SMP
+            %% Sync with mnesia_monitor (best effort)
+            try sys:get_status(mnesia_monitor) catch _:_ -> ok end,
+            case mnesia_controller:call({check_w2r, Node, Tab}) of % Sync
+                Node ->
+                    %% The read node is unchanged: report the failure
+                    ErrorTag = mnesia_lib:dirty_rpc_error_tag(Reason),
+                    mnesia:abort({ErrorTag, Args});
+                OtherNode ->
+                    case get(mnesia_activity_state) of
+                        {_Mod, Tid, _Ts} when is_record(Tid, tid) ->
+                            %% A consistent retry of a transaction would
+                            %% need the lock on the new node, but neither
+                            %% the kind nor the granularity of the lock
+                            %% is known here.
+                            %% --> Abort the transaction
+                            mnesia:abort({node_not_running, Node});
+                        {error, {node_not_running, _}} ->
+                            %% Mnesia is stopping
+                            mnesia:abort({no_exists, Args});
+                        _ ->
+                            %% A dirty retry is safe; 'Node' probably went
+                            %% down just now. The sync above lets
+                            %% mnesia_controller see the broken link first.
+                            do_dirty_rpc(Tab, OtherNode, M, F, Args)
+                    end
+            end;
+        Result ->
+            Result
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Info
+
+%% Info about one table. Works outside activities as well; a non-default
+%% access module gets to answer the request itself.
+table_info(Tab, Item) ->
+    Activity = get(mnesia_activity_state),
+    case Activity of
+        undefined ->
+            any_table_info(Tab, Item);
+        {?DEFAULT_ACCESS, _Tid, _Ts} ->
+            any_table_info(Tab, Item);
+        {AccessMod, Tid, Ts} ->
+            AccessMod:table_info(Tid, Ts, Tab, Item);
+        _ ->
+            abort(no_transaction)
+    end.
+
+%% Access-module form of table_info; the activity arguments are ignored.
+table_info(_ActivityId, _Opaque, Tab, Item) ->
+    any_table_info(Tab, Item).
+
+
+%% Answer a table_info request. A few items are computed specially; every
+%% other item is looked up as {Tab, Item}.
+any_table_info(Tab, Item) when is_atom(Tab) ->
+    case Item of
+        master_nodes ->
+            mnesia_recover:get_master_nodes(Tab);
+        name ->
+            Tab;
+        all ->
+            case mnesia_schema:get_table_properties(Tab) of
+                [] ->
+                    abort({no_exists, Tab, Item});
+                Props ->
+                    %% The property is stored as setorbag but reported as type
+                    [case Prop of
+                         {setorbag, Type} -> {type, Type};
+                         _ -> Prop
+                     end || Prop <- Props]
+            end;
+        _ when Item =:= size; Item =:= memory ->
+            raw_table_info(Tab, Item);
+        _ ->
+            %% 'type' is stored under the name setorbag
+            InfoKey =
+                case Item of
+                    type -> setorbag;
+                    _ -> Item
+                end,
+            case ?catch_val({Tab, InfoKey}) of
+                {'EXIT', _} ->
+                    bad_info_reply(Tab, Item);
+                Val ->
+                    Val
+            end
+    end;
+any_table_info(Tab, _Item) ->
+    abort({bad_type, Tab}).
+
+%% Ask ets or dets, depending on the storage type, for Item. An unknown
+%% storage type or a failed lookup yields the bad_info_reply/2 answer.
+raw_table_info(Tab, Item) ->
+    case ?catch_val({Tab, storage_type}) of
+        InRam when InRam =:= ram_copies; InRam =:= disc_copies ->
+            info_reply(catch ?ets_info(Tab, Item), Tab, Item);
+        disc_only_copies ->
+            info_reply(catch dets:info(Tab, Item), Tab, Item);
+        unknown ->
+            bad_info_reply(Tab, Item);
+        {'EXIT', _} ->
+            bad_info_reply(Tab, Item)
+    end.
+
+%% Pass a successful info value through; an exit or error tuple is
+%% replaced by the bad_info_reply/2 answer.
+info_reply(Reply, Tab, Item) ->
+    case Reply of
+        {'EXIT', _Reason} -> bad_info_reply(Tab, Item);
+        {error, _Reason} -> bad_info_reply(Tab, Item);
+        Val -> Val
+    end.
+
+%% size and memory of an unavailable table are reported as 0; any other
+%% item aborts with no_exists.
+bad_info_reply(Tab, Item) ->
+    case Item of
+        size -> 0;
+        memory -> 0;
+        _ -> abort({no_exists, Tab, Item})
+    end.
+
+%% Raw info about all tables; delegates to mnesia_schema:info/0.
+schema() ->
+ mnesia_schema:info().
+
+%% Raw info about one table; delegates to mnesia_schema:info/1.
+schema(Tab) ->
+ mnesia_schema:info(Tab).
+
+%% Describe the error term Err; delegates to mnesia_lib:error_desc/1.
+error_description(Err) ->
+ mnesia_lib:error_desc(Err).
+
+%% Print a report about the local node with io:format and return ok.
+%% When Mnesia is running the report lists held locks, queued lock
+%% requests, transaction manager info, transactions found as
+%% 'unclear' in mnesia_decision, controller info and the system
+%% summary.  Otherwise only mini_info/0 is printed.
+info() ->
+    case mnesia_lib:is_running() of
+        yes ->
+            TmInfo = mnesia_tm:get_info(10000),
+            HeldLocks = system_info(held_locks),
+            LockQueue = system_info(lock_queue),
+
+            PrintHeld =
+                fun(Lock) -> io:format("Lock: ~p~n", [Lock]) end,
+            PrintQueued =
+                fun({Oid, Op, _Pid, Tid, OwnerTid}) ->
+                        io:format("Tid ~p waits for ~p lock "
+                                  "on oid ~p owned by ~p ~n",
+                                  [Tid, Op, Oid, OwnerTid])
+                end,
+            PrintUncertain =
+                fun({Tid, _, Nodes}) ->
+                        io:format("Tid ~w waits for decision "
+                                  "from ~w~n",
+                                  [Tid, Nodes])
+                end,
+
+            io:format("---> Processes holding locks <--- ~n", []),
+            lists:foreach(PrintHeld, HeldLocks),
+
+            io:format( "---> Processes waiting for locks <--- ~n", []),
+            lists:foreach(PrintQueued, LockQueue),
+            mnesia_tm:display_info(group_leader(), TmInfo),
+
+            Uncertain = ets:match_object(mnesia_decision,
+                                         {'_', unclear, '_'}),
+            io:format( "---> Uncertain transactions <--- ~n", []),
+            lists:foreach(PrintUncertain, Uncertain),
+
+            mnesia_controller:info(),
+            display_system_info(HeldLocks, LockQueue, TmInfo, Uncertain);
+        _ ->
+            mini_info()
+    end,
+    ok.
+
+%% Print the short system summary: version and debug level, schema
+%% location and directory usage, fallback flag, and the running and
+%% stopped db nodes.
+mini_info() ->
+    io:format("===> System info in version ~p, debug level = ~p <===~n",
+              [system_info(version), system_info(debug)]),
+    UsedPrefix =
+        case system_info(use_dir) of
+            true -> "";
+            false -> "NOT "
+        end,
+    io:format("~w. Directory ~p is ~sused.~n",
+              [system_info(schema_location),
+               system_info(directory),
+               UsedPrefix]),
+    io:format("use fallback at restart = ~w~n",
+              [system_info(fallback_activated)]),
+    RunningNodes = system_info(running_db_nodes),
+    io:format("running db nodes = ~w~n", [RunningNodes]),
+    StoppedNodes = mnesia_lib:all_nodes() -- RunningNodes,
+    io:format("stopped db nodes = ~w ~n", [StoppedNodes]).
+
+%% Print the full system summary: mini_info/0, the table overview,
+%% the transaction counters, lock and transaction counts, and the
+%% number of uncertain transactions together with the distinct nodes
+%% they refer to.  A timed-out TmInfo is shown as 'infinity'.
+display_system_info(Held, Queued, TmInfo, Uncertain) ->
+    mini_info(),
+    display_tab_info(),
+
+    Counters = lists:map(fun system_info/1,
+                         [transaction_commits, transaction_failures,
+                          transaction_restarts, transaction_log_writes]),
+    io:format("~w transactions committed, ~w aborted, "
+              "~w restarted, ~w logged to disc~n",
+              Counters),
+
+    %% The third element of an info tuple is reported as "local",
+    %% the second as "remote".
+    {Local, Remote} =
+        case TmInfo of
+            {info, RemoteList, LocalList} ->
+                {length(LocalList), length(RemoteList)};
+            {timeout, _} ->
+                {infinity, infinity}
+        end,
+    io:format("~w held locks, ~w in queue; "
+              "~w local transactions, ~w remote~n",
+              [length(Held), length(Queued), Local, Remote]),
+
+    CollectNodes =
+        fun({_, _, Ns}, {Count, Seen}) ->
+                Fresh = [N || N <- Ns, not lists:member(N, Seen)],
+                {Count + 1, Fresh ++ Seen}
+        end,
+    {Ucount, Unodes} = lists:foldl(CollectNodes, {0, []}, Uncertain),
+    io:format("~w transactions waits for other nodes: ~p~n",
+              [Ucount, Unodes]).
+
+%% Print the table overview: master node tables, tables grouped by
+%% local storage type, and tables grouped by replication pattern.
+%% The pattern is where_to_commit for read_write tables and the sorted
+%% active replicas tagged read_only for read_only tables.
+display_tab_info() ->
+    MasterTabs = mnesia_recover:get_master_node_tables(),
+    io:format("master node tables = ~p~n", [lists:sort(MasterTabs)]),
+
+    AllTabs = system_info(tables),
+    {Remote, Ram, Disc, DiscOnly} =
+        lists:foldl(fun storage_count/2, {[], [], [], []}, AllTabs),
+    io:format("remote = ~p~n", [lists:sort(Remote)]),
+    io:format("ram_copies = ~p~n", [lists:sort(Ram)]),
+    io:format("disc_copies = ~p~n", [lists:sort(Disc)]),
+    io:format("disc_only_copies = ~p~n", [lists:sort(DiscOnly)]),
+
+    ReplicaPattern =
+        fun(T) ->
+                case val({T, access_mode}) of
+                    read_write ->
+                        table_info(T, where_to_commit);
+                    read_only ->
+                        lists:sort([{N, read_only} ||
+                                       N <- val({T, active_replicas})])
+                end
+        end,
+    Group =
+        fun(T, Groups) ->
+                Pattern = ReplicaPattern(T),
+                case lists:keysearch(Pattern, 1, Groups) of
+                    false ->
+                        [{Pattern, [T]} | Groups];
+                    {value, {_, Members}} ->
+                        lists:keyreplace(Pattern, 1, Groups,
+                                         {Pattern, [T | Members]})
+                end
+        end,
+    Grouped = lists:foldl(Group, [], AllTabs),
+    Show = fun({Pattern, Members}) ->
+                   io:format("~p = ~p~n", [Pattern, Members])
+           end,
+    lists:foreach(Show, lists:sort(Grouped)).
+
+%% Fold step for display_tab_info/0: put table T in the accumulator
+%% list that matches its storage_type (unknown, ram, disc, disc only).
+storage_count(T, {U, R, D, DO}) ->
+    Storage = table_info(T, storage_type),
+    case Storage of
+        disc_only_copies -> {U, R, D, [T | DO]};
+        disc_copies -> {U, R, [T | D], DO};
+        ram_copies -> {U, [T | R], D, DO};
+        unknown -> {[T | U], R, D, DO}
+    end.
+
+%% Return the system info Item.  A failure inside system_info2/1 is
+%% caught and re-raised through abort/1.
+system_info(Item) ->
+    Result = (catch system_info2(Item)),
+    case Result of
+        {'EXIT', Reason} -> abort(Reason);
+        _ -> Result
+    end.
+
+%% Worker for system_info/1: one clause per supported item; an
+%% unsupported item exits with {badarg, Item}.
+%% Items whose value is cached in mnesia_gvar fall back to reading it
+%% from the application environment through system_info_cached/2.
+system_info2(all) ->
+    Items = system_info_items(mnesia_lib:is_running()),
+    [{I, system_info(I)} || I <- Items];
+
+system_info2(db_nodes) ->
+    DiscNs = ?catch_val({schema, disc_copies}),
+    RamNs = ?catch_val({schema, ram_copies}),
+    if
+        is_list(DiscNs), is_list(RamNs) ->
+            DiscNs ++ RamNs;
+        true ->
+            case mnesia_schema:read_nodes() of
+                {ok, Nodes} -> Nodes;
+                {error,Reason} -> exit(Reason)
+            end
+    end;
+system_info2(running_db_nodes) ->
+    system_info_cached({current, db_nodes},
+                       fun mnesia_lib:running_nodes/0);
+system_info2(extra_db_nodes) ->
+    system_info_cached(extra_db_nodes,
+                       fun() -> mnesia_monitor:get_env(extra_db_nodes) end);
+system_info2(directory) ->
+    system_info_cached(directory,
+                       fun() -> mnesia_monitor:get_env(dir) end);
+system_info2(use_dir) ->
+    system_info_cached(use_dir,
+                       fun mnesia_monitor:use_dir/0);
+system_info2(schema_location) ->
+    system_info_cached(schema_location,
+                       fun() -> mnesia_monitor:get_env(schema_location) end);
+system_info2(fallback_activated) ->
+    system_info_cached(fallback_activated,
+                       fun mnesia_bup:fallback_exists/0);
+
+system_info2(version) ->
+    case ?catch_val(version) of
+        {'EXIT', _} ->
+            Apps = application:loaded_applications(),
+            case lists:keysearch(?APPLICATION, 1, Apps) of
+                {value, {_Name, _Desc, Version}} ->
+                    Version;
+                false ->
+                    %% Ensure that it does not match
+                    {mnesia_not_loaded, node(), now()}
+            end;
+        Version ->
+            Version
+    end;
+
+system_info2(access_module) -> mnesia_monitor:get_env(access_module);
+system_info2(auto_repair) -> mnesia_monitor:get_env(auto_repair);
+system_info2(is_running) -> mnesia_lib:is_running();
+system_info2(backup_module) -> mnesia_monitor:get_env(backup_module);
+system_info2(event_module) -> mnesia_monitor:get_env(event_module);
+system_info2(debug) -> mnesia_monitor:get_env(debug);
+system_info2(dump_log_load_regulation) -> mnesia_monitor:get_env(dump_log_load_regulation);
+system_info2(dump_log_write_threshold) -> mnesia_monitor:get_env(dump_log_write_threshold);
+system_info2(dump_log_time_threshold) -> mnesia_monitor:get_env(dump_log_time_threshold);
+system_info2(dump_log_update_in_place) ->
+    mnesia_monitor:get_env(dump_log_update_in_place);
+system_info2(max_wait_for_decision) -> mnesia_monitor:get_env(max_wait_for_decision);
+system_info2(embedded_mnemosyne) -> mnesia_monitor:get_env(embedded_mnemosyne);
+system_info2(ignore_fallback_at_startup) -> mnesia_monitor:get_env(ignore_fallback_at_startup);
+system_info2(fallback_error_function) -> mnesia_monitor:get_env(fallback_error_function);
+system_info2(log_version) -> mnesia_log:version();
+system_info2(protocol_version) -> mnesia_monitor:protocol_version();
+system_info2(schema_version) -> mnesia_schema:version(); %backward compatibility
+system_info2(tables) -> val({schema, tables});
+system_info2(local_tables) -> val({schema, local_tables});
+system_info2(master_node_tables) -> mnesia_recover:get_master_node_tables();
+system_info2(subscribers) -> mnesia_subscr:subscribers();
+system_info2(checkpoints) -> mnesia_checkpoint:checkpoints();
+system_info2(held_locks) -> mnesia_locker:get_held_locks();
+system_info2(lock_queue) -> mnesia_locker:get_lock_queue();
+system_info2(transactions) -> mnesia_tm:get_transactions();
+system_info2(transaction_failures) -> mnesia_lib:read_counter(trans_failures);
+system_info2(transaction_commits) -> mnesia_lib:read_counter(trans_commits);
+system_info2(transaction_restarts) -> mnesia_lib:read_counter(trans_restarts);
+system_info2(transaction_log_writes) -> mnesia_dumper:get_log_writes();
+system_info2(core_dir) -> mnesia_monitor:get_env(core_dir);
+system_info2(no_table_loaders) -> mnesia_monitor:get_env(no_table_loaders);
+system_info2(dc_dump_limit) -> mnesia_monitor:get_env(dc_dump_limit);
+
+system_info2(Item) -> exit({badarg, Item}).
+
+%% Return the value stored in mnesia_gvar under Var.  When that
+%% lookup fails, load the mnesia application (aborting if it cannot
+%% be loaded) and compute the value with Fallback instead.
+system_info_cached(Var, Fallback) ->
+    case ?catch_val(Var) of
+        {'EXIT', _} ->
+            %% Ensure that we access the intended Mnesia
+            %% directory. This function may not be called
+            %% during startup since it will cause the
+            %% application_controller to get into deadlock
+            load_mnesia_or_abort(),
+            Fallback();
+        Val ->
+            Val
+    end.
+
+%% Items reported by system_info(all).  The argument is the value of
+%% mnesia_lib:is_running(): 'yes' selects the full list, 'no' the
+%% shorter list used when Mnesia is not running.
+system_info_items(yes) ->
+    [access_module, auto_repair, backup_module, checkpoints,
+     db_nodes, debug, directory,
+     dump_log_load_regulation, dump_log_time_threshold,
+     dump_log_update_in_place, dump_log_write_threshold,
+     embedded_mnemosyne, event_module, extra_db_nodes,
+     fallback_activated, held_locks,
+     ignore_fallback_at_startup, fallback_error_function,
+     is_running, local_tables, lock_queue, log_version,
+     master_node_tables, max_wait_for_decision, protocol_version,
+     running_db_nodes, schema_location, schema_version,
+     subscribers, tables,
+     transaction_commits, transaction_failures,
+     transaction_log_writes, transaction_restarts, transactions,
+     use_dir, core_dir, no_table_loaders, dc_dump_limit, version];
+system_info_items(no) ->
+    [auto_repair, backup_module, db_nodes, debug, directory,
+     dump_log_load_regulation, dump_log_time_threshold,
+     dump_log_update_in_place, dump_log_write_threshold,
+     event_module, extra_db_nodes,
+     ignore_fallback_at_startup, fallback_error_function,
+     is_running, log_version, max_wait_for_decision,
+     protocol_version, running_db_nodes,
+     schema_location, schema_version,
+     use_dir, core_dir, version].
+
+%% Print the system summary and return the value of
+%% mnesia_lib:is_running().  The full summary is printed only when
+%% that value is 'yes'; otherwise mini_info/0 is printed.
+system_info() ->
+    Running = mnesia_lib:is_running(),
+    if
+        Running =:= yes ->
+            TmInfo = mnesia_tm:get_info(10000),
+            HeldLocks = system_info(held_locks),
+            LockQueue = system_info(lock_queue),
+            Uncertain = ets:match_object(mnesia_decision,
+                                         {'_', unclear, '_'}),
+            display_system_info(HeldLocks, LockQueue, TmInfo, Uncertain);
+        true ->
+            mini_info()
+    end,
+    Running.
+
+%% Make sure the mnesia application is loaded; a load error is
+%% raised through abort/1.
+load_mnesia_or_abort() ->
+    LoadResult = mnesia_lib:ensure_loaded(?APPLICATION),
+    case LoadResult of
+        {error, Reason} -> abort(Reason);
+        ok -> ok
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Database mgt
+
+%% Delegates to mnesia_bup:create_schema/1.
+%% NOTE(review): Ns is presumably a list of nodes - confirm.
+create_schema(Ns) ->
+ mnesia_bup:create_schema(Ns).
+
+%% Delegates to mnesia_schema:delete_schema/1.
+%% NOTE(review): Ns is presumably a list of nodes - confirm.
+delete_schema(Ns) ->
+ mnesia_schema:delete_schema(Ns).
+
+%% Delegates to mnesia_log:backup/1.
+backup(Opaque) ->
+ mnesia_log:backup(Opaque).
+
+%% Same, with an explicit backup module Mod; delegates to
+%% mnesia_log:backup/2.
+backup(Opaque, Mod) ->
+ mnesia_log:backup(Opaque, Mod).
+
+%% Delegates to mnesia_bup:traverse_backup/4.
+traverse_backup(S, T, Fun, Acc) ->
+ mnesia_bup:traverse_backup(S, T, Fun, Acc).
+
+%% Delegates to mnesia_bup:traverse_backup/6.
+%% NOTE(review): SM and TM look like the modules used for source S
+%% and target T - confirm against mnesia_bup.
+traverse_backup(S, SM, T, TM, F, A) ->
+ mnesia_bup:traverse_backup(S, SM, T, TM, F, A).
+
+%% Delegates to mnesia_bup:install_fallback/1.
+install_fallback(Opaque) ->
+ mnesia_bup:install_fallback(Opaque).
+
+%% Delegates to mnesia_bup:install_fallback/2.
+install_fallback(Opaque, Mod) ->
+ mnesia_bup:install_fallback(Opaque, Mod).
+
+%% Delegates to mnesia_bup:uninstall_fallback/0.
+uninstall_fallback() ->
+ mnesia_bup:uninstall_fallback().
+
+%% Delegates to mnesia_bup:uninstall_fallback/1.
+uninstall_fallback(Args) ->
+ mnesia_bup:uninstall_fallback(Args).
+
+%% Delegates to mnesia_checkpoint:activate/1.
+activate_checkpoint(Args) ->
+ mnesia_checkpoint:activate(Args).
+
+%% Delegates to mnesia_checkpoint:deactivate/1.
+deactivate_checkpoint(Name) ->
+ mnesia_checkpoint:deactivate(Name).
+
+%% Delegates to mnesia_log:backup_checkpoint/2.
+backup_checkpoint(Name, Opaque) ->
+ mnesia_log:backup_checkpoint(Name, Opaque).
+
+%% Delegates to mnesia_log:backup_checkpoint/3.
+backup_checkpoint(Name, Opaque, Mod) ->
+ mnesia_log:backup_checkpoint(Name, Opaque, Mod).
+
+%% Delegates to mnesia_schema:restore/2.
+restore(Opaque, Args) ->
+ mnesia_schema:restore(Opaque, Args).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt
+
+%% Delegates to mnesia_schema:create_table/1.  Judging from
+%% create_table/2, Arg is an option list containing {name, Name}.
+create_table(Arg) ->
+ mnesia_schema:create_table(Arg).
+%% Create table Name with the option list Arg.  The name is prepended
+%% as a {name, Name} option and the call is handed to
+%% mnesia_schema:create_table/1.
+%% A non-list Arg is rejected with {aborted, {badarg, Name, Arg}}.
+%% The reason is wrapped in a single term so that the result is a
+%% two-tuple {aborted, Reason}; the old four-element tuple could not
+%% be matched by callers expecting that shape.
+create_table(Name, Arg) when is_list(Arg) ->
+    mnesia_schema:create_table([{name, Name}| Arg]);
+create_table(Name, Arg) ->
+    {aborted, {badarg, Name, Arg}}.
+
+%% Delegates to mnesia_schema:delete_table/1.
+delete_table(Tab) ->
+ mnesia_schema:delete_table(Tab).
+
+%% Delegates to mnesia_schema:add_table_copy/3.
+%% NOTE(review): N looks like a node and S a storage type - confirm.
+add_table_copy(Tab, N, S) ->
+ mnesia_schema:add_table_copy(Tab, N, S).
+%% Delegates to mnesia_schema:del_table_copy/2.
+del_table_copy(Tab, N) ->
+ mnesia_schema:del_table_copy(Tab, N).
+
+%% Delegates to mnesia_schema:move_table/3 (note the different name).
+move_table_copy(Tab, From, To) ->
+ mnesia_schema:move_table(Tab, From, To).
+
+%% Delegates to mnesia_schema:add_table_index/2.
+add_table_index(Tab, Ix) ->
+ mnesia_schema:add_table_index(Tab, Ix).
+%% Delegates to mnesia_schema:del_table_index/2.
+del_table_index(Tab, Ix) ->
+ mnesia_schema:del_table_index(Tab, Ix).
+
+%% Transform table Tab keeping its current record name.  The record
+%% name is read with val/1; if that fails the failure reason is
+%% raised through mnesia:abort/1.
+transform_table(Tab, Fun, NewA) ->
+    RecName = (catch val({Tab, record_name})),
+    case RecName of
+        {'EXIT', Reason} ->
+            mnesia:abort(Reason);
+        _ ->
+            mnesia_schema:transform_table(Tab, Fun, NewA, RecName)
+    end.
+
+%% Delegates to mnesia_schema:transform_table/4 with an explicit
+%% record name NewRN.
+transform_table(Tab, Fun, NewA, NewRN) ->
+ mnesia_schema:transform_table(Tab, Fun, NewA, NewRN).
+
+%% Delegates to mnesia_schema:change_table_copy_type/3.
+change_table_copy_type(T, N, S) ->
+ mnesia_schema:change_table_copy_type(T, N, S).
+
+%% Clear table Tab inside a sync transaction that this call starts.
+%% Allowed with no activity state, or with one whose id is not a tid
+%% tuple; any other state (an ongoing transaction) is refused with
+%% nested_transaction.
+clear_table(Tab) ->
+    ClearFun = fun() -> do_clear_table(Tab) end,
+    case get(mnesia_activity_state) of
+        undefined ->
+            transaction(undefined, ClearFun, [], infinity, ?DEFAULT_ACCESS, sync);
+        {Mod, Tid, _Ts} = State when element(1, Tid) =/= tid ->
+            transaction(State, ClearFun, [], infinity, Mod, sync);
+        _ -> %% Not allowed for clear_table
+            mnesia:abort({aborted, nested_transaction})
+    end.
+
+%% Transaction body of clear_table/1: dispatch to clear_table/4 of
+%% this module or of the activity's access module, using the '_'
+%% wildcard as the object.  Calls abort(no_transaction) when there is
+%% no three-element activity state.
+do_clear_table(Tab) ->
+    ActivityState = get(mnesia_activity_state),
+    case ActivityState of
+        {?DEFAULT_ACCESS, Tid, Ts} ->
+            clear_table(Tid, Ts, Tab, '_');
+        {AccessMod, Tid, Ts} ->
+            AccessMod:clear_table(Tid, Ts, Tab, '_');
+        _Other ->
+            abort(no_transaction)
+    end.
+
+%% Record a clear_table operation for Tab in the transaction store:
+%% take a table write lock, then insert {{Tab, '_'}, Obj, clear_table}.
+%% Only defined when Tid is a tid tuple.  Returns ok.
+clear_table(Tid, Ts, Tab, Obj) when element(1, Tid) =:= tid ->
+    TxStore = Ts#tidstore.store,
+    mnesia_locker:wlock_table(Tid, TxStore, Tab),
+    ?ets_insert(TxStore, {{Tab, '_'}, Obj, clear_table}),
+    ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt - user properties
+
+%% Read a user property with val/1 under the key
+%% {Tab, user_property, PropKey}.
+read_table_property(Tab, PropKey) ->
+ val({Tab, user_property, PropKey}).
+
+%% Delegates to mnesia_schema:write_table_property/2.
+write_table_property(Tab, Prop) ->
+ mnesia_schema:write_table_property(Tab, Prop).
+
+%% Delegates to mnesia_schema:delete_table_property/2.
+delete_table_property(Tab, PropKey) ->
+ mnesia_schema:delete_table_property(Tab, PropKey).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt - fragmentation properties
+
+%% Delegates to mnesia_schema:change_table_frag/2.
+change_table_frag(Tab, FragProp) ->
+ mnesia_schema:change_table_frag(Tab, FragProp).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt - table load
+
+%% Dump ram tables to disc; delegates to mnesia_schema:dump_tables/1.
+dump_tables(Tabs) ->
+ mnesia_schema:dump_tables(Tabs).
+
+%% Allow the user to wait for some tables to be loaded; delegates to
+%% mnesia_controller:wait_for_tables/2.
+wait_for_tables(Tabs, Timeout) ->
+ mnesia_controller:wait_for_tables(Tabs, Timeout).
+
+%% Ask mnesia_controller to force-load Tab.  A result of 'ok' is
+%% reported as 'yes' for backwards compatibility; any other result is
+%% returned unchanged.
+force_load_table(Tab) ->
+    Result = mnesia_controller:force_load_table(Tab),
+    if
+        Result =:= ok -> yes; % Backwards compatibility
+        true -> Result
+    end.
+
+%% Delegates to mnesia_schema:change_table_access_mode/2.
+change_table_access_mode(T, Access) ->
+ mnesia_schema:change_table_access_mode(T, Access).
+
+%% Delegates to mnesia_schema:change_table_load_order/2.
+change_table_load_order(T, O) ->
+ mnesia_schema:change_table_load_order(T, O).
+
+%% Set the master nodes of every table to those members of Nodes that
+%% hold a copy of the table.
+%% When Mnesia is running the table definitions are taken from
+%% mnesia_gvar.  When it is not running they are read from disc under
+%% the schema table lock, provided a directory is in use; without a
+%% directory nothing is done and ok is returned.
+%% Returns the result of mnesia_recover:log_master_nodes/3, or
+%% {error, Reason} if the schema cannot be read, or
+%% {error, {bad_type, Nodes}} when Nodes is not a list.
+set_master_nodes(Nodes) when is_list(Nodes) ->
+    UseDir = system_info(use_dir),
+    IsRunning = system_info(is_running),
+    case IsRunning of
+        yes ->
+            CsPat = {{'_', cstruct}, '_'},
+            Cstructs0 = ?ets_match_object(mnesia_gvar, CsPat),
+            Cstructs = [Cs || {_, Cs} <- Cstructs0],
+            log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning);
+        _NotRunning ->
+            case UseDir of
+                true ->
+                    mnesia_lib:lock_table(schema),
+                    %% Release the schema lock even if reading or
+                    %% logging raises, so a caller that catches the
+                    %% exception is not left holding it.
+                    try
+                        case mnesia_schema:read_cstructs_from_disc() of
+                            {ok, Cstructs} ->
+                                log_valid_master_nodes(Cstructs, Nodes,
+                                                       UseDir, IsRunning);
+                            {error, Reason} ->
+                                {error, Reason}
+                        end
+                    after
+                        mnesia_lib:unlock_table(schema)
+                    end;
+                false ->
+                    ok
+            end
+    end;
+set_master_nodes(Nodes) ->
+    {error, {bad_type, Nodes}}.
+
+%% For every table definition keep only those of Nodes that hold a
+%% copy of the table, then log the resulting {Table, ValidNodes}
+%% pairs with mnesia_recover:log_master_nodes/3.
+log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning) ->
+    Args = [{Cs#cstruct.name,
+             mnesia_lib:intersect(Nodes, mnesia_lib:copy_holders(Cs))}
+            || Cs <- Cstructs],
+    mnesia_recover:log_master_nodes(Args, UseDir, IsRunning).
+
+%% Set the master nodes of the single table Tab to Nodes.
+%% Every node must hold a copy of the table, otherwise
+%% {error, {no_exists, Tab, BadNodes}} is returned.  An unknown table
+%% gives {error, {no_exists, Tab}}.
+%% When Mnesia is running the table definition is taken from
+%% mnesia_gvar.  When it is not running it is read from disc under
+%% the schema table lock, provided a directory is in use; without a
+%% directory nothing is done and ok is returned.
+%% A non-list Nodes gives {error, {bad_type, Tab, Nodes}}.
+set_master_nodes(Tab, Nodes) when is_list(Nodes) ->
+    UseDir = system_info(use_dir),
+    IsRunning = system_info(is_running),
+    case IsRunning of
+        yes ->
+            case ?catch_val({Tab, cstruct}) of
+                {'EXIT', _} ->
+                    {error, {no_exists, Tab}};
+                Cs ->
+                    log_tab_master_nodes(Tab, Nodes, Cs, UseDir, IsRunning)
+            end;
+        _NotRunning ->
+            case UseDir of
+                true ->
+                    mnesia_lib:lock_table(schema),
+                    %% Release the schema lock even if reading or
+                    %% logging raises, so a caller that catches the
+                    %% exception is not left holding it.
+                    try
+                        case mnesia_schema:read_cstructs_from_disc() of
+                            {ok, Cstructs} ->
+                                case lists:keysearch(Tab, 2, Cstructs) of
+                                    {value, Cs} ->
+                                        log_tab_master_nodes(Tab, Nodes, Cs,
+                                                             UseDir, IsRunning);
+                                    false ->
+                                        {error, {no_exists, Tab}}
+                                end;
+                            {error, Reason} ->
+                                {error, Reason}
+                        end
+                    after
+                        mnesia_lib:unlock_table(schema)
+                    end;
+                false ->
+                    ok
+            end
+    end;
+set_master_nodes(Tab, Nodes) ->
+    {error, {bad_type, Tab, Nodes}}.
+
+%% Log Nodes as master nodes of Tab if each of them holds a copy
+%% according to the table definition Cs; otherwise report the nodes
+%% that do not.
+log_tab_master_nodes(Tab, Nodes, Cs, UseDir, IsRunning) ->
+    case Nodes -- mnesia_lib:copy_holders(Cs) of
+        [] ->
+            Args = [{Tab , Nodes}],
+            mnesia_recover:log_master_nodes(Args, UseDir, IsRunning);
+        BadNodes ->
+            {error, {no_exists, Tab, BadNodes}}
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Misc admin
+
+%% Request a log dump with initiator 'user'; delegates to
+%% mnesia_controller:sync_dump_log/1.
+dump_log() ->
+ mnesia_controller:sync_dump_log(user).
+
+%% Subscribe the calling process to What; delegates to
+%% mnesia_subscr:subscribe/2.
+subscribe(What) ->
+ mnesia_subscr:subscribe(self(), What).
+
+%% Unsubscribe the calling process from What; delegates to
+%% mnesia_subscr:unsubscribe/2.
+unsubscribe(What) ->
+ mnesia_subscr:unsubscribe(self(), What).
+
+%% Report a user event as the system event {mnesia_user, Event}.
+report_event(Event) ->
+ mnesia_lib:report_system_event({mnesia_user, Event}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Snmp
+
+%% Delegates to mnesia_schema:add_snmp/2.
+snmp_open_table(Tab, Us) ->
+ mnesia_schema:add_snmp(Tab, Us).
+
+%% Delegates to mnesia_schema:del_snmp/1.
+snmp_close_table(Tab) ->
+ mnesia_schema:del_snmp(Tab).
+
+%% Fetch the row of Tab identified by the SNMP row index RowIndex.
+%% Returns {ok, Row} or undefined on the transaction path.
+%% Tab must be an atom other than schema and RowIndex a list,
+%% otherwise abort({bad_type, Tab}) is called.
+%% Inside a transaction (the activity id is a tid tuple) the Mnesia
+%% key is derived from the index and read through the access module.
+%% If the key cannot be derived, the operations recorded in the
+%% transaction store are scanned for one whose key maps to RowIndex.
+%% Outside a transaction the row is fetched with dirty_rpc from
+%% mnesia_snmp_hook:get_row.
+snmp_get_row(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
+ case get(mnesia_activity_state) of
+ {Mod, Tid, Ts=#tidstore{store=Store}} when element(1, Tid) =:= tid ->
+ case snmp_oid_to_mnesia_key(RowIndex, Tab) of
+ unknown -> %% Arrg contains fix_string
+ Ops = find_ops(Store, Tab, val({Tab, wild_pattern})),
+ SnmpType = val({Tab,snmp}),
+ %% Fold over the store operations: the last operation whose
+ %% key maps to RowIndex decides the result; only a write
+ %% yields the row.
+ Fix = fun({{_,Key},Row,Op}, Res) ->
+ case mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) of
+ RowIndex ->
+ case Op of
+ write -> {ok, Row};
+ _ ->
+ undefined
+ end;
+ _ ->
+ Res
+ end
+ end,
+ lists:foldl(Fix, undefined, Ops);
+ Key ->
+ case Mod:read(Tid, Ts, Tab, Key, read) of
+ [Row] ->
+ {ok, Row};
+ _ ->
+ undefined
+ end
+ end;
+ _ ->
+ dirty_rpc(Tab, mnesia_snmp_hook, get_row, [Tab, RowIndex])
+ end;
+snmp_get_row(Tab, _RowIndex) ->
+ abort({bad_type, Tab}).
+
+%%%%%%%%%%%%%
+
+%% Return {ok, NextIndex} for the SNMP index following RowIndex in
+%% Tab, or endOfTable.
+%% Tab must be an atom other than schema and RowIndex a list,
+%% otherwise abort({bad_type, Tab}) is called.
+%% The committed successor is always fetched first with dirty_rpc.
+%% Inside a transaction (the activity id is a tid tuple) it is then
+%% reconciled with the keys recorded in the transaction store.
+snmp_get_next_index(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
+ {Next,OrigKey} = dirty_rpc(Tab, mnesia_snmp_hook, get_next_index, [Tab, RowIndex]),
+ case get(mnesia_activity_state) of
+ {_Mod, Tid, #tidstore{store=Store}} when element(1, Tid) =:= tid ->
+ case OrigKey of
+ undefined ->
+ snmp_order_keys(Store, Tab, RowIndex, []);
+ _ ->
+ case ?ets_match(Store, {{Tab,OrigKey}, '_', '$1'}) of
+ [] -> snmp_order_keys(Store,Tab,RowIndex,[OrigKey]);
+ Ops ->
+ %% If the last store operation on the successor is a
+ %% delete, skip it and search on from Next.
+ case lists:last(Ops) of
+ [delete] -> snmp_get_next_index(Tab, Next);
+ _ -> snmp_order_keys(Store,Tab,RowIndex,[OrigKey])
+ end
+ end
+ end;
+ _ ->
+ case Next of
+ endOfTable -> endOfTable;
+ _ -> {ok, Next}
+ end
+ end;
+snmp_get_next_index(Tab, _RowIndex) ->
+ abort({bad_type, Tab}).
+
+%% Find the SNMP index following RowIndex among the keys of Tab seen
+%% in the transaction store, combined with the default keys Def by
+%% ts_keys_1/2.  The keys are converted to SNMP indices and sorted
+%% before the search.
+snmp_order_keys(Store,Tab,RowIndex,Def) ->
+    StoreOps = ?ets_match(Store, {{Tab,'$1'},'_','$2'}),
+    SnmpType = val({Tab,snmp}),
+    ToOid = fun(Key) ->
+                    mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType)
+            end,
+    Oids = lists:map(ToOid, ts_keys_1(StoreOps, Def)),
+    get_ordered_snmp_key(RowIndex, lists:sort(Oids)).
+
+%% Return {ok, Key} for the first element of the list that is
+%% strictly greater than Prev, or endOfTable if there is none.
+get_ordered_snmp_key(Prev, Keys) ->
+    NotAfterPrev = fun(Key) -> not (Prev < Key) end,
+    case lists:dropwhile(NotAfterPrev, Keys) of
+        [Next | _] -> {ok, Next};
+        [] -> endOfTable
+    end.
+
+%%%%%%%%%%
+
+%% Translate the SNMP row index RowIndex into a Mnesia key of Tab.
+%% Tab must be an atom other than schema and RowIndex a list,
+%% otherwise abort({bad_type, Tab}) is called.
+%% The key is looked up with dirty_rpc in mnesia_snmp_hook.  Inside a
+%% transaction (the activity id is a tid tuple) the answer is then
+%% checked against the transaction store by snmp_filter_key/4.
+snmp_get_mnesia_key(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
+    ActivityState = get(mnesia_activity_state),
+    Committed = dirty_rpc(Tab, mnesia_snmp_hook, get_mnesia_key, [Tab, RowIndex]),
+    case ActivityState of
+        {_Mod, Tid, Ts} when element(1, Tid) =:= tid ->
+            snmp_filter_key(Committed, RowIndex, Tab, Ts#tidstore.store);
+        _ ->
+            Committed
+    end;
+snmp_get_mnesia_key(Tab, _RowIndex) ->
+    abort({bad_type, Tab}).
+
+%% Derive the Mnesia key for RowIndex.  The direct conversion by
+%% mnesia_snmp_hook:oid_to_key/2 is tried first; if it answers
+%% 'unknown' (index contains a fix_string and needs a lookup) the key
+%% is looked up with dirty_rpc.  Returns the key, or 'unknown' when
+%% the lookup answers 'undefined'.
+snmp_oid_to_mnesia_key(RowIndex, Tab) ->
+    Direct = mnesia_snmp_hook:oid_to_key(RowIndex, Tab),
+    case Direct of
+        unknown ->
+            Lookup = dirty_rpc(Tab,mnesia_snmp_hook,get_mnesia_key,[Tab,RowIndex]),
+            case Lookup of
+                {ok, MnesiaKey} -> MnesiaKey;
+                undefined -> unknown
+            end;
+        _ ->
+            Direct
+    end.
+
+%% Reconcile a key lookup result with the operations recorded in the
+%% transaction store Store.  Returns {ok, Key} or undefined.
+%% First clause: a key was found.  It is kept unless the last store
+%% operation on it is something other than a write.
+%% Second clause: no key was found.  A key only counts if the store
+%% holds a write for the row as its last operation.
+snmp_filter_key(Res = {ok,Key}, _RowIndex, Tab, Store) ->
+ case ?ets_lookup(Store, {Tab,Key}) of
+ [] -> Res;
+ Ops ->
+ case lists:last(Ops) of
+ {_, _, write} -> Res;
+ _ -> undefined
+ end
+ end;
+snmp_filter_key(undefined, RowIndex, Tab, Store) ->
+ case mnesia_snmp_hook:oid_to_key(RowIndex, Tab) of
+ unknown -> %% Arrg contains fix_string
+ Ops = find_ops(Store, Tab, val({Tab, wild_pattern})),
+ SnmpType = val({Tab,snmp}),
+ %% Fold over the store operations: the last operation whose
+ %% key maps to RowIndex decides the result.
+ Fix = fun({{_,Key},_,Op}, Res) ->
+ case mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) of
+ RowIndex ->
+ case Op of
+ write -> {ok, Key};
+ _ ->
+ undefined
+ end;
+ _ ->
+ Res
+ end
+ end,
+ lists:foldl(Fix, undefined, Ops);
+ Key ->
+ case ?ets_lookup(Store, {Tab,Key}) of
+ [] ->
+ undefined;
+ Ops ->
+ case lists:last(Ops) of
+ {_, _, write} -> {ok, Key};
+ _ -> undefined
+ end
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Textfile access
+
+%% Delegates to mnesia_text:load_textfile/1.
+load_textfile(F) ->
+ mnesia_text:load_textfile(F).
+%% Delegates to mnesia_text:dump_to_textfile/1.
+dump_to_textfile(F) ->
+ mnesia_text:dump_to_textfile(F).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% QLC Handles
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% QLC handle for Tab with default options; same as table(Tab, []).
+table(Tab) ->
+ table(Tab, []).
+%% Build a QLC table handle for Tab with qlc:table/2.
+%% Options handled here (defaults in parentheses): traverse (select),
+%% lock (read) and n_objects (100).  All remaining options are passed
+%% on to qlc:table/2.  traverse must be 'select' or {select, Ms},
+%% anything else raises a badarg error.
+table(Tab,Opts) ->
+ {[Trav,Lock,NObjects],QlcOptions0} =
+ qlc_opts(Opts,[{traverse,select},{lock,read},{n_objects,100}]),
+ %% Traverse fun: arity 0 when the match spec is fixed, arity 1 when
+ %% the match spec is supplied by the caller of the fun.
+ TF = case Trav of
+ {select,Ms} ->
+ fun() -> qlc_select(select(Tab,Ms,NObjects,Lock)) end;
+ select ->
+ fun(Ms) -> qlc_select(select(Tab,Ms,NObjects,Lock)) end;
+ _ ->
+ erlang:error({badarg, {Trav,[Tab, Opts]}})
+ end,
+ Pre = fun(Arg) -> pre_qlc(Arg, Tab) end,
+ Post = fun() -> post_qlc(Tab) end,
+ Info = fun(Tag) -> qlc_info(Tab, Tag) end,
+ %% The parent fun returns the current activity state, tagged so that
+ %% pre_qlc/2 can recognise it in the parent_value option.
+ ParentFun = fun() ->
+ {mnesia_activity, mnesia:get_activity_id()}
+ end,
+ %% A lookup fun is only offered when the match spec is not fixed.
+ %% Position 2 is the key position and uses read/3; any other
+ %% position uses index_read/3.
+ Lookup =
+ case Trav of
+ {select, _} -> [];
+ _ ->
+ LFun = fun(2, Keys) ->
+ Read = fun(Key) -> read(Tab,Key,Lock) end,
+ lists:flatmap(Read, Keys);
+ (Index,Keys) ->
+ IdxRead = fun(Key) -> index_read(Tab,Key,Index) end,
+ lists:flatmap(IdxRead, Keys)
+ end,
+ [{lookup_fun, LFun}]
+ end,
+ MFA = fun(Type) -> qlc_format(Type, Tab, NObjects, Lock, Opts) end,
+ QlcOptions = [{pre_fun, Pre}, {post_fun, Post},
+ {info_fun, Info}, {parent_fun, ParentFun},
+ {format_fun, MFA}|Lookup] ++ QlcOptions0,
+ qlc:table(TF, QlcOptions).
+
+%% pre_fun of the QLC handle; returns ok.
+%% Uses the activity state of the calling process if it has one.
+%% Otherwise the activity carried in the parent_value option is
+%% installed with put_activity_id/2 together with the stop_fun
+%% option; if there is no such activity abort(no_transaction) is
+%% called.
+%% When the activity id is not a tid tuple and Tab is not an
+%% ordered_set, the table is fixed via mnesia_tm:fixtable for the
+%% calling process.
+pre_qlc(Opts, Tab) ->
+ {_,Tid,_} =
+ case get(mnesia_activity_state) of
+ undefined ->
+ case lists:keysearch(parent_value, 1, Opts) of
+ {value, {parent_value,{mnesia_activity,undefined}}} ->
+ abort(no_transaction);
+ {value, {parent_value,{mnesia_activity,Aid}}} ->
+ {value,{stop_fun,Stop}} =
+ lists:keysearch(stop_fun,1,Opts),
+ put_activity_id(Aid,Stop),
+ Aid;
+ _ ->
+ abort(no_transaction)
+ end;
+ Else ->
+ Else
+ end,
+ case element(1,Tid) of
+ tid -> ok;
+ _ ->
+ case ?catch_val({Tab, setorbag}) of
+ ordered_set -> ok;
+ _ ->
+ dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,true,self()]),
+ ok
+ end
+ end.
+
+%% post_fun of the QLC handle; returns ok.
+%% Releases the table fix via mnesia_tm:fixtable unless the activity
+%% id is a #tid{} record or Tab is an ordered_set.
+post_qlc(Tab) ->
+    InTransaction =
+        case catch get(mnesia_activity_state) of
+            {_, #tid{}, _} -> true;
+            _ -> false
+        end,
+    NoFixNeeded =
+        InTransaction orelse
+        (?catch_val({Tab, setorbag}) =:= ordered_set),
+    case NoFixNeeded of
+        true ->
+            ok;
+        false ->
+            dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,false,self()]),
+            ok
+    end.
+
+%% Turn a select/4 or select/1 result into the lazy list format used
+%% by the QLC traverse fun: the objects of the current chunk followed
+%% by a fun that produces the rest.  Empty chunks are skipped by
+%% continuing at once; '$end_of_table' ends the list.
+qlc_select('$end_of_table') -> [];
+qlc_select({[], Cont}) -> qlc_select(select(Cont));
+qlc_select({Objects, Cont}) ->
+ %% The tail of the list is deliberately a fun, not a list.
+ Objects ++ fun() -> qlc_select(select(Cont)) end.
+
+%% Split an option list.  Keys is a list of {Key, Default} pairs.
+%% Returns {Values, Rest}: Values holds, in the order of Keys, the
+%% value given for each key or its default, and Rest is Opts with the
+%% first occurrence of each key removed.  A single option that is not
+%% a list is treated as a one-element list.
+qlc_opts(Opts, Keys) when is_list(Opts) ->
+    qlc_opts(Opts, Keys, []);
+qlc_opts(Option, Keys) ->
+    qlc_opts([Option], Keys, []).
+
+%% Acc holds already extracted values in reverse order.
+qlc_opts(Opts, Keys, Acc) ->
+    Take =
+        fun({Key, Def}, {Vals, Remaining}) ->
+                Picked =
+                    case lists:keysearch(Key, 1, Remaining) of
+                        {value, {Key, Value}} -> Value;
+                        false -> Def
+                    end,
+                {[Picked | Vals], lists:keydelete(Key, 1, Remaining)}
+        end,
+    {RevVals, Rest} = lists:foldl(Take, {Acc, Opts}, Keys),
+    {lists:reverse(RevVals), Rest}.
+
+%% info_fun of the QLC handle: answer the tags QLC asks about.
+%% Unknown tags give 'undefined'.
+qlc_info(Tab, num_of_objects) ->
+    dirty_rpc(Tab, ?MODULE, raw_table_info, [Tab, size]);
+qlc_info(_Tab, keypos) ->
+    2;
+qlc_info(_Tab, is_unique_objects) ->
+    true;
+qlc_info(Tab, is_unique_keys) ->
+    TabType = val({Tab, type}),
+    TabType =:= set orelse TabType =:= ordered_set;
+qlc_info(Tab, is_sorted_objects) ->
+    case val({Tab, type}) of
+        ordered_set ->
+            case ?catch_val({Tab, frag_hash}) of
+                {'EXIT', _} ->
+                    ascending;
+                _FragHash -> %% Fragmented tables are not ordered
+                    no
+            end;
+        _Unordered ->
+            no
+    end;
+qlc_info(Tab, indices) ->
+    val({Tab,index});
+qlc_info(_Tab, _Tag) ->
+    undefined.
+
+%% format_fun of the QLC handle: describe how the table is accessed.
+%% Traversals are described as a {Module, table, Args} call; key and
+%% index lookups as the text of an equivalent lists:flatmap call.
+qlc_format(all, Tab, NObjects, Lock, Opts) ->
+    TableOpts = [{n_objects, NObjects}, {lock,Lock} | Opts],
+    {?MODULE, table, [Tab, TableOpts]};
+qlc_format({match_spec, Ms}, Tab, NObjects, Lock, Opts) ->
+    TableOpts = [{traverse,{select,Ms}}, {n_objects, NObjects},
+                 {lock,Lock} | Opts],
+    {?MODULE, table, [Tab, TableOpts]};
+qlc_format({lookup, 2, Keys}, Tab, _NObjects, Lock, _Opts) ->
+    Template = "lists:flatmap(fun(V) -> "
+               "~w:read(~w, V, ~w) end, ~w)",
+    io_lib:format(Template, [?MODULE, Tab, Lock, Keys]);
+qlc_format({lookup, Index,Keys}, Tab, _NObjects, _Lock, _Opts) ->
+    Template = "lists:flatmap(fun(V) -> "
+               "~w:index_read(~w, V, ~w) end, ~w)",
+    io_lib:format(Template, [?MODULE, Tab, Index, Keys]).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Fix Tab for the calling process unless it is an ordered_set or has
+%% already been fixed in this transaction.  A {fixtable, {Tab, Node}}
+%% entry in the store marks the table as fixed.  Accepts either a
+%% #tidstore{} record or the store itself.  Returns ok.
+do_fixtable(Tab, #tidstore{store=Store}) ->
+    do_fixtable(Tab,Store);
+do_fixtable(Tab, Store) ->
+    case ?catch_val({Tab, setorbag}) of
+        ordered_set ->
+            ok;
+        _ ->
+            AlreadyFixed = ?ets_match_object(Store, {fixtable, {Tab, '_'}}),
+            case AlreadyFixed of
+                [] ->
+                    Node = dirty_rpc(Tab, mnesia_tm, fixtable,
+                                     [Tab,true,self()]),
+                    ?ets_insert(Store, {fixtable, {Tab, Node}});
+                _ ->
+                    ignore
+            end,
+            ok
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Mnemosyne exclusive
+
+%% Return the activity state stored in the process dictionary under
+%% mnesia_activity_state ('undefined' when none is stored).
+get_activity_id() ->
+ get(mnesia_activity_state).
+
+%% Delegates to mnesia_tm:put_activity_id/1.
+put_activity_id(Activity) ->
+ mnesia_tm:put_activity_id(Activity).
+%% Delegates to mnesia_tm:put_activity_id/2.  pre_qlc/2 passes the
+%% QLC stop_fun as Fun.
+put_activity_id(Activity,Fun) ->
+ mnesia_tm:put_activity_id(Activity,Fun).
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl
new file mode 100644
index 0000000000..d488d9364a
--- /dev/null
+++ b/lib/mnesia/src/mnesia.hrl
@@ -0,0 +1,121 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+
+%% Name of the application.
+-define(APPLICATION, mnesia).
+
+%% One-to-one wrappers around the ets functions of the same purpose.
+-define(ets_lookup(Tab, Key), ets:lookup(Tab, Key)).
+-define(ets_lookup_element(Tab, Key, Pos), ets:lookup_element(Tab, Key, Pos)).
+-define(ets_insert(Tab, Rec), ets:insert(Tab, Rec)).
+-define(ets_delete(Tab, Key), ets:delete(Tab, Key)).
+-define(ets_match_delete(Tab, Pat), ets:match_delete(Tab, Pat)).
+-define(ets_match_object(Tab, Pat), ets:match_object(Tab, Pat)).
+-define(ets_match(Tab, Pat), ets:match(Tab, Pat)).
+-define(ets_info(Tab, Item), ets:info(Tab, Item)).
+-define(ets_update_counter(Tab, Key, Incr), ets:update_counter(Tab, Key, Incr)).
+-define(ets_first(Tab), ets:first(Tab)).
+-define(ets_next(Tab, Key), ets:next(Tab, Key)).
+-define(ets_last(Tab), ets:last(Tab)).
+-define(ets_prev(Tab, Key), ets:prev(Tab, Key)).
+-define(ets_slot(Tab, Pos), ets:slot(Tab, Pos)).
+-define(ets_new_table(Tab, Props), ets:new(Tab, Props)).
+-define(ets_delete_table(Tab), ets:delete(Tab)).
+-define(ets_fixtable(Tab, Bool), ets:fixtable(Tab, Bool)).
+
+%% Read element 2 of the mnesia_gvar entry stored under Var.  A failed
+%% lookup is caught, so the result is then an {'EXIT', Reason} tuple.
+-define(catch_val(Var), (catch ?ets_lookup_element(mnesia_gvar, Var, 2))).
+
+%% Transaction identifier.
+%% It's important that counter is first, since we compare tid's
+
+-record(tid,
+ {counter, %% serial no for tid
+ pid}). %% owner of tid
+
+
+%% Store belonging to a transaction; passed around as Ts together
+%% with the tid.
+-record(tidstore,
+ {store, %% current ets table for tid
+ up_stores = [], %% list of upper layer stores for nested trans
+ level = 1}). %% transaction level
+
+%% Default value of the cookie field of #cstruct{}.
+-define(unique_cookie, {erlang:now(), node()}).
+
+%% Table definition record.
+-record(cstruct, {name, % Atom
+ type = set, % set | bag
+ ram_copies = [], % [Node]
+ disc_copies = [], % [Node]
+ disc_only_copies = [], % [Node]
+ load_order = 0, % Integer
+ access_mode = read_write, % read_write | read_only
+ index = [], % [Integer]
+ snmp = [], % Snmp Ustruct
+ local_content = false, % true | false
+ record_name = {bad_record_name}, % Atom (Default = Name)
+ attributes = [key, val], % [Atom]
+ user_properties = [], % [Record]
+ frag_properties = [], % [{Key, Val}]
+ cookie = ?unique_cookie, % Term
+ version = {{2, 0}, []}}). % {{Integer, Integer}, [Node]}
+
+%% Record for the head structure in Mnesia's log files
+%%
+%% The definition of this record may *NEVER* be changed
+%% since it may be written to very old backup files.
+%% By keeping this record definition stable we are
+%% able to comprehend backups from timepoint 0. It also
+%% allows us to use the backup format as an interchange
+%% format between Mnesia releases.
+
+-record(log_header,{log_kind,
+ log_version,
+ mnesia_version,
+ node,
+ now}).
+
+%% Commit records stored in the transaction log.
+%% NOTE(review): the list fields presumably hold the operations per
+%% storage type, snmp and schema - confirm against mnesia_tm.
+-record(commit, {node,
+ decision, % presume_commit | Decision
+ ram_copies = [],
+ disc_copies = [],
+ disc_only_copies = [],
+ snmp = [],
+ schema_ops = []
+ }).
+
+%% Outcome of the transaction identified by tid.
+%% NOTE(review): disc_nodes and ram_nodes are presumably the nodes
+%% involved, split by storage kind - confirm against mnesia_recover.
+-record(decision, {tid,
+ outcome, % presume_abort | committed
+ disc_nodes,
+ ram_nodes}).
+
+%% Maybe cyclic wait
+%% The node field defaults to the local node.
+-record(cyclic, {node = node(),
+ oid, % {Tab, Key}
+ op, % read | write
+ lock, % read | write
+ lucky
+ }).
+
+%% Managing conditional debug functions
+%% With the 'debug' macro defined at compile time, ?eval_debug_fun
+%% calls mnesia_lib:eval_debug_fun/4 with the current file and line;
+%% otherwise it expands to the atom ok.
+
+-ifdef(debug).
+ -define(eval_debug_fun(I, C),
+ mnesia_lib:eval_debug_fun(I, C, ?FILE, ?LINE)).
+-else.
+ -define(eval_debug_fun(I, C), ok).
+-endif.
+
diff --git a/lib/mnesia/src/mnesia_backup.erl b/lib/mnesia/src/mnesia_backup.erl
new file mode 100644
index 0000000000..f372ca0be5
--- /dev/null
+++ b/lib/mnesia/src/mnesia_backup.erl
@@ -0,0 +1,201 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+
+%%-behaviour(mnesia_backup).
+%0
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% This module contains one implementation of callback functions
+%% used by Mnesia at backup and restore. The user may however
+%% write their own module with the same interface as mnesia_backup and
+%% configure Mnesia so the alternate module performs the actual
+%% accesses to the backup media. This means that the user may put
+%% the backup on medias that Mnesia does not know about, possibly
+%% on hosts where Erlang is not running.
+%%
+%% The OpaqueData argument is never interpreted by other parts of
+%% Mnesia. It is the property of this module. Alternate implementations
+%% of this module may have different interpretations of OpaqueData.
+%% The OpaqueData argument given to open_write/1 and open_read/1
+%% are forwarded directly from the user.
+%%
+%% All functions must return {ok, NewOpaqueData} or {error, Reason}.
+%%
+%% The NewOpaqueData arguments returned by backup callback functions will
+%% be given as input when the next backup callback function is invoked.
+%% If any return value does not match {ok, _} the backup will be aborted.
+%%
+%% The NewOpaqueData arguments returned by restore callback functions will
+%% be given as input when the next restore callback function is invoked
+%% If any return value does not match {ok, _} the restore will be aborted.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(mnesia_backup).
+
+-include_lib("kernel/include/file.hrl").
+
+-export([
+ %% Write access
+ open_write/1,
+ write/2,
+ commit_write/1,
+ abort_write/1,
+
+ %% Read access
+ open_read/1,
+ read/1,
+ close_read/1
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup callback interface
+%% Opaque state threaded through the write callbacks:
+%%   tmp_file  - name of the temporary file (File ++ ".BUPTMP") that the
+%%               backup is written to
+%%   file      - final backup file name, as given to open_write/1
+%%   file_desc - disk_log handle returned by disk_log:open/1
+-record(backup, {tmp_file, file, file_desc}).
+
+%% Opens backup media for write
+%%
+%% Returns {ok, OpaqueData} or {error, Reason}
+open_write(OpaqueData) ->
+    %% OpaqueData is the name of the backup file. The backup is written to
+    %% a temporary sibling file which commit_write/1 later renames.
+    Target = OpaqueData,
+    TmpName = lists:concat([Target, ".BUPTMP"]),
+    %% Remove leftovers from earlier backups; results are ignored on purpose.
+    file:delete(TmpName),
+    file:delete(Target),
+    LogArgs = [{name, make_ref()},
+               {file, TmpName},
+               {repair, false},
+               {linkto, self()}],
+    case disk_log:open(LogArgs) of
+        {ok, Log} ->
+            {ok, #backup{tmp_file = TmpName, file = Target, file_desc = Log}};
+        {error, _} = Error ->
+            Error
+    end.
+
+%% Writes BackupItems to the backup media
+%%
+%% Returns {ok, OpaqueData} or {error, Reason}
+write(OpaqueData, BackupItems) ->
+    %% Appends BackupItems to the open disk_log. On failure the backup is
+    %% aborted (log closed, temporary file removed) before the error is
+    %% returned.
+    Log = OpaqueData#backup.file_desc,
+    case disk_log:log_terms(Log, BackupItems) of
+        {error, _} = Error ->
+            abort_write(OpaqueData),
+            Error;
+        ok ->
+            {ok, OpaqueData}
+    end.
+
+%% Closes the backup media after a successful backup
+%%
+%% Returns {ok, ReturnValueToUser} or {error, Reason}
+commit_write(OpaqueData) ->
+    %% Sync and close the log, then move the temporary file into place.
+    %% The first step that fails determines the returned error.
+    B = OpaqueData,
+    Log = B#backup.file_desc,
+    case disk_log:sync(Log) of
+        {error, SyncReason} ->
+            {error, SyncReason};
+        ok ->
+            case disk_log:close(Log) of
+                {error, CloseReason} ->
+                    {error, CloseReason};
+                ok ->
+                    Final = B#backup.file,
+                    case file:rename(B#backup.tmp_file, Final) of
+                        {error, RenameReason} ->
+                            {error, RenameReason};
+                        ok ->
+                            {ok, Final}
+                    end
+            end
+    end.
+
+%% Closes the backup media after an interrupted backup
+%%
+%% Returns {ok, ReturnValueToUser} or {error, Reason}
+abort_write(BackupRef) ->
+    %% Close the log and always remove the temporary file; the outcome of
+    %% the close decides the return value.
+    CloseRes = disk_log:close(BackupRef#backup.file_desc),
+    file:delete(BackupRef#backup.tmp_file),
+    case CloseRes of
+        {error, Reason} ->
+            {error, Reason};
+        ok ->
+            {ok, BackupRef#backup.file}
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Restore callback interface
+
+%% Opaque state threaded through the read callbacks:
+%%   file      - backup file name, as given to open_read/1
+%%   file_desc - disk_log handle returned by disk_log:open/1
+%%   cont      - argument for the next disk_log:chunk/2 call; 'start'
+%%               until the first chunk has been read
+-record(restore, {file, file_desc, cont}).
+
+%% Opens backup media for read
+%%
+%% Returns {ok, OpaqueData} or {error, Reason}
+open_read(OpaqueData) ->
+    %% OpaqueData is the name of the backup file. The file must exist;
+    %% it is opened as a read-only disk_log positioned at the start.
+    File = OpaqueData,
+    case file:read_file_info(File) of
+        {error, Reason} ->
+            {error, Reason};
+        _FileInfo -> %% file exists
+            LogArgs = [{file, File},
+                       {name, make_ref()},
+                       {repair, false},
+                       {mode, read_only},
+                       {linkto, self()}],
+            Opened = fun(Log) ->
+                             {ok, #restore{file = File,
+                                           file_desc = Log,
+                                           cont = start}}
+                     end,
+            case disk_log:open(LogArgs) of
+                {ok, Fd} ->
+                    Opened(Fd);
+                {repaired, Fd, _, _} ->
+                    %% Any repair report is accepted, whatever the number
+                    %% of bad bytes.
+                    Opened(Fd);
+                {error, OpenReason} ->
+                    {error, OpenReason}
+            end
+    end.
+
+%% Reads BackupItems from the backup media
+%%
+%% Returns {ok, OpaqueData, BackupItems} or {error, Reason}
+%%
+%% BackupItems == [] is interpreted as eof
+read(OpaqueData) ->
+    %% Reads the next non-empty chunk of backup items.
+    %%
+    %% Since an empty item list means eof to the caller, empty chunks are
+    %% skipped and only a real eof from disk_log yields [].
+    R = OpaqueData,
+    Fd = R#restore.file_desc,
+    case disk_log:chunk(Fd, R#restore.cont) of
+        {error, Reason} ->
+            {error, {"Possibly truncated", Reason}};
+        eof ->
+            {ok, R, []};
+        {Cont, []} ->
+            read(R#restore{cont = Cont});
+        {Cont, [], _BadBytes} ->
+            %% A chunk holding nothing but bad bytes must not be mistaken
+            %% for eof; keep reading, just as for an ordinary empty chunk.
+            read(R#restore{cont = Cont});
+        {Cont, BackupItems, _BadBytes} ->
+            %% Bad bytes are tolerated; the readable items are delivered.
+            {ok, R#restore{cont = Cont}, BackupItems};
+        {Cont, BackupItems} ->
+            {ok, R#restore{cont = Cont}, BackupItems}
+    end.
+
+%% Closes the backup media after restore
+%%
+%% Returns {ok, ReturnValueToUser} or {error, Reason}
+close_read(OpaqueData) ->
+    %% Closes the disk_log and hands the file name back to the user.
+    Log = OpaqueData#restore.file_desc,
+    case disk_log:close(Log) of
+        {error, Reason} ->
+            {error, Reason};
+        ok ->
+            {ok, OpaqueData#restore.file}
+    end.
+%0
+
diff --git a/lib/mnesia/src/mnesia_bup.erl b/lib/mnesia/src/mnesia_bup.erl
new file mode 100644
index 0000000000..37a8258d74
--- /dev/null
+++ b/lib/mnesia/src/mnesia_bup.erl
@@ -0,0 +1,1186 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_bup).
+-export([
+ %% Public interface
+ iterate/4,
+ read_schema/2,
+ fallback_bup/0,
+ fallback_exists/0,
+ tm_fallback_start/1,
+ create_schema/1,
+ install_fallback/1,
+ install_fallback/2,
+ uninstall_fallback/0,
+ uninstall_fallback/1,
+ traverse_backup/4,
+ traverse_backup/6,
+ make_initial_backup/3,
+ fallback_to_schema/0,
+ lookup_schema/2,
+ schema2bup/1,
+ refresh_cookie/2,
+
+ %% Internal
+ fallback_receiver/2,
+ install_fallback_master/2,
+ uninstall_fallback_master/2,
+ local_uninstall_fallback/2,
+ do_traverse_backup/7,
+ trav_apply/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [verbose/2, dbg_out/2]).
+
+%% State used while reading or writing backup media via a callback module:
+%%   mode       - set to 'replace' by fallback_receiver/2; left undefined
+%%                by iterate/4 and read_schema/2
+%%   bup_module - callback module (e.g. mnesia_backup)
+%%   bup_data   - the callback module's opaque data, replaced after every
+%%                successful callback in safe_apply/3
+-record(restore, {mode, bup_module, bup_data}).
+
+%% Arguments for installing/uninstalling a fallback, filled in by
+%% check_fallback_arg_type/2 and check_fallback_dir/2:
+%%   opaque          - opaque data handed to the backup module
+%%   scope           - global | local
+%%   module          - backup callback module; the default is read from
+%%                     the backup_module env each time a record is created
+%%   use_default_dir - set to false when a {mnesia_dir, Dir} arg is given
+%%   mnesia_dir      - directory in which the fallback is installed
+%%   fallback_bup    - full name of the fallback file in mnesia_dir
+%%   fallback_tmp    - full name of the temporary fallback file
+%%   skip_tables     - [Atom]
+%%   keep_tables     - [Atom]
+%%   default_op      - keep_tables | skip_tables
+-record(fallback_args, {opaque,
+ scope = global,
+ module = mnesia_monitor:get_env(backup_module),
+ use_default_dir = true,
+ mnesia_dir,
+ fallback_bup,
+ fallback_tmp,
+ skip_tables = [],
+ keep_tables = [],
+ default_op = keep_tables
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup iterator
+
+%% Reads schema section and iterates over all records in a backup.
+%%
+%% Fun(BunchOfRecords, Header, Schema, Acc) is applied when a suitable amount
+%% of records has been collected.
+%%
+%% BunchOfRecords will be [] when the iteration is done.
+iterate(Mod, Fun, Opaque, Acc) ->
+    %% Best-effort close of the backup media; failures are swallowed.
+    Close = fun(Rx) ->
+                    catch safe_apply(Rx, close_read, [Rx#restore.bup_data])
+            end,
+    Start = #restore{bup_module = Mod, bup_data = Opaque},
+    case catch read_schema_section(Start) of
+        {error, Reason} ->
+            {error, Reason};
+        {R2, {Header, Schema, Rest}} ->
+            %% Whatever the outcome of the iteration, the media is closed
+            %% before the result is returned.
+            case catch iter(R2, Header, Schema, Fun, Acc, Rest) of
+                {ok, R3, Res} ->
+                    Close(R3),
+                    {ok, Res};
+                {error, Reason} ->
+                    Close(R2),
+                    {error, Reason};
+                {'EXIT', Pid, Reason} ->
+                    Close(R2),
+                    {error, {'EXIT', Pid, Reason}};
+                {'EXIT', Reason} ->
+                    Close(R2),
+                    {error, {'EXIT', Reason}}
+            end
+    end.
+
+%% Feeds Fun with bunches of backup items until the media is exhausted.
+%% Pending holds items already read but not yet handed to Fun; when it is
+%% empty a new bunch is read, and an empty read ends the iteration with a
+%% final Fun([], ...) call.
+iter(R, Header, Schema, Fun, Acc, Pending) ->
+    case Pending of
+        [] ->
+            case safe_apply(R, read, [R#restore.bup_data]) of
+                {R2, []} ->
+                    Final = Fun([], Header, Schema, Acc),
+                    {ok, R2, Final};
+                {R2, Items} ->
+                    iter(R2, Header, Schema, Fun, Acc, Items)
+            end;
+        _ ->
+            NextAcc = Fun(Pending, Header, Schema, Acc),
+            iter(R, Header, Schema, Fun, NextAcc, [])
+    end.
+
+%% Invokes callback What in the backup module and folds the new opaque
+%% data into R. 'read' returns {R2, Items}, every other callback returns
+%% R2. Writing an empty item list is a no-op. Any unexpected result aborts
+%% the restore through abort_restore/4, which throws {error, Reason}.
+safe_apply(R, write, [_, []]) ->
+    R;
+safe_apply(R, What, Args) ->
+    Mod = R#restore.bup_module,
+    Outcome = (catch apply(Mod, What, Args)),
+    case Outcome of
+        {ok, Opaque, Items} when What =:= read ->
+            {R#restore{bup_data = Opaque}, Items};
+        {ok, Opaque} when What =/= read ->
+            R#restore{bup_data = Opaque};
+        {error, Reason} ->
+            abort_restore(R, What, Args, Reason);
+        Unexpected ->
+            abort_restore(R, What, Args, Unexpected)
+    end.
+
+%% Logs the failed callback, makes a best-effort attempt to close the
+%% backup media and throws {error, Reason}.
+abort_restore(R, What, Args, Reason) ->
+    #restore{bup_module = Mod, bup_data = Opaque} = R,
+    dbg_out("Restore aborted. ~p:~p~p -> ~p~n",
+            [Mod, What, Args, Reason]),
+    catch apply(Mod, close_read, [Opaque]),
+    throw({error, Reason}).
+
+%% Extracts the schema definition from the installed fallback file.
+fallback_to_schema() ->
+    fallback_to_schema(fallback_bup()).
+
+%% Reads the schema section of the fallback file Fname.
+%% Returns {ok, fallback, SchemaDef} or {error, Reason}.
+fallback_to_schema(Fname) ->
+    case read_schema(mnesia_backup, Fname) of
+        {error, _} = Error ->
+            Error;
+        Schema ->
+            case catch lookup_schema(schema, Schema) of
+                {error, _} ->
+                    {error, "No schema in fallback"};
+                SchemaDef ->
+                    {ok, fallback, SchemaDef}
+            end
+    end.
+
+%% Opens Opaque reads schema and then close
+read_schema(Mod, Opaque) ->
+    %% Returns the (converted) schema items, or {error, Reason}. The
+    %% backup media is closed again before returning.
+    Start = #restore{bup_module = Mod, bup_data = Opaque},
+    case catch read_schema_section(Start) of
+        {error, _} = Error ->
+            Error;
+        {Opened, {_Header, Schema, _Rest}} ->
+            catch safe_apply(Opened, close_read, [Opened#restore.bup_data]),
+            Schema
+    end.
+
+%% Opens the backup media and extracts the schema section, converting it
+%% to the current backup format. The media is left open on success.
+%%
+%% Returns {R, {Header, Schema, Rest}}, where Rest holds the items that
+%% were read after the schema section, or {error, Reason} (in which case
+%% a best-effort close of the media has been attempted).
+read_schema_section(R) ->
+    Fail = fun(Reason) ->
+                   catch safe_apply(R, close_read, [R#restore.bup_data]),
+                   {error, Reason}
+           end,
+    case catch do_read_schema_section(R) of
+        {'EXIT', Reason} ->
+            Fail({'EXIT', Reason});
+        {error, Reason} ->
+            Fail(Reason);
+        {R2, {Header, RawSchema, Rest}} ->
+            Version = Header#log_header.log_version,
+            {R2, {Header, convert_schema(Version, RawSchema), Rest}}
+    end.
+
+%% Opens the backup media, verifies the log header found in the first
+%% chunk and collects the schema items that follow it.
+%%
+%% Returns {R, {Header, SchemaItems, Rest}} or {error, Reason}; may also
+%% throw {error, Reason} through safe_apply/3.
+do_read_schema_section(R) ->
+    R2 = safe_apply(R, open_read, [R#restore.bup_data]),
+    {R3, RawSchema} = safe_apply(R2, read, [R2#restore.bup_data]),
+    do_read_schema_section(R3, verify_header(RawSchema), []).
+
+%% The second argument is the result of verify_header/1, whose item list
+%% is consumed step by step. Acc holds the schema items found so far in
+%% REVERSE order (prepending keeps the loop linear); it is reversed once
+%% when the section ends.
+do_read_schema_section(R, {ok, B, C, []}, Acc) ->
+    %% Current chunk exhausted: read the next one; an empty read means
+    %% that the backup holds nothing but schema items.
+    case safe_apply(R, read, [R#restore.bup_data]) of
+        {R2, []} ->
+            {R2, {B, lists:reverse(Acc), []}};
+        {R2, RawSchema} ->
+            do_read_schema_section(R2, {ok, B, C, RawSchema}, Acc)
+    end;
+
+do_read_schema_section(R, {ok, B, C, [Head | Tail]}, Acc)
+  when element(1, Head) =:= schema ->
+    do_read_schema_section(R, {ok, B, C, Tail}, [Head | Acc]);
+
+do_read_schema_section(R, {ok, B, _C, Rest}, Acc) ->
+    %% First non-schema item: the schema section is complete.
+    {R, {B, lists:reverse(Acc), Rest}};
+
+do_read_schema_section(_R, {error, Reason}, _Acc) ->
+    {error, Reason}.
+
+%% Checks that the first backup item is a log header of the backup kind
+%% with a supported version.
+%% Returns {ok, Header, CurrentHeader, RemainingItems} or {error, Reason}.
+verify_header([Hdr | Items]) when is_record(Hdr, log_header) ->
+    Current = mnesia_log:backup_log_header(),
+    if
+        Hdr#log_header.log_kind =:= Current#log_header.log_kind ->
+            Supported = ["0.1", "1.1", Current#log_header.log_version],
+            case lists:member(Hdr#log_header.log_version, Supported) of
+                false ->
+                    {error, {"Bad header version. Cannot be used as backup.", Hdr}};
+                true ->
+                    {ok, Hdr, Current, Items}
+            end;
+        true ->
+            {error, {"Bad kind of header. Cannot be used as backup.", Hdr}}
+    end;
+verify_header(Items) ->
+    {error, {"Missing header. Cannot be used as backup.", catch hd(Items)}}.
+
+%% Replaces the cookie in the schema table definition found in Schema.
+%% Throws {error, {Reason, Schema}} if Schema has no schema definition.
+refresh_cookie(Schema, NewCookie) ->
+    case lists:keysearch(schema, 2, Schema) of
+        false ->
+            Reason = "No schema found. Cannot be used as backup.",
+            throw({error, {Reason, Schema}});
+        {value, {schema, schema, List}} ->
+            OldCs = mnesia_schema:list2cs(List),
+            NewCs = OldCs#cstruct{cookie = NewCookie},
+            NewItem = {schema, schema, mnesia_schema:cs2list(NewCs)},
+            lists:keyreplace(schema, 2, Schema, NewItem)
+    end.
+
+%% Convert schema items from an external backup
+%% If backup format is the latest, no conversion is needed
+%% All supported backup formats should have their converters
+%% here as separate function clauses.
+%% convert_schema(LogVersion, Schema) -> ConvertedSchema
+%% Throws {error, {Reason, CurrentHeader}} for an unsupported version.
+convert_schema("0.1", Schema) ->
+ convert_0_1(Schema);
+convert_schema("1.1", Schema) ->
+ %% The new backup format is a pure extension of the old one
+ %% so the schema is handled as if it had the current version.
+ Current = mnesia_log:backup_log_header(),
+ convert_schema(Current#log_header.log_version, Schema);
+convert_schema(Latest, Schema) ->
+ %% Any other version is accepted only if it equals the version of
+ %% the current backup log header; the schema is then returned as is.
+ H = mnesia_log:backup_log_header(),
+ if
+ H#log_header.log_version =:= Latest ->
+ Schema;
+ true ->
+ Reason = "Bad backup header version. Cannot convert schema.",
+ throw({error, {Reason, H}})
+ end.
+
+%% Backward compatibility for 0.1
+%% Entry point of the 0.1 conversion. The schema table definition is
+%% taken out of Schema if present; otherwise an initial disc_copies
+%% schema for the local node is used as the starting point.
+convert_0_1(Schema) ->
+    {Remaining, SchemaDef} =
+        case lists:keysearch(schema, 2, Schema) of
+            {value, {schema, schema, Found}} ->
+                {lists:keydelete(schema, 2, Schema), Found};
+            false ->
+                Initial = mnesia_schema:get_initial_schema(disc_copies, [node()]),
+                {Schema, Initial}
+        end,
+    convert_0_1(Remaining, [], mnesia_schema:list2cs(SchemaDef)).
+
+%% convert_0_1(Items, Acc, Cs)
+%% Walks the 0.1 schema items. The special items cookie, db_nodes and
+%% version are folded into the schema cstruct Cs instead of being kept;
+%% all other items are collected in Acc. Since items are prepended, the
+%% result lists them in reverse order, preceded by the schema item
+%% rebuilt from Cs.
+convert_0_1([{schema, cookie, Cookie} | Schema], Acc, Cs) ->
+ convert_0_1(Schema, Acc, Cs#cstruct{cookie = Cookie});
+convert_0_1([{schema, db_nodes, DbNodes} | Schema], Acc, Cs) ->
+ convert_0_1(Schema, Acc, Cs#cstruct{disc_copies = DbNodes});
+convert_0_1([{schema, version, Version} | Schema], Acc, Cs) ->
+ convert_0_1(Schema, Acc, Cs#cstruct{version = Version});
+convert_0_1([{schema, Tab, Def} | Schema], Acc, Cs) ->
+ %% Table definition: strip the first 'snmp' entry from its index list
+ Head =
+ case lists:keysearch(index, 1, Def) of
+ {value, {index, PosList}} ->
+ %% Remove the snmp "index"
+ P = PosList -- [snmp],
+ Def2 = lists:keyreplace(index, 1, Def, {index, P}),
+ {schema, Tab, Def2};
+ false ->
+ {schema, Tab, Def}
+ end,
+ convert_0_1(Schema, [Head | Acc], Cs);
+convert_0_1([Head | Schema], Acc, Cs) ->
+ %% Anything else (e.g. a {schema, Tab} item) is kept untouched
+ convert_0_1(Schema, [Head | Acc], Cs);
+convert_0_1([], Acc, Cs) ->
+ [schema2bup({schema, schema, Cs}) | Acc].
+
+%% Returns Val or throw error
+lookup_schema(Key, Schema) ->
+    %% Finds the {schema, Key, Val} item in a list of schema items.
+    Found = lists:keysearch(Key, 2, Schema),
+    case Found of
+        false ->
+            throw({error, {"Cannot lookup", Key}});
+        {value, {schema, Key, Val}} ->
+            Val
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup compatibility
+
+%% Convert internal schema items to their backup counterparts
+%% A {schema, Tab} item is passed through unchanged; in a
+%% {schema, Tab, Cs} item the cstruct is replaced by its list form.
+schema2bup({schema, _Tab} = Item) ->
+    Item;
+schema2bup({schema, Name, Cs}) ->
+    {schema, Name, mnesia_schema:cs2list(Cs)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Create schema on the given nodes
+%% Requires that old schemas has been deleted
+%% Returns ok | {error, Reason}
+%% An empty node list means the local node. The node list must not
+%% contain duplicates and no schema may already exist on the nodes.
+create_schema([]) ->
+    create_schema([node()]);
+create_schema(Ns) when is_list(Ns) ->
+    case is_set(Ns) of
+        false ->
+            {error, {combine_error, Ns}};
+        true ->
+            NoSchema = mnesia_schema:ensure_no_schema(Ns),
+            create_schema(Ns, NoSchema)
+    end;
+create_schema(Ns) ->
+    {error, {badarg, Ns}}.
+
+%% True if List is a list without duplicates (elements that compare
+%% equal count as duplicates); false for duplicates and for non-lists.
+is_set(List) when is_list(List) ->
+    length(lists:usort(List)) =:= length(List);
+is_set(_) ->
+    false.
+
+%% create_schema(Ns, EnsureNoSchemaResult) -> ok | {error, Reason}
+%%
+%% Proceeds only if no old schema exists (second argument is ok), the
+%% schema location is not ram and the Mnesia directory can be created.
+create_schema(Ns, ok) ->
+    %% Ensure that we access the intended Mnesia
+    %% directory. This function may not be called
+    %% during startup since it will cause the
+    %% application_controller to get into deadlock
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        ok ->
+            case mnesia_monitor:get_env(schema_location) of
+                ram ->
+                    {error, {has_no_disc, node()}};
+                _ ->
+                    case mnesia_schema:opt_create_dir(true, mnesia_lib:dir()) of
+                        {error, What} ->
+                            {error, What};
+                        ok ->
+                            create_schema_from_initial_backup(Ns)
+                    end
+            end;
+        {error, Reason} ->
+            {error, Reason}
+    end;
+create_schema(_Ns, {error, Reason}) ->
+    {error, Reason};
+create_schema(_Ns, Reason) ->
+    {error, Reason}.
+
+%% Writes an initial backup holding only the schema to a temporary file
+%% and installs it as fallback. The temporary file is removed whether or
+%% not the installation succeeds, and a crash while writing the backup is
+%% reported as {error, {'EXIT', Reason}}.
+create_schema_from_initial_backup(Ns) ->
+    Mod = mnesia_backup,
+    File = mnesia_lib:dir(mk_str()),
+    file:delete(File),
+    Res =
+        case catch make_initial_backup(Ns, File, Mod) of
+            {ok, _Res} ->
+                do_install_fallback(File, Mod);
+            {error, Reason} ->
+                {error, Reason};
+            {'EXIT', Reason} ->
+                {error, {'EXIT', Reason}}
+        end,
+    file:delete(File),
+    Res.
+
+%% Builds a temporary file name from the node name and the three
+%% integers of the current now() timestamp, ending in ".TMP".
+mk_str() ->
+    Now = [integer_to_list(I) || I <- tuple_to_list(now())],
+    %% The suffix must be a list element of its own: appending the bare
+    %% string would splice its characters in as integers, which
+    %% lists:concat/1 renders as decimal numbers.
+    lists:concat([node()] ++ Now ++ [".TMP"]).
+
+%% Writes a backup containing the log header and an initial disc_copies
+%% schema for the nodes Ns, using callback module Mod.
+%% Returns {ok, Result}, where Result comes from Mod:commit_write/1;
+%% throws {error, Reason} if a callback fails.
+make_initial_backup(Ns, Opaque, Mod) ->
+    SchemaDef = mnesia_schema:get_initial_schema(disc_copies, Ns),
+    SchemaItems = [{schema, schema, SchemaDef}],
+    Opened = do_apply(Mod, open_write, [Opaque], Opaque),
+    HeaderItems = [mnesia_log:backup_log_header()],
+    WithHeader = do_apply(Mod, write, [Opened, HeaderItems], Opened),
+    WithSchema = do_apply(Mod, write, [WithHeader, SchemaItems], WithHeader),
+    Committed = do_apply(Mod, commit_write, [WithSchema], WithSchema),
+    {ok, Committed}.
+
+%% Calls Mod:What(Args...) and returns the new opaque data. Writing an
+%% empty item list is skipped and returns Opaque unchanged. A callback
+%% error or crash is rethrown as {error, Reason}.
+do_apply(_, write, [_, []], Opaque) ->
+    Opaque;
+do_apply(Mod, What, Args, _Opaque) ->
+    Outcome = (catch apply(Mod, What, Args)),
+    case Outcome of
+        {ok, NewOpaque} ->
+            NewOpaque;
+        {error, Reason} ->
+            throw({error, Reason});
+        {'EXIT', Reason} ->
+            throw({error, {'EXIT', Reason}})
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Restore
+
+%% Restore schema and possibly other tables from a backup
+%% and replicate them to the necessary nodes
+%% Requires that old schemas has been deleted
+%% Returns ok | {error, Reason}
+install_fallback(Opaque) ->
+    install_fallback(Opaque, []).
+
+%% Args is either a backup module name or a list of fallback arguments.
+install_fallback(Opaque, Args) ->
+    %% Ensure that we access the intended Mnesia
+    %% directory. This function may not be called
+    %% during startup since it will cause the
+    %% application_controller to get into deadlock
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        {error, Reason} ->
+            {error, Reason};
+        ok ->
+            do_install_fallback(Opaque, Args)
+    end.
+
+%% A bare module name is shorthand for [{module, Mod}]. The argument
+%% list is validated before the installation is started.
+do_install_fallback(Opaque, Mod) when is_atom(Mod) ->
+    do_install_fallback(Opaque, [{module, Mod}]);
+do_install_fallback(Opaque, Args) when is_list(Args) ->
+    Defaults = #fallback_args{opaque = Opaque},
+    case check_fallback_args(Args, Defaults) of
+        {error, Reason} ->
+            {error, Reason};
+        {ok, FA} ->
+            do_install_fallback(FA)
+    end;
+do_install_fallback(_Opaque, Args) ->
+    {error, {badarg, Args}}.
+
+%% Folds every argument into the fallback_args record.
+%% Returns {ok, FA}, or {error, {badarg, Arg}} for the first bad argument.
+check_fallback_args([], FA) ->
+    {ok, FA};
+check_fallback_args([Arg | Rest], FA) ->
+    case catch check_fallback_arg_type(Arg, FA) of
+        {'EXIT', _Reason} ->
+            {error, {badarg, Arg}};
+        Checked ->
+            check_fallback_args(Rest, Checked)
+    end.
+
+%% Validates one fallback argument and stores it in FA. Crashes on an
+%% unknown or malformed argument; the caller turns that into badarg.
+check_fallback_arg_type(Arg, FA) ->
+    case Arg of
+        {scope, Scope} when Scope =:= global; Scope =:= local ->
+            FA#fallback_args{scope = Scope};
+        {module, Mod} ->
+            Checked = mnesia_monitor:do_check_type(backup_module, Mod),
+            FA#fallback_args{module = Checked};
+        {mnesia_dir, Dir} ->
+            %% An explicit directory overrides the default Mnesia dir
+            FA#fallback_args{mnesia_dir = Dir,
+                             use_default_dir = false};
+        {keep_tables, Tabs} ->
+            atom_list(Tabs),
+            FA#fallback_args{keep_tables = Tabs};
+        {skip_tables, Tabs} ->
+            atom_list(Tabs),
+            FA#fallback_args{skip_tables = Tabs};
+        {default_op, Op} when Op =:= keep_tables; Op =:= skip_tables ->
+            FA#fallback_args{default_op = Op}
+    end.
+
+%% Returns ok if the argument is a proper list of atoms, crashes
+%% (function_clause) otherwise.
+atom_list([]) ->
+    ok;
+atom_list([Atom | Rest]) when is_atom(Atom) ->
+    atom_list(Rest).
+
+%% Runs the installation in a linked master process and waits for its
+%% verdict. Returns ok or {error, Reason}.
+do_install_fallback(FA) ->
+    Master = spawn_link(?MODULE, install_fallback_master, [self(), FA]),
+    receive
+        {'EXIT', Master, Reason} -> % if appl has trapped exit
+            {error, {'EXIT', Reason}};
+        {Master, Verdict} ->
+            case Verdict of
+                {ok, _} ->
+                    ok;
+                {error, Reason} ->
+                    {error, {"Cannot install fallback", Reason}}
+            end
+    end.
+
+%% Body of the master process: iterates over the backup with
+%% restore_recs/4, reports the outcome to the client and terminates.
+install_fallback_master(ClientPid, FA) ->
+    process_flag(trap_exit, true),
+    InitialState = {start, FA},
+    BupData = FA#fallback_args.opaque,
+    BupMod = FA#fallback_args.module,
+    Outcome = (catch iterate(BupMod, fun restore_recs/4, BupData, InitialState)),
+    %% Unlink first so that our own exit is not propagated to the client
+    unlink(ClientPid),
+    ClientPid ! {self(), Outcome},
+    exit(shutdown).
+
+%% Callback for iterate/4 used while installing a fallback. The fourth
+%% argument is the iteration state:
+%%   {start, FA} - first call; the receivers are not yet started
+%%   Pids        - pids of the started fallback_receiver processes
+%%   stop        - the final (empty) bunch has already been handled
+%% Returns the next state; throws {error, Reason} on failure.
+restore_recs(_, _, _, stop) ->
+ throw({error, "restore_recs already stopped"});
+
+restore_recs(Recs, Header, Schema, {start, FA}) ->
+ %% First call: take the schema lock on the fallback nodes, start one
+ %% receiver per node and send them the header and schema. Recs is then
+ %% handled by the clauses below (it is [] if the backup has no records).
+ Schema2 = convert_schema(Header#log_header.log_version, Schema),
+ CreateList = lookup_schema(schema, Schema2),
+ case catch mnesia_schema:list2cs(CreateList) of
+ {'EXIT', Reason} ->
+ throw({error, {"Bad schema in restore_recs", Reason}});
+ Cs ->
+ Ns = get_fallback_nodes(FA, Cs#cstruct.disc_copies),
+ global:set_lock({{mnesia_table_lock, schema}, self()}, Ns, infinity),
+ Args = [self(), FA],
+ Pids = [spawn_link(N, ?MODULE, fallback_receiver, Args) || N <- Ns],
+ send_fallback(Pids, {start, Header, Schema2}),
+ Res = restore_recs(Recs, Header, Schema2, Pids),
+ global:del_lock({{mnesia_table_lock, schema}, self()}, Ns),
+ Res
+ end;
+
+restore_recs([], _Header, _Schema, Pids) ->
+ %% End of backup: let the receivers swap in the new fallback and stop
+ send_fallback(Pids, swap),
+ send_fallback(Pids, stop),
+ stop;
+
+restore_recs(Recs, _, _, Pids) ->
+ %% Forward a bunch of records to all receivers
+ send_fallback(Pids, {records, Recs}),
+ Pids.
+
+%% Selects the nodes on which the fallback is to be installed: all of Ns
+%% for global scope, only the local node for local scope. The local node
+%% must be a member of Ns.
+get_fallback_nodes(FA, Ns) ->
+    Local = node(),
+    case lists:member(Local, Ns) of
+        false ->
+            throw({error, {"No disc resident schema on local node", Ns}});
+        true ->
+            case FA#fallback_args.scope of
+                local -> [Local];
+                global -> Ns
+            end
+    end.
+
+%% Sends {self(), Msg} to every receiver and waits for all the answers.
+send_fallback(Pids, Msg) when is_list(Pids), Pids =/= [] ->
+    Envelope = {self(), Msg},
+    lists:foreach(fun(Receiver) -> Receiver ! Envelope end, Pids),
+    rec_answers(Pids, []).
+
+%% Collects one answer (or exit signal) from each pid. Once all have
+%% answered: any {error, Val} answer is thrown, a unanimous answer is
+%% returned, and diverging answers are thrown as an error.
+rec_answers([], Acc) ->
+    case lists:keysearch(error, 1, Acc) of
+        {value, {error, Val}} ->
+            throw({error, Val});
+        _ ->
+            case mnesia_lib:uniq(Acc) of
+                [SameAnswer] ->
+                    SameAnswer;
+                Other ->
+                    throw({error, {"Different answers", Other}})
+            end
+    end;
+rec_answers(Pids, Acc) ->
+    receive
+        {'EXIT', Pid, stopped} ->
+            rec_answers(lists:delete(Pid, Pids), [stopped | Acc]);
+        {'EXIT', Pid, Reason} ->
+            Failure = {error, {'EXIT', Pid, Reason}},
+            rec_answers(lists:delete(Pid, Pids), [Failure | Acc]);
+        {Pid, Reply} ->
+            rec_answers(lists:delete(Pid, Pids), [Reply | Acc])
+    end.
+
+%% True if a fallback is installed on this node.
+fallback_exists() ->
+    fallback_exists(fallback_bup()).
+
+%% When Mnesia uses a directory the fallback file itself is checked;
+%% otherwise the active_fallback value decides (false if it is not set).
+fallback_exists(Fname) ->
+    case mnesia_monitor:use_dir() of
+        false ->
+            case ?catch_val(active_fallback) of
+                {'EXIT', _} -> false;
+                Bool -> Bool
+            end;
+        true ->
+            mnesia_lib:exists(Fname)
+    end.
+
+%% Base name of the installed fallback file
+fallback_name() -> "FALLBACK.BUP".
+%% Name of the fallback file as resolved by mnesia_lib:dir/1
+fallback_bup() -> mnesia_lib:dir(fallback_name()).
+
+%% Base name of the temporary file written while a fallback is installed
+fallback_tmp_name() -> "FALLBACK.TMP".
+%% fallback_full_tmp_name() -> mnesia_lib:dir(fallback_tmp_name()).
+
+%% Body of the receiver process spawned on each fallback node by
+%% restore_recs/4. Writes the items sent by Master to a temporary
+%% fallback file. Failures are reported via local_fallback_error/2.
+fallback_receiver(Master, FA) ->
+ process_flag(trap_exit, true),
+
+ %% The registered name ensures a single receiver per node
+ case catch register(mnesia_fallback, self()) of
+ {'EXIT', _} ->
+ Reason = {already_exists, node()},
+ local_fallback_error(Master, Reason);
+ true ->
+ FA2 = check_fallback_dir(Master, FA),
+ Bup = FA2#fallback_args.fallback_bup,
+ %% Refuse to overwrite a fallback that is already installed
+ case mnesia_lib:exists(Bup) of
+ true ->
+ Reason2 = {already_exists, node()},
+ local_fallback_error(Master, Reason2);
+ false ->
+ Mod = mnesia_backup,
+ Tmp = FA2#fallback_args.fallback_tmp,
+ R = #restore{mode = replace,
+ bup_module = Mod,
+ bup_data = Tmp},
+ file:delete(Tmp),
+ case catch fallback_receiver_loop(Master, R, FA2, schema) of
+ {error, Reason} ->
+ local_fallback_error(Master, Reason);
+ Other ->
+ %% Includes the normal result 'stopped'
+ exit(Other)
+ end
+ end
+ end.
+
+%% Reports {error, Reason} to Master and terminates the calling process
+%% with Reason. Never returns.
+local_fallback_error(Master, Reason) ->
+ Master ! {self(), {error, Reason}},
+ %% Unlink before exiting so that Master only sees the message above
+ unlink(Master),
+ exit(Reason).
+
+%% Resolves the directory for the fallback and fills in mnesia_dir,
+%% fallback_bup and fallback_tmp in FA. A ram schema location is
+%% reported as an error to Master, which terminates this process.
+check_fallback_dir(Master, FA) ->
+    case mnesia:system_info(schema_location) of
+        ram ->
+            local_fallback_error(Master, {has_no_disc, node()});
+        _ ->
+            Dir = check_fallback_dir_arg(Master, FA),
+            BupFile = filename:join([Dir, fallback_name()]),
+            TmpFile = filename:join([Dir, fallback_tmp_name()]),
+            FA#fallback_args{mnesia_dir = Dir,
+                             fallback_bup = BupFile,
+                             fallback_tmp = TmpFile}
+    end.
+
+%% Returns the directory to use for the fallback. An explicitly given
+%% directory is only allowed with local scope; with global scope, or if
+%% the directory fails the type check, the error is reported to Master
+%% via local_fallback_error/2 (which terminates this process).
+check_fallback_dir_arg(Master, FA) ->
+ case FA#fallback_args.use_default_dir of
+ true ->
+ mnesia_lib:dir();
+ false when FA#fallback_args.scope =:= local ->
+ Dir = FA#fallback_args.mnesia_dir,
+ case catch mnesia_monitor:do_check_type(dir, Dir) of
+ {'EXIT', _R} ->
+ Reason = {badarg, {dir, Dir}, node()},
+ local_fallback_error(Master, Reason);
+ AbsDir->
+ AbsDir
+ end;
+ false when FA#fallback_args.scope =:= global ->
+ Reason = {combine_error, global, dir, node()},
+ local_fallback_error(Master, Reason)
+ end.
+
+%% Message loop of the fallback receiver. State tracks the protocol:
+%%   schema  - waiting for {start, Header, Schema}
+%%   records - accepting {records, Recs} until swap arrives
+%%   stop    - fallback swapped in; waiting for stop
+%% Every handled request is acknowledged with {self(), ok}.
+%%
+%% Returns 'stopped' after the final stop message; throws
+%% {error, Reason} on any failure or protocol violation.
+fallback_receiver_loop(Master, R, FA, State) ->
+    receive
+        {Master, {start, Header, Schema}} when State =:= schema ->
+            Dir = FA#fallback_args.mnesia_dir,
+            throw_bad_res(ok, mnesia_schema:opt_create_dir(true, Dir)),
+            R2 = safe_apply(R, open_write, [R#restore.bup_data]),
+            R3 = safe_apply(R2, write, [R2#restore.bup_data, [Header]]),
+            BupSchema = [schema2bup(S) || S <- Schema],
+            R4 = safe_apply(R3, write, [R3#restore.bup_data, BupSchema]),
+            Master ! {self(), ok},
+            fallback_receiver_loop(Master, R4, FA, records);
+
+        {Master, {records, Recs}} when State =:= records ->
+            R2 = safe_apply(R, write, [R#restore.bup_data, Recs]),
+            Master ! {self(), ok},
+            fallback_receiver_loop(Master, R2, FA, records);
+
+        {Master, swap} when State =/= schema ->
+            ?eval_debug_fun({?MODULE, fallback_receiver_loop, pre_swap}, []),
+            safe_apply(R, commit_write, [R#restore.bup_data]),
+            Bup = FA#fallback_args.fallback_bup,
+            Tmp = FA#fallback_args.fallback_tmp,
+            %% Activate the fallback by moving the temporary file into place
+            throw_bad_res(ok, file:rename(Tmp, Bup)),
+            catch mnesia_lib:set(active_fallback, true),
+            ?eval_debug_fun({?MODULE, fallback_receiver_loop, post_swap}, []),
+            Master ! {self(), ok},
+            fallback_receiver_loop(Master, R, FA, stop);
+
+        {Master, stop} when State =:= stop ->
+            stopped;
+
+        Msg ->
+            safe_apply(R, abort_write, [R#restore.bup_data]),
+            Tmp = FA#fallback_args.fallback_tmp,
+            file:delete(Tmp),
+            %% Must be a two-tuple: the caller only recognises
+            %% {error, Reason} as a failure to report to Master.
+            throw({error, {"Unexpected msg fallback_receiver_loop", Msg}})
+    end.
+
+%% Returns Expected if Actual equals it; otherwise throws {error, Reason},
+%% unwrapping Actual first if it already is an {error, Reason} tuple.
+throw_bad_res(Expected, Actual) ->
+    case Actual of
+        Expected -> Expected;
+        {error, Reason} -> throw({error, Reason});
+        Other -> throw({error, Other})
+    end.
+
+%% Describes one table stored on the local node while starting from a
+%% fallback. Created by create_dat_files/2 and init_dat_files/2:
+%%   name         - table name
+%%   storage_type - local storage type of the table
+%%   open         - fun(Tab, LT), opens the temporary storage
+%%   add          - fun(Tab, Key, Rec, LT), stores one backup record
+%%   close        - fun(Tab, LT), closes the temporary storage
+%%   swap         - fun(Tab, LT), replaces the old table files
+%%   record_name  - record name of the table
+%%   opened       - true once 'open' has been called for the table
+%% For the schema table itself the four funs are undefined.
+-record(local_tab, {name,
+ storage_type,
+ open,
+ add,
+ close,
+ swap,
+ record_name,
+ opened}).
+
+%% Starts from an installed fallback (if any) while holding the schema
+%% lock. Returns ok, or exits with Reason if the start fails.
+tm_fallback_start(IgnoreFallback) ->
+    mnesia_schema:lock_schema(),
+    Exists = fallback_exists(),
+    Outcome = do_fallback_start(Exists, IgnoreFallback),
+    mnesia_schema:unlock_schema(),
+    case Outcome of
+        {error, Reason} -> exit(Reason);
+        ok -> ok
+    end.
+
+%% do_fallback_start(FallbackExists, IgnoreFallback) -> ok | {error, Reason}
+do_fallback_start(false, _IgnoreFallback) ->
+ ok;
+do_fallback_start(true, true) ->
+ verbose("Ignoring fallback at startup, but leaving it active...~n", []),
+ mnesia_lib:set(active_fallback, true),
+ ok;
+do_fallback_start(true, false) ->
+ verbose("Starting from fallback...~n", []),
+
+ BupFile = fallback_bup(),
+ Mod = mnesia_backup,
+ %% One local_tab record per locally stored table, keyed on table name
+ LocalTabs = ?ets_new_table(mnesia_local_tables, [set, public, {keypos, 2}]),
+ case catch iterate(Mod, fun restore_tables/4, BupFile, {start, LocalTabs}) of
+ {ok, _Res} ->
+ catch dets:close(schema),
+ TmpSchema = mnesia_lib:tab2tmp(schema),
+ DatSchema = mnesia_lib:tab2dat(schema),
+ AllLT = ?ets_match_object(LocalTabs, '_'),
+ ?ets_delete_table(LocalTabs),
+ %% The schema is swapped in first; only then are the other
+ %% tables swapped and the fallback file removed.
+ case file:rename(TmpSchema, DatSchema) of
+ ok ->
+ [(LT#local_tab.swap)(LT#local_tab.name, LT) ||
+ LT <- AllLT, LT#local_tab.name =/= schema],
+ file:delete(BupFile),
+ ok;
+ {error, Reason} ->
+ file:delete(TmpSchema),
+ {error, {"Cannot start from fallback. Rename error.", Reason}}
+ end;
+ {error, Reason} ->
+ {error, {"Cannot start from fallback", Reason}};
+ {'EXIT', Reason} ->
+ {error, {"Cannot start from fallback", Reason}}
+ end.
+
+%% Callback for iterate/4 used when starting from a fallback. The fourth
+%% argument is the iteration state:
+%%   {start, LocalTabs}          - first call; directories are purged and
+%%                                 the temporary files prepared
+%%   {new, LocalTabs}            - the table of the next record is not
+%%                                 yet classified
+%%   {local, LocalTabs, LT}      - records of the local table LT are
+%%                                 being added
+%%   {not_local, LocalTabs, Tab} - records of Tab are skipped since the
+%%                                 table has no entry in LocalTabs
+%% Returns the state to use for the next bunch of records.
+restore_tables(All=[Rec | Recs], Header, Schema, State={local, LocalTabs, LT}) ->
+ Tab = element(1, Rec),
+ if
+ Tab =:= LT#local_tab.name ->
+ Key = element(2, Rec),
+ (LT#local_tab.add)(Tab, Key, Rec, LT),
+ restore_tables(Recs, Header, Schema, State);
+ true ->
+ %% Another table begins: classify it
+ NewState = {new, LocalTabs},
+ restore_tables(All, Header, Schema, NewState)
+ end;
+restore_tables(All=[Rec | Recs], Header, Schema, {new, LocalTabs}) ->
+ Tab = element(1, Rec),
+ case ?ets_lookup(LocalTabs, Tab) of
+ [] ->
+ State = {not_local, LocalTabs, Tab},
+ restore_tables(Recs, Header, Schema, State);
+ [LT] when is_record(LT, local_tab) ->
+ State = {local, LocalTabs, LT},
+ %% Open the temporary storage the first time the table is seen
+ case LT#local_tab.opened of
+ true -> ignore;
+ false ->
+ (LT#local_tab.open)(Tab, LT),
+ ?ets_insert(LocalTabs,LT#local_tab{opened=true})
+ end,
+ restore_tables(All, Header, Schema, State)
+ end;
+restore_tables(All=[Rec | Recs], Header, Schema, S = {not_local, LocalTabs, PrevTab}) ->
+ Tab = element(1, Rec),
+ if
+ Tab =:= PrevTab ->
+ restore_tables(Recs, Header, Schema, S);
+ true ->
+ State = {new, LocalTabs},
+ restore_tables(All, Header, Schema, State)
+ end;
+restore_tables(Recs, Header, Schema, {start, LocalTabs}) ->
+ Dir = mnesia_lib:dir(),
+ OldDir = filename:join([Dir, "OLD_DIR"]),
+ mnesia_schema:purge_dir(OldDir, []),
+ mnesia_schema:purge_dir(Dir, [fallback_name()]),
+ init_dat_files(Schema, LocalTabs),
+ State = {new, LocalTabs},
+ restore_tables(Recs, Header, Schema, State);
+restore_tables([], _Header, _Schema, State) ->
+ State.
+
+%% Creates all necessary dat files and inserts
+%% the table definitions in the schema table
+%%
+%% The schema table is a dets table opened on the temporary schema file.
+%% A local_tab record is inserted into the ets table LocalTabs for each
+%% table handled by create_dat_files/2, and finally one for the schema
+%% table itself. Throws {error, Reason} if the file cannot be opened.
+init_dat_files(Schema, LocalTabs) ->
+ TmpFile = mnesia_lib:tab2tmp(schema),
+ Args = [{file, TmpFile}, {keypos, 2}, {type, set}],
+ case dets:open_file(schema, Args) of % Assume schema lock
+ {ok, _} ->
+ create_dat_files(Schema, LocalTabs),
+ ok = dets:close(schema),
+ LocalTab = #local_tab{name = schema,
+ storage_type = disc_copies,
+ open = undefined,
+ add = undefined,
+ close = undefined,
+ swap = undefined,
+ record_name = schema,
+ opened = false},
+ ?ets_insert(LocalTabs, LocalTab);
+ {error, Reason} ->
+ throw({error, {"Cannot open file", schema, Args, Reason}})
+ end.
+
+%% create_dat_files(SchemaItems, LocalTabs) -> ok
+%%
+%% Walks the schema items of the fallback. Each table definition is
+%% inserted into the (already opened) schema dets table and, if the table
+%% is stored on the local node, a local_tab record with funs for opening,
+%% filling, closing and swapping its temporary file is inserted into
+%% LocalTabs. A {schema, Tab} item removes the table again.
+create_dat_files([{schema, schema, TabDef} | Tail], LocalTabs) ->
+ ok = dets:insert(schema, {schema, schema, TabDef}),
+ create_dat_files(Tail, LocalTabs);
+create_dat_files([{schema, Tab, TabDef} | Tail], LocalTabs) ->
+ TmpFile = mnesia_lib:tab2tmp(Tab),
+ DatFile = mnesia_lib:tab2dat(Tab),
+ DclFile = mnesia_lib:tab2dcl(Tab),
+ DcdFile = mnesia_lib:tab2dcd(Tab),
+ %% Removes the existing files of the table; called by the Swap funs
+ Expunge = fun() ->
+ file:delete(DatFile),
+ file:delete(DclFile),
+ file:delete(DcdFile)
+ end,
+
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFile),
+ Cs = mnesia_schema:list2cs(TabDef),
+ ok = dets:insert(schema, {schema, Tab, TabDef}),
+ RecName = Cs#cstruct.record_name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ if
+ Storage =:= unknown ->
+ %% No local storage type for this table: drop its definition
+ ok = dets:delete(schema, {schema, Tab}),
+ create_dat_files(Tail, LocalTabs);
+ Storage =:= disc_only_copies ->
+ %% Records are written to a dets file opened on TmpFile,
+ %% which Swap renames to DatFile
+ Args = [{file, TmpFile}, {keypos, 2},
+ {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)}],
+ Open = fun(T, LT) when T =:= LT#local_tab.name ->
+ case mnesia_lib:dets_sync_open(T, Args) of
+ {ok, _} ->
+ ok;
+ {error, Reason} ->
+ throw({error, {"Cannot open file", T, Args, Reason}})
+ end
+ end,
+ Add = fun(T, Key, Rec, LT) when T =:= LT#local_tab.name ->
+ case Rec of
+ {_T, Key} ->
+ %% A two-tuple with the record's key is a delete
+ ok = dets:delete(T, Key);
+ (Rec) when T =:= RecName ->
+ ok = dets:insert(Tab, Rec);
+ (Rec) ->
+ %% Stored under the table's record name
+ Rec2 = setelement(1, Rec, RecName),
+ ok = dets:insert(T, Rec2)
+ end
+ end,
+ Close = fun(T, LT) when T =:= LT#local_tab.name ->
+ mnesia_lib:dets_sync_close(T)
+ end,
+ Swap = fun(T, LT) when T =:= LT#local_tab.name ->
+ Expunge(),
+ %% A table that never was opened is opened and closed
+ %% here before the rename
+ case LT#local_tab.opened of
+ true ->
+ Close(T,LT);
+ false ->
+ Open(T,LT),
+ Close(T,LT)
+ end,
+ case file:rename(TmpFile, DatFile) of
+ ok ->
+ ok;
+ {error, Reason} ->
+ mnesia_lib:fatal("Cannot rename file ~p -> ~p: ~p~n",
+ [TmpFile, DatFile, Reason])
+ end
+ end,
+ LocalTab = #local_tab{name = Tab,
+ storage_type = Storage,
+ open = Open,
+ add = Add,
+ close = Close,
+ swap = Swap,
+ record_name = RecName,
+ opened = false},
+ ?ets_insert(LocalTabs, LocalTab),
+ create_dat_files(Tail, LocalTabs);
+ Storage =:= ram_copies; Storage =:= disc_copies ->
+ %% Records are appended to a log opened on TmpFile,
+ %% which Swap renames to DclFile
+ Open = fun(T, LT) when T =:= LT#local_tab.name ->
+ mnesia_log:open_log({?MODULE, T},
+ mnesia_log:dcl_log_header(),
+ TmpFile,
+ false,
+ false,
+ read_write)
+ end,
+ Add = fun(T, Key, Rec, LT) when T =:= LT#local_tab.name ->
+ Log = {?MODULE, T},
+ case Rec of
+ {_T, Key} ->
+ %% A two-tuple with the record's key is a delete
+ mnesia_log:append(Log, {{T, Key}, {T, Key}, delete});
+ (Rec) when T =:= RecName ->
+ mnesia_log:append(Log, {{T, Key}, Rec, write});
+ (Rec) ->
+ %% Stored under the table's record name
+ Rec2 = setelement(1, Rec, RecName),
+ mnesia_log:append(Log, {{T, Key}, Rec2, write})
+ end
+ end,
+ Close = fun(T, LT) when T =:= LT#local_tab.name ->
+ mnesia_log:close_log({?MODULE, T})
+ end,
+ Swap = fun(T, LT) when T =:= LT#local_tab.name ->
+ Expunge(),
+ if
+ Storage =:= ram_copies, LT#local_tab.opened =:= false ->
+ %% ram_copies table without records in the backup:
+ %% nothing but the expunge is done
+ ok;
+ true ->
+ %% Open and close a log with a dcd header on DcdFile
+ %% NOTE(review): presumably this leaves an empty
+ %% DcdFile behind -- confirm against mnesia_log
+ Log = mnesia_log:open_log(fallback_tab,
+ mnesia_log:dcd_log_header(),
+ DcdFile,
+ false),
+ mnesia_log:close_log(Log),
+ case LT#local_tab.opened of
+ true ->
+ Close(T,LT);
+ false ->
+ Open(T,LT),
+ Close(T,LT)
+ end,
+ case file:rename(TmpFile, DclFile) of
+ ok ->
+ ok;
+ {error, Reason} ->
+ mnesia_lib:fatal("Cannot rename file ~p -> ~p: ~p~n",
+ [TmpFile, DclFile, Reason])
+ end
+ end
+ end,
+ LocalTab = #local_tab{name = Tab,
+ storage_type = Storage,
+ open = Open,
+ add = Add,
+ close = Close,
+ swap = Swap,
+ record_name = RecName,
+ opened = false
+ },
+ ?ets_insert(LocalTabs, LocalTab),
+ create_dat_files(Tail, LocalTabs)
+ end;
+create_dat_files([{schema, Tab} | Tail], LocalTabs) ->
+ %% Table deletion: forget the table and remove its temporary file
+ ?ets_delete(LocalTabs, Tab),
+ ok = dets:delete(schema, {schema, Tab}),
+ TmpFile = mnesia_lib:tab2tmp(Tab),
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFile),
+ create_dat_files(Tail, LocalTabs);
+create_dat_files([], _LocalTabs) ->
+ ok.
+
+%% Uninstall the fallback using the default arguments [{scope, global}].
+uninstall_fallback() ->
+    uninstall_fallback([{scope, global}]).
+
+%% Validate Args with check_fallback_args/2 and, when they are accepted,
+%% carry out the uninstallation. An {error, Reason} from the argument
+%% check is returned unchanged.
+uninstall_fallback(Args) ->
+    case check_fallback_args(Args, #fallback_args{}) of
+        {error, _} = Failure ->
+            Failure;
+        {ok, FallbackArgs} ->
+            do_uninstall_fallback(FallbackArgs)
+    end.
+
+%% Run the uninstallation in a linked helper process and wait for its
+%% answer. Returns whatever the helper reports, {error, {'EXIT', Why}}
+%% if the helper dies while the caller traps exits, or the error from
+%% mnesia_lib:ensure_loaded/1.
+do_uninstall_fallback(FA) ->
+    %% Make sure the application is loaded so that the intended Mnesia
+    %% directory is the one being accessed. This function may not be
+    %% called during startup since that would deadlock the
+    %% application_controller.
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        {error, _} = NotLoaded ->
+            NotLoaded;
+        ok ->
+            Master = spawn_link(?MODULE, uninstall_fallback_master, [self(), FA]),
+            receive
+                {Master, Result} ->
+                    Result;
+                {'EXIT', Master, Why} -> % only seen if the caller traps exits
+                    {error, {'EXIT', Why}}
+            end
+    end.
+
+%% Entry point of the helper process spawned by do_uninstall_fallback/1.
+%% Reads the schema from the fallback backup file, works out which nodes
+%% are involved and starts the distributed uninstallation. Failures are
+%% handed to local_fallback_error/2.
+uninstall_fallback_master(ClientPid, FA) ->
+    process_flag(trap_exit, true),
+
+    CheckedFA = check_fallback_dir(ClientPid, FA), % May exit
+    BupFile = CheckedFA#fallback_args.fallback_bup,
+    case fallback_to_schema(BupFile) of
+        {error, Why} ->
+            local_fallback_error(ClientPid, Why);
+        {ok, fallback, SchemaList} ->
+            Cs = mnesia_schema:list2cs(SchemaList),
+            case catch get_fallback_nodes(FA, Cs#cstruct.disc_copies) of
+                {error, Why} ->
+                    local_fallback_error(ClientPid, Why);
+                Nodes when is_list(Nodes) ->
+                    do_uninstall(ClientPid, Nodes, FA)
+            end
+    end.
+
+%% Coordinate the uninstallation on the nodes Ns (called from
+%% uninstall_fallback_master/2).
+%%
+%% Takes the global lock {mnesia_table_lock, schema} on Ns, spawns one
+%% linked local_uninstall_fallback/2 worker per node and hands the
+%% worker pids to do_uninstall/5. When that call returns, the lock is
+%% released, the result is sent to ClientPid as {self(), Res} and this
+%% process exits with reason shutdown.
+%%
+%% NOTE: when every worker starts successfully do_uninstall/5 continues
+%% in rec_uninstall/3, which replies to ClientPid and exits on its own;
+%% the statements after the do_uninstall/5 call are then never reached.
+do_uninstall(ClientPid, Ns, FA) ->
+ Args = [self(), FA],
+ global:set_lock({{mnesia_table_lock, schema}, self()}, Ns, infinity),
+ Pids = [spawn_link(N, ?MODULE, local_uninstall_fallback, Args) || N <- Ns],
+ Res = do_uninstall(ClientPid, Pids, [], [], ok),
+ global:del_lock({{mnesia_table_lock, schema}, self()}, Ns),
+ ClientPid ! {self(), Res},
+ unlink(ClientPid),
+ exit(shutdown).
+
+%% Wait, worker by worker, for each spawned worker to report that it
+%% has started.
+%%
+%% Arguments: the client pid, the workers still to hear from, the
+%% workers that answered `started', the nodes whose worker failed, and
+%% the latest failure (initially ok).
+%%
+%% A worker that exits or answers {error, Reason} puts its node in
+%% BadNodes. When all workers have been heard from:
+%%  - no bad nodes and Res =:= ok: every good worker is told to
+%%    do_uninstall and the replies are collected by rec_uninstall/3;
+%%  - otherwise: the good workers are killed with reason shutdown and
+%%    {error, {node_not_running, BadNodes, BadRes}} is returned.
+do_uninstall(ClientPid, [Pid | Pids], GoodPids, BadNodes, Res) ->
+ receive
+ %% {'EXIT', ClientPid, _} ->
+ %% client_exit;
+ {'EXIT', Pid, Reason} ->
+ BadNode = node(Pid),
+ BadRes = {error, {"Uninstall fallback", BadNode, Reason}},
+ do_uninstall(ClientPid, Pids, GoodPids, [BadNode | BadNodes], BadRes);
+ {Pid, {error, Reason}} ->
+ BadNode = node(Pid),
+ BadRes = {error, {"Uninstall fallback", BadNode, Reason}},
+ do_uninstall(ClientPid, Pids, GoodPids, [BadNode | BadNodes], BadRes);
+ {Pid, started} ->
+ do_uninstall(ClientPid, Pids, [Pid | GoodPids], BadNodes, Res)
+ end;
+do_uninstall(ClientPid, [], GoodPids, [], ok) ->
+ lists:foreach(fun(Pid) -> Pid ! {self(), do_uninstall} end, GoodPids),
+ rec_uninstall(ClientPid, GoodPids, ok);
+do_uninstall(_ClientPid, [], GoodPids, BadNodes, BadRes) ->
+ lists:foreach(fun(Pid) -> exit(Pid, shutdown) end, GoodPids),
+ {error, {node_not_running, BadNodes, BadRes}}.
+
+%% Per-node worker spawned (linked) by do_uninstall/3.
+%%
+%% Registers itself as mnesia_fallback, checks the fallback directory
+%% and reports {self(), started} to Master. It then blocks until Master
+%% sends {Master, do_uninstall}; only that message is matched and there
+%% is no timeout. On that message it clears the active_fallback flag,
+%% deletes the temporary fallback file and, if it exists, the fallback
+%% backup file. The result of deleting the backup file (or ok when there
+%% was none) is sent to Master as {self(), Res} before the worker
+%% unlinks and exits with reason normal.
+local_uninstall_fallback(Master, FA) ->
+ %% Don't trap exit
+
+ register(mnesia_fallback, self()), % May exit
+ FA2 = check_fallback_dir(Master, FA), % May exit
+ Master ! {self(), started},
+
+ receive
+ {Master, do_uninstall} ->
+ ?eval_debug_fun({?MODULE, uninstall_fallback2, pre_delete}, []),
+ %% Failures while clearing the flag are deliberately ignored
+ catch mnesia_lib:set(active_fallback, false),
+ Tmp = FA2#fallback_args.fallback_tmp,
+ Bup = FA2#fallback_args.fallback_bup,
+ %% The result of deleting the tmp file is not checked
+ file:delete(Tmp),
+ Res =
+ case fallback_exists(Bup) of
+ true -> file:delete(Bup);
+ false -> ok
+ end,
+ ?eval_debug_fun({?MODULE, uninstall_fallback2, post_delete}, []),
+ Master ! {self(), Res},
+ unlink(Master),
+ exit(normal)
+ end.
+
+%% Collect the final answer from every worker, in list order. The
+%% accumulated result stays as it is while workers answer ok and is
+%% replaced by the latest failure otherwise. When all workers are done
+%% the result is sent to ClientPid as {self(), Res} and the process
+%% exits with reason normal.
+%% NOTE: an 'EXIT' message from ClientPid is not matched here.
+rec_uninstall(ClientPid, [Pid | Pids], AccRes) ->
+    NextRes =
+        receive
+            {Pid, ok} ->
+                AccRes;
+            {Pid, BadRes} ->
+                BadRes;
+            {'EXIT', Pid, R} ->
+                {error, {node_not_running, {node(Pid), R}}}
+        end,
+    rec_uninstall(ClientPid, Pids, NextRes);
+rec_uninstall(ClientPid, [], Res) ->
+    ClientPid ! {self(), Res},
+    unlink(ClientPid),
+    exit(normal).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup traversal
+
+%% Iterate over a backup and produce a new backup.
+%% Fun(BackupItem, Acc) is applied for each BackupItem.
+%%
+%% Valid BackupItems are:
+%%
+%% {schema, Tab} Table to be deleted
+%% {schema, Tab, CreateList} Table to be created, CreateList may be empty
+%% {schema, db_nodes, DbNodes}List of nodes, defaults to [node()] OLD
+%% {schema, version, Version} Schema version OLD
+%% {schema, cookie, Cookie} Unique schema cookie OLD
+%% {Tab, Key} Oid for record to be deleted
+%% Record Record to be inserted.
+%%
+%% The Fun must return a tuple {BackupItems, NewAcc}
+%% where BackupItems is a list of valid BackupItems and
+%% NewAcc is a new accumulator value. Once BackupItems
+%% that are not schema related have been returned, no more schema
+%% items may be returned. The schema related items must always be
+%% first in the backup.
+%%
+%% If TargetMod =:= read_only, no new backup will be created.
+%%
+%% Opening of the source media will be performed by
+%% SourceMod:open_read(Source)
+%%
+%% Opening of the target media will be performed by
+%% TargetMod:open_write(Target)
+%% Traverse using the configured backup_module for both the source and
+%% the target.
+traverse_backup(Source, Target, Fun, Acc) ->
+    BackupMod = mnesia_monitor:get_env(backup_module),
+    traverse_backup(Source, BackupMod, Target, BackupMod, Fun, Acc).
+
+%% Run the traversal in a linked worker process and wait for its
+%% result. Returns the worker's result, or an error tuple if the worker
+%% exit is received as a message.
+traverse_backup(Source, SourceMod, Target, TargetMod, Fun, Acc) ->
+    Worker = spawn_link(?MODULE, do_traverse_backup,
+                        [self(), Source, SourceMod, Target, TargetMod, Fun, Acc]),
+    receive
+        {iter_done, Worker, Result} ->
+            Result;
+        {'EXIT', Worker, Why} ->
+            {error, {"Backup traversal crashed", Why}}
+    end.
+
+%% Body of the worker process spawned by traverse_backup/6.
+%%
+%% Unless TargetMod is read_only, the target is opened with
+%% TargetMod:open_write (via do_apply/4). The source is then iterated
+%% with trav_apply/4 as callback, starting from the state
+%% {start, Fun, Acc, TargetMod, Iter}. On success the target is
+%% committed with commit_write; on failure abort_write is attempted.
+%% The outcome, {ok, Acc2} or {error, Reason}, is sent to ClientPid as
+%% {iter_done, self(), Res}.
+do_traverse_backup(ClientPid, Source, SourceMod, Target, TargetMod, Fun, Acc) ->
+ process_flag(trap_exit, true),
+ Iter =
+ if
+ TargetMod =/= read_only ->
+ case catch do_apply(TargetMod, open_write, [Target], Target) of
+ {error, Error} ->
+ %% Report the failure before exiting; the unlink comes
+ %% first so the exit is not propagated to the client
+ unlink(ClientPid),
+ ClientPid ! {iter_done, self(), {error, Error}},
+ exit(Error);
+ Else -> Else
+ end;
+ true ->
+ ignore
+ end,
+ A = {start, Fun, Acc, TargetMod, Iter},
+ Res =
+ case iterate(SourceMod, fun trav_apply/4, Source, A) of
+ {ok, {iter, _, Acc2, _, Iter2}} when TargetMod =/= read_only ->
+ case catch do_apply(TargetMod, commit_write, [Iter2], Iter2) of
+ {error, Reason} ->
+ {error, Reason};
+ _ ->
+ {ok, Acc2}
+ end;
+ {ok, {iter, _, Acc2, _, _}} ->
+ {ok, Acc2};
+ {error, Reason} when TargetMod =/= read_only->
+ %% Abort with the iterator returned by open_write; any state
+ %% produced by later writes is not available here
+ catch do_apply(TargetMod, abort_write, [Iter], Iter),
+ {error, {"Backup traversal failed", Reason}};
+ {error, Reason} ->
+ {error, {"Backup traversal failed", Reason}}
+ end,
+ unlink(ClientPid),
+ ClientPid ! {iter_done, self(), Res}.
+
+%% Iteration callback used by do_traverse_backup/7.
+%%
+%% In the `iter' state the user Fun is folded over Recs and the
+%% resulting items, if any, are written to the target (unless the
+%% target module is read_only).
+%%
+%% In the `start' state the backup header is written first (again
+%% unless read_only), then the schema items are processed, and finally
+%% Recs are processed in the `iter' state.
+trav_apply(Recs, _Header, _Schema, {iter, Fun, Acc, Mod, Iter}) ->
+    {Kept, NextAcc} = filter_foldl(Fun, Acc, Recs),
+    NextIter =
+        case (Mod =/= read_only) andalso (Kept =/= []) of
+            true ->
+                do_apply(Mod, write, [Iter, Kept], Iter);
+            false ->
+                Iter
+        end,
+    {iter, Fun, NextAcc, Mod, NextIter};
+trav_apply(Recs, Header, Schema, {start, Fun, Acc, Mod, Iter}) ->
+    HeaderIter =
+        case Mod of
+            read_only ->
+                Iter;
+            _ ->
+                do_apply(Mod, write, [Iter, [Header]], Iter)
+        end,
+    SchemaState = trav_apply(Schema, Header, Schema,
+                             {iter, Fun, Acc, Mod, HeaderIter}),
+    trav_apply(Recs, Header, Schema, SchemaState).
+
+%% Apply Fun(Item, Acc) to every item from left to right. Fun must
+%% return {Items, NewAcc} where Items is a list; all returned lists are
+%% concatenated in order. Returns {AllItems, FinalAcc}. Any other
+%% return value from Fun is thrown as
+%% {error, {"Fun must return a list", Other}}.
+filter_foldl(Fun, Acc, Items) ->
+    filter_foldl_chunks(Fun, Acc, Items, []).
+
+%% Collect the per-item result lists in reverse order and join them
+%% once the input is exhausted.
+filter_foldl_chunks(Fun, Acc, [Item | Rest], RevChunks) ->
+    case Fun(Item, Acc) of
+        {Chunk, NextAcc} when is_list(Chunk) ->
+            filter_foldl_chunks(Fun, NextAcc, Rest, [Chunk | RevChunks]);
+        Other ->
+            throw({error, {"Fun must return a list", Other}})
+    end;
+filter_foldl_chunks(_Fun, Acc, [], RevChunks) ->
+    Joined = lists:foldl(fun(Chunk, Tail) -> Chunk ++ Tail end, [], RevChunks),
+    {Joined, Acc}.
+
diff --git a/lib/mnesia/src/mnesia_checkpoint.erl b/lib/mnesia/src/mnesia_checkpoint.erl
new file mode 100644
index 0000000000..eb8fe38908
--- /dev/null
+++ b/lib/mnesia/src/mnesia_checkpoint.erl
@@ -0,0 +1,1295 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_checkpoint).
+
+%% TM callback interface
+-export([
+ tm_add_copy/2,
+ tm_change_table_copy_type/3,
+ tm_del_copy/2,
+ tm_mnesia_down/1,
+ tm_prepare/1,
+ tm_retain/4,
+ tm_retain/5,
+ tm_enter_pending/1,
+ tm_enter_pending/3,
+ tm_exit_pending/1,
+ convert_cp_record/1
+ ]).
+
+%% Public interface
+-export([
+ activate/1,
+ checkpoints/0,
+ deactivate/1,
+ deactivate/2,
+ iterate/6,
+ most_local_node/2,
+ really_retain/2,
+ stop/0,
+ stop_iteration/1,
+ tables_and_cookie/1
+ ]).
+
+%% Internal
+-export([
+ call/2,
+ cast/2,
+ init/1,
+ remote_deactivate/1,
+ start/1
+ ]).
+
+%% sys callback interface
+-export([
+ system_code_change/4,
+ system_continue/3,
+ system_terminate/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [add/2, del/2, set/2, unset/1]).
+-import(mnesia_lib, [dbg_out/2]).
+
+%% Description and state of one checkpoint.
+%%
+%% The user-settable fields (name, allow_remote, ram_overrides_dump,
+%% min, max, ignore_new) are filled in by check_arg/2. retainers and
+%% nodes are computed by arrange_retainers/3. supervisor is set by
+%% start/1, and pid and pending_tab by init/1.
+%% The defaults of name, node, now and cookie are expressions, so they
+%% are evaluated each time a record is created without those fields.
+-record(checkpoint_args, {name = {now(), node()},
+ allow_remote = true,
+ ram_overrides_dump = false,
+ nodes = [],
+ node = node(),
+ now = now(),
+ cookie = ?unique_cookie,
+ min = [],
+ max = [],
+ pending_tab,
+ wait_for_old, % Initially undefined then List
+ is_activated = false,
+ ignore_new = [],
+ retainers = [],
+ iterators = [],
+ supervisor,
+ pid
+ }).
+
+%% Old record definition
+%% Has the same fields as checkpoint_args except `cookie', and declares
+%% no defaults. tm_prepare/1 accepts this record from nodes running the
+%% old protocol and converts it with convert_cp_record/1.
+-record(checkpoint, {name,
+ allow_remote,
+ ram_overrides_dump,
+ nodes,
+ node,
+ now,
+ min,
+ max,
+ pending_tab,
+ wait_for_old,
+ is_activated,
+ ignore_new,
+ retainers,
+ iterators,
+ supervisor,
+ pid
+ }).
+
+%% One retainer per checkpoint and table. `store' is {ets, Tab} or
+%% {dets, Name} on nodes listed in `writers' (see retainer_create/5) and
+%% undefined on other nodes (see prepare_tab/3).
+-record(retainer, {cp_name, tab_name, store, writers = [], really_retain = true}).
+
+%% Iterator state; its use lies outside this part of the file.
+-record(iter, {tab_name, oid_tab, main_tab, retainer_tab, source, val, pid}).
+
+%% A transaction entered into the pending tables, together with the
+%% disc and ram nodes passed to tm_enter_pending/3.
+-record(pending, {tid, disc_nodes = [], ram_nodes = []}).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% TM callback functions
+
+%% Send `stop' to every checkpoint listed by checkpoints/0, one at a
+%% time, and return ok.
+stop() ->
+    StopOne = fun(Name) -> call(Name, stop) end,
+    lists:foreach(StopOne, checkpoints()),
+    ok.
+
+%% Prepare a checkpoint on this node by starting its retainer process,
+%% unless a checkpoint with the same name is already listed by
+%% checkpoints/0.
+tm_prepare(Cp) when is_record(Cp, checkpoint_args) ->
+    Name = Cp#checkpoint_args.name,
+    case lists:member(Name, checkpoints()) of
+        true ->
+            {error, {already_exists, Name, node()}};
+        false ->
+            start_retainer(Cp)
+    end;
+tm_prepare(Cp) when is_record(Cp, checkpoint) ->
+    %% A node running the old protocol sent the old checkpoint record;
+    %% convert it before preparing.
+    case convert_cp_record(Cp) of
+        {error, _} = Failure ->
+            Failure;
+        {ok, Converted} ->
+            tm_prepare(Converted)
+    end.
+
+%% Tell every checkpoint listed by checkpoints/0 that Node has gone
+%% down.
+tm_mnesia_down(Node) ->
+    Notify = fun(Name) -> cast(Name, {mnesia_down, Node}) end,
+    lists:foreach(Notify, checkpoints()).
+
+%% Build a #pending{} record for Tid and enter it. Returns the record.
+tm_enter_pending(Tid, DiscNs, RamNs) ->
+    tm_enter_pending(#pending{tid = Tid, disc_nodes = DiscNs, ram_nodes = RamNs}).
+
+%% Enter Pending into every table stored under pending_checkpoints.
+tm_enter_pending(Pending) ->
+    tm_enter_pending(val(pending_checkpoints), Pending).
+
+%% Insert Pending into each table in turn; a failing insert is ignored.
+%% Returns Pending.
+tm_enter_pending(PendingTabs, Pending) ->
+    lists:foreach(fun(Tab) -> catch ?ets_insert(Tab, Pending) end, PendingTabs),
+    Pending.
+
+%% Send {exit_pending, Tid} to every pid stored under
+%% pending_checkpoint_pids. Returns Tid.
+tm_exit_pending(Tid) ->
+    tm_exit_pending(val(pending_checkpoint_pids), Tid).
+
+tm_exit_pending(Pids, Tid) ->
+    Msg = {self(), {exit_pending, Tid}},
+    lists:foreach(fun(Pid) -> Pid ! Msg end, Pids),
+    Tid.
+
+%% Insert a #pending{} record, with only the tid set, into Tab for each
+%% transaction id in Tids. Returns ok.
+enter_still_pending(Tids, Tab) ->
+    lists:foreach(fun(Tid) -> ?ets_insert(Tab, #pending{tid = Tid}) end, Tids).
+
+
+%% Looks up checkpoints for functions in mnesia_tm.
+%% Returns `undefined' when the commit_work of Tab does not begin with
+%% a checkpoints entry, otherwise the result of tm_retain/5.
+tm_retain(Tid, Tab, Key, Op) ->
+    case val({Tab, commit_work}) of
+        [{checkpoints, Checkpoints} | _] ->
+            tm_retain(Tid, Tab, Key, Op, Checkpoints);
+        _NoCheckpoints ->
+            undefined
+    end.
+
+%% Read the records that are about to be changed, pass them on to the
+%% checkpoints and return them. For clear_table every record of the
+%% table is read; for all other operations only the records under Key.
+tm_retain(Tid, Tab, _Key, clear_table, Checkpoints) ->
+    OldRecs = mnesia_lib:db_match_object(Tab, '_'),
+    send_group_retain(OldRecs, Checkpoints, Tid, Tab, []),
+    OldRecs;
+tm_retain(Tid, Tab, Key, _Op, Checkpoints) ->
+    OldRecs = mnesia_lib:db_get(Tab, Key),
+    send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
+    OldRecs.
+
+%% Send the records to the checkpoints, grouped by key.
+%%
+%% The key of a record is its second element. Consecutive records with
+%% the same key are accumulated (in reverse order) in the last argument.
+%% When a record with a different key arrives, or the input ends, the
+%% accumulated group is sent as one {retain, Tid, Tab, Key, OldRecs}
+%% message in its original order. Only adjacent records are grouped, so
+%% one message per key requires records with equal keys to be adjacent
+%% in the input.
+send_group_retain([Rec | Recs], Checkpoints, Tid, Tab, [PrevRec | PrevRecs])
+ when element(2, Rec) /= element(2, PrevRec) ->
+ %% Key change: flush the previous group and start a new one with Rec
+ Key = element(2, PrevRec),
+ OldRecs = lists:reverse([PrevRec | PrevRecs]),
+ send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
+ send_group_retain(Recs, Checkpoints, Tid, Tab, [Rec]);
+send_group_retain([Rec | Recs], Checkpoints, Tid, Tab, Acc) ->
+ %% Same key as the current group, or the very first record (Acc == [])
+ send_group_retain(Recs, Checkpoints, Tid, Tab, [Rec | Acc]);
+send_group_retain([], Checkpoints, Tid, Tab, [PrevRec | PrevRecs]) ->
+ %% End of input: flush the last group
+ Key = element(2, PrevRec),
+ OldRecs = lists:reverse([PrevRec | PrevRecs]),
+ send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
+ ok;
+send_group_retain([], _Checkpoints, _Tid, _Tab, []) ->
+ ok.
+
+%% Cast Msg to every checkpoint in Names. Returns ok.
+send_retain(Names, Msg) ->
+    lists:foreach(fun(Name) -> cast(Name, Msg) end, Names).
+
+%% Tell every checkpoint attached to Tab that a copy is being added on
+%% another node. Only defined for Node /= node().
+tm_add_copy(Tab, Node) when Node /= node() ->
+    case val({Tab, commit_work}) of
+        [{checkpoints, Names} | _] ->
+            AddCopy = fun(Name) -> call(Name, {add_copy, Tab, Node}) end,
+            map_call(AddCopy, Names, ok);
+        _NoCheckpoints ->
+            ok
+    end.
+
+%% Unsubscribe from Tab and tell every checkpoint attached to it that
+%% the local copy is being deleted. Only defined for Node == node().
+tm_del_copy(Tab, Node) when Node == node() ->
+    mnesia_subscr:unsubscribe_table(Tab),
+    case val({Tab, commit_work}) of
+        [{checkpoints, Names} | _] ->
+            DelCopy = fun(Name) -> call(Name, {del_copy, Tab, Node}) end,
+            map_call(DelCopy, Names, ok);
+        _NoCheckpoints ->
+            ok
+    end.
+
+%% Tell every checkpoint attached to Tab that the storage type of the
+%% table changes from From to To.
+tm_change_table_copy_type(Tab, From, To) ->
+    case val({Tab, commit_work}) of
+        [{checkpoints, Names} | _] ->
+            ChangeCopy = fun(Name) -> call(Name, {change_copy, Tab, From, To}) end,
+            map_call(ChangeCopy, Names, ok);
+        _NoCheckpoints ->
+            ok
+    end.
+
+%% Apply Fun to every name and return the accumulated result. The
+%% result is kept when Fun returns ok or reports that this very
+%% checkpoint does not exist; any other {error, Reason} replaces it, so
+%% the last such error wins.
+map_call(_Fun, [], Res) ->
+    Res;
+map_call(Fun, [Name | Names], Res) ->
+    NextRes =
+        case Fun(Name) of
+            ok ->
+                Res;
+            {error, {no_exists, Name}} ->
+                Res;
+            {error, Reason} ->
+                %% BUGBUG: We may end up with some checkpoint retainers
+                %% too much in the add_copy case. How do we remove them?
+                {error, Reason}
+        end,
+    map_call(Fun, Names, NextRes).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Public functions
+
+%% Deactivate the checkpoint Name on all nodes recorded in it.
+deactivate(Name) ->
+    case call(Name, get_checkpoint) of
+        {error, _} = Failure ->
+            Failure;
+        Cp ->
+            deactivate(Cp#checkpoint_args.nodes, Name)
+    end.
+
+%% Ask every node in Nodes to deactivate the checkpoint. The replies
+%% are ignored and ok is always returned.
+deactivate(Nodes, Name) ->
+    _ = rpc:multicall(Nodes, ?MODULE, remote_deactivate, [Name]),
+    ok.
+
+%% Executed on each node by deactivate/2.
+remote_deactivate(Name) ->
+    call(Name, deactivate).
+
+%% Returns the value stored under the key `checkpoints'. init/1 adds the
+%% name of each checkpoint it starts to that value.
+checkpoints() -> val(checkpoints).
+
+%% Returns {ok, Tabs, Cookie} for the checkpoint Name, where Tabs is
+%% its min tables followed by its max tables, or the {error, Reason}
+%% from the lookup.
+tables_and_cookie(Name) ->
+    case call(Name, get_checkpoint) of
+        {error, _} = Failure ->
+            Failure;
+        Cp ->
+            {ok,
+             Cp#checkpoint_args.min ++ Cp#checkpoint_args.max,
+             Cp#checkpoint_args.cookie}
+    end.
+
+%% Pick the node to read the retainer of Tab from for the checkpoint
+%% Name: the local node if it is one of the writers, otherwise the
+%% first writer. Returns {ok, Node} or an error when no retainer is
+%% attached.
+most_local_node(Name, Tab) ->
+    case ?catch_val({Tab, {retainer, Name}}) of
+        {'EXIT', _} ->
+            {error, {"No retainer attached to table", [Tab, Name]}};
+        R ->
+            Writers = R#retainer.writers,
+            case lists:member(node(), Writers) of
+                true ->
+                    {ok, node()};
+                false when Writers /= [] ->
+                    {ok, hd(Writers)};
+                false ->
+                    {error, {"No retainer attached to table", [Tab, Name]}}
+            end
+    end.
+
+%% Returns the really_retain flag of the retainer that the checkpoint
+%% Name has attached to Tab.
+really_retain(Name, Tab) ->
+    (val({Tab, {retainer, Name}}))#retainer.really_retain.
+
+%% Activate a checkpoint.
+%%
+%% A checkpoint is a transaction consistent state that may be used to
+%% perform a distributed backup or to rollback the involved tables to
+%% their old state. Backups may also be used to restore tables to
+%% their old state. Args is a list of the following tuples:
+%%
+%% {name, Name}
+%% Name of checkpoint. Each checkpoint must have a name which
+%% is unique on the reachable nodes. The name may be reused when
+%% the checkpoint has been deactivated.
+%% By default a probably unique name is generated.
+%% Multiple checkpoints may be set on the same table.
+%%
+%% {allow_remote, Bool}
+%% false means that all retainers must be local. If the
+%% table does not reside locally, the checkpoint fails.
+%% true allows retainers on other nodes.
+%%
+%% {min, MinTabs}
+%% Minimize redundancy and only keep checkpoint info together with
+%% one replica, preferably at the local node. If any node involved
+%% in the checkpoint goes down, the checkpoint is deactivated.
+%%
+%% {max, MaxTabs}
+%% Maximize redundancy and keep checkpoint info together with all
+%% replicas. The checkpoint becomes more fault tolerant if the
+%% tables have several replicas. When new replicas are added, they
+%% will also get a retainer attached to them.
+%%
+%% {ram_overrides_dump, Bool}
+%% {ram_overrides_dump, Tabs}
+%% Only applicable for ram_copies. Bool controls which versions of
+%% the records that should be included in the checkpoint state.
+%% true means that the latest committed records in ram (i.e. the
+%% records that the application accesses) should be included
+%% in the checkpoint. false means that the records dumped to
+%% dat-files (the records that will be loaded at startup) should
+%% be included in the checkpoint. Tabs is a list of tables.
+%% Default is false.
+%%
+%% {ignore_new, TidList}
+%% Normally we wait for all pending transactions to complete
+%% before we allow iteration over the checkpoint. But in order
+%% to cope with checkpoint activation inside a transaction that
+%% currently prepares commit (mnesia_init:get_net_work_copy) we
+%% need to have the ability to ignore the enclosing transaction.
+%% We do not wait for the transactions in TidList to end. The
+%% transactions in TidList are regarded as newer than the checkpoint.
+
+%% Validate Args and activate the checkpoint they describe. An
+%% {error, Reason} from the validation is returned unchanged.
+activate(Args) ->
+    case args2cp(Args) of
+        {error, _} = Failure ->
+            Failure;
+        {ok, Cp} ->
+            do_activate(Cp)
+    end.
+
+%% Turn the argument list into a #checkpoint_args{} record with its
+%% retainers arranged. Returns {ok, Cp} or {error, Reason}; anything
+%% but a list is rejected with {error, {badarg, Args}}.
+args2cp(Args) when not is_list(Args) ->
+    {error, {badarg, Args}};
+args2cp(Args) ->
+    case catch lists:foldl(fun check_arg/2, #checkpoint_args{}, Args) of
+        {'EXIT', Reason} ->
+            {error, Reason};
+        Cp ->
+            case check_tables(Cp) of
+                {ok, Overriders, AllTabs} ->
+                    arrange_retainers(Cp, Overriders, AllTabs);
+                {error, _} = Failure ->
+                    Failure
+            end
+    end.
+
+%% Fold function used by args2cp/1: check one {Key, Value} argument and
+%% store it in the checkpoint record. Exits with {already_exists, Name}
+%% for a name that is already in use and with {badarg, Arg} for
+%% anything that is not accepted.
+check_arg({name, Name}, Cp) ->
+    case lists:member(Name, checkpoints()) of
+        true ->
+            exit({already_exists, Name});
+        false ->
+            ok
+    end,
+    %% The name must be usable as part of a retainer file name
+    case catch tab2retainer({foo, Name}) of
+        List when is_list(List) ->
+            Cp#checkpoint_args{name = Name};
+        _ ->
+            exit({badarg, Name})
+    end;
+check_arg({allow_remote, Bool}, Cp) when is_boolean(Bool) ->
+    Cp#checkpoint_args{allow_remote = Bool};
+check_arg({ram_overrides_dump, Bool}, Cp) when is_boolean(Bool) ->
+    Cp#checkpoint_args{ram_overrides_dump = Bool};
+check_arg({ram_overrides_dump, Tabs}, Cp) when is_list(Tabs) ->
+    Cp#checkpoint_args{ram_overrides_dump = Tabs};
+check_arg({min, Tabs}, Cp) when is_list(Tabs) ->
+    Cp#checkpoint_args{min = Tabs};
+check_arg({max, Tabs}, Cp) when is_list(Tabs) ->
+    Cp#checkpoint_args{max = Tabs};
+check_arg({ignore_new, Tids}, Cp) when is_list(Tids) ->
+    Cp#checkpoint_args{ignore_new = Tids};
+check_arg(Arg, _) ->
+    exit({badarg, Arg}).
+
+%% Check that the table options of the checkpoint can be combined.
+%%
+%% Returns {ok, Overriders, AllTabs} where AllTabs is the min tables
+%% followed by the max tables and Overriders is the list of tables for
+%% which ram overrides dump: [] for false, AllTabs for true, or the
+%% given list. Returns {error, {combine_error, Name, Details}} when
+%%  - a table is listed under both min and max,
+%%  - neither min nor max lists any table, or
+%%  - a ram_overrides_dump table is not part of the checkpoint.
+%%
+%% A ram_overrides_dump list only has to be a subset of AllTabs. It
+%% used to be checked against min and then against max, and since those
+%% two must be disjoint every non-empty list was rejected.
+check_tables(Cp) ->
+    Name = Cp#checkpoint_args.name,
+    Min = Cp#checkpoint_args.min,
+    Max = Cp#checkpoint_args.max,
+    AllTabs = Min ++ Max,
+    DoubleTabs = [T || T <- Min, lists:member(T, Max)],
+    Overriders = Cp#checkpoint_args.ram_overrides_dump,
+    if
+        DoubleTabs /= [] ->
+            {error, {combine_error, Name,
+                     [{min, DoubleTabs}, {max, DoubleTabs}]}};
+        Min == [], Max == [] ->
+            {error, {combine_error, Name,
+                     [{min, Min}, {max, Max}]}};
+        Overriders == false ->
+            {ok, [], AllTabs};
+        Overriders == true ->
+            {ok, AllTabs, AllTabs};
+        is_list(Overriders) ->
+            case [T || T <- Overriders, not lists:member(T, AllTabs)] of
+                [] ->
+                    {ok, Overriders, AllTabs};
+                Outsiders ->
+                    {error, {combine_error, Name,
+                             [{ram_overrides_dump, Outsiders},
+                              {min, Min},
+                              {max, Max}]}}
+            end
+    end.
+
+%% Create one retainer description per table, each with the nodes that
+%% are to hold it, and store them in the checkpoint record together
+%% with the overriders and the union of all writer nodes. An exit while
+%% selecting writers is returned as {error, Reason}.
+arrange_retainers(Cp, Overriders, AllTabs) ->
+    Template = #retainer{cp_name = Cp#checkpoint_args.name},
+    Built = (catch [Template#retainer{tab_name = Tab,
+                                      writers = select_writers(Cp, Tab)}
+                    || Tab <- AllTabs]),
+    case Built of
+        {'EXIT', Reason} ->
+            {error, Reason};
+        Retainers ->
+            {ok, Cp#checkpoint_args{ram_overrides_dump = Overriders,
+                                    retainers = Retainers,
+                                    nodes = writers(Retainers)}}
+    end.
+
+%% Select the nodes that are to hold the retainer of Tab. For a max
+%% table all allowed active replicas are used; otherwise the local node
+%% if it has one, else the first allowed replica. Exits when no replica
+%% is available.
+select_writers(Cp, Tab) ->
+    case filter_remote(Cp, val({Tab, active_replicas})) of
+        [] ->
+            exit({"Cannot prepare checkpoint (replica not available)",
+                  [Tab, Cp#checkpoint_args.name]});
+        Writers ->
+            IsMaxTab = lists:member(Tab, Cp#checkpoint_args.max),
+            IsLocal = lists:member(node(), Writers),
+            if
+                IsMaxTab -> Writers;
+                IsLocal -> [node()];
+                true -> [hd(Writers)]
+            end
+    end.
+
+%% When remote retainers are allowed all Writers are kept; otherwise
+%% only the local node is kept, and only if it is one of the Writers.
+filter_remote(#checkpoint_args{allow_remote = true}, Writers) ->
+    Writers;
+filter_remote(_Cp, Writers) ->
+    case lists:member(node(), Writers) of
+        false -> [];
+        true -> [node()]
+    end.
+
+%% Returns the writer nodes of all Retainers, passed through
+%% mnesia_lib:uniq/1.
+writers(Retainers) ->
+    Collect = fun(R, Acc) -> R#retainer.writers ++ Acc end,
+    mnesia_lib:uniq(lists:foldl(Collect, [], Retainers)).
+
+%% Prepare the checkpoint on all of its nodes through mnesia_tm and, if
+%% no node is reported as bad, go on to check the individual replies.
+do_activate(Cp) ->
+    Name = Cp#checkpoint_args.name,
+    Nodes = Cp#checkpoint_args.nodes,
+    Prepared = mnesia_tm:prepare_checkpoint(Nodes, Cp),
+    case Prepared of
+        {Replies, []} ->
+            check_prep(Replies, Name, Nodes, Cp#checkpoint_args.ignore_new);
+        {_, BadNodes} ->
+            {error, {"Cannot prepare checkpoint (bad nodes)",
+                     [Name, BadNodes]}}
+    end.
+
+%% Check the replies from the prepare phase one by one.
+%%
+%% An {ok, ...} reply is accepted only when it carries the same Name
+%% and IgnoreNew as the ones being activated: the variables are
+%% repeated in the clause head, so they are compared, not bound. The
+%% first {error, _} or {badrpc, _} reply ends the check with an error.
+%% When all replies have been accepted the pending transactions are
+%% collected.
+check_prep([{ok, Name, IgnoreNew, _Node} | Replies], Name, Nodes, IgnoreNew) ->
+ check_prep(Replies, Name, Nodes, IgnoreNew);
+check_prep([{error, Reason} | _Replies], Name, _Nodes, _IgnoreNew) ->
+ {error, {"Cannot prepare checkpoint (bad reply)",
+ [Name, Reason]}};
+check_prep([{badrpc, Reason} | _Replies], Name, _Nodes, _IgnoreNew) ->
+ {error, {"Cannot prepare checkpoint (badrpc)",
+ [Name, Reason]}};
+check_prep([], Name, Nodes, IgnoreNew) ->
+ collect_pending(Name, Nodes, IgnoreNew).
+
+%% Ask the checkpoint process on every node for its pending
+%% transactions and merge the answers in a temporary ets table before
+%% going on with the activation.
+%%
+%% At this point the checkpoint has been prepared on all Nodes, so
+%% every failure path deactivates it again before returning the error.
+%% This includes the case where the temporary table cannot be created,
+%% which previously left the prepared retainers running.
+collect_pending(Name, Nodes, IgnoreNew) ->
+    case rpc:multicall(Nodes, ?MODULE, call, [Name, collect_pending]) of
+        {Replies, []} ->
+            case catch ?ets_new_table(mnesia_union, [bag]) of
+                {'EXIT', Reason} -> %% system limit
+                    deactivate(Nodes, Name),
+                    Msg = "Cannot create an ets table pending union",
+                    {error, {system_limit, Msg, Reason}};
+                UnionTab ->
+                    compute_union(Replies, Nodes, Name, UnionTab, IgnoreNew)
+            end;
+        {_, BadNodes} ->
+            deactivate(Nodes, Name),
+            {error, {"Cannot collect from pending checkpoint", Name, BadNodes}}
+    end.
+
+%% Merge the pending transactions reported by each node into UnionTab.
+%% When all replies have been merged the activation is sent out. The
+%% first failing reply deactivates the checkpoint, deletes UnionTab and
+%% is returned as an error.
+compute_union([], Nodes, Name, UnionTab, IgnoreNew) ->
+    send_activate(Nodes, Nodes, Name, UnionTab, IgnoreNew);
+compute_union([{ok, Pending} | Replies], Nodes, Name, UnionTab, IgnoreNew) ->
+    add_pending(Pending, UnionTab),
+    compute_union(Replies, Nodes, Name, UnionTab, IgnoreNew);
+compute_union([{Tag, Reason} | _Replies], Nodes, Name, UnionTab, _IgnoreNew)
+  when Tag =:= error; Tag =:= badrpc ->
+    deactivate(Nodes, Name),
+    ?ets_delete_table(UnionTab),
+    case Tag of
+        error -> {error, Reason};
+        badrpc -> {error, {badrpc, Reason}}
+    end.
+
+%% Enter every pending transaction into UnionTab, once for each of its
+%% disc nodes and once for each of its ram nodes. Returns ok.
+add_pending(Pending, UnionTab) ->
+    Enter = fun(P) ->
+                    add_pending_node(P#pending.disc_nodes, P#pending.tid, UnionTab),
+                    add_pending_node(P#pending.ram_nodes, P#pending.tid, UnionTab)
+            end,
+    lists:foreach(Enter, Pending).
+
+%% Insert {Node, Tid} into UnionTab for every node in Nodes. Returns ok.
+add_pending_node(Nodes, Tid, UnionTab) ->
+    lists:foreach(fun(Node) -> ?ets_insert(UnionTab, {Node, Tid}) end, Nodes).
+
+%% Activate the checkpoint on one node at a time, giving each node the
+%% pending transactions recorded for it in UnionTab, minus the ones in
+%% IgnoreNew. Returns {ok, Name, AllNodes} when every node has been
+%% activated.
+%%
+%% If a node fails, the checkpoint is deactivated on AllNodes and not
+%% only on the nodes that have not been visited yet. Otherwise the
+%% nodes that were already activated, and the failing node, would keep
+%% the checkpoint although an error is returned. Nodes where the
+%% checkpoint no longer exists just answer with an error, which
+%% deactivate/2 ignores.
+send_activate([Node | Nodes], AllNodes, Name, UnionTab, IgnoreNew) ->
+    Pending = [Tid || {_, Tid} <- ?ets_lookup(UnionTab, Node),
+                      not lists:member(Tid, IgnoreNew)],
+    case rpc:call(Node, ?MODULE, call, [Name, {activate, Pending}]) of
+        activated ->
+            send_activate(Nodes, AllNodes, Name, UnionTab, IgnoreNew);
+        {badrpc, Reason} ->
+            deactivate(AllNodes, Name),
+            ?ets_delete_table(UnionTab),
+            {error, {"Activation failed (bad node)", Name, Node, Reason}};
+        {error, Reason} ->
+            deactivate(AllNodes, Name),
+            ?ets_delete_table(UnionTab),
+            {error, {"Activation failed", Name, Node, Reason}}
+    end;
+send_activate([], AllNodes, Name, UnionTab, _IgnoreNew) ->
+    ?ets_delete_table(UnionTab),
+    {ok, Name, AllNodes}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Checkpoint server
+
+%% Send Msg to the process of the checkpoint Name without waiting for
+%% an answer. Returns {ok, Pid}, or {error, {no_exists, Name}} when the
+%% checkpoint is not known on this node.
+cast(Name, Msg) ->
+    case ?catch_val({checkpoint, Name}) of
+        Pid when is_pid(Pid) ->
+            Pid ! {self(), Msg},
+            {ok, Pid};
+        {'EXIT', _} ->
+            {error, {no_exists, Name}}
+    end.
+
+%% Send Msg to the process of the checkpoint Name and wait for its
+%% reply.
+%%
+%% Returns the reply, {error, {no_exists, Name}} when the checkpoint is
+%% not known on this node, or {error, {"Got exit", [Name, Reason]}}
+%% when the checkpoint process dies before replying.
+%%
+%% The process is monitored while waiting. The monitor is removed with
+%% the flush option on every path where no 'DOWN' message has been
+%% received, so that a late 'DOWN' message cannot be left behind in the
+%% caller's mailbox.
+call(Name, Msg) ->
+    case ?catch_val({checkpoint, Name}) of
+        {'EXIT', _} ->
+            {error, {no_exists, Name}};
+
+        Pid when is_pid(Pid) ->
+            Monitor = erlang:monitor(process, Pid), %catch link(Pid), % Always local
+            Pid ! {self(), Msg},
+            Self = self(),
+            receive
+                {'EXIT', Pid, Reason} ->
+                    %% Only seen when the caller is linked to Pid and
+                    %% traps exits
+                    erlang:demonitor(Monitor, [flush]),
+                    {error, {"Got exit", [Name, Reason]}};
+                {'DOWN', Monitor, _, Pid, Reason} ->
+                    {error, {"Got exit", [Name, Reason]}};
+                {Name, Self, Reply} ->
+                    erlang:demonitor(Monitor, [flush]),
+                    Reply
+            end;
+        Error ->
+            Error
+    end.
+
+%% Run cast(Name, Msg) on every node in Nodes through
+%% rpc:eval_everywhere/4, without collecting any results.
+abcast(Nodes, Name, Msg) ->
+ rpc:eval_everywhere(Nodes, ?MODULE, cast, [Name, Msg]).
+
+%% Send Reply to ReplyTo in the {Name, ReplyTo, Reply} format that
+%% call/2 waits for. Nothing is sent when ReplyTo is the atom nopid.
+reply(nopid, _Name, _Reply) ->
+ ignore;
+reply(ReplyTo, Name, Reply) ->
+ ReplyTo ! {Name, ReplyTo, Reply}.
+
+%% Start the checkpoint process as a child of mnesia_checkpoint_sup.
+%% Returns {ok, Name, IgnoreNew, Node} or {error, Reason}; the ok form
+%% is what check_prep/4 matches on.
+start_retainer(Cp) ->
+ % Will never be restarted
+ Name = Cp#checkpoint_args.name,
+ case supervisor:start_child(mnesia_checkpoint_sup, [Cp]) of
+ {ok, _Pid} ->
+ {ok, Name, Cp#checkpoint_args.ignore_new, node()};
+ {error, Reason} ->
+ {error, {"Cannot create checkpoint retainer",
+ Name, node(), Reason}}
+ end.
+
+%% Start the checkpoint process through mnesia_monitor:start_proc/4,
+%% recording the calling process as its supervisor.
+start(Cp) ->
+    Supervised = Cp#checkpoint_args{supervisor = self()},
+    mnesia_monitor:start_proc({?MODULE, Cp#checkpoint_args.name},
+                              ?MODULE, init, [Supervised]).
+
+%% Initialise the checkpoint process.
+%%
+%% Creates the ets table for pending transactions, prepares a retainer
+%% for every table of the checkpoint and registers the checkpoint under
+%% the keys pending_checkpoint_pids, pending_checkpoints,
+%% {checkpoint, Name} and checkpoints. The supervisor is then
+%% acknowledged with {ok, self()} and the process enters
+%% retainer_loop/1. If the pending table cannot be created the
+%% supervisor is acknowledged with an error instead and the function
+%% returns.
+init(Cp) ->
+ process_flag(trap_exit, true),
+ process_flag(priority, high), %% Needed dets files might starve the system
+ Name = Cp#checkpoint_args.name,
+ Props = [set, public, {keypos, 2}],
+ case catch ?ets_new_table(mnesia_pending_checkpoint, Props) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table for pending transactions",
+ Error = {error, {system_limit, Name, Msg, Reason}},
+ proc_lib:init_ack(Cp#checkpoint_args.supervisor, Error);
+ PendingTab ->
+ Rs = [prepare_tab(Cp, R) || R <- Cp#checkpoint_args.retainers],
+ Cp2 = Cp#checkpoint_args{retainers = Rs,
+ pid = self(),
+ pending_tab = PendingTab},
+ add(pending_checkpoint_pids, self()),
+ add(pending_checkpoints, PendingTab),
+ set({checkpoint, Name}, self()),
+ add(checkpoints, Name),
+ dbg_out("Checkpoint ~p (~p) started~n", [Name, self()]),
+ proc_lib:init_ack(Cp2#checkpoint_args.supervisor, {ok, self()}),
+ retainer_loop(Cp2)
+ end.
+
+%% Prepare the retainer R using the current storage type of its table.
+prepare_tab(Cp, R) ->
+    prepare_tab(Cp, R, val({R#retainer.tab_name, storage_type})).
+
+%% If this node is one of the writers of R, create the retainer store
+%% and register the checkpoint with the table; the retainer with its
+%% store filled in is returned. Otherwise only a store-less copy of R
+%% is registered and R itself is returned.
+prepare_tab(Cp, R, Storage) ->
+    Tab = R#retainer.tab_name,
+    Name = R#retainer.cp_name,
+    IsWriter = lists:member(node(), R#retainer.writers),
+    if
+        IsWriter ->
+            Created = retainer_create(Cp, R, Tab, Name, Storage),
+            set({Tab, {retainer, Name}}, Created),
+            %% Keep checkpoint info for table_info & mnesia_session
+            add({Tab, checkpoints}, Name),
+            add_chkp_info(Tab, Name),
+            Created;
+        true ->
+            set({Tab, {retainer, Name}}, R#retainer{store = undefined}),
+            R
+    end.
+
+%% Make sure Name is listed in the checkpoints entry at the head of the
+%% commit_work of Tab, creating that entry if there is none.
+add_chkp_info(Tab, Name) ->
+    case val({Tab, commit_work}) of
+        [{checkpoints, Active} | Rest] ->
+            case lists:member(Name, Active) of
+                false ->
+                    Updated = [{checkpoints, [Name | Active]} | Rest],
+                    mnesia_lib:set({Tab, commit_work}, Updated);
+                true ->
+                    ok
+            end;
+        CommitList ->
+            %% OBS checkpoints needs to be first in the list!
+            mnesia_lib:set({Tab, commit_work},
+                           [{checkpoints, [Name]} | CommitList])
+    end.
+
+%% Returns the name of the retainer file for the checkpoint Name on
+%% Tab, as produced by mnesia_lib:dir/1. The checkpoint name is
+%% included in its printed form.
+tab2retainer({Tab, Name}) ->
+    Printed = lists:flatten(io_lib:write(Name)),
+    FileName = lists:concat([?MODULE, "_", Tab, "_", Printed, ".RET"]),
+    mnesia_lib:dir(FileName).
+
+%% Create the store of a retainer. For disc_only_copies tables it is a
+%% fresh dets file, for all other storage types an ets table. The
+%% really_retain flag of an ets retainer is decided by
+%% prepare_ram_tab/5.
+retainer_create(_Cp, R, Tab, Name, disc_only_copies) ->
+    StoreName = {Tab, Name},
+    File = tab2retainer(StoreName),
+    file:delete(File),
+    DetsArgs = [{file, File}, {type, set}, {keypos, 2}, {repair, false}],
+    {ok, _} = mnesia_lib:dets_sync_open(StoreName, DetsArgs),
+    dbg_out("Checkpoint retainer created ~p ~p~n", [Name, Tab]),
+    R#retainer{store = {dets, StoreName}, really_retain = true};
+retainer_create(Cp, R, Tab, Name, Storage) ->
+    EtsTab = ?ets_new_table(mnesia_retainer, [set, public, {keypos, 2}]),
+    IsOverrider = lists:member(Tab, Cp#checkpoint_args.ram_overrides_dump),
+    Really = prepare_ram_tab(Tab, EtsTab, Storage,
+                             R#retainer.really_retain, IsOverrider),
+    dbg_out("Checkpoint retainer created ~p ~p~n", [Name, Tab]),
+    R#retainer{store = {ets, EtsTab}, really_retain = Really}.
+
+%% Copy the dumped table into retainer if needed
+%% If the really_retain flag already has been set to false,
+%% it should remain false even if we change storage type
+%% while the checkpoint is activated.
+%%
+%% Arguments: the table, the retainer ets table T, the storage type,
+%% the current really_retain flag and whether ram overrides dump for
+%% this table. Returns the new really_retain flag.
+%%
+%% Only a ram_copies table that is still really retained and is not an
+%% overrider is copied: every record in its dcd file is added to T under
+%% its key and false is returned. In all other cases the flag is
+%% returned unchanged.
+prepare_ram_tab(Tab, T, ram_copies, true, false) ->
+ Fname = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Fname) of
+ true ->
+ Log = mnesia_log:open_log(prepare_ram_tab,
+ mnesia_log:dcd_log_header(),
+ Fname, true,
+ mnesia_monitor:get_env(auto_repair),
+ read_only),
+ %% Records with the same key are collected in one list, stored
+ %% as {Tab, Key, Recs} with the most recently read record first
+ Add = fun(Rec) ->
+ Key = element(2, Rec),
+ Recs =
+ case ?ets_lookup(T, Key) of
+ [] -> [];
+ [{_, _, Old}] -> Old
+ end,
+ ?ets_insert(T, {Tab, Key, [Rec | Recs]}),
+ continue
+ end,
+ traverse_dcd(mnesia_log:chunk_log(Log, start), Log, Add),
+ mnesia_log:close_log(Log);
+ false ->
+ ok
+ end,
+ false;
+prepare_ram_tab(_, _, _, ReallyRetain, _) ->
+ ReallyRetain.
+
+%% Apply Fun to every record in the dcd log, chunk by chunk, until
+%% mnesia_log:chunk_log/2 returns eof.
+%%
+%% A chunk that begins with a dcd log header of version "1.0" or later
+%% has the header dropped before the rest of the chunk is processed.
+%% The version is compared as a string.
+traverse_dcd({Cont, [LogH | Rest]}, Log, Fun)
+ when is_record(LogH, log_header),
+ LogH#log_header.log_kind == dcd_log,
+ LogH#log_header.log_version >= "1.0" ->
+ traverse_dcd({Cont, Rest}, Log, Fun); %% BUGBUG Error handling repaired files
+traverse_dcd({Cont, Recs}, Log, Fun) -> %% trashed data??
+ lists:foreach(Fun, Recs),
+ traverse_dcd(mnesia_log:chunk_log(Log, Cont), Log, Fun);
+traverse_dcd(eof, _Log, _Fun) ->
+ ok.
+
+%% Uniform access to a retainer store, which is either {ets, Store} or
+%% {dets, Store}. Each function forwards to the ets macro or the dets
+%% function of the same kind and returns its result unchanged.
+
+%% Look up the objects stored under Key.
+retainer_get({ets, Store}, Key) -> ?ets_lookup(Store, Key);
+retainer_get({dets, Store}, Key) -> dets:lookup(Store, Key).
+
+%% Insert Val into the store.
+retainer_put({ets, Store}, Val) -> ?ets_insert(Store, Val);
+retainer_put({dets, Store}, Val) -> dets:insert(Store, Val).
+
+%% First key of the store.
+retainer_first({ets, Store}) -> ?ets_first(Store);
+retainer_first({dets, Store}) -> dets:first(Store).
+
+%% The key following Key.
+retainer_next({ets, Store}, Key) -> ?ets_next(Store, Key);
+retainer_next({dets, Store}, Key) -> dets:next(Store, Key).
+
+%% retainer_next_slot(Tab, Pos) ->
+%% case retainer_slot(Tab, Pos) of
+%% '$end_of_table' ->
+%% '$end_of_table';
+%% [] ->
+%% retainer_next_slot(Tab, Pos + 1);
+%% Recs when is_list(Recs) ->
+%% {Pos, Recs}
+%% end.
+%%
+%% retainer_slot({ets, Store}, Pos) -> ?ets_next(Store, Pos);
+%% retainer_slot({dets, Store}, Pos) -> dets:slot(Store, Pos).
+
+%% Fix or unfix (Flag = true | false) a table for traversal. A bare atom
+%% names an Mnesia table whose storage type is looked up; a tagged tuple
+%% names an ets/dets store directly.
+retainer_fixtable(Tab, Flag) when is_atom(Tab) ->
+    Storage = val({Tab, storage_type}),
+    mnesia_lib:db_fixtable(Storage, Tab, Flag);
+retainer_fixtable({ets, Ets}, Flag) ->
+    mnesia_lib:db_fixtable(ram_copies, Ets, Flag);
+retainer_fixtable({dets, Dets}, Flag) ->
+    mnesia_lib:db_fixtable(disc_only_copies, Dets, Flag).
+
+%% Dispose of a retainer store: a dets store is closed and its file
+%% removed, an ets store is simply deleted.
+retainer_delete({dets, Dets}) ->
+    mnesia_lib:dets_sync_close(Dets),
+    file:delete(tab2retainer(Dets));
+retainer_delete({ets, Ets}) ->
+    ?ets_delete_table(Ets).
+
+%% Main receive loop of the checkpoint retainer process.
+%%
+%% Cp is the #checkpoint_args{} state. Clauses guarded by
+%% wait_for_old == [] are not selected while that list is non-empty, so
+%% such requests stay in the mailbox until the pending transactions have
+%% been drained through exit_pending / activate.
+%%
+%% Every message has the shape {Sender, Request}, except 'EXIT' and
+%% system messages.
+retainer_loop(Cp) ->
+    Name = Cp#checkpoint_args.name,
+    receive
+        {_From, {retain, Tid, Tab, Key, OldRecs}}
+          when Cp#checkpoint_args.wait_for_old == [] ->
+            R = val({Tab, {retainer, Name}}),
+            PendingTab = Cp#checkpoint_args.pending_tab,
+            case R#retainer.really_retain of
+                true when PendingTab =:= undefined ->
+                    %% Keep only the first (oldest) value seen per key.
+                    Store = R#retainer.store,
+                    case retainer_get(Store, Key) of
+                        [] -> retainer_put(Store, {Tab, Key, OldRecs});
+                        _ -> already_retained
+                    end;
+                true ->
+                    %% Transactions registered in the pending table are
+                    %% not retained.
+                    case ets:member(PendingTab, Tid) of
+                        true -> ignore;
+                        false ->
+                            Store = R#retainer.store,
+                            case retainer_get(Store, Key) of
+                                [] -> retainer_put(Store, {Tab, Key, OldRecs});
+                                _ -> already_retained
+                            end
+                    end;
+                false ->
+                    ignore
+            end,
+            retainer_loop(Cp);
+
+        %% Adm
+        {From, deactivate} ->
+            do_stop(Cp),
+            reply(From, Name, deactivated),
+            unlink(From),
+            exit(shutdown);
+
+        {'EXIT', Parent, _} when Parent == Cp#checkpoint_args.supervisor ->
+            %% do_stop(Cp),
+            %% assume that entire Mnesia is terminating
+            exit(shutdown);
+
+        {_From, {mnesia_down, Node}} ->
+            Cp2 = do_del_retainers(Cp, Node),
+            retainer_loop(Cp2);
+        {From, get_checkpoint} ->
+            reply(From, Name, Cp),
+            retainer_loop(Cp);
+        {From, {add_copy, Tab, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
+            {Res, Cp2} = do_add_copy(Cp, Tab, Node),
+            reply(From, Name, Res),
+            retainer_loop(Cp2);
+        {From, {del_copy, Tab, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
+            Cp2 = do_del_copy(Cp, Tab, Node),
+            reply(From, Name, ok),
+            retainer_loop(Cp2);
+        %% The storage types must not reuse the name From: a repeated
+        %% variable in a pattern demands equal values, and the sender can
+        %% never equal a storage type, so the request was never matched.
+        {From, {change_copy, Tab, FromType, ToType}}
+          when Cp#checkpoint_args.wait_for_old == [] ->
+            Cp2 = do_change_copy(Cp, Tab, FromType, ToType),
+            reply(From, Name, ok),
+            retainer_loop(Cp2);
+        {_From, {add_retainer, R, Node}} ->
+            Cp2 = do_add_retainer(Cp, R, Node),
+            retainer_loop(Cp2);
+        {_From, {del_retainer, R, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
+            Cp2 = do_del_retainer(Cp, R, Node),
+            retainer_loop(Cp2);
+
+        %% Iteration
+        {From, {iter_begin, Iter}} when Cp#checkpoint_args.wait_for_old == [] ->
+            Cp2 = iter_begin(Cp, From, Iter),
+            retainer_loop(Cp2);
+
+        {From, {iter_end, Iter}} when Cp#checkpoint_args.wait_for_old == [] ->
+            retainer_fixtable(Iter#iter.oid_tab, false),
+            Iters = Cp#checkpoint_args.iterators -- [Iter],
+            reply(From, Name, ok),
+            retainer_loop(Cp#checkpoint_args{iterators = Iters});
+
+        {_From, {exit_pending, Tid}}
+          when is_list(Cp#checkpoint_args.wait_for_old) ->
+            StillPending = lists:delete(Tid, Cp#checkpoint_args.wait_for_old),
+            Cp2 = Cp#checkpoint_args{wait_for_old = StillPending},
+            Cp3 = maybe_activate(Cp2),
+            retainer_loop(Cp3);
+
+        {From, collect_pending} ->
+            PendingTab = Cp#checkpoint_args.pending_tab,
+            del(pending_checkpoints, PendingTab),
+            Pending = ?ets_match_object(PendingTab, '_'),
+            reply(From, Name, {ok, Pending}),
+            retainer_loop(Cp);
+
+        {From, {activate, Pending}} ->
+            StillPending = mnesia_recover:still_pending(Pending),
+            enter_still_pending(StillPending, Cp#checkpoint_args.pending_tab),
+            Cp2 = maybe_activate(Cp#checkpoint_args{wait_for_old = StillPending}),
+            reply(From, Name, activated),
+            retainer_loop(Cp2);
+
+        {'EXIT', From, _Reason} ->
+            %% check_iter/2 unfixes and drops the iterators owned by the
+            %% process that exited.
+            Iters = [Iter || Iter <- Cp#checkpoint_args.iterators,
+                             check_iter(From, Iter)],
+            retainer_loop(Cp#checkpoint_args{iterators = Iters});
+
+        {system, From, Msg} ->
+            dbg_out("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
+            sys:handle_system_msg(Msg, From, no_parent, ?MODULE, [], Cp)
+    end.
+
+%% Flip the checkpoint to activated once no old transactions remain and
+%% it has not been activated before; the pending table reference is
+%% dropped at the same time. Otherwise the state is returned untouched.
+maybe_activate(#checkpoint_args{wait_for_old = [], is_activated = false} = Cp) ->
+    Cp#checkpoint_args{pending_tab = undefined, is_activated = true};
+maybe_activate(Unchanged) ->
+    Unchanged.
+
+%% Register a new iterator for the process From: resolve its tables,
+%% fix the table that keys are read from, answer {ok, Iter, self()} and
+%% remember the iterator in the checkpoint state.
+iter_begin(Cp, From, Iter) ->
+    CpName = Cp#checkpoint_args.name,
+    Retainer = val({Iter#iter.tab_name, {retainer, CpName}}),
+    Prepared = (init_tabs(Retainer, Iter))#iter{pid = From},
+    retainer_fixtable(Prepared#iter.oid_tab, true),
+    Active = [Prepared | Cp#checkpoint_args.iterators],
+    reply(From, CpName, {ok, Prepared, self()}),
+    Cp#checkpoint_args{iterators = Active}.
+
+%% Tear down the checkpoint: remove its global bookkeeping entries,
+%% deactivate every retainer and unfix the tables of all iterators that
+%% are still registered.
+do_stop(Cp) ->
+    CpName = Cp#checkpoint_args.name,
+    del(pending_checkpoints, Cp#checkpoint_args.pending_tab),
+    del(pending_checkpoint_pids, self()),
+    del(checkpoints, CpName),
+    unset({checkpoint, CpName}),
+    lists:foreach(fun deactivate_tab/1, Cp#checkpoint_args.retainers),
+    Unfix = fun(Iter) -> retainer_fixtable(Iter#iter.oid_tab, false) end,
+    lists:foreach(Unfix, Cp#checkpoint_args.iterators).
+
+%% Detach one table from the checkpoint. The retainer store is deleted
+%% only when this node is one of the retainer's writers. This is a
+%% best-effort cleanup: any failure is swallowed and answers ignore.
+deactivate_tab(R) ->
+    CpName = R#retainer.cp_name,
+    Tab = R#retainer.tab_name,
+    try
+        IsWriter = lists:member(node(), R#retainer.writers),
+        case {R#retainer.store, IsWriter} of
+            {undefined, _} ->
+                ignore;
+            {Store, true} ->
+                retainer_delete(Store);
+            {_, false} ->
+                ignore
+        end,
+        unset({Tab, {retainer, CpName}}),
+        del({Tab, checkpoints}, CpName), %% Keep checkpoint info for table_info & mnesia_session
+        del_chkp_info(Tab, CpName)
+    catch _:_ -> ignore
+    end.
+
+%% Remove checkpoint Name from the {checkpoints, List} entry heading the
+%% table's commit_work. The whole entry is dropped when the list becomes
+%% empty. Nothing happens when commit_work has no such leading entry.
+del_chkp_info(Tab, Name) ->
+    WorkKey = {Tab, commit_work},
+    case val(WorkKey) of
+        [{checkpoints, Names} | OtherWork] ->
+            NewWork =
+                case lists:delete(Name, Names) of
+                    [] ->
+                        %% The only checkpoint was deleted
+                        OtherWork;
+                    Remaining ->
+                        [{checkpoints, Remaining} | OtherWork]
+                end,
+            mnesia_lib:set(WorkKey, NewWork);
+        _ ->
+            ignore
+    end.
+
+%% Remove Node as writer from every retainer of the checkpoint and
+%% recompute the checkpoint's node list from the result.
+do_del_retainers(Cp, Node) ->
+    Strip = fun(Retainer) -> do_del_retainer2(Cp, Retainer, Node) end,
+    Retainers = lists:map(Strip, Cp#checkpoint_args.retainers),
+    Cp#checkpoint_args{retainers = Retainers, nodes = writers(Retainers)}.
+
+%% Drop Node from the writers of retainer R and publish the new record.
+%% If no writer is left, the checkpoint is reported as deactivated,
+%% stopped, and this process exits with reason shutdown. If the removed
+%% node is the local one, the local retainer is deactivated first and
+%% the record is published again afterwards.
+do_del_retainer2(Cp, R, Node) ->
+    Remaining = R#retainer.writers -- [Node],
+    Updated = R#retainer{writers = Remaining},
+    GvarKey = {Updated#retainer.tab_name, {retainer, Updated#retainer.cp_name}},
+    set(GvarKey, Updated),
+    case Remaining of
+        [] ->
+            Event = {mnesia_checkpoint_deactivated, Cp#checkpoint_args.name},
+            mnesia_lib:report_system_event(Event),
+            do_stop(Cp),
+            exit(shutdown);
+        _ when Node == node() ->
+            deactivate_tab(R), % Avoids unnecessary tm_retain accesses
+            set(GvarKey, Updated),
+            Updated;
+        _ ->
+            Updated
+    end.
+
+%% Remove Node as writer from the single retainer matching R0 (same
+%% checkpoint and table) and refresh the checkpoint's node list.
+do_del_retainer(Cp, R0, Node) ->
+    {Found, Others} = find_retainer(R0, Cp#checkpoint_args.retainers, []),
+    Retainers = [do_del_retainer2(Cp, Found, Node) | Others],
+    Cp#checkpoint_args{retainers = Retainers, nodes = writers(Retainers)}.
+
+%% Handle removal of the local replica of Tab: tell the other nodes of
+%% the checkpoint to drop this node as writer, then do the same locally.
+%% Only defined for ThisNode == node().
+do_del_copy(Cp, Tab, ThisNode) when ThisNode == node() ->
+    CpName = Cp#checkpoint_args.name,
+    Peers = Cp#checkpoint_args.nodes -- [ThisNode],
+    Retainer = val({Tab, {retainer, CpName}}),
+    abcast(Peers, CpName, {del_retainer, Retainer, ThisNode}),
+    do_del_retainer(Cp, Retainer, ThisNode).
+
+%% Extend the checkpoint to a new replica of Tab on the remote node Node.
+%% Returns {Result, Cp2}, where Result is ok or {error, Reason}.
+%% Only defined for Node /= node().
+do_add_copy(Cp, Tab, Node) when Node /= node()->
+ case lists:member(Tab, Cp#checkpoint_args.max) of
+ false ->
+ %% Tab is not in the max list; nothing to do
+ {ok, Cp};
+ true ->
+ Name = Cp#checkpoint_args.name,
+ R0 = val({Tab, {retainer, Name}}),
+ W = R0#retainer.writers,
+ R = R0#retainer{writers = W ++ [Node]},
+
+ case lists:member(Node, Cp#checkpoint_args.nodes) of
+ true ->
+ %% Node already takes part in this checkpoint
+ send_retainer(Cp, R, Node);
+ false ->
+ %% Node must first prepare the checkpoint (via rpc)
+ case tm_remote_prepare(Node, Cp) of
+ {ok, Name, _IgnoreNew, Node} ->
+ case lists:member(schema, Cp#checkpoint_args.max) of
+ true ->
+ %% We need to send schema retainer somewhere
+ RS0 = val({schema, {retainer, Name}}),
+ WS = RS0#retainer.writers,
+ RS1 = RS0#retainer{writers = WS ++ [Node]},
+ {ok, Cp1} = send_retainer(Cp, RS1, Node),
+ send_retainer(Cp1, R, Node);
+ false ->
+ send_retainer(Cp, R, Node)
+ end;
+ {badrpc, Reason} ->
+ {{error, {badrpc, Reason}}, Cp};
+ {error, Reason} ->
+ {{error, Reason}, Cp}
+ end
+ end
+ end.
+
+%% Run tm_prepare/1 of this module on Node through rpc; the rpc result
+%% (possibly {badrpc, Reason}) is returned as is.
+tm_remote_prepare(Node, Cp) ->
+    PrepareArgs = [Cp],
+    rpc:call(Node, ?MODULE, tm_prepare, PrepareArgs).
+
+%% Install the writer list carried by R0 on the matching local retainer
+%% record. When Node is the local node the table is also prepared for
+%% retaining. The updated record is published and the checkpoint's node
+%% list is recomputed.
+do_add_retainer(Cp, R0, Node) ->
+    NewWriters = R0#retainer.writers,
+    {Found, Others} = find_retainer(R0, Cp#checkpoint_args.retainers, []),
+    WithWriters = Found#retainer{writers = NewWriters},
+    Installed =
+        case Node == node() of
+            true -> prepare_tab(Cp, WithWriters);
+            false -> WithWriters
+        end,
+    Retainers = [Installed | Others],
+    set({Installed#retainer.tab_name, {retainer, Installed#retainer.cp_name}},
+        Installed),
+    Cp#checkpoint_args{retainers = Retainers, nodes = writers(Retainers)}.
+
+%% Split a retainer list into {Match, Others}, where Match is the first
+%% element with the same checkpoint name and table name as the probe.
+%% Others holds the elements after the match followed by the skipped
+%% ones in reverse order. Crashes if nothing matches.
+find_retainer(#retainer{cp_name = CpName, tab_name = TabName},
+              [#retainer{cp_name = CpName, tab_name = TabName} = Match | Tail],
+              Skipped) ->
+    {Match, Tail ++ Skipped};
+find_retainer(Probe, [Other | Tail], Skipped) ->
+    find_retainer(Probe, Tail, [Other | Skipped]).
+
+%% Announce retainer R for Node to the other checkpoint nodes and to
+%% Node itself, stream the local retainer contents to Node, and finally
+%% register the retainer locally. Returns {ok, Cp2}.
+send_retainer(Cp, R, Node) ->
+    CpName = Cp#checkpoint_args.name,
+    Peers = (Cp#checkpoint_args.nodes -- [Node]) -- [node()],
+    Announce = {add_retainer, R, Node},
+    abcast(Peers, CpName, Announce),
+    {ok, _} = rpc:call(Node, ?MODULE, cast, [CpName, Announce]),
+    Store = R#retainer.store,
+    FirstKey = retainer_first(Store),
+    send_retainer2(Node, CpName, Store, FirstKey),
+    {ok, do_add_retainer(Cp, R, Node)}.
+
+%% Walk the store key by key and send each retained entry to Node as a
+%% retain request tagged {dirty, send_retainer}.
+send_retainer2(_Node, _Name, _Store, '$end_of_table') ->
+    ok;
+%%send_retainer2(Node, Name, Store, {Slot, Records}) ->
+send_retainer2(Node, Name, Store, Key) ->
+    [{Tab, _, Retained}] = retainer_get(Store, Key),
+    RetainMsg = {retain, {dirty, send_retainer}, Tab, Key, Retained},
+    abcast([Node], Name, RetainMsg),
+    NextKey = retainer_next(Store, Key),
+    send_retainer2(Node, Name, Store, NextKey).
+
+%% Move the retainer of Tab to the store that matches the new storage
+%% type ToType and return the updated checkpoint state. Data is copied
+%% only when one side of the change is disc_only_copies, i.e. when the
+%% store has to move between dets and ets.
+do_change_copy(Cp, Tab, FromType, ToType) ->
+ Name = Cp#checkpoint_args.name,
+ R = val({Tab, {retainer, Name}}),
+ R2 = prepare_tab(Cp, R, ToType),
+ {_, Old} = R#retainer.store,
+ {_, New} = R2#retainer.store,
+
+ Fname = tab2retainer({Tab, Name}),
+ if
+ FromType == disc_only_copies ->
+ %% dets -> ets: load the file into the new table, then delete it
+ mnesia_lib:dets_sync_close(Old),
+ loaded = mnesia_lib:dets_to_ets(Old, New, Fname, set, no, yes),
+ ok = file:delete(Fname);
+ ToType == disc_only_copies ->
+ %% ets -> dets: dump the old table into a new file, then delete it
+ TabSize = ?ets_info(Old, size),
+ Props = [{file, Fname},
+ {type, set},
+ {keypos, 2},
+%% {ram_file, true},
+ {estimated_no_objects, TabSize + 256},
+ {repair, false}],
+ {ok, _} = mnesia_lib:dets_sync_open(New, Props),
+ ok = mnesia_dumper:raw_dump_table(New, Old),
+ ?ets_delete_table(Old);
+ true ->
+ %% Neither side is disc_only_copies; no data is moved here
+ ignore
+ end,
+ Pos = #retainer.tab_name,
+ Rs = lists:keyreplace(Tab, Pos, Cp#checkpoint_args.retainers, R2),
+ Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.
+
+%% Filter predicate used when a process exits: an iterator owned by
+%% that process gets its table unfixed and is rejected (false); all
+%% other iterators are kept (true).
+check_iter(Owner, #iter{pid = Owner} = Iter) ->
+    retainer_fixtable(Iter#iter.oid_tab, false),
+    false;
+check_iter(_Owner, _Iter) ->
+    true.
+
+%% Fill in the table handles of an iterator: main_tab is the Mnesia
+%% table tagged with the retainer store's kind (ets | dets),
+%% retainer_tab is the retainer store itself, and oid_tab is whichever
+%% of the two the iterator's source selects.
+init_tabs(R, Iter) ->
+    RetTab = R#retainer.store,
+    {Kind, _} = RetTab,
+    MainTab = {Kind, Iter#iter.tab_name},
+    OidTab =
+        case Iter#iter.source of
+            table -> MainTab;
+            retainer -> RetTab
+        end,
+    Iter#iter{main_tab = MainTab, retainer_tab = RetTab, oid_tab = OidTab}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Iteration
+%%
+%% Iterates over a table and applies Fun(ListOfRecords)
+%% with a suitable amount of records, e.g. 1000 or so.
+%% ListOfRecords is [] when the iteration is over.
+%%
+%% OidKind affects which internal table is iterated over and
+%% ValKind affects which table the actual records are picked from.
+%% Legal values for OidKind and ValKind are the atom table and the
+%% atom retainer.
+%%
+%% The iteration may be performed either over the main table (which
+%% contains the latest values of the records, i.e. the values that
+%% are visible to the applications) or over the checkpoint retainer
+%% (which contains the values as they looked at the point in time
+%% when the checkpoint was activated).
+%%
+%% It is possible to iterate over the main table and pick values
+%% from the retainer and vice versa.
+
+%% Run Fun over Tab inside checkpoint Name, threading Acc through.
+%% Source selects the table keys are taken from and Val which values
+%% are picked. Returns {ok, FinalAcc} or {error, Reason}. A crash or a
+%% thrown {error, _} inside Fun is turned into {error, Reason}.
+iterate(Name, Tab, Fun, Acc, Source, Val) ->
+    Request = {iter_begin, #iter{tab_name = Tab, source = Source, val = Val}},
+    case call(Name, Request) of
+        {ok, Iter, RetainerPid} ->
+            link(RetainerPid), % We don't want any pending fixtable's
+            Outcome = (catch iter(Fun, Acc, Iter)),
+            unlink(RetainerPid),
+            call(Name, {iter_end, Iter}),
+            case Outcome of
+                {'EXIT', Crash} -> {error, Crash};
+                {error, Why} -> {error, Why};
+                FinalAcc -> {ok, FinalAcc}
+            end;
+        {error, _} = Error ->
+            Error
+    end.
+
+%% Start the traversal at the first key of the iterator's key table.
+iter(Fun, Acc, Iter) ->
+    FirstKey = retainer_first(Iter#iter.oid_tab),
+    iter(Fun, Acc, Iter, FirstKey).
+
+%% Feed Fun one batch of records at a time. The traversal always ends
+%% with a final Fun([], Acc) call, whose result is the return value.
+iter(Fun, Acc, Iter, Key) ->
+    {NextKey, Batch} = get_records(Iter, Key),
+    case {NextKey, Batch} of
+        {'$end_of_table', []} ->
+            Fun([], Acc);
+        {'$end_of_table', _} ->
+            Fun([], Fun(Batch, Acc));
+        _ ->
+            iter(Fun, Fun(Batch, Acc), Iter, NextKey)
+    end.
+
+%% Abort an ongoing iteration by throwing {error, {stopped, Reason}}.
+stop_iteration(Reason) ->
+    Stopped = {stopped, Reason},
+    throw({error, Stopped}).
+
+%% Collect the values of up to 500 consecutive keys, starting at Key.
+%% Returns {NextKey, Records}; NextKey is '$end_of_table' when the key
+%% table has been exhausted.
+get_records(Iter, Key) ->
+    get_records(Iter, Key, 500, []). % 500 keys
+
+%% Chunks holds one list of values per visited key, most recent first.
+get_records(_Iter, Key, 0, Chunks) ->
+    {Key, lists:append(lists:reverse(Chunks))};
+get_records(_Iter, '$end_of_table', _Left, Chunks) ->
+    {'$end_of_table', lists:append(lists:reverse(Chunks))};
+get_records(Iter, Key, Left, Chunks) ->
+    Values = get_val(Iter, Key),
+    NextKey = retainer_next(Iter#iter.oid_tab, Key),
+    get_records(Iter, NextKey, Left - 1, [Values | Chunks]).
+
+%% Pick the values for Key according to the iterator's val setting.
+get_val(#iter{val = latest} = Iter, Key) ->
+    get_latest_val(Iter, Key);
+get_val(#iter{val = checkpoint} = Iter, Key) ->
+    get_checkpoint_val(Iter, Key).
+
+%% Latest values come from the main table. When the keys are taken from
+%% the retainer, the result is prefixed with the oid {Tab, Key}.
+get_latest_val(#iter{source = table} = Iter, Key) ->
+    retainer_get(Iter#iter.main_tab, Key);
+get_latest_val(#iter{source = retainer} = Iter, Key) ->
+    Oid = {Iter#iter.tab_name, Key},
+    [Oid | retainer_get(Iter#iter.main_tab, Key)].
+
+%% Checkpoint values: with source table they are read from the main
+%% table; with source retainer they are the retained records, prefixed
+%% with the oid {Tab, Key}.
+get_checkpoint_val(#iter{source = table} = Iter, Key) ->
+    retainer_get(Iter#iter.main_tab, Key);
+get_checkpoint_val(#iter{source = retainer} = Iter, Key) ->
+    Oid = {Iter#iter.tab_name, Key},
+    [{_, _, Retained}] = retainer_get(Iter#iter.retainer_tab, Key),
+    [Oid | Retained].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+%% sys callback: resume the retainer loop after a system message.
+system_continue(_Parent, _Debug, State) ->
+    retainer_loop(State).
+
+%% sys callback: stop the checkpoint on termination.
+system_terminate(_Reason, _Parent, _Debug, State) ->
+    do_stop(State).
+
+%% sys callback: the state is carried over unchanged at code change.
+system_code_change(State, _Module, _OldVsn, _Extra) ->
+    {ok, State}.
+
+%% Convert between the two checkpoint record formats.
+%%
+%% #checkpoint{} -> {ok, #checkpoint_args{}}
+%%   The boolean ram_overrides_dump becomes a table list: min ++ max
+%%   when true, [] when false. A fresh cookie is generated.
+%%
+%% #checkpoint_args{} -> {ok, #checkpoint{}} | {error, Reason}
+%%   The table list becomes a boolean: [] -> false, exactly min ++ max
+%%   -> true. Any other list cannot be expressed in the #checkpoint{}
+%%   format and yields an error.
+%%
+%% allow_remote is taken from the allow_remote field; it used to be
+%% filled in with the checkpoint name by mistake.
+convert_cp_record(Cp) when is_record(Cp, checkpoint) ->
+    ROD =
+        case Cp#checkpoint.ram_overrides_dump of
+            true -> Cp#checkpoint.min ++ Cp#checkpoint.max;
+            false -> []
+        end,
+
+    {ok, #checkpoint_args{name = Cp#checkpoint.name,
+                          allow_remote = Cp#checkpoint.allow_remote,
+                          ram_overrides_dump = ROD,
+                          nodes = Cp#checkpoint.nodes,
+                          node = Cp#checkpoint.node,
+                          now = Cp#checkpoint.now,
+                          cookie = ?unique_cookie,
+                          min = Cp#checkpoint.min,
+                          max = Cp#checkpoint.max,
+                          pending_tab = Cp#checkpoint.pending_tab,
+                          wait_for_old = Cp#checkpoint.wait_for_old,
+                          is_activated = Cp#checkpoint.is_activated,
+                          ignore_new = Cp#checkpoint.ignore_new,
+                          retainers = Cp#checkpoint.retainers,
+                          iterators = Cp#checkpoint.iterators,
+                          supervisor = Cp#checkpoint.supervisor,
+                          pid = Cp#checkpoint.pid
+                         }};
+convert_cp_record(Cp) when is_record(Cp, checkpoint_args) ->
+    AllTabs = Cp#checkpoint_args.min ++ Cp#checkpoint_args.max,
+    %% [] is tested first, so an empty override list means false even
+    %% when min ++ max happens to be empty as well.
+    ROD = case Cp#checkpoint_args.ram_overrides_dump of
+              [] ->
+                  false;
+              AllTabs ->
+                  true;
+              _ ->
+                  error
+          end,
+    if
+        ROD == error ->
+            {error, {"Old node cannot handle new checkpoint protocol",
+                     ram_overrides_dump}};
+        true ->
+            {ok, #checkpoint{name = Cp#checkpoint_args.name,
+                             allow_remote = Cp#checkpoint_args.allow_remote,
+                             ram_overrides_dump = ROD,
+                             nodes = Cp#checkpoint_args.nodes,
+                             node = Cp#checkpoint_args.node,
+                             now = Cp#checkpoint_args.now,
+                             min = Cp#checkpoint_args.min,
+                             max = Cp#checkpoint_args.max,
+                             pending_tab = Cp#checkpoint_args.pending_tab,
+                             wait_for_old = Cp#checkpoint_args.wait_for_old,
+                             is_activated = Cp#checkpoint_args.is_activated,
+                             ignore_new = Cp#checkpoint_args.ignore_new,
+                             retainers = Cp#checkpoint_args.retainers,
+                             iterators = Cp#checkpoint_args.iterators,
+                             supervisor = Cp#checkpoint_args.supervisor,
+                             pid = Cp#checkpoint_args.pid
+                            }}
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Read a global Mnesia variable; a failed lookup is handed over to
+%% mnesia_lib:other_val/2 together with the failure reason.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+        Value -> Value
+    end.
+
diff --git a/lib/mnesia/src/mnesia_checkpoint_sup.erl b/lib/mnesia/src/mnesia_checkpoint_sup.erl
new file mode 100644
index 0000000000..2fe8df52f7
--- /dev/null
+++ b/lib/mnesia/src/mnesia_checkpoint_sup.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_checkpoint_sup).
+
+-behaviour(supervisor).
+
+-export([start/0, init/1]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% top supervisor callback functions
+
+%% Start the checkpoint supervisor, registered locally under the
+%% module name.
+start() ->
+    SupName = {local, ?MODULE},
+    supervisor:start_link(SupName, ?MODULE, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% sub supervisor callback functions
+
+%% supervisor callback: a simple_one_for_one supervisor whose transient
+%% workers are started through mnesia_checkpoint:start.
+init([]) ->
+    SupFlags = {simple_one_for_one, 0, timer:hours(24)}, % Trust the top supervisor
+    StartFunc = {mnesia_checkpoint, start, []},
+    Mods = [?MODULE, mnesia_checkpoint, supervisor],
+    Shutdown = mnesia_kernel_sup:supervisor_timeout(timer:seconds(3)),
+    ChildSpec = {?MODULE, StartFunc, transient, Shutdown, worker, Mods},
+    {ok, {SupFlags, [ChildSpec]}}.
diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
new file mode 100644
index 0000000000..9bc480e619
--- /dev/null
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -0,0 +1,2182 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% The mnesia_init process loads tables from local disc or from
+%% other nodes. It also coordinates updates of the info about
+%% where we can read and write tables.
+%%
+%% Tables may need to be loaded initially at startup of the local
+%% node or when other nodes announce that they already have loaded
+%% tables that we also want.
+%%
+%% Initially we set the load request queue to those tables that we
+%% safely can load locally, i.e. tables where we have the last
+%% consistent replica and we have received mnesia_down from all
+%% other nodes holding the table. Then we let the mnesia_init
+%% process enter its normal working state.
+%%
+%% When we need to load a table we append a request to the load
+%% request queue. All other requests are regarded as high priority
+%% and are processed immediately (e.g. update table whereabouts).
+%% We process the load request queue as a "background" job.
+
+-module(mnesia_controller).
+
+-behaviour(gen_server).
+
+%% Mnesia internal stuff
+-export([
+ start/0,
+ i_have_tab/1,
+ info/0,
+ get_info/1,
+ get_workers/1,
+ force_load_table/1,
+ async_dump_log/1,
+ sync_dump_log/1,
+ connect_nodes/1,
+ wait_for_schema_commit_lock/0,
+ release_schema_commit_lock/0,
+ create_table/1,
+ get_disc_copy/1,
+ get_cstructs/0,
+ sync_and_block_table_whereabouts/4,
+ sync_del_table_copy_whereabouts/2,
+ block_table/1,
+ unblock_table/1,
+ block_controller/0,
+ unblock_controller/0,
+ unannounce_add_table_copy/2,
+ master_nodes_updated/2,
+ mnesia_down/1,
+ add_active_replica/2,
+ add_active_replica/3,
+ add_active_replica/4,
+ update/1,
+ change_table_access_mode/1,
+ del_active_replica/2,
+ wait_for_tables/2,
+ get_network_copy/2,
+ merge_schema/0,
+ start_remote_sender/4,
+ schedule_late_disc_load/2
+ ]).
+
+%% gen_server callbacks
+-export([init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3]).
+
+%% Module internal stuff
+-export([call/1,
+ cast/1,
+ dump_and_reply/2,
+ load_and_reply/2,
+ send_and_reply/2,
+ wait_for_tables_init/2,
+ connect_nodes2/2
+ ]).
+
+-import(mnesia_lib, [set/2, add/2]).
+-import(mnesia_lib, [fatal/2, error/2, verbose/2, dbg_out/2]).
+
+-include("mnesia.hrl").
+
+-define(SERVER_NAME, ?MODULE).
+
+-record(state, {supervisor,
+ schema_is_merged = false,
+ early_msgs = [],
+ loader_pid = [], %% Was Pid is now [{Pid,Work}|..]
+ loader_queue, %% Was list is now gb_tree
+ sender_pid = [], %% Was a pid or undef is now [{Pid,Work}|..]
+ sender_queue = [],
+ late_loader_queue, %% Was list is now gb_tree
+ dumper_pid, %% Dumper or schema commit pid
+ dumper_queue = [], %% Dumper or schema commit queue
+ others = [], %% Processes that needs the copier_done msg
+ dump_log_timer_ref,
+ is_stopping = false
+ }).
+%% Backwards compatibility: sender_pid now holds a list of senders.
+get_senders(#state{sender_pid = Senders}) when is_list(Senders) -> Senders.
+%% Backwards compatibility: loader_pid now holds a list of loaders.
+get_loaders(#state{loader_pid = Loaders}) when is_list(Loaders) -> Loaders.
+%% Number of table loaders, read from the no_table_loaders variable.
+%% When the variable cannot be read it is initialised to 1.
+max_loaders() ->
+    case ?catch_val(no_table_loaders) of
+        {'EXIT', _} ->
+            Default = 1,
+            mnesia_lib:set(no_table_loaders, Default),
+            Default;
+        Configured ->
+            Configured
+    end.
+
+-record(schema_commit_lock, {owner}).
+-record(block_controller, {owner}).
+
+-record(dump_log, {initiated_by,
+ opt_reply_to
+ }).
+
+-record(net_load, {table,
+ reason,
+ opt_reply_to,
+ cstruct = unknown
+ }).
+
+-record(send_table, {table,
+ receiver_pid,
+ remote_storage
+ }).
+
+-record(disc_load, {table,
+ reason,
+ opt_reply_to
+ }).
+
+-record(late_load, {table,
+ reason,
+ opt_reply_to,
+ loaders
+ }).
+
+-record(loader_done, {worker_pid,
+ is_loaded,
+ table_name,
+ needs_announce,
+ needs_sync,
+ needs_reply,
+ reply_to,
+ reply}).
+
+-record(sender_done, {worker_pid,
+ worker_res,
+ table_name
+ }).
+
+-record(dumper_done, {worker_pid,
+ worker_res
+ }).
+
+%% Read a global Mnesia variable; a failed lookup is delegated to
+%% mnesia_lib:other_val/2 together with the failure reason.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Why} ->
+            mnesia_lib:other_val(Var, Why);
+        Found ->
+            Found
+    end.
+
+%% Start the controller as a locally registered gen_server. The calling
+%% process is handed to init/1 as the only argument.
+start() ->
+    Options = [{timeout, infinity}
+               %% ,{debug, [trace]}
+              ],
+    gen_server:start_link({local, ?SERVER_NAME}, ?MODULE, [self()], Options).
+
+%% Request a log dump and wait for the controller's answer.
+sync_dump_log(InitBy) ->
+    Request = {sync_dump_log, InitBy},
+    call(Request).
+
+%% Request a log dump without waiting; the request is sent as a plain
+%% message to the registered controller.
+async_dump_log(InitBy) ->
+    Request = {async_dump_log, InitBy},
+    ?SERVER_NAME ! Request.
+
+%% Wait for tables to be active
+%% If needed, we will wait for Mnesia to start
+%% If Mnesia stops, we will wait for Mnesia to restart
+%% We will wait even if the list of tables is empty
+%%
+%% Validate the arguments: Tabs must be a list and Timeout either
+%% infinity or a non-negative integer. Anything else is answered with
+%% {error, {badarg, Tabs, Timeout}}.
+wait_for_tables(Tabs, Timeout)
+  when is_list(Tabs), Timeout == infinity;
+       is_list(Tabs), is_integer(Timeout), Timeout >= 0 ->
+    do_wait_for_tables(Tabs, Timeout);
+wait_for_tables(Tabs, Timeout) ->
+    {error, {badarg, Tabs, Timeout}}.
+
+%% With a zero timeout the current state is reported at once. Otherwise
+%% a linked helper process does the waiting; if it exits or the timeout
+%% expires, the answer is computed from the currently active tables.
+do_wait_for_tables(Tabs, 0) ->
+    reply_wait(Tabs);
+do_wait_for_tables(Tabs, Timeout) ->
+    Waiter = spawn_link(?MODULE, wait_for_tables_init, [self(), Tabs]),
+    receive
+        {?SERVER_NAME, Waiter, Answer} ->
+            Answer;
+        {'EXIT', Waiter, _Reason} ->
+            reply_wait(Tabs)
+    after Timeout ->
+            unlink(Waiter),
+            exit(Waiter, timeout),
+            reply_wait(Tabs)
+    end.
+
+%% Compare Tabs with the tables that are active right now: ok when all
+%% are active, {timeout, Missing} otherwise, or a node_not_running
+%% error when the active tables cannot be read.
+reply_wait(Tabs) ->
+    case catch mnesia_lib:active_tables() of
+        {'EXIT', _} ->
+            {error, {node_not_running, node()}};
+        Active when is_list(Active) ->
+            Missing = Tabs -- Active,
+            case Missing of
+                [] -> ok;
+                _ -> {timeout, Missing}
+            end
+    end.
+
+%% Entry point of the helper process spawned by do_wait_for_tables/2:
+%% wait for the tables, report the result to From and exit normally.
+wait_for_tables_init(From, Tabs) ->
+    process_flag(trap_exit, true),
+    Controller = whereis(?SERVER_NAME),
+    Result = wait_for_init(From, Tabs, Controller),
+    From ! {?SERVER_NAME, self(), Result},
+    unlink(From),
+    exit(normal).
+
+%% Link to the controller (Init) and ask it to report when each table
+%% is synced. A failing link means the controller is not there, which
+%% is reported as node_not_running.
+wait_for_init(From, Tabs, Init) ->
+    case catch link(Init) of
+        true when is_pid(Init) ->
+            cast({sync_tabs, Tabs, self()}),
+            rec_tabs(Tabs, Tabs, From, Init);
+        {'EXIT', _} ->
+            %% Mnesia is not started
+            {error, {node_not_running, node()}}
+    end.
+
+%% Tell a waiting process that Tab has been synced.
+sync_reply(Waiter, Tab) ->
+    Notice = {?SERVER_NAME, {tab_synced, Tab}},
+    Waiter ! Notice.
+
+%% Wait for one tab_synced notice per table, in list order. The wait is
+%% abandoned by exiting if either the requesting process (From) or the
+%% controller (Init) exits first.
+rec_tabs([], _AllTabs, _From, Init) ->
+    unlink(Init),
+    ok;
+rec_tabs([Tab | Remaining], AllTabs, From, Init) ->
+    receive
+        {?SERVER_NAME, {tab_synced, Tab}} ->
+            rec_tabs(Remaining, AllTabs, From, Init);
+
+        {'EXIT', From, _} ->
+            %% This will trigger an exit signal
+            %% to mnesia_init
+            exit(wait_for_tables_timeout);
+
+        {'EXIT', Init, _} ->
+            %% Oops, mnesia_init stopped,
+            exit(mnesia_stopped)
+    end.
+
+%% Ask the controller for the cstructs.
+get_cstructs() ->
+    Request = get_cstructs,
+    call(Request).
+
+%% Hand Fun to the controller in an update request.
+update(Fun) ->
+    Request = {update, Fun},
+    call(Request).
+
+
+%% Inform the controller that Mnesia on Node is down. When the
+%% controller is not running, mnesia_monitor is notified directly on
+%% its behalf.
+mnesia_down(Node) ->
+    case cast({mnesia_down, Node}) of
+        {error, _NotRunning} ->
+            mnesia_monitor:mnesia_down(?SERVER_NAME, Node);
+        _Delivered ->
+            ok
+    end.
+%% Link to the controller and request the schema commit lock. The link
+%% is removed again in release_schema_commit_lock/0.
+wait_for_schema_commit_lock() ->
+    Controller = whereis(?SERVER_NAME),
+    link(Controller),
+    unsafe_call(wait_for_schema_commit_lock).
+
+%% Synchronous request to block the controller.
+block_controller() ->
+    Request = block_controller,
+    call(Request).
+
+%% Asynchronous request to unblock the controller.
+unblock_controller() ->
+    Request = unblock_controller,
+    cast(Request).
+
+%% Give the schema commit lock back and drop the link to the controller
+%% that wait_for_schema_commit_lock/0 set up.
+release_schema_commit_lock() ->
+    Release = {release_schema_commit_lock, self()},
+    cast(Release),
+    unlink(whereis(?SERVER_NAME)).
+
+%% Special for preparation of add table copy
+%%
+%% Loads Tab over the network in the calling process instead of queuing
+%% the work in the controller. Returns the reply of the load work, or
+%% {not_loaded, Else} if the load did not produce a #loader_done{}.
+get_network_copy(Tab, Cs) ->
+% We can't let the controller queue this one
+% because that may cause a deadlock between schema_operations
+% and initial tableloadings which both takes schema locks.
+% But we have to get copier_done msgs when the other side
+% goes down.
+ call({add_other, self()}),
+ Reason = {dumper,add_table_copy},
+ Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
+ %% I'll need this cause it's linked trough the subscriber
+ %% might be solved by using monitor in subscr instead.
+ process_flag(trap_exit, true),
+ Load = load_table_fun(Work),
+ Res = (catch Load()),
+ process_flag(trap_exit, false),
+ call({del_other, self()}),
+ case Res of
+ #loader_done{is_loaded = true} ->
+ %% Asserts that the loaded table is the requested one
+ Tab = Res#loader_done.table_name,
+ case Res#loader_done.needs_announce of
+ true ->
+ i_have_tab(Tab);
+ false ->
+ ignore
+ end,
+ Res#loader_done.reply;
+ #loader_done{} ->
+ Res#loader_done.reply;
+ Else ->
+ {not_loaded, Else}
+ end.
+
+%% This function is invoked from the dumper
+%%
+%% There are two cases here:
+%% startup ->
+%% no need for sync, since mnesia_controller is not started yet
+%% schema_trans ->
+%% already synced with mnesia_controller since the dumper
+%% is synchronously started from mnesia_controller
+
+%% Load a newly created table from disc; anything but {loaded, ok}
+%% crashes the caller.
+create_table(Tab) ->
+    Reason = {dumper, create_table},
+    {loaded, ok} = mnesia_loader:disc_load_table(Tab, Reason).
+
+%% Load Tab from disc as part of a change of table copy type.
+get_disc_copy(Tab) ->
+    Reason = {dumper, change_table_copy_type},
+    disc_load_table(Tab, Reason, undefined).
+
+%% Force Tab to be loaded. Returns ok instead of yes.
+%%
+%% A table with a known local storage type goes through
+%% do_force_load_table/1. With storage type unknown the table is flagged
+%% as load_by_force, the controller is notified, and the call waits
+%% until the table is available. The schema table and non-atom names
+%% are rejected with {error, {bad_type, Tab}}.
+force_load_table(Tab) when is_atom(Tab), Tab /= schema ->
+    case ?catch_val({Tab, storage_type}) of
+        Storage when Storage =:= ram_copies;
+                     Storage =:= disc_copies;
+                     Storage =:= disc_only_copies ->
+            do_force_load_table(Tab);
+        unknown ->
+            set({Tab, load_by_force}, true),
+            cast({force_load_updated, Tab}),
+            wait_for_tables([Tab], infinity);
+        {'EXIT', _} ->
+            {error, {no_exists, Tab}}
+    end;
+force_load_table(Tab) ->
+    {error, {bad_type, Tab}}.
+
+%% Force-load a table that has a local storage type. If the table has
+%% no known load reason yet, it is flagged as load_by_force, a late
+%% disc load is scheduled, and the call waits for the table. Otherwise
+%% there is nothing to do.
+do_force_load_table(Tab) ->
+    NotLoaded =
+        case ?catch_val({Tab, load_reason}) of
+            unknown -> true;
+            {'EXIT', _} -> true;
+            _ -> false
+        end,
+    case NotLoaded of
+        true ->
+            set({Tab, load_by_force}, true),
+            mnesia_late_loader:async_late_disc_load(node(), [Tab], forced_by_user),
+            wait_for_tables([Tab], infinity);
+        false ->
+            ok
+    end.
+%% Forward a change of master nodes to the controller. Changes for the
+%% schema table are ignored.
+master_nodes_updated(schema, _Masters) ->
+    ignore;
+master_nodes_updated(Tab, Masters) ->
+    Notice = {master_nodes_updated, Tab, Masters},
+    cast(Notice).
+
+%% Schedule a late disc load of Tabs, using the late_disc_load tag.
+schedule_late_disc_load(Tabs, Reason) ->
+    try_schedule_late_disc_load(Tabs, Reason, late_disc_load).
+
+%% Collect the disc load intents of the other db nodes under a global
+%% write lock and pass them, together with Tabs and Reason, to the
+%% local controller in a {MsgTag, ...} call. The attempt is repeated
+%% while some node fails to answer. An aborted transaction is fatal.
+%% An empty table list is ignored unless MsgTag is schema_is_merged.
+try_schedule_late_disc_load(Tabs, _Reason, MsgTag)
+ when Tabs == [], MsgTag /= schema_is_merged ->
+ ignore;
+try_schedule_late_disc_load(Tabs, Reason, MsgTag) ->
+ GetIntents =
+ fun() ->
+ Item = mnesia_late_disc_load,
+ Nodes = val({current, db_nodes}),
+ mnesia:lock({global, Item, Nodes}, write),
+ case multicall(Nodes -- [node()], disc_load_intents) of
+ {Replies, []} ->
+ %% Every node answered
+ call({MsgTag, Tabs, Reason, Replies}),
+ done;
+ {_, BadNodes} ->
+ %% Some nodes did not respond, lets try again
+ {retry, BadNodes}
+ end
+ end,
+ case mnesia:transaction(GetIntents) of
+ {atomic, done} ->
+ done;
+ {atomic, {retry, BadNodes}} ->
+ verbose("Retry late_load_tables because bad nodes: ~p~n",
+ [BadNodes]),
+ try_schedule_late_disc_load(Tabs, Reason, MsgTag);
+ {aborted, AbortReason} ->
+ fatal("Cannot late_load_tables~p: ~p~n",
+ [[Tabs, Reason, MsgTag], AbortReason])
+ end.
+
+%% Connect to the nodes Ns and merge schemas with them. The work is
+%% done by a linked helper process running connect_nodes2/2.
+%% Returns {ok, NewNodes} or {error, Reason}.
+connect_nodes(Ns) ->
+ case mnesia:system_info(is_running) of
+ no ->
+ {error, {node_not_running, node()}};
+ yes ->
+ Pid = spawn_link(?MODULE,connect_nodes2,[self(),Ns]),
+ receive
+ {?MODULE, Pid, Res, New} ->
+ case Res of
+ ok ->
+ %% Record the newly connected nodes as extra_db_nodes
+ mnesia_lib:add_list(extra_db_nodes, New),
+ {ok, New};
+ {aborted, {throw, Str}} when is_list(Str) ->
+ %%mnesia_recover:disconnect_nodes(New),
+ {error, {merge_schema_failed, lists:flatten(Str)}};
+ Else ->
+ {error, Else}
+ end;
+ {'EXIT', Pid, Reason} ->
+ %% NOTE(review): this clause can only match if the
+ %% caller traps exits - confirm against callers
+ {error, Reason}
+ end
+ end.
+
+%% Helper process body for connect_nodes/1: connect to Ns, merge the
+%% schema with the nodes that are new, tell the involved controllers
+%% that the schema is merged, and report to Father the merge result
+%% together with those nodes of Ns that now are current db nodes.
+connect_nodes2(Father, Ns) ->
+ Current = val({current, db_nodes}),
+ abcast([node()|Ns], {merging_schema, node()}),
+ {NewC, OldC} = mnesia_recover:connect_nodes(Ns),
+ Connected = NewC ++OldC,
+ New1 = mnesia_lib:intersect(Ns, Connected),
+ %% Only nodes that were not db nodes before count as new
+ New = New1 -- Current,
+ process_flag(trap_exit, true),
+ Res = try_merge_schema(New),
+ Msg = {schema_is_merged, [], late_merge, []},
+ multicall([node()|Ns], Msg),
+ After = val({current, db_nodes}),
+ Father ! {?MODULE, self(), Res, mnesia_lib:intersect(Ns,After)},
+ unlink(Father),
+ ok.
+
+%% Merge the local schema with the schema on other nodes.
+%% But first we must let all processes that want to force
+%% load tables wait until the schema merge is done.
+
+%% Merge the schema with all nodes and then announce that it is merged.
+%% A failed merge is fatal.
+merge_schema() ->
+    case try_merge_schema(mnesia_lib:all_nodes()) of
+        ok ->
+            schema_is_merged();
+        {aborted, {throw, Text}} when is_list(Text) ->
+            fatal("Failed to merge schema: ~s~n", [Text]);
+        Failure ->
+            fatal("Failed to merge schema: ~p~n", [Failure])
+    end.
+
+%% Repeat mnesia_schema:merge_schema/0 until it reports not_merged.
+%% Returns ok, or whatever unexpected result the merge produced.
+try_merge_schema(Nodes) ->
+ case mnesia_schema:merge_schema() of
+ {atomic, not_merged} ->
+ %% No more nodes that we need to merge the schema with
+ ok;
+ {atomic, {merged, OldFriends, NewFriends}} ->
+ %% Check if new nodes has been added to the schema
+ Diff = mnesia_lib:all_nodes() -- [node() | Nodes],
+ mnesia_recover:connect_nodes(Diff),
+
+ %% Tell everybody to adopt orphan tables
+ im_running(OldFriends, NewFriends),
+ im_running(NewFriends, OldFriends),
+
+ %% Merge again; further nodes may remain
+ try_merge_schema(Nodes);
+ {atomic, {"Cannot get cstructs", Node, Reason}} ->
+ dbg_out("Cannot get cstructs, Node ~p ~p~n", [Node, Reason]),
+ timer:sleep(1000), % Avoid a endless loop look alike
+ try_merge_schema(Nodes);
+ Other ->
+ Other
+ end.
+
+%% Broadcast to the controllers on OldFriends that this node is running
+%% together with NewFriends.
+im_running(OldFriends, NewFriends) ->
+    Notice = {im_running, node(), NewFriends},
+    abcast(OldFriends, Notice).
+
+%% Announce that the schema is merged and schedule the tables that can
+%% safely be loaded locally.
+%%
+%% At this point we do not know anything about which tables the other
+%% nodes already have loaded. That is left to the normal processing of
+%% the loader_queue, since the whereabouts will be known by then. We
+%% rely on the fact that all nodes tell each other directly when they
+%% have loaded a table and are willing to share it.
+schema_is_merged() ->
+    SafeLoads = initial_safe_loads(),
+    try_schedule_late_disc_load(SafeLoads, initial, schema_is_merged).
+
+
+%% Asynchronous request to the local controller.
+%% Returns {error, {node_not_running, Node}} when it is not registered.
+cast(Msg) ->
+    Server = whereis(?SERVER_NAME),
+    if
+        Server =:= undefined ->
+            {error, {node_not_running, node()}};
+        true ->
+            gen_server:cast(Server, Msg)
+    end.
+
+%% Asynchronous broadcast of Msg to the controllers registered
+%% as ?SERVER_NAME on Nodes.
+abcast(Nodes, Msg) ->
+    ServerName = ?SERVER_NAME,
+    gen_server:abcast(Nodes, ServerName, Msg).
+
+%% Synchronous request to the local controller with no timeout and,
+%% unlike call/1, without linking to the server during the call.
+unsafe_call(Msg) ->
+    Server = whereis(?SERVER_NAME),
+    if
+        Server =:= undefined ->
+            {error, {node_not_running, node()}};
+        true ->
+            gen_server:call(Server, Msg, infinity)
+    end.
+
+%% Synchronous request to the local controller, with no timeout.
+%% Returns the server's reply, or {error, {node_not_running, Node}}
+%% when the server is not registered or an 'EXIT' message from it
+%% is found in the mailbox right after the call.
+call(Msg) ->
+ case whereis(?SERVER_NAME) of
+ undefined ->
+ {error, {node_not_running, node()}};
+ Pid ->
+ %% Linked only for the duration of the call
+ link(Pid),
+ Res = gen_server:call(Pid, Msg, infinity),
+ unlink(Pid),
+
+ %% We get an exit signal if server dies
+ %% NOTE(review): the 'EXIT' can only show up as a message if the
+ %% calling process traps exits - confirm against the callers
+ receive
+ {'EXIT', Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ %% Nothing pending: hand back the reply
+ Res
+ end
+ end.
+
+%% Synchronous {Func, Args, self()} request to the controller on Node.
+%% A failing call is turned into {error, Reason} instead of an exit.
+remote_call(Node, Func, Args) ->
+    Request = {Func, Args, self()},
+    Result = (catch gen_server:call({?MODULE, Node}, Request, infinity)),
+    case Result of
+        {'EXIT', Error} -> {error, Error};
+        Reply -> Reply
+    end.
+
+%% Synchronous request to the controllers on Nodes, with no timeout.
+%% Returns {Replies, BadNodes}; the node names are stripped from the
+%% good replies so the result looks like that of
+%% rpc:multicall(Nodes, ?MODULE, call, [Msg]).
+multicall(Nodes, Msg) ->
+    {Answered, Bad} = gen_server:multi_call(Nodes, ?MODULE, Msg, infinity),
+    {[Reply || {_Node, Reply} <- Answered], Bad}.
+
+%%%----------------------------------------------------------------------
+%%% Callback functions from gen_server
+%%%----------------------------------------------------------------------
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) ->
+    process_flag(trap_exit, true),
+    mnesia_lib:verbose("~p starting: ~p~n", [?SERVER_NAME, self()]),
+
+    %% Nodes found in the schema that were not among the original
+    %% nodes need a handshake and transaction recovery set up
+    NewNodes = mnesia_lib:all_nodes() -- [node() | val(original_nodes)],
+    mnesia_lib:unset(original_nodes),
+    mnesia_recover:connect_nodes(NewNodes),
+
+    %% Periodic message that triggers a time-threshold log dump
+    DumpInterval = mnesia_monitor:get_env(dump_log_time_threshold),
+    {ok, TimerRef} = timer:send_interval(DumpInterval,
+                                         {async_dump_log, time_threshold}),
+    mnesia_dumper:start_regulator(),
+
+    %% Both loader queues start out as empty gb_trees
+    EmptyQueue = gb_trees:empty(),
+    InitialState = #state{supervisor = Parent,
+                          dump_log_timer_ref = TimerRef,
+                          loader_queue = EmptyQueue,
+                          late_loader_queue = EmptyQueue},
+    {ok, InitialState}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% The three requests below are queued as workers; the caller is
+%% not answered here.
+handle_call({sync_dump_log, InitBy}, From, State) ->
+    DumpJob = #dump_log{initiated_by = InitBy, opt_reply_to = From},
+    noreply(add_worker(DumpJob, State));
+
+handle_call(wait_for_schema_commit_lock, From, State) ->
+    LockJob = #schema_commit_lock{owner = From},
+    noreply(add_worker(LockJob, State));
+
+handle_call(block_controller, From, State) ->
+    BlockJob = #block_controller{owner = From},
+    noreply(add_worker(BlockJob, State));
+
+handle_call({update,Fun}, From, State) ->
+    %% Fun runs inside the controller process; whatever it returns
+    %% (or the caught failure) is sent back to the caller
+    reply(From, (catch Fun())),
+    noreply(State);
+
+handle_call(get_cstructs, From, State) ->
+    %% Table definitions of every table in the schema plus the
+    %% currently running db_nodes
+    Cstructs = [val({Tab, cstruct}) || Tab <- val({schema, tables})],
+    Running = val({current, db_nodes}),
+    reply(From, {cstructs, Cstructs, Running}),
+    noreply(State);
+
+%% Sent when a late (dynamic) schema merge has finished. It is only
+%% acted upon when we are waiting for the merge of exactly the node
+%% the caller runs on, i.e. schema_is_merged is {false, CallerNode}.
+%%
+%% From is the gen_server caller handle {Pid, Tag}, so the caller's
+%% node has to be taken from its first element; node/1 applied to
+%% the tuple itself fails the guard, which made the first branch
+%% unreachable.
+handle_call({schema_is_merged, [], late_merge, []}, From,
+            State = #state{schema_is_merged = Merged}) ->
+    case Merged of
+        {false, Node} when Node == node(element(1, From)) ->
+            %% Answer the caller first; replaying the buffered
+            %% messages below never replies to this request, and an
+            %% unanswered call would block the caller forever
+            reply(From, ok),
+            Msgs = State#state.early_msgs,
+            State1 = State#state{early_msgs = [], schema_is_merged = true},
+            handle_early_msgs(lists:reverse(Msgs), State1);
+        _ ->
+            %% Oops, this came too early, before we have merged :-)
+            %% or it came too late or from a node we don't care about
+            reply(From, ignore),
+            noreply(State)
+    end;
+
+handle_call({schema_is_merged, TabsR, Reason, RemoteLoaders}, From, State) ->
+    Loading = late_disc_load(TabsR, Reason, RemoteLoaders, From, State),
+
+    %% The schema counts as merged from here on: replay the early
+    %% messages, which were buffered newest first
+    Buffered = lists:reverse(Loading#state.early_msgs),
+    Merged = Loading#state{early_msgs = [], schema_is_merged = true},
+    handle_early_msgs(Buffered, Merged);
+
+handle_call(disc_load_intents, From,
+            State = #state{loader_queue = LQ, late_loader_queue = LLQ}) ->
+    %% Reply with every table that is queued for loading, queued
+    %% for late loading or already active on this node
+    Queued = gb_trees:keys(LQ),
+    LateQueued = gb_trees:keys(LLQ),
+    Active = lists:sort(mnesia_lib:local_active_tables()),
+    Intents = ordsets:union([Queued, LateQueued, Active]),
+    reply(From, {ok, node(), Intents}),
+    noreply(State);
+
+handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State) ->
+    %% AddNode is only added when it is a running db_node and the
+    %% schema has been merged; otherwise the request is ignored
+    IsRunning = lists:member(AddNode, val({current, db_nodes})),
+    Res =
+        case IsRunning andalso (State#state.schema_is_merged == true) of
+            true -> mnesia_lib:add_lsort({Tab, where_to_write}, AddNode);
+            false -> ignore
+        end,
+    {reply, Res, State};
+
+handle_call({add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
+            ReplyTo, State) ->
+    KnownNode = lists:member(ToNode, val({current, db_nodes})),
+    case {KnownNode, State#state.schema_is_merged} of
+        {false, _} ->
+            %% ToNode is not a running db_node
+            reply(ReplyTo, ignore),
+            noreply(State);
+        {true, true} ->
+            Res = case ?catch_val({Tab, cstruct}) of
+                      {'EXIT', _} ->
+                          %% Tab deleted
+                          deleted;
+                      _ ->
+                          add_active_replica(Tab, ToNode, RemoteS, AccessMode)
+                  end,
+            reply(ReplyTo, Res),
+            noreply(State);
+        {true, _NotMerged} ->
+            %% Schema is not merged: reply ignore now and add the
+            %% data after the schema merge. The buffered copy gets
+            %% undefined as reply target so we don't reply twice.
+            reply(ReplyTo, ignore),
+            Request = {add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
+            Buffered = [{call, Request, undefined} | State#state.early_msgs],
+            noreply(State#state{early_msgs = Buffered})
+    end;
+
+handle_call({unannounce_add_table_copy, [Tab, Node], From}, ReplyTo, State) ->
+    %% From is the pid carried in the request; only requests from
+    %% running db_nodes are honoured
+    KnownNode = lists:member(node(From), val({current, db_nodes})),
+    case {KnownNode, State#state.schema_is_merged} of
+        {false, _} ->
+            reply(ReplyTo, ignore),
+            noreply(State);
+        {true, true} ->
+            reply(ReplyTo, unannounce_add_table_copy(Tab, Node)),
+            noreply(State);
+        {true, _NotMerged} ->
+            %% Schema is not merged: reply ignore now and redo the
+            %% request after the schema merge. The buffered copy gets
+            %% undefined as reply target so we don't reply twice.
+            reply(ReplyTo, ignore),
+            Request = {unannounce_add_table_copy, [Tab, Node], From},
+            Buffered = [{call, Request, undefined} | State#state.early_msgs],
+            noreply(State#state{early_msgs = Buffered})
+    end;
+
+handle_call({net_load, Tab, Cs}, From, State) ->
+    case State#state.schema_is_merged of
+        true ->
+            %% Queue the load; the caller is not answered here
+            Job = #net_load{table = Tab,
+                            opt_reply_to = From,
+                            reason = {dumper,add_table_copy},
+                            cstruct = Cs},
+            noreply(add_worker(Job, State));
+        false ->
+            reply(From, {not_loaded, schema_not_merged}),
+            noreply(State)
+    end;
+
+handle_call(Msg, From, State) when State#state.schema_is_merged /= true ->
+    %% Any other call that arrives before the schema is merged is
+    %% buffered, newest first
+    Buffered = [{call, Msg, From} | State#state.early_msgs],
+    noreply(State#state{early_msgs = Buffered});
+
+handle_call({late_disc_load, Tabs, Reason, RemoteLoaders}, From, State) ->
+    noreply(late_disc_load(Tabs, Reason, RemoteLoaders, From, State));
+
+handle_call({unblock_table, Tab}, _Dummy, State) ->
+    %% Strip the blocked marker from where_to_commit, if there is one
+    Key = {Tab, where_to_commit},
+    case val(Key) of
+        {blocked, Nodes} -> set(Key, Nodes);
+        _ -> ignore
+    end,
+    {reply, ok, State};
+
+handle_call({block_table, [Tab], From}, _Dummy, State) ->
+    %% Only requests coming from running db_nodes block the table
+    FromKnownNode = lists:member(node(From), val({current, db_nodes})),
+    if
+        FromKnownNode -> block_table(Tab);
+        true -> ignore
+    end,
+    {reply, ok, State};
+
+handle_call({check_w2r, _Node, Tab}, _From, State) ->
+    ReadNode = val({Tab, where_to_read}),
+    {reply, ReadNode, State};
+
+%% Maintain the list of other processes; each of them is sent
+%% {copier_done, Node} when a node goes down
+handle_call({add_other, Who}, _From, State = #state{others=Others0}) ->
+    {reply, ok, State#state{others = [Who | Others0]}};
+handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
+    {reply, ok, State#state{others = lists:delete(Who, Others0)}};
+
+handle_call(Msg, _From, State) ->
+    %% Unknown request: logged, and the caller is not answered
+    error("~p got unexpected call: ~p~n", [?SERVER_NAME, Msg]),
+    noreply(State).
+
+%% Register the intent to load TabsR in the late loader queue and
+%% return the updated state. Each element of TabsR is either Tab or
+%% {Tab, Reason}. RemoteLoaders is a list of {ok, Node, Tabs} tuples.
+%% From is answered with queued before anything is filtered.
+late_disc_load(TabsR, Reason, RemoteLoaders, From,
+               State = #state{loader_queue = LQ, late_loader_queue = LLQ}) ->
+    verbose("Intend to load tables: ~p~n", [TabsR]),
+    ?eval_debug_fun({?MODULE, late_disc_load},
+                    [{tabs, TabsR},
+                     {reason, Reason},
+                     {loaders, RemoteLoaders}]),
+
+    reply(From, queued),
+
+    LocalTabs = gb_sets:from_ordset(lists:sort(mnesia_lib:val({schema,local_tables}))),
+    %% A bare table name gets the default Reason attached
+    Normalize = fun({_, _} = Pair) -> Pair;
+                   (TabName) -> {TabName, Reason}
+                end,
+    %% Keep a table only if it is a local table (i.e. not deleted),
+    %% is not already readable locally and is not already queued
+    Keep = fun(Entry, Acc) ->
+               TabInfo = {Tab, _} = Normalize(Entry),
+               Wanted = gb_sets:is_member(Tab, LocalTabs)
+                   andalso (?catch_val({Tab, where_to_read}) /= node())
+                   andalso (not gb_trees:is_defined(Tab, LQ)),
+               case Wanted of
+                   true -> [TabInfo | Acc];
+                   false -> Acc
+               end
+           end,
+    Tabs = lists:foldl(Keep, [], TabsR),
+
+    Nodes = val({current, db_nodes}),
+    State#state{late_loader_queue = late_loaders(Tabs, RemoteLoaders, Nodes, LLQ)}.
+
+%% Add a #late_load{} entry to LLQ for every {Tab, Reason} that is
+%% not in the queue yet. When no remote node is accepted as loader
+%% for a table, a disc_load of it is requested right away.
+late_loaders([], _RemoteLoaders, _Nodes, LLQ) ->
+    LLQ;
+late_loaders([{Tab, Reason} | Tabs], RemoteLoaders, Nodes, LLQ) ->
+    NewLLQ =
+        case gb_trees:is_defined(Tab, LLQ) of
+            true ->
+                LLQ;
+            false ->
+                LoadNodes = late_load_filter(RemoteLoaders, Tab, Nodes, []),
+                if
+                    LoadNodes == [] ->
+                        cast({disc_load, Tab, Reason}); % Ugly cast
+                    true ->
+                        ignore
+                end,
+                Entry = #late_load{table=Tab,loaders=LoadNodes,reason=Reason},
+                gb_trees:insert(Tab, Entry, LLQ)
+        end,
+    late_loaders(Tabs, RemoteLoaders, Nodes, NewLLQ).
+
+%% Collect in Acc the nodes whose load intent for Tab is accepted.
+%% Failed replies ({error, _} and {badrpc, _}) are skipped.
+late_load_filter([], _Tab, _Nodes, Acc) ->
+    Acc;
+late_load_filter([{error, _} | RemoteLoaders], Tab, Nodes, Acc) ->
+    late_load_filter(RemoteLoaders, Tab, Nodes, Acc);
+late_load_filter([{badrpc, _} | RemoteLoaders], Tab, Nodes, Acc) ->
+    late_load_filter(RemoteLoaders, Tab, Nodes, Acc);
+late_load_filter([RL | RemoteLoaders], Tab, Nodes, Acc) ->
+    {ok, Node, Intents} = RL,
+    Access = val({Tab, access_mode}),
+    LocalC = val({Tab, local_content}),
+    StillActive = lists:member(Node, Nodes),
+    RemoteIntent = lists:member(Tab, Intents),
+    %% The table must be read_write and not local_content, and the
+    %% node must still be running and intend to load the table
+    Candidate = (Access == read_write)
+        andalso (LocalC == false)
+        andalso (StillActive == true)
+        andalso (RemoteIntent == true),
+    Accepted =
+        case Candidate of
+            true ->
+                %% Accept the intent if the table has no master
+                %% nodes or the other node is one of them; ignore it
+                %% when someone else is master node for the table
+                Masters = mnesia_recover:get_master_nodes(Tab),
+                (Masters == []) orelse lists:member(Node, Masters);
+            false ->
+                false
+        end,
+    case Accepted of
+        true -> late_load_filter(RemoteLoaders, Tab, Nodes, [Node | Acc]);
+        false -> late_load_filter(RemoteLoaders, Tab, Nodes, Acc)
+    end.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_cast({release_schema_commit_lock, _Owner}, State) ->
+    case State#state.is_stopping of
+        true ->
+            {stop, shutdown, State};
+        _ ->
+            case State#state.dumper_queue of
+                [#schema_commit_lock{} | Rest] ->
+                    %% The lock is first in the dumper queue:
+                    %% drop it and start the next worker, if any
+                    Released = State#state{dumper_pid = undefined,
+                                           dumper_queue = Rest},
+                    noreply(opt_start_worker(Released));
+                _ ->
+                    %% The lock is not first in the queue: nothing to do
+                    noreply(State)
+            end
+    end;
+
+%% Drop the #block_controller{} entry at the head of the dumper
+%% queue and start the next worker, or stop if we are shutting down.
+%% NOTE(review): the 'if' has no fall-through branch, so this clause
+%% fails with if_clause when we are not stopping and the head of the
+%% queue is not a #block_controller{} (an empty queue included, since
+%% hd/1 then fails the guard). The release_schema_commit_lock clause
+%% ignores the corresponding case - confirm the crash is intended.
+handle_cast(unblock_controller, State) ->
+ if
+ State#state.is_stopping == true ->
+ {stop, shutdown, State};
+ is_record(hd(State#state.dumper_queue), block_controller) ->
+ [_Worker | Rest] = State#state.dumper_queue,
+ State2 = State#state{dumper_pid = undefined,
+ dumper_queue = Rest},
+ State3 = opt_start_worker(State2),
+ noreply(State3)
+ end;
+
+%% Mnesia on Node went down: update the global bookkeeping for the
+%% node, tell the interested processes and purge everything in the
+%% state that refers to it.
+handle_cast({mnesia_down, Node}, State) ->
+ maybe_log_mnesia_down(Node),
+ mnesia_lib:del({current, db_nodes}, Node),
+ mnesia_checkpoint:tm_mnesia_down(Node),
+ Alltabs = val({schema, tables}),
+ reconfigure_tables(Node, Alltabs),
+ %% Done from (external point of view)
+ mnesia_monitor:mnesia_down(?SERVER_NAME, Node),
+
+ %% Fix if we are late_merging against the node that went down
+ %% (call/1 is run from a separate process, it cannot be called
+ %% from inside the server itself)
+ case State#state.schema_is_merged of
+ {false, Node} ->
+ spawn(?MODULE, call, [{schema_is_merged, [], late_merge, []}]);
+ _ ->
+ ignore
+ end,
+
+ %% Fix internal stuff
+ LateQ = remove_loaders(Alltabs, Node, State#state.late_loader_queue),
+
+ %% Tell all active senders and loaders, and the registered
+ %% other processes, about the lost node
+ case get_senders(State) ++ get_loaders(State) of
+ [] -> ignore;
+ Senders ->
+ lists:foreach(fun({Pid,_}) -> Pid ! {copier_done, Node} end,
+ Senders)
+ end,
+ lists:foreach(fun(Pid) -> Pid ! {copier_done,Node} end,
+ State#state.others),
+
+ %% Drop queued table sends whose receiver lives on the lost node
+ Remove = fun(ST) ->
+ node(ST#send_table.receiver_pid) /= Node
+ end,
+ NewSenders = lists:filter(Remove, State#state.sender_queue),
+ Early = remove_early_messages(State#state.early_msgs, Node),
+ noreply(State#state{sender_queue = NewSenders,
+ early_msgs = Early,
+ late_loader_queue = LateQ
+ });
+
+handle_cast({merging_schema, Node}, State) ->
+    case State#state.schema_is_merged of
+        false ->
+            %% This comes from dynamic connect_nodes which are made
+            %% after mnesia:start() and the schema_merge.
+            %% Only a newly started ram node, i.e. one with a ram
+            %% schema and no local table but the schema, remembers
+            %% which node it is merging with.
+            NewRamNode =
+                (val({schema, storage_type}) == ram_copies)
+                andalso (mnesia_lib:val({schema, local_tables}) == [schema]),
+            if
+                NewRamNode ->
+                    noreply(State#state{schema_is_merged = {false, Node}});
+                true ->
+                    noreply(State)
+            end;
+        _ ->
+            %% Already merging schema.
+            noreply(State)
+    end;
+
+handle_cast(Msg, State) when State#state.schema_is_merged /= true ->
+    %% Any other cast that arrives before the schema is merged is
+    %% buffered, newest first
+    Buffered = [{cast, Msg} | State#state.early_msgs],
+    noreply(State#state{early_msgs = Buffered});
+
+%% This must be done after schema_is_merged, otherwise adopt_orphan
+%% might trigger a table load from the wrong nodes, since we would
+%% not yet know which tables we can load safely first.
+handle_cast({im_running, _Node, NewFriends}, State) ->
+    %% Offer our active tables, except the schema and the
+    %% local_content ones, to those new friends that are running
+    Shared = [Tab || Tab <- mnesia_lib:local_active_tables() -- [schema],
+                     not val({Tab, local_content})],
+    Ns = mnesia_lib:intersect(NewFriends, val({current, db_nodes})),
+    abcast(Ns, {adopt_orphans, node(), Shared}),
+    noreply(State);
+
+handle_cast({disc_load, Tab, Reason}, State) ->
+    Job = #disc_load{table = Tab, reason = Reason},
+    noreply(add_worker(Job, State));
+
+handle_cast(Worker = #send_table{}, State) ->
+    noreply(add_worker(Worker, State));
+
+handle_cast({sync_tabs, Tabs, From}, State) ->
+    %% user initiated wait_for_tables
+    handle_sync_tabs(Tabs, From),
+    noreply(State);
+
+handle_cast({i_have_tab, Tab, Node}, State) ->
+    %% Announcements from nodes that are not running are dropped
+    IsRunning = lists:member(Node, val({current, db_nodes})),
+    if
+        IsRunning -> noreply(node_has_tabs([Tab], Node, State));
+        true -> noreply(State)
+    end;
+
+handle_cast({force_load_updated, Tab}, State) ->
+    case val({Tab, active_replicas}) of
+        [SomeNode | _] ->
+            noreply(node_has_tabs([Tab], SomeNode, State));
+        [] ->
+            %% No valid replicas
+            noreply(State)
+    end;
+
+handle_cast({master_nodes_updated, Tab, Masters}, State) ->
+    Active = val({Tab, active_replicas}),
+    %% Every active replica is valid when the table is loaded by
+    %% force or has no master nodes, otherwise only the masters are
+    Valid =
+        case val({Tab, load_by_force}) of
+            true -> Active;
+            false when Masters == [] -> Active;
+            false -> mnesia_lib:intersect(Masters, Active)
+        end,
+    case Valid of
+        [SomeNode | _] ->
+            noreply(node_has_tabs([Tab], SomeNode, State));
+        [] ->
+            %% No valid replicas
+            noreply(State)
+    end;
+
+%% Node announces that it is running and has Tabs loaded. Besides
+%% recording that, this is where tables that nobody has loaded yet
+%% (orphans) get scheduled for loading, locally or on master nodes.
+handle_cast({adopt_orphans, Node, Tabs}, State) ->
+
+ State2 = node_has_tabs(Tabs, Node, State),
+
+ %% Register the other node as up and running
+ mnesia_recover:log_mnesia_up(Node),
+ verbose("Logging mnesia_up ~w~n",[Node]),
+ mnesia_lib:report_system_event({mnesia_up, Node}),
+
+ %% Load orphan tables
+ LocalTabs = val({schema, local_tables}) -- [schema],
+ Nodes = val({current, db_nodes}),
+ {LocalOrphans, RemoteMasters} =
+ orphan_tables(LocalTabs, Node, Nodes, [], []),
+ Reason = {adopt_orphan, node()},
+ mnesia_late_loader:async_late_disc_load(node(), LocalOrphans, Reason),
+
+ %% RemoteMasters holds {Tab, MasterNodes} pairs: ask every running
+ %% node to load the orphans it is master node for
+ Fun =
+ fun(N) ->
+ RemoteOrphans =
+ [Tab || {Tab, Ns} <- RemoteMasters,
+ lists:member(N, Ns)],
+ mnesia_late_loader:maybe_async_late_disc_load(N, RemoteOrphans, Reason)
+ end,
+ lists:foreach(Fun, Nodes),
+ noreply(State2);
+
+%% Unknown cast: logged and otherwise ignored
+handle_cast(Msg, State) ->
+ error("~p got unexpected cast: ~p~n", [?SERVER_NAME, Msg]),
+ noreply(State).
+
+%% Handle a request from From to be told when Tabs are available.
+%% A table that can be read somewhere is answered for at once; for
+%% the others From is added to the waiters kept in the process
+%% dictionary under {sync_tab, Tab}.
+handle_sync_tabs(Tabs, From) ->
+    Register =
+        fun(Tab) ->
+            case val({Tab, where_to_read}) of
+                nowhere ->
+                    Waiting = case get({sync_tab, Tab}) of
+                                  undefined -> [];
+                                  Pids -> Pids
+                              end,
+                    put({sync_tab, Tab}, [From | Waiting]);
+                _ ->
+                    sync_reply(From, Tab)
+            end
+        end,
+    lists:foreach(Register, Tabs).
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_info({async_dump_log, InitBy}, State) ->
+    %% A log dump that nobody is waiting for
+    noreply(add_worker(#dump_log{initiated_by = InitBy}, State));
+
+handle_info(#dumper_done{worker_pid=Pid, worker_res=Res}, State) ->
+    Stopping = State#state.is_stopping,
+    DumpedByCurrent = (Res == dumped) andalso (Pid == State#state.dumper_pid),
+    if
+        Stopping == true ->
+            {stop, shutdown, State};
+        DumpedByCurrent ->
+            %% The dump at the head of the queue is done: answer
+            %% the waiting caller and start the next worker
+            [Worker | Rest] = State#state.dumper_queue,
+            reply(Worker#dump_log.opt_reply_to, Res),
+            Idle = State#state{dumper_pid = undefined,
+                               dumper_queue = Rest},
+            noreply(opt_start_worker(Idle));
+        true ->
+            fatal("Dumper failed: ~p~n state: ~p~n", [Res, State]),
+            {stop, fatal, State}
+    end;
+
+%% A table loader has finished. The loader is removed from the
+%% active loaders; then, depending on whether the table was loaded,
+%% the table is announced and waiting parties are answered, or the
+%% load is queued again.
+handle_info(Done = #loader_done{worker_pid=WPid, table_name=Tab}, State0) ->
+ LateQueue0 = State0#state.late_loader_queue,
+ State1 = State0#state{loader_pid = lists:keydelete(WPid,1,get_loaders(State0))},
+
+ State2 =
+ case Done#loader_done.is_loaded of
+ true ->
+ %% Optional table announcement
+ if
+ Done#loader_done.needs_announce == true,
+ Done#loader_done.needs_reply == true ->
+ i_have_tab(Tab),
+ %% Should be {dumper,add_table_copy} only
+ reply(Done#loader_done.reply_to,
+ Done#loader_done.reply);
+ Done#loader_done.needs_reply == true ->
+ %% Should be {dumper,add_table_copy} only
+ reply(Done#loader_done.reply_to,
+ Done#loader_done.reply);
+ Done#loader_done.needs_announce == true, Tab == schema ->
+ i_have_tab(Tab);
+ Done#loader_done.needs_announce == true ->
+ i_have_tab(Tab),
+ %% Local node needs to perform user_sync_tab/1
+ Ns = val({current, db_nodes}),
+ abcast(Ns, {i_have_tab, Tab, node()});
+ Tab == schema ->
+ ignore;
+ true ->
+ %% Local node needs to perform user_sync_tab/1
+ Ns = val({current, db_nodes}),
+ AlreadyKnows = val({Tab, active_replicas}),
+ abcast(Ns -- AlreadyKnows, {i_have_tab, Tab, node()})
+ end,
+ %% Optional user sync
+ case Done#loader_done.needs_sync of
+ true -> user_sync_tab(Tab);
+ false -> ignore
+ end,
+ %% The table is loaded, so it leaves the late loader queue
+ State1#state{late_loader_queue=gb_trees:delete_any(Tab, LateQueue0)};
+ false ->
+ %% Either the node went down or table was not
+ %% loaded remotely yet
+ case Done#loader_done.needs_reply of
+ true ->
+ reply(Done#loader_done.reply_to,
+ Done#loader_done.reply);
+ false ->
+ ignore
+ end,
+ case ?catch_val({Tab, active_replicas}) of
+ [_|_] -> % still available elsewhere
+ %% Queue the same worker again (looked up in State0,
+ %% it has already been removed from State1)
+ {value,{_,Worker}} = lists:keysearch(WPid,1,get_loaders(State0)),
+ add_loader(Tab,Worker,State1);
+ _ ->
+ State1
+ end
+ end,
+ State3 = opt_start_worker(State2),
+ noreply(State3);
+
+handle_info(#sender_done{worker_pid=Pid, worker_res=Res}, State) ->
+    Senders = get_senders(State),
+    %% The finished process must be one of the active senders
+    {value, {Pid,_Worker}} = lists:keysearch(Pid, 1, Senders),
+    case Res of
+        ok ->
+            Remaining = lists:keydelete(Pid, 1, Senders),
+            noreply(opt_start_worker(State#state{sender_pid = Remaining}));
+        _ ->
+            %% No need to send any message to the table receiver
+            %% since it will soon get a mnesia_down anyway
+            fatal("Sender failed: ~p~n state: ~p~n", [Res, State]),
+            {stop, fatal, State}
+    end;
+
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
+    %% The supervisor has exited, so we are to stop. With a dumper
+    %% still registered the stop is deferred by setting is_stopping.
+    catch set(mnesia_status, stopping),
+    DumperPid = State#state.dumper_pid,
+    if
+        DumperPid == undefined ->
+            dbg_out("~p was ~p~n", [?SERVER_NAME, R]),
+            {stop, shutdown, State};
+        true ->
+            noreply(State#state{is_stopping = true})
+    end;
+
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.dumper_pid ->
+    case State#state.dumper_queue of
+        [#schema_commit_lock{} | Remaining] ->
+            %% Schema trans crashed or was killed
+            dbg_out("WARNING: Dumper ~p exited ~p~n", [Pid, R]),
+            Idle = State#state{dumper_queue = Remaining, dumper_pid = undefined},
+            noreply(opt_start_worker(Idle));
+        _Other ->
+            fatal("Dumper or schema commit crashed: ~p~n state: ~p~n", [R, State]),
+            {stop, fatal, State}
+    end;
+
+handle_info(Msg = {'EXIT', Pid, R}, State) when R /= wait_for_tables_timeout ->
+    %% A crashed sender or loader is fatal; an exit from any other
+    %% process is only logged
+    IsSender = lists:keymember(Pid, 1, get_senders(State)),
+    IsLoader = (not IsSender) andalso lists:keymember(Pid, 1, get_loaders(State)),
+    if
+        IsSender ->
+            %% No need to send any message to the table receiver
+            %% since it will soon get a mnesia_down anyway
+            fatal("Sender crashed: ~p~n state: ~p~n", [{Pid,R}, State]),
+            {stop, fatal, State};
+        IsLoader ->
+            fatal("Loader crashed: ~p~n state: ~p~n", [R, State]),
+            {stop, fatal, State};
+        true ->
+            error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
+            noreply(State)
+    end;
+
+handle_info({From, get_state}, State) ->
+    %% Send the whole server state back to the requester
+    From ! {?SERVER_NAME, State},
+    noreply(State);
+
+%% No real need for buffering
+handle_info(Msg, State) when State#state.schema_is_merged /= true ->
+    %% Any other message that arrives before the schema is merged
+    %% is buffered, newest first
+    Buffered = [{info, Msg} | State#state.early_msgs],
+    noreply(State#state{early_msgs = Buffered});
+
+handle_info({'EXIT', Pid, wait_for_tables_timeout}, State) ->
+    %% Pid gave up waiting: remove it from all {sync_tab, Tab}
+    %% entries in the process dictionary
+    sync_tab_timeout(Pid, get()),
+    noreply(State);
+
+handle_info(Msg, State) ->
+    error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
+    noreply(State).
+
+%% Remove Pid from the waiters of every {sync_tab, Tab} entry in the
+%% given list of {Key, Value} process dictionary entries. An entry
+%% that ends up without waiters is erased from the process
+%% dictionary; entries with other keys are left alone. Returns ok.
+sync_tab_timeout(Pid, Entries) ->
+    Prune =
+        fun({{sync_tab, Tab}, Pids}) ->
+               case lists:delete(Pid, Pids) of
+                   [] -> erase({sync_tab, Tab});
+                   Remaining -> put({sync_tab, Tab}, Remaining)
+               end;
+           (_Other) ->
+               ok
+        end,
+    lists:foreach(Prune, Entries).
+
+%% Pick the load record that has the highest load order.
+%% Returns {BestLoad, RemainingQueue}, or {none, EmptyQueue} (an
+%% empty gb_tree) when the queue holds no record that can be loaded.
+pick_next(Queue) ->
+    Candidates = gb_trees:values(Queue),
+    case pick_next(Candidates, none, none) of
+        {Tab, Worker} -> {Worker, gb_trees:delete(Tab, Queue)};
+        none -> {none, gb_trees:empty()}
+    end.
+
+%% Walk through the load records, carrying the best record found so
+%% far and its load order. Returns none when no record was selected,
+%% otherwise {Key, Record} where Key is element 2 of the record.
+pick_next([Rec = #net_load{table=Tab} | Rest], Best, BestOrder) ->
+    select_best(Rec, Rest, ?catch_val({Tab, load_order}), Best, BestOrder);
+pick_next([Rec = #disc_load{table=Tab} | Rest], Best, BestOrder) ->
+    select_best(Rec, Rest, ?catch_val({Tab, load_order}), Best, BestOrder);
+pick_next([], none, _BestOrder) ->
+    none;
+pick_next([], Best, _BestOrder) ->
+    {element(2, Best), Best}.
+
+%% Compare one candidate and its load order with the best record so
+%% far, then continue with the rest of the records. A candidate only
+%% replaces the current best when its order is strictly greater.
+select_best(_Candidate, Rest, {'EXIT', _WHAT}, Best, BestOrder) ->
+    %% The load order could not be looked up:
+    %% the table has been deleted, drop it.
+    pick_next(Rest, Best, BestOrder);
+select_best(Candidate, Rest, Order, none, none) ->
+    %% First usable candidate
+    pick_next(Rest, Candidate, Order);
+select_best(Candidate, Rest, Order, _Best, BestOrder) when Order > BestOrder ->
+    pick_next(Rest, Candidate, Order);
+select_best(_Candidate, Rest, _Order, Best, BestOrder) ->
+    pick_next(Rest, Best, BestOrder).
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+%% All termination handling is delegated to mnesia_monitor
+terminate(Reason, State) ->
+ mnesia_monitor:terminate_proc(?SERVER_NAME, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+%% Converts a state whose loader fields have an older layout: a
+%% single loader pid becomes a list of {Pid, Loader} pairs, and
+%% queues kept as lists become gb_trees keyed on element 2 of each
+%% queued record. A state that already has the new layout is
+%% returned unchanged.
+code_change(_OldVsn, State0, _Extra) ->
+ %% Loader Queue
+ State1 = case State0#state.loader_pid of
+ Pids when is_list(Pids) -> State0;
+ undefined -> State0#state{loader_pid = [],loader_queue=gb_trees:empty()};
+ Pid when is_pid(Pid) ->
+ %% The head of the old queue is taken as the record of the
+ %% running loader, the rest is still waiting
+ [Loader|Rest] = State0#state.loader_queue,
+ LQ0 = [{element(2,Rec),Rec} || Rec <- Rest],
+ LQ1 = lists:sort(LQ0),
+ LQ = gb_trees:from_orddict(LQ1),
+ State0#state{loader_pid=[{Pid,Loader}], loader_queue=LQ}
+ end,
+ %% LateLoaderQueue
+ State = if is_list(State1#state.late_loader_queue) ->
+ LLQ0 = State1#state.late_loader_queue,
+ LLQ1 = lists:sort([{element(2,Rec),Rec} || Rec <- LLQ0]),
+ LLQ = gb_trees:from_orddict(LLQ1),
+ State1#state{late_loader_queue=LLQ};
+ true ->
+ State1
+ end,
+ {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+%% Log that mnesia went down on node N, if that can be done now.
+%% Returns ok when the mnesia_down was logged, else log_later.
+maybe_log_mnesia_down(N) ->
+    %% We use mnesia_down when deciding which tables to load locally,
+    %% so if we are not running (i.e haven't decided which tables
+    %% to load locally), don't log mnesia_down yet - unless some
+    %% local table satisfies inactive_copy_holders/2 for N.
+    ShouldLog =
+        case mnesia_lib:is_running() of
+            yes ->
+                true;
+            _ ->
+                LocalTabs = val({schema, local_tables}) -- [schema],
+                lists:any(fun(Tab) -> inactive_copy_holders(Tab, N) end,
+                          LocalTabs)
+        end,
+    case ShouldLog of
+        true ->
+            verbose("Logging mnesia_down ~w~n", [N]),
+            mnesia_recover:log_mnesia_down(N),
+            ok;
+        _ ->
+            %% Unfortunately we have not loaded some common
+            %% tables yet, so we cannot rely on the nodedown
+            log_later %% BUGBUG handle this case!!!
+    end.
+
+%% False when Node has no known storage type for Tab; otherwise the
+%% result of mnesia_lib:not_active_here(Tab).
+inactive_copy_holders(Tab, Node) ->
+    Storage = mnesia_lib:cs_to_storage_type(Node, val({Tab, cstruct})),
+    (Storage =/= unknown) andalso mnesia_lib:not_active_here(Tab).
+
+%% Sort out the orphan tables among the given local tables after
+%% Node has announced itself; Ns is the list of running db_nodes.
+%% Returns {LocalOrphans, RemoteMasters}: LocalOrphans are tables to
+%% load on this node, RemoteMasters are {Tab, MasterNodes} pairs for
+%% tables that have master nodes assigned.
+orphan_tables([Tab | Tabs], Node, Ns, Local, Remote) ->
+ Cs = val({Tab, cstruct}),
+ CopyHolders = mnesia_lib:copy_holders(Cs),
+ RamCopyHolders = Cs#cstruct.ram_copies,
+ DiscCopyHolders = CopyHolders -- RamCopyHolders,
+ DiscNodes = val({schema, disc_copies}),
+ LocalContent = Cs#cstruct.local_content,
+ RamCopyHoldersOnDiscNodes = mnesia_lib:intersect(RamCopyHolders, DiscNodes),
+ Active = val({Tab, active_replicas}),
+ BeingCreated = (?catch_val({Tab, create_table}) == true),
+ Read = val({Tab, where_to_read}),
+ %% The clauses are tried top-down, so the two skip cases below
+ %% take precedence over everything else
+ case lists:member(Node, DiscCopyHolders) of
+ _ when BeingCreated == true ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote);
+ _ when Read == node() -> %% Already loaded
+ orphan_tables(Tabs, Node, Ns, Local, Remote);
+ true when Active == [] ->
+ %% Node holds a disc resident copy and nobody has the table active
+ case DiscCopyHolders -- Ns of
+ [] ->
+ %% We're last up and the other nodes have not
+ %% loaded the table. Lets load it if we are
+ %% the smallest node.
+ case lists:min(DiscCopyHolders) of
+ Min when Min == node() ->
+ case mnesia_recover:get_master_nodes(Tab) of
+ [] ->
+ L = [Tab | Local],
+ orphan_tables(Tabs, Node, Ns, L, Remote);
+ Masters ->
+ R = [{Tab, Masters} | Remote],
+ orphan_tables(Tabs, Node, Ns, Local, R)
+ end;
+ _ ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote)
+ end;
+ _ ->
+ %% Some holder of a disc resident copy is not running yet
+ orphan_tables(Tabs, Node, Ns, Local, Remote)
+ end;
+ false when Active == [], DiscCopyHolders == [], RamCopyHoldersOnDiscNodes == [] ->
+ %% Special case when all replicas resides on disc less nodes
+ orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
+ _ when LocalContent == true ->
+ orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
+ _ ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote)
+ end;
+orphan_tables([], _, _, LocalOrphans, RemoteMasters) ->
+ {LocalOrphans, RemoteMasters}.
+
+%% Record that Node has the given tables loaded and return the
+%% updated state. For a remote node the whereabouts of each table
+%% are updated; for the local node only user_sync_tab/1 is run.
+node_has_tabs([Tab | Tabs], Node, State) when Node /= node() ->
+ State2 =
+ case catch update_whereabouts(Tab, Node, State) of
+ State1 = #state{} -> State1;
+ {'EXIT', R} -> %% Tab was just deleted?
+ %% The failure is only tolerated if the table definition
+ %% is gone, otherwise it is raised again
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} -> State; % yes
+ _ -> erlang:error(R)
+ end
+ end,
+ node_has_tabs(Tabs, Node, State2);
+node_has_tabs([Tab | Tabs], Node, State) ->
+ user_sync_tab(Tab),
+ node_has_tabs(Tabs, Node, State);
+node_has_tabs([], _Node, State) ->
+ State.
+
+%% Tab is loaded on Node: decide what that means for this node.
+%% Depending on whether we hold a copy of the table ourselves and
+%% whether it is readable yet, Node is registered as active replica,
+%% reads are directed to it, or a net load from it is queued.
+%% Returns the (possibly updated) state.
+update_whereabouts(Tab, Node, State) ->
+ Storage = val({Tab, storage_type}),
+ Read = val({Tab, where_to_read}),
+ LocalC = val({Tab, local_content}),
+ BeingCreated = (?catch_val({Tab, create_table}) == true),
+ Masters = mnesia_recover:get_master_nodes(Tab),
+ ByForce = val({Tab, load_by_force}),
+ %% Node is a good node to get the table from if the load is
+ %% forced, if there are no master nodes, or if it is a master
+ GoGetIt =
+ if
+ ByForce == true ->
+ true;
+ Masters == [] ->
+ true;
+ true ->
+ lists:member(Node, Masters)
+ end,
+
+ %% Note: the value printed as m= is GoGetIt, not the master list
+ dbg_out("Table ~w is loaded on ~w. s=~w, r=~w, lc=~w, f=~w, m=~w~n",
+ [Tab, Node, Storage, Read, LocalC, ByForce, GoGetIt]),
+ if
+ LocalC == true ->
+ %% Local contents, don't care about other node
+ State;
+ BeingCreated == true ->
+ %% The table is currently being created
+ %% It will be handled elsewhere
+ State;
+ Storage == unknown, Read == nowhere ->
+ %% No own copy, time to read remotely
+ %% if the other node is a good node
+ add_active_replica(Tab, Node),
+ case GoGetIt of
+ true ->
+ set({Tab, where_to_read}, Node),
+ user_sync_tab(Tab),
+ State;
+ false ->
+ State
+ end;
+ Storage == unknown ->
+ %% No own copy, continue to read remotely
+ add_active_replica(Tab, Node),
+ NodeST = mnesia_lib:storage_type_at_node(Node, Tab),
+ ReadST = mnesia_lib:storage_type_at_node(Read, Tab),
+ if %% Avoid reading from disc_only_copies
+ NodeST == disc_only_copies ->
+ ignore;
+ ReadST == disc_only_copies ->
+ mnesia_lib:set_remote_where_to_read(Tab);
+ true ->
+ ignore
+ end,
+ user_sync_tab(Tab),
+ State;
+ Read == nowhere ->
+ %% Own copy, go and get a copy of the table
+ %% if the other node is master or if there
+ %% are no master at all
+ add_active_replica(Tab, Node),
+ case GoGetIt of
+ true ->
+ Worker = #net_load{table = Tab,
+ reason = {active_remote, Node}},
+ add_worker(Worker, State);
+ false ->
+ State
+ end;
+ true ->
+ %% We already have an own copy
+ add_active_replica(Tab, Node),
+ user_sync_tab(Tab),
+ State
+ end.
+
+%% Compute which local tables (the schema excluded) are safe to load
+%% from the local node at startup. Returns the {Tab, Reason} pairs
+%% that last_consistent_replica/2 selects.
+initial_safe_loads() ->
+    %% A node with a ram schema has no logged mnesia_downs to consider
+    Downs =
+        case val({schema, storage_type}) of
+            ram_copies ->
+                [];
+            disc_copies ->
+                Logged = mnesia_recover:get_mnesia_downs(),
+                dbg_out("mnesia_downs = ~p~n", [Logged]),
+                Logged
+        end,
+    Tabs = val({schema, local_tables}) -- [schema],
+    lists:zf(fun(Tab) -> last_consistent_replica(Tab, Downs) end, Tabs).
+
+%% Decide whether the local copy of Tab may be loaded without
+%% waiting for another node. Downs is the list of nodes that are
+%% known to have gone down before this node.
+%% Returns {true, {Tab, Reason}} when the table can be loaded (Reason
+%% tells why), otherwise false. The shape fits lists:zf/2, which is
+%% how initial_safe_loads/0 uses it.
+last_consistent_replica(Tab, Downs) ->
+ Cs = val({Tab, cstruct}),
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ Ram = Cs#cstruct.ram_copies,
+ Disc = Cs#cstruct.disc_copies,
+ DiscOnly = Cs#cstruct.disc_only_copies,
+ %% Remote holders that are not known to be down and that do not
+ %% hold a ram copy
+ BetterCopies0 = mnesia_lib:remote_copy_holders(Cs) -- Downs,
+ BetterCopies = BetterCopies0 -- Ram,
+ AccessMode = Cs#cstruct.access_mode,
+ Copies = mnesia_lib:copy_holders(Cs),
+ Masters = mnesia_recover:get_master_nodes(Tab),
+ LocalMaster0 = lists:member(node(), Masters),
+ LocalContent = Cs#cstruct.local_content,
+ %% There are master nodes, but this node is not one of them
+ RemoteMaster =
+ if
+ Masters == [] -> false;
+ true -> not LocalMaster0
+ end,
+ %% There are master nodes and this node is one of them
+ LocalMaster =
+ if
+ Masters == [] -> false;
+ true -> LocalMaster0
+ end,
+ %% The first condition that holds decides
+ if
+ Copies == [node()] ->
+ %% Only one copy holder and it is local.
+ %% It may also be a local contents table
+ {true, {Tab, local_only}};
+ LocalContent == true ->
+ {true, {Tab, local_content}};
+ LocalMaster == true ->
+ %% We have a local master
+ {true, {Tab, local_master}};
+ RemoteMaster == true ->
+ %% Wait for remote master copy
+ false;
+ Storage == ram_copies ->
+ if
+ Disc == [], DiscOnly == [] ->
+ %% Nobody has copy on disc
+ {true, {Tab, ram_only}};
+ true ->
+ %% Some other node has copy on disc
+ false
+ end;
+ AccessMode == read_only ->
+ %% No one has been able to update the table,
+ %% i.e. all disc resident copies are equal
+ {true, {Tab, read_only}};
+ BetterCopies /= [], Masters /= [node()] ->
+ %% There are better copies on other nodes
+ %% and we do not have the only master copy
+ false;
+ true ->
+ {true, {Tab, initial}}
+ end.
+
+%% For every table in Tabs: drop node N as an active replica and, if
+%% the table was being read from N, select another node to read from.
+%% Returns ok.
+reconfigure_tables(N, Tabs) ->
+    Reconfigure =
+        fun(Tab) ->
+                del_active_replica(Tab, N),
+                case val({Tab, where_to_read}) of
+                    N -> mnesia_lib:set_remote_where_to_read(Tab);
+                    _ -> ignore
+                end
+        end,
+    lists:foreach(Reconfigure, Tabs).
+
+%% Thread the late load queue through drop_loaders/3 once per table,
+%% dropping node N as a loader of each table in Tabs.
+%% Returns the updated queue.
+remove_loaders(Tabs, N, Loaders) ->
+    lists:foldl(fun(Tab, LateQ) -> drop_loaders(Tab, N, LateQ) end,
+                Loaders, Tabs).
+
+%% Filter the list of early (queued) messages, discarding those that
+%% concern Node. Queued block_table calls from Node are answered with
+%% ok before they are discarded. The remaining messages keep their
+%% relative order.
+remove_early_messages(Msgs, Node) ->
+    Keep =
+        fun({call, {add_active_replica, [_, N, _, _], _}, _}) when N =:= Node ->
+                false; %% Does a reply before queuing
+           ({call, {block_table, _, From}, ReplyTo}) when node(From) == Node ->
+                reply(ReplyTo, ok), %% Remove gen:server waits..
+                false;
+           ({cast, {i_have_tab, _Tab, N}}) when N =:= Node ->
+                false;
+           ({cast, {adopt_orphans, N, _Tabs}}) when N =:= Node ->
+                false;
+           (_Other) ->
+                true
+        end,
+    lists:filter(Keep, Msgs).
+
+%% Drop Node as a loader of Tab in the late load queue LLQ.
+%% If Node was the only remaining loader a disc_load request is
+%% cast to the controller before the entry is updated.
+%% Returns the (possibly updated) queue.
+drop_loaders(Tab, Node, LLQ) ->
+    case gb_trees:lookup(Tab, LLQ) of
+        {value, LateLoad} ->
+            Loaders = LateLoad#late_load.loaders,
+            if
+                Loaders =:= [Node] ->
+                    %% The last loader went down, time for a disc_load
+                    Reason = {LateLoad#late_load.reason, last_loader_down, Node},
+                    cast({disc_load, Tab, Reason}); % Ugly cast
+                true ->
+                    ignore
+            end,
+            Remaining = LateLoad#late_load{loaders = Loaders -- [Node]},
+            gb_trees:update(Tab, Remaining, LLQ);
+        none ->
+            LLQ
+    end.
+
+%% Register Node as an active replica of Tab, using the table
+%% definition currently stored under {Tab, cstruct}.
+add_active_replica(Tab, Node) ->
+    Cs = val({Tab, cstruct}),
+    add_active_replica(Tab, Node, Cs).
+
+%% Register Node as an active replica of Tab; the storage type of the
+%% replica and the access mode are taken from the table definition Cs.
+add_active_replica(Tab, Node, #cstruct{access_mode = AccessMode} = Cs) ->
+    add_active_replica(Tab, Node,
+                       mnesia_lib:schema_cs_to_storage_type(Node, Cs),
+                       AccessMode).
+
+%% Block table primitives
+
+%% Mark Tab as blocked by wrapping its current where_to_commit
+%% value in a {blocked, Value} tuple.
+block_table(Tab) ->
+    Key = {Tab, where_to_commit},
+    set(Key, {blocked, val(Key)}).
+
+%% Ask the controller, via call/1, to unblock Tab.
+unblock_table(Tab) ->
+    Request = {unblock_table, Tab},
+    call(Request).
+
+%% Split a where_to_commit value into {IsBlocked, NodeList}.
+%% A blocked value has the form {blocked, NodeList}.
+is_tab_blocked({blocked, Nodes}) when is_list(Nodes) ->
+    {true, Nodes};
+is_tab_blocked(Nodes) when is_list(Nodes) ->
+    {false, Nodes}.
+
+%% Inverse of is_tab_blocked/1: re-apply the blocked marker to Value
+%% when the first argument is true, return Value untouched when false.
+mark_blocked_tab(false, Value) ->
+    Value;
+mark_blocked_tab(true, Value) ->
+    {blocked, Value}.
+
+%%
+
+%% Register Node as an active replica of Tab.
+%% For a read_write table Node is (re)inserted with its Storage in
+%% where_to_commit and added to where_to_write; for a read_only table
+%% Node is removed from both. A blocked marker on where_to_commit is
+%% preserved. Finally Node is added to the table's active_replicas.
+add_active_replica(Tab, Node, Storage, AccessMode) ->
+    CommitKey = {Tab, where_to_commit},
+    {Blocked, Current} = is_tab_blocked(val(CommitKey)),
+    Others = lists:keydelete(Node, 1, Current),
+    case AccessMode of
+        read_write ->
+            Updated = lists:sort([{Node, Storage} | Others]),
+            set(CommitKey, mark_blocked_tab(Blocked, Updated)), % where_to_commit
+            mnesia_lib:add_lsort({Tab, where_to_write}, Node);
+        read_only ->
+            set(CommitKey, mark_blocked_tab(Blocked, Others)),
+            mnesia_lib:del({Tab, where_to_write}, Node)
+    end,
+    add({Tab, active_replicas}, Node).
+
+%% Remove Node from the where_to_commit, active_replicas and
+%% where_to_write values of Tab. A blocked marker on where_to_commit
+%% is preserved.
+del_active_replica(Tab, Node) ->
+    CommitKey = {Tab, where_to_commit},
+    {Blocked, Current} = is_tab_blocked(val(CommitKey)),
+    Remaining = lists:sort(lists:keydelete(Node, 1, Current)),
+    set(CommitKey, mark_blocked_tab(Blocked, Remaining)), % where_to_commit
+    mnesia_lib:del({Tab, active_replicas}, Node),
+    mnesia_lib:del({Tab, where_to_write}, Node).
+
+%% Re-register every currently active replica of the table described
+%% by Cs, so that the whereabouts reflect the access mode in Cs.
+%% The work is wrapped in a fun that is handed to update/1.
+change_table_access_mode(Cs) ->
+    Refresh =
+        fun() ->
+                Tab = Cs#cstruct.name,
+                ReAdd = fun(Node) -> add_active_replica(Tab, Node, Cs) end,
+                lists:foreach(ReAdd, val({Tab, active_replicas}))
+        end,
+    update(Refresh).
+
+
+%% Node To now has the table loaded, but this must be undone.
+%% This code is rpc:call'ed from the tab_copier process
+%% when it has *not* released its table lock.
+%% Failures while removing the replica or reading where_to_read
+%% are caught and ignored.
+unannounce_add_table_copy(Tab, To) ->
+    catch del_active_replica(Tab, To),
+    ReadNode = (catch val({Tab, where_to_read})),
+    if
+        ReadNode =:= To ->
+            mnesia_lib:set_remote_where_to_read(Tab);
+        true ->
+            ignore
+    end.
+
+%% Called when Tab has become available.
+%% When the debug level is trace the mnesia_event process is subscribed
+%% to events of the table. Every process stored in the process
+%% dictionary under {sync_tab, Tab} is then answered via sync_reply/2
+%% and the dictionary entry is removed.
+user_sync_tab(Tab) ->
+    case val(debug) of
+        trace ->
+            mnesia_subscr:subscribe(whereis(mnesia_event), {table, Tab});
+        _ ->
+            ignore
+    end,
+    Waiting = erase({sync_tab, Tab}),
+    if
+        Waiting =:= undefined ->
+            ok;
+        true ->
+            lists:foreach(fun(Pid) -> sync_reply(Pid, Tab) end, Waiting)
+    end.
+
+%% Announce locally that this node holds a readable copy of Tab:
+%% local content tables get their whereabouts set by mnesia_lib, other
+%% tables are read from this node. The node is then registered as an
+%% active replica.
+i_have_tab(Tab) ->
+    case val({Tab, local_content}) of
+        false ->
+            set({Tab, where_to_read}, node());
+        true ->
+            mnesia_lib:set_local_content_whereabouts(Tab)
+    end,
+    add_active_replica(Tab, node()).
+
+%% Block Tab on ToNode and then register ToNode as an active replica
+%% (with storage RemoteS and the given AccessMode) on ToNode itself
+%% followed by every other currently running db node. Returns ok.
+sync_and_block_table_whereabouts(Tab, ToNode, RemoteS, AccessMode) when Tab /= schema ->
+    Current = val({current, db_nodes}),
+    %% ToNode is called first, so it must not appear a second time
+    Others = Current -- [ToNode],
+    remote_call(ToNode, block_table, [Tab]),
+    Args = [Tab, ToNode, RemoteS, AccessMode],
+    lists:foreach(fun(Node) -> remote_call(Node, add_active_replica, Args) end,
+                  [ToNode | Others]),
+    ok.
+
+%% Run unannounce_add_table_copy(Tab, ToNode) on all currently running
+%% db nodes, and on ToNode as well if it is not among them. Returns ok.
+sync_del_table_copy_whereabouts(Tab, ToNode) when Tab /= schema ->
+    Current = val({current, db_nodes}),
+    Targets =
+        case lists:member(ToNode, Current) of
+            false -> [ToNode | Current];
+            true -> Current
+        end,
+    Undo = fun(Node) ->
+                   remote_call(Node, unannounce_add_table_copy, [Tab, ToNode])
+           end,
+    lists:foreach(Undo, Targets),
+    ok.
+
+%% Ask the controller process for its state and return {info, State}
+%% where the loader queue and the late loader queue have been converted
+%% from gb_trees to lists. Returns {timeout, Timeout} if the controller
+%% is not registered or does not answer within Timeout.
+get_info(Timeout) ->
+    case whereis(?SERVER_NAME) of
+        undefined ->
+            {timeout, Timeout};
+        Server ->
+            Server ! {self(), get_state},
+            receive
+                {?SERVER_NAME, #state{loader_queue = LQ,
+                                      late_loader_queue = LLQ} = State} ->
+                    Listed = State#state{loader_queue = gb_trees:to_list(LQ),
+                                         late_loader_queue = gb_trees:to_list(LLQ)},
+                    {info, Listed}
+            after Timeout ->
+                    {timeout, Timeout}
+            end
+    end.
+
+%% Ask the controller process for its state and return
+%% {workers, Loaders, Senders, DumperPid}. Returns {timeout, Timeout}
+%% if the controller is not registered or does not answer in time.
+get_workers(Timeout) ->
+    case whereis(?SERVER_NAME) of
+        undefined ->
+            {timeout, Timeout};
+        Server ->
+            Server ! {self(), get_state},
+            receive
+                {?SERVER_NAME, #state{} = State} ->
+                    Loaders = get_loaders(State),
+                    Senders = get_senders(State),
+                    {workers, Loaders, Senders, State#state.dumper_pid}
+            after Timeout ->
+                    {timeout, Timeout}
+            end
+    end.
+
+%% Print a heading followed by one line per locally active table.
+info() ->
+    ActiveTabs = mnesia_lib:local_active_tables(),
+    io:format( "---> Active tables <--- ~n", []),
+    info(ActiveTabs).
+
+%% Print size information for each table in Tabs: number of records
+%% and file size for disc_only_copies tables (taken from dets), number
+%% of records and memory use for all other tables (taken from ets).
+%% Returns ok.
+info(Tabs) ->
+    PrintOne =
+        fun(Tab) ->
+                case val({Tab, storage_type}) of
+                    disc_only_copies ->
+                        info_format(Tab,
+                                    dets:info(Tab, size),
+                                    dets:info(Tab, file_size),
+                                    "bytes on disc");
+                    _ ->
+                        info_format(Tab,
+                                    ?ets_info(Tab, size),
+                                    ?ets_info(Tab, memory),
+                                    "words of mem")
+                end
+        end,
+    lists:foreach(PrintOne, Tabs).
+
+
+%% Print one line of table statistics. The table name and the two
+%% integers Size and Mem are padded by mnesia_lib:pad_name/3 (with
+%% widths 15, 8 and 8); Media is the text describing the unit of Mem.
+info_format(Tab, Size, Mem, Media) ->
+    Pad = fun(Str, Width) -> mnesia_lib:pad_name(Str, Width, []) end,
+    Name = Pad(atom_to_list(Tab), 15),
+    Records = Pad(integer_to_list(Size), 8),
+    Occupied = Pad(integer_to_list(Mem), 8),
+    io:format("~s: with ~s records occupying ~s ~s~n",
+              [Name, Records, Occupied, Media]).
+
+%% Handle early arrived messages
+%% Each message is dispatched through handle_early_msg/2. A reply
+%% result is sent to the original caller of the queued call, a stop
+%% result aborts the processing and is returned as is.
+%% ({stop, Reason, Reply, State} is not handled; according to
+%% dialyzer it will not happen.)
+handle_early_msgs([], State) ->
+    noreply(State);
+handle_early_msgs([Msg | Rest], State) ->
+    %% The messages are in reverse order
+    case handle_early_msg(Msg, State) of
+        {noreply, NewState} ->
+            handle_early_msgs(Rest, NewState);
+        {reply, Reply, NewState} ->
+            {call, _Request, From} = Msg,
+            reply(From, Reply),
+            handle_early_msgs(Rest, NewState);
+        {stop, _Reason, _NewState} = Stop ->
+            Stop
+    end.
+
+%% Dispatch one queued message to the gen_server callback that
+%% matches its tag (call, cast or info).
+handle_early_msg({call, Request, From}, State) ->
+    handle_call(Request, From, State);
+handle_early_msg({cast, Request}, State) ->
+    handle_cast(Request, State);
+handle_early_msg({info, Info}, State) ->
+    handle_info(Info, State).
+
+%% Wrap State in a {noreply, State} callback result.
+noreply(State) -> {noreply, State}.
+
+%% Send Reply to ReplyTo with gen_server:reply/2, unless ReplyTo is
+%% undefined in which case nothing is sent. Always returns Reply.
+reply(ReplyTo, Reply) ->
+    case ReplyTo of
+        undefined -> Reply;
+        _ ->
+            gen_server:reply(ReplyTo, Reply),
+            Reply
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Worker management
+
+%% Enqueue Worker and try to start a worker right away.
+%% Returns new State
+%%
+%% dump_log and schema_commit_lock workers are appended to the dumper
+%% queue, block_controller workers are put first in it, send_table
+%% workers are appended to the sender queue and net_load/disc_load
+%% workers are handed to add_loader/3.
+add_worker(#dump_log{initiated_by = InitBy, opt_reply_to = ReplyTo} = Worker, State) ->
+    Queue = State#state.dumper_queue,
+    Pending = lists:keymember(InitBy, #dump_log.initiated_by, Queue),
+    if
+        Pending, ReplyTo == undefined ->
+            %% The same threshold has been exceeded again,
+            %% before we have had the possibility to
+            %% process the older one.
+            Event = {mnesia_overload, {dump_log, InitBy}},
+            mnesia_lib:report_system_event(Event);
+        true ->
+            ignore
+    end,
+    opt_start_worker(State#state{dumper_queue = Queue ++ [Worker]});
+add_worker(#schema_commit_lock{} = Worker, State) ->
+    Queue = State#state.dumper_queue,
+    opt_start_worker(State#state{dumper_queue = Queue ++ [Worker]});
+add_worker(#net_load{table = Tab} = Worker, State) ->
+    opt_start_worker(add_loader(Tab, Worker, State));
+add_worker(#send_table{} = Worker, State) ->
+    Queue = State#state.sender_queue,
+    opt_start_worker(State#state{sender_queue = Queue ++ [Worker]});
+add_worker(#disc_load{table = Tab} = Worker, State) ->
+    opt_start_worker(add_loader(Tab, Worker, State));
+% Block controller should be used for upgrading mnesia.
+add_worker(#block_controller{} = Worker, State) ->
+    Queue = State#state.dumper_queue,
+    opt_start_worker(State#state{dumper_queue = [Worker | Queue]}).
+
+%% Insert Worker in the loader queue under the key Tab, unless a
+%% worker for Tab is already queued. Returns the new state.
+add_loader(Tab, Worker, #state{loader_queue = Queue} = State) ->
+    AlreadyQueued = gb_trees:is_defined(Tab, Queue),
+    if
+        AlreadyQueued ->
+            State;
+        true ->
+            State#state{loader_queue = gb_trees:insert(Tab, Worker, Queue)}
+    end.
+
+%% Optionally start a worker
+%%
+%% Dumpers and loaders may run simultaneously
+%% but neither of them may run during schema commit.
+%% Loaders may not start if a schema commit is enqueued.
+%%
+%% Nothing is started once the controller is stopping.
+%% Returns the new state.
+opt_start_worker(State) when State#state.is_stopping == true ->
+    State;
+opt_start_worker(State) ->
+    %% The dumper queue (log dumps, schema commits and controller
+    %% blocks) is inspected before senders and loaders.
+    case State#state.dumper_queue of
+        [Next | _] when State#state.dumper_pid == undefined ->
+            %% The dumper slot is free. The head of the queue is
+            %% started (or granted) but it stays in the queue.
+            if
+                is_record(Next, dump_log) ->
+                    Dumper = spawn_link(?MODULE, dump_and_reply, [self(), Next]),
+                    %% A running dumper does not prevent us from
+                    %% trying to start a sender and a loader too
+                    Dumping = State#state{dumper_pid = Dumper},
+                    opt_start_loader(opt_start_sender(Dumping));
+                is_record(Next, schema_commit_lock) ->
+                    From = Next#schema_commit_lock.owner,
+                    reply(From, granted),
+                    {Owner, _Tag} = From,
+                    opt_start_loader(State#state{dumper_pid = Owner});
+                is_record(Next, block_controller) ->
+                    %% Only granted when no sender or loader is running
+                    case {get_senders(State), get_loaders(State)} of
+                        {[], []} ->
+                            From = Next#block_controller.owner,
+                            reply(From, granted),
+                            {Owner, _Tag} = From,
+                            State#state{dumper_pid = Owner};
+                        _ ->
+                            State
+                    end
+            end;
+        _ ->
+            %% Bad luck, try with a loader or sender instead
+            opt_start_loader(opt_start_sender(State))
+    end.
+
+%% Try to start the queued table senders. The state is returned with
+%% the resulting list of running senders and the queue of senders
+%% that have to wait. Nothing is done when the queue is empty.
+opt_start_sender(#state{sender_queue = []} = State) ->
+    State; %% No need
+opt_start_sender(State) ->
+    Queue = State#state.sender_queue,
+    {Running, Waiting} =
+        opt_start_sender2(Queue, get_senders(State), [], get_loaders(State)),
+    State#state{sender_pid = Running, sender_queue = Waiting}.
+
+%% Walk the sender queue and decide for each queued #send_table{}:
+%%  - the table is active here but is also being loaded here:
+%%    the sender has to wait and is kept in the queue,
+%%  - the table is active here: a sender process is spawned and the
+%%    request leaves the queue,
+%%  - the table is not active here: the receiver is told
+%%    {copier_done, node()} and the request is dropped.
+%% Returns {Pids, Kept} where Pids is the list of {Pid, Sender} for
+%% running senders and Kept the senders that still wait. Kept is
+%% returned in the original queue order, so that waiting senders do
+%% not get their relative order reversed on every pass.
+opt_start_sender2([], Pids, Kept, _) ->
+    {Pids, lists:reverse(Kept)};
+opt_start_sender2([Sender | R], Pids, Kept, LoaderQ) ->
+    Tab = Sender#send_table.table,
+    Active = val({Tab, active_replicas}),
+    IgotIt = lists:member(node(), Active),
+    %% The table field is read by position with the #net_load index
+    %% whatever record the loader entry is;
+    %% NOTE(review): presumably #disc_load{} has its table field at
+    %% the same position - confirm in the record definitions.
+    IsLoading = lists:any(fun({_Pid, Loader}) ->
+                                  Tab == element(#net_load.table, Loader)
+                          end, LoaderQ),
+    if
+        IgotIt, IsLoading ->
+            %% I'm currently finishing loading the table let him wait
+            opt_start_sender2(R, Pids, [Sender | Kept], LoaderQ);
+        IgotIt ->
+            %% Start the sender, it is no longer waiting in the queue
+            Pid = spawn_link(?MODULE, send_and_reply, [self(), Sender]),
+            opt_start_sender2(R, [{Pid, Sender} | Pids], Kept, LoaderQ);
+        true ->
+            verbose("Send table failed ~p not active on this node ~n", [Tab]),
+            Sender#send_table.receiver_pid ! {copier_done, node()},
+            opt_start_sender2(R, Pids, Kept, LoaderQ)
+    end.
+
+%% Try to start the next queued table loader.
+%% Nothing is started when the loader queue is empty, when the
+%% maximum number of loaders is already running or when a schema
+%% commit lock is waiting in the dumper queue. Queue entries for
+%% tables that are already being loaded are skipped.
+%% Returns the new state.
+opt_start_loader(#state{loader_queue = LoaderQ} = State) ->
+    Running = get_loaders(State),
+    Limit = max_loaders(),
+    QueueEmpty = gb_trees:is_empty(LoaderQ),
+    if
+        QueueEmpty ->
+            State;
+        length(Running) >= Limit ->
+            State;
+        true ->
+            DumperQ = State#state.dumper_queue,
+            case lists:keymember(schema_commit_lock, 1, DumperQ) of
+                true ->
+                    %% Bad luck, we must wait for the schema commit
+                    State;
+                false ->
+                    case pick_next(LoaderQ) of
+                        {none, Rest} ->
+                            State#state{loader_queue = Rest};
+                        {Worker, Rest} ->
+                            case already_loading(Worker, get_loaders(State)) of
+                                true ->
+                                    %% Skip it and try the next one
+                                    opt_start_loader(State#state{loader_queue = Rest});
+                                false ->
+                                    %% Start the loader, it has left the queue
+                                    Pid = load_and_reply(self(), Worker),
+                                    State#state{loader_pid = [{Pid, Worker} | get_loaders(State)],
+                                                loader_queue = Rest}
+                            end
+                    end
+            end
+    end.
+
+%% True if the table of the given net_load or disc_load worker is
+%% already handled by one of the running Loaders.
+already_loading(#net_load{table = Table}, Loaders) ->
+    already_loading2(Table, Loaders);
+already_loading(#disc_load{table = Table}, Loaders) ->
+    already_loading2(Table, Loaders).
+
+%% True if some {Pid, Worker} entry in Loaders is a net_load or
+%% disc_load worker for the table Tab.
+already_loading2(Tab, Loaders) ->
+    LoadsTab =
+        fun({_, #net_load{table = T}}) when T =:= Tab -> true;
+           ({_, #disc_load{table = T}}) when T =:= Tab -> true;
+           (_) -> false
+        end,
+    lists:any(LoadsTab, Loaders).
+
+%% Cast a #send_table{} request to the controller on Node, asking it
+%% to send Tab to the process Receiver which stores it as Storage.
+start_remote_sender(Node, Tab, Receiver, Storage) ->
+    Request = #send_table{table = Tab,
+                          receiver_pid = Receiver,
+                          remote_storage = Storage},
+    Controller = {?SERVER_NAME, Node},
+    gen_server:cast(Controller, Request).
+
+%% Body of a dumper process: dump the log, report the result to
+%% ReplyTo as a #dumper_done{} message, unlink and exit normally.
+dump_and_reply(ReplyTo, Worker) ->
+    %% No trap_exit, die intentionally instead
+    InitBy = Worker#dump_log.initiated_by,
+    Outcome = mnesia_dumper:opt_dump_log(InitBy),
+    Report = #dumper_done{worker_pid = self(),
+                          worker_res = Outcome},
+    ReplyTo ! Report,
+    unlink(ReplyTo),
+    exit(normal).
+
+%% Body of a sender process: send the table, report the result to
+%% ReplyTo as a #sender_done{} message, unlink and exit normally.
+send_and_reply(ReplyTo, Worker) ->
+    %% No trap_exit, die intentionally instead
+    #send_table{receiver_pid = Receiver,
+                table = Tab,
+                remote_storage = Storage} = Worker,
+    Outcome = mnesia_loader:send_table(Receiver, Tab, Storage),
+    Report = #sender_done{worker_pid = self(),
+                          worker_res = Outcome},
+    ReplyTo ! Report,
+    unlink(ReplyTo),
+    exit(normal).
+
+%% Spawn a linked loader process for Worker and return its pid.
+%% The load fun is prepared by load_table_fun/1 in the calling
+%% process; the spawned process traps exits, runs the fun, sends the
+%% resulting #loader_done{} (tagged with its own pid) to ReplyTo,
+%% unlinks and exits normally.
+load_and_reply(ReplyTo, Worker) ->
+    LoadFun = load_table_fun(Worker),
+    spawn_link(fun() ->
+                       process_flag(trap_exit, true),
+                       Result = LoadFun(),
+                       ReplyTo ! Result#loader_done{worker_pid = self()},
+                       unlink(ReplyTo),
+                       exit(normal)
+               end).
+
+%% Now it is time to load the table
+%% but first we must check if it still is necessary.
+%% Returns a fun of arity 0 that performs the load (if any) and
+%% returns a #loader_done{} record describing the outcome.
+load_table_fun(#net_load{cstruct = Cs, table = Tab, reason = Reason,
+                         opt_reply_to = ReplyTo}) ->
+    IsLocalContent = val({Tab, local_content}),
+    AccessMode = val({Tab, access_mode}),
+    ReadNode = val({Tab, where_to_read}),
+    Sources = filter_active(Tab),
+    Done = #loader_done{is_loaded = true,
+                        table_name = Tab,
+                        needs_announce = false,
+                        needs_sync = false,
+                        needs_reply = (ReplyTo /= undefined),
+                        reply_to = ReplyTo,
+                        reply = {loaded, ok}},
+    if
+        ReadNode == node() ->
+            %% Already loaded locally
+            fun() -> Done end;
+        IsLocalContent == true ->
+            fun() ->
+                    LocalRes = mnesia_loader:disc_load_table(Tab, load_local_content),
+                    Done#loader_done{reply = LocalRes,
+                                     needs_announce = true,
+                                     needs_sync = true}
+            end;
+        AccessMode == read_only, Reason /= {dumper,add_table_copy} ->
+            fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
+        true ->
+            fun() ->
+                    %% Either we cannot read the table yet
+                    %% or someone is moving a replica between
+                    %% two nodes
+                    case mnesia_loader:net_load_table(Tab, Reason, Sources, Cs) of
+                        {loaded, ok} = NetRes ->
+                            Done#loader_done{needs_sync = true,
+                                             reply = NetRes};
+                        {not_loaded, _} = NetRes ->
+                            Done#loader_done{is_loaded = false,
+                                             reply = NetRes}
+                    end
+            end
+    end;
+load_table_fun(#disc_load{table = Tab, reason = Reason, opt_reply_to = ReplyTo}) ->
+    ReadNode = val({Tab, where_to_read}),
+    Sources = filter_active(Tab),
+    Done = #loader_done{is_loaded = true,
+                        table_name = Tab,
+                        needs_announce = false,
+                        needs_sync = false,
+                        needs_reply = false},
+    if
+        Sources == [], ReadNode == nowhere ->
+            %% Not loaded anywhere, lets load it from disc
+            fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
+        ReadNode == nowhere ->
+            %% Already loaded on other node, lets get it
+            Cs = val({Tab, cstruct}),
+            fun() ->
+                    case mnesia_loader:net_load_table(Tab, Reason, Sources, Cs) of
+                        {loaded, ok} ->
+                            Done#loader_done{needs_sync = true};
+                        {not_loaded, storage_unknown} ->
+                            Done#loader_done{is_loaded = false};
+                        {not_loaded, ErrReason} ->
+                            Done#loader_done{is_loaded = false,
+                                             reply = {not_loaded,ErrReason}}
+                    end
+            end;
+        true ->
+            %% Already readable, do not worry be happy
+            fun() -> Done end
+    end.
+
+%% Load Tab from disc and return a #loader_done{} for the outcome.
+%% A failed load is reported back in the record when there is someone
+%% to reply to (ReplyTo /= undefined); otherwise it is fatal.
+disc_load_table(Tab, Reason, ReplyTo) ->
+    Done = #loader_done{is_loaded = true,
+                        table_name = Tab,
+                        needs_announce = false,
+                        needs_sync = false,
+                        needs_reply = ReplyTo /= undefined,
+                        reply_to = ReplyTo,
+                        reply = {loaded, ok}},
+    case mnesia_loader:disc_load_table(Tab, Reason) of
+        {loaded, ok} = Loaded ->
+            Done#loader_done{needs_announce = true,
+                             needs_sync = true,
+                             reply = Loaded};
+        Failure when ReplyTo /= undefined ->
+            Done#loader_done{is_loaded = false,
+                             reply = Failure};
+        Failure ->
+            fatal("Cannot load table ~p from disc: ~p~n", [Tab, Failure])
+    end.
+
+%% Return the nodes that Tab may be loaded from, best candidates first.
+%% The active replicas are restricted to the master nodes (unless the
+%% load is forced or no masters are defined, see do_filter_active/3).
+%% Nodes are then split in Good and Worse: with a local
+%% disc_only_copies replica the disc_only_copies holders are Good,
+%% otherwise the nodes that are not disc_only_copies holders are Good.
+%% The Good nodes are rotated by a hash of the local node name, so
+%% that different nodes start with different sources.
+filter_active(Tab) ->
+    ByForce = val({Tab, load_by_force}),
+    Active = val({Tab, active_replicas}),
+    Masters = mnesia_recover:get_master_nodes(Tab),
+    Ns = do_filter_active(ByForce, Active, Masters),
+    %% Reorder them so that we load from the fastest first
+    LS = ?catch_val({Tab, storage_type}),
+    DOC = val({Tab, disc_only_copies}),
+    Good =
+        case LS of
+            disc_only_copies -> mnesia_lib:intersect(Ns, DOC);
+            _ -> Ns -- DOC
+        end,
+    Worse = Ns -- Good,
+    case Good of
+        [] ->
+            Worse;
+        _ ->
+            %% erlang:phash/2 is deprecated. phash2/2 returns a value
+            %% in 0..Range-1, i.e. 0..length(Good), which is exactly
+            %% the number of nodes random/3 can rotate.
+            Rotate = erlang:phash2(node(), length(Good) + 1),
+            random(Rotate, Good, Worse)
+    end.
+
+%% Move the first N elements of the list, one at a time, to the front
+%% of Acc and return the remaining elements followed by Acc.
+random(0, Remaining, Acc) ->
+    Remaining ++ Acc;
+random(Count, [Head | Tail], Acc) when Count > 0 ->
+    random(Count - 1, Tail, [Head | Acc]).
+
+%% Select the active nodes a table may be loaded from:
+%% all of them when the load is forced or when no master nodes are
+%% defined, otherwise only those that are also master nodes.
+do_filter_active(true, ActiveNodes, _Masters) ->
+    ActiveNodes;
+do_filter_active(false, ActiveNodes, []) ->
+    ActiveNodes;
+do_filter_active(false, ActiveNodes, MasterNodes) ->
+    mnesia_lib:intersect(ActiveNodes, MasterNodes).
+
+
diff --git a/lib/mnesia/src/mnesia_dumper.erl b/lib/mnesia/src/mnesia_dumper.erl
new file mode 100644
index 0000000000..f669d009c6
--- /dev/null
+++ b/lib/mnesia/src/mnesia_dumper.erl
@@ -0,0 +1,1218 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_dumper).
+
+%% The InitBy arg may be one of the following:
+%% scan_decisions Initial scan for decisions
+%% startup Initial dump during startup
+%% schema_prepare Dump initiated during schema transaction preparation
+%% schema_update Dump initiated during schema transaction commit
+%% fast_schema_update A schema_update, but ignores the log file
+%% user Dump initiated by user
+%% write_threshold Automatic dump caused by too many log writes
+%% time_threshold Automatic dump caused by timeout
+
+%% Public interface
+-export([
+ get_log_writes/0,
+ incr_log_writes/0,
+ raw_dump_table/2,
+ raw_named_dump_table/2,
+ start_regulator/0,
+ opt_dump_log/1,
+ update/3
+ ]).
+
+ %% Internal stuff
+-export([regulator_init/1]).
+
+-include("mnesia.hrl").
+-include_lib("kernel/include/file.hrl").
+
+-import(mnesia_lib, [fatal/2, dbg_out/2]).
+
+-define(REGULATOR_NAME, mnesia_dumper_load_regulator).
+-define(DumpToEtsMultiplier, 4).
+
+%% Return the value of the trans_log_writes counter plus the number
+%% of writes consumed from the current threshold window, i.e.
+%% dump_log_write_threshold minus trans_log_writes_left.
+get_log_writes() ->
+    Threshold = mnesia_monitor:get_env(dump_log_write_threshold),
+    Accumulated = mnesia_lib:read_counter(trans_log_writes),
+    Remaining = mnesia_lib:read_counter(trans_log_writes_left),
+    Accumulated + (Threshold - Remaining).
+
+%% Count one log write by decrementing trans_log_writes_left. When
+%% no writes are left the counters are adjusted and a dump is requested.
+incr_log_writes() ->
+    case mnesia_lib:incr_counter(trans_log_writes_left, -1) > 0 of
+        true ->
+            ignore;
+        false ->
+            adjust_log_writes(true)
+    end.
+
+%% Reset trans_log_writes_left to the configured threshold and add the
+%% consumed writes to trans_log_writes. When DoCast is true an
+%% asynchronous write_threshold dump is requested first.
+%% The work is guarded by a node local global lock that is tried
+%% once; if the lock is taken nothing is done.
+adjust_log_writes(DoCast) ->
+    Lock = {mnesia_adjust_log_writes, self()},
+    case global:set_lock(Lock, [node()], 1) of
+        true ->
+            case DoCast of
+                true ->
+                    mnesia_controller:async_dump_log(write_threshold);
+                false ->
+                    ignore
+            end,
+            Threshold = mnesia_monitor:get_env(dump_log_write_threshold),
+            Remaining = mnesia_lib:read_counter(trans_log_writes_left),
+            %% Don't care if we lost a few writes
+            mnesia_lib:set_counter(trans_log_writes_left, Threshold),
+            mnesia_lib:incr_counter(trans_log_writes, Threshold - Remaining),
+            global:del_lock(Lock, [node()]);
+        false ->
+            ignore %% Somebody else is sending a dump request
+    end.
+
+%% Dump the transaction log on behalf of InitBy. The pid of the load
+%% regulator process, or nopid if none is registered, is passed along
+%% to perform_dump/2 whose result is returned.
+opt_dump_log(InitBy) ->
+    Regulator =
+        case whereis(?REGULATOR_NAME) of
+            Pid when is_pid(Pid) -> Pid;
+            undefined -> nopid
+        end,
+    perform_dump(InitBy, Regulator).
+
+%% Scan for decisions
+%% Both the previous and the latest log file are scanned.
+perform_dump(scan_decisions = InitBy, Regulator) ->
+    ?eval_debug_fun({?MODULE, perform_dump}, [InitBy]),
+    dbg_out("Transaction log dump initiated by ~w~n", [InitBy]),
+    scan_decisions(mnesia_log:previous_log_file(), InitBy, Regulator),
+    scan_decisions(mnesia_log:latest_log_file(), InitBy, Regulator);
+%% Propagate the log into the DAT-files
+%% Returns dumped when there was nothing to dump, the result of
+%% mnesia_log:confirm_log_dump/1 after a dump, or {error, Reason}.
+perform_dump(InitBy, Regulator) ->
+    ?eval_debug_fun({?MODULE, perform_dump}, [InitBy]),
+    LogState = mnesia_log:prepare_log_dump(InitBy),
+    dbg_out("Transaction log dump initiated by ~w: ~w~n",
+            [InitBy, LogState]),
+    adjust_log_writes(false),
+    case LogState of
+        {needs_dump, Diff} ->
+            InPlace = mnesia_monitor:get_env(dump_log_update_in_place),
+            Cont = mnesia_log:init_log_dump(),
+            mnesia_recover:sync(),
+            case catch do_perform_dump(Cont, InPlace, InitBy, Regulator, undefined) of
+                ok ->
+                    ?eval_debug_fun({?MODULE, post_dump}, [InitBy]),
+                    case mnesia_monitor:use_dir() of
+                        true ->
+                            mnesia_recover:dump_decision_tab();
+                        false ->
+                            mnesia_log:purge_some_logs()
+                    end,
+                    mnesia_recover:allow_garb(),
+                    %% And now to the crucial point...
+                    mnesia_log:confirm_log_dump(Diff);
+                {error, DumpError} ->
+                    {error, DumpError};
+                {'EXIT', {Desc, ExitArgs}} ->
+                    %% The dump failed; with auto_repair the failure is
+                    %% reported and the rest of the log is ignored
+                    case mnesia_monitor:get_env(auto_repair) of
+                        true ->
+                            mnesia_lib:important(Desc, ExitArgs),
+                            %% Ignore rest of the log
+                            mnesia_log:confirm_log_dump(Diff);
+                        false ->
+                            fatal(Desc, ExitArgs)
+                    end
+            end;
+        already_dumped ->
+            mnesia_recover:allow_garb(),
+            dumped;
+        {error, PrepareError} ->
+            {error, {"Cannot prepare log dump", PrepareError}}
+    end.
+
+%% Scan the log file Fname, if it exists, for transaction decisions.
+%% The file is opened read_only under the log name previous_log and is
+%% closed again whatever the outcome of the scan.
+%% Returns ok or {error, Reason}.
+scan_decisions(Fname, InitBy, Regulator) ->
+    case mnesia_lib:exists(Fname) of
+        true ->
+            Header = mnesia_log:trans_log_header(),
+            LogName = previous_log,
+            mnesia_log:open_log(LogName, Header, Fname, true,
+                                mnesia_monitor:get_env(auto_repair), read_only),
+            Outcome = (catch do_perform_dump(start, false, InitBy, Regulator, undefined)),
+            mnesia_log:close_log(LogName),
+            case Outcome of
+                ok -> ok;
+                {'EXIT', Reason} -> {error, Reason}
+            end;
+        false ->
+            ok
+    end.
+
+%% Read the log chunk by chunk and insert the records of each chunk.
+%% The log version found in the log headers is carried along from
+%% chunk to chunk. At end of file the opened files are closed and ok
+%% is returned. If inserting a chunk fails the files are closed and
+%% the process exits with a {FormatString, Args} reason.
+do_perform_dump(Cont, InPlace, InitBy, Regulator, OldVersion) ->
+    case mnesia_log:chunk_log(Cont) of
+        eof ->
+            close_files(InPlace, ok, InitBy),
+            erase(mnesia_dumper_dets),
+            ok;
+        {NextCont, Recs} ->
+            case catch insert_recs(Recs, InPlace, InitBy, Regulator, OldVersion) of
+                {'EXIT', Why} ->
+                    Reason = {"Transaction log dump error: ~p~n", [Why]},
+                    close_files(InPlace, {error, Reason}, InitBy),
+                    exit(Reason);
+                NewVersion ->
+                    do_perform_dump(NextCont, InPlace, InitBy, Regulator, NewVersion)
+            end
+    end.
+
+%% Insert the log records one by one, calling regulate/1 before each
+%% of them. When a record turns out to be a log header its version
+%% becomes the log version used for the records that follow.
+%% Returns the log version in effect after the last record.
+insert_recs([], _InPlace, _InitBy, _Regulator, Version) ->
+    Version;
+insert_recs([Rec | Rest], InPlace, InitBy, Regulator, Version) ->
+    regulate(Regulator),
+    NextVersion =
+        case insert_rec(Rec, InPlace, InitBy, Version) of
+            #log_header{log_version = HeaderVersion} -> HeaderVersion;
+            _ -> Version
+        end,
+    insert_recs(Rest, InPlace, InitBy, Regulator, NextVersion).
+
+%% Handle one log record.
+%% During a scan for decisions commit records and log headers are
+%% ignored and everything else is noted as a decision. Otherwise a
+%% commit record is inserted or undone depending on the outcome of
+%% its transaction, a log header is validated and returned, and any
+%% other record is skipped.
+insert_rec(Rec, _InPlace, scan_decisions, _LogV) ->
+    case Rec of
+        #commit{} ->
+            ignore;
+        #log_header{} ->
+            ignore;
+        _ ->
+            mnesia_recover:note_log_decision(Rec, scan_decisions)
+    end;
+insert_rec(#commit{decision = D} = Rec, InPlace, InitBy, LogV) ->
+    %% Determine the Outcome of the transaction and recover it
+    case mnesia_recover:wait_for_decision(D, InitBy) of
+        {Tid, committed} ->
+            do_insert_rec(Tid, Rec, InPlace, InitBy, LogV);
+        {Tid, aborted} ->
+            mnesia_schema:undo_prepare_commit(Tid, Rec)
+    end;
+insert_rec(#log_header{log_kind = Kind, log_version = Version} = H,
+           _InPlace, _InitBy, _LogV) ->
+    CurrentVersion = mnesia_log:version(),
+    if
+        Kind /= trans_log ->
+            exit({"Bad kind of transaction log", H});
+        Version == CurrentVersion;
+        Version == "4.2";
+        Version == "4.1";
+        Version == "4.0" ->
+            %% The current and the listed older versions are accepted
+            ok;
+        true ->
+            fatal("Bad version of transaction log: ~p~n", [H])
+    end,
+    H;
+insert_rec(_Rec, _InPlace, _InitBy, _LogV) ->
+    ok.
+
+%% Insert the operations of a committed transaction: first the schema
+%% operations (opening the schema files unless the schema is
+%% ram_copies), then the disc_copies operations and, only when the
+%% dump is initiated by startup, the disc_only_copies operations.
+do_insert_rec(Tid, Rec, InPlace, InitBy, LogV) ->
+    case Rec#commit.schema_ops of
+        [] ->
+            ignore;
+        SchemaOps ->
+            case val({schema, storage_type}) of
+                ram_copies ->
+                    ok;
+                Storage ->
+                    true = open_files(schema, Storage, InPlace, InitBy)
+            end,
+            insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy, LogV)
+    end,
+    insert_ops(Tid, disc_copies, Rec#commit.disc_copies, InPlace, InitBy, LogV),
+    case InitBy of
+        startup ->
+            DiscOnlyOps = Rec#commit.disc_only_copies,
+            insert_ops(Tid, disc_only_copies, DiscOnlyOps, InPlace, InitBy, LogV);
+        _ ->
+            ignore
+    end.
+
+
+%% Apply the schema operations of a transaction. With no operations
+%% dumped is returned at once. Otherwise the update is performed, the
+%% schema commit lock is released and the result of the update is
+%% returned.
+update(_Tid, [], _DumperMode) ->
+    dumped;
+update(Tid, SchemaOps, DumperMode) ->
+    Result = perform_update(Tid, SchemaOps, DumperMode, mnesia_monitor:use_dir()),
+    mnesia_controller:release_schema_commit_lock(),
+    Result.
+
+%% Perform a schema update.
+%% With a mandatory dumper mode and a mnesia directory in use the
+%% whole transaction log is dumped; in all other cases only the given
+%% schema operations are applied.
+perform_update(_Tid, _SchemaOps, mandatory, true) ->
+    %% Force a dump of the transaction log in order to let the
+    %% dumper perform needed updates
+    InitBy = schema_update,
+    ?eval_debug_fun({?MODULE, dump_schema_op}, [InitBy]),
+    opt_dump_log(InitBy);
+perform_update(Tid, SchemaOps, _DumperMode, _UseDir) ->
+    %% No need for a full transaction log dump.
+    %% Ignore the log file and only perform
+    %% the corresponding updates.
+    InitBy = fast_schema_update,
+    InPlace = mnesia_monitor:get_env(dump_log_update_in_place),
+    ?eval_debug_fun({?MODULE, dump_schema_op}, [InitBy]),
+    Outcome = (catch insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy,
+                                mnesia_log:version())),
+    case Outcome of
+        {'EXIT', Reason} ->
+            Error = {error, {"Schema update error", Reason}},
+            close_files(InPlace, Error, InitBy),
+            fatal("Schema update error ~p ~p", [Reason, SchemaOps]);
+        _ ->
+            ?eval_debug_fun({?MODULE, post_dump}, [InitBy]),
+            close_files(InPlace, ok, InitBy),
+            ok
+    end.
+
+%% Apply a list of operations with insert_op/5.
+%% For log versions that compare >= "4.3" the operations are applied
+%% in list order and ok is returned. For versions that compare lower
+%% they are applied in reverse list order and the result of the last
+%% applied operation (the first one in the list) is returned.
+insert_ops(_Tid, _Storage, [], _InPlace, _InitBy, _Ver) ->
+    ok;
+insert_ops(Tid, Storage, Ops, InPlace, InitBy, Ver) when Ver >= "4.3" ->
+    Apply = fun(Op) -> insert_op(Tid, Storage, Op, InPlace, InitBy) end,
+    lists:foreach(Apply, Ops);
+insert_ops(Tid, Storage, [First | Later], InPlace, InitBy, Ver) when Ver < "4.3" ->
+    %% Apply the later operations before this one
+    insert_ops(Tid, Storage, Later, InPlace, InitBy, Ver),
+    insert_op(Tid, Storage, First, InPlace, InitBy).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Normal ops
+
+%% Propagate one operation to the disc representation of Tab.
+%% Nothing is done (ignore) when open_files/4 returns false.
+%% Operations on disc_copies tables other than the schema are
+%% appended to the log named {?MODULE, Tab}; all other operations go
+%% straight into the dets table.
+disc_insert(_Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy) ->
+    case open_files(Tab, Storage, InPlace, InitBy) of
+        false ->
+            ignore;
+        true when Storage == disc_copies, Tab /= schema ->
+            mnesia_log:append({?MODULE,Tab}, {{Tab, Key}, Val, Op}),
+            ok;
+        true ->
+            dets_insert(Op,Tab,Key,Val)
+    end.
+
+%% Apply one operation to the dets table Tab.
+%%
+%% Writes, deletes and clears are recorded (dets_updated/2 and
+%% dets_cleared/1) before they are applied. An update_counter is only
+%% applied when dets_incr_counter/2 tells that the table has been
+%% cleared or the key has been touched earlier in this dump; if
+%% nothing else has changed the value in the dets file is trusted,
+%% since applying the increment would give a double increment.
+%% When the counter cannot be updated it is (re)created with the
+%% value Incr, or 0 if Incr is negative.
+dets_insert(Op,Tab,Key,Val) ->
+    case Op of
+        write ->
+            dets_updated(Tab,Key),
+            ok = dets:insert(Tab, Val);
+        delete ->
+            dets_updated(Tab,Key),
+            ok = dets:delete(Tab, Key);
+        delete_object ->
+            dets_updated(Tab,Key),
+            ok = dets:delete_object(Tab, Val);
+        clear_table ->
+            dets_cleared(Tab),
+            ok = dets:match_delete(Tab, '_');
+        update_counter ->
+            case dets_incr_counter(Tab,Key) of
+                false ->
+                    ok;
+                true ->
+                    {RecName, Incr} = Val,
+                    case catch dets:update_counter(Tab, Key, Incr) of
+                        NewVal when is_integer(NewVal) ->
+                            ok;
+                        _ when Incr < 0 ->
+                            ok = dets:insert(Tab, {RecName, Key, 0});
+                        _ ->
+                            ok = dets:insert(Tab, {RecName, Key, Incr})
+                    end
+            end
+    end.
+
+%% Remember, in the process dictionary entry mnesia_dumper_dets, that
+%% Key of Tab has been touched. The entry is a gb_tree mapping each
+%% table to either the atom cleared or a gb_set of touched keys;
+%% nothing is recorded for a table that is already marked cleared.
+dets_updated(Tab,Key) ->
+    Seen =
+        case get(mnesia_dumper_dets) of
+            undefined -> gb_trees:empty();
+            Tree -> Tree
+        end,
+    case gb_trees:lookup(Tab, Seen) of
+        {value, cleared} ->
+            ignore;
+        {value, Keys} ->
+            put(mnesia_dumper_dets,
+                gb_trees:update(Tab, gb_sets:add(Key, Keys), Seen));
+        none ->
+            put(mnesia_dumper_dets,
+                gb_trees:insert(Tab, gb_sets:singleton(Key), Seen))
+    end.
+
+%% True if an update_counter on Key of Tab shall be applied, i.e. if
+%% the table is marked cleared or Key is among the keys recorded for
+%% the table in the process dictionary entry mnesia_dumper_dets.
+dets_incr_counter(Tab,Key) ->
+    Seen = get(mnesia_dumper_dets),
+    Entry =
+        if
+            Seen =:= undefined -> none;
+            true -> gb_trees:lookup(Tab, Seen)
+        end,
+    case Entry of
+        none -> false;
+        {value, cleared} -> true;
+        {value, Keys} -> gb_sets:is_member(Key, Keys)
+    end.
+
+%% Mark Tab as cleared in the process dictionary entry
+%% mnesia_dumper_dets, replacing any set of touched keys recorded
+%% for the table.
+dets_cleared(Tab) ->
+    Seen =
+        case get(mnesia_dumper_dets) of
+            undefined -> gb_trees:empty();
+            Tree -> Tree
+        end,
+    case gb_trees:lookup(Tab, Seen) of
+        {value, cleared} ->
+            ignore;
+        _ ->
+            put(mnesia_dumper_dets, gb_trees:enter(Tab, cleared, Seen))
+    end.
+
+%% Apply operation Op on Tab for one value or a list of values.
+%% A list is handled element by element and gives ok.
+%% During startup the value is only propagated to disc. Otherwise
+%% the table is updated through mnesia_tm (followed by the snmp
+%% hooks), preceded by a propagation to disc for disc_copies tables.
+%% Nothing is done for tables with unknown storage.
+insert(Tid, Storage, Tab, Key, [Val | Rest], Op, InPlace, InitBy) ->
+    insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy),
+    insert(Tid, Storage, Tab, Key, Rest, Op, InPlace, InitBy);
+insert(_Tid, _Storage, _Tab, _Key, [], _Op, _InPlace, _InitBy) ->
+    ok;
+insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy) ->
+    Item = {{Tab, Key}, Val, Op},
+    UpdateTable =
+        fun() ->
+                mnesia_tm:do_update_op(Tid, Storage, Item),
+                Snmp = mnesia_tm:prepare_snmp(Tab, Key, [Item]),
+                mnesia_tm:do_snmp(Tid, Snmp)
+        end,
+    case InitBy of
+        startup ->
+            disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy);
+        _ when Storage == disc_copies ->
+            disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy),
+            UpdateTable();
+        _ when Storage == ram_copies; Storage == disc_only_copies ->
+            UpdateTable();
+        _ when Storage == unknown ->
+            ignore
+    end.
+
+%% Delete the files of Tab from disc, if a mnesia directory is in use.
+%% For disc_only_copies tables and for the schema the dets table is
+%% closed and the DAT file deleted. For other tables the DCL log is
+%% closed if the dumper has it open and the DCL and DCD files are
+%% deleted. The dumper's process dictionary entry for the table is
+%% erased afterwards.
+disc_delete_table(Tab, Storage) ->
+    case mnesia_monitor:use_dir() of
+        false ->
+            ignore;
+        true ->
+            if
+                Storage == disc_only_copies; Tab == schema ->
+                    mnesia_monitor:unsafe_close_dets(Tab),
+                    file:delete(mnesia_lib:tab2dat(Tab));
+                true ->
+                    DclFile = mnesia_lib:tab2dcl(Tab),
+                    case get({?MODULE,Tab}) of
+                        {opened_dumper, dcl} ->
+                            del_opened_tab(Tab),
+                            mnesia_log:unsafe_close_log(Tab);
+                        _ ->
+                            ok
+                    end,
+                    file:delete(DclFile),
+                    file:delete(mnesia_lib:tab2dcd(Tab)),
+                    ok
+            end,
+            erase({?MODULE, Tab})
+    end.
+
+%% Delete the transient index tables of a disc_only_copies table.
+%% Nothing is done (ignore) for any other storage type.
+disc_delete_indecies(Tab, Cs, disc_only_copies) ->
+    mnesia_index:del_transient(Tab, Cs#cstruct.index, disc_only_copies);
+disc_delete_indecies(_Tab, _Cs, _Storage) ->
+    ignore.
+
+ %% insert_op(Tid, Storage, Op, InPlace, InitBy)
+ %%
+ %% Applies one logged operation while dumping the transaction log.
+ %%
+ %% Op is either a plain record operation {{Tab, Key}, Val, Op}, which is
+ %% only propagated to disc, or a schema operation tagged {op, ...}.
+ %% Schema operations are skipped unless the dump was initiated by
+ %% startup, fast_schema_update or schema_update (see the guard on the
+ %% second clause). Most schema clauses rebuild the cstruct from TabDef
+ %% with mnesia_schema:list2cs/1, perform the file and ram side effects
+ %% for the local node and finally store the cstruct in the schema via
+ %% insert_cstruct/5 (or remove it via delete_cstruct/4).
+ insert_op(Tid, Storage, {{Tab, Key}, Val, Op}, InPlace, InitBy) ->
+ %% Propagate to disc only
+ disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy);
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ %% NOTE that all operations below will only
+ %% be performed if the dump is initiated by
+ %% startup, fast_schema_update or schema_update
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ insert_op(_Tid, schema_ops, _OP, _InPlace, Initby)
+ when Initby /= startup,
+ Initby /= fast_schema_update,
+ Initby /= schema_update ->
+ ignore;
+
+ insert_op(Tid, _, {op, rec, Storage, Item}, InPlace, InitBy) ->
+ {{Tab, Key}, ValList, Op} = Item,
+ insert(Tid, Storage, Tab, Key, ValList, Op, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, change_table_copy_type, N, FromS, ToS, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Val = mnesia_schema:insert_cstruct(Tid, Cs, true), % Update ram only
+ {schema, Tab, _} = Val,
+ case lists:member(N, val({current, db_nodes})) of
+ true when InitBy /= startup ->
+ mnesia_controller:add_active_replica(Tab, N, Cs);
+ _ ->
+ ignore
+ end,
+ %% File conversions are only performed on the node whose copy changes.
+ if
+ N == node() ->
+ Dmp = mnesia_lib:tab2dmp(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ Dcd = mnesia_lib:tab2dcd(Tab),
+ Dcl = mnesia_lib:tab2dcl(Tab),
+ case {FromS, ToS} of
+ {ram_copies, disc_copies} when Tab == schema ->
+ ok = ensure_rename(Dmp, Dat);
+ {ram_copies, disc_copies} ->
+ file:delete(Dcl),
+ ok = ensure_rename(Dmp, Dcd);
+ {disc_copies, ram_copies} when Tab == schema ->
+ mnesia_lib:set(use_dir, false),
+ mnesia_monitor:unsafe_close_dets(Tab),
+ file:delete(Dat);
+ {disc_copies, ram_copies} ->
+ file:delete(Dcl),
+ file:delete(Dcd);
+ {ram_copies, disc_only_copies} ->
+ ok = ensure_rename(Dmp, Dat),
+ true = open_files(Tab, disc_only_copies, InPlace, InitBy),
+ %% ram_delete_table must be done before init_indecies,
+ %% it uses info which is reset in init_indecies,
+ %% it doesn't matter, because init_indecies don't use
+ %% the ram replica of the table when creating the disc
+ %% index; Could be improved :)
+ mnesia_schema:ram_delete_table(Tab, FromS),
+ PosList = Cs#cstruct.index,
+ mnesia_index:init_indecies(Tab, disc_only_copies, PosList);
+ {disc_only_copies, ram_copies} ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ disc_delete_indecies(Tab, Cs, disc_only_copies),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ mnesia_controller:get_disc_copy(Tab)
+ end,
+ disc_delete_table(Tab, disc_only_copies);
+ {disc_copies, disc_only_copies} ->
+ ok = ensure_rename(Dmp, Dat),
+ true = open_files(Tab, disc_only_copies, InPlace, InitBy),
+ mnesia_schema:ram_delete_table(Tab, FromS),
+ PosList = Cs#cstruct.index,
+ mnesia_index:init_indecies(Tab, disc_only_copies, PosList),
+ file:delete(Dcl),
+ file:delete(Dcd);
+ {disc_only_copies, disc_copies} ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ disc_delete_indecies(Tab, Cs, disc_only_copies),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ mnesia_log:ets2dcd(Tab),
+ mnesia_controller:get_disc_copy(Tab),
+ disc_delete_table(Tab, disc_only_copies)
+ end
+ end;
+ true ->
+ ignore
+ end,
+ S = val({schema, storage_type}),
+ disc_insert(Tid, S, schema, Tab, Val, write, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, transform, _Fun, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ disc_copies ->
+ open_dcl(Cs#cstruct.name);
+ _ ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ %%% Operations below this are handled without using the log.
+
+ insert_op(Tid, _, {op, restore_recreate, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Type = Cs#cstruct.type,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ %% Delete all possibly existing files and tables
+ disc_delete_table(Tab, Storage),
+ disc_delete_indecies(Tab, Cs, Storage),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} -> ignore;
+ _ ->
+ mnesia_schema:ram_delete_table(Tab, Storage),
+ mnesia_checkpoint:tm_del_copy(Tab, node())
+ end
+ end,
+ %% And create new ones..
+ if
+ (InitBy == startup) or (Storage == unknown) ->
+ ignore;
+ Storage == ram_copies ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ mnesia_monitor:mktab(Tab, Args);
+ Storage == disc_copies ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ mnesia_monitor:mktab(Tab, Args),
+ File = mnesia_lib:tab2dcd(Tab),
+ FArg = [{file, File}, {name, {mnesia,create}},
+ {repair, false}, {mode, read_write}],
+ {ok, Log} = mnesia_monitor:open_log(FArg),
+ mnesia_monitor:unsafe_close_log(Log);
+ Storage == disc_only_copies ->
+ File = mnesia_lib:tab2dat(Tab),
+ file:delete(File),
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ mnesia_monitor:open_dets(Tab, Args)
+ end,
+ %% The remaining work is the same as for an ordinary create_table.
+ insert_op(Tid, ignore, {op, create_table, TabDef}, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, create_table, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, false, InPlace, InitBy),
+ Tab = Cs#cstruct.name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ case InitBy of
+ startup ->
+ %% At startup only make sure that the table files exist.
+ case Storage of
+ unknown ->
+ ignore;
+ ram_copies ->
+ ignore;
+ disc_copies ->
+ Dcd = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dcd) of
+ true -> ignore;
+ false ->
+ mnesia_log:open_log(temp,
+ mnesia_log:dcl_log_header(),
+ Dcd,
+ false,
+ false,
+ read_write),
+ mnesia_log:unsafe_close_log(temp)
+ end;
+ _ ->
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ case mnesia_monitor:open_dets(Tab, Args) of
+ {ok, _} ->
+ mnesia_monitor:unsafe_close_dets(Tab);
+ {error, Error} ->
+ exit({"Failed to create dets table", Error})
+ end
+ end;
+ _ ->
+ Copies = mnesia_lib:copy_holders(Cs),
+ Active = mnesia_lib:intersect(Copies, val({current, db_nodes})),
+ [mnesia_controller:add_active_replica(Tab, N, Cs) || N <- Active],
+
+ case Storage of
+ unknown ->
+ mnesia_lib:unset({Tab, create_table}),
+ case Cs#cstruct.local_content of
+ true ->
+ ignore;
+ false ->
+ mnesia_lib:set_remote_where_to_read(Tab)
+ end;
+ _ ->
+ case Cs#cstruct.local_content of
+ true ->
+ mnesia_lib:set_local_content_whereabouts(Tab);
+ false ->
+ mnesia_lib:set({Tab, where_to_read}, node())
+ end,
+ case Storage of
+ ram_copies ->
+ ignore;
+ _ ->
+ %% Indecies are still created by loader
+ disc_delete_indecies(Tab, Cs, Storage)
+ %% disc_delete_table(Tab, Storage)
+ end,
+
+ %% Update whereabouts and create table
+ mnesia_controller:create_table(Tab),
+ mnesia_lib:unset({Tab, create_table})
+ end
+ end;
+
+ insert_op(_Tid, _, {op, dump_table, Size, TabDef}, _InPlace, _InitBy) ->
+ case Size of
+ unknown ->
+ ignore;
+ _ ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Dmp = mnesia_lib:tab2dmp(Tab),
+ %% NOTE: the variable is called Dat but is obtained from tab2dcd/1.
+ Dat = mnesia_lib:tab2dcd(Tab),
+ case Size of
+ 0 ->
+ %% Assume that table files already are closed
+ file:delete(Dmp),
+ file:delete(Dat);
+ _ ->
+ ok = ensure_rename(Dmp, Dat)
+ end
+ end;
+
+ insert_op(Tid, _, {op, delete_table, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ ignore;
+ Storage ->
+ disc_delete_table(Tab, Storage),
+ disc_delete_indecies(Tab, Cs, Storage),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ mnesia_schema:ram_delete_table(Tab, Storage),
+ mnesia_checkpoint:tm_del_copy(Tab, node())
+ end
+ end,
+ delete_cstruct(Tid, Cs, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, clear_table, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ ignore;
+ Storage ->
+ Oid = '_', %%val({Tab, wild_pattern}),
+ if Storage == disc_copies ->
+ open_dcl(Cs#cstruct.name);
+ true ->
+ ignore
+ end,
+ %% Need to catch this, it crashes on ram_copies if
+ %% the op comes before table is loaded at startup.
+ catch insert(Tid, Storage, Tab, '_', Oid, clear_table, InPlace, InitBy)
+ end;
+
+ insert_op(Tid, _, {op, merge_schema, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case Cs#cstruct.name of
+ schema ->
+ %% If we bootstrap an empty (diskless) mnesia from another node
+ %% we might have changed the storage_type of schema.
+ %% I think this is a good place to do it.
+ Update = fun(NS = {Node,Storage}) ->
+ case mnesia_lib:cs_to_storage_type(Node, Cs) of
+ Storage -> NS;
+ disc_copies when Node == node() ->
+ Dir = mnesia_lib:dir(),
+ ok = mnesia_schema:opt_create_dir(true, Dir),
+ mnesia_schema:purge_dir(Dir, []),
+ mnesia_log:purge_all_logs(),
+
+ mnesia_lib:set(use_dir, true),
+ mnesia_log:init(),
+ Ns = val({current, db_nodes}),
+ F = fun(U) -> mnesia_recover:log_mnesia_up(U) end,
+ lists:foreach(F, Ns),
+ raw_named_dump_table(schema, dat),
+ temp_set_master_nodes(),
+ {Node,disc_copies};
+ CSstorage ->
+ {Node,CSstorage}
+ end
+ end,
+
+ %% where_to_commit is either a plain list or {blocked, List}.
+ W2C0 = val({schema, where_to_commit}),
+ W2C = case W2C0 of
+ {blocked, List} ->
+ {blocked,lists:map(Update,List)};
+ List ->
+ lists:map(Update,List)
+ end,
+ if W2C == W2C0 -> ignore;
+ true -> mnesia_lib:set({schema, where_to_commit}, W2C)
+ end;
+ _ ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, false, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, del_table_copy, Storage, Node, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ if
+ Tab == schema, Storage == ram_copies ->
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+ Tab /= schema ->
+ mnesia_controller:del_active_replica(Tab, Node),
+ mnesia_lib:del({Tab, Storage}, Node),
+ if
+ Node == node() ->
+ case Cs#cstruct.local_content of
+ true -> mnesia_lib:set({Tab, where_to_read}, nowhere);
+ false -> mnesia_lib:set_remote_where_to_read(Tab)
+ end,
+ mnesia_lib:del({schema, local_tables}, Tab),
+ mnesia_lib:set({Tab, storage_type}, unknown),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy),
+ disc_delete_table(Tab, Storage),
+ disc_delete_indecies(Tab, Cs, Storage),
+ mnesia_schema:ram_delete_table(Tab, Storage),
+ mnesia_checkpoint:tm_del_copy(Tab, Node);
+ true ->
+ case val({Tab, where_to_read}) of
+ Node ->
+ mnesia_lib:set_remote_where_to_read(Tab);
+ _ ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy)
+ end
+ end;
+
+ insert_op(Tid, _, {op, add_table_copy, _Storage, _Node, TabDef}, InPlace, InitBy) ->
+ %% During prepare commit, the files was created
+ %% and the replica was announced
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, add_snmp, _Us, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, del_snmp, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ if
+ InitBy /= startup,
+ Storage /= unknown ->
+ case ?catch_val({Tab, {index, snmp}}) of
+ {'EXIT', _} ->
+ ignore;
+ Stab ->
+ mnesia_snmp_hook:delete_table(Tab, Stab),
+ mnesia_lib:unset({Tab, {index, snmp}})
+ end;
+ true ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, add_index, Pos, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = insert_cstruct(Tid, Cs, true, InPlace, InitBy),
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ case InitBy of
+ startup when Storage == disc_only_copies ->
+ true = open_files(Tab, Storage, InPlace, InitBy),
+ mnesia_index:init_indecies(Tab, Storage, [Pos]);
+ startup ->
+ ignore;
+ _ ->
+ mnesia_index:init_indecies(Tab, Storage, [Pos])
+ end;
+
+ insert_op(Tid, _, {op, del_index, Pos, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ case InitBy of
+ startup when Storage == disc_only_copies ->
+ mnesia_index:del_index_table(Tab, Storage, Pos);
+ startup ->
+ ignore;
+ _ ->
+ mnesia_index:del_index_table(Tab, Storage, Pos)
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, change_table_access_mode,TabDef, _OldAccess, _Access}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case InitBy of
+ startup -> ignore;
+ _ -> mnesia_controller:change_table_access_mode(Cs)
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, change_table_load_order, TabDef, _OldLevel, _Level}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, delete_property, TabDef, PropKey}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_lib:unset({Tab, user_property, PropKey}),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, write_property, TabDef, _Prop}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+ insert_op(Tid, _, {op, change_table_frag, _Change, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy).
+
+ %% open_files(Tab, Storage, UpdateInPlace, InitBy) -> boolean()
+ %%
+ %% Makes sure the disc file of Tab is open for the dumper and records
+ %% that fact in the process dictionary under {?MODULE, Tab}.
+ %%
+ %% Returns true when the file is (or already was) opened by the dumper,
+ %% and false when there is nothing to write to: the storage is unknown
+ %% or ram_copies, the table was already dumped, or the table type cannot
+ %% be looked up.
+ open_files(_Tab, Storage, _UpdateInPlace, _InitBy)
+   when Storage == unknown; Storage == ram_copies ->
+     false;
+ open_files(Tab, Storage, UpdateInPlace, InitBy) ->
+     case get({?MODULE, Tab}) of
+         {opened_dumper, _} ->
+             true;
+         already_dumped ->
+             false;
+         undefined ->
+             case ?catch_val({Tab, setorbag}) of
+                 {'EXIT', _} ->
+                     false;
+                 _ when Storage == disc_copies, Tab /= schema ->
+                     open_disc_copies(Tab, InitBy);
+                 SetOrBag ->
+                     %% dets backed table (disc_only_copies or the schema)
+                     File = prepare_open(Tab, UpdateInPlace),
+                     DetsArgs = [{file, File},
+                                 {keypos, 2},
+                                 {repair, mnesia_monitor:get_env(auto_repair)},
+                                 {type, mnesia_lib:disk_type(Tab, SetOrBag)}],
+                     {ok, _} = mnesia_monitor:open_dets(Tab, DetsArgs),
+                     put({?MODULE, Tab}, {opened_dumper, dat}),
+                     true
+             end
+     end.
+
+ %% open_disc_copies(Tab, InitBy) -> boolean()
+ %%
+ %% Decides how a disc_copies table is to be dumped.
+ %%
+ %% Either the dcl log is opened so that updates can be appended (the
+ %% table is marked {opened_dumper, dcl} and true is returned), or the
+ %% whole ets table is written to a new dcd file with
+ %% mnesia_log:ets2dcd/1 (the table is marked already_dumped and false
+ %% is returned). At startup the dcl log is always used.
+ open_disc_copies(Tab, InitBy) ->
+ DclF = mnesia_lib:tab2dcl(Tab),
+ %% DumpEts is true when the ets table should be dumped as a whole.
+ DumpEts =
+ case file:read_file_info(DclF) of
+ {error, enoent} ->
+ false;
+ {ok, DclInfo} ->
+ DcdF = mnesia_lib:tab2dcd(Tab),
+ case file:read_file_info(DcdF) of
+ {error, Reason} ->
+ mnesia_lib:dbg_out("File ~p info_error ~p ~n",
+ [DcdF, Reason]),
+ true;
+ {ok, DcdInfo} ->
+ %% dc_dump_limit overrides the compiled-in multiplier
+ %% when it is set.
+ Mul = case ?catch_val(dc_dump_limit) of
+ {'EXIT', _} -> ?DumpToEtsMultiplier;
+ Val -> Val
+ end,
+ %% Dump everything when the dcd file is no larger than
+ %% Mul times the size of the dcl file.
+ DcdInfo#file_info.size =< (DclInfo#file_info.size * Mul)
+ end
+ end,
+ if
+ DumpEts == false; InitBy == startup ->
+ mnesia_log:open_log({?MODULE,Tab},
+ mnesia_log:dcl_log_header(),
+ DclF,
+ mnesia_lib:exists(DclF),
+ mnesia_monitor:get_env(auto_repair),
+ read_write),
+ put({?MODULE, Tab}, {opened_dumper, dcl}),
+ true;
+ true ->
+ mnesia_log:ets2dcd(Tab),
+ put({?MODULE, Tab}, already_dumped),
+ false
+ end.
+
+ %% open_dcl(Tab) -> true
+ %%
+ %% Opens the dcl file of Tab for writing unless the dumper has already
+ %% opened a file for the table. Unlike open_disc_copies/2 this ignores
+ %% the already_dumped mark; it is used for schema transactions.
+ open_dcl(Tab) ->
+     DictKey = {?MODULE, Tab},
+     case get(DictKey) of
+         {opened_dumper, _} ->
+             true;
+         _NotOpened -> %% undefined or already_dumped
+             DclFile = mnesia_lib:tab2dcl(Tab),
+             mnesia_log:open_log(DictKey,
+                                 mnesia_log:dcl_log_header(),
+                                 DclFile,
+                                 mnesia_lib:exists(DclFile),
+                                 mnesia_monitor:get_env(auto_repair),
+                                 read_write),
+             put(DictKey, {opened_dumper, dcl}),
+             true
+     end.
+
+ %% prepare_open(Tab, UpdateInPlace) -> FileName
+ %%
+ %% Returns the name of the dets file the dumper should write to.
+ %% With UpdateInPlace == true that is the .DAT file itself; otherwise
+ %% the .DAT file is first copied to a temporary file and the name of
+ %% the copy is returned. A failed copy is reported through fatal/2.
+ prepare_open(Tab, UpdateInPlace) ->
+     DatFile = mnesia_lib:tab2dat(Tab),
+     case UpdateInPlace of
+         false ->
+             TmpFile = mnesia_lib:tab2tmp(Tab),
+             case catch mnesia_lib:copy_file(DatFile, TmpFile) of
+                 ok ->
+                     TmpFile;
+                 Error ->
+                     fatal("Cannot copy dets file ~p to ~p: ~p~n",
+                           [DatFile, TmpFile, Error])
+             end;
+         true ->
+             DatFile
+     end.
+
+ %% Forgets the dumper's process dictionary entry for Table and returns
+ %% whatever was stored there (undefined if nothing).
+ del_opened_tab(Table) ->
+     DictKey = {?MODULE, Table},
+     erase(DictKey).
+
+ %% close_files(UpdateInPlace, Outcome, InitBy) -> ok
+ %%
+ %% Closes every file recorded in the process dictionary as opened by
+ %% the dumper; see close_files/4.
+ close_files(UpdateInPlace, Outcome, InitBy) ->
+     ProcessDict = get(),
+     close_files(UpdateInPlace, Outcome, InitBy, ProcessDict).
+
+ %% close_files(InPlace, Outcome, InitBy, ProcessDictEntries) -> ok
+ %%
+ %% Walks the process dictionary entries and cleans up those written by
+ %% the dumper, i.e. the ones keyed {?MODULE, Tab}:
+ %%   already_dumped        - the mark is simply erased;
+ %%   {opened_dumper, Type} - the mark is erased and the file is closed
+ %%                           according to the current storage type.
+ %% All other entries are skipped.
+ close_files(InPlace, Outcome, InitBy, [{{?MODULE, Tab}, already_dumped} | Tail]) ->
+ erase({?MODULE, Tab}),
+ close_files(InPlace, Outcome, InitBy, Tail);
+ close_files(InPlace, Outcome, InitBy, [{{?MODULE, Tab}, {opened_dumper, Type}} | Tail]) ->
+ erase({?MODULE, Tab}),
+ case val({Tab, storage_type}) of
+ %% Nothing is closed for disc_only_copies unless we are starting up.
+ disc_only_copies when InitBy /= startup ->
+ ignore;
+ disc_copies when Tab /= schema ->
+ mnesia_log:close_log({?MODULE,Tab});
+ Storage ->
+ do_close(InPlace, Outcome, Tab, Type, Storage)
+ end,
+ close_files(InPlace, Outcome, InitBy, Tail);
+
+ %% Not a dumper entry
+ close_files(InPlace, Outcome, InitBy, [_ | Tail]) ->
+ close_files(InPlace, Outcome, InitBy, Tail);
+ close_files(_, _, _InitBy, []) ->
+ ok.
+
+ %% do_close(InPlace, Outcome, Tab, FileType, Storage)
+ %%
+ %% Closes the file the dumper opened for Tab. FileType is dcl for a
+ %% log file and dat for a dets file.
+ %%
+ %% If the storage is unknown during close, the files are cleaned up.
+ %% This can happen if the timing is right and a dirty_write conflicts
+ %% with schema operations.
+ do_close(_, _, Tab, dcl, unknown) ->
+ mnesia_log:close_log({?MODULE,Tab}),
+ file:delete(mnesia_lib:tab2dcl(Tab));
+ do_close(_, _, Tab, dcl, _) -> %% To be safe, can it happen?
+ mnesia_log:close_log({?MODULE,Tab});
+
+ do_close(InPlace, Outcome, Tab, dat, Storage) ->
+ mnesia_monitor:close_dets(Tab),
+ if
+ %% The table no longer has a local copy: remove the dat file.
+ Storage == unknown, InPlace == true ->
+ file:delete(mnesia_lib:tab2dat(Tab));
+ InPlace == true ->
+ %% Update in place
+ ok;
+ Outcome == ok, Storage /= unknown ->
+ %% Success: swap tmp files with dat files
+ TabDat = mnesia_lib:tab2dat(Tab),
+ ok = file:rename(mnesia_lib:tab2tmp(Tab), TabDat);
+ true ->
+ %% Failed dump or unknown storage: discard the temporary copy.
+ file:delete(mnesia_lib:tab2tmp(Tab))
+ end.
+
+
+ %% ensure_rename(From, To) -> ok | {error, Reason}
+ %%
+ %% Renames From to To. If From is missing but To exists, the rename
+ %% is taken to have happened already and ok is returned. If neither
+ %% file exists, {error, {rename_failed, From, To}} is returned.
+ ensure_rename(From, To) ->
+     case mnesia_lib:exists(From) of
+         false ->
+             case mnesia_lib:exists(To) of
+                 false -> {error, {rename_failed, From, To}};
+                 true -> ok
+             end;
+         true ->
+             file:rename(From, To)
+     end.
+
+ %% insert_cstruct(Tid, Cs, KeepWhereabouts, InPlace, InitBy) -> Tab
+ %%
+ %% Stores the cstruct in the ram schema through mnesia_schema and then
+ %% writes the resulting schema record to the schema's disc copy.
+ %% Returns the name of the table the cstruct describes.
+ insert_cstruct(Tid, Cs, KeepWhereabouts, InPlace, InitBy) ->
+     {schema, Tab, _} = SchemaRec =
+         mnesia_schema:insert_cstruct(Tid, Cs, KeepWhereabouts),
+     SchemaStorage = val({schema, storage_type}),
+     disc_insert(Tid, SchemaStorage, schema, Tab, SchemaRec, write, InPlace, InitBy),
+     Tab.
+
+ %% delete_cstruct(Tid, Cs, InPlace, InitBy) -> Tab
+ %%
+ %% Removes the cstruct from the ram schema through mnesia_schema and
+ %% then deletes the schema record from the schema's disc copy.
+ %% Returns the name of the table the cstruct describes.
+ delete_cstruct(Tid, Cs, InPlace, InitBy) ->
+     {schema, Tab, _} = SchemaRec = mnesia_schema:delete_cstruct(Tid, Cs),
+     SchemaStorage = val({schema, storage_type}),
+     disc_insert(Tid, SchemaStorage, schema, Tab, SchemaRec, delete, InPlace, InitBy),
+     Tab.
+
+
+ %% temp_set_master_nodes() -> ok
+ %%
+ %% For every local table, registers all other nodes holding a copy of
+ %% it (of any storage type) as master nodes.
+ temp_set_master_nodes() ->
+     OtherHolders =
+         fun(Tab) ->
+                 Holders = val({Tab, disc_copies}) ++
+                     val({Tab, ram_copies}) ++
+                     val({Tab, disc_only_copies}),
+                 {Tab, Holders -- [node()]}
+         end,
+     Masters = lists:map(OtherHolders, val({schema, local_tables})),
+     %% UseDir = false since we don't want to remember these
+     %% masternodes and we are running (really soon anyway) since we want this
+     %% to be known during table loading.
+     mnesia_recover:log_master_nodes(Masters, false, yes),
+     ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Raw dump of table. Dumper must have unique access to the ets table.
+
+ %% raw_named_dump_table(Tab, Ftype) -> ok
+ %%
+ %% Dumps the ets table Tab to a dets file. Ftype selects the target
+ %% file name: dat or dmp. The data is first written to a temporary
+ %% file which is renamed to the target once the dump has succeeded.
+ %%
+ %% Exits with {has_no_disc, Node} when Mnesia runs without a directory,
+ %% and with a descriptive tuple if the dets file cannot be opened or
+ %% the dump itself fails.
+ raw_named_dump_table(Tab, Ftype) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_lib:lock_table(Tab),
+ TmpFname = mnesia_lib:tab2tmp(Tab),
+ Fname =
+ case Ftype of
+ dat -> mnesia_lib:tab2dat(Tab);
+ dmp -> mnesia_lib:tab2dmp(Tab)
+ end,
+ %% Remove leftovers from earlier dumps
+ file:delete(TmpFname),
+ file:delete(Fname),
+ TabSize = ?ets_info(Tab, size),
+ TabRef = Tab,
+ DiskType = mnesia_lib:disk_type(Tab),
+ Args = [{file, TmpFname},
+ {keypos, 2},
+ %% {ram_file, true},
+ {estimated_no_objects, TabSize + 256},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, DiskType}],
+ case mnesia_lib:dets_sync_open(TabRef, Args) of
+ {ok, TabRef} ->
+ Storage = ram_copies,
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+
+ case catch raw_dump_table(TabRef, Tab) of
+ {'EXIT', Reason} ->
+ %% Undo everything before giving up
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFname),
+ mnesia_lib:unlock_table(Tab),
+ exit({"Dump of table to disc failed", Reason});
+ ok ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_lib:dets_sync_close(Tab),
+ mnesia_lib:unlock_table(Tab),
+ ok = file:rename(TmpFname, Fname)
+ end;
+ {error, Reason} ->
+ mnesia_lib:unlock_table(Tab),
+ exit({"Open of file before dump to disc failed", Reason})
+ end;
+ false ->
+ exit({has_no_disc, node()})
+ end.
+
+ %% Replaces the contents of the open dets table DetsTab with the
+ %% objects of the ets table EtsTab (see dets:from_ets/2).
+ raw_dump_table(DetsTab, EtsTab) ->
+     dets:from_ets(DetsTab, EtsTab).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Load regulator
+%%
+ %% This is a poor man's substitute for a fair scheduler algorithm
+ %% in the Erlang emulator. The mnesia_dumper process performs many
+ %% costly BIF invocations and must pay for this. But since the
+ %% Emulator does not handle this properly we must compensate for
+ %% this with some form of load regulation of ourselves in order to
+ %% not steal all computation power in the Erlang Emulator and make
+ %% other processes starve. Hopefully this is a temporary solution.
+
+ %% start_regulator() -> pid() | nopid
+ %%
+ %% Starts the load regulator process when the dump_log_load_regulation
+ %% parameter is true and returns its pid. Returns nopid when load
+ %% regulation is switched off. A failure to start the process is
+ %% reported through fatal/2.
+ start_regulator() ->
+     case mnesia_monitor:get_env(dump_log_load_regulation) of
+         false ->
+             nopid;
+         true ->
+             N = ?REGULATOR_NAME,
+             case mnesia_monitor:start_proc(N, ?MODULE, regulator_init, [self()]) of
+                 {ok, Pid} ->
+                     Pid;
+                 {error, Reason} ->
+                     %% Both the name and the reason are printed, so the
+                     %% format needs two ~p directives (the first one used
+                     %% to be ~n, leaving one argument without a directive).
+                     fatal("Failed to start ~p: ~p~n", [N, Reason])
+             end
+     end.
+
+ %% regulator_init(Parent)
+ %%
+ %% Entry point of the load regulator process. Lowers its own priority,
+ %% registers itself under ?REGULATOR_NAME, acknowledges the start to
+ %% Parent with {ok, self()} and then enters regulator_loop/0.
+ regulator_init(Parent) ->
+ %% No need for trapping exits.
+ %% Using low priority causes the regulation
+ process_flag(priority, low),
+ register(?REGULATOR_NAME, self()),
+ proc_lib:init_ack(Parent, {ok, self()}),
+ regulator_loop().
+
+ %% Main loop of the load regulator process.
+ %%
+ %% {regulate, From} is answered with {regulated, self()} and the loop
+ %% continues; {stop, From} is answered with {stopped, self()} and the
+ %% process exits with reason normal.
+ regulator_loop() ->
+     receive
+         {stop, Requester} ->
+             Requester ! {stopped, self()},
+             exit(normal);
+         {regulate, Client} ->
+             Client ! {regulated, self()},
+             regulator_loop()
+     end.
+
+ %% regulate(Regulator) -> ok
+ %%
+ %% Synchronises with the load regulator process: sends it a regulate
+ %% request and blocks until the reply arrives. With nopid (regulation
+ %% switched off) it returns immediately.
+ regulate(Regulator) ->
+     case Regulator of
+         nopid ->
+             ok;
+         _ ->
+             Regulator ! {regulate, self()},
+             receive
+                 {regulated, Regulator} -> ok
+             end
+     end.
+
+ %% Looks up the Mnesia variable Var. If the lookup exits, the exit
+ %% reason is handed over to mnesia_lib:other_val/2.
+ val(Var) ->
+     Lookup = ?catch_val(Var),
+     case Lookup of
+         {'EXIT', Why} -> mnesia_lib:other_val(Var, Why);
+         _ -> Lookup
+     end.
diff --git a/lib/mnesia/src/mnesia_event.erl b/lib/mnesia/src/mnesia_event.erl
new file mode 100644
index 0000000000..ec6b99ecaa
--- /dev/null
+++ b/lib/mnesia/src/mnesia_event.erl
@@ -0,0 +1,260 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_event).
+
+-behaviour(gen_event).
+%-behaviour(mnesia_event).
+
+%% gen_event callback interface
+-export([init/1,
+ handle_event/2,
+ handle_call/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3]).
+
+-record(state, {nodes = [],
+ dumped_core = false, %% only dump fatal core once
+ args}).
+
+%%%----------------------------------------------------------------
+ %%% Callback functions from gen_event
+%%%----------------------------------------------------------------
+
+%%-----------------------------------------------------------------
+%% init(Args) ->
+%% {ok, State} | Error
+%%-----------------------------------------------------------------
+
+ %% Creates the initial handler state; the start arguments are kept in
+ %% the args field, all other fields get their record defaults.
+ init(StartArgs) ->
+     InitialState = #state{args = StartArgs},
+     {ok, InitialState}.
+
+%%-----------------------------------------------------------------
+%% handle_event(Event, State) ->
+%% {ok, NewState} | remove_handler |
+%% {swap_handler, Args1, State1, Mod2, Args2}
+%%-----------------------------------------------------------------
+
+ %% Events sent through the event manager are dispatched by
+ %% handle_any_event/2, whose result is returned unchanged.
+ handle_event(Notification, HandlerState) ->
+     handle_any_event(Notification, HandlerState).
+
+%%-----------------------------------------------------------------
+%% handle_info(Msg, State) ->
+%% {ok, NewState} | remove_handler |
+%% {swap_handler, Args1, State1, Mod2, Args2}
+%%-----------------------------------------------------------------
+
+ %% Plain messages are treated exactly like events sent through the
+ %% event manager.
+ %%
+ %% The result of handle_any_event/2 is returned as is, so state
+ %% changes made while handling the message (the list of known nodes,
+ %% the dumped_core flag) are kept. Previously the new state was
+ %% thrown away and the old one returned.
+ handle_info(Msg, State) ->
+     handle_any_event(Msg, State).
+
+%%-----------------------------------------------------------------
+%% handle_call(Event, State) ->
+%% {ok, Reply, NewState} | {remove_handler, Reply} |
+%% {swap_handler, Reply, Args1, State1, Mod2, Args2}
+%%-----------------------------------------------------------------
+
+ %% Calls are handled like any other event; the reply is always ok.
+ handle_call(Request, State0) ->
+     {ok, State1} = handle_any_event(Request, State0),
+     {ok, ok, State1}.
+
+%%-----------------------------------------------------------------
+%% terminate(Reason, State) ->
+%% AnyVal
+%%-----------------------------------------------------------------
+
+ %% Nothing to clean up when the handler is removed.
+ terminate(_Why, _HandlerState) ->
+     ok.
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+ %% The state needs no conversion on code upgrade.
+ code_change(_FromVsn, HandlerState, _Extra) ->
+     {ok, HandlerState}.
+
+%%-----------------------------------------------------------------
+%% Internal functions
+%%-----------------------------------------------------------------
+
+ %% handle_any_event(Event, State) -> {ok, NewState}
+ %%
+ %% Dispatches on the kind of Mnesia event. Anything that is neither a
+ %% system event nor a table event is reported as an error and leaves
+ %% the state untouched.
+ handle_any_event(Event, State) ->
+     case Event of
+         {mnesia_system_event, SystemEvent} ->
+             handle_system_event(SystemEvent, State);
+         {mnesia_table_event, TableEvent} ->
+             handle_table_event(TableEvent, State);
+         _Unexpected ->
+             report_error("~p got unexpected event: ~p~n", [?MODULE, Event]),
+             {ok, State}
+     end.
+
+ %% Table events are only logged; the state is returned unchanged.
+ handle_table_event({Oper, Record, TransId}, State) ->
+     InfoArgs = [Oper, TransId, Record],
+     report_info("~p performed by ~p on record:~n\t~p~n", InfoArgs),
+     {ok, State}.
+
+ %% handle_system_event(Event, State) -> {ok, NewState}
+ %%
+ %% Default handling of Mnesia system events:
+ %%   * checkpoint (de)activation is ignored;
+ %%   * mnesia_up / mnesia_down maintain the list of nodes in the state;
+ %%     when a fallback is activated, mnesia_down also invokes the
+ %%     configured fallback_error_function;
+ %%   * overload, info, warning, error, fatal, inconsistent_database and
+ %%     user events are written to the log;
+ %%   * anything else is reported as an unexpected system event.
+ handle_system_event({mnesia_checkpoint_activated, _Checkpoint}, State) ->
+ {ok, State};
+
+ handle_system_event({mnesia_checkpoint_deactivated, _Checkpoint}, State) ->
+ {ok, State};
+
+ handle_system_event({mnesia_up, Node}, State) ->
+ Nodes = [Node | State#state.nodes],
+ {ok, State#state{nodes = Nodes}};
+
+ handle_system_event({mnesia_down, Node}, State) ->
+ case mnesia:system_info(fallback_activated) of
+ true ->
+ case mnesia_monitor:get_env(fallback_error_function) of
+ {mnesia, lkill} ->
+ Msg = "A fallback is installed and Mnesia "
+ "must be restarted. Forcing shutdown "
+ "after mnesia_down from ~p...~n",
+ report_fatal(Msg, [Node], nocore, State#state.dumped_core),
+ mnesia:lkill(),
+ exit(fatal);
+ {UserMod, UserFunc} ->
+ Msg = "Warning: A fallback is installed and Mnesia got mnesia_down "
+ "from ~p. ~n",
+ report_info(Msg, [Node]),
+ %% The user function is first called with the node as argument.
+ case catch apply(UserMod, UserFunc, [Node]) of
+ {'EXIT', {undef, _Reason}} ->
+ %% Backward compatibility
+ apply(UserMod, UserFunc, []);
+ {'EXIT', Reason} ->
+ exit(Reason);
+ _ ->
+ ok
+ end,
+ Nodes = lists:delete(Node, State#state.nodes),
+ {ok, State#state{nodes = Nodes}}
+ end;
+ false ->
+ Nodes = lists:delete(Node, State#state.nodes),
+ {ok, State#state{nodes = Nodes}}
+ end;
+
+ handle_system_event({mnesia_overload, Details}, State) ->
+ report_warning("Mnesia is overloaded: ~p~n", [Details]),
+ {ok, State};
+
+ handle_system_event({mnesia_info, Format, Args}, State) ->
+ report_info(Format, Args),
+ {ok, State};
+
+ handle_system_event({mnesia_warning, Format, Args}, State) ->
+ report_warning(Format, Args),
+ {ok, State};
+
+ handle_system_event({mnesia_error, Format, Args}, State) ->
+ report_error(Format, Args),
+ {ok, State};
+
+ handle_system_event({mnesia_fatal, Format, Args, BinaryCore}, State) ->
+ report_fatal(Format, Args, BinaryCore, State#state.dumped_core),
+ %% Remember that a fatal event has been handled, so that later ones
+ %% do not write another core.
+ {ok, State#state{dumped_core = true}};
+
+ handle_system_event({inconsistent_database, Reason, Node}, State) ->
+ report_error("mnesia_event got {inconsistent_database, ~w, ~w}~n",
+ [Reason, Node]),
+ {ok, State};
+
+ handle_system_event({mnesia_user, Event}, State) ->
+ report_info("User event: ~p~n", [Event]),
+ {ok, State};
+
+ handle_system_event(Msg, State) ->
+ report_error("mnesia_event got unexpected system event: ~p~n", [Msg]),
+ {ok, State}.
+
+ %% Prints an informational message prefixed with the node name. The
+ %% message goes to the globally registered mnesia_global_logger if
+ %% there is one, otherwise to the standard output of this process.
+ report_info(Format0, Args0) ->
+     Format = "Mnesia(~p): " ++ Format0,
+     Args = [node() | Args0],
+     case global:whereis_name(mnesia_global_logger) of
+         Logger when Logger =/= undefined ->
+             io:format(Logger, Format, Args);
+         undefined ->
+             io:format(Format, Args)
+     end.
+
+ %% Logs a warning prefixed with the node name through error_logger
+ %% (warning_msg/2 when it is exported, format/2 otherwise) and, if a
+ %% mnesia_global_logger is registered globally, prints it there too.
+ report_warning(Format0, Args0) ->
+     Format = "Mnesia(~p): ** WARNING ** " ++ Format0,
+     Args = [node() | Args0],
+     HasWarningMsg = erlang:function_exported(error_logger, warning_msg, 2),
+     case HasWarningMsg of
+         false ->
+             error_logger:format(Format, Args);
+         true ->
+             error_logger:warning_msg(Format, Args)
+     end,
+     case global:whereis_name(mnesia_global_logger) of
+         Logger when Logger =/= undefined ->
+             io:format(Logger, Format, Args);
+         undefined ->
+             ok
+     end.
+
+ %% Logs an error prefixed with the node name through error_logger and,
+ %% if a mnesia_global_logger is registered globally, prints it there
+ %% too.
+ report_error(Format0, Args0) ->
+     Format = "Mnesia(~p): ** ERROR ** " ++ Format0,
+     Args = [node() | Args0],
+     error_logger:format(Format, Args),
+     case global:whereis_name(mnesia_global_logger) of
+         Logger when Logger =/= undefined ->
+             io:format(Logger, Format, Args);
+         undefined ->
+             ok
+     end.
+
+ %% report_fatal(Format, Args, BinaryCore, CoreDumped)
+ %%
+ %% Reports a fatal error. The core is written to a file when all of
+ %% the following hold: a core has not been dumped before, BinaryCore
+ %% is a binary, and either core_dir is configured as a string or Mnesia
+ %% uses a directory. Otherwise only the error is logged.
+ report_fatal(Format, Args, BinaryCore, CoreDumped) ->
+     UseDir = mnesia_monitor:use_dir(),
+     CoreDir = mnesia_monitor:get_env(core_dir),
+     HasTarget = is_list(CoreDir) orelse (UseDir == true),
+     WriteCore = HasTarget
+         andalso (CoreDumped == false)
+         andalso is_binary(BinaryCore),
+     case WriteCore of
+         true ->
+             core_file(CoreDir, BinaryCore, Format, Args);
+         false ->
+             report_error("(ignoring core) ** FATAL ** " ++ Format, Args)
+     end.
+
+ %% core_file(CoreDir, BinaryCore, Format, Args)
+ %%
+ %% Writes BinaryCore to a file named MnesiaCore.<node>_<now() parts>
+ %% and logs the fatal error together with the file name, or with the
+ %% reason if the file could not be written. The file is placed in
+ %% CoreDir when that is a string, otherwise in the current directory.
+ core_file(CoreDir, BinaryCore, Format, Args) ->
+     %% Each element of the timestamp is appended as "_N"; values below
+     %% ten get a leading zero.
+     Suffix = lists:flatmap(fun(I) when I < 10 -> ["_0", I];
+                               (I) -> ["_", I]
+                            end,
+                            tuple_to_list(now())),
+     BaseName = lists:concat(["MnesiaCore.", node()] ++ Suffix),
+     CoreFile = case is_list(CoreDir) of
+                    true -> filename:absname(BaseName, CoreDir);
+                    false -> filename:absname(BaseName)
+                end,
+     case file:write_file(CoreFile, BinaryCore) of
+         {error, Reason} ->
+             report_error("(could not write core file: ~p)~n ** FATAL ** " ++ Format,
+                          [Reason] ++ Args);
+         ok ->
+             report_error("(core dumped to file: ~p)~n ** FATAL ** " ++ Format,
+                          [CoreFile] ++ Args)
+     end.
+
+
+
diff --git a/lib/mnesia/src/mnesia_frag.erl b/lib/mnesia/src/mnesia_frag.erl
new file mode 100644
index 0000000000..a2958ab461
--- /dev/null
+++ b/lib/mnesia/src/mnesia_frag.erl
@@ -0,0 +1,1361 @@
+%%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%%
+%%%----------------------------------------------------------------------
+%%% Purpose : Support tables so large that they need
+%%% to be divided into several fragments.
+%%%----------------------------------------------------------------------
+
+%header_doc_include
+
+-module(mnesia_frag).
+
+%% Callback functions when accessed within an activity
+-export([
+ lock/4,
+ write/5, delete/5, delete_object/5,
+ read/5, match_object/5, all_keys/4,
+ select/5,select/6,select_cont/3,
+ index_match_object/6, index_read/6,
+ foldl/6, foldr/6, table_info/4,
+ first/3, next/4, prev/4, last/3,
+ clear_table/4
+ ]).
+
+%header_doc_include
+
+%% -behaviour(mnesia_access).
+
+-export([
+ change_table_frag/2,
+ remove_node/2,
+ expand_cstruct/1,
+ lookup_frag_hash/1,
+ lookup_foreigners/1,
+ frag_names/1,
+ set_frag_hash/2,
+ local_select/4,
+ remote_select/4
+ ]).
+
+-include("mnesia.hrl").
+
+-define(OLD_HASH_MOD, mnesia_frag_old_hash).
+-define(DEFAULT_HASH_MOD, mnesia_frag_hash).
+%%-define(DEFAULT_HASH_MOD, ?OLD_HASH_MOD). %% BUGBUG: New should be default
+
+-record(frag_state,
+ {foreign_key,
+ n_fragments,
+ hash_module,
+ hash_state}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access functions
+
+%impl_doc_include
+
+%% Callback functions which provide transparent
+%% access to fragmented tables from any activity
+%% access context.
+
+%% Acquire a lock. A table lock is taken on every fragment of Tab and
+%% the per-fragment results are appended and de-duplicated; when
+%% frag_names/1 yields only Tab itself the lock is taken directly.
+%% Any other lock item is passed straight to mnesia:lock/4.
+lock(ActivityId, Opaque, {table, Tab}, LockKind) ->
+    LockFrag = fun(Frag) ->
+                       mnesia:lock(ActivityId, Opaque, {table, Frag}, LockKind)
+               end,
+    case frag_names(Tab) of
+        [Tab] ->
+            LockFrag(Tab);
+        Frags ->
+            mnesia_lib:uniq(lists:append([LockFrag(F) || F <- Frags]))
+    end;
+lock(ActivityId, Opaque, LockItem, LockKind) ->
+    mnesia:lock(ActivityId, Opaque, LockItem, LockKind).
+
+%% Write Rec into the fragment chosen by record_to_frag_name/2.
+write(ActivityId, Opaque, Tab, Rec, LockKind) ->
+    mnesia:write(ActivityId, Opaque, record_to_frag_name(Tab, Rec), Rec, LockKind).
+
+%% Delete Key from the fragment chosen by key_to_frag_name/2.
+delete(ActivityId, Opaque, Tab, Key, LockKind) ->
+    mnesia:delete(ActivityId, Opaque, key_to_frag_name(Tab, Key), Key, LockKind).
+
+%% Delete the object Rec from the fragment chosen by record_to_frag_name/2.
+delete_object(ActivityId, Opaque, Tab, Rec, LockKind) ->
+    mnesia:delete_object(ActivityId, Opaque, record_to_frag_name(Tab, Rec), Rec, LockKind).
+
+%% Read Key from the fragment chosen by key_to_frag_name/2.
+read(ActivityId, Opaque, Tab, Key, LockKind) ->
+    mnesia:read(ActivityId, Opaque, key_to_frag_name(Tab, Key), Key, LockKind).
+
+%% Match HeadPat by running select/5 with a match specification that
+%% has no guards and returns the whole object.
+match_object(ActivityId, Opaque, Tab, HeadPat, LockKind) ->
+    select(ActivityId, Opaque, Tab, [{HeadPat, [], ['$_']}], LockKind).
+
+%% select/5: run MatchSpec over the table; delegates to do_select/5.
+select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
+ do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
+
+
+%% select/6: variant taking a Limit; delegates to init_select/6.
+select(ActivityId, Opaque, Tab, MatchSpec, Limit, LockKind) ->
+ init_select(ActivityId, Opaque, Tab, MatchSpec, Limit, LockKind).
+
+
+%% Return the keys of every fragment of Tab, appended in fragment order.
+all_keys(ActivityId, Opaque, Tab, LockKind) ->
+    KeysOf = fun(Frag) -> mnesia:all_keys(ActivityId, Opaque, Frag, LockKind) end,
+    lists:append(lists:map(KeysOf, frag_names(Tab))).
+
+%% Clear every fragment of Tab; always returns ok.
+clear_table(ActivityId, Opaque, Tab, Obj) ->
+    ClearFrag = fun(Frag) -> mnesia:clear_table(ActivityId, Opaque, Frag, Obj) end,
+    lists:foreach(ClearFrag, frag_names(Tab)),
+    ok.
+
+%% Run mnesia:index_match_object/6 on every fragment of Tab and append
+%% the per-fragment results in fragment order.
+index_match_object(ActivityId, Opaque, Tab, Pat, Attr, LockKind) ->
+    MatchFrag = fun(Frag) ->
+                        mnesia:index_match_object(ActivityId, Opaque, Frag,
+                                                  Pat, Attr, LockKind)
+                end,
+    lists:append(lists:map(MatchFrag, frag_names(Tab))).
+
+%% Run mnesia:index_read/6 on every fragment of Tab and append the
+%% per-fragment results in fragment order.
+index_read(ActivityId, Opaque, Tab, Key, Attr, LockKind) ->
+    ReadFrag = fun(Frag) ->
+                       mnesia:index_read(ActivityId, Opaque, Frag, Key, Attr, LockKind)
+               end,
+    lists:append(lists:map(ReadFrag, frag_names(Tab))).
+
+%% Fold Fun over all records, visiting the fragments of Tab from the
+%% first to the last and threading the accumulator between them.
+foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+    PerFrag = fun(Frag, FragAcc) ->
+                      mnesia:foldl(ActivityId, Opaque, Fun, FragAcc, Frag, LockKind)
+              end,
+    lists:foldl(PerFrag, Acc, frag_names(Tab)).
+
+%% Fold Fun over all records, visiting the fragments of Tab from the
+%% last to the first and threading the accumulator between them.
+foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+    PerFrag = fun(Frag, FragAcc) ->
+                      mnesia:foldr(ActivityId, Opaque, Fun, FragAcc, Frag, LockKind)
+              end,
+    lists:foldr(PerFrag, Acc, frag_names(Tab)).
+
+%% Table information. With {Tab, Key} the fragment holding Key is used
+%% for items that are not fragment-aware; with a plain Tab the table
+%% itself is used.
+table_info(ActivityId, Opaque, {Tab, Key}, Item) ->
+    table_info2(ActivityId, Opaque, Tab, key_to_frag_name(Tab, Key), Item);
+table_info(ActivityId, Opaque, Tab, Item) ->
+    table_info2(ActivityId, Opaque, Tab, Tab, Item).
+
+%% Answer a table_info request. Fragmentation-specific items are
+%% computed here from Tab; every other item is forwarded to
+%% mnesia:table_info/4 for Frag.
+table_info2(ActivityId, Opaque, Tab, _Frag, size) ->
+    %% Total size: sum of the per-fragment sizes.
+    Add = fun({_, Size}, Acc) -> Acc + Size end,
+    lists:foldl(Add, 0, frag_size(ActivityId, Opaque, Tab));
+table_info2(ActivityId, Opaque, Tab, _Frag, memory) ->
+    %% Total memory: sum of the per-fragment memory figures.
+    Add = fun({_, Size}, Acc) -> Acc + Size end,
+    lists:foldl(Add, 0, frag_memory(ActivityId, Opaque, Tab));
+table_info2(_ActivityId, _Opaque, Tab, _Frag, base_table) ->
+    lookup_prop(Tab, base_table);
+table_info2(_ActivityId, _Opaque, Tab, _Frag, node_pool) ->
+    lookup_prop(Tab, node_pool);
+table_info2(_ActivityId, _Opaque, Tab, _Frag, n_fragments) ->
+    FH = lookup_frag_hash(Tab),
+    FH#frag_state.n_fragments;
+table_info2(_ActivityId, _Opaque, Tab, _Frag, foreign_key) ->
+    FH = lookup_frag_hash(Tab),
+    FH#frag_state.foreign_key;
+table_info2(_ActivityId, _Opaque, Tab, _Frag, foreigners) ->
+    lookup_foreigners(Tab);
+table_info2(_ActivityId, _Opaque, Tab, _Frag, n_ram_copies) ->
+    length(val({Tab, ram_copies}));
+table_info2(_ActivityId, _Opaque, Tab, _Frag, n_disc_copies) ->
+    length(val({Tab, disc_copies}));
+table_info2(_ActivityId, _Opaque, Tab, _Frag, n_disc_only_copies) ->
+    length(val({Tab, disc_only_copies}));
+table_info2(_ActivityId, _Opaque, Tab, _Frag, frag_names) ->
+    frag_names(Tab);
+table_info2(_ActivityId, _Opaque, Tab, _Frag, frag_dist) ->
+    frag_dist(Tab);
+table_info2(ActivityId, Opaque, Tab, _Frag, frag_size) ->
+    frag_size(ActivityId, Opaque, Tab);
+table_info2(ActivityId, Opaque, Tab, _Frag, frag_memory) ->
+    frag_memory(ActivityId, Opaque, Tab);
+table_info2(ActivityId, Opaque, _Tab, Frag, Item) ->
+    mnesia:table_info(ActivityId, Opaque, Frag, Item).
+
+%% Return the first key of Tab. When Tab has no frag_hash entry the
+%% call goes straight to mnesia:first/3. Otherwise the base fragment
+%% (named Tab) is tried first and, if it is empty, the following
+%% fragments are searched via search_first/5.
+first(ActivityId, Opaque, Tab) ->
+    FragHash = ?catch_val({Tab, frag_hash}),
+    case {FragHash, mnesia:first(ActivityId, Opaque, Tab)} of
+        {{'EXIT', _}, Result} ->
+            Result;
+        {_, '$end_of_table'} ->
+            search_first(ActivityId, Opaque, Tab, 1, FragHash);
+        {_, Key} ->
+            Key
+    end.
+
+%% Find the first key in the fragments that follow fragment N.
+%%
+%% Fragment N has already been found empty by the caller. Each
+%% following fragment N+1 .. n_fragments is probed in turn with
+%% mnesia:first/3 until one yields a key; '$end_of_table' is returned
+%% when all of them are empty.
+%%
+%% The guard must be a strict `<`: the clause probes fragment N + 1, so
+%% it may only run while N + 1 is still an existing fragment number.
+%% With `=<` the search stepped one past the last fragment and asked
+%% mnesia:first/3 about a fragment table that does not exist.
+search_first(ActivityId, Opaque, Tab, N, FH) when N < FH#frag_state.n_fragments ->
+    NextN = N + 1,
+    NextFrag = n_to_frag_name(Tab, NextN),
+    case mnesia:first(ActivityId, Opaque, NextFrag) of
+        '$end_of_table' ->
+            search_first(ActivityId, Opaque, Tab, NextN, FH);
+        Next ->
+            Next
+    end;
+search_first(_ActivityId, _Opaque, _Tab, _N, _FH) ->
+    '$end_of_table'.
+
+%% Return the last key of Tab. Without a frag_hash entry the call goes
+%% straight to mnesia:last/3; otherwise the fragments are searched
+%% backwards starting from the highest-numbered one.
+last(ActivityId, Opaque, Tab) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            mnesia:last(ActivityId, Opaque, Tab);
+        FH ->
+            search_last(ActivityId, Opaque, Tab, FH#frag_state.n_fragments, FH)
+    end.
+
+%% Probe fragment N, then N-1, ... down to 1 with mnesia:last/3 and
+%% return the first key found, or '$end_of_table' if all are empty.
+search_last(ActivityId, Opaque, Tab, N, FH) when N >= 1 ->
+    case mnesia:last(ActivityId, Opaque, n_to_frag_name(Tab, N)) of
+        '$end_of_table' ->
+            search_last(ActivityId, Opaque, Tab, N - 1, FH);
+        Key ->
+            Key
+    end;
+search_last(_ActivityId, _Opaque, _Tab, _N, _FH) ->
+    '$end_of_table'.
+
+%% Return the key preceding Key. Without a frag_hash entry the call
+%% goes straight to mnesia:prev/4. Otherwise the fragment holding Key
+%% is asked first; when it is exhausted the lower-numbered fragments
+%% are searched via search_prev/4.
+prev(ActivityId, Opaque, Tab, Key) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            mnesia:prev(ActivityId, Opaque, Tab, Key);
+        FH ->
+            FragNo = key_to_n(FH, Key),
+            case mnesia:prev(ActivityId, Opaque, n_to_frag_name(Tab, FragNo), Key) of
+                '$end_of_table' ->
+                    search_prev(ActivityId, Opaque, Tab, FragNo);
+                PrevKey ->
+                    PrevKey
+            end
+    end.
+
+%% Probe fragments N-1 down to 1 with mnesia:last/3 and return the
+%% first key found, or '$end_of_table' if all of them are empty.
+search_prev(ActivityId, Opaque, Tab, N) when N > 1 ->
+    Below = N - 1,
+    case mnesia:last(ActivityId, Opaque, n_to_frag_name(Tab, Below)) of
+        '$end_of_table' ->
+            search_prev(ActivityId, Opaque, Tab, Below);
+        Key ->
+            Key
+    end;
+search_prev(_ActivityId, _Opaque, _Tab, _N) ->
+    '$end_of_table'.
+
+%% Return the key following Key. Without a frag_hash entry the call
+%% goes straight to mnesia:next/4. Otherwise the fragment holding Key
+%% is asked first; when it is exhausted the higher-numbered fragments
+%% are searched via search_next/5.
+next(ActivityId, Opaque, Tab, Key) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            mnesia:next(ActivityId, Opaque, Tab, Key);
+        FH ->
+            FragNo = key_to_n(FH, Key),
+            case mnesia:next(ActivityId, Opaque, n_to_frag_name(Tab, FragNo), Key) of
+                '$end_of_table' ->
+                    search_next(ActivityId, Opaque, Tab, FragNo, FH);
+                NextKey ->
+                    NextKey
+            end
+    end.
+
+%% Probe fragments N+1 up to n_fragments with mnesia:first/3 and return
+%% the first key found, or '$end_of_table' if all of them are empty.
+search_next(ActivityId, Opaque, Tab, N, FH) when N < FH#frag_state.n_fragments ->
+    Above = N + 1,
+    case mnesia:first(ActivityId, Opaque, n_to_frag_name(Tab, Above)) of
+        '$end_of_table' ->
+            search_next(ActivityId, Opaque, Tab, Above, FH);
+        Key ->
+            Key
+    end;
+search_next(_ActivityId, _Opaque, _Tab, _N, _FH) ->
+    '$end_of_table'.
+
+%impl_doc_include
+
+%% Return [{FragName, Size}] for every fragment of Tab.
+frag_size(ActivityId, Opaque, Tab) ->
+    SizeOf = fun(F) -> {F, remote_table_info(ActivityId, Opaque, F, size)} end,
+    lists:map(SizeOf, frag_names(Tab)).
+
+%% Return [{FragName, Memory}] for every fragment of Tab.
+frag_memory(ActivityId, Opaque, Tab) ->
+    MemoryOf = fun(F) -> {F, remote_table_info(ActivityId, Opaque, F, memory)} end,
+    lists:map(MemoryOf, frag_names(Tab)).
+
+%% Fetch table_info Item for Tab from the node recorded as its
+%% where_to_read node. A failed rpc aborts with {no_exists, Tab, Item}.
+remote_table_info(ActivityId, Opaque, Tab, Item) ->
+    ReadNode = val({Tab, where_to_read}),
+    Reply = rpc:call(ReadNode, mnesia, table_info, [ActivityId, Opaque, Tab, Item]),
+    case Reply of
+        {badrpc, _} ->
+            mnesia:abort({no_exists, Tab, Item});
+        _ ->
+            Reply
+    end.
+
+%% Start a limited select (select/6).
+%% Without a frag_hash entry the table is not fragmented and the call
+%% is forwarded to mnesia:select/6. Otherwise the fragment numbers that
+%% can match Pat are computed, each of those fragments is table-locked,
+%% and the select is started on the first one. The remaining fragments
+%% are carried in the returned continuation (see frag_sel_cont/3).
+init_select(Tid,Opaque,Tab,Pat,Limit,LockKind) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:select(Tid, Opaque, Tab, Pat, Limit,LockKind);
+ FH ->
+ FragNumbers = verify_numbers(FH,Pat),
+ %% Lock each candidate fragment and record where and how it is stored.
+ Fun = fun(Num) ->
+ Name = n_to_frag_name(Tab, Num),
+ Node = val({Name, where_to_read}),
+ Storage = mnesia_lib:storage_type_at_node(Node, Name),
+ mnesia:lock(Tid, Opaque, {table, Name}, LockKind),
+ {Name, Node, Storage}
+ end,
+ %% NOTE(review): this match fails with badmatch if FragNumbers is [];
+ %% verify_numbers/2 returns the list unchanged in that case.
+ [{FTab,Node,Type}|NameNodes] = lists:map(Fun, FragNumbers),
+ InitFun = fun(FixedSpec) -> mnesia:dirty_sel_init(Node,FTab,FixedSpec,Limit,Type) end,
+ Res = mnesia:fun_select(Tid,Opaque,FTab,Pat,LockKind,FTab,InitFun,Limit,Node,Type),
+ frag_sel_cont(Res, NameNodes, {Pat,LockKind,Limit})
+ end.
+
+%% Continue a limited select.
+%% A {frag_cont, Cont, RemainingFrags, Args} continuation is one built
+%% by frag_sel_cont/3:
+%%  - current fragment exhausted and none left: '$end_of_table';
+%%  - current fragment exhausted: start the select on the next
+%%    fragment in the list, with the saved {Spec, LockKind, Limit};
+%%  - otherwise: continue on the current fragment.
+%% Any other continuation is handed to mnesia:select_cont/3.
+select_cont(_Tid,_,{frag_cont, '$end_of_table', [],_}) -> '$end_of_table';
+select_cont(Tid,Ts,{frag_cont, '$end_of_table', [{Tab,Node,Type}|Rest],Args}) ->
+ {Spec,LockKind,Limit} = Args,
+ InitFun = fun(FixedSpec) -> mnesia:dirty_sel_init(Node,Tab,FixedSpec,Limit,Type) end,
+ Res = mnesia:fun_select(Tid,Ts,Tab,Spec,LockKind,Tab,InitFun,Limit,Node,Type),
+ frag_sel_cont(Res, Rest, Args);
+select_cont(Tid,Ts,{frag_cont, Cont, TabL, Args}) ->
+ frag_sel_cont(mnesia:select_cont(Tid,Ts,Cont),TabL,Args);
+select_cont(Tid,Ts,Else) -> %% Not a fragmented table
+ mnesia:select_cont(Tid,Ts,Else).
+
+%% Wrap the result of a select step into a fragment-aware result.
+%% '$end_of_table' is final only when no fragments remain; otherwise an
+%% empty batch is returned together with a continuation that moves on
+%% to the remaining fragments. A {Recs, Cont} result keeps its records
+%% and gets its continuation wrapped.
+frag_sel_cont(Res, TabL, Args) ->
+    case {Res, TabL} of
+        {'$end_of_table', []} ->
+            '$end_of_table';
+        {'$end_of_table', _} ->
+            {[], {frag_cont, '$end_of_table', TabL, Args}};
+        {{Recs, Cont}, _} ->
+            {Recs, {frag_cont, Cont, TabL, Args}}
+    end.
+
+%% Unlimited select (select/5).
+%% Without a frag_hash entry the call is forwarded to mnesia:select/5.
+%% Otherwise every fragment that can match MatchSpec is table-locked
+%% and then selected with dirty_select. If all those fragments are read
+%% from the local node they are selected one after another; if some are
+%% read from other nodes a helper process (local_select/4) triggers the
+%% remote selects while this process selects the local fragments, and
+%% the results are gathered by local_collect/5.
+do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:select(ActivityId, Opaque, Tab, MatchSpec, LockKind);
+ FH ->
+ FragNumbers = verify_numbers(FH,MatchSpec),
+ %% Lock each candidate fragment and remember its read node.
+ Fun = fun(Num) ->
+ Name = n_to_frag_name(Tab, Num),
+ Node = val({Name, where_to_read}),
+ mnesia:lock(ActivityId, Opaque, {table, Name}, LockKind),
+ {Name, Node}
+ end,
+ NameNodes = lists:map(Fun, FragNumbers),
+ %% Sequential select over all candidate fragments.
+ SelectAllFun =
+ fun(PatchedMatchSpec) ->
+ Match = [mnesia:dirty_select(Name, PatchedMatchSpec)
+ || {Name, _Node} <- NameNodes],
+ lists:append(Match)
+ end,
+ case [{Name, Node} || {Name, Node} <- NameNodes, Node /= node()] of
+ [] ->
+ %% All fragments are local
+ mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectAllFun);
+ RemoteNameNodes ->
+ Type = val({Tab,setorbag}),
+ SelectFun =
+ fun(PatchedMatchSpec) ->
+ %% Ref tags the replies that belong to this select.
+ Ref = make_ref(),
+ Args = [self(), Ref, RemoteNameNodes, PatchedMatchSpec],
+ Pid = spawn_link(?MODULE, local_select, Args),
+ LocalMatch0 = [mnesia:dirty_select(Name, PatchedMatchSpec)
+ || {Name, Node} <- NameNodes, Node == node()],
+ %% ordered_set results are merged, all other types appended.
+ LocalMatch = case Type of
+ ordered_set -> lists:merge(LocalMatch0);
+ _ -> lists:append(LocalMatch0)
+ end,
+ OldSelectFun = fun() -> SelectAllFun(PatchedMatchSpec) end,
+ local_collect(Ref, Pid, Type, LocalMatch, OldSelectFun)
+ end,
+ mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectFun)
+ end
+ end.
+
+%% Ask the hash module which fragment numbers MatchSpec can hit and
+%% check that each one is an integer in 1..n_fragments.
+%% Returns the list of fragment numbers, or aborts listing the
+%% offending entries.
+verify_numbers(FH,MatchSpec) ->
+ HashState = FH#frag_state.hash_state,
+ FragNumbers =
+ case FH#frag_state.hash_module of
+ %% Both branches call the same callback; the first does so with
+ %% the module name written out as the ?DEFAULT_HASH_MOD macro.
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:match_spec_to_frag_numbers(HashState, MatchSpec);
+ HashMod ->
+ HashMod:match_spec_to_frag_numbers(HashState, MatchSpec)
+ end,
+ N = FH#frag_state.n_fragments,
+ %% VerifyFun is true for BAD entries, so the filter keeps the invalid ones.
+ VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
+ (_F) -> true
+ end,
+ %% The catch turns a filter failure into an 'EXIT' term, which then
+ %% falls into the BadFrags branch.
+ case catch lists:filter(VerifyFun, FragNumbers) of
+ [] ->
+ FragNumbers;
+ BadFrags ->
+ mnesia:abort({"match_spec_to_frag_numbers: Fragment numbers out of range",
+ BadFrags, {range, 1, N}})
+ end.
+
+%% Body of the helper process spawned by do_select/5.
+%% Calls remote_select/4 on every distinct remote node with
+%% rpc:multicall, then sends ReplyTo one {local_select, Ref, Result}
+%% message: ok if every node answered ok, otherwise {error, Reason}.
+%% Finally it unlinks from ReplyTo and exits normally.
+local_select(ReplyTo, Ref, RemoteNameNodes, MatchSpec) ->
+ RemoteNodes = mnesia_lib:uniq([Node || {_Name, Node} <- RemoteNameNodes]),
+ Args = [ReplyTo, Ref, RemoteNameNodes, MatchSpec],
+ {Replies, BadNodes} = rpc:multicall(RemoteNodes, ?MODULE, remote_select, Args),
+ %% What is left after removing 'ok' are the distinct failure replies.
+ case mnesia_lib:uniq(Replies) -- [ok] of
+ [] when BadNodes == [] ->
+ ReplyTo ! {local_select, Ref, ok};
+ _ when BadNodes /= [] ->
+ ReplyTo ! {local_select, Ref, {error, {node_not_running, hd(BadNodes)}}};
+ [{badrpc, {'EXIT', Reason}} | _] ->
+ ReplyTo ! {local_select, Ref, {error, Reason}};
+ [Reason | _] ->
+ ReplyTo ! {local_select, Ref, {error, Reason}}
+ end,
+ unlink(ReplyTo),
+ exit(normal).
+
+%% Exported entry point called through rpc by local_select/4;
+%% delegates to do_remote_select/4.
+remote_select(ReplyTo, Ref, NameNodes, MatchSpec) ->
+ do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec).
+
+%% Walk the {Name, Node} list and, for each entry whose Node is this
+%% node, run dirty_select on the fragment and send the result (or the
+%% caught failure) to ReplyTo as {remote_select, Ref, Node, Res}.
+%% Entries for other nodes are skipped. Returns ok.
+do_remote_select(_ReplyTo, _Ref, [], _MatchSpec) ->
+    ok;
+do_remote_select(ReplyTo, Ref, [{Name, Node} | Rest], MatchSpec) when Node == node() ->
+    Res = (catch {ok, mnesia:dirty_select(Name, MatchSpec)}),
+    ReplyTo ! {remote_select, Ref, Node, Res},
+    do_remote_select(ReplyTo, Ref, Rest, MatchSpec);
+do_remote_select(ReplyTo, Ref, [{_Name, _Node} | Rest], MatchSpec) ->
+    do_remote_select(ReplyTo, Ref, Rest, MatchSpec).
+
+%% Wait for the helper process Pid to report the overall outcome of the
+%% remote selects, then gather the remote results with remote_collect/5.
+%% If an 'EXIT' message from Pid arrives instead, the local matches are
+%% dropped and the exit reason is treated as an error.
+local_collect(Ref, Pid, Type, LocalMatch, OldSelectFun) ->
+ receive
+ {local_select, Ref, LocalRes} ->
+ remote_collect(Ref, Type, LocalRes, LocalMatch, OldSelectFun);
+ {'EXIT', Pid, Reason} ->
+ remote_collect(Ref, Type, {error, Reason}, [], OldSelectFun)
+ end.
+
+%% Drain the {remote_select, Ref, ...} messages already in the mailbox.
+%% First clause (outcome so far is ok): each successful remote match is
+%% merged (ordered_set) or prepended (other types) to Acc; a failed one
+%% switches to the error clause. When no more messages are queued
+%% (after 0) the accumulated matches are returned.
+%% Second clause (an error was seen): the remaining messages for Ref are
+%% discarded and the transaction is aborted with Reason.
+remote_collect(Ref, Type, LocalRes = ok, Acc, OldSelectFun) ->
+ receive
+ {remote_select, Ref, Node, RemoteRes} ->
+ case RemoteRes of
+ {ok, RemoteMatch} ->
+ Matches = case Type of
+ ordered_set -> lists:merge(RemoteMatch, Acc);
+ _ -> RemoteMatch ++ Acc
+ end,
+ remote_collect(Ref, Type, LocalRes, Matches, OldSelectFun);
+ _ ->
+ remote_collect(Ref, Type, {error, {node_not_running, Node}}, [], OldSelectFun)
+ end
+ after 0 ->
+ Acc
+ end;
+remote_collect(Ref, Type, LocalRes = {error, Reason}, _Acc, OldSelectFun) ->
+ receive
+ {remote_select, Ref, _Node, _RemoteRes} ->
+ remote_collect(Ref, Type, LocalRes, [], OldSelectFun)
+ after 0 ->
+ mnesia:abort(Reason)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Returns a list of cstructs
+
+%% expand_cstruct/1: expand a cstruct in create mode.
+expand_cstruct(Cs) ->
+ expand_cstruct(Cs, create).
+
+%% expand_cstruct/2: validate the frag_properties of Cs and expand it
+%% into a list of cstructs. Mode is create or activate (see
+%% verify_n_fragments/3). Each failed check aborts with a term naming
+%% the table and the offending property.
+expand_cstruct(Cs, Mode) ->
+ Tab = Cs#cstruct.name,
+ Props = Cs#cstruct.frag_properties,
+ mnesia_schema:verify({alt, [nil, list]}, mnesia_lib:etype(Props),
+ {badarg, Tab, Props}),
+ %% Verify keys
+ ValidKeys = [foreign_key, n_fragments, node_pool,
+ n_ram_copies, n_disc_copies, n_disc_only_copies,
+ hash_module, hash_state],
+ Keys = mnesia_schema:check_keys(Tab, Props, ValidKeys),
+ mnesia_schema:check_duplicates(Tab, Keys),
+
+ %% Pick fragmentation props
+ ForeignKey = mnesia_schema:pick(Tab, foreign_key, Props, undefined),
+ {ForeignKey2, N, Pool, DefaultNR, DefaultND, DefaultNDO} =
+ pick_props(Tab, Cs, ForeignKey),
+
+ %% Verify node_pool
+ BadPool = {bad_type, Tab, {node_pool, Pool}},
+ mnesia_schema:verify(list, mnesia_lib:etype(Pool), BadPool),
+ NotAtom = fun(A) when is_atom(A) -> false;
+ (_A) -> true
+ end,
+ mnesia_schema:verify([], [P || P <- Pool, NotAtom(P)], BadPool),
+
+ NR = mnesia_schema:pick(Tab, n_ram_copies, Props, 0),
+ ND = mnesia_schema:pick(Tab, n_disc_copies, Props, 0),
+ NDO = mnesia_schema:pick(Tab, n_disc_only_copies, Props, 0),
+
+ %% Despite its name, PosInt accepts any non-negative integer (0 included).
+ PosInt = fun(I) when is_integer(I), I >= 0 -> true;
+ (_I) -> false
+ end,
+ mnesia_schema:verify(true, PosInt(NR),
+ {bad_type, Tab, {n_ram_copies, NR}}),
+ mnesia_schema:verify(true, PosInt(ND),
+ {bad_type, Tab, {n_disc_copies, ND}}),
+ mnesia_schema:verify(true, PosInt(NDO),
+ {bad_type, Tab, {n_disc_only_copies, NDO}}),
+
+ %% Verify n_fragments
+ Cs2 = verify_n_fragments(N, Cs, Mode),
+
+ %% Verify hash callback
+ HashMod = mnesia_schema:pick(Tab, hash_module, Props, ?DEFAULT_HASH_MOD),
+ HashState = mnesia_schema:pick(Tab, hash_state, Props, undefined),
+ HashState2 = HashMod:init_state(Tab, HashState), %% BUGBUG: Catch?
+
+ %% The state starts with a single fragment; expand_frag_cstructs/9
+ %% adjusts it once per additional fragment.
+ FH = #frag_state{foreign_key = ForeignKey2,
+ n_fragments = 1,
+ hash_module = HashMod,
+ hash_state = HashState2},
+ if
+ %% No replica counts given at all: use the defaults from pick_props/3.
+ NR == 0, ND == 0, NDO == 0 ->
+ do_expand_cstruct(Cs2, FH, N, Pool, DefaultNR, DefaultND, DefaultNDO, Mode);
+ true ->
+ do_expand_cstruct(Cs2, FH, N, Pool, NR, ND, NDO, Mode)
+ end.
+
+%% Reject tables with local_content or snmp set (neither may be
+%% combined with fragmentation), reset frag_properties to just
+%% {base_table, Tab}, and build the per-fragment cstructs.
+do_expand_cstruct(Cs, FH, N, Pool, NR, ND, NDO, Mode) ->
+ Tab = Cs#cstruct.name,
+
+ LC = Cs#cstruct.local_content,
+ mnesia_schema:verify(false, LC,
+ {combine_error, Tab, {local_content, LC}}),
+
+ Snmp = Cs#cstruct.snmp,
+ mnesia_schema:verify([], Snmp,
+ {combine_error, Tab, {snmp, Snmp}}),
+
+ %% Add empty fragments
+ CommonProps = [{base_table, Tab}],
+ Cs2 = Cs#cstruct{frag_properties = lists:sort(CommonProps)},
+ %% Pool is passed twice: once as the initial distribution, once as the pool.
+ expand_frag_cstructs(N, NR, ND, NDO, Cs2, Pool, Pool, FH, Mode).
+
+%% Check n_fragments. N must be an integer >= 1, otherwise the call
+%% aborts with bad_type. In create mode the replica lists of Cs are
+%% emptied; in activate mode N must be exactly 1 and Cs is returned
+%% unchanged.
+verify_n_fragments(N, Cs, Mode) when is_integer(N), N >= 1 ->
+    case Mode of
+        create ->
+            Cs#cstruct{ram_copies = [],
+                       disc_copies = [],
+                       disc_only_copies = []};
+        activate ->
+            mnesia_schema:verify(1, N,
+                                 {combine_error, Cs#cstruct.name, {n_fragments, N}}),
+            Cs
+    end;
+verify_n_fragments(BadN, Cs, _Mode) ->
+    mnesia:abort({bad_type, Cs#cstruct.name, {n_fragments, BadN}}).
+
+%% Determine the fragmentation parameters of Tab.
+%% Returns {ForeignKey, N, Pool, DefaultNR, DefaultND, DefaultNDO}.
+%%
+%% With a {ForeignTab, Attr} foreign key, n_fragments and node_pool
+%% default to those of ForeignTab and must be equal to them; ForeignTab
+%% must not itself have a foreign key; the default replica counts are
+%% those of ForeignTab.
+%% With no foreign key the defaults are 1 fragment, all db_nodes as the
+%% pool, and one ram copy.
+%% Any other foreign_key value aborts with bad_type.
+pick_props(Tab, Cs, {ForeignTab, Attr}) ->
+ mnesia_schema:verify(true, ForeignTab /= Tab,
+ {combine_error, Tab, {ForeignTab, Attr}}),
+ Props = Cs#cstruct.frag_properties,
+ Attrs = Cs#cstruct.attributes,
+
+ ForeignKey = lookup_prop(ForeignTab, foreign_key),
+ ForeignN = lookup_prop(ForeignTab, n_fragments),
+ ForeignPool = lookup_prop(ForeignTab, node_pool),
+ N = mnesia_schema:pick(Tab, n_fragments, Props, ForeignN),
+ Pool = mnesia_schema:pick(Tab, node_pool, Props, ForeignPool),
+
+ mnesia_schema:verify(ForeignN, N,
+ {combine_error, Tab, {n_fragments, N},
+ ForeignTab, {n_fragments, ForeignN}}),
+
+ mnesia_schema:verify(ForeignPool, Pool,
+ {combine_error, Tab, {node_pool, Pool},
+ ForeignTab, {node_pool, ForeignPool}}),
+
+ mnesia_schema:verify(undefined, ForeignKey,
+ {combine_error, Tab,
+ "Multiple levels of foreign_key dependencies",
+ {ForeignTab, Attr}, ForeignKey}),
+
+ %% The attribute name is stored as its position in the record.
+ Key = {ForeignTab, mnesia_schema:attr_to_pos(Attr, Attrs)},
+ DefaultNR = length(val({ForeignTab, ram_copies})),
+ DefaultND = length(val({ForeignTab, disc_copies})),
+ DefaultNDO = length(val({ForeignTab, disc_only_copies})),
+ {Key, N, Pool, DefaultNR, DefaultND, DefaultNDO};
+pick_props(Tab, Cs, undefined) ->
+ Props = Cs#cstruct.frag_properties,
+ DefaultN = 1,
+ DefaultPool = mnesia:system_info(db_nodes),
+ N = mnesia_schema:pick(Tab, n_fragments, Props, DefaultN),
+ Pool = mnesia_schema:pick(Tab, node_pool, Props, DefaultPool),
+ DefaultNR = 1,
+ DefaultND = 0,
+ DefaultNDO = 0,
+ {undefined, N, Pool, DefaultNR, DefaultND, DefaultNDO};
+pick_props(Tab, _Cs, BadKey) ->
+ mnesia:abort({bad_type, Tab, {foreign_key, BadKey}}).
+
+%% Build the list of cstructs for fragments N, N-1, ..., 1.
+%% First clause (create mode, N > 1): names fragment N, assigns its
+%% replica nodes from the front of Dist, re-inserts the used nodes into
+%% the distribution with their updated counts, and steps the hash state
+%% once via adjust_before_split/1.
+%% Second clause (fragment 1, the base table): stores the final
+%% frag_state values and the node pool in frag_properties; in create
+%% mode it also assigns replica nodes.
+expand_frag_cstructs(N, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode)
+ when N > 1, Mode == create ->
+ Frag = n_to_frag_name(CommonCs#cstruct.name, N),
+ Cs = CommonCs#cstruct{name = Frag},
+ {Cs2, RevModDist, RestDist} = set_frag_nodes(NR, ND, NDO, Cs, Dist, []),
+ ModDist = lists:reverse(RevModDist),
+ Dist2 = rearrange_dist(Cs, ModDist, RestDist, Pool),
+ %% Adjusts backwards, but it doesn't matter.
+ {FH2, _FromFrags, _AdditionalWriteFrags} = adjust_before_split(FH),
+ CsList = expand_frag_cstructs(N - 1, NR, ND, NDO, CommonCs, Dist2, Pool, FH2, Mode),
+ [Cs2 | CsList];
+expand_frag_cstructs(1, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode) ->
+ BaseProps = CommonCs#cstruct.frag_properties ++
+ [{foreign_key, FH#frag_state.foreign_key},
+ {hash_module, FH#frag_state.hash_module},
+ {hash_state, FH#frag_state.hash_state},
+ {n_fragments, FH#frag_state.n_fragments},
+ {node_pool, Pool}
+ ],
+ BaseCs = CommonCs#cstruct{frag_properties = lists:sort(BaseProps)},
+ case Mode of
+ activate ->
+ [BaseCs];
+ create ->
+ {BaseCs2, _, _} = set_frag_nodes(NR, ND, NDO, BaseCs, Dist, []),
+ [BaseCs2]
+ end.
+
+%% Assign replica nodes to Cs by consuming entries from the front of
+%% the distribution list: first NR ram_copies, then ND disc_copies,
+%% then NDO disc_only_copies.
+%% Returns {Cs2, UsedEntries, RestDist}, where UsedEntries holds the
+%% consumed entries with updated counts in reverse order of use.
+%% Aborts if the list runs out before all replicas are placed.
+set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NR > 0 ->
+ Pos = #cstruct.ram_copies,
+ {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
+ set_frag_nodes(NR - 1, ND, NDO, Cs2, Tail, [Head2 | Acc]);
+set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when ND > 0 ->
+ Pos = #cstruct.disc_copies,
+ {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
+ set_frag_nodes(NR, ND - 1, NDO, Cs2, Tail, [Head2 | Acc]);
+set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NDO > 0 ->
+ Pos = #cstruct.disc_only_copies,
+ {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
+ set_frag_nodes(NR, ND, NDO - 1, Cs2, Tail, [Head2 | Acc]);
+set_frag_nodes(0, 0, 0, Cs, RestDist, ModDist) ->
+ {Cs, ModDist, RestDist};
+set_frag_nodes(_, _, _, Cs, [], _) ->
+ mnesia:abort({combine_error, Cs#cstruct.name, "Too few nodes in node_pool"}).
+
+%% Add one replica node to the cstruct field at position Pos.
+%% Head is either a bare node name or {Node, Count}. The node must be
+%% among the current db_nodes, otherwise the call aborts with
+%% not_active. Returns {Cs2, {Node, Count + 1}}, where a bare node
+%% counts as 0.
+set_frag_node(Cs, Pos, Head) ->
+    Replicas = element(Pos, Cs),
+    Weighted =
+        case Head of
+            {N, Count} when is_atom(N), is_integer(Count), Count >= 0 ->
+                {N, Count + 1};
+            N when is_atom(N) ->
+                {N, 1};
+            BadNode ->
+                mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
+        end,
+    {Node, _NewCount} = Weighted,
+    mnesia_schema:verify(true,
+                         lists:member(Node, val({current,db_nodes})),
+                         {not_active, Cs#cstruct.name, Node}),
+    {setelement(Pos, Cs, [Node | Replicas]), Weighted}.
+
+%% Re-insert every {Node, Count} entry of the second argument into the
+%% distribution list Dist, one at a time and in order, using
+%% insert_dist/5. Returns the resulting distribution.
+rearrange_dist(Cs, ModDist, Dist, Pool) ->
+    Insert = fun({Node, Count}, DistAcc) ->
+                     insert_dist(Cs, Node, Count, DistAcc, Pool)
+             end,
+    lists:foldl(Insert, Dist, ModDist).
+
+%% Insert {Node, Count} into the distribution list in front of the
+%% first entry that node_diff/5 ranks it as 'less' than, or at the end
+%% if there is none. Bare node names met on the way are treated as
+%% {Node, 0}. Malformed entries abort with bad_type.
+insert_dist(Cs, Node, Count, [Head | Tail], Pool) ->
+ case Head of
+ {Node2, Count2} when is_atom(Node2), is_integer(Count2), Count2 >= 0 ->
+ case node_diff(Node, Count, Node2, Count2, Pool) of
+ less ->
+ [{Node, Count}, Head | Tail];
+ greater ->
+ [Head | insert_dist(Cs, Node, Count, Tail, Pool)]
+ end;
+ Node2 when is_atom(Node2) ->
+ %% Normalise the bare node name and retry on the same position.
+ insert_dist(Cs, Node, Count, [{Node2, 0} | Tail], Pool);
+ BadNode ->
+ mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
+ end;
+insert_dist(_Cs, Node, Count, [], _Pool) ->
+ [{Node, Count}];
+insert_dist(_Cs, _Node, _Count, Dist, _Pool) ->
+ %% Dist is neither a non-empty list nor [].
+ mnesia:abort({bad_type, Dist}).
+
+%% Rank {Node, Count} against {Node2, Count2}: the lower count is
+%% 'less'; on equal counts the node that appears earlier in Pool is
+%% 'less'. Returns less | greater.
+node_diff(Node, Count, Node2, Count2, Pool) ->
+    if
+        Count < Count2 ->
+            less;
+        Count > Count2 ->
+            greater;
+        true ->
+            %% Equal counts: break the tie on the position in the pool.
+            Pos = list_pos(Node, Pool, 1),
+            Pos2 = list_pos(Node2, Pool, 1),
+            if
+                Pos < Pos2 ->
+                    less;
+                Pos > Pos2 ->
+                    greater
+            end
+    end.
+
+%% Return the position of the first occurrence of E in the list,
+%% counting from Pos for the head. Fails with function_clause when E
+%% is not in the list.
+list_pos(E, [H | T], Pos) ->
+    case H of
+        E -> Pos;
+        _ -> list_pos(E, T, Pos + 1)
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Switch function for changing of table fragmentation
+%%
+%% Returns a list of lists of schema ops
+
+%% Dispatch a fragmentation change request to the function that builds
+%% its schema operations. Unknown requests abort with bad_type.
+change_table_frag(Tab, Change) ->
+    case Change of
+        {activate, FragProps} ->
+            make_activate(Tab, FragProps);
+        deactivate ->
+            make_deactivate(Tab);
+        {add_frag, SortedNodes} ->
+            make_multi_add_frag(Tab, SortedNodes);
+        del_frag ->
+            make_multi_del_frag(Tab);
+        {add_node, Node} ->
+            make_multi_add_node(Tab, Node);
+        {del_node, Node} ->
+            make_multi_del_node(Tab, Node);
+        _ ->
+            mnesia:abort({bad_type, Tab, Change})
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Turn a normal table into a fragmented table
+%%
+%% The storage type must be the same on all nodes
+
+%% Build the schema op that turns Tab into a fragmented table with the
+%% given properties. Aborts with already_exists if the table already
+%% has frag_properties.
+make_activate(Tab, Props) ->
+    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+    mnesia_schema:ensure_active(Cs),
+    case Cs#cstruct.frag_properties of
+        [] ->
+            [FragCs] = expand_cstruct(Cs#cstruct{frag_properties = Props}, activate),
+            [[{op, change_table_frag, activate, mnesia_schema:cs2list(FragCs)}]];
+        Existing ->
+            mnesia:abort({already_exists, Tab, {frag_properties, Existing}})
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Turn a table into a normal defragmented table
+
+%% Build the schema op that turns Tab back into a normal table.
+%% Only allowed on a base table that no other table refers to through
+%% a foreign key and that has a single fragment; otherwise aborts with
+%% combine_error.
+make_deactivate(Tab) ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ Foreigners = lookup_foreigners(Tab),
+ BaseTab = lookup_prop(Tab, base_table),
+ FH = lookup_frag_hash(Tab),
+ if
+ BaseTab /= Tab ->
+ mnesia:abort({combine_error, Tab, "Not a base table"});
+ Foreigners /= [] ->
+ mnesia:abort({combine_error, Tab, "Too many foreigners", Foreigners});
+ FH#frag_state.n_fragments > 1 ->
+ mnesia:abort({combine_error, Tab, "Too many fragments"});
+ true ->
+ %% Dropping all frag_properties is what deactivates fragmentation.
+ Cs2 = Cs#cstruct{frag_properties = []},
+ TabDef = mnesia_schema:cs2list(Cs2),
+ Op = {op, change_table_frag, deactivate, TabDef},
+ [[Op]]
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Add a fragment to a fragmented table and fill it with half of
+%% the records from one of the old fragments
+
+%% Build the add_frag ops for Tab followed by the same ops for every
+%% table that has Tab as its foreign table. SortedNs must be a list,
+%% otherwise the call aborts with bad_type.
+make_multi_add_frag(Tab, SortedNs) ->
+    case is_list(SortedNs) of
+        true ->
+            verify_multi(Tab),
+            OwnOps = make_add_frag(Tab, SortedNs),
+            %% Propagate to foreigners
+            ForeignOps = [make_add_frag(T, SortedNs) || T <- lookup_foreigners(Tab)],
+            [OwnOps | ForeignOps];
+        false ->
+            mnesia:abort({bad_type, Tab, SortedNs})
+    end.
+
+%% Abort unless Tab has no foreign key: a table with a foreign key may
+%% only be changed through its foreign table.
+verify_multi(Tab) ->
+    ForeignKey = (lookup_frag_hash(Tab))#frag_state.foreign_key,
+    Reason = {combine_error, Tab,
+              "Op only allowed via foreign table",
+              {foreign_key, ForeignKey}},
+    mnesia_schema:verify(undefined, ForeignKey, Reason).
+
+%% Write-lock Tab and the fragments listed in FragIndecies, and return
+%% an N-tuple in which element I is the name of fragment I for every
+%% listed index and 'undefined' for the rest.
+%% When DoNotLockN is true, fragment N gets its name stored but is not
+%% locked.
+make_frag_names_and_acquire_locks(Tab, N, FragIndecies, DoNotLockN) ->
+    mnesia_schema:get_tid_ts_and_lock(Tab, write),
+    AddName =
+        fun(Index, Names) ->
+                Name = n_to_frag_name(Tab, Index),
+                case DoNotLockN == true andalso Index == N of
+                    true ->
+                        ok;
+                    false ->
+                        mnesia_schema:get_tid_ts_and_lock(Name, write)
+                end,
+                setelement(Index, Names, Name)
+        end,
+    lists:foldl(AddName, erlang:make_tuple(N, undefined), FragIndecies).
+
+%% Build the schema ops that add one fragment to Tab:
+%% the change_table_frag op carrying the updated base table definition,
+%% the create-table op for the new fragment, and the record ops that
+%% move records from the split fragments to their new fragment.
+make_add_frag(Tab, SortedNs) ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ FH = lookup_frag_hash(Tab),
+ {FH2, FromIndecies, WriteIndecies} = adjust_before_split(FH),
+ N = FH2#frag_state.n_fragments,
+ %% The new fragment N gets a name but is not locked (DoNotLockN = true).
+ FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, true),
+ NewFrag = element(N, FragNames),
+
+ %% The new fragment gets as many replicas of each kind as the base table.
+ NR = length(Cs#cstruct.ram_copies),
+ ND = length(Cs#cstruct.disc_copies),
+ NDO = length(Cs#cstruct.disc_only_copies),
+ NewCs = Cs#cstruct{name = NewFrag,
+ frag_properties = [{base_table, Tab}],
+ ram_copies = [],
+ disc_copies = [],
+ disc_only_copies = []},
+
+ {NewCs2, _, _} = set_frag_nodes(NR, ND, NDO, NewCs, SortedNs, []),
+ [NewOp] = mnesia_schema:make_create_table(NewCs2),
+
+ SplitOps = split(Tab, FH2, FromIndecies, FragNames, []),
+
+ Cs2 = replace_frag_hash(Cs, FH2),
+ TabDef = mnesia_schema:cs2list(Cs2),
+ BaseOp = {op, change_table_frag, {add_frag, SortedNs}, TabDef},
+
+ [BaseOp, NewOp | SplitOps].
+
+%% Return Cs with its frag_properties updated from FH: n_fragments,
+%% hash_module and hash_state are replaced by the values in FH,
+%% next_n_to_split and n_doubles entries are dropped, and every other
+%% property is kept as is.
+replace_frag_hash(Cs, FH) when is_record(FH, frag_state) ->
+    Patch = fun({n_fragments, _}) ->
+                    {true, {n_fragments, FH#frag_state.n_fragments}};
+               ({hash_module, _}) ->
+                    {true, {hash_module, FH#frag_state.hash_module}};
+               ({hash_state, _}) ->
+                    {true, {hash_state, FH#frag_state.hash_state}};
+               ({next_n_to_split, _}) ->
+                    false;
+               ({n_doubles, _}) ->
+                    false;
+               (_) ->
+                    true
+            end,
+    Cs#cstruct{frag_properties = lists:zf(Patch, Cs#cstruct.frag_properties)}.
+
+%% Adjust table info before split
+%%
+%% Asks the hash module's add_frag/1 for the new hash state, the
+%% fragments to split (FromFrags) and any additional fragments that
+%% will be written to. Returns {FH2, SortedFromFrags, UnionFrags},
+%% where FH2 has n_fragments increased by one and UnionFrags is the
+%% sorted merge of both fragment lists. Aborts if any returned number
+%% is not an integer in 1..N (N being the new fragment count).
+adjust_before_split(FH) ->
+ HashState = FH#frag_state.hash_state,
+ {HashState2, FromFrags, AdditionalWriteFrags} =
+ case FH#frag_state.hash_module of
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:add_frag(HashState);
+ HashMod ->
+ HashMod:add_frag(HashState)
+ end,
+ N = FH#frag_state.n_fragments + 1,
+ %% A failure in sort/merge is caught here; the resulting 'EXIT' term
+ %% is then rejected by the filter check below.
+ FromFrags2 = (catch lists:sort(FromFrags)),
+ UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
+ %% VerifyFun is true for BAD entries, so the filter keeps the invalid ones.
+ VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
+ (_F) -> true
+ end,
+ case catch lists:filter(VerifyFun, UnionFrags) of
+ [] ->
+ FH2 = FH#frag_state{n_fragments = N,
+ hash_state = HashState2},
+ {FH2, FromFrags2, UnionFrags};
+ BadFrags ->
+ mnesia:abort({"add_frag: Fragment numbers out of range",
+ BadFrags, {range, 1, N}})
+ end.
+
+%% For each fragment number in the list, read all of its records and
+%% let do_split/5 add the ops that move records to their new fragment.
+%% Returns the accumulated op list.
+split(_Tab, _FH, [], _FragNames, Ops) ->
+    Ops;
+split(Tab, FH, [SplitN | Rest], FragNames, Ops) ->
+    Frag = element(SplitN, FragNames),
+    WildPat = mnesia:table_info(Frag, wild_pattern),
+    {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
+    Recs = mnesia:match_object(Tid, Ts, Frag, WildPat, read),
+    split(Tab, FH, Rest, FragNames, do_split(FH, SplitN, FragNames, Recs, Ops)).
+
+%% Perform the split of the table
+%%
+%% For every record of fragment OldN, compute its fragment number under
+%% the new hash state. Records that stay put produce no op; records
+%% that move produce a write op on the new fragment and a delete op on
+%% the old one. Aborts if the target fragment has no name in FragNames
+%% (i.e. it was not locked).
+do_split(FH, OldN, FragNames, [Rec | Recs], Ops) ->
+ Pos = key_pos(FH),
+ HashKey = element(Pos, Rec),
+ case key_to_n(FH, HashKey) of
+ NewN when NewN == OldN ->
+ %% Keep record in the same fragment. No need to move it.
+ do_split(FH, OldN, FragNames, Recs, Ops);
+ NewN ->
+ case element(NewN, FragNames) of
+ NewFrag when NewFrag /= undefined ->
+ OldFrag = element(OldN, FragNames),
+ %% The record key is element 2; the hash key above may be another field.
+ Key = element(2, Rec),
+ NewOid = {NewFrag, Key},
+ OldOid = {OldFrag, Key},
+ Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
+ {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
+ do_split(FH, OldN, FragNames, Recs, Ops2);
+ _NewFrag ->
+ %% Tried to move record to a fragment that is not locked
+ mnesia:abort({"add_frag: Fragment not locked", NewN})
+ end
+ end;
+do_split(_FH, _OldN, _FragNames, [], Ops) ->
+ Ops.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Delete a fragment from a fragmented table
+%% and merge its records with another fragment
+
+%% Build the del_frag ops for Tab followed by the same ops for every
+%% table that has Tab as its foreign table.
+make_multi_del_frag(Tab) ->
+    verify_multi(Tab),
+    OwnOps = make_del_frag(Tab),
+    %% Propagate to foreigners
+    ForeignOps = [make_del_frag(T) || T <- lookup_foreigners(Tab)],
+    [OwnOps | ForeignOps].
+
+%% Build the schema ops that remove the last fragment of Tab:
+%% the change_table_frag op carrying the updated base table definition,
+%% the delete-table op for the last fragment, and the record ops that
+%% move its records into the remaining fragments.
+%% Aborts with no_exists when only one fragment is left.
+make_del_frag(Tab) ->
+ FH = lookup_frag_hash(Tab),
+ case FH#frag_state.n_fragments of
+ N when N > 1 ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ {FH2, FromIndecies, WriteIndecies} = adjust_before_merge(FH),
+ %% N is still the old fragment count here, so the fragment that is
+ %% about to be deleted is locked as well (DoNotLockN = false).
+ FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, false),
+
+ MergeOps = merge(Tab, FH2, FromIndecies, FragNames, []),
+ LastFrag = element(N, FragNames),
+ [LastOp] = mnesia_schema:make_delete_table(LastFrag, single_frag),
+ Cs2 = replace_frag_hash(Cs, FH2),
+ TabDef = mnesia_schema:cs2list(Cs2),
+ BaseOp = {op, change_table_frag, del_frag, TabDef},
+ [BaseOp, LastOp | MergeOps];
+ _ ->
+ %% Cannot remove the last fragment
+ mnesia:abort({no_exists, Tab})
+ end.
+
+%% Adjust tab info before merge
+%%
+%% Asks the hash module's del_frag/1 for the new hash state, the
+%% fragments to merge from (FromFrags) and any additional fragments
+%% that will be written to. Returns {FH2, SortedFromFrags, UnionFrags},
+%% where FH2 has n_fragments decreased by one. Aborts if any returned
+%% number is not an integer in 1..N, or if the last fragment N is not
+%% among FromFrags.
+adjust_before_merge(FH) ->
+ HashState = FH#frag_state.hash_state,
+ {HashState2, FromFrags, AdditionalWriteFrags} =
+ case FH#frag_state.hash_module of
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:del_frag(HashState);
+ HashMod ->
+ HashMod:del_frag(HashState)
+ end,
+ N = FH#frag_state.n_fragments,
+ %% A failure in sort/merge is caught here; the resulting 'EXIT' term
+ %% is then rejected by the filter check below.
+ FromFrags2 = (catch lists:sort(FromFrags)),
+ UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
+ %% VerifyFun is true for BAD entries, so the filter keeps the invalid ones.
+ VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
+ (_F) -> true
+ end,
+ case catch lists:filter(VerifyFun, UnionFrags) of
+ [] ->
+ %% The fragment being removed must be one of the merge sources.
+ case lists:member(N, FromFrags2) of
+ true ->
+ FH2 = FH#frag_state{n_fragments = N - 1,
+ hash_state = HashState2},
+ {FH2, FromFrags2, UnionFrags};
+ false ->
+ mnesia:abort({"del_frag: Last fragment number not included", N})
+ end;
+ BadFrags ->
+ mnesia:abort({"del_frag: Fragment numbers out of range",
+ BadFrags, {range, 1, N}})
+ end.
+
+%% For each fragment number in FromNs, read all records of that
+%% fragment and let do_merge/5 add the operations needed to move them.
+%% Ops is the accumulator; the final operation list is returned.
+merge(Tab, FH, FromNs, FragNames, Ops) ->
+    MergeOne =
+        fun(FromN, OpsAcc) ->
+                FromFrag = element(FromN, FragNames),
+                Pat = mnesia:table_info(FromFrag, wild_pattern),
+                {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
+                Recs = mnesia:match_object(Tid, Ts, FromFrag, Pat, read),
+                do_merge(FH, FromN, FragNames, Recs, OpsAcc)
+        end,
+    lists:foldl(MergeOne, Ops, FromNs).
+
+%% Perform the merge of the table
+%%
+%% Walks the records read from fragment number OldN and prepends the
+%% needed {op, rec, ...} operations to Ops, which is returned when all
+%% records have been handled. FH is the fragment state with the last
+%% fragment already removed, so LastN (n_fragments + 1) is the number
+%% of the fragment that is being deleted.
+do_merge(FH, OldN, FragNames, [Rec | Recs], Ops) ->
+ Pos = key_pos(FH),
+ LastN = FH#frag_state.n_fragments + 1,
+ HashKey = element(Pos, Rec),
+ case key_to_n(FH, HashKey) of
+ NewN when NewN == LastN ->
+ %% Tried to leave a record in the fragment that is to be deleted
+ mnesia:abort({"del_frag: Fragment number out of range",
+ NewN, {range, 1, LastN}});
+ NewN when NewN == OldN ->
+ %% Keep record in the same fragment. No need to move it.
+ do_merge(FH, OldN, FragNames, Recs, Ops);
+ NewN when OldN == LastN ->
+ %% Move record from the fragment that is to be deleted
+ %% No need to create a delete op for each record.
+ case element(NewN, FragNames) of
+ NewFrag when NewFrag /= undefined ->
+ Key = element(2, Rec),
+ NewOid = {NewFrag, Key},
+ Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}} | Ops],
+ do_merge(FH, OldN, FragNames, Recs, Ops2);
+ _NewFrag ->
+ %% Tried to move record to fragment that not is locked
+ mnesia:abort({"del_frag: Fragment not locked", NewN})
+ end;
+ NewN ->
+ %% Move record between two surviving fragments:
+ %% write it to the new one and delete it from the old one.
+ case element(NewN, FragNames) of
+ NewFrag when NewFrag /= undefined ->
+ OldFrag = element(OldN, FragNames),
+ Key = element(2, Rec),
+ NewOid = {NewFrag, Key},
+ OldOid = {OldFrag, Key},
+ Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
+ {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
+ do_merge(FH, OldN, FragNames, Recs, Ops2);
+ _NewFrag ->
+ %% Tried to move record to fragment that not is locked
+ mnesia:abort({"del_frag: Fragment not locked", NewN})
+ end
+ end;
+ do_merge(_FH, _OldN, _FragNames, [], Ops) ->
+ Ops.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Add a node to the node pool of a fragmented table
+
+%% Build the add_node operations for Tab and for every table that has
+%% Tab as its foreign key. Returns one op list per table, the ops for
+%% Tab itself first.
+make_multi_add_node(Tab, Node) ->
+    verify_multi(Tab),
+    OwnOps = make_add_node(Tab, Node),
+    %% Propagate to foreigners
+    AddToForeigner = fun(Foreigner) -> make_add_node(Foreigner, Node) end,
+    ForeignOps = lists:map(AddToForeigner, lookup_foreigners(Tab)),
+    [OwnOps | ForeignOps].
+
+%% Build the schema operation that appends Node to the node pool of
+%% Tab. Aborts with {already_exists, Tab, Node} if Node is already in
+%% the pool and with {bad_type, Tab, Node} if Node is not an atom.
+make_add_node(Tab, Node) when is_atom(Node) ->
+    Pool = lookup_prop(Tab, node_pool),
+    case lists:member(Node, Pool) of
+        true ->
+            mnesia:abort({already_exists, Tab, Node});
+        false ->
+            Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+            NewProps = lists:keyreplace(node_pool, 1,
+                                        Cs#cstruct.frag_properties,
+                                        {node_pool, Pool ++ [Node]}),
+            TabDef = mnesia_schema:cs2list(Cs#cstruct{frag_properties = NewProps}),
+            [{op, change_table_frag, {add_node, Node}, TabDef}]
+    end;
+make_add_node(Tab, Node) ->
+    mnesia:abort({bad_type, Tab, Node}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Delete a node from the node pool of a fragmented table
+
+%% Build the del_node operations for Tab and for every table that has
+%% Tab as its foreign key. Returns one op list per table, the ops for
+%% Tab itself first.
+make_multi_del_node(Tab, Node) ->
+    verify_multi(Tab),
+    OwnOps = make_del_node(Tab, Node),
+    %% Propagate to foreigners
+    DelFromForeigner = fun(Foreigner) -> make_del_node(Foreigner, Node) end,
+    ForeignOps = lists:map(DelFromForeigner, lookup_foreigners(Tab)),
+    [OwnOps | ForeignOps].
+
+%% Build the schema operation that removes Node from the node pool of
+%% Tab. Aborts with {no_exists, Tab, Node} if Node is not in the pool
+%% and with {bad_type, Tab, Node} if Node is not an atom.
+make_del_node(Tab, Node) when is_atom(Node) ->
+    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+    mnesia_schema:ensure_active(Cs),
+    Pool = lookup_prop(Tab, node_pool),
+    case lists:member(Node, Pool) of
+        false ->
+            mnesia:abort({no_exists, Tab, Node});
+        true ->
+            NewProps = lists:keyreplace(node_pool, 1,
+                                        Cs#cstruct.frag_properties,
+                                        {node_pool, Pool -- [Node]}),
+            TabDef = mnesia_schema:cs2list(Cs#cstruct{frag_properties = NewProps}),
+            [{op, change_table_frag, {del_node, Node}, TabDef}]
+    end;
+make_del_node(Tab, Node) ->
+    mnesia:abort({bad_type, Tab, Node}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Special case used to remove all references to a node during
+%% mnesia:del_table_copy(schema, Node)
+
+%% Remove Node from the node pool in the frag_properties of Cs.
+%% Returns {Cs2, true} when the pool was changed and {Cs, false} when
+%% the table has no frag_hash or Node is not in its pool.
+remove_node(Node, Cs) ->
+    Tab = Cs#cstruct.name,
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            {Cs, false};
+        _ ->
+            Pool = lookup_prop(Tab, node_pool),
+            case lists:member(Node, Pool) of
+                false ->
+                    {Cs, false};
+                true ->
+                    NewPool = {node_pool, Pool -- [Node]},
+                    OldProps = Cs#cstruct.frag_properties,
+                    NewProps = lists:keyreplace(node_pool, 1, OldProps, NewPool),
+                    {Cs#cstruct{frag_properties = NewProps}, true}
+            end
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Helpers
+
+%% Look up Var with ?catch_val; if the lookup exits, the exit reason is
+%% handed over to mnesia_lib:other_val/2.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Why} ->
+            mnesia_lib:other_val(Var, Why);
+        Found ->
+            Found
+    end.
+
+%% Store the fragment state derived from Props under {Tab, frag_hash},
+%% or remove that entry when Props does not describe a hash.
+set_frag_hash(Tab, Props) ->
+    case props_to_frag_hash(Tab, Props) of
+        no_hash ->
+            mnesia_lib:unset({Tab, frag_hash});
+        #frag_state{} = FragState ->
+            mnesia_lib:set({Tab, frag_hash}, FragState)
+    end.
+
+%% Translate the frag_properties list of Tab into a #frag_state{}.
+%% Returns no_hash when Props is empty or when the base_table property
+%% is not Tab itself.
+props_to_frag_hash(_Tab, []) ->
+ no_hash;
+props_to_frag_hash(Tab, Props) ->
+ case mnesia_schema:pick(Tab, base_table, Props, undefined) of
+ T when T == Tab ->
+ Foreign = mnesia_schema:pick(Tab, foreign_key, Props, must),
+ N = mnesia_schema:pick(Tab, n_fragments, Props, must),
+
+ case mnesia_schema:pick(Tab, hash_module, Props, undefined) of
+ undefined ->
+ %% Old style. Kept for backwards compatibility.
+ %% No hash module given: build the state from the
+ %% old frag_hash properties using ?OLD_HASH_MOD.
+ Split = mnesia_schema:pick(Tab, next_n_to_split, Props, must),
+ Doubles = mnesia_schema:pick(Tab, n_doubles, Props, must),
+ FH = {frag_hash, Foreign, N, Split, Doubles},
+ HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
+ #frag_state{foreign_key = Foreign,
+ n_fragments = N,
+ hash_module = ?OLD_HASH_MOD,
+ hash_state = HashState};
+ HashMod ->
+ HashState = mnesia_schema:pick(Tab, hash_state, Props, must),
+ #frag_state{foreign_key = Foreign,
+ n_fragments = N,
+ hash_module = HashMod,
+ hash_state = HashState}
+ end;
+ _ ->
+ no_hash
+ end.
+
+%% Return the value of Prop in the frag_properties of Tab.
+%% Aborts with a no_exists reason when the property is missing.
+lookup_prop(Tab, Prop) ->
+    AllProps = val({Tab, frag_properties}),
+    case lists:keysearch(Prop, 1, AllProps) of
+        false ->
+            mnesia:abort({no_exists, Tab, Prop, {frag_properties, AllProps}});
+        {value, {Prop, Value}} ->
+            Value
+    end.
+
+%% Return the #frag_state{} of Tab. An old style frag_hash tuple is
+%% converted on the fly. Aborts when Tab has no frag_hash entry.
+lookup_frag_hash(Tab) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            mnesia:abort({no_exists, Tab, frag_properties, frag_hash});
+        {frag_hash, ForeignKey, NFrags, _S, _D} = OldFH ->
+            %% Old style. Kept for backwards compatibility.
+            #frag_state{foreign_key = ForeignKey,
+                        n_fragments = NFrags,
+                        hash_module = ?OLD_HASH_MOD,
+                        hash_state  = ?OLD_HASH_MOD:init_state(Tab, OldFH)};
+        #frag_state{} = FragState ->
+            FragState
+    end.
+
+%% Returns the list of tables whose frag_hash entry in mnesia_gvar has
+%% a foreign key that refers to Tab.
+lookup_foreigners(Tab) ->
+    %% First field in HashPat is either frag_hash or frag_state
+    HashPat = {'_', {Tab, '_'}, '_', '_', '_'},
+    Matches = ?ets_match(mnesia_gvar, {{'$1', frag_hash}, HashPat}),
+    [Foreigner || [Foreigner] <- Matches].
+
+%% Returns the name of the fragment table that Rec belongs to.
+%% Tab itself is returned when it has no frag_hash entry.
+record_to_frag_name(Tab, Rec) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            Tab;
+        FH ->
+            HashKey = element(key_pos(FH), Rec),
+            n_to_frag_name(Tab, key_to_n(FH, HashKey))
+    end.
+
+%% Returns the record position used as hash key: the position given by
+%% the foreign key if there is one, otherwise 2.
+key_pos(FH) ->
+    case FH#frag_state.foreign_key of
+        {_ForeignTab, ForeignPos} ->
+            ForeignPos;
+        undefined ->
+            2
+    end.
+
+%% Returns the name of the fragment table for Key. Tab is either a
+%% table name or a {BaseTab, ForeignKey} tuple; in the latter case the
+%% name is derived from BaseTab.
+key_to_frag_name(Tab, Key) ->
+    FragNo = key_to_frag_number(Tab, Key),
+    BaseTab =
+        case Tab of
+            {Base, _} -> Base;
+            _ -> Tab
+        end,
+    n_to_frag_name(BaseTab, FragNo).
+
+%% Returns the name of fragment number N of Tab. Fragment 1 is Tab
+%% itself; the others are named Tab ++ "_frag" ++ N.
+%% Aborts with {bad_type, Tab, N} for any other kind of argument.
+n_to_frag_name(Tab, 1) ->
+    Tab;
+n_to_frag_name(Tab, N) when is_atom(Tab), is_integer(N) ->
+    list_to_atom(lists:concat([Tab, "_frag", N]));
+n_to_frag_name(Tab, N) ->
+    mnesia:abort({bad_type, Tab, N}).
+
+%% Returns the fragment number for a key.
+%% With a {Tab, ForeignKey} tuple the foreign key is hashed and Tab
+%% must have a foreign_key property, otherwise the call aborts.
+%% With a plain table name Key is hashed; 1 is returned when the table
+%% has no frag_hash entry.
+key_to_frag_number({Tab, ForeignKey}, _Key) ->
+    FH = val({Tab, frag_hash}),
+    case FH#frag_state.foreign_key of
+        undefined ->
+            mnesia:abort({combine_error, Tab, frag_properties,
+                          {foreign_key, undefined}});
+        {_ForeignTab, _Pos} ->
+            key_to_n(FH, ForeignKey)
+    end;
+key_to_frag_number(Tab, Key) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} -> 1;
+        FH -> key_to_n(FH, Key)
+    end.
+
+%% Returns the fragment number that the hash module of FH assigns to
+%% Key. Aborts unless the answer is an integer in 1..n_fragments.
+key_to_n(FH, Key) ->
+    HashState = FH#frag_state.hash_state,
+    FragNo =
+        case FH#frag_state.hash_module of
+            HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+                ?DEFAULT_HASH_MOD:key_to_frag_number(HashState, Key);
+            HashMod ->
+                HashMod:key_to_frag_number(HashState, Key)
+        end,
+    MaxN = FH#frag_state.n_fragments,
+    case is_integer(FragNo) andalso FragNo >= 1 andalso FragNo =< MaxN of
+        true ->
+            FragNo;
+        false ->
+            mnesia:abort({"key_to_frag_number: Fragment number out of range",
+                          FragNo, {range, 1, MaxN}})
+    end.
+
+%% Returns a list of fragment table names, in fragment number order.
+%% A table without a frag_hash entry is its own single fragment.
+frag_names(Tab) ->
+    case ?catch_val({Tab, frag_hash}) of
+        {'EXIT', _} ->
+            [Tab];
+        FH ->
+            N = FH#frag_state.n_fragments,
+            frag_names(Tab, N, [])
+    end.
+
+%% Counts down from N to 1, prepending each fragment name to Acc.
+%% The guard makes a fragment count below 1 fail with function_clause
+%% right away; without it the countdown would never reach 1 and would
+%% keep creating new atoms.
+frag_names(Tab, 1, Acc) ->
+    [Tab | Acc];
+frag_names(Tab, N, Acc) when N > 1 ->
+    Frag = n_to_frag_name(Tab, N),
+    frag_names(Tab, N - 1, [Frag | Acc]).
+
+%% Returns a list of {Node, FragCount} tuples
+%% sorted on FragCounts (see sort_dist/1).
+%% Every node in the node pool starts out as {good, Node, 0}; nodes
+%% that hold replicas without being in the pool are tagged bad by
+%% incr_node/2.
+frag_dist(Tab) ->
+    Pool = lookup_prop(Tab, node_pool),
+    EmptyDist = lists:map(fun(Node) -> {good, Node, 0} end, Pool),
+    sort_dist(count_frag(frag_names(Tab), EmptyDist)).
+
+%% For every fragment, bump the counter of each node that holds a
+%% ram_copies, disc_copies or disc_only_copies replica of it.
+count_frag(Frags, Dist) ->
+    StorageTypes = [ram_copies, disc_copies, disc_only_copies],
+    CountOne =
+        fun(Frag, DistAcc) ->
+                lists:foldl(fun(Storage, D) ->
+                                    incr_nodes(val({Frag, Storage}), D)
+                            end, DistAcc, StorageTypes)
+        end,
+    lists:foldl(CountOne, Dist, Frags).
+
+%% Bump the counter in Dist once for every node in Nodes.
+incr_nodes(Nodes, Dist) ->
+    lists:foldl(fun incr_node/2, Dist, Nodes).
+
+%% Bump the counter of Node in the list of {Kind, Node, Count} tuples.
+%% A node that is not in the list is appended as {bad, Node, 1}.
+incr_node(Node, []) ->
+    [{bad, Node, 1}];
+incr_node(Node, [{Kind, Node, Count} | Rest]) ->
+    [{Kind, Node, Count + 1} | Rest];
+incr_node(Node, [Other | Rest]) ->
+    [Other | incr_node(Node, Rest)].
+
+%% Sorts dist in increasing count order: the entries are grouped per
+%% count by deep_dist/2, the groups are key-sorted on their tag and
+%% then flattened to {Node, Count} tuples. The group tagged bad sorts
+%% after all numeric counts.
+sort_dist(Dist) ->
+    Grouped = deep_dist(Dist, []),
+    shallow_dist(lists:keysort(1, Grouped)).
+
+%% Split Dist into {Tag, Entries} groups. Each round takes the kind and
+%% count of the first entry and lets pick_count/3 pull out all entries
+%% that belong to the same group; the rest is handled in the next round.
+deep_dist([], Deep) ->
+    Deep;
+deep_dist([First | _] = Dist, Deep) ->
+    {Kind, _Node, Count} = First,
+    {Tag, Same, Other} = pick_count(Kind, Count, Dist),
+    deep_dist(Other, [{Tag, Same} | Deep]).
+
+%% Partition a list of {Kind, Node, Count} tuples. Returns
+%% {Tag, Same, Other} where Same holds {Node, Count} tuples for the
+%% selected group and Other the untouched remaining entries.
+%% When Kind is bad every entry is selected and Tag is bad; otherwise
+%% the non-bad entries whose count equals Count are selected and Tag
+%% is Count.
+pick_count(_Kind, Count, []) ->
+    {Count, [], []};
+pick_count(Kind, Count, [{Kind2, Node2, Count2} = Entry | Rest]) ->
+    {_, Same, Other} = pick_count(Kind, Count, Rest),
+    if
+        Kind == bad ->
+            {bad, [{Node2, Count2} | Same], Other};
+        Kind2 /= bad, Count == Count2 ->
+            {Count, [{Node2, Count2} | Same], Other};
+        true ->
+            {Count, Same, [Entry | Other]}
+    end.
+
+%% Drop the group tags and concatenate the groups in order.
+shallow_dist(Deep) ->
+    lists:flatmap(fun({_Tag, Shallow}) -> Shallow end, Deep).
diff --git a/lib/mnesia/src/mnesia_frag_hash.erl b/lib/mnesia/src/mnesia_frag_hash.erl
new file mode 100644
index 0000000000..610ba2535c
--- /dev/null
+++ b/lib/mnesia/src/mnesia_frag_hash.erl
@@ -0,0 +1,151 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2002-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%----------------------------------------------------------------------
+%%% Purpose : Implements hashing functionality for fragmented tables
+%%%----------------------------------------------------------------------
+
+%header_doc_include
+-module(mnesia_frag_hash).
+
+%% Fragmented Table Hashing callback functions
+-export([
+ init_state/2,
+ add_frag/1,
+ del_frag/1,
+ key_to_frag_number/2,
+ match_spec_to_frag_numbers/2
+ ]).
+
+%header_doc_include
+%%-behaviour(mnesia_frag_hash).
+
+%impl_doc_include
+-record(hash_state,
+ {n_fragments,
+ next_n_to_split,
+ n_doubles,
+ function}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Initial hash state for a table with a single fragment, using phash2.
+%% Only an undefined initial state is accepted.
+init_state(_Tab, undefined) ->
+    #hash_state{function        = phash2,
+                n_fragments     = 1,
+                next_n_to_split = 1,
+                n_doubles       = 0}.
+
+%% Convert a hash state tuple without the function field into the
+%% current record, marking it as using phash.
+convert_old_state({hash_state, N, P, L}) ->
+ #hash_state{n_fragments = N,
+ next_n_to_split = P,
+ n_doubles = L,
+ function = phash}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Advance the hash state by one fragment.
+%% Returns {NewState, [SplitN], [NewN]} where SplitN is the fragment
+%% that is split and NewN the number of the added fragment. When the
+%% split position passes 2^n_doubles, n_doubles is incremented and the
+%% split position restarts at 1. A state in the old tuple format is
+%% converted first.
+add_frag(#hash_state{} = State) ->
+    #hash_state{next_n_to_split = SplitN,
+                n_doubles       = Doubles,
+                n_fragments     = N} = State,
+    NextSplit = SplitN + 1,
+    NewN = N + 1,
+    State2 =
+        case power2(Doubles) + 1 == NextSplit of
+            true ->
+                State#hash_state{n_fragments     = NewN,
+                                 n_doubles       = Doubles + 1,
+                                 next_n_to_split = 1};
+            false ->
+                State#hash_state{n_fragments     = NewN,
+                                 next_n_to_split = NextSplit}
+        end,
+    {State2, [SplitN], [NewN]};
+add_frag(OldState) ->
+    add_frag(convert_old_state(OldState)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Step the hash state back by one fragment.
+%% Returns {NewState, [N], [MergeN]} where N is the fragment that goes
+%% away and MergeN the fragment its split position moves back to. When
+%% the split position would drop below 1, n_doubles is decremented and
+%% the position becomes 2^n_doubles. A state in the old tuple format is
+%% converted first.
+del_frag(#hash_state{next_n_to_split = SplitN, n_doubles = L, n_fragments = N} = State) ->
+    case SplitN - 1 of
+        P when P < 1 ->
+            L2 = L - 1,
+            MergeN = power2(L2),
+            State2 = State#hash_state{n_fragments     = N - 1,
+                                      next_n_to_split = MergeN,
+                                      n_doubles       = L2},
+            {State2, [N], [MergeN]};
+        MergeN ->
+            State2 = State#hash_state{n_fragments     = N - 1,
+                                      next_n_to_split = MergeN},
+            {State2, [N], [MergeN]}
+    end;
+del_frag(OldState) ->
+    del_frag(convert_old_state(OldState)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Map Key to a fragment number. The key is first hashed into
+%% 2^n_doubles buckets; if that bucket lies below the split position
+%% it is hashed again into twice as many buckets.
+%% phash already yields 1-based values, phash2 is 0-based and gets 1
+%% added. A state in the old tuple format is converted first.
+key_to_frag_number(#hash_state{function = phash, next_n_to_split = SplitN, n_doubles = L}, Key) ->
+    case erlang:phash(Key, power2(L)) of
+        A when A < SplitN ->
+            erlang:phash(Key, power2(L + 1));
+        A ->
+            A
+    end;
+key_to_frag_number(#hash_state{function = phash2, next_n_to_split = SplitN, n_doubles = L}, Key) ->
+    case erlang:phash2(Key, power2(L)) + 1 of
+        A when A < SplitN ->
+            erlang:phash2(Key, power2(L + 1)) + 1;
+        A ->
+            A
+    end;
+key_to_frag_number(OldState, Key) ->
+    key_to_frag_number(convert_old_state(OldState), Key).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Return the fragment numbers a match spec may touch. A spec with a
+%% single clause whose head is a tuple of more than two elements and
+%% whose key pattern (element 2) has no variables maps to exactly one
+%% fragment; every other spec maps to all fragments 1..N.
+%% A state in the old tuple format is converted first.
+match_spec_to_frag_numbers(#hash_state{n_fragments = N} = State, MatchSpec) ->
+    case MatchSpec of
+        [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
+            KeyPat = element(2, HeadPat),
+            case has_var(KeyPat) of
+                true  -> lists:seq(1, N);
+                false -> [key_to_frag_number(State, KeyPat)]
+            end;
+        _ ->
+            lists:seq(1, N)
+    end;
+match_spec_to_frag_numbers(OldState, MatchSpec) ->
+    match_spec_to_frag_numbers(convert_old_state(OldState), MatchSpec).
+
+%% Returns 2 raised to Y, computed with an integer shift instead of
+%% floating point math.
+power2(Y) ->
+ 1 bsl Y. % trunc(math:pow(2, Y)).
+
+%impl_doc_include
+
+%% Thin wrapper around mnesia:has_var/1.
+has_var(Pat) ->
+ mnesia:has_var(Pat).
diff --git a/lib/mnesia/src/mnesia_frag_old_hash.erl b/lib/mnesia/src/mnesia_frag_old_hash.erl
new file mode 100644
index 0000000000..817bb54eb1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_frag_old_hash.erl
@@ -0,0 +1,132 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2002-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%----------------------------------------------------------------------
+%%% Purpose : Implements hashing functionality for fragmented tables
+%%%----------------------------------------------------------------------
+
+-module(mnesia_frag_old_hash).
+%%-behaviour(mnesia_frag_hash).
+
+-compile({nowarn_deprecated_function, {erlang,hash,2}}).
+
+%% Hashing callback functions
+-export([
+ init_state/2,
+ add_frag/1,
+ del_frag/1,
+ key_to_frag_number/2,
+ match_spec_to_frag_numbers/2
+ ]).
+
+-record(old_hash_state,
+ {n_fragments,
+ next_n_to_split,
+ n_doubles}).
+
+%% Old style. Kept for backwards compatibility.
+-record(frag_hash,
+ {foreign_key,
+ n_fragments,
+ next_n_to_split,
+ n_doubles}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Build the hash state: either the initial state of a table with a
+%% single fragment (undefined) or a copy of the counters of an old
+%% style frag_hash record.
+init_state(_Tab, undefined) ->
+    #old_hash_state{n_fragments     = 1,
+                    next_n_to_split = 1,
+                    n_doubles       = 0};
+init_state(_Tab, #frag_hash{n_fragments     = N,
+                            next_n_to_split = SplitN,
+                            n_doubles       = Doubles}) ->
+    %% Old style. Kept for backwards compatibility.
+    #old_hash_state{n_fragments     = N,
+                    next_n_to_split = SplitN,
+                    n_doubles       = Doubles}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Advance the hash state by one fragment.
+%% Returns {NewState, [SplitN], [NewN]} where SplitN is the fragment
+%% that is split and NewN the number of the added fragment. When the
+%% split position passes 2^n_doubles, n_doubles is incremented and the
+%% split position restarts at 1.
+add_frag(#old_hash_state{next_n_to_split = SplitN,
+                         n_doubles       = L,
+                         n_fragments     = N} = State) ->
+    P = SplitN + 1,
+    NewN = N + 1,
+    State2 =
+        case trunc(math:pow(2, L)) + 1 == P of
+            true ->
+                State#old_hash_state{n_fragments     = NewN,
+                                     next_n_to_split = 1,
+                                     n_doubles       = L + 1};
+            false ->
+                State#old_hash_state{n_fragments     = NewN,
+                                     next_n_to_split = P}
+        end,
+    {State2, [SplitN], [NewN]}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Step the hash state back by one fragment.
+%% Returns {NewState, [N], [MergeN]} where N is the fragment that goes
+%% away and MergeN the new split position. When the split position
+%% would drop below 1, n_doubles is decremented and the position
+%% becomes 2^n_doubles.
+del_frag(#old_hash_state{next_n_to_split = SplitN,
+                         n_doubles       = L,
+                         n_fragments     = N} = State) ->
+    case SplitN - 1 of
+        P when P < 1 ->
+            L2 = L - 1,
+            MergeN = trunc(math:pow(2, L2)),
+            State2 = State#old_hash_state{n_fragments     = N - 1,
+                                          next_n_to_split = MergeN,
+                                          n_doubles       = L2},
+            {State2, [N], [MergeN]};
+        MergeN ->
+            State2 = State#old_hash_state{n_fragments     = N - 1,
+                                          next_n_to_split = MergeN},
+            {State2, [N], [MergeN]}
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Map Key to a fragment number with erlang:hash/2. The key is first
+%% hashed into 2^n_doubles buckets; if that bucket lies below the
+%% split position it is hashed again into twice as many buckets.
+key_to_frag_number(#old_hash_state{n_doubles = L, next_n_to_split = P}, Key) ->
+    case erlang:hash(Key, trunc(math:pow(2, L))) of
+        A when A < P ->
+            erlang:hash(Key, trunc(math:pow(2, L + 1)));
+        A ->
+            A
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Return the fragment numbers a match spec may touch. A spec with a
+%% single clause whose head is a tuple of more than two elements and
+%% whose key pattern (element 2) has no variables maps to exactly one
+%% fragment; every other spec maps to all fragments.
+match_spec_to_frag_numbers(#old_hash_state{} = State, MatchSpec) ->
+    case MatchSpec of
+        [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
+            KeyPat = element(2, HeadPat),
+            case has_var(KeyPat) of
+                true ->
+                    lists:seq(1, State#old_hash_state.n_fragments);
+                false ->
+                    [key_to_frag_number(State, KeyPat)]
+            end;
+        _ ->
+            lists:seq(1, State#old_hash_state.n_fragments)
+    end.
+
+%% Thin wrapper around mnesia:has_var/1.
+has_var(Pat) ->
+ mnesia:has_var(Pat).
diff --git a/lib/mnesia/src/mnesia_index.erl b/lib/mnesia/src/mnesia_index.erl
new file mode 100644
index 0000000000..4e6e8a997c
--- /dev/null
+++ b/lib/mnesia/src/mnesia_index.erl
@@ -0,0 +1,384 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Purpose: Handles index functionality in mnesia
+
+-module(mnesia_index).
+-export([read/5,
+ add_index/5,
+ delete_index/3,
+ del_object_index/5,
+ clear_index/4,
+ dirty_match_object/3,
+ dirty_select/3,
+ dirty_read/3,
+ dirty_read2/3,
+
+ db_put/2,
+ db_get/2,
+ db_match_erase/2,
+ get_index_table/2,
+ get_index_table/3,
+
+ tab2filename/2,
+ tab2tmp_filename/2,
+ init_index/2,
+ init_indecies/3,
+ del_transient/2,
+ del_transient/3,
+ del_index_table/3]).
+
+-import(mnesia_lib, [verbose/2]).
+-include("mnesia.hrl").
+
+-record(index, {setorbag, pos_list}).
+
+%% Look up Var with ?catch_val; if the lookup exits, the exit reason is
+%% handed over to mnesia_lib:other_val/2.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Reason} ->
+            mnesia_lib:other_val(Var, Reason);
+        Value ->
+            Value
+    end.
+
+%% Read an object list through its index table.
+%% We assume that table Tab has an index on attribute number Pos.
+
+read(Tid, Store, Tab, IxKey, Pos) ->
+    ResList = mnesia_locker:ixrlock(Tid, Store, Tab, IxKey, Pos),
+    case val({Tab, setorbag}) =:= bag of
+        true ->
+            %% Remove all tuples which don't include IxKey,
+            %% happens when Tab is a bag
+            mnesia_lib:key_search_all(IxKey, Pos, ResList);
+        false ->
+            ResList
+    end.
+
+%% Insert {IndexValue, Key} for Obj into every index table of Index.
+%% For tables that are not bags the index entries of the old records
+%% are removed first; the old records are read from Tab when Old is
+%% undefined.
+add_index(Index, Tab, Key, Obj, Old) ->
+    add_index2(Index#index.pos_list, Index#index.setorbag, Tab, Key, Obj, Old).
+
+add_index2(PosList, bag, _Tab, K, Obj, _OldRecs) ->
+    lists:foreach(fun({Pos, Ixt}) ->
+                          db_put(Ixt, {element(Pos, Obj), K})
+                  end, PosList);
+add_index2(PosList, _Type, Tab, K, Obj, OldRecs) ->
+    lists:foreach(
+      fun({Pos, Ixt}) ->
+              %% Remove old tuples in index if Tab is updated
+              Old = case OldRecs of
+                        undefined -> mnesia_lib:db_get(Tab, K);
+                        _ -> OldRecs
+                    end,
+              del_ixes(Ixt, Old, Pos, K),
+              db_put(Ixt, {element(Pos, Obj), K})
+      end, PosList).
+
+%% Remove the index entries of all records stored under key K in Tab
+%% from every index table of Index.
+delete_index(Index, Tab, K) ->
+    delete_index2(Index#index.pos_list, Tab, K).
+
+delete_index2(PosList, Tab, K) ->
+    lists:foreach(fun({Pos, Ixt}) ->
+                          DelObjs = mnesia_lib:db_get(Tab, K),
+                          del_ixes(Ixt, DelObjs, Pos, K)
+                  end, PosList).
+
+
+%% Erase the {IndexValue, Key} entry of every object in Objs from the
+%% index table Ixt. Pos is the indexed attribute position.
+del_ixes(Ixt, Objs, Pos, Key) ->
+    lists:foreach(fun(Obj) ->
+                          db_match_erase(Ixt, {element(Pos, Obj), Key})
+                  end, Objs).
+
+%% Update every index table of Index after Obj has been deleted from
+%% Tab. Bags are handled by del_object_bag/6; for other table types the
+%% index entry of Obj is simply erased.
+del_object_index(Index, Tab, K, Obj, Old) ->
+    del_object_index2(Index#index.pos_list, Index#index.setorbag, Tab, K, Obj, Old).
+
+del_object_index2(PosList, SoB, Tab, K, Obj, Old) ->
+    lists:foreach(
+      fun({Pos, Ixt}) ->
+              case SoB of
+                  bag ->
+                      del_object_bag(Tab, K, Obj, Pos, Ixt, Old);
+                  _ -> %% If set remove the tuple in index table
+                      del_ixes(Ixt, [Obj], Pos, K)
+              end
+      end, PosList).
+
+%% Index maintenance for a bag when Obj is deleted. Old is the list of
+%% records under Key that share the index value of Obj; when it is
+%% undefined it is read from Tab. The index entry is only erased when
+%% fewer than two such records exist, since otherwise another record
+%% still needs it.
+del_object_bag(Tab, Key, Obj, Pos, Ixt, undefined) ->
+    IxKey = element(Pos, Obj),
+    Same = [Rec || Rec <- mnesia_lib:db_get(Tab, Key), element(Pos, Rec) =:= IxKey],
+    del_object_bag(Tab, Key, Obj, Pos, Ixt, Same);
+del_object_bag(_Tab, Key, Obj, Pos, Ixt, Old) ->
+    case Old of
+        [] ->
+            del_ixes(Ixt, [Obj], Pos, Key);
+        [_] ->
+            del_ixes(Ixt, [Obj], Pos, Key);
+        _ ->
+            ok
+    end.
+
+%% Erase everything matching Obj from every index table of Index.
+%% Tab and K are passed along but not used.
+clear_index(Index, Tab, K, Obj) ->
+    clear_index2(Index#index.pos_list, Tab, K, Obj).
+
+clear_index2(PosList, _Tab, _K, Obj) ->
+    lists:foreach(fun({_Pos, Ixt}) -> db_match_erase(Ixt, Obj) end, PosList).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Match Pat against Tab. When the key part of Pat is '_' the index on
+%% position Pos is used to find the real keys first; otherwise the
+%% table is matched directly.
+dirty_match_object(Tab, Pat, Pos) ->
+    %% Assume that we are on the node where the replica is
+    case element(2, Pat) of
+        '_' ->
+            RealKeys = realkeys(Tab, Pos, element(Pos, Pat)),
+            merge(RealKeys, Tab, Pat, []);
+        _KeyPat ->
+            mnesia_lib:db_match_object(Tab, Pat)
+    end.
+
+%% For every {IxKey, RealKey} entry, match Pat with its key replaced by
+%% RealKey and put the result in front of the accumulator.
+merge(RealKeys, Tab, Pat, Ack) ->
+    %% Assume that we are on the node where the replica is
+    MatchOne =
+        fun({_IxKey, RealKey}, Found) ->
+                KeyPat = setelement(2, Pat, RealKey),
+                mnesia_lib:db_match_object(Tab, KeyPat) ++ Found
+        end,
+    lists:foldl(MatchOne, Ack, RealKeys).
+
+%% Look up IxKey in the index table that Tab has on position Pos.
+realkeys(Tab, Pos, IxKey) ->
+ Index = get_index_table(Tab, Pos),
+ db_get(Index, IxKey). % a list on the form [{IxKey, RealKey1} , ....
+
+%% Return all records whose indexed attribute equals element Pos of
+%% Spec, found through the index table.
+dirty_select(Tab, Spec, Pos) ->
+    %% Assume that we are on the node where the replica is
+    %% Returns the records without applying the match spec
+    %% The actual filtering is handled by the caller
+    RealKeys = realkeys(Tab, Pos, element(Pos, Spec)),
+    Storage = val({Tab, storage_type}),
+    PerKey = [mnesia_lib:db_get(Storage, Tab, RealKey) || {_, RealKey} <- RealKeys],
+    lists:append(PerKey).
+
+%% Read records through the index: dirty_read2/3 is run via
+%% mnesia:dirty_rpc/4 and, for bags, the result is filtered afterwards.
+dirty_read(Tab, IxKey, Pos) ->
+    ResList = mnesia:dirty_rpc(Tab, ?MODULE, dirty_read2,
+                               [Tab, IxKey, Pos]),
+    case val({Tab, setorbag}) =:= bag of
+        true ->
+            %% Remove all tuples which don't include Ixkey
+            mnesia_lib:key_search_all(IxKey, Pos, ResList);
+        false ->
+            ResList
+    end.
+
+%% Find the real keys stored under IxKey in the index on Pos and read
+%% the records for each of them from Tab.
+dirty_read2(Tab, IxKey, Pos) ->
+    Ix = get_index_table(Tab, Pos),
+    r_keys(db_match(Ix, {IxKey, '$1'}), Tab, []).
+
+%% Keys is a list of single element lists, as returned by db_match/2.
+%% The records of each key are put in front of the accumulator.
+r_keys(Keys, Tab, Ack) ->
+    lists:foldl(fun([Key], Found) ->
+                        mnesia_lib:db_get(Tab, Key) ++ Found
+                end, Ack, Keys).
+
+
+%%%%%%% Creation, Init and deletion routines for index tables
+%% We can have several indexes on the same table
+%% this can be a fairly costly operation if table is *very* large
+
+%% File name of the index that Tab has on position Pos:
+%% mnesia_lib:dir(Tab) followed by "_<Pos>.DAT".
+tab2filename(Tab, Pos) ->
+    Base = mnesia_lib:dir(Tab),
+    Base ++ "_" ++ integer_to_list(Pos) ++ ".DAT".
+
+%% Temporary file name of the index that Tab has on position Pos:
+%% mnesia_lib:dir(Tab) followed by "_<Pos>.TMP".
+tab2tmp_filename(Tab, Pos) ->
+    Base = mnesia_lib:dir(Tab),
+    Base ++ "_" ++ integer_to_list(Pos) ++ ".TMP".
+
+%% Initialise all indexes registered under {Tab, index}.
+init_index(Tab, Storage) ->
+    init_indecies(Tab, Storage, val({Tab, index})).
+
+%% Initialise the indexes on the positions in PosList according to the
+%% storage type: dets based for disc_only_copies, ets based for
+%% ram_copies and disc_copies. Nothing is done for unknown storage.
+init_indecies(Tab, Storage, PosList) ->
+    case Storage of
+        unknown ->
+            ignore;
+        disc_only_copies ->
+            init_disc_index(Tab, PosList);
+        InRam when InRam =:= ram_copies; InRam =:= disc_copies ->
+            make_ram_index(Tab, PosList)
+    end.
+
+%% works for both ram and disc indexes
+%%
+%% Delete the index that Tab has on Pos and remove Pos from the
+%% {Tab, index} entry. Nothing is done when the storage type is unknown.
+
+del_index_table(_, unknown, _) ->
+ ignore;
+del_index_table(Tab, Storage, Pos) ->
+ delete_transient_index(Tab, Pos, Storage),
+ mnesia_lib:del({Tab, index}, Pos).
+
+%% Delete the index tables of Tab; del_transient/2 takes the positions
+%% from {Tab, index}, del_transient/3 gets them as an argument.
+%% Always returns done.
+del_transient(Tab, Storage) ->
+    del_transient(Tab, val({Tab, index}), Storage).
+
+del_transient(Tab, PosList, Storage) ->
+    lists:foreach(fun(Pos) ->
+                          delete_transient_index(Tab, Pos, Storage)
+                  end, PosList),
+    done.
+
+%% Delete the index table that Tab has on Pos and forget about it:
+%% the index is removed from the commit work and the
+%% {Tab, {index, Pos}} entry is unset.
+%% disc_only_copies: close the dets table and delete its file.
+%% Other storage types: delete the ets table.
+delete_transient_index(Tab, Pos, disc_only_copies) ->
+ Tag = {Tab, index, Pos},
+ mnesia_monitor:unsafe_close_dets(Tag),
+ %% The result of file:delete/1 is deliberately not checked
+ file:delete(tab2filename(Tab, Pos)),
+ del_index_info(Tab, Pos), %% Uses val(..)
+ mnesia_lib:unset({Tab, {index, Pos}});
+
+delete_transient_index(Tab, Pos, _Storage) ->
+ Ixt = val({Tab, {index, Pos}}),
+ ?ets_delete_table(Ixt),
+ del_index_info(Tab, Pos),
+ mnesia_lib:unset({Tab, {index, Pos}}).
+
+%%%%% misc functions for the index create/init/delete functions above
+
+%% assuming that the file exists.
+%%
+%% (Re)build a dets based index for every position in the list: any
+%% existing index file is deleted, a new dets bag is opened and filled
+%% from Tab with dets:init_table/2, and the index is registered under
+%% {Tab, {index, Pos}} and in the commit work. Returns done.
+init_disc_index(_Tab, []) ->
+ done;
+init_disc_index(Tab, [Pos | Tail]) when is_integer(Pos) ->
+ Fn = tab2filename(Tab, Pos),
+ IxTag = {Tab, index, Pos},
+ file:delete(Fn),
+ Args = [{file, Fn}, {keypos, 1}, {type, bag}],
+ mnesia_monitor:open_dets(IxTag, Args),
+ Storage = disc_only_copies,
+ %% Use the external size of the records under the first key to pick
+ %% a chunk size of roughly 4000 bytes worth of keys (at least one).
+ Key = mnesia_lib:db_first(Storage, Tab),
+ Recs = mnesia_lib:db_get(Storage, Tab, Key),
+ BinSize = size(term_to_binary(Recs)),
+ KeysPerChunk = (4000 div BinSize) + 1,
+ Init = {start, KeysPerChunk},
+ %% Tab is fixed while it is traversed by the init fun
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+ ok = dets:init_table(IxTag, create_fun(Init, Tab, Pos)),
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_lib:set({Tab, {index, Pos}}, IxTag),
+ add_index_info(Tab, val({Tab, setorbag}), {Pos, {dets, IxTag}}),
+ init_disc_index(Tab, Tail).
+
+%% Build the input fun handed to dets:init_table/2. Cont is either
+%% {start, KeysPerChunk}, '$end_of_table' or a continuation from the
+%% previous chunk. On read, the fun fetches the next chunk of records
+%% from Tab and returns their {IndexValue, Key} tuples together with a
+%% fun for the following chunk, or end_of_input when the table is
+%% exhausted. On close it just returns ok.
+create_fun(Cont, Tab, Pos) ->
+ fun(read) ->
+ Data =
+ case Cont of
+ {start, KeysPerChunk} ->
+ mnesia_lib:db_init_chunk(disc_only_copies, Tab, KeysPerChunk);
+ '$end_of_table' ->
+ '$end_of_table';
+ _Else ->
+ mnesia_lib:db_chunk(disc_only_copies, Cont)
+ end,
+ case Data of
+ '$end_of_table' ->
+ end_of_input;
+ {Recs, Next} ->
+ IdxElems = [{element(Pos, Obj), element(2, Obj)} || Obj <- Recs],
+ {IdxElems, create_fun(Next, Tab, Pos)}
+ end;
+ (close) ->
+ ok
+ end.
+
+%% Create an ets based index for every position in the list.
+%% Always returns done.
+make_ram_index(Tab, PosList) ->
+    lists:foreach(fun(Pos) -> add_ram_index(Tab, Pos) end, PosList),
+    done.
+
+%% Create an ets bag holding {IndexValue, Key} for every record in Tab,
+%% register it under {Tab, {index, Pos}} and add it to the commit work.
+%% The position snmp is accepted but nothing is created for it.
+add_ram_index(Tab, Pos) when is_integer(Pos) ->
+ verbose("Creating index for ~w ~n", [Tab]),
+ Index = mnesia_monitor:mktab(mnesia_index, [bag, public]),
+ Insert = fun(Rec, _Acc) ->
+ true = ?ets_insert(Index, {element(Pos, Rec), element(2, Rec)})
+ end,
+ %% Tab is fixed while it is traversed by the fold
+ mnesia_lib:db_fixtable(ram_copies, Tab, true),
+ true = ets:foldl(Insert, true, Tab),
+ mnesia_lib:db_fixtable(ram_copies, Tab, false),
+ mnesia_lib:set({Tab, {index, Pos}}, Index),
+ add_index_info(Tab, val({Tab, setorbag}), {Pos, {ram, Index}});
+add_ram_index(_Tab, snmp) ->
+ ok.
+
+%% Add IxElem to the #index{} entry in the commit work of Tab. The
+%% entry is created with table type Type when there is none yet. The
+%% commit work is stored again after mnesia_lib:sort_commit/1.
+add_index_info(Tab, Type, IxElem) ->
+    Commit = val({Tab, commit_work}),
+    NewCommit =
+        case lists:keysearch(index, 1, Commit) of
+            false ->
+                %% Check later if mnesia_tm is sensative about the order
+                [#index{setorbag = Type, pos_list = [IxElem]} | Commit];
+            {value, Old} ->
+                %% We could check for consistency here
+                Extended = Old#index{pos_list = [IxElem | Old#index.pos_list]},
+                lists:keyreplace(index, 1, Commit, Extended)
+        end,
+    mnesia_lib:set({Tab, commit_work}, mnesia_lib:sort_commit(NewCommit)).
+
+%% Remove position Pos from the #index{} entry in the commit work of
+%% Tab. The whole entry is dropped when no position is left.
+%% Returns skip when the commit work holds no index entry.
+del_index_info(Tab, Pos) ->
+    Commit = val({Tab, commit_work}),
+    case lists:keysearch(index, 1, Commit) of
+        false ->
+            %% Something is wrong ignore
+            skip;
+        {value, Old} ->
+            NewCommit =
+                case lists:keydelete(Pos, 1, Old#index.pos_list) of
+                    [] ->
+                        lists:keydelete(index, 1, Commit);
+                    Remaining ->
+                        Shrunk = Old#index{pos_list = Remaining},
+                        lists:keyreplace(index, 1, Commit, Shrunk)
+                end,
+            mnesia_lib:set({Tab, commit_work}, mnesia_lib:sort_commit(NewCommit))
+    end.
+
+%% Insert V into an index table; {ram, Ixt} is an ets table and
+%% {dets, Ixt} a dets table. Crashes if the insert fails.
+db_put({ram, Ixt}, V) ->
+ true = ?ets_insert(Ixt, V);
+db_put({dets, Ixt}, V) ->
+ ok = dets:insert(Ixt, V).
+
+%% Look up K in an ets ({ram, Ixt}) or dets ({dets, Ixt}) index table.
+db_get({ram, Ixt}, K) ->
+ ?ets_lookup(Ixt, K);
+db_get({dets, Ixt}, K) ->
+ dets:lookup(Ixt, K).
+
+%% Delete everything matching Pat from an ets ({ram, Ixt}) or dets
+%% ({dets, Ixt}) index table. Crashes if the delete fails.
+db_match_erase({ram, Ixt}, Pat) ->
+ true = ?ets_match_delete(Ixt, Pat);
+db_match_erase({dets, Ixt}, Pat) ->
+ ok = dets:match_delete(Ixt, Pat).
+
+%% Match Pat against an ets ({ram, Ixt}) or dets ({dets, Ixt}) index
+%% table and return the variable bindings.
+db_match({ram, Ixt}, Pat) ->
+ ?ets_match(Ixt, Pat);
+db_match({dets, Ixt}, Pat) ->
+ dets:match(Ixt, Pat).
+
+%% Return the handle of the index that Tab has on Pos:
+%% {ram, Ixt} for ram_copies and disc_copies, {dets, Ixt} for
+%% disc_only_copies and unknown for unknown storage.
+%% get_index_table/2 takes the storage type from {Tab, storage_type}.
+get_index_table(Tab, Pos) ->
+    Storage = val({Tab, storage_type}),
+    get_index_table(Tab, Storage, Pos).
+
+get_index_table(_Tab, unknown, _Pos) ->
+    unknown;
+get_index_table(Tab, disc_only_copies, Pos) ->
+    {dets, val({Tab, {index, Pos}})};
+get_index_table(Tab, Storage, Pos) when Storage =:= ram_copies;
+                                        Storage =:= disc_copies ->
+    {ram, val({Tab, {index, Pos}})}.
+
diff --git a/lib/mnesia/src/mnesia_kernel_sup.erl b/lib/mnesia/src/mnesia_kernel_sup.erl
new file mode 100644
index 0000000000..08f6129fc0
--- /dev/null
+++ b/lib/mnesia/src/mnesia_kernel_sup.erl
@@ -0,0 +1,65 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_kernel_sup).
+
+-behaviour(supervisor).
+
+-export([start/0, init/1, supervisor_timeout/1]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% top supervisor callback functions
+
+%% Start the kernel supervisor, linked to the caller and locally
+%% registered as mnesia_kernel_sup.
+start() ->
+ supervisor:start_link({local, mnesia_kernel_sup}, ?MODULE, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% sub supervisor callback functions
+
+%% Supervisor callback. Children are listed in start order and share a
+%% one_for_all strategy that tolerates no restarts at all (0 restarts
+%% per 24 hours): recovery is left to the top supervisor.
+init([]) ->
+    Short = timer:seconds(3),
+    GenServer = [gen_server],
+    ProcLib = [mnesia_monitor, proc_lib],
+    Children =
+        [worker_spec(mnesia_monitor, Short, GenServer),
+         worker_spec(mnesia_subscr, Short, GenServer),
+         worker_spec(mnesia_locker, Short, ProcLib),
+         worker_spec(mnesia_recover, timer:minutes(3), GenServer),
+         worker_spec(mnesia_tm, timer:seconds(30), ProcLib),
+         supervisor_spec(mnesia_checkpoint_sup),
+         supervisor_spec(mnesia_snmp_sup),
+         worker_spec(mnesia_controller, Short, GenServer),
+         worker_spec(mnesia_late_loader, Short, ProcLib)],
+    %% Trust the top supervisor
+    RestartPolicy = {one_for_all, 0, timer:hours(24)},
+    {ok, {RestartPolicy, Children}}.
+
+%% Child spec for a permanent worker started through Name:start/0.
+%% KillAfter is the shutdown timeout (possibly overridden by
+%% supervisor_timeout/1); Modules are listed after Name itself.
+worker_spec(Name, KillAfter, Modules) ->
+    StartMFA = {Name, start, []},
+    Shutdown = supervisor_timeout(KillAfter),
+    {Name, StartMFA, permanent, Shutdown, worker, [Name | Modules]}.
+
+%% Child spec for a permanent sub-supervisor started through
+%% Name:start/0, with an infinite shutdown timeout.
+supervisor_spec(Name) ->
+ {Name, {Name, start, []}, permanent, infinity, supervisor,
+ [Name, supervisor]}.
+
+%% Shutdown timeout handed to the supervisor. When compiled with
+%% debug_shutdown defined every child gets 24 hours; otherwise the
+%% requested KillAfter is used unchanged.
+-ifdef(debug_shutdown).
+supervisor_timeout(_KillAfter) -> timer:hours(24).
+-else.
+supervisor_timeout(KillAfter) -> KillAfter.
+-endif.
+
+
diff --git a/lib/mnesia/src/mnesia_late_loader.erl b/lib/mnesia/src/mnesia_late_loader.erl
new file mode 100644
index 0000000000..d09de3ca66
--- /dev/null
+++ b/lib/mnesia/src/mnesia_late_loader.erl
@@ -0,0 +1,108 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_late_loader).
+
+-export([
+ async_late_disc_load/3,
+ maybe_async_late_disc_load/3,
+ init/1,
+ start/0
+ ]).
+
+%% sys callback functions
+-export([
+ system_continue/3,
+ system_terminate/4,
+ system_code_change/4
+ ]).
+
+-define(SERVER_NAME, ?MODULE).
+
+-record(state, {supervisor}).
+
+%% Ask the late loader on Node to schedule a late disc load of Tabs.
+%% Fire-and-forget: the message is sent without waiting for a reply and
+%% a failing send is swallowed by the catch. Nothing is sent when Tabs
+%% is empty.
+async_late_disc_load(_, [], _) -> ok;
+async_late_disc_load(Node, Tabs, Reason) ->
+ Msg = {async_late_disc_load, Tabs, Reason},
+ catch ({?SERVER_NAME, Node} ! {self(), Msg}).
+
+%% Like async_late_disc_load/3, but the receiving late loader first
+%% filters Tabs against their master nodes (see the matching clause in
+%% loop/1). Nothing is sent when Tabs is empty.
+maybe_async_late_disc_load(_, [], _) -> ok;
+maybe_async_late_disc_load(Node, Tabs, Reason) ->
+ Msg = {maybe_async_late_disc_load, Tabs, Reason},
+ catch ({?SERVER_NAME, Node} ! {self(), Msg}).
+
+%% Start the late loader process through mnesia_monitor:start_proc/4,
+%% passing the calling process as the parent argument of init/1.
+start() ->
+ mnesia_monitor:start_proc(?SERVER_NAME, ?MODULE, init, [self()]).
+
+%% Process entry point. Registers the process, merges the schema while
+%% temporarily linked to mnesia_controller, marks mnesia_status as
+%% running, acknowledges the start to Parent and enters the main loop.
+init(Parent) ->
+ %% Trap exit omitted intentionally
+ register(?SERVER_NAME, self()),
+ link(whereis(mnesia_controller)), %% We may not hang
+ mnesia_controller:merge_schema(),
+ unlink(whereis(mnesia_controller)),
+ mnesia_lib:set(mnesia_status, running),
+ proc_lib:init_ack(Parent, {ok, self()}),
+ loop(#state{supervisor = Parent}).
+
+%% Main receive loop of the late loader. Handles the two load requests
+%% and sys messages; anything else is reported as an error and dropped.
+loop(State) ->
+    receive
+        {_From, {async_late_disc_load, Tabs, Reason}} ->
+            mnesia_controller:schedule_late_disc_load(Tabs, Reason),
+            loop(State);
+
+        {_From, {maybe_async_late_disc_load, Tabs, Reason}} ->
+            %% The accepted tables are scheduled in reverse request order
+            Accepted = lists:filter(fun may_load_here/1, Tabs),
+            GoodTabs = lists:reverse(Accepted),
+            mnesia_controller:schedule_late_disc_load(GoodTabs, Reason),
+            loop(State);
+
+        {system, From, Msg} ->
+            mnesia_lib:dbg_out("~p got {system, ~p, ~p}~n",
+                               [?SERVER_NAME, From, Msg]),
+            #state{supervisor = Parent} = State,
+            sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State);
+
+        Msg ->
+            mnesia_lib:error("~p got unexpected message: ~p~n",
+                             [?SERVER_NAME, Msg]),
+            loop(State)
+    end.
+
+%% A table may be loaded here when it has no master nodes at all or
+%% when this node is one of its master nodes.
+may_load_here(Tab) ->
+    case mnesia_recover:get_master_nodes(Tab) of
+        [] -> true;
+        Masters -> lists:member(node(), Masters)
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+%% sys callback: resume the main loop with the state we were given.
+system_continue(_Parent, _Debug, State) ->
+ loop(State).
+
+%% sys callback: terminate the process with the requested Reason.
+system_terminate(Reason, _Parent, _Debug, _State) ->
+ exit(Reason).
+
+%% sys callback: the state needs no conversion on code change.
+system_code_change(State, _Module, _OldVsn, _Extra) ->
+ {ok, State}.
diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl
new file mode 100644
index 0000000000..dba808e66e
--- /dev/null
+++ b/lib/mnesia/src/mnesia_lib.erl
@@ -0,0 +1,1306 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% This module contains assorted utility functions that do not fit
+%% anywhere else. Basically everything is exported.
+
+-module(mnesia_lib).
+
+-include("mnesia.hrl").
+-include_lib("kernel/include/file.hrl").
+
+-export([core_file/0]).
+
+-export([
+ active_tables/0,
+ add/2,
+ add_list/2,
+ add_lsort/2,
+ all_nodes/0,
+%% catch_val/1,
+ copy_file/2,
+ copy_holders/1,
+ coredump/0,
+ coredump/1,
+ create_counter/1,
+ cs_to_nodes/1,
+ cs_to_storage_type/2,
+ dets_to_ets/6,
+ db_chunk/2,
+ db_init_chunk/1,
+ db_init_chunk/2,
+ db_init_chunk/3,
+ db_erase/2,
+ db_erase/3,
+ db_erase_tab/1,
+ db_erase_tab/2,
+ db_first/1,
+ db_first/2,
+ db_last/1,
+ db_last/2,
+ db_fixtable/3,
+ db_get/2,
+ db_get/3,
+ db_match_erase/2,
+ db_match_erase/3,
+ db_match_object/2,
+ db_match_object/3,
+ db_next_key/2,
+ db_next_key/3,
+ db_prev_key/2,
+ db_prev_key/3,
+ db_put/2,
+ db_put/3,
+ db_select/2,
+ db_select/3,
+ db_select_init/4,
+ db_select_cont/3,
+ db_slot/2,
+ db_slot/3,
+ db_update_counter/3,
+ db_update_counter/4,
+ dbg_out/2,
+ del/2,
+ dets_sync_close/1,
+ dets_sync_open/2,
+ dets_sync_open/3,
+ dir/0,
+ dir/1,
+ dir_info/0,
+ dirty_rpc_error_tag/1,
+ dist_coredump/0,
+ disk_type/1,
+ disk_type/2,
+ elems/2,
+ ensure_loaded/1,
+ error/2,
+ error_desc/1,
+ etype/1,
+ exists/1,
+ fatal/2,
+ get_node_number/0,
+ fix_error/1,
+ important/2,
+ incr_counter/1,
+ incr_counter/2,
+ intersect/2,
+ is_running/0,
+ is_running/1,
+ is_running_remote/0,
+ is_string/1,
+ key_search_delete/3,
+ key_search_all/3,
+ last_error/0,
+ local_active_tables/0,
+ lock_table/1,
+ mkcore/1,
+ not_active_here/1,
+ other_val/2,
+ pad_name/3,
+ random_time/2,
+ read_counter/1,
+ readable_indecies/1,
+ remote_copy_holders/1,
+ report_fatal/2,
+ report_system_event/1,
+ running_nodes/0,
+ running_nodes/1,
+ schema_cs_to_storage_type/2,
+ search_delete/2,
+ set/2,
+ set_counter/2,
+ set_local_content_whereabouts/1,
+ set_remote_where_to_read/1,
+ set_remote_where_to_read/2,
+ show/1,
+ show/2,
+ sort_commit/1,
+ storage_type_at_node/2,
+ tab2dat/1,
+ tab2dmp/1,
+ tab2tmp/1,
+ tab2dcd/1,
+ tab2dcl/1,
+ to_list/1,
+ union/2,
+ uniq/1,
+ unlock_table/1,
+ unset/1,
+ %% update_counter/2,
+ val/1,
+ vcore/0,
+ vcore/1,
+ verbose/2,
+ view/0,
+ view/1,
+ view/2,
+ warning/2,
+
+ is_debug_compiled/0,
+ activate_debug_fun/5,
+ deactivate_debug_fun/3,
+ eval_debug_fun/4,
+ scratch_debug_fun/0
+ ]).
+
+
+%% Remove every element that exactly matches Obj from List.
+%% Returns {Obj, Rest} when at least one element was removed and
+%% {none, Rest} otherwise. Rest holds the remaining elements in
+%% reverse order.
+search_delete(Obj, List) ->
+    Step = fun(Elem, {_Found, Kept}) when Elem =:= Obj -> {Obj, Kept};
+              (Elem, {Found, Kept}) -> {Found, [Elem | Kept]}
+           end,
+    lists:foldl(Step, {none, []}, List).
+
+%% Remove every tuple whose element Pos compares equal (==) to Key.
+%% Returns {Obj, Rest} where Obj is the last removed tuple (none if
+%% nothing matched) and Rest the remaining elements in reverse order.
+key_search_delete(Key, Pos, TupleList) ->
+    Step = fun(Tuple, {_Last, Kept}) when element(Pos, Tuple) == Key ->
+                   {Tuple, Kept};
+              (Other, {Last, Kept}) ->
+                   {Last, [Other | Kept]}
+           end,
+    lists:foldl(Step, {none, []}, TupleList).
+
+%% Collect every tuple whose element Pos compares equal (==) to Key.
+%% The matches are returned in reverse order of appearance.
+key_search_all(Key, Pos, TupleList) ->
+    Collect = fun(Tuple, Found) when element(Pos, Tuple) == Key ->
+                      [Tuple | Found];
+                 (_Other, Found) ->
+                      Found
+              end,
+    lists:foldl(Collect, [], TupleList).
+
+%% Return the elements of L2 that also occur in L1, in L2 order.
+%% Built from two list subtractions, so an element is kept at most as
+%% many times as it occurs in L1.
+intersect(L1, L2) ->
+ L2 -- (L2 -- L1).
+
+%% Project element number I out of every tuple in the list.
+elems(I, Tuples) ->
+    lists:map(fun(Tuple) -> element(I, Tuple) end, Tuples).
+
+%% sort_commit makes sure that the checkpoint info, if present, comes
+%% first in the commit_work structure. The other items need no sorting:
+%% everything after the checkpoint entry keeps its order, everything
+%% before it is appended in reverse order.
+sort_commit(List) ->
+    NotCheckpoints = fun({checkpoints, _}) -> false;
+                        (_) -> true
+                     end,
+    {Before, FromCheckpoints} = lists:splitwith(NotCheckpoints, List),
+    FromCheckpoints ++ lists:reverse(Before).
+
+%% True when the argument is a proper list of integers in 0..255.
+is_string([Char | Rest]) when is_integer(Char), 0 =< Char, Char < 256 ->
+    is_string(Rest);
+is_string([_NotAChar | _]) ->
+    false;
+is_string([]) ->
+    true.
+
+%%%
+
+%% Elements of L1 that are not members of L2, followed by all of L2.
+union(L1, L2) ->
+    [Elem || Elem <- L1, not lists:member(Elem, L2)] ++ L2.
+
+%% Sort the list and drop exact duplicates. Note that the result is
+%% returned in descending order.
+uniq(List) ->
+    DropRepeat = fun(Elem, [Elem | _] = Seen) -> Seen;
+                    (Elem, Seen) -> [Elem | Seen]
+                 end,
+    lists:foldl(DropRepeat, [], lists:sort(List)).
+
+%% Lists are returned unchanged; anything else is passed to
+%% atom_to_list/1 (which fails with badarg for non-atoms).
+to_list(X) when is_list(X) -> X;
+to_list(X) -> atom_to_list(X).
+
+%% The db_nodes and extra_db_nodes reported by mnesia:system_info/1,
+%% with duplicates removed by uniq/1.
+all_nodes() ->
+ Ns = mnesia:system_info(db_nodes) ++
+ mnesia:system_info(extra_db_nodes),
+ mnesia_lib:uniq(Ns).
+
+%% Nodes on which Mnesia reports itself as running. Every candidate is
+%% asked with one rpc:multicall/4; nodes that fail to answer or answer
+%% anything but {true, Node} are left out.
+running_nodes() ->
+    running_nodes(all_nodes()).
+
+running_nodes(Ns) ->
+    {Replies, _BadNs} = rpc:multicall(Ns, ?MODULE, is_running_remote, []),
+    [Node || {true, Node} <- Replies].
+
+%% RPC target of running_nodes/1: returns {Bool, node()} where Bool is
+%% true only when is_running/0 answers yes.
+is_running_remote() ->
+ IsRunning = is_running(),
+ {IsRunning == yes, node()}.
+
+%% Ask Node for its is_running/0 status; an rpc failure counts as no.
+is_running(Node) when is_atom(Node) ->
+ case rpc:call(Node, ?MODULE, is_running, []) of
+ {badrpc, _} -> no;
+ X -> X
+ end.
+
+%% Local Mnesia status derived from the mnesia_status variable:
+%% yes | starting | stopping, or no when the variable cannot be read
+%% (?catch_val yields an 'EXIT' tuple).
+is_running() ->
+ case ?catch_val(mnesia_status) of
+ {'EXIT', _} -> no;
+ running -> yes;
+ starting -> starting;
+ stopping -> stopping
+ end.
+
+%% Print a format string (with optional arguments) on the user device.
+show(X) ->
+ show(X, []).
+show(F, A) ->
+ io:format(user, F, A).
+
+
+%% Copy the name, pad it with blanks until Len characters have been
+%% emitted, then append Tail. Names longer than Len are not truncated.
+pad_name([], Len, Tail) when Len > 0 ->
+    [$\s | pad_name([], Len - 1, Tail)];
+pad_name([], _Len, Tail) ->
+    Tail;
+pad_name([Char | Chars], Len, Tail) ->
+    [Char | pad_name(Chars, Len - 1, Tail)].
+
+%% Some utility functions .....
+%% True when the table is currently read from this node.
+active_here(Tab) ->
+    val({Tab, where_to_read}) == node().
+
+%% Negation of active_here/1.
+not_active_here(Tab) ->
+ not active_here(Tab).
+
+%% True when Fname can be opened for reading. The probe handle is
+%% closed again right away; any open failure yields false.
+exists(Fname) ->
+ case file:open(Fname, [raw,read]) of
+ {ok, F} ->file:close(F), true;
+ _ -> false
+ end.
+
+%% dir/0 is the configured Mnesia directory (the dir environment value
+%% held by mnesia_monitor); dir/1 is the path of Fname, an atom or a
+%% string, inside that directory.
+dir() -> mnesia_monitor:get_env(dir).
+
+dir(Fname) ->
+ filename:join([dir(), to_list(Fname)]).
+
+%% File names used for a table inside the Mnesia directory. Each
+%% function appends a fixed suffix to the table name and resolves the
+%% result with dir/1.
+tab2dat(Tab) -> %% DETS files
+ dir(lists:concat([Tab, ".DAT"])).
+
+tab2tmp(Tab) ->
+ dir(lists:concat([Tab, ".TMP"])).
+
+tab2dmp(Tab) -> %% Dumped ets tables
+ dir(lists:concat([Tab, ".DMP"])).
+
+tab2dcd(Tab) -> %% Disc copies data
+ dir(lists:concat([Tab, ".DCD"])).
+
+tab2dcl(Tab) -> %% Disc copies log
+ dir(lists:concat([Tab, ".DCL"])).
+
+%% Storage type of Tab at Node according to the global variables:
+%% the first of disc_copies, ram_copies and disc_only_copies whose node
+%% list contains Node, or unknown if none does.
+storage_type_at_node(Node, Tab) ->
+ search_key(Node, [{disc_copies, val({Tab, disc_copies})},
+ {ram_copies, val({Tab, ram_copies})},
+ {disc_only_copies, val({Tab, disc_only_copies})}]).
+
+%% Same lookup as storage_type_at_node/2 but based on the node lists
+%% held in the cstruct record Cs.
+cs_to_storage_type(Node, Cs) ->
+ search_key(Node, [{disc_copies, Cs#cstruct.disc_copies},
+ {ram_copies, Cs#cstruct.ram_copies},
+ {disc_only_copies, Cs#cstruct.disc_only_copies}]).
+
+%% Like cs_to_storage_type/2, except that an unknown storage type is
+%% reported as ram_copies when Cs describes the schema table.
+schema_cs_to_storage_type(Node, Cs) ->
+ case cs_to_storage_type(Node, Cs) of
+ unknown when Cs#cstruct.name == schema -> ram_copies;
+ Other -> Other
+ end.
+
+
+%% Return the Val of the first {Val, List} pair whose List contains
+%% Key, or unknown when no pair does.
+search_key(Key, Pairs) ->
+    Misses = fun({_Val, List}) -> not lists:member(Key, List) end,
+    case lists:dropwhile(Misses, Pairs) of
+        [{Val, _List} | _] -> Val;
+        [] -> unknown
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Oops, we've got some global variables here :-)
+
+%% They are
+%%
+%% {Tab, setorbag}, -> set | bag
+%% {Tab, storage_type} -> disc_copies | ram_copies | disc_only_copies | unknown (**)
+%% {Tab, disc_copies} -> node list (from schema)
+%% {Tab, ram_copies}, -> node list (from schema)
+%% {Tab, arity}, -> number
+%% {Tab, attributes}, -> atom list
+%% {Tab, wild_pattern}, -> record tuple with '_'s
+%% {Tab, {index, Pos}} -> ets table
+%% {Tab, index} -> integer list
+%% {Tab, cstruct} -> cstruct structure
+%%
+
+%% The following fields are dynamic according to the
+%% the current node/table situation
+
+%% {Tab, where_to_write} -> node list
+%% {Tab, where_to_read} -> node | nowhere
+%%
+%% {schema, tables} -> tab list
+%% {schema, local_tables} -> tab list (**)
+%%
+%% {current, db_nodes} -> node list
+%%
+%% dir -> directory path (**)
+%% mnesia_status -> starting | running | stopping (**)
+%% (**) == (Different on all nodes)
+%%
+
+%% Read the global variable Var. When the ?catch_val lookup fails the
+%% decision is delegated to other_val/2, which either supplies a
+%% default or raises.
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+%% Store {Var, Val} in the mnesia_gvar table.
+set(Var, Val) ->
+ ?ets_insert(mnesia_gvar, {Var, Val}).
+
+%% Remove Var from the mnesia_gvar table.
+unset(Var) ->
+ ?ets_delete(mnesia_gvar, Var).
+
+%% Fallback used by val/1 when a variable could not be read. A few
+%% table properties have harmless defaults; everything else is
+%% reported and raised by pr_other/2.
+other_val({_, where_to_read}, _Other) ->
+    nowhere;
+other_val({_, where_to_write}, _Other) ->
+    [];
+other_val({_, active_replicas}, _Other) ->
+    [];
+other_val(Var, Other) ->
+    pr_other(Var, Other).
+
+%% Report a failed variable lookup and never return. The reason is
+%% {node_not_running, node()} when Mnesia is not running here and
+%% {no_exists, Var} otherwise. It is raised with exit/1 when the
+%% failure was a badarg from ets:lookup_element and with
+%% erlang:error/1 for any other failure.
+-spec(pr_other/2 :: (_,_) -> no_return()).
+
+pr_other(Var, Other) ->
+ Why =
+ case is_running() of
+ no -> {node_not_running, node()};
+ _ -> {no_exists, Var}
+ end,
+ verbose("~p (~p) val(mnesia_gvar, ~w) -> ~p ~p ~n",
+ [self(), process_info(self(), registered_name),
+ Var, Other, Why]),
+ case Other of
+ {badarg, [{ets, lookup_element, _}|_]} ->
+ exit(Why);
+ _ ->
+ erlang:error(Why)
+ end.
+
+%% Some functions for list valued variables
+
+%% Put Val first in the list stored under Var, after removing its
+%% first previous occurrence (if any).
+add(Var, Val) ->
+ L = val(Var),
+ set(Var, [Val | lists:delete(Val, L)]).
+
+%% Replace the list stored under Var by union(Stored, List).
+add_list(Var, List) ->
+ L = val(Var),
+ set(Var, union(L, List)).
+
+%% Remove the first occurrence of Val from the list stored under Var.
+del(Var, Val) ->
+ L = val(Var),
+ set(Var, lists:delete(Val, L)).
+
+%% LSort -> [node()| Sorted] == Locker sorted
+%%
+%% Add Val to the node list stored under Var while keeping the local
+%% node first. The local node itself is moved to the front; any other
+%% value is inserted into the part after a leading local node using
+%% ordsets operations (see lsort_add/2).
+
+add_lsort(Var, Val) when node() == Val ->
+ L = val(Var),
+ set(Var, [Val | lists:delete(Val, L)]);
+add_lsort(Var,Val) ->
+ case val(Var) of
+ [Head|Rest] when Head == node() ->
+ set(Var,[Head|lsort_add(Val,Rest)]);
+ List ->
+ set(Var,lsort_add(Val,List))
+ end.
+
+%% Insert Val into the ordered set List; a value that is already a
+%% member leaves the set unchanged.
+lsort_add(Val, List) ->
+    ordsets:add_element(Val, List).
+
+%% This function is needed due to the fact
+%% that the application_controller enters
+%% a deadlock now and then. ac is implemented
+%% as a rather naive server.
+%%
+%% Returns ok when Appl is loaded (already or after a successful
+%% application:load/1), otherwise
+%% {error, {application_load_error, Reason}}.
+ensure_loaded(Appl) ->
+ case application_controller:get_loaded(Appl) of
+ {true, _} ->
+ ok;
+ false ->
+ case application:load(Appl) of
+ ok ->
+ ok;
+ {error, {already_loaded, Appl}} ->
+ ok;
+ {error, Reason} ->
+ {error, {application_load_error, Reason}}
+ end
+ end.
+
+%% The local tables that are currently read from this node.
+local_active_tables() ->
+    [Tab || Tab <- val({schema, local_tables}), active_here(Tab)].
+
+%% All tables in the schema that can currently be read from some node,
+%% i.e. whose where_to_read is not nowhere.
+active_tables() ->
+    [Tab || Tab <- val({schema, tables}),
+            val({Tab, where_to_read}) =/= nowhere].
+
+%% Coarse type name of a term:
+%% integer | nil | list | tuple | atom | othertype.
+etype(X) ->
+    if
+        is_integer(X) -> integer;
+        X =:= [] -> nil;
+        is_list(X) -> list;
+        is_tuple(X) -> tuple;
+        is_atom(X) -> atom;
+        true -> othertype
+    end.
+
+%% The copy holders of the table described by Cs, minus this node.
+remote_copy_holders(Cs) ->
+ copy_holders(Cs) -- [node()].
+
+%% Nodes holding a copy of the table described by Cs. For a
+%% local_content table only this node can be a holder, and only if it
+%% is listed among the table's nodes.
+copy_holders(Cs) when Cs#cstruct.local_content == false ->
+ cs_to_nodes(Cs);
+copy_holders(Cs) when Cs#cstruct.local_content == true ->
+ case lists:member(node(), cs_to_nodes(Cs)) of
+ true -> [node()];
+ false -> []
+ end.
+
+
+%% Pick the node to read Tab from and store it as {Tab, where_to_read}.
+%% Candidates are the active replicas (restricted to the master nodes
+%% when such are defined) that are current db_nodes and not listed in
+%% Ignore. Replicas that are not disc_only_copies are preferred; when
+%% no candidate remains the value becomes nowhere.
+set_remote_where_to_read(Tab) ->
+    set_remote_where_to_read(Tab, []).
+
+set_remote_where_to_read(Tab, Ignore) ->
+    Active = val({Tab, active_replicas}),
+    Valid =
+        case mnesia_recover:get_master_nodes(Tab) of
+            [] -> Active;
+            Masters -> mnesia_lib:intersect(Masters, Active)
+        end,
+    Available = mnesia_lib:intersect(val({current, db_nodes}), Valid -- Ignore),
+    DiscOnlyC = val({Tab, disc_only_copies}),
+    Where =
+        case {Available -- DiscOnlyC, Available} of
+            {[Best | _], _} -> Best;
+            {[], [Any | _]} -> Any;
+            {[], []} -> nowhere
+        end,
+    set({Tab, where_to_read}, Where).
+
+%%% Local only
+%%% Register Tab as a local table that is active on, written to and
+%%% read from this node only.
+set_local_content_whereabouts(Tab) ->
+ add({schema, local_tables}, Tab),
+ add({Tab, active_replicas}, node()),
+ set({Tab, where_to_write}, [node()]),
+ set({Tab, where_to_read}, node()).
+
+%%% counter routines
+%%% Named integer counters kept as {Name, Value} in the mnesia_stats
+%%% table.
+
+%% Create (or reset) the counter Name with value 0.
+create_counter(Name) ->
+ set_counter(Name, 0).
+
+%% Set the counter Name to Val.
+set_counter(Name, Val) ->
+ ?ets_insert(mnesia_stats, {Name, Val}).
+
+%% Increment the counter Name by one.
+incr_counter(Name) ->
+ ?ets_update_counter(mnesia_stats, Name, 1).
+
+%% Increment the counter Name by I.
+incr_counter(Name, I) ->
+ ?ets_update_counter(mnesia_stats, Name, I).
+
+%% update_counter(Name, Val) ->
+%% ?ets_update_counter(mnesia_stats, Name, Val).
+
+%% Read the current value of the counter Name.
+read_counter(Name) ->
+ ?ets_lookup_element(mnesia_stats, Name, 2).
+
+%% All nodes listed in Cs, in the order disc_only_copies, disc_copies,
+%% ram_copies.
+cs_to_nodes(Cs) ->
+ Cs#cstruct.disc_only_copies ++
+ Cs#cstruct.disc_copies ++
+ Cs#cstruct.ram_copies.
+
+%% Run coredump/0 on all known nodes (or on Ns) with rpc:multicall/4
+%% and return the replies; nodes that could not be reached are ignored.
+dist_coredump() ->
+ dist_coredump(all_nodes()).
+dist_coredump(Ns) ->
+ {Replies, _} = rpc:multicall(Ns, ?MODULE, coredump, []),
+ Replies.
+
+%% Build a Mnesia core with mkcore/1, write it to the file named by
+%% core_file/0 and return that file name.
+%% NOTE(review): the result of file:write_file/2 is ignored, so the
+%% name is returned even if the write failed.
+coredump() ->
+ coredump({crashinfo, {"user initiated~n", []}}).
+coredump(CrashInfo) ->
+ Core = mkcore(CrashInfo),
+ Out = core_file(),
+ important("Writing Mnesia core to file: ~p...~p~n", [Out, CrashInfo]),
+ file:write_file(Out, Core),
+ Out.
+
+%% Absolute name of a new core file:
+%% MnesiaCore.<node>_<date fields>_<time fields>, where single digit
+%% fields get a leading zero. The file is placed in the core_dir
+%% environment directory when that is a string, otherwise in the
+%% current working directory.
+core_file() ->
+    Stamp = fun(I) when I < 10 -> ["_0", I];
+               (I) -> ["_", I]
+            end,
+    Fields = tuple_to_list(date()) ++ tuple_to_list(time()),
+    Name = lists:concat(["MnesiaCore.", node() | lists:flatmap(Stamp, Fields)]),
+    case mnesia_monitor:get_env(core_dir) of
+        Dir when is_list(Dir) ->
+            filename:absname(Name, Dir);
+        _ ->
+            filename:absname(Name)
+    end.
+
+%% Collect diagnostic information about the system and return it as
+%% one binary (term_to_binary of a list whose first element is
+%% CrashInfo and whose remaining elements are {Item, Info} pairs).
+%% Every collector is wrapped in catch, so a failing one contributes
+%% its 'EXIT' tuple instead of aborting the dump.
+mkcore(CrashInfo) ->
+% dbg_out("Making a Mnesia core dump...~p~n", [CrashInfo]),
+ Nodes = [node() |nodes()],
+ %%TidLocks = (catch ets:tab2list(mnesia_tid_locks)),
+ HeldLocks = (catch mnesia:system_info(held_locks)),
+ Core = [
+ CrashInfo,
+ {time, {date(), time()}},
+ {self, catch process_info(self())},
+ {nodes, catch rpc:multicall(Nodes, ?MODULE, get_node_number, [])},
+ {applications, catch lists:sort(application:loaded_applications())},
+ {flags, catch init:get_arguments()},
+ {code_path, catch code:get_path()},
+ {code_loaded, catch lists:sort(code:all_loaded())},
+ {etsinfo, catch ets_info(ets:all())},
+
+ {version, catch mnesia:system_info(version)},
+ {schema, catch ets:tab2list(schema)},
+ {gvar, catch ets:tab2list(mnesia_gvar)},
+ {master_nodes, catch mnesia_recover:get_master_node_info()},
+
+ {processes, catch procs()},
+ {relatives, catch relatives()},
+ {workers, catch workers(mnesia_controller:get_workers(2000))},
+ {locking_procs, catch locking_procs(HeldLocks)},
+
+ {held_locks, HeldLocks},
+ {lock_queue, catch mnesia:system_info(lock_queue)},
+ {load_info, catch mnesia_controller:get_info(2000)},
+ {trans_info, catch mnesia_tm:get_info(2000)},
+
+ {schema_file, catch file:read_file(tab2dat(schema))},
+ {dir_info, catch dir_info()},
+ {logfile, catch {ok, read_log_files()}}
+ ],
+ term_to_binary(Core).
+
+%% One {Pid, Info} pair per process on this node, where Info holds the
+%% values selected by proc_info/1. A process that has died in the
+%% meantime yields an 'EXIT' tuple as Info.
+procs() ->
+    [{Pid, (catch lists:zf(fun proc_info/1, process_info(Pid)))}
+     || Pid <- processes()].
+
+%% lists:zf/2 filter for process_info/1 items: keeps only the value of
+%% the few items that are interesting in a core dump.
+proc_info({Item, Val})
+  when Item =:= registered_name;
+       Item =:= message_queue_len;
+       Item =:= status;
+       Item =:= current_function ->
+    {true, Val};
+proc_info(_) ->
+    false.
+
+%% RPC target used by mkcore/1: identifies the answering node and the
+%% process that served the call.
+get_node_number() ->
+ {node(), self()}.
+
+%% Read every file named by mnesia_log:log_files/0 and return
+%% {File, Result} pairs, Result being what file:read_file/1 returned.
+read_log_files() ->
+ [{F, catch file:read_file(F)} || F <- mnesia_log:log_files()].
+
+%% File information about the current working directory, the Mnesia
+%% directory and every file in the Mnesia directory. If the directory
+%% cannot be listed, the error from file:list_dir/1 takes the place of
+%% the per-file entries.
+dir_info() ->
+ {ok, Cwd} = file:get_cwd(),
+ Dir = dir(),
+ [{cwd, Cwd, file:read_file_info(Cwd)},
+ {mnesia_dir, Dir, file:read_file_info(Dir)}] ++
+ case file:list_dir(Dir) of
+ {ok, Files} ->
+ [{mnesia_file, F, catch file:read_file_info(dir(F))} || F <- Files];
+ Other ->
+ [Other]
+ end.
+
+%% One {table, Tab, Info} entry per given ets table, where Info is
+%% ets:info/1 converted by mk_info_tuple/1.
+ets_info(Tabs) ->
+    [{table, Tab, mk_info_tuple(ets:info(Tab))} || Tab <- Tabs].
+
+%% Turn a list into a tuple; any other term is returned as is.
+mk_info_tuple(T) when is_list(T) ->
+ list_to_tuple(T);
+mk_info_tuple(T) -> T.
+
+%% {Name, Pid, ProcessInfo} for each name returned by mnesia:ms/0 that
+%% is currently registered on this node.
+relatives() ->
+ Info = fun(Name) ->
+ case whereis(Name) of
+ undefined -> false;
+ Pid -> {true, {Name, Pid, catch process_info(Pid)}}
+ end
+ end,
+ lists:zf(Info, mnesia:ms()).
+
+%% Convert a {workers, Loaders, Senders, Dumper} term into
+%% [{senders, S}, {loader, L} | D], adding process_info for every
+%% worker pid. Entries whose pid is undefined are dropped, so D is
+%% empty when no dumper is running.
+workers({workers, Loaders, Senders, Dumper}) ->
+ Info = fun({Pid, {send_table, Tab, _Receiver, _St}}) ->
+ case Pid of
+ undefined -> false;
+ Pid -> {true, {Pid, Tab, catch process_info(Pid)}}
+ end;
+ ({Pid, What}) when is_pid(Pid) ->
+ {true, {Pid, What, catch process_info(Pid)}};
+ ({Name, Pid}) ->
+ case Pid of
+ undefined -> false;
+ Pid -> {true, {Name, Pid, catch process_info(Pid)}}
+ end
+ end,
+ SInfo = lists:zf(Info, Senders),
+ Linfo = lists:zf(Info, Loaders),
+ [{senders, SInfo},{loader, Linfo}|lists:zf(Info, [{dumper, Dumper}])].
+
+%% {Pid, ProcessInfo} for every local process that owns one of the
+%% locks in LockList. The transaction id is taken from element 3 of
+%% each lock; owners on other nodes are skipped.
+locking_procs(LockList) when is_list(LockList) ->
+ Tids = [element(3, Lock) || Lock <- LockList],
+ UT = uniq(Tids),
+ Info = fun(Tid) ->
+ Pid = Tid#tid.pid,
+ case node(Pid) == node() of
+ true ->
+ {true, {Pid, catch process_info(Pid)}};
+ _ ->
+ false
+ end
+ end,
+ lists:zf(Info, UT).
+
+%% Build a core of the running system in memory and display it
+%% without writing any core file.
+view() ->
+ Bin = mkcore({crashinfo, {"view only~n", []}}),
+ vcore(Bin).
+
+%% Displays a Mnesia file on the tty. The file may be repaired.
+%% The kind of file is derived from its name:
+%%   *.DAT, *.RET, *.DMP, *.TMP  -> viewed as a dets file
+%%   *.LOG, *.BUP, *.ETS         -> viewed as a log file
+%%   MnesiaCore.*                -> viewed as a core file
+%% Returns {error, "Unknown file name"} for any other name.
+view(File) ->
+    case suffix([".DAT", ".RET", ".DMP", ".TMP"], File) of
+        true ->
+            view(File, dat);
+        false ->
+            case suffix([".LOG", ".BUP", ".ETS"], File) of
+                true ->
+                    view(File, log);
+                false ->
+                    %% Only the last path component is compared, so that
+                    %% the absolute name produced by core_file/0 is
+                    %% recognised as well as a bare file name.
+                    case lists:prefix("MnesiaCore.", filename:basename(File)) of
+                        true ->
+                            view(File, core);
+                        false ->
+                            {error, "Unknown file name"}
+                    end
+            end
+    end.
+
+%% Display File using the viewer that matches the given kind.
+view(File, dat) ->
+ dets:view(File);
+view(File, log) ->
+ mnesia_log:view(File);
+view(File, core) ->
+ vcore(File).
+
+%% True when File ends with at least one of the given suffixes.
+suffix(Suffixes, File) ->
+ Fun = fun(S) -> lists:suffix(S, File) end,
+ lists:any(Fun, Suffixes).
+
+%% View a core file
+
+%% Display the core file of this node that sorts last by name among
+%% the files in the current working directory. Returns nocore when the
+%% directory holds no core file of this node, and the error from
+%% file:list_dir/1 when the directory cannot be listed.
+vcore() ->
+    Prefix = lists:concat(["MnesiaCore.", node()]),
+    {ok, Cwd} = file:get_cwd(),
+    case file:list_dir(Cwd) of
+        {ok, Files} ->
+            CoreFiles = lists:sort([F || F <- Files, lists:prefix(Prefix, F)]),
+            show("Mnesia core files: ~p~n", [CoreFiles]),
+            case CoreFiles of
+                [] ->
+                    %% lists:last/1 would crash on an empty list
+                    nocore;
+                _ ->
+                    vcore(lists:last(CoreFiles))
+            end;
+        Error ->
+            Error
+    end.
+
+%% Display a core. A binary is decoded with binary_to_term/1 and each
+%% {Item, Info} element is printed through vcore_elem/1; a failure
+%% while printing one element is shown and does not stop the rest.
+%% Any other argument is taken as a file name: the file is read and
+%% its contents displayed, or nocore is returned if it cannot be read.
+vcore(Bin) when is_binary(Bin) ->
+ Core = binary_to_term(Bin),
+ Fun = fun({Item, Info}) ->
+ show("***** ~p *****~n", [Item]),
+ case catch vcore_elem({Item, Info}) of
+ {'EXIT', Reason} ->
+ show("{'EXIT', ~p}~n", [Reason]);
+ _ -> ok
+ end
+ end,
+ lists:foreach(Fun, Core);
+
+vcore(File) ->
+ show("~n***** Mnesia core: ~p *****~n", [File]),
+ case file:read_file(File) of
+ {ok, Bin} ->
+ vcore(Bin);
+ _ ->
+ nocore
+ end.
+
+%% Print one {Item, Info} element of a core. Binary file contents
+%% (schema file, log files) are written to a temporary file, viewed
+%% with the matching viewer and deleted again; everything else is
+%% pretty-printed.
+%% NOTE(review): the temporary files use fixed names under /tmp, which
+%% is neither portable nor safe against concurrent viewers.
+vcore_elem({schema_file, {ok, B}}) ->
+ Fname = "/tmp/schema.DAT",
+ file:write_file(Fname, B),
+ dets:view(Fname),
+ file:delete(Fname);
+
+vcore_elem({logfile, {ok, BinList}}) ->
+ Fun = fun({F, Info}) ->
+ show("----- logfile: ~p -----~n", [F]),
+ case Info of
+ {ok, B} ->
+ Fname = "/tmp/mnesia_vcore_elem.TMP",
+ file:write_file(Fname, B),
+ mnesia_log:view(Fname),
+ file:delete(Fname);
+ _ ->
+ show("~p~n", [Info])
+ end
+ end,
+ lists:foreach(Fun, BinList);
+
+vcore_elem({crashinfo, {Format, Args}}) ->
+ show(Format, Args);
+vcore_elem({gvar, L}) ->
+ show("~p~n", [lists:sort(L)]);
+%% NOTE(review): mkcore/1 stores the transaction info under the key
+%% trans_info, so this clause looks unreachable for cores it produces
+%% -- confirm which key is intended.
+vcore_elem({transactions, Info}) ->
+ mnesia_tm:display_info(user, Info);
+
+vcore_elem({_Item, Info}) ->
+ show("~p~n", [Info]).
+
+%% Normalise an error term. The raw term is first remembered as
+%% last_error. {aborted, R} and {abort, R} are unwrapped to R, atoms
+%% are returned as is, and an 'EXIT' whose second element is a
+%% {Mod, _, _} tuple with a module name starting with "mne" is turned
+%% into badarg. Everything else is returned unchanged.
+fix_error(X) ->
+ set(last_error, X), %% for debugabililty
+ case X of
+ {aborted, Reason} -> Reason;
+ {abort, Reason} -> Reason;
+ Y when is_atom(Y) -> Y;
+ {'EXIT', {_Reason, {Mod, _, _}}} when is_atom(Mod) ->
+ save(X),
+ case atom_to_list(Mod) of
+ [$m, $n, $e|_] -> badarg;
+ _ -> X
+ end;
+ _ -> X
+ end.
+
+%% The error term most recently passed to fix_error/1.
+last_error() ->
+ val(last_error).
+
+%% The following is a list of possible mnesia errors and what they
+%% actually mean
+%%
+%% error_desc(Reason) maps a known error atom to a descriptive string.
+%% {'EXIT', R}, {error, R} and {aborted, R} are unwrapped first. For
+%% any other non-empty tuple only the first element is translated, and
+%% unknown reasons are returned unchanged.
+
+error_desc(nested_transaction) -> "Nested transactions are not allowed";
+error_desc(badarg) -> "Bad or invalid argument, possibly bad type";
+error_desc(no_transaction) -> "Operation not allowed outside transactions";
+error_desc(combine_error) -> "Table options were ilegally combined";
+error_desc(bad_index) -> "Index already exists or was out of bounds";
+error_desc(already_exists) -> "Some schema option we try to set is already on";
+error_desc(index_exists) -> "Some ops can not be performed on tabs with index";
+error_desc(no_exists) -> "Tried to perform op on non-existing (non alive) item";
+error_desc(system_limit) -> "Some system_limit was exhausted";
+%% Adjacent string literals are concatenated as they stand, so every
+%% fragment but the last one has to end with a blank.
+error_desc(mnesia_down) -> "A transaction involving objects at some remote "
+                           "node which died while transaction was executing "
+                           "*and* object(s) are no longer available elsewhere "
+                           "in the network";
+error_desc(not_a_db_node) -> "A node which is non existant in "
+                             "the schema was mentioned";
+error_desc(bad_type) -> "Bad type on some provided arguments";
+error_desc(node_not_running) -> "Node not running";
+error_desc(truncated_binary_file) -> "Truncated binary in file";
+error_desc(active) -> "Some delete ops require that "
+                      "all active objects are removed";
+error_desc(illegal) -> "Operation not supported on object";
+error_desc({'EXIT', Reason}) ->
+    error_desc(Reason);
+error_desc({error, Reason}) ->
+    error_desc(Reason);
+error_desc({aborted, Reason}) ->
+    error_desc(Reason);
+error_desc(Reason) when is_tuple(Reason), tuple_size(Reason) > 0 ->
+    setelement(1, Reason, error_desc(element(1, Reason)));
+error_desc(Reason) ->
+    Reason.
+
+%% Error tag for a failed dirty rpc: exits and the atom no_variable
+%% become badarg, every other reason becomes no_exists.
+dirty_rpc_error_tag({'EXIT', _}) ->
+    badarg;
+dirty_rpc_error_tag(no_variable) ->
+    badarg;
+dirty_rpc_error_tag(_Reason) ->
+    no_exists.
+
+%% Handle a fatal error: mark Mnesia as stopping, build a core, report
+%% a mnesia_fatal event carrying it, wait ten seconds, kill Mnesia
+%% locally with mnesia:lkill/0 and finally exit the calling process
+%% with reason fatal.
+fatal(Format, Args) ->
+ catch set(mnesia_status, stopping),
+ Core = mkcore({crashinfo, {Format, Args}}),
+ report_fatal(Format, Args, Core),
+ timer:sleep(10000), % Enough to write the core dump to disc?
+ mnesia:lkill(),
+ exit(fatal).
+
+%% Report a mnesia_fatal system event (without a core in the two
+%% argument form) and then send an exit signal with reason fatal to
+%% mnesia_monitor. The catch covers the case where mnesia_monitor is
+%% not registered.
+report_fatal(Format, Args) ->
+ report_fatal(Format, Args, nocore).
+
+report_fatal(Format, Args, Core) ->
+ report_system_event({mnesia_fatal, Format, Args, Core}),
+ catch exit(whereis(mnesia_monitor), fatal).
+
+%% We sleep longer and longer the more we try
+%% Made some testing and came up with the following constants
+%%
+%% The result is Retries squared plus a random value in 1..MaxIntv,
+%% where MaxIntv approaches the upper limit of 500 as Retries grows.
+%% The random generator of the calling process is seeded on first use.
+random_time(Retries, _Counter0) ->
+%    UpperLimit = 2000,
+%    MaxIntv = trunc(UpperLimit * (1-(4/((Retries*Retries)+4)))),
+    UpperLimit = 500,
+    Dup = Retries * Retries,
+    MaxIntv = trunc(UpperLimit * (1-(50/((Dup)+50)))),
+
+    case get(random_seed) of
+        undefined ->
+            {X, Y, Z} = erlang:now(), %% time()
+            random:seed(X, Y, Z);
+        _AlreadySeeded ->
+            ok
+    end,
+    %% dbg_out("---random_test rs ~w max ~w val ~w---~n", [Retries, MaxIntv, Time]),
+    Dup + random:uniform(MaxIntv).
+
+%% Deliver a system event, wrapped as {mnesia_system_event, Event0},
+%% to the mnesia_event handler and to every subscribed pid. If the
+%% event manager could not be notified, report_system_event/2 takes
+%% over. Always returns ok.
+report_system_event(Event0) ->
+ Event = {mnesia_system_event, Event0},
+ report_system_event(catch_notify(Event), Event),
+ case ?catch_val(subscribers) of
+ {'EXIT', _} -> ignore;
+ Pids -> lists:foreach(fun(Pid) -> Pid ! Event end, Pids)
+ end,
+ ok.
+
+%% Notify the mnesia_event manager if it is registered; otherwise
+%% return an 'EXIT' tuple for report_system_event/2 to act upon.
+catch_notify(Event) ->
+ case whereis(mnesia_event) of
+ undefined ->
+ {'EXIT', {badarg, {mnesia_event, Event}}};
+ Pid ->
+ gen_event:notify(Pid, Event)
+ end.
+
+%% Second stage of event reporting. When the notification failed, a
+%% temporary event manager is started, the event is handed to the
+%% configured event module with a synchronous call and the manager is
+%% stopped again. If the manager cannot be started, the event is
+%% written to the error logger instead. Successful notifications are
+%% ignored here.
+report_system_event({'EXIT', Reason}, Event) ->
+ Mod = mnesia_monitor:get_env(event_module),
+ case mnesia_sup:start_event() of
+ {ok, Pid} ->
+ link(Pid),
+ gen_event:call(mnesia_event, Mod, Event, infinity),
+ unlink(Pid),
+
+ %% We get an exit signal if server dies
+ receive
+ {'EXIT', Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ gen_event:stop(mnesia_event),
+ ok
+ end;
+
+ Error ->
+ Msg = "Mnesia(~p): Cannot report event ~p: ~p (~p)~n",
+ error_logger:format(Msg, [node(), Event, Reason, Error])
+ end;
+report_system_event(_Res, _Event) ->
+ ignore.
+
+%% Important messages are reported regardless of debug level.
+%% The message is also kept in the debug history (see save/1).
+important(Format, Args) ->
+ save({Format, Args}),
+ report_system_event({mnesia_info, Format, Args}).
+
+%% Warning messages are reported regardless of debug level.
+%% The message is also kept in the debug history (see save/1).
+warning(Format, Args) ->
+ save({Format, Args}),
+ report_system_event({mnesia_warning, Format, Args}).
+
+%% Error messages are reported regardless of debug level.
+%% The message is also kept in the debug history (see save/1).
+error(Format, Args) ->
+ save({Format, Args}),
+ report_system_event({mnesia_error, Format, Args}).
+
+%% Verbose messages are reported when the debug level is verbose,
+%% debug or trace. With debug level none they are only kept in the
+%% debug history.
+verbose(Format, Args) ->
+ case mnesia_monitor:get_env(debug) of
+ none -> save({Format, Args});
+ verbose -> important(Format, Args);
+ debug -> important(Format, Args);
+ trace -> important(Format, Args)
+ end.
+
+%% Debug messages are ignored at debug level none, only kept in the
+%% debug history at level verbose, and reported as mnesia_info events
+%% at any other level.
+dbg_out(Format, Args) ->
+ case mnesia_monitor:get_env(debug) of
+ none -> ignore;
+ verbose -> save({Format, Args});
+ _ -> report_system_event({mnesia_info, Format, Args})
+ end.
+
+%% Keep the most recent debug printouts in mnesia_gvar. save2/1 cycles
+%% through the slots 0..30, so older entries are overwritten. Failures
+%% (e.g. a missing table) are swallowed by the catch.
+save(DbgInfo) ->
+ catch save2(DbgInfo).
+
+%% Store DbgInfo, stamped with date and time, in the slot following
+%% the current position and make that slot the current one. The
+%% position wraps around to 0 after slot 30.
+save2(DbgInfo) ->
+    PosKey = {'$$$_report', current_pos},
+    Next =
+        case ?ets_lookup_element(mnesia_gvar, PosKey, 2) of
+            30 -> 0;
+            Current -> Current + 1
+        end,
+    set(PosKey, Next),
+    set({'$$$_report', Next}, {date(), time(), DbgInfo}).
+
+%% Copy the contents of file From to file To, which is created or
+%% truncated. Returns ok, or {error, Reason} if a file cannot be
+%% opened or the copy fails.
+copy_file(From, To) ->
+    case file:open(From, [raw, binary, read]) of
+        {ok, F} ->
+            Res =
+                case file:open(To, [raw, binary, write]) of
+                    {ok, T} ->
+                        CopyRes = copy_file_loop(F, T, 8000),
+                        file:close(T),
+                        CopyRes;
+                    {error, OpenReason} ->
+                        {error, OpenReason}
+                end,
+            %% The source is closed on every path, also when the
+            %% destination could not be opened.
+            file:close(F),
+            Res;
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+%% Copy from the open file F to the open file T in chunks of
+%% ChunkSize bytes until end of file. Returns ok, or the first
+%% {error, Reason} reported by a read or a write.
+copy_file_loop(F, T, ChunkSize) ->
+    case file:read(F, ChunkSize) of
+        {ok, Bin} ->
+            %% A failed write must abort the copy instead of being
+            %% reported as success.
+            case file:write(T, Bin) of
+                ok ->
+                    copy_file_loop(F, T, ChunkSize);
+                {error, WriteReason} ->
+                    {error, WriteReason}
+            end;
+        eof ->
+            ok;
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+
+%%%%%%%%%%%%
+%% versions of all the lowlevel db funcs that determine whether we
+%% shall go to disc or ram to do the actual operation.
+
+%% Look up Key in Tab. The two argument form fetches the storage type
+%% of the table first; ram_copies and disc_copies are served through
+%% the ?ets_lookup macro, disc_only_copies through dets.
+db_get(Tab, Key) ->
+ db_get(val({Tab, storage_type}), Tab, Key).
+db_get(ram_copies, Tab, Key) -> ?ets_lookup(Tab, Key);
+db_get(disc_copies, Tab, Key) -> ?ets_lookup(Tab, Key);
+db_get(disc_only_copies, Tab, Key) -> dets:lookup(Tab, Key).
+
+%% Start a chunked traversal of all objects in Tab, N objects at a
+%% time (1000 when N is not given). Returns whatever the underlying
+%% dets:select/3 or ets:select/3 call returns; continue with
+%% db_chunk/2.
+db_init_chunk(Tab) ->
+ db_init_chunk(val({Tab, storage_type}), Tab, 1000).
+db_init_chunk(Tab, N) ->
+ db_init_chunk(val({Tab, storage_type}), Tab, N).
+
+db_init_chunk(disc_only_copies, Tab, N) ->
+ dets:select(Tab, [{'_', [], ['$_']}], N);
+db_init_chunk(_, Tab, N) ->
+ ets:select(Tab, [{'_', [], ['$_']}], N).
+
+%% Fetch the next chunk of a traversal started by db_init_chunk; State
+%% is the continuation returned by the previous select call.
+db_chunk(disc_only_copies, State) ->
+ dets:select(State);
+db_chunk(_, State) ->
+ ets:select(State).
+
+%% Insert Val into Tab. The ram based storage types always answer ok;
+%% for disc_only_copies the result of dets:insert/2 is returned.
+db_put(Tab, Val) ->
+ db_put(val({Tab, storage_type}), Tab, Val).
+
+db_put(ram_copies, Tab, Val) -> ?ets_insert(Tab, Val), ok;
+db_put(disc_copies, Tab, Val) -> ?ets_insert(Tab, Val), ok;
+db_put(disc_only_copies, Tab, Val) -> dets:insert(Tab, Val).
+
+%% Return the objects in Tab matching Pat. The table is fixed for the
+%% duration of the match and released again before a caught exit from
+%% the match itself is re-raised.
+db_match_object(Tab, Pat) ->
+    Storage = val({Tab, storage_type}),
+    db_match_object(Storage, Tab, Pat).
+db_match_object(Storage, Tab, Pat) ->
+    db_fixtable(Storage, Tab, true),
+    Outcome = catch_match_object(Storage, Tab, Pat),
+    db_fixtable(Storage, Tab, false),
+    case Outcome of
+        {'EXIT', Why} -> exit(Why);
+        Objects -> Objects
+    end.
+
+%% Run match_object under an old-style catch so that the caller can
+%% release the table fix before deciding whether to exit.
+catch_match_object(Storage, Tab, Pat) ->
+    case Storage of
+        disc_only_copies -> catch dets:match_object(Tab, Pat);
+        _ -> catch ets:match_object(Tab, Pat)
+    end.
+
+%% Run the match specification Pat against Tab. The table is fixed for
+%% the duration of the select and released again before a caught exit
+%% from the select itself is re-raised.
+db_select(Tab, Pat) ->
+    Storage = val({Tab, storage_type}),
+    db_select(Storage, Tab, Pat).
+
+db_select(Storage, Tab, Pat) ->
+    db_fixtable(Storage, Tab, true),
+    Outcome = catch_select(Storage, Tab, Pat),
+    db_fixtable(Storage, Tab, false),
+    case Outcome of
+        {'EXIT', Why} -> exit(Why);
+        Selected -> Selected
+    end.
+
+%% Run select under an old-style catch so that the caller can release
+%% the table fix before deciding whether to exit.
+catch_select(Storage, Tab, Pat) ->
+    case Storage of
+        disc_only_copies -> catch dets:select(Tab, Pat);
+        _ -> catch ets:select(Tab, Pat)
+    end.
+
+%% Start a select limited to Limit objects per chunk, using dets for
+%% disc_only_copies and ets for every other storage type.
+db_select_init(Storage, Tab, Pat, Limit) ->
+    case Storage of
+        disc_only_copies -> dets:select(Tab, Pat, Limit);
+        _ -> ets:select(Tab, Pat, Limit)
+    end.
+
+%% Continue a limited select. The continuation is first passed through
+%% repair_continuation/2 together with the match specification Ms.
+db_select_cont(disc_only_copies, Cont0, Ms) ->
+    dets:select(dets:repair_continuation(Cont0, Ms));
+db_select_cont(_, Cont0, Ms) ->
+    ets:select(ets:repair_continuation(Cont0, Ms)).
+
+%% Fix (Bool == true) or release (Bool == false) Tab via safe_fixtable/2.
+%% The first argument may be a storage type or the backend name itself.
+db_fixtable(Kind, Tab, Bool)
+  when Kind == ets; Kind == ram_copies; Kind == disc_copies ->
+    ets:safe_fixtable(Tab, Bool);
+db_fixtable(Kind, Tab, Bool)
+  when Kind == dets; Kind == disc_only_copies ->
+    dets:safe_fixtable(Tab, Bool).
+
+%% Delete the objects stored under Key in Tab. The ets variants always
+%% return ok; the dets variant returns the result of dets:delete/2.
+db_erase(Tab, Key) ->
+    Storage = val({Tab, storage_type}),
+    db_erase(Storage, Tab, Key).
+db_erase(disc_only_copies, Tab, Key) ->
+    dets:delete(Tab, Key);
+db_erase(Storage, Tab, Key) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_delete(Tab, Key),
+    ok.
+
+%% Delete every object in Tab matching Pat. The ets variants always
+%% return ok; the dets variant returns the result of dets:match_delete/2.
+db_match_erase(Tab, Pat) ->
+    Storage = val({Tab, storage_type}),
+    db_match_erase(Storage, Tab, Pat).
+db_match_erase(disc_only_copies, Tab, Pat) ->
+    dets:match_delete(Tab, Pat);
+db_match_erase(Storage, Tab, Pat) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_match_delete(Tab, Pat),
+    ok.
+
+%% Return the first key of Tab as reported by the backend.
+db_first(Tab) ->
+    Storage = val({Tab, storage_type}),
+    db_first(Storage, Tab).
+db_first(disc_only_copies, Tab) ->
+    dets:first(Tab);
+db_first(Storage, Tab) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_first(Tab).
+
+%% Return the key following Key in Tab as reported by the backend.
+db_next_key(Tab, Key) ->
+    Storage = val({Tab, storage_type}),
+    db_next_key(Storage, Tab, Key).
+db_next_key(disc_only_copies, Tab, Key) ->
+    dets:next(Tab, Key);
+db_next_key(Storage, Tab, Key) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_next(Tab, Key).
+
+%% Return the last key of Tab. Dets tables have no key order, so for
+%% disc_only_copies this is simply dets:first/1.
+db_last(Tab) ->
+    Storage = val({Tab, storage_type}),
+    db_last(Storage, Tab).
+db_last(disc_only_copies, Tab) ->
+    dets:first(Tab);
+db_last(Storage, Tab) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_last(Tab).
+
+%% Return the key preceding Key in Tab. Dets tables have no key order,
+%% so for disc_only_copies this is simply dets:next/2.
+db_prev_key(Tab, Key) ->
+    Storage = val({Tab, storage_type}),
+    db_prev_key(Storage, Tab, Key).
+db_prev_key(disc_only_copies, Tab, Key) ->
+    dets:next(Tab, Key);
+db_prev_key(Storage, Tab, Key) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_prev(Tab, Key).
+
+%% Return the objects stored in slot Pos of Tab as reported by the backend.
+db_slot(Tab, Pos) ->
+    Storage = val({Tab, storage_type}),
+    db_slot(Storage, Tab, Pos).
+db_slot(disc_only_copies, Tab, Pos) ->
+    dets:slot(Tab, Pos);
+db_slot(Storage, Tab, Pos) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_slot(Tab, Pos).
+
+%% Add Val to the counter object with key C in Tab, delegating to the
+%% update_counter function of the backend that holds the table.
+db_update_counter(Tab, C, Val) ->
+    Storage = val({Tab, storage_type}),
+    db_update_counter(Storage, Tab, C, Val).
+db_update_counter(disc_only_copies, Tab, C, Val) ->
+    dets:update_counter(Tab, C, Val);
+db_update_counter(Storage, Tab, C, Val)
+  when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_update_counter(Tab, C, Val).
+
+%% Delete the ets table backing Tab. Nothing is done for
+%% disc_only_copies tables; the atom ignore is returned instead.
+db_erase_tab(Tab) ->
+    Storage = val({Tab, storage_type}),
+    db_erase_tab(Storage, Tab).
+db_erase_tab(disc_only_copies, _Tab) ->
+    ignore;
+db_erase_tab(Storage, Tab) when Storage == ram_copies; Storage == disc_copies ->
+    ?ets_delete_table(Tab).
+
+%% Open the dets file File under the name Tabname and load its contents
+%% into the ets table Tab, which is assumed to be a valid ets table.
+%% Lock (yes | no) selects whether the dets file is opened and closed
+%% under the table lock. Returns loaded on success, otherwise the
+%% failing result of the open or of dets:to_ets/2.
+dets_to_ets(Tabname, Tab, File, Type, Rep, Lock) ->
+    {Open, Close} = mkfuns(Lock),
+    DetsArgs = [{file, File}, {type, disk_type(Tab, Type)},
+                {keypos, 2}, {repair, Rep}],
+    case Open(Tabname, DetsArgs) of
+        {ok, Tabname} ->
+            Loaded = dets:to_ets(Tabname, Tab),
+            Close(Tabname),
+            trav_ret(Loaded, Tab);
+        Failure ->
+            Failure
+    end.
+
+%% Translate the result of a table traversal: a result identical to the
+%% table name means success and becomes loaded, anything else is
+%% returned unchanged.
+trav_ret(Res, Tabname) when Res =:= Tabname -> loaded;
+trav_ret(Res, _Tabname) -> Res.
+
+%% Return an {Open, Close} pair of funs for dets files. With yes the
+%% lock-taking dets_sync_* variants are used, with no the plain dets
+%% functions.
+mkfuns(yes) ->
+    {fun dets_sync_open/2, fun dets_sync_close/1};
+mkfuns(no) ->
+    {fun dets:open_file/2, fun dets:close/1}.
+
+%% Return the dets table type to use for Tab. An ordered_set is stored
+%% as a plain set; every other type is passed through unchanged.
+disk_type(Tab) ->
+    disk_type(Tab, val({Tab, setorbag})).
+
+disk_type(_Tab, Type) ->
+    case Type of
+        ordered_set -> set;
+        _ -> Type
+    end.
+
+%% Open the dets file File under the name Ref with the standard Mnesia
+%% options for table Tab, taking the table lock via dets_sync_open/2.
+dets_sync_open(Tab, Ref, File) ->
+    Repair = mnesia_monitor:get_env(auto_repair),
+    Type = disk_type(Tab),
+    dets_sync_open(Ref, [{file, File},
+                         {keypos, 2},
+                         {repair, Repair},
+                         {type, Type}]).
+
+%% Take the global lock {mnesia_table_lock, Tab} on the local node on
+%% behalf of the calling process, retrying indefinitely.
+lock_table(Tab) ->
+    LockId = {{mnesia_table_lock, Tab}, self()},
+    global:set_lock(LockId, [node()], infinity).
+
+%% Release the global lock {mnesia_table_lock, Tab} held on the local
+%% node by the calling process.
+unlock_table(Tab) ->
+    LockId = {{mnesia_table_lock, Tab}, self()},
+    global:del_lock(LockId, [node()]).
+
+%% Take the table lock and open the dets file. On success the lock stays
+%% held until dets_sync_close/1 is called; on failure the table is
+%% closed and unlocked again before the failing result is returned.
+dets_sync_open(Tab, Args) ->
+    lock_table(Tab),
+    Opened = dets:open_file(Tab, Args),
+    case Opened of
+        {ok, Tab} ->
+            Opened;
+        _ ->
+            dets_sync_close(Tab),
+            Opened
+    end.
+
+%% Close the dets table (ignoring any failure) and release the table
+%% lock. Always returns ok.
+dets_sync_close(Tab) ->
+    _ = (catch dets:close(Tab)),
+    unlock_table(Tab),
+    ok.
+
+%% Return the value stored under {Tab, index}.
+readable_indecies(Tab) ->
+    IndexKey = {Tab, index},
+    val(IndexKey).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Managing conditional debug functions
+%%
+%% The main idea with the debug_fun's is to allow test programs
+%% to control the internal behaviour of Mnesia. This is needed
+%% to make the test programs independent of system load, swapping
+%% and other circumstances that may affect the behaviour of Mnesia.
+%%
+%% First, calls to ?eval_debug_fun should be inserted at well
+%% defined places in Mnesia's code, e.g. in critical situations
+%% of startup, transaction commit, backups etc.
+%%
+%% Then compile Mnesia with the compiler option 'debug'.
+%%
+%% In test programs ?activate_debug_fun should be called
+%% in order to bind a fun to the debug identifier stated
+%% in the call to ?eval_debug_fun.
+%%
+%% If eval_debug_fun finds that the fun is activated it
+%% invokes the fun as NewContext = Fun(PreviousContext, EvalContext)
+%% and replaces the PreviousContext with the NewContext.
+%% The initial context of a debug_fun is given as argument to
+%% activate_debug_fun.
+
+-define(DEBUG_TAB, mnesia_debug).
+-record(debug_info, {id, function, context, file, line}).
+
+%% Throw away the debug table (if it exists) and create a fresh, empty
+%% one keyed on the second tuple element.
+scratch_debug_fun() ->
+    dbg_out("scratch_debug_fun(): ~p~n", [?DEBUG_TAB]),
+    (catch ?ets_delete_table(?DEBUG_TAB)),
+    TabOpts = [set, public, named_table, {keypos, 2}],
+    ?ets_new_table(?DEBUG_TAB, TabOpts).
+
+%% Bind Fun, with its initial context, to the debug identifier FunId.
+%% File and Line record where the activation was made.
+activate_debug_fun(FunId, Fun, InitialContext, File, Line) ->
+    update_debug_info(#debug_info{id = FunId,
+                                  function = Fun,
+                                  context = InitialContext,
+                                  file = File,
+                                  line = Line}).
+
+%% Store DebugInfo in the debug table. If the insert fails, the table
+%% is recreated and the insert is retried once. Always returns ok.
+update_debug_info(DebugInfo) ->
+    case catch ?ets_insert(?DEBUG_TAB, DebugInfo) of
+        {'EXIT', _} ->
+            scratch_debug_fun(),
+            ?ets_insert(?DEBUG_TAB, DebugInfo);
+        _ ->
+            ok
+    end,
+    dbg_out("update_debug_info(~p)~n", [DebugInfo]),
+    ok.
+
+%% Remove the debug fun bound to FunId. A failing delete is ignored.
+%% Always returns ok.
+deactivate_debug_fun(FunId, _File, _Line) ->
+    _ = (catch ?ets_delete(?DEBUG_TAB, FunId)),
+    ok.
+
+%% Evaluate the debug fun bound to FunId, if any.
+%%
+%% When a fun is registered it is invoked as Fun(OldContext, EvalContext)
+%% and, if the returned context differs from the old one and the stored
+%% entry is still the one that was read, the entry is updated with the
+%% new context. Returns ok when no fun is registered or when the debug
+%% table cannot be read.
+eval_debug_fun(FunId, EvalContext, EvalFile, EvalLine) ->
+ case catch ?ets_lookup(?DEBUG_TAB, FunId) of
+ [] ->
+ ok;
+ [Info] ->
+ OldContext = Info#debug_info.context,
+ dbg_out("~s(~p): ~w "
+ "activated in ~s(~p)~n "
+ "eval_debug_fun(~w, ~w)~n",
+ [filename:basename(EvalFile), EvalLine, Info#debug_info.id,
+ filename:basename(Info#debug_info.file), Info#debug_info.line,
+ OldContext, EvalContext]),
+ Fun = Info#debug_info.function,
+ NewContext = Fun(OldContext, EvalContext),
+
+ %% Re-read the entry: Info is already bound, so this clause only
+ %% matches when the stored record is unchanged since the first lookup.
+ case catch ?ets_lookup(?DEBUG_TAB, FunId) of
+ [Info] when NewContext /= OldContext ->
+ NewInfo = Info#debug_info{context = NewContext},
+ update_debug_info(NewInfo);
+ _ ->
+ ok
+ end;
+ %% The lookup itself failed (e.g. the debug table does not exist).
+ {'EXIT', _} -> ok
+ end.
+
+%% Report whether this module was compiled with the 'debug' macro defined.
+-ifdef(debug).
+ is_debug_compiled() -> true.
+-else.
+ is_debug_compiled() -> false.
+-endif.
+
+
diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl
new file mode 100644
index 0000000000..77c317abc5
--- /dev/null
+++ b/lib/mnesia/src/mnesia_loader.erl
@@ -0,0 +1,828 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%% Purpose : Loads tables from local disc or from remote node
+
+-module(mnesia_loader).
+
+%% Mnesia internal stuff
+-export([disc_load_table/2,
+ net_load_table/4,
+ send_table/3]).
+
+-export([old_node_init_table/6]). %% Spawned old node protocol conversion hack
+-export([spawned_receiver/8]). %% Spawned lock taking process
+
+-import(mnesia_lib, [set/2, fatal/2, verbose/2, dbg_out/2]).
+
+-include("mnesia.hrl").
+
+%% Fetch the value of Var via ?catch_val. A failed lookup is handed to
+%% mnesia_lib:other_val/2 together with the exit reason.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Why} -> mnesia_lib:other_val(Var, Why);
+        Found -> Found
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Load a table from local disc
+
+%% Load the local copy of Tab from disc. Reason tells why the load was
+%% requested; it is logged and passed on to do_get_disc_copy2/4 together
+%% with the storage type and table type of Tab.
+disc_load_table(Tab, Reason) ->
+    StorageType = val({Tab, storage_type}),
+    TabType = val({Tab, setorbag}),
+    dbg_out("Getting table ~p (~p) from disc: ~p~n",
+            [Tab, StorageType, Reason]),
+    ?eval_debug_fun({?MODULE, do_get_disc_copy},
+                    [{tab, Tab},
+                     {reason, Reason},
+                     {storage, StorageType},
+                     {type, TabType}]),
+    do_get_disc_copy2(Tab, Reason, StorageType, TabType).
+
+%% Load Tab from local disc according to its storage type.
+%%
+%% Returns {loaded, ok} on success (also when the local copy has an
+%% unknown storage type), or {not_loaded, Why} when the dets file of a
+%% disc_only_copies table cannot be opened. A Reason of the form
+%% {dumper, _} means the table resources already exist, so only the
+%% index/snmp/bookkeeping steps are performed.
+do_get_disc_copy2(Tab, _Reason, Storage, _Type) when Storage == unknown ->
+ verbose("Local table copy of ~p has recently been deleted, ignored.~n",
+ [Tab]),
+ {loaded, ok}; %% ?
+do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == disc_copies ->
+ %% NOW we create the actual table
+ Repair = mnesia_monitor:get_env(auto_repair),
+ Args = [{keypos, 2}, public, named_table, Type],
+ case Reason of
+ {dumper, _} -> %% Resources already allocated
+ ignore;
+ _ ->
+ mnesia_monitor:mktab(Tab, Args),
+ Count = mnesia_log:dcd2ets(Tab, Repair),
+ %% Rewrite the dcd file from the ets table when the table holds
+ %% fewer than 4 * Count objects.
+ %% NOTE(review): Count is presumably the number of log entries
+ %% replayed by dcd2ets - confirm in mnesia_log.
+ case ets:info(Tab, size) of
+ X when X < Count * 4 ->
+ ok = mnesia_log:ets2dcd(Tab);
+ _ ->
+ ignore
+ end
+ end,
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+
+do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == ram_copies ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ case Reason of
+ {dumper, _} -> %% Resources already allocated
+ ignore;
+ _ ->
+ mnesia_monitor:mktab(Tab, Args),
+ Fname = mnesia_lib:tab2dcd(Tab),
+ Datname = mnesia_lib:tab2dat(Tab),
+ Repair = mnesia_monitor:get_env(auto_repair),
+ %% Only look for files when Mnesia uses a directory; prefer the
+ %% dcd file and fall back to the dat file. The result of this
+ %% case expression is not used.
+ case mnesia_monitor:use_dir() of
+ true ->
+ case mnesia_lib:exists(Fname) of
+ true -> mnesia_log:dcd2ets(Tab, Repair);
+ false ->
+ case mnesia_lib:exists(Datname) of
+ true ->
+ mnesia_lib:dets_to_ets(Tab, Tab, Datname,
+ Type, Repair, no);
+ false ->
+ false
+ end
+ end;
+ false ->
+ false
+ end
+ end,
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+
+do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == disc_only_copies ->
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ case Reason of
+ {dumper, _} ->
+ %% The dets table is not opened here for dumper requests.
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+ _ ->
+ case mnesia_monitor:open_dets(Tab, Args) of
+ {ok, _} ->
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+ {error, Error} ->
+ {not_loaded, {"Failed to create dets table", Error}}
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Load a table from a remote node
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Receiver Sender
+%% -------- ------
+%% Grab schema lock on table
+%% Determine table size
+%% Create empty pre-grown table
+%% Grab read lock on table
+%% Let receiver subscribe on updates done on sender node
+%% Disable rehashing of table
+%% Release read lock on table
+%% Send table to receiver in chunks
+%%
+%% Grab read lock on table
+%% Block dirty updates
+%% Update whereabouts
+%%
+%% Cancel the update subscription
+%% Process the subscription events
+%% Optionally dump to disc
+%% Unblock dirty updates
+%% Release read lock on table
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-define(MAX_TRANSFER_SIZE, 7500).
+-define(MAX_RAM_FILE_SIZE, 1000000).
+-define(MAX_RAM_TRANSFERS, (?MAX_RAM_FILE_SIZE div ?MAX_TRANSFER_SIZE) + 1).
+-define(MAX_NOPACKETS, 20).
+
+%% Load Tab from one of the nodes in Ns. For an add_table_copy request
+%% from the dumper the supplied cstruct Cs is used; for every other
+%% reason the cstruct currently stored for the table is used instead.
+net_load_table(Tab, {dumper, add_table_copy} = Reason, Ns, Cs) ->
+    try_net_load_table(Tab, Reason, Ns, Cs);
+net_load_table(Tab, Reason, Ns, _Cs) ->
+    StoredCs = val({Tab, cstruct}),
+    try_net_load_table(Tab, Reason, Ns, StoredCs).
+
+%% Try the candidate nodes in Ns one after another. With no candidates
+%% left the load fails with {not_loaded, none_active}.
+try_net_load_table(Tab, Reason, Ns, Cs) ->
+    case Ns of
+        [] ->
+            verbose("Copy failed. No active replicas of ~p are available.~n", [Tab]),
+            {not_loaded, none_active};
+        _ ->
+            Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+            do_get_network_copy(Tab, Reason, Ns, Storage, Cs)
+    end.
+
+%% Copy Tab from the first node in Ns that is currently a db node.
+%%
+%% Returns {loaded, ok} on success, {not_loaded, storage_unknown} when
+%% the local storage type is unknown, {not_loaded, Err} for an error
+%% during a dumper-initiated load, or the result of retrying with the
+%% remaining nodes.
+do_get_network_copy(Tab, _Reason, _Ns, unknown, _Cs) ->
+ verbose("Local table copy of ~p has recently been deleted, ignored.~n", [Tab]),
+ {not_loaded, storage_unknown};
+do_get_network_copy(Tab, Reason, Ns, Storage, Cs) ->
+ [Node | Tail] = Ns,
+ case lists:member(Node,val({current, db_nodes})) of
+ true ->
+ dbg_out("Getting table ~p (~p) from node ~p: ~p~n",
+ [Tab, Storage, Node, Reason]),
+ ?eval_debug_fun({?MODULE, do_get_network_copy},
+ [{tab, Tab}, {reason, Reason},
+ {nodes, Ns}, {storage, Storage}]),
+ case init_receiver(Node, Tab, Storage, Cs, Reason) of
+ ok ->
+ set({Tab, load_node}, Node),
+ set({Tab, load_reason}, Reason),
+ mnesia_controller:i_have_tab(Tab),
+ dbg_out("Table ~p copied from ~p to ~p~n", [Tab, Node, node()]),
+ {loaded, ok};
+ Err = {error, _} when element(1, Reason) == dumper ->
+ {not_loaded,Err};
+ restart ->
+ %% Put the node last in the list and try again.
+ try_net_load_table(Tab, Reason, Tail ++ [Node], Cs);
+ down ->
+ %% Give up on this node and continue with the others.
+ try_net_load_table(Tab, Reason, Tail, Cs)
+ end;
+ false ->
+ %% Node is not among the current db nodes; skip it.
+ try_net_load_table(Tab, Reason, Tail, Cs)
+ end.
+
+%% Set up the SNMP data for Tab from the value stored under {Tab, snmp}.
+snmpify(Tab, Storage) ->
+    SnmpSpec = val({Tab, snmp}),
+    do_snmpify(Tab, SnmpSpec, Storage).
+
+%% With an empty SNMP specification nothing is done. Otherwise the SNMP
+%% table is created through mnesia_snmp_hook and the result is stored
+%% under {Tab, {index, snmp}}.
+do_snmpify(_Tab, [], _Storage) ->
+    ignore;
+do_snmpify(Tab, Us, Storage) ->
+    set({Tab, {index, snmp}},
+        mnesia_snmp_hook:create_table(Us, Tab, Storage)).
+
+%% Start the receiver.
+%%
+%% For {dumper, add_table_copy} the remote sender is started and the
+%% table is received directly in the calling process. For every other
+%% reason the load runs inside a transaction (retried at most 20 times)
+%% that spawns a linked receiver process.
+%%
+%% Returns the result of the receiver (ok or down), {error, Result} for
+%% dumper-initiated loads, restart when the transaction ran out of
+%% retries, or down when the transaction was aborted for another reason.
+init_receiver(Node, Tab, Storage, Cs, Reas={dumper,add_table_copy}) ->
+ case start_remote_sender(Node, Tab, Storage) of
+ {SenderPid, TabSize, DetsData} ->
+ start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,Reas);
+ Else ->
+ Else
+ end;
+init_receiver(Node, Tab,Storage,Cs,Reason) ->
+ %% Grab a schema lock to avoid deadlock between table_loader and schema_commit dumping.
+ %% Both may grab tables-locks in different order.
+ Load =
+ fun() ->
+ {_,Tid,Ts} = get(mnesia_activity_state),
+ mnesia_locker:rlock(Tid, Ts#tidstore.store, {schema, Tab}),
+ %% Check that table still exists
+ Active = val({Tab, active_replicas}),
+ %% Check that we haven't loaded it already
+ case val({Tab,where_to_read}) == node() of
+ true -> ok;
+ _ ->
+ %% And that sender still got a copy
+ %% (something might have happened while
+ %% we were waiting for the lock)
+ true = lists:member(Node, Active),
+ {SenderPid, TabSize, DetsData} =
+ start_remote_sender(Node,Tab,Storage),
+ Init = table_init_fun(SenderPid),
+ Args = [self(),Tab,Storage,Cs,SenderPid,
+ TabSize,DetsData,Init],
+ Pid = spawn_link(?MODULE, spawned_receiver, Args),
+ put(mnesia_real_loader, Pid),
+ wait_on_load_complete(Pid)
+ end
+ end,
+ Res =
+ case mnesia:transaction(Load, 20) of
+ {atomic, {error,Result}} when
+ element(1,Reason) == dumper ->
+ {error,Result};
+ {atomic, {error,Result}} ->
+ fatal("Cannot create table ~p: ~p~n",
+ [[Tab, Storage], Result]);
+ {atomic, Result} -> Result;
+ {aborted, nomore} -> restart;
+ {aborted, _Reas} ->
+ verbose("Receiver failed on ~p from ~p:~nReason: ~p~n",
+ [Tab,Node,_Reas]),
+ down %% either this node or sender is dying
+ end,
+ unlink(whereis(mnesia_tm)), %% Avoid late unlink from tm
+ Res.
+
+%% Ask Node to start sending Tab to this process and wait for the first
+%% message from the sender. Returns {SenderPid, TabSize, DetsData}, with
+%% DetsData set to false when the sender supplied none, or the result of
+%% down/2 when the sender is reported to have crashed.
+start_remote_sender(Node,Tab,Storage) ->
+    mnesia_controller:start_remote_sender(Node, Tab, self(), Storage),
+    put(mnesia_table_sender_node, {Tab, Node}),
+    receive
+        {Sender, {first, Size}} ->
+            {Sender, Size, false};
+        {Sender, {first, Size, DetsData}} ->
+            {Sender, Size, DetsData};
+        %% Protocol conversion hack
+        {copier_done, Node} ->
+            verbose("Sender of table ~p crashed on node ~p ~n", [Tab, Node]),
+            down(Tab, Storage)
+    end.
+
+%% Build the initial fun given to [d]ets:init_table. When called with
+%% read it asks the sender for more data and waits for it. If the
+%% sender node needs protocol conversion, the creating process is used
+%% as receiver and is told which process actually runs the fun.
+table_init_fun(SenderPid) ->
+    Creator = self(),
+    PConv = mnesia_monitor:needs_protocol_conversion(node(SenderPid)),
+    fun(read) ->
+            Receiver =
+                if
+                    PConv == true ->
+                        Creator ! {actual_tabrec, self()},
+                        Creator; %% Old mnesia
+                    PConv == false ->
+                        self()
+                end,
+            SenderPid ! {Receiver, more},
+            get_data(SenderPid, Receiver)
+    end.
+
+%% add_table_copy gets its own locks, so the table is initialised
+%% directly in the calling process. On error the sender is told that
+%% the copier is done before the error is returned.
+start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,{dumper,add_table_copy}) ->
+    Init = table_init_fun(SenderPid),
+    Res = do_init_table(Tab,Storage,Cs,SenderPid,TabSize,DetsData,self(), Init),
+    case Res of
+        {error, _} ->
+            SenderPid ! {copier_done, node()},
+            Res;
+        _ ->
+            Res
+    end.
+
+%% Entry point of the spawned lock-taking receiver process. It traps
+%% exits, receives the table, reports the outcome to ReplyTo as
+%% {self(), Outcome} and then terminates normally.
+spawned_receiver(ReplyTo,Tab,Storage,Cs, SenderPid,TabSize,DetsData, Init) ->
+    process_flag(trap_exit, true),
+    Outcome = do_init_table(Tab,Storage,Cs,
+                            SenderPid,TabSize,DetsData,
+                            ReplyTo, Init),
+    ReplyTo ! {self(), Outcome},
+    unlink(ReplyTo),
+    unlink(whereis(mnesia_controller)),
+    exit(normal).
+
+%% Wait for the receiver process Pid to report its result. An exit
+%% signal from Pid is re-raised in this process; every other message is
+%% forwarded to Pid.
+wait_on_load_complete(Pid) ->
+    receive
+        {Pid, Outcome} ->
+            Outcome;
+        {'EXIT', Pid, Why} ->
+            exit(Why);
+        Other ->
+            Pid ! Other,
+            wait_on_load_complete(Pid)
+    end.
+
+%% Create the local table and fill it with the data sent by SenderPid.
+%%
+%% Returns the result of tab_receiver/6 when the table was initialised,
+%% the result of down/2 when initialisation failed, or whatever
+%% create_table/4 returned when the table could not be created.
+do_init_table(Tab,Storage,Cs,SenderPid,
+ TabSize,DetsInfo,OrigTabRec,Init) ->
+ case create_table(Tab, TabSize, Storage, Cs) of
+ {Storage,Tab} ->
+ %% Debug info
+ Node = node(SenderPid),
+ put(mnesia_table_receiver, {Tab, Node, SenderPid}),
+ mnesia_tm:block_tab(Tab),
+ PConv = mnesia_monitor:needs_protocol_conversion(Node),
+
+ case init_table(Tab,Storage,Init,PConv,DetsInfo,SenderPid) of
+ ok ->
+ tab_receiver(Node,Tab,Storage,Cs,PConv,OrigTabRec);
+ Reason ->
+ Msg = "[d]ets:init table failed",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage)
+ end;
+ Error ->
+ Error
+ end.
+
+%% Create the empty local table that will receive the copy.
+%%
+%% For disc_only_copies a dets table is opened on the temporary file of
+%% Tab, sized for at least 256 objects. For ram_copies and disc_copies
+%% an ets table is created. Returns {Storage, Tab} on success, otherwise
+%% the failing result of the open/create call.
+create_table(Tab, TabSize, Storage, Cs) ->
+ if
+ Storage == disc_only_copies ->
+ mnesia_lib:lock_table(Tab),
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ Size = lists:max([TabSize, 256]),
+ Args = [{file, Tmp},
+ {keypos, 2},
+%% {ram_file, true},
+ {estimated_no_objects, Size},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)}],
+ %% Remove any stale temporary file before opening a new one.
+ file:delete(Tmp),
+ case mnesia_lib:dets_sync_open(Tab, Args) of
+ {ok, _} ->
+ mnesia_lib:unlock_table(Tab),
+ {Storage, Tab};
+ Else ->
+ mnesia_lib:unlock_table(Tab),
+ Else
+ end;
+ (Storage == ram_copies) or (Storage == disc_copies) ->
+ Args = [{keypos, 2}, public, named_table, Cs#cstruct.type],
+ case mnesia_monitor:unsafe_mktab(Tab, Args) of
+ Tab ->
+ {Storage, Tab};
+ Else ->
+ Else
+ end
+ end.
+
+%% Message loop run by the receiver after the table has been initialised.
+%%
+%% PConv is false for the new protocol, or the pid of the process that
+%% runs the old-protocol init_table once {actual_tabrec, Pid} has been
+%% received. The loop ends with the result of finish_copy/6 when the
+%% sender reports no_more, or with the result of down/2 on failure.
+tab_receiver(Node, Tab, Storage, Cs, PConv, OrigTabRec) ->
+ receive
+ {SenderPid, {no_more, DatBin}} when PConv == false ->
+ finish_copy(Storage,Tab,Cs,SenderPid,DatBin,OrigTabRec);
+
+ %% Protocol conversion hack
+ {SenderPid, {no_more, DatBin}} when is_pid(PConv) ->
+ PConv ! {SenderPid, no_more},
+ receive
+ {old_init_table_complete, ok} ->
+ finish_copy(Storage, Tab, Cs, SenderPid, DatBin,OrigTabRec);
+ {old_init_table_complete, Reason} ->
+ Msg = "OLD: [d]ets:init table failed",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage)
+ end;
+
+ %% Remember which process actually reads the data (old protocol).
+ {actual_tabrec, Pid} ->
+ tab_receiver(Node, Tab, Storage, Cs, Pid,OrigTabRec);
+
+ {SenderPid, {more, [Recs]}} when is_pid(PConv) ->
+ PConv ! {SenderPid, {more, Recs}}, %% Forward Msg to OldNodes
+ tab_receiver(Node, Tab, Storage, Cs, PConv,OrigTabRec);
+
+ {'EXIT', PConv, Reason} -> %% [d]ets:init process crashed
+ Msg = "Receiver crashed",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage);
+
+ %% Protocol conversion hack
+ {copier_done, Node} ->
+ verbose("Sender of table ~p crashed on node ~p ~n", [Tab, Node]),
+ down(Tab, Storage);
+
+ {'EXIT', Pid, Reason} ->
+ handle_exit(Pid, Reason),
+ tab_receiver(Node, Tab, Storage, Cs, PConv,OrigTabRec)
+ end.
+
+%% Build the continuation fun used by [d]ets:init_table: read fetches
+%% the next batch of records from the sender, close does nothing.
+make_table_fun(Pid, TabRec) ->
+    fun(read) ->
+            get_data(Pid, TabRec);
+       (close) ->
+            ok
+    end.
+
+%% Wait for the next message from the sender Pid. A batch of records is
+%% acknowledged with a request for more and returned together with a
+%% continuation fun; no_more ends the input. A copier_done message from
+%% the sender's node is returned as is, one from another node is
+%% ignored.
+get_data(Pid, TabRec) ->
+    receive
+        {Pid, {more, Recs}} ->
+            Pid ! {TabRec, more},
+            {Recs, make_table_fun(Pid,TabRec)};
+        {Pid, no_more} ->
+            end_of_input;
+        {copier_done, Node} ->
+            case node(Pid) =:= Node of
+                true -> {copier_done, Node};
+                false -> get_data(Pid, TabRec)
+            end;
+        {'EXIT', Pid, Why} ->
+            handle_exit(Pid, Why),
+            get_data(Pid, TabRec)
+    end.
+
+%% Fill the freshly created table using Fun as the data source.
+%%
+%% The fourth argument tells whether the sender needs protocol
+%% conversion. For disc_only_copies without conversion the bchunk format
+%% is used only when the sender's reported version equals the local
+%% erlang:system_info(version) and dets accepts the chunk format;
+%% otherwise the sender is told to fall back to the old protocol. With
+%% conversion the work is delegated to a linked process running
+%% old_node_init_table/6 and ok is returned immediately.
+init_table(Tab, disc_only_copies, Fun, false, DetsInfo,Sender) ->
+ ErtsVer = erlang:system_info(version),
+ case DetsInfo of
+ {ErtsVer, DetsData} ->
+ Res = (catch dets:is_compatible_bchunk_format(Tab, DetsData)),
+ case Res of
+ {'EXIT',{undef,[{dets,_,_}|_]}} ->
+ Sender ! {self(), {old_protocol, Tab}},
+ dets:init_table(Tab, Fun); %% Old dets version
+ {'EXIT', What} ->
+ exit(What);
+ false ->
+ Sender ! {self(), {old_protocol, Tab}},
+ dets:init_table(Tab, Fun); %% Old dets version
+ true ->
+ dets:init_table(Tab, Fun, [{format, bchunk}])
+ end;
+ %% Dets data from a sender with a different version.
+ Old when Old /= false ->
+ Sender ! {self(), {old_protocol, Tab}},
+ dets:init_table(Tab, Fun); %% Old dets version
+ %% No dets data was supplied by the sender.
+ _ ->
+ dets:init_table(Tab, Fun)
+ end;
+init_table(Tab, _, Fun, false, _DetsInfo,_) ->
+ case catch ets:init_table(Tab, Fun) of
+ true ->
+ ok;
+ {'EXIT', Else} -> Else
+ end;
+init_table(Tab, Storage, Fun, true, _DetsInfo, Sender) -> %% Old Nodes
+ spawn_link(?MODULE, old_node_init_table,
+ [Tab, Storage, Fun, self(), false, Sender]),
+ ok.
+
+%% Entry point of the process spawned for the old-node protocol: fill
+%% the table without protocol conversion and report the outcome to
+%% TabReceiver as {old_init_table_complete, Outcome}.
+old_node_init_table(Tab, Storage, Fun, TabReceiver, DetsInfo,Sender) ->
+    Outcome = init_table(Tab, Storage, Fun, false, DetsInfo,Sender),
+    TabReceiver ! {old_init_table_complete, Outcome},
+    unlink(TabReceiver),
+    ok.
+
+%% Receiver side: finish the copy once the sender has sent its last data.
+%%
+%% Applies the queued subscription events, finalises the on-disc
+%% representation via handle_last/3, initialises indexes and SNMP data,
+%% acknowledges to the sender and unblocks the table. Returns ok, or the
+%% result of down/2 when handle_last/3 fails.
+finish_copy(Storage,Tab,Cs,SenderPid,DatBin,OrigTabRec) ->
+ TabRef = {Storage, Tab},
+ subscr_receiver(TabRef, Cs#cstruct.record_name),
+ case handle_last(TabRef, Cs#cstruct.type, DatBin) of
+ ok ->
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ %% OrigTabRec must not be the spawned tab-receiver
+ %% due to old protocol.
+ SenderPid ! {OrigTabRec, no_more},
+ mnesia_tm:unblock_tab(Tab),
+ ok;
+ {error, Reason} ->
+ Msg = "Failed to handle last",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage)
+ end.
+
+%% Apply every table event already waiting in the mailbox to the local
+%% copy and return ok as soon as none is left. When the record name
+%% differs from the table name, the first element of the event value is
+%% replaced by the record name before the event is applied.
+subscr_receiver(TabRef = {_, Tab}, RecName) ->
+    receive
+        {mnesia_table_event, {Op, Val, _Tid}} ->
+            case Tab == RecName of
+                true ->
+                    handle_event(TabRef, Op, Val);
+                false ->
+                    handle_event(TabRef, Op, setelement(1, Val, RecName))
+            end,
+            subscr_receiver(TabRef, RecName);
+        {'EXIT', Pid, Why} ->
+            handle_exit(Pid, Why),
+            subscr_receiver(TabRef, RecName)
+    after 0 ->
+            ok
+    end.
+
+%% Apply one table event to the local copy identified by TabRef.
+handle_event(TabRef, clear_table, {_Tab, _Key}) ->
+    db_match_erase(TabRef, '_');
+handle_event(TabRef, delete_object, OldRec) ->
+    db_match_erase(TabRef, OldRec);
+handle_event(TabRef, delete, {_Tab, Key}) ->
+    db_erase(TabRef, Key);
+handle_event(TabRef, write, Rec) ->
+    db_put(TabRef, Rec).
+
+%% Finalise the on-disc representation of a freshly received table.
+%%
+%% The last argument is the binary sent by the sender, or nobin.
+%% Returns ok on success; the disc_copies clause returns the result of
+%% mnesia_log:ets2dcd/1 and the disc_only_copies clause may return
+%% {error, _} when the temporary file cannot be renamed.
+handle_last({disc_copies, Tab}, _Type, nobin) ->
+ Ret = mnesia_log:ets2dcd(Tab),
+ Fname = mnesia_lib:tab2dat(Tab),
+ case mnesia_lib:exists(Fname) of
+ true -> %% Remove old .DAT files.
+ file:delete(Fname);
+ false ->
+ ok
+ end,
+ Ret;
+
+handle_last({disc_only_copies, Tab}, Type, nobin) ->
+ %% Close the temporary dets table, move its file into place and
+ %% reopen the table under its final file name.
+ mnesia_lib:dets_sync_close(Tab),
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ case file:rename(Tmp, Dat) of
+ ok ->
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ %% NOTE(review): the result of open_dets/2 is not checked here.
+ mnesia_monitor:open_dets(Tab, Args),
+ ok;
+ {error, Reason} ->
+ {error, {"Cannot swap tmp files", Tab, Reason}}
+ end;
+
+handle_last({ram_copies, _Tab}, _Type, nobin) ->
+ ok;
+handle_last({ram_copies, Tab}, _Type, DatBin) ->
+ %% Store the received binary as the dcd file of the table, but only
+ %% when Mnesia uses a directory.
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_lib:lock_table(Tab),
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ ok = file:write_file(Tmp, DatBin),
+ ok = file:rename(Tmp, mnesia_lib:tab2dcd(Tab)),
+ mnesia_lib:unlock_table(Tab),
+ ok;
+ false ->
+ ok
+ end.
+
+%% Clean up after a failed copy: remove the partially loaded table,
+%% withdraw this node as a holder of Tab, unblock the table and drop any
+%% pending subscription events. Always returns down.
+down(Tab, Storage) ->
+    case Storage of
+        S when S == ram_copies; S == disc_copies ->
+            catch ?ets_delete_table(Tab);
+        disc_only_copies ->
+            TmpFile = mnesia_lib:tab2tmp(Tab),
+            mnesia_lib:dets_sync_close(Tab),
+            file:delete(TmpFile)
+    end,
+    Self = node(),
+    mnesia_checkpoint:tm_del_copy(Tab, Self),
+    mnesia_controller:sync_del_table_copy_whereabouts(Tab, Self),
+    mnesia_tm:unblock_tab(Tab),
+    flush_subcrs(),
+    down.
+
+%% Discard every table event already waiting in the mailbox. Exit
+%% messages found on the way are passed to handle_exit/2. Returns done
+%% as soon as no matching message is left.
+flush_subcrs() ->
+    receive
+        {mnesia_table_event, _Event} ->
+            flush_subcrs();
+        {'EXIT', Pid, Why} ->
+            handle_exit(Pid, Why),
+            flush_subcrs()
+    after 0 ->
+            done
+    end.
+
+%% Delete Key from the table identified by {Storage, Tab}, asserting
+%% that the backend reports success.
+db_erase({disc_only_copies, Tab}, Key) ->
+    ok = dets:delete(Tab, Key);
+db_erase({Storage, Tab}, Key) when Storage == ram_copies; Storage == disc_copies ->
+    true = ?ets_delete(Tab, Key).
+
+%% Delete every object matching Pat from the table identified by
+%% {Storage, Tab}, asserting that the backend reports success.
+db_match_erase({disc_only_copies, Tab}, Pat) ->
+    ok = dets:match_delete(Tab, Pat);
+db_match_erase({Storage, Tab}, Pat) when Storage == ram_copies; Storage == disc_copies ->
+    true = ?ets_match_delete(Tab, Pat).
+
+%% Insert Val into the table identified by {Storage, Tab}, asserting
+%% that the backend reports success.
+db_put({disc_only_copies, Tab}, Val) ->
+    ok = dets:insert(Tab, Val);
+db_put({Storage, Tab}, Val) when Storage == ram_copies; Storage == disc_copies ->
+    true = ?ets_insert(Tab, Val).
+
+%% This code executes at the remote site where the data is;
+%% it runs in a special copier process.
+
+%% Calculate the number of keys to send per transfer, based on the
+%% external size of the records stored under the first key of the table.
+calc_nokeys(Storage, Tab) ->
+    FirstKey = mnesia_lib:db_first(Storage, Tab),
+    Sample = mnesia_lib:db_get(Storage, Tab, FirstKey),
+    SampleSize = byte_size(term_to_binary(Sample)),
+    (?MAX_TRANSFER_SIZE div SampleSize) + 1.
+
+%% Sender side: send the local copy of Tab to the receiver process Pid,
+%% whose storage type for the table is RemoteS.
+%%
+%% Returns ok when the copy completed or the receiver died, and
+%% {error, Reason} when the table does not exist locally or the
+%% transfer failed for another reason.
+send_table(Pid, Tab, RemoteS) ->
+ case ?catch_val({Tab, storage_type}) of
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}};
+ unknown ->
+ {error, {no_exists, Tab}};
+ Storage ->
+ %% Send first
+ TabSize = mnesia:table_info(Tab, size),
+ Pconvert = mnesia_monitor:needs_protocol_conversion(node(Pid)),
+ KeysPerTransfer = calc_nokeys(Storage, Tab),
+ ChunkData = dets:info(Tab, bchunk_format),
+
+ %% Raw dets chunks are only used when both sides store the table as
+ %% disc_only_copies, dets reports a chunk format and the receiver
+ %% needs no protocol conversion.
+ UseDetsChunk =
+ Storage == RemoteS andalso
+ Storage == disc_only_copies andalso
+ ChunkData /= undefined andalso
+ Pconvert == false,
+ if
+ UseDetsChunk == true ->
+ DetsInfo = erlang:system_info(version),
+ Pid ! {self(), {first, TabSize, {DetsInfo, ChunkData}}};
+ true ->
+ Pid ! {self(), {first, TabSize}}
+ end,
+
+ %% Debug info
+ put(mnesia_table_sender, {Tab, node(Pid), Pid}),
+ {Init, Chunk} = reader_funcs(UseDetsChunk, Tab, Storage, KeysPerTransfer),
+
+ SendIt = fun() ->
+ prepare_copy(Pid, Tab, Storage),
+ send_more(Pid, 1, Chunk, Init(), Tab, Pconvert),
+ finish_copy(Pid, Tab, Storage, RemoteS)
+ end,
+
+ case catch SendIt() of
+ %% Thrown by send_more/6 before finish_copy/4 has cleaned up.
+ receiver_died ->
+ cleanup_tab_copier(Pid, Storage, Tab),
+ unlink(whereis(mnesia_tm)),
+ ok;
+ %% Returned by the transaction in finish_copy/4.
+ {_, receiver_died} ->
+ unlink(whereis(mnesia_tm)),
+ ok;
+ {atomic, no_more} ->
+ unlink(whereis(mnesia_tm)),
+ ok;
+ Reason ->
+ cleanup_tab_copier(Pid, Storage, Tab),
+ unlink(whereis(mnesia_tm)),
+ {error, Reason}
+ end
+ end.
+
+%% Prepare the table for copying inside a transaction: take a write
+%% lock, subscribe the receiver to table events, add the receiver node
+%% to where_to_write and fix the table. Exits with
+%% {tab_copier_prepare, Tab, Why} if the transaction is aborted.
+prepare_copy(Pid, Tab, Storage) ->
+    Prepare =
+        fun() ->
+                mnesia:write_lock_table(Tab),
+                mnesia_subscr:subscribe(Pid, {table, Tab}),
+                update_where_to_write(Tab, node(Pid)),
+                mnesia_lib:db_fixtable(Storage, Tab, true),
+                ok
+        end,
+    case mnesia:transaction(Prepare) of
+        {aborted, Why} ->
+            exit({tab_copier_prepare, Tab, Why});
+        {atomic, ok} ->
+            ok
+    end.
+
+%% Tell every current db node (and Node itself, if it is not among
+%% them) to add Node to where_to_write for Tab. Nothing is done for
+%% read_only tables.
+update_where_to_write(Tab, Node) ->
+    case val({Tab, access_mode}) of
+        read_only ->
+            ignore;
+        read_write ->
+            Current = val({current, db_nodes}),
+            Targets =
+                case lists:member(Node, Current) of
+                    true -> Current;
+                    false -> [Node | Current]
+                end,
+            update_where_to_write(Targets, Tab, Node)
+    end.
+
+%% Issue the update request on each node in turn; returns ok.
+update_where_to_write(Nodes, Tab, AddNode) ->
+    Request = {update_where_to_write, [add, Tab, AddNode], self()},
+    lists:foreach(fun(N) ->
+                          rpc:call(N, mnesia_controller, call, [Request])
+                  end,
+                  Nodes).
+
+%% Sender side: wait for the receiver to ask for more data and send it.
+%%
+%% N is the packet counter handed to send_packet/5. The loop ends when
+%% send_packet/5 returns an integer, which it does once the end of the
+%% table has been reached. Throws receiver_died when the receiver node
+%% reports that the copier is done.
+send_more(Pid, N, Chunk, DataState, Tab, OldNode) ->
+ receive
+ {NewPid, more} ->
+ case send_packet(N - 1, NewPid, Chunk, DataState, OldNode) of
+ New when is_integer(New) ->
+ New - 1;
+ NewData ->
+ send_more(NewPid, ?MAX_NOPACKETS, Chunk, NewData, Tab, OldNode)
+ end;
+ %% The receiver cannot use the chunk format; restart the traversal
+ %% with the ordinary reader funs.
+ {_NewPid, {old_protocol, Tab}} ->
+ Storage = val({Tab, storage_type}),
+ {Init, NewChunk} =
+ reader_funcs(false, Tab, Storage, calc_nokeys(Storage, Tab)),
+ send_more(Pid, 1, NewChunk, Init(), Tab, OldNode);
+
+ {copier_done, Node} when Node == node(Pid)->
+ verbose("Receiver of table ~p crashed on ~p (more)~n", [Tab, Node]),
+ throw(receiver_died)
+ end.
+
+%% Return an {Init, Chunk} pair of funs used to traverse the table:
+%% either raw dets chunks, or ordinary chunked selects with
+%% KeysPerTransfer objects per chunk.
+reader_funcs(UseDetsChunk, Tab, Storage, KeysPerTransfer) ->
+    case UseDetsChunk of
+        true ->
+            {fun() -> dets_bchunk(Tab, start) end,
+             fun(Cont) -> dets_bchunk(Tab, Cont) end};
+        false ->
+            {fun() -> mnesia_lib:db_init_chunk(Storage, Tab, KeysPerTransfer) end,
+             fun(Cont) -> mnesia_lib:db_chunk(Storage, Cont) end}
+    end.
+
+%% Call dets:bchunk/2 and swap a two-element tuple result from
+%% {Cont, Data} to {Data, Cont}, the order the select-based readers use.
+%% Every other result is returned unchanged.
+dets_bchunk(Tab, Chunk) ->
+    Res = dets:bchunk(Tab, Chunk),
+    case Res of
+        {Cont, Data} -> {Data, Cont};
+        _ -> Res
+    end.
+
+%% Send packets to Pid until ?MAX_NOPACKETS have been sent or the end
+%% of the table is reached.
+%%
+%% Returns the integer packet counter N when the table is exhausted,
+%% otherwise the data state to continue from on the next request.
+send_packet(N, Pid, _Chunk, '$end_of_table', OldNode) ->
+ case OldNode of
+ true -> ignore; %% Old nodes can't handle the new no_more
+ false -> Pid ! {self(), no_more}
+ end,
+ N;
+%% An empty chunk is skipped without counting as a packet.
+send_packet(N, Pid, Chunk, {[], Cont}, OldNode) ->
+ send_packet(N, Pid, Chunk, Chunk(Cont), OldNode);
+send_packet(N, Pid, Chunk, {Recs, Cont}, OldNode) when N < ?MAX_NOPACKETS ->
+ case OldNode of
+ true -> Pid ! {self(), {more, [Recs]}}; %% Old nodes need a wrapping list
+ false -> Pid ! {self(), {more, Recs}}
+ end,
+ send_packet(N+1, Pid, Chunk, Chunk(Cont), OldNode);
+%% Packet budget used up: hand the unsent data state back to the caller.
+send_packet(_N, _Pid, _Chunk, DataState, _OldNode) ->
+ DataState.
+
+%% Sender side: finish the copy inside a transaction.
+%%
+%% Takes a read lock on the table, updates the whereabouts, removes the
+%% fix and subscription set up for the copy, registers the new copy
+%% with the checkpoint handling and tells the receiver that no more
+%% data follows. Returns the result of mnesia:transaction/1, whose
+%% value is no_more or receiver_died.
+finish_copy(Pid, Tab, Storage, RemoteS) ->
+ RecNode = node(Pid),
+ DatBin = dat2bin(Tab, Storage, RemoteS),
+ Trans =
+ fun() ->
+ mnesia:read_lock_table(Tab),
+ A = val({Tab, access_mode}),
+ mnesia_controller:sync_and_block_table_whereabouts(Tab, RecNode, RemoteS, A),
+ cleanup_tab_copier(Pid, Storage, Tab),
+ mnesia_checkpoint:tm_add_copy(Tab, RecNode),
+ Pid ! {self(), {no_more, DatBin}},
+ receive
+ {Pid, no_more} -> % Don't bother about the spurious 'more' message
+ no_more;
+ {copier_done, Node} when Node == node(Pid)->
+ verbose("Tab receiver ~p crashed (more): ~p~n", [Tab, Node]),
+ receiver_died
+ end
+ end,
+ mnesia:transaction(Trans).
+
+%% Undo the preparations made for a copy: release the table fix and
+%% cancel the receiver's subscription to events on Tab.
+cleanup_tab_copier(Pid, Storage, Tab) ->
+    mnesia_lib:db_fixtable(Storage, Tab, false),
+    Subscription = {table, Tab},
+    mnesia_subscr:unsubscribe(Pid, Subscription).
+
+%% When both sides store the table as ram_copies, read the local dcd
+%% file under the table lock and return its contents. In every other
+%% case, and when the file cannot be read, return nobin.
+dat2bin(Tab, ram_copies, ram_copies) ->
+    mnesia_lib:lock_table(Tab),
+    ReadRes = file:read_file(mnesia_lib:tab2dcd(Tab)),
+    mnesia_lib:unlock_table(Tab),
+    case ReadRes of
+        {ok, Contents} -> Contents;
+        _ -> nobin
+    end;
+dat2bin(_Tab, _LocalS, _RemoteS) ->
+    nobin.
+
+%% React to an exit message: an exit from a process on the local node
+%% terminates this process with the same reason. An exit from another
+%% node is ignored, since it will be handled by mnesia_down soon.
+handle_exit(From, Why) when node(From) == node() ->
+    exit(Why);
+handle_exit(_From, _Why) ->
+    ignore.
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
new file mode 100644
index 0000000000..cfa3f171b2
--- /dev/null
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -0,0 +1,1196 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_locker).
+
+-export([
+ get_held_locks/0,
+ get_lock_queue/0,
+ global_lock/5,
+ ixrlock/5,
+ init/1,
+ mnesia_down/2,
+ release_tid/1,
+ async_release_tid/2,
+ send_release_tid/2,
+ receive_release_tid_acc/2,
+ rlock/3,
+ rlock_table/3,
+ rwlock/3,
+ sticky_rwlock/3,
+ start/0,
+ sticky_wlock/3,
+ sticky_wlock_table/3,
+ wlock/3,
+ wlock_no_exist/4,
+ wlock_table/3
+ ]).
+
+%% sys callback functions
+-export([system_continue/3,
+ system_terminate/4,
+ system_code_change/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [dbg_out/2, error/2, verbose/2]).
+
+-define(dbg(S,V), ok).
+%-define(dbg(S,V), dbg_out("~p:~p: " ++ S, [?MODULE, ?LINE] ++ V)).
+
+-define(ALL, '______WHOLETABLE_____').
+-define(STICK, '______STICK_____').
+-define(GLOBAL, '______GLOBAL_____').
+
+-record(state, {supervisor}).
+
+-record(queue, {oid, tid, op, pid, lucky}).
+
+%% mnesia_held_locks: contain {Oid, Op, Tid} entries (bag)
+-define(match_oid_held_locks(Oid), {Oid, '_', '_'}).
+%% mnesia_tid_locks: contain {Tid, Oid, Op} entries (bag)
+-define(match_oid_tid_locks(Tid), {Tid, '_', '_'}).
+%% mnesia_sticky_locks: contain {Oid, Node} entries and {Tab, Node} entries (set)
+-define(match_oid_sticky_locks(Oid),{Oid, '_'}).
+%% mnesia_lock_queue: contain {queue, Oid, Tid, Op, ReplyTo, WaitForTid} entries (bag)
+-define(match_oid_lock_queue(Oid), #queue{oid=Oid, tid='_', op = '_', pid = '_', lucky = '_'}).
+%% mnesia_lock_counter: {{write, Tab}, Number} &&
+%% {{read, Tab}, Number} entries (set)
+
+%% Starts the lock manager through mnesia_monitor:start_proc/4, with
+%% init/1 of this module as entry point and the caller's pid as argument.
+start() ->
+ mnesia_monitor:start_proc(?MODULE, ?MODULE, init, [self()]).
+
+%% Process entry point of the lock manager.
+%% Registers the process under the module name, traps exits, creates the
+%% four private lock tables, acknowledges the start to Parent and copies a
+%% recognised pid_sort_order setting (r9b_plain | standard) into the
+%% process dictionary before entering loop/1. Does not return normally.
+init(Parent) ->
+    register(?MODULE, self()),
+    process_flag(trap_exit, true),
+    ?ets_new_table(mnesia_held_locks, [bag, private, named_table]),
+    ?ets_new_table(mnesia_tid_locks, [bag, private, named_table]),
+    ?ets_new_table(mnesia_sticky_locks, [set, private, named_table]),
+    ?ets_new_table(mnesia_lock_queue, [bag, private, named_table, {keypos, 2}]),
+
+    proc_lib:init_ack(Parent, {ok, self()}),
+    case ?catch_val(pid_sort_order) of
+        SortOrder when SortOrder =:= r9b_plain; SortOrder =:= standard ->
+            put(pid_sort_order, SortOrder);
+        _ ->
+            ignore
+    end,
+    loop(#state{supervisor = Parent}).
+
+%% Looks up Var with the ?catch_val macro; when that yields an 'EXIT'
+%% tuple the lookup is delegated to mnesia_lib:other_val/2.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Reason} ->
+            mnesia_lib:other_val(Var, Reason);
+        Value ->
+            Value
+    end.
+
+%% Sends the answer R to From, tagged with the module name and the local
+%% node so that clients can match on where the reply came from.
+reply(From, R) ->
+ From ! {?MODULE, node(), R}.
+
+%% Client side: sends request X, tagged with the caller's pid, to the
+%% process registered as ?MODULE on Node and waits for the answer in
+%% l_req_rec/2.
+l_request(Node, X, Store) ->
+ {?MODULE, Node} ! {self(), X},
+ l_req_rec(Node, Store).
+
+%% Records Node under the 'nodes' key in Store, then blocks (no timeout)
+%% until either a reply tagged with Node arrives, which is returned, or a
+%% mnesia_down for Node arrives, which is turned into
+%% {not_granted, {node_not_running, Node}}.
+l_req_rec(Node, Store) ->
+ ?ets_insert(Store, {nodes, Node}),
+ receive
+ {?MODULE, Node, Reply} ->
+ Reply;
+ {mnesia_down, Node} ->
+ {not_granted, {node_not_running, Node}}
+ end.
+
+%% Sends an asynchronous {release_tid, Tid} message to the local lock
+%% manager; loop/1 handles it by calling do_release_tid/1.
+release_tid(Tid) ->
+ ?MODULE ! {release_tid, Tid}.
+
+%% Broadcasts {release_tid, Tid} to the lock managers on Nodes with
+%% rpc:abcast/3; no replies are awaited.
+async_release_tid(Nodes, Tid) ->
+ rpc:abcast(Nodes, ?MODULE, {release_tid, Tid}).
+
+%% Broadcasts a sync_release_tid request, tagged with the caller's pid, to
+%% the lock managers on Nodes. Each manager answers with
+%% {tid_released, Tid} (see loop/1); receive_release_tid_acc/2 collects
+%% those answers.
+send_release_tid(Nodes, Tid) ->
+ rpc:abcast(Nodes, ?MODULE, {self(), {sync_release_tid, Tid}}).
+
+%% Waits, node by node in list order, for the {tid_released, Tid} answer
+%% to a sync_release_tid request. A mnesia_down from the awaited node
+%% counts as an answer. Returns ok when every node has been accounted for.
+receive_release_tid_acc([], _Tid) ->
+    ok;
+receive_release_tid_acc([Node | Remaining], Tid) ->
+    receive
+        {?MODULE, Node, {tid_released, Tid}} ->
+            receive_release_tid_acc(Remaining, Tid);
+        {mnesia_down, Node} ->
+            receive_release_tid_acc(Remaining, Tid)
+    end.
+
+%% Main receive loop of the lock manager process.
+%% Every clause handles one message and then calls loop/1 again with the
+%% unchanged State, except the supervisor exit (stops the process) and
+%% system messages (handed over to sys:handle_system_msg/6).
+loop(State) ->
+ receive
+ %% Write lock request on Oid
+ {From, {write, Tid, Oid}} ->
+ try_sticky_lock(Tid, write, From, Oid),
+ loop(State);
+
+ %% If Key == ?ALL it's a request to lock the entire table
+ %%
+
+ {From, {read, Tid, Oid}} ->
+ try_sticky_lock(Tid, read, From, Oid),
+ loop(State);
+
+ %% Really do a read, but get hold of a write lock
+ %% used by mnesia:wread(Oid).
+
+ {From, {read_write, Tid, Oid}} ->
+ try_sticky_lock(Tid, read_write, From, Oid),
+ loop(State);
+
+ %% Tid has somehow terminated, clear up everything
+ %% and pass locks on to queued processes.
+ %% This is the purpose of the mnesia_tid_locks table
+
+ {release_tid, Tid} ->
+ do_release_tid(Tid),
+ loop(State);
+
+ %% stick lock, first tries this to the where_to_read Node
+ {From, {test_set_sticky, Tid, {Tab, _} = Oid, Lock}} ->
+ case ?ets_lookup(mnesia_sticky_locks, Tab) of
+ [] ->
+ reply(From, not_stuck),
+ loop(State);
+ [{_,Node}] when Node == node() ->
+ %% Lock is stuck here, see now if we can just set
+ %% a regular write lock
+ try_lock(Tid, Lock, From, Oid),
+ loop(State);
+ [{_,Node}] ->
+ reply(From, {stuck_elsewhere, Node}),
+ loop(State)
+ end;
+
+ %% If test_set_sticky fails, we send this to all nodes
+ %% after acquiring a real write lock on Oid
+
+ {stick, {Tab, _}, N} ->
+ ?ets_insert(mnesia_sticky_locks, {Tab, N}),
+ loop(State);
+
+ %% The caller which sends this message must first have
+ %% acquired a write lock on the entire table
+ {unstick, Tab} ->
+ ?ets_delete(mnesia_sticky_locks, Tab),
+ loop(State);
+
+ %% Index read: takes a read lock on the whole table, unless the
+ %% table is stuck to another node, in which case the client is
+ %% told to switch to that node.
+ {From, {ix_read, Tid, Tab, IxKey, Pos}} ->
+ case ?ets_lookup(mnesia_sticky_locks, Tab) of
+ [] ->
+ set_read_lock_on_all_keys(Tid,From,Tab,IxKey,Pos),
+ loop(State);
+ [{_,N}] when N == node() ->
+ set_read_lock_on_all_keys(Tid,From,Tab,IxKey,Pos),
+ loop(State);
+ [{_,N}] ->
+ Req = {From, {ix_read, Tid, Tab, IxKey, Pos}},
+ From ! {?MODULE, node(), {switch, N, Req}},
+ loop(State)
+ end;
+
+ %% Same as release_tid, but acknowledged to the sender
+ {From, {sync_release_tid, Tid}} ->
+ do_release_tid(Tid),
+ reply(From, {tid_released, Tid}),
+ loop(State);
+
+ %% Sent by mnesia_down/2 in this module when Node has gone down
+ {release_remote_non_pending, Node, Pending} ->
+ release_remote_non_pending(Node, Pending),
+ mnesia_monitor:mnesia_down(?MODULE, Node),
+ loop(State);
+
+ %% The supervisor has exited: stop
+ {'EXIT', Pid, _} when Pid == State#state.supervisor ->
+ do_stop();
+
+ {system, From, Msg} ->
+ verbose("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
+ Parent = State#state.supervisor,
+ sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State);
+
+ %% Debug aid: returns all objects of one of the (private) lock tables
+ {get_table, From, LockTable} ->
+ From ! {LockTable, ?ets_match_object(LockTable, '_')},
+ loop(State);
+
+ %% Anything else is logged and dropped so the mailbox cannot fill up
+ Msg ->
+ error("~p got unexpected message: ~p~n", [?MODULE, Msg]),
+ loop(State)
+ end.
+
+%% Records a granted lock in both directions: {Oid, Op, Tid} in
+%% mnesia_held_locks and {Tid, Oid, Op} in mnesia_tid_locks.
+set_lock(Tid, Oid, Op) ->
+ ?dbg("Granted ~p ~p ~p~n", [Tid,Oid,Op]),
+ ?ets_insert(mnesia_held_locks, {Oid, Op, Tid}),
+ ?ets_insert(mnesia_tid_locks, {Tid, Oid, Op}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Acquire locks
+
+%% Handles a lock request for Oid with regard to sticky locks.
+%% When the table is stuck to another node the client Pid is told to
+%% switch to that node and is handed the request to resend; otherwise
+%% (not stuck, or stuck to this node) the lock is tried locally.
+try_sticky_lock(Tid, Op, Pid, {Tab, _} = Oid) ->
+    case ?ets_lookup(mnesia_sticky_locks, Tab) of
+        [{_, StickyNode}] when StickyNode /= node() ->
+            Pid ! {?MODULE, node(), {switch, StickyNode, {Pid, {Op, Tid, Oid}}}};
+        [] ->
+            try_lock(Tid, Op, Pid, Oid);
+        [{_, _ThisNode}] ->
+            try_lock(Tid, Op, Pid, Oid)
+    end.
+
+%% Expands the requested operation into the triple used by try_lock/6:
+%% read_write means "reply like a read, but hold a write lock"; for every
+%% other operation the simple operation and the lock equal the operation.
+try_lock(Tid, read_write, Pid, Oid) ->
+ try_lock(Tid, read_write, read, write, Pid, Oid);
+try_lock(Tid, Op, Pid, Oid) ->
+ try_lock(Tid, Op, Op, Op, Pid, Oid).
+
+%% Tries to take Lock on Oid for Tid and acts on the verdict of can_lock/4:
+%%   yes          - the lock is granted and the grant is sent to Pid
+%%   {no, Tid2}   - Pid gets {not_granted, #cyclic{}}
+%%   {queue, Tid2} - the request is stored in mnesia_lock_queue and noted
+%%                   in mnesia_tid_locks; no reply is sent yet
+try_lock(Tid, Op, SimpleOp, Lock, Pid, Oid) ->
+    case can_lock(Tid, Lock, Oid, {no, bad_luck}) of
+        {queue, Holder} ->
+            ?dbg("Queued ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Holder]),
+            %% Append to queue: Nice place for trace output
+            Waiter = #queue{oid = Oid, tid = Tid, op = Op,
+                            pid = Pid, lucky = Holder},
+            ?ets_insert(mnesia_lock_queue, Waiter),
+            ?ets_insert(mnesia_tid_locks, {Tid, Oid, {queued, Op}});
+        {no, Holder} ->
+            Cyclic = #cyclic{op = SimpleOp, lock = Lock, oid = Oid, lucky = Holder},
+            ?dbg("Rejected ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Holder]),
+            reply(Pid, {not_granted, Cyclic});
+        yes ->
+            reply(Pid, grant_lock(Tid, SimpleOp, Lock, Oid))
+    end.
+
+%% Registers the lock and builds the reply for the client.
+%%  * read on a single record: a client on this node is told to do the
+%%    lookup itself (lookup_in_client); for a remote client the record is
+%%    read here and shipped in the reply. If that fails the client gets a
+%%    #cyclic{} so that the transaction is restarted.
+%%  * index read: the matching records are returned together with [?ALL].
+%%  * read on a whole table / global resource: {granted, ok}.
+%%  * write: granted.
+grant_lock(Tid, read, Lock, {Tab, Key} = Oid)
+  when Key /= ?ALL, Tab /= ?GLOBAL ->
+    ClientIsLocal = node(Tid#tid.pid) == node(),
+    if
+        ClientIsLocal ->
+            set_lock(Tid, Oid, Lock),
+            {granted, lookup_in_client};
+        true ->
+            try
+                Recs = mnesia_lib:db_get(Tab, Key), %% lookup as well
+                set_lock(Tid, Oid, Lock),
+                {granted, Recs}
+            catch _:_ ->
+                    %% Table has been deleted from this node,
+                    %% restart the transaction.
+                    {not_granted, #cyclic{op = read, lock = Lock, oid = Oid,
+                                          lucky = nowhere}}
+            end
+    end;
+grant_lock(Tid, {ix_read, IxKey, Pos}, Lock, {Tab, _} = Oid) ->
+    try
+        Recs = ix_read_res(Tab, IxKey, Pos),
+        set_lock(Tid, Oid, Lock),
+        {granted, Recs, [?ALL]}
+    catch _:_ ->
+            {not_granted, {no_exists, Tab, {index, [Pos]}}}
+    end;
+grant_lock(Tid, read, Lock, Oid) ->
+    set_lock(Tid, Oid, Lock),
+    {granted, ok};
+grant_lock(Tid, write, Lock, Oid) ->
+    set_lock(Tid, Oid, Lock),
+    granted.
+
+%% 1) Impose an ordering on all transactions favour old (low tid) transactions
+%% newer (higher tid) transactions may never wait on older ones,
+%% 2) When releasing the tids from the queue always begin with youngest (high tid)
+%% because of 1) it will avoid the deadlocks.
+%% 3) TabLocks is the problem :-) They should not starve and not deadlock
+%% handle tablocks in queue as they had locks on unlocked records.
+
+%% Decides whether Tid may take a read or write lock on Oid.
+%% Collects the held locks that can conflict and lets check_lock/7 judge:
+%%   read  on a key   - write locks on the key and on the whole table
+%%   read  on a table - write locks on any key of the table
+%%   write on a key   - all locks on the key and on the whole table
+%%   write on a table - all locks on any key of the table
+%% Returns whatever check_lock/7 returns.
+can_lock(Tid, read, {Tab, Key} = Oid, AlreadyQ) when Key /= ?ALL ->
+    %% The key is bound, no need for the other BIF
+    KeyWriters = ?ets_match_object(mnesia_held_locks, {Oid, write, '_'}),
+    TabWriters = ?ets_match_object(mnesia_held_locks, {{Tab, ?ALL}, write, '_'}),
+    check_lock(Tid, Oid, KeyWriters, TabWriters, yes, AlreadyQ, read);
+
+can_lock(Tid, read, Oid, AlreadyQ) -> % Whole tab
+    Writers = ?ets_match_object(mnesia_held_locks,
+                                {{element(1, Oid), '_'}, write, '_'}),
+    check_lock(Tid, Oid, Writers, [], yes, AlreadyQ, read);
+
+can_lock(Tid, write, {Tab, Key} = Oid, AlreadyQ) when Key /= ?ALL ->
+    KeyHolders = ?ets_lookup(mnesia_held_locks, Oid),
+    TabHolders = ?ets_lookup(mnesia_held_locks, {Tab, ?ALL}),
+    check_lock(Tid, Oid, KeyHolders, TabHolders, yes, AlreadyQ, write);
+
+can_lock(Tid, write, Oid, AlreadyQ) -> % Whole tab
+    Holders = ?ets_match_object(mnesia_held_locks,
+                                {{element(1, Oid), '_'}, '_', '_'}),
+    check_lock(Tid, Oid, Holders, [], yes, AlreadyQ, write).
+
+%% Check held locks for conflicting locks
+%%
+%% Arguments: the requesting Tid, the wanted Oid, the object locks still to
+%% inspect, the table locks to inspect afterwards, the verdict so far (X),
+%% AlreadyQ ({no, bad_luck} for a fresh request, {queue, bad_luck} when an
+%% already queued request is retried) and the lock type (read | write).
+%% Returns yes, {queue, WaitForTid} or {no, WaitForTid}.
+check_lock(Tid, Oid, [Lock | Locks], TabLocks, X, AlreadyQ, Type) ->
+ case element(3, Lock) of
+ Tid ->
+ %% Our own lock never conflicts
+ check_lock(Tid, Oid, Locks, TabLocks, X, AlreadyQ, Type);
+ WaitForTid ->
+ Queue = allowed_to_be_queued(WaitForTid,Tid),
+ if Queue == true ->
+ check_lock(Tid, Oid, Locks, TabLocks, {queue, WaitForTid}, AlreadyQ, Type);
+ Tid#tid.pid == WaitForTid#tid.pid ->
+ %% The holder is another tid of the very same process
+ dbg_out("Spurious lock conflict ~w ~w: ~w -> ~w~n",
+ [Oid, Lock, Tid, WaitForTid]),
+ %% Test..
+ {Tab, _Key} = Oid,
+ HaveQ = (ets:lookup(mnesia_lock_queue, Oid) /= [])
+ orelse (ets:lookup(mnesia_lock_queue,{Tab,?ALL}) /= []),
+ if
+ HaveQ ->
+ {no, WaitForTid};
+ true ->
+ check_lock(Tid,Oid,Locks,TabLocks,{queue,WaitForTid},AlreadyQ,Type)
+ end;
+ %%{no, WaitForTid}; Safe solution
+ true ->
+ {no, WaitForTid}
+ end
+ end;
+
+check_lock(_, _, [], [], X, {queue, bad_luck}, _) ->
+ X; %% The queue should be correct already no need to check it again
+
+%% All held locks inspected and the verdict is already "queue"
+check_lock(_, _, [], [], X = {queue, _Tid}, _AlreadyQ, _) ->
+ X;
+
+%% No conflicting held lock was found: also consult the lock queue
+check_lock(Tid, Oid, [], [], X, AlreadyQ, Type) ->
+ {Tab, Key} = Oid,
+ if
+ Type == write ->
+ check_queue(Tid, Tab, X, AlreadyQ);
+ Key == ?ALL ->
+ %% hmm should be solvable by a clever select expr but not today...
+ check_queue(Tid, Tab, X, AlreadyQ);
+ true ->
+ %% If there is a queue on that object, read_lock shouldn't be granted
+ ObjLocks = ets:lookup(mnesia_lock_queue, Oid),
+ case max(ObjLocks) of
+ empty ->
+ check_queue(Tid, Tab, X, AlreadyQ);
+ ObjL ->
+ case allowed_to_be_queued(ObjL,Tid) of
+ false ->
+ %% Starvation Preemption (write waits for read)
+ {no, ObjL};
+ true ->
+ check_queue(Tid, Tab, {queue, ObjL}, AlreadyQ)
+ end
+ end
+ end;
+
+%% Object locks exhausted: continue with the table locks
+check_lock(Tid, Oid, [], TabLocks, X, AlreadyQ, Type) ->
+ check_lock(Tid, Oid, TabLocks, [], X, AlreadyQ, Type).
+
+%% True if WaitForTid > Tid (the argument order is important).
+%% The comparison follows the pid_sort_order stored in the process
+%% dictionary by init/1: plain term order when none is set, otherwise
+%% cmp_tid/3 in R9B (r9b_plain) or standard mode.
+allowed_to_be_queued(WaitForTid, Tid) ->
+    tid_is_greater(get(pid_sort_order), WaitForTid, Tid).
+
+%% Per sort order implementation of "TidA > TidB".
+tid_is_greater(undefined, TidA, TidB) ->
+    TidA > TidB;
+tid_is_greater(r9b_plain, TidA, TidB) ->
+    cmp_tid(true, TidA, TidB) =:= 1;
+tid_is_greater(standard, TidA, TidB) ->
+    cmp_tid(false, TidA, TidB) =:= 1.
+
+%% Check the queue for conflicting table lock requests.
+%% Assumes that all queued locks belong to other tids.
+%% X is returned when no whole-table request is queued, or when the
+%% greatest queued tid is Tid itself. Otherwise Tid is queued behind that
+%% request if the ordering allows it, and refused if not. The refusal
+%% branch deliberately only matches a fresh request
+%% (AlreadyQ =:= {no, bad_luck}).
+check_queue(Tid, Tab, X, AlreadyQ) ->
+    QueuedOnTab = ets:lookup(mnesia_lock_queue, {Tab, ?ALL}),
+    case max(QueuedOnTab) of
+        Top when Top =:= empty; Top =:= Tid ->
+            X;
+        WaitForTid ->
+            case allowed_to_be_queued(WaitForTid, Tid) of
+                false when AlreadyQ =:= {no, bad_luck} ->
+                    {no, WaitForTid};
+                true ->
+                    {queue, WaitForTid}
+            end
+    end.
+
+%% Sorts queue entries with the greatest tid first, using the ordering
+%% selected by pid_sort_order in the process dictionary (plain term order
+%% when none is set, cmp_tid/3 otherwise).
+sort_queue(QL) ->
+    case get(pid_sort_order) of
+        undefined ->
+            lists:reverse(lists:keysort(#queue.tid, QL));
+        r9b_plain ->
+            sort_queue_by_cmp_tid(true, QL);
+        standard ->
+            sort_queue_by_cmp_tid(false, QL)
+    end.
+
+%% Sort with cmp_tid/3 in the given R9B mode as ordering function.
+sort_queue_by_cmp_tid(R9B, QL) ->
+    Before = fun(#queue{tid = TidA}, #queue{tid = TidB}) ->
+                     cmp_tid(R9B, TidA, TidB) == 1
+             end,
+    lists:sort(Before, QL).
+
+%% Returns the tid of the first entry after sort_queue/1 (the greatest
+%% tid), or 'empty' for an empty queue. A single entry needs no sorting.
+max([]) ->
+    empty;
+max([#queue{tid = Only}]) ->
+    Only;
+max(Queue) ->
+    Sorted = sort_queue(Queue),
+    [#queue{tid = Greatest} | _] = Sorted,
+    Greatest.
+
+%% Index read: requests a read lock on the whole table on behalf of Tid.
+%% This is exactly the generic grant / reject / queue handling of
+%% try_lock/6 with the ix_read operation acting both as the queued
+%% operation and as the "simple" operation handed to grant_lock/4.
+set_read_lock_on_all_keys(Tid, From, Tab, IxKey, Pos) ->
+    IxOp = {ix_read, IxKey, Pos},
+    try_lock(Tid, IxOp, IxOp, read, From, {Tab, ?ALL}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Release of locks
+
+%% Releases what the transactions of a failed Node hold, except for the
+%% tids listed in Pending.
+release_remote_non_pending(Node, Pending) ->
+    %% Clear the mnesia_sticky_locks table first, to avoid
+    %% unnecessary requests to the failing node
+    ?ets_match_delete(mnesia_sticky_locks, {'_' , Node}),
+
+    %% Then release all locks held by processes running at the failed
+    %% node, and drop the requests they still have queued
+    KnownTids = ?ets_match(mnesia_tid_locks, {'$1', '_', '_'}),
+    do_release_tids([Tid || [Tid] <- KnownTids,
+                            Node == node(Tid#tid.pid),
+                            not lists:member(Tid, Pending)]).
+
+%% Releases every tid in the list, in list order. Returns ok.
+do_release_tids(Tids) ->
+    lists:foreach(fun(Tid) -> do_release_tid(Tid) end, Tids).
+
+%% Drops everything registered for Tid: its mnesia_tid_locks entries, the
+%% held locks and queued requests they refer to, and finally gives the
+%% waiters on each affected Oid a new chance (rearrange_queue/1).
+do_release_tid(Tid) ->
+    Held = ?ets_lookup(mnesia_tid_locks, Tid),
+    ?dbg("Release ~p ~p ~n", [Tid, Held]),
+    ?ets_delete(mnesia_tid_locks, Tid),
+    release_locks(Held),
+    %% One entry per Oid is enough when re-trying the waiters
+    rearrange_queue(keyunique(lists:sort(Held), [])).
+
+%% Pushes the {Tid, Oid, Op} entries onto Acc, skipping an entry whose Oid
+%% equals the Oid of the entry currently on top of Acc. With sorted input
+%% this leaves one entry per Oid. The result is in reverse input order.
+keyunique(Locks, Acc0) ->
+    lists:foldl(fun({_Tid, Oid, _Op}, [{_, Oid, _} | _] = Acc) ->
+                        Acc;
+                   (Lock, Acc) ->
+                        [Lock | Acc]
+                end, Acc0, Locks).
+
+%% Releases every {Tid, Oid, Op} entry in the list, in order. Returns ok.
+release_locks(Locks) ->
+    lists:foreach(fun(Lock) -> release_lock(Lock) end, Locks).
+
+%% Removes one mnesia_tid_locks entry from the table it refers to.
+%% A queued request is removed from mnesia_lock_queue. For a write lock
+%% all held-lock entries for Oid are deleted; for a read lock only the
+%% entry of Tid is deleted. Any other Op crashes.
+release_lock({Tid, Oid, {queued, _}}) ->
+    QueuedByTid = #queue{oid = Oid, tid = Tid, op = '_',
+                         pid = '_', lucky = '_'},
+    ?ets_match_delete(mnesia_lock_queue, QueuedByTid);
+release_lock({Tid, Oid, Op}) ->
+    if
+        Op == write ->
+            ?ets_delete(mnesia_held_locks, Oid);
+        Op == read ->
+            HeldByTid = {Oid, Op, Tid},
+            ets:delete_object(mnesia_held_locks, HeldByTid)
+    end.
+
+%% For every released lock, re-tries the requests that were queued behind
+%% it. Releasing a record lock re-tries the waiters on that record and on
+%% the whole table; releasing a table lock re-tries every waiter on any
+%% key of the table. Waiters are tried in sort_queue/1 order.
+rearrange_queue([]) ->
+    ok;
+rearrange_queue([{_Tid, {Tab, Key}, _} | Locks]) ->
+    case Key of
+        ?ALL ->
+            Queue = ?ets_match_object(mnesia_lock_queue,
+                                      ?match_oid_lock_queue({Tab, '_'})),
+            try_waiters_tab(sort_queue(Queue));
+        _ ->
+            Queue =
+                ets:lookup(mnesia_lock_queue, {Tab, ?ALL}) ++
+                ets:lookup(mnesia_lock_queue, {Tab, Key}),
+            case Queue of
+                [] ->
+                    ok;
+                [_ | _] ->
+                    try_waiters_obj(sort_queue(Queue))
+            end
+    end,
+    ?dbg("RearrQ ~p~n", [Queue]),
+    rearrange_queue(Locks).
+
+%% Tries the waiters in order and stops at the first one that has to stay
+%% queued. Returns 'no' in that case and ok when the list is exhausted.
+try_waiters_obj([]) ->
+    ok;
+try_waiters_obj([Waiter | Rest]) ->
+    case try_waiter(Waiter) of
+        queued ->
+            no;
+        _Handled ->
+            try_waiters_obj(Rest)
+    end.
+
+%% Tries the waiters of a whole table in order.
+%% A table-lock waiter that has to stay queued stops the walk ('no').
+%% A record-lock waiter that has to stay queued only removes the other
+%% waiters on the same Oid from the rest of the walk.
+try_waiters_tab([]) ->
+    ok;
+try_waiters_tab([W | Waiters]) ->
+    Oid = W#queue.oid,
+    case {try_waiter(W), Oid} of
+        {queued, {_Tab, ?ALL}} ->
+            no;
+        {queued, _} ->
+            try_waiters_tab(key_delete_all(Oid, #queue.oid, Waiters));
+        _ ->
+            try_waiters_tab(Waiters)
+    end.
+
+%% Re-tries one queued request. The queued operation is expanded into the
+%% simple operation and the lock handed to try_waiter/6: read_write
+%% replies like a read but takes a write lock, an index read takes a read
+%% lock, and any other operation is used as is.
+try_waiter(#queue{oid = Oid, tid = Tid, op = read_write, pid = ReplyTo}) ->
+    try_waiter(Oid, read_write, read, write, ReplyTo, Tid);
+try_waiter(#queue{oid = Oid, tid = Tid, op = {ix_read, _, _} = IxRead, pid = ReplyTo}) ->
+    try_waiter(Oid, IxRead, IxRead, read, ReplyTo, Tid);
+try_waiter(#queue{oid = Oid, tid = Tid, op = Op, pid = ReplyTo}) ->
+    try_waiter(Oid, Op, Op, Op, ReplyTo, Tid).
+
+%% Re-evaluates a queued request against the current locks.
+%% Returns locked (granted, reply sent, removed from the queue), queued
+%% (still has to wait) or removed (refused with a #cyclic{} reply and
+%% removed from the queue, so that the transaction restarts).
+try_waiter(Oid, Op, SimpleOp, Lock, ReplyTo, Tid) ->
+    case can_lock(Tid, Lock, Oid, {queue, bad_luck}) of
+        {queue, _Why} ->
+            ?dbg("Keep ~p ~p ~p ~p~n", [Tid, Oid, Lock, _Why]),
+            queued; % Keep waiter in queue
+        yes ->
+            %% Delete from queue: Nice place for trace output
+            unqueue_waiter(Oid, Tid, Op, ReplyTo),
+            reply(ReplyTo, grant_lock(Tid, SimpleOp, Lock, Oid)),
+            locked;
+        {no, Holder} ->
+            Cyclic = #cyclic{op = SimpleOp, lock = Lock, oid = Oid, lucky = Holder},
+            verbose("** WARNING ** Restarted transaction, possible deadlock in lock queue ~w: cyclic = ~w~n",
+                    [Tid, Cyclic]),
+            unqueue_waiter(Oid, Tid, Op, ReplyTo),
+            reply(ReplyTo, {not_granted, Cyclic}),
+            removed
+    end.
+
+%% Removes the queue entries of one request, whoever it was waiting for.
+unqueue_waiter(Oid, Tid, Op, ReplyTo) ->
+    ?ets_match_delete(mnesia_lock_queue,
+                      #queue{oid = Oid, tid = Tid, op = Op,
+                             pid = ReplyTo, lucky = '_'}).
+
+%% Returns TupleList without the elements whose Pos:th field equals (==)
+%% Key; the order of the remaining elements is kept. An element for which
+%% the field cannot be read is kept.
+key_delete_all(Key, Pos, TupleList) ->
+    Keep = fun(Tuple) when element(Pos, Tuple) == Key -> false;
+              (_Other) -> true
+           end,
+    lists:filter(Keep, TupleList).
+
+%% Index lookup: fetches the index entries for IxKey from the index table
+%% of attribute position Pos, takes the second element of each entry as
+%% the real key and returns the concatenated records found for those keys.
+ix_read_res(Tab, IxKey, Pos) ->
+    Index = mnesia_index:get_index_table(Tab, Pos),
+    IndexEntries = mnesia_index:db_get(Index, IxKey),
+    RealKeys = mnesia_lib:elems(2, IndexEntries),
+    lists:append([mnesia_lib:db_get(Tab, RealKey) || RealKey <- RealKeys]).
+
+%% ********************* end server code ********************
+%% The following code executes at the client side of a transactions
+
+%% Called when mnesia on node N has gone down. Pending lists the tids
+%% whose locks must survive. If the lock manager is not registered yet the
+%% event is acknowledged to mnesia_monitor right away; otherwise the
+%% manager is asked to release the locks and does the acknowledgement
+%% itself (see the release_remote_non_pending clause in loop/1).
+mnesia_down(N, Pending) ->
+ case whereis(?MODULE) of
+ undefined ->
+ %% Takes care of mnesia_down's in early startup
+ mnesia_monitor:mnesia_down(?MODULE, N);
+ Pid ->
+ %% A synchronous call is needed in order to avoid a
+ %% race with mnesia_tm's coordinator processes
+ %% that may restart and acquire new locks.
+ %% mnesia_monitor ensures the sync.
+ Pid ! {release_remote_non_pending, N, Pending}
+ end.
+
+%% Acquire a write lock, but do a read; used by mnesia:wread/1.
+%% Aborts with {no_exists, Tab} when the table cannot be read anywhere.
+%% If a write lock is needed it is requested on all where_to_write nodes
+%% and noted in Store. If the transaction already holds a covering write
+%% lock, the write nodes are returned for whole-table / global Oids and
+%% the record is fetched with dirty_rpc/4 otherwise.
+rwlock(Tid, Store, Oid) ->
+    {Tab, Key} = Oid,
+    case val({Tab, where_to_read}) of
+        nowhere ->
+            mnesia:abort({no_exists, Tab});
+        ReadNode ->
+            case need_lock(Store, Tab, Key, write) of
+                yes ->
+                    WriteNodes = w_nodes(Tab),
+                    Result = get_rwlocks_on_nodes(WriteNodes, rwlock, ReadNode,
+                                                  Store, Tid, Oid),
+                    ?ets_insert(Store, {{locks, Tab, Key}, write}),
+                    Result;
+                no when Key == ?ALL; Tab == ?GLOBAL ->
+                    w_nodes(Tab);
+                no ->
+                    dirty_rpc(ReadNode, Tab, Key, write)
+            end
+    end.
+
+%% Returns the non-empty where_to_write node list of Tab, or aborts the
+%% transaction with {no_exists, Tab} when there is none.
+w_nodes(Tab) ->
+    case ?catch_val({Tab, where_to_write}) of
+        [_ | _] = Nodes ->
+            Nodes;
+        _ ->
+            mnesia:abort({no_exists, Tab})
+    end.
+
+%% Acquire a sticky write lock. A sticky lock is a lock
+%% which remains at this node after the termination of the
+%% transaction.
+
+sticky_wlock(Tid, Store, Oid) ->
+ sticky_lock(Tid, Store, Oid, write).
+
+%% Sticky variant of rwlock/3: delegates to sticky_lock/4 with the
+%% read_write lock kind.
+sticky_rwlock(Tid, Store, Oid) ->
+ sticky_lock(Tid, Store, Oid, read_write).
+
+%% Common entry for sticky locks. Sticky locks are only taken when the
+%% table is read on the local node; otherwise the transaction is aborted
+%% with {not_local, Tab}. If the transaction already holds a covering
+%% write lock no lock manager is contacted.
+sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
+    ReadNode = val({Tab, where_to_read}),
+    case node() == ReadNode of
+        false ->
+            mnesia:abort({not_local, Tab});
+        true ->
+            case need_lock(Store, Tab, Key, write) of
+                yes ->
+                    do_sticky_lock(Tid, Store, Oid, Lock);
+                no ->
+                    dirty_sticky_lock(Tab, Key, [ReadNode], Lock)
+            end
+    end.
+
+%% Asks the local lock manager to test-and-set a sticky lock and acts on
+%% its answer:
+%%   granted / {granted, Val} - the table is stuck here; the lock and the
+%%       where_to_write nodes are noted in Store
+%%   {not_granted, Reason}    - the process exits with {aborted, Reason}
+%%   not_stuck                - ordinary locks are taken and the table is
+%%       made sticky to this node (not_stuck/7)
+%%   {stuck_elsewhere, _}     - ordinary locks are taken and the table is
+%%       unstuck (stuck_elsewhere/6)
+%%   {mnesia_down, Node}      - the process exits via flush_remaining/3
+%% The receive has no timeout.
+do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
+ ?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
+ N = node(),
+ receive
+ {?MODULE, N, granted} ->
+ ?ets_insert(Store, {{locks, Tab, Key}, write}),
+ [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ granted;
+ {?MODULE, N, {granted, Val}} -> %% for rwlocks
+ case opt_lookup_in_client(Val, Oid, write) of
+ C = #cyclic{} ->
+ exit({aborted, C});
+ Val2 ->
+ ?ets_insert(Store, {{locks, Tab, Key}, write}),
+ [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ Val2
+ end;
+ {?MODULE, N, {not_granted, Reason}} ->
+ exit({aborted, Reason});
+ {?MODULE, N, not_stuck} ->
+ not_stuck(Tid, Store, Tab, Key, Oid, Lock, N),
+ dirty_sticky_lock(Tab, Key, [N], Lock);
+ {mnesia_down, Node} ->
+ EMsg = {aborted, {node_not_running, Node}},
+ flush_remaining([N], Node, EMsg);
+ {?MODULE, N, {stuck_elsewhere, _N2}} ->
+ stuck_elsewhere(Tid, Store, Tab, Key, Oid, Lock),
+ dirty_sticky_lock(Tab, Key, [N], Lock)
+ end.
+
+%% The table is not stuck anywhere: takes a table read lock, a write lock
+%% on Oid and a write lock on the {Tab, ?STICK} pseudo object, then
+%% broadcasts {stick, Oid, N} to the lock managers on the where_to_write
+%% nodes, which record Tab as stuck to node N (see loop/1).
+not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) ->
+ rlock(Tid, Store, {Tab, ?ALL}), %% needed?
+ wlock(Tid, Store, Oid), %% perfect sync
+ wlock(Tid, Store, {Tab, ?STICK}), %% max one sticker/table
+ Ns = val({Tab, where_to_write}),
+ rpc:abcast(Ns, ?MODULE, {stick, Oid, N}).
+
+%% The table is stuck to another node: takes the same locks as
+%% not_stuck/7 and then broadcasts {unstick, Tab} to the lock managers on
+%% the where_to_write nodes, which drop the sticky entry (see loop/1).
+stuck_elsewhere(Tid, Store, Tab, _Key, Oid, _Lock) ->
+ rlock(Tid, Store, {Tab, ?ALL}), %% needed?
+ wlock(Tid, Store, Oid), %% perfect sync
+ wlock(Tid, Store, {Tab, ?STICK}), %% max one sticker/table
+ Ns = val({Tab, where_to_write}),
+ rpc:abcast(Ns, ?MODULE, {unstick, Tab}).
+
+%% Result of a sticky lock request that needs no further locking:
+%% a read_write request reads the record locally, a whole-table or global
+%% Oid yields Nodes and everything else yields ok.
+dirty_sticky_lock(Tab, Key, Nodes, Lock) ->
+    case {Lock, Key, Tab} of
+        {read_write, _, _} ->
+            mnesia_lib:db_get(Tab, Key);
+        {_, ?ALL, _} ->
+            Nodes;
+        {_, _, ?GLOBAL} ->
+            Nodes;
+        _ ->
+            ok
+    end.
+
+%% Sticky write lock on the whole table: sticky_lock/4 on {Tab, ?ALL}.
+sticky_wlock_table(Tid, Store, Tab) ->
+ sticky_lock(Tid, Store, {Tab, ?ALL}, write).
+
+%% Acquire a write lock on Oid.
+%% The request is sent to the lock managers on all where_to_write nodes of
+%% the table, and an item {{locks, Tab, Key}, write} is stored in the
+%% local store. When the transaction already holds a covering write lock
+%% nothing is requested: the result is then the write nodes for
+%% whole-table / global Oids and [] for a single record.
+wlock(Tid, Store, Oid) ->
+    {Tab, Key} = Oid,
+    case need_lock(Store, Tab, Key, write) of
+        yes ->
+            Nodes = w_nodes(Tab),
+            Request = {self(), {write, Tid, Oid}},
+            ?ets_insert(Store, {{locks, Tab, Key}, write}),
+            get_wlocks_on_nodes(Nodes, Nodes, Store, Request, Oid);
+        no when Key == ?ALL; Tab == ?GLOBAL ->
+            w_nodes(Tab);
+        no ->
+            []
+    end.
+
+%% Write lock on the whole table: wlock/3 on {Tab, ?ALL}.
+wlock_table(Tid, Store, Tab) ->
+ wlock(Tid, Store, {Tab, ?ALL}).
+
+%% Write lock even if the table does not exist
+%% The nodes to lock on are supplied by the caller (Ns) instead of being
+%% looked up, and nothing is recorded under the 'locks' key in Store.
+
+wlock_no_exist(Tid, Store, Tab, Ns) ->
+ Oid = {Tab, ?ALL},
+ Op = {self(), {write, Tid, Oid}},
+ get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid).
+
+%% Answers 'no' when Store already holds a lock entry matching LockPattern
+%% for the whole table or for the key itself, and 'yes' otherwise. The key
+%% entry is only consulted when there is no table entry.
+need_lock(Store, Tab, Key, LockPattern) ->
+    case ?ets_match_object(Store, {{locks, Tab, ?ALL}, LockPattern}) of
+        [] ->
+            case ?ets_match_object(Store, {{locks, Tab, Key}, LockPattern}) of
+                [] ->
+                    yes;
+                _KeyLocks ->
+                    no
+            end;
+        _TabLocks ->
+            no
+    end.
+
+%% Stores the nodes currently being waited for under mnesia_wlock_nodes in
+%% the process dictionary (debug information only).
+add_debug(Nodes) -> % Use process dictionary for debug info
+ put(mnesia_wlock_nodes, Nodes).
+
+%% Removes the debug information written by add_debug/1.
+del_debug() ->
+ erase(mnesia_wlock_nodes).
+
+%% The lock request is first sent to the local node if it is the first of
+%% the lockers, then to the first remaining node, and only when that one
+%% has succeeded to the lock managers on all the other nodes holding a
+%% copy of the table. Returns Orig once every lock has been granted;
+%% receive_wlocks/4 exits the process if a request is refused.
+get_wlocks_on_nodes([], Orig, _Store, _Request, _Oid) ->
+    Orig;
+get_wlocks_on_nodes([Node | Tail], Orig, Store, Request, Oid) ->
+    {?MODULE, Node} ! Request,
+    ?ets_insert(Store, {nodes, Node}),
+    receive_wlocks([Node], undefined, Store, Oid),
+    if
+        Node =:= node() ->
+            %% Local done try one more
+            get_wlocks_on_nodes(Tail, Orig, Store, Request, Oid);
+        true ->
+            %% The first succeeded, continue with the rest
+            get_wlocks_on_nodes(Tail, Store, Request),
+            receive_wlocks(Tail, Orig, Store, Oid)
+    end.
+
+%% Sends Request to the lock manager on every node in the list and notes
+%% each node under the 'nodes' key in Store. No replies are awaited here.
+%% Returns ok.
+get_wlocks_on_nodes(Nodes, Store, Request) ->
+    lists:foreach(fun(Node) ->
+                          {?MODULE, Node} ! Request,
+                          ?ets_insert(Store, {nodes, Node})
+                  end, Nodes).
+
+%% Takes write locks on all nodes in the list, using a read_write request
+%% for ReadNode so that its reply carries the record value. The second
+%% argument is the result so far; rwlock/3 passes the atom rwlock as the
+%% initial "no value yet" marker.
+%%
+%% First clause: the next node is the read node. Its read_write reply
+%% becomes the result. If that node is the local node the walk continues
+%% node by node; otherwise write requests go out to all remaining nodes at
+%% once and their replies are collected.
+get_rwlocks_on_nodes([ReadNode|Tail], _Res, ReadNode, Store, Tid, Oid) ->
+ Op = {self(), {read_write, Tid, Oid}},
+ {?MODULE, ReadNode} ! Op,
+ ?ets_insert(Store, {nodes, ReadNode}),
+ Res = receive_wlocks([ReadNode], undefined, Store, Oid),
+ case node() of
+ ReadNode ->
+ get_rwlocks_on_nodes(Tail, Res, ReadNode, Store, Tid, Oid);
+ _ ->
+ get_wlocks_on_nodes(Tail, Store, {self(), {write, Tid, Oid}}),
+ receive_wlocks(Tail, Res, Store, Oid)
+ end;
+%% Second clause: the next node is not the read node. A plain write lock
+%% is taken on it first. Then, if it was the local node, the walk
+%% continues; if no value has been read yet (Res == rwlock) the read node
+%% gets its read_write request together with write requests to the rest;
+%% otherwise only the write requests to the rest are sent.
+get_rwlocks_on_nodes([Node | Tail], Res, ReadNode, Store, Tid, Oid) ->
+ Op = {self(), {write, Tid, Oid}},
+ {?MODULE, Node} ! Op,
+ ?ets_insert(Store, {nodes, Node}),
+ receive_wlocks([Node], undefined, Store, Oid),
+ if node() == Node ->
+ get_rwlocks_on_nodes(Tail, Res, ReadNode, Store, Tid, Oid);
+ Res == rwlock -> %% Hmm
+ Rest = lists:delete(ReadNode, Tail),
+ Op2 = {self(), {read_write, Tid, Oid}},
+ {?MODULE, ReadNode} ! Op2,
+ ?ets_insert(Store, {nodes, ReadNode}),
+ get_wlocks_on_nodes(Rest, Store, {self(), {write, Tid, Oid}}),
+ receive_wlocks([ReadNode|Rest], undefined, Store, Oid);
+ true ->
+ get_wlocks_on_nodes(Tail, Store, {self(), {write, Tid, Oid}}),
+ receive_wlocks(Tail, Res, Store, Oid)
+ end;
+get_rwlocks_on_nodes([],Res,_,_,_,_) ->
+ Res.
+
+%% Collects the replies to outstanding write (or read_write) lock requests
+%% from Nodes. Returns Res, replaced by the value of a {granted, Val}
+%% reply if one arrives. A refusal, or a mnesia_down from the first node
+%% in the list, makes the process exit through flush_remaining/3 once the
+%% replies of the other nodes have been drained. The list of awaited nodes
+%% is mirrored in the process dictionary for debugging.
+%% NOTE(review): the 'Node' in the reply patterns is unbound, so a reply
+%% from any node is accepted; lists:delete/2 leaves Nodes unchanged when
+%% that node is not in the list.
+receive_wlocks([], Res, _Store, _Oid) ->
+ del_debug(),
+ Res;
+receive_wlocks(Nodes = [This|Ns], Res, Store, Oid) ->
+ add_debug(Nodes),
+ receive
+ {?MODULE, Node, granted} ->
+ receive_wlocks(lists:delete(Node,Nodes), Res, Store, Oid);
+ {?MODULE, Node, {granted, Val}} -> %% for rwlocks
+ case opt_lookup_in_client(Val, Oid, write) of
+ C = #cyclic{} ->
+ flush_remaining(Nodes, Node, {aborted, C});
+ Val2 ->
+ receive_wlocks(lists:delete(Node,Nodes), Val2, Store, Oid)
+ end;
+ {?MODULE, Node, {not_granted, Reason}} ->
+ Reason1 = {aborted, Reason},
+ flush_remaining(Nodes,Node,Reason1);
+ %% The table is stuck to node Sticky: only its answer matters, the
+ %% answers of the other nodes are drained with sticky_flush/2
+ {?MODULE, Node, {switch, Sticky, _Req}} -> %% for rwlocks
+ Tail = lists:delete(Node,Nodes),
+ Nonstuck = lists:delete(Sticky,Tail),
+ [?ets_insert(Store, {nodes, NSNode}) || NSNode <- Nonstuck],
+ case lists:member(Sticky,Tail) of
+ true ->
+ sticky_flush(Nonstuck,Store),
+ receive_wlocks([Sticky], Res, Store, Oid);
+ false ->
+ sticky_flush(Nonstuck,Store),
+ Res
+ end;
+ {mnesia_down, This} -> % Only look for down from Nodes in list
+ Reason1 = {aborted, {node_not_running, This}},
+ flush_remaining(Ns, This, Reason1)
+ end.
+
+%% Drains one lock manager reply (of any kind) per node, in list order.
+%% Returns ok when all have answered. A mnesia_down from the awaited node
+%% makes the process exit through flush_remaining/3.
+sticky_flush([], _) ->
+ del_debug(),
+ ok;
+sticky_flush(Ns=[Node | Tail], Store) ->
+ add_debug(Ns),
+ receive
+ {?MODULE, Node, _} ->
+ sticky_flush(Tail, Store);
+ {mnesia_down, Node} ->
+ Reason1 = {aborted, {node_not_running, Node}},
+ flush_remaining(Tail, Node, Reason1)
+ end.
+
+%% Drains the outstanding replies from the listed nodes and then exits the
+%% calling process with Res; it never returns. SkipNode is not waited for.
+%% If an awaited node goes down while draining, the exit reason is
+%% replaced by {aborted, {node_not_running, Node}}.
+flush_remaining([], _SkipNode, Res) ->
+ del_debug(),
+ exit(Res);
+flush_remaining([SkipNode | Tail ], SkipNode, Res) ->
+ flush_remaining(Tail, SkipNode, Res);
+flush_remaining(Ns=[Node | Tail], SkipNode, Res) ->
+ add_debug(Ns),
+ receive
+ {?MODULE, Node, _} ->
+ flush_remaining(Tail, SkipNode, Res);
+ {mnesia_down, Node} ->
+ flush_remaining(Tail, SkipNode, {aborted, {node_not_running, Node}})
+ end.
+
+%% Resolves the value part of a {granted, Val} reply. The marker
+%% lookup_in_client means that the lock manager left the read to the
+%% client, so the record is fetched here. If that read fails a #cyclic{}
+%% is returned so that the caller restarts the transaction. Any other
+%% value is passed through untouched.
+opt_lookup_in_client(lookup_in_client, Oid, Lock) ->
+    {Tab, Key} = Oid,
+    Looked = (catch mnesia_lib:db_get(Tab, Key)),
+    case Looked of
+        {'EXIT', _} ->
+            %% Table has been deleted from this node,
+            %% restart the transaction.
+            #cyclic{op = read, lock = Lock, oid = Oid, lucky = nowhere};
+        _ ->
+            Looked
+    end;
+opt_lookup_in_client(Val, _Oid, _Lock) ->
+    Val.
+
+%% Result of a plain 'granted' reply: the node list for whole-table and
+%% global Oids, the atom granted for a single record.
+return_granted_or_nodes({_, ?ALL} , Nodes) -> Nodes;
+return_granted_or_nodes({?GLOBAL, _}, Nodes) -> Nodes;
+return_granted_or_nodes(_ , _Nodes) -> granted.
+
+%% Acquires a read lock on Oid and reads the object.
+%% The lock is taken at the lock manager of the node the object is read
+%% from (where_to_read), and rlock_get_reply/4 notes {{locks, Tab, Key},
+%% read} in the local store. When the transaction already holds a covering
+%% lock of any kind no lock is requested: the result is [Node] for
+%% whole-table / global Oids and a dirty read otherwise.
+%% Aborts with {no_exists, Tab} when the table cannot be read anywhere.
+
+%% Oid's are always {Tab, Key} tuples
+rlock(Tid, Store, Oid) ->
+    {Tab, Key} = Oid,
+    case val({Tab, where_to_read}) of
+        nowhere ->
+            mnesia:abort({no_exists, Tab});
+        Node ->
+            case need_lock(Store, Tab, Key, '_') of
+                yes ->
+                    Answer = l_request(Node, {read, Tid, Oid}, Store),
+                    rlock_get_reply(Node, Store, Oid, Answer);
+                no when Key == ?ALL; Tab == ?GLOBAL ->
+                    [Node];
+                no ->
+                    dirty_rpc(Node, Tab, Key, read)
+            end
+    end.
+
+%% Reads {Tab, Key} on Node without taking a lock (one is already held).
+%% Whole-table and global Oids just yield [Node]. If the rpc fails the
+%% transaction is aborted when Node is still the where_to_read node, and
+%% restarted (exit with a #cyclic{}) when the table has moved meanwhile.
+dirty_rpc(nowhere, Tab, Key, _Lock) ->
+    mnesia:abort({no_exists, {Tab, Key}});
+dirty_rpc(Node, _Tab, ?ALL, _Lock) ->
+    [Node];
+dirty_rpc(Node, ?GLOBAL, _Key, _Lock) ->
+    [Node];
+dirty_rpc(Node, Tab, Key, Lock) ->
+    case rpc:call(Node, mnesia_lib, db_get, [Tab, Key]) of
+        {badrpc, Reason} ->
+            dirty_rpc_failed(Node, Tab, Key, Lock, Reason);
+        Result ->
+            Result
+    end.
+
+%% Never returns: aborts or restarts the transaction after a failed rpc.
+dirty_rpc_failed(Node, Tab, Key, Lock, Reason) ->
+    case val({Tab, where_to_read}) =:= Node of
+        true ->
+            ErrorTag = mnesia_lib:dirty_rpc_error_tag(Reason),
+            mnesia:abort({ErrorTag, [Tab, Key]});
+        false ->
+            %% Table has been deleted from the node,
+            %% restart the transaction.
+            exit({aborted, #cyclic{op = read, lock = Lock,
+                                   oid = {Tab, Key}, lucky = nowhere}})
+    end.
+
+%% Turns the lock manager's answer to a read request into the client's
+%% result, noting the lock and the node in Store on success:
+%%   {granted, V}           - the value, read locally if V is the
+%%                            lookup_in_client marker; aborts on #cyclic{}
+%%   granted                - see return_granted_or_nodes/2
+%%   {granted, V, RealKeys} - index read; here the third argument is the
+%%                            table name rather than an Oid
+%%   {not_granted, Reason}  - exits with {aborted, Reason}
+%%   {switch, N2, Req}      - the table is stuck to N2: the request is
+%%                            resent there and that answer is used
+rlock_get_reply(Node, Store, Oid, {granted, V}) ->
+ {Tab, Key} = Oid,
+ ?ets_insert(Store, {{locks, Tab, Key}, read}),
+ ?ets_insert(Store, {nodes, Node}),
+ case opt_lookup_in_client(V, Oid, read) of
+ C = #cyclic{} ->
+ mnesia:abort(C);
+ Val ->
+ Val
+ end;
+rlock_get_reply(Node, Store, Oid, granted) ->
+ {Tab, Key} = Oid,
+ ?ets_insert(Store, {{locks, Tab, Key}, read}),
+ ?ets_insert(Store, {nodes, Node}),
+ return_granted_or_nodes(Oid, [Node]);
+rlock_get_reply(Node, Store, Tab, {granted, V, RealKeys}) ->
+ %% Kept for backwards compatibility, keep until no old nodes
+ %% are available
+ L = fun(K) -> ?ets_insert(Store, {{locks, Tab, K}, read}) end,
+ lists:foreach(L, RealKeys),
+ ?ets_insert(Store, {nodes, Node}),
+ V;
+rlock_get_reply(_Node, _Store, _Oid, {not_granted, Reason}) ->
+ exit({aborted, Reason});
+
+rlock_get_reply(_Node, Store, Oid, {switch, N2, Req}) ->
+ ?ets_insert(Store, {nodes, N2}),
+ {?MODULE, N2} ! Req,
+ rlock_get_reply(N2, Store, Oid, l_req_rec(N2, Store)).
+
+%% Read lock on the whole table: rlock/3 on {Tab, ?ALL}.
+rlock_table(Tid, Store, Tab) ->
+ rlock(Tid, Store, {Tab, ?ALL}).
+
+%% Index read under a read lock on the whole table.
+%% When the transaction already holds a table read lock and the table is
+%% read on the local node, the index is consulted directly. In every other
+%% case the request goes to the lock manager on the where_to_read node,
+%% which both takes the lock and returns the result.
+%% Aborts with {no_exists, Tab} when the table cannot be read anywhere.
+ixrlock(Tid, Store, Tab, IxKey, Pos) ->
+    case val({Tab, where_to_read}) of
+        nowhere ->
+            mnesia:abort({no_exists, Tab});
+        Node ->
+            LockedAndLocal =
+                need_lock(Store, Tab, ?ALL, read) =:= no andalso Node =:= node(),
+            case LockedAndLocal of
+                true ->
+                    ix_read_res(Tab, IxKey, Pos);
+                false ->
+                    %% Lock needed, or the result must come from another node
+                    Answer = l_request(Node, {ix_read, Tid, Tab, IxKey, Pos}, Store),
+                    rlock_get_reply(Node, Store, Tab, Answer)
+            end
+    end.
+
+%% Grabs the locks or exits
+%% Locks the global resource Item, i.e. the Oid {?GLOBAL, Item}, on the
+%% nodes Ns. A write lock is taken with get_wlocks_on_nodes/5. For a read
+%% lock the request is sent to every node and the replies are collected
+%% with rec_requests/3; Ns is returned.
+global_lock(Tid, Store, Item, write, Ns) ->
+ Oid = {?GLOBAL, Item},
+ Op = {self(), {write, Tid, Oid}},
+ get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid);
+global_lock(Tid, Store, Item, read, Ns) ->
+ Oid = {?GLOBAL, Item},
+ send_requests(Ns, {read, Tid, Oid}),
+ rec_requests(Ns, Oid, Store),
+ Ns.
+
+%% Sends request X, tagged with the caller's pid, to the lock manager on
+%% every node in the list. Returns ok.
+send_requests(Nodes, X) ->
+    lists:foreach(fun(Node) -> {?MODULE, Node} ! {self(), X} end, Nodes).
+
+%% Collects the replies to send_requests/2, node by node in list order.
+%% If handling a reply terminates with an exit or error, the replies of
+%% the remaining nodes are drained and the process exits with the caught
+%% reason (flush_remaining/3). Returns ok otherwise.
+rec_requests([Node | Nodes], Oid, Store) ->
+ Res = l_req_rec(Node, Store),
+ case catch rlock_get_reply(Node, Store, Oid, Res) of
+ {'EXIT', Reason} ->
+ flush_remaining(Nodes, Node, Reason);
+ _ ->
+ rec_requests(Nodes, Oid, Store)
+ end;
+rec_requests([], _Oid, _Store) ->
+ ok.
+
+%% Debug interface: asks the local lock manager for the contents of its
+%% private mnesia_held_locks table and waits, without timeout, for the
+%% answer.
+get_held_locks() ->
+ ?MODULE ! {get_table, self(), mnesia_held_locks},
+ receive {mnesia_held_locks, Locks} -> Locks end.
+
+%% Debug interface: fetches the lock queue from the local lock manager and
+%% returns its entries as {Oid, Op, Pid, Tid, WaitForTid} tuples. Waits
+%% without timeout for the answer.
+get_lock_queue() ->
+ ?MODULE ! {get_table, self(), mnesia_lock_queue},
+ Q = receive {mnesia_lock_queue, Locks} -> Locks end,
+ [{Oid, Op, Pid, Tid, WFT} || {queue, Oid, Tid, Op, Pid, WFT} <- Q].
+
+%% Terminates the lock manager process with reason shutdown.
+do_stop() ->
+ exit(shutdown).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+%% sys callback: resumes the receive loop after a system message.
+system_continue(_Parent, _Debug, State) ->
+ loop(State).
+
+%% sys callback: stops the process; the supplied reason is ignored and the
+%% process exits with shutdown (see do_stop/0).
+system_terminate(_Reason, _Parent, _Debug, _State) ->
+ do_stop().
+
+%% sys callback: the state needs no conversion on code change.
+system_code_change(State, _Module, _OldVsn, _Extra) ->
+ {ok, State}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% AXD301 patch sort pids according to R9B sort order
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% If R9B == true, the comparison is done as in R9B plain.
+%% If R9B == false, the comparison is done as in all other releases.
+%% cmp_tid(R9B, T1, T2) returns -1 if T1 < T2, 0 if T1 = T2 and 1 if T1 > T2.
+
+-define(VERSION_MAGIC, 131).
+-define(ATOM_EXT, 100).
+-define(PID_EXT, 103).
+
+-record(pid_info, {serial, number, nodename, creation}).
+
+%% Three-way comparison of two #tid{} records: 0 when they are identical,
+%% otherwise the comparison of their counters, and for equal counters the
+%% comparison of the owning pids in R9B mode (R9B == true) or standard
+%% mode (R9B == false). Returns -1, 0 or 1.
+cmp_tid(R9B, #tid{} = Same, #tid{} = Same) when is_boolean(R9B) ->
+    0;
+cmp_tid(R9B,
+        #tid{counter = Counter, pid = PidA},
+        #tid{counter = Counter, pid = PidB}) when is_boolean(R9B) ->
+    cmp_pid_info(R9B, pid_to_pid_info(PidA), pid_to_pid_info(PidB));
+cmp_tid(R9B,
+        #tid{counter = CounterA},
+        #tid{counter = CounterB}) when is_boolean(R9B) ->
+    cmp(CounterA, CounterB).
+
+%% Three-way comparison of two #pid_info{} records. The fields are
+%% compared one by one and the first differing field decides:
+%%   standard mode (false): serial, number, nodename, creation
+%%   R9B mode (true):       nodename, creation, serial, number
+%% Identical records compare as 0.
+cmp_pid_info(_, #pid_info{} = PI, #pid_info{} = PI) ->
+    0;
+cmp_pid_info(false, #pid_info{} = A, #pid_info{} = B) ->
+    cmp_pid_fields([#pid_info.serial, #pid_info.number,
+                    #pid_info.nodename, #pid_info.creation], A, B);
+cmp_pid_info(true, #pid_info{} = A, #pid_info{} = B) ->
+    cmp_pid_fields([#pid_info.nodename, #pid_info.creation,
+                    #pid_info.serial, #pid_info.number], A, B).
+
+%% Result of cmp/2 for the first listed field that differs. Only called
+%% with records that differ in at least one field.
+cmp_pid_fields([Pos | Rest], A, B) ->
+    case cmp(element(Pos, A), element(Pos, B)) of
+        0 ->
+            cmp_pid_fields(Rest, A, B);
+        Order ->
+            Order
+    end.
+
+%% Three-way comparison: 0 for exactly equal terms, -1 when the first
+%% term sorts before the second and 1 otherwise.
+cmp(X1, X2) ->
+    if
+        X1 =:= X2 -> 0;
+        X1 < X2 -> -1;
+        true -> 1
+    end.
+
+%% Decodes a pid into a #pid_info{} by inspecting its external term format
+%% encoding (term_to_binary/1). The node name is taken from node(Pid); the
+%% encoded name is only skipped over.
+%%
+%% Besides the original PID_EXT / ATOM_EXT layout, the encodings emitted by
+%% later OTP releases are accepted as well: NEW_PID_EXT (tag 88, four byte
+%% creation) and the node name tags SMALL_ATOM_EXT (115), ATOM_UTF8_EXT
+%% (118) and SMALL_ATOM_UTF8_EXT (119). Such pids used to make the list
+%% match fail with badmatch. Any other layout still crashes.
+pid_to_pid_info(Pid) when is_pid(Pid) ->
+    [?VERSION_MAGIC, PidTag, AtomTag | Rest0] =
+        binary_to_list(term_to_binary(Pid)),
+    {NameLen, Rest1} = pid_node_name_length(AtomTag, Rest0),
+    %% After the node name: 4 bytes id (number), 4 bytes serial, creation
+    [N3, N2, N1, N0, S3, S2, S1, S0 | CreationBytes] = drop(NameLen, Rest1),
+    #pid_info{serial = bytes2int(S3, S2, S1, S0),
+              number = bytes2int(N3, N2, N1, N0),
+              nodename = node(Pid),
+              creation = pid_creation(PidTag, CreationBytes)}.
+
+%% Length of the encoded node name, and the bytes following the length
+%% field. The length is two bytes for ATOM_EXT / ATOM_UTF8_EXT and one
+%% byte for the SMALL_ variants.
+pid_node_name_length(Tag, [L1, L0 | Rest]) when Tag =:= ?ATOM_EXT; Tag =:= 118 ->
+    {bytes2int(L1, L0), Rest};
+pid_node_name_length(Tag, [L | Rest]) when Tag =:= 115; Tag =:= 119 ->
+    {L, Rest}.
+
+%% Creation field: one byte for PID_EXT, four bytes for NEW_PID_EXT (88).
+pid_creation(?PID_EXT, [Creation]) ->
+    Creation;
+pid_creation(88, [C3, C2, C1, C0]) ->
+    bytes2int(C3, C2, C1, C0).
+
+%% Returns L without its first N elements, or [] when L is shorter than
+%% that. N must be a non-negative integer.
+drop(N, L) when is_integer(N), N > 0 ->
+    case L of
+        [_ | Tail] -> drop(N - 1, Tail);
+        [] -> []
+    end;
+drop(0, L) ->
+    L.
+
+%% Combines two bytes (most significant first) into an unsigned integer.
+bytes2int(N1, N0) when 0 =< N1, N1 =< 255,
+                       0 =< N0, N0 =< 255 ->
+    lists:foldl(fun(Byte, Acc) -> (Acc bsl 8) bor Byte end, 0, [N1, N0]).
+%% Combines four bytes (most significant first) into an unsigned integer.
+bytes2int(N3, N2, N1, N0) when 0 =< N3, N3 =< 255,
+                               0 =< N2, N2 =< 255,
+                               0 =< N1, N1 =< 255,
+                               0 =< N0, N0 =< 255 ->
+    lists:foldl(fun(Byte, Acc) -> (Acc bsl 8) bor Byte end, 0, [N3, N2, N1, N0]).
+
diff --git a/lib/mnesia/src/mnesia_log.erl b/lib/mnesia/src/mnesia_log.erl
new file mode 100644
index 0000000000..00ec4740ee
--- /dev/null
+++ b/lib/mnesia/src/mnesia_log.erl
@@ -0,0 +1,1025 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% This module administers three kinds of log files:
+%%
+%% 1 The transaction log
+%% mnesia_tm appends to the log (via mnesia_log) at the
+%% end of each transaction (or dirty write) and
+%% mnesia_dumper reads the log and performs the ops in
+%% the dat files. The dump_log is done at startup and
+%% at intervals controlled by the user.
+%%
+%% 2 The mnesia_down log
+%% mnesia_tm appends to the log (via mnesia_log) when it
+%% realizes that mnesia goes up or down on another node.
+%% mnesia_init reads the log (via mnesia_log) at startup.
+%%
+%% 3 The backup log
+%% mnesia_schema produces one tiny log when the schema is
+%% initially created. mnesia_schema also reads the log
+%% when the user wants tables (possibly incl the schema)
+%% to be restored. mnesia_log appends to the log when the
+%% user wants to produce a real backup.
+%%
+%% The actual access to the backup media is performed via the
+%% mnesia_backup module for both read and write. mnesia_backup
+%% uses the disk_log (*), BUT the user may write their own module
+%% with the same interface as mnesia_backup and configure
+%% Mnesia so the alternate module performs the actual accesses
+%% to the backup media. This means that the user may put the
+%% backup on media that Mnesia does not know about, possibly on
+%% hosts where Erlang is not running.
+%%
+%% All these logs have to some extent a common structure.
+%% They are all using the disk_log module (*) for the basic
+%% file structure. The disk_log has a repair feature that
+%% can be used to skip erroneous log records if one comes to
+%% the conclusion that it is more important to reuse some
+%% of the log records than to avoid the risk of obtaining inconsistent
+%% data. If the data becomes inconsistent it is solely up to the
+%% application to make it consistent again. The automatic
+%% reparation of the disk_log is very powerful, but use it
+%% with extreme care.
+%%
+%% First in every Mnesia log file is a mnesia log header.
+%% It contains a list with a log_header record as single
+%% element. The structure of the log_header may never be
+%% changed since it may be written to very old backup files.
+%% By holding this record definition stable we are
+%% able to comprehend backups from timepoint 0. It also
+%% allows us to use the backup format as an interchange
+%% format between Mnesia releases.
+%%
+%% An op-list is a list of tuples with arity 3. Each tuple
+%% has this structure: {Oid, Recs, Op} where Oid is the tuple
+%% {Tab, Key}, Recs is a (possibly empty) list of records and
+%% Op is an atom.
+%%
+%% The log file structure for the transaction log is as follows.
+%%
+%% After the mnesia log section follows an extended record section
+%% containing op-lists. There are several values that Op may
+%% have, such as write, delete, update_counter, delete_object,
+%% and replace. There is no special end of section marker.
+%%
+%% +-----------------+
+%% | mnesia log head |
+%% +-----------------+
+%% | extended record |
+%% | section |
+%% +-----------------+
+%%
+%% The log file structure for the mnesia_down log is as follows.
+%%
+%% After the mnesia log section follows a mnesia_down section
+%% containing lists with yoyo records as single element.
+%%
+%% +-----------------+
+%% | mnesia log head |
+%% +-----------------+
+%% | mnesia_down |
+%% | section |
+%% +-----------------+
+%%
+%% The log file structure for the backup log is as follows.
+%%
+%% After the mnesia log section follows a schema section
+%% containing record lists. A record list is a list of tuples
+%% where {schema, Tab} is interpreted as a delete_table(Tab) and
+%% {schema, Tab, CreateList} are interpreted as create_table.
+%%
+%% The record section also contains record lists. In this section
+%% {Tab, Key} is interpreted as delete({Tab, Key}) and other tuples
+%% as write(Tuple). There is no special end of section marker.
+%%
+%% +-----------------+
+%% | mnesia log head |
+%% +-----------------+
+%% | schema section |
+%% +-----------------+
+%% | record section |
+%% +-----------------+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(mnesia_log).
+
+-export([
+ append/2,
+ backup/1,
+ backup/2,
+ backup_checkpoint/2,
+ backup_checkpoint/3,
+ backup_log_header/0,
+ backup_master/2,
+ chunk_decision_log/1,
+ chunk_decision_tab/1,
+ chunk_log/1,
+ chunk_log/2,
+ close_decision_log/0,
+ close_decision_tab/0,
+ close_log/1,
+ unsafe_close_log/1,
+ confirm_log_dump/1,
+ confirm_decision_log_dump/0,
+ previous_log_file/0,
+ previous_decision_log_file/0,
+ latest_log_file/0,
+ decision_log_version/0,
+ decision_log_file/0,
+ decision_tab_file/0,
+ decision_tab_version/0,
+ dcl_version/0,
+ dcd_version/0,
+ ets2dcd/1,
+ ets2dcd/2,
+ dcd2ets/1,
+ dcd2ets/2,
+ init/0,
+ init_log_dump/0,
+ log/1,
+ slog/1,
+ log_decision/1,
+ log_files/0,
+ open_decision_log/0,
+ trans_log_header/0,
+ open_decision_tab/0,
+ dcl_log_header/0,
+ dcd_log_header/0,
+ open_log/4,
+ open_log/6,
+ prepare_decision_log_dump/0,
+ prepare_log_dump/1,
+ save_decision_tab/1,
+ purge_all_logs/0,
+ purge_some_logs/0,
+ stop/0,
+ tab_copier/3,
+ version/0,
+ view/0,
+ view/1,
+ write_trans_log_header/0
+ ]).
+
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [val/1, dir/1]).
+-import(mnesia_lib, [exists/1, fatal/2, error/2, dbg_out/2]).
+
+%% Header constructors, one per kind of log. Each returns the
+%% #log_header{} built by log_header/2 for that kind and its version.
+trans_log_header() ->
+    log_header(trans_log, version()).
+
+backup_log_header() ->
+    log_header(backup_log, "1.2").
+
+decision_log_header() ->
+    log_header(decision_log, decision_log_version()).
+
+decision_tab_header() ->
+    log_header(decision_tab, decision_tab_version()).
+
+dcl_log_header() ->
+    log_header(dcl_log, dcl_version()).
+
+dcd_log_header() ->
+    log_header(dcd_log, dcd_version()).
+
+%% Builds the #log_header{} record written first in a log file.
+%% Kind and Version identify the log format; the running Mnesia
+%% version, the local node name and the current time are stamped in.
+log_header(Kind, Version) ->
+    MnesiaVsn = mnesia:system_info(version),
+    Node = node(),
+    Stamp = now(),
+    #log_header{log_kind = Kind,
+                log_version = Version,
+                mnesia_version = MnesiaVsn,
+                node = Node,
+                now = Stamp}.
+
+%% Format version strings stamped into the headers of the different logs.
+
+%% Transaction log.
+version() ->
+    "4.3".
+
+%% Decision log.
+decision_log_version() ->
+    "3.0".
+
+%% Decision table.
+decision_tab_version() ->
+    "1.0".
+
+%% Per-table dcl log.
+dcl_version() ->
+    "1.0".
+
+%% Per-table dcd file.
+dcd_version() ->
+    "1.0".
+
+%% Asynchronous append to a disk log. Binaries are written with
+%% disk_log:balog/2, all other terms with disk_log:alog/2.
+append(Log, Data) ->
+    case is_binary(Data) of
+        true ->
+            disk_log:balog(Log, Data);
+        false ->
+            disk_log:alog(Log, Data)
+    end.
+
+%% Synchronous append to a disk log. Binaries are written with
+%% disk_log:blog/2, all other terms with disk_log:log/2. Any result
+%% other than ok crashes the caller with badmatch.
+sappend(Log, Data) ->
+    Result =
+        case is_binary(Data) of
+            true -> disk_log:blog(Log, Data);
+            false -> disk_log:log(Log, Data)
+        end,
+    ok = Result.
+
+%% Appends a commit record (or a binary / decision related term) to
+%% latest_log without waiting for the write, and bumps the log write
+%% counter. Commit records that touch neither disc tables nor the schema
+%% are ignored, and nothing is logged when Mnesia runs without a
+%% directory. The ram_copies and snmp fields are cleared before a commit
+%% record is written.
+log(C) when C#commit.disc_copies == [],
+            C#commit.disc_only_copies == [],
+            C#commit.schema_ops == [] ->
+    ignore;
+log(C) ->
+    case mnesia_monitor:use_dir() of
+        false ->
+            ignore;
+        true ->
+            Entry =
+                case is_record(C, commit) of
+                    true ->
+                        C#commit{ram_copies = [], snmp = []};
+                    false ->
+                        %% Either a commit record as binary
+                        %% or some decision related info
+                        C
+                end,
+            append(latest_log, Entry),
+            mnesia_dumper:incr_log_writes()
+    end.
+
+%% Synchronous counterpart of log/1: the entry is written to latest_log
+%% with sappend/2, so a failed write crashes the caller. Commit records
+%% that touch neither disc tables nor the schema are ignored, and nothing
+%% is logged when Mnesia runs without a directory.
+slog(C) when C#commit.disc_copies == [],
+             C#commit.disc_only_copies == [],
+             C#commit.schema_ops == [] ->
+    ignore;
+slog(C) ->
+    case mnesia_monitor:use_dir() of
+        false ->
+            ignore;
+        true ->
+            Entry =
+                case is_record(C, commit) of
+                    true ->
+                        C#commit{ram_copies = [], snmp = []};
+                    false ->
+                        %% Either a commit record as binary
+                        %% or some decision related info
+                        C
+                end,
+            sappend(latest_log, Entry),
+            mnesia_dumper:incr_log_writes()
+    end.
+
+
+%% Stuff related to the file LOG
+
+%% Returns the file names of the previous transaction log, the latest
+%% transaction log and the decision table, in that order (oldest first).
+log_files() ->
+    Previous = previous_log_file(),
+    Latest = latest_log_file(),
+    DecisionTab = decision_tab_file(),
+    [Previous, Latest, DecisionTab].
+
+%% File names of the logs. The *_file/0 functions pass the bare name
+%% through dir/1 (imported from mnesia_lib); the *_name/0 functions
+%% return the bare name only.
+latest_log_file() ->
+    dir(latest_log_name()).
+
+previous_log_file() ->
+    dir("PREVIOUS.LOG").
+
+decision_log_file() ->
+    dir(decision_log_name()).
+
+decision_tab_file() ->
+    dir(decision_tab_name()).
+
+previous_decision_log_file() ->
+    dir("PDECISION.LOG").
+
+latest_log_name() ->
+    "LATEST.LOG".
+
+decision_log_name() ->
+    "DECISION.LOG".
+
+decision_tab_name() ->
+    "DECISION_TAB.LOG".
+
+%% Creates a fresh transaction log when Mnesia uses a directory.
+%% Neither the previous nor the latest log file may exist at this point
+%% (verify_no_exists/1 reports a fatal error otherwise). Returns the
+%% result of open_log/3, or ok when no directory is used.
+init() ->
+    case mnesia_monitor:use_dir() of
+        false ->
+            ok;
+        true ->
+            verify_no_exists(previous_log_file()),
+            LatestFile = latest_log_file(),
+            verify_no_exists(LatestFile),
+            LogHeader = trans_log_header(),
+            open_log(latest_log, LogHeader, LatestFile)
+    end.
+
+%% Returns ok when Fname does not exist; reports a fatal error through
+%% fatal/2 when it does.
+verify_no_exists(Fname) ->
+    case exists(Fname) of
+        true ->
+            fatal("Log file exists: ~p~n", [Fname]);
+        false ->
+            ok
+    end.
+
+%% open_log/3: checks whether Fname already exists and continues with
+%% open_log/4.
+open_log(Name, Header, Fname) ->
+    open_log(Name, Header, Fname, exists(Fname)).
+
+%% open_log/4: uses the auto_repair setting as repair flag.
+open_log(Name, Header, Fname, Exists) ->
+    AutoRepair = mnesia_monitor:get_env(auto_repair),
+    open_log(Name, Header, Fname, Exists, AutoRepair).
+
+%% open_log/5: the log named previous_log is opened read_only, every
+%% other log read_write.
+open_log(Name, Header, Fname, Exists, Repair) ->
+    Mode =
+        if
+            Name == previous_log -> read_only;
+            true -> read_write
+        end,
+    open_log(Name, Header, Fname, Exists, Repair, Mode).
+
+%% open_log/6: opens the disk log Name on file Fname through
+%% mnesia_monitor:open_log/1 and returns the log.
+%%
+%% Header is appended only when Exists is not true and the log was
+%% opened cleanly, i.e. {ok, Log} or a repair that lost 0 bytes.
+%% A repair that lost data is reported via mnesia_lib:important/2 and
+%% the log is returned without writing a header.
+%% On {error, Reason} with Repair == true the file is deleted and a new
+%% read_write log is created (with Repair = false, so a second failure
+%% ends in fatal/2). With any other Repair value the error is reported
+%% through fatal/2 directly.
+open_log(Name, Header, Fname, Exists, Repair, Mode) ->
+ Args = [{file, Fname}, {name, Name}, {repair, Repair}, {mode, Mode}],
+%% io:format("~p:open_log: ~p ~p~n", [?MODULE, Name, Fname]),
+ case mnesia_monitor:open_log(Args) of
+ {ok, Log} when Exists == true ->
+ Log;
+ {ok, Log} ->
+ write_header(Log, Header),
+ Log;
+ {repaired, Log, _, {badbytes, 0}} when Exists == true ->
+ Log;
+ {repaired, Log, _, {badbytes, 0}} ->
+ write_header(Log, Header),
+ Log;
+ {repaired, Log, _Recover, BadBytes} ->
+ mnesia_lib:important("Data may be missing, log ~p repaired: Lost ~p bytes~n",
+ [Fname, BadBytes]),
+ Log;
+ {error, Reason} when Repair == true ->
+ file:delete(Fname),
+ mnesia_lib:important("Data may be missing, Corrupt logfile deleted: ~p, ~p ~n",
+ [Fname, Reason]),
+ %% Create a new
+ open_log(Name, Header, Fname, false, false, read_write);
+ {error, Reason} ->
+ fatal("Cannot open log file ~p: ~p~n", [Fname, Reason])
+ end.
+
+%% Appends Header to Log using the asynchronous append/2.
+write_header(Log, Header) ->
+    append(Log, Header).
+
+%% Appends a freshly built transaction log header to latest_log.
+write_trans_log_header() ->
+    Header = trans_log_header(),
+    write_header(latest_log, Header).
+
+%% Closes latest_log when Mnesia uses a directory; otherwise does
+%% nothing and returns ok.
+stop() ->
+    case mnesia_monitor:use_dir() of
+        false ->
+            ok;
+        true ->
+            close_log(latest_log)
+    end.
+
+%% Syncs Log to disk and then closes it via mnesia_monitor:close_log/1,
+%% whose result is returned. A read_only_mode error from the sync is
+%% silently accepted; any other sync error is reported with
+%% mnesia_lib:important/2 but does not prevent the close.
+close_log(Log) ->
+%% io:format("mnesia_log:close_log ~p~n", [Log]),
+ case disk_log:sync(Log) of
+ ok -> ok;
+ {error, {read_only_mode, Log}} ->
+ ok;
+ {error, Reason} ->
+ mnesia_lib:important("Failed syncing ~p to_disk reason ~p ~n",
+ [Log, Reason])
+ end,
+ mnesia_monitor:close_log(Log).
+
+%% Closes Log via mnesia_monitor:unsafe_close_log/1. Unlike close_log/1
+%% no disk_log:sync/1 is performed first.
+unsafe_close_log(Log) ->
+%% io:format("mnesia_log:close_log ~p~n", [Log]),
+ mnesia_monitor:unsafe_close_log(Log).
+
+
+%% Closes latest_log (unsafe close, no sync) and deletes the latest
+%% transaction log file and the decision table file. The previous log
+%% file is left untouched. Results of the deletes are not checked; the
+%% value of the last file:delete/1 is returned.
+purge_some_logs() ->
+ mnesia_monitor:unsafe_close_log(latest_log),
+ file:delete(latest_log_file()),
+ file:delete(decision_tab_file()).
+
+%% Deletes the previous log file, the latest log file and the decision
+%% table file. No log is closed here and the results of the deletes are
+%% not checked; the value of the last file:delete/1 is returned.
+purge_all_logs() ->
+ file:delete(previous_log_file()),
+ file:delete(latest_log_file()),
+ file:delete(decision_tab_file()).
+
+%% Prepare a dump of the transaction log, renaming the open log file to
+%% the previous log file when needed (see prepare_prev/4).
+%%
+%% Diff is the number of log writes since the last confirmed dump.
+%% Returns one of:
+%%   already_dumped     - nothing written since the last dump (and
+%%                        InitBy is not startup), no directory is used,
+%%                        or at startup no latest log file exists
+%%   {needs_dump, Diff} - a previous log file is in place to be dumped
+%%   {error, Reason}    - the log file could not be renamed/reopened
+prepare_log_dump(InitBy) ->
+ Diff = mnesia_dumper:get_log_writes() -
+ mnesia_lib:read_counter(trans_log_writes_prev),
+ if
+ Diff == 0, InitBy /= startup ->
+ already_dumped;
+ true ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Prev = previous_log_file(),
+ prepare_prev(Diff, InitBy, Prev, exists(Prev));
+ false ->
+ already_dumped
+ end
+ end.
+
+%% prepare_prev(Diff, InitBy, Prev, PrevExists)
+%% Makes sure the previous log file Prev holds the data to dump.
+%%
+%% - Prev already exists: nothing is renamed, it must be dumped first.
+%% - startup and no Prev: the latest log file (not opened by this
+%%   function) is renamed to Prev with file:rename/2; if there is no
+%%   latest log file there is nothing to dump.
+%% - otherwise: the open latest_log is switched over through
+%%   mnesia_monitor:reopen_log/3, which is given Prev and a new
+%%   transaction log header.
+prepare_prev(Diff, _, _, true) ->
+ {needs_dump, Diff};
+prepare_prev(Diff, startup, Prev, false) ->
+ Latest = latest_log_file(),
+ case exists(Latest) of
+ true ->
+ case file:rename(Latest, Prev) of
+ ok ->
+ {needs_dump, Diff};
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ false ->
+ already_dumped
+ end;
+prepare_prev(Diff, _InitBy, Prev, false) ->
+ Head = trans_log_header(),
+ case mnesia_monitor:reopen_log(latest_log, Prev, Head) of
+ ok ->
+ {needs_dump, Diff};
+ {error, Reason} ->
+ Latest = latest_log_file(),
+ {error, {"Cannot rename log file",
+ [Latest, Prev, Reason]}}
+ end.
+
+%% Init dump: opens the previous log file under the name previous_log
+%% (read_only, see open_log/5) and returns the atom start, the initial
+%% continuation to pass to chunk_log/1. The log itself is not returned.
+init_log_dump() ->
+ Fname = previous_log_file(),
+ open_log(previous_log, trans_log_header(), Fname),
+ start.
+
+
+%% chunk_log/1: reads the next chunk from previous_log.
+chunk_log(Cont) ->
+    chunk_log(previous_log, Cont).
+
+%% chunk_log/2: reads the next chunk of terms from Log, starting at the
+%% continuation Cont. The continuation eof is answered with eof without
+%% touching the log. An {error, Reason} from disk_log:chunk/2 is
+%% reported through fatal/2. When the chunk comes with a count of bad
+%% bytes, the loss is reported via mnesia_lib:important/2 and the count
+%% is stripped from the result. Everything else is returned unchanged.
+chunk_log(_Log, eof) ->
+    eof;
+chunk_log(Log, Cont) ->
+    case catch disk_log:chunk(Log, Cont) of
+        {error, Reason} ->
+            fatal("Possibly truncated ~p file: ~p~n",
+                  [Log, Reason]);
+        {NextCont, Terms, BadBytes} ->
+            %% Read_only case, should we warn about the bad log file?
+            %% BUGBUG Should we crash if Repair == false ??
+            %% We got to check this !!
+            mnesia_lib:important("~p repaired, lost ~p bad bytes~n",
+                                 [Log, BadBytes]),
+            {NextCont, Terms};
+        Result ->
+            Result
+    end.
+
+%% Confirms the dump: closes previous_log, deletes its file and adds
+%% Updates to the trans_log_writes_prev counter. Returns dumped, or the
+%% {error, Reason} from closing the log (nothing is deleted then).
+confirm_log_dump(Updates) ->
+    case mnesia_monitor:close_log(previous_log) of
+        {error, _Reason} = Error ->
+            Error;
+        ok ->
+            file:delete(previous_log_file()),
+            mnesia_lib:incr_counter(trans_log_writes_prev, Updates),
+            dumped
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Decision log
+
+%% Opens the decision log file under the name decision_log and returns
+%% the atom start.
+open_decision_log() ->
+    LogFile = decision_log_file(),
+    Header = decision_log_header(),
+    open_log(decision_log, Header, LogFile),
+    start.
+
+%% Prepares a dump of the decision log and returns the atom start, the
+%% initial continuation for chunk_decision_log/1.
+%%
+%% If no previous decision log file exists, the open decision_log is
+%% first switched over through mnesia_monitor:reopen_log/3 (a failure is
+%% reported through fatal/2). The previous decision log file is then
+%% opened under the name previous_decision_log.
+prepare_decision_log_dump() ->
+ Prev = previous_decision_log_file(),
+ prepare_decision_log_dump(exists(Prev), Prev).
+
+prepare_decision_log_dump(false, Prev) ->
+ Head = decision_log_header(),
+ case mnesia_monitor:reopen_log(decision_log, Prev, Head) of
+ ok ->
+ prepare_decision_log_dump(true, Prev);
+ {error, Reason} ->
+ fatal("Cannot rename decision log file ~p -> ~p: ~p~n",
+ [decision_log_file(), Prev, Reason])
+ end;
+prepare_decision_log_dump(true, Prev) ->
+ open_log(previous_decision_log, decision_log_header(), Prev),
+ start.
+
+%% Reads the next chunk from previous_decision_log; see chunk_log/2 for
+%% the possible results.
+chunk_decision_log(Cont) ->
+ %% dbg_out("chunk log ~p~n", [Cont]),
+ chunk_log(previous_decision_log, Cont).
+
+%% Confirms the dump of the decision log: closes previous_decision_log
+%% and deletes its file (returning the result of file:delete/1). A
+%% failure to close the log is reported through fatal/2.
+confirm_decision_log_dump() ->
+    case mnesia_monitor:close_log(previous_decision_log) of
+        {error, Reason} ->
+            fatal("Cannot confirm decision log dump: ~p~n",
+                  [Reason]);
+        ok ->
+            file:delete(previous_decision_log_file())
+    end.
+
+%% Saves Decisions as the new decision table file.
+%% The data is first written to DECISION_TAB.TMP (any old temporary file
+%% is deleted, then the log is opened under the name decision_tab,
+%% appended to and closed) and the temporary file is then renamed onto
+%% the real decision table file. Crashes with badmatch if the rename
+%% fails.
+save_decision_tab(Decisions) ->
+ Log = decision_tab,
+ Tmp = mnesia_lib:dir("DECISION_TAB.TMP"),
+ file:delete(Tmp),
+ open_log(Log, decision_tab_header(), Tmp),
+ append(Log, Decisions),
+ close_log(Log),
+ TabFile = decision_tab_file(),
+ ok = file:rename(Tmp, TabFile).
+
+%% Opens the decision table file under the name decision_tab and
+%% returns the atom start, the initial continuation for
+%% chunk_decision_tab/1.
+open_decision_tab() ->
+ TabFile = decision_tab_file(),
+ open_log(decision_tab, decision_tab_header(), TabFile),
+ start.
+
+%% Syncs and closes the decision_tab log (see close_log/1).
+close_decision_tab() ->
+ close_log(decision_tab).
+
+%% Reads the next chunk from the decision_tab log (see chunk_log/2).
+chunk_decision_tab(Cont) ->
+ %% dbg_out("chunk tab ~p~n", [Cont]),
+ chunk_log(decision_tab, Cont).
+
+%% Syncs and closes the decision_log log (see close_log/1).
+close_decision_log() ->
+ close_log(decision_log).
+
+%% Appends Decision to decision_log without waiting for the write.
+log_decision(Decision) ->
+ append(decision_log, Decision).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Debug functions
+
+%% Debug aid: prints the contents of every file in log_files/0.
+view() ->
+    lists:foreach(fun view/1, log_files()).
+
+%% Debug aid: prints a banner with the file name and then every term
+%% stored in the disk log File. The file is opened read_only under the
+%% log name view_only. Returns nolog when the file does not exist.
+view(File) ->
+    mnesia_lib:show("***** ~p ***** ~n", [File]),
+    case exists(File) of
+        false ->
+            nolog;
+        true ->
+            LogName = view_only,
+            OpenArgs = [{file, File}, {name, LogName}, {mode, read_only}],
+            case disk_log:open(OpenArgs) of
+                {error, Reason} ->
+                    error("Cannot open log ~p: ~p~n", [File, Reason]);
+                {ok, LogName} ->
+                    view_file(start, LogName);
+                {repaired, _, _, _} ->
+                    view_file(start, LogName)
+            end
+    end.
+
+%% Prints every term in Log, chunk by chunk, starting at continuation C.
+%% Returns eof when the whole log has been shown and error when a chunk
+%% could not be read. Lost bytes reported by disk_log:chunk/2 are noted
+%% through dbg_out/2.
+%%
+%% The log is closed on both exits. Leaving it open after a read error
+%% would keep the name view_only bound to this file, so the next view/1
+%% of another file could not open its log.
+view_file(C, Log) ->
+    case disk_log:chunk(Log, C) of
+        {error, Reason} ->
+            error("** Possibly truncated FILE ~p~n", [Reason]),
+            disk_log:close(Log),
+            error;
+        eof ->
+            disk_log:close(Log),
+            eof;
+        {C2, Terms, BadBytes} ->
+            dbg_out("Lost ~p bytes in ~p ~n", [BadBytes, Log]),
+            lists:foreach(fun(X) -> mnesia_lib:show("~p~n", [X]) end,
+                          Terms),
+            view_file(C2, Log);
+        {C2, Terms} ->
+            lists:foreach(fun(X) -> mnesia_lib:show("~p~n", [X]) end,
+                          Terms),
+            view_file(C2, Log)
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup
+
+-record(backup_args, {name, module, opaque, scope, prev_name, tables, cookie}).
+
+%% backup/1: backs up with an empty argument list.
+backup(Opaque) ->
+    backup(Opaque, []).
+
+%% backup/2: the second argument is either a backup module (an atom) or
+%% a list of backup arguments. A checkpoint covering all tables in the
+%% schema with max redundancy is activated, backed up with
+%% backup_checkpoint/3 and deactivated again. Returns the result of the
+%% backup, or the {error, Reason} from activating the checkpoint.
+backup(Opaque, Mod) when is_atom(Mod) ->
+    backup(Opaque, [{module, Mod}]);
+backup(Opaque, Args) when is_list(Args) ->
+    AllTabs = val({schema, tables}),
+    CheckpointArgs = [{ram_overrides_dump, false}, {max, AllTabs}],
+    case mnesia_checkpoint:activate(CheckpointArgs) of
+        {error, _Reason} = Error ->
+            Error;
+        {ok, CpName, _Nodes} ->
+            Outcome = backup_checkpoint(CpName, Opaque, Args),
+            mnesia_checkpoint:deactivate(CpName),
+            Outcome
+    end.
+
+%% backup_checkpoint/2: backs up checkpoint Name with default arguments.
+backup_checkpoint(Name, Opaque) ->
+    backup_checkpoint(Name, Opaque, []).
+
+%% backup_checkpoint/3: the third argument is either a backup module (an
+%% atom) or a list of backup arguments. Defaults are: the configured
+%% backup_module, global scope, all tables and prev_name = Name. After
+%% the arguments have been checked, a linked backup_master process does
+%% the work and the caller waits for its {Pid, Self, Result} reply.
+backup_checkpoint(Name, Opaque, Mod) when is_atom(Mod) ->
+    backup_checkpoint(Name, Opaque, [{module, Mod}]);
+backup_checkpoint(Name, Opaque, Args) when is_list(Args) ->
+    Defaults = #backup_args{name = Name,
+                            module = mnesia_monitor:get_env(backup_module),
+                            opaque = Opaque,
+                            scope = global,
+                            tables = all,
+                            prev_name = Name},
+    case check_backup_args(Args, Defaults) of
+        {error, _Reason} = Error ->
+            Error;
+        {ok, BackupArgs} ->
+            Client = self(),
+            Master = spawn_link(?MODULE, backup_master, [Client, BackupArgs]),
+            receive
+                {Master, Client, Result} -> Result
+            end
+    end.
+
+%% Folds the user supplied backup arguments into the #backup_args{}
+%% record B. Returns {ok, B2}, or {error, {badarg, Arg}} for the first
+%% argument on which check_backup_arg_type/2 crashes.
+check_backup_args([], B) ->
+    {ok, B};
+check_backup_args([Arg | Tail], B) ->
+    case catch check_backup_arg_type(Arg, B) of
+        {'EXIT', _Reason} ->
+            {error, {badarg, Arg}};
+        Updated ->
+            check_backup_args(Tail, Updated)
+    end.
+
+%% Applies one backup argument to the #backup_args{} record B and
+%% returns the updated record. Accepted arguments are
+%% {scope, global | local}, {module, Mod} (Mod is run through
+%% mnesia_monitor:do_check_type/2), {incremental, Name} (stored as
+%% prev_name) and {tables, Tabs} where Tabs is a list.
+%% Anything else crashes with case_clause; the caller catches that.
+check_backup_arg_type(Arg, B) ->
+ case Arg of
+ {scope, global} ->
+ B#backup_args{scope = global};
+ {scope, local} ->
+ B#backup_args{scope = local};
+ {module, Mod} ->
+ Mod2 = mnesia_monitor:do_check_type(backup_module, Mod),
+ B#backup_args{module = Mod2};
+ {incremental, Name} ->
+ B#backup_args{prev_name = Name};
+ {tables, Tabs} when is_list(Tabs) ->
+ B#backup_args{tables = Tabs}
+ end.
+
+%% Entry point of the process spawned by backup_checkpoint/3.
+%% Traps exits, runs do_backup_master/1 under catch and reports the
+%% outcome to ClientPid as {self(), ClientPid, Result}. A crash is
+%% reported as {error, {'EXIT', Reason}}; a value thrown by
+%% abort_write/4 ({error, Reason}) is returned by the catch and sent as
+%% Result unchanged. The process then unlinks from the client and exits
+%% normally.
+backup_master(ClientPid, B) ->
+ process_flag(trap_exit, true),
+ case catch do_backup_master(B) of
+ {'EXIT', Reason} ->
+ ClientPid ! {self(), ClientPid, {error, {'EXIT', Reason}}};
+ Res ->
+ ClientPid ! {self(), ClientPid, Res}
+ end,
+ unlink(ClientPid),
+ exit(normal).
+
+%% Performs the backup described by the #backup_args{} record B.
+%% Opens the backup media through the callback module (open_write),
+%% writes the backup log header, then the schema and finally every
+%% selected table except schema, and commits with commit_write.
+%% Returns ok. Every failure goes through abort_write/4, which throws
+%% {error, Reason}.
+do_backup_master(B) ->
+ Name = B#backup_args.name,
+ B2 = safe_apply(B, open_write, [B#backup_args.opaque]),
+ B3 = safe_write(B2, [backup_log_header()]),
+ case mnesia_checkpoint:tables_and_cookie(Name) of
+ {ok, AllTabs, Cookie} ->
+ Tabs = select_tables(AllTabs, B3),
+ B4 = B3#backup_args{cookie = Cookie},
+ %% Always put schema first in backup file
+ B5 = backup_schema(B4, Tabs),
+ B6 = lists:foldl(fun backup_tab/2, B5, Tabs -- [schema]),
+ safe_apply(B6, commit_write, [B6#backup_args.opaque]),
+ ok;
+ {error, Reason} ->
+ abort_write(B3, {?MODULE, backup_master}, [B], {error, Reason})
+ end.
+
+%% Picks the tables to back up. Starts from AllTabs, or from the
+%% explicit list in B#backup_args.tables, and with local scope keeps
+%% only the tables whose most local node in the checkpoint is this node.
+select_tables(AllTabs, B) ->
+    Candidates =
+        case B#backup_args.tables of
+            all -> AllTabs;
+            SomeTabs when is_list(SomeTabs) -> SomeTabs
+        end,
+    case B#backup_args.scope of
+        global ->
+            Candidates;
+        local ->
+            CpName = B#backup_args.name,
+            IsLocal =
+                fun(T) ->
+                        mnesia_checkpoint:most_local_node(CpName, T) == {ok, node()}
+                end,
+            lists:filter(IsLocal, Candidates)
+    end.
+
+%% Writes Recs to the backup media through the callback module's write
+%% function and returns the updated #backup_args{}. An empty list is not
+%% written at all.
+safe_write(B, Recs) ->
+    case Recs of
+        [] ->
+            B;
+        _ ->
+            safe_apply(B, write, [B#backup_args.opaque, Recs])
+    end.
+
+%% Writes the schema part of the backup. When schema is among Tabs the
+%% schema table itself is backed up; otherwise a {schema, Tab,
+%% CreateList} definition is written for each table in Tabs.
+backup_schema(B, Tabs) ->
+    IncludesSchema = lists:member(schema, Tabs),
+    if
+        IncludesSchema ->
+            backup_tab(schema, B);
+        true ->
+            Definitions =
+                [{schema, T, mnesia_schema:get_create_list(T)} || T <- Tabs],
+            safe_write(B, Definitions)
+    end.
+
+%% Calls Mod:What(Args) on the backup callback module stored in B.
+%% A write of an empty item list is skipped. Before the call the mailbox
+%% is polled (after 0) for an 'EXIT' message; if one is pending the
+%% backup is aborted instead of calling the module.
+%% {ok, Opaque} from the callback yields B with the new opaque value.
+%% Any other result, including a caught crash, aborts the backup through
+%% abort_write/4, which throws {error, Reason}.
+safe_apply(B, write, [_, Items]) when Items == [] ->
+ B;
+safe_apply(B, What, Args) ->
+ Abort = fun(R) -> abort_write(B, What, Args, R) end,
+ receive
+ {'EXIT', Pid, R} -> Abort({'EXIT', Pid, R})
+ after 0 ->
+ Mod = B#backup_args.module,
+ case catch apply(Mod, What, Args) of
+ {ok, Opaque} -> B#backup_args{opaque=Opaque};
+ {error, R} -> Abort(R);
+ R -> Abort(R)
+ end
+ end.
+
+%% Aborts an ongoing backup. Logs the failed call, asks the callback
+%% module to abort_write its media (a failure to do so is reported via
+%% error/2) and finally throws {error, Reason}. Never returns normally.
+abort_write(B, What, Args, Reason) ->
+    Mod = B#backup_args.module,
+    Opaque = B#backup_args.opaque,
+    dbg_out("Failed to perform backup. M=~p:F=~p:A=~p -> ~p~n",
+            [Mod, What, Args, Reason]),
+    case catch apply(Mod, abort_write, [Opaque]) of
+        {ok, _Res} ->
+            ok;
+        Other ->
+            error("Failed to abort backup. ~p:~p~p -> ~p~n",
+                  [Mod, abort_write, [Opaque], Other])
+    end,
+    throw({error, Reason}).
+
+%% Backs up one table and returns the updated #backup_args{}.
+%% If the most local node holding Tab in the checkpoint is this node the
+%% table is copied in-process with tab_copier/3. Otherwise a linked
+%% tab_copier process is spawned on that node and its records are
+%% received and written here by tab_receiver/5. If no node can be
+%% determined the backup is aborted.
+backup_tab(Tab, B) ->
+ Name = B#backup_args.name,
+ case mnesia_checkpoint:most_local_node(Name, Tab) of
+ {ok, Node} when Node == node() ->
+ tab_copier(self(), B, Tab);
+ {ok, Node} ->
+ RemoteB = B,
+ Pid = spawn_link(Node, ?MODULE, tab_copier, [self(), RemoteB, Tab]),
+ RecName = val({Tab, record_name}),
+ tab_receiver(Pid, B, Tab, RecName, 0);
+ {error, Reason} ->
+ abort_write(B, {?MODULE, backup_tab}, [Tab, B], {error, Reason})
+ end.
+
+%% Copies the records of Tab for the backup described by B.
+%% Pid is the process that writes the backup. When Pid is self() the
+%% records are written directly and the updated #backup_args{} is
+%% returned. Otherwise the records are sent to Pid and this process
+%% exits with reason normal when done (see handle_last/2).
+%% select_source/3 decides between a full and an incremental copy.
+tab_copier(Pid, B, Tab) when is_record(B, backup_args) ->
+ %% Intentional crash at exit
+ Name = B#backup_args.name,
+ PrevName = B#backup_args.prev_name,
+ {FirstName, FirstSource} = select_source(Tab, Name, PrevName),
+
+ ?eval_debug_fun({?MODULE, tab_copier, pre}, [{name, Name}, {tab, Tab}]),
+ Res = handle_more(Pid, B, Tab, FirstName, FirstSource, Name),
+ ?eval_debug_fun({?MODULE, tab_copier, post}, [{name, Name}, {tab, Tab}]),
+
+ handle_last(Pid, Res).
+
+%% Decides where the first pass of a table copy reads from. Returns
+%% {CheckpointName, Source}:
+%%   {Name, table}        - full backup: always for schema, when no
+%%                          other previous checkpoint was given, or when
+%%                          the previous checkpoint's most local node
+%%                          for Tab is not this node
+%%   {PrevName, retainer} - incremental backup based on the previous
+%%                          checkpoint
+select_source(Tab, Name, _PrevName) when Tab == schema ->
+    %% Always full backup of schema
+    {Name, table};
+select_source(_Tab, Name, PrevName) when Name == PrevName ->
+    %% Full backup
+    {Name, table};
+select_source(Tab, Name, PrevName) ->
+    %% Wants incremental backup
+    case mnesia_checkpoint:most_local_node(PrevName, Tab) of
+        {ok, Node} when Node == node() ->
+            %% Accept incremental backup
+            {PrevName, retainer};
+        _ ->
+            %% Do a full backup anyway
+            dbg_out("Incremental backup escalated to full backup: ~p~n", [Tab]),
+            {Name, table}
+    end.
+
+%% Runs the copy passes for Tab and returns the accumulator
+%% {Count, BackupArgs}, starting from {0, B}.
+%% Both checkpoints (Name and FirstName) are asked whether they really
+%% retain Tab:
+%%   both true  - two passes: first over FirstName/FirstSource, then
+%%                over the retainer of Name
+%%   both false - a single pass over the retainer of Name
+%%   mixed      - the backup is aborted, since both checkpoints must
+%%                have the same ram_overrides_dump setting
+handle_more(Pid, B, Tab, FirstName, FirstSource, Name) ->
+ Acc = {0, B},
+ case {mnesia_checkpoint:really_retain(Name, Tab),
+ mnesia_checkpoint:really_retain(FirstName, Tab)} of
+ {true, true} ->
+ Acc2 = iterate(B, FirstName, Tab, Pid, FirstSource, latest, first, Acc),
+ iterate(B, Name, Tab, Pid, retainer, checkpoint, last, Acc2);
+ {false, false}->
+ %% Put the dumped file in the backup
+ %% instead of the ram table. Does
+ %% only apply to ram_copies.
+ iterate(B, Name, Tab, Pid, retainer, checkpoint, last, Acc);
+ Bad ->
+ Reason = {"Checkpoints for incremental backup must have same "
+ "setting of ram_overrides_dump",
+ Tab, Name, FirstName, Bad},
+ abort_write(B, {?MODULE, backup_tab}, [Tab, B], {error, Reason})
+ end.
+
+%% Finishes a table copy. When the copier is the writing process itself
+%% the updated #backup_args{} is returned. Otherwise the copier unlinks
+%% from the receiver, tells it that the last records have been sent and
+%% exits with reason normal.
+handle_last(Pid, {_Count, B}) when Pid == self() ->
+ B;
+handle_last(Pid, _Acc) ->
+ unlink(Pid),
+ Pid ! {self(), {last, {ok, dummy}}},
+ exit(normal).
+
+%% Runs one pass of mnesia_checkpoint:iterate/6 over Tab in checkpoint
+%% Name and returns the resulting accumulator.
+%% Each batch of records is written directly with copy_records/4 when
+%% Pid is self(), or shipped to Pid with send_records/5 otherwise.
+%% A failed iteration aborts the backup through abort_write/4.
+iterate(B, Name, Tab, Pid, Source, Age, Pass, Acc) ->
+ Fun =
+ if
+ Pid == self() ->
+ RecName = val({Tab, record_name}),
+ fun(Recs, A) -> copy_records(RecName, Tab, Recs, A) end;
+ true ->
+ fun(Recs, A) -> send_records(Pid, Tab, Recs, Pass, A) end
+ end,
+ case mnesia_checkpoint:iterate(Name, Tab, Fun, Acc, Source, Age) of
+ {ok, Acc2} ->
+ Acc2;
+ {error, Reason} ->
+ R = {error, {"Tab copier iteration failed", Reason}},
+ abort_write(B, {?MODULE, iterate}, [self(), B, Tab], R)
+ end.
+
+%% Writes one batch of records to the backup and returns the updated
+%% {Count, BackupArgs} accumulator; Count is incremented once per
+%% non-empty batch. An empty batch leaves the accumulator unchanged.
+copy_records(_RecName, _Tab, [], Acc) ->
+    Acc;
+copy_records(RecName, Tab, Recs, {Count, B}) ->
+    Filtered = rec_filter(B, Tab, RecName, Recs),
+    Written = safe_write(B, Filtered),
+    {Count + 1, Written}.
+
+%% Ships one batch of records to the receiving process Pid.
+%% Waits for the request {Pid, more, Count}, where Count must equal the
+%% number of batches sent so far. An empty batch in the last pass is not
+%% sent and the accumulator is returned unchanged; otherwise the batch is
+%% sent as {self(), {more, Count + 1, Recs}} and the counter is bumped.
+%% Any other message makes the copier exit.
+%% NOTE(review): the request is consumed even when no batch is sent in
+%% reply; presumably a later batch or the final 'last' message answers
+%% it - confirm against mnesia_checkpoint:iterate/6.
+send_records(Pid, Tab, Recs, Pass, {Count, B}) ->
+ receive
+ {Pid, more, Count} ->
+ if
+ Pass == last, Recs == [] ->
+ {Count, B};
+ true ->
+ Next = Count + 1,
+ Pid ! {self(), {more, Next, Recs}},
+ {Next, B}
+ end;
+ Msg ->
+ exit({send_records_unexpected_msg, Tab, Msg})
+ end.
+
+%% Receiving side of a remote table copy; returns the updated
+%% #backup_args{} once the copier reports 'last'.
+%% Requests the next batch with {self(), more, Slot} and then handles:
+%%   {Pid, {more, Next, Recs}} - filter and write the records, then ask
+%%                               for batch Next
+%%   {Pid, {last, {ok, _}}}    - done
+%%   {'EXIT', Pid, _}          - the copier died, abort the backup
+%%   anything else             - unexpected message, abort the backup
+tab_receiver(Pid, B, Tab, RecName, Slot) ->
+ Pid ! {self(), more, Slot},
+ receive
+ {Pid, {more, Next, Recs}} ->
+ Recs2 = rec_filter(B, Tab, RecName, Recs),
+ B2 = safe_write(B, Recs2),
+ tab_receiver(Pid, B2, Tab, RecName, Next);
+
+ {Pid, {last, {ok,_}}} ->
+ B;
+
+ {'EXIT', Pid, {error, R}} ->
+ Reason = {error, {"Tab copier crashed", R}},
+ abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], Reason);
+ {'EXIT', Pid, R} ->
+ Reason = {error, {"Tab copier crashed", {'EXIT', R}}},
+ abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], Reason);
+ Msg ->
+ R = {error, {"Tab receiver got unexpected msg", Msg}},
+ abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], R)
+ end.
+
+%% Adjusts records before they are written to the backup.
+%% Schema records get their cookie refreshed through
+%% mnesia_bup:refresh_cookie/2 (left as they are when that reports an
+%% error). For other tables the record tag is replaced by the table name
+%% whenever the record name differs from the table name.
+rec_filter(B, schema, _RecName, Recs) ->
+    Cookie = B#backup_args.cookie,
+    case catch mnesia_bup:refresh_cookie(Recs, Cookie) of
+        Refreshed when is_list(Refreshed) ->
+            Refreshed;
+        {error, _Reason} ->
+            %% No schema table cookie
+            Recs
+    end;
+rec_filter(_B, Tab, Tab, Recs) ->
+    Recs;
+rec_filter(_B, Tab, _RecName, Recs) ->
+    lists:map(fun(Rec) -> setelement(1, Rec, Tab) end, Recs).
+
+%% ets2dcd/1: dumps the ram table Tab to its dcd file.
+ets2dcd(Tab) ->
+ ets2dcd(Tab, dcd).
+
+%% ets2dcd/2: dumps the ram table Tab to its dcd or dmp file, depending
+%% on Ftype. The records are written in chunks of 1000 to a temporary
+%% log while the table is fixed, and the temporary file is then renamed
+%% onto the target file. Finally the table's dcl file is deleted, since
+%% its contents are now part of the new file. Returns ok; crashes with
+%% badmatch if writing or renaming fails.
+ets2dcd(Tab, Ftype) ->
+ Fname =
+ case Ftype of
+ dcd -> mnesia_lib:tab2dcd(Tab);
+ dmp -> mnesia_lib:tab2dmp(Tab)
+ end,
+ TmpF = mnesia_lib:tab2tmp(Tab),
+ file:delete(TmpF),
+ Log = open_log({Tab, ets2dcd}, dcd_log_header(), TmpF, false),
+ mnesia_lib:db_fixtable(ram_copies, Tab, true),
+ ok = ets2dcd(mnesia_lib:db_init_chunk(ram_copies, Tab, 1000), Tab, Log),
+ mnesia_lib:db_fixtable(ram_copies, Tab, false),
+ close_log(Log),
+ ok = file:rename(TmpF, Fname),
+ %% Remove old log data which is now in the new dcd.
+ %% No one else should be accessing this file!
+ file:delete(mnesia_lib:tab2dcl(Tab)),
+ ok.
+
+%% ets2dcd/3: appends each chunk of records to Log until the table
+%% traversal reports '$end_of_table'.
+ets2dcd('$end_of_table', _Tab, _Log) ->
+ ok;
+ets2dcd({Recs, Cont}, Tab, Log) ->
+ ok = disk_log:alog_terms(Log, Recs),
+ ets2dcd(mnesia_lib:db_chunk(ram_copies, Cont), Tab, Log).
+
+%% dcd2ets/1: loads Tab from disc using the auto_repair setting.
+dcd2ets(Tab) ->
+ dcd2ets(Tab, mnesia_monitor:get_env(auto_repair)).
+
+%% dcd2ets/2: loads Tab into its ets table from disc.
+%% If a dcd file exists its records are inserted and the dcl log, if
+%% any, is replayed on top; the value returned by load_dcl/2 is the
+%% result. Without a dcd file the table is loaded from its dat (dets)
+%% file, dumped to a new dcd file, the dat file is deleted and 0 is
+%% returned. A failed dets load raises an error.
+dcd2ets(Tab, Rep) ->
+ Dcd = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dcd) of
+ true ->
+ Log = open_log({Tab, dcd2ets}, dcd_log_header(), Dcd,
+ true, Rep, read_only),
+ Data = chunk_log(Log, start),
+ ok = insert_dcdchunk(Data, Log, Tab),
+ close_log(Log),
+ load_dcl(Tab, Rep);
+ false -> %% Handle old dets files, and conversion from disc_only to disc.
+ Fname = mnesia_lib:tab2dat(Tab),
+ Type = val({Tab, setorbag}),
+ case mnesia_lib:dets_to_ets(Tab, Tab, Fname, Type, Rep, yes) of
+ loaded ->
+ ets2dcd(Tab),
+ file:delete(Fname),
+ 0;
+ {error, Error} ->
+ erlang:error({"Failed to load table from disc", [Tab, Error]})
+ end
+ end.
+
+%% Inserts the chunks read from a dcd log into the ets table Tab until
+%% eof; returns ok.
+%% A chunk that starts with a dcd_log header of version "1.0" or later
+%% has that header stripped before the rest of the chunk is inserted.
+insert_dcdchunk({Cont, [LogH | Rest]}, Log, Tab)
+ when is_record(LogH, log_header),
+ LogH#log_header.log_kind == dcd_log,
+ LogH#log_header.log_version >= "1.0" ->
+ insert_dcdchunk({Cont, Rest}, Log, Tab);
+
+insert_dcdchunk({Cont, Recs}, Log, Tab) ->
+ true = ets:insert(Tab, Recs),
+ insert_dcdchunk(chunk_log(Log, Cont), Log, Tab);
+insert_dcdchunk(eof, _Log, _Tab) ->
+ ok.
+
+%% Replays the dcl log of Tab, if the file exists, into the ets table.
+%% The log is opened read_only under the name {load_dcl, Tab} with Rep
+%% as repair flag. Returns the count accumulated by insert_logchunk/3,
+%% or 0 when there is no dcl file.
+load_dcl(Tab, Rep) ->
+ FName = mnesia_lib:tab2dcl(Tab),
+ case mnesia_lib:exists(FName) of
+ true ->
+ Name = {load_dcl,Tab},
+ open_log(Name,
+ dcl_log_header(),
+ FName,
+ true,
+ Rep,
+ read_only),
+ FirstChunk = chunk_log(Name, start),
+ N = insert_logchunk(FirstChunk, Name, 0),
+ close_log(Name),
+ N;
+ false ->
+ 0
+ end.
+
+%% Replays the chunks of a dcl log, chunk by chunk, until eof and
+%% returns the total count of applied log records. The second argument
+%% is the name of the log being read and the third the count so far.
+%%
+%% add_recs/2 takes the count so far as its accumulator and returns the
+%% new total, so that total is passed on as it is. Adding it to the old
+%% count again would count every earlier chunk twice.
+insert_logchunk({C2, Recs}, Tab, C) ->
+    Total = add_recs(Recs, C),
+    insert_logchunk(chunk_log(Tab, C2), Tab, Total);
+insert_logchunk(eof, _Tab, C) ->
+    C.
+
+%% Applies a list of dcl log records to the ets tables they name and
+%% returns N increased by the number of operations applied.
+%%
+%% Each record is {{Tab, Key}, Val, Op} where Op is one of write,
+%% delete, delete_object, update_counter or clear_table. A dcl_log
+%% header of version "1.0" or later is skipped without being counted.
+%% A clear_table adds the number of objects that were in the table.
+add_recs([{{Tab, _Key}, Val, write} | Rest], N) ->
+    true = ets:insert(Tab, Val),
+    add_recs(Rest, N+1);
+add_recs([{{Tab, Key}, _Val, delete} | Rest], N) ->
+    true = ets:delete(Tab, Key),
+    add_recs(Rest, N+1);
+add_recs([{{Tab, _Key}, Val, delete_object} | Rest], N) ->
+    true = ets:match_delete(Tab, Val),
+    add_recs(Rest, N+1);
+add_recs([{{Tab, Key}, Val, update_counter} | Rest], N) ->
+    {RecName, Incr} = Val,
+    case catch ets:update_counter(Tab, Key, Incr) of
+        CounterVal when is_integer(CounterVal) ->
+            ok;
+        _ when Incr < 0 ->
+            %% No counter to update and a negative increment:
+            %% the counter is created with the value 0.
+            Zero = {RecName, Key, 0},
+            true = ets:insert(Tab, Zero);
+        _ ->
+            %% No counter to update: it is created with the
+            %% increment as its initial value.
+            Zero = {RecName, Key, Incr},
+            true = ets:insert(Tab, Zero)
+    end,
+    add_recs(Rest, N+1);
+add_recs([LogH|Rest], N)
+  when is_record(LogH, log_header),
+       LogH#log_header.log_kind == dcl_log,
+       LogH#log_header.log_version >= "1.0" ->
+    add_recs(Rest, N);
+add_recs([{{Tab, _Key}, _Val, clear_table} | Rest], N) ->
+    %% The size must be read before the table is emptied;
+    %% afterwards it is always 0.
+    Size = ets:info(Tab, size),
+    true = ets:match_delete(Tab, '_'),
+    add_recs(Rest, N+Size);
+add_recs([], N) ->
+    N.
diff --git a/lib/mnesia/src/mnesia_monitor.erl b/lib/mnesia/src/mnesia_monitor.erl
new file mode 100644
index 0000000000..05ae943e3b
--- /dev/null
+++ b/lib/mnesia/src/mnesia_monitor.erl
@@ -0,0 +1,823 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_monitor).
+
+-behaviour(gen_server).
+
+%% Public exports
+-export([
+ close_dets/1,
+ close_log/1,
+ detect_inconcistency/2,
+ get_env/1,
+ init/0,
+ mktab/2,
+ unsafe_mktab/2,
+ mnesia_down/2,
+ needs_protocol_conversion/1,
+ negotiate_protocol/1,
+ disconnect/1,
+ open_dets/2,
+ unsafe_open_dets/2,
+ open_log/1,
+ patch_env/2,
+ protocol_version/0,
+ reopen_log/3,
+ set_env/2,
+ start/0,
+ start_proc/4,
+ terminate_proc/3,
+ unsafe_close_dets/1,
+ unsafe_close_log/1,
+ use_dir/0,
+ do_check_type/2
+ ]).
+
+%% gen_server callbacks
+-export([
+ init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3
+ ]).
+
+%% Internal exports
+-export([
+ call/1,
+ cast/1,
+ detect_partitioned_network/2,
+ has_remote_mnesia_down/1,
+ negotiate_protocol_impl/2
+ ]).
+
+-import(mnesia_lib, [dbg_out/2, verbose/2, error/2, fatal/2, set/2]).
+
+-include("mnesia.hrl").
+
+-record(state, {supervisor, pending_negotiators = [],
+ going_down = [], tm_started = false, early_connects = [],
+ connecting, mq = []}).
+
+-define(current_protocol_version, {7,6}).
+
+-define(previous_protocol_version, {7,5}).
+
+%% Starts the monitor as a gen_server registered locally under the
+%% module name and linked to the caller. The caller's pid is handed to
+%% init/1. Add {debug, [trace]} to Options to trace the server.
+start() ->
+    ServerName = {local, ?MODULE},
+    InitArgs = [self()],
+    Options = [{timeout, infinity}],
+    gen_server:start_link(ServerName, ?MODULE, InitArgs, Options).
+
+%% Sends the synchronous 'init' request to the local monitor through
+%% call/1 and returns its answer.
+init() ->
+    Request = init,
+    call(Request).
+
+%% Asynchronously tells the monitor that From has handled the
+%% mnesia_down of Node (message {mnesia_down, From, Node}).
+mnesia_down(From, Node) ->
+    Notification = {mnesia_down, From, Node},
+    cast(Notification).
+
+%% Asks the monitor to create the ets table Tab with options Args
+%% (request {mktab, Tab, Args}).
+mktab(Tab, Args) ->
+    Request = {mktab, Tab, Args},
+    unsafe_call(Request).
+%% Like mktab/2, but sends the {unsafe_mktab, Tab, Args} request, whose
+%% handler answers {error, Reason} when the table cannot be created.
+unsafe_mktab(Tab, Args) ->
+    Request = {unsafe_mktab, Tab, Args},
+    unsafe_call(Request).
+
+%% Asks the monitor to open the dets table Tab with options Args
+%% (request {open_dets, Tab, Args}).
+open_dets(Tab, Args) ->
+    Request = {open_dets, Tab, Args},
+    unsafe_call(Request).
+%% Like open_dets/2, but sends the {unsafe_open_dets, Tab, Args}
+%% request, whose handler answers {error, Reason} on failure.
+unsafe_open_dets(Tab, Args) ->
+    Request = {unsafe_open_dets, Tab, Args},
+    unsafe_call(Request).
+
+%% Asks the monitor to close the dets table Tab (request
+%% {close_dets, Tab}).
+close_dets(Tab) ->
+    Request = {close_dets, Tab},
+    unsafe_call(Request).
+
+%% Asks the monitor to close the dets table Name without checking the
+%% outcome (request {unsafe_close_dets, Name}).
+unsafe_close_dets(Name) ->
+    Request = {unsafe_close_dets, Name},
+    unsafe_call(Request).
+
+%% Asks the monitor to open a disk_log with the options Args
+%% (request {open_log, Args}).
+open_log(Args) ->
+    Request = {open_log, Args},
+    unsafe_call(Request).
+
+%% Asks the monitor to reopen the disk_log Name (request
+%% {reopen_log, Name, Fname, Head}).
+reopen_log(Name, Fname, Head) ->
+    Request = {reopen_log, Name, Fname, Head},
+    unsafe_call(Request).
+
+%% Asks the monitor to close the disk_log Name (request
+%% {close_log, Name}).
+close_log(Name) ->
+    Request = {close_log, Name},
+    unsafe_call(Request).
+
+%% Asks the monitor to close the disk_log Name without checking the
+%% outcome (request {unsafe_close_log, Name}).
+unsafe_close_log(Name) ->
+    Request = {unsafe_close_log, Name},
+    unsafe_call(Request).
+
+
+%% Asynchronously asks the monitor to unlink from the monitor running
+%% on Node (message {disconnect, Node}).
+disconnect(Node) ->
+    Notification = {disconnect, Node},
+    cast(Notification).
+
+%% Returns GoodNodes.
+%% Creates a link to each compatible monitor and sets
+%% protocol_version to the agreed version upon success.
+
+%% Negotiates the protocol with the monitors on Nodes by sending
+%% {negotiate_protocol, Nodes} to the local monitor. An empty node
+%% list yields [] without contacting the monitor.
+negotiate_protocol(Nodes) when Nodes =/= [] ->
+    Request = {negotiate_protocol, Nodes},
+    call(Request);
+negotiate_protocol([]) ->
+    [].
+
+%% Body of the helper process spawned by the monitor for a local
+%% negotiation request. Multicalls the monitors on Nodes with our
+%% version and acceptable protocols, evaluates the replies with
+%% check_protocol/2 and reports the outcome back to the local monitor
+%% as {protocol_negotiated, Requester, Result}. Finally unlinks from
+%% the monitor and returns ok.
+negotiate_protocol_impl(Nodes, Requester) ->
+    MyVersion = mnesia:system_info(version),
+    Acceptable = acceptable_protocol_versions(),
+    Monitor = whereis(?MODULE),
+    Request = {negotiate_protocol, Monitor, MyVersion, Acceptable},
+    {Answers, _Unreachable} = multicall(Nodes, Request),
+    Outcome = check_protocol(Answers, Acceptable),
+    ?MODULE ! {protocol_negotiated, Requester, Outcome},
+    unlink(whereis(?MODULE)),
+    ok.
+
+%% Evaluates the replies of a negotiation multicall and returns the
+%% nodes whose monitor accepted a protocol that is a member of
+%% Protocols. For each such node {protocol, Node} is set to
+%% {Protocol, Flag}, where Flag is true when Protocol differs from our
+%% own protocol_version(). Rejections and failed calls are only logged.
+%% When all replies are consumed, protocol_version is set to the first
+%% element of Protocols.
+check_protocol([{Node, {accept, Mon, Version, Protocol}} | Replies], Protocols) ->
+    Acceptable = lists:member(Protocol, Protocols),
+    if
+        Acceptable ->
+            Differs = Protocol /= protocol_version(),
+            set({protocol, Node}, {Protocol, Differs}),
+            [node(Mon) | check_protocol(Replies, Protocols)];
+        true ->
+            verbose("Failed to connect with ~p. ~p protocols rejected. "
+                    "expected version = ~p, expected protocol = ~p~n",
+                    [Node, Protocols, Version, Protocol]),
+            unlink(Mon), % Get rid of unnecessary link
+            check_protocol(Replies, Protocols)
+    end;
+check_protocol([{Node, {reject, _Mon, Version, Protocol}} | Replies], Protocols) ->
+    verbose("Failed to connect with ~p. ~p protocols rejected. "
+            "expected version = ~p, expected protocol = ~p~n",
+            [Node, Protocols, Version, Protocol]),
+    check_protocol(Replies, Protocols);
+check_protocol([{error, Reason} | Replies], Protocols) ->
+    dbg_out("~p connect failed error: ~p~n", [?MODULE, Reason]),
+    check_protocol(Replies, Protocols);
+check_protocol([{badrpc, Reason} | Replies], Protocols) ->
+    dbg_out("~p connect failed badrpc: ~p~n", [?MODULE, Reason]),
+    check_protocol(Replies, Protocols);
+check_protocol([], [Preferred | _Others]) ->
+    set(protocol_version, Preferred),
+    [].
+
+%% Returns the stored protocol_version value, falling back to
+%% ?current_protocol_version when no value can be looked up.
+protocol_version() ->
+    Stored = ?catch_val(protocol_version),
+    case Stored of
+        {'EXIT', _} ->
+            ?current_protocol_version;
+        _ ->
+            Stored
+    end.
+
+%% The protocols this node accepts, most preferred first: the
+%% protocol currently in use followed by the previous one.
+acceptable_protocol_versions() ->
+    Preferred = protocol_version(),
+    [Preferred, ?previous_protocol_version].
+
+%% Tells whether messages exchanged with Node need protocol
+%% conversion. Returns false when nothing is stored for
+%% {protocol, Node}. Otherwise the stored flag is returned as is when
+%% we run ?current_protocol_version, and negated when we do not.
+needs_protocol_conversion(Node) ->
+    case ?catch_val({protocol, Node}) of
+        {'EXIT', _} ->
+            false;
+        {_, Flag} ->
+            case protocol_version() of
+                ?current_protocol_version -> Flag;
+                _ -> not Flag
+            end
+    end.
+
+%% Sends Msg asynchronously to the local monitor. Returns ignore when
+%% no monitor is registered.
+cast(Msg) ->
+    case whereis(?MODULE) of
+        undefined ->
+            ignore;
+        Monitor ->
+            gen_server:cast(Monitor, Msg)
+    end.
+
+%% Calls the local monitor synchronously, without a timeout and
+%% without linking to it. Returns {error, {node_not_running, Node}}
+%% when no monitor is registered.
+unsafe_call(Msg) ->
+    case whereis(?MODULE) of
+        undefined ->
+            {error, {node_not_running, node()}};
+        Monitor ->
+            gen_server:call(Monitor, Msg, infinity)
+    end.
+
+%% Calls the local monitor synchronously while linked to it. If an
+%% 'EXIT' message from the monitor is waiting in the mailbox after the
+%% call, the reply is discarded and
+%% {error, {node_not_running, Node}} is returned instead. The same
+%% error is returned when no monitor is registered.
+call(Msg) ->
+    case whereis(?MODULE) of
+        undefined ->
+            {error, {node_not_running, node()}};
+        Monitor ->
+            link(Monitor),
+            Reply = gen_server:call(Monitor, Msg, infinity),
+            unlink(Monitor),
+            %% An exit signal arrives as a message if the server died
+            receive
+                {'EXIT', Monitor, _Why} ->
+                    {error, {node_not_running, node()}}
+            after 0 ->
+                Reply
+            end
+    end.
+
+%% Runs call(Msg) in this module on every node in Nodes through
+%% rpc:multicall/4 and returns its result.
+multicall(Nodes, Msg) ->
+    CallArgs = [Msg],
+    rpc:multicall(Nodes, ?MODULE, call, CallArgs).
+
+%% Starts a linked process through proc_lib that runs
+%% mnesia_sp:init_proc(Who, Mod, Fun, Args); waits without timeout for
+%% the process to acknowledge its start.
+start_proc(Who, Mod, Fun, Args) ->
+    proc_lib:start_link(mnesia_sp, init_proc, [Who, Mod, Fun, Args], infinity).
+
+%% Common termination handling for Mnesia server processes.
+%% Termination with reason shutdown or killed is logged verbosely and
+%% answers ok; any other reason is reported through fatal/2 together
+%% with the process state.
+terminate_proc(Who, Reason, _State) when Reason == shutdown; Reason == killed ->
+    mnesia_lib:verbose("~p terminated: ~p~n", [Who, Reason]),
+    ok;
+terminate_proc(Who, Reason, State) ->
+    fatal("~p crashed: ~p state: ~p~n", [Who, Reason, State]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Callback functions from gen_server
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+%% gen_server callback. Traps exits, creates the public named tables
+%% mnesia_gvar and mnesia_stats, records the Mnesia version and loads
+%% every configuration parameter listed by env/0. On success the
+%% remaining global variables and counters are initialised and the
+%% server starts with Parent as its supervisor; a configuration
+%% failure is reported and stops the server with
+%% {bad_config, Reason}.
+init([Parent]) ->
+    process_flag(trap_exit, true),
+    ?ets_new_table(mnesia_gvar, [set, public, named_table]),
+    ?ets_new_table(mnesia_stats, [set, public, named_table]),
+    set(subscribers, []),
+    mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
+    Vsn = mnesia:system_info(version),
+    set(version, Vsn),
+    dbg_out("Version: ~p~n", [Vsn]),
+    case catch process_config_args(env()) of
+        {'EXIT', Why} ->
+            mnesia_lib:report_fatal("Bad configuration: ~p~n", [Why]),
+            {stop, {bad_config, Why}};
+        ok ->
+            init_globals(),
+            {ok, #state{supervisor = Parent}}
+    end.
+
+%% Initialises the global variables and counters that are set up once
+%% the configuration has been accepted.
+init_globals() ->
+    mnesia_lib:set({'$$$_report', current_pos}, 0),
+    DebugLevel = mnesia_lib:val(debug),
+    mnesia_lib:verbose("Mnesia debug level set to ~p\n", [DebugLevel]),
+    set(mnesia_status, starting), %% set start status
+    set({current, db_nodes}, [node()]),
+    set(use_dir, use_dir()),
+    mnesia_lib:create_counter(trans_aborts),
+    mnesia_lib:create_counter(trans_commits),
+    mnesia_lib:create_counter(trans_log_writes),
+    WritesLeft = get_env(dump_log_write_threshold),
+    mnesia_lib:set_counter(trans_log_writes_left, WritesLeft),
+    mnesia_lib:create_counter(trans_log_writes_prev),
+    mnesia_lib:create_counter(trans_restarts),
+    mnesia_lib:create_counter(trans_failures),
+    set(checkpoints, []),
+    set(pending_checkpoints, []),
+    set(pending_checkpoint_pids, []).
+
+%% Tells whether Mnesia uses its directory. The stored use_dir value
+%% is returned when there is one. Otherwise the answer follows the
+%% schema_location parameter: disc -> true, ram -> false and
+%% opt_disc -> whatever non_empty_dir/0 says.
+use_dir() ->
+    Stored = ?catch_val(use_dir),
+    case Stored of
+        {'EXIT', _} ->
+            case get_env(schema_location) of
+                disc -> true;
+                opt_disc -> non_empty_dir();
+                ram -> false
+            end;
+        _ ->
+            Stored
+    end.
+
+%% Returns true if the Mnesia directory contains
+%% important files
+%% True when at least one of the fallback backup file, the schema
+%% .DMP file or the schema .DAT file exists. All three are checked.
+non_empty_dir() ->
+    HasFallback = mnesia_lib:exists(mnesia_bup:fallback_bup()),
+    HasSchemaDmp = mnesia_lib:exists(mnesia_lib:tab2dmp(schema)),
+    HasSchemaDat = mnesia_lib:exists(mnesia_lib:tab2dat(schema)),
+    HasFallback or HasSchemaDmp or HasSchemaDat.
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_call({mktab, Tab, Args}, _From, State) -> % Create an ets table for the caller; a failure is reported via fatal/2.
+ case catch ?ets_new_table(Tab, Args) of
+ {'EXIT', ExitReason} ->
+ Msg = "Cannot create ets table",
+ Reason = {system_limit, Msg, Tab, Args, ExitReason},
+ fatal("~p~n", [Reason]),
+ {noreply, State}; % No reply is sent on this path.
+ Reply ->
+ {reply, Reply, State}
+ end;
+
+handle_call({unsafe_mktab, Tab, Args}, _From, State) -> % As mktab, but a failure is returned as {error, ExitReason}.
+ case catch ?ets_new_table(Tab, Args) of
+ {'EXIT', ExitReason} ->
+ {reply, {error, ExitReason}, State};
+ Reply ->
+ {reply, Reply, State}
+ end;
+
+
+handle_call({open_dets, Tab, Args}, _From, State) -> % Open a dets table; a failure is reported via fatal/2.
+ case mnesia_lib:dets_sync_open(Tab, Args) of
+ {ok, Tab} ->
+ {reply, {ok, Tab}, State};
+
+ {error, Reason} ->
+ Msg = "Cannot open dets table",
+ Error = {error, {Msg, Tab, Args, Reason}},
+ fatal("~p~n", [Error]),
+ {noreply, State}
+ end;
+
+handle_call({unsafe_open_dets, Tab, Args}, _From, State) -> % As open_dets, but a failure is returned as {error, Reason}.
+ case mnesia_lib:dets_sync_open(Tab, Args) of
+ {ok, Tab} ->
+ {reply, {ok, Tab}, State};
+ {error, Reason} ->
+ {reply, {error,Reason}, State}
+ end;
+
+handle_call({close_dets, Tab}, _From, State) -> % Close a dets table; anything but ok crashes the server (badmatch).
+ ok = mnesia_lib:dets_sync_close(Tab),
+ {reply, ok, State};
+
+handle_call({unsafe_close_dets, Tab}, _From, State) -> % Close a dets table, ignoring the result.
+ mnesia_lib:dets_sync_close(Tab),
+ {reply, ok, State};
+
+handle_call({open_log, Args}, _From, State) -> % Open a disk_log with {notify, true} prepended to Args.
+ Res = disk_log:open([{notify, true}|Args]),
+ {reply, Res, State};
+
+handle_call({reopen_log, Name, Fname, Head}, _From, State) -> % Reopen a disk_log; a failure is reported via fatal/2.
+ case disk_log:reopen(Name, Fname, Head) of
+ ok ->
+ {reply, ok, State};
+
+ {error, Reason} ->
+ Msg = "Cannot rename disk_log file",
+ Error = {error, {Msg, Name, Fname, Head, Reason}},
+ fatal("~p~n", [Error]),
+ {noreply, State}
+ end;
+
+handle_call({close_log, Name}, _From, State) -> % Close a disk_log; a failure is reported via fatal/2.
+ case disk_log:close(Name) of
+ ok ->
+ {reply, ok, State};
+
+ {error, Reason} ->
+ Msg = "Cannot close disk_log file",
+ Error = {error, {Msg, Name, Reason}},
+ fatal("~p~n", [Error]),
+ {noreply, State}
+ end;
+
+handle_call({unsafe_close_log, Name}, _From, State) -> % Close a disk_log, ignoring the result.
+ disk_log:close(Name),
+ {reply, ok, State};
+
+handle_call({negotiate_protocol, Mon, _Version, _Protocols}, _From, State) % Negotiation request arriving before the 'init' call has set tm_started:
+ when State#state.tm_started == false ->
+ State2 = State#state{early_connects = [node(Mon) | State#state.early_connects]}, % remember the node ...
+ {reply, {node(), {reject, self(), uninitialized, uninitialized}}, State2}; % ... and reject for now.
+
+%% From remote monitor..
+handle_call({negotiate_protocol, Mon, Version, Protocols}, From, State)
+ when node(Mon) /= node() ->
+ Protocol = protocol_version(),
+ MyVersion = mnesia:system_info(version),
+ case lists:member(Protocol, Protocols) of % Does the remote side accept the protocol we run?
+ true ->
+ accept_protocol(Mon, MyVersion, Protocol, From, State);
+ false ->
+ %% in this release we should be able to handle the previous
+ %% protocol
+ case hd(Protocols) of
+ ?previous_protocol_version ->
+ accept_protocol(Mon, MyVersion, ?previous_protocol_version, From, State);
+ _ ->
+ verbose("Connection with ~p rejected. "
+ "version = ~p, protocols = ~p, "
+ "expected version = ~p, expected protocol = ~p~n",
+ [node(Mon), Version, Protocols, MyVersion, Protocol]),
+ {reply, {node(), {reject, self(), MyVersion, Protocol}}, State}
+ end
+ end;
+
+%% Local request to negotiate with other monitors (nodes).
+handle_call({negotiate_protocol, Nodes}, From, State) ->
+ case mnesia_lib:intersect(State#state.going_down, Nodes) of
+ [] ->
+ spawn_link(?MODULE, negotiate_protocol_impl, [Nodes, From]), % The helper reports back with a protocol_negotiated message.
+ {noreply, State#state{connecting={From,Nodes}}}; % The reply is sent from handle_info/2.
+ _ -> %% Cannot connect now, still processing mnesia down
+ {reply, busy, State}
+ end;
+
+handle_call(init, _From, State) -> % Start monitoring nodes, mark tm_started and reply with the early connectors.
+ net_kernel:monitor_nodes(true),
+ EarlyNodes = State#state.early_connects,
+ State2 = State#state{tm_started = true},
+ {reply, EarlyNodes, State2};
+
+handle_call(Msg, _From, State) -> % Unknown request: logged only; NOTE(review): the caller never gets a reply.
+ error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%% Accepts the negotiation request of the remote monitor Mon.
+%% Any earlier pending entry for Mon's node is dropped. If that node
+%% is still in going_down, the acceptance is queued in
+%% pending_negotiators and no reply is sent yet. Otherwise the remote
+%% monitor is linked, {protocol, Node} is stored (the flag is true
+%% when Protocol differs from protocol_version()) and the acceptance
+%% is sent as the reply.
+accept_protocol(Mon, Version, Protocol, From, State) ->
+    Acceptance = {node(), {accept, self(), Version, Protocol}},
+    Node = node(Mon),
+    Others = lists:keydelete(Node, 1, State#state.pending_negotiators),
+    case lists:member(Node, State#state.going_down) of
+        false ->
+            %% No need for wait
+            link(Mon), %% link to remote Monitor
+            Differs = Protocol /= protocol_version(),
+            set({protocol, Node}, {Protocol, Differs}),
+            {reply, Acceptance, State#state{pending_negotiators = Others}};
+        true ->
+            %% Wait for the mnesia_down to be processed,
+            %% before we reply
+            Queued = Others ++ [{Node, Mon, From, Acceptance}],
+            {noreply, State#state{pending_negotiators = Queued}}
+    end.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_cast({mnesia_down, mnesia_controller, Node}, State) -> % mnesia_down reported by mnesia_controller: pass it on to mnesia_tm.
+ mnesia_tm:mnesia_down(Node),
+ {noreply, State};
+
+handle_cast({mnesia_down, mnesia_tm, {Node, Pending}}, State) -> % mnesia_down reported by mnesia_tm: pass it on to mnesia_locker.
+ mnesia_locker:mnesia_down(Node, Pending),
+ {noreply, State};
+
+handle_cast({mnesia_down, mnesia_locker, Node}, State) -> % mnesia_down reported by mnesia_locker: report the event and release a waiting negotiator.
+ Down = {mnesia_down, Node},
+ mnesia_lib:report_system_event(Down),
+ GoingDown = lists:delete(Node, State#state.going_down), % Node is no longer going down.
+ State2 = State#state{going_down = GoingDown},
+ Pending = State#state.pending_negotiators,
+ case lists:keysearch(Node, 1, Pending) of
+ {value, {Node, Mon, ReplyTo, Reply}} ->
+ %% Late reply to remote monitor
+ link(Mon), %% link to remote Monitor
+ gen_server:reply(ReplyTo, Reply),
+ P2 = lists:keydelete(Node, 1,Pending),
+ State3 = State2#state{pending_negotiators = P2},
+ process_q(State3); % Continue with the first queued message, if any.
+ false ->
+ %% No pending remote monitors
+ {noreply, State2}
+ end;
+
+handle_cast({disconnect, Node}, State) -> % Unlink from the monitor registered on Node, if one can be found.
+ case rpc:call(Node, erlang, whereis, [?MODULE]) of
+ {badrpc, _} ->
+ ignore;
+ undefined ->
+ ignore;
+ RemoteMon when is_pid(RemoteMon) ->
+ unlink(RemoteMon)
+ end,
+ {noreply, State};
+
+handle_cast({inconsistent_database, Context, Node}, State) -> % Forward the inconsistency as a system event.
+ Msg = {inconsistent_database, Context, Node},
+ mnesia_lib:report_system_event(Msg),
+ {noreply, State};
+
+handle_cast(Msg, State) -> % Unknown cast: log it and carry on.
+ error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% gen_server callback for plain messages: exit signals from linked
+%% processes, the outcome of a local protocol negotiation, nodeup /
+%% nodedown notifications and disk_log notifications.
+%% Every clause returns {noreply, State} or {stop, Reason, State}.
+
+%% The supervisor exited: stop with the same reason.
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
+    dbg_out("~p was ~p by supervisor~n",[?MODULE, R]),
+    {stop, R, State};
+
+%% A local linked process exited with reason fatal: shut down the
+%% supervisor.
+handle_info({'EXIT', Pid, fatal}, State) when node(Pid) == node() ->
+    dbg_out("~p got FATAL ERROR from: ~p~n",[?MODULE, Pid]),
+    exit(State#state.supervisor, shutdown),
+    {noreply, State};
+
+handle_info(Msg = {'EXIT',Pid,_}, State) ->
+    Node = node(Pid),
+    if
+        Node /= node(), State#state.connecting == undefined ->
+            %% Remotely linked process died, assume that it was a mnesia_monitor
+            mnesia_recover:mnesia_down(Node),
+            mnesia_controller:mnesia_down(Node),
+            {noreply, State#state{going_down = [Node | State#state.going_down]}};
+        Node /= node() ->
+            %% A negotiation is in progress: queue the exit signal so it
+            %% is handled by process_q/1 afterwards.
+            {noreply, State#state{mq = State#state.mq ++ [{info, Msg}]}};
+        true ->
+            %% We have probably got an exit signal from
+            %% disk_log or dets
+            Hint = "Hint: check that the disk still is writable",
+            fatal("~p got unexpected info: ~p; ~p~n",
+                  [?MODULE, Msg, Hint]),
+            %% Same convention as the fatal/2 calls in handle_call/3:
+            %% hand a valid result back to gen_server should fatal/2
+            %% return.
+            {noreply, State}
+    end;
+
+%% The negotiation helper has finished: answer the waiting caller and
+%% handle the first queued message, if any.
+handle_info({protocol_negotiated, From,Res}, State) ->
+    From = element(1,State#state.connecting),
+    gen_server:reply(From, Res),
+    process_q(State#state{connecting = undefined});
+
+handle_info({nodeup, Node}, State) ->
+    %% Ok, we are connected to yet another Erlang node
+    %% Let's check if Mnesia is running there in order
+    %% to detect if the network has been partitioned
+    %% due to communication failure.
+
+    HasDown = mnesia_recover:has_mnesia_down(Node),
+    ImRunning = mnesia_lib:is_running(),
+
+    if
+        %% If I'm not running the test will be made later.
+        HasDown == true, ImRunning == yes ->
+            spawn_link(?MODULE, detect_partitioned_network, [self(), Node]);
+        true ->
+            ignore
+    end,
+    {noreply, State};
+
+handle_info({nodedown, _Node}, State) ->
+    %% Ignore, we are only caring about nodeup's
+    {noreply, State};
+
+%% Notification from disk_log: truncations are ignored, everything
+%% else is reported as important.
+handle_info({disk_log, _Node, Log, Info}, State) ->
+    case Info of
+        {truncated, _No} ->
+            ok;
+        _ ->
+            mnesia_lib:important("Warning Log file ~p error reason ~s~n",
+                                 [Log, disk_log:format_error(Info)])
+    end,
+    {noreply, State};
+
+handle_info(Msg, State) ->
+    %% Log and carry on, like the catch-all clauses of handle_call/3
+    %% and handle_cast/2. Without the explicit {noreply, State} the
+    %% result of error/2 would be returned to gen_server, which is not
+    %% a valid callback return value.
+    error("~p got unexpected info (~p): ~p~n", [?MODULE, State, Msg]),
+    {noreply, State}.
+
+%% Takes the first entry off the mq queue and feeds it to the
+%% matching gen_server callback, whose result is returned. With an
+%% empty queue the answer is simply {noreply, State}.
+process_q(State) ->
+    case State#state.mq of
+        [] ->
+            {noreply, State};
+        [{info, Msg} | Queue] ->
+            handle_info(Msg, State#state{mq = Queue});
+        [{cast, Msg} | Queue] ->
+            handle_cast(Msg, State#state{mq = Queue});
+        [{call, From, Msg} | Queue] ->
+            handle_call(Msg, From, State#state{mq = Queue})
+    end.
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+%% gen_server callback: delegates to terminate_proc/3 with this
+%% module as the name of the terminating process.
+terminate(Reason, State) ->
+    Who = ?MODULE,
+    terminate_proc(Who, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+
+
+%% gen_server callback. A state tuple with five fields (the old
+%% record layout) is converted to the current #state{} record; the
+%% fields missing from the old layout get their record defaults. Any
+%% other state is kept unchanged.
+code_change(_OldVsn, {state, Sup, Negotiators, GoingDown, TmStarted, Early}, _Extra) ->
+    Upgraded = #state{supervisor = Sup,
+                      pending_negotiators = Negotiators,
+                      going_down = GoingDown,
+                      tm_started = TmStarted,
+                      early_connects = Early},
+    {ok, Upgraded};
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+%% Resolves each configuration parameter in the list with get_env/1,
+%% logs it and stores it with mnesia_lib:set/2. Returns ok.
+process_config_args(Params) ->
+    lists:foreach(fun(Param) ->
+                          Value = get_env(Param),
+                          dbg_out("Env ~p: ~p~n", [Param, Value]),
+                          mnesia_lib:set(Param, Value)
+                  end, Params).
+
+%% Validates Val for parameter E with check_type/2 and stores the
+%% checked value. Returns ok; check_type/2 exits on a bad value.
+set_env(E, Val) ->
+    Checked = check_type(E, Val),
+    mnesia_lib:set(E, Checked),
+    ok.
+
+%% Returns the value of configuration parameter E. A value already
+%% stored is returned as is. Otherwise the value from the mnesia
+%% application environment, or default_env(E) when nothing is
+%% configured, is validated with check_type/2 and returned.
+get_env(E) ->
+    Stored = ?catch_val(E),
+    case Stored of
+        {'EXIT', _} ->
+            Raw = case application:get_env(mnesia, E) of
+                      {ok, Configured} -> Configured;
+                      undefined -> default_env(E)
+                  end,
+            check_type(E, Raw);
+        _ ->
+            Stored
+    end.
+
+%% The configuration parameters processed at start-up, in the order
+%% in which they are processed.
+env() ->
+    [access_module, auto_repair, backup_module, debug, dir,
+     dump_log_load_regulation, dump_log_time_threshold,
+     dump_log_update_in_place, dump_log_write_threshold,
+     embedded_mnemosyne, event_module, extra_db_nodes,
+     ignore_fallback_at_startup, fallback_error_function,
+     max_wait_for_decision, schema_location, core_dir,
+     pid_sort_order, no_table_loaders, dc_dump_limit].
+
+%% Default value of each configuration parameter, used when the
+%% application environment does not define it.
+default_env(access_module) -> mnesia;
+default_env(auto_repair) -> true;
+default_env(backup_module) -> mnesia_backup;
+default_env(debug) -> none;
+default_env(dir) ->
+    %% "Mnesia.<node name>" made absolute
+    filename:absname(lists:concat(["Mnesia.", node()]));
+default_env(dump_log_load_regulation) -> false;
+default_env(dump_log_time_threshold) -> timer:minutes(3);
+default_env(dump_log_update_in_place) -> true;
+default_env(dump_log_write_threshold) -> 1000;
+default_env(embedded_mnemosyne) -> false;
+default_env(event_module) -> mnesia_event;
+default_env(extra_db_nodes) -> [];
+default_env(ignore_fallback_at_startup) -> false;
+default_env(fallback_error_function) -> {mnesia, lkill};
+default_env(max_wait_for_decision) -> infinity;
+default_env(schema_location) -> opt_disc;
+default_env(core_dir) -> false;
+default_env(pid_sort_order) -> false;
+default_env(no_table_loaders) -> 2;
+default_env(dc_dump_limit) -> 4.
+
+%% Validates Val for parameter Env with do_check_type/2 and returns
+%% the normalised value. Exits with {bad_config, Env, Val} when the
+%% validation fails.
+check_type(Env, Val) ->
+    Checked = (catch do_check_type(Env, Val)),
+    case Checked of
+        {'EXIT', _Why} ->
+            exit({bad_config, Env, Val});
+        _ ->
+            Checked
+    end.
+
+%% Validates a configuration value and returns its normalised form.
+%% A value that no clause accepts fails with function_clause (or with
+%% whatever bool/1, media/1 or filename:absname/1 raise).
+do_check_type(access_module, Mod) when is_atom(Mod) ->
+    Mod;
+do_check_type(auto_repair, Flag) ->
+    bool(Flag);
+do_check_type(backup_module, Mod) when is_atom(Mod) ->
+    Mod;
+do_check_type(debug, Level)
+  when Level =:= debug; Level =:= none; Level =:= trace; Level =:= verbose ->
+    Level;
+do_check_type(debug, false) ->
+    none;
+do_check_type(debug, true) ->
+    debug;
+do_check_type(dir, Dir) ->
+    filename:absname(Dir);
+do_check_type(dump_log_load_regulation, Flag) ->
+    bool(Flag);
+do_check_type(dump_log_time_threshold, Millis) when is_integer(Millis), Millis > 0 ->
+    Millis;
+do_check_type(dump_log_update_in_place, Flag) ->
+    bool(Flag);
+do_check_type(dump_log_write_threshold, Writes) when is_integer(Writes), Writes > 0 ->
+    Writes;
+do_check_type(event_module, Mod) when is_atom(Mod) ->
+    Mod;
+do_check_type(ignore_fallback_at_startup, Flag) ->
+    bool(Flag);
+do_check_type(fallback_error_function, {Mod, Func})
+  when is_atom(Mod), is_atom(Func) ->
+    {Mod, Func};
+do_check_type(embedded_mnemosyne, Flag) ->
+    bool(Flag);
+do_check_type(extra_db_nodes, Nodes) when is_list(Nodes) ->
+    %% Drop the local node; every other element must be an atom
+    IsRemote = fun(N) when N == node() -> false;
+                  (N) when is_atom(N) -> true
+               end,
+    lists:filter(IsRemote, Nodes);
+do_check_type(max_wait_for_decision, infinity) ->
+    infinity;
+do_check_type(max_wait_for_decision, Millis) when is_integer(Millis), Millis > 0 ->
+    Millis;
+do_check_type(schema_location, Media) ->
+    media(Media);
+do_check_type(core_dir, Off) when Off =:= "false"; Off =:= false ->
+    false;
+do_check_type(core_dir, Dir) when is_list(Dir) ->
+    Dir;
+do_check_type(pid_sort_order, Order) when Order =:= r9b_plain; Order =:= "r9b_plain" ->
+    r9b_plain;
+do_check_type(pid_sort_order, Order) when Order =:= standard; Order =:= "standard" ->
+    standard;
+do_check_type(pid_sort_order, _) ->
+    false;
+do_check_type(no_table_loaders, Count) when is_integer(Count), Count > 0 ->
+    Count;
+do_check_type(dc_dump_limit, Limit) when is_number(Limit), Limit > 0 ->
+    Limit.
+
+%% Accepts exactly the atoms true and false and returns the argument;
+%% anything else fails with function_clause.
+bool(Flag) when is_boolean(Flag) ->
+    Flag.
+
+%% Accepts exactly the atoms disc, opt_disc and ram and returns the
+%% argument; anything else fails with function_clause.
+media(Media) when Media =:= disc; Media =:= opt_disc; Media =:= ram ->
+    Media.
+
+%% Validates Val for parameter Env and, when valid, stores the
+%% normalised value in the mnesia application environment and
+%% returns it. An invalid value yields {error, {bad_type, Env, Val}}
+%% and changes nothing.
+patch_env(Env, Val) ->
+    Checked = (catch do_check_type(Env, Val)),
+    case Checked of
+        {'EXIT', _Why} ->
+            {error, {bad_type, Env, Val}};
+        _ ->
+            application_controller:set_env(mnesia, Env, Checked),
+            Checked
+    end.
+
+%% Body of the helper process spawned on nodeup: checks Node for an
+%% inconsistency with context running_partitioned_network, unlinks
+%% from the monitor Mon and exits normally.
+detect_partitioned_network(Mon, Node) ->
+    Context = running_partitioned_network,
+    detect_inconcistency([Node], Context),
+    unlink(Mon),
+    exit(normal).
+
+%% Asks every node in Nodes from which we have a mnesia_down whether
+%% it in turn has a mnesia_down from us, and reports each such node
+%% through report_inconsistency/3, whose result is returned. An empty
+%% node list yields ok immediately.
+detect_inconcistency([], _Context) ->
+    ok;
+detect_inconcistency(Nodes, Context) ->
+    Suspects = lists:filter(fun(N) -> mnesia_recover:has_mnesia_down(N) end, Nodes),
+    {Answers, _Unreachable} =
+        rpc:multicall(Suspects, ?MODULE, has_remote_mnesia_down, [node()]),
+    report_inconsistency(Answers, Context, ok).
+
+%% Returns {true, node()} when this node has a mnesia_down from Node
+%% and no master nodes are set for the schema; {false, node()}
+%% otherwise.
+has_remote_mnesia_down(Node) ->
+    SeenDown = mnesia_recover:has_mnesia_down(Node),
+    SchemaMasters = mnesia_recover:get_master_nodes(schema),
+    Answer = (SeenDown == true) andalso (SchemaMasters == []),
+    {Answer, node()}.
+
+%% Goes through the replies of has_remote_mnesia_down/1. For every
+%% {true, Node} an {inconsistent_database, Context, Node} system event
+%% is reported and the status becomes inconsistent_database;
+%% {false, _} and {badrpc, _} replies leave the status untouched.
+%% Returns the final status.
+report_inconsistency([], _Context, Status) ->
+    Status;
+report_inconsistency([{true, Node} | Replies], Context, _Status) ->
+    %% Oops, Mnesia is already running on the other node AND we both
+    %% regard each other as down. The database is potentially
+    %% inconsistent and we have to tell the applications about it, so
+    %% they may perform some clever recovery action.
+    Event = {inconsistent_database, Context, Node},
+    mnesia_lib:report_system_event(Event),
+    report_inconsistency(Replies, Context, inconsistent_database);
+report_inconsistency([{false, _Node} | Replies], Context, Status) ->
+    report_inconsistency(Replies, Context, Status);
+report_inconsistency([{badrpc, _Reason} | Replies], Context, Status) ->
+    report_inconsistency(Replies, Context, Status).
diff --git a/lib/mnesia/src/mnesia_recover.erl b/lib/mnesia/src/mnesia_recover.erl
new file mode 100644
index 0000000000..6c53c2e752
--- /dev/null
+++ b/lib/mnesia/src/mnesia_recover.erl
@@ -0,0 +1,1196 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_recover).
+
+-behaviour(gen_server).
+
+-export([
+ allow_garb/0,
+ call/1,
+ connect_nodes/1,
+ disconnect/1,
+ dump_decision_tab/0,
+ get_master_node_info/0,
+ get_master_node_tables/0,
+ get_master_nodes/1,
+ get_mnesia_downs/0,
+ has_mnesia_down/1,
+ incr_trans_tid_serial/0,
+ init/0,
+ log_decision/1,
+ log_master_nodes/3,
+ log_mnesia_down/1,
+ log_mnesia_up/1,
+ mnesia_down/1,
+ note_decision/2,
+ note_log_decision/2,
+ outcome/2,
+ start/0,
+ start_garb/0,
+ still_pending/1,
+ sync_trans_tid_serial/1,
+ sync/0,
+ wait_for_decision/2,
+ what_happened/3
+ ]).
+
+%% gen_server callbacks
+-export([init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3
+ ]).
+
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [set/2, verbose/2, error/2, fatal/2]).
+
+-record(state, {supervisor,
+ unclear_pid,
+ unclear_decision,
+ unclear_waitfor,
+ tm_queue_len = 0,
+ initiated = false,
+ early_msgs = []
+ }).
+
+%%-define(DBG(F, A), mnesia:report_event(list_to_atom(lists:flatten(io_lib:format(F, A))))).
+%%-define(DBG(F, A), io:format("DBG: " ++ F, A)).
+
+-record(transient_decision, {tid, outcome}).
+
+%% Starts the recover server as a gen_server registered locally under
+%% the module name and linked to the caller. The caller's pid is
+%% handed to init/1. Add {debug, [trace]} to Options to trace the
+%% server.
+start() ->
+    ServerName = {local, ?MODULE},
+    InitArgs = [self()],
+    Options = [{timeout, infinity}],
+    gen_server:start_link(ServerName, ?MODULE, InitArgs, Options).
+
+%% Sends the synchronous 'init' request to the local recover server
+%% through call/1 and returns its answer.
+init() ->
+    Request = init,
+    call(Request).
+
+%% Arms two repeating timers that send to the recover server:
+%% garb_decisions every 2 minutes and check_overload every 10
+%% seconds. Crashes with badmatch if a timer cannot be created.
+start_garb() ->
+    Recover = whereis(mnesia_recover),
+    GarbInterval = timer:minutes(2),
+    {ok, _} = timer:send_interval(GarbInterval, Recover, garb_decisions),
+    OverloadInterval = timer:seconds(10),
+    {ok, _} = timer:send_interval(OverloadInterval, Recover, check_overload).
+
+%% Asynchronously sends allow_garb to the recover server.
+allow_garb() ->
+    Notification = allow_garb,
+    cast(Notification).
+
+
+%% The transaction log has either been switched (latest -> previous) or
+%% there is nothing to be dumped. This means that the previous
+%% transaction log only may contain commit records which refers to
+%% transactions noted in the last two of the 'Prev' tables. All other
+%% tables may now be garbed by 'garb_decisions' (after 2 minutes).
+%% Max 10 tables are kept.
+%% Rotates the transient decision tables, provided the latest one
+%% holds more than 20 entries: a fresh table becomes the latest one,
+%% the old latest is put first among the previous tables, at most 10
+%% previous tables are kept and the rest are deleted. Returns ignore
+%% when the latest table is too small to bother.
+do_allow_garb() ->
+    %% The order of the following stuff is important!
+    Latest = val(latest_transient_decision),
+    %% Don't garb small tables, they are created on every
+    %% dump_log and may be small (empty) for schema transactions
+    %% which are dumped twice
+    case ets:info(Latest, size) > 20 of
+        false ->
+            ignore;
+        true ->
+            Previous = val(previous_transient_decisions),
+            Fresh = create_transient_decision(),
+            {Kept, Expired} = sublist([Latest | Previous], 10, []),
+            lists:foreach(fun(Tab) -> ?ets_delete_table(Tab) end, Expired),
+            set(previous_transient_decisions, Kept),
+            set(latest_transient_decision, Fresh)
+    end.
+
+%% Splits a list after at most N elements. Acc holds the elements
+%% taken so far in reverse order. Returns {Taken, Remaining}.
+sublist(List, N, Acc) ->
+    case List of
+        [Head | Tail] when N > 0 ->
+            sublist(Tail, N - 1, [Head | Acc]);
+        _ ->
+            {lists:reverse(Acc), List}
+    end.
+
+%% Keeps only the first two of the previous transient decision
+%% tables and deletes all the others. Returns ignore when there are
+%% fewer than two previous tables.
+do_garb_decisions() ->
+    case val(previous_transient_decisions) of
+        [Newest, NextNewest | Expired] ->
+            set(previous_transient_decisions, [Newest, NextNewest]),
+            [?ets_delete_table(Tab) || Tab <- Expired];
+        _ ->
+            ignore
+    end.
+
+%% Synchronous {connect_nodes, Ns} request to the recover server.
+connect_nodes(Ns) ->
+    Request = {connect_nodes, Ns},
+    call(Request).
+
+%% Synchronous {disconnect, Node} request to the recover server.
+disconnect(Node) ->
+    Request = {disconnect, Node},
+    call(Request).
+
+%% Asynchronously hands the decision D to the recover server
+%% (message {log_decision, D}).
+log_decision(D) ->
+    Notification = {log_decision, D},
+    cast(Notification).
+
+%% Looks up the global variable Var; when the lookup fails the
+%% result of mnesia_lib:other_val/2 is returned instead.
+val(Var) ->
+    Stored = ?catch_val(Var),
+    case Stored of
+        {'EXIT', Why} ->
+            mnesia_lib:other_val(Var, Why);
+        _ ->
+            Stored
+    end.
+
+%% Calls the local recover server synchronously while linked to it.
+%% If an 'EXIT' message from the server is waiting in the mailbox
+%% after the call, the reply is discarded and
+%% {error, {node_not_running, Node}} is returned instead. The same
+%% error is returned when no server is registered.
+call(Msg) ->
+    case whereis(?MODULE) of
+        undefined ->
+            {error, {node_not_running, node()}};
+        Server ->
+            link(Server),
+            Reply = gen_server:call(Server, Msg, infinity),
+            unlink(Server),
+            %% An exit signal arrives as a message if the server died
+            receive
+                {'EXIT', Server, _Why} ->
+                    {error, {node_not_running, node()}}
+            after 0 ->
+                Reply
+            end
+    end.
+
+%% Runs call(Msg) in this module on every node in Nodes through
+%% rpc:multicall/4 and returns its result.
+multicall(Nodes, Msg) ->
+    CallArgs = [Msg],
+    rpc:multicall(Nodes, ?MODULE, call, CallArgs).
+
+%% Sends Msg asynchronously to the local recover server. Returns
+%% ignore when no server is registered.
+cast(Msg) ->
+    case whereis(?MODULE) of
+        undefined ->
+            ignore;
+        Server ->
+            gen_server:cast(Server, Msg)
+    end.
+
+%% Casts Msg to the recover servers registered on Nodes.
+abcast(Nodes, Msg) ->
+    ServerName = ?MODULE,
+    gen_server:abcast(Nodes, ServerName, Msg).
+
+%% Records Outcome for transaction Tid in the latest transient
+%% decision table.
+note_decision(Tid, Outcome) ->
+    Decision = #transient_decision{tid = Tid, outcome = Outcome},
+    Latest = val(latest_transient_decision),
+    ?ets_insert(Latest, Decision).
+
+%% Removes the mnesia_decision entry keyed by Node. The date and
+%% time arguments are not used.
+note_up(Node, _Date, _Time) ->
+    Table = mnesia_decision,
+    ?ets_delete(Table, Node).
+
+%% Stores {mnesia_down, Node, Date, Time} in mnesia_decision.
+note_down(Node, Date, Time) ->
+    Entry = {mnesia_down, Node, Date, Time},
+    ?ets_insert(mnesia_decision, Entry).
+
+%% Stores the master nodes of Tab in mnesia_decision as
+%% {master_nodes, Tab, Nodes}. An empty node list removes the entry
+%% keyed by Tab instead.
+note_master_nodes(Tab, Nodes) when is_list(Nodes), Nodes =/= [] ->
+    ?ets_insert(mnesia_decision, {master_nodes, Tab, Nodes});
+note_master_nodes(Tab, []) ->
+    ?ets_delete(mnesia_decision, Tab).
+
+%% Records the decision D. Without remaining disc nodes the filtered
+%% outcome is noted as a transient decision and the mnesia_decision
+%% entry for the transaction is removed; otherwise D itself is stored
+%% in mnesia_decision.
+note_outcome(D) when D#decision.disc_nodes /= [] ->
+    ?ets_insert(mnesia_decision, D);
+note_outcome(D) when D#decision.disc_nodes == [] ->
+    Tid = D#decision.tid,
+    note_decision(Tid, filter_outcome(D#decision.outcome)),
+    ?ets_delete(mnesia_decision, Tid).
+
+%% Logs the decision D. A decision whose outcome is not unclear is
+%% first merged with what is already known about the transaction, and
+%% the other nodes are told about it afterwards; an unclear decision
+%% is logged as is and nobody is told.
+do_log_decision(D) when D#decision.outcome /= unclear ->
+    Known = decision(D#decision.tid),
+    Merged = merge_decisions(node(), Known, D),
+    do_log_decision(Merged, true, D);
+do_log_decision(D) ->
+    DoTell = false,
+    do_log_decision(D, DoTell, undefined).
+
+%% Notes the decision D and, when Mnesia uses its directory, appends
+%% it to the latest log. For aborted and committed outcomes the local
+%% node is first removed from the decision's disc nodes. When DoTell
+%% is true and the outcome is not unclear, the other disc and ram
+%% nodes of NodeD are told that we are certain. Returns ignore when
+%% nothing is told.
+do_log_decision(D, DoTell, NodeD) ->
+    OtherDiscNodes = D#decision.disc_nodes -- [node()],
+    Outcome = D#decision.outcome,
+    Certain = (Outcome =:= aborted) orelse (Outcome =:= committed),
+    Noted = case Certain of
+                true -> D#decision{disc_nodes = OtherDiscNodes};
+                false -> D
+            end,
+    note_outcome(Noted),
+    case mnesia_monitor:use_dir() of
+        false ->
+            ignore;
+        true ->
+            mnesia_log:append(latest_log, Noted),
+            if
+                DoTell == true, Outcome /= unclear ->
+                    tell_im_certain(NodeD#decision.disc_nodes -- [node()], Noted),
+                    tell_im_certain(NodeD#decision.ram_nodes -- [node()], Noted);
+                true ->
+                    ignore
+            end
+    end.
+
+%% Casts {im_certain, Node, D} to the recover servers on Nodes.
+%% Returns ignore for an empty node list.
+tell_im_certain(Nodes, D) when Nodes =/= [] ->
+    abcast(Nodes, {im_certain, node(), D});
+tell_im_certain([], _D) ->
+    ignore.
+
+%% Synchronous 'sync' request to the recover server.
+sync() ->
+    Request = sync,
+    call(Request).
+
+%% Synchronous {log_mnesia_up, Node} request to the recover server.
+log_mnesia_up(Node) ->
+    Request = {log_mnesia_up, Node},
+    call(Request).
+
+%% Synchronous {log_mnesia_down, Node} request to the recover server.
+log_mnesia_down(Node) ->
+    Request = {log_mnesia_down, Node},
+    call(Request).
+
+%% Returns the nodes for which mnesia_decision holds a mnesia_down
+%% entry.
+get_mnesia_downs() ->
+    [Node || {mnesia_down, Node, _Date, _Time}
+                 <- ?ets_match_object(mnesia_decision, {mnesia_down, '_', '_', '_'})].
+
+%% True when mnesia_decision holds a mnesia_down entry for Node,
+%% false when it holds nothing under that key.
+has_mnesia_down(Node) ->
+    case ?ets_lookup(mnesia_decision, Node) of
+        [] ->
+            false;
+        [{mnesia_down, Node, _Date, _Time}] ->
+            true
+    end.
+
+%% Handles a mnesia_down of Node: removes it from recover_nodes and
+%% casts {mnesia_down, Node} to the recover server. Returns ignore
+%% when recover_nodes cannot be looked up yet.
+mnesia_down(Node) ->
+    case ?catch_val(recover_nodes) of
+        {'EXIT', _} ->
+            %% Not started yet
+            ignore;
+        _Started ->
+            mnesia_lib:del(recover_nodes, Node),
+            Notification = {mnesia_down, Node},
+            cast(Notification)
+    end.
+
+%% Logs the master nodes given as a list of {Tab, Nodes} pairs.
+%% When Mnesia is running the work is passed straight to
+%% log_master_nodes2/4. When it is not running and no directory is
+%% used, nothing is done. Otherwise the latest log file is opened
+%% first; a transaction log header is written if the file did not
+%% exist and disk_log reports it as repaired. Returns
+%% {error, Reason} if the log cannot be opened.
+log_master_nodes(Args, UseDir, IsRunning) ->
+    if
+        IsRunning == yes ->
+            log_master_nodes2(Args, UseDir, IsRunning, ok);
+        UseDir == false ->
+            ok;
+        true ->
+            LogName = latest_log,
+            LogFile = mnesia_log:latest_log_file(),
+            Existed = mnesia_lib:exists(LogFile),
+            Repair = mnesia:system_info(auto_repair),
+            Options = [{file, LogFile}, {name, LogName}, {repair, Repair}],
+            case disk_log:open(Options) of
+                {ok, LogName} ->
+                    log_master_nodes2(Args, UseDir, IsRunning, ok);
+                {repaired, LogName, {recovered, _R}, {badbytes, _B}}
+                  when Existed == true ->
+                    log_master_nodes2(Args, UseDir, IsRunning, ok);
+                {repaired, LogName, {recovered, _R}, {badbytes, _B}}
+                  when Existed == false ->
+                    mnesia_log:write_trans_log_header(),
+                    log_master_nodes2(Args, UseDir, IsRunning, ok);
+                {error, Why} ->
+                    {error, Why}
+            end
+    end.
+
+%% Logs each {Tab, Nodes} pair in turn. The last argument carries the result
+%% to return: it starts as given by the caller and is replaced by the most
+%% recent {error, Reason} seen. When the system is not running, latest_log is
+%% closed once the list is exhausted.
+log_master_nodes2([], _UseDir, IsRunning, WorstRes) ->
+    case IsRunning of
+        yes ->
+            WorstRes;
+        _ ->
+            disk_log:close(latest_log),
+            WorstRes
+    end;
+log_master_nodes2([{Tab, Nodes} | Rest], UseDir, IsRunning, WorstRes) ->
+    Res =
+        case IsRunning of
+            yes ->
+                Reply = call({log_master_nodes, Tab, Nodes, UseDir, IsRunning}),
+                mnesia_controller:master_nodes_updated(Tab, Nodes),
+                Reply;
+            _ ->
+                do_log_master_nodes(Tab, Nodes, UseDir, IsRunning)
+        end,
+    NextWorst =
+        case Res of
+            ok -> WorstRes;
+            {error, Reason} -> {error, Reason}
+        end,
+    log_master_nodes2(Rest, UseDir, IsRunning, NextWorst).
+
+%% Returns all {master_nodes, Tab, Nodes} entries in the mnesia_decision
+%% table, or [] if the lookup exits.
+get_master_node_info() ->
+    Pattern = {master_nodes, '_', '_'},
+    case catch mnesia_lib:db_match_object(ram_copies, mnesia_decision, Pattern) of
+        {'EXIT', _Reason} ->
+            [];
+        Entries ->
+            Entries
+    end.
+
+%% Returns the tables that have a master_nodes entry.
+get_master_node_tables() ->
+    [Tab || {master_nodes, Tab, _Nodes} <- get_master_node_info()].
+
+%% Returns the third element of the mnesia_decision entry stored under Tab,
+%% or [] if the lookup exits (e.g. no such entry).
+get_master_nodes(Tab) ->
+    Lookup = (catch ?ets_lookup_element(mnesia_decision, Tab, 3)),
+    case Lookup of
+        {'EXIT', _Reason} -> [];
+        Masters -> Masters
+    end.
+
+%% Determines the outcome of transaction Tid.
+%% The default outcome is aborted for asym_trans and unclear for every other
+%% protocol (sym_trans and sync_sym_trans). If the local node is among Nodes
+%% it is asked first, and the remaining nodes are only consulted when the
+%% local answer is unclear.
+what_happened(Tid, Protocol, Nodes) ->
+    Default =
+        if
+            Protocol == asym_trans -> aborted;
+            true -> unclear
+        end,
+    Me = node(),
+    case lists:member(Me, Nodes) of
+        false ->
+            what_happened_remotely(Tid, Default, Nodes);
+        true ->
+            {ok, Outcome} = call({what_happened, Default, Tid}),
+            Others = Nodes -- [Me],
+            case filter_outcome(Outcome) of
+                unclear ->
+                    what_happened_remotely(Tid, Default, Others);
+                Decided when Decided == aborted; Decided == committed ->
+                    Decided
+            end
+    end.
+
+%% Asks Nodes what happened to Tid and tallies their replies.
+what_happened_remotely(Tid, Default, Nodes) ->
+    Request = {what_happened, Default, Tid},
+    {Replies, _BadNodes} = multicall(Nodes, Request),
+    check_what_happened(Replies, 0, 0).
+
+%% Counts aborted and committed replies and derives the final outcome.
+%% Replies that are unclear, {error, _} or {badrpc, _} are not counted.
+check_what_happened([Reply | Replies], Aborts, Commits) ->
+    case Reply of
+        {badrpc, _} ->
+            check_what_happened(Replies, Aborts, Commits);
+        {error, _} ->
+            check_what_happened(Replies, Aborts, Commits);
+        {ok, Raw} ->
+            case filter_outcome(Raw) of
+                unclear ->
+                    check_what_happened(Replies, Aborts, Commits);
+                aborted ->
+                    check_what_happened(Replies, Aborts + 1, Commits);
+                committed ->
+                    check_what_happened(Replies, Aborts, Commits + 1)
+            end
+    end;
+check_what_happened([], Aborts, Commits) ->
+    if
+        Aborts > 0 -> aborted;                    % Somebody has aborted
+        Aborts == 0, Commits > 0 -> committed;    % All have committed
+        Aborts == 0, Commits == 0 -> aborted      % None of the active nodes knows
+    end.
+
+%% Determine what has happened to the transaction
+%% and possibly wait forever for the decision.
+%% A bare presume_commit (sym_trans) is answered with committed at once;
+%% a decision record with outcome presume_abort enters the waiting loop.
+wait_for_decision(presume_commit, _InitBy) ->
+    {{presume_commit, self()}, committed};
+
+wait_for_decision(#decision{outcome = presume_abort} = D, InitBy) ->
+    wait_for_decision(D, InitBy, 0).
+
+%% Waiting loop for asym_trans decisions. N counts the retries made while
+%% the known outcome is still presume_abort; after more than 10 of them the
+%% transaction is reported as aborted. For any other undecided outcome the
+%% loop sleeps and retries unless InitBy is startup, in which case the
+%% server is asked to wait for the decision.
+wait_for_decision(D, InitBy, N) ->
+    Tid = D#decision.tid,
+    MaxRetries = 10,
+    case outcome(Tid, D#decision.outcome) of
+        committed ->
+            {Tid, committed};
+        aborted ->
+            {Tid, aborted};
+        presume_abort when N > MaxRetries ->
+            {Tid, aborted};
+        presume_abort ->
+            %% busy loop for ets decision moving
+            timer:sleep(10),
+            wait_for_decision(D, InitBy, N + 1);
+        _ when InitBy /= startup ->
+            %% Wait a while for active transactions
+            %% to end and try again
+            timer:sleep(100),
+            wait_for_decision(D, InitBy, N);
+        _ when InitBy == startup ->
+            {ok, Res} = call({wait_for_decision, D}),
+            {Tid, Res}
+    end.
+
+%% Returns the transaction ids whose outcome is still unclear,
+%% in their original order.
+still_pending(Tids) ->
+    [Tid || Tid <- Tids,
+            filter_outcome(outcome(Tid, unclear)) =:= unclear].
+
+%% Opens the decision tab, notes every decision found in it and closes it.
+load_decision_tab() ->
+    FirstCont = mnesia_log:open_decision_tab(),
+    load_decision_tab(FirstCont, load_decision_tab),
+    mnesia_log:close_decision_tab().
+
+%% Reads the decision tab chunk by chunk until eof, noting each chunk's
+%% decisions with note_log_decisions/2.
+load_decision_tab(eof, _InitBy) ->
+    ok;
+load_decision_tab(Cont, InitBy) ->
+    case mnesia_log:chunk_decision_tab(Cont) of
+        eof ->
+            ok;
+        {NextCont, Decisions} ->
+            note_log_decisions(Decisions, InitBy),
+            load_decision_tab(NextCont, InitBy)
+    end.
+
+%% Converts decision log files left by an older format.
+%% If either the previous or the current decision log file exists, the
+%% decision log is opened, dumped twice, closed, and the current decision
+%% log file is deleted. From then on all decisions are logged in the
+%% transaction log file.
+convert_old() ->
+    PrevExists = mnesia_lib:exists(mnesia_log:previous_decision_log_file()),
+    CurrExists = mnesia_lib:exists(mnesia_log:decision_log_file()),
+    case PrevExists or CurrExists of
+        false ->
+            ignore;
+        true ->
+            mnesia_log:open_decision_log(),
+            dump_decision_log(startup),
+            dump_decision_log(startup),
+            mnesia_log:close_decision_log(),
+            ok = file:delete(mnesia_log:decision_log_file())
+    end.
+
+%% Prepares a decision log dump and performs it.
+%% Assumed to be run in the transaction log dumper process.
+dump_decision_log(InitBy) ->
+    FirstCont = mnesia_log:prepare_decision_log_dump(),
+    perform_dump_decision_log(FirstCont, InitBy).
+
+%% Walks the decision log and confirms the dump when done. The log contents
+%% are only read and noted when the dump was initiated by startup; for any
+%% other initiator the dump is confirmed immediately.
+perform_dump_decision_log(eof, _InitBy) ->
+    confirm_decision_log_dump();
+perform_dump_decision_log(Cont, startup = InitBy) ->
+    case mnesia_log:chunk_decision_log(Cont) of
+        eof ->
+            confirm_decision_log_dump();
+        {NextCont, Decisions} ->
+            note_log_decisions(Decisions, InitBy),
+            perform_dump_decision_log(NextCont, InitBy)
+    end;
+perform_dump_decision_log(_Cont, _InitBy) ->
+    confirm_decision_log_dump().
+
+%% Saves the decision table and then confirms the decision log dump.
+confirm_decision_log_dump() ->
+    _ = dump_decision_tab(),
+    mnesia_log:confirm_decision_log_dump().
+
+%% Saves every object in the mnesia_decision table as one decision_list.
+dump_decision_tab() ->
+    Everything = mnesia_lib:db_match_object(ram_copies, mnesia_decision, '_'),
+    mnesia_log:save_decision_tab({decision_list, Everything}).
+
+%% Notes each logged item in order with note_log_decision/2. Returns ok.
+note_log_decisions(Items, InitBy) ->
+    lists:foreach(fun(Item) -> note_log_decision(Item, InitBy) end, Items).
+
+%% Applies one item read from a decision log or decision tab.
+%%   - a decision record: a pre_commit outcome is treated as unclear; the
+%%     transaction serial is synced and the outcome is noted
+%%   - {trans_tid, serial, Serial}: ignored at startup, otherwise synced
+%%   - {mnesia_up | mnesia_down, Node, Date, Time}: noted
+%%   - {master_nodes, Tab, Nodes}: noted
+%%   - a log header: its version is checked against the expected one
+%%   - {decision_list, Items}: every item is applied in turn
+%% Any other item makes the process exit.
+note_log_decision(NewD, InitBy) when NewD#decision.outcome == pre_commit ->
+    note_log_decision(NewD#decision{outcome = unclear}, InitBy);
+
+note_log_decision(NewD, _InitBy) when is_record(NewD, decision) ->
+    Tid = NewD#decision.tid,
+    sync_trans_tid_serial(Tid),
+    note_outcome(NewD);
+note_log_decision({trans_tid, serial, _Serial}, startup) ->
+    ignore;
+note_log_decision({trans_tid, serial, Serial}, _InitBy) ->
+    sync_trans_tid_serial(Serial);
+note_log_decision({mnesia_up, Node, Date, Time}, _InitBy) ->
+    note_up(Node, Date, Time);
+note_log_decision({mnesia_down, Node, Date, Time}, _InitBy) ->
+    note_down(Node, Date, Time);
+note_log_decision({master_nodes, Tab, Nodes}, _InitBy) ->
+    note_master_nodes(Tab, Nodes);
+note_log_decision(H, _InitBy) when H#log_header.log_kind == decision_log ->
+    V = mnesia_log:decision_log_version(),
+    if
+        H#log_header.log_version == V ->
+            ok;
+        H#log_header.log_version == "2.0" ->
+            %% Report the version found in the log, not the current
+            %% version V, since the message is about the old format.
+            verbose("Accepting an old version format of decision log: ~p~n",
+                    [H#log_header.log_version]),
+            ok;
+        true ->
+            fatal("Bad version of decision log: ~p~n", [H])
+    end;
+note_log_decision(H, _InitBy) when H#log_header.log_kind == decision_tab ->
+    V = mnesia_log:decision_tab_version(),
+    if
+        V == H#log_header.log_version ->
+            ok;
+        true ->
+            fatal("Bad version of decision tab: ~p~n", [H])
+    end;
+note_log_decision({decision_list, ItemList}, InitBy) ->
+    note_log_decisions(ItemList, InitBy);
+note_log_decision(BadItem, InitBy) ->
+    exit({"Bad decision log item", BadItem, InitBy}).
+
+%% Returns the current transaction serial, i.e. element 3 of the entry
+%% stored under the key `serial' in mnesia_decision.
+trans_tid_serial() ->
+    Key = serial,
+    ?ets_lookup_element(mnesia_decision, Key, 3).
+
+%% Stores Val as the transaction serial in mnesia_decision.
+set_trans_tid_serial(Val) ->
+    Entry = {trans_tid, serial, Val},
+    ?ets_insert(mnesia_decision, Entry).
+
+%% Increments the transaction serial in mnesia_decision by one.
+incr_trans_tid_serial() ->
+    Step = 1,
+    ?ets_update_counter(mnesia_decision, serial, Step).
+
+%% Moves the local transaction serial past a counter seen elsewhere.
+%% The argument is either an integer counter or a tid record, whose counter
+%% field is used. The local serial is only changed (to the given counter
+%% plus one) when the given counter is greater than the local one.
+sync_trans_tid_serial(ThatCounter) when is_integer(ThatCounter) ->
+    case ThatCounter > trans_tid_serial() of
+        true ->
+            set_trans_tid_serial(ThatCounter + 1);
+        false ->
+            ignore
+    end;
+sync_trans_tid_serial(Tid) ->
+    sync_trans_tid_serial(Tid#tid.counter).
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Callback functions from gen_server
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) ->
+    process_flag(trap_exit, true),
+    mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
+    %% Start with a fresh transient decision table, no previous ones
+    %% and no nodes to recover with.
+    TransientTab = create_transient_decision(),
+    set(latest_transient_decision, TransientTab),
+    set(previous_transient_decisions, []),
+    set(recover_nodes, []),
+    {ok, #state{supervisor = Parent}}.
+
+%% Creates a new public set table for transient decisions, keyed on
+%% element 2 of the stored tuples.
+create_transient_decision() ->
+    Options = [{keypos, 2}, set, public],
+    ?ets_new_table(mnesia_transient_decision, Options).
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% init: creates the named public table mnesia_decision (keyed on element 2)
+%% and resets the transaction serial to 0. When a directory is used, the
+%% decision tab file is loaded if it exists, old decision logs are converted
+%% and a log dump is requested. Buffered early messages are then handled and
+%% the caller is answered from handle_early_msgs/2.
+handle_call(init, From, State) when State#state.initiated == false ->
+ Args = [{keypos, 2}, set, public, named_table],
+ case mnesia_monitor:use_dir() of
+ true ->
+ ?ets_new_table(mnesia_decision, Args),
+ set_trans_tid_serial(0),
+ TabFile = mnesia_log:decision_tab_file(),
+ case mnesia_lib:exists(TabFile) of
+ true ->
+ load_decision_tab();
+ false ->
+ ignore
+ end,
+ convert_old(),
+ mnesia_dumper:opt_dump_log(scan_decisions);
+ false ->
+ ?ets_new_table(mnesia_decision, Args),
+ set_trans_tid_serial(0)
+ end,
+ handle_early_msgs(State, From);
+
+%% Any other call received before init is buffered (newest first) and is
+%% answered later when the early messages are replayed.
+handle_call(Msg, From, State) when State#state.initiated == false ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ {noreply, State#state{early_msgs = [{call, Msg, From} | Msgs]}};
+
+%% Disconnects Node via mnesia_monitor and drops it from recover_nodes.
+handle_call({disconnect, Node}, _From, State) ->
+ mnesia_monitor:disconnect(Node),
+ mnesia_lib:del(recover_nodes, Node),
+ {reply, ok, State};
+
+%% Negotiates a protocol with the nodes in Ns that are neither the local node
+%% nor already in recover_nodes. The reply is {GoodNodes, AlreadyConnected}
+%% and is always sent with gen_server:reply/2 because this clause is also
+%% invoked from handle_info/2.
+handle_call({connect_nodes, Ns}, From, State) ->
+ %% Determine which nodes we should try to connect
+ AlreadyConnected = val(recover_nodes),
+ {_, Nodes} = mnesia_lib:search_delete(node(), Ns),
+ Check = Nodes -- AlreadyConnected,
+ case mnesia_monitor:negotiate_protocol(Check) of
+ busy ->
+ %% monitor is disconnecting some nodes retry
+ %% the req (to avoid deadlock).
+ erlang:send_after(2, self(), {connect_nodes,Ns,From}),
+ {noreply, State};
+ [] ->
+ %% No good nodes to connect to!
+ %% We can't use reply here because this function can be
+ %% called from handle_info
+ gen_server:reply(From, {[], AlreadyConnected}),
+ {noreply, State};
+ GoodNodes ->
+ %% Now we have agreed upon a protocol with some new nodes
+ %% and we may use them when we recover transactions
+ mnesia_lib:add_list(recover_nodes, GoodNodes),
+ cast({announce_all, GoodNodes}),
+ case get_master_nodes(schema) of
+ [] ->
+ Context = starting_partitioned_network,
+ mnesia_monitor:detect_inconcistency(GoodNodes, Context);
+ _ -> %% If master_nodes is set ignore old inconsistencies
+ ignore
+ end,
+ gen_server:reply(From, {GoodNodes, AlreadyConnected}),
+ {noreply,State}
+ end;
+
+%% Replies {ok, Outcome} for Tid, using Default when no outcome is stored.
+handle_call({what_happened, Default, Tid}, _From, State) ->
+ sync_trans_tid_serial(Tid),
+ Outcome = outcome(Tid, Default),
+ {reply, {ok, Outcome}, State};
+
+%% Replies {ok, aborted} at once when there is no remote disc node and no
+%% alive remote ram node to wait for. Otherwise the alive nodes are asked for
+%% their decision, the caller is stored in the state and the reply is
+%% deferred; a force_decision timer is started unless max_wait_for_decision
+%% is infinity.
+handle_call({wait_for_decision, D}, From, State) ->
+ Recov = val(recover_nodes),
+ AliveRam = (mnesia_lib:intersect(D#decision.ram_nodes, Recov) -- [node()]),
+ RemoteDisc = D#decision.disc_nodes -- [node()],
+ if
+ AliveRam == [], RemoteDisc == [] ->
+ %% No more else to wait for and we may safely abort
+ {reply, {ok, aborted}, State};
+ true ->
+ verbose("Transaction ~p is unclear. "
+ "Wait for disc nodes: ~w ram: ~w~n",
+ [D#decision.tid, RemoteDisc, AliveRam]),
+ AliveDisc = mnesia_lib:intersect(RemoteDisc, Recov),
+ Msg = {what_decision, node(), D},
+ abcast(AliveRam, Msg),
+ abcast(AliveDisc, Msg),
+ case val(max_wait_for_decision) of
+ infinity ->
+ ignore;
+ MaxWait ->
+ ForceMsg = {force_decision, D#decision.tid},
+ {ok, _} = timer:send_after(MaxWait, ForceMsg)
+ end,
+ State2 = State#state{unclear_pid = From,
+ unclear_decision = D,
+ unclear_waitfor = (RemoteDisc ++ AliveRam)},
+ {noreply, State2}
+ end;
+
+handle_call({log_mnesia_up, Node}, _From, State) ->
+ do_log_mnesia_up(Node),
+ {reply, ok, State};
+
+handle_call({log_mnesia_down, Node}, _From, State) ->
+ do_log_mnesia_down(Node),
+ {reply, ok, State};
+
+%% NOTE(review): the result of do_log_master_nodes/4 is discarded here and
+%% the reply is always ok.
+handle_call({log_master_nodes, Tab, Nodes, UseDir, IsRunning}, _From, State) ->
+ do_log_master_nodes(Tab, Nodes, UseDir, IsRunning),
+ {reply, ok, State};
+
+%% Replies ok once the request is processed.
+handle_call(sync, _From, State) ->
+ {reply, ok, State};
+
+%% Unknown requests are logged; no reply is sent to the caller.
+handle_call(Msg, _From, State) ->
+ error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%% Notes that Node came up at the current date and time. When a directory
+%% is used, the event is first appended to latest_log and synced.
+do_log_mnesia_up(Node) ->
+    Date = date(),
+    Time = time(),
+    Event = {mnesia_up, Node, Date, Time},
+    case mnesia_monitor:use_dir() of
+        false ->
+            ignore;
+        true ->
+            mnesia_log:append(latest_log, Event),
+            disk_log:sync(latest_log)
+    end,
+    note_up(Node, Date, Time).
+
+%% Notes that Node went down at the current date and time. When a directory
+%% is used, the event is first appended to latest_log and synced.
+do_log_mnesia_down(Node) ->
+    Date = date(),
+    Time = time(),
+    Event = {mnesia_down, Node, Date, Time},
+    case mnesia_monitor:use_dir() of
+        false ->
+            ignore;
+        true ->
+            mnesia_log:append(latest_log, Event),
+            disk_log:sync(latest_log)
+    end,
+    note_down(Node, Date, Time).
+
+%% Logs the master nodes of Tab. When UseDir is true the entry is appended
+%% to latest_log and synced, and the append result is returned; otherwise
+%% the result is ok. When the system is running the entry is also noted
+%% with note_master_nodes/2.
+do_log_master_nodes(Tab, Nodes, UseDir, IsRunning) ->
+    Res =
+        case UseDir of
+            false ->
+                ok;
+            true ->
+                Appended = mnesia_log:append(latest_log, {master_nodes, Tab, Nodes}),
+                disk_log:sync(latest_log),
+                Appended
+        end,
+    case IsRunning of
+        yes -> note_master_nodes(Tab, Nodes);
+        _NotRunning -> ignore
+    end,
+    Res.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% Casts received before init are buffered (newest first) and replayed later.
+handle_cast(Msg, State) when State#state.initiated == false ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ {noreply, State#state{early_msgs = [{cast, Msg} | Msgs]}};
+
+%% Node is certain about NewD: merge it with what is known locally and log
+%% the merged decision without telling anybody else.
+handle_cast({im_certain, Node, NewD}, State) ->
+ OldD = decision(NewD#decision.tid),
+ MergedD = merge_decisions(Node, OldD, NewD),
+ do_log_decision(MergedD, false, undefined),
+ {noreply, State};
+
+handle_cast({log_decision, D}, State) ->
+ do_log_decision(D),
+ {noreply, State};
+
+handle_cast(allow_garb, State) ->
+ do_allow_garb(),
+ {noreply, State};
+
+%% Decisions announced by Node: Node is added to recover_nodes and every
+%% decision is merged into the local view.
+handle_cast({decisions, Node, Decisions}, State) ->
+ mnesia_lib:add(recover_nodes, Node),
+ State2 = add_remote_decisions(Node, Decisions, State),
+ {noreply, State2};
+
+%% Node asks for our decision about OtherD's transaction. The local decision
+%% is sent back if there is one, otherwise OtherD itself is returned.
+handle_cast({what_decision, Node, OtherD}, State) ->
+ Tid = OtherD#decision.tid,
+ sync_trans_tid_serial(Tid),
+ Decision =
+ case decision(Tid) of
+ no_decision -> OtherD;
+ MyD when is_record(MyD, decision) -> MyD
+ end,
+ announce([Node], [Decision], [], true),
+ {noreply, State};
+
+%% Node went down. If we are waiting for an unclear decision and Node is one
+%% of its ram nodes, the pending decision is fed back in on Node's behalf.
+handle_cast({mnesia_down, Node}, State) ->
+ case State#state.unclear_decision of
+ undefined ->
+ {noreply, State};
+ D ->
+ case lists:member(Node, D#decision.ram_nodes) of
+ false ->
+ {noreply, State};
+ true ->
+ State2 = add_remote_decision(Node, D, State),
+ {noreply, State2}
+ end
+ end;
+
+handle_cast({announce_all, Nodes}, State) ->
+ announce_all(Nodes),
+ {noreply, State};
+
+%% Unknown casts are logged and otherwise ignored.
+handle_cast(Msg, State) ->
+ error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% No need for buffering
+%% handle_info(Msg, State) when State#state.initiated == false ->
+%% %% Buffer early messages
+%% Msgs = State#state.early_msgs,
+%% {noreply, State#state{early_msgs = [{info, Msg} | Msgs]}};
+
+%% Retry of a connect_nodes request that was postponed because the monitor
+%% was busy; it is handled exactly like the original call.
+handle_info({connect_nodes, Ns, From}, State) ->
+    handle_call({connect_nodes,Ns},From,State);
+
+%% Overload check of mnesia_tm. A mnesia_overload system event is reported
+%% when the message queue of mnesia_tm exceeded the threshold both at the
+%% previous check and at this one.
+handle_info(check_overload, S) ->
+    %% Time to check if mnesia_tm is overloaded
+    case whereis(mnesia_tm) of
+        Pid when is_pid(Pid) ->
+            Threshold = 100,
+            Prev = S#state.tm_queue_len,
+            case process_info(Pid, message_queue_len) of
+                {message_queue_len, Len} when Len > Threshold, Prev > Threshold ->
+                    What = {mnesia_tm, message_queue_len, [Prev, Len]},
+                    mnesia_lib:report_system_event({mnesia_overload, What}),
+                    {noreply, S#state{tm_queue_len = 0}};
+                {message_queue_len, Len} when Len > Threshold ->
+                    {noreply, S#state{tm_queue_len = Len}};
+                {message_queue_len, _Len} ->
+                    {noreply, S#state{tm_queue_len = 0}};
+                undefined ->
+                    %% mnesia_tm terminated between whereis/1 and
+                    %% process_info/2; treat it like an unregistered name.
+                    {noreply, S}
+            end;
+        undefined ->
+            {noreply, S}
+    end;
+
+handle_info(garb_decisions, State) ->
+    do_garb_decisions(),
+    {noreply, State};
+
+handle_info({force_decision, Tid}, State) ->
+    %% Enforce a transaction recovery decision,
+    %% if we still are waiting for the outcome
+    case State#state.unclear_decision of
+        U when U#decision.tid == Tid ->
+            verbose("Decided to abort transaction ~p since "
+                    "max_wait_for_decision has been exceeded~n",
+                    [Tid]),
+            D = U#decision{outcome = aborted},
+            State2 = add_remote_decision(node(), D, State),
+            {noreply, State2};
+        _ ->
+            {noreply, State}
+    end;
+
+%% The supervisor exited: stop with reason shutdown.
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
+    mnesia_lib:dbg_out("~p was ~p~n",[?MODULE, R]),
+    {stop, shutdown, State};
+
+%% Unknown messages are logged and otherwise ignored.
+handle_info(Msg, State) ->
+    error("~p got unexpected info: ~p~n", [?MODULE, Msg]),
+    {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+
+terminate(Reason, State) ->
+    %% Termination handling is delegated to mnesia_monitor.
+    Who = ?MODULE,
+    mnesia_monitor:terminate_proc(Who, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+code_change(_FromVsn, State, _Extra) ->
+    %% The state is carried over unchanged.
+    {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+%% Marks the server as initiated, replays the buffered early messages and
+%% then answers the init caller with ok. Returns the result of the replay.
+handle_early_msgs(State, From) ->
+    Buffered = State#state.early_msgs,
+    Ready = State#state{early_msgs = [], initiated = true},
+    Res = do_handle_early_msgs(Buffered, Ready),
+    gen_server:reply(From, ok),
+    Res.
+
+%% Replays buffered messages. The buffer is in reverse order (newest
+%% first), so the tail is handled before the head. A stop result ends the
+%% replay and is returned as is.
+do_handle_early_msgs([], State) ->
+    {noreply, State};
+do_handle_early_msgs([Newest | Older], State) ->
+    case do_handle_early_msgs(Older, State) of
+        {noreply, State2} ->
+            handle_early_msg(Newest, State2);
+        {stop, Reason, State2} ->
+            {stop, Reason, State2}
+    end.
+
+%% Dispatches one buffered message to the matching callback. A {reply, ...}
+%% result from handle_call/3 is delivered with gen_server:reply/2 and turned
+%% into {noreply, State}.
+handle_early_msg({call, Request, Client}, State) ->
+    case handle_call(Request, Client, State) of
+        {reply, Reply, NewState} ->
+            gen_server:reply(Client, Reply),
+            {noreply, NewState};
+        Other ->
+            Other
+    end;
+handle_early_msg({cast, Request}, State) ->
+    handle_cast(Request, State);
+handle_early_msg({info, Info}, State) ->
+    handle_info(Info, State).
+
+%% Returns the decision tables to search, ordered by hit probability:
+%% the latest transient table, mnesia_decision, then the previous
+%% transient tables. The latest table is read first so that no
+%% transaction is missed even if the tables are switched.
+tabs() ->
+    Latest = val(latest_transient_decision),
+    Previous = val(previous_transient_decisions),
+    [Latest, mnesia_decision | Previous].
+
+%% Looks up the decision for Tid in all decision tables.
+decision(Tid) ->
+    SearchOrder = tabs(),
+    decision(Tid, SearchOrder).
+
+%% Searches the given tables in order and returns the first decision found
+%% for Tid, or no_decision. A transient decision is converted to a decision
+%% record with empty node lists. A failing lookup is treated as a miss.
+decision(_Tid, []) ->
+    no_decision;
+decision(Tid, [Tab | Rest]) ->
+    case catch ?ets_lookup(Tab, Tid) of
+        [#decision{} = Found] ->
+            Found;
+        [#transient_decision{tid = FoundTid, outcome = Outcome}] ->
+            #decision{tid = FoundTid,
+                      outcome = Outcome,
+                      disc_nodes = [],
+                      ram_nodes = []};
+        {'EXIT', _} ->
+            %% Recently switched transient decision table
+            decision(Tid, Rest);
+        [] ->
+            decision(Tid, Rest)
+    end.
+
+%% Returns the stored outcome of Tid, or Default if none is stored.
+outcome(Tid, Default) ->
+    SearchOrder = tabs(),
+    outcome(Tid, Default, SearchOrder).
+
+%% Returns element 3 of the first entry found for Tid in the given tables,
+%% or Default when every lookup fails.
+outcome(_Tid, Default, []) ->
+    Default;
+outcome(Tid, Default, [Tab | Rest]) ->
+    case catch ?ets_lookup_element(Tab, Tid, 3) of
+        {'EXIT', _Reason} ->
+            outcome(Tid, Default, Rest);
+        Stored ->
+            Stored
+    end.
+
+%% Reduces an outcome to one of unclear, aborted or committed:
+%% presume_abort counts as aborted and pre_commit as unclear.
+filter_outcome(Val) ->
+    case Val of
+        committed     -> committed;
+        aborted       -> aborted;
+        presume_abort -> aborted;
+        unclear       -> unclear;
+        pre_commit    -> unclear
+    end.
+
+%% Turns a presume_abort decision into an aborted one; anything else is
+%% returned unchanged.
+filter_aborted(#decision{outcome = presume_abort} = Presumed) ->
+    Presumed#decision{outcome = aborted};
+filter_aborted(Other) ->
+    Other.
+
+%% Merge old decision D with new (probably remote) decision NewD0 that was
+%% received from Node. D is either no_decision or a decision record.
+%% presume_abort is treated as aborted on both sides. The result is a
+%% decision record; when both sides are unclear the old decision D is
+%% returned untouched. A committed/aborted conflict is resolved to aborted
+%% and reported as an inconsistent_database system event.
+merge_decisions(Node, D, NewD0) ->
+ NewD = filter_aborted(NewD0),
+ if
+ D == no_decision, node() /= Node ->
+ %% We did not know anything about this txn
+ NewD#decision{disc_nodes = []};
+ D == no_decision ->
+ NewD;
+ is_record(D, decision) ->
+ %% Neither the local node nor the reporting node needs to be
+ %% kept among the disc nodes of the old decision
+ DiscNs = D#decision.disc_nodes -- ([node(), Node]),
+ OldD = filter_aborted(D#decision{disc_nodes = DiscNs}),
+%% mnesia_lib:dbg_out("merge ~w: NewD = ~w~n D = ~w~n OldD = ~w~n",
+%% [Node, NewD, D, OldD]),
+ if
+ OldD#decision.outcome == unclear,
+ NewD#decision.outcome == unclear ->
+ D;
+
+ OldD#decision.outcome == NewD#decision.outcome ->
+ %% We have come to the same decision
+ OldD;
+
+ OldD#decision.outcome == committed,
+ NewD#decision.outcome == aborted ->
+ %% Interesting! We have already committed,
+ %% but someone else has aborted. Now we
+ %% have a nice little inconsistency. The
+ %% other guy (or some one else) has
+ %% enforced a recovery decision when
+ %% max_wait_for_decision was exceeded.
+ %% We will pretend that we have obeyed
+ %% the forced recovery decision, but we
+ %% will also generate an event in case the
+ %% application wants to do something clever.
+ Msg = {inconsistent_database, bad_decision, Node},
+ mnesia_lib:report_system_event(Msg),
+ OldD#decision{outcome = aborted};
+
+ OldD#decision.outcome == aborted ->
+ %% aborted overrides anything
+ OldD#decision{outcome = aborted};
+
+ NewD#decision.outcome == aborted ->
+ %% aborted overrides anything
+ OldD#decision{outcome = aborted};
+
+ OldD#decision.outcome == committed,
+ NewD#decision.outcome == unclear ->
+ %% committed overrides unclear
+ OldD#decision{outcome = committed};
+
+ OldD#decision.outcome == unclear,
+ NewD#decision.outcome == committed ->
+ %% committed overrides unclear
+ OldD#decision{outcome = committed}
+ end
+ end.
+
+%% Feeds a list of items announced by Node into the local view, threading
+%% the server state through. Decision records are merged as they are,
+%% transient decisions are first converted to decision records with empty
+%% node lists, mnesia_down entries are skipped, and a transaction serial is
+%% synced. On a serial, if an unclear decision is pending and Node is not
+%% one of its ram nodes, Node is asked for its decision.
+add_remote_decisions(Node, [#decision{} = Remote | Rest], State) ->
+    NewState = add_remote_decision(Node, Remote, State),
+    add_remote_decisions(Node, Rest, NewState);
+
+add_remote_decisions(Node, [#transient_decision{} = Transient | Rest], State) ->
+    Remote = #decision{tid = Transient#transient_decision.tid,
+                       outcome = Transient#transient_decision.outcome,
+                       disc_nodes = [],
+                       ram_nodes = []},
+    NewState = add_remote_decision(Node, Remote, State),
+    add_remote_decisions(Node, Rest, NewState);
+
+add_remote_decisions(Node, [{mnesia_down, _, _, _} | Rest], State) ->
+    add_remote_decisions(Node, Rest, State);
+
+add_remote_decisions(Node, [{trans_tid, serial, Serial} | Rest], State) ->
+    sync_trans_tid_serial(Serial),
+    case State#state.unclear_decision of
+        undefined ->
+            ignored;
+        Pending ->
+            case lists:member(Node, Pending#decision.ram_nodes) of
+                false ->
+                    abcast([Node], {what_decision, node(), Pending});
+                true ->
+                    ignore
+            end
+    end,
+    add_remote_decisions(Node, Rest, State);
+
+add_remote_decisions(_Node, [], State) ->
+    State.
+
+%% Merges decision NewD, received from Node, with the local decision for the
+%% same transaction, logs the result and returns the updated server state.
+%% If the merged outcome is certain, a local decision already existed and
+%% the local node is listed in NewD, Node is told about the outcome.
+%% If the server is currently waiting for this very transaction, the waiting
+%% caller is answered as soon as the outcome is certain, or with aborted
+%% when nobody is left to wait for.
+add_remote_decision(Node, NewD, State) ->
+ Tid = NewD#decision.tid,
+ OldD = decision(Tid),
+ D = merge_decisions(Node, OldD, NewD),
+ do_log_decision(D, false, undefined),
+ Outcome = D#decision.outcome,
+ if
+ OldD == no_decision ->
+ ignore;
+ Outcome == unclear ->
+ ignore;
+ true ->
+ case lists:member(node(), NewD#decision.disc_nodes) or
+ lists:member(node(), NewD#decision.ram_nodes) of
+ true ->
+ tell_im_certain([Node], D);
+ false ->
+ ignore
+ end
+ end,
+ case State#state.unclear_decision of
+ U when U#decision.tid == Tid ->
+ %% Node has answered; it no longer needs to be waited for
+ WaitFor = State#state.unclear_waitfor -- [Node],
+ if
+ Outcome == unclear, WaitFor == [] ->
+ %% Everybody are uncertain, lets abort
+ NewOutcome = aborted,
+ CertainD = D#decision{outcome = NewOutcome,
+ disc_nodes = [],
+ ram_nodes = []},
+ tell_im_certain(D#decision.disc_nodes, CertainD),
+ tell_im_certain(D#decision.ram_nodes, CertainD),
+ do_log_decision(CertainD, false, undefined),
+ verbose("Decided to abort transaction ~p "
+ "since everybody are uncertain ~p~n",
+ [Tid, CertainD]),
+ gen_server:reply(State#state.unclear_pid, {ok, NewOutcome}),
+ State#state{unclear_pid = undefined,
+ unclear_decision = undefined,
+ unclear_waitfor = undefined};
+ Outcome /= unclear ->
+ verbose("~p told us that transaction ~p was ~p~n",
+ [Node, Tid, Outcome]),
+ gen_server:reply(State#state.unclear_pid, {ok, Outcome}),
+ State#state{unclear_pid = undefined,
+ unclear_decision = undefined,
+ unclear_waitfor = undefined};
+ Outcome == unclear ->
+ %% Still unclear and more nodes to hear from
+ State#state{unclear_waitfor = WaitFor}
+ end;
+ _ ->
+ State
+ end.
+
+%% Announces the current transaction serial to ToNodes.
+%% Nothing is done for an empty node list.
+announce_all([]) ->
+    ok;
+announce_all(ToNodes) ->
+    Serial = trans_tid_serial(),
+    announce(ToNodes, [{trans_tid, serial, Serial}], [], false).
+
+%% Arranges every item per receiving node, starting from Acc, and then
+%% sends each node its batch.
+announce(ToNodes, Items, Acc, ForceSend) ->
+    Arrange = fun(Item, PerNode) -> arrange(ToNodes, Item, PerNode, ForceSend) end,
+    send_decisions(lists:foldl(Arrange, Acc, Items)).
+
+%% Sends each {Node, Decisions} batch to its node as a
+%% {decisions, node(), Decisions} message. Returns ok.
+send_decisions(PerNode) ->
+    Send = fun({Node, Decisions}) ->
+                   abcast([Node], {decisions, node(), Decisions})
+           end,
+    lists:foreach(Send, PerNode).
+
+%% Adds one item to the batch of every node that should receive it.
+%% A decision record goes to a node when ForceSend is true or when the node
+%% is one of the decision's disc or ram nodes. A transaction serial goes to
+%% every node (the lamport thing, plus it releases the others from
+%% uncertainty).
+arrange([To | ToNodes], D, Acc, ForceSend) when is_record(D, decision) ->
+    Wanted = (ForceSend or
+              lists:member(To, D#decision.disc_nodes) or
+              lists:member(To, D#decision.ram_nodes)),
+    NextAcc =
+        case Wanted of
+            true -> add_decision(To, D, Acc);
+            false -> Acc
+        end,
+    arrange(ToNodes, D, NextAcc, ForceSend);
+
+arrange([To | ToNodes], {trans_tid, serial, Serial}, Acc, ForceSend) ->
+    Item = {trans_tid, serial, Serial},
+    arrange(ToNodes, Item, add_decision(To, Item, Acc), ForceSend);
+
+arrange([], _Decision, Acc, _ForceSend) ->
+    Acc.
+
+%% Adds Decision at the front of Node's batch in a list of
+%% {Node, Decisions} pairs. A node without a batch gets a new one at the
+%% end of the list.
+add_decision(Node, Decision, []) ->
+    [{Node, [Decision]}];
+add_decision(Node, Decision, [{Node, Batch} | Rest]) ->
+    [{Node, [Decision | Batch]} | Rest];
+add_decision(Node, Decision, [Other | Rest]) ->
+    [Other | add_decision(Node, Decision, Rest)].
+
diff --git a/lib/mnesia/src/mnesia_registry.erl b/lib/mnesia/src/mnesia_registry.erl
new file mode 100644
index 0000000000..9805d48697
--- /dev/null
+++ b/lib/mnesia/src/mnesia_registry.erl
@@ -0,0 +1,280 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_registry).
+
+%%%----------------------------------------------------------------------
+%%% File : mnesia_registry.erl
+%%% Purpose : Support dump and restore of a registry on a C-node
+%%% This is an OTP internal module and is not publicly available.
+%%%
+%%% Example : Dump some hardcoded records into the Mnesia table Tab
+%%%
+%%% case rpc:call(Node, mnesia_registry, start_dump, [Tab, self()]) of
+%%% Pid when is_pid(Pid) ->
+%%% Pid ! {write, key1, key_size1, val_type1, val_size1, val1},
+%%% Pid ! {delete, key3},
+%%% Pid ! {write, key2, key_size2, val_type2, val_size2, val2},
+%%% Pid ! {write, key4, key_size4, val_type4, val_size4, val4},
+%%% Pid ! {commit, self()},
+%%% receive
+%%% {ok, Pid} ->
+%%% ok;
+%%% {'EXIT', Pid, Reason} ->
+%%% exit(Reason)
+%%% end;
+%%% {badrpc, Reason} ->
+%%% exit(Reason)
+%%% end.
+%%%
+%%% Example : Restore the corresponding Mnesia table Tab
+%%%
+%%% case rpc:call(Node, mnesia_registry, start_restore, [Tab, self()]) of
+%%% {size, Pid, N, LargestKey, LargestVal} ->
+%%% Pid ! {send_records, self()},
+%%% Fun = fun() ->
+%%% receive
+%%% {restore, KeySize, ValSize, ValType, Key, Val} ->
+%%% {Key, Val};
+%%% {'EXIT', Pid, Reason} ->
+%%% exit(Reason)
+%%% end
+%%% end,
+%%% lists:map(Fun, lists:seq(1, N));
+%%% {badrpc, Reason} ->
+%%% exit(Reason)
+%%% end.
+%%%
+%%%----------------------------------------------------------------------
+
+%% External exports
+-export([start_dump/2, start_restore/2]).
+-export([create_table/1, create_table/2]).
+
+%% Internal exports
+-export([init/4]).
+
+-record(state, {table, ops = [], link_to}).
+
+-record(registry_entry, {key, key_size, val_type, val_size, val}).
+
+-record(size, {pid = self(), n_values = 0, largest_key = 0, largest_val = 0}).
+
+%%%----------------------------------------------------------------------
+%%% Client
+%%%----------------------------------------------------------------------
+
+%% Spawns the dump/restore process and waits for its {ok, Res} start reply.
+%%
+%% The new process is linked to the caller, but it unlinks the caller again
+%% when LinkTo is another process. It is therefore also monitored, so that
+%% a crash before the start reply is sent makes the caller exit with the
+%% same reason instead of waiting forever for a reply that never comes.
+start(Type, Tab, LinkTo) ->
+    Starter = self(),
+    Args = [Type, Starter, LinkTo, Tab],
+    %% Link and monitor atomically so that no early exit can be missed
+    {Pid, MRef} = spawn_opt(?MODULE, init, Args, [link, monitor]),
+    %% The receiver process may unlink the current process
+    receive
+        {ok, Res} ->
+            erlang:demonitor(MRef, [flush]),
+            Res;
+        {'EXIT', Pid, Reason} when LinkTo == Starter ->
+            erlang:demonitor(MRef, [flush]),
+            exit(Reason);
+        {'DOWN', MRef, process, Pid, Reason} ->
+            exit(Reason)
+    end.
+
+%% Starts a receiver process and optionally creates a Mnesia table
+%% with suitable default values. Returns the Pid of the receiver process
+%%
+%% The receiver process accumulates Mnesia operations and performs
+%% all operations or none at commit. The understood messages are:
+%%
+%% {write, Key, KeySize, ValType, ValSize, Val} ->
+%% accumulates mnesia:write({Tab, Key, KeySize, ValType, ValSize, Val})
+%% (no reply)
+%% {delete, Key} ->
+%% accumulates mnesia:delete({Tab, Key}) (no reply)
+%% {commit, ReplyTo} ->
+%% commits all accumulated operations
+%% and stops the process (replies {ok, Pid})
+%% abort ->
+%% stops the process (no reply)
+%%
+%% The receiver process is linked to the process with the process identifier
+%% LinkTo. If some error occurs the receiver process will invoke exit(Reason)
+%% and it is up to the LinkTo process to act properly when it receives an exit
+%% signal.
+
+start_dump(Tab, LinkTo) ->
+    Mode = dump,
+    start(Mode, Tab, LinkTo).
+
+%% Starts a sender process which sends restore messages back to the
+%% LinkTo process. First, however, some statistics about the table
+%% are determined and returned as a 5-tuple:
+%%
+%% {size, SenderPid, N, LargestKeySize, LargestValSize}
+%%
+%% where N is the number of records in the table. Then the sender process
+%% waits for a 2-tuple message:
+%%
+%% {send_records, ReplyTo}
+%%
+%% Finally, N 6-tuple messages are sent to the ReplyTo process:
+%%
+%% ReplyTo ! {restore, KeySize, ValSize, ValType, Key, Val}
+%%
+%% If some error occurs the receiver process will invoke exit(Reason)
+%% and it is up to the LinkTo process to act properly when it receives an
+%% exit signal.
+
+start_restore(Tab, LinkTo) ->
+    Mode = restore,
+    start(Mode, Tab, LinkTo).
+
+
+%% Creates the Mnesia table Tab, unless it already exists, with one replica
+%% on the local node that uses the same storage type as the schema table.
+%% Returns ok or exits.
+create_table(Tab) ->
+    SchemaStorage = mnesia:table_info(schema, storage_type),
+    TabDef = [{SchemaStorage, [node()]}],
+    create_table(Tab, TabDef).
+
+%% Creates the Mnesia table Tab with the registry_entry attributes plus the
+%% options in TabDef. An already existing table is accepted; any other
+%% abort reason makes the caller exit with that reason.
+create_table(Tab, TabDef) ->
+    Fields = record_info(fields, registry_entry),
+    Result = mnesia:create_table(Tab, [{attributes, Fields} | TabDef]),
+    case Result of
+        {aborted, {already_exists, Tab}} ->
+            ok;
+        {aborted, Reason} ->
+            exit(Reason);
+        {atomic, ok} ->
+            ok
+    end.
+
+%%%----------------------------------------------------------------------
+%%% Server
+%%%----------------------------------------------------------------------
+
+%% Entry point of the spawned process. If LinkTo is not the starter, the
+%% link is moved from the starter to LinkTo. A dump process acknowledges
+%% the start with {ok, self()} and enters the dump loop; a restore process
+%% goes on to read the table.
+init(Type, Starter, LinkTo, Tab) ->
+    case LinkTo /= Starter of
+        true ->
+            link(LinkTo),
+            unlink(Starter);
+        false ->
+            ignore
+    end,
+    case Type of
+        restore ->
+            restore_table(Tab, Starter, LinkTo);
+        dump ->
+            Starter ! {ok, self()},
+            dump_loop(#state{table = Tab, link_to = LinkTo})
+    end.
+
+%%%----------------------------------------------------------------------
+%%% Dump loop
+%%%----------------------------------------------------------------------
+
+%% Accumulates write and delete operations (newest first) until a commit or
+%% abort message arrives. On commit the table is created if needed and all
+%% operations are applied in one transaction; ReplyTo then gets
+%% {ok, self()} and the process stops. An aborted transaction or an
+%% unknown message makes the process exit.
+dump_loop(S) ->
+    Pending = S#state.ops,
+    receive
+        {write, Key, KeySize, ValType, ValSize, Val} ->
+            Entry = #registry_entry{key = Key,
+                                    key_size = KeySize,
+                                    val_type = ValType,
+                                    val_size = ValSize,
+                                    val = Val},
+            dump_loop(S#state{ops = [{write, Entry} | Pending]});
+        {delete, Key} ->
+            dump_loop(S#state{ops = [{delete, Key} | Pending]});
+        {commit, ReplyTo} ->
+            Tab = S#state.table,
+            create_table(Tab),
+            RecName = mnesia:table_info(Tab, record_name),
+            %% The operations are in reverse order, but there is no need
+            %% for reversing the list of accumulated operations
+            case mnesia:transaction(fun handle_ops/3, [Tab, RecName, Pending]) of
+                {aborted, Reason} ->
+                    exit({aborted, Reason});
+                {atomic, ok} ->
+                    ReplyTo ! {ok, self()},
+                    stop(S#state.link_to)
+            end;
+        abort ->
+            stop(S#state.link_to);
+        BadMsg ->
+            exit({bad_message, BadMsg})
+    end.
+
+%% Unlinks from LinkTo and terminates the process with reason normal.
+stop(LinkTo) ->
+    true = unlink(LinkTo),
+    exit(normal).
+
+%% Transaction body: takes a write lock on the whole table and then
+%% applies all accumulated operations.
+handle_ops(Tab, RecName, Ops) ->
+    _ = mnesia:write_lock_table(Tab),
+    do_handle_ops(Tab, RecName, Ops).
+
+%% Applies the operations one by one. A written registry entry gets the
+%% table's record name as its first element before it is stored.
+do_handle_ops(_Tab, _RecName, []) ->
+    ok;
+do_handle_ops(Tab, RecName, [{write, RegEntry} | Rest]) ->
+    mnesia:write(Tab, setelement(1, RegEntry, RecName), write),
+    do_handle_ops(Tab, RecName, Rest);
+do_handle_ops(Tab, RecName, [{delete, Key} | Rest]) ->
+    mnesia:delete(Tab, Key, write),
+    do_handle_ops(Tab, RecName, Rest).
+
+%%%----------------------------------------------------------------------
+%%% Restore table
+%%%----------------------------------------------------------------------
+
+%% Reads all records of Tab in a transaction, reports the size statistics
+%% to the starter as {ok, Size} and waits for {send_records, ReplyTo}.
+%% The records are then sent to ReplyTo and the process unlinks from LinkTo
+%% and exits normally. An aborted transaction or any other message makes
+%% the process exit.
+restore_table(Tab, Starter, LinkTo) ->
+    Pat = mnesia:table_info(Tab, wild_pattern),
+    ReadAll = fun() -> mnesia:match_object(Tab, Pat, read) end,
+    case mnesia:transaction(ReadAll) of
+        {aborted, Reason} ->
+            exit(Reason);
+        {atomic, AllRecords} ->
+            Starter ! {ok, calc_size(AllRecords, #size{})},
+            receive
+                {send_records, ReplyTo} ->
+                    send_records(AllRecords, ReplyTo),
+                    unlink(LinkTo),
+                    exit(normal);
+                BadMsg ->
+                    exit({bad_message, BadMsg})
+            end
+    end.
+
+%% Folds the records into a size record: the number of records and the
+%% largest key_size and val_size values seen.
+calc_size(Records, Size0) ->
+    Step = fun(Rec, S) ->
+                   LargestKey = max(element(#registry_entry.key_size, Rec),
+                                    S#size.largest_key),
+                   LargestVal = max(element(#registry_entry.val_size, Rec),
+                                    S#size.largest_val),
+                   S#size{n_values = S#size.n_values + 1,
+                          largest_key = LargestKey,
+                          largest_val = LargestVal}
+           end,
+    lists:foldl(Step, Size0, Records).
+
+%% Returns New if it is strictly greater than Old, otherwise Old.
+max(New, Old) ->
+    if
+        New > Old -> New;
+        true -> Old
+    end.
+
+%% Sends one {restore, KeySize, ValSize, ValType, Key, Val} message to
+%% ReplyTo for every record, in list order. Returns ok.
+send_records(Records, ReplyTo) ->
+    Send = fun(Rec) ->
+                   KeySize = element(#registry_entry.key_size, Rec),
+                   ValSize = element(#registry_entry.val_size, Rec),
+                   ValType = element(#registry_entry.val_type, Rec),
+                   Key = element(#registry_entry.key, Rec),
+                   Val = element(#registry_entry.val, Rec),
+                   ReplyTo ! {restore, KeySize, ValSize, ValType, Key, Val}
+           end,
+    lists:foreach(Send, Records).
+
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
new file mode 100644
index 0000000000..354431a296
--- /dev/null
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -0,0 +1,3027 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% In this module we provide a number of explicit functions
+%% to manipulate the schema. All these functions are called
+%% within a special schema transaction.
+%%
+%% We also have an init/1 function defined here; this function is
+%% used by mnesia:start() to initialize the entire schema.
+
+-module(mnesia_schema).
+
+-export([
+ add_snmp/2,
+ add_table_copy/3,
+ add_table_index/2,
+ arrange_restore/3,
+ attr_tab_to_pos/2,
+ attr_to_pos/2,
+ change_table_copy_type/3,
+ change_table_access_mode/2,
+ change_table_load_order/2,
+ change_table_frag/2,
+ clear_table/1,
+ create_table/1,
+ cs2list/1,
+ del_snmp/1,
+ del_table_copy/2,
+ del_table_index/2,
+ delete_cstruct/2,
+ delete_schema/1,
+ delete_schema2/0,
+ delete_table/1,
+ delete_table_property/2,
+ dump_tables/1,
+ ensure_no_schema/1,
+ get_create_list/1,
+ get_initial_schema/2,
+ get_table_properties/1,
+ info/0,
+ info/1,
+ init/1,
+ insert_cstruct/3,
+ is_remote_member/1,
+ list2cs/1,
+ lock_schema/0,
+ merge_schema/0,
+ move_table/3,
+ opt_create_dir/2,
+ prepare_commit/3,
+ purge_dir/2,
+ purge_tmp_files/0,
+ ram_delete_table/2,
+% ram_delete_table/3,
+ read_cstructs_from_disc/0,
+ read_nodes/0,
+ remote_read_schema/0,
+ restore/1,
+ restore/2,
+ restore/3,
+ schema_coordinator/3,
+ set_where_to_read/3,
+ transform_table/4,
+ undo_prepare_commit/2,
+ unlock_schema/0,
+ version/0,
+ write_table_property/2
+ ]).
+
+%% Exports for mnesia_frag
+-export([
+ get_tid_ts_and_lock/2,
+ make_create_table/1,
+ ensure_active/1,
+ pick/4,
+ verify/3,
+ incr_version/1,
+ check_keys/3,
+ check_duplicates/2,
+ make_delete_table/2
+ ]).
+
+%% Needed outside to be able to use/set table_properties
+%% from user (not supported)
+-export([schema_transaction/1,
+ insert_schema_ops/2,
+ do_create_table/1,
+ do_delete_table/1,
+ do_read_table_property/2,
+ do_delete_table_property/2,
+ do_write_table_property/2]).
+
+-include("mnesia.hrl").
+-include_lib("kernel/include/file.hrl").
+
+-import(mnesia_lib, [set/2, del/2, verbose/2, dbg_out/2]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Here comes the init function which also resides in
+%% this module, it is called upon by the trans server
+%% at startup of the system
+%%
+%% We have a meta table which looks like
+%% {table, schema,
+%% {type, set},
+%% {disc_copies, all},
+%% {arity, 2}
+%% {attributes, [key, val]}
+%%
+%% This means that we have a series of {schema, Name, Cs} tuples
+%% in a table called schema !!
+
+%% Read the schema (keeping the ets table named schema), reset the
+%% schema related global variables and register the local node as an
+%% active replica of the schema table.
+%% Exits with the error reason if the schema cannot be read.
+init(IgnoreFallback) ->
+    {ok, Source, _CreateList} =
+        exit_on_error(read_schema(true, IgnoreFallback)),
+    verbose("Schema initiated from: ~p~n", [Source]),
+    set({schema, tables}, []),
+    set({schema, local_tables}, []),
+    Tabs = set_schema(?ets_first(schema)),
+    lists:foreach(fun clear_whereabouts/1, Tabs),
+    set({schema, where_to_read}, node()),
+    set({schema, load_node}, node()),
+    set({schema, load_reason}, initial),
+    mnesia_controller:add_active_replica(schema, node()).
+
+%% Exit with Reason when given {error, Reason}; return any other
+%% result unchanged.
+exit_on_error(Res) ->
+    case Res of
+        {error, Reason} -> exit(Reason);
+        _ -> Res
+    end.
+
+%% Look up the value of Var via ?catch_val; when the lookup fails the
+%% failure reason is handed to mnesia_lib:other_val/2.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Why} ->
+            mnesia_lib:other_val(Var, Why);
+        Found ->
+            Found
+    end.
+
+%% Walk the ets table schema starting at key Tab, calling
+%% do_set_schema/1 for every table found. Returns the table names in
+%% traversal order.
+
+set_schema(Tab) ->
+    case Tab of
+        '$end_of_table' ->
+            [];
+        _ ->
+            do_set_schema(Tab),
+            [Tab | set_schema(?ets_next(schema, Tab))]
+    end.
+
+%% Fetch element 3 of the entry stored under Tab in the ets table
+%% schema, i.e. the create list of a {schema, Tab, CreateList} tuple.
+get_create_list(Tab) ->
+ ?ets_lookup_element(schema, Tab, 3).
+
+%% Convert the stored create list of Tab to a cstruct and publish it
+%% with do_set_schema/2.
+do_set_schema(Tab) ->
+    do_set_schema(Tab, list2cs(get_create_list(Tab))).
+
+%% Publish every property of the cstruct Cs as {Tab, Item} global
+%% variables (via set/2), register Tab in {schema, tables} and, when the
+%% local node holds a replica (or Tab is the schema itself), also in
+%% {schema, local_tables}.
+do_set_schema(Tab, Cs) ->
+ Type = Cs#cstruct.type,
+ set({Tab, setorbag}, Type),
+ set({Tab, local_content}, Cs#cstruct.local_content),
+ set({Tab, ram_copies}, Cs#cstruct.ram_copies),
+ set({Tab, disc_copies}, Cs#cstruct.disc_copies),
+ set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies),
+ set({Tab, load_order}, Cs#cstruct.load_order),
+ set({Tab, access_mode}, Cs#cstruct.access_mode),
+ set({Tab, snmp}, Cs#cstruct.snmp),
+ set({Tab, user_properties}, Cs#cstruct.user_properties),
+ %% One extra variable per user property, keyed on its first element
+ [set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties],
+ set({Tab, frag_properties}, Cs#cstruct.frag_properties),
+ mnesia_frag:set_frag_hash(Tab, Cs#cstruct.frag_properties),
+ set({Tab, attributes}, Cs#cstruct.attributes),
+ %% Arity counts the record name in addition to the attributes
+ Arity = length(Cs#cstruct.attributes) + 1,
+ set({Tab, arity}, Arity),
+ RecName = Cs#cstruct.record_name,
+ set({Tab, record_name}, RecName),
+ set({Tab, record_validation}, {RecName, Arity, Type}),
+ set({Tab, wild_pattern}, wild(RecName, Arity)),
+ set({Tab, index}, Cs#cstruct.index),
+ %% create actual index tabs later
+ set({Tab, cookie}, Cs#cstruct.cookie),
+ set({Tab, version}, Cs#cstruct.version),
+ set({Tab, cstruct}, Cs),
+ Storage = mnesia_lib:schema_cs_to_storage_type(node(), Cs),
+ set({Tab, storage_type}, Storage),
+ mnesia_lib:add({schema, tables}, Tab),
+ Ns = mnesia_lib:cs_to_nodes(Cs),
+ case lists:member(node(), Ns) of
+ true ->
+ mnesia_lib:add({schema, local_tables}, Tab);
+ false when Tab == schema ->
+ %% The schema table always counts as a local table
+ mnesia_lib:add({schema, local_tables}, Tab);
+ false ->
+ ignore
+ end.
+
+%% Build the wild pattern of a table: a tuple of size Arity with
+%% RecName as first element and '_' in every other position.
+wild(RecName, Arity) ->
+    setelement(1, erlang:make_tuple(Arity, '_'), RecName).
+
+%% Temporarily read the local schema and return {ok, Nodes}, where Nodes
+%% are the disc_copies nodes followed by the ram_copies nodes of the
+%% schema table, or {error, Reason}.
+read_nodes() ->
+    %% Ensure that we access the intended Mnesia
+    %% directory. This function may not be called
+    %% during startup since it will cause the
+    %% application_controller to get into deadlock
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        {error, LoadReason} ->
+            {error, LoadReason};
+        ok ->
+            case read_schema(false) of
+                {error, ReadReason} ->
+                    {error, ReadReason};
+                {ok, _Source, CreateList} ->
+                    Cs = list2cs(CreateList),
+                    {ok, Cs#cstruct.disc_copies ++ Cs#cstruct.ram_copies}
+            end
+    end.
+
+%% Returns the Version part of the schema's {Version, Details} tuple
+%% when a non-default schema can be read. Otherwise returns {1,0} if
+%% the Mnesia directory exists and {0,0} if it does not.
+version() ->
+    case read_schema(false) of
+        {ok, Source, CreateList} when Source /= default ->
+            Cs = list2cs(CreateList),
+            {Version, _Details} = Cs#cstruct.version,
+            Version;
+        _NoSchema ->
+            DirExists = dir_exists(mnesia_lib:dir()),
+            if
+                DirExists -> {1,0};
+                true -> {0,0}
+            end
+    end.
+
+%% Calculate next table version from old cstruct.
+%% The major number is bumped (and minor reset to 0) when every replica
+%% residing on a disc node is active; otherwise only the minor number
+%% is bumped. The version details become {node(), now()}.
+incr_version(Cs) ->
+    {{Major, Minor}, _} = Cs#cstruct.version,
+    DiscReplicas = mnesia_lib:intersect(val({schema, disc_copies}),
+                                        mnesia_lib:cs_to_nodes(Cs)),
+    Inactive = DiscReplicas -- val({Cs#cstruct.name, active_replicas}),
+    Next =
+        if
+            Inactive == [] -> {Major + 1, 0};   % All replicas are active
+            true -> {Major, Minor + 1}          % Some replicas are inactive
+        end,
+    Cs#cstruct{version = {Next, {node(), now()}}}.
+
+%% Insert the cstruct Cs into the schema on behalf of transaction Tid.
+%% Returns the inserted schema tuple {schema, Tab, TabDef} (not just the
+%% table name).
+%% KeepWhereabouts == false resets the whereabouts of the table, but
+%% only when it has no active replicas.
+insert_cstruct(Tid, Cs, KeepWhereabouts) ->
+ Tab = Cs#cstruct.name,
+ TabDef = cs2list(Cs),
+ Val = {schema, Tab, TabDef},
+ %% Let checkpoints and subscribers know about the schema write
+ mnesia_checkpoint:tm_retain(Tid, schema, Tab, write),
+ mnesia_subscr:report_table_event(schema, Tid, Val, write),
+ Active = val({Tab, active_replicas}),
+
+ case KeepWhereabouts of
+ true ->
+ ignore;
+ false when Active == [] ->
+ clear_whereabouts(Tab);
+ false ->
+ %% Someone else has initiated table
+ ignore
+ end,
+ set({Tab, cstruct}, Cs),
+ ?ets_insert(schema, Val),
+ do_set_schema(Tab, Cs),
+ Val.
+
+%% Reset every whereabouts related variable of Tab to its initial
+%% value. Returns the result of the final set/2 call.
+clear_whereabouts(Tab) ->
+    Initial = [{checkpoints, []},
+               {subscribers, []},
+               {where_to_read, nowhere},
+               {active_replicas, []},
+               {commit_work, []},
+               {where_to_write, []},
+               {where_to_commit, []},
+               {load_by_force, false},
+               {load_node, unknown}],
+    lists:foreach(fun({Item, Value}) -> set({Tab, Item}, Value) end,
+                  Initial),
+    set({Tab, load_reason}, unknown).
+
+%% Remove the table described by Cs from the schema on behalf of
+%% transaction Tid. Returns the deleted schema tuple
+%% {schema, Tab, TabDef}.
+delete_cstruct(Tid, Cs) ->
+    Tab = Cs#cstruct.name,
+    Val = {schema, Tab, cs2list(Cs)},
+    mnesia_checkpoint:tm_retain(Tid, schema, Tab, delete),
+    mnesia_subscr:report_table_event(schema, Tid, Val, delete),
+    %% Purge all global variables of the table and its schema entry
+    Purge =
+        fun() ->
+                ?ets_match_delete(mnesia_gvar, {{Tab, '_'}, '_'}),
+                ?ets_match_delete(mnesia_gvar, {{Tab, '_', '_'}, '_'}),
+                del({schema, local_tables}, Tab),
+                del({schema, tables}, Tab),
+                ?ets_delete(schema, Tab)
+        end,
+    mnesia_controller:update(Purge),
+    Val.
+
+%% Delete the Mnesia directory on all given nodes.
+%% Requires that Mnesia is not running on any of them.
+%% Returns ok | {error, Reason}; a non-list or empty argument gives
+%% {error, {badarg, Ns}}.
+delete_schema(Ns) when is_list(Ns), Ns /= [] ->
+    Reason = "Cannot delete schema on all nodes",
+    case mnesia_lib:running_nodes(Ns) of
+        [] ->
+            case rpc:multicall(Ns, ?MODULE, delete_schema2, []) of
+                {Replies, []} ->
+                    case lists:filter(fun(R) -> R /= ok end, Replies) of
+                        [] ->
+                            ok;
+                        BadReplies ->
+                            verbose("~s: ~p~n", [Reason, BadReplies]),
+                            {error, {"All nodes not running", BadReplies}}
+                    end;
+                {_Replies, BadNs} ->
+                    verbose("~s: ~p~n", [Reason, BadNs]),
+                    {error, {"All nodes not running", BadNs}}
+            end;
+        RunningNs ->
+            verbose("~s: ~p~n", [Reason, RunningNs]),
+            {error, {"Mnesia is not stopped everywhere", RunningNs}}
+    end;
+delete_schema(Ns) ->
+    {error, {badarg, Ns}}.
+
+%% Purge the local Mnesia directory, provided Mnesia is not running on
+%% this node. Returns ok | {error, Reason}.
+delete_schema2() ->
+    %% Ensure that we access the intended Mnesia
+    %% directory. This function may not be called
+    %% during startup since it will cause the
+    %% application_controller to get into deadlock
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        {error, Reason} ->
+            {error, Reason};
+        ok ->
+            case mnesia_lib:is_running() of
+                no ->
+                    purge_dir(mnesia_lib:dir(), []),
+                    ok;
+                _ ->
+                    {error, {"Mnesia still running", node()}}
+            end
+    end.
+
+%% Check, node by node, that no node in the list already has a schema.
+%% Returns ok when none has one, {Node, Reason} for the first node that
+%% cannot be reached or already has a schema, and
+%% {error, {badarg, Item}} for a list element that is not an atom.
+ensure_no_schema([]) ->
+    ok;
+ensure_no_schema([Node | Rest]) when is_atom(Node) ->
+    case rpc:call(Node, ?MODULE, remote_read_schema, []) of
+        {badrpc, Reason} ->
+            {Node, {"All nodes not running", Node, Reason}};
+        {ok, Source, _} when Source /= default ->
+            {Node, {already_exists, Node}};
+        _ ->
+            ensure_no_schema(Rest)
+    end;
+ensure_no_schema([BadArg | _]) ->
+    {error, {badarg, BadArg}}.
+
+%% Read the schema of this node without keeping the ets table.
+%% Returns the result of read_schema(false) or {error, Reason} when the
+%% application cannot be loaded.
+remote_read_schema() ->
+    %% Ensure that we access the intended Mnesia
+    %% directory. This function may not be called
+    %% during startup since it will cause the
+    %% application_controller to get into deadlock
+    case mnesia_lib:ensure_loaded(?APPLICATION) of
+        {error, Reason} ->
+            {error, Reason};
+        ok ->
+            %% The schema_location setting is fetched, but the schema is
+            %% read in the same way whatever its value is.
+            _SchemaLocation = mnesia_monitor:get_env(schema_location),
+            read_schema(false)
+    end.
+
+%% dir_exists(Dir) checks Dir using the node's use_dir setting.
+dir_exists(Dir) ->
+    dir_exists(Dir, mnesia_monitor:use_dir()).
+
+%% dir_exists(Dir, UseDir): false whenever UseDir is false, otherwise
+%% true exactly when file information about Dir can be read.
+dir_exists(_Dir, false) ->
+    false;
+dir_exists(Dir, true) ->
+    case file:read_file_info(Dir) of
+        {ok, _Info} -> true;
+        _ -> false
+    end.
+
+%% Make sure the Mnesia directory Dir is usable when a directory is to
+%% be used: an existing one is checked for write access, a missing one
+%% is created. Without a directory {error, {has_no_disc, node()}} is
+%% returned.
+opt_create_dir(false, _) ->
+    {error, {has_no_disc, node()}};
+opt_create_dir(true, Dir) ->
+    case dir_exists(Dir, true) of
+        true ->
+            check_can_write(Dir);
+        false ->
+            case file:make_dir(Dir) of
+                {error, Reason} ->
+                    verbose("Cannot create mnesia dir ~p~n", [Reason]),
+                    {error, {"Cannot create Mnesia dir", Dir, Reason}};
+                ok ->
+                    verbose("Create Directory ~p~n", [Dir]),
+                    ok
+            end
+    end.
+
+%% Check that Dir is an existing directory with read_write access.
+%% Returns ok | {error, {Explanation, Dir}}.
+%%
+%% The error is a proper {error, Reason} pair, the same shape as the
+%% other error returned by opt_create_dir/2. The previous 3-tuples
+%% {error, Explanation, Dir} could not be matched as {error, Reason}.
+check_can_write(Dir) ->
+    case file:read_file_info(Dir) of
+        {ok, FI} when FI#file_info.type == directory,
+                      FI#file_info.access == read_write ->
+            ok;
+        {ok, _} ->
+            {error, {"Not allowed to write in Mnesia dir", Dir}};
+        _ ->
+            {error, {"Non existent Mnesia dir", Dir}}
+    end.
+
+%% Take the table lock on the schema table via mnesia_lib:lock_table/1.
+lock_schema() ->
+ mnesia_lib:lock_table(schema).
+
+%% Release the lock taken by lock_schema/0 via mnesia_lib:unlock_table/1.
+unlock_schema() ->
+ mnesia_lib:unlock_table(schema).
+
+%% Same as read_schema(Keep, false), i.e. IgnoreFallback is false.
+read_schema(Keep) ->
+ read_schema(Keep, false).
+
+%% The schema may be read for several reasons.
+%% If the schema is read while Mnesia is not started, we normally do
+%% not want the ets table named schema to be left around.
+%% If Keep == true, the ets table schema is kept
+%% If Keep == false, the ets table schema is removed
+%%
+%% Returns {ok, Source, SchemaCstruct} or {error, Reason}
+%% Source may be: default | ram | disc | fallback
+%%
+%% The schema lock is held while reading and is released in an 'after'
+%% clause, so it is given back even if the read raises an exception.
+
+read_schema(Keep, IgnoreFallback) ->
+    lock_schema(),
+    try
+        case mnesia:system_info(is_running) of
+            yes ->
+                {ok, ram, get_create_list(schema)};
+            _IsRunning ->
+                case mnesia_monitor:use_dir() of
+                    true ->
+                        read_disc_schema(Keep, IgnoreFallback);
+                    false when Keep == true ->
+                        %% No directory: create a default schema in ram
+                        Args = [{keypos, 2}, public, named_table, set],
+                        mnesia_monitor:mktab(schema, Args),
+                        CreateList = get_initial_schema(ram_copies, []),
+                        ?ets_insert(schema,{schema, schema, CreateList}),
+                        {ok, default, CreateList};
+                    false when Keep == false ->
+                        CreateList = get_initial_schema(ram_copies, []),
+                        {ok, default, CreateList}
+                end
+        end
+    after
+        unlock_schema()
+    end.
+
+%% Read the schema from disc. A fallback is used when one exists,
+%% IgnoreFallback is false and Mnesia is not running. Otherwise the
+%% schema .DAT file is read or, when that is missing, the schema dump
+%% file.
+read_disc_schema(Keep, IgnoreFallback) ->
+    Running = mnesia:system_info(is_running),
+    case mnesia_bup:fallback_exists() of
+        true when IgnoreFallback == false, Running /= yes ->
+            mnesia_bup:fallback_to_schema();
+        _ ->
+            %% If we're running, we read the schema file even
+            %% if fallback exists
+            DatFile = mnesia_lib:tab2dat(schema),
+            case mnesia_lib:exists(DatFile) of
+                true ->
+                    do_read_disc_schema(DatFile, Keep);
+                false ->
+                    DmpFile = mnesia_lib:tab2dmp(schema),
+                    case mnesia_lib:exists(DmpFile) of
+                        false ->
+                            {error, "No schema file exists"};
+                        true ->
+                            %% May only happen when toggling of
+                            %% schema storage type has been
+                            %% interrupted
+                            do_read_disc_schema(DmpFile, Keep)
+                    end
+            end
+    end.
+
+%% Load the schema file Fname into an ets table and return
+%% {ok, disc, CreateList} for the schema table itself, or
+%% {error, {"Cannot read schema", Fname, Other}}.
+%% With Keep == true the named table schema is created and kept; with
+%% Keep == false an unnamed table is used and deleted before returning.
+do_read_disc_schema(Fname, Keep) ->
+    T =
+        case Keep of
+            true ->
+                mnesia_monitor:mktab(schema,
+                                     [{keypos, 2}, public, named_table, set]);
+            false ->
+                ?ets_new_table(schema, [{keypos, 2}, public, set])
+        end,
+    Repair = mnesia_monitor:get_env(auto_repair),
+    %% BUGBUG (translated from the original note): fix this for dcl!
+    Res =
+        case mnesia_lib:dets_to_ets(schema, T, Fname, set, Repair, no) of
+            loaded -> {ok, disc, ?ets_lookup_element(T, schema, 3)};
+            Other -> {error, {"Cannot read schema", Fname, Other}}
+        end,
+    case Keep of
+        false -> ?ets_delete_table(T);
+        true -> ignore
+    end,
+    Res.
+
+%% Return the create list of an initial schema table whose replicas
+%% reside on Nodes with storage type SchemaStorage
+%% (ram_copies | disc_copies).
+get_initial_schema(SchemaStorage, Nodes) ->
+    Base = #cstruct{name = schema,
+                    record_name = schema,
+                    attributes = [table, cstruct]},
+    cs2list(case SchemaStorage of
+                ram_copies -> Base#cstruct{ram_copies = Nodes};
+                disc_copies -> Base#cstruct{disc_copies = Nodes}
+            end).
+
+%% Open the schema .DAT file with dets and convert every stored create
+%% list to a cstruct. Returns {ok, Cstructs} | {error, Reason}.
+read_cstructs_from_disc() ->
+    %% Assumptions:
+    %% - local schema lock in global
+    %% - use_dir is true
+    %% - Mnesia is not running
+    %% - Ignore fallback
+
+    Fname = mnesia_lib:tab2dat(schema),
+    case mnesia_lib:exists(Fname) of
+        false ->
+            {error, "No schema file exists"};
+        true ->
+            OpenArgs = [{file, Fname},
+                        {keypos, 2},
+                        {repair, mnesia_monitor:get_env(auto_repair)},
+                        {type, set}],
+            case dets:open_file(make_ref(), OpenArgs) of
+                {error, Reason} ->
+                    {error, Reason};
+                {ok, Tab} ->
+                    ToCstruct = fun({_, _, List}) ->
+                                        {continue, list2cs(List)}
+                                end,
+                    Cstructs = dets:traverse(Tab, ToCstruct),
+                    dets:close(Tab),
+                    {ok, Cstructs}
+            end
+    end.
+
+%% We run a very special type of transaction when
+%% we want to manipulate the schema.
+
+%% Return the activity state {Mod, Tid, Ts} of the calling process
+%% after taking the requested table lock on Tab
+%% (Intent: read | write | none).
+%% Aborts with no_transaction unless the process dictionary holds an
+%% activity state whose third element is a tidstore record.
+get_tid_ts_and_lock(Tab, Intent) ->
+    case get(mnesia_activity_state) of
+        {_Mod, Tid, Ts} = TidTs when is_record(Ts, tidstore) ->
+            Store = Ts#tidstore.store,
+            case Intent of
+                read -> mnesia_locker:rlock_table(Tid, Store, Tab);
+                write -> mnesia_locker:wlock_table(Tid, Store, Tab);
+                none -> ignore
+            end,
+            TidTs;
+        _ ->
+            mnesia:abort(no_transaction)
+    end.
+
+%% Run Fun as a schema transaction in a separate, linked coordinator
+%% process and wait for its outcome.
+%% Returns the coordinator's result, {aborted, {transaction_crashed, R}}
+%% when an exit message from the coordinator is received, or
+%% {aborted, nested_transaction} when the caller is already inside an
+%% activity.
+schema_transaction(Fun) ->
+    case get(mnesia_activity_state) of
+        undefined ->
+            Coordinator =
+                spawn_link(?MODULE, schema_coordinator,
+                           [self(), Fun, whereis(mnesia_controller)]),
+            receive
+                {transaction_done, Res, Coordinator} ->
+                    Res;
+                {'EXIT', Coordinator, R} ->
+                    {aborted, {transaction_crashed, R}}
+            end;
+        _ ->
+            {aborted, nested_transaction}
+    end.
+
+%% This process may dump the transaction log, and should
+%% therefore not be run in an application process
+%%
+%% Runs Fun in a transaction and sends {transaction_done, Res, self()}
+%% to Client. The third argument is the mnesia_controller pid, or
+%% undefined when no such process is registered.
+schema_coordinator(Client, _Fun, undefined) ->
+ %% No controller process: report that Mnesia is not running here
+ Res = {aborted, {node_not_running, node()}},
+ Client ! {transaction_done, Res, self()},
+ unlink(Client);
+
+schema_coordinator(Client, Fun, Controller) when is_pid(Controller) ->
+ %% Do not trap exit in order to automatically die
+ %% when the controller dies
+
+ link(Controller),
+ unlink(Client),
+
+ %% Fulfill the transaction even if the client dies
+ Res = mnesia:transaction(Fun),
+ Client ! {transaction_done, Res, self()},
+ unlink(Controller), % Avoids spurious exit message
+ unlink(whereis(mnesia_tm)), % Avoids spurious exit message
+ exit(normal).
+
+%% The make* routines return a list of ops; this function
+%% inserts them all in the Store and maintains the local order
+%% of ops.
+%% The first argument is an activity state {Mod, Tid, Ts}; the ops are
+%% inserted into the store of Ts.
+
+insert_schema_ops({_Mod, _Tid, Ts}, SchemaIOps) ->
+ do_insert_schema_ops(Ts#tidstore.store, SchemaIOps).
+
+%% Insert each op into the ets table Store, in list order. Returns ok.
+do_insert_schema_ops(Store, Ops) ->
+    lists:foreach(fun(Op) -> ?ets_insert(Store, Op) end, Ops).
+
+%% Convert a cstruct record to its create list, a list of
+%% {FieldName, Value} pairs in record field order. A list argument is
+%% taken to be a create list already and is returned as is.
+cs2list(Cs) when is_record(Cs, cstruct) ->
+    rec2list(record_info(fields, cstruct), 2, Cs);
+cs2list(CreateList) when is_list(CreateList) ->
+    CreateList.
+
+%% Pair every tag with the corresponding element of the tuple Rec: the
+%% first tag with element Pos, the next with element Pos + 1 and so on.
+rec2list(Tags, Pos, Rec) ->
+    Positions = lists:seq(Pos, Pos + length(Tags) - 1),
+    [{Tag, element(P, Rec)} || {Tag, P} <- lists:zip(Tags, Positions)].
+
+%% Convert a create list (a list of {Key, Value} options) to a cstruct
+%% record, filling in defaults for missing options.
+%% Aborts (mnesia:abort/1) if the name is missing, if an option has a
+%% bad type, if an unknown key is given or if a key occurs twice.
+%% A non-list argument aborts with {badarg, Other}.
+list2cs(List) when is_list(List) ->
+ %% name is the only mandatory option
+ Name = pick(unknown, name, List, must),
+ Type = pick(Name, type, List, set),
+ Rc0 = pick(Name, ram_copies, List, []),
+ Dc = pick(Name, disc_copies, List, []),
+ Doc = pick(Name, disc_only_copies, List, []),
+ %% With no replicas given at all, default to a ram copy on this node
+ Rc = case {Rc0, Dc, Doc} of
+ {[], [], []} -> [node()];
+ _ -> Rc0
+ end,
+ LC = pick(Name, local_content, List, false),
+ RecName = pick(Name, record_name, List, Name),
+ Attrs = pick(Name, attributes, List, [key, val]),
+ Snmp = pick(Name, snmp, List, []),
+ LoadOrder = pick(Name, load_order, List, 0),
+ AccessMode = pick(Name, access_mode, List, read_write),
+ UserProps = pick(Name, user_properties, List, []),
+ verify({alt, [nil, list]}, mnesia_lib:etype(UserProps),
+ {bad_type, Name, {user_properties, UserProps}}),
+ Cookie = pick(Name, cookie, List, ?unique_cookie),
+ Version = pick(Name, version, List, {{2, 0}, []}),
+ Ix = pick(Name, index, List, []),
+ verify({alt, [nil, list]}, mnesia_lib:etype(Ix),
+ {bad_type, Name, {index, [Ix]}}),
+ %% Index attributes may be given by name; store them as positions
+ Ix2 = [attr_to_pos(I, Attrs) || I <- Ix],
+
+ Frag = pick(Name, frag_properties, List, []),
+ verify({alt, [nil, list]}, mnesia_lib:etype(Frag),
+ {badarg, Name, {frag_properties, Frag}}),
+
+ %% Only cstruct field names are accepted as keys, each at most once
+ Keys = check_keys(Name, List, record_info(fields, cstruct)),
+ check_duplicates(Name, Keys),
+ #cstruct{name = Name,
+ ram_copies = Rc,
+ disc_copies = Dc,
+ disc_only_copies = Doc,
+ type = Type,
+ index = Ix2,
+ snmp = Snmp,
+ load_order = LoadOrder,
+ access_mode = AccessMode,
+ local_content = LC,
+ record_name = RecName,
+ attributes = Attrs,
+ user_properties = lists:sort(UserProps),
+ frag_properties = lists:sort(Frag),
+ cookie = Cookie,
+ version = Version};
+list2cs(Other) ->
+ mnesia:abort({badarg, Other}).
+
+%% Fetch the value stored under Key in the option list List.
+%% A missing key yields Default, unless Default is the atom must, in
+%% which case the activity is aborted. An entry found under Key that is
+%% not a {Key, Value} pair aborts with bad_type.
+pick(Tab, Key, List, Default) ->
+    case {lists:keysearch(Key, 1, List), Default} of
+        {{value, {Key, Value}}, _} ->
+            Value;
+        {{value, BadArg}, _} ->
+            mnesia:abort({bad_type, Tab, BadArg});
+        {false, must} ->
+            mnesia:abort({badarg, Tab, "Missing key", Key, List});
+        {false, _} ->
+            Default
+    end.
+
+%% Convert attribute name to integer if necessary, using the attribute
+%% list registered for Tab. An integer is returned unchanged.
+attr_tab_to_pos(Tab, Attr) ->
+    case is_integer(Attr) of
+        true -> Attr;
+        false -> attr_to_pos(Attr, val({Tab, attributes}))
+    end.
+
+%% Convert attribute name to integer if necessary.
+%% An integer is returned unchanged. An atom is looked up in Attrs and
+%% its record position is returned (the first attribute has position
+%% 2). Anything else, or an atom not present in Attrs, aborts with
+%% {bad_type, Attr}.
+attr_to_pos(Attr, Attrs) ->
+    if
+        is_integer(Attr) -> Attr;
+        is_atom(Attr) -> attr_to_pos(Attr, Attrs, 2);
+        true -> mnesia:abort({bad_type, Attr})
+    end.
+
+%% Scan the attribute list, Pos being the position of its first element.
+attr_to_pos(Attr, [First | Rest], Pos) ->
+    case First of
+        Attr -> Pos;
+        _ -> attr_to_pos(Attr, Rest, Pos + 1)
+    end;
+attr_to_pos(Attr, _, _) ->
+    mnesia:abort({bad_type, Attr}).
+
+%% Return the keys of the {Key, Value} list, in order, after checking
+%% that each one is a member of Items. Aborts with {badarg, Tab, Key}
+%% for an unknown key and with {badarg, Tab, Rest} when the remaining
+%% argument is not a list of pairs.
+check_keys(_, [], _) ->
+    [];
+check_keys(Tab, [{Key, _Val} | Tail], Items) ->
+    case lists:member(Key, Items) of
+        false -> mnesia:abort({badarg, Tab, Key});
+        true -> [Key | check_keys(Tab, Tail, Items)]
+    end;
+check_keys(Tab, Arg, _) ->
+    mnesia:abort({badarg, Tab, Arg}).
+
+%% Return ok when Keys contains no element twice; otherwise abort with
+%% {badarg, Tab, "Duplicate keys", Keys}.
+check_duplicates(Tab, Keys) ->
+    Duplicated = has_duplicates(Keys),
+    if
+        Duplicated -> mnesia:abort({badarg, Tab, "Duplicate keys", Keys});
+        true -> ok
+    end.
+
+%% True when some element of the list also occurs later in the list.
+has_duplicates([]) ->
+    false;
+has_duplicates([Head | Tail]) ->
+    lists:member(Head, Tail) orelse has_duplicates(Tail).
+
+%% This is the only place where we check the validity of data.
+%%
+%% Checks every field of the table definition Cs and aborts
+%% (mnesia:abort/1) with a descriptive reason on the first invalid
+%% field found. Returns ok for a valid cstruct.
+verify_cstruct(Cs) when is_record(Cs, cstruct) ->
+    verify_nodes(Cs),
+
+    Tab = Cs#cstruct.name,
+    verify(atom, mnesia_lib:etype(Tab), {bad_type, Tab}),
+    Type = Cs#cstruct.type,
+    verify(true, lists:member(Type, [set, bag, ordered_set]),
+           {bad_type, Tab, {type, Type}}),
+
+    %% Currently ordered_set is not supported for disc_only_copies.
+    if
+        Type == ordered_set, Cs#cstruct.disc_only_copies /= [] ->
+            mnesia:abort({bad_type, Tab, {not_supported, Type, disc_only_copies}});
+        true ->
+            ok
+    end,
+
+    RecName = Cs#cstruct.record_name,
+    verify(atom, mnesia_lib:etype(RecName),
+           {bad_type, Tab, {record_name, RecName}}),
+
+    Attrs = Cs#cstruct.attributes,
+    verify(list, mnesia_lib:etype(Attrs),
+           {bad_type, Tab, {attributes, Attrs}}),
+
+    %% The record name plus at least two attributes (key and one value)
+    Arity = length(Attrs) + 1,
+    verify(true, Arity > 2, {bad_type, Tab, {attributes, Attrs}}),
+
+    %% Every attribute must be a unique atom other than snmp
+    lists:foldl(fun(Attr,_Other) when Attr == snmp ->
+                        mnesia:abort({bad_type, Tab, {attributes, [Attr]}});
+                   (Attr,Other) ->
+                        verify(atom, mnesia_lib:etype(Attr),
+                               {bad_type, Tab, {attributes, [Attr]}}),
+                        verify(false, lists:member(Attr, Other),
+                               {combine_error, Tab, {attributes, [Attr | Other]}}),
+                        [Attr | Other]
+                end,
+                [],
+                Attrs),
+
+    Index = Cs#cstruct.index,
+    verify({alt, [nil, list]}, mnesia_lib:etype(Index),
+           {bad_type, Tab, {index, Index}}),
+
+    %% An index position must denote a non-key attribute
+    IxFun =
+        fun(Pos) ->
+                verify(true, fun() ->
+                                     if
+                                         is_integer(Pos),
+                                         Pos > 2,
+                                         Pos =< Arity ->
+                                             true;
+                                         true -> false
+                                     end
+                             end,
+                       {bad_type, Tab, {index, [Pos]}})
+        end,
+    lists:foreach(IxFun, Index),
+
+    LC = Cs#cstruct.local_content,
+    verify({alt, [true, false]}, LC,
+           {bad_type, Tab, {local_content, LC}}),
+    Access = Cs#cstruct.access_mode,
+    verify({alt, [read_write, read_only]}, Access,
+           {bad_type, Tab, {access_mode, Access}}),
+
+    Snmp = Cs#cstruct.snmp,
+    verify(true, mnesia_snmp_hook:check_ustruct(Snmp),
+           {badarg, Tab, {snmp, Snmp}}),
+
+    CheckProp = fun(Prop) when is_tuple(Prop), size(Prop) >= 1 -> ok;
+                   (Prop) -> mnesia:abort({bad_type, Tab, {user_properties, [Prop]}})
+                end,
+    lists:foreach(CheckProp, Cs#cstruct.user_properties),
+
+    case Cs#cstruct.cookie of
+        %% The guard must test the node element of the cookie itself.
+        %% Testing the literal atom 'node' is always true and would
+        %% let any term through as the node part.
+        {{MegaSecs, Secs, MicroSecs}, Node}
+          when is_integer(MegaSecs), is_integer(Secs),
+               is_integer(MicroSecs), is_atom(Node) ->
+            ok;
+        Cookie ->
+            mnesia:abort({bad_type, Tab, {cookie, Cookie}})
+    end,
+    case Cs#cstruct.version of
+        {{Major, Minor}, _Detail}
+          when is_integer(Major), is_integer(Minor) ->
+            ok;
+        Version ->
+            mnesia:abort({bad_type, Tab, {version, Version}})
+    end.
+
+%% Check the replica node lists and the load order of Cs: each node
+%% list must be a list (the schema table may have no disc_only_copies
+%% at all), the load order an integer, and the combined node list must
+%% consist of unique atoms. Aborts on the first violation.
+verify_nodes(Cs) ->
+    Tab = Cs#cstruct.name,
+    Ram = Cs#cstruct.ram_copies,
+    Disc = Cs#cstruct.disc_copies,
+    DiscOnly = Cs#cstruct.disc_only_copies,
+    LoadOrder = Cs#cstruct.load_order,
+
+    NilOrList = {alt, [nil, list]},
+    verify(NilOrList, mnesia_lib:etype(Ram),
+           {bad_type, Tab, {ram_copies, Ram}}),
+    verify(NilOrList, mnesia_lib:etype(Disc),
+           {bad_type, Tab, {disc_copies, Disc}}),
+    if
+        Tab =:= schema ->
+            verify([], DiscOnly,
+                   {bad_type, Tab, {disc_only_copies, DiscOnly}});
+        true ->
+            verify(NilOrList, mnesia_lib:etype(DiscOnly),
+                   {bad_type, Tab, {disc_only_copies, DiscOnly}})
+    end,
+    verify(integer, mnesia_lib:etype(LoadOrder),
+           {bad_type, Tab, {load_order, LoadOrder}}),
+
+    AllNodes = Ram ++ Disc ++ DiscOnly,
+    verify(list, mnesia_lib:etype(AllNodes),
+           {combine_error, Tab,
+            [{ram_copies, []}, {disc_copies, []}, {disc_only_copies, []}]}),
+    verify(false, has_duplicates(AllNodes), {combine_error, Tab, AllNodes}),
+    lists:foreach(fun(N) ->
+                          verify(atom, mnesia_lib:etype(N), {bad_type, Tab, N})
+                  end,
+                  AllNodes).
+
+%% verify(Expected, Actual, Error) returns ok when Actual is acceptable
+%% and aborts with Error otherwise. Actual may be a fun, in which case
+%% the (caught) result of calling it is checked instead.
+verify(Expected, Actual, Error) ->
+    case is_function(Actual) of
+        true -> do_verify(Expected, catch Actual(), Error);
+        false -> do_verify(Expected, Actual, Error)
+    end.
+
+%% Expected is either {alt, Values}, accepting any member of Values, or
+%% a single term that Actual must match exactly.
+do_verify(Expected, Actual, Error) ->
+    Accepted =
+        case Expected of
+            {alt, Values} -> lists:member(Actual, Values);
+            _ -> Expected =:= Actual
+        end,
+    case Accepted of
+        true -> ok;
+        false -> mnesia:abort(Error)
+    end.
+
+%% Abort with {read_only, Tab} when the where_to_write list of Tab is
+%% empty; otherwise return ok.
+ensure_writable(Tab) ->
+    Writers = val({Tab, where_to_write}),
+    if
+        Writers == [] -> mnesia:abort({read_only, Tab});
+        true -> ok
+    end.
+
+%% Ensure that all replicas on disk full nodes are active
+ensure_active(Cs) ->
+    ensure_active(Cs, active_replicas).
+
+%% Same check against the node list stored in {Tab, What}. For tables
+%% with local content the missing nodes are asked directly whether they
+%% consider themselves members before giving up.
+%% Aborts with not_active (or no_exists when the list is empty).
+ensure_active(Cs, What) ->
+    Tab = Cs#cstruct.name,
+    Item = {Tab, What},
+    ensure_non_empty(Item),
+    DiscReplicas = mnesia_lib:intersect(val({schema, disc_copies}),
+                                        mnesia_lib:cs_to_nodes(Cs)),
+    case DiscReplicas -- val(Item) of
+        [] ->
+            ok;
+        Missing ->
+            Expl = "All replicas on diskfull nodes are not active yet",
+            case val({Tab, local_content}) of
+                false ->
+                    mnesia:abort({not_active, Expl, Tab, Missing});
+                true ->
+                    case rpc:multicall(Missing, ?MODULE, is_remote_member, [Item]) of
+                        {Replies, []} ->
+                            check_active(Replies, Expl, Tab);
+                        {_Replies, BadNs} ->
+                            mnesia:abort({not_active, Expl, Tab, BadNs})
+                    end
+            end
+    end.
+
+%% Abort with {no_exists, Tab} when the list stored in {Tab, What} is
+%% empty; otherwise return ok.
+ensure_non_empty({Tab, What}) ->
+    Stored = val({Tab, What}),
+    if
+        Stored == [] -> mnesia:abort({no_exists, Tab});
+        true -> ok
+    end.
+
+%% Only defined for the schema table: return ok when Node is not among
+%% the active schema replicas. Aborts with {active, Expl, Node} when it
+%% is, and with {no_exists, schema} when there are no active replicas.
+ensure_not_active(Tab = schema, Node) ->
+    Active = val({Tab, active_replicas}),
+    case {lists:member(Node, Active), Active} of
+        {true, _} ->
+            Expl = "Mnesia is running",
+            mnesia:abort({active, Expl, Node});
+        {false, []} ->
+            mnesia:abort({no_exists, Tab});
+        {false, _} ->
+            ok
+    end.
+
+%% Tell whether the local node is a member of the node list stored
+%% under Key. Returns {Boolean, node()}.
+is_remote_member(Key) ->
+    {lists:member(node(), val(Key)), node()}.
+
+%% Go through the is_remote_member/1 replies: ok when every node
+%% answered {true, Node}; abort with not_active at the first
+%% {false, Node} or {badrpc, Reason} reply.
+check_active([], _Expl, _Tab) ->
+    ok;
+check_active([{true, _Node} | Rest], Expl, Tab) ->
+    check_active(Rest, Expl, Tab);
+check_active([{false, Node} | _Rest], Expl, Tab) ->
+    mnesia:abort({not_active, Expl, Tab, [Node]});
+check_active([{badrpc, Reason} | _Rest], Expl, Tab) ->
+    mnesia:abort({not_active, Expl, Tab, Reason}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Here's the real interface function to create a table
+
+%% Create the table(s) described by the create list TabDef inside a
+%% schema transaction. Returns the result of schema_transaction/1.
+create_table(TabDef) ->
+ schema_transaction(fun() -> do_multi_create_table(TabDef) end).
+
+%% And the corresponding do routines ....
+
+%% Runs inside a schema transaction: lock the schema for writing and
+%% create the table described by TabDef. A definition carrying
+%% fragmentation properties is first expanded into one cstruct per
+%% table by mnesia_frag:expand_cstruct/1. Returns ok.
+do_multi_create_table(TabDef) ->
+    get_tid_ts_and_lock(schema, write),
+    ensure_writable(schema),
+    Cs = list2cs(TabDef),
+    CsList =
+        case Cs#cstruct.frag_properties of
+            [] -> [Cs];
+            _Props -> mnesia_frag:expand_cstruct(Cs)
+        end,
+    lists:foreach(fun do_create_table/1, CsList),
+    ok.
+
+%% Insert the schema ops that create the table Cs into the store of the
+%% current schema transaction.
+do_create_table(Cs) ->
+    {_Mod, _Tid, Ts} = get_tid_ts_and_lock(schema, none),
+    do_insert_schema_ops(Ts#tidstore.store, make_create_table(Cs)).
+
+%% Return the ops needed to create the table Cs; aborts with
+%% {already_exists, Tab} if the table already exists.
+make_create_table(Cs) ->
+    Name = Cs#cstruct.name,
+    verify(false, check_if_exists(Name), {already_exists, Name}),
+    unsafe_make_create_table(Cs).
+
+% unsafe_do_create_table(Cs) ->
+% {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, none),
+% Store = Ts#tidstore.store,
+% do_insert_schema_ops(Store, unsafe_make_create_table(Cs)).
+
+%% Return the create_table op for Cs without checking whether the table
+%% already exists. The cstruct is verified, all nodes that are to hold
+%% disc based replicas must be running, and a write lock for the not
+%% yet existing table is taken on the running replica nodes.
+unsafe_make_create_table(Cs) ->
+    {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, none),
+    verify_cstruct(Cs),
+    Tab = Cs#cstruct.name,
+
+    %% Check that we have all disc replica nodes running
+    DiscNodes = Cs#cstruct.disc_copies ++ Cs#cstruct.disc_only_copies,
+    Running = val({current, db_nodes}),
+    lists:foreach(fun(N) ->
+                          verify(true, lists:member(N, Running),
+                                 {not_active, Tab, N})
+                  end,
+                  DiscNodes),
+
+    Replicas = mnesia_lib:intersect(mnesia_lib:cs_to_nodes(Cs), Running),
+    mnesia_locker:wlock_no_exist(Tid, Ts#tidstore.store, Tab, Replicas),
+    [{op, create_table, cs2list(Cs)}].
+
+%% Tell whether Tab exists from the point of view of the current schema
+%% transaction: start from whether it existed before the transaction
+%% and let the create_table / delete_table ops for Tab found in the
+%% transaction store override that.
+check_if_exists(Tab) ->
+    {_, _, Ts} = get_tid_ts_and_lock(schema, write),
+    Store = Ts#tidstore.store,
+    Scan =
+        fun({op, create_table, [{name, T}|_]}, _Acc) when T==Tab ->
+                true;
+           ({op, delete_table, [{name,T}|_]}, _Acc) when T==Tab ->
+                false;
+           (_Other, Acc) ->
+                Acc
+        end,
+    ets:foldl(Scan, existed_before(Tab), Store).
+
+%% True when a cstruct is registered for Tab, i.e. when looking up
+%% {Tab, cstruct} does not end in an 'EXIT' tuple.
+existed_before(Tab) ->
+    element(1, ?catch_val({Tab,cstruct})) =/= 'EXIT'.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Delete a table entirely on all nodes.
+
+%% Delete Tab inside a schema transaction. Returns the result of
+%% schema_transaction/1.
+delete_table(Tab) ->
+ schema_transaction(fun() -> do_delete_table(Tab) end).
+
+%% Runs inside a schema transaction: insert the ops that delete Tab.
+%% The schema table itself cannot be deleted; that request aborts with
+%% {bad_type, schema}.
+do_delete_table(Tab) ->
+    case Tab of
+        schema ->
+            mnesia:abort({bad_type, schema});
+        _ ->
+            TidTs = get_tid_ts_and_lock(schema, write),
+            ensure_writable(schema),
+            insert_schema_ops(TidTs, make_delete_table(Tab, whole_table))
+    end.
+
+%% Return the list of ops needed to delete Tab.
+%% Mode is whole_table (a fragmented table is deleted together with all
+%% its fragments) or single_frag (only Tab itself is deleted).
+%% A table created earlier in the same schema transaction is instead
+%% removed by deleting its pending ops from the store, and [] is
+%% returned. Aborts with {no_exists, Tab} when there is nothing to
+%% delete.
+make_delete_table(Tab, Mode) ->
+ case existed_before(Tab) of
+ false ->
+ %% Deleting a table that was created in this very
+ %% schema transaction. Delete all ops in the Store
+ %% that operate on this table. We cannot run a normal
+ %% delete operation, since that involves checking live
+ %% nodes etc.
+ TidTs = get_tid_ts_and_lock(schema, write),
+ {_, _, Ts} = TidTs,
+ Store = Ts#tidstore.store,
+ %% Number of pending create_table/delete_table ops removed
+ Deleted = ets:select_delete(
+ Store, [{{op,'$1',[{name,Tab}|'_']},
+ [{'or',
+ {'==','$1',create_table},
+ {'==','$1',delete_table}}], [true]}]),
+ %% Also drop pending table property ops for the table
+ ets:select_delete(
+ Store, [{{op,'$1',[{name,Tab}|'_'],'_'},
+ [{'or',
+ {'==','$1',write_table_property},
+ {'==','$1',delete_table_property}}],
+ [true]}]),
+ case Deleted of
+ 0 -> mnesia:abort({no_exists, Tab});
+ _ -> []
+ end;
+ true ->
+ case Mode of
+ whole_table ->
+ case val({Tab, frag_properties}) of
+ [] ->
+ [make_delete_table2(Tab)];
+ _Props ->
+ %% Check if it is a base table
+ mnesia_frag:lookup_frag_hash(Tab),
+
+ %% Check for foreigners
+ F = mnesia_frag:lookup_foreigners(Tab),
+ verify([], F, {combine_error,
+ Tab, "Too many foreigners", F}),
+ [make_delete_table2(T) ||
+ T <- mnesia_frag:frag_names(Tab)]
+ end;
+ single_frag ->
+ [make_delete_table2(Tab)]
+ end
+ end.
+
+%% Write lock Tab, check that its replicas are active and that it is
+%% writable, and return the delete_table op for it.
+make_delete_table2(Tab) ->
+    get_tid_ts_and_lock(Tab, write),
+    Cstruct = val({Tab, cstruct}),
+    ensure_active(Cstruct),
+    ensure_writable(Tab),
+    CreateList = cs2list(Cstruct),
+    {op, delete_table, CreateList}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Change fragmentation of a table
+
+%% Apply the fragmentation change Change to Tab inside a schema
+%% transaction. Returns the result of schema_transaction/1.
+change_table_frag(Tab, Change) ->
+ schema_transaction(fun() -> do_change_table_frag(Tab, Change) end).
+
+%% Runs inside a schema transaction: let mnesia_frag compute the op
+%% lists for the change and insert each of them into the store.
+%% Tab must be an atom other than schema, otherwise the activity
+%% aborts with {bad_type, Tab}. Returns ok.
+do_change_table_frag(Tab, Change) when is_atom(Tab), Tab /= schema ->
+    TidTs = get_tid_ts_and_lock(schema, write),
+    OpLists = mnesia_frag:change_table_frag(Tab, Change),
+    lists:foreach(fun(Ops) -> insert_schema_ops(TidTs, Ops) end, OpLists),
+    ok;
+do_change_table_frag(Tab, _Change) ->
+    mnesia:abort({bad_type, Tab}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Clear a table
+
+%% Clear Tab. Returns the result of schema_transaction/1.
+%% NOTE(review): the original remark here said "No need for a schema
+%% transaction", yet the call below does run one - confirm which of
+%% the two is intended.
+clear_table(Tab) ->
+ schema_transaction(fun() -> do_clear_table(Tab) end).
+
+%% Runs inside a schema transaction: write lock both the schema and Tab
+%% and insert the clear_table op. The schema table cannot be cleared;
+%% that request aborts with {bad_type, schema}.
+do_clear_table(Tab) ->
+    case Tab of
+        schema ->
+            mnesia:abort({bad_type, schema});
+        _ ->
+            TidTs = get_tid_ts_and_lock(schema, write),
+            get_tid_ts_and_lock(Tab, write),
+            insert_schema_ops(TidTs, make_clear_table(Tab))
+    end.
+
+%% Return the clear_table op list for Tab; aborts if Tab is not
+%% writable.
+make_clear_table(Tab) ->
+    Cstruct = val({Tab, cstruct}),
+    ensure_writable(Tab),
+    CreateList = cs2list(Cstruct),
+    [{op, clear_table, CreateList}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Add a replica of Tab with storage type Storage on Node inside a
+%% schema transaction. Returns the result of schema_transaction/1.
+add_table_copy(Tab, Node, Storage) ->
+ schema_transaction(fun() -> do_add_table_copy(Tab, Node, Storage) end).
+
+%% Runs inside a schema transaction: insert the ops that add a replica
+%% of Tab on Node. Both Tab and Node must be atoms, otherwise the
+%% activity aborts with {badarg, Tab, Node}.
+do_add_table_copy(Tab, Node, Storage) ->
+    case is_atom(Tab) andalso is_atom(Node) of
+        true ->
+            TidTs = get_tid_ts_and_lock(schema, write),
+            insert_schema_ops(TidTs, make_add_table_copy(Tab, Node, Storage));
+        false ->
+            mnesia:abort({badarg, Tab, Node})
+    end.
+
+%% Return the add_table_copy op list for adding a replica of Tab with
+%% storage type Storage on Node.
+%% Aborts if the schema is not writable, if Node already holds a
+%% replica, if the resulting cstruct is invalid, or if the storage type
+%% / running state combination checked below is not allowed.
+make_add_table_copy(Tab, Node, Storage) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ Ns = mnesia_lib:cs_to_nodes(Cs),
+ verify(false, lists:member(Node, Ns), {already_exists, Tab, Node}),
+ Cs2 = new_cs(Cs, Node, Storage, add),
+ verify_cstruct(Cs2),
+
+ %% Check storage and if node is running
+ IsRunning = lists:member(Node, val({current, db_nodes})),
+ if
+ Tab == schema ->
+ %% A schema replica may only be added as ram_copies and only
+ %% on a node that is not among the current db_nodes
+ if
+ Storage /= ram_copies ->
+ mnesia:abort({badarg, Tab, Storage});
+ IsRunning == true ->
+ mnesia:abort({already_exists, Tab, Node});
+ true ->
+ ignore
+ end;
+ Storage == ram_copies ->
+ ignore;
+ IsRunning == true ->
+ ignore;
+ IsRunning == false ->
+ %% Replicas other than ram_copies require a running node
+ mnesia:abort({not_active, schema, Node})
+ end,
+ [{op, add_table_copy, Storage, Node, cs2list(Cs2)}].
+
+%% Remove the replica of Tab held by Node, inside a schema transaction.
+del_table_copy(Tab, Node) ->
+    Work = fun() -> do_del_table_copy(Tab, Node) end,
+    schema_transaction(Work).
+
+%% Transaction body for del_table_copy/2. Node must be an atom, otherwise
+%% the transaction aborts with {badarg, Tab, Node}.
+%% Only the schema is write-locked here; no lock is taken on Tab itself.
+do_del_table_copy(Tab, Node) when is_atom(Node) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    DelOps = make_del_table_copy(Tab, Node),
+    insert_schema_ops(SchemaTidTs, DelOps);
+do_del_table_copy(Tab, Node) ->
+    mnesia:abort({badarg, Tab, Node}).
+
+%% Build the op list that removes Node's replica of Tab.
+%%  - last schema replica: abort with combine_error
+%%  - last replica of an ordinary table: delete the whole table instead
+%%  - schema replica: Node is also removed from every other table
+%%  - otherwise: a single del_table_copy op
+make_del_table_copy(Tab, Node) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    Storage = mnesia_lib:schema_cs_to_storage_type(Node, OldCs),
+    NewCs = new_cs(OldCs, Node, Storage, del),
+    Remaining = mnesia_lib:cs_to_nodes(NewCs),
+    if
+        Remaining == [], Tab == schema ->
+            mnesia:abort({combine_error, Tab, "Last replica"});
+        Remaining == [] ->
+            ensure_active(OldCs),
+            dbg_out("Last replica deleted in table ~p~n", [Tab]),
+            make_delete_table(Tab, whole_table);
+        Tab == schema ->
+            ensure_not_active(Tab, Node),
+            verify_cstruct(NewCs),
+            OtherOps = remove_node_from_tabs(val({schema, tables}), Node),
+            [{op, del_table_copy, ram_copies, Node, cs2list(NewCs)} | OtherOps];
+        true ->
+            ensure_active(OldCs),
+            verify_cstruct(NewCs),
+            [{op, del_table_copy, Storage, Node, cs2list(NewCs)}]
+    end.
+
+%% Produce the ops needed to strip Node out of every table in the list
+%% (the schema table is skipped). For each table:
+%%  - Node holds no replica: emit change_table_frag only if mnesia_frag
+%%    reports that it modified the fragmentation properties
+%%  - Node holds the only replica: emit delete_table
+%%  - otherwise: emit del_table_copy
+remove_node_from_tabs([], _Node) ->
+    [];
+remove_node_from_tabs([schema | Tabs], Node) ->
+    remove_node_from_tabs(Tabs, Node);
+remove_node_from_tabs([Tab | Tabs], Node) ->
+    {FragCs, IsFragModified} =
+        mnesia_frag:remove_node(Node, incr_version(val({Tab, cstruct}))),
+    case {mnesia_lib:schema_cs_to_storage_type(Node, FragCs), IsFragModified} of
+        {unknown, true} ->
+            [{op, change_table_frag, {del_node, Node}, cs2list(FragCs)} |
+             remove_node_from_tabs(Tabs, Node)];
+        {unknown, false} ->
+            remove_node_from_tabs(Tabs, Node);
+        {Storage, _} ->
+            StrippedCs = new_cs(FragCs, Node, Storage, del),
+            case mnesia_lib:cs_to_nodes(StrippedCs) of
+                [] ->
+                    [{op, delete_table, cs2list(FragCs)} |
+                     remove_node_from_tabs(Tabs, Node)];
+                _StillReplicated ->
+                    verify_cstruct(StrippedCs),
+                    [{op, del_table_copy, ram_copies, Node, cs2list(StrippedCs)} |
+                     remove_node_from_tabs(Tabs, Node)]
+            end
+    end.
+
+%% Return a copy of Cs with Node added to (Op = add) or removed from
+%% (Op = del) the replica list that corresponds to Storage.
+%% Any other Storage/Op combination aborts with {badarg, TabName, Storage}.
+new_cs(Cs, Node, Storage, Op) ->
+    case {Storage, Op} of
+        {ram_copies, add} ->
+            Cs#cstruct{ram_copies = opt_add(Node, Cs#cstruct.ram_copies)};
+        {disc_copies, add} ->
+            Cs#cstruct{disc_copies = opt_add(Node, Cs#cstruct.disc_copies)};
+        {disc_only_copies, add} ->
+            Cs#cstruct{disc_only_copies =
+                           opt_add(Node, Cs#cstruct.disc_only_copies)};
+        {ram_copies, del} ->
+            Cs#cstruct{ram_copies = lists:delete(Node, Cs#cstruct.ram_copies)};
+        {disc_copies, del} ->
+            Cs#cstruct{disc_copies = lists:delete(Node, Cs#cstruct.disc_copies)};
+        {disc_only_copies, del} ->
+            Cs#cstruct{disc_only_copies =
+                           lists:delete(Node, Cs#cstruct.disc_only_copies)};
+        _ ->
+            mnesia:abort({badarg, Cs#cstruct.name, Storage})
+    end.
+
+
+%% Put N at the head of L, dropping the first existing occurrence of N.
+opt_add(N, L) ->
+    WithoutFirst = L -- [N],
+    [N | WithoutFirst].
+
+%% Move the replica of Tab from FromNode to ToNode inside a schema
+%% transaction.
+move_table(Tab, FromNode, ToNode) ->
+    Work = fun() -> do_move_table(Tab, FromNode, ToNode) end,
+    schema_transaction(Work).
+
+%% Transaction body for move_table/3. The schema table cannot be moved
+%% ({bad_type, schema}); both node names must be atoms, otherwise the
+%% transaction aborts with {badarg, Tab, FromNode, ToNode}.
+do_move_table(schema, _FromNode, _ToNode) ->
+    mnesia:abort({bad_type, schema});
+do_move_table(Tab, FromNode, ToNode) when is_atom(FromNode), is_atom(ToNode) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    MoveOps = make_move_table(Tab, FromNode, ToNode),
+    insert_schema_ops(SchemaTidTs, MoveOps);
+do_move_table(Tab, FromNode, ToNode) ->
+    mnesia:abort({badarg, Tab, FromNode, ToNode}).
+
+%% Build the op list that moves a replica: add the copy on ToNode,
+%% synchronise the transaction, then delete the copy on FromNode.
+%% The replica keeps the storage type it had on FromNode.
+make_move_table(Tab, FromNode, ToNode) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ReplicaNodes = mnesia_lib:cs_to_nodes(OldCs),
+    verify(false, lists:member(ToNode, ReplicaNodes),
+           {already_exists, Tab, ToNode}),
+    verify(true, lists:member(FromNode, val({Tab, where_to_write})),
+           {not_active, Tab, FromNode}),
+    verify(false, val({Tab,local_content}),
+           {"Cannot move table with local content", Tab}),
+    ensure_active(OldCs),
+    RunningNodes = val({current, db_nodes}),
+    Storage = mnesia_lib:schema_cs_to_storage_type(FromNode, OldCs),
+    verify(true, lists:member(ToNode, RunningNodes),
+           {not_active, schema, ToNode}),
+
+    AddedCs = new_cs(OldCs, ToNode, Storage, add),
+    MovedCs = new_cs(AddedCs, FromNode, Storage, del),
+    verify_cstruct(MovedCs),
+    AddOp = {op, add_table_copy, Storage, ToNode, cs2list(AddedCs)},
+    DelOp = {op, del_table_copy, Storage, FromNode, cs2list(MovedCs)},
+    [AddOp, {op, sync_trans}, DelOp].
+
+%% end of functions to add and delete nodes to tables
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+
+%% Change the storage type of Tab's replica on Node to ToS, inside a
+%% schema transaction.
+change_table_copy_type(Tab, Node, ToS) ->
+    Work = fun() -> do_change_table_copy_type(Tab, Node, ToS) end,
+    schema_transaction(Work).
+
+%% Transaction body for change_table_copy_type/3. Node must be an atom,
+%% otherwise the transaction aborts with {badarg, Tab, Node}.
+do_change_table_copy_type(Tab, Node, ToS) when is_atom(Node) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    %% the table write lock ensures global sync
+    _ = get_tid_ts_and_lock(Tab, write),
+    TypeOps = make_change_table_copy_type(Tab, Node, ToS),
+    insert_schema_ops(SchemaTidTs, TypeOps);
+do_change_table_copy_type(Tab, Node, _ToS) ->
+    mnesia:abort({badarg, Tab, Node}).
+
+%% Build the op list that changes the storage type of Tab on Node.
+%% Changing to `unknown' is treated as deleting the replica.
+%% Aborts with {already_exists, Tab, Node, ToS} when the replica already
+%% has the requested type.
+make_change_table_copy_type(Tab, Node, unknown) ->
+    make_del_table_copy(Tab, Node);
+make_change_table_copy_type(Tab, Node, ToS) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    FromS = mnesia_lib:storage_type_at_node(Node, Tab),
+
+    case compare_storage_type(false, FromS, ToS) of
+        {same, _} -> mnesia:abort({already_exists, Tab, Node, ToS});
+        {diff, _} -> ignore;
+        incompatible -> ensure_active(OldCs)
+    end,
+
+    RemovedCs = new_cs(OldCs, Node, FromS, del),
+    RetypedCs = new_cs(RemovedCs, Node, ToS, add),
+    verify_cstruct(RetypedCs),
+
+    [{op, change_table_copy_type, Node, FromS, ToS, cs2list(RetypedCs)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% change index functions ....
+%% Pos has already been incremented by 1 in both of these functions
+
+%% Add an index on Tab at Pos, inside a schema transaction.
+add_table_index(Tab, Pos) ->
+    Work = fun() -> do_add_table_index(Tab, Pos) end,
+    schema_transaction(Work).
+
+%% Transaction body for add_table_index/2. The schema table cannot be
+%% indexed ({bad_type, schema}). Attr is translated to a position with
+%% attr_tab_to_pos/2 after the locks have been taken.
+do_add_table_index(schema, _Attr) ->
+    mnesia:abort({bad_type, schema});
+do_add_table_index(Tab, Attr) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    _ = get_tid_ts_and_lock(Tab, read),
+    Pos = attr_tab_to_pos(Tab, Attr),
+    IndexOps = make_add_table_index(Tab, Pos),
+    insert_schema_ops(SchemaTidTs, IndexOps).
+
+%% Build the add_index op for Tab/Pos. Aborts with
+%% {already_exists, Tab, Pos} when Pos is already indexed. The index list
+%% stored in the new cstruct is kept sorted.
+make_add_table_index(Tab, Pos) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    OldIndex = OldCs#cstruct.index,
+    verify(false, lists:member(Pos, OldIndex), {already_exists, Tab, Pos}),
+    NewCs = OldCs#cstruct{index = lists:sort([Pos | OldIndex])},
+    verify_cstruct(NewCs),
+    [{op, add_index, Pos, cs2list(NewCs)}].
+
+%% Remove the index on Tab at Pos, inside a schema transaction.
+del_table_index(Tab, Pos) ->
+    Work = fun() -> do_del_table_index(Tab, Pos) end,
+    schema_transaction(Work).
+
+%% Transaction body for del_table_index/2. The schema table is rejected
+%% with {bad_type, schema}. Attr is translated to a position with
+%% attr_tab_to_pos/2 after the locks have been taken.
+do_del_table_index(schema, _Attr) ->
+    mnesia:abort({bad_type, schema});
+do_del_table_index(Tab, Attr) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    _ = get_tid_ts_and_lock(Tab, read),
+    Pos = attr_tab_to_pos(Tab, Attr),
+    IndexOps = make_del_table_index(Tab, Pos),
+    insert_schema_ops(SchemaTidTs, IndexOps).
+
+%% Build the del_index op for Tab/Pos. Aborts with {no_exists, Tab, Pos}
+%% when Pos is not currently indexed.
+make_del_table_index(Tab, Pos) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    OldIndex = OldCs#cstruct.index,
+    verify(true, lists:member(Pos, OldIndex), {no_exists, Tab, Pos}),
+    NewIndex = lists:delete(Pos, OldIndex),
+    NewCs = OldCs#cstruct{index = NewIndex},
+    verify_cstruct(NewCs),
+    [{op, del_index, Pos, cs2list(NewCs)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Attach the SNMP structure Ustruct to Tab, inside a schema transaction.
+add_snmp(Tab, Ustruct) ->
+    Work = fun() -> do_add_snmp(Tab, Ustruct) end,
+    schema_transaction(Work).
+
+%% Transaction body for add_snmp/2; the schema table is rejected with
+%% {bad_type, schema}.
+do_add_snmp(schema, _Ustruct) ->
+    mnesia:abort({bad_type, schema});
+do_add_snmp(Tab, Ustruct) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    _ = get_tid_ts_and_lock(Tab, read),
+    SnmpOps = make_add_snmp(Tab, Ustruct),
+    insert_schema_ops(SchemaTidTs, SnmpOps).
+
+%% Build the add_snmp op. Aborts with {already_exists, Tab, snmp} when the
+%% table already has an snmp setting, and with {badarg, Tab, snmp, Ustruct}
+%% when mnesia_snmp_hook:check_ustruct/1 does not return true.
+make_add_snmp(Tab, Ustruct) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    verify([], OldCs#cstruct.snmp, {already_exists, Tab, snmp}),
+    BadUstruct = {badarg, Tab, snmp, Ustruct},
+    verify(true, mnesia_snmp_hook:check_ustruct(Ustruct), BadUstruct),
+    NewCs = OldCs#cstruct{snmp = Ustruct},
+    verify_cstruct(NewCs),
+    [{op, add_snmp, Ustruct, cs2list(NewCs)}].
+
+%% Remove the SNMP structure from Tab, inside a schema transaction.
+del_snmp(Tab) ->
+    Work = fun() -> do_del_snmp(Tab) end,
+    schema_transaction(Work).
+
+%% Transaction body for del_snmp/1; the schema table is rejected with
+%% {bad_type, schema}.
+do_del_snmp(schema) ->
+    mnesia:abort({bad_type, schema});
+do_del_snmp(Tab) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    _ = get_tid_ts_and_lock(Tab, read),
+    SnmpOps = make_del_snmp(Tab),
+    insert_schema_ops(SchemaTidTs, SnmpOps).
+
+%% Build the del_snmp op: the new cstruct has its snmp field reset to [].
+make_del_snmp(Tab) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    NewCs = OldCs#cstruct{snmp = []},
+    verify_cstruct(NewCs),
+    [{op, del_snmp, cs2list(NewCs)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+
+%% Transform every record of Tab with Fun and install the new attribute
+%% list and record name. Fun is either a fun or the atom `ignore';
+%% NewAttrs must be a list and NewRecName an atom. Arguments of any other
+%% shape are rejected without starting a transaction by returning
+%% {aborted, {bad_type, Tab, Fun, NewAttrs, NewRecName}}.
+transform_table(Tab, Fun, NewAttrs, NewRecName)
+  when is_function(Fun) orelse Fun == ignore,
+       is_list(NewAttrs),
+       is_atom(NewRecName) ->
+    Work = fun() -> do_transform_table(Tab, Fun, NewAttrs, NewRecName) end,
+    schema_transaction(Work);
+transform_table(Tab, Fun, NewAttrs, NewRecName) ->
+    {aborted,{bad_type, Tab, Fun, NewAttrs, NewRecName}}.
+
+%% Transaction body for transform_table/4; the schema table is rejected
+%% with {bad_type, schema}. Both schema and Tab are write-locked.
+do_transform_table(schema, _Fun, _NewAttrs, _NewRecName) ->
+    mnesia:abort({bad_type, schema});
+do_transform_table(Tab, Fun, NewAttrs, NewRecName) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    _ = get_tid_ts_and_lock(Tab, write),
+    TransformOps = make_transform(Tab, Fun, NewAttrs, NewRecName),
+    insert_schema_ops(SchemaTidTs, TransformOps).
+
+%% Build the op list for a table transform.
+%% Without indexes this is a single transform op. With indexes the list
+%% is: one del_index op per indexed position, the transform op, then one
+%% add_index op per position, so that the indexes are rebuilt around the
+%% transformation.
+make_transform(Tab, Fun, NewAttrs, NewRecName) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    ensure_writable(Tab),
+    Retype = fun(C) ->
+                     C#cstruct{attributes = NewAttrs, record_name = NewRecName}
+             end,
+    case mnesia_lib:val({Tab, index}) of
+        [] ->
+            NewCs = Retype(OldCs),
+            verify_cstruct(NewCs),
+            [{op, transform, Fun, cs2list(NewCs)}];
+        Positions ->
+            DropIndex =
+                fun(Pos, CsAcc) ->
+                        Remaining = lists:delete(Pos, CsAcc#cstruct.index),
+                        CsNext = CsAcc#cstruct{index = Remaining},
+                        {{op, del_index, Pos, cs2list(CsNext)}, CsNext}
+                end,
+            RestoreIndex =
+                fun(Pos, CsAcc) ->
+                        Extended = lists:sort([Pos | CsAcc#cstruct.index]),
+                        CsNext = CsAcc#cstruct{index = Extended},
+                        {{op, add_index, Pos, cs2list(CsNext)}, CsNext}
+                end,
+            {DelOps, UnindexedCs} = lists:mapfoldl(DropIndex, OldCs, Positions),
+            TransformedCs = Retype(UnindexedCs),
+            {AddOps, ReindexedCs} =
+                lists:mapfoldl(RestoreIndex, TransformedCs, Positions),
+            verify_cstruct(ReindexedCs),
+            TransformOp = {op, transform, Fun, cs2list(TransformedCs)},
+            DelOps ++ [TransformOp | AddOps]
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+
+%% Change the access mode of Tab to Mode, inside a schema transaction.
+change_table_access_mode(Tab, Mode) ->
+    Work = fun() -> do_change_table_access_mode(Tab, Mode) end,
+    schema_transaction(Work).
+
+%% Transaction body for change_table_access_mode/2.
+%% In addition to the usual schema write lock, write locks are requested
+%% through mnesia_locker:wlock_no_exist/4 on the active replicas of both
+%% the schema and Tab before the op is inserted directly into the store.
+do_change_table_access_mode(Tab, Mode) ->
+    {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, write),
+    Store = Ts#tidstore.store,
+    SchemaReplicas = val({schema, active_replicas}),
+    mnesia_locker:wlock_no_exist(Tid, Store, schema, SchemaReplicas),
+    TabReplicas = val({Tab, active_replicas}),
+    mnesia_locker:wlock_no_exist(Tid, Store, Tab, TabReplicas),
+    ModeOps = make_change_table_access_mode(Tab, Mode),
+    do_insert_schema_ops(Store, ModeOps).
+
+%% Build the change_table_access_mode op. Aborts with
+%% {already_exists, Tab, Mode} when the table already has that mode.
+make_change_table_access_mode(Tab, Mode) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    OldMode = OldCs#cstruct.access_mode,
+    IsUnchanged = (OldMode == Mode),
+    verify(false, IsUnchanged, {already_exists, Tab, Mode}),
+    NewCs = OldCs#cstruct{access_mode = Mode},
+    verify_cstruct(NewCs),
+    [{op, change_table_access_mode, cs2list(NewCs), OldMode, Mode}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Change the load order of Tab, inside a schema transaction.
+change_table_load_order(Tab, LoadOrder) ->
+    Work = fun() -> do_change_table_load_order(Tab, LoadOrder) end,
+    schema_transaction(Work).
+
+%% Transaction body for change_table_load_order/2; the schema table is
+%% rejected with {bad_type, schema}. Tab is passed to
+%% get_tid_ts_and_lock/2 with lock kind `none'.
+do_change_table_load_order(schema, _LoadOrder) ->
+    mnesia:abort({bad_type, schema});
+do_change_table_load_order(Tab, LoadOrder) ->
+    SchemaTidTs = get_tid_ts_and_lock(schema, write),
+    _ = get_tid_ts_and_lock(Tab, none),
+    OrderOps = make_change_table_load_order(Tab, LoadOrder),
+    insert_schema_ops(SchemaTidTs, OrderOps).
+
+%% Build the change_table_load_order op, carrying both the previous and
+%% the new load order.
+make_change_table_load_order(Tab, LoadOrder) ->
+    ensure_writable(schema),
+    OldCs = incr_version(val({Tab, cstruct})),
+    ensure_active(OldCs),
+    PrevOrder = OldCs#cstruct.load_order,
+    NewCs = OldCs#cstruct{load_order = LoadOrder},
+    verify_cstruct(NewCs),
+    [{op, change_table_load_order, cs2list(NewCs), PrevOrder, LoadOrder}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Store the user property Prop on Tab, inside a schema transaction.
+%% Prop must be a tuple with at least one element (its first element is
+%% the property key); anything else yields {aborted, {bad_type, Tab, Prop}}
+%% without starting a transaction.
+write_table_property(Tab, Prop) when is_tuple(Prop), tuple_size(Prop) >= 1 ->
+    Work = fun() -> do_write_table_property(Tab, Prop) end,
+    schema_transaction(Work);
+write_table_property(Tab, Prop) ->
+    {aborted, {bad_type, Tab, Prop}}.
+%% Transaction body for write_table_property/2.
+%% If the transaction store already holds an op for Tab that carries the
+%% table definition (see update_existing_op/5), the property is merged
+%% into that op. Otherwise Tab is taken to be an existing table and a
+%% fresh write_property op is queued.
+do_write_table_property(Tab, Prop) ->
+    {_, _, Ts} = TidTs = get_tid_ts_and_lock(schema, write),
+    Store = Ts#tidstore.store,
+    Merged = change_prop_in_existing_op(Tab, Prop, write_property, Store),
+    if
+        Merged == true ->
+            dbg_out("change_prop_in_existing_op"
+                    "(~p,~p,write_property,Store) -> true~n",
+                    [Tab,Prop]),
+            %% the property now lives in the already queued op
+            ok;
+        Merged == false ->
+            dbg_out("change_prop_in_existing_op"
+                    "(~p,~p,write_property,Store) -> false~n",
+                    [Tab,Prop]),
+            %% this must be an existing table
+            get_tid_ts_and_lock(Tab, none),
+            insert_schema_ops(TidTs, make_write_table_properties(Tab, [Prop]))
+    end.
+
+%% Build one write_property op per property in Props, starting from the
+%% current cstruct of Tab with its version incremented.
+make_write_table_properties(Tab, Props) ->
+    ensure_writable(schema),
+    StartCs = incr_version(val({Tab, cstruct})),
+    ensure_active(StartCs),
+    make_write_table_properties(Tab, Props, StartCs).
+
+%% Worker for make_write_table_properties/2. Each property replaces any
+%% previous property with the same key; the cstruct produced for one
+%% property is the starting point for the next.
+make_write_table_properties(_Tab, [], _Cs) ->
+    [];
+make_write_table_properties(Tab, [Prop | Props], Cs) ->
+    Key = element(1, Prop),
+    Kept = lists:keydelete(Key, 1, Cs#cstruct.user_properties),
+    NextCs = Cs#cstruct{user_properties = lists:merge(Kept, [Prop])},
+    verify_cstruct(NextCs),
+    WriteOp = {op, write_property, cs2list(NextCs), Prop},
+    [WriteOp | make_write_table_properties(Tab, Props, NextCs)].
+
+%% Try to fold a property change into an op already queued in Store.
+%% Returns true when an op was rewritten (the store content is then
+%% replaced by the updated op list) and false when nothing matched, in
+%% which case the store is left untouched.
+change_prop_in_existing_op(Tab, Prop, How, Store) ->
+    QueuedOps = ets:match_object(Store, '_'),
+    case update_existing_op(QueuedOps, Tab, Prop, How, []) of
+        false ->
+            false;
+        {true, UpdatedOps} ->
+            ets:match_delete(Store, '_'),
+            lists:foreach(fun(Op) -> ets:insert(Store, Op) end, UpdatedOps),
+            true
+    end.
+
+%% Scan the queued ops for the first one that already carries Tab's
+%% definition -- an earlier write_property/delete_property op or the
+%% create_table op -- and rewrite it so that it reflects Prop.
+%% Returns {true, NewOps} with the op order preserved, or false when no
+%% such op exists. Seen is the reversed list of ops already passed.
+update_existing_op([{op, Kind, Def = [{name,Tab}|_], _OldProp}|Later],
+                   Tab, Prop, How, Seen) when Kind == write_property;
+                                              Kind == delete_property ->
+    %% The previous property value is discarded; the rewritten op carries
+    %% the updated definition and Prop instead.
+    NewDef = insert_prop(Prop, Def, How),
+    Rewritten = {op, How, NewDef, Prop},
+    {true, lists:reverse(Seen) ++ [Rewritten|Later]};
+update_existing_op([Create = {op, create_table, Def}|Later],
+                   Tab, Prop, How, Seen) ->
+    case lists:keysearch(name, 1, Def) of
+        {value, {_, Tab}} ->
+            %% Tab is being created in this transaction -- patch its
+            %% definition in place
+            NewDef = insert_prop(Prop, Def, How),
+            {true, lists:reverse(Seen) ++ [{op, create_table, NewDef}|Later]};
+        _ ->
+            update_existing_op(Later, Tab, Prop, How, [Create|Seen])
+    end;
+update_existing_op([Other|Later], Tab, Prop, How, Seen) ->
+    update_existing_op(Later, Tab, Prop, How, [Other|Seen]);
+update_existing_op([], _, _, _, _) ->
+    false.
+
+%% Look up the user property Key of Tab among the schema ops queued in
+%% the store of the current schema transaction.
+%%
+%% The store is folded over; every create_table, write_property or
+%% delete_property op for Tab replaces the accumulated property list with
+%% the one carried by that op, and a delete_table op for Tab resets it
+%% to []. Ops for other tables leave the accumulator untouched.
+%%
+%% Returns the matching property tuple, or `undefined' when no queued op
+%% for Tab carries a property with that key.
+do_read_table_property(Tab, Key) ->
+    TidTs = get_tid_ts_and_lock(schema, read),
+    {_, _, Ts} = TidTs,
+    Store = Ts#tidstore.store,
+    Props = ets:foldl(
+              fun({op, create_table, [{name, T}|Opts]}, _Acc)
+                    when T==Tab ->
+                      find_props(Opts);
+                 ({op, Op, [{name,T}|Opts], _Prop}, _Acc)
+                    %% `;' separates whole guard sequences, so the table
+                    %% test has to be repeated in each alternative;
+                    %% otherwise a delete_property op for ANY table
+                    %% would match here.
+                    when T==Tab, Op==write_property;
+                         T==Tab, Op==delete_property ->
+                      find_props(Opts);
+                 ({op, delete_table, [{name,T}|_]}, _Acc)
+                    when T==Tab ->
+                      [];
+                 (_Other, Acc) ->
+                      Acc
+              end, [], Store),
+    case lists:keysearch(Key, 1, Props) of
+        {value, Property} ->
+            Property;
+        false ->
+            undefined
+    end.
+
+
+%% Apply a property change to the table definition list L and return the
+%% updated list. Despite the name, How may be write_property (Prop is the
+%% property tuple) or delete_property (Prop is the property key).
+insert_prop(Prop, L, How) ->
+    replace_props(L, merge_with_previous(How, Prop, find_props(L))).
+
+%% Return the value of the first {user_properties, P} entry in the list.
+%% There is deliberately no clause for []: the entry is expected to be
+%% present, and a list without it crashes with function_clause.
+find_props([Entry | Rest]) ->
+    case Entry of
+        {user_properties, Props} -> Props;
+        _ -> find_props(Rest)
+    end.
+
+%% Replace the value of the first {user_properties, _} entry with P,
+%% keeping every other entry and the original order.
+%% As with find_props/1 there is no clause for []: a list without the
+%% entry crashes with function_clause.
+replace_props([Entry | Rest], P) ->
+    case Entry of
+        {user_properties, _Old} -> [{user_properties, P} | Rest];
+        _ -> [Entry | replace_props(Rest, P)]
+    end.
+
+%% Combine a property change with the previous property list.
+%%   write_property:  Arg is the property tuple; it replaces any entry
+%%                    with the same key and the result is sorted.
+%%   delete_property: Arg is the property key; the first entry with that
+%%                    key is removed and the order is left as it was.
+merge_with_previous(How, Arg, Prev) ->
+    case How of
+        write_property ->
+            Others = lists:keydelete(element(1, Arg), 1, Prev),
+            lists:sort([Arg | Others]);
+        delete_property ->
+            lists:keydelete(Arg, 1, Prev)
+    end.
+
+%% Delete the user property with key PropKey from Tab, inside a schema
+%% transaction.
+delete_table_property(Tab, PropKey) ->
+    Work = fun() -> do_delete_table_property(Tab, PropKey) end,
+    schema_transaction(Work).
+
+%% Transaction body for delete_table_property/2.
+%% If the transaction store already holds an op for Tab that carries the
+%% table definition (see update_existing_op/5), the deletion is merged
+%% into that op. Otherwise Tab is taken to be an existing table and a
+%% fresh delete_property op is queued.
+do_delete_table_property(Tab, PropKey) ->
+    {_, _, Ts} = TidTs = get_tid_ts_and_lock(schema, write),
+    Store = Ts#tidstore.store,
+    Merged = change_prop_in_existing_op(Tab, PropKey, delete_property, Store),
+    if
+        Merged == true ->
+            dbg_out("change_prop_in_existing_op"
+                    "(~p,~p,delete_property,Store) -> true~n",
+                    [Tab,PropKey]),
+            %% the deletion now lives in the already queued op
+            ok;
+        Merged == false ->
+            dbg_out("change_prop_in_existing_op"
+                    "(~p,~p,delete_property,Store) -> false~n",
+                    [Tab,PropKey]),
+            %% this must be an existing table
+            get_tid_ts_and_lock(Tab, none),
+            insert_schema_ops(TidTs,
+                              make_delete_table_properties(Tab, [PropKey]))
+    end.
+
+%% Build one delete_property op per key in PropKeys, starting from the
+%% current cstruct of Tab with its version incremented.
+make_delete_table_properties(Tab, PropKeys) ->
+    ensure_writable(schema),
+    StartCs = incr_version(val({Tab, cstruct})),
+    ensure_active(StartCs),
+    make_delete_table_properties(Tab, PropKeys, StartCs).
+
+%% Worker for make_delete_table_properties/2. The cstruct produced for
+%% one key is the starting point for the next.
+make_delete_table_properties(_Tab, [], _Cs) ->
+    [];
+make_delete_table_properties(Tab, [PropKey | PropKeys], Cs) ->
+    Remaining = lists:keydelete(PropKey, 1, Cs#cstruct.user_properties),
+    NextCs = Cs#cstruct{user_properties = Remaining},
+    verify_cstruct(NextCs),
+    DeleteOp = {op, delete_property, cs2list(NextCs), PropKey},
+    [DeleteOp | make_delete_table_properties(Tab, PropKeys, NextCs)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Ensure that the transaction can be committed even
+%% if the node crashes and Mnesia is restarted.
+%%
+%% Returns {Modified, Commit2, DumperMode}. A commit record without
+%% schema ops is returned untouched as {false, Commit, optional}.
+%% Otherwise every schema op is prepared; when any op demands a mandatory
+%% dump the transaction log is dumped synchronously (a failing dump
+%% aborts the transaction), and the schema commit lock is awaited if any
+%% ops remain.
+prepare_commit(Tid, Commit, WaitFor) ->
+    case Commit#commit.schema_ops of
+        [] ->
+            {false, Commit, optional};
+        OrigOps ->
+            %% prepare_ops/6 hands the ops back in reverse order
+            {Modified, RevOps, DumperMode} =
+                prepare_ops(Tid, OrigOps, WaitFor, false, [], optional),
+            InitBy = schema_prepare,
+            Prepared = Commit#commit{schema_ops = lists:reverse(RevOps)},
+            case DumperMode of
+                optional ->
+                    dbg_out("Transaction log dump skipped (~p): ~w~n",
+                            [DumperMode, InitBy]);
+                mandatory ->
+                    case mnesia_controller:sync_dump_log(InitBy) of
+                        dumped -> ok;
+                        {error, Reason} -> mnesia:abort(Reason)
+                    end
+            end,
+            if
+                RevOps == [] ->
+                    ignore;
+                true ->
+                    %% We need to grab a dumper lock here, the log may not
+                    %% be dumped by others, during the schema commit phase.
+                    mnesia_controller:wait_for_schema_commit_lock()
+            end,
+            {Modified, Prepared, DumperMode}
+    end.
+
+%% Prepare each op in turn and accumulate the result.
+%% Returns {Changed, RevOps, DumperMode}:
+%%   Changed    - true once any op has been replaced or dropped
+%%   RevOps     - the resulting ops, in reverse order
+%%   DumperMode - becomes mandatory as soon as one op asks for it and
+%%                never reverts to optional
+prepare_ops(_Tid, [], _WaitFor, Changed, Acc, DumperMode) ->
+    {Changed, Acc, DumperMode};
+prepare_ops(Tid, [Op | Ops], WaitFor, Changed, Acc, DumperMode) ->
+    {Changed1, Acc1, Mode1} =
+        case prepare_op(Tid, Op, WaitFor) of
+            {true, mandatory} ->
+                {Changed, [Op | Acc], mandatory};
+            {true, optional} ->
+                {Changed, [Op | Acc], DumperMode};
+            {true, NewOps, mandatory} ->
+                {true, NewOps ++ Acc, mandatory};
+            {true, NewOps, optional} ->
+                {true, NewOps ++ Acc, DumperMode};
+            {false, optional} ->
+                {true, Acc, DumperMode}
+        end,
+    prepare_ops(Tid, Ops, WaitFor, Changed1, Acc1, Mode1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Prepare one schema op for commit.
+%% Returns, as consumed by prepare_ops/6:
+%%   {true, DumperMode}          keep the op as it is
+%%   {true, NewOps, DumperMode}  replace the op with NewOps
+%%   {false, optional}           drop the op
+%% where DumperMode is mandatory | optional.
+%%
+%% Record op whose storage type was not known when it was queued: it is
+%% dropped if the table has storage type `unknown' here, otherwise it is
+%% re-tagged with the storage type found.
+prepare_op(_Tid, {op, rec, unknown, Rec}, _WaitFor) ->
+    {{Tab, Key}, Items, _RecOp} = Rec,
+    LocalStorage = val({Tab, storage_type}),
+    if
+        LocalStorage == unknown ->
+            {false, optional};
+        true ->
+            mnesia_tm:prepare_snmp(Tab, Key, Items), % May exit
+            {true, [{op, rec, LocalStorage, Rec}], optional}
+    end;
+
+%% announce_im_running: on every node except the announcing one, work out
+%% which of the announced nodes are not yet in {current, db_nodes},
+%% remember them under the prepare_op key and announce them. The op
+%% itself is always dropped.
+prepare_op(_Tid, {op, announce_im_running, Node, SchemaDef, Running, RemoteRunning}, _WaitFor) ->
+    SchemaCs = list2cs(SchemaDef),
+    case Node == node() of
+        true ->
+            %% Announce has already run on local node from do_merge_schema
+            ignore;
+        false ->
+            Announced = mnesia_lib:uniq(Running++RemoteRunning),
+            NewNodes = Announced -- val({current,db_nodes}),
+            mnesia_lib:set(prepare_op, {announce_im_running,NewNodes}),
+            announce_im_running(NewNodes, SchemaCs)
+    end,
+    {false, optional};
+
+%% sync_trans on a participant: notify the coordinator and block until it
+%% answers. A mnesia_down or 'EXIT' message received meanwhile aborts the
+%% transaction with that message as the reason. The op is dropped.
+prepare_op(_Tid, {op, sync_trans}, {part, CoordPid}) ->
+    CoordPid ! {sync_trans, self()},
+    receive
+        {sync_trans, CoordPid} ->
+            {false, optional};
+        {mnesia_down, _Node} = Down ->
+            mnesia_lib:verbose("sync_op terminated due to ~p~n", [Down]),
+            mnesia:abort(Down);
+        {'EXIT', _, _} = Exit ->
+            mnesia_lib:verbose("sync_op terminated due to ~p~n", [Exit]),
+            mnesia:abort(Exit)
+    end;
+
+%% sync_trans on the coordinator: wait for a sync message from every
+%% participant node, then release them all. The op is dropped.
+prepare_op(_Tid, {op, sync_trans}, {coord, Nodes}) ->
+    case receive_sync(Nodes, []) of
+        {abort, Reason} ->
+            mnesia_lib:verbose("sync_op terminated due to ~p~n", [Reason]),
+            mnesia:abort(Reason);
+        Participants ->
+            Release = {sync_trans, self()},
+            _ = [Participant ! Release || Participant <- Participants],
+            {false, optional}
+    end;
+%% create_table: create the local storage for the new table according to
+%% the storage type this node gets, and register the cstruct.
+%% Disc based storage types abort with bad_type when this node does not
+%% use a directory. A node that holds no replica only registers the
+%% cstruct.
+prepare_op(Tid, {op, create_table, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ UseDir = mnesia_monitor:use_dir(),
+ Tab = Cs#cstruct.name,
+ case Storage of
+ disc_copies when UseDir == false ->
+ UseDirReason = {bad_type, Tab, Storage, node()},
+ mnesia:abort(UseDirReason);
+ disc_only_copies when UseDir == false ->
+ UseDirReason = {bad_type, Tab, Storage, node()},
+ mnesia:abort(UseDirReason);
+ ram_copies ->
+ %% The {Tab, create_table} flag is set in every successful branch
+ mnesia_lib:set({Tab, create_table},true),
+ create_ram_table(Tab, Cs#cstruct.type),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional};
+ disc_copies ->
+ %% both an ets table and an (empty) disc log file are created
+ mnesia_lib:set({Tab, create_table},true),
+ create_ram_table(Tab, Cs#cstruct.type),
+ create_disc_table(Tab),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional};
+ disc_only_copies ->
+ mnesia_lib:set({Tab, create_table},true),
+ create_disc_only_table(Tab,Cs#cstruct.type),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional};
+ unknown -> %% No replica on this node
+ mnesia_lib:set({Tab, create_table},true),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional}
+ end;
+
+%% add_table_copy: three cases, tested in this order
+%%  - the schema table: nothing to prepare
+%%  - the copy is added on this node: register the cstruct and fetch the
+%%    table contents over the network; a failed load aborts
+%%  - the copy is added elsewhere: only register the cstruct, after
+%%    checking that a local ram copy has no dump file on disc
+prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+
+ if
+ Tab == schema ->
+ {true, optional};
+
+ Node == node() ->
+ %% A node whose schema is ram_copies can only take ram_copies replicas
+ case mnesia_lib:val({schema, storage_type}) of
+ ram_copies when Storage /= ram_copies ->
+ Error = {combine_error, Tab, "has no disc", Node},
+ mnesia:abort(Error);
+ _ ->
+ ok
+ end,
+ %% Tables are created by mnesia_loader get_network code
+ insert_cstruct(Tid, Cs, true),
+ case mnesia_controller:get_network_copy(Tab, Cs) of
+ {loaded, ok} ->
+ {true, optional};
+ {not_loaded, ErrReason} ->
+ Reason = {system_limit, Tab, {Node, ErrReason}},
+ mnesia:abort(Reason)
+ end;
+ Node /= node() ->
+ %% Verify that the ram table has not been dumped to disc
+ if
+ Storage /= ram_copies ->
+ case mnesia_lib:schema_cs_to_storage_type(node(), Cs) of
+ ram_copies ->
+ Dat = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dat) of
+ true ->
+ mnesia:abort({combine_error, Tab, Storage,
+ "Table dumped to disc", node()});
+ false ->
+ ok
+ end;
+ _ ->
+ ok
+ end;
+ true ->
+ ok
+ end,
+ insert_cstruct(Tid, Cs, true),
+ {true, optional}
+ end;
+
+%% del_table_copy: on the node that owns the transaction, and for any
+%% table but the schema, a linked helper process (lock_del_table/4) is
+%% spawned and its pid stored in the process dictionary under
+%% mnesia_lock. This process then waits for the helper to report.
+%% Everywhere else there is nothing to prepare.
+prepare_op(Tid, {op, del_table_copy, _Storage, Node, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+
+ if
+ %% Schema table lock is always required to run a schema op.
+ %% No need to lock it.
+ node(Tid#tid.pid) == node(), Tab /= schema ->
+ Self = self(),
+ Pid = spawn_link(fun() -> lock_del_table(Tab, Node, Cs, Self) end),
+ put(mnesia_lock, Pid),
+ receive
+ {Pid, updated} ->
+ {true, optional};
+ {Pid, FailReason} ->
+ mnesia:abort(FailReason);
+ {'EXIT', Pid, Reason} ->
+ mnesia:abort(Reason)
+ end;
+ true ->
+ {true, optional}
+ end;
+
+%% change_table_copy_type on the node whose replica changes type.
+%% Aborts when the table is not active here. The remaining branches are
+%% tested in this order:
+%%  - schema table: only ram_copies <-> disc_copies is accepted
+%%  - from ram_copies: the table is dumped to disc
+%%  - disc_copies to disc_only_copies: the table is dumped to a dets file
+%%  - from disc_only_copies: the dets contents are loaded into a new ets
+%%    table
+%% Always asks for a mandatory log dump.
+prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef}, _WaitFor)
+ when N == node() ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+
+ NotActive = mnesia_lib:not_active_here(Tab),
+
+ if
+ NotActive == true ->
+ mnesia:abort({not_active, Tab, node()});
+
+ Tab == schema ->
+ case {FromS, ToS} of
+ {ram_copies, disc_copies} ->
+ %% Only allowed when schema_location is opt_disc
+ case mnesia:system_info(schema_location) of
+ opt_disc ->
+ ignore;
+ _ ->
+ mnesia:abort({combine_error, Tab, node(),
+ "schema_location must be opt_disc"})
+ end,
+ Dir = mnesia_lib:dir(),
+ case opt_create_dir(true, Dir) of
+ ok ->
+ %% Start from a purged directory and fresh logs, then
+ %% log every currently running node as up
+ purge_dir(Dir, []),
+ mnesia_log:purge_all_logs(),
+ set(use_dir, true),
+ mnesia_log:init(),
+ Ns = val({current, db_nodes}), %mnesia_lib:running_nodes(),
+ F = fun(U) -> mnesia_recover:log_mnesia_up(U) end,
+ lists:foreach(F, Ns),
+
+ mnesia_dumper:raw_named_dump_table(Tab, dmp),
+ mnesia_checkpoint:tm_change_table_copy_type(Tab, FromS, ToS);
+ {error, Reason} ->
+ mnesia:abort(Reason)
+ end;
+ {disc_copies, ram_copies} ->
+ %% Refused while any other local table is not ram_copies
+ Ltabs = val({schema, local_tables}) -- [schema],
+ Dtabs = [L || L <- Ltabs,
+ val({L, storage_type}) /= ram_copies],
+ verify([], Dtabs, {"Disc resident tables", Dtabs, N});
+ _ ->
+ mnesia:abort({combine_error, Tab, ToS})
+ end;
+
+ FromS == ram_copies ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Dat = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dat) of
+ true ->
+ mnesia:abort({combine_error, Tab, node(),
+ "Table dump exists"});
+ false ->
+ %% NOTE(review): any ToS other than the two below
+ %% crashes with case_clause -- presumably excluded
+ %% by the caller; confirm.
+ case ToS of
+ disc_copies ->
+ mnesia_log:ets2dcd(Tab, dmp);
+ disc_only_copies ->
+ mnesia_dumper:raw_named_dump_table(Tab, dmp)
+ end,
+ mnesia_checkpoint:tm_change_table_copy_type(Tab, FromS, ToS)
+ end;
+ false ->
+ mnesia:abort({has_no_disc, node()})
+ end;
+
+ FromS == disc_copies, ToS == disc_only_copies ->
+ mnesia_dumper:raw_named_dump_table(Tab, dmp);
+ FromS == disc_only_copies ->
+ Type = Cs#cstruct.type,
+ create_ram_table(Tab, Type),
+ Datname = mnesia_lib:tab2dat(Tab),
+ Repair = mnesia_monitor:get_env(auto_repair),
+ case mnesia_lib:dets_to_ets(Tab, Tab, Datname, Type, Repair, no) of
+ loaded -> ok;
+ Reason ->
+ Err = "Failed to copy disc data to ram",
+ mnesia:abort({system_limit, Tab, {Err,Reason}})
+ end;
+ true ->
+ ignore
+ end,
+ {true, mandatory};
+
+%% change_table_copy_type for a replica on another node: nothing to
+%% prepare locally, but the log dump is still mandatory.
+prepare_op(_Tid, {op, change_table_copy_type, Node, _FromS, _ToS, _TabDef}, _WaitFor)
+  when Node /= node() ->
+    {true, mandatory};
+
+%% delete_table: nothing to prepare, but the log dump is mandatory.
+prepare_op(_Tid, {op, delete_table, _Def}, _WaitFor) ->
+    {true, mandatory};
+
+%% dump_table with a still-unknown size: a node holding a ram copy dumps
+%% the table to disc and replaces the op with one carrying the table
+%% size; it aborts with has_no_disc when it does not use a directory.
+%% Nodes without a ram copy drop the op.
+prepare_op(_Tid, {op, dump_table, unknown, TabDef}, _WaitFor) ->
+    Cs = list2cs(TabDef),
+    Tab = Cs#cstruct.name,
+    HasRamCopy = lists:member(node(), Cs#cstruct.ram_copies),
+    if
+        HasRamCopy == false ->
+            {false, optional};
+        HasRamCopy == true ->
+            case mnesia_monitor:use_dir() of
+                true ->
+                    mnesia_log:ets2dcd(Tab, dmp),
+                    Size = mnesia:table_info(Tab, size),
+                    {true, [{op, dump_table, Size, TabDef}], optional};
+                false ->
+                    mnesia:abort({has_no_disc, node()})
+            end
+    end;
+
+%% add_snmp: a node that holds a replica creates the snmp table through
+%% mnesia_snmp_hook and records it under {Tab, {index, snmp}}. Nodes
+%% without a replica have nothing to do. The op is kept either way.
+prepare_op(_Tid, {op, add_snmp, Ustruct, TabDef}, _WaitFor) ->
+    Cs = list2cs(TabDef),
+    case mnesia_lib:cs_to_storage_type(node(), Cs) of
+        unknown ->
+            ok;
+        Storage ->
+            Tab = Cs#cstruct.name,
+            SnmpTab = mnesia_snmp_hook:create_table(Ustruct, Tab, Storage),
+            mnesia_lib:set({Tab, {index, snmp}}, SnmpTab)
+    end,
+    {true, optional};
+
+%% transform: with Fun = ignore only the schema change is applied.
+%% Otherwise a node holding a replica runs Fun over every object while
+%% the table is fixed, and replaces the op with the transform op followed
+%% by the record ops that transform_objs/8 produces. A crash in that
+%% traversal exits with "Bad transform function". The log dump is
+%% mandatory in every case.
+prepare_op(_Tid, {op, transform, ignore, _TabDef}, _WaitFor) ->
+ {true, mandatory}; %% Apply schema changes only.
+prepare_op(_Tid, {op, transform, Fun, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ {true, mandatory};
+ Storage ->
+ Tab = Cs#cstruct.name,
+ RecName = Cs#cstruct.record_name,
+ Type = Cs#cstruct.type,
+ %% a record tuple is the record name followed by the attributes
+ NewArity = length(Cs#cstruct.attributes) + 1,
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+ Key = mnesia_lib:db_first(Tab),
+ Op = {op, transform, Fun, TabDef},
+ %% The accumulator is seeded with the transform op; prepare_ops/6
+ %% prepends the returned list to its own reversed accumulator.
+ case catch transform_objs(Fun, Tab, RecName,
+ Key, NewArity, Storage, Type, [Op]) of
+ {'EXIT', Reason} ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ exit({"Bad transform function", Tab, Fun, node(), Reason});
+ Objs ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ {true, Objs, mandatory}
+ end
+ end;
+
+%% merge_schema: keep the op when verify_merge/1 accepts the definition,
+%% otherwise log the failure and abort with {bad_commit, Error}.
+prepare_op(Tid, {op, merge_schema, TabDef}, _WaitFor) ->
+    MergeCs = list2cs(TabDef),
+    case verify_merge(MergeCs) of
+        ok ->
+            {true, optional};
+        Error ->
+            verbose("Merge_Schema ~p failed on ~p: ~p~n", [Tid,node(),Error]),
+            mnesia:abort({bad_commit, Error})
+    end;
+%% Every other op needs no preparation and is kept as it is.
+prepare_op(_Tid, _Op, _WaitFor) ->
+    {true, optional}.
+
+%% Create the public, named ets table for Tab (key in position 2) via
+%% mnesia_monitor. A failure aborts with system_limit.
+create_ram_table(Tab, Type) ->
+    EtsOpts = [{keypos, 2}, public, named_table, Type],
+    case mnesia_monitor:unsafe_mktab(Tab, EtsOpts) of
+        {error,Reason} ->
+            Err = "Failed to create ets table",
+            mnesia:abort({system_limit, Tab, {Err,Reason}});
+        Tab ->
+            ok
+    end.
+%% Create a fresh disc log file for Tab: any existing file is deleted,
+%% then the log is opened through mnesia_monitor and closed again right
+%% away. A failure to open it aborts with system_limit.
+create_disc_table(Tab) ->
+    LogFile = mnesia_lib:tab2dcd(Tab),
+    file:delete(LogFile),
+    LogArgs = [{file, LogFile}, {name, {mnesia,create}},
+               {repair, false}, {mode, read_write}],
+    case mnesia_monitor:open_log(LogArgs) of
+        {error,Reason} ->
+            Err = "Failed to create disc table",
+            mnesia:abort({system_limit, Tab, {Err,Reason}});
+        {ok,Log} ->
+            mnesia_monitor:unsafe_close_log(Log),
+            ok
+    end.
+%% Create a fresh dets file for Tab: any existing file is deleted, then
+%% the table is opened through mnesia_monitor with the configured
+%% auto_repair setting. A failure to open it aborts with system_limit.
+create_disc_only_table(Tab,Type) ->
+    OldFile = mnesia_lib:tab2dat(Tab),
+    file:delete(OldFile),
+    DetsArgs = [{file, mnesia_lib:tab2dat(Tab)},
+                {type, mnesia_lib:disk_type(Tab, Type)},
+                {keypos, 2},
+                {repair, mnesia_monitor:get_env(auto_repair)}],
+    case mnesia_monitor:unsafe_open_dets(Tab, DetsArgs) of
+        {error,Reason} ->
+            Err = "Failed to create disc table",
+            mnesia:abort({system_limit, Tab, {Err,Reason}});
+        {ok, _} ->
+            ok
+    end.
+
+
+%% Collect one {sync_trans, Pid} message per node in Nodes.
+%% Returns the list of sender pids (most recent first) once every node
+%% has reported, or {abort, Msg} as soon as any other message is received.
+receive_sync([], Pids) ->
+    Pids;
+receive_sync(Pending, Pids) ->
+    receive
+        {sync_trans, Sender} ->
+            StillPending = lists:delete(node(Sender), Pending),
+            receive_sync(StillPending, [Sender | Pids]);
+        Unexpected ->
+            {abort, Unexpected}
+    end.
+
+%% Body of the helper process spawned by the del_table_copy clause of
+%% prepare_op/3. Father is the pid of the process that spawned it.
+%% Inside a transaction of its own it write-locks Tab and calls
+%% set_where_to_read/3 on every node holding an active schema replica.
+%% If all nodes answer acceptably it reports {self(), updated} to Father
+%% and then blocks until it receives any message, which keeps the
+%% transaction (and so the lock) open. Otherwise it reports the failure
+%% to Father. It finally unlinks from Father and mnesia_tm and exits
+%% normally.
+lock_del_table(Tab, Node, Cs, Father) ->
+ Ns = val({schema, active_replicas}),
+ process_flag(trap_exit,true),
+ Lock = fun() ->
+ mnesia:write_lock_table(Tab),
+ %% An unreachable node makes this match fail (second element not [])
+ {Res, []} = rpc:multicall(Ns, ?MODULE, set_where_to_read, [Tab, Node, Cs]),
+ %% Keep only the replies that count as failures
+ Filter = fun(ok) ->
+ false;
+ ({badrpc, {'EXIT', {undef, _}}}) ->
+ %% This is the case when we talk to nodes older
+ %% than 3.8.2; they will set where_to_read without
+ %% getting a lock.
+ false;
+ (_) ->
+ true
+ end,
+ case lists:filter(Filter, Res) of
+ [] ->
+ Father ! {self(), updated},
+ %% When the transaction is committed the process dies
+ %% and the lock is released.
+ receive _ -> ok end;
+ Err ->
+ Father ! {self(), {bad_commit, Err}}
+ end,
+ ok
+ end,
+ case mnesia:transaction(Lock) of
+ {atomic, ok} -> ok;
+ {aborted, R} -> Father ! {self(), R}
+ end,
+ unlink(Father),
+ unlink(whereis(mnesia_tm)),
+ exit(normal).
+
+%% Called via rpc from lock_del_table/4. If this node currently reads Tab
+%% from Node and the table does not have local content, ask mnesia_lib to
+%% choose another node to read from, passing Node as the one to ignore.
+%% Always returns ok.
+set_where_to_read(Tab, Node, Cs) ->
+    CurrentSource = mnesia_lib:val({Tab, where_to_read}),
+    if
+        CurrentSource =:= Node ->
+            case Cs#cstruct.local_content of
+                true ->
+                    ok;
+                false ->
+                    mnesia_lib:set_remote_where_to_read(Tab, [Node]),
+                    ok
+            end;
+        true ->
+            ok
+    end.
+
+%% Walk the table key by key, run the transform on the objects stored
+%% under each key and prepend the resulting record ops to Acc.
+%% Build up the list in reverse order.
+%% Ws holds the rewritten objects for a key and Ds the entries collected
+%% for deletion (see transform_obj/9). A key whose objects are all left
+%% unchanged produces no op.
+transform_objs(_Fun, _Tab, _RT, '$end_of_table', _NewArity, _Storage, _Type, Acc) ->
+ Acc;
+transform_objs(Fun, Tab, RecName, Key, A, Storage, Type, Acc) ->
+ Objs = mnesia_lib:db_get(Tab, Key),
+ %% Fetch the next key before anything is done with this one
+ NextKey = mnesia_lib:db_next_key(Tab, Key),
+ Oid = {Tab, Key},
+ NewObjs = {Ws, Ds} = transform_obj(Tab, RecName, Key, Fun, Objs, A, Type, [], []),
+ if
+ NewObjs == {[], []} ->
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type, Acc);
+ Type == bag ->
+ %% Acc is in reverse order, so the delete of the whole key is
+ %% applied before the write of the rewritten objects
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ws, write}},
+ {op, rec, Storage, {Oid, [Oid], delete}} | Acc]);
+ Ds == [] ->
+ %% Type is set or ordered_set, no need to delete the record first
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ws, write}} | Acc]);
+ Ws == [] ->
+ %% NOTE(review): Ds is queued with the `write' tag here although
+ %% it holds the entries collected for deletion -- confirm that
+ %% this is intended rather than a `delete' op.
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ds, write}} | Acc]);
+ true ->
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ws, write}},
+ {op, rec, Storage, {Oid, Ds, delete}} | Acc])
+ end.
+
+transform_obj(Tab, RecName, Key, Fun, [Obj|Rest], NewArity, Type, Ws, Ds) ->
+ NewObj = Fun(Obj),
+ if
+ size(NewObj) /= NewArity ->
+ exit({"Bad arity", Obj, NewObj});
+ NewObj == Obj ->
+ transform_obj(Tab, RecName, Key, Fun, Rest, NewArity, Type, Ws, Ds);
+ RecName == element(1, NewObj), Key == element(2, NewObj) ->
+ transform_obj(Tab, RecName, Key, Fun, Rest, NewArity,
+ Type, [NewObj | Ws], Ds);
+ NewObj == delete ->
+ case Type of
+ bag -> %% Just don't write that object
+ transform_obj(Tab, RecName, Key, Fun, Rest,
+ NewArity, Type, Ws, Ds);
+ _ ->
+ transform_obj(Tab, RecName, Key, Fun, Rest, NewArity,
+ Type, Ws, [NewObj | Ds])
+ end;
+ true ->
+ exit({"Bad key or Record Name", Obj, NewObj})
+ end;
+transform_obj(_Tab, _RecName, _Key, _Fun, [], _NewArity, _Type, Ws, Ds) ->
+ {lists:reverse(Ws), lists:reverse(Ds)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Undo prepare of commit
+undo_prepare_commit(Tid, Commit) ->
+ case Commit#commit.schema_ops of
+ [] ->
+ ignore;
+ Ops ->
+ %% Catch to allow failure; mnesia_controller may not be started
+ catch mnesia_controller:release_schema_commit_lock(),
+ undo_prepare_ops(Tid, Ops)
+ end,
+ Commit.
+
+%% Undo in reverse order
+undo_prepare_ops(Tid, [Op | Ops]) ->
+ case element(1, Op) of
+ TheOp when TheOp /= op, TheOp /= restore_op ->
+ undo_prepare_ops(Tid, Ops);
+ _ ->
+ undo_prepare_ops(Tid, Ops),
+ undo_prepare_op(Tid, Op)
+ end;
+undo_prepare_ops(_Tid, []) ->
+ [].
+
+undo_prepare_op(_Tid, {op, announce_im_running, _Node, _, _Running, _RemoteRunning}) ->
+ case ?catch_val(prepare_op) of
+ {announce_im_running, New} ->
+ unannounce_im_running(New);
+ _Else ->
+ ok
+ end;
+
+undo_prepare_op(_Tid, {op, sync_trans}) ->
+ ok;
+
+undo_prepare_op(Tid, {op, create_table, TabDef}) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_lib:unset({Tab, create_table}),
+ delete_cstruct(Tid, Cs),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ ok;
+ ram_copies ->
+ ram_delete_table(Tab, ram_copies);
+ disc_copies ->
+ ram_delete_table(Tab, disc_copies),
+ DcdFile = mnesia_lib:tab2dcd(Tab),
+ %% disc_delete_table(Tab, Storage),
+ file:delete(DcdFile);
+ disc_only_copies ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ %% disc_delete_table(Tab, Storage),
+ file:delete(Dat)
+ end;
+
+undo_prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ if
+ Tab == schema ->
+ true; % Nothing to prepare
+ Node == node() ->
+ mnesia_checkpoint:tm_del_copy(Tab, Node),
+ mnesia_controller:unannounce_add_table_copy(Tab, Node),
+ if
+ Storage == disc_only_copies; Tab == schema ->
+ mnesia_monitor:close_dets(Tab),
+ file:delete(mnesia_lib:tab2dat(Tab));
+ true ->
+ file:delete(mnesia_lib:tab2dcd(Tab))
+ end,
+ ram_delete_table(Tab, Storage),
+ Cs2 = new_cs(Cs, Node, Storage, del),
+ insert_cstruct(Tid, Cs2, true); % Don't care about the version
+ Node /= node() ->
+ mnesia_controller:unannounce_add_table_copy(Tab, Node),
+ Cs2 = new_cs(Cs, Node, Storage, del),
+ insert_cstruct(Tid, Cs2, true) % Don't care about the version
+ end;
+
+undo_prepare_op(_Tid, {op, del_table_copy, _, Node, TabDef})
+ when Node == node() ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_lib:set({Tab, where_to_read}, Node);
+
+
+undo_prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef})
+ when N == node() ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_checkpoint:tm_change_table_copy_type(Tab, ToS, FromS),
+ Dmp = mnesia_lib:tab2dmp(Tab),
+
+ case {FromS, ToS} of
+ {ram_copies, disc_copies} when Tab == schema ->
+ file:delete(Dmp),
+ mnesia_log:purge_some_logs(),
+ set(use_dir, false);
+ {ram_copies, disc_copies} ->
+ file:delete(Dmp);
+ {ram_copies, disc_only_copies} ->
+ file:delete(Dmp);
+ {disc_only_copies, _} ->
+ ram_delete_table(Tab, ram_copies);
+ _ ->
+ ignore
+ end;
+
+undo_prepare_op(_Tid, {op, dump_table, _Size, TabDef}) ->
+ Cs = list2cs(TabDef),
+ case lists:member(node(), Cs#cstruct.ram_copies) of
+ true ->
+ Tab = Cs#cstruct.name,
+ Dmp = mnesia_lib:tab2dmp(Tab),
+ file:delete(Dmp);
+ false ->
+ ignore
+ end;
+
+undo_prepare_op(_Tid, {op, add_snmp, _Ustruct, TabDef}) ->
+ Cs = list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ true;
+ _Storage ->
+ Tab = Cs#cstruct.name,
+ case ?catch_val({Tab, {index, snmp}}) of
+ {'EXIT',_} ->
+ ignore;
+ Stab ->
+ mnesia_snmp_hook:delete_table(Tab, Stab),
+ mnesia_lib:unset({Tab, {index, snmp}})
+ end
+ end;
+
+undo_prepare_op(_Tid, _Op) ->
+ ignore.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ram_delete_table(Tab, Storage) ->
+ case Storage of
+ unknown ->
+ ignore;
+ disc_only_copies ->
+ ignore;
+ _Else ->
+ %% delete possible index files and data .....
+ %% Got to catch this since if no info has been set in the
+ %% mnesia_gvar it will crash
+ catch mnesia_index:del_transient(Tab, Storage),
+ case ?catch_val({Tab, {index, snmp}}) of
+ {'EXIT', _} ->
+ ignore;
+ Etab ->
+ catch mnesia_snmp_hook:delete_table(Tab, Etab)
+ end,
+ catch ?ets_delete_table(Tab)
+ end.
+
+purge_dir(Dir, KeepFiles) ->
+ Suffixes = known_suffixes(),
+ purge_dir(Dir, KeepFiles, Suffixes).
+
+purge_dir(Dir, KeepFiles, Suffixes) ->
+ case dir_exists(Dir) of
+ true ->
+ {ok, AllFiles} = file:list_dir(Dir),
+ purge_known_files(AllFiles, KeepFiles, Dir, Suffixes);
+ false ->
+ ok
+ end.
+
+purge_tmp_files() ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Dir = mnesia_lib:dir(),
+ KeepFiles = [],
+ Exists = mnesia_lib:exists(mnesia_lib:tab2dat(schema)),
+ case Exists of
+ true ->
+ Suffixes = tmp_suffixes(),
+ purge_dir(Dir, KeepFiles, Suffixes);
+ false ->
+ %% Interrupted change of storage type
+ %% for schema table
+ Suffixes = known_suffixes(),
+ purge_dir(Dir, KeepFiles, Suffixes),
+ mnesia_lib:set(use_dir, false)
+ end;
+
+ false ->
+ ok
+ end.
+
+%% Delete every file in Files (names relative to Dir) that is not listed in
+%% KeepFiles and whose name ends with one of Suffixes. The outcome of each
+%% file:delete/1 call is ignored. Always returns ok.
+purge_known_files(Files, KeepFiles, Dir, Suffixes) ->
+    PurgeOne =
+        fun(File) ->
+                Purgeable = (not lists:member(File, KeepFiles))
+                    andalso has_known_suffix(File, Suffixes, false),
+                case Purgeable of
+                    true -> file:delete(filename:join([Dir, File]));
+                    false -> ignore
+                end
+        end,
+    lists:foreach(PurgeOne, Files).
+
+%% Tell whether File ends with one of Suffixes.
+%% The third argument is the result found so far: true is returned at once,
+%% and when no suffixes are left the value is handed back unchanged.
+has_known_suffix(_File, _Suffixes, true) ->
+    true;
+has_known_suffix(_File, [], Found) ->
+    Found;
+has_known_suffix(File, [_ | _] = Suffixes, false) ->
+    lists:any(fun(Suffix) -> lists:suffix(Suffix, File) end, Suffixes).
+
+known_suffixes() -> real_suffixes() ++ tmp_suffixes().
+
+real_suffixes() -> [".DAT", ".LOG", ".BUP", ".DCL", ".DCD"].
+
+tmp_suffixes() -> [".TMP", ".BUPTMP", ".RET", ".DMP"].
+
+info() ->
+ Tabs = lists:sort(val({schema, tables})),
+ lists:foreach(fun(T) -> info(T) end, Tabs),
+ ok.
+
+info(Tab) ->
+ Props = get_table_properties(Tab),
+ io:format("-- Properties for ~w table --- ~n",[Tab]),
+ info2(Tab, Props).
+info2(Tab, [{cstruct, _V} | Tail]) -> % Ignore cstruct
+ info2(Tab, Tail);
+info2(Tab, [{frag_hash, _V} | Tail]) -> % Ignore frag_hash
+ info2(Tab, Tail);
+info2(Tab, [{P, V} | Tail]) ->
+ io:format("~-20w -> ~p~n",[P,V]),
+ info2(Tab, Tail);
+info2(_, []) ->
+ io:format("~n", []).
+
+get_table_properties(Tab) ->
+ case catch mnesia_lib:db_match_object(ram_copies,
+ mnesia_gvar, {{Tab, '_'}, '_'}) of
+ {'EXIT', _} ->
+ mnesia:abort({no_exists, Tab, all});
+ RawGvar ->
+ case [{Item, Val} || {{_Tab, Item}, Val} <- RawGvar] of
+ [] ->
+ [];
+ Gvar ->
+ Size = {size, mnesia:table_info(Tab, size)},
+ Memory = {memory, mnesia:table_info(Tab, memory)},
+ Master = {master_nodes, mnesia:table_info(Tab, master_nodes)},
+ lists:sort([Size, Memory, Master | Gvar])
+ end
+ end.
+
+%%%%%%%%%%% RESTORE %%%%%%%%%%%
+
+-record(r, {iter = schema,
+ module,
+ table_options = [],
+ default_op = clear_tables,
+ tables = [],
+ opaque,
+ insert_op = error_fun,
+ recs = error_recs
+ }).
+
+restore(Opaque) ->
+ restore(Opaque, [], mnesia_monitor:get_env(backup_module)).
+restore(Opaque, Args) when is_list(Args) ->
+ restore(Opaque, Args, mnesia_monitor:get_env(backup_module));
+restore(_Opaque, BadArg) ->
+ {aborted, {badarg, BadArg}}.
+restore(Opaque, Args, Module) when is_list(Args), is_atom(Module) ->
+ InitR = #r{opaque = Opaque, module = Module},
+ case catch lists:foldl(fun check_restore_arg/2, InitR, Args) of
+ R when is_record(R, r) ->
+ case mnesia_bup:read_schema(R#r.module, Opaque) of
+ {error, Reason} ->
+ {aborted, Reason};
+ BupSchema ->
+ schema_transaction(fun() -> do_restore(R, BupSchema) end)
+ end;
+ {'EXIT', Reason} ->
+ {aborted, Reason}
+ end;
+restore(_Opaque, Args, Module) ->
+ {aborted, {badarg, Args, Module}}.
+
+check_restore_arg({module, Mod}, R) when is_atom(Mod) ->
+ R#r{module = Mod};
+
+check_restore_arg({clear_tables, List}, R) when is_list(List) ->
+ case lists:member(schema, List) of
+ false ->
+ TableList = [{Tab, clear_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+ true ->
+ exit({badarg, {clear_tables, schema}})
+ end;
+check_restore_arg({recreate_tables, List}, R) when is_list(List) ->
+ case lists:member(schema, List) of
+ false ->
+ TableList = [{Tab, recreate_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+ true ->
+ exit({badarg, {recreate_tables, schema}})
+ end;
+check_restore_arg({keep_tables, List}, R) when is_list(List) ->
+ TableList = [{Tab, keep_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+check_restore_arg({skip_tables, List}, R) when is_list(List) ->
+ TableList = [{Tab, skip_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+check_restore_arg({default_op, Op}, R) ->
+ case Op of
+ clear_tables -> ok;
+ recreate_tables -> ok;
+ keep_tables -> ok;
+ skip_tables -> ok;
+ Else ->
+ exit({badarg, {bad_default_op, Else}})
+ end,
+ R#r{default_op = Op};
+
+check_restore_arg(BadArg,_) ->
+ exit({badarg, BadArg}).
+
+do_restore(R, BupSchema) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ R2 = restore_schema(BupSchema, R),
+ insert_schema_ops(TidTs, [{restore_op, R2}]),
+ [element(1, TabStruct) || TabStruct <- R2#r.tables].
+
+arrange_restore(R, Fun, Recs) ->
+ R2 = R#r{insert_op = Fun, recs = Recs},
+ case mnesia_bup:iterate(R#r.module, fun restore_items/4, R#r.opaque, R2) of
+ {ok, R3} -> R3#r.recs;
+ {error, Reason} -> mnesia:abort(Reason)
+ end.
+
+restore_items([Rec | Recs], Header, Schema, R) ->
+ Tab = element(1, Rec),
+ case lists:keysearch(Tab, 1, R#r.tables) of
+ {value, {Tab, Where0, Snmp, RecName}} ->
+ Where = case Where0 of
+ undefined ->
+ val({Tab, where_to_commit});
+ _ ->
+ Where0
+ end,
+ {Rest, NRecs} = restore_tab_items([Rec | Recs], Tab,
+ RecName, Where, Snmp,
+ R#r.recs, R#r.insert_op),
+ restore_items(Rest, Header, Schema, R#r{recs = NRecs});
+ false ->
+ Rest = skip_tab_items(Recs, Tab),
+ restore_items(Rest, Header, Schema, R)
+ end;
+
+restore_items([], _Header, _Schema, R) ->
+ R.
+
+restore_func(Tab, R) ->
+ case lists:keysearch(Tab, 1, R#r.table_options) of
+ {value, {Tab, OP}} ->
+ OP;
+ false ->
+ R#r.default_op
+ end.
+
+where_to_commit(Tab, CsList) ->
+ Ram = [{N, ram_copies} || N <- pick(Tab, ram_copies, CsList, [])],
+ Disc = [{N, disc_copies} || N <- pick(Tab, disc_copies, CsList, [])],
+ DiscO = [{N, disc_only_copies} || N <- pick(Tab, disc_only_copies, CsList, [])],
+ Ram ++ Disc ++ DiscO.
+
+%% Walk the schema entries of a backup and prepare each table for restore.
+%%
+%% For every {schema, Tab, CreateList} entry the operation selected by
+%% restore_func/2 is carried out:
+%%   clear_tables    - the table is cleared (do_clear_table/1)
+%%   recreate_tables - the table is write locked, gets a new cookie and a
+%%                     restore_recreate schema op is queued
+%%   keep_tables     - the table is write locked and otherwise left alone
+%%   skip_tables     - the table is ignored
+%% Every table that is to receive records is remembered in R#r.tables as
+%% {Tab, Where, Snmp, RecName}; Where is undefined unless the table is
+%% recreated. A {schema, Tab} entry deletes the table (do_delete_table/1)
+%% and forgets it again. Returns the updated #r{} record.
+%%
+%% Changes of the meta info of the schema table itself are not allowed,
+%% so its own entry is skipped.
+restore_schema([{schema, schema, _List} | Schema], R) ->
+    restore_schema(Schema, R);
+restore_schema([{schema, Tab, List} | Schema], R) ->
+    case restore_func(Tab, R) of
+        clear_tables ->
+            do_clear_table(Tab),
+            Snmp = val({Tab, snmp}),
+            RecName = val({Tab, record_name}),
+            R2 = R#r{tables = [{Tab, undefined, Snmp, RecName} | R#r.tables]},
+            restore_schema(Schema, R2);
+        recreate_tables ->
+            case ?catch_val({Tab, cstruct}) of
+                {'EXIT', _} ->
+                    %% No cstruct is known for Tab here: lock it on the
+                    %% running nodes named in the backup's table definition.
+                    TidTs = {_Mod, Tid, Ts} = get(mnesia_activity_state),
+                    RunningNodes = val({current, db_nodes}),
+                    Nodes = mnesia_lib:intersect(mnesia_lib:cs_to_nodes(list2cs(List)),
+                                                 RunningNodes),
+                    mnesia_locker:wlock_no_exist(Tid, Ts#tidstore.store, Tab, Nodes),
+                    TidTs;
+                _ ->
+                    TidTs = get_tid_ts_and_lock(Tab, write)
+            end,
+            NC = {cookie, ?unique_cookie},
+            List2 = lists:keyreplace(cookie, 1, List, NC),
+            Where = where_to_commit(Tab, List2),
+            Snmp = pick(Tab, snmp, List2, []),
+            RecName = pick(Tab, record_name, List2, Tab),
+            insert_schema_ops(TidTs, [{op, restore_recreate, List2}]),
+            R2 = R#r{tables = [{Tab, Where, Snmp, RecName} | R#r.tables]},
+            restore_schema(Schema, R2);
+        keep_tables ->
+            get_tid_ts_and_lock(Tab, write),
+            Snmp = val({Tab, snmp}),
+            RecName = val({Tab, record_name}),
+            R2 = R#r{tables = [{Tab, undefined, Snmp, RecName} | R#r.tables]},
+            restore_schema(Schema, R2);
+        skip_tables ->
+            restore_schema(Schema, R)
+    end;
+
+restore_schema([{schema, Tab} | Schema], R) ->
+    do_delete_table(Tab),
+    %% R#r.tables holds {Tab, Where, Snmp, RecName} tuples, so the entry
+    %% must be removed by key. Deleting the bare table name never matched
+    %% anything and left the deleted table scheduled for restore.
+    Tabs = lists:keydelete(Tab, 1, R#r.tables),
+    restore_schema(Schema, R#r{tables = Tabs});
+restore_schema([], R) ->
+    R.
+
+%% Feed the leading run of records that belong to Tab through Op.
+%% Op is called as Op(Rec, Acc, RecName, Where, Snmp), in backup order,
+%% starting with Acc = Recs. Returns {Rest, NewRecs} where Rest begins
+%% with the first record that does not belong to Tab.
+restore_tab_items(Items, Tab, RecName, Where, Snmp, Recs, Op) ->
+    BelongsToTab = fun(Item) when element(1, Item) == Tab -> true;
+                      (_) -> false
+                   end,
+    {TabItems, Rest} = lists:splitwith(BelongsToTab, Items),
+    Insert = fun(Item, Acc) -> Op(Item, Acc, RecName, Where, Snmp) end,
+    {Rest, lists:foldl(Insert, Recs, TabItems)}.
+
+%% Drop the leading run of records whose first element is Tab and
+%% return what is left.
+skip_tab_items(Recs, Tab) ->
+    lists:dropwhile(fun(Rec) when element(1, Rec) == Tab -> true;
+                       (_) -> false
+                    end,
+                    Recs).
+
+%%%%%%%%% Dump tables %%%%%%%%%%%%%
+dump_tables(Tabs) when is_list(Tabs) ->
+ schema_transaction(fun() -> do_dump_tables(Tabs) end);
+dump_tables(Tabs) ->
+ {aborted, {bad_type, Tabs}}.
+
+do_dump_tables(Tabs) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ insert_schema_ops(TidTs, make_dump_tables(Tabs)).
+
+make_dump_tables([schema | _Tabs]) ->
+ mnesia:abort({bad_type, schema});
+make_dump_tables([Tab | Tabs]) ->
+ get_tid_ts_and_lock(Tab, read),
+ TabDef = get_create_list(Tab),
+ DiscResident = val({Tab, disc_copies}) ++ val({Tab, disc_only_copies}),
+ verify([], DiscResident,
+ {"Only allowed on ram_copies", Tab, DiscResident}),
+ [{op, dump_table, unknown, TabDef} | make_dump_tables(Tabs)];
+make_dump_tables([]) ->
+ [].
+
+%% Merge the local schema with the schema on other nodes
+merge_schema() ->
+ schema_transaction(fun() -> do_merge_schema() end).
+
+do_merge_schema() ->
+ {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, write),
+ Connected = val(recover_nodes),
+ Running = val({current, db_nodes}),
+ Store = Ts#tidstore.store,
+ %% Verify that all nodes are locked; that might not be the
+ %% case if this transaction was queued when new nodes were added.
+ case Running -- ets:lookup_element(Store, nodes, 2) of
+ [] -> ok; %% All known nodes are locked
+ Miss -> %% Abort! We don't want the sideeffects below to be executed
+ mnesia:abort({bad_commit, {missing_lock, Miss}})
+ end,
+ case Connected -- Running of
+ [Node | _] ->
+ %% Time for a schema merging party!
+ mnesia_locker:wlock_no_exist(Tid, Store, schema, [Node]),
+ case rpc:call(Node, mnesia_controller, get_cstructs, []) of
+ {cstructs, Cstructs, RemoteRunning1} ->
+ LockedAlready = Running ++ [Node],
+ {New, Old} = mnesia_recover:connect_nodes(RemoteRunning1),
+ RemoteRunning = mnesia_lib:intersect(New ++ Old, RemoteRunning1),
+ if
+ RemoteRunning /= RemoteRunning1 ->
+ mnesia_lib:error("Mnesia on ~p could not connect to node(s) ~p~n",
+ [node(), RemoteRunning1 -- RemoteRunning]);
+ true -> ok
+ end,
+ NeedsLock = RemoteRunning -- LockedAlready,
+ mnesia_locker:wlock_no_exist(Tid, Store, schema, NeedsLock),
+ {value, SchemaCs} =
+ lists:keysearch(schema, #cstruct.name, Cstructs),
+
+ %% Announce that Node is running
+ A = [{op, announce_im_running, node(),
+ cs2list(SchemaCs), Running, RemoteRunning}],
+ do_insert_schema_ops(Store, A),
+
+ %% Introduce remote tables to local node
+ do_insert_schema_ops(Store, make_merge_schema(Node, Cstructs)),
+
+ %% Introduce local tables to remote nodes
+ Tabs = val({schema, tables}),
+ Ops = [{op, merge_schema, get_create_list(T)}
+ || T <- Tabs,
+ not lists:keymember(T, #cstruct.name, Cstructs)],
+ do_insert_schema_ops(Store, Ops),
+
+ %% Ensure that the txn will be committed on all nodes
+ NewNodes = RemoteRunning -- Running,
+ mnesia_lib:set(prepare_op, {announce_im_running,NewNodes}),
+ announce_im_running(NewNodes, SchemaCs),
+ {merged, Running, RemoteRunning};
+ {error, Reason} ->
+ {"Cannot get cstructs", Node, Reason};
+ {badrpc, Reason} ->
+ {"Cannot get cstructs", Node, {badrpc, Reason}}
+ end;
+ [] ->
+ %% No more nodes to merge schema with
+ not_merged
+ end.
+
+%% Concatenate, in order, the merge ops that do_make_merge_schema/2
+%% produces for each of the cstructs received from Node.
+make_merge_schema(Node, Cstructs) ->
+    lists:flatmap(fun(Cs) -> do_make_merge_schema(Node, Cs) end, Cstructs).
+
+%% Merge definitions of schema table
+do_make_merge_schema(Node, RemoteCs)
+ when RemoteCs#cstruct.name == schema ->
+ Cs = val({schema, cstruct}),
+ Masters = mnesia_recover:get_master_nodes(schema),
+ HasRemoteMaster = lists:member(Node, Masters),
+ HasLocalMaster = lists:member(node(), Masters),
+ Force = HasLocalMaster or HasRemoteMaster,
+ %% What are the storage types' opinions?
+ StCsLocal = mnesia_lib:cs_to_storage_type(node(), Cs),
+ StRcsLocal = mnesia_lib:cs_to_storage_type(node(), RemoteCs),
+ StCsRemote = mnesia_lib:cs_to_storage_type(Node, Cs),
+ StRcsRemote = mnesia_lib:cs_to_storage_type(Node, RemoteCs),
+
+ if
+ Cs#cstruct.cookie == RemoteCs#cstruct.cookie,
+ Cs#cstruct.version == RemoteCs#cstruct.version ->
+ %% Great, we have the same cookie and version
+ %% and do not need to merge cstructs
+ [];
+
+ Cs#cstruct.cookie /= RemoteCs#cstruct.cookie,
+ Cs#cstruct.disc_copies /= [],
+ RemoteCs#cstruct.disc_copies /= [] ->
+ %% Both cstructs involves disc nodes
+ %% and we cannot merge them
+ if
+ HasLocalMaster == true,
+ HasRemoteMaster == false ->
+ %% Choose local cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(Cs)}];
+
+ HasRemoteMaster == true,
+ HasLocalMaster == false ->
+ %% Choose remote cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(RemoteCs)}];
+
+ true ->
+ Str = io_lib:format("Incompatible schema cookies. "
+ "Please, restart from old backup."
+ "~w = ~w, ~w = ~w~n",
+ [Node, cs2list(RemoteCs), node(), cs2list(Cs)]),
+ throw(Str)
+ end;
+
+ StCsLocal /= StRcsLocal, StRcsLocal /= unknown, StCsLocal /= ram_copies ->
+ Str = io_lib:format("Incompatible schema storage types (local). "
+ "on ~w storage ~w, on ~w storage ~w~n",
+ [node(), StCsLocal, Node, StRcsLocal]),
+ throw(Str);
+ StCsRemote /= StRcsRemote, StCsRemote /= unknown, StRcsRemote /= ram_copies ->
+ Str = io_lib:format("Incompatible schema storage types (remote). "
+ "on ~w cs ~w, on ~w rcs ~w~n",
+ [node(), cs2list(Cs), Node, cs2list(RemoteCs)]),
+ throw(Str);
+
+ Cs#cstruct.disc_copies /= [] ->
+ %% Choose local cstruct,
+ %% since it involves disc nodes
+ MergedCs = merge_cstructs(Cs, RemoteCs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ RemoteCs#cstruct.disc_copies /= [] ->
+ %% Choose remote cstruct,
+ %% since it involves disc nodes
+ MergedCs = merge_cstructs(RemoteCs, Cs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ Cs > RemoteCs ->
+ %% Choose remote cstruct
+ MergedCs = merge_cstructs(RemoteCs, Cs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ true ->
+ %% Choose local cstruct
+ MergedCs = merge_cstructs(Cs, RemoteCs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}]
+ end;
+
+%% Merge definitions of normal table
+do_make_merge_schema(Node, RemoteCs) ->
+ Tab = RemoteCs#cstruct.name,
+ Masters = mnesia_recover:get_master_nodes(schema),
+ HasRemoteMaster = lists:member(Node, Masters),
+ HasLocalMaster = lists:member(node(), Masters),
+ Force = HasLocalMaster or HasRemoteMaster,
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} ->
+ %% A completely new table, created while Node was down
+ [{op, merge_schema, cs2list(RemoteCs)}];
+ Cs when Cs#cstruct.cookie == RemoteCs#cstruct.cookie ->
+ if
+ Cs#cstruct.version == RemoteCs#cstruct.version ->
+ %% We have exactly the same version of the
+ %% table def
+ [];
+
+ Cs#cstruct.version > RemoteCs#cstruct.version ->
+ %% Oops, we have different versions
+ %% of the table def, lets merge them.
+ %% The only changes that may have occurred
+ %% is that new replicas may have been added.
+ MergedCs = merge_cstructs(Cs, RemoteCs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ Cs#cstruct.version < RemoteCs#cstruct.version ->
+ %% Oops, we have different versions
+ %% of the table def, lets merge them
+ MergedCs = merge_cstructs(RemoteCs, Cs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}]
+ end;
+ Cs ->
+ %% Different cookies, not possible to merge
+ if
+ HasLocalMaster == true,
+ HasRemoteMaster == false ->
+ %% Choose local cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(Cs)}];
+
+ HasRemoteMaster == true,
+ HasLocalMaster == false ->
+ %% Choose remote cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(RemoteCs)}];
+
+ true ->
+ Str = io_lib:format("Bad cookie in table definition"
+ " ~w: ~w = ~w, ~w = ~w~n",
+ [Tab, node(), Cs, Node, RemoteCs]),
+ throw(Str)
+ end
+ end.
+
+%% Change of table definitions (cstructs) requires all replicas
+%% of the table to be active. New replicas, db_nodes and tables
+%% may however be added even if some replica is inactive. These
+%% invariants must be enforced in order to allow merge of cstructs.
+%%
+%% Returns a new cstruct or issues a fatal error
+merge_cstructs(Cs, RemoteCs, Force) ->
+ verify_cstruct(Cs),
+ case catch do_merge_cstructs(Cs, RemoteCs, Force) of
+ {'EXIT', {aborted, _Reason}} when Force == true ->
+ Cs;
+ {'EXIT', Reason} ->
+ exit(Reason);
+ MergedCs when is_record(MergedCs, cstruct) ->
+ MergedCs;
+ Other ->
+ throw(Other)
+ end.
+
+do_merge_cstructs(Cs, RemoteCs, Force) ->
+ verify_cstruct(RemoteCs),
+ Ns = mnesia_lib:uniq(mnesia_lib:cs_to_nodes(Cs) ++
+ mnesia_lib:cs_to_nodes(RemoteCs)),
+ {AnythingNew, MergedCs} =
+ merge_storage_type(Ns, false, Cs, RemoteCs, Force),
+ MergedCs2 = merge_versions(AnythingNew, MergedCs, RemoteCs, Force),
+ verify_cstruct(MergedCs2),
+ MergedCs2.
+
+merge_storage_type([N | Ns], AnythingNew, Cs, RemoteCs, Force) ->
+ Local = mnesia_lib:cs_to_storage_type(N, Cs),
+ Remote = mnesia_lib:cs_to_storage_type(N, RemoteCs),
+ case compare_storage_type(true, Local, Remote) of
+ {same, _Storage} ->
+ merge_storage_type(Ns, AnythingNew, Cs, RemoteCs, Force);
+ {diff, Storage} ->
+ Cs2 = change_storage_type(N, Storage, Cs),
+ merge_storage_type(Ns, true, Cs2, RemoteCs, Force);
+ incompatible when Force == true ->
+ merge_storage_type(Ns, AnythingNew, Cs, RemoteCs, Force);
+ Other ->
+ Str = io_lib:format("Cannot merge storage type for node ~w "
+ "in cstruct ~w with remote cstruct ~w (~w)~n",
+ [N, Cs, RemoteCs, Other]),
+ throw(Str)
+ end;
+merge_storage_type([], AnythingNew, MergedCs, _RemoteCs, _Force) ->
+ {AnythingNew, MergedCs}.
+
+%% Compare the storage type a node has in two cstructs.
+%% Returns {same, Type} when both agree, {diff, Type} when one side can be
+%% upgraded to Type (unknown -> anything, ram_copies -> disc_copies,
+%% disc_copies -> disc_only_copies) and incompatible otherwise.
+%% With Retry =:= true the pair is tried once more in swapped order
+%% before giving up.
+compare_storage_type(Retry, Local, Remote) ->
+    case {Local, Remote} of
+        {Same, Same} ->
+            {same, Same};
+        {unknown, Known} ->
+            {diff, Known};
+        {ram_copies, disc_copies} ->
+            {diff, disc_copies};
+        {disc_copies, disc_only_copies} ->
+            {diff, disc_only_copies};
+        _ when Retry =:= true ->
+            compare_storage_type(false, Remote, Local);
+        _ when Retry =:= false ->
+            incompatible
+    end.
+
+change_storage_type(N, ram_copies, Cs) ->
+ Nodes = [N | Cs#cstruct.ram_copies],
+ Cs#cstruct{ram_copies = mnesia_lib:uniq(Nodes)};
+change_storage_type(N, disc_copies, Cs) ->
+ Nodes = [N | Cs#cstruct.disc_copies],
+ Cs#cstruct{disc_copies = mnesia_lib:uniq(Nodes)};
+change_storage_type(N, disc_only_copies, Cs) ->
+ Nodes = [N | Cs#cstruct.disc_only_copies],
+ Cs#cstruct{disc_only_copies = mnesia_lib:uniq(Nodes)}.
+
+%% BUGBUG: Verify match of frag info; equality demanded for all but add_node
+
+merge_versions(AnythingNew, Cs, RemoteCs, Force) ->
+ if
+ Cs#cstruct.name == schema ->
+ ok;
+ Cs#cstruct.name /= schema,
+ Cs#cstruct.cookie == RemoteCs#cstruct.cookie ->
+ ok;
+ Force == true ->
+ ok;
+ true ->
+ Str = io_lib:format("Bad cookies. Cannot merge definitions of "
+ "table ~w. Local = ~w, Remote = ~w~n",
+ [Cs#cstruct.name, Cs, RemoteCs]),
+ throw(Str)
+ end,
+ if
+ Cs#cstruct.name == RemoteCs#cstruct.name,
+ Cs#cstruct.type == RemoteCs#cstruct.type,
+ Cs#cstruct.local_content == RemoteCs#cstruct.local_content,
+ Cs#cstruct.attributes == RemoteCs#cstruct.attributes,
+ Cs#cstruct.index == RemoteCs#cstruct.index,
+ Cs#cstruct.snmp == RemoteCs#cstruct.snmp,
+ Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode,
+ Cs#cstruct.load_order == RemoteCs#cstruct.load_order,
+ Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties ->
+ do_merge_versions(AnythingNew, Cs, RemoteCs);
+ Force == true ->
+ do_merge_versions(AnythingNew, Cs, RemoteCs);
+ true ->
+ Str1 = io_lib:format("Cannot merge definitions of "
+ "table ~w. Local = ~w, Remote = ~w~n",
+ [Cs#cstruct.name, Cs, RemoteCs]),
+ throw(Str1)
+ end.
+
+do_merge_versions(AnythingNew, MergedCs, RemoteCs) ->
+ {{Major1, Minor1}, _Detail1} = MergedCs#cstruct.version,
+ {{Major2, Minor2}, _Detail2} = RemoteCs#cstruct.version,
+ if
+ AnythingNew == false ->
+ MergedCs;
+ MergedCs#cstruct.version == RemoteCs#cstruct.version ->
+ V = {{Major1, Minor1}, dummy},
+ incr_version(MergedCs#cstruct{version = V});
+ Major1 == Major2 ->
+ Minor = lists:max([Minor1, Minor2]),
+ V = {{Major1, Minor}, dummy},
+ incr_version(MergedCs#cstruct{version = V});
+ Major1 /= Major2 ->
+ Major = lists:max([Major1, Major2]),
+ V = {{Major, 0}, dummy},
+ incr_version(MergedCs#cstruct{version = V})
+ end.
+
+%% Verify the basics
+verify_merge(RemoteCs) ->
+ Tab = RemoteCs#cstruct.name,
+ Masters = mnesia_recover:get_master_nodes(schema),
+ HasRemoteMaster = Masters /= [],
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} ->
+ ok;
+ Cs ->
+ StCsLocal = mnesia_lib:cs_to_storage_type(node(), Cs),
+ StRcsLocal = mnesia_lib:cs_to_storage_type(node(), RemoteCs),
+ if
+ StCsLocal == StRcsLocal -> ok;
+ StCsLocal == unknown -> ok;
+ (StRcsLocal == unknown), (HasRemoteMaster == false) ->
+ {merge_error, Cs, RemoteCs};
+ %% Trust the merger
+ true -> ok
+ end
+ end.
+
+%% Try to connect to each node in turn. A node that shows up in either of
+%% the two lists returned by mnesia_recover:connect_nodes/1 is added to
+%% {current, db_nodes} and registered as an active replica of the schema
+%% table; any other node is skipped. Always returns [].
+announce_im_running(Nodes, SchemaCs) ->
+    Announce =
+        fun(Node) ->
+                {New, Old} = mnesia_recover:connect_nodes([Node]),
+                case lists:member(Node, New ++ Old) of
+                    true ->
+                        mnesia_lib:add({current, db_nodes}, Node),
+                        mnesia_controller:add_active_replica(schema, Node, SchemaCs);
+                    false ->
+                        ignore
+                end
+        end,
+    lists:foreach(Announce, Nodes),
+    [].
+
+%% Undo announce_im_running/2: remove each node from {current, db_nodes}
+%% and drop it as an active replica of the schema table. Returns ok.
+unannounce_im_running(Nodes) ->
+    lists:foreach(fun(Node) ->
+                          mnesia_lib:del({current, db_nodes}, Node),
+                          mnesia_controller:del_active_replica(schema, Node)
+                  end,
+                  Nodes).
+
diff --git a/lib/mnesia/src/mnesia_snmp_hook.erl b/lib/mnesia/src/mnesia_snmp_hook.erl
new file mode 100644
index 0000000000..8b4b5231e1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_snmp_hook.erl
@@ -0,0 +1,259 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_snmp_hook).
+
+%% Hooks (called from mnesia)
+-export([check_ustruct/1, create_table/3, delete_table/2,
+ key_to_oid/2, key_to_oid/3, oid_to_key/2,
+ update/1,
+ get_row/2, get_next_index/2, get_mnesia_key/2]).
+
+-export([key_to_oid_i/2, oid_to_key_1/2]). %% Test
+
+-include("mnesia.hrl").
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+%% Validate the snmp user structure of a table definition.
+%% [] (the default, table not SNMP'ified) is accepted, and so is
+%% [{key, Types}] where Types is one type atom or a tuple of type atoms,
+%% each of them integer, string or fix_string. Anything else gives false.
+check_ustruct(Ustruct) ->
+    case Ustruct of
+        [] -> true;
+        [{key, Types}] -> is_snmp_type(to_list(Types));
+        _ -> false
+    end.
+
+%% Tuples are turned into the list of their elements; any other term is
+%% wrapped in a one-element list.
+to_list(Term) ->
+    if
+        is_tuple(Term) -> tuple_to_list(Term);
+        true -> [Term]
+    end.
+
+%% True when every element of the list is a supported snmp key type.
+is_snmp_type(Types) when is_list(Types) ->
+    lists:all(fun(Type) -> lists:member(Type, [integer, string, fix_string]) end,
+              Types);
+is_snmp_type(_) ->
+    false.
+
+create_table([], MnesiaTab, _Storage) ->
+ mnesia:abort({badarg, MnesiaTab, {snmp, empty_snmpstruct}});
+
+create_table([{key, Us}], MnesiaTab, Storage) ->
+ Tree = b_new(MnesiaTab, Us),
+ mnesia_lib:db_fixtable(Storage, MnesiaTab, true),
+ First = mnesia_lib:db_first(Storage, MnesiaTab),
+ build_table(First, MnesiaTab, Tree, Us, Storage),
+ mnesia_lib:db_fixtable(Storage, MnesiaTab, false),
+ Tree.
+
+build_table(MnesiaKey, MnesiaTab, Tree, Us, Storage)
+ when MnesiaKey /= '$end_of_table' ->
+ %%update(write, Tree, MnesiaKey, MnesiaKey),
+ SnmpKey = key_to_oid_i(MnesiaKey, Us),
+ b_insert(Tree, SnmpKey, MnesiaKey),
+ Next = mnesia_lib:db_next_key(Storage, MnesiaTab, MnesiaKey),
+ build_table(Next, MnesiaTab, Tree, Us, Storage);
+build_table('$end_of_table', _MnesiaTab, _Tree, _Us, _Storage) ->
+ ok.
+
+delete_table(_MnesiaTab, Tree) ->
+ b_delete_tree(Tree),
+ ok.
+
+%%-----------------------------------------------------------------
+%% update({Op, MnesiaTab, MnesiaKey, SnmpKey})
+%%-----------------------------------------------------------------
+
+update({clear_table, MnesiaTab}) ->
+ Tree = val({MnesiaTab, {index, snmp}}),
+ b_clear(Tree),
+ ok;
+
+update({Op, MnesiaTab, MnesiaKey, SnmpKey}) ->
+ Tree = val({MnesiaTab, {index, snmp}}),
+ update(Op, Tree, MnesiaKey, SnmpKey).
+
+update(Op, Tree, MnesiaKey, SnmpKey) ->
+ case Op of
+ write ->
+ b_insert(Tree, SnmpKey, MnesiaKey);
+ update_counter ->
+ ignore;
+ delete ->
+ b_delete(Tree, SnmpKey);
+ delete_object ->
+ b_delete(Tree, SnmpKey)
+ end,
+ ok.
+
+%%-----------------------------------------------------------------
+%% Func: key_to_oid(Tab, Key, Ustruct)
+%% Args: Key ::= key()
+%% key() ::= int() | string() | {int() | string()}
+%% Type ::= {fix_string | term()}
+%% Make an OBJECT IDENTIFIER out of it.
+%% Variable length objects are prepended by their length.
+%% Ex. Key = {"pelle", 42} AND Type = {string, integer} =>
+%% OID [5, $p, $e, $l, $l, $e, 42]
+%% Key = {"pelle", 42} AND Type = {fix_string, integer} =>
+%% OID [$p, $e, $l, $l, $e, 42]
+%%-----------------------------------------------------------------
+
+key_to_oid(Tab,Key) ->
+ Types = val({Tab,snmp}),
+ key_to_oid(Tab, Key, Types).
+
+key_to_oid(Tab, Key, [{key, Types}]) ->
+ try key_to_oid_i(Key,Types)
+ catch _:_ ->
+ mnesia:abort({bad_snmp_key, {Tab,Key}, Types})
+ end.
+
+%% Encode a mnesia key as an snmp OID (a list) according to Types.
+%%   integer    -> [Key]
+%%   fix_string -> Key itself
+%%   string     -> Key prepended by its length
+%% When Types is a tuple of type atoms, Key must be a tuple as well and
+%% the encodings of its elements are concatenated in order.
+%% A key that does not fit Types raises an error; key_to_oid/3 turns
+%% that into an abort.
+key_to_oid_i(Key, integer) when is_integer(Key) -> [Key];
+key_to_oid_i(Key, fix_string) when is_list(Key) -> Key;
+key_to_oid_i(Key, string) when is_list(Key) -> [length(Key) | Key];
+key_to_oid_i(Key, Types) ->
+    %% tuple_size/1 rather than size/1: size/1 also accepts binaries, so
+    %% an empty binary key was silently encoded as [] instead of being
+    %% rejected like every other malformed key.
+    keys_to_oid(tuple_size(Key), Key, [], Types).
+
+%% Encode elements N, N-1, ..., 1 of Key, prepending each encoding to Oid
+%% so that the finished OID is in element order.
+keys_to_oid(0, _Key, Oid, _Types) -> Oid;
+keys_to_oid(N, Key, Oid, Types) ->
+    Oid2 = key_to_oid_i(element(N, Key), element(N, Types)) ++ Oid,
+    keys_to_oid(N - 1, Key, Oid2, Types).
+
+%%--------------------------------------------------
+%% The reverse of the above, i.e. snmp oid to mnesia key.
+%% The key can be looked up in the tree, but that might be on a remote node.
+%% It's probably faster to look it up, but use this when it might be remote.
+oid_to_key(Oid, Tab) ->
+ [{key, Types}] = val({Tab,snmp}),
+ oid_to_key_1(Types, Oid).
+
+%% Decode an snmp OID back into a mnesia key, given the key type(s).
+%% For a tuple of types the OID is split field by field and a tuple is
+%% returned; if the OID cannot be decoded that way the atom unknown is
+%% returned instead.
+oid_to_key_1(integer, [Key]) -> Key;
+oid_to_key_1(fix_string, Key) -> Key;
+oid_to_key_1(string, [_Len | Key]) -> Key;
+oid_to_key_1(Types, Oid) ->
+    try list_to_tuple(oid_to_key_2(1, size(Types), Types, Oid))
+    catch
+        _:_ -> unknown
+    end.
+
+%% Decode field number Pos (of Arity) from the front of Oid and continue
+%% with the remaining fields. A fix_string has no length prefix, so it can
+%% only be decoded as the last field, where it takes the rest of the OID.
+%% The OID must be used up exactly when all fields are decoded.
+oid_to_key_2(Pos, Arity, Types, Oid) when Pos =< Arity ->
+    IsLast = (Pos =:= Arity),
+    case element(Pos, Types) of
+        integer ->
+            [Int | Tail] = Oid,
+            [Int | oid_to_key_2(Pos + 1, Arity, Types, Tail)];
+        string ->
+            [Len | Tail0] = Oid,
+            {Str, Tail} = lists:split(Len, Tail0),
+            [Str | oid_to_key_2(Pos + 1, Arity, Types, Tail)];
+        fix_string when IsLast ->
+            [Oid];
+        fix_string ->
+            throw(fix_string)
+    end;
+oid_to_key_2(Pos, Arity, _Types, []) when Pos =:= (Arity + 1) ->
+    [].
+
+%%-----------------------------------------------------------------
+%% Func: get_row/2
+%% Args: Name is the name of the table (atom)
+%% RowIndex is an Oid
+%% Returns: {ok, Row} | undefined
+%% Note that the Row returned might contain columns that
+%% are not visible via SNMP. e.g. the first column may be
+%% ifIndex, and the last MFA ({ifIndex, col1, col2, MFA}).
+%% where ifIndex is used only as index (not as a real col),
+%% and MFA as extra info, used by the application.
+%%-----------------------------------------------------------------
+%% Look up RowIndex in the snmp index of table Name and read the
+%% corresponding row with a dirty read.
+%% Returns {ok, Row}, or undefined when the index has no such entry or
+%% the row is no longer in the table.
+get_row(Name, RowIndex) ->
+    Tree = mnesia_lib:val({Name, {index, snmp}}),
+    case b_lookup(Tree, RowIndex) of
+        {ok, {_RowIndex, Key}} ->
+            %% The index lookup and the read are two separate steps, so
+            %% the row may be gone by the time it is read. Report that as
+            %% undefined instead of crashing on a badmatch.
+            case mnesia:dirty_read({Name, Key}) of
+                [Row] -> {ok, Row};
+                [] -> undefined
+            end;
+        _ ->
+            undefined
+    end.
+
+%%-----------------------------------------------------------------
+%% Func: get_next_index/2
+%% Args: Name is the name of the table (atom)
+%% RowIndex is an Oid
+%% Returns: {NextIndex,MnesiaKey} | {endOfTable, undefined}
+%%-----------------------------------------------------------------
+get_next_index(Name, RowIndex) ->
+ Tree = mnesia_lib:val({Name, {index, snmp}}),
+ case b_lookup_next(Tree, RowIndex) of
+ {ok, R} ->
+ R;
+ _ ->
+ {endOfTable,undefined}
+ end.
+
+%%-----------------------------------------------------------------
+%% Func: get_mnesia_key/2
+%% Purpose: Get the mnesia key corresponding to the RowIndex.
+%% Args: Name is the name of the table (atom)
+%% RowIndex is an Oid
+%% Returns: {ok, Key} | undefined
+%%-----------------------------------------------------------------
+get_mnesia_key(Name, RowIndex) ->
+ Tree = mnesia_lib:val({Name, {index, snmp}}),
+ case b_lookup(Tree, RowIndex) of
+ {ok, {_RowIndex, Key}} ->
+ {ok, Key};
+ _ ->
+ undefined
+ end.
+
+
+%%-----------------------------------------------------------------
+%% Internal implementation, ordered_set ets.
+
+b_new(_Tab, _Us) ->
+ mnesia_monitor:unsafe_mktab(?MODULE, [public, ordered_set]).
+
+b_delete_tree(Tree) ->
+ ets:delete(Tree). %% Close via mnesia_monitor ?
+
+b_clear(Tree) ->
+ ets:delete_all_objects(Tree).
+
+b_insert(Tree, SnmpKey, MnesiaKey) ->
+ ets:insert(Tree, {SnmpKey, MnesiaKey}).
+
+b_delete(Tree, SnmpKey) ->
+ ets:delete(Tree, SnmpKey).
+
+%% Fetch the {SnmpKey, MnesiaKey} entry stored under RowIndex.
+%% Returns {ok, Entry} when exactly one entry is found, else undefined.
+b_lookup(Tree, RowIndex) ->
+    lookup_result(ets:lookup(Tree, RowIndex)).
+
+lookup_result([Entry]) -> {ok, Entry};
+lookup_result(_) -> undefined.
+
+%% Fetch the entry that follows RowIndex in the table's key order.
+%% Returns {ok, Entry}, or undefined when RowIndex is at or past the end.
+b_lookup_next(Tree, RowIndex) ->
+    NextKey = ets:next(Tree, RowIndex),
+    if
+        NextKey =:= '$end_of_table' -> undefined;
+        true -> b_lookup(Tree, NextKey)
+    end.
diff --git a/lib/mnesia/src/mnesia_snmp_sup.erl b/lib/mnesia/src/mnesia_snmp_sup.erl
new file mode 100644
index 0000000000..7e86281428
--- /dev/null
+++ b/lib/mnesia/src/mnesia_snmp_sup.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_snmp_sup).
+
+-behaviour(supervisor).
+
+-export([start/0, init/1]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% top supervisor callback functions
+
+start() -> %% Start this supervisor, registered locally under the module name.
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% sub supervisor callback functions
+
+%% Supervisor callback: one transient worker template started through
+%% mnesia_snmp_hook:start/0.
+init([]) ->
+    %% Zero restarts allowed: trust the top supervisor.
+    SupFlags = {simple_one_for_one, 0, timer:hours(24)},
+    Shutdown = mnesia_kernel_sup:supervisor_timeout(timer:seconds(3)),
+    ChildSpec = {?MODULE,
+                 {mnesia_snmp_hook, start, []},
+                 transient,
+                 Shutdown,
+                 worker,
+                 [?MODULE, mnesia_snmp_hook, supervisor]},
+    {ok, {SupFlags, [ChildSpec]}}.
diff --git a/lib/mnesia/src/mnesia_sp.erl b/lib/mnesia/src/mnesia_sp.erl
new file mode 100644
index 0000000000..58a177513f
--- /dev/null
+++ b/lib/mnesia/src/mnesia_sp.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1999-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+
+%% To be able to generate nice crash reports we need a catch on the highest level.
+%% This code can't be purged, so a code change is not possible.
+%% Hence this is a deliberately simple module.
+
+-module(mnesia_sp).
+
+-export([init_proc/4]).
+
+%% Run Mod:Fun(Args) under a top-level catch so that a crash is
+%% reported through mnesia_monitor before the process exits.
+init_proc(Who, Mod, Fun, Args) ->
+    mnesia_lib:verbose("~p starting: ~p~n", [Who, self()]),
+    Outcome = (catch apply(Mod, Fun, Args)),
+    finish_proc(Outcome, Who, Args).
+
+%% An 'EXIT' outcome is reported and re-raised as an exit; every other
+%% outcome is returned unchanged.
+finish_proc({'EXIT', Reason}, Who, Args) ->
+    mnesia_monitor:terminate_proc(Who, Reason, Args),
+    exit(Reason);
+finish_proc(Other, _Who, _Args) ->
+    Other.
+
+
+
+
diff --git a/lib/mnesia/src/mnesia_subscr.erl b/lib/mnesia/src/mnesia_subscr.erl
new file mode 100644
index 0000000000..afd1704dec
--- /dev/null
+++ b/lib/mnesia/src/mnesia_subscr.erl
@@ -0,0 +1,494 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_subscr).
+
+-behaviour(gen_server).
+
+-export([start/0,
+ set_debug_level/1,
+ subscribe/2,
+ unsubscribe/2,
+ unsubscribe_table/1,
+ subscribers/0,
+ report_table_event/4,
+ report_table_event/5,
+ report_table_event/6
+ ]).
+
+%% gen_server callbacks
+-export([init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3
+ ]).
+
+-include("mnesia.hrl").
+
+-import(mnesia_lib, [error/2]).
+-record(state, {supervisor, pid_tab}).
+
+start() -> %% Start the subscription server, locally registered; the caller's pid is passed to init/1.
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [self()],
+ [{timeout, infinity}]).
+
+%% Patch the debug env value and apply the resulting level.  The env
+%% value is read before patching so that set_debug_level/2 can report it.
+set_debug_level(Level) ->
+    PreviousEnv = application:get_env(mnesia, debug),
+    case mnesia_monitor:patch_env(debug, Level) of
+        {error, _Reason} = Failure ->
+            Failure;
+        PatchedLevel ->
+            set_debug_level(PatchedLevel, PreviousEnv)
+    end.
+
+set_debug_level(Level, OldEnv) -> %% Apply Level and return the previous debug level.
+ case mnesia:system_info(is_running) of
+ no when OldEnv == undefined -> %% Not running and no previous env value: report none.
+ none;
+ no -> %% Not running: report the previous env value.
+ {ok, E} = OldEnv,
+ E;
+ _ -> %% Any other state: adjust the table subscriptions of mnesia_event.
+ Old = mnesia_lib:val(debug),
+ Local = mnesia:system_info(local_tables),
+ E = whereis(mnesia_event),
+ Sub = fun(Tab) -> subscribe(E, {table, Tab}) end,
+ UnSub = fun(Tab) -> unsubscribe(E, {table, Tab}) end,
+
+ case Level of
+ none -> %% No table events for the event process.
+ lists:foreach(UnSub, Local);
+ verbose -> %% Same subscriptions as none.
+ lists:foreach(UnSub, Local);
+ debug -> %% Only schema events.
+ lists:foreach(UnSub, Local -- [schema]),
+ Sub(schema);
+ trace -> %% Events from every local table.
+ lists:foreach(Sub, Local)
+ end,
+ mnesia_lib:set(debug, Level),
+ Old %% The level read before the update is the return value.
+ end.
+
+%% Activate a subscription for ClientPid.  What is system, {table, Tab},
+%% {table, Tab, simple} or {table, Tab, detailed}; anything else yields
+%% {error, {badarg, What}}.
+subscribe(ClientPid, What) ->
+    case What of
+        system ->
+            change_subscr(activate, ClientPid, system);
+        {table, Tab} ->
+            %% The two-element form is shorthand for a simple subscription.
+            change_subscr(activate, ClientPid, {table, Tab, simple});
+        {table, _Tab, Level} when Level =:= simple; Level =:= detailed ->
+            change_subscr(activate, ClientPid, What);
+        _ ->
+            {error, {badarg, What}}
+    end.
+
+%% Deactivate a subscription for ClientPid.  Accepts the same What
+%% forms as subscribe/2; anything else yields {error, {badarg, What}}.
+unsubscribe(ClientPid, What) ->
+    case What of
+        system ->
+            change_subscr(deactivate, ClientPid, system);
+        {table, Tab} ->
+            %% The two-element form is shorthand for a simple subscription.
+            change_subscr(deactivate, ClientPid, {table, Tab, simple});
+        {table, _Tab, Level} when Level =:= simple; Level =:= detailed ->
+            change_subscr(deactivate, ClientPid, What);
+        _ ->
+            {error, {badarg, What}}
+    end.
+
+unsubscribe_table(Tab) -> %% Ask the server to deactivate every subscription on Tab.
+ call({change, {deactivate_table, Tab}}).
+
+change_subscr(Kind, ClientPid, What) -> %% Forward an activate/deactivate request to the server.
+ call({change, {Kind, ClientPid, What}}).
+
+subscribers() -> %% The mnesia_event process followed by the stored subscribers list.
+ [whereis(mnesia_event) | mnesia_lib:val(subscribers)].
+
+%% Report Op on Obj to the subscribers found in Tab's commit_work.
+%% Returns ok when the commit_work value cannot be read or holds no
+%% subscribers entry.
+report_table_event(Tab, Tid, Obj, Op) ->
+    case ?catch_val({Tab, commit_work}) of
+        {'EXIT', _} ->
+            ok;
+        Commit ->
+            Found = lists:keysearch(subscribers, 1, Commit),
+            report_to_subscribers(Found, Tab, Tid, Obj, Op)
+    end.
+
+report_to_subscribers(false, _Tab, _Tid, _Obj, _Op) ->
+    ok;
+report_to_subscribers({value, Subs}, Tab, Tid, Obj, Op) ->
+    report_table_event(Subs, Tab, Tid, Obj, Op, undefined).
+
+%% Kept for backwards compatibility until mnesia_tm gets updated!
+report_table_event(Subscr, Tab, Tid, Obj, Op) -> %% Same as report_table_event/6 with Old = undefined.
+ report_table_event(Subscr, Tab, Tid, Obj, Op, undefined).
+
+report_table_event({subscribers, S1, S2}, Tab, Tid, _Obj, clear_table, _Old) -> %% clear_table is reported as a delete + write of the schema record.
+ What = {delete, {schema, Tab}, Tid},
+ deliver(S1, {mnesia_table_event, What}), %% S1 receives the 3-element event form.
+ TabDef = mnesia_schema:cs2list(?catch_val({Tab, cstruct})),
+ What2 = {write, {schema, Tab, TabDef}, Tid},
+ deliver(S1, {mnesia_table_event, What2}),
+ What3 = {delete, schema, {schema, Tab}, [{schema, Tab, TabDef}], Tid},
+ deliver(S2, {mnesia_table_event, What3}), %% S2 receives the 5-element event form.
+ What4 = {write, schema, {schema, Tab, TabDef}, [], Tid},
+ deliver(S2, {mnesia_table_event, What4});
+
+report_table_event({subscribers, Subscr, []}, Tab, Tid, Obj, Op, _Old) -> %% Second list empty: only the 3-element event is sent.
+ What = {Op, patch_record(Tab, Obj), Tid},
+ deliver(Subscr, {mnesia_table_event, What});
+
+report_table_event({subscribers, S1, S2}, Tab, Tid, Obj, Op, Old) -> %% Both lists: S1 gets the standard event, S2 the one built by what/5.
+ Standard = {Op, patch_record(Tab, Obj), Tid},
+ deliver(S1, {mnesia_table_event, Standard}),
+ Extended = what(Tab, Tid, Obj, Op, Old),
+ deliver(S2, Extended);
+
+%% Kept for backwards compatibility until mnesia_tm gets updated!
+report_table_event({subscribers, Subscr}, Tab, Tid, Obj, Op, Old) -> %% Old 2-tuple form: treat as an empty second list.
+ report_table_event({subscribers, Subscr, []}, Tab, Tid, Obj, Op, Old).
+
+
+%% Return Obj with its first element set to Tab; Obj is returned
+%% unchanged when the first element already equals Tab.
+patch_record(Tab, Obj) ->
+    RecName = element(1, Obj),
+    if
+        Tab == RecName ->
+            Obj;
+        true ->
+            setelement(1, Obj, Tab)
+    end.
+
+%% Build the five-element event that report_table_event/6 delivers to
+%% its second subscriber list, or the atom ignore when no event should
+%% be sent.
+%%   Tab, Tid - table name and transaction id, copied into the event
+%%   Obj      - the written/deleted object, or {RecName, Key} for delete
+%%   Op       - delete | delete_object | write
+%%   Old      - the old records if the caller has them, else undefined
+%% When Old is undefined for delete/write it is read with
+%% mnesia_lib:db_get/2.
+what(Tab, Tid, {RecName, Key}, delete, undefined) ->
+    case catch mnesia_lib:db_get(Tab, Key) of
+        Old when is_list(Old) -> %% Op only allowed for set table.
+            {mnesia_table_event, {delete, Tab, {RecName, Key}, Old, Tid}};
+        _ ->
+            %% Record just deleted by a dirty_op or
+            %% the whole table has been deleted
+            ignore
+    end;
+what(Tab, Tid, Obj, delete, Old) ->
+    {mnesia_table_event, {delete, Tab, Obj, Old, Tid}};
+what(Tab, Tid, Obj, delete_object, _Old) ->
+    {mnesia_table_event, {delete, Tab, Obj, [Obj], Tid}};
+what(Tab, Tid, Obj, write, undefined) ->
+    case catch mnesia_lib:db_get(Tab, element(2, Obj)) of
+        Old when is_list(Old) ->
+            {mnesia_table_event, {write, Tab, Obj, Old, Tid}};
+        {'EXIT', _} ->
+            ignore
+    end;
+%% A caller of the exported report_table_event/6 may supply the old
+%% records for a write, just as for delete; use them as given instead
+%% of failing with function_clause.
+what(Tab, Tid, Obj, write, Old) ->
+    {mnesia_table_event, {write, Tab, Obj, Old, Tid}}.
+
+%% Send Msg to every pid in the list, in list order.  The atom ignore
+%% means there is nothing to send.  Always returns ok.
+deliver(_Pids, ignore) ->
+    ok;
+deliver(Pids, Msg) ->
+    lists:foreach(fun(Pid) -> Pid ! Msg end, Pids).
+
+%% Synchronous request to the locally registered subscription server.
+%% Returns the server's reply, or {error, {node_not_running, node()}}
+%% when the server is not registered or an exit message from it is
+%% found after the call.
+call(Msg) ->
+    Pid = whereis(?MODULE),
+    case Pid of
+        undefined ->
+            {error, {node_not_running, node()}};
+        Pid ->
+            Res = gen_server:call(Pid, Msg, infinity),
+            %% We get an exit signal if server dies.
+            %% Match on the bound Pid: an exit message from any other
+            %% process must stay in the caller's mailbox and must not be
+            %% misreported as the server being down.
+            receive
+                {'EXIT', Pid, _Reason} ->
+                    {error, {node_not_running, node()}}
+            after 0 ->
+                Res
+            end
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Callback functions from gen_server
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) -> %% Parent is the pid passed by start/0; it is stored as supervisor.
+ process_flag(trap_exit, true), %% Exits of linked processes arrive as messages in handle_info/2.
+ ClientPid = whereis(mnesia_event),
+ link(ClientPid),
+ mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
+ Tab = ?ets_new_table(mnesia_subscr, [duplicate_bag, private]), %% Holds {ClientPid, What} entries.
+ ?ets_insert(Tab, {ClientPid, system}), %% mnesia_event is recorded as a system subscriber from the start.
+ {ok, #state{supervisor = Parent, pid_tab = Tab}}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%%----------------------------------------------------------------------
+%% Serve {change, How} requests through do_change/2.  Any other request
+%% is logged and gets no reply.
+handle_call(Request, _From, State) ->
+    case Request of
+        {change, How} ->
+            {reply, do_change(How, State#state.pid_tab), State};
+        Msg ->
+            error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
+            {noreply, State}
+    end.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+handle_cast(Msg, State) -> %% No casts are expected; log and carry on.
+ error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% An exit from the supervisor stops the server; an exit from any other
+%% process removes that process' subscriptions.  Other messages are
+%% logged.
+handle_info({'EXIT', Pid, _Reason}, State) ->
+    case Pid == State#state.supervisor of
+        true ->
+            {stop, shutdown, State};
+        false ->
+            handle_exit(Pid, State#state.pid_tab),
+            {noreply, State}
+    end;
+handle_info(Msg, State) ->
+    error("~p got unexpected info: ~p~n", [?MODULE, Msg]),
+    {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+terminate(Reason, State) -> %% Clean up all subscriptions, then report the termination.
+ prepare_stop(State#state.pid_tab),
+ mnesia_monitor:terminate_proc(?MODULE, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+code_change(_OldVsn, State, _Extra) -> %% The state is kept unchanged across code changes.
+ {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+do_change({activate, ClientPid, system}, SubscrTab) when is_pid(ClientPid) -> %% Add a system subscriber.
+ Var = subscribers,
+ activate(ClientPid, system, Var, subscribers(), SubscrTab);
+do_change({activate, ClientPid, {table, Tab, How}}, SubscrTab) when is_pid(ClientPid) -> %% Add a table subscriber.
+ case ?catch_val({Tab, where_to_read}) of
+ Node when Node == node() -> %% Only allowed when the table is read on this node.
+ Var = {Tab, commit_work},
+ activate(ClientPid, {table, Tab, How}, Var, mnesia_lib:val(Var), SubscrTab);
+ {'EXIT', _} -> %% The where_to_read value could not be read.
+ {error, {no_exists, Tab}};
+ _Node ->
+ {error, {not_active_local, Tab}}
+ end;
+do_change({deactivate, ClientPid, system}, SubscrTab) -> %% Remove a system subscriber.
+ Var = subscribers,
+ deactivate(ClientPid, system, Var, SubscrTab);
+do_change({deactivate, ClientPid, {table, Tab, How}}, SubscrTab) -> %% Remove a table subscriber.
+ Var = {Tab, commit_work},
+ deactivate(ClientPid, {table, Tab, How}, Var, SubscrTab);
+do_change({deactivate_table, Tab}, SubscrTab) -> %% Remove every subscriber of Tab.
+ Var = {Tab, commit_work},
+ case ?catch_val(Var) of
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}};
+ CommitWork ->
+ case lists:keysearch(subscribers, 1, CommitWork) of
+ false -> %% Nobody subscribes to this table.
+ ok;
+ {value, Subs} ->
+ Simple = {table, Tab, simple},
+ Detailed = {table, Tab, detailed},
+ Fs = fun(C) -> deactivate(C, Simple, Var, SubscrTab) end,
+ Fd = fun(C) -> deactivate(C, Detailed, Var, SubscrTab) end,
+ case Subs of
+ {subscribers, L1, L2} -> %% L1 is deactivated as simple, L2 as detailed.
+ lists:foreach(Fs, L1),
+ lists:foreach(Fd, L2);
+ {subscribers, L1} -> %% Old 2-tuple form: simple subscribers only.
+ lists:foreach(Fs, L1)
+ end
+ end,
+ {ok, node()}
+ end;
+do_change(_, _) -> %% Unknown request, or an activate with a non-pid client.
+ {error, badarg}.
+
+%% Register ClientPid as a subscriber of What unless it already is one.
+%% Returns {ok, node()}, {error, {already_exists, What}} or
+%% {error, {no_exists, ClientPid}} when linking to the client fails.
+activate(ClientPid, What, Var, OldSubscribers, SubscrTab) ->
+    Current = current_subscriber_pids(Var, OldSubscribers),
+    case lists:member(ClientPid, Current) of
+        true ->
+            {error, {already_exists, What}};
+        false ->
+            %% Don't care about checking old links
+            case catch link(ClientPid) of
+                true ->
+                    ?ets_insert(SubscrTab, {ClientPid, What}),
+                    add_subscr(Var, What, ClientPid),
+                    {ok, node()};
+                {'EXIT', _Reason} ->
+                    {error, {no_exists, ClientPid}}
+            end
+    end.
+
+%% For system subscriptions the second argument already is the pid list;
+%% for table subscriptions it is the commit_work list, whose subscribers
+%% entry (old 2-tuple or current 3-tuple form) is flattened to one list.
+current_subscriber_pids(Var, OldSubscribers) ->
+    if
+        Var == subscribers ->
+            OldSubscribers;
+        true ->
+            case lists:keysearch(subscribers, 1, OldSubscribers) of
+                false ->
+                    [];
+                {value, Subs} ->
+                    case Subs of
+                        {subscribers, Simple, Detailed} ->
+                            Simple ++ Detailed;
+                        {subscribers, Simple} ->
+                            Simple
+                    end
+            end
+    end.
+
+%%-record(subscribers, {pids = []}). Old subscriber record removed
+%% To preserve backward compatibility, this code is a kludge.
+add_subscr(subscribers, _What, Pid) -> %% System subscriber: add Pid to the subscribers value.
+ mnesia_lib:add(subscribers, Pid),
+ {ok, node()};
+add_subscr({Tab, commit_work}, What, Pid) -> %% Table subscriber: update the subscribers entry of commit_work.
+ Commit = mnesia_lib:val({Tab, commit_work}),
+ case lists:keysearch(subscribers, 1, Commit) of
+ false -> %% First subscriber of this table: create the entry.
+ Subscr =
+ case What of
+ {table, _, simple} ->
+ {subscribers, [Pid], []};
+ {table, _, detailed} ->
+ {subscribers, [], [Pid]}
+ end,
+ mnesia_lib:add({Tab, subscribers}, Pid),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit([Subscr | Commit]));
+ {value, Old} -> %% Entry exists: prepend Pid to the matching list.
+ {L1, L2} =
+ case Old of
+ {subscribers, L} -> %% Old Way
+ {L, []};
+ {subscribers, SL1, SL2} ->
+ {SL1, SL2}
+ end,
+ Subscr =
+ case What of
+ {table, _, simple} ->
+ {subscribers, [Pid | L1], L2};
+ {table, _, detailed} ->
+ {subscribers, L1, [Pid | L2]}
+ end,
+ NewC = lists:keyreplace(subscribers, 1, Commit, Subscr),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC)),
+ mnesia_lib:add({Tab, subscribers}, Pid)
+ end.
+
+deactivate(ClientPid, What, Var, SubscrTab) -> %% Drop one subscription of ClientPid; always returns {ok, node()}.
+ ?ets_match_delete(SubscrTab, {ClientPid, What}),
+ case catch ?ets_lookup_element(SubscrTab, ClientPid, 1) of
+ List when is_list(List) -> %% The client still has other subscriptions: keep the link.
+ ignore;
+ {'EXIT', _} -> %% No entries left for the client: drop the link.
+ unlink(ClientPid)
+ end,
+ del_subscr(Var, What, ClientPid),
+ {ok, node()}.
+
+%% Remove Pid from the stored subscriber data.  For system subscribers
+%% that is the subscribers value; for table subscribers it is the
+%% subscribers entry of the table's commit_work list, which is dropped
+%% altogether once both of its lists are empty.
+del_subscr(subscribers, _What, Pid) ->
+    mnesia_lib:del(subscribers, Pid);
+del_subscr({Tab, commit_work}, What, Pid) ->
+    Commit = mnesia_lib:val({Tab, commit_work}),
+    case lists:keysearch(subscribers, 1, Commit) of
+        false ->
+            false;
+        {value, Old} ->
+            {Simple, Detailed} =
+                case Old of
+                    {subscribers, L} -> %% Old Way
+                        {L, []};
+                    {subscribers, SL1, SL2} ->
+                        {SL1, SL2}
+                end,
+            %% Ignore user error: delete the subscriber from both lists,
+            %% whichever level was asked for.
+            Remaining =
+                case What of
+                    {table, _, Level} when Level =:= simple;
+                                           Level =:= detailed ->
+                        {subscribers,
+                         lists:delete(Pid, Simple),
+                         lists:delete(Pid, Detailed)}
+                end,
+            NewCommit =
+                case Remaining of
+                    {subscribers, [], []} ->
+                        lists:keydelete(subscribers, 1, Commit);
+                    _ ->
+                        lists:keyreplace(subscribers, 1, Commit, Remaining)
+                end,
+            mnesia_lib:del({Tab, subscribers}, Pid),
+            mnesia_lib:set({Tab, commit_work},
+                           mnesia_lib:sort_commit(NewCommit))
+    end.
+
+handle_exit(ClientPid, SubscrTab) -> %% Remove every subscription recorded for ClientPid.
+ do_handle_exit(?ets_lookup(SubscrTab, ClientPid)),
+ ?ets_delete(SubscrTab, ClientPid).
+
+%% Remove the subscriber data for each {ClientPid, What} entry, in
+%% list order.  Returns ok.
+do_handle_exit(Entries) ->
+    lists:foreach(
+      fun({ClientPid, What}) ->
+              case What of
+                  system ->
+                      del_subscr(subscribers, What, ClientPid);
+                  {_, Tab, _Level} ->
+                      del_subscr({Tab, commit_work}, What, ClientPid)
+              end
+      end,
+      Entries).
+
+prepare_stop(SubscrTab) -> %% Report mnesia_down for this node, then drop all subscriptions.
+ mnesia_lib:report_system_event({mnesia_down, node()}),
+ do_prepare_stop(?ets_first(SubscrTab), SubscrTab).
+
+do_prepare_stop('$end_of_table', _SubscrTab) -> %% Every client has been handled.
+ ok;
+do_prepare_stop(ClientPid, SubscrTab) ->
+ Next = ?ets_next(SubscrTab, ClientPid), %% Fetch the next key before this client's entries are deleted.
+ handle_exit(ClientPid, SubscrTab),
+ unlink(ClientPid),
+ do_prepare_stop(Next, SubscrTab).
+
diff --git a/lib/mnesia/src/mnesia_sup.erl b/lib/mnesia/src/mnesia_sup.erl
new file mode 100644
index 0000000000..9ee4086f50
--- /dev/null
+++ b/lib/mnesia/src/mnesia_sup.erl
@@ -0,0 +1,131 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Supervisor for the entire Mnesia application
+
+-module(mnesia_sup).
+
+-behaviour(application).
+-behaviour(supervisor).
+
+-export([start/0, start/2, init/1, stop/1, start_event/0, kill/0]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% application and supervisor callback functions
+
+%% Application start callback.  Only the start type normal is
+%% accepted; everything else yields {error, badarg}.
+start(normal, Args) ->
+    Started = supervisor:start_link({local, ?MODULE}, ?MODULE, [Args]),
+    with_app_state(Started, Args);
+start(_Type, _Args) ->
+    {error, badarg}.
+
+%% Attach {normal, Args} as third element to a successful start result;
+%% anything else is passed through.
+with_app_state({ok, Pid}, Args) ->
+    {ok, Pid, {normal, Args}};
+with_app_state(Failure, _Args) ->
+    Failure.
+
+start() -> %% Start the supervisor directly, with [] as the init argument.
+ SupName = {local,?MODULE},
+ supervisor:start_link(SupName, ?MODULE, []).
+
+stop(_StartArgs) -> %% Nothing to clean up here.
+ ok.
+
+init([]) -> % Supervisor: start/0 passes [] as the init argument
+ init();
+init([[]]) -> % Application: start/2 wraps its Args in a list, so [] arrives as [[]]
+ init();
+init(BadArg) -> % Any other argument is rejected
+ {error, {badarg, BadArg}}.
+
+%% Assemble the supervisor specification: event, kernel and (optional)
+%% mnemosyne children, in that order.
+init() ->
+    EventSpecs = event_procs(),
+    KernelSpecs = kernel_procs(),
+    MnemosyneSpecs = mnemosyne_procs(),
+    %% Should be rest_for_one policy
+    SupFlags = {one_for_all, 0, 3600},
+    {ok, {SupFlags, lists:append([EventSpecs, KernelSpecs, MnemosyneSpecs])}}.
+
+%% Child specification for the mnesia_event worker, started through
+%% start_event/0 in this module.
+event_procs() ->
+    Shutdown = mnesia_kernel_sup:supervisor_timeout(timer:seconds(30)),
+    StartMFA = {?MODULE, start_event, []},
+    [{mnesia_event, StartMFA, permanent, Shutdown, worker,
+      [mnesia_event, gen_event]}].
+
+%% Child specification for the kernel supervisor.
+kernel_procs() ->
+    Sup = mnesia_kernel_sup,
+    [{Sup, {Sup, start, []}, permanent, infinity, supervisor,
+      [Sup, supervisor]}].
+
+%% Child specification for mnemosyne_sup; empty unless the
+%% embedded_mnemosyne env value is true.
+mnemosyne_procs() ->
+    case mnesia_monitor:get_env(embedded_mnemosyne) of
+        false ->
+            [];
+        true ->
+            Sup = mnemosyne_sup,
+            [{Sup, {Sup, start, []}, permanent, infinity, supervisor,
+              [Sup, supervisor]}]
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% event handler
+
+%% Start the locally registered mnesia_event manager and install the
+%% configured handler.  Returns {ok, Pid}, or whatever the failing step
+%% returned.
+start_event() ->
+    case gen_event:start_link({local, mnesia_event}) of
+        {ok, Pid} ->
+            handler_result(add_event_handler(), Pid);
+        Error ->
+            Error
+    end.
+
+handler_result(ok, Pid) ->
+    {ok, Pid};
+handler_result(Error, _Pid) ->
+    Error.
+
+add_event_handler() -> %% Install the module named by the event_module env value as handler.
+ Handler = mnesia_monitor:get_env(event_module),
+ gen_event:add_handler(mnesia_event, Handler, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% debug functions
+
+kill() -> %% Debug helper: kill the listed processes until the mnesia application is gone.
+ Mnesia = [mnesia_fallback | mnesia:ms()],
+ Kill = fun(Name) -> catch exit(whereis(Name), kill) end, %% The catch covers names that are not registered.
+ lists:foreach(Kill, Mnesia),
+ lists:foreach(fun ensure_dead/1, Mnesia),
+ timer:sleep(10),
+ case lists:keymember(mnesia, 1, application:which_applications()) of
+ true -> kill(); %% Still listed as running: try again.
+ false -> ok
+ end.
+
+ensure_dead(Name) -> %% Loop until no process is registered under Name.
+ case whereis(Name) of
+ undefined ->
+ ok;
+ Pid when is_pid(Pid) ->
+ exit(Pid, kill),
+ timer:sleep(10), %% Wait 10 ms before checking the name again.
+ ensure_dead(Name)
+ end.
+
diff --git a/lib/mnesia/src/mnesia_text.erl b/lib/mnesia/src/mnesia_text.erl
new file mode 100644
index 0000000000..f1a28bf43d
--- /dev/null
+++ b/lib/mnesia/src/mnesia_text.erl
@@ -0,0 +1,194 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_text).
+
+-export([parse/1, file/1, load_textfile/1, dump_to_textfile/1]).
+
+%% Parse File, create the tables it declares and load its data.  Data
+%% belonging to tables that could not be created is left out.  A parse
+%% failure is returned unchanged.
+load_textfile(File) ->
+    ensure_started(),
+    case parse(File) of
+        {ok, {Tabs, Data}} ->
+            Validated = lists:map(fun validate_tab/1, Tabs),
+            BadTabs = make_tabs(Validated),
+            Loadable = del_data(BadTabs, Data, []),
+            load_data(Loadable);
+        Other ->
+            Other
+    end.
+
+%% Dump the definitions and contents of all local active tables except
+%% schema to File, as Erlang terms.  Returns the result of closing the
+%% file, or the atom error when Mnesia is not running or File cannot be
+%% opened.
+dump_to_textfile(File) ->
+    %% Check the running state first: opening with [write] creates or
+    %% truncates File, which must not happen when nothing can be dumped.
+    case mnesia_lib:is_running() of
+        yes ->
+            dump_to_textfile(yes, file:open(File, [write]));
+        _ ->
+            error
+    end.
+dump_to_textfile(yes, {ok, F}) ->
+    Tabs = lists:delete(schema, mnesia_lib:local_active_tables()),
+    Defs = lists:map(fun(T) -> {T, [{record_name, mnesia_lib:val({T, record_name})},
+                                    {attributes, mnesia_lib:val({T, attributes})}]}
+                     end,
+                     Tabs),
+    io:format(F, "~p.~n", [{tables, Defs}]),
+    lists:foreach(fun(T) -> dump_tab(F, T) end, Tabs),
+    file:close(F);
+dump_to_textfile(_, {ok, F}) ->
+    %% Not running but the file was opened: do not leak the handle.
+    file:close(F),
+    error;
+dump_to_textfile(_,_) -> error.
+
+
+%% Write every record of table T to F, one term per line, with the
+%% first element of each record replaced by the table name.
+dump_tab(F, T) ->
+    Pattern = mnesia_lib:val({T, wild_pattern}),
+    ReadAll = fun() -> mnesia:match_object(T, Pattern, read) end,
+    {atomic, Records} = mnesia:transaction(ReadAll),
+    WriteOne = fun(Rec) ->
+                       io:format(F, "~p.~n", [setelement(1, Rec, T)])
+               end,
+    lists:foreach(WriteOne, Records).
+
+
+%% Start Mnesia unless it is already running, creating a schema for
+%% this node first when no schema.DAT exists in the Mnesia directory.
+ensure_started() ->
+    case mnesia_lib:is_running() of
+        yes ->
+            yes;
+        no ->
+            SchemaFile = mnesia_lib:dir("schema.DAT"),
+            case mnesia_lib:exists(SchemaFile) of
+                true ->
+                    ok;
+                false ->
+                    mnesia:create_schema([node()])
+            end,
+            mnesia:start()
+    end.
+
+%% Keep, in their original order, the records whose first element is
+%% not a member of Bad.  The accumulator Ack is reversed and placed in
+%% front of the kept records.
+del_data(Bad, Records, Ack) ->
+    Keep = fun(Rec) -> not lists:member(element(1, Rec), Bad) end,
+    lists:reverse(Ack) ++ lists:filter(Keep, Records).
+
+%% This is the place to call the validate function in mnesia_schema
+validate_tab({Tabname, List}) -> %% Two-element definitions pass through unchanged.
+ {Tabname, List};
+validate_tab({Tabname, RecName, List}) -> %% Three-element definitions pass through unchanged.
+ {Tabname, RecName, List};
+validate_tab(_) -> error(badtab). %% error/1 is this module's own helper; it throws {error, badtab}.
+
+%% Create every table in the list that does not exist yet.  Returns
+%% the names of the tables whose creation failed.
+make_tabs(TabDefs) ->
+    lists:flatmap(fun make_tab/1, TabDefs).
+
+%% Returns [Tab] when the table could not be created, otherwise [].
+make_tab({Tab, Def}) ->
+    case catch mnesia:table_info(Tab, where_to_read) of
+        {'EXIT', _} -> %% non-existing table
+            case mnesia:create_table(Tab, Def) of
+                {aborted, Reason} ->
+                    io:format("** Failed to create table ~w ~n"
+                              "** Reason = ~w, Args = ~p~n",
+                              [Tab, Reason, Def]),
+                    [Tab];
+                _ ->
+                    io:format("New table ~w~n", [Tab]),
+                    []
+            end;
+        Node ->
+            io:format("** Table ~w already exists on ~p, just entering data~n",
+                      [Tab, Node]),
+            []
+    end.
+
+%% Write all records in one transaction.  The first element of each
+%% record names its table and is replaced by that table's record name
+%% before writing.
+load_data(L) ->
+    WriteOne = fun(X) ->
+                       Tab = element(1, X),
+                       RecName = mnesia:table_info(Tab, record_name),
+                       mnesia:write(Tab, setelement(1, X, RecName), write)
+               end,
+    mnesia:transaction(fun() -> lists:foreach(WriteOne, L) end).
+
+%% Read File and split its terms into table definitions and data.
+%% Returns {ok, {Tabs, Data}}, or {error, Reason} when the file cannot
+%% be read or its contents are not acceptable.
+parse(File) ->
+    case file(File) of
+        {ok, Terms} ->
+            case catch collect(Terms) of
+                {error, X} ->
+                    {error, X};
+                {'EXIT', Reason} ->
+                    %% A crash while collecting (e.g. a malformed term)
+                    %% must not be handed back as {ok, {'EXIT', ...}}.
+                    {error, Reason};
+                Other ->
+                    {ok, Other}
+            end;
+        Other ->
+            Other
+    end.
+
+collect([{_, {tables, Tabs}}|L]) -> %% The first term must be the {tables, Tabs} header; the rest is data.
+ {Tabs, collect_data(Tabs, L)};
+
+collect(_) -> %% No header found: report and throw {error, bad_header} via error/1.
+ io:format("No tables found\n", []),
+ error(bad_header).
+
+%% Strip the line numbers from the {Line, Term} pairs, checking that
+%% the first element of every tuple term names a table listed in Tabs.
+%% Anything else is reported and aborts through error/1.
+collect_data(Tabs, [{Line, Term} | Tail]) when is_tuple(Term) ->
+    case lists:keymember(element(1, Term), 1, Tabs) of
+        true ->
+            [Term | collect_data(Tabs, Tail)];
+        false ->
+            io:format("Object:~p at line ~w unknown\n", [Term,Line]),
+            error(undefined_object)
+    end;
+collect_data(_Tabs, []) ->
+    [];
+collect_data(_Tabs, [Unknown | _Rest]) ->
+    io:format("Object:~p unknown\n", [Unknown]),
+    error(undefined_object).
+
+error(What) -> throw({error, What}). %% Non-local return; parse/1 catches it around collect/1.
+
+file(File) -> %% Read all terms of File; {ok, Terms}, {error, read} or {error, open}.
+ case file:open(File, [read]) of
+ {ok, Stream} ->
+ Res = read_terms(Stream, File, 1, []), %% Line numbering starts at 1.
+ file:close(Stream),
+ Res;
+ _Other -> %% Any open failure is reported as {error, open}.
+ {error, open}
+ end.
+
+read_terms(Stream, File, Line, L) -> %% Accumulate {Line, Term} pairs until eof or an error.
+ case read_term_from_stream(Stream, File, Line) of
+ {ok, Term, NextLine} ->
+ read_terms(Stream, File, NextLine, [Term|L]);
+ error ->
+ {error, read};
+ eof ->
+ {ok, lists:reverse(L)} %% Restore file order; the accumulator was built by prepending.
+ end.
+
+read_term_from_stream(Stream, File, Line) -> %% Scan and parse one term; {ok, {Line, Term}, EndLine} | eof | error.
+ R = io:request(Stream, {get_until,'',erl_scan,tokens,[Line]}),
+ case R of
+ {ok,Toks,EndLine} ->
+ case erl_parse:parse_term(Toks) of
+ {ok, Term} ->
+ {ok, {Line, Term}, EndLine}; %% The term is tagged with the line where scanning started.
+ {error, {NewLine,Mod,What}} -> %% Parse error: print the formatted reason.
+ Str = Mod:format_error(What),
+ io:format("Error in line:~p of:~p ~s\n",
+ [NewLine, File, Str]),
+ error;
+ T ->
+ io:format("Error2 **~p~n",[T]),
+ error
+ end;
+ {eof,_EndLine} ->
+ eof;
+ Other -> %% Any other reply from the I/O request is printed and treated as an error.
+ io:format("Error1 **~p~n",[Other]),
+ error
+ end.
+
+
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
new file mode 100644
index 0000000000..3f3a10a9c1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -0,0 +1,2301 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_tm).
+
+-export([
+ start/0,
+ init/1,
+ non_transaction/5,
+ transaction/6,
+ commit_participant/5,
+ dirty/2,
+ display_info/2,
+ do_update_op/3,
+ get_info/1,
+ get_transactions/0,
+ info/1,
+ mnesia_down/1,
+ prepare_checkpoint/2,
+ prepare_checkpoint/1, % Internal
+ prepare_snmp/3,
+ do_snmp/2,
+ put_activity_id/1,
+ put_activity_id/2,
+ block_tab/1,
+ unblock_tab/1,
+ fixtable/3
+ ]).
+
+%% sys callback functions
+-export([system_continue/3,
+ system_terminate/4,
+ system_code_change/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [set/2]).
+-import(mnesia_lib, [fatal/2, verbose/2, dbg_out/2]).
+
+-record(state, {coordinators = gb_trees:empty(), participants = gb_trees:empty(), supervisor,
+ blocked_tabs = [], dirty_queue = [], fixed_tabs = []}).
+%% coordinators is a gb_trees mapping Tid -> EtsTabList and
+%% participants is a gb_trees mapping Tid -> #participant{}.
+
+-record(prep, {protocol = sym_trans,
+ %% async_dirty | sync_dirty | sym_trans | sync_sym_trans | asym_trans
+ records = [],
+ prev_tab = [], % initiate to a non valid table name
+ prev_types,
+ prev_snmp,
+ types
+ }).
+
+-record(participant, {tid, pid, commit, disc_nodes = [],
+ ram_nodes = [], protocol = sym_trans}).
+
+%% Start the transaction manager: passes ?MODULE, init and the caller's
+%% pid to mnesia_monitor:start_proc/4 and returns its result.
+start() ->
+    Parent = self(),
+    mnesia_monitor:start_proc(?MODULE, ?MODULE, init, [Parent]).
+
+init(Parent) -> %% Startup sequence of the mnesia_tm process; Parent is kept as #state.supervisor
+ register(?MODULE, self()), % register under the module name
+ process_flag(trap_exit, true), % exits arrive as {'EXIT', Pid, Reason} messages in doit_loop/1
+
+ %% Initialize the schema
+ IgnoreFallback = mnesia_monitor:get_env(ignore_fallback_at_startup),
+ mnesia_bup:tm_fallback_start(IgnoreFallback),
+ mnesia_schema:init(IgnoreFallback),
+
+ %% Handshake and initialize transaction recovery
+ mnesia_recover:init(),
+ Early = mnesia_monitor:init(),
+ AllOthers = mnesia_lib:uniq(Early ++ mnesia_lib:all_nodes()) -- [node()], % every known node except this one
+ set(original_nodes, AllOthers),
+ mnesia_recover:connect_nodes(AllOthers),
+
+ %% Recover transactions, may wait for decision
+ case mnesia_monitor:use_dir() of
+ true ->
+ P = mnesia_dumper:opt_dump_log(startup), % previous log
+ L = mnesia_dumper:opt_dump_log(startup), % latest log
+ Msg = "Initial dump of log during startup: ~p~n",
+ mnesia_lib:verbose(Msg, [[P, L]]),
+ mnesia_log:init();
+ false ->
+ ignore
+ end,
+
+ mnesia_schema:purge_tmp_files(),
+ mnesia_recover:start_garb(),
+
+ ?eval_debug_fun({?MODULE, init}, [{nodes, AllOthers}]),
+
+ case val(debug) of
+ Debug when Debug /= debug, Debug /= trace -> % any level other than debug or trace
+ ignore;
+ _ ->
+ mnesia_subscr:subscribe(whereis(mnesia_event), {table, schema}) % subscribe mnesia_event to {table, schema}
+ end,
+ proc_lib:init_ack(Parent, {ok, self()}), % acknowledge startup to Parent
+ doit_loop(#state{supervisor = Parent}). % enter the server loop with an otherwise default state
+
+%% Look up Var through ?catch_val; when the lookup exits, the value is
+%% taken from mnesia_lib:other_val/2 instead.
+val(Var) ->
+    case ?catch_val(Var) of
+        {'EXIT', Reason} ->
+            mnesia_lib:other_val(Var, Reason);
+        Value ->
+            Value
+    end.
+
+%% Send the reply Answer to a client.  A client given as {Pid, Tag} gets
+%% {?MODULE, Tag, Answer}; a client given as a bare pid gets
+%% {?MODULE, node(), Answer}.
+reply({Pid, Tag}, Answer) ->
+    Pid ! {?MODULE, Tag, Answer};
+reply(Pid, Answer) ->
+    Pid ! {?MODULE, node(), Answer}.
+
+%% Send Answer to From with reply/2 and continue the server loop with
+%% State.
+reply(From, Answer, State) ->
+    reply(From, Answer),
+    doit_loop(State).
+
+%% Send Request to the locally registered mnesia_tm process, tagged
+%% with a fresh reference, and wait for the answer with rec/2.
+%% Returns {error, {node_not_running, node()}} when no such process is
+%% registered.
+req(Request) ->
+    case whereis(?MODULE) of
+        undefined ->
+            {error, {node_not_running, node()}};
+        Server ->
+            Tag = make_ref(),
+            Server ! {{self(), Tag}, Request},
+            rec(Server, Tag)
+    end.
+
+%% Wait for a reply from the locally registered mnesia_tm process.
+rec() ->
+    Server = whereis(?MODULE),
+    rec(Server).
+
+%% Wait for any {?MODULE, _, Reply} message and return Reply.  An exit
+%% signal from Server, or Server being undefined, yields
+%% {error, {node_not_running, node()}}.
+rec(undefined) ->
+    {error, {node_not_running, node()}};
+rec(Server) when is_pid(Server) ->
+    receive
+        {?MODULE, _Tag, Answer} ->
+            Answer;
+        {'EXIT', Server, _Why} ->
+            {error, {node_not_running, node()}}
+    end.
+
+%% Wait for the reply tagged with Tag and return it.  An exit signal
+%% from Server yields {error, {node_not_running, node()}}.
+rec(Server, Tag) ->
+    receive
+        {'EXIT', Server, _Why} ->
+            {error, {node_not_running, node()}};
+        {?MODULE, Tag, Answer} ->
+            Answer
+    end.
+
+%% Link to a client given either as {Pid, Ref} or as a bare pid.
+tmlink({Client, Tag}) when is_reference(Tag) ->
+    link(Client);
+tmlink(Client) ->
+    link(Client).
+%% Extract the pid from a client given as {Pid, Ref}; any other term is
+%% returned unchanged.
+tmpid({Client, _Tag}) when is_pid(Client) ->
+    Client;
+tmpid(Client) ->
+    Client.
+
+%% Tell mnesia_tm that mnesia on Node is down.
+%%
+%% The notification has to be handled synchronously in order to avoid a
+%% race with mnesia_tm's coordinator processes, which may restart and
+%% acquire new locks; mnesia_monitor takes care of the synchronization.
+%% When no mnesia_tm process is registered, mnesia_monitor is told
+%% directly, with an empty list of transaction ids.
+mnesia_down(Node) ->
+    case whereis(?MODULE) of
+        undefined ->
+            mnesia_monitor:mnesia_down(?MODULE, {Node, []});
+        Server ->
+            Server ! {mnesia_down, Node}
+    end.
+
+%% Call prepare_checkpoint/1 with Cp on every node in Nodes and return
+%% the rpc:multicall/4 result.
+prepare_checkpoint(Nodes, Cp) ->
+    Args = [Cp],
+    rpc:multicall(Nodes, ?MODULE, prepare_checkpoint, Args).
+
+%% Ask the local mnesia_tm process to prepare checkpoint Cp (this is
+%% the function prepare_checkpoint/2 calls remotely).
+prepare_checkpoint(Cp) ->
+    Request = {prepare_checkpoint, Cp},
+    req(Request).
+
+%% Ask the local mnesia_tm process to block Tab.
+block_tab(Tab) ->
+    Request = {block_tab, Tab},
+    req(Request).
+
+%% Ask the local mnesia_tm process to unblock Tab.
+unblock_tab(Tab) ->
+    Request = {unblock_tab, Tab},
+    req(Request).
+
+doit_loop(#state{coordinators=Coordinators,participants=Participants,supervisor=Sup}=State) -> %% Server loop: handle one message, then continue with the (possibly updated) state
+ receive
+ {_From, {async_dirty, Tid, Commit, Tab}} -> %% Dirty update without reply: run now, or queue while Tab is blocked
+ case lists:member(Tab, State#state.blocked_tabs) of
+ false ->
+ do_async_dirty(Tid, Commit, Tab),
+ doit_loop(State);
+ true ->
+ Item = {async_dirty, Tid, Commit, Tab},
+ State2 = State#state{dirty_queue = [Item | State#state.dirty_queue]}, % newest item first
+ doit_loop(State2)
+ end;
+
+ {From, {sync_dirty, Tid, Commit, Tab}} -> %% Dirty update whose result is sent to From: run now, or queue while Tab is blocked
+ case lists:member(Tab, State#state.blocked_tabs) of
+ false ->
+ do_sync_dirty(From, Tid, Commit, Tab),
+ doit_loop(State);
+ true ->
+ Item = {sync_dirty, From, Tid, Commit, Tab},
+ State2 = State#state{dirty_queue = [Item | State#state.dirty_queue]},
+ doit_loop(State2)
+ end;
+
+ {From, start_outer} -> %% Create and associate ets_tab with Tid
+ case catch ?ets_new_table(mnesia_trans_store, [bag, public]) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table for the "
+ "local transaction store",
+ reply(From, {error, {system_limit, Msg, Reason}}, State);
+ Etab ->
+ tmlink(From), % linked so that a dying coordinator is noticed via 'EXIT'
+ C = mnesia_recover:incr_trans_tid_serial(),
+ ?ets_insert(Etab, {nodes, node()}),
+ Tid = #tid{pid = tmpid(From), counter = C},
+ A2 = gb_trees:insert(Tid,[Etab],Coordinators),
+ S2 = State#state{coordinators = A2},
+ reply(From, {new_tid, Tid, Etab}, S2)
+ end;
+
+ {From, {ask_commit, Protocol, Tid, Commit, DiscNs, RamNs}} -> %% Register Tid as a participant transaction on this node
+ ?eval_debug_fun({?MODULE, doit_ask_commit},
+ [{tid, Tid}, {prot, Protocol}]),
+ mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ Pid =
+ case Protocol of
+ asym_trans when node(Tid#tid.pid) /= node() -> % handled by a separate linked commit_participant process
+ Args = [tmpid(From), Tid, Commit, DiscNs, RamNs],
+ spawn_link(?MODULE, commit_participant, Args);
+ _ when node(Tid#tid.pid) /= node() -> %% *_sym_trans
+ reply(From, {vote_yes, Tid}),
+ nopid
+ end,
+ P = #participant{tid = Tid,
+ pid = Pid,
+ commit = Commit,
+ disc_nodes = DiscNs,
+ ram_nodes = RamNs,
+ protocol = Protocol},
+ State2 = State#state{participants = gb_trees:insert(Tid,P,Participants)},
+ doit_loop(State2);
+
+ {Tid, do_commit} -> %% Commit a transaction this node participates in
+ case gb_trees:lookup(Tid, Participants) of
+ none ->
+ verbose("Tried to commit a non participant transaction ~p~n",[Tid]),
+ doit_loop(State);
+ {value, P} ->
+ ?eval_debug_fun({?MODULE,do_commit,pre},[{tid,Tid},{participant,P}]),
+ case P#participant.pid of
+ nopid -> % no separate participant process: commit here
+ Commit = P#participant.commit,
+ Member = lists:member(node(), P#participant.disc_nodes),
+ if Member == false -> % this node is not among the disc nodes: nothing is logged
+ ignore;
+ P#participant.protocol == sym_trans ->
+ mnesia_log:log(Commit);
+ P#participant.protocol == sync_sym_trans ->
+ mnesia_log:slog(Commit)
+ end,
+ mnesia_recover:note_decision(Tid, committed),
+ do_commit(Tid, Commit),
+ if
+ P#participant.protocol == sync_sym_trans -> % the coordinator is told about the commit
+ Tid#tid.pid ! {?MODULE, node(), {committed, Tid}};
+ true ->
+ ignore
+ end,
+ mnesia_locker:release_tid(Tid),
+ transaction_terminated(Tid),
+ ?eval_debug_fun({?MODULE,do_commit,post},[{tid,Tid},{pid,nopid}]),
+ doit_loop(State#state{participants=
+ gb_trees:delete(Tid,Participants)});
+ Pid when is_pid(Pid) -> % forward the decision to the participant process
+ Pid ! {Tid, committed},
+ ?eval_debug_fun({?MODULE, do_commit, post}, [{tid, Tid}, {pid, Pid}]),
+ doit_loop(State)
+ end
+ end;
+
+ {Tid, simple_commit} -> %% Note the decision and release the locks; no commit record is applied here
+ mnesia_recover:note_decision(Tid, committed),
+ mnesia_locker:release_tid(Tid),
+ transaction_terminated(Tid),
+ doit_loop(State);
+
+ {Tid, {do_abort, Reason}} -> %% Abort a transaction this node participates in
+ ?eval_debug_fun({?MODULE, do_abort, pre}, [{tid, Tid}]),
+ case gb_trees:lookup(Tid, Participants) of
+ none ->
+ verbose("Tried to abort a non participant transaction ~p: ~p~n",
+ [Tid, Reason]),
+ mnesia_locker:release_tid(Tid),
+ doit_loop(State);
+ {value, P} ->
+ case P#participant.pid of
+ nopid -> % no separate participant process: abort here
+ Commit = P#participant.commit,
+ mnesia_recover:note_decision(Tid, aborted),
+ do_abort(Tid, Commit),
+ if
+ P#participant.protocol == sync_sym_trans -> % the coordinator is told about the abort
+ Tid#tid.pid ! {?MODULE, node(), {aborted, Tid}};
+ true ->
+ ignore
+ end,
+ transaction_terminated(Tid),
+ mnesia_locker:release_tid(Tid),
+ ?eval_debug_fun({?MODULE, do_abort, post}, [{tid, Tid}, {pid, nopid}]),
+ doit_loop(State#state{participants=
+ gb_trees:delete(Tid,Participants)});
+ Pid when is_pid(Pid) -> % forward the abort to the participant process
+ Pid ! {Tid, {do_abort, Reason}},
+ ?eval_debug_fun({?MODULE, do_abort, post},
+ [{tid, Tid}, {pid, Pid}]),
+ doit_loop(State)
+ end
+ end;
+
+ {From, {add_store, Tid}} -> %% new store for nested transaction
+ case catch ?ets_new_table(mnesia_trans_store, [bag, public]) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table for a nested "
+ "local transaction store",
+ reply(From, {error, {system_limit, Msg, Reason}}, State);
+ Etab ->
+ A2 = add_coord_store(Coordinators, Tid, Etab),
+ reply(From, {new_store, Etab},
+ State#state{coordinators = A2})
+ end;
+
+ {From, {del_store, Tid, Current, Obsolete, PropagateStore}} -> %% Delete the Obsolete store of Tid, optionally copying some of its entries to Current first
+ opt_propagate_store(Current, Obsolete, PropagateStore),
+ A2 = del_coord_store(Coordinators, Tid, Current, Obsolete),
+ reply(From, store_erased, State#state{coordinators = A2});
+
+ {'EXIT', Pid, Reason} -> %% A linked process has terminated
+ handle_exit(Pid, Reason, State);
+
+ {From, {restart, Tid, Store}} -> %% Drop Tid's other stores and empty Store before the transaction is run again
+ A2 = restore_stores(Coordinators, Tid, Store),
+ clear_fixtable([Store]),
+ ?ets_match_delete(Store, '_'),
+ ?ets_insert(Store, {nodes, node()}),
+ reply(From, {restarted, Tid}, State#state{coordinators = A2});
+
+ {delete_transaction, Tid} ->
+ %% used to clear transactions which are committed
+ %% in coordinator or participant processes
+ case gb_trees:is_defined(Tid, Participants) of
+ false ->
+ case gb_trees:lookup(Tid, Coordinators) of
+ none ->
+ verbose("** ERROR ** Tried to delete a non transaction ~p~n",
+ [Tid]),
+ doit_loop(State);
+ {value, Etabs} ->
+ clear_fixtable(Etabs),
+ erase_ets_tabs(Etabs),
+ transaction_terminated(Tid),
+ doit_loop(State#state{coordinators =
+ gb_trees:delete(Tid,Coordinators)})
+ end;
+ true ->
+ transaction_terminated(Tid),
+ State2 = State#state{participants=gb_trees:delete(Tid,Participants)},
+ doit_loop(State2)
+ end;
+
+ {sync_trans_serial, Tid} ->
+ %% Do the Lamport thing here
+ mnesia_recover:sync_trans_tid_serial(Tid),
+ doit_loop(State);
+
+ {From, info} -> %% Report all participant records and all {Tid, Stores} coordinator entries
+ reply(From, {info, gb_trees:values(Participants),
+ gb_trees:to_list(Coordinators)}, State);
+
+ {mnesia_down, N} -> %% Mnesia on node N went down
+ verbose("Got mnesia_down from ~p, reconfiguring...~n", [N]),
+ reconfigure_coordinators(N, gb_trees:to_list(Coordinators)),
+
+ Tids = gb_trees:keys(Participants),
+ reconfigure_participants(N, gb_trees:values(Participants)),
+ NewState = clear_fixtable(N, State),
+ mnesia_monitor:mnesia_down(?MODULE, {N, Tids}),
+ doit_loop(NewState);
+
+ {From, {unblock_me, Tab}} -> %% Reply 'unblocked' at once if Tab is not blocked, otherwise when its queue is processed
+ case lists:member(Tab, State#state.blocked_tabs) of
+ false ->
+ verbose("Wrong dirty Op blocked on ~p ~p ~p",
+ [node(), Tab, From]),
+ reply(From, unblocked),
+ doit_loop(State);
+ true ->
+ Item = {Tab, unblock_me, From},
+ State2 = State#state{dirty_queue = [Item | State#state.dirty_queue]},
+ doit_loop(State2)
+ end;
+
+ {From, {block_tab, Tab}} -> %% Add one block on Tab (the same table may be listed more than once)
+ State2 = State#state{blocked_tabs = [Tab | State#state.blocked_tabs]},
+ reply(From, ok, State2);
+
+ {From, {unblock_tab, Tab}} -> %% Remove one block on Tab; the queue is processed when no block remains
+ BlockedTabs2 = State#state.blocked_tabs -- [Tab],
+ case lists:member(Tab, BlockedTabs2) of
+ false ->
+ mnesia_controller:unblock_table(Tab),
+ Queue = process_dirty_queue(Tab, State#state.dirty_queue),
+ State2 = State#state{blocked_tabs = BlockedTabs2,
+ dirty_queue = Queue},
+ reply(From, ok, State2);
+ true ->
+ State2 = State#state{blocked_tabs = BlockedTabs2},
+ reply(From, ok, State2)
+ end;
+
+ {From, {prepare_checkpoint, Cp}} -> %% Prepare checkpoint Cp and enter the pending transactions not listed in IgnoreNew
+ Res = mnesia_checkpoint:tm_prepare(Cp),
+ case Res of
+ {ok, _Name, IgnoreNew, _Node} ->
+ prepare_pending_coordinators(gb_trees:to_list(Coordinators), IgnoreNew),
+ prepare_pending_participants(gb_trees:values(Participants), IgnoreNew);
+ {error, _Reason} ->
+ ignore
+ end,
+ reply(From, Res, State);
+ {From, {fixtable, [Tab,Lock,Requester]}} -> %% Fix or unfix Tab locally and record it under the requester's node
+ case ?catch_val({Tab, storage_type}) of
+ {'EXIT', _} ->
+ reply(From, error, State);
+ Storage ->
+ mnesia_lib:db_fixtable(Storage,Tab,Lock),
+ NewState = manage_fixtable(Tab,Lock,Requester,State),
+ reply(From, node(), NewState)
+ end;
+
+ {system, From, Msg} -> %% sys message; handed to sys:handle_system_msg/6
+ dbg_out("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
+ sys:handle_system_msg(Msg, From, Sup, ?MODULE, [], State);
+
+ Msg -> %% Anything else is reported and dropped
+ verbose("** ERROR ** ~p got unexpected message: ~p~n", [?MODULE, Msg]),
+ doit_loop(State)
+ end.
+
+%% Perform the dirty update Commit and send its outcome (a caught
+%% exception included) to From as {?MODULE, node(), {dirty_res, Outcome}}.
+do_sync_dirty(From, Tid, Commit, _Tab) ->
+    ?eval_debug_fun({?MODULE, sync_dirty, pre}, [{tid, Tid}]),
+    Outcome = (catch do_dirty(Tid, Commit)),
+    ?eval_debug_fun({?MODULE, sync_dirty, post}, [{tid, Tid}]),
+    From ! {?MODULE, node(), {dirty_res, Outcome}}.
+
+%% Perform the dirty update Commit; its outcome, including any
+%% exception, is discarded.
+do_async_dirty(Tid, Commit, _Tab) ->
+    ?eval_debug_fun({?MODULE, async_dirty, pre}, [{tid, Tid}]),
+    _ = (catch do_dirty(Tid, Commit)),
+    ?eval_debug_fun({?MODULE, async_dirty, post}, [{tid, Tid}]).
+
+
+%% Run every queued item that belongs to Tab and return the remaining
+%% items in their original order.
+%%
+%% The queue holds the newest item first, so folding from the right
+%% processes the items in fifo order.
+process_dirty_queue(Tab, Queue) ->
+    Run = fun(Item, Kept) ->
+                  %% Tab is already bound, so the patterns below only
+                  %% match items queued for this table.
+                  case Item of
+                      {async_dirty, Tid, Commit, Tab} ->
+                          do_async_dirty(Tid, Commit, Tab),
+                          Kept;
+                      {sync_dirty, From, Tid, Commit, Tab} ->
+                          do_sync_dirty(From, Tid, Commit, Tab),
+                          Kept;
+                      {Tab, unblock_me, From} ->
+                          reply(From, unblocked),
+                          Kept;
+                      _ ->
+                          [Item | Kept]
+                  end
+          end,
+    lists:foldr(Run, [], Queue).
+
+%% For every coordinator entry {Tid, Stores}: if the first store holds
+%% a `pending' object and Tid is not listed in IgnoreNew, pass that
+%% object to mnesia_checkpoint:tm_enter_pending/1.  Stores without such
+%% an object, or whose lookup fails, are skipped.
+prepare_pending_coordinators([], _IgnoreNew) ->
+    ok;
+prepare_pending_coordinators([{Tid, [Store | _]} | Rest], IgnoreNew) ->
+    case catch ?ets_lookup(Store, pending) of
+        [Pending] ->
+            case lists:member(Tid, IgnoreNew) of
+                true ->
+                    ignore;
+                false ->
+                    mnesia_checkpoint:tm_enter_pending(Pending)
+            end;
+        [] ->
+            ok;
+        {'EXIT', _} ->
+            ok
+    end,
+    prepare_pending_coordinators(Rest, IgnoreNew).
+
+%% Enter every participant transaction that is not listed in IgnoreNew
+%% as pending in mnesia_checkpoint, together with its disc and ram
+%% nodes.
+prepare_pending_participants(Parts, IgnoreNew) ->
+    Enter = fun(Part) ->
+                    Tid = Part#participant.tid,
+                    DiscNs = Part#participant.disc_nodes,
+                    RamNs = Part#participant.ram_nodes,
+                    case lists:member(Tid, IgnoreNew) of
+                        true ->
+                            ignore;
+                        false ->
+                            mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs)
+                    end
+            end,
+    lists:foreach(Enter, Parts).
+
+%% React to an 'EXIT' message from Pid and continue the server loop.
+%%
+%% Exits from remote processes and from unknown local processes are
+%% ignored.  The supervisor's exit stops the server.  A dead local
+%% coordinator has its transaction recovered; a dead participant
+%% process is reported with fatal/2 and removed.
+handle_exit(Pid, _Reason, State) when node(Pid) /= node() ->
+    %% Exit from a process on another node
+    doit_loop(State);
+
+handle_exit(Pid, _Reason, State) when Pid == State#state.supervisor ->
+    %% Our supervisor has died, time to stop
+    do_stop(State);
+
+handle_exit(Pid, Reason, State) ->
+    Coords = gb_trees:to_list(State#state.coordinators),
+    case pid_search_delete(Pid, Coords) of
+        {{Tid, Etabs}, RestCoords} ->
+            %% A local coordinator has died: determine the outcome of
+            %% its transaction, tell mnesia_tm on the other nodes about
+            %% it and then recover locally.
+            recover_coordinator(Tid, Etabs),
+            doit_loop(State#state{coordinators = RestCoords});
+        {none, _} ->
+            %% Not a coordinator, maybe a participant process
+            Parts = gb_trees:values(State#state.participants),
+            case mnesia_lib:key_search_delete(Pid, #participant.pid, Parts) of
+                {none, _} ->
+                    %% Exit from an unrelated local process
+                    doit_loop(State);
+                {#participant{} = Dead, _Others} ->
+                    fatal("Participant ~p in transaction ~p died ~p~n",
+                          [Dead#participant.pid, Dead#participant.tid, Reason]),
+                    Remaining = gb_trees:delete(Dead#participant.tid,
+                                                State#state.participants),
+                    doit_loop(State#state{participants = Remaining})
+            end
+    end.
+
+%% Clean up after a local coordinator process that died in the middle
+%% of transaction Tid.  Etabs are the transaction's stores, current
+%% store first.
+%%
+%% The commit records are rebuilt from the current store with
+%% arrange/3, the outcome is settled with tell_outcome/5 and, when this
+%% node has a commit record, applied locally.  Finally the stores are
+%% deleted and the locks released.
+recover_coordinator(Tid, Etabs) ->
+    verbose("Coordinator ~p in transaction ~p died.~n", [Tid#tid.pid, Tid]),
+
+    Store = hd(Etabs),
+    CheckNodes = get_elements(nodes,Store),
+    TellNodes = CheckNodes -- [node()],
+    case catch arrange(Tid, Store, async) of
+        {'EXIT', Reason} ->
+            %% The format string needs one control sequence per
+            %% argument; with a single ~p for [Tid, Reason] the message
+            %% could not be formatted and the reason was never shown.
+            dbg_out("Recovery of coordinator ~p failed: ~p~n", [Tid, Reason]),
+            Protocol = asym_trans,
+            tell_outcome(Tid, Protocol, node(), CheckNodes, TellNodes);
+        {_N, Prep} ->
+            %% Tell the participants about the outcome
+            Protocol = Prep#prep.protocol,
+            Outcome = tell_outcome(Tid, Protocol, node(), CheckNodes, TellNodes),
+
+            %% Recover locally
+            CR = Prep#prep.records,
+            {DiscNs, RamNs} = commit_nodes(CR, [], []),
+            case lists:keysearch(node(), #commit.node, CR) of
+                {value, Local} ->
+                    ?eval_debug_fun({?MODULE, recover_coordinator, pre},
+                                    [{tid, Tid}, {outcome, Outcome}, {prot, Protocol}]),
+                    recover_coordinator(Tid, Protocol, Outcome, Local, DiscNs, RamNs),
+                    ?eval_debug_fun({?MODULE, recover_coordinator, post},
+                                    [{tid, Tid}, {outcome, Outcome}, {prot, Protocol}]);
+                false ->
+                    %% Killed before the store had been copied to the
+                    %% new nested transaction store.
+                    ok
+            end
+    end,
+    erase_ets_tabs(Etabs),
+    transaction_terminated(Tid),
+    mnesia_locker:release_tid(Tid).
+
+%% Apply the settled Outcome of Tid on this node; Local is this node's
+%% commit record.
+%%
+%% For the symmetric protocols the decision is noted and, on commit,
+%% the record is applied with do_dirty/2.  For asym_trans a #decision{}
+%% is logged and the record is applied with do_commit/2 or undone with
+%% do_abort/2.
+recover_coordinator(Tid, Protocol, committed, Local, _DiscNs, _RamNs)
+  when Protocol == sym_trans; Protocol == sync_sym_trans ->
+    mnesia_recover:note_decision(Tid, committed),
+    do_dirty(Tid, Local);
+recover_coordinator(Tid, Protocol, aborted, _Local, _DiscNs, _RamNs)
+  when Protocol == sym_trans; Protocol == sync_sym_trans ->
+    mnesia_recover:note_decision(Tid, aborted);
+recover_coordinator(Tid, asym_trans, Outcome, Local, DiscNs, RamNs)
+  when Outcome == committed; Outcome == aborted ->
+    Decision = #decision{tid = Tid, outcome = Outcome,
+                         disc_nodes = DiscNs, ram_nodes = RamNs},
+    mnesia_recover:log_decision(Decision),
+    case Outcome of
+        committed -> do_commit(Tid, Local);
+        aborted -> do_abort(Tid, Local)
+    end.
+
+%% Delete every store of Tid except Store and return the coordinator
+%% tree with Store as the only store of Tid.
+restore_stores(Coords, Tid, Store) ->
+    AllStores = gb_trees:get(Tid, Coords),
+    erase_ets_tabs(lists:delete(Store, AllStores)),
+    gb_trees:update(Tid, [Store], Coords).
+
+%% Return the coordinator tree with Etab put in front of the stores of
+%% Tid.
+add_coord_store(Coords, Tid, Etab) ->
+    Existing = gb_trees:get(Tid, Coords),
+    gb_trees:update(Tid, [Etab | Existing], Coords).
+
+%% Delete the ets table Obsolete and return the coordinator tree in
+%% which Current has taken the place of the two leading stores of Tid.
+%% Those two stores must be Current and Obsolete, in either order.
+del_coord_store(Coords, Tid, Current, Obsolete) ->
+    Older =
+        case gb_trees:get(Tid, Coords) of
+            [Obsolete, Current | Tail1] -> Tail1;
+            [Current, Obsolete | Tail2] -> Tail2
+        end,
+    ?ets_delete_table(Obsolete),
+    gb_trees:update(Tid, [Current | Older], Coords).
+
+%% Delete every ets table in the list; returns ok.
+erase_ets_tabs(Tabs) ->
+    lists:foreach(fun(Tab) -> ?ets_delete_table(Tab) end, Tabs).
+
+%% Release all table fixes of one transaction: for every {Tab, Node}
+%% stored under `fixtable' in the first store, cast an unfix request to
+%% ?MODULE:fixtable/3 on Node.
+clear_fixtable([Store | _]) ->
+    Requester = self(),
+    Unfix = fun({Tab, Node}) ->
+                    rpc:cast(Node, ?MODULE, fixtable, [Tab,false,Requester])
+            end,
+    lists:foreach(Unfix, get_elements(fixtable, Store)).
+
+%% Release every table fix recorded for Node and return the state
+%% without the entry for Node.  Tables whose storage type cannot be
+%% looked up are skipped.
+clear_fixtable(Node, #state{fixed_tabs = Fixed0} = State) ->
+    case mnesia_lib:key_search_delete(Node, 1, Fixed0) of
+        {{Node, Tabs}, Fixed} ->
+            Release = fun(Tab) ->
+                              case ?catch_val({Tab, storage_type}) of
+                                  {'EXIT', _} ->
+                                      ignore;
+                                  Storage ->
+                                      mnesia_lib:db_fixtable(Storage,Tab,false)
+                              end
+                      end,
+            lists:foreach(Release, Tabs),
+            State#state{fixed_tabs = Fixed};
+        {none, _} ->
+            State
+    end.
+
+%% Keep track of which tables each node has fixed.
+%% With `true', Tab is added to the list kept for the requester's node;
+%% with `false', one occurrence of Tab is removed from it and the
+%% node's entry is dropped once its list is empty.
+manage_fixtable(Tab, true, Requester, #state{fixed_tabs = Fixed0} = State) ->
+    Node = node(Requester),
+    {Tabs, Others} =
+        case mnesia_lib:key_search_delete(Node, 1, Fixed0) of
+            {none, Rest} -> {[], Rest};
+            {{Node, Known}, Rest} -> {Known, Rest}
+        end,
+    State#state{fixed_tabs = [{Node, [Tab | Tabs]} | Others]};
+manage_fixtable(Tab, false, Requester, #state{fixed_tabs = Fixed0} = State) ->
+    Node = node(Requester),
+    case mnesia_lib:key_search_delete(Node, 1, Fixed0) of
+        {{Node, Known}, Others} ->
+            case lists:delete(Tab, Known) of
+                [] -> State#state{fixed_tabs = Others};
+                Left -> State#state{fixed_tabs = [{Node, Left} | Others]}
+            end;
+        {none, _} ->
+            %% Nothing recorded for this node; leave the state as is
+            State
+    end.
+
+%% Search the ordered {Tid, Stores} list Trs for transactions owned by
+%% Pid.  Returns {Found, Rest}: Found is the last matching entry, or
+%% none, and Rest is a gb_trees built from the entries that did not
+%% match.
+pid_search_delete(Pid, Trs) ->
+    pid_search_delete(Pid, Trs, none, []).
+
+pid_search_delete(_Pid, [], Found, Kept) ->
+    {Found, gb_trees:from_orddict(lists:reverse(Kept))};
+pid_search_delete(Pid, [{#tid{pid = Owner}, _Stores} = Hit | Rest], _Found, Kept)
+  when Owner == Pid ->
+    pid_search_delete(Pid, Rest, Hit, Kept);
+pid_search_delete(Pid, [Other | Rest], Found, Kept) ->
+    pid_search_delete(Pid, Rest, Found, [Other | Kept]).
+
+%% Final bookkeeping for Tid: leave the checkpoint's pending set, then
+%% unlink from a local owner, or sync the transaction serial with that
+%% of a remote owner.
+transaction_terminated(Tid) ->
+    mnesia_checkpoint:tm_exit_pending(Tid),
+    Owner = Tid#tid.pid,
+    %% node/1 is deliberately evaluated in a guard: a value it rejects
+    %% selects the second branch instead of raising.
+    if
+        node(Owner) == node() ->
+            unlink(Owner);
+        true ->
+            %% Do the Lamport thing here
+            mnesia_recover:sync_trans_tid_serial(Tid)
+    end.
+
+%% Run apply(Fun, Args) as an activity of kind ActivityKind.
+%%
+%% If there is a surrounding transaction we inherit its context and run
+%% as a transaction (sync for sync_dirty, async otherwise); an abort is
+%% turned into an exit.  Otherwise the activity state is set to a
+%% non_transaction marker while Fun runs and restored afterwards, and
+%% exits, aborts and throws from Fun are passed on to the caller.
+non_transaction(OldState={_,_,Trans}, Fun, Args, ActivityKind, Mod)
+  when Trans /= non_transaction ->
+    Kind =
+        case ActivityKind of
+            sync_dirty -> sync;
+            _ -> async
+        end,
+    case transaction(OldState, Fun, Args, infinity, Mod, Kind) of
+        {aborted, Reason} ->
+            exit(Reason);
+        {atomic, Value} ->
+            Value
+    end;
+non_transaction(OldState, Fun, Args, ActivityKind, Mod) ->
+    put(mnesia_activity_state, {Mod, {ActivityKind, self()}, non_transaction}),
+    %% A unique marker tells a normal return apart from a thrown term;
+    %% an atom is used because references are expensive.
+    Marker = mNeSia_nOn_TrAnSacTioN,
+    Tagged = (catch {Marker, apply(Fun, Args)}),
+    case OldState of
+        undefined -> erase(mnesia_activity_state);
+        _ -> put(mnesia_activity_state, OldState)
+    end,
+    case Tagged of
+        {Marker, {'EXIT', Reason}} ->
+            exit(Reason);
+        {Marker, {aborted, Reason}} ->
+            mnesia:abort(Reason);
+        {Marker, Value} ->
+            Value;
+        {'EXIT', Reason} ->
+            exit(Reason);
+        Thrown ->
+            throw(Thrown)
+    end.
+
+%% Run apply(Fun, Args) as a transaction.  OldTidTs is the caller's
+%% current activity state and decides whether this becomes an outer
+%% transaction (no state, or inside a dirty activity) or a nested one.
+%% Any other state yields {aborted, nested_transaction}.
+transaction(OldTidTs, Fun, Args, Retries, Mod, Type) ->
+    case OldTidTs of
+        undefined ->
+            %% Outer
+            execute_outer(Mod, Fun, Args, 1, Retries, Type);
+        {_, _, non_transaction} ->
+            %% Transaction inside a dirty activity: put the dirty
+            %% context back when the transaction is done.
+            Result = execute_outer(Mod, Fun, Args, 1, Retries, Type),
+            put(mnesia_activity_state, OldTidTs),
+            Result;
+        {OldMod, Tid, Ts} ->
+            %% Nested
+            execute_inner(Mod, Tid, OldMod, Ts, Fun, Args, 1, Retries, Type);
+        _ ->
+            %% Bad nesting
+            {aborted, nested_transaction}
+    end.
+
+%% Start a top-level transaction: get a Tid and a store from mnesia_tm,
+%% install them as the activity state and run the fun.  Returns
+%% {aborted, Reason} when no transaction could be started.
+execute_outer(Mod, Fun, Args, Factor, Retries, Type) ->
+    case req(start_outer) of
+        {new_tid, Tid, Store} ->
+            put(mnesia_activity_state, {Mod, Tid, #tidstore{store = Store}}),
+            execute_transaction(Fun, Args, Factor, Retries, Type);
+        {error, Reason} ->
+            {aborted, Reason}
+    end.
+
+%% Start a nested transaction inside Tid: get a new store from
+%% mnesia_tm, copy the parent store into it, push the parent store on
+%% up_stores, raise the level by one and run the fun.
+execute_inner(Mod, Tid, OldMod, Ts, Fun, Args, Factor, Retries, Type) ->
+    case req({add_store, Tid}) of
+        {new_store, Nested} ->
+            Parent = Ts#tidstore.store,
+            copy_ets(Parent, Nested),
+            NestedTs = Ts#tidstore{level = 1 + Ts#tidstore.level,
+                                   store = Nested,
+                                   up_stores = [{OldMod, Parent} | Ts#tidstore.up_stores]},
+            put(mnesia_activity_state, {Mod, Tid, NestedTs}),
+            execute_transaction(Fun, Args, Factor, Retries, Type);
+        {error, Reason} ->
+            {aborted, Reason}
+    end.
+
+%% Copy every object of the ets table From into To, walking the keys
+%% of From one at a time.
+copy_ets(From, To) ->
+    do_copy_ets(?ets_first(From), From, To).
+
+do_copy_ets('$end_of_table', _From, _To) ->
+    ok;
+do_copy_ets(Key, From, To) ->
+    insert_objs(?ets_lookup(From, Key), To),
+    do_copy_ets(?ets_next(From, Key), From, To).
+
+%% Insert each object of the list into the ets table Tab; returns ok.
+insert_objs(Objs, Tab) ->
+    lists:foreach(fun(Obj) -> ?ets_insert(Tab, Obj) end, Objs).
+
+%% Run the fun and commit.  Returns {atomic, Value} on success.  An
+%% exit is passed to check_exit/6, which may restart the transaction;
+%% a term thrown by the user aborts it with {throw, Term}.
+execute_transaction(Fun, Args, Factor, Retries, Type) ->
+    case catch apply_fun(Fun, Args, Type) of
+        {atomic, Value} ->
+            mnesia_lib:incr_counter(trans_commits),
+            erase(mnesia_activity_state),
+            %% The locks have already been cleared by the commit.
+            %% Flush any unprocessed mnesia_down messages we might have.
+            flush_downs(),
+            catch unlink(whereis(?MODULE)),
+            {atomic, Value};
+        {nested_atomic, Value} ->
+            mnesia_lib:incr_counter(trans_commits),
+            {atomic, Value};
+        {'EXIT', Reason} ->
+            check_exit(Fun, Args, Factor, Retries, Reason, Type);
+        Thrown ->
+            %% User called throw
+            return_abort(Fun, Args, {aborted, {throw, Thrown}})
+    end.
+
+%% Apply the fun and try to commit.  The result is tagged atomic or
+%% nested_atomic depending on the kind of commit; a refused commit is
+%% returned as {'EXIT', {aborted, Reason}}.
+apply_fun(Fun, Args, Type) ->
+    Value = apply(Fun, Args),
+    case t_commit(Type) of
+        do_commit ->
+            {atomic, Value};
+        do_commit_nested ->
+            {nested_atomic, Value};
+        {do_abort, {aborted, _} = Abort} ->
+            {'EXIT', Abort};
+        {do_abort, Reason} ->
+            {'EXIT', {aborted, Reason}}
+    end.
+
+%% Decide what to do after the transaction fun exited with Reason:
+%% a #cyclic{} abort, a node_not_running abort and a bad_commit abort
+%% may lead to a restart, anything else aborts the transaction.
+check_exit(Fun, Args, Factor, Retries, Reason, Type) ->
+    case Reason of
+        {aborted, #cyclic{} = Cyclic} ->
+            maybe_restart(Fun, Args, Factor, Retries, Type, Cyclic);
+        {aborted, {Tag, _Node} = Why}
+          when Tag == node_not_running; Tag == bad_commit ->
+            maybe_restart(Fun, Args, Factor, Retries, Type, Why);
+        _ ->
+            return_abort(Fun, Args, Reason)
+    end.
+
+%% Restart the transaction if retries remain and it is a top-level one
+%% (level 1).  A nested transaction is aborted with Why; when no
+%% retries remain the transaction is aborted with {aborted, nomore}.
+maybe_restart(Fun, Args, Factor, Retries, Type, Why) ->
+    {Mod, Tid, Ts} = get(mnesia_activity_state),
+    case try_again(Retries) of
+        yes when Ts#tidstore.level == 1 ->
+            restart(Mod, Tid, Ts, Fun, Args, Factor, Retries, Type, Why);
+        yes ->
+            return_abort(Fun, Args, Why);
+        no ->
+            return_abort(Fun, Args, {aborted, nomore})
+    end.
+
+%% yes if the retry budget allows another attempt (infinity, or a
+%% number greater than 1), otherwise no.
+try_again(Retries) ->
+    case Retries of
+        infinity -> yes;
+        N when is_number(N), N > 1 -> yes;
+        _ -> no
+    end.
+
+%% We can only restart toplevel transactions.
+%% If a deadlock situation occurs in a nested transaction,
+%% the whole thing including all nested transactions needs to be
+%% restarted. The stack is thus popped by a consecutive series of
+%% exit({aborted, #cyclic{}}) calls.
+%%
+%% After bad_commit or node_not_running the transaction is given up and
+%% a new outer transaction is started after a random delay.  In all
+%% other cases the same Tid is reused: the store is reset by mnesia_tm,
+%% the locks are released and the fun is run again with the back-off
+%% factor raised by one.
+restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
+    mnesia_lib:incr_counter(trans_restarts),
+    Retries = decr(Retries0),
+    case Why of
+        {Tag, _Node} when Tag == bad_commit; Tag == node_not_running ->
+            %% For node_not_running this avoids hanging in
+            %% receive_release_tid_ack.
+            return_abort(Fun, Args, Why),
+            Factor = 1,
+            Delay = mnesia_lib:random_time(Factor, Tid#tid.counter),
+            dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, Delay, Why]),
+            timer:sleep(Delay),
+            execute_outer(Mod, Fun, Args, Factor, Retries, Type);
+        _ ->
+            Delay = mnesia_lib:random_time(Factor0, Tid#tid.counter),
+            dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, Delay, Why]),
+
+            if
+                Factor0 /= 10 ->
+                    ignore;
+                true ->
+                    %% Our serial may be much larger than that of the
+                    %% other nodes
+                    AllNodes = val({current, db_nodes}),
+                    verbose("Sync serial ~p~n", [Tid]),
+                    rpc:abcast(AllNodes, ?MODULE, {sync_trans_serial, Tid})
+            end,
+            intercept_friends(Tid, Ts),
+            Store = Ts#tidstore.store,
+            Nodes = get_elements(nodes,Store),
+            ?MODULE ! {self(), {restart, Tid, Store}},
+            mnesia_locker:send_release_tid(Nodes, Tid),
+            timer:sleep(Delay),
+            mnesia_locker:receive_release_tid_acc(Nodes, Tid),
+            case get_restarted(Tid) of
+                {restarted, Tid} ->
+                    execute_transaction(Fun, Args, Factor0 + 1,
+                                        Retries, Type);
+                {error, Reason} ->
+                    mnesia:abort(Reason)
+            end
+    end.
+
+%% Wait until mnesia_tm confirms the restart of Tid or reports an
+%% error.  Other replies (e.g. a couple of aborts too many) are
+%% dropped.
+get_restarted(Tid) ->
+    case rec() of
+        {restarted, Tid} = Restarted ->
+            Restarted;
+        {error, _} = Error ->
+            Error;
+        _Stale ->
+            get_restarted(Tid)
+    end.
+
+%% Use up one retry: infinity stays infinity, an integer greater than 1
+%% is decremented, anything else becomes 0.
+decr(Retries) ->
+    case Retries of
+        infinity -> infinity;
+        N when is_integer(N), N > 1 -> N - 1;
+        _ -> 0
+    end.
+
+%% Abort the current transaction level because of Reason.
+%%
+%% At level 1 the locks are released, mnesia_tm is told to delete the
+%% transaction, the activity state is erased and {aborted, Reason} is
+%% returned.  In a nested transaction the failed store is dropped and
+%% the parent level is reinstated; #cyclic{}, node_not_running and
+%% bad_commit are then passed outwards as exits so that the whole
+%% transaction can be restarted, other reasons are returned as
+%% {aborted, Reason}.
+return_abort(Fun, Args, Reason) ->
+    {_Mod, Tid, Ts} = get(mnesia_activity_state),
+    dbg_out("Transaction ~p calling ~p with ~p failed: ~n ~p~n",
+            [Tid, Fun, Args, Reason]),
+    FailedStore = Ts#tidstore.store,
+    Nodes = get_elements(nodes, FailedStore),
+    intercept_friends(Tid, Ts),
+    catch mnesia_lib:incr_counter(trans_failures),
+    Level = Ts#tidstore.level,
+    if
+        Level == 1 ->
+            mnesia_locker:async_release_tid(Nodes, Tid),
+            ?MODULE ! {delete_transaction, Tid},
+            erase(mnesia_activity_state),
+            flush_downs(),
+            catch unlink(whereis(?MODULE)),
+            {aborted, mnesia_lib:fix_error(Reason)};
+        true ->
+            %% Nested transaction
+            [{OldMod, ParentStore} | Outer] = Ts#tidstore.up_stores,
+            req({del_store, Tid, ParentStore, FailedStore, true}),
+            ParentTs = Ts#tidstore{store = ParentStore,
+                                   up_stores = Outer,
+                                   level = Level - 1},
+            put(mnesia_activity_state, {OldMod, Tid, ParentTs}),
+            case Reason of
+                #cyclic{} ->
+                    exit({aborted, Reason});
+                {Tag, _N} when Tag == node_not_running; Tag == bad_commit ->
+                    exit({aborted, Reason});
+                _ ->
+                    {aborted, mnesia_lib:fix_error(Reason)}
+            end
+    end.
+
+%% Drop every {mnesia_down, _} message and every {?MODULE, _, _}
+%% message (e.g. votes) waiting in the mailbox; returns flushed.
+flush_downs() ->
+    receive
+        {mnesia_down, _Node} -> flush_downs();
+        {?MODULE, _, _} -> flush_downs()
+    after 0 ->
+        flushed
+    end.
+
+
+%% Install MTT as the activity state of the calling process.
+%%
+%% undefined erases the state.  For a transaction state the caller is
+%% also recorded as a `friends' entry in the transaction store: as
+%% {stop, Fun} when Fun is a function, otherwise as the caller's pid.
+%% Any other state is stored as is.
+put_activity_id(MTT) ->
+    put_activity_id(MTT, undefined).
+
+put_activity_id(undefined, _) ->
+    erase_activity_id();
+put_activity_id({Mod, #tid{} = Tid, #tidstore{store = Store} = Ts}, Fun) ->
+    flush_downs(),
+    Friend =
+        case is_function(Fun) of
+            true -> {stop,Fun};
+            false -> self()
+        end,
+    ?ets_insert(Store, {friends, Friend}),
+    put(mnesia_activity_state, {Mod, Tid, Ts});
+put_activity_id(SimpleState, _) ->
+    put(mnesia_activity_state, SimpleState).
+
+%% Flush pending down/vote messages and erase the activity state of
+%% the calling process; returns the erased state.
+erase_activity_id() ->
+    flushed = flush_downs(),
+    erase(mnesia_activity_state).
+
+%% Return the values of all {Type, Value} objects in Store, or [] when
+%% there are none or the lookup fails.
+get_elements(Type, Store) ->
+    case catch ?ets_lookup(Store, Type) of
+        {'EXIT', _} ->
+            [];
+        Objs ->
+            [Val || {_, Val} <- Objs]
+    end.
+
+%% When the flag is true, copy the nodes, fixtable and friends entries
+%% of the Obsolete store into Current; otherwise do nothing.
+opt_propagate_store(Current, Obsolete, true) ->
+    Copy = fun(Var) ->
+                   propagate_store(Current, Var, get_elements(Var, Obsolete))
+           end,
+    lists:foreach(Copy, [nodes, fixtable, friends]);
+opt_propagate_store(_Current, _Obsolete, false) ->
+    ok.
+
+%% Insert {Var, Val} into Store for every Val in the list; returns ok.
+propagate_store(Store, Var, Vals) ->
+    lists:foreach(fun(Val) -> ?ets_insert(Store, {Var, Val}) end, Vals).
+
+%% Tell the processes cooperating with the current transaction (the
+%% `friends' entries of its store) that the activity has ended.
+intercept_friends(_Tid, Ts) ->
+    Store = Ts#tidstore.store,
+    intercept_best_friend(get_elements(friends, Store), false).
+
+%% Walk the list of friends.  Every {stop, Fun} entry has its fun
+%% called, ignoring failures.  Only the first pid in the list is sent
+%% {activity_ended, undefined, self()} and waited for; the second
+%% argument records that this has been done, and later pids are
+%% skipped.
+intercept_best_friend([], _Notified) ->
+    ok;
+intercept_best_friend([{stop, Stop} | Rest], Notified) ->
+    _ = (catch Stop()),
+    intercept_best_friend(Rest, Notified);
+intercept_best_friend([Friend | Rest], false) ->
+    Friend ! {activity_ended, undefined, self()},
+    wait_for_best_friend(Friend, 0),
+    intercept_best_friend(Rest, true);
+intercept_best_friend([_ | Rest], true) ->
+    intercept_best_friend(Rest, true).
+
+%% Wait until Friend acknowledges with {activity_ended, _, Friend} or
+%% its exit message arrives.  After Timeout ms without either, return
+%% ok if Friend is dead, otherwise keep waiting in 1000 ms rounds.
+wait_for_best_friend(Friend, Timeout) ->
+    receive
+        {activity_ended, _, Friend} -> ok;
+        {'EXIT', Friend, _} -> ok
+    after Timeout ->
+        case my_process_is_alive(Friend) of
+            false -> ok;
+            true -> wait_for_best_friend(Friend, 1000)
+        end
+    end.
+
+%% true if the local process Pid is alive, otherwise false.
+%%
+%% The former fallback for runtimes without erlang:is_process_alive/1
+%% (pre R5) has been removed.  It was only reached when the BIF itself
+%% failed, and process_info/2 rejects the same arguments, so the
+%% old-style `catch' merely postponed the badarg.
+my_process_is_alive(Pid) ->
+    erlang:is_process_alive(Pid).
+
+%% Perform the single dirty operation Item = {{Tab, Key}, Val, Op}
+%% with Protocol async_dirty or sync_dirty; any other protocol aborts
+%% with {bad_activity, Protocol}.
+dirty(Protocol, Item) ->
+    {{Tab, Key}, _Val, _Op} = Item,
+    Tid = {dirty, self()},
+    Prep = prepare_items(Tid, Tab, Key, [Item], #prep{protocol= Protocol}),
+    CR = Prep#prep.records,
+    {WaitFor, FirstRes} =
+        case Protocol of
+            async_dirty ->
+                %% Send commit records to the other involved nodes,
+                %% but only wait for one node to complete.
+                %% Preferably the local node, if possible.
+                ReadNode = val({Tab, where_to_read}),
+                async_send_dirty(Tid, CR, Tab, ReadNode);
+            sync_dirty ->
+                %% Send commit records to the other involved nodes,
+                %% and wait for all nodes to complete
+                sync_send_dirty(Tid, CR, Tab, []);
+            _ ->
+                mnesia:abort({bad_activity, Protocol})
+        end,
+    rec_dirty(WaitFor, FirstRes).
+
+%% This is the commit function. It executes in the context of the
+%% user process and works on the activity state kept in the process
+%% dictionary. All of the mnesia_locker:lock* functions insert the
+%% names of the nodes where they acquire locks into the local shadow
+%% Store, which is how the participating nodes are found.
+%%
+%% At the outermost level the commit records are arranged and handed
+%% to multi_commit/4 (with the read_only protocol when nothing was
+%% updated). At a nested level a del_store request is issued, the
+%% state of the enclosing level is put back and do_commit_nested is
+%% returned.
+t_commit(Type) ->
+    {_Mod, Tid, Ts} = get(mnesia_activity_state),
+    Store = Ts#tidstore.store,
+    case Ts#tidstore.level of
+        Level when Level == 1 ->
+            intercept_friends(Tid, Ts),
+            %% NumUpdates is the number of updates
+            case arrange(Tid, Store, Type) of
+                {NumUpdates, Prep} when NumUpdates > 0 ->
+                    multi_commit(Prep#prep.protocol, Tid,
+                                 Prep#prep.records, Store);
+                {0, Prep} ->
+                    multi_commit(read_only, Tid, Prep#prep.records, Store)
+            end;
+        Level ->
+            %% Nested commit
+            [{OldMod, Obsolete} | Tail] = Ts#tidstore.up_stores,
+            req({del_store, Tid, Store, Obsolete, false}),
+            Outer = Ts#tidstore{store = Store,
+                                up_stores = Tail,
+                                level = Level - 1},
+            put(mnesia_activity_state, {OldMod, Tid, Outer}),
+            do_commit_nested
+    end.
+
+%% Arrange all objects that shall be written in Store into a list of
+%% commit records, one per node. Returns {N, Prep} where N is the
+%% count produced by do_arrange/5. A failure in do_arrange/5 is
+%% turned into an abort of the transaction.
+%% Important function for the performance of mnesia.
+
+arrange(Tid, Store, Type) ->
+    %% The local node is always included
+    Recs = prep_recs(get_elements(nodes, Store), []),
+    FirstKey = ?ets_first(Store),
+    Prep0 =
+        case Type of
+            async -> #prep{protocol = sym_trans, records = Recs};
+            sync -> #prep{protocol = sync_sym_trans, records = Recs}
+        end,
+    case catch do_arrange(Tid, Store, FirstKey, Prep0, 0) of
+        {'EXIT', Reason} ->
+            dbg_out("do_arrange failed ~p ~p~n", [Reason, Tid]),
+            %% Unwrap an abort so that it is not wrapped twice
+            AbortReason =
+                case Reason of
+                    {aborted, R} -> R;
+                    _ -> Reason
+                end,
+            mnesia:abort(AbortReason);
+        {Count, Prepared} ->
+            Ordered = reverse(Prepared#prep.records),
+            {Count, Prepared#prep{records = Ordered}}
+    end.
+
+%% Reverse the item lists (ram_copies, disc_copies, disc_only_copies
+%% and snmp) inside every commit record. The order of the commit
+%% records themselves is kept.
+reverse(CommitRecs) ->
+    lists:map(
+      fun(#commit{ram_copies = Ram, disc_copies = DC,
+                  disc_only_copies = DOC, snmp = Snmp} = C) ->
+              C#commit{ram_copies = lists:reverse(Ram),
+                       disc_copies = lists:reverse(DC),
+                       disc_only_copies = lists:reverse(DOC),
+                       snmp = lists:reverse(Snmp)}
+      end,
+      CommitRecs).
+
+%% Put an empty presume_commit commit record for each node in front
+%% of Recs. The records end up in the reverse order of Nodes.
+prep_recs(Nodes, Recs) ->
+    lists:foldl(
+      fun(N, Acc) -> [#commit{decision = presume_commit, node = N} | Acc] end,
+      Recs,
+      Nodes).
+
+%% Walk the keys of Store, starting at the given key, and fold what is
+%% stored under each key into Prep. Returns {N, Prep} where N counts
+%% the {Tab, Key}, op and restore_op keys that were visited; all
+%% other keys are skipped without being counted.
+%% storage_types is a list of {Node, Storage} tuples
+%% where each tuple represents an active replica
+do_arrange(Tid, Store, {Tab, Key}, Prep, N) ->
+ %% Ordinary table update
+ Oid = {Tab, Key},
+ Items = ?ets_lookup(Store, Oid), %% Store is a bag
+ P2 = prepare_items(Tid, Tab, Key, Items, Prep),
+ do_arrange(Tid, Store, ?ets_next(Store, Oid), P2, N + 1);
+do_arrange(Tid, Store, SchemaKey, Prep, N) when SchemaKey == op ->
+ %% Schema operations
+ Items = ?ets_lookup(Store, SchemaKey), %% Store is a bag
+ P2 = prepare_schema_items(Tid, Items, Prep),
+ do_arrange(Tid, Store, ?ets_next(Store, SchemaKey), P2, N + 1);
+do_arrange(Tid, Store, RestoreKey, Prep, N) when RestoreKey == restore_op ->
+ %% Restore: mnesia_schema:arrange_restore/3 is given a fun that turns
+ %% each restore entry into a delete item (first fun clause) or a
+ %% write item (second fun clause). The protocol is forced to
+ %% asym_trans.
+ [{restore_op, R}] = ?ets_lookup(Store, RestoreKey),
+ Fun = fun({Tab, Key}, CommitRecs, _RecName, Where, Snmp) ->
+ Item = [{{Tab, Key}, {Tab, Key}, delete}],
+ do_prepare_items(Tid, Tab, Key, Where, Snmp, Item, CommitRecs);
+ (BupRec, CommitRecs, RecName, Where, Snmp) ->
+ Tab = element(1, BupRec),
+ Key = element(2, BupRec),
+ Item =
+ if
+ Tab == RecName ->
+ [{{Tab, Key}, BupRec, write}];
+ true ->
+ %% The record name differs from the table name:
+ %% write the record tagged with the record name
+ BupRec2 = setelement(1, BupRec, RecName),
+ [{{Tab, Key}, BupRec2, write}]
+ end,
+ do_prepare_items(Tid, Tab, Key, Where, Snmp, Item, CommitRecs)
+ end,
+ Recs2 = mnesia_schema:arrange_restore(R, Fun, Prep#prep.records),
+ P2 = Prep#prep{protocol = asym_trans, records = Recs2},
+ do_arrange(Tid, Store, ?ets_next(Store, RestoreKey), P2, N + 1);
+do_arrange(_Tid, _Store, '$end_of_table', Prep, N) ->
+ {N, Prep};
+do_arrange(Tid, Store, IgnoredKey, Prep, N) -> %% locks, nodes ... local atoms...
+ do_arrange(Tid, Store, ?ets_next(Store, IgnoredKey), Prep, N).
+
+%% Add the schema items to the commit record of every current db
+%% node and switch the protocol to asym_trans.
+%% Returns a prep record with all items in reverse order.
+prepare_schema_items(Tid, Items, Prep) ->
+    DbNodes = val({current, db_nodes}),
+    Types = lists:map(fun(N) -> {N, schema_ops} end, DbNodes),
+    Prep#prep{protocol = asym_trans,
+              records = prepare_nodes(Tid, Types, Items,
+                                      Prep#prep.records, schema)}.
+
+%% Add Items, which all belong to {Tab, Key}, to the commit records
+%% in Prep. Returns a prep record with all items in reverse order.
+%%
+%% The replica types and snmp info of the previously handled table
+%% are cached in Prep, so they are only looked up again when the
+%% table changes.
+prepare_items(Tid, Tab, Key, Items, Prep) when Prep#prep.prev_tab == Tab ->
+    #prep{prev_types = Types, prev_snmp = Snmp, records = Recs} = Prep,
+    Prep#prep{records = do_prepare_items(Tid, Tab, Key, Types,
+                                         Snmp, Items, Recs)};
+
+prepare_items(Tid, Tab, Key, Items, Prep) ->
+    case val({Tab, where_to_commit}) of
+        [] ->
+            mnesia:abort({no_exists, Tab});
+        {blocked, _} ->
+            %% Ask to be unblocked and then try again
+            unblocked = req({unblock_me, Tab}),
+            prepare_items(Tid, Tab, Key, Items, Prep);
+        Types ->
+            Snmp = val({Tab, snmp}),
+            Recs = do_prepare_items(Tid, Tab, Key, Types,
+                                    Snmp, Items, Prep#prep.records),
+            check_prep(Prep#prep{records = Recs, prev_tab = Tab,
+                                 prev_types = Types, prev_snmp = Snmp},
+                       Types)
+    end.
+
+%% Add first the snmp items (this step may exit) and then the
+%% ordinary items to the commit records.
+do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) ->
+    WithSnmp = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs),
+    prepare_nodes(Tid, Types, Items, WithSnmp, normal).
+
+%% Build the list of snmp operations for Items on {Tab, Key}.
+%% Returns [] when the table has no snmp info, [{Op, Tab, Key, SnmpOid}]
+%% for a specific key, and [{clear_table, Tab}] when Key is '_'.
+prepare_snmp(Tab, Key, Items) ->
+ case val({Tab, snmp}) of
+ [] ->
+ [];
+ Ustruct when Key /= '_' ->
+ %% Only the operation of the first item is used
+ {_Oid, _Val, Op} = hd(Items),
+ %% Still making snmp oid (not used) because we want to catch errors here
+ %% And also it keeps backwards comp. with old nodes.
+ SnmpOid = mnesia_snmp_hook:key_to_oid(Tab, Key, Ustruct), % May exit
+ [{Op, Tab, Key, SnmpOid}];
+ _ ->
+ [{clear_table, Tab}]
+ end.
+
+%% Add the snmp operation for {Tab, Key} to the commit records.
+%% Nothing is added when the snmp info is []. For the key '_' the
+%% operation is {clear_table, Tab}; otherwise it is built from the
+%% operation of the first item and the snmp oid of the key.
+prepare_snmp(_Tid, _Tab, _Key, _Types, [], _Items, Recs) ->
+    Recs;
+
+prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) ->
+    SnmpItem =
+        case Key of
+            '_' ->
+                {clear_table, Tab};
+            _ ->
+                {_Oid, _Val, Op} = hd(Items),
+                SnmpOid = mnesia_snmp_hook:key_to_oid(Tab, Key, Us), % May exit
+                {Op, Tab, Key, SnmpOid}
+        end,
+    prepare_nodes(Tid, Types, [SnmpItem], Recs, snmp).
+
+%% Compare Types with the types remembered in Prep. The first set of
+%% types seen is remembered; as soon as a differing set shows up the
+%% protocol is switched to asym_trans.
+check_prep(Prep, Types) ->
+    case Prep#prep.types of
+        Same when Same == Types ->
+            Prep;
+        undefined ->
+            Prep#prep{types = Types};
+        _Other ->
+            Prep#prep{protocol = asym_trans}
+    end.
+
+%% For every {Node, Storage} pair: pick the commit record of Node out
+%% of the given records and add Items to it. The updated records are
+%% returned first, followed by the records that were never picked.
+%% Returns a list of commit records.
+prepare_nodes(_Tid, [], _Items, CommitRecords, _Kind) ->
+    CommitRecords;
+prepare_nodes(Tid, [{Node, Storage} | Rest], Items, Recs, Kind) ->
+    {Picked, Remaining} = pick_node(Tid, Node, Recs, []),
+    Updated = prepare_node(Node, Storage, Items, Picked, Kind),
+    [Updated | prepare_nodes(Tid, Rest, Items, Remaining, Kind)].
+
+%% Find the commit record of Node. Returns {Rec, Others} where Others
+%% holds the remaining records (their order is not preserved).
+%% When there is no record for Node, a dirty activity gets a fresh
+%% record while anything else aborts with missing_lock.
+pick_node(_Tid, Node, [Rec | Rest], Done) when Rec#commit.node == Node ->
+    {Rec, Done ++ Rest};
+pick_node(Tid, Node, [Rec | Rest], Done) ->
+    pick_node(Tid, Node, Rest, [Rec | Done]);
+pick_node({dirty, _}, Node, [], Done) ->
+    {#commit{decision = presume_commit, node = Node}, Done};
+pick_node(_Tid, Node, [], _Done) ->
+    mnesia:abort({bad_commit, {missing_lock, Node}}).
+
+%% Add Items to the commit record Rec and return the updated record.
+%% Kind selects the field: snmp items go to the snmp field, schema
+%% items become schema_ops, and all other items go to the field named
+%% by Storage. Items are prepended one at a time, so they end up in
+%% reverse order.
+prepare_node(Node, Storage, [Item | Items], Rec, Kind) when Kind == snmp ->
+ Rec2 = Rec#commit{snmp = [Item | Rec#commit.snmp]},
+ prepare_node(Node, Storage, Items, Rec2, Kind);
+prepare_node(Node, Storage, [Item | Items], Rec, Kind) when Kind /= schema ->
+ Rec2 =
+ case Storage of
+ ram_copies ->
+ Rec#commit{ram_copies = [Item | Rec#commit.ram_copies]};
+ disc_copies ->
+ Rec#commit{disc_copies = [Item | Rec#commit.disc_copies]};
+ disc_only_copies ->
+ Rec#commit{disc_only_copies =
+ [Item | Rec#commit.disc_only_copies]}
+ end,
+ prepare_node(Node, Storage, Items, Rec2, Kind);
+%% Schema items are stored as they are, and only into a record that
+%% has no schema_ops yet
+prepare_node(_Node, _Storage, Items, Rec, Kind)
+ when Kind == schema, Rec#commit.schema_ops == [] ->
+ Rec#commit{schema_ops = Items};
+prepare_node(_Node, _Storage, [], Rec, _Kind) ->
+ Rec.
+
+%% multi_commit(Protocol, Tid, CommitRecords, Store)
+%% Runs the commit protocol named by Protocol (read_only, sym_trans,
+%% sync_sym_trans or asym_trans) for the transaction Tid.
+%% Returns do_commit or {do_abort, Reason}.
+%% Local work is always performed in the user's process
+multi_commit(read_only, Tid, CR, _Store) ->
+ %% This featherweight commit protocol is used when no
+ %% updates have been performed in the transaction.
+
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ Msg = {Tid, simple_commit},
+ rpc:abcast(DiscNs -- [node()], ?MODULE, Msg),
+ rpc:abcast(RamNs -- [node()], ?MODULE, Msg),
+ mnesia_recover:note_decision(Tid, committed),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid},
+ do_commit;
+
+multi_commit(sym_trans, Tid, CR, Store) ->
+ %% This lightweight commit protocol is used when all
+ %% the involved tables are replicated symmetrically.
+ %% Their storage types must match on each node.
+ %%
+ %% 1 Ask the other involved nodes if they want to commit
+ %% All involved nodes vote yes if they are up
+ %% 2a Somebody has voted no
+ %% Tell all yes voters to do_abort
+ %% 2b Everybody has voted yes
+ %% Tell everybody to do_commit. I.e. that they should
+ %% prepare the commit, log the commit record and
+ %% perform the updates.
+ %%
+ %% The outcome is kept 3 minutes in the transient decision table.
+ %%
+ %% Recovery:
+ %% If somebody dies before the coordinator has
+ %% broadcasted do_commit, the transaction is aborted.
+ %%
+ %% If a participant dies, the table load algorithm
+ %% ensures that the contents of the involved tables
+ %% are picked from another node.
+ %%
+ %% If the coordinator dies, each participant checks
+ %% the outcome with all the others. If all are uncertain
+ %% about the outcome, the transaction is aborted. If
+ %% somebody knows the outcome the others will follow.
+
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ ?ets_insert(Store, Pending),
+
+ {WaitFor, Local} = ask_commit(sym_trans, Tid, CR, DiscNs, RamNs),
+ {Outcome, []} = rec_all(WaitFor, Tid, do_commit, []),
+ ?eval_debug_fun({?MODULE, multi_commit_sym},
+ [{tid, Tid}, {outcome, Outcome}]),
+ rpc:abcast(DiscNs -- [node()], ?MODULE, {Tid, Outcome}),
+ rpc:abcast(RamNs -- [node()], ?MODULE, {Tid, Outcome}),
+ case Outcome of
+ do_commit ->
+ mnesia_recover:note_decision(Tid, committed),
+ do_dirty(Tid, Local),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid};
+ {do_abort, _Reason} ->
+ mnesia_recover:note_decision(Tid, aborted)
+ end,
+ ?eval_debug_fun({?MODULE, multi_commit_sym, post},
+ [{tid, Tid}, {outcome, Outcome}]),
+ Outcome;
+
+multi_commit(sync_sym_trans, Tid, CR, Store) ->
+ %% This protocol is the same as sym_trans except that it
+ %% uses synchronized calls to disk_log and synchronized commits
+ %% when several nodes are involved.
+
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ ?ets_insert(Store, Pending),
+
+ {WaitFor, Local} = ask_commit(sync_sym_trans, Tid, CR, DiscNs, RamNs),
+ {Outcome, []} = rec_all(WaitFor, Tid, do_commit, []),
+ ?eval_debug_fun({?MODULE, multi_commit_sym_sync},
+ [{tid, Tid}, {outcome, Outcome}]),
+ rpc:abcast(DiscNs -- [node()], ?MODULE, {Tid, Outcome}),
+ rpc:abcast(RamNs -- [node()], ?MODULE, {Tid, Outcome}),
+ case Outcome of
+ do_commit ->
+ mnesia_recover:note_decision(Tid, committed),
+ mnesia_log:slog(Local),
+ do_commit(Tid, Local),
+ %% Just wait for completion; the result is ignored.
+ rec_all(WaitFor, Tid, ignore, []),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid};
+ {do_abort, _Reason} ->
+ mnesia_recover:note_decision(Tid, aborted)
+ end,
+ ?eval_debug_fun({?MODULE, multi_commit_sym, post},
+ [{tid, Tid}, {outcome, Outcome}]),
+ Outcome;
+
+multi_commit(asym_trans, Tid, CR, Store) ->
+ %% This more expensive commit protocol is used when
+ %% table definitions are changed (schema transactions).
+ %% It is also used when the involved tables are
+ %% replicated asymmetrically. If the storage type differs
+ %% on at least one node this protocol is used.
+ %%
+ %% 1 Ask the other involved nodes if they want to commit.
+ %% All involved nodes prepare the commit, log a presume_abort
+ %% commit record and vote yes or no depending on the
+ %% outcome of the prepare. The preparation is also performed
+ %% by the coordinator.
+ %%
+ %% 2a Somebody has died or voted no
+ %% Tell all yes voters to do_abort
+ %% 2b Everybody has voted yes
+ %% Put an unclear marker in the log.
+ %% Tell the others to pre_commit. I.e. that they should
+ %% put an unclear marker in the log and reply
+ %% acc_pre_commit when they are done.
+ %%
+ %% 3a Somebody died
+ %% Tell the remaining participants to do_abort
+ %% 3b Everybody has replied acc_pre_commit
+ %% Tell everybody to commit. I.e. that they should
+ %% put a committed marker in the log, perform the updates
+ %% and reply done_commit when they are done. The coordinator
+ %% must wait with putting his committed marker into the log
+ %% until the committed has been sent to all the others.
+ %% Then he performs local commit before collecting replies.
+ %%
+ %% 4 Everybody has either died or replied done_commit
+ %% Return to the caller.
+ %%
+ %% Recovery:
+ %% If the coordinator dies, the participants (and
+ %% the coordinator when he starts again) must do
+ %% the following:
+ %%
+ %% If we have no unclear marker in the log we may
+ %% safely abort, since we know that nobody may have
+ %% decided to commit yet.
+ %%
+ %% If we have a committed marker in the log we may
+ %% safely commit since we know that everybody else
+ %% also will come to this conclusion.
+ %%
+ %% If we have an unclear marker but no committed
+ %% in the log we are uncertain about the real outcome
+ %% of the transaction and must ask the others before
+ %% we can decide what to do. If someone knows the
+ %% outcome we will do the same. If nobody knows, we
+ %% will wait for the remaining involved nodes to come
+ %% up. When all involved nodes are up and uncertain,
+ %% we decide to commit (first put a committed marker
+ %% in the log, then do the updates).
+
+ D = #decision{tid = Tid, outcome = presume_abort},
+ {D2, CR2} = commit_decision(D, CR, [], []),
+ DiscNs = D2#decision.disc_nodes,
+ RamNs = D2#decision.ram_nodes,
+ Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ ?ets_insert(Store, Pending),
+ {WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs),
+ %% The local prepare is caught so that a failure can be handled
+ %% after the votes of the other nodes have been collected
+ SchemaPrep = (catch mnesia_schema:prepare_commit(Tid, Local, {coord, WaitFor})),
+ {Votes, Pids} = rec_all(WaitFor, Tid, do_commit, []),
+
+ ?eval_debug_fun({?MODULE, multi_commit_asym_got_votes},
+ [{tid, Tid}, {votes, Votes}]),
+ case Votes of
+ do_commit ->
+ case SchemaPrep of
+ {_Modified, C = #commit{}, DumperMode} ->
+ mnesia_log:log(C), % C is not a binary
+ ?eval_debug_fun({?MODULE, multi_commit_asym_log_commit_rec},
+ [{tid, Tid}]),
+
+ D3 = C#commit.decision,
+ D4 = D3#decision{outcome = unclear},
+ mnesia_recover:log_decision(D4),
+ ?eval_debug_fun({?MODULE, multi_commit_asym_log_commit_dec},
+ [{tid, Tid}]),
+ tell_participants(Pids, {Tid, pre_commit}),
+ %% Now we are uncertain and we do not know
+ %% if all participants have logged that
+ %% they are uncertain or not
+ rec_acc_pre_commit(Pids, Tid, Store, {C,Local},
+ do_commit, DumperMode, [], []);
+ {'EXIT', Reason} ->
+ %% The others have logged the commit
+ %% record but they are not uncertain
+ mnesia_recover:note_decision(Tid, aborted),
+ ?eval_debug_fun({?MODULE, multi_commit_asym_prepare_exit},
+ [{tid, Tid}]),
+ tell_participants(Pids, {Tid, {do_abort, Reason}}),
+ do_abort(Tid, Local),
+ {do_abort, Reason}
+ end;
+
+ {do_abort, Reason} ->
+ %% The others have logged the commit
+ %% record but they are not uncertain
+ mnesia_recover:note_decision(Tid, aborted),
+ ?eval_debug_fun({?MODULE, multi_commit_asym_do_abort}, [{tid, Tid}]),
+ tell_participants(Pids, {Tid, {do_abort, Reason}}),
+ do_abort(Tid, Local),
+ {do_abort, Reason}
+ end.
+
+%% Second phase of the asym_trans protocol, run by the coordinator.
+%% Collects the reply to pre_commit from each participant pid in turn
+%% and then either commits or aborts the transaction.
+%% GoodPids collects the pids that accepted the pre_commit and
+%% SchemaAckPids those of them that will also send a schema_commit
+%% acknowledgement.
+%% Returns do_commit or {do_abort, Reason}
+rec_acc_pre_commit([Pid | Tail], Tid, Store, Commit, Res, DumperMode,
+ GoodPids, SchemaAckPids) ->
+ receive
+ {?MODULE, _, {acc_pre_commit, Tid, Pid, true}} ->
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
+ [Pid | GoodPids], [Pid | SchemaAckPids]);
+
+ {?MODULE, _, {acc_pre_commit, Tid, Pid, false}} ->
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
+ [Pid | GoodPids], SchemaAckPids);
+
+ {?MODULE, _, {acc_pre_commit, Tid, Pid}} ->
+ %% Kept for backwards compatibility. Remove after Mnesia 4.x
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
+ [Pid | GoodPids], [Pid | SchemaAckPids]);
+ {?MODULE, _, {do_abort, Tid, Pid, _Reason}} ->
+ %% The participant refused: remember the abort but keep
+ %% collecting the replies of the remaining participants
+ AbortRes = {do_abort, {bad_commit, node(Pid)}},
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, AbortRes, DumperMode,
+ GoodPids, SchemaAckPids);
+ {mnesia_down, Node} when Node == node(Pid) ->
+ AbortRes = {do_abort, {bad_commit, Node}},
+ catch Pid ! {Tid, AbortRes}, %% Tell him that he has died
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, AbortRes, DumperMode,
+ GoodPids, SchemaAckPids)
+ end;
+rec_acc_pre_commit([], Tid, Store, {Commit,OrigC}, Res, DumperMode, GoodPids, SchemaAckPids) ->
+ D = Commit#commit.decision,
+ case Res of
+ do_commit ->
+ %% Now everybody knows that the others
+ %% have voted yes. We also know that
+ %% everybody is uncertain.
+ prepare_sync_schema_commit(Store, SchemaAckPids),
+ tell_participants(GoodPids, {Tid, committed}),
+ D2 = D#decision{outcome = committed},
+ mnesia_recover:log_decision(D2),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_log_commit},
+ [{tid, Tid}]),
+
+ %% Now we have safely logged committed
+ %% and we can recover without asking others
+ do_commit(Tid, Commit, DumperMode),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_done_commit},
+ [{tid, Tid}]),
+ sync_schema_commit(Tid, Store, SchemaAckPids),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid};
+
+ {do_abort, Reason} ->
+ tell_participants(GoodPids, {Tid, {do_abort, Reason}}),
+ D2 = D#decision{outcome = aborted},
+ mnesia_recover:log_decision(D2),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_log_abort},
+ [{tid, Tid}]),
+ %% The abort is undone with the original commit record
+ do_abort(Tid, OrigC),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_done_abort},
+ [{tid, Tid}])
+ end,
+ Res.
+
+%% Note in Store, for each pid, the node that a schema commit
+%% acknowledgement is awaited from (needed for the mnesia_down
+%% management). Always returns ok.
+prepare_sync_schema_commit(Store, Pids) ->
+    lists:foreach(
+      fun(Pid) ->
+              ?ets_insert(Store, {waiting_for_commit_ack, node(Pid)})
+      end,
+      Pids).
+
+%% Wait, for each pid in turn, until it has acknowledged the schema
+%% commit or its node has gone down. The matching
+%% waiting_for_commit_ack entry is removed from Store in both cases.
+%% Returns ok.
+sync_schema_commit(_Tid, _Store, []) ->
+ ok;
+sync_schema_commit(Tid, Store, [Pid | Tail]) ->
+ receive
+ {?MODULE, _, {schema_commit, Tid, Pid}} ->
+ ?ets_match_delete(Store, {waiting_for_commit_ack, node(Pid)}),
+ sync_schema_commit(Tid, Store, Tail);
+
+ {mnesia_down, Node} when Node == node(Pid) ->
+ ?ets_match_delete(Store, {waiting_for_commit_ack, Node}),
+ sync_schema_commit(Tid, Store, Tail)
+ end.
+
+%% Send Msg to every pid in the list. Always returns ok.
+tell_participants(Pids, Msg) ->
+    lists:foreach(fun(Pid) -> Pid ! Msg end, Pids).
+
+%% Start of the participant side of a commit. The commit record may
+%% arrive either as a binary or as a #commit{} record; both forms are
+%% handed on to commit_participant/6 together with the decoded record.
+%% Exits are trapped because we can get a shutdown from the
+%% application manager.
+commit_participant(Coord, Tid, Bin, DiscNs, RamNs) when is_binary(Bin) ->
+    process_flag(trap_exit, true),
+    commit_participant(Coord, Tid, Bin, binary_to_term(Bin), DiscNs, RamNs);
+commit_participant(Coord, Tid, #commit{} = Commit, DiscNs, RamNs) ->
+    process_flag(trap_exit, true),
+    commit_participant(Coord, Tid, Commit, Commit, DiscNs, RamNs).
+
+%% Participant side of the asym_trans protocol.
+%% Bin is the commit record as it was received and C0 the same record
+%% in decoded form. The participant prepares the commit, votes, and
+%% then follows the instructions of the coordinator Coord. It always
+%% ends by releasing the transaction and exiting with reason normal.
+commit_participant(Coord, Tid, Bin, C0, DiscNs, _RamNs) ->
+ ?eval_debug_fun({?MODULE, commit_participant, pre}, [{tid, Tid}]),
+ case catch mnesia_schema:prepare_commit(Tid, C0, {part, Coord}) of
+ {Modified, C = #commit{}, DumperMode} ->
+ %% If we can not find any local unclear decision
+ %% we should presume abort at startup recovery
+ case lists:member(node(), DiscNs) of
+ false ->
+ ignore;
+ true ->
+ %% Log the record as it was received unless the
+ %% prepare has modified it
+ case Modified of
+ false -> mnesia_log:log(Bin);
+ true -> mnesia_log:log(C)
+ end
+ end,
+ ?eval_debug_fun({?MODULE, commit_participant, vote_yes},
+ [{tid, Tid}]),
+ reply(Coord, {vote_yes, Tid, self()}),
+
+ receive
+ {Tid, pre_commit} ->
+ D = C#commit.decision,
+ mnesia_recover:log_decision(D#decision{outcome = unclear}),
+ ?eval_debug_fun({?MODULE, commit_participant, pre_commit},
+ [{tid, Tid}]),
+ %% Tell the coordinator whether a schema_commit
+ %% acknowledgement will follow the commit
+ Expect_schema_ack = C#commit.schema_ops /= [],
+ reply(Coord, {acc_pre_commit, Tid, self(), Expect_schema_ack}),
+
+ %% Now we are vulnerable for failures, since
+ %% we cannot decide without asking others
+ receive
+ {Tid, committed} ->
+ mnesia_recover:log_decision(D#decision{outcome = committed}),
+ ?eval_debug_fun({?MODULE, commit_participant, log_commit},
+ [{tid, Tid}]),
+ do_commit(Tid, C, DumperMode),
+ case Expect_schema_ack of
+ false -> ignore;
+ true -> reply(Coord, {schema_commit, Tid, self()})
+ end,
+ ?eval_debug_fun({?MODULE, commit_participant, do_commit},
+ [{tid, Tid}]);
+
+ {Tid, {do_abort, _Reason}} ->
+ mnesia_recover:log_decision(D#decision{outcome = aborted}),
+ ?eval_debug_fun({?MODULE, commit_participant, log_abort},
+ [{tid, Tid}]),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, undo_prepare},
+ [{tid, Tid}]);
+
+ {'EXIT', _, _} ->
+ mnesia_recover:log_decision(D#decision{outcome = aborted}),
+ ?eval_debug_fun({?MODULE, commit_participant, exit_log_abort},
+ [{tid, Tid}]),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, exit_undo_prepare},
+ [{tid, Tid}]);
+
+ Msg ->
+ verbose("** ERROR ** commit_participant ~p, got unexpected msg: ~p~n",
+ [Tid, Msg])
+ end;
+ {Tid, {do_abort, Reason}} ->
+ reply(Coord, {do_abort, Tid, self(), Reason}),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, pre_commit_undo_prepare},
+ [{tid, Tid}]);
+
+ {'EXIT', _, Reason} ->
+ reply(Coord, {do_abort, Tid, self(), {bad_commit,Reason}}),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, pre_commit_undo_prepare}, [{tid, Tid}]);
+
+ Msg ->
+ reply(Coord, {do_abort, Tid, self(), {bad_commit,internal}}),
+ verbose("** ERROR ** commit_participant ~p, got unexpected msg: ~p~n",
+ [Tid, Msg])
+ end;
+
+ {'EXIT', Reason} ->
+ %% The prepare failed: vote no and undo what was prepared
+ ?eval_debug_fun({?MODULE, commit_participant, vote_no},
+ [{tid, Tid}]),
+ reply(Coord, {vote_no, Tid, Reason}),
+ mnesia_schema:undo_prepare_commit(Tid, C0)
+ end,
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid},
+ unlink(whereis(?MODULE)),
+ exit(normal).
+
+%% Undo the prepared commit of Tid. The commit record may be given
+%% as a binary or in decoded form. Returns the decoded commit record.
+do_abort(Tid, Bin) when is_binary(Bin) ->
+ %% Possible optimization:
+ %% If we want we could pass around a flag
+ %% that tells us whether the binary contains
+ %% schema ops or not. Only if the binary
+ %% contains schema ops is it meaningful to
+ %% unpack the binary and perform
+ %% mnesia_schema:undo_prepare_commit/2.
+ do_abort(Tid, binary_to_term(Bin));
+do_abort(Tid, Commit) ->
+ mnesia_schema:undo_prepare_commit(Tid, Commit),
+ Commit.
+
+%% Log and then perform a commit record that carries no schema
+%% operations. Returns the result of do_commit/2.
+do_dirty(Tid, #commit{schema_ops = []} = Commit) ->
+    mnesia_log:log(Commit),
+    do_commit(Tid, Commit).
+
+%% do_commit(Tid, CommitRecord)
+%% Perform the commit record, which may be given as a binary, with
+%% the dumper mode optional. Decoding of a binary is left to
+%% do_commit/3.
+do_commit(Tid, BinOrCommit) ->
+    do_commit(Tid, BinOrCommit, optional).
+%% Perform the commit record: first the schema operations, then the
+%% snmp operations and finally the updates of ram_copies, disc_copies
+%% and disc_only_copies, in that order. The result is threaded
+%% through the update steps and returned.
+do_commit(Tid, Bin, DumperMode) when is_binary(Bin) ->
+    do_commit(Tid, binary_to_term(Bin), DumperMode);
+do_commit(Tid, C, DumperMode) ->
+    mnesia_dumper:update(Tid, C#commit.schema_ops, DumperMode),
+    SnmpRes = do_snmp(Tid, C#commit.snmp),
+    lists:foldl(
+      fun({Storage, Ops}, Res) -> do_update(Tid, Storage, Ops, Res) end,
+      SnmpRes,
+      [{ram_copies, C#commit.ram_copies},
+       {disc_copies, C#commit.disc_copies},
+       {disc_only_copies, C#commit.disc_only_copies}]).
+
+%% Update the items
+%% Applies each operation in turn to the given storage type.
+%% Returns the result of the last operation that returned something
+%% other than ok, or OldRes when there was none. A failing operation
+%% is reported through verbose/2 and then skipped.
+do_update(Tid, Storage, [Op | Ops], OldRes) ->
+ case catch do_update_op(Tid, Storage, Op) of
+ ok ->
+ do_update(Tid, Storage, Ops, OldRes);
+ {'EXIT', Reason} ->
+ %% This may only happen when we recently have
+ %% deleted our local replica, changed storage_type
+ %% or transformed table
+ %% BUGBUG: Updates may be lost if storage_type is changed.
+ %% Determine actual storage type and try again.
+ %% BUGBUG: Updates may be lost if table is transformed.
+
+ verbose("do_update in ~w failed: ~p -> {'EXIT', ~p}~n",
+ [Tid, Op, Reason]),
+ do_update(Tid, Storage, Ops, OldRes);
+ NewRes ->
+ do_update(Tid, Storage, Ops, NewRes)
+ end;
+do_update(_Tid, _Storage, [], Res) ->
+ Res.
+
+%% Perform one update operation on the local replica of Tab.
+%% The commit work registered for the table (checkpoints, subscribers
+%% and indexes) is handled by the matching commit_* function; the
+%% table itself is updated through mnesia_lib.
+do_update_op(Tid, Storage, {{Tab, K}, Obj, write}) ->
+ commit_write(?catch_val({Tab, commit_work}), Tid,
+ Tab, K, Obj, undefined),
+ mnesia_lib:db_put(Storage, Tab, Obj);
+
+do_update_op(Tid, Storage, {{Tab, K}, Val, delete}) ->
+ commit_delete(?catch_val({Tab, commit_work}), Tid, Tab, K, Val, undefined),
+ mnesia_lib:db_erase(Storage, Tab, K);
+
+%% update_counter returns the new value of the counter
+do_update_op(Tid, Storage, {{Tab, K}, {RecName, Incr}, update_counter}) ->
+ {NewObj, OldObjs} =
+ case catch mnesia_lib:db_update_counter(Storage, Tab, K, Incr) of
+ NewVal when is_integer(NewVal), NewVal >= 0 ->
+ {{RecName, K, NewVal}, [{RecName, K, NewVal - Incr}]};
+ %% Any other outcome: write a fresh counter record. It starts
+ %% at Incr for a positive increment and at 0 otherwise.
+ _ when Incr > 0 ->
+ New = {RecName, K, Incr},
+ mnesia_lib:db_put(Storage, Tab, New),
+ {New, []};
+ _ ->
+ Zero = {RecName, K, 0},
+ mnesia_lib:db_put(Storage, Tab, Zero),
+ {Zero, []}
+ end,
+ commit_update(?catch_val({Tab, commit_work}), Tid, Tab,
+ K, NewObj, OldObjs),
+ element(3, NewObj);
+
+do_update_op(Tid, Storage, {{Tab, Key}, Obj, delete_object}) ->
+ commit_del_object(?catch_val({Tab, commit_work}),
+ Tid, Tab, Key, Obj, undefined),
+ mnesia_lib:db_match_erase(Storage, Tab, Obj);
+
+do_update_op(Tid, Storage, {{Tab, Key}, Obj, clear_table}) ->
+ commit_clear(?catch_val({Tab, commit_work}), Tid, Tab, Key, Obj),
+ mnesia_lib:db_match_erase(Storage, Tab, Obj).
+
+%% Carry out the commit work of a write: every entry of the work list
+%% is handled according to its tag (checkpoints, subscribers or
+%% index). Returns ok.
+commit_write([], _, _, _, _, _) ->
+    ok;
+commit_write([Work | Rest], Tid, Tab, K, Obj, Old) ->
+    case Work of
+        {checkpoints, CpList} ->
+            mnesia_checkpoint:tm_retain(Tid, Tab, K, write, CpList);
+        _ when element(1, Work) == subscribers ->
+            mnesia_subscr:report_table_event(Work, Tab, Tid, Obj, write, Old);
+        _ when element(1, Work) == index ->
+            mnesia_index:add_index(Work, Tab, K, Obj, Old)
+    end,
+    commit_write(Rest, Tid, Tab, K, Obj, Old).
+
+%% Carry out the commit work of a counter update. Returns ok.
+%% The value returned by mnesia_checkpoint:tm_retain/5 replaces Old
+%% for the work entries that follow the checkpoints entry.
+commit_update([], _, _, _, _, _) -> ok;
+commit_update([{checkpoints, CpList}|R], Tid, Tab, K, Obj, _) ->
+ Old = mnesia_checkpoint:tm_retain(Tid, Tab, K, write, CpList),
+ commit_update(R, Tid, Tab, K, Obj, Old);
+commit_update([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, write, Old),
+ commit_update(R, Tid, Tab, K, Obj, Old);
+commit_update([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:add_index(H, Tab, K, Obj, Old),
+ commit_update(R, Tid, Tab, K, Obj, Old).
+
+%% Carry out the commit work of a delete. Returns ok.
+%% The value returned by mnesia_checkpoint:tm_retain/5 replaces Old
+%% for the work entries that follow the checkpoints entry.
+commit_delete([], _, _, _, _, _) -> ok;
+commit_delete([{checkpoints, CpList}|R], Tid, Tab, K, Obj, _) ->
+ Old = mnesia_checkpoint:tm_retain(Tid, Tab, K, delete, CpList),
+ commit_delete(R, Tid, Tab, K, Obj, Old);
+commit_delete([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, delete, Old),
+ commit_delete(R, Tid, Tab, K, Obj, Old);
+commit_delete([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:delete_index(H, Tab, K),
+ commit_delete(R, Tid, Tab, K, Obj, Old).
+
+%% Carry out the commit work of a delete_object. Returns ok.
+%% The value returned by mnesia_checkpoint:tm_retain/5 replaces Old
+%% for the work entries that follow the checkpoints entry.
+commit_del_object([], _, _, _, _, _) -> ok;
+commit_del_object([{checkpoints, CpList}|R], Tid, Tab, K, Obj, _) ->
+ Old = mnesia_checkpoint:tm_retain(Tid, Tab, K, delete_object, CpList),
+ commit_del_object(R, Tid, Tab, K, Obj, Old);
+commit_del_object([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, delete_object, Old),
+ commit_del_object(R, Tid, Tab, K, Obj, Old);
+commit_del_object([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:del_object_index(H, Tab, K, Obj, Old),
+ commit_del_object(R, Tid, Tab, K, Obj, Old).
+
+%% Carry out the commit work of a clear_table: every entry of the
+%% work list is handled according to its tag (checkpoints,
+%% subscribers or index). Returns ok.
+commit_clear([], _, _, _, _) ->
+    ok;
+commit_clear([Work | Rest], Tid, Tab, K, Obj) ->
+    case Work of
+        {checkpoints, CpList} ->
+            mnesia_checkpoint:tm_retain(Tid, Tab, K, clear_table, CpList);
+        _ when element(1, Work) == subscribers ->
+            mnesia_subscr:report_table_event(Work, Tab, Tid, Obj,
+                                             clear_table, undefined);
+        _ when element(1, Work) == index ->
+            mnesia_index:clear_index(Work, Tab, K, Obj)
+    end,
+    commit_clear(Rest, Tid, Tab, K, Obj).
+
+%% Hand every snmp operation over to mnesia_snmp_hook:update/1.
+%% A failing operation is reported through verbose/2 and skipped.
+%% Returns ok.
+do_snmp(Tid, SnmpOps) ->
+    lists:foreach(
+      fun(SnmpOp) ->
+              case catch mnesia_snmp_hook:update(SnmpOp) of
+                  {'EXIT', Reason} ->
+                      %% This should only happen when we recently
+                      %% have deleted our local replica or recently
+                      %% detached the snmp table
+                      verbose("do_snmp in ~w failed: ~p -> {'EXIT', ~p}~n",
+                              [Tid, SnmpOp, Reason]);
+                  ok ->
+                      ignore
+              end
+      end,
+      SnmpOps).
+
+%% Split the nodes of the commit records into {DiscNodes, RamNodes}.
+%% A node counts as a ram node when its commit record has no
+%% disc_copies, no disc_only_copies and no schema_ops; all other
+%% nodes count as disc nodes. The nodes are prepended to the given
+%% accumulators.
+commit_nodes(CommitRecs, AccD, AccR) ->
+    Classify =
+        fun(C, {Disc, Ram}) when C#commit.disc_copies == [],
+                                 C#commit.disc_only_copies == [],
+                                 C#commit.schema_ops == [] ->
+                {Disc, [C#commit.node | Ram]};
+           (C, {Disc, Ram}) ->
+                {[C#commit.node | Disc], Ram}
+        end,
+    lists:foldl(Classify, {AccD, AccR}, CommitRecs).
+
+%% Sort the nodes of the commit records into disc nodes and ram nodes
+%% and store the complete decision in every commit record.
+%% Returns {Decision, CommitRecords}. The decision is only complete
+%% once the end of the list has been reached, which is why the
+%% records are updated on the way back from the recursion.
+commit_decision(D, [C | Tail], AccD, AccR) ->
+ N = C#commit.node,
+ {D2, Tail2} =
+ case C#commit.schema_ops of
+ [] when C#commit.disc_copies == [],
+ C#commit.disc_only_copies == [] ->
+ commit_decision(D, Tail, AccD, [N | AccR]);
+ [] ->
+ commit_decision(D, Tail, [N | AccD], AccR);
+ Ops ->
+ %% With schema operations the kind of node is decided
+ %% by ram_only_ops/2
+ case ram_only_ops(N, Ops) of
+ true ->
+ commit_decision(D, Tail, AccD, [N | AccR]);
+ false ->
+ commit_decision(D, Tail, [N | AccD], AccR)
+ end
+ end,
+ {D2, [C#commit{decision = D2} | Tail2]};
+commit_decision(D, [], AccD, AccR) ->
+ {D#decision{disc_nodes = AccD, ram_nodes = AccR}, []}.
+
+%% Returns true when the schema operations on node N only concern
+%% ram, i.e. when N does not hold a disc_copies replica of the
+%% schema. A change of the copy type of the schema itself on N is
+%% never regarded as ram only.
+ram_only_ops(N, [{op, change_table_copy_type, N, _FromS, _ToS, Cs} | _Ops]) ->
+    %% We always use disk if we change the type of the schema
+    (not lists:member({name, schema}, Cs))
+        andalso (not lists:member(N, val({schema, disc_copies})));
+
+ram_only_ops(N, _Ops) ->
+    not lists:member(N, val({schema, disc_copies})).
+
+%% Send a sync_dirty request to every remote node that has a commit
+%% record and perform the local commit record, if there is one, with
+%% do_dirty/2.
+%% Returns {WaitFor, Res} where WaitFor holds the remote nodes and
+%% Res is the local result, or a node_not_running abort when there
+%% was no local commit record.
+sync_send_dirty(Tid, [Head | Tail], Tab, WaitFor) ->
+ Node = Head#commit.node,
+ if
+ Node == node() ->
+ %% The rest of the list is handled first, so the requests
+ %% are sent before the local update is performed
+ {WF, _} = sync_send_dirty(Tid, Tail, Tab, WaitFor),
+ Res = do_dirty(Tid, Head),
+ {WF, Res};
+ true ->
+ {?MODULE, Node} ! {self(), {sync_dirty, Tid, Head, Tab}},
+ sync_send_dirty(Tid, Tail, Tab, [Node | WaitFor])
+ end;
+sync_send_dirty(_Tid, [], _Tab, WaitFor) ->
+ {WaitFor, {'EXIT', {aborted, {node_not_running, WaitFor}}}}.
+
+%% Send the commit records of an async_dirty update.
+%% Returns {WaitFor, Res}. When the table can not be read anywhere,
+%% nothing is sent and Res is a no_exists abort.
+async_send_dirty(Tid, Nodes, Tab, ReadNode) ->
+    case ReadNode of
+        nowhere ->
+            {[], {'EXIT', {aborted, {no_exists, Tab}}}};
+        _ ->
+            async_send_dirty(Tid, Nodes, Tab, ReadNode, [], ok)
+    end.
+
+%% Handle the commit records one by one. Returns {WaitFor, Res}.
+%% Only the node the table is read from is waited for: a local read
+%% node is updated directly, a remote read node gets a sync_dirty
+%% request. All other nodes get an async_dirty request.
+async_send_dirty(Tid, [Head | Tail], Tab, ReadNode, WaitFor, Res) ->
+ Node = Head#commit.node,
+ if
+ ReadNode == Node, Node == node() ->
+ NewRes = do_dirty(Tid, Head),
+ async_send_dirty(Tid, Tail, Tab, ReadNode, WaitFor, NewRes);
+ ReadNode == Node ->
+ {?MODULE, Node} ! {self(), {sync_dirty, Tid, Head, Tab}},
+ %% Preliminary result, in case no reply is received
+ NewRes = {'EXIT', {aborted, {node_not_running, Node}}},
+ async_send_dirty(Tid, Tail, Tab, ReadNode, [Node | WaitFor], NewRes);
+ true ->
+ {?MODULE, Node} ! {self(), {async_dirty, Tid, Head, Tab}},
+ async_send_dirty(Tid, Tail, Tab, ReadNode, WaitFor, Res)
+ end;
+async_send_dirty(_Tid, [], _Tab, _ReadNode, WaitFor, Res) ->
+ {WaitFor, Res}.
+
+%% Collect the reply of a dirty update from each of the given remote
+%% nodes and return the combined result. The local node must not be
+%% part of the list.
+rec_dirty([Node | Tail], Res) when Node /= node() ->
+ NewRes = get_dirty_reply(Node, Res),
+ rec_dirty(Tail, NewRes);
+rec_dirty([], Res) ->
+ Res.
+
+%% Wait for the reply of a dirty update from Node and combine it with
+%% the result Res collected so far. Returns the new result.
+get_dirty_reply(Node, Res) ->
+ receive
+ {?MODULE, Node, {'EXIT', Reason}} ->
+ {'EXIT', {aborted, {badarg, Reason}}};
+ {?MODULE, Node, {dirty_res, ok}} ->
+ case Res of
+ {'EXIT', {aborted, {node_not_running, _Node}}} ->
+ ok;
+ _ ->
+ %% Prioritize bad results, except for node_not_running
+ Res
+ end;
+ {?MODULE, Node, {dirty_res, Reply}} ->
+ Reply;
+ {mnesia_down, Node} ->
+ case get(mnesia_activity_state) of
+ {_, Tid, _Ts} when element(1,Tid) == tid ->
+ %% Hmm dirty called inside a transaction, to avoid
+ %% hanging transaction we need to restart the transaction
+ mnesia:abort({node_not_running, Node});
+ _ ->
+ %% It's ok to ignore mnesia_down's since we will make
+ %% the replicas consistent again when Node is started
+ Res
+ end
+ after 1000 ->
+ %% No reply within one second: keep waiting only while Node is
+ %% still one of the current db nodes
+ case lists:member(Node, val({current, db_nodes})) of
+ true ->
+ get_dirty_reply(Node, Res);
+ false ->
+ Res
+ end
+ end.
+
+%% Asks every remote node in the commit record list to prepare for
+%% commit. Assumes that the commit records are not binaries.
+%% Returns {WaitFor, Local}: the remote nodes that were asked and the
+%% local commit record ('no_local' when there is none).
+ask_commit(Protocol, Tid, CR, DiscNs, RamNs) ->
+    NobodyAskedYet = [],
+    ask_commit(Protocol, Tid, CR, DiscNs, RamNs, NobodyAskedYet, no_local).
+
+%% Worker for ask_commit/5. The local commit record is remembered as
+%% Local; every remote node gets an ask_commit message and is added
+%% to WaitFor. Returns {WaitFor, Local}.
+ask_commit(_Protocol, _Tid, [], _DiscNs, _RamNs, WaitFor, Local) ->
+    {WaitFor, Local};
+ask_commit(Protocol, Tid, [Commit | Rest], DiscNs, RamNs, WaitFor, Local) ->
+    case Commit#commit.node of
+        Here when Here == node() ->
+            ask_commit(Protocol, Tid, Rest, DiscNs, RamNs, WaitFor, Commit);
+        There ->
+            Payload = opt_term_to_binary(Protocol, Commit, DiscNs ++ RamNs),
+            Request = {ask_commit, Protocol, Tid, Payload, DiscNs, RamNs},
+            {?MODULE, There} ! {self(), Request},
+            ask_commit(Protocol, Tid, Rest, DiscNs, RamNs, [There | WaitFor], Local)
+    end.
+
+%% This used to test protocol conversion between mnesia nodes, but
+%% the conversion really depends on the emulator version on the two
+%% nodes (if funs are sent, which they are in the transform table op).
+%% To be safe we let erts do the translation (possibly many times and
+%% thus slower, but it works), so the term is returned unchanged.
+% opt_term_to_binary(asym_trans, Head, Nodes) ->
+% opt_term_to_binary(Nodes, Head);
+opt_term_to_binary(_Protocol, CommitRecord, _Nodes) ->
+    CommitRecord.
+
+%% Waits, node by node, for the answer of every node in the list
+%% concerning transaction Tid.
+%% Res is the outcome accumulated so far and Pids collects the pids
+%% reported in three-element vote_yes answers.
+%% Returns {Res, Pids} once an answer (or a mnesia_down) has been
+%% received for each node.
+rec_all([Node | Tail], Tid, Res, Pids) ->
+ receive
+ {?MODULE, Node, {vote_yes, Tid}} ->
+ rec_all(Tail, Tid, Res, Pids);
+ %% A yes vote that also names a pid: remember the pid.
+ {?MODULE, Node, {vote_yes, Tid, Pid}} ->
+ rec_all(Tail, Tid, Res, [Pid | Pids]);
+ %% A no vote turns the accumulated outcome into an abort.
+ {?MODULE, Node, {vote_no, Tid, Reason}} ->
+ rec_all(Tail, Tid, {do_abort, Reason}, Pids);
+ %% committed/aborted acknowledgements leave Res untouched.
+ {?MODULE, Node, {committed, Tid}} ->
+ rec_all(Tail, Tid, Res, Pids);
+ {?MODULE, Node, {aborted, Tid}} ->
+ rec_all(Tail, Tid, Res, Pids);
+
+ {mnesia_down, Node} ->
+ %% Make sure that mnesia_tm knows it has died
+ %% it may have been restarted
+ Abort = {do_abort, {bad_commit, Node}},
+ %% The send is wrapped in catch so a failing send cannot crash
+ %% the collecting process.
+ catch {?MODULE, Node} ! {Tid, Abort},
+ rec_all(Tail, Tid, Abort, Pids)
+ end;
+rec_all([], _Tid, Res, Pids) ->
+ {Res, Pids}.
+
+%% Asks the transaction manager for its current transactions and
+%% returns one {Counter, OwnerPid, Status} tuple per coordinator
+%% entry, where Status comes from tr_status/2.
+get_transactions() ->
+    {info, Participant, Coordinator} = req(info),
+    Describe =
+        fun({Tid, _Tabs}) ->
+                Status = tr_status(Tid, Participant),
+                {Tid#tid.counter, Tid#tid.pid, Status}
+        end,
+    lists:map(Describe, Coordinator).
+
+%% Classifies Tid as 'participant' when one of the #participant{}
+%% records in the list belongs to it, otherwise as 'coordinator'.
+%%
+%% The list holds #participant{} records, so the lookup key has to be
+%% the tid field of the record. Element 1 of a record tuple is the
+%% record tag and can never equal a transaction id.
+tr_status(Tid, Participant) ->
+    case lists:keymember(Tid, #participant.tid, Participant) of
+        true -> participant;
+        false -> coordinator
+    end.
+
+%% Fetches the participant/coordinator info from the locally
+%% registered transaction manager without going through req/1.
+%% Returns {info, Part, Coord}, or {timeout, Timeout} when the server
+%% is not registered or does not answer within Timeout.
+get_info(Timeout) ->
+    NoAnswer = {timeout, Timeout},
+    case whereis(?MODULE) of
+        undefined ->
+            NoAnswer;
+        Server ->
+            Server ! {self(), info},
+            receive
+                {?MODULE, _From, {info, _Part, _Coord} = Info} ->
+                    Info
+            after Timeout ->
+                    NoAnswer
+            end
+    end.
+
+%% Prints the result of get_info/1 on Stream: either a notice that no
+%% information could be obtained, or the participant transactions
+%% followed by the coordinator transactions.
+display_info(Stream, {timeout, T}) ->
+    Notice = "---> No info about coordinator and participant transactions, "
+             "timeout ~p <--- ~n",
+    io:format(Stream, Notice, [T]);
+
+display_info(Stream, {info, Part, Coord}) ->
+    ShowParticipant = fun(P) -> pr_participant(Stream, P) end,
+    ShowCoordinator = fun({Tid, _Tabs}) -> pr_tid(Stream, Tid) end,
+    io:format(Stream, "---> Participant transactions <--- ~n", []),
+    lists:foreach(ShowParticipant, Part),
+    io:format(Stream, "---> Coordinator transactions <---~n", []),
+    lists:foreach(ShowCoordinator, Coord).
+
+%% Prints one participant record on Stream: its transaction id and
+%% its commit data. Commit data stored as a binary is decoded with
+%% binary_to_term/1 before printing.
+pr_participant(Stream, P) ->
+    Commit =
+        case P#participant.commit of
+            Packed when is_binary(Packed) -> binary_to_term(Packed);
+            Plain -> Plain
+        end,
+    pr_tid(Stream, P#participant.tid),
+    io:format(Stream, "with participant objects ~p~n", [Commit]).
+
+
+%% Prints the counter and the owning pid of a transaction id on Stream.
+pr_tid(Stream, Tid) ->
+    Serial = Tid#tid.counter,
+    Owner = Tid#tid.pid,
+    io:format(Stream, "Tid: ~p (owned by ~p) ~n", [Serial, Owner]).
+
+%% Prints what the transaction manager knows about the transaction
+%% whose counter equals Serial, first among the participants and
+%% then among the coordinators.
+info(Serial) ->
+    Banner = "Info about transaction with serial == ~p~n",
+    io:format(Banner, [Serial]),
+    {info, Participants, Coordinators} = req(info),
+    search_pr_participant(Serial, Participants),
+    search_pr_coordinator(Serial, Coordinators).
+
+
+%% Scans the coordinator list and prints the owner of every
+%% transaction whose counter matches S exactly. Always returns 'no'
+%% once the list is exhausted.
+search_pr_coordinator(_S, []) ->
+    no;
+search_pr_coordinator(S, [{Tid, _Ts} | Tail]) ->
+    Counter = Tid#tid.counter,
+    if
+        Counter =:= S ->
+            io:format( "Tid is coordinator, owner == \n", []),
+            display_pid_info(Tid#tid.pid);
+        true ->
+            ok
+    end,
+    search_pr_coordinator(S, Tail).
+
+%% Scans the participant records and, for every transaction whose
+%% counter equals S, prints its owner and the objects it wants to
+%% write (commit data stored as a binary is decoded first).
+%% Always returns 'false' once the list is exhausted.
+search_pr_participant(_S, []) ->
+    false;
+search_pr_participant(S, [P | Tail]) ->
+    Tid = P#participant.tid,
+    Commit0 = P#participant.commit,
+    if
+        Tid#tid.counter == S ->
+            io:format( "Tid is participant to commit, owner == \n", []),
+            display_pid_info(Tid#tid.pid),
+            io:format( "Tid wants to write objects \n",[]),
+            Commit =
+                case is_binary(Commit0) of
+                    true -> binary_to_term(Commit0);
+                    false -> Commit0
+                end,
+            io:format("~p~n", [Commit]);
+        true ->
+            ok
+    end,
+    %% Keep scanning: more than one record may match.
+    search_pr_participant(S, Tail).
+
+%% Prints a one-line summary (pid, initial call, current function,
+%% reductions, message queue length) for Pid, or "Dead process" when
+%% no process information can be obtained.
+display_pid_info(Pid) ->
+    case rpc:pinfo(Pid) of
+        undefined ->
+            io:format( "Dead process \n");
+        {badrpc, _Reason} ->
+            %% The node owning Pid could not be reached; without this
+            %% clause the tuple would be handed to fetch/2 and crash.
+            io:format( "Dead process \n");
+        Info ->
+            Call = fetch(initial_call, Info),
+            Curr = case fetch(current_function, Info) of
+                       {Mod,F,Args} when is_list(Args) ->
+                           {Mod,F,length(Args)};
+                       Other ->
+                           Other
+                   end,
+            Reds = fetch(reductions, Info),
+            %% Use the queue length item directly. Counting the
+            %% 'messages' item crashes in length/1 when that item is
+            %% absent (fetch/2 then returns 0), and it copies the
+            %% whole mailbox just to count it.
+            LM = fetch(message_queue_len, Info),
+            pformat(io_lib:format("~p", [Pid]),
+                    io_lib:format("~p", [Call]),
+                    io_lib:format("~p", [Curr]), Reds, LM)
+    end.
+
+%% Prints one row of the process table: three left-aligned string
+%% columns followed by two right-aligned term columns.
+pformat(A1, A2, A3, A4, A5) ->
+    RowFormat = "~-12s ~-21s ~-21s ~9w ~4w~n",
+    Columns = [A1, A2, A3, A4, A5],
+    io:format(RowFormat, Columns).
+
+%% Looks up Key in a list of {Key, Value} pairs and returns the value.
+%% Returns 0 when the key is missing or the entry is not a pair.
+fetch(Key, Info) ->
+    case lists:keyfind(Key, 1, Info) of
+        {_, Val} -> Val;
+        _ -> 0
+    end.
+
+
+%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%% reconfigure stuff comes here ......
+%%%%%%%%%%%%%%%%%%%%%
+
+%% Called when node N has gone down. For each coordinator entry it
+%% decides whether the coordinating process (and its friends) must be
+%% told about the mnesia_down:
+%%  - committed: only if N is among the nodes the store is still
+%%    waiting on for a commit ack;
+%%  - aborted: never;
+%%  - any other outcome: always.
+%% Skipping the message avoids spurious mnesia_down notifications.
+reconfigure_coordinators(_N, []) ->
+    ok;
+reconfigure_coordinators(N, [{Tid, [Store | _]} | Coordinators]) ->
+    MustNotify =
+        case mnesia_recover:outcome(Tid, unknown) of
+            committed ->
+                WaitingNodes = ?ets_lookup(Store, waiting_for_commit_ack),
+                lists:keymember(N, 2, WaitingNodes);
+            aborted ->
+                false;
+            _ ->
+                true
+        end,
+    case MustNotify of
+        true ->
+            %% Tell the coordinator about the mnesia_down
+            send_mnesia_down(Tid, Store, N);
+        false ->
+            ignore
+    end,
+    reconfigure_coordinators(N, Coordinators).
+
+%% Sends {mnesia_down, Node} to the process owning Tid and to every
+%% 'friends' element found in Store.
+send_mnesia_down(Tid, Store, Node) ->
+    Receivers = [Tid#tid.pid | get_elements(friends, Store)],
+    send_to_pids(Receivers, {mnesia_down, Node}).
+
+%% Sends Msg to every pid in the list; elements that are not pids are
+%% silently skipped. Returns ok.
+send_to_pids(Pids, Msg) ->
+    Deliver = fun(Pid) when is_pid(Pid) -> Pid ! Msg;
+                 (_NotAPid) -> ok
+              end,
+    lists:foreach(Deliver, Pids).
+
+%% Called when node N has gone down. Walks the participant records
+%% and, for each transaction that involved N and whose coordinator
+%% process lived on N, determines the outcome and announces it to
+%% the surviving nodes. Returns [].
+reconfigure_participants(_, []) ->
+    [];
+reconfigure_participants(N, [P | Tail]) ->
+    Involved = lists:member(N, P#participant.disc_nodes) or
+        lists:member(N, P#participant.ram_nodes),
+    Tid = P#participant.tid,
+    if
+        not Involved ->
+            %% Ignore, since N took no part in this transaction.
+            ignore;
+        node(Tid#tid.pid) /= N ->
+            %% Another participant node died. Ignore.
+            ignore;
+        true ->
+            %% The coordinator node has died and we must determine
+            %% the outcome of the transaction and tell mnesia_tm on
+            %% all nodes (including the local node) about it
+            verbose("Coordinator ~p in transaction ~p died~n",
+                    [Tid#tid.pid, Tid]),
+            Nodes = P#participant.disc_nodes ++ P#participant.ram_nodes,
+            AliveNodes = Nodes -- [N],
+            Protocol = P#participant.protocol,
+            tell_outcome(Tid, Protocol, N, AliveNodes, AliveNodes)
+    end,
+    reconfigure_participants(N, Tail).
+
+%% Determines the outcome of transaction Tid by consulting
+%% mnesia_recover:what_happened/3 with CheckNodes, then broadcasts
+%% the matching decision to mnesia_tm on every node in TellNodes.
+%% Returns the outcome (aborted | committed).
+tell_outcome(Tid, Protocol, Node, CheckNodes, TellNodes) ->
+    Outcome = mnesia_recover:what_happened(Tid, Protocol, CheckNodes),
+    Decision =
+        case Outcome of
+            aborted -> {do_abort, {mnesia_down, Node}};
+            committed -> do_commit
+        end,
+    rpc:abcast(TellNodes, ?MODULE, {Tid, Decision}),
+    Outcome.
+
+%% Shuts the transaction manager down: tells the owner of every
+%% coordinator transaction that the local node is going down, stops
+%% the checkpoint and log services, and exits with reason shutdown.
+do_stop(#state{coordinators = Coordinators}) ->
+    Down = {mnesia_down, node()},
+    Notify = fun(Tid) -> Tid#tid.pid ! Down end,
+    lists:foreach(Notify, gb_trees:keys(Coordinators)),
+    mnesia_checkpoint:stop(),
+    mnesia_log:stop(),
+    exit(shutdown).
+
+%% Forwards a fixtable request to the transaction manager.
+%% Exits with {no_exists, Tab} when the server answers 'error';
+%% any other answer is returned unchanged.
+fixtable(Tab, Lock, Me) ->
+    Reply = req({fixtable, [Tab, Lock, Me]}),
+    case Reply of
+        error -> exit({no_exists, Tab});
+        _ -> Reply
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+%% sys callback: resumes the server loop with the given state after
+%% a system message has been handled.
+system_continue(_Parent, _DebugOpts, LoopState) ->
+    doit_loop(LoopState).
+
+%% sys callback: terminates the server through the regular stop path,
+%% regardless of the reason given.
+system_terminate(_Why, _Parent, _DebugOpts, LoopState) ->
+    do_stop(LoopState).
+
+%% sys callback: converts the coordinator/participant containers in
+%% the state between their two representations.
+%%  - downgrade: gb_trees become plain lists (coordinators as a
+%%    {Key, Value} list, participants as the list of tree values);
+%%  - otherwise: lists become gb_trees (participants keyed on their
+%%    tid).
+%% A state that already has the target representation is returned
+%% unchanged.
+system_code_change(State = #state{coordinators = Cs0, participants = Ps0},
+                   _Module, _OldVsn, downgrade) when is_tuple(Cs0) ->
+    Cs = gb_trees:to_list(Cs0),
+    Ps = gb_trees:values(Ps0),
+    {ok, State#state{coordinators = Cs, participants = Ps}};
+system_code_change(State = #state{}, _Module, _OldVsn, downgrade) ->
+    {ok, State};
+
+system_code_change(State = #state{coordinators = Cs0, participants = Ps0},
+                   _Module, _OldVsn, _Extra) when is_list(Cs0) ->
+    Cs = gb_trees:from_orddict(lists:sort(Cs0)),
+    Keyed = [{P#participant.tid, P} || P <- Ps0],
+    Ps = gb_trees:from_orddict(lists:sort(Keyed)),
+    {ok, State#state{coordinators = Cs, participants = Ps}};
+system_code_change(State = #state{}, _Module, _OldVsn, _Extra) ->
+    {ok, State}.