aboutsummaryrefslogtreecommitdiffstats
path: root/lib/mnesia
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mnesia')
-rw-r--r--lib/mnesia/AUTHORS5
-rw-r--r--lib/mnesia/Makefile39
-rw-r--r--lib/mnesia/doc/html/.gitignore0
-rw-r--r--lib/mnesia/doc/man3/.gitignore0
-rw-r--r--lib/mnesia/doc/man6/.gitignore0
-rw-r--r--lib/mnesia/doc/misc/Makefile64
-rw-r--r--lib/mnesia/doc/misc/implementation.txt375
-rw-r--r--lib/mnesia/doc/pdf/.gitignore0
-rw-r--r--lib/mnesia/doc/src/DATA100
-rw-r--r--lib/mnesia/doc/src/DATA217
-rw-r--r--lib/mnesia/doc/src/FRUITS12
-rw-r--r--lib/mnesia/doc/src/Makefile240
-rw-r--r--lib/mnesia/doc/src/Mnesia_App_A.xml87
-rw-r--r--lib/mnesia/doc/src/Mnesia_App_B.xmlsrc41
-rw-r--r--lib/mnesia/doc/src/Mnesia_App_C.xmlsrc43
-rw-r--r--lib/mnesia/doc/src/Mnesia_App_D.xmlsrc43
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap1.xml265
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap2.xmlsrc647
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap3.xml556
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap4.xmlsrc1171
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap5.xmlsrc1398
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap7.xmlsrc890
-rw-r--r--lib/mnesia/doc/src/Mnesia_chap8.xml64
-rw-r--r--lib/mnesia/doc/src/book.gifbin0 -> 1081 bytes
-rw-r--r--lib/mnesia/doc/src/book.xml48
-rw-r--r--lib/mnesia/doc/src/bup.erl239
-rw-r--r--lib/mnesia/doc/src/company.erl373
-rw-r--r--lib/mnesia/doc/src/company.fig88
-rw-r--r--lib/mnesia/doc/src/company.gifbin0 -> 2781 bytes
-rw-r--r--lib/mnesia/doc/src/company.hrl50
-rw-r--r--lib/mnesia/doc/src/company.ps213
-rw-r--r--lib/mnesia/doc/src/company_o.erl144
-rw-r--r--lib/mnesia/doc/src/company_o.hrl38
-rw-r--r--lib/mnesia/doc/src/fascicules.xml18
-rw-r--r--lib/mnesia/doc/src/make.dep46
-rw-r--r--lib/mnesia/doc/src/mnesia.gifbin0 -> 15184 bytes
-rw-r--r--lib/mnesia/doc/src/mnesia.xml3100
-rw-r--r--lib/mnesia/doc/src/mnesia_frag_hash.xml166
-rw-r--r--lib/mnesia/doc/src/mnesia_registry.xml104
-rw-r--r--lib/mnesia/doc/src/note.gifbin0 -> 1539 bytes
-rw-r--r--lib/mnesia/doc/src/notes.gifbin0 -> 2005 bytes
-rw-r--r--lib/mnesia/doc/src/notes.xml383
-rw-r--r--lib/mnesia/doc/src/notes_history.xml322
-rw-r--r--lib/mnesia/doc/src/part.xml49
-rw-r--r--lib/mnesia/doc/src/part_notes.xml41
-rw-r--r--lib/mnesia/doc/src/part_notes_history.xml41
-rw-r--r--lib/mnesia/doc/src/ref_man.gifbin0 -> 1530 bytes
-rw-r--r--lib/mnesia/doc/src/ref_man.xml44
-rw-r--r--lib/mnesia/doc/src/summary.html.src1
-rw-r--r--lib/mnesia/doc/src/user_guide.gifbin0 -> 1581 bytes
-rw-r--r--lib/mnesia/doc/src/warning.gifbin0 -> 1498 bytes
-rw-r--r--lib/mnesia/ebin/.gitignore0
l---------lib/mnesia/examples/DATA1
-rw-r--r--lib/mnesia/examples/Makefile103
-rw-r--r--lib/mnesia/examples/bench/Makefile10
-rw-r--r--lib/mnesia/examples/bench/README211
-rw-r--r--lib/mnesia/examples/bench/bench.config121
-rw-r--r--lib/mnesia/examples/bench/bench.config221
-rw-r--r--lib/mnesia/examples/bench/bench.config323
-rw-r--r--lib/mnesia/examples/bench/bench.config423
-rw-r--r--lib/mnesia/examples/bench/bench.config527
-rw-r--r--lib/mnesia/examples/bench/bench.config627
-rw-r--r--lib/mnesia/examples/bench/bench.config735
-rw-r--r--lib/mnesia/examples/bench/bench.erl327
-rw-r--r--lib/mnesia/examples/bench/bench.hrl107
-rwxr-xr-xlib/mnesia/examples/bench/bench.sh23
-rw-r--r--lib/mnesia/examples/bench/bench_generate.erl684
-rw-r--r--lib/mnesia/examples/bench/bench_populate.erl200
-rw-r--r--lib/mnesia/examples/bench/bench_trans.erl184
l---------lib/mnesia/examples/bup.erl1
l---------lib/mnesia/examples/company.erl1
l---------lib/mnesia/examples/company.hrl1
l---------lib/mnesia/examples/company_o.erl1
l---------lib/mnesia/examples/company_o.hrl1
-rw-r--r--lib/mnesia/examples/mnesia_meter.erl465
-rw-r--r--lib/mnesia/examples/mnesia_tpcb.erl1268
-rw-r--r--lib/mnesia/include/Makefile61
-rw-r--r--lib/mnesia/include/mnemosyne.hrl18
-rw-r--r--lib/mnesia/info2
-rw-r--r--lib/mnesia/priv/.gitignore0
-rw-r--r--lib/mnesia/src/Makefile139
-rw-r--r--lib/mnesia/src/mnesia.app.src52
-rw-r--r--lib/mnesia/src/mnesia.appup.src37
-rw-r--r--lib/mnesia/src/mnesia.erl2883
-rw-r--r--lib/mnesia/src/mnesia.hrl121
-rw-r--r--lib/mnesia/src/mnesia_backup.erl201
-rw-r--r--lib/mnesia/src/mnesia_bup.erl1186
-rw-r--r--lib/mnesia/src/mnesia_checkpoint.erl1295
-rw-r--r--lib/mnesia/src/mnesia_checkpoint_sup.erl42
-rw-r--r--lib/mnesia/src/mnesia_controller.erl2182
-rw-r--r--lib/mnesia/src/mnesia_dumper.erl1218
-rw-r--r--lib/mnesia/src/mnesia_event.erl260
-rw-r--r--lib/mnesia/src/mnesia_frag.erl1361
-rw-r--r--lib/mnesia/src/mnesia_frag_hash.erl151
-rw-r--r--lib/mnesia/src/mnesia_frag_old_hash.erl132
-rw-r--r--lib/mnesia/src/mnesia_index.erl384
-rw-r--r--lib/mnesia/src/mnesia_kernel_sup.erl65
-rw-r--r--lib/mnesia/src/mnesia_late_loader.erl108
-rw-r--r--lib/mnesia/src/mnesia_lib.erl1306
-rw-r--r--lib/mnesia/src/mnesia_loader.erl828
-rw-r--r--lib/mnesia/src/mnesia_locker.erl1196
-rw-r--r--lib/mnesia/src/mnesia_log.erl1025
-rw-r--r--lib/mnesia/src/mnesia_monitor.erl823
-rw-r--r--lib/mnesia/src/mnesia_recover.erl1196
-rw-r--r--lib/mnesia/src/mnesia_registry.erl280
-rw-r--r--lib/mnesia/src/mnesia_schema.erl3027
-rw-r--r--lib/mnesia/src/mnesia_snmp_hook.erl259
-rw-r--r--lib/mnesia/src/mnesia_snmp_sup.erl42
-rw-r--r--lib/mnesia/src/mnesia_sp.erl42
-rw-r--r--lib/mnesia/src/mnesia_subscr.erl494
-rw-r--r--lib/mnesia/src/mnesia_sup.erl131
-rw-r--r--lib/mnesia/src/mnesia_text.erl194
-rw-r--r--lib/mnesia/src/mnesia_tm.erl2301
-rw-r--r--lib/mnesia/vsn.mk15
114 files changed, 40387 insertions, 0 deletions
diff --git a/lib/mnesia/AUTHORS b/lib/mnesia/AUTHORS
new file mode 100644
index 0000000000..70e87038a8
--- /dev/null
+++ b/lib/mnesia/AUTHORS
@@ -0,0 +1,5 @@
+Original Authors:
+ Claes Wikstr�m Wrote the initial release.
+ H�kan Mattsson Rewrote alot of it.
+ Dan Gudmundsson Current maintainer.
+Contributors:
diff --git a/lib/mnesia/Makefile b/lib/mnesia/Makefile
new file mode 100644
index 0000000000..f687b0ecdb
--- /dev/null
+++ b/lib/mnesia/Makefile
@@ -0,0 +1,39 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1996-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Macros
+# ----------------------------------------------------
+
+SUB_DIRECTORIES = src include examples doc/src
+
+include vsn.mk
+VSN = $(MNESIA_VSN)
+
+SPECIAL_TARGETS =
+
+# ----------------------------------------------------
+# Default Subdir Targets
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_subdir.mk
+
diff --git a/lib/mnesia/doc/html/.gitignore b/lib/mnesia/doc/html/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/mnesia/doc/html/.gitignore
diff --git a/lib/mnesia/doc/man3/.gitignore b/lib/mnesia/doc/man3/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/mnesia/doc/man3/.gitignore
diff --git a/lib/mnesia/doc/man6/.gitignore b/lib/mnesia/doc/man6/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/mnesia/doc/man6/.gitignore
diff --git a/lib/mnesia/doc/misc/Makefile b/lib/mnesia/doc/misc/Makefile
new file mode 100644
index 0000000000..e5fa327f5b
--- /dev/null
+++ b/lib/mnesia/doc/misc/Makefile
@@ -0,0 +1,64 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1996-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+OTP_MAKE_ROOT=/home/super/otp/otp_make
+include $(OTP_MAKE_ROOT)/otp.mk
+
+#
+# Release Macros
+#
+
+#
+# Common macros
+#
+
+
+SGML_FILES= release_notes.sgml
+
+FIG_FILES =
+HTML_FILES= $(SGML_FILES:.sgml=.html)
+TEX_FILES= $(SGML_FILES:.sgml=.tex)
+DVI_FILES= $(SGML_FILES:.sgml=.dvi)
+PSFIG_FILES= $(FIG_FILES:.fig=.ps)
+PS_FILES= $(SGML_FILES:.sgml=.ps)
+GIF_FILES= min_head.gif
+ERL_FILES=
+HRL_FILES=
+DATA_FILES=
+
+
+#
+# Make Rules
+#
+
+all $(DEFAULT_OPT_TARGETS) $(DEFAULT_DEBUG_TARGETS): $(HTML_FILES) $(TEX_FILES) $(PSFIG_FILES) $(DVI_FILES) $(PS_FILES)
+
+clean:
+ @rm -f *.toc *.log *.aux *.tex sgmls_output sgmls_errs $(HTML_FILES) $(TEX_FILES) $(DVI_FILES) $(PSFIG_FILES) $(PS_FILES)
+
+#
+# Release Targets
+#
+include $(OTP_MAKE_ROOT)/otp_release_targets.mk
+
+release_variant: opt.$(TARGET)
+ $(MKPATH.$(TARGET)) $(RELEASE_PATH)/$(TARGET)/lib/mnesia-$(VSN)/doc/misc
+ $(INSTALLFILES.$(TARGET)) $(HTML_FILES) $(GIF_FILES) $(ERL_FILES) $(HRL_FILES) $(DATA_FILES) $(PS_FILES) $(RELEASE_PATH)/$(TARGET)/lib/mnesia-$(VSN)/doc/misc
+
diff --git a/lib/mnesia/doc/misc/implementation.txt b/lib/mnesia/doc/misc/implementation.txt
new file mode 100644
index 0000000000..1b8369e466
--- /dev/null
+++ b/lib/mnesia/doc/misc/implementation.txt
@@ -0,0 +1,375 @@
+
+Mnesia
+
+1 Introduction
+
+This document aims to give a brief introduction of the implementation
+of mnesia, it's data and functions.
+
+H�kan has written other mnesia papers of interest, (see ~hakan/public_html/):
+o Resource consumption (mnesia_consumption.txt)
+o What to think about when changing mnesia (mnesia_upgrade_policy.txt)
+o Mnesia internals course (mnesia_internals_slides.pdf)
+o Mnesia overview (mnesia_overview.pdf)
+
+1.1. Basic concepts
+
+In a mnesia cluster all nodes are equal, there is no concept off
+master or backup nodes. That said when mixing disc based (uses the
+disc to store meta information) nodes and ram based (do not use disc
+at all) nodes the disc based ones sometimes have precedence over ram
+based nodes.
+
+2 Meta Data
+
+Mnesia has two types of global meta data, static and dynamic.
+All the meta data is stored in the ets table mnesia_gvar.
+
+2.1 Static Meta Data
+The static data is the schema information, usually kept in
+'schema.DAT' file, the data is created with
+mnesia:create_schema(Nodes) for disc nodes (i.e. nodes which uses the
+disc). Ram based mnesia nodes create an empty schema at startup.
+
+The static data i.e. schema, contains information about which nodes
+are involved in the cluster and which type (ram or disc) they have. It
+also contains information about which tables exist on which node and
+so on.
+
+The schema information (static data) must always be the same on all
+active nodes in the mnesia cluster. Schema information is updated via
+schema functions, e.g. mnesia:add_table_copy/3,
+mnesia:change_table_copy/3...
+
+2.2 Dynamic Meta Data
+
+The dynamic data is transient and is local to each mnesia node
+in the cluster. Examples of dynamic data is: currently active mnesia
+nodes, which tables are currently available and where are they
+located. Dynamic data is updated internally by each mnesia during the
+nodes lifetime, i.e. when nodes goes up and down or are added to or
+deleted from the mnesia cluster.
+
+3 Processes and Files
+
+The most important processes in mnesia are mnesia_monitor,
+mnesia_controller, mnesia_tm and mnesia_locker.
+
+Mnesia_monitor acts as supervisor and monitors all resources. It
+listens for nodeup and nodedown and keeps links to other mnesia nodes,
+if a node goes down it forwards the information to all the necessary
+processes, e.g. mnesia_controller, mnesia_locker, mnesia_tm and all
+transactions. During start it negotiates the protocol version with
+the other nodes and keep track of which nodes uses which version. The
+monitor process also detects and warns about partioned networks, it is
+then up to the user to deal with them. It is the owner of all open
+files, ets tables and so on.
+
+The mnesia_controller process is responsible for loading tables,
+keeping the dynamic meta data updated, synchronize dangerous work such
+as schema transactions vs dump log operation vs table loading/sending.
+
+The last two processes are involved in all transactions, the
+mnesia_locker process manages transaction locks, and mnesia_tm manages
+all transaction work.
+
+4 Startup and table Loading
+
+The early startup is mostly driven by the mnesia_tm process/module,
+logs are dumped (see log dumping), node-names of other nodes in the
+cluster are retrieved from the static meta data or from environment
+parameters and initial connections are made to the other mnesia
+nodes.
+
+The rest of start up is driven by the mnesia_controller process where
+the schema (static meta data) is merged between each node, this is
+done to keep the schema consistent between all nodes in the
+cluster. When the schema is merged all local tables are put in a
+loading queue, tables which are only available or have local content
+is loaded directly from disc or created if they are type ram_copies.
+
+The other tables are kept in the queue until mnesia decides whether to
+load them from disk or from another node. If another mnesia node has
+already loaded the table, i.e. got a copy in ram or an open dets file,
+the table is always loaded from that node to keep the data consistent.
+If no other node has a loaded copy of the table, some mnesia node has
+to load it first, and the other nodes can copy the table from the
+first node. Mnesia keeps information about when other nodes went down,
+a starting mnesia will check which nodes have been down, if some of
+the nodes have not been down the starting node will let those nodes
+load the table first. If all other nodes have been down then the
+starting mnesia will load the table. The node that is allowed to load
+the table will load it and the other nodes will copy it from that node.
+
+If a node, which the starter node has not a 'mnesia_down' note from,
+is down the starter node will have to wait until that node comes up
+and decision can be taken, this behavior can be overruled by user
+settings. The order of table loading could be described as:
+
+1. Mnesia downs, Normally decides from where mnesia should load tables.
+2. Master nodes (overrides mnesia downs).
+3. Force load (overrides Master nodes).
+ 1) If possible, load table from active master nodes
+ 2) if no master nodes is active load from any active nodes,
+ 3) if no active node has an active table get local copy
+ (if ram create empty one)
+
+Currently mnesia can handle one download and one upload at the same
+time. Dumping and loading/sending may run simultaneously but neither
+of them may run during schema commit. Loaders/senders may not start if
+a schema commit is enqueued. That synchronization is made to prohibit
+that the schema transaction modifies the meta data and the
+prerequisites of the table loading changes.
+
+The actual loading of a table is implemented in 'mnesia_loader.erl'.
+It currently works as follows:
+
+Receiver Sender
+-------- ------
+Spawned
+Find sender node
+Queue sender request ---->
+ Spawned
+*)Spawn real receiver <---- Send init info
+*)Grab schema lock for Grab write table lock
+ that table to avoid Subscribe receiver
+ deadlock with schema transactions to table updates
+Create table (ets or dets) Release Lock
+
+Get data (with ack ---->
+ as flow control) <---- Burst data to receiver
+ Send no_more
+Apply subscription messages
+Store copy on disc Grab read lock
+Create index, snmp data Update meta data info
+and checkpoints if needed cleanup
+no_more ---->
+ Release lock
+
+*) Don't spawn or grab schema lock if operation is add_table_copy,
+ it's already a schema operation.
+
+
+5 Transaction
+
+Transaction are normally driven from the client process, i.e. the
+process that call 'mnesia:transaction'. The client first acquires a
+globally unique transaction id (tid) and temporary transaction storage
+(ts an ets table) from mnesia_tm and then executes the transaction
+fun. Mnesia-api calls such as 'mnesia:write/1' and 'mnesia:read'
+contains code for acquiring the needed locks. Intermediate database
+states and acquired locks are kept in the transaction storage, and all
+mnesia operations has to be "patched" against that store. I.e. a write
+operation in a transaction should be seen within (and only within)
+that transaction, if the same key is read after the write.
+After the transaction fun is completed the ts is analyzed to see which
+nodes are involved in the transaction, and what type of commit protocol
+shall be used. Then the result is committed and additional work such as
+snmp, checkpoints and index updates are performed. The transaction is
+finish by releasing all resources.
+
+An example:
+
+Example = fun(X) ->
+ {table1, key, Value} = mnesia:read(table1, key),
+ ok = mnesia:write(table1, {table1, key, Value+X}),
+ {table1, key, Updated} = mnesia:read(table1, key),
+ Updated
+ end,
+mnesia:transaction(Example, [10]).
+
+A message overview of a simple successful asynchronous transaction
+ non local
+Client Process mnesia_tm(local) mnesia_locker mnesia_tm
+------------------------------------------------------------------------
+Get tid ---->
+ <--- Tid and ts
+Get read lock
+from available node ------------------------------->
+Value <----------Value or restart trans---
+Patch value against ts
+
+Get write lock
+from all nodes ------------------------------->
+ ------------------------------->
+ok's <<---------ok's or restart trans---
+write data in ts
+
+Get read lock,already done.
+Read data Value
+'Patch' data with ts
+Fun return Value+X.
+
+If everything is ok
+commit transaction
+
+Find the nodes that the transaction
+needs to be committed on and
+collect every update from ts.
+
+Ask for commit ----------->
+ ----------------------------------------------->
+
+Ok's <<--------- ------------------------------
+Commit ----------------------------------------------->
+log commit decision on disk
+Commit locally
+ update snmp
+ update checkpoints
+ notify subscribers
+ update index
+
+Release locks ------------------------------->
+Release transaction ----->
+
+Return trans result
+----------------------
+
+If all needed resources are available, i.e. the needed tables are
+loaded somewhere in the cluster during the transaction, and the user
+code doesn't crash, a transaction in mnesia won't fail. If something
+happens in the mnesia cluster such as node down from the replica the
+transaction was about to read from, or that a lock couldn't be
+acquired and the transaction was not allowed to be queued on that
+lock, the transaction is restarted, i.e. all resources are released
+and the fun is called again. By default a transaction can be
+restarted is infinity many times, but the user may choose to limit
+the number of restarts.
+
+The dirty operations don't do any of the above they just finds out
+where to write the data, logs the operation to disk and casts (or call
+in case of sync_dirty operation) the data to those nodes. Therefore
+the dirty operations have the drawback that each write or delete sends
+a message per operation to the involved nodes.
+
+There is also a synchronous variant of 2-phase commit protocol which
+waits on an additional ack message after the transaction is committed
+on every node. The intention is to provide the user with a way to
+solve overloading problems.
+
+A 3-phase commit protocol is used for schema transaction or if the
+transaction result is going to be committed in a asymmetrical way,
+i.e. a transaction that writes to table a and b where table a and b
+have replicas on different nodes. The outcome of the transactions are
+stored temporary in an ets table and in the log file.
+
+6 Schema transactions
+
+Schema transactions are handled differently than ordinary
+transactions, they are implemented in mnesia_schema (and in
+mnesia_dumper). The schema operation is always spawned to protect from
+that the client process dies during the transaction.
+
+The actual transaction fun checks the pre-conditions and acquires the
+needed locks and notes the operation in the transaction store. During
+the commit, the schema transaction runs a schema prepare operation (on
+every node) that does the needed prerequisite job. Then the operation
+is logged to disc, and the actual commit work is done by dumping the
+log. Every schema operation has special clause in mnesia_dumper to
+handle the finishing work. Every schema prepare operation has a
+matching undo_prepare operation which needs to be invoked if the
+transaction is aborted.
+
+7 Locks
+
+"The locking algorithm is a traditional 'two-phase locking'* and the
+deadlock prevention is 'wait-die'*, time stamps for the wait-die algorithm
+is 'Lamport clock'* maintained by mnesia_tm. The Lamport clock is kept
+when the transaction is restarted to avoid starving."
+
+* References can be found in the paper mnesia_overview.pdf
+ Klacke, H�kan and Hans wrote about mnesia.
+
+What the quote above means is that read locks are acquired on the
+replica that mnesia read from, write locks are acquired on all nodes
+which have a replica. Several read lock can lock the same object, but
+write locks are exclusive. The transaction identifier (tid) is a ever
+increasing system uniq counter which have the same sort order on every
+node (a Lamport clock), which enables mnesia_locker to order the lock
+requests. When a lock request arrives, mnesia_locker checks whether
+the lock is available, if it is a 'granted' is sent back to the client
+and the lock is noted as taken in an ets table. If the lock is already
+occupied, it's tid is compared with tid of the transaction holding the
+lock. If the tid of holding transaction is greater than the tid of
+asking transaction it's allowed to be put in the lock queue (another
+ets table) and no response is sent back until the lock is released, if
+not the transaction will get a negative response and mnesia_tm will
+restart the transaction after it has slept for a random time.
+
+Sticky locks works almost as a write lock, the first time a sticky
+lock is acquired a request is sent to all nodes. The lock is marked as
+taken by the requesting node (not transaction), when the lock is later
+released it's only released on the node that has the sticky lock,
+thus the next time a transaction is requesting the lock it don't need
+to ask the others nodes. If another node wants the lock it has to request
+a lock release first, before it can acquire the lock.
+
+8 Fragmented tables
+
+Fragmented tables are used to split a large table in smaller parts.
+It is implemented as a layer between the client and mnesia which
+extends the meta data with additional properties and maps a {table,
+key} tuple to a table_fragment.
+
+The default mapping is erlang:phash() but the user may provide his own
+mapping function to be able to predict which records is stored in
+which table fragment, e.g. the client may want to steer where a
+record generated from a certain device is placed.
+
+The foreign key is used to co-locate other tables to the same node.
+The other additinal table attributes are also used to distribute the
+table fragments.
+
+9 Log Dumping
+
+All operations on disk tables are stored on a log 'LATEST.LOG' on
+disk, so mnesia can redo the transactions if the node goes down.
+Dumping the log means that mnesia moves the committed data from the
+general log to the table specific disk storage. To avoid that the log
+grows to large and uses a lot of disk space and makes the startup slow,
+mnesia dumps the log during it's uptime. There are two triggers that
+start the log dumping, timeouts and the number of commits since last
+dump, both are user configurable.
+
+Disc copies tables are implemented with two disk_log files, one
+'table.DCD' (disc copies data) and one 'table.DCL' (disc copies log).
+The dcd contains raw records, and the dcl contains operations on that
+table, i.e. '{write, {table, key, value}}' or '{delete, {table,
+key}}'. First time a record for a specific table is found when
+dumping the table, the size of both the dcd and the dcl files are
+checked. And if the sizeof(dcl)/sizeof(dcd) is greater than a
+threshold, the current ram table is dumped to file 'table.DCD' and the
+corresponding dcl file is deleted, and all other records in the
+general log that belongs to that table are ignored. If the threshold
+is not meet than the operations in the general log to that table are
+appended to the dcl file. On start up both files are read, first the
+contents of the dcd are loaded to an ets table, then it's modified by
+the operations stored in the corresponding dcl file.
+
+Disc only copies tables updates the 'dets' file directly when
+committing the data so those entries can be ignored during normal log
+dumping, they are only added to the 'dets' file during startup when
+mnesia don't know the state of the disk table.
+
+10 Checkpoints and backups
+
+Checkpoints are created to be able to take snapshots of the database,
+which is pretty good when you want consistent backups, i.e. you don't
+want half of a transaction in the backup. The checkpoint creates a
+shadow table (called retainer) for each table involved in the
+checkpoint. When a checkpoint is requested it will not start until all
+ongoing transactions are completed. The new transactions will update
+both the real table and update the shadow table with operations to
+undo the changes on the real table, when a key is modified the first
+time. I.e. when write operation '{table, a, 14}' is made, the shadow
+table is checked if key 'a' has a undo operation, if it has, nothing
+more is done. If not a {write, {table, a, OLD_VALUE}} is added to the
+shadow table if the real table had an old value, if not a {delete,
+{table, a}} operation is added to the shadow table.
+
+The backup is taken by copying every record in the real table and then
+appending every operation in the shadow table to the backup, thus
+undoing the changes that where made since the checkpoint where
+started.
+
+
diff --git a/lib/mnesia/doc/pdf/.gitignore b/lib/mnesia/doc/pdf/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/mnesia/doc/pdf/.gitignore
diff --git a/lib/mnesia/doc/src/DATA b/lib/mnesia/doc/src/DATA
new file mode 100644
index 0000000000..5b4bedacaf
--- /dev/null
+++ b/lib/mnesia/doc/src/DATA
@@ -0,0 +1,100 @@
+%S0
+{tables,
+ [{employee, [{attributes, [emp_no,name,salary,sex, phone,room_no]}]},
+ {dept, [{attributes, [id, name]}]},
+ {project, [{attributes, [name, number]}]},
+ {manager, [{attributes, [emp, dept]},
+ {type, bag}]},
+ {at_dep, [{attributes, [emp, dept_id]}]},
+ {in_proj, [{attributes, [emp, proj_name]},
+ {type, bag}]}
+ ]
+}.
+
+%E0
+
+
+%S1
+{employee, 104465, "Johnson Torbjorn", 1, male, 99184, {242,038}}.
+{employee, 107912, "Carlsson Tuula", 2, female, 94556, {242,056}}.
+{employee, 114872, "Dacker Bjarne", 3, male, 99415, {221,035}}.
+{employee, 114849, "Armstrong Josef", 3, male, 99452, {221,023}}.
+{employee, 114952, "Froberg Magnus", 5, male, 99469, {222,018}}.
+{employee, 104531, "Nilsson Hans", 3, male, 99495, {222,026}}.
+{employee, 104659, "Tornkvist Torbjorn", 2, male, 99514, {222,022}}.
+{employee, 104732, "Wikstrom Claes", 2, male, 99586, {221,015}}.
+{employee, 117716, "Fedoriw Anna", 1, female, 99143, {221,031}}.
+{employee, 115020, "Hansson Catrin", 6, female, 99129, {222,072}}.
+{employee, 115018, "Mattsson Hakan", 3, male, 99251, {203,348}}.
+{employee, 113069, "Eriksson Morgan", 6, male, 99186, {241,543}}.
+%E1
+
+%S2
+%% departments
+{dept, 'B/SF', "Open Telecom Platform"}.
+{dept, 'B/SFP', "OTP - Product Development"}.
+{dept, 'B/SFR', "Computer Science Laboratory"}.
+%E2
+
+
+%% projects
+%S3
+{project, erlang, 1}.
+{project, otp, 2}.
+{project, beam, 3}.
+{project, mnesia, 5}.
+{project, wolf, 6}.
+{project, documentation, 7}.
+{project, www, 8}.
+
+%E3
+
+
+
+%% manager
+%S4
+{manager, 104465, 'B/SF'}.
+{manager, 104465, 'B/SFP'}.
+{manager, 114872, 'B/SFR'}.
+
+%E4
+%S5
+{at_dep, 104465, 'B/SF'}.
+{at_dep, 107912, 'B/SF'}.
+{at_dep, 114872, 'B/SFR'}.
+{at_dep, 114849, 'B/SFR'}.
+{at_dep, 114952, 'B/SFR'}.
+{at_dep, 104531, 'B/SFR'}.
+{at_dep, 104659, 'B/SFR'}.
+{at_dep, 104732, 'B/SFR'}.
+{at_dep, 117716, 'B/SFP'}.
+{at_dep, 115020, 'B/SFP'}.
+{at_dep, 115018, 'B/SFP'}.
+{at_dep, 113069, 'B/SFP'}.
+
+
+%E5
+%S6
+{in_proj, 104465, otp}.
+{in_proj, 107912, otp}.
+{in_proj, 114872, otp}.
+{in_proj, 114849, otp}.
+{in_proj, 114849, erlang}.
+{in_proj, 114952, otp}.
+{in_proj, 104531, otp}.
+{in_proj, 104531, mnesia}.
+{in_proj, 104545, wolf}.
+{in_proj, 104659, otp}.
+{in_proj, 104659, wolf}.
+{in_proj, 104732, otp}.
+{in_proj, 104732, mnesia}.
+{in_proj, 104732, erlang}.
+{in_proj, 117716, otp}.
+{in_proj, 117716, documentation}.
+{in_proj, 115020, otp}.
+{in_proj, 115018, otp}.
+{in_proj, 115018, mnesia}.
+{in_proj, 113069, otp}.
+
+%E6
+
diff --git a/lib/mnesia/doc/src/DATA2 b/lib/mnesia/doc/src/DATA2
new file mode 100644
index 0000000000..e547e84d99
--- /dev/null
+++ b/lib/mnesia/doc/src/DATA2
@@ -0,0 +1,17 @@
+
+
+%S0
+{tables
+ [{foo, [{attributes, [x,y,z]}]}]}.
+%E0
+
+
+%S1
+{foo, a, benny, 18}.
+{foo, b, elvis, 19}.
+{foo, c, benny, 20}.
+{foo, d, elvis, 21}.
+{foo, e, klacke, 22}.
+{foo, f, hans, 23}.
+%E1
+
diff --git a/lib/mnesia/doc/src/FRUITS b/lib/mnesia/doc/src/FRUITS
new file mode 100644
index 0000000000..43d64f9d8c
--- /dev/null
+++ b/lib/mnesia/doc/src/FRUITS
@@ -0,0 +1,12 @@
+%0
+{tables,
+ [{fruit, [{attributes, [name, color, taste]}]},
+ {vegetable, [{attributes, [name, color, taste, price]}]}]}.
+
+
+{fruit, orange, orange, sweet}.
+{fruit, apple, green, sweet}.
+{vegetable, carrot, orange, carrotish, 2.55}.
+{vegetable, potato, yellow, none, 0.45}.
+%0
+
diff --git a/lib/mnesia/doc/src/Makefile b/lib/mnesia/doc/src/Makefile
new file mode 100644
index 0000000000..f45b5137a3
--- /dev/null
+++ b/lib/mnesia/doc/src/Makefile
@@ -0,0 +1,240 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1997-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../../vsn.mk
+VSN=$(MNESIA_VSN)
+APPLICATION=mnesia
+
+# ----------------------------------------------------
+# Include dependency
+# ----------------------------------------------------
+
+ifndef DOCSUPPORT
+include make.dep
+endif
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/$(APPLICATION)-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+XML_APPLICATION_FILES = ref_man.xml
+XML_REF3_FILES = \
+ mnesia.xml \
+ mnesia_frag_hash.xml \
+ mnesia_registry.xml
+
+XML_PART_FILES = \
+ part.xml \
+ part_notes.xml \
+ part_notes_history.xml
+
+XML_CHAPTER_FILES = \
+ Mnesia_chap1.xml \
+ Mnesia_chap2.xml \
+ Mnesia_chap3.xml \
+ Mnesia_chap4.xml \
+ Mnesia_chap5.xml \
+ Mnesia_chap7.xml \
+ Mnesia_chap8.xml \
+ Mnesia_App_A.xml \
+ Mnesia_App_B.xml \
+ Mnesia_App_C.xml \
+ Mnesia_App_D.xml \
+ notes.xml
+
+BOOK_FILES = book.xml
+
+
+XML_FILES = \
+ $(BOOK_FILES) $(XML_CHAPTER_FILES) \
+ $(XML_PART_FILES) $(XML_REF3_FILES) $(XML_APPLICATION_FILES)
+
+GIF_FILES = \
+ book.gif \
+ company.gif \
+ mnesia.gif \
+ note.gif \
+ notes.gif \
+ ref_man.gif \
+ user_guide.gif \
+ warning.gif
+
+XML_HTML_FILES = \
+ notes_history.xml
+
+
+# ----------------------------------------------------
+
+HTML_FILES = $(XML_APPLICATION_FILES:%.xml=$(HTMLDIR)/%.html) \
+ $(XML_HTML_FILES:%.xml=$(HTMLDIR)/%.html) \
+ $(XML_PART_FILES:%.xml=$(HTMLDIR)/%.html)
+
+INFO_FILE = ../../info
+EXTRA_FILES = summary.html.src \
+ $(DEFAULT_GIF_FILES) \
+ $(DEFAULT_HTML_FILES) \
+ $(XML_REF3_FILES:%.xml=$(HTMLDIR)/%.html) \
+ $(XML_CHAPTER_FILES:%.xml=$(HTMLDIR)/%.html)
+
+MAN3_FILES = $(XML_REF3_FILES:%.xml=$(MAN3DIR)/%.3)
+
+ifdef DOCSUPPORT
+
+HTML_REF_MAN_FILE = $(HTMLDIR)/index.html
+
+TOP_PDF_FILE = $(PDFDIR)/$(APPLICATION)-$(VSN).pdf
+
+else
+TEX_FILES_BOOK = \
+ $(BOOK_FILES:%.xml=%.tex)
+TEX_FILES_REF_MAN = $(XML_REF3_FILES:%.xml=%.tex) \
+ $(XML_APPLICATION_FILES:%.xml=%.tex)
+TEX_FILES_USERS_GUIDE = \
+ $(XML_CHAPTER_FILES:%.xml=%.tex)
+
+TOP_PDF_FILE = $(APPLICATION)-$(VSN).pdf
+TOP_PS_FILE = $(APPLICATION)-$(VSN).ps
+
+$(TOP_PDF_FILE): book.dvi ../../vsn.mk
+ $(DVI2PS) $(DVIPS_FLAGS) -f $< | $(DISTILL) $(DISTILL_FLAGS) > $@
+
+$(TOP_PS_FILE): book.dvi ../../vsn.mk
+ $(DVI2PS) $(DVIPS_FLAGS) -f $< > $@
+
+endif
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+XML_FLAGS +=
+DVIPS_FLAGS +=
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+$(HTMLDIR)/%.gif: %.gif
+ $(INSTALL_DATA) $< $@
+
+ifdef DOCSUPPORT
+
+docs: pdf html man
+
+$(TOP_PDF_FILE): $(XML_FILES)
+
+pdf: $(TOP_PDF_FILE)
+
+html: gifs $(HTML_REF_MAN_FILE)
+
+clean clean_docs:
+ rm -rf $(HTMLDIR)/*
+ rm -f $(MAN3DIR)/*
+ rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo)
+ rm -f errs core *~
+
+else
+
+ifeq ($(DOCTYPE),pdf)
+docs: pdf
+else
+ifeq ($(DOCTYPE),ps)
+docs: ps
+else
+docs: html gifs man
+endif
+endif
+
+pdf: $(TOP_PDF_FILE)
+
+ps: $(TOP_PS_FILE)
+
+html: $(HTML_FILES)
+
+
+clean clean_docs clean_tex:
+ rm -f $(TEX_FILES_USERS_GUIDE) $(TEX_FILES_REF_MAN) $(TEX_FILES_BOOK)
+ rm -f $(HTML_FILES) $(MAN3_FILES)
+ rm -f $(TOP_PDF_FILE) $(TOP_PS_FILE)
+ rm -f errs core *~ *xmls_output *xmls_errs $(LATEX_CLEAN)
+
+endif
+
+man: $(MAN3_FILES)
+
+gifs: $(GIF_FILES:%=$(HTMLDIR)/%)
+
+$(INDEX_TARGET): $(INDEX_SRC) ../../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+debug opt:
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+ifdef DOCSUPPORT
+
+release_docs_spec: docs
+ $(INSTALL_DIR) $(RELSYSDIR)/doc/pdf
+ $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELSYSDIR)/doc/pdf
+ $(INSTALL_DIR) $(RELSYSDIR)/doc/html
+ $(INSTALL_DATA) $(HTMLDIR)/* \
+ $(RELSYSDIR)/doc/html
+ $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR)
+ $(INSTALL_DIR) $(RELEASE_PATH)/man/man3
+ $(INSTALL_DATA) $(MAN3_FILES) $(RELEASE_PATH)/man/man3
+
+else
+
+ifeq ($(DOCTYPE),pdf)
+release_docs_spec: pdf
+ $(INSTALL_DIR) $(RELEASE_PATH)/pdf
+ $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELEASE_PATH)/pdf
+else
+ifeq ($(DOCTYPE),ps)
+release_docs_spec: ps
+ $(INSTALL_DIR) $(RELEASE_PATH)/ps
+ $(INSTALL_DATA) $(TOP_PS_FILE) $(RELEASE_PATH)/ps
+else
+release_docs_spec: docs
+ $(INSTALL_DIR) $(RELSYSDIR)/doc/html
+ $(INSTALL_DATA) $(GIF_FILES) $(EXTRA_FILES) $(HTML_FILES) \
+ $(RELSYSDIR)/doc/html
+ $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR)
+ $(INSTALL_DIR) $(RELEASE_PATH)/man/man3
+ $(INSTALL_DATA) $(MAN3_FILES) $(RELEASE_PATH)/man/man3
+endif
+endif
+
+endif
+
+
+release_spec:
+
diff --git a/lib/mnesia/doc/src/Mnesia_App_A.xml b/lib/mnesia/doc/src/Mnesia_App_A.xml
new file mode 100644
index 0000000000..86e5b7d03c
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_App_A.xml
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Appendix A: Mnesia Error Messages</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible>Bjarne D&auml;cker</responsible>
+ <docno></docno>
+ <approved>Bjarne D&auml;cker</approved>
+ <checked>Bjarne D&auml;cker</checked>
+ <date>96-11-20</date>
+ <rev>B</rev>
+ <file>Mnesia_App_A.xml</file>
+ </header>
+ <p>Whenever an operation returns an error in Mnesia, a description
+ of the error is available. For example, the functions
+ <c>mnesia:transaction(Fun)</c>, or <c>mnesia:create_table(N,L)</c>
+ may return the tuple <c>{aborted, Reason}</c>, where <c>Reason</c>
+ is a term describing the error. The following function is used to
+ retrieve more detailed information about the error:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:error_description(Error)</c></item>
+ </list>
+
+ <section>
+ <title>Errors in Mnesia</title>
+ <p>The following is a list of valid errors in Mnesia.</p>
+ <list type="bulleted">
+ <item><c>badarg</c>. Bad or invalid argument, possibly bad type.
+ </item>
+ <item><c>no_transaction</c>. Operation not allowed outside transactions.
+ </item>
+ <item><c>combine_error</c>. Table options were illegally combined.
+ </item>
+ <item><c>bad_index</c>. Index already exists, or was out of bounds.
+ </item>
+ <item><c>already_exists</c>. Schema option to be activated is already on.
+ </item>
+ <item><c>index_exists</c>. Some operations cannot be performed on tables with an index.
+ </item>
+ <item><c>no_exists</c>.; Tried to perform operation on non-existing (non-alive) item.
+ </item>
+ <item><c>system_limit</c>.; A system limit was exhausted.
+ </item>
+ <item><c>mnesia_down</c>. A transaction involves records on a
+ remote node which became unavailable before the transaction
+ was completed. Record(s) are no longer available elsewhere in
+ the network.</item>
+ <item><c>not_a_db_node</c>. A node was mentioned which does not exist in the schema.</item>
+ <item><c>bad_type</c>.; Bad type specified in argument.</item>
+ <item><c>node_not_running</c>. Node is not running.</item>
+ <item><c>truncated_binary_file</c>. Truncated binary in file.</item>
+ <item><c>active</c>. Some delete operations require that all active records are removed.</item>
+ <item><c>illegal</c>. Operation not supported on this record.</item>
+ </list>
+ <p>The following example illustrates a function which returns an error, and the method to retrieve more detailed error information.
+ </p>
+ <p>The function <c>mnesia:create_table(bar, [{attributes, 3.14}])</c> will return the tuple <c>{aborted,Reason}</c>, where <c>Reason</c> is the tuple
+ <c>{bad_type,bar,3.14000}</c>.
+ </p>
+ <p>The function <c>mnesia:error_description(Reason)</c>, returns the term
+ <c>{"Bad type on some provided arguments",bar,3.14000}</c> which is an error
+ description suitable
+ for display.</p>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_App_B.xmlsrc b/lib/mnesia/doc/src/Mnesia_App_B.xmlsrc
new file mode 100644
index 0000000000..52f5e06d83
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_App_B.xmlsrc
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Appendix B: The Backup Call Back Interface</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible>Bjarne D&auml;cker</responsible>
+ <docno></docno>
+ <approved>Bjarne D&auml;cker</approved>
+ <checked>Bjarne D&auml;cker</checked>
+ <date>97-05-27</date>
+ <rev>C</rev>
+ <file>Mnesia_App_B.xml</file>
+ </header>
+
+ <section>
+ <title>mnesia_backup callback behavior</title>
+ <p></p>
+ <codeinclude file="../../src/mnesia_backup.erl" tag="%0" type="erl"></codeinclude>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_App_C.xmlsrc b/lib/mnesia/doc/src/Mnesia_App_C.xmlsrc
new file mode 100644
index 0000000000..d8916f25cb
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_App_C.xmlsrc
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1998</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Appendix C: The Activity Access Call Back Interface</title>
+ <prepared>H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_App_C.xml</file>
+ </header>
+
+ <section>
+ <title>mnesia_access callback behavior</title>
+ <p></p>
+ <codeinclude file="../../src/mnesia_frag.erl" tag="%header_doc_include" type="erl"></codeinclude>
+ <p></p>
+ <codeinclude file="../../src/mnesia_frag.erl" tag="%impl_doc_include" type="erl"></codeinclude>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_App_D.xmlsrc b/lib/mnesia/doc/src/Mnesia_App_D.xmlsrc
new file mode 100644
index 0000000000..d98680640d
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_App_D.xmlsrc
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2002</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Appendix D: The Fragmented Table Hashing Call Back Interface</title>
+ <prepared>H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_App_D.xml</file>
+ </header>
+
+ <section>
+ <title>mnesia_frag_hash callback behavior</title>
+ <p></p>
+ <codeinclude file="../../src/mnesia_frag_hash.erl" tag="%header_doc_include" type="erl"></codeinclude>
+ <p></p>
+ <codeinclude file="../../src/mnesia_frag_hash.erl" tag="%impl_doc_include" type="erl"></codeinclude>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap1.xml b/lib/mnesia/doc/src/Mnesia_chap1.xml
new file mode 100644
index 0000000000..9af81c85cb
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap1.xml
@@ -0,0 +1,265 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Introduction</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible>Bjarne D&auml;cker</responsible>
+ <docno></docno>
+ <approved>Bjarne D&auml;cker</approved>
+ <checked>Bjarne D&auml;cker</checked>
+ <date></date>
+ <rev>C</rev>
+ <file>Mnesia_chap1.xml</file>
+ </header>
+ <p>This book describes the Mnesia DataBase Management
+ System (DBMS). <em>Mnesia</em> is a distributed Database Management
+ System, appropriate for telecommunications applications and other
+ Erlang applications which require continuous operation and soft
+ real-time properties. It is one section of the Open Telecom Platform
+ (OTP), which is a control system platform for building
+ telecommunications applications.</p>
+
+ <section>
+ <title>About Mnesia</title>
+ <p>The management of data in telecommunications system has many
+ aspects whereof some, but not all, are addressed by traditional
+ commercial DBMSs (Data Base Management Systems). In particular the
+ very high level of fault tolerance which is required in many nonstop
+ systems, combined with requirements on the DBMS to run in the same
+ address space as the application, have led us to implement a brand new
+ DBMS. called Mnesia. Mnesia is implemented in, and very tightly
+ connected to, the programming language Erlang and it provides the
+ functionality that is necessary for the implementation of fault
+ tolerant telecommunications systems. Mnesia is a multiuser Distributed
+ DBMS specially made for industrial telecommunications applications
+ written in the symbolic programming language Erlang, which is also
+ the intended target language. Mnesia tries to address all of the data
+ management issues required for typical telecommunications systems and
+ it has a number of features that are not normally found in traditional
+ databases. <br></br>
+
+ In telecommunications applications there are different needs
+ from the features provided by traditional DBMSs. The applications now
+ implemented in the Erlang language need a mixture of a broad range
+ of features, which generally are not satisfied by traditional DBMSs.
+ Mnesia is designed with requirements like the following in mind:</p>
+ <list type="ordered">
+ <item>Fast real-time key/value lookup</item>
+ <item>Complicated non real-time queries mainly for
+ operation and maintenance</item>
+ <item>Distributed data due to distributed
+ applications</item>
+ <item>High fault tolerance</item>
+ <item>Dynamic re-configuration</item>
+ <item>Complex objects</item>
+ </list>
+ <p>What
+ sets Mnesia apart from most other DBMSs is that it is designed with
+ the typical data management problems of telecommunications applications
+ in mind. Hence Mnesia combines many concepts found in traditional
+ databases, such as transactions and queries with concepts found in data
+ management systems for telecommunications applications, such as very
+ fast real-time operations, configurable degree of fault tolerance (by
+ means of replication) and the ability to reconfigure the system without
+ stopping or suspending it. Mnesia is also interesting due to its tight
+ coupling to the programming language Erlang, thus almost turning Erlang
+ into a database programming language. This has many benefits, the
+ foremost is that
+ the impedance mismatch between data format used by the
+ DBMS and data format used by the programming language, which is used
+ to manipulate the data, completely disappears. <br></br>
+</p>
+ </section>
+
+ <section>
+ <title>The Mnesia DataBase Management System (DBMS)</title>
+ <p></p>
+
+ <section>
+ <title>Features</title>
+ <p>Mnesia contains the following features which combine to produce a fault-tolerant,
+ distributed database management system written in Erlang:
+ </p>
+ <list type="bulleted">
+ <item>Database schema can be dynamically reconfigured at runtime.
+ </item>
+ <item>Tables can be declared to have properties such as location,
+ replication, and persistence.
+ </item>
+ <item>Tables can be moved or replicated to several nodes to improve
+ fault tolerance. The rest of the system can still access the tables
+ to read, write, and delete records.
+ </item>
+ <item>Table locations are transparent to the programmer.
+ Programs address table names and the system itself keeps track of
+ table locations.
+ </item>
+ <item>Database transactions can be distributed, and a large number of
+ functions can be called within one transaction.
+ </item>
+ <item>Several transactions can run concurrently, and their execution is
+ fully synchronized by the database management system.
+ Mnesia ensures that no two processes manipulate data simultaneously.
+ </item>
+ <item>Transactions can be assigned the property of being executed on
+ all nodes in the system, or on none. Transactions can also be bypassed
+ in favor of running so called "dirty operations", which reduce
+ overheads and run very fast.
+ </item>
+ </list>
+ <p>Details of these features are described in the following sections.</p>
+ </section>
+ <p></p>
+
+ <section>
+ <title>Add-on Applications</title>
+ <p>QLC and Mnesia Session can be used in conjunction with Mnesia to produce
+ specialized functions which enhance the operational ability of Mnesia.
+ Both Mnesia Session and QLC have their own documentation as part
+ of the OTP documentation set. Below are the main features of Mnesia Session
+ and QLC when used in conjunction with Mnesia:</p>
+ <list type="bulleted">
+ <item><em>QLC</em> has the ability to optimize the query
+ compiler for the Mnesia Database Management System, essentially making
+ the DBMS more efficient.</item>
+ <item><em>QLC</em>, can be used as a database programming
+ language for Mnesia. It includes a notation called "list
+ comprehensions" and can be used to make complex database
+ queries over a set of tables.</item>
+ <item><em>Mnesia Session</em> is an interface for the Mnesia Database
+ Management System</item>
+ <item><em>Mnesia Session</em> enables access to the
+ Mnesia DBMS from foreign programming languages (i.e. other
+ languages than Erlang).</item>
+ </list>
+ <p></p>
+
+ <section>
+ <title>When to Use Mnesia</title>
+ <p>Use Mnesia with the following types of applications:
+ </p>
+ <list type="bulleted">
+ <item>Applications that need to replicate data.
+ </item>
+ <item>Applications that perform complicated searches on data.
+ </item>
+ <item>Applications that need to use atomic transactions to
+ update several records simultaneously.
+ </item>
+ <item>Applications that use soft real-time characteristics.
+ </item>
+ </list>
+ <p>On the other hand, Mnesia may not be appropriate with the
+ following types of applications:
+ </p>
+ <list type="bulleted">
+ <item>Programs that process plain text or binary data files
+ </item>
+ <item>Applications that merely need a look-up dictionary
+ which can be stored to disc can utilize the standard
+ library module <c>dets</c>, which is a disc based version
+ of the module <c>ets</c>.
+ </item>
+ <item>Applications which need disc logging facilities can
+ utilize the module <c>disc_log</c> by preference.
+ </item>
+ <item>Not suitable for hard real time systems.
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Scope and Purpose</title>
+ <p>This manual is included in the OTP document set. It describes
+ how to build Mnesia database applications, and how to integrate
+ and utilize the Mnesia database management system with
+ OTP. Programming constructs are described, and numerous
+ programming examples are included to illustrate the use of
+ Mnesia.
+ </p>
+ </section>
+
+ <section>
+ <title>Prerequisites</title>
+ <p>Readers of this manual are assumed to be familiar with system
+ development principles and database management systems. Readers
+ are also assumed to be familiar with the Erlang programming
+ language.</p>
+ </section>
+
+ <section>
+ <title>About This Book</title>
+ <p>This book contains the following chapters:
+ </p>
+ <list type="bulleted">
+ <item>Chapter 2, "Getting Started with Mnesia", introduces
+ Mnesia with an example database. Examples are shown of how to
+ start an Erlang session, specify a Mnesia database directory,
+ initialize a database schema, start Mnesia, and create
+ tables. Initial prototyping of record definitions is also
+ discussed.
+ </item>
+ <item>Chapter 3, "Building a Mnesia Database", more formally
+ describes the steps introduced in Chapter 2, namely the Mnesia
+ functions which define a database schema, start Mnesia, and
+ create the required tables.
+ </item>
+ <item>Chapter 4, "Transactions and other access contexts",
+ describes the transactions properties which make Mnesia into a
+ fault tolerant, real-time distributed database management
+ system. This chapter also describes the concept of locking in
+ order to ensure consistency in tables, and so called "dirty
+ operations", or short cuts which bypass the transaction system
+ to improve speed and reduce overheads.
+ </item>
+ <item>Chapter 5, "Miscellaneous Mnesia Features", describes
+ features which enable the construction of more complex
+ database applications. These features includes indexing,
+ checkpoints, distribution and fault tolerance, disc-less
+ nodes, replication manipulation, local content tables, concurrency,
+ and object based programming in Mnesia.
+ </item>
+ <item>Chapter 6, "Mnesia System Information", describes the
+ files contained in the Mnesia database directory, database
+ configuration data, core and table dumps, as well as the
+ important subject of backup, fall-back, and disaster recovery
+ principles.
+ </item>
+ <item>Chapter 7, "Combining Mnesia with SNMP", is a short
+ chapter which outlines Mnesia integrated with SNMP.
+ </item>
+ <item>Appendix A, "Mnesia Errors Messages", lists Mnesia error
+ messages and their meanings.
+ </item>
+ <item>Appendix B, "The Backup Call Back Interface", is a
+ program listing of the default implementation of this facility.
+ </item>
+ <item>Appendix C, "The Activity Access Call Back Interface",
+ is a program outlining of one possible implementations of this facility.
+ </item>
+ </list>
+ </section>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc
new file mode 100644
index 0000000000..0714c7b645
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc
@@ -0,0 +1,647 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Getting Started with Mnesia</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev>C</rev>
+ <file>Mnesia_chap2.xml</file>
+ </header>
+ <p>This chapter introduces Mnesia. Following a brief discussion
+ about the first initial setup, a Mnesia database example is
+ demonstrated. This database example will be referenced in the
+ following chapters, where this example is modified in order to
+ illustrate various program constructs. In this chapter, the
+ following mandatory procedures are illustrated by examples:
+ </p>
+ <list type="bulleted">
+ <item>Starting an Erlang session and specifying a directory for the
+ Mnesia database.
+ </item>
+ <item>Initializing a database schema.
+ </item>
+ <item>Starting Mnesia and creating the required tables.</item>
+ </list>
+
+ <section>
+ <title>Starting Mnesia for the first time</title>
+ <p>Following is a simplified demonstration of a Mnesia system startup. This is the dialogue from the Erlang
+ shell:
+ </p>
+ <pre><![CDATA[
+ unix> erl -mnesia dir '"/tmp/funky"'
+ Erlang (BEAM) emulator version 4.9
+
+ Eshell V4.9 (abort with ^G)
+ 1>
+ 1> mnesia:create_schema([node()]).
+ ok
+ 2> mnesia:start().
+ ok
+ 3> mnesia:create_table(funky, []).
+ {atomic,ok}
+ 4> mnesia:info().
+ ---> Processes holding locks <---
+ ---> Processes waiting for locks <---
+ ---> Pending (remote) transactions <---
+ ---> Active (local) transactions <---
+ ---> Uncertain transactions <---
+ ---> Active tables <---
+ funky : with 0 records occupying 269 words of mem
+ schema : with 2 records occupying 353 words of mem
+ ===> System info in version "1.0", debug level = none <===
+ opt_disc. Directory "/tmp/funky" is used.
+ use fall-back at restart = false
+ running db nodes = [nonode@nohost]
+ stopped db nodes = []
+ remote = []
+ ram_copies = [funky]
+ disc_copies = [schema]
+ disc_only_copies = []
+ [{nonode@nohost,disc_copies}] = [schema]
+ [{nonode@nohost,ram_copies}] = [funky]
+ 1 transactions committed, 0 aborted, 0 restarted, 1 logged to disc
+ 0 held locks, 0 in queue; 0 local transactions, 0 remote
+ 0 transactions waits for other nodes: []
+ ok
+ ]]></pre>
+ <p>In the example above the following actions were performed:
+ </p>
+ <list type="bulleted">
+ <item>The Erlang system was started from the UNIX prompt
+ with a flag <c>-mnesia dir '"/tmp/funky"'</c>. This flag indicates
+ to Mnesia which directory will store the data.
+ </item>
+ <item>A new empty schema was initialized on the local node by evaluating
+ <c>mnesia:create_schema([node()]).</c> The schema contains
+ information about the database in general. This will be
+ thoroughly explained later on.
+ </item>
+ <item>The DBMS was started by evaluating <c>mnesia:start()</c>.
+ </item>
+ <item>A first table was created, called <c>funky</c> by evaluating
+ the expression <c>mnesia:create_table(funky, [])</c>. The table
+ was given default properties.
+ </item>
+ <item><c>mnesia:info()</c> was evaluated and subsequently displayed
+ information regarding the status of the database on the terminal.
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>An Introductory Example</title>
+ <p>A Mnesia database is organized as a set of tables.
+ Each table is populated with instances (Erlang records).
+ A table also has a number of properties, such as location and
+ persistence.
+ </p>
+ <p>In this example we shall:
+ </p>
+ <list type="bulleted">
+ <item>Start an Erlang system, and specify the directory where
+ the database will be located.
+ </item>
+ <item>Initiate a new schema with an attribute that specifies
+ on which node, or nodes, the database will operate.
+ </item>
+ <item>Start Mnesia itself.
+ </item>
+ <item>Create and populate the database tables.
+ </item>
+ </list>
+
+ <section>
+ <title>The Example Database</title>
+ </section>
+ <p>In this database example, we will create the database and
+ relationships depicted in the following diagram. We will call this
+ database the <em>Company</em> database.
+ </p>
+ <image file="company.gif">
+ <icaption>Company Entity-Relation Diagram</icaption>
+ </image>
+ <p>The database model looks as follows:
+ </p>
+ <list type="bulleted">
+ <item>There are three entities: employee, project, and
+ department.
+ </item>
+ <item>
+ <p>There are three relationships between these entities:</p>
+ <list type="ordered">
+ <item>A department is managed by an employee, hence the
+ <em>manager</em> relationship.
+ </item>
+ <item>An employee works at a department, hence the
+ <em>at_dep</em> relationship.
+ </item>
+ <item>Each employee works on a number of projects, hence
+ the <em>in_proj</em> relationship.
+ </item>
+ </list>
+ </item>
+ </list>
+
+ <section>
+ <title>Defining Structure and Content</title>
+ <p>We first enter our record definitions into a text file
+ named <c>company.hrl</c>. This file defines the following
+ structure for our sample database:
+ </p>
+ <codeinclude file="company.hrl" tag="%0" type="erl"></codeinclude>
+ <p>The structure defines six tables in our database. In Mnesia,
+ the function <c>mnesia:create_table(Name, ArgList)</c> is
+ used to create tables. <c>Name</c> is the table
+ name <em>Note:</em> The current version of Mnesia does
+ not require that the name of the table is the same as the record
+ name, See Chapter 4:
+ <seealso marker="Mnesia_chap4#recordnames_tablenames">Record Names Versus Table Names.</seealso></p>
+ <p>For example, the table
+ for employees will be created with the function
+ <c>mnesia:create_table(employee, [{attributes, record_info(fields, employee)}]).</c> The table
+ name <c>employee</c> matches the name for records specified
+ in <c>ArgList</c>. The expression <c>record_info(fields, RecordName)</c> is processed by the Erlang preprocessor and
+ evaluates to a list containing the names of the different
+ fields for a record.
+ </p>
+ </section>
+
+ <section>
+ <title>The Program</title>
+ <p>The following shell interaction starts Mnesia and
+ initializes the schema for our <c>company</c> database:
+ </p>
+ <pre>
+
+ % <input>erl -mnesia dir '"/ldisc/scratch/Mnesia.Company"'</input>
+ Erlang (BEAM) emulator version 4.9
+
+ Eshell V4.9 (abort with ^G)
+ 1> mnesia:create_schema([node()]).
+ ok
+ 2> mnesia:start().
+ ok
+ </pre>
+ <p>The following program module creates and populates previously defined tables:
+ </p>
+ <codeinclude file="company.erl" tag="%0" type="erl"></codeinclude>
+ </section>
+
+ <section>
+ <title>The Program Explained</title>
+ <p>The following commands and functions were used to initiate the
+ Company database:
+ </p>
+ <list type="bulleted">
+ <item><c>% erl -mnesia dir '"/ldisc/scratch/Mnesia.Company"'.</c> This is a UNIX
+ command line entry which starts the Erlang system. The flag
+ <c>-mnesia dir Dir</c> specifies the location of the
+ database directory. The system responds and waits for
+ further input with the prompt <em>1></em>.
+ </item>
+ <item><c>mnesia:create_schema([node()]).</c> This function
+ has the format <c>mnesia:create_schema(DiscNodeList)</c> and
+ initiates a new schema. In this example, we have created a
+ non-distributed system using only one node. Schemas are fully
+ explained in Chapter 3:<seealso marker="Mnesia_chap3#def_schema">Defining a Schema</seealso>.
+ </item>
+ <item><c>mnesia:start().</c> This function starts
+ Mnesia. This function is fully explained in Chapter 3:
+ <seealso marker="Mnesia_chap3#start_mnesia">Starting Mnesia</seealso>.
+ </item>
+ </list>
+ <p>Continuing the dialogue with the Erlang shell will produce the following
+ the following:
+ </p>
+ <pre><![CDATA[
+ 3> company:init().
+ {atomic,ok}
+ 4> mnesia:info().
+ ---> Processes holding locks <---
+ ---> Processes waiting for locks <---
+ ---> Pending (remote) transactions <---
+ ---> Active (local) transactions <---
+ ---> Uncertain transactions <---
+ ---> Active tables <---
+ in_proj : with 0 records occuping 269 words of mem
+ at_dep : with 0 records occuping 269 words of mem
+ manager : with 0 records occuping 269 words of mem
+ project : with 0 records occuping 269 words of mem
+ dept : with 0 records occuping 269 words of mem
+ employee : with 0 records occuping 269 words of mem
+ schema : with 7 records occuping 571 words of mem
+ ===> System info in version "1.0", debug level = none <===
+ opt_disc. Directory "/ldisc/scratch/Mnesia.Company" is used.
+ use fall-back at restart = false
+ running db nodes = [nonode@nohost]
+ stopped db nodes = []
+ remote = []
+ ram_copies =
+ [at_dep,dept,employee,in_proj,manager,project]
+ disc_copies = [schema]
+ disc_only_copies = []
+ [{nonode@nohost,disc_copies}] = [schema]
+ [{nonode@nohost,ram_copies}] =
+ [employee,dept,project,manager,at_dep,in_proj]
+ 6 transactions committed, 0 aborted, 0 restarted, 6 logged to disc
+ 0 held locks, 0 in queue; 0 local transactions, 0 remote
+ 0 transactions waits for other nodes: []
+ ok
+ ]]></pre>
+ <p>A set of tables is created:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:create_table(Name,ArgList)</c>. This
+ function is used to create the required database tables. The
+ options available with <c>ArgList</c> are explained in
+ Chapter 3: <seealso marker="Mnesia_chap3#create_tables">Creating New Tables</seealso>. </item>
+ </list>
+ <p>The <c>company:init/0</c> function creates our tables. Two tables are
+ of type <c>bag</c>. This is the <c>manager</c> relation as well
+ the <c>in_proj</c> relation. This shall be interpreted as: An
+ employee can be manager over several departments, and an employee
+ can participate in several projects. However, the <c>at_dep</c>
+ relation is <c>set</c> because an employee can only work in one department.
+ In this data model we have examples of relations that are one-to-one (<c>set</c>),
+ as well as one-to-many (<c>bag</c>).
+ </p>
+ <p><c>mnesia:info()</c> now indicates that a database
+ which has seven local tables, of which, six are our
+ user defined tables and one is the schema.
+ Six transactions have been committed, as six successful transactions were run when
+ creating the tables.
+ </p>
+ <p>To write a function which inserts an employee record into the database, there must be an
+ <c>at_dep</c> record and a set of <c>in_proj</c> records inserted. Examine the following
+ code used to complete this action:
+ </p>
+ <codeinclude file="company.erl" tag="%1" type="erl"></codeinclude>
+ <list type="bulleted">
+ <item>
+ <p><c>insert_emp(Emp, DeptId, ProjNames) -></c>. The
+ <c>insert_emp/3</c> arguments are:</p>
+ <list type="ordered">
+ <item><c>Emp</c> is an employee record.
+ </item>
+ <item><c>DeptId</c> is the identity of the department where the employee is working.
+ </item>
+ <item><c>ProjNames</c> is a list of the names of the projects where the employee are working.</item>
+ </list>
+ </item>
+ </list>
+ <p>The <c>insert_emp(Emp, DeptId, ProjNames) -></c> function
+ creates a <em>functional object</em>. Functional objects
+ are identified by the term <c>Fun</c>. The Fun is passed
+ as a single argument to the function
+ <c>mnesia:transaction(Fun)</c>. This means that Fun is
+ run as a transaction with the following properties:
+ </p>
+ <list type="bulleted">
+ <item>Fun either succeeds or fails completely.
+ </item>
+ <item>Code which manipulates the same data records can be
+ run concurrently without the different processes interfering
+ with each other.
+ </item>
+ </list>
+ <p>The function can be used as:</p>
+ <code type="none">
+ Emp = #employee{emp_no= 104732,
+ name = klacke,
+ salary = 7,
+ sex = male,
+ phone = 98108,
+ room_no = {221, 015}},
+ insert_emp(Me, 'B/SFR', [Erlang, mnesia, otp]).
+ </code>
+ <note>
+ <p>Functional Objects (Funs) are described in the
+ Erlang Reference Manual, "Fun Expressions".
+ </p>
+ </note>
+ </section>
+
+ <section>
+ <title>Initial Database Content</title>
+ <p>After the insertion of the employee named <c>klacke</c>
+ we have the following records in the database:
+ </p>
+ <marker id="table2_1"></marker>
+ <table>
+ <row>
+ <cell align="left" valign="middle">emp_no</cell>
+ <cell align="left" valign="middle">name</cell>
+ <cell align="left" valign="middle">salary</cell>
+ <cell align="left" valign="middle">sex</cell>
+ <cell align="left" valign="middle">phone</cell>
+ <cell align="left" valign="middle">room_no</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">104732</cell>
+ <cell align="left" valign="middle">klacke</cell>
+ <cell align="left" valign="middle">7</cell>
+ <cell align="left" valign="middle">male</cell>
+ <cell align="left" valign="middle">99586</cell>
+ <cell align="left" valign="middle">{221, 015}</cell>
+ </row>
+ <tcaption>
+Employee</tcaption>
+ </table>
+ <p>An employee record has the following Erlang record/tuple
+ representation: <c>{employee, 104732, klacke, 7, male, 98108, {221, 015}}</c></p>
+ <marker id="table2_2"></marker>
+ <table>
+ <row>
+ <cell align="left" valign="middle">emp</cell>
+ <cell align="left" valign="middle">dept_name</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">klacke</cell>
+ <cell align="left" valign="middle">B/SFR</cell>
+ </row>
+ <tcaption>
+At_dep</tcaption>
+ </table>
+ <p>At_dep has the following Erlang tuple representation:
+ <c>{at_dep, klacke, 'B/SFR'}</c>.
+ </p>
+ <marker id="table3_3"></marker>
+ <table>
+ <row>
+ <cell align="left" valign="middle">emp</cell>
+ <cell align="left" valign="middle">proj_name</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">klacke</cell>
+ <cell align="left" valign="middle">Erlang</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">klacke</cell>
+ <cell align="left" valign="middle">otp</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">klacke</cell>
+ <cell align="left" valign="middle">mnesia</cell>
+ </row>
+ <tcaption>
+In_proj</tcaption>
+ </table>
+ <p>In_proj has the following Erlang tuple representation:
+ <c>{in_proj, klacke, 'Erlang', klacke, 'otp', klacke, 'mnesia'}</c></p>
+ <p>There is no difference between rows in a table and Mnesia
+ records. Both concepts are the same and will be used
+ interchangeably throughout this book.
+ </p>
+ <p>A Mnesia table is populated by Mnesia records. For example,
+ the tuple <c>{boss, klacke, bjarne}</c> is an record. The
+ second element in this tuple is the key. In order to uniquely
+ identify a table row both the key and the table name is
+ needed. The term <em>object identifier</em>,
+ (oid) is sometimes used for the arity two tuple {Tab, Key}. The oid for
+ the <c>{boss, klacke, bjarne}</c> record is the arity two
+ tuple <c>{boss, klacke}</c>. The first element of the tuple is
+ the type of the record and the second element is the key. An
+ oid can lead to zero, one, or more records depending on
+ whether the table type is <c>set</c> or <c>bag</c>.
+ </p>
+ <p>We were also able to insert the <c>{boss, klacke, bjarne}</c> record which contains an implicit reference to
+ another employee which does not yet exist in the
+ database. Mnesia does not enforce this.
+ </p>
+ </section>
+
+ <section>
+ <title>Adding Records and Relationships to the Database</title>
+ <p>After adding additional record to the Company database, we
+ may end up with the following records:
+ </p>
+ <p><em>Employees</em></p>
+ <code type="none">
+ {employee, 104465, "Johnson Torbjorn", 1, male, 99184, {242,038}}.
+ {employee, 107912, "Carlsson Tuula", 2, female,94556, {242,056}}.
+ {employee, 114872, "Dacker Bjarne", 3, male, 99415, {221,035}}.
+ {employee, 104531, "Nilsson Hans", 3, male, 99495, {222,026}}.
+ {employee, 104659, "Tornkvist Torbjorn", 2, male, 99514, {222,022}}.
+ {employee, 104732, "Wikstrom Claes", 2, male, 99586, {221,015}}.
+ {employee, 117716, "Fedoriw Anna", 1, female,99143, {221,031}}.
+ {employee, 115018, "Mattsson Hakan", 3, male, 99251, {203,348}}.
+ </code>
+ <p><em>Dept</em></p>
+ <code type="none">
+
+ {dept, 'B/SF', "Open Telecom Platform"}.
+ {dept, 'B/SFP', "OTP - Product Development"}.
+ {dept, 'B/SFR', "Computer Science Laboratory"}.
+ </code>
+ <p><em>Projects</em></p>
+ <code type="none">
+ %% projects
+ {project, erlang, 1}.
+ {project, otp, 2}.
+ {project, beam, 3}.
+ {project, mnesia, 5}.
+ {project, wolf, 6}.
+ {project, documentation, 7}.
+ {project, www, 8}.
+ </code>
+ <p>The above three tables, titled <c>employees</c>,
+ <c>dept</c>, and <c>projects</c>, are the tables which are
+ made up of real records. The following database content is
+ stored in the tables which is built on
+ relationships. These tables are titled <c>manager</c>,
+ <c>at_dep</c>, and <c>in_proj</c>.
+ </p>
+ <p><em>Manager</em></p>
+ <code type="none">
+
+ {manager, 104465, 'B/SF'}.
+ {manager, 104465, 'B/SFP'}.
+ {manager, 114872, 'B/SFR'}.
+ </code>
+ <p><em>At_dep</em></p>
+ <code type="none">
+ {at_dep, 104465, 'B/SF'}.
+ {at_dep, 107912, 'B/SF'}.
+ {at_dep, 114872, 'B/SFR'}.
+ {at_dep, 104531, 'B/SFR'}.
+ {at_dep, 104659, 'B/SFR'}.
+ {at_dep, 104732, 'B/SFR'}.
+ {at_dep, 117716, 'B/SFP'}.
+ {at_dep, 115018, 'B/SFP'}.
+ </code>
+ <p><em>In_proj</em></p>
+ <code type="none">
+ {in_proj, 104465, otp}.
+ {in_proj, 107912, otp}.
+ {in_proj, 114872, otp}.
+ {in_proj, 104531, otp}.
+ {in_proj, 104531, mnesia}.
+ {in_proj, 104545, wolf}.
+ {in_proj, 104659, otp}.
+ {in_proj, 104659, wolf}.
+ {in_proj, 104732, otp}.
+ {in_proj, 104732, mnesia}.
+ {in_proj, 104732, erlang}.
+ {in_proj, 117716, otp}.
+ {in_proj, 117716, documentation}.
+ {in_proj, 115018, otp}.
+ {in_proj, 115018, mnesia}.
+ </code>
+ <p>The room number is an attribute of the employee
+ record. This is a structured attribute which consists of a
+ tuple. The first element of the tuple identifies a corridor,
+ and the second element identifies the actual room in the
+ corridor. We could have chosen to represent this as a record
+ <c>-record(room, {corr, no}).</c> instead of an anonymous
+ tuple representation.
+ </p>
+ <p>The Company database is now initialized and contains
+ data. </p>
+ </section>
+
+ <section>
+ <title>Writing Queries</title>
+ <p>Retrieving data from DBMS should usually be done with <c>mnesia:read/3</c> or
+ <c>mnesia:read/1</c> functions. The following function raises the salary:</p>
+ <codeinclude file="company.erl" tag="%5" type="erl"></codeinclude>
+ <p>Since we want to update the record using <c>mnesia:write/1</c> after we have
+ increased the salary we acquire a write lock (third argument to read) when we read the
+ record from the table.
+ </p>
+ <p>It is not always the case that we can directly read the values from the table,
+ we might need to search the table or several tables to get the data we want, this
+ is done by writing database queries. Queries are always more expensive operations
+ than direct lookups done with <c>mnesia:read</c> and should be avoided in performance
+ critical code.</p>
+ <p>There are two methods for writing database queries:
+ </p>
+ <list type="bulleted">
+ <item>Mnesia functions
+ </item>
+ <item>QLC</item>
+ </list>
+
+ <section>
+ <title>Mnesia functions </title>
+ <p></p>
+ <p>The following function extracts the names of the female employees
+ stored in the database:
+ </p>
+ <pre>
+\011 mnesia:select(employee, [{#employee{sex = female, name = '$1', _ = '_'},[], ['$1']}]).
+ </pre>
+ <p>Select must always run within an activity such as a
+ transaction. To be able to call from the shell we might
+ construct a function as:
+ </p>
+ <codeinclude file="company.erl" tag="%20" type="erl"></codeinclude>
+ <p>The select expression matches all entries in table employee with
+ the field sex set to female.
+ </p>
+ <p>This function can be called from the shell as follows:
+ </p>
+ <pre>
+ (klacke@gin)1> <input>company:all_females().</input>
+ {atomic, ["Carlsson Tuula", "Fedoriw Anna"]}
+ </pre>
+ <p>See also the <seealso marker="Mnesia_chap4#matching">Pattern Matching </seealso>
+ chapter for a description of select and its syntax.
+ </p>
+ </section>
+
+ <section>
+ <title>Using QLC </title>
+ <p>This section contains simple introductory examples
+ only. Refer to <em>QLC reference manual</em> for a
+ full description of the QLC query language. Using QLC
+ might be more expensive than using Mnesia functions directly but
+ offers a nice syntax.
+ </p>
+ <p>The following function extracts a list of female employees
+ from the database:
+ </p>
+ <pre>
+ Q = qlc:q([E#employee.name || E <![CDATA[<-]]> mnesia:table(employee),
+\011 E#employee.sex == female]),
+\011 qlc:e(Q),
+ </pre>
+ <p>Accessing mnesia tables from a QLC list comprehension must
+ always be done within a transaction. Consider the following
+ function:
+ </p>
+ <codeinclude file="company.erl" tag="%2" type="erl"></codeinclude>
+ <p>This function can be called from the shell as follows:
+ </p>
+ <pre>
+ (klacke@gin)1> <input>company:females().</input>
+ {atomic, ["Carlsson Tuula", "Fedoriw Anna"]}
+ </pre>
+ <p>In traditional relational database terminology, the above
+ operation would be called a selection, followed by a projection.
+ </p>
+ <p>The list comprehension expression shown above contains a
+ number of syntactical elements.
+ </p>
+ <list type="bulleted">
+ <item>the first <c>[</c> bracket should be read as "build the
+ list"
+ </item>
+ <item>the <c>||</c> "such that" and the arrow <c><![CDATA[<-]]></c> should
+ be read as "taken from"
+ </item>
+ </list>
+ <p>Hence, the above list comprehension demonstrates the
+ formation of the list <c>E#employee.name</c> such that <c>E</c> is
+ taken from the table of employees and the <c>sex</c> attribute
+ of each records is equal with the atom <c>female</c>.
+ </p>
+ <p>The whole list comprehension must be given to the
+ <c>qlc:q/1</c> function.
+ </p>
+ <p>It is possible to combine list comprehensions with low
+ level Mnesia functions in the same transaction. If we want to
+ raise the salary of all female employees we execute:
+ </p>
+ <codeinclude file="company.erl" tag="%4" type="erl"></codeinclude>
+ <p>The function <c>raise_females/1</c> returns the tuple
+ <c>{atomic, Number}</c>, where <c>Number</c> is the number of
+ female employees who received a salary increase. Should an error
+ occur, the value <c>{aborted, Reason}</c> is returned. In the
+ case of an error, Mnesia guarantees that the salary is not
+ raised for any employees at all.
+ </p>
+ <pre>
+
+ 33><input>company:raise_females(33).</input>
+ {atomic,2}
+ </pre>
+ </section>
+ </section>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap3.xml b/lib/mnesia/doc/src/Mnesia_chap3.xml
new file mode 100644
index 0000000000..9a382bcb5a
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap3.xml
@@ -0,0 +1,556 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Building A Mnesia Database</title>
+ <prepared></prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_chap3.xml</file>
+ </header>
+ <p>This chapter details the basic steps involved when designing
+ a Mnesia database and the programming constructs which make different
+ solutions available to the programmer. The chapter includes the following
+ sections:
+ </p>
+ <list type="bulleted">
+ <item>defining a schema</item>
+ <item>the datamodel</item>
+ <item>starting Mnesia</item>
+ <item>creating new tables.</item>
+ </list>
+
+ <section>
+ <marker id="def_schema"></marker>
+ <title>Defining a Schema</title>
+ <p>The configuration of a Mnesia system is described in the
+ schema. The schema is a special table which contains information
+ such as the table names and each table's
+ storage type, (i.e. whether a table should be stored in RAM,
+ on disc or possibly on both, as well as its location).
+ </p>
+ <p>Unlike data tables, information contained in schema tables can only be
+ accessed and modified by using the schema related functions
+ described in this section.
+ </p>
+ <p>Mnesia has various functions for defining the
+ database schema. It is possible to move tables, delete tables,
+ or reconfigure the layout of tables.
+ </p>
+ <p>An important aspect of these functions is that the system can access a
+ table while it is being reconfigured. For example, it is possible to move a
+ table and simultaneously perform write operations to the same
+ table. This feature is essential for applications that require
+ continuous service.
+ </p>
+ <p>The following section describes the functions available for schema management,
+ all of which return a tuple:
+ </p>
+ <list type="bulleted">
+ <item><c>{atomic, ok}</c>; or,
+ </item>
+ <item><c>{aborted, Reason}</c> if unsuccessful.</item>
+ </list>
+
+ <section>
+ <title>Schema Functions</title>
+ <list type="bulleted">
+ <item><c>mnesia:create_schema(NodeList)</c>. This function is
+ used to initialize a new, empty schema. This is a mandatory
+ requirement before Mnesia can be started. Mnesia is a truly
+ distributed DBMS and the schema is a system table that is
+ replicated on all nodes in a Mnesia system.
+ The function will fail if a schema is already present on any of
+ the nodes in <c>NodeList</c>. This function requires Mnesia
+ to be stopped on the all
+ <c>db_nodes</c> contained in the parameter <c>NodeList</c>.
+ Applications call this function only once,
+ since it is usually a one-time activity to initialize a new
+ database.
+ </item>
+ <item><c>mnesia:delete_schema(DiscNodeList)</c>. This function
+ erases any old schemas on the nodes in
+ <c>DiscNodeList</c>. It also removes all old tables together
+ with all data. This function requires Mnesia to be stopped
+ on all <c>db_nodes</c>.
+ </item>
+ <item><c>mnesia:delete_table(Tab)</c>. This function
+ permanently deletes all replicas of table <c>Tab</c>.
+ </item>
+ <item><c>mnesia:clear_table(Tab)</c>. This function
+ permanently deletes all entries in table <c>Tab</c>.
+ </item>
+ <item><c>mnesia:move_table_copy(Tab, From, To)</c>. This
+ function moves the copy of table <c>Tab</c> from node
+ <c>From</c> to node <c>To</c>. The table storage type,
+ <c>{type}</c> is preserved, so if a RAM table is moved from
+ one node to another node, it remains a RAM table on the new
+ node. It is still possible for other transactions to perform
+ read and write operation to the table while it is being
+ moved.
+ </item>
+ <item><c>mnesia:add_table_copy(Tab, Node, Type)</c>. This
+ function creates a replica of the table <c>Tab</c> at node
+ <c>Node</c>. The <c>Type</c> argument must be either of the
+ atoms <c>ram_copies</c>, <c>disc_copies</c>, or
+ <c>disc_only_copies</c>. If we add a copy of the system
+ table <c>schema</c> to a node, this means that we want the
+ Mnesia schema to reside there as well. This action then
+ extends the set of nodes that comprise this particular
+ Mnesia system.
+ </item>
+ <item><c>mnesia:del_table_copy(Tab, Node)</c>. This function
+ deletes the replica of table <c>Tab</c> at node <c>Node</c>.
+ When the last replica of a table is removed, the table is
+ deleted.
+ </item>
+ <item>
+ <p><c>mnesia:transform_table(Tab, Fun, NewAttributeList, NewRecordName)</c>. This
+ function changes the format on all records in table
+ <c>Tab</c>. It applies the argument <c>Fun</c> to all
+ records in the table. <c>Fun</c> shall be a function which
+ takes an record of the old type, and returns the record of the new
+ type. The table key may not be changed.</p>
+ <code type="none">
+-record(old, {key, val}).
+-record(new, {key, val, extra}).
+
+Transformer =
+ fun(X) when record(X, old) ->
+ #new{key = X#old.key,
+ val = X#old.val,
+ extra = 42}
+ end,
+{atomic, ok} = mnesia:transform_table(foo, Transformer,
+ record_info(fields, new),
+ new),
+ </code>
+ <p>The <c>Fun</c> argument can also be the atom
+ <c>ignore</c>, it indicates that only the meta data about the table will
+ be updated. Usage of <c>ignore</c> is not recommended (since it creates
+ inconsistencies between the meta data and the actual data) but included
+ as a possibility for the user do to his own (off-line) transform.</p>
+ </item>
+ <item><c>change_table_copy_type(Tab, Node, ToType)</c>. This
+ function changes the storage type of a table. For example, a
+ RAM table is changed to a disc_table at the node specified
+ as <c>Node</c>.</item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>The Data Model</title>
+ <p>The data model employed by Mnesia is an extended
+ relational data model. Data is organized as a set of
+ tables and relations between different data records can
+ be modeled as additional tables describing the actual
+ relationships.
+ Each table contains instances of Erlang records
+ and records are represented as Erlang tuples.
+ </p>
+ <p>Object identifiers, also known as oid, are made up of a table name and a key.
+ For example, if we have an employee record represented by the tuple
+ <c>{employee, 104732, klacke, 7, male, 98108, {221, 015}}</c>.
+ This record has an object id, (Oid) which is the tuple
+ <c>{employee, 104732}</c>.
+ </p>
+ <p>Thus, each table is made up of records, where the first element
+ is a record name and the second element of the table is a key
+ which identifies the particular record in that table. The
+ combination of the table name and a key, is an arity two tuple
+ <c>{Tab, Key}</c> called the Oid. See Chapter 4:<seealso marker="Mnesia_chap4#recordnames_tablenames">Record Names Versus Table Names</seealso>, for more information
+ regarding the relationship between the record name and the table
+ name.
+ </p>
+ <p>What makes the Mnesia data model an extended relational model
+ is the ability to store arbitrary Erlang terms in the attribute
+ fields. One attribute value could for example be a whole tree of
+ oids leading to other terms in other tables. This
+ type of record is hard to model in traditional relational
+ DBMSs.</p>
+ </section>
+
+ <section>
+ <marker id="start_mnesia"></marker>
+ <title>Starting Mnesia</title>
+ <p>Before we can start Mnesia, we must initialize an empty schema
+ on all the participating nodes.
+ </p>
+ <list type="bulleted">
+ <item>The Erlang system must be started.
+ </item>
+ <item>Nodes with disc database schema must be defined and
+ implemented with the function <c>create_schema(NodeList).</c></item>
+ </list>
+ <p>When running a distributed system, with two or more
+ participating nodes, then the <c>mnesia:start( ).</c> function
+ must be executed on each participating node. Typically this would
+ be part of the boot script in an embedded environment.
+ In a test environment or an interactive environment,
+ <c>mnesia:start()</c> can also be used either from the
+ Erlang shell, or another program.
+ </p>
+
+ <section>
+ <title>Initializing a Schema and Starting Mnesia</title>
+ <p>To use a known example, we illustrate how to run the
+ Company database described in Chapter 2 on two separate nodes,
+ which we call <c>a@gin</c> and <c>b@skeppet</c>. Each of these
+ nodes must have have a Mnesia directory as well as an
+ initialized schema before Mnesia can be started. There are two
+ ways to specify the Mnesia directory to be used:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>Specify the Mnesia directory by providing an application
+ parameter either when starting the Erlang shell or in the
+ application script. Previously the following example was used
+ to create the directory for our Company database:</p>
+ <pre>
+%<input>erl -mnesia dir '"/ldisc/scratch/Mnesia.Company"'</input>
+ </pre>
+ </item>
+ <item>If no command line flag is entered, then the Mnesia
+ directory will be the current working directory on the node
+ where the Erlang shell is started.</item>
+ </list>
+ <p>To start our Company database and get it running on the two
+ specified nodes, we enter the following commands:
+ </p>
+ <list type="ordered">
+ <item>
+ <p>On the node called gin:</p>
+ <pre>
+ gin %<input>erl -sname a -mnesia dir '"/ldisc/scratch/Mnesia.company"'</input>
+ </pre>
+ </item>
+ <item>
+ <p>On the node called skeppet:</p>
+ <pre>
+skeppet %<input>erl -sname b -mnesia dir '"/ldisc/scratch/Mnesia.company"'</input>
+ </pre>
+ </item>
+ <item>
+ <p>On one of the two nodes:</p>
+ <pre>
+(a@gin1)><input>mnesia:create_schema([a@gin, b@skeppet]).</input>
+ </pre>
+ </item>
+ <item>The function <c>mnesia:start()</c> is called on both
+ nodes.
+ </item>
+ <item>
+ <p>To initialize the database, execute the following
+ code on one of the two nodes.</p>
+ <codeinclude file="company.erl" tag="%12" type="erl"></codeinclude>
+ </item>
+ </list>
+ <p>As illustrated above, the two directories reside on different nodes, because the
+ <c>/ldisc/scratch</c> (the "local" disc) exists on the two different
+ nodes.
+ </p>
+ <p>By executing these commands we have configured two Erlang
+ nodes to run the Company database, and therefore, initialize the
+ database. This is required only once when setting up, the next time the
+ system is started <c>mnesia:start()</c> is called
+ on both nodes, to initialize the system from disc.
+ </p>
+ <p>In a system of Mnesia nodes, every node is aware of the
+ current location of all tables. In this example, data is
+ replicated on both nodes and functions which manipulate the
+ data in our tables can be executed on either of the two nodes.
+ Code which manipulate Mnesia data behaves identically
+ regardless of where the data resides.
+ </p>
+ <p>The function <c>mnesia:stop()</c> stops Mnesia on the node
+ where the function is executed. Both the <c>start/0</c> and
+ the <c>stop/0</c> functions work on the "local" Mnesia system,
+ and there are no functions which start or stop a set of nodes.
+ </p>
+ </section>
+
+ <section>
+ <title>The Start-Up Procedure</title>
+ <p>Mnesia is started by calling the following function:
+ </p>
+ <code type="none">
+ mnesia:start().
+ </code>
+ <p>This function initiates the DBMS locally. </p>
+ <p>The choice of configuration will alter the location and load
+ order of the tables. The alternatives are listed below: <br></br>
+</p>
+ <list type="ordered">
+ <item>Tables that are stored locally only, are initialized
+ from the local Mnesia directory.
+ </item>
+ <item>Replicated tables that reside locally
+ as well as somewhere else are either initiated from disc or
+ by copying the entire table from the other node depending on
+ which of the different replicas is the most recent. Mnesia
+ determines which of the tables is the most recent.
+ </item>
+ <item>Tables that reside on remote nodes are available to other nodes as soon
+ as they are loaded.</item>
+ </list>
+ <p>Table initialization is asynchronous, the function
+ call <c>mnesia:start()</c> returns the atom <c>ok</c> and
+ then starts to initialize the different tables. Depending on
+ the size of the database, this may take some time, and the
+ application programmer must wait for the tables that the
+ application needs before they can be used. This achieved by using
+ the function:</p>
+ <list type="bulleted">
+ <item><c>mnesia:wait_for_tables(TabList, Timeout)</c></item>
+ </list>
+ <p>This function suspends the caller until all tables
+ specified in <c>TabList</c> are properly initiated.
+ </p>
+ <p>A problem can arise if a replicated table on one node is
+ initiated, but Mnesia deduces that another (remote)
+ replica is more recent than the replica existing on
+ the local node, the initialization procedure will not proceed.
+ In this situation, a call to to
+ <c>mnesia:wait_for_tables/2</c> suspends the caller until the
+ remote node has initiated the table from its local disc and
+ the node has copied the table over the network to the local node.
+ </p>
+ <p>This procedure can be time consuming however, the shortcut function
+ shown below will load all the tables from disc at a faster rate:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:force_load_table(Tab)</c>. This function forces
+ tables to be loaded from disc regardless of the network
+ situation.</item>
+ </list>
+ <p>Thus, we can assume that if an application
+ wishes to use tables <c>a</c> and <c>b</c>, then the
+ application must perform some action similar to the below code before it can utilize the tables.
+ </p>
+ <pre>
+ case mnesia:wait_for_tables([a, b], 20000) of
+ {timeout, RemainingTabs} ->
+ panic(RemainingTabs);
+ ok ->
+ synced
+ end.
+ </pre>
+ <warning>
+ <p>When tables are forcefully loaded from the local disc,
+ all operations that were performed on the replicated table
+ while the local node was down, and the remote replica was
+ alive, are lost. This can cause the database to become
+ inconsistent.</p>
+ </warning>
+ <p>If the start-up procedure fails, the
+ <c>mnesia:start()</c> function returns the cryptic tuple
+ <c>{error,{shutdown, {mnesia_sup,start,[normal,[]]}}}</c>.
+ Use command line arguments -boot start_sasl as argument to
+ the erl script in order to get more information
+ about the start failure.
+ </p>
+ </section>
+ </section>
+
+ <section>
+ <marker id="create_tables"></marker>
+ <title>Creating New Tables</title>
+ <p>Mnesia provides one function to create new tables. This
+ function is: <c>mnesia:create_table(Name, ArgList).</c></p>
+ <p>When executing this function, it returns one of the following
+ responses:
+ </p>
+ <list type="bulleted">
+ <item><c>{atomic, ok}</c> if the function executes
+ successfully
+ </item>
+ <item><c>{aborted, Reason}</c> if the function fails.
+ </item>
+ </list>
+ <p>The function arguments are:
+ </p>
+ <list type="bulleted">
+ <item><c>Name</c> is the atomic name of the table. It is
+ usually the same name as the name of the records that
+ constitute the table. (See <c>record_name</c> for more
+ details.)
+ </item>
+ <item>
+ <p><c>ArgList</c> is a list of <c>{Key,Value}</c> tuples.
+ The following arguments are valid:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>{type, Type}</c> where <c>Type</c> must be either of the
+ atoms <c>set</c>, <c>ordered_set</c> or <c>bag</c>.
+ The default value is
+ <c>set</c>. Note: currently 'ordered_set'
+ is not supported for 'disc_only_copies' tables.
+ A table of type <c>set</c> or <c>ordered_set</c> has either zero or
+ one record per key. Whereas a table of type <c>bag</c> can
+ have an arbitrary number of records per key. The key for
+ each record is always the first attribute of the record.</p>
+ <p>The following example illustrates the difference between
+ type <c>set</c> and <c>bag</c>: </p>
+ <pre>
+ f() -> F = fun() ->
+\011 mnesia:write({foo, 1, 2}), mnesia:write({foo, 1, 3}),
+\011 mnesia:read({foo, 1}) end, mnesia:transaction(F). </pre>
+ <p>This transaction will return the list <c>[{foo,1,3}]</c> if
+ the <c>foo</c> table is of type <c>set</c>. However, list
+ <c>[{foo,1,2}, {foo,1,3}]</c> will return if the table is
+ of type <c>bag</c>. Note the use of <c>bag</c> and
+ <c>set</c> table types. </p>
+ <p>Mnesia tables can never contain
+ duplicates of the same record in the same table. Duplicate
+ records have attributes with the same contents and key.
+ </p>
+ </item>
+ <item>
+ <p><c>{disc_copies, NodeList}</c>, where <c>NodeList</c> is a
+ list of the nodes where this table will reside on disc.</p>
+ <p>Write operations to a table replica of type
+ <c>disc_copies</c> will write data to the disc copy as well
+ as to the RAM copy of the table. </p>
+ <p>It is possible to have a
+ replicated table of type <c>disc_copies</c> on one node, and
+ the same table stored as a different type on another node.
+ The default value is <c>[]</c>. This arrangement is
+ desirable if we want the following operational
+ characteristics are required:</p>
+ <list type="ordered">
+ <item>read operations must be very fast and performed in RAM
+ </item>
+ <item>all write operations must be written to persistent
+ storage.</item>
+ </list>
+ <p>A write operation on a <c>disc_copies</c> table
+ replica will be performed in two steps. First the write
+ operation is appended to a log file, then the actual
+ operation is performed in RAM.
+ </p>
+ </item>
+ <item>
+ <p><c>{ram_copies, NodeList}</c>, where <c>NodeList</c> is a
+ list of the nodes where this table is stored in RAM. The
+ default value for <c>NodeList</c> is <c>[node()]</c>. If the
+ default value is used to create a new table, it will be
+ located on the local node only. </p>
+ <p>Table replicas of type
+ <c>ram_copies</c> can be dumped to disc with the function
+ <c>mnesia:dump_tables(TabList)</c>.
+ </p>
+ </item>
+ <item><c>{disc_only_copies, NodeList}</c>. These table
+ replicas are stored on disc only and are therefore slower to
+ access. However, a disc only replica consumes less memory than
+ a table replica of the other two storage types.
+ </item>
+ <item><c>{index, AttributeNameList}</c>, where
+ <c>AttributeNameList</c> is a list of atoms specifying the
+ names of the attributes Mnesia shall build and maintain. An
+ index table will exist for every element in the list. The
+ first field of a Mnesia record is the key and thus need no
+ extra index.
+ <br></br>
+The first field of a record is the second element of the
+ tuple, which is the representation of the record.
+ </item>
+ <item><c>{snmp, SnmpStruct}</c>. <c>SnmpStruct</c> is
+ described in the SNMP User Guide. Basically, if this attribute
+ is present in <c>ArgList</c> of <c>mnesia:create_table/2</c>,
+ the table is immediately accessible by means of the Simple
+ Network Management Protocol (SNMP).
+ <br></br>
+It is easy to design applications which use SNMP to
+ manipulate and control the system. Mnesia provides a direct
+ mapping between the logical tables that make up an SNMP
+ control application and the physical data which make up a
+ Mnesia table. <c>[]</c>
+ is default.
+ </item>
+ <item><c>{local_content, true}</c> When an application needs a
+ table whose contents should be locally unique on each
+ node,
+ <c>local_content</c> tables may be used. The name of the
+ table is known to all Mnesia nodes, but its contents is
+ unique for each node. Access to this type of table must be
+ done locally. </item>
+ <item>
+ <p><c>{attributes, AtomList}</c> is a list of the attribute
+ names for the records that are supposed to populate the
+ table. The default value is the list <c>[key, val]</c>. The
+ table must at least have one extra attribute besides the
+ key. When accessing single attributes in a record, it is not
+ recommended to hard code the attribute names as atoms. Use
+ the construct <c>record_info(fields,record_name)</c>
+ instead. The expression
+ <c>record_info(fields,record_name)</c> is processed by the
+ Erlang macro pre-processor and returns a list of the
+ record's field names. With the record definition
+ <c>-record(foo, {x,y,z}).</c> the expression
+ <c>record_info(fields,foo)</c> is expanded to the list
+ <c>[x,y,z]</c>. Accordingly, it is possible to provide the
+ attribute names yourself, or to use the <c>record_info/2</c>
+ notation. </p>
+ <p>It is recommended that
+ the <c>record_info/2</c> notation be used as it is easier to
+ maintain the program and it will be more robust with regards
+ to future record changes.
+ </p>
+ </item>
+ <item>
+ <p><c>{record_name, Atom}</c> specifies the common name of
+ all records stored in the table. All records, stored in
+ the table, must have this name as their first element.
+ The <c>record_name</c> defaults to the name of the
+ table. For more information see Chapter 4:<seealso marker="Mnesia_chap4#recordnames_tablenames">Record Names Versus Table Names</seealso>.</p>
+ </item>
+ </list>
+ </item>
+ </list>
+ <p>As an example, assume we have the record definition:</p>
+ <pre>
+ -record(funky, {x, y}).
+ </pre>
+ <p>The below call would create a table which is replicated on two
+ nodes, has an additional index on the <c>y</c> attribute, and is
+ of type
+ <c>bag</c>.</p>
+ <pre>
+ mnesia:create_table(funky, [{disc_copies, [N1, N2]}, {index,
+ [y]}, {type, bag}, {attributes, record_info(fields, funky)}]).
+ </pre>
+ <p>Whereas a call to the below default code values: </p>
+ <pre>
+mnesia:create_table(stuff, []) </pre>
+ <p>would return a table with a RAM copy on the
+ local node, no additional indexes and the attributes defaulted to
+ the list <c>[key,val]</c>.</p>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc
new file mode 100644
index 0000000000..7d89c1b0dd
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc
@@ -0,0 +1,1171 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Transactions and Other Access Contexts</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_chap4.xml</file>
+ </header>
+ <p>This chapter describes the Mnesia transaction system and the
+ transaction properties which make Mnesia a fault tolerant,
+ distributed database management system.
+ </p>
+ <p>Also covered in this chapter are the locking functions,
+ including table locks and sticky locks, as well as alternative
+ functions which bypass the transaction system in favor of improved
+ speed and reduced overheads. These functions are called "dirty
+ operations". We also describe the usage of nested transactions.
+ This chapter contains the following sections:
+ </p>
+ <list type="bulleted">
+ <item>transaction properties, which include atomicity,
+ consistency, isolation, and durability
+ </item>
+ <item>Locking
+ </item>
+ <item>Dirty operations
+ </item>
+ <item>Record names vs table names
+ </item>
+ <item>Activity concept and various access contexts
+ </item>
+ <item>Nested transactions
+ </item>
+ <item>Pattern matching
+ </item>
+ <item>Iteration
+ </item>
+ </list>
+
+ <section>
+ <marker id="trans_prop"></marker>
+ <title>Transaction Properties</title>
+ <p>Transactions are an important tool when designing fault
+ tolerant, distributed systems. A Mnesia transaction is a mechanism
+ by which a series of database operations can be executed as one
+ functional block. The functional block which is run as a
+ transaction is called a Functional Object (Fun), and this code can
+ read, write, or delete Mnesia records. The Fun is evaluated as a
+ transaction which either commits, or aborts. If a transaction
+ succeeds in executing Fun it will replicate the action on all nodes
+ involved, or abort if an error occurs.
+ </p>
+ <p>The following example shows a transaction which raises the
+ salary of certain employee numbers.
+ </p>
+ <codeinclude file="company.erl" tag="%5" type="erl"></codeinclude>
+ <p>The transaction <c>raise(Eno, Raise) - ></c> contains a Fun
+ made up of four lines of code. This Fun is called by the statement
+ <c>mnesia:transaction(F)</c> and returns a value.
+ </p>
+ <p>The Mnesia transaction system facilitates the construction of
+ reliable, distributed systems by providing the following important
+ properties:
+ </p>
+ <list type="bulleted">
+ <item>The transaction handler ensures that a Fun which is placed
+ inside a transaction does not interfere with operations embedded
+ in other transactions when it executes a series of operations on
+ tables.
+ </item>
+ <item>The transaction handler ensures that either all operations
+ in the transaction are performed successfully on all nodes
+ atomically, or the transaction fails without permanent effect on
+ any of the nodes.
+ </item>
+ <item>The Mnesia transactions have four important properties,
+ which we call <em>A</em>tomicity,
+ <em>C</em>onsistency,<em>I</em>solation, and
+ <em>D</em>urability, or ACID for short. These properties are
+ described in the following sub-sections.</item>
+ </list>
+
+ <section>
+ <title>Atomicity</title>
+ <p><em>Atomicity</em> means that database changes which are
+ executed by a transaction take effect on all nodes involved, or
+ on none of the nodes. In other words, the transaction either
+ succeeds entirely, or it fails entirely.
+ </p>
+ <p>Atomicity is particularly important when we want to
+ atomically write more than one record in the same
+ transaction. The <c>raise/2</c> function, shown as an example
+ above, writes one record only. The <c>insert_emp/3</c> function,
+ shown in the program listing in Chapter 2, writes the record
+ <c>employee</c> as well as employee relations such as
+ <c>at_dep</c> and <c>in_proj</c> into the database. If we run
+ this latter code inside a transaction, then the transaction
+ handler ensures that the transaction either succeeds completely,
+ or not at all.
+ </p>
+ <p>Mnesia is a distributed DBMS where data can be replicated on
+ several nodes. In many such applications, it is important that a
+ series of write operations are performed atomically inside a
+ transaction. The atomicity property ensures that a transaction
+ take effect on all nodes, or none at all. </p>
+ </section>
+
+ <section>
+ <title>Consistency</title>
+ <p><em>Consistency</em>. This transaction property ensures that
+ a transaction always leaves the DBMS in a consistent state. For
+ example, Mnesia ensures that inconsistencies will not occur if
+ Erlang, Mnesia or the computer crashes while a write operation
+ is in progress.
+ </p>
+ </section>
+
+ <section>
+ <title>Isolation</title>
+ <p><em>Isolation</em>. This transaction property ensures that
+ transactions which execute on different nodes in a network, and
+ access and manipulate the same data records, will not interfere
+ with each other.
+ </p>
+ <p>The isolation property makes it possible to concurrently execute
+ the <c>raise/2</c> function. A classical problem in concurrency control
+ theory is the so called "lost update problem".
+ </p>
+ <p>The isolation property is extremely useful if the following
+ circumstances occurs where an employee (with an employee number
+ 123) and two processes, (P1 and P2), are concurrently trying to
+ raise the salary for the employee. The initial value of the
+ employees salary is, for example, 5. Process P1 then starts to execute,
+ it reads the employee record and adds 2 to the salary. At this
+ point in time, process P1 is for some reason preempted and
+ process P2 has the opportunity to run. P2 reads the record, adds 3
+ to the salary, and finally writes a new employee record with
+ the salary set to 8. Now, process P1 start to run again and
+ writes its employee record with salary set to 7, thus
+ effectively overwriting and undoing the work performed by
+ process P2. The update performed by P2 is lost.
+ </p>
+ <p>A transaction system makes it possible to concurrently
+ execute two or more processes which manipulate the same
+ record. The programmer does not need to check that the
+ updates are synchronous, this is overseen by the
+ transaction handler. All programs accessing the database through
+ the transaction system may be written as if they had sole access
+ to the data.
+ </p>
+ </section>
+
+ <section>
+ <title>Durability</title>
+ <p><em>Durability</em>. This transaction property ensures that
+ changes made to the DBMS by a transaction are permanent. Once a
+ transaction has been committed, all changes made to the database
+ are durable - i.e. they are written safely to disc and will not
+ be corrupted or disappear.
+ </p>
+ <note>
+ <p>The durability feature described does not entirely apply to
+ situations where Mnesia is configured as a "pure" primary memory
+ database.
+ </p>
+ </note>
+ </section>
+ </section>
+
+ <section>
+ <title>Locking</title>
+ <p>Different transaction managers employ different strategies to
+ satisfy the isolation property. Mnesia uses the standard technique
+ of two-phase locking. This means that locks are set on records
+ before they are read or written. Mnesia uses five different kinds
+ of locks.
+ </p>
+ <list type="bulleted">
+ <item><em>Read locks</em>. A read lock is set on one replica of
+ a record before it can be read.
+ </item>
+ <item><em>Write locks</em>. Whenever a transaction writes to an
+ record, write locks are first set on all replicas of that
+ particular record.
+ </item>
+ <item><em>Read table locks</em>. If a transaction traverses an
+ entire table in search for a record which satisfy some
+ particular property, it is most inefficient to set read locks on
+ the records, one by one. It is also very memory consuming, since
+ the read locks themselves may take up considerable space if the
+ table is very large. For this reason, Mnesia can set a read lock
+ on an entire table.
+ </item>
+ <item><em>Write table locks</em>. If a transaction writes a
+ large number of records to one table, it is possible to set a
+ write lock on the entire table.
+ </item>
+ <item><em>Sticky locks</em>. These are write locks that stay in
+ place at a node after the transaction which initiated the lock
+ has terminated. </item>
+ </list>
+ <p>Mnesia employs a strategy whereby functions such as
+ <c>mnesia:read/1</c> acquire the necessary locks dynamically as
+ the transactions execute. Mnesia automatically sets and releases
+ the locks and the programmer does not have to code these
+ operations.
+ </p>
+ <p>Deadlocks can occur when concurrent processes set and release
+ locks on the same records. Mnesia employs a "wait-die" strategy to
+ resolve these situations. If Mnesia suspects that a deadlock can
+ occur when a transaction tries to set a lock, the transaction is
+ forced to release all its locks and sleep for a while. The
+ Fun in the transaction will be evaluated one more time.
+ </p>
+ <p>For this reason, it is important that the code inside the Fun given to
+ <c>mnesia:transaction/1</c> is pure. Some strange results can
+ occur if, for example, messages are sent by the transaction
+ Fun. The following example illustrates this situation:
+ </p>
+ <codeinclude file="company.erl" tag="%6" type="erl"></codeinclude>
+ <p>This transaction could write the text <c>"Trying to write ... "</c> a thousand times to the terminal. Mnesia does guarantee,
+ however, that each and every transaction will eventually run. As a
+ result, Mnesia is not only deadlock free, but also livelock
+ free.
+ </p>
+ <p>The Mnesia programmer cannot prioritize one particular
+ transaction to execute before other transactions which are waiting
+ to execute. As a result, the Mnesia DBMS transaction system is not
+ suitable for hard real time applications. However, Mnesia contains
+ other features that have real time properties.
+ </p>
+ <p>Mnesia dynamically sets and releases locks as
+ transactions execute, therefore, it is very dangerous to execute code with
+ transaction side-effects. In particular, a <c>receive</c>
+ statement inside a transaction can lead to a situation where the
+ transaction hangs and never returns, which in turn can cause locks
+ not to release. This situation could bring the whole system to a
+ standstill since other transactions which execute in other
+ processes, or on other nodes, are forced to wait for the defective
+ transaction.
+ </p>
+ <p>If a transaction terminates abnormally, Mnesia will
+ automatically release the locks held by the transaction.
+ </p>
+ <p>We have shown examples of a number of functions that can be
+ used inside a transaction. The following list shows the
+ <em>simplest</em> Mnesia functions that work with transactions. It
+ is important to realize that these functions must be embedded in a
+ transaction. If no enclosing transaction (or other enclosing
+ Mnesia activity) exists, they will all fail.
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:transaction(Fun) -> {aborted, Reason} |{atomic, Value}</c>. This function executes one transaction with the
+ functional object <c>Fun</c> as the single parameter.
+ </item>
+ <item><c>mnesia:read({Tab, Key}) -> transaction abort | RecordList</c>. This function reads all records with <c>Key</c>
+ as key from table <c>Tab</c>. This function has the same semantics
+ regardless of the location of <c>Table</c>. If the table is of
+ type <c>bag</c>, the <c>read({Tab, Key})</c> can return an arbitrarily
+ long list. If the table is of type <c>set</c>, the list is
+ either of length one, or <c>[]</c>.
+ </item>
+ <item><c>mnesia:wread({Tab, Key}) -> transaction abort | RecordList</c>. This function behaves the same way as the
+ previously listed <c>read/1</c> function, except that it
+ acquires a write lock instead of a read lock. If we execute a
+ transaction which reads a record, modifies the record, and then
+ writes the record, it is slightly more efficient to set the
+ write lock immediately. In cases where we issue a
+ <c>mnesia:read/1</c>, followed by a <c>mnesia:write/1</c>, the
+ first read lock must be upgraded to a write lock when the write
+ operation is executed.
+ </item>
+ <item><c>mnesia:write(Record) -> transaction abort | ok</c>. This function writes a record into the database. The
+ <c>Record</c> argument is an instance of a record. The function
+ returns <c>ok</c>, or aborts the transaction if an error should
+ occur.
+ </item>
+ <item><c>mnesia:delete({Tab, Key}) -> transaction abort | ok</c>. This
+ function deletes all records with the given key.
+ </item>
+ <item><c>mnesia:delete_object(Record) -> transaction abort | ok</c>. This function deletes records with object id
+ <c>Record</c>. This function is used when we want to delete only
+ some records in a table of type <c>bag</c>. </item>
+ </list>
+
+ <section>
+ <title>Sticky Locks</title>
+ <p>As previously stated, the locking strategy used by Mnesia is
+ to lock one record when we read a record, and lock all replicas
+ of a record when we write a record. However, there are
+ applications which use Mnesia mainly for its fault-tolerant
+ qualities, and these applications may be configured with one
+ node doing all the heavy work, and a standby node which is ready
+ to take over in case the main node fails. Such applications may
+ benefit from using sticky locks instead of the normal locking
+ scheme.
+ </p>
+ <p>A sticky lock is a lock which stays in place at a node after
+ the transaction which first acquired the lock has terminated. To
+ illustrate this, assume that we execute the following
+ transaction:
+ </p>
+ <code type="none">
+ F = fun() ->
+ mnesia:write(#foo{a = kalle})
+ end,
+ mnesia:transaction(F).
+ </code>
+ <p>The <c>foo</c> table is replicated on the two nodes <c>N1</c>
+ and <c>N2</c>.
+ <br></br>
+Normal locking requires:
+ </p>
+ <list type="bulleted">
+ <item>one network rpc (2 messages) to acquire the write lock
+ </item>
+ <item>three network messages to execute the two-phase commit protocol.
+ </item>
+ </list>
+ <p>If we use sticky locks, we must first change the code as follows:
+ </p>
+ <code type="none">
+
+ F = fun() ->
+ mnesia:s_write(#foo{a = kalle})
+ end,
+ mnesia:transaction(F).
+ </code>
+ <p>This code uses the <c>s_write/1</c> function instead of the
+ <c>write/1</c> function. The <c>s_write/1</c> function sets a
+ sticky lock instead of a normal lock. If the table is not
+ replicated, sticky locks have no special effect. If the table is
+ replicated, and we set a sticky lock on node <c>N1</c>, this
+ lock will then stick to node <c>N1</c>. The next time we try to
+ set a sticky lock on the same record at node <c>N1</c>, Mnesia
+ will see that the lock is already set and will not do a network
+ operation in order to acquire the lock.
+ </p>
+ <p>It is much more efficient to set a local lock than it is to set
+ a networked lock, and for this reason sticky locks can benefit
+ application that use a replicated table and perform most of the
+ work on only one of the nodes.
+ </p>
+ <p>If a record is stuck at node <c>N1</c> and we try to set a
+ sticky lock for the record on node <c>N2</c>, the record must be
+ unstuck. This operation is expensive and will reduce performance. The unsticking is
+ done automatically if we issue <c>s_write/1</c> requests at
+ <c>N2</c>.
+ </p>
+ </section>
+
+ <section>
+ <title>Table Locks</title>
+ <p>Mnesia supports read and write locks on whole tables as a
+ complement to the normal locks on single records. As previously
+ stated, Mnesia sets and releases locks automatically, and the
+ programmer does not have to code these operations. However,
+ transactions which read and write a large number of records in a
+ specific table will execute more efficiently if we start the
+ transaction by setting a table lock on this table. This will
+ block other concurrent transactions from the table. The
+ following two function are used to set explicit table locks for
+ read and write operations:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:read_lock_table(Tab)</c> Sets a read lock on
+ the table <c>Tab</c></item>
+ <item><c>mnesia:write_lock_table(Tab)</c> Sets a write lock on
+ the table <c>Tab</c></item>
+ </list>
+ <p>Alternate syntax for acquisition of table locks is as follows:
+ </p>
+ <code type="none">
+ mnesia:lock({table, Tab}, read)
+ mnesia:lock({table, Tab}, write)
+ </code>
+ <p>The matching operations in Mnesia may either lock the entire
+ table or just a single record (when the key is bound in the
+ pattern).
+ </p>
+ </section>
+
+ <section>
+ <title>Global Locks</title>
+ <p>Write locks are normally acquired on all nodes where a
+ replica of the table resides (and is active). Read locks are
+ acquired on one node (the local one if a local
+ replica exists).
+ </p>
+ <p>The function <c>mnesia:lock/2</c> is intended to support
+ table locks (as mentioned previously)
+ but also for situations when locks need to be
+ acquired regardless of how tables have been replicated:
+ </p>
+ <code type="none">
+ mnesia:lock({global, GlobalKey, Nodes}, LockKind)
+
+ LockKind ::= read | write | ...
+ </code>
+ <p>The lock is acquired on the LockItem on all Nodes in the
+ nodes list.</p>
+ </section>
+ </section>
+
+ <section>
+ <title>Dirty Operations</title>
+ <p>In many applications, the overhead of processing a transaction
+ may result in a loss of performance. Dirty operation are short
+ cuts which bypass much of the processing and increase the speed
+ of the transaction.
+ </p>
+ <p>Dirty operation are useful in many situations, for example in a datagram routing
+ application where Mnesia stores the routing table, and it is time
+ consuming to start a whole transaction every time a packet is
+ received. For this reason, Mnesia has functions which manipulate
+ tables without using transactions. This alternative
+ to processing is known as a dirty operation. However, it is important to
+ realize the trade-off in avoiding the overhead of transaction
+ processing:
+ </p>
+ <list type="bulleted">
+ <item>The atomicity and the isolation properties of Mnesia are lost.
+ </item>
+ <item>The isolation property is compromised, because other
+ Erlang processes, which use transaction to manipulate the data,
+ do not get the benefit of isolation if we simultaneously use
+ dirty operations to read and write records from the same table.
+ </item>
+ </list>
+ <p>The major advantage of dirty operations is that they execute
+ much faster than equivalent operations that are processed as
+ functional objects within a transaction.
+ </p>
+ <p>Dirty operations
+ are written to disc if they are performed on a table of type
+ <c>disc_copies</c>, or type <c>disc_only_copies</c>. Mnesia also
+ ensures that all replicas of a table are updated if a
+ dirty write operation is performed on a table.
+ </p>
+ <p>A dirty operation will ensure a certain level of consistency.
+ For example, it is not possible for dirty operations to return
+ garbled records. Hence, each individual read or write operation
+ is performed in an atomic manner.
+ </p>
+ <p>All dirty functions execute a call to <c>exit({aborted, Reason})</c> on failure. Even if the following functions are
+ executed inside a transaction no locks will be acquired. The
+ following functions are available:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:dirty_read({Tab, Key})</c>. This function reads
+ record(s) from Mnesia.
+ </item>
+ <item><c>mnesia:dirty_write(Record)</c>. This function writes
+ the record <c>Record</c></item>
+ <item><c>mnesia:dirty_delete({Tab, Key})</c>. This function deletes
+ record(s) with the key <c>Key</c>.
+ </item>
+ <item><c>mnesia:dirty_delete_object(Record)</c> This function is
+ the dirty operation alternative to the function
+ <c>delete_object/1</c></item>
+ <item>
+ <p><c>mnesia:dirty_first(Tab)</c>. This function returns the
+ "first" key in the table <c>Tab</c>. </p>
+ <p>Records in <c>set</c> or <c>bag</c> tables are not sorted.
+ However, there is
+ a record order which is not known to the user.
+ This means that it is possible to traverse a table by means of
+ this function in conjunction with the <c>dirty_next/2</c>
+ function.
+ </p>
+ <p>If there are no records at all in the table, this function
+ will return the atom <c>'$end_of_table'</c>. It is not
+ recommended to use this atom as the key for any user
+ records.
+ </p>
+ </item>
+ <item><c>mnesia:dirty_next(Tab, Key)</c>. This function returns
+ the "next" key in the table <c>Tab</c>. This function makes it
+ possible to traverse a table and perform some operation on all
+ records in the table. When the end of the table is reached the
+ special key <c>'$end_of_table'</c> is returned. Otherwise, the
+ function returns a key which can be used to read the actual
+ record.
+ <br></br>
+The behavior is undefined if any process perform a write
+ operation on the table while we traverse the table with the
+ <c>dirty_next/2</c> function. This is because <c>write</c>
+ operations on a Mnesia table may lead to internal reorganizations
+ of the table itself. This is an implementation detail, but remember
+ the dirty functions are low level functions.
+ </item>
+ <item><c>mnesia:dirty_last(Tab)</c> This function works exactly as
+ <c>mnesia:dirty_first/1</c> but returns the last object in
+ Erlang term order for the <c>ordered_set</c> table type. For
+ all other table types, <c>mnesia:dirty_first/1</c> and
+ <c>mnesia:dirty_last/1</c> are synonyms.
+ </item>
+ <item><c>mnesia:dirty_prev(Tab, Key)</c> This function works exactly as
+ <c>mnesia:dirty_next/2</c> but returns the previous object in
+ Erlang term order for the ordered_set table type. For
+ all other table types, <c>mnesia:dirty_next/2</c> and
+ <c>mnesia:dirty_prev/2</c> are synonyms.
+ </item>
+ <item>
+ <p><c>mnesia:dirty_slot(Tab, Slot)</c></p>
+ <p>Returns the list of records that are associated with Slot
+ in a table. It can be used to traverse a table in a manner
+ similar to the <c>dirty_next/2</c> function. A table has a
+ number of slots that range from zero to some unknown upper
+ bound. The function <c>dirty_slot/2</c> returns the special
+ atom <c>'$end_of_table'</c> when the end of the table is
+ reached.
+ <br></br>
+The behavior of this function is undefined if the
+ table is written on while being
+ traversed. <c>mnesia:read_lock_table(Tab)</c> may be used to
+ ensure that no transaction protected writes are performed
+ during the iteration.
+ </p>
+ </item>
+ <item>
+ <p><c>mnesia:dirty_update_counter({Tab,Key}, Val)</c>. </p>
+ <p>Counters are positive integers with a value greater than or
+ equal to zero. Updating a counter will add the <c>Val</c> and
+ the counter where <c>Val</c> is a positive or negative integer.
+ <br></br>
+ There exists no special counter records in
+ Mnesia. However, records on the form of <c>{TabName, Key, Integer}</c> can be used as counters, and can be
+ persistent.
+ </p>
+ <p>It is not possible to have transaction protected updates of
+ counter records.
+ </p>
+ <p>There are two significant differences when using this
+ function instead of reading the record, performing the
+ arithmetic, and writing the record:
+ </p>
+ <list type="ordered">
+ <item>it is much more efficient
+ </item>
+ <item>the <c>dirty_update_counter/2</c> function is
+ performed as an atomic operation although it is not protected by
+ a transaction. Accordingly, no table update is lost if two
+ processes simultaneously execute the
+ <c>dirty_update_counter/2</c> function.
+ </item>
+ </list>
+ </item>
+ <item><c>mnesia:dirty_match_object(Pat)</c>. This function is
+ the dirty equivalent of <c>mnesia:match_object/1</c>.
+ </item>
+ <item><c>mnesia:dirty_select(Tab, Pat)</c>. This function is
+ the dirty equivalent of <c>mnesia:select/2</c>.
+ </item>
+ <item><c>mnesia:dirty_index_match_object(Pat, Pos)</c>. This
+ function is the dirty equivalent of
+ <c>mnesia:index_match_object/2</c>.
+ </item>
+ <item><c>mnesia:dirty_index_read(Tab, SecondaryKey, Pos)</c>. This
+ function is the dirty equivalent of <c>mnesia:index_read/3</c>.
+ </item>
+ <item><c>mnesia:dirty_all_keys(Tab)</c>. This function is the
+ dirty equivalent of <c>mnesia:all_keys/1</c>.
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <marker id="recordnames_tablenames"></marker>
+ <title>Record Names versus Table Names</title>
+ <p>In Mnesia, all records in a table must have the same name. All
+ the records must be instances of the same
+ record type. The record name does however not necessarily be
+ the same as the table name. Even though that it is the case in
+ the most of the examples in this document. If a table is created
+ without the <c>record_name</c> property the code below will
+ ensure all records in the tables have the same name as the table:
+ </p>
+ <code type="none">
+ mnesia:create_table(subscriber, [])
+ </code>
+ <p>However, if the table is is created with an explicit record name
+ as argument, as shown below, it is possible to store subscriber
+ records in both of the tables regardless of the table names:
+ </p>
+ <code type="none">
+ TabDef = [{record_name, subscriber}],
+ mnesia:create_table(my_subscriber, TabDef),
+ mnesia:create_table(your_subscriber, TabDef).
+ </code>
+ <p>In order to access such
+ tables it is not possible to use the simplified access functions
+ as described earlier in the document. For example,
+ writing a subscriber record into a table requires a
+ <c>mnesia:write/3</c>function instead of the simplified functions
+ <c>mnesia:write/1</c> and <c>mnesia:s_write/1</c>:
+ </p>
+ <code type="none">
+ mnesia:write(subscriber, #subscriber{}, write)
+ mnesia:write(my_subscriber, #subscriber{}, sticky_write)
+ mnesia:write(your_subscriber, #subscriber{}, write)
+ </code>
+ <p>The following simplified piece of code illustrates the
+ relationship between the simplified access functions used in
+ most examples and their more flexible counterparts:
+ </p>
+ <code type="none">
+ mnesia:dirty_write(Record) ->
+ Tab = element(1, Record),
+ mnesia:dirty_write(Tab, Record).
+
+ mnesia:dirty_delete({Tab, Key}) ->
+ mnesia:dirty_delete(Tab, Key).
+
+ mnesia:dirty_delete_object(Record) ->
+ Tab = element(1, Record),
+ mnesia:dirty_delete_object(Tab, Record)
+
+ mnesia:dirty_update_counter({Tab, Key}, Incr) ->
+ mnesia:dirty_update_counter(Tab, Key, Incr).
+
+ mnesia:dirty_read({Tab, Key}) ->
+ Tab = element(1, Record),
+ mnesia:dirty_read(Tab, Key).
+
+ mnesia:dirty_match_object(Pattern) ->
+ Tab = element(1, Pattern),
+ mnesia:dirty_match_object(Tab, Pattern).
+
+ mnesia:dirty_index_match_object(Pattern, Attr)
+ Tab = element(1, Pattern),
+ mnesia:dirty_index_match_object(Tab, Pattern, Attr).
+
+ mnesia:write(Record) ->
+ Tab = element(1, Record),
+ mnesia:write(Tab, Record, write).
+
+ mnesia:s_write(Record) ->
+ Tab = element(1, Record),
+ mnesia:write(Tab, Record, sticky_write).
+
+ mnesia:delete({Tab, Key}) ->
+ mnesia:delete(Tab, Key, write).
+
+ mnesia:s_delete({Tab, Key}) ->
+ mnesia:delete(Tab, Key, sticky_write).
+
+ mnesia:delete_object(Record) ->
+ Tab = element(1, Record),
+ mnesia:delete_object(Tab, Record, write).
+
+ mnesia:s_delete_object(Record) ->
+ Tab = element(1, Record),
+ mnesia:delete_object(Tab, Record. sticky_write).
+
+ mnesia:read({Tab, Key}) ->
+ mnesia:read(Tab, Key, read).
+
+ mnesia:wread({Tab, Key}) ->
+ mnesia:read(Tab, Key, write).
+
+ mnesia:match_object(Pattern) ->
+ Tab = element(1, Pattern),
+ mnesia:match_object(Tab, Pattern, read).
+
+ mnesia:index_match_object(Pattern, Attr) ->
+ Tab = element(1, Pattern),
+ mnesia:index_match_object(Tab, Pattern, Attr, read).
+ </code>
+ </section>
+
+ <section>
+ <title>Activity Concept and Various Access Contexts</title>
+ <p>As previously described, a functional object (Fun) performing
+ table access operations as listed below may be
+ passed on as arguments to the function
+ <c>mnesia:transaction/1,2,3</c>:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>mnesia:write/3 (write/1, s_write/1)</p>
+ </item>
+ <item>
+ <p>mnesia:delete/3 (delete/1, s_delete/1)</p>
+ </item>
+ <item>
+ <p>mnesia:delete_object/3 (delete_object/1, s_delete_object/1)</p>
+ </item>
+ <item>
+ <p>mnesia:read/3 (read/1, wread/1)</p>
+ </item>
+ <item>
+ <p>mnesia:match_object/2 (match_object/1)</p>
+ </item>
+ <item>
+ <p>mnesia:select/3 (select/2)</p>
+ </item>
+ <item>
+ <p>mnesia:foldl/3 (foldl/4, foldr/3, foldr/4)</p>
+ </item>
+ <item>
+ <p>mnesia:all_keys/1</p>
+ </item>
+ <item>
+ <p>mnesia:index_match_object/4 (index_match_object/2)</p>
+ </item>
+ <item>
+ <p>mnesia:index_read/3</p>
+ </item>
+ <item>
+ <p>mnesia:lock/2 (read_lock_table/1, write_lock_table/1)</p>
+ </item>
+ <item>
+ <p>mnesia:table_info/2</p>
+ </item>
+ </list>
+ <p>These functions will be performed in a
+ transaction context involving mechanisms like locking, logging,
+ replication, checkpoints, subscriptions, commit protocols
+ etc.However, the same function may also be
+ evaluated in other activity contexts.
+ <br></br>
+The following activity access contexts are currently supported:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>transaction </p>
+ </item>
+ <item>
+ <p>sync_transaction</p>
+ </item>
+ <item>
+ <p>async_dirty</p>
+ </item>
+ <item>
+ <p>sync_dirty</p>
+ </item>
+ <item>
+ <p>ets</p>
+ </item>
+ </list>
+ <p>By passing the same "fun" as argument to the function
+ <c>mnesia:sync_transaction(Fun [, Args])</c> it will be performed
+ in synced transaction context. Synced transactions waits until all
+ active replicas has committed the transaction (to disc) before
+ returning from the mnesia:sync_transaction call. Using
+ sync_transaction is useful for applications that are executing on
+ several nodes and want to be sure that the update is performed on
+ the remote nodes before a remote process is spawned or a message
+ is sent to a remote process, and also when combining transaction
+ writes with dirty_reads. This is also useful in situations where
+ an application performs frequent or voluminous updates which may
+ overload Mnesia on other nodes.
+ </p>
+ <p>By passing the same "fun" as argument to the function
+ <c>mnesia:async_dirty(Fun [, Args])</c> it will be performed in
+ dirty context. The function calls will be mapped to the
+ corresponding dirty functions. This will still involve logging,
+ replication and subscriptions but there will be no locking,
+ local transaction storage or commit protocols involved.
+ Checkpoint retainers will be updated but will be updated
+ "dirty". Thus, they will be updated asynchronously. The
+ functions will wait for the operation to be performed on one
+ node but not the others. If the table resides locally no waiting
+ will occur.
+ </p>
+ <p>By passing the same "fun" as an argument to the function
+ <c>mnesia:sync_dirty(Fun [, Args])</c> it will be performed in
+ almost the same context as <c>mnesia:async_dirty/1,2</c>. The
+ difference is that the operations are performed
+ synchronously. The caller will wait for the updates to be
+ performed on all active replicas. Using sync_dirty is useful for
+ applications that are executing on several nodes and want to be
+ sure that the update is performed on the remote nodes before a remote
+ process is spawned or a message is sent to a remote process. This
+ is also useful in situations where an application performs frequent or
+ voluminous updates which may overload Mnesia on other
+ nodes.
+ </p>
+ <p>You can check if your code is executed within a transaction with
+ <c>mnesia:is_transaction/0</c>, it returns <c>true</c> when called
+ inside a transaction context and false otherwise.</p>
+
+ <p>Mnesia tables with storage type RAM_copies and disc_copies
+ are implemented internally as "ets-tables" and
+ it is possible for applications to access the these tables
+ directly. This is only recommended if all options have been weighed
+ and the possible outcomes are understood. By passing the earlier
+ mentioned "fun" to the function
+ <c>mnesia:ets(Fun [, Args])</c> it will be performed but in a very raw
+ context. The operations will be performed directly on the
+ local ets tables assuming that the local storage type are
+ RAM_copies and that the table is not replicated on other
+ nodes. Subscriptions will not be triggered nor
+ checkpoints updated, but this operation is blindingly fast. Disc resident
+ tables should not be updated with the ets-function since the
+ disc will not be updated.
+ </p>
+ <p>The Fun may also be passed as an argument to the function
+ <c>mnesia:activity/2,3,4</c> which enables usage of customized
+ activity access callback modules. It can either be obtained
+ directly by stating the module name as argument or implicitly
+ by usage of the <c>access_module</c> configuration parameter. A
+ customized callback module may be used for several purposes,
+ such as providing triggers, integrity constraints, run time
+ statistics, or virtual tables.
+ <br></br>
+ The callback module does
+ not have to access real Mnesia tables, it is free to do whatever
+ it likes as long as the callback interface is fulfilled.
+ <br></br>
+ In Appendix C "The Activity Access Call Back Interface" the source
+ code for one alternate implementation is provided
+ (mnesia_frag.erl). The context sensitive function
+ <c>mnesia:table_info/2</c> may be used to provide virtual
+ information about a table. One usage of this is to perform
+ <c>QLC</c> queries within an activity context with a
+ customized callback module. By providing table information about
+ table indices and other <c>QLC</c> requirements,
+ <c>QLC</c> may be used as a generic query language to
+ access virtual tables.
+ </p>
+ <p>QLC queries may be performed in all these activity
+ contexts (transaction, sync_transaction, async_dirty, sync_dirty
+ and ets). The ets activity will only work if the table has no
+ indices.
+ </p>
+ <note>
+ <p>The mnesia:dirty_* function always executes with
+ async_dirty semantics regardless of which activity access contexts
+ are invoked. They may even invoke contexts without any
+ enclosing activity access context.</p>
+ </note>
+ </section>
+
+ <section>
+ <title>Nested transactions</title>
+ <p>Transactions may be nested in an arbitrary fashion. A child transaction
+ must run in the same process as its parent. When a child transaction
+ aborts, the caller of the child transaction will get the
+ return value <c>{aborted, Reason}</c> and any work performed
+ by the child will be erased. If a child transaction commits, the
+ records written by the child will be propagated to the parent.
+ </p>
+ <p>No locks are released when child transactions terminate. Locks
+ created by a sequence of nested transactions are kept until
+ the topmost transaction terminates. Furthermore, any updates
+ performed by a nested transaction are only propagated
+ in such a manner so that the parent of the nested transaction
+ sees the updates. No final commitment will be done until
+ the top level transaction is terminated.
+ So, although a nested transaction returns <c>{atomic, Val}</c>,
+ if the enclosing parent transaction is aborted, the entire
+ nested operation is aborted.
+ </p>
+ <p>The ability to have nested transaction with identical semantics
+ as top level transaction makes it easier to write
+ library functions that manipulate mnesia tables.
+ </p>
+ <p>Say for example that we have a function that adds a
+ new subscriber to a telephony system:</p>
+ <pre>
+ add_subscriber(S) ->
+ mnesia:transaction(fun() ->
+ case mnesia:read( ..........
+ </pre>
+ <p>This function needs to be called as a transaction.
+ Now assume that we wish to write a function that
+ both calls the <c>add_subscriber/1</c> function and
+ is in itself protected by the context of a transaction.
+ By simply calling the <c>add_subscriber/1</c> from within
+ another transaction, a nested transaction is created.
+ </p>
+ <p>It is also possible to mix different activity access contexts while nesting,
+ but the dirty ones (async_dirty,sync_dirty and ets) will inherit the transaction
+ semantics if they are called inside a transaction and thus it will grab locks and
+ use two or three phase commit.
+ </p>
+ <pre>
+ add_subscriber(S) ->
+ mnesia:transaction(fun() ->
+ %% Transaction context
+ mnesia:read({some_tab, some_data}),
+ mnesia:sync_dirty(fun() ->
+ %% Still in a transaction context.
+ case mnesia:read( ..) ..end), end).
+ add_subscriber2(S) ->
+ mnesia:sync_dirty(fun() ->
+ %% In dirty context
+ mnesia:read({some_tab, some_data}),
+ mnesia:transaction(fun() ->
+ %% In a transaction context.
+ case mnesia:read( ..) ..end), end).
+ </pre>
+ </section>
+
+ <section>
+ <title>Pattern Matching</title>
+ <marker id="matching"></marker>
+ <p>When it is not possible to use <c>mnesia:read/3</c> Mnesia
+ provides the programmer with several functions for matching
+ records against a pattern. The most useful functions of these are:
+ </p>
+ <code type="none">
+ mnesia:select(Tab, MatchSpecification, LockKind) ->
+ transaction abort | [ObjectList]
+ mnesia:select(Tab, MatchSpecification, NObjects, Lock) ->
+ transaction abort | {[Object],Continuation} | '$end_of_table'
+ mnesia:select(Cont) ->
+ transaction abort | {[Object],Continuation} | '$end_of_table'
+ mnesia:match_object(Tab, Pattern, LockKind) ->
+ transaction abort | RecordList
+ </code>
+ <p>These functions matches a <c>Pattern</c> against all records in
+ table <c>Tab</c>. In a <c>mnesia:select</c> call <c>Pattern</c> is
+ a part of <c>MatchSpecification</c> described below. It is not
+ necessarily performed as an exhaustive search of the entire
+ table. By utilizing indices and bound values in the key of the
+ pattern, the actual work done by the function may be condensed
+ into a few hash lookups. Using <c>ordered_set</c> tables may reduce the
+ search space if the keys are partially bound.
+ </p>
+ <p>The pattern provided to the functions must be a valid record,
+ and the first element of the provided tuple must be the
+ <c>record_name</c> of the table. The special element <c>'_'</c>
+ matches any data structure in Erlang (also known as an Erlang
+ term). The special elements <c><![CDATA['$<number>']]></c> behaves as Erlang
+ variables i.e. matches anything and binds the first occurrence and
+ matches the coming occurrences of that variable against the bound value.
+ </p>
+ <p>Use the function <c>mnesia:table_info(Tab, wild_pattern)</c>
+ to obtain a basic pattern which matches all records in a table
+ or use the default value in record creation.
+ Do not make the pattern hard coded since it will make your code more
+ vulnerable to future changes of the record definition.
+ </p>
+ <code type="none">
+ Wildpattern = mnesia:table_info(employee, wild_pattern),
+ %% Or use
+ Wildpattern = #employee{_ = '_'},
+ </code>
+ <p>For the employee table the wild pattern will look like:</p>
+ <code type="none">
+ {employee, '_', '_', '_', '_', '_',' _'}.
+ </code>
+ <p>In order to constrain the match you must replace some
+ of the <c>'_'</c> elements. The code for matching out
+ all female employees, looks like:
+ </p>
+ <code type="none">
+ Pat = #employee{sex = female, _ = '_'},
+ F = fun() -> mnesia:match_object(Pat) end,
+ Females = mnesia:transaction(F).
+ </code>
+ <p>It is also possible to use the match function if we want to
+ check the equality of different attributes. Assume that we want
+ to find all employees which happens to have a employee number
+ which is equal to their room number:
+ </p>
+ <code type="none">
+ Pat = #employee{emp_no = '$1', room_no = '$1', _ = '_'},
+ F = fun() -> mnesia:match_object(Pat) end,
+ Odd = mnesia:transaction(F).
+ </code>
+ <p>The function <c>mnesia:match_object/3</c> lacks some important
+ features that <c>mnesia:select/3</c> have. For example
+ <c>mnesia:match_object/3</c> can only return the matching records,
+ and it can not express constraints other then equality.
+ If we want to find the names of the male employees on the second floor
+ we could write:
+ </p>
+ <codeinclude file="company.erl" tag="%21" type="erl"></codeinclude>
+ <p>Select can be used to add additional constraints and create
+ output which can not be done with <c>mnesia:match_object/3</c>. </p>
+ <p>The second argument to select is a <c>MatchSpecification</c>.
+ A <c>MatchSpecification</c> is list of <c>MatchFunctions</c>, where
+ each <c>MatchFunction</c> consists of a tuple containing
+ <c>{MatchHead, MatchCondition, MatchBody}</c>. <c>MatchHead</c>
+ is the same pattern used in <c>mnesia:match_object/3</c>
+ described above. <c>MatchCondition</c> is a list of additional
+ constraints applied to each record, and <c>MatchBody</c> is used
+ to construct the return values.
+ </p>
+ <p>A detailed explanation of match specifications can be found in
+ the <em>Erts users guide: Match specifications in Erlang </em>,
+ and the ets/dets documentations may provide some additional
+ information.
+ </p>
+ <p>The functions <c>select/4</c> and <c>select/1</c> are used to
+ get a limited number of results, where the <c>Continuation</c>
+ are used to get the next chunk of results. Mnesia uses the
+ <c>NObjects</c> as an recommendation only, thus more or less
+ results then specified with <c>NObjects</c> may be returned in
+ the result list, even the empty list may be returned despite there
+ are more results to collect.
+ </p>
+ <warning>
+ <p>There is a severe performance penalty in using
+ <c>mnesia:select/[1|2|3|4]</c> after any modifying operations
+ are done on that table in the same transaction, i.e. avoid using
+ <c>mnesia:write/1</c> or <c>mnesia:delete/1</c> before a
+ <c>mnesia:select</c> in the same transaction.</p>
+ </warning>
+ <p>If the key attribute is bound in a pattern, the match operation
+ is very efficient. However, if the key attribute in a pattern is
+ given as <c>'_'</c>, or <c>'$1'</c>, the whole <c>employee</c>
+ table must be searched for records that match. Hence if the table is
+ large, this can become a time consuming operation, but it can be
+ remedied with indices (refer to Chapter 5: <seealso marker="Mnesia_chap5#indexing">Indexing</seealso>) if
+ <c>mnesia:match_object</c> is used.
+ </p>
+ <p>QLC queries can also be used to search Mnesia tables. By
+ using <c>mnesia:table/[1|2]</c> as the generator inside a QLC
+ query you let the query operate on a mnesia table. Mnesia
+ specific options to <c>mnesia:table/2</c> are {lock, Lock},
+ {n_objects,Integer} and {traverse, SelMethod}. The <c>lock</c>
+ option specifies whether mnesia should acquire a read or write
+ lock on the table, and <c>n_objects</c> specifies how many
+ results should be returned in each chunk to QLC. The last option is
+ <c>traverse</c> and it specifies which function mnesia should
+ use to traverse the table. Default <c>select</c> is used, but by using
+ <c>{traverse, {select, MatchSpecification}}</c> as an option to
+ <c>mnesia:table/2</c> the user can specify it's own view of the
+ table.
+ </p>
+ <p>If no options are specified a read lock will acquired and 100
+ results will be returned in each chunk, and select will be used
+ to traverse the table, i.e.:
+ </p>
+ <code type="none">
+ mnesia:table(Tab) ->
+ mnesia:table(Tab, [{n_objects,100},{lock, read}, {traverse, select}]).
+ </code>
+ <p>The function <c>mnesia:all_keys(Tab)</c> returns all keys in a
+ table.</p>
+ </section>
+
+ <section>
+ <title>Iteration</title>
+ <marker id="iteration"></marker>
+ <p>Mnesia provides a couple of functions which iterates over all
+ the records in a table.
+ </p>
+ <code type="none">
+ mnesia:foldl(Fun, Acc0, Tab) -> NewAcc | transaction abort
+ mnesia:foldr(Fun, Acc0, Tab) -> NewAcc | transaction abort
+ mnesia:foldl(Fun, Acc0, Tab, LockType) -> NewAcc | transaction abort
+ mnesia:foldr(Fun, Acc0, Tab, LockType) -> NewAcc | transaction abort
+ </code>
+ <p>These functions iterate over the mnesia table <c>Tab</c> and
+ apply the function <c>Fun</c> to each record. The <c>Fun</c>
+ takes two arguments, the first argument is a record from the
+ table and the second argument is the accumulator. The
+ <c>Fun</c> return a new accumulator. </p>
+ <p>The first time the <c>Fun</c> is applied <c>Acc0</c> will
+ be the second argument. The next time the <c>Fun</c> is called
+ the return value from the previous call, will be used as the
+ second argument. The term the last call to the Fun returns
+ will be the return value of the <c>fold[lr]</c> function.
+ </p>
+ <p>The difference between <c>foldl</c> and <c>foldr</c> is the
+ order the table is accessed for <c>ordered_set</c> tables,
+ for every other table type the functions are equivalent.
+ </p>
+ <p><c>LockType</c> specifies what type of lock that shall be
+ acquired for the iteration, default is <c>read</c>. If
+ records are written or deleted during the iteration a write
+ lock should be acquired. </p>
+ <p>These functions might be used to find records in a table
+ when it is impossible to write constraints for
+ <c>mnesia:match_object/3</c>, or when you want to perform
+ some action on certain records.
+ </p>
+ <p>For example finding all the employees who has a salary
+ below 10 could look like:</p>
+ <code type="none"><![CDATA[
+ find_low_salaries() ->
+ Constraint =
+ fun(Emp, Acc) when Emp#employee.salary < 10 ->
+ [Emp | Acc];
+ (_, Acc) ->
+ Acc
+ end,
+ Find = fun() -> mnesia:foldl(Constraint, [], employee) end,
+ mnesia:transaction(Find).
+ ]]></code>
+ <p>Raising the salary to 10 for everyone with a salary below 10
+ and return the sum of all raises:</p>
+ <code type="none"><![CDATA[
+ increase_low_salaries() ->
+ Increase =
+ fun(Emp, Acc) when Emp#employee.salary < 10 ->
+ OldS = Emp#employee.salary,
+ ok = mnesia:write(Emp#employee{salary = 10}),
+ Acc + 10 - OldS;
+ (_, Acc) ->
+ Acc
+ end,
+ IncLow = fun() -> mnesia:foldl(Increase, 0, employee, write) end,
+ mnesia:transaction(IncLow).
+ ]]></code>
+ <p>A lot of nice things can be done with the iterator functions
+ but some caution should be taken about performance and memory
+ utilization for large tables. </p>
+ <p>Call these iteration functions on nodes that contain a replica of the
+ table. Each call to the function <c>Fun</c> access the table and if the table
+ resides on another node it will generate a lot of unnecessary
+ network traffic. </p>
+ <p>Mnesia also provides some functions that make it possible for
+ the user to iterate over the table. The order of the
+ iteration is unspecified if the table is not of the <c>ordered_set</c>
+ type. </p>
+ <code type="none">
+ mnesia:first(Tab) -> Key | transaction abort
+ mnesia:last(Tab) -> Key | transaction abort
+ mnesia:next(Tab,Key) -> Key | transaction abort
+ mnesia:prev(Tab,Key) -> Key | transaction abort
+ mnesia:snmp_get_next_index(Tab,Index) -> {ok, NextIndex} | endOfTable
+ </code>
+ <p>The order of first/last and next/prev are only valid for
+ <c>ordered_set</c> tables, for all other tables, they are synonyms.
+ When the end of the table is reached the special key
+ <c>'$end_of_table'</c> is returned.</p>
+ <p>If records are written and deleted during the traversal, use
+ <c>mnesia:fold[lr]/4</c> with a <c>write</c> lock. Or
+ <c>mnesia:write_lock_table/1</c> when using first and next.</p>
+ <p>Writing or deleting in transaction context creates a local copy
+ of each modified record, so modifying each record in a large
+ table uses a lot of memory. Mnesia will compensate for every
+ written or deleted record during the iteration in a transaction
+ context, which may reduce the performance. If possible avoid writing
+ or deleting records in the same transaction before iterating over the
+ table.</p>
+ <p>In dirty context, i.e. <c>sync_dirty</c> or <c>async_dirty</c>,
+ the modified records are not stored in a local copy; instead,
+ each record is updated separately. This generates a lot of
+ network traffic if the table has a replica on another node and
+ has all the other drawbacks that dirty operations
+ have. Especially for the <c>mnesia:first/1</c> and
+ <c>mnesia:next/2</c> commands, the same drawbacks as described
+ above for <c>dirty_first</c> and <c>dirty_next</c> applies, i.e.
+ no writes to the table should be done during iteration.</p>
+ <p></p>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc
new file mode 100644
index 0000000000..3ec0aa37f5
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc
@@ -0,0 +1,1398 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Miscellaneous Mnesia Features</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_chap5.xml</file>
+ </header>
+ <p>The earlier chapters of this User Guide described how to get
+ started with Mnesia, and how to build a Mnesia database. In this
+ chapter, we will describe the more advanced features available
+ when building a distributed, fault tolerant Mnesia database. This
+ chapter contains the following sections:
+ </p>
+ <list type="bulleted">
+ <item>Indexing
+ </item>
+ <item>Distribution and Fault Tolerance
+ </item>
+ <item>Table fragmentation.
+ </item>
+ <item>Local content tables.
+ </item>
+ <item>Disc-less nodes.
+ </item>
+ <item>More about schema management
+ </item>
+ <item>Debugging a Mnesia application
+ </item>
+ <item>Concurrent Processes in Mnesia
+ </item>
+ <item>Prototyping
+ </item>
+ <item>Object Based Programming with Mnesia.
+ </item>
+ </list>
+
+ <section>
+ <marker id="indexing"></marker>
+ <title>Indexing</title>
+ <p>Data retrieval and matching can be performed very efficiently
+ if we know the key for the record. Conversely, if the key is not
+ known, all records in a table must be searched. The larger the
+ table the more time consuming it will become. To remedy this
+ problem Mnesia's indexing capabilities are used to improve data
+ retrieval and matching of records.
+ </p>
+ <p>The following two functions manipulate indexes on existing tables:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:add_table_index(Tab, AttributeName) -> {aborted, R} |{atomic, ok}</c></item>
+ <item><c>mnesia:del_table_index(Tab, AttributeName) -> {aborted, R} |{atomic, ok}</c></item>
+ </list>
+ <p>These functions create or delete a table index on field
+ defined by <c>AttributeName</c>. To illustrate this, add an
+ index to the table definition <c>(employee, {emp_no, name, salary, sex, phone, room_no}</c>, which is the example table
+ from the Company database. The function
+ which adds an index on the element <c>salary</c> can be expressed in
+ the following way:
+ </p>
+ <list type="ordered">
+ <item><c>mnesia:add_table_index(employee, salary)</c></item>
+ </list>
+ <p>The indexing capabilities of Mnesia are utilized with the
+ following three functions, which retrieve and match records on the
+ basis of index entries in the database.
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:index_read(Tab, SecondaryKey, AttributeName) -> transaction abort | RecordList</c>.
+ Avoids an exhaustive search of the entire table, by looking up
+ the <c>SecondaryKey</c> in the index to find the primary keys.
+ </item>
+ <item><c>mnesia:index_match_object(Pattern, AttributeName) -> transaction abort | RecordList</c>
+ Avoids an exhaustive search of the entire table, by looking up
+ the secondary key in the index to find the primary keys.
+ The secondary key is found in the <c>AttributeName</c> field of
+ the <c>Pattern</c>. The secondary key must be bound.
+ </item>
+ <item><c>mnesia:match_object(Pattern) -> transaction abort | RecordList</c>
+ Uses indices to avoid exhaustive search of the entire table.
+ Unlike the other functions above, this function may utilize
+ any index as long as the secondary key is bound.</item>
+ </list>
+ <p>These functions are further described and exemplified in
+ Chapter 4: <seealso marker="Mnesia_chap4#matching">Pattern matching</seealso>.
+ </p>
+ </section>
+
+ <section>
+ <title>Distribution and Fault Tolerance</title>
+ <p>Mnesia is a distributed, fault tolerant DBMS. It is possible
+ to replicate tables on different Erlang nodes in a variety of
+ ways. The Mnesia programmer does not have to state
+ where the different tables reside, only the names of the
+ different tables are specified in the program code. This is
+ known as "location transparency" and it is an important
+ concept. In particular:
+ </p>
+ <list type="bulleted">
+ <item>A program will work regardless of the
+ location of the data. It makes no difference whether the data
+ resides on the local node, or on a remote node. <em>Note:</em> The program
+ will run slower if the data is located on a remote node.
+ </item>
+ <item>The database can be reconfigured, and tables can be
+ moved between nodes. These operations do not effect the user
+ programs.
+ </item>
+ </list>
+ <p>We have previously seen that each table has a number of
+ system attributes, such as <c>index</c> and
+ <c>type</c>.
+ </p>
+ <p>Table attributes are specified when the table is created. For
+ example, the following function will create a new table with two
+ RAM replicas:
+ </p>
+ <pre>
+ mnesia:create_table(foo,
+ [{ram_copies, [N1, N2]},
+ {attributes, record_info(fields, foo)}]).
+ </pre>
+ <p>Tables can also have the following properties,
+ where each attribute has a list of Erlang nodes as its value.
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>ram_copies</c>. The value of the node list is a list of
+ Erlang nodes, and a RAM replica of the table will reside on
+ each node in the list. This is a RAM replica, and it is
+ important to realize that no disc operations are performed when
+ a program executes write operations to these replicas. However,
+ should permanent RAM replicas be a requirement, then the
+ following alternatives are available:</p>
+ <list type="ordered">
+ <item>The <c>mnesia:dump_tables/1</c> function can be used
+ to dump RAM table replicas to disc.
+ </item>
+ <item>The table replicas can be backed up; either from
+ RAM, or from disc if dumped there with the above
+ function.
+ </item>
+ </list>
+ </item>
+ <item><c>disc_copies</c>. The value of the attribute is a list
+ of Erlang nodes, and a replica of the table will reside both
+ in RAM and on disc on each node in the list. Write operations
+ addressed to the table will address both the RAM and the disc
+ copy of the table.
+ </item>
+ <item><c>disc_only_copies</c>. The value of the attribute is a
+ list of Erlang nodes, and a replica of the table will reside
+ only as a disc copy on each node in the list. The major
+ disadvantage of this type of table replica is the access
+ speed. The major advantage is that the table does not occupy
+ space in memory.
+ </item>
+ </list>
+ <p>It is also possible to set and change table properties on
+ existing tables. Refer to Chapter 3: <seealso marker="Mnesia_chap3#def_schema">Defining the Schema</seealso> for full
+ details.
+ </p>
+ <p>There are basically two reasons for using more than one table
+ replica: fault tolerance, or speed. It is worthwhile to note
+ that table replication provides a solution to both of these
+ system requirements.
+ </p>
+ <p>If we have two active table replicas, all information is
+ still available if one of the replicas fail. This can be a very
+ important property in many applications. Furthermore, if a table
+ replica exists at two specific nodes, applications which execute
+ at either of these nodes can read data from the table without
+ accessing the network. Network operations are considerably
+ slower and consume more resources than local operations.
+ </p>
+ <p>It can be advantageous to create table replicas for a
+ distributed application which reads data often, but writes data
+ seldom, in order to achieve fast read operations on the local
+ node. The major disadvantage with replication is the increased
+ time to write data. If a table has two replicas, every write
+ operation must access both table replicas. Since one of these
+ write operations must be a network operation, it is considerably
+ more expensive to perform a write operation to a replicated
+ table than to a non-replicated table.
+ </p>
+ </section>
+
+ <section>
+ <title>Table Fragmentation</title>
+
+ <section>
+ <title>The Concept</title>
+ <p>A concept of table fragmentation has been introduced in
+ order to cope with very large tables. The idea is to split a
+ table into several more manageable fragments. Each fragment
+ is implemented as a first class Mnesia table and may be
+ replicated, have indices etc. as any other table. But the
+ tables may neither have <c>local_content</c> nor have the
+ <c>snmp</c> connection activated.
+ </p>
+ <p>In order to be able to access a record in a fragmented
+ table, Mnesia must determine to which fragment the
+ actual record belongs. This is done by the
+ <c>mnesia_frag</c> module, which implements the
+ <c>mnesia_access</c> callback behaviour. Please, read the
+ documentation about <c>mnesia:activity/4</c> to see how
+ <c>mnesia_frag</c> can be used as a <c>mnesia_access</c>
+ callback module.
+ </p>
+ <p>At each record access <c>mnesia_frag</c> first computes
+ a hash value from the record key. Secondly the name of the
+ table fragment is determined from the hash value. And
+ finally the actual table access is performed by the same
+ functions as for non-fragmented tables. When the key is
+ not known beforehand, all fragments are searched for
+ matching records. Note: In <c>ordered_set</c> tables
+ the records will be ordered per fragment, and the
+ the order is undefined in results returned by select and
+ match_object.
+ </p>
+ <p>The following piece of code illustrates
+ how an existing Mnesia table is converted to be a
+ fragmented table and how more fragments are added later on.
+ </p>
+ <code type="none"><![CDATA[
+Eshell V4.7.3.3 (abort with ^G)
+(a@sam)1> mnesia:start().
+ok
+(a@sam)2> mnesia:system_info(running_db_nodes).
+[b@sam,c@sam,a@sam]
+(a@sam)3> Tab = dictionary.
+dictionary
+(a@sam)4> mnesia:create_table(Tab, [{ram_copies, [a@sam, b@sam]}]).
+{atomic,ok}
+(a@sam)5> Write = fun(Keys) -> [mnesia:write({Tab,K,-K}) || K <- Keys], ok end.
+#Fun<erl_eval>
+(a@sam)6> mnesia:activity(sync_dirty, Write, [lists:seq(1, 256)], mnesia_frag).
+ok
+(a@sam)7> mnesia:change_table_frag(Tab, {activate, []}).
+{atomic,ok}
+(a@sam)8> mnesia:table_info(Tab, frag_properties).
+[{base_table,dictionary},
+ {foreign_key,undefined},
+ {n_doubles,0},
+ {n_fragments,1},
+ {next_n_to_split,1},
+ {node_pool,[a@sam,b@sam,c@sam]}]
+(a@sam)9> Info = fun(Item) -> mnesia:table_info(Tab, Item) end.
+#Fun<erl_eval>
+(a@sam)10> Dist = mnesia:activity(sync_dirty, Info, [frag_dist], mnesia_frag).
+[{c@sam,0},{a@sam,1},{b@sam,1}]
+(a@sam)11> mnesia:change_table_frag(Tab, {add_frag, Dist}).
+{atomic,ok}
+(a@sam)12> Dist2 = mnesia:activity(sync_dirty, Info, [frag_dist], mnesia_frag).
+[{b@sam,1},{c@sam,1},{a@sam,2}]
+(a@sam)13> mnesia:change_table_frag(Tab, {add_frag, Dist2}).
+{atomic,ok}
+(a@sam)14> Dist3 = mnesia:activity(sync_dirty, Info, [frag_dist], mnesia_frag).
+[{a@sam,2},{b@sam,2},{c@sam,2}]
+(a@sam)15> mnesia:change_table_frag(Tab, {add_frag, Dist3}).
+{atomic,ok}
+(a@sam)16> Read = fun(Key) -> mnesia:read({Tab, Key}) end.
+#Fun<erl_eval>
+(a@sam)17> mnesia:activity(transaction, Read, [12], mnesia_frag).
+[{dictionary,12,-12}]
+(a@sam)18> mnesia:activity(sync_dirty, Info, [frag_size], mnesia_frag).
+[{dictionary,64},
+ {dictionary_frag2,64},
+ {dictionary_frag3,64},
+ {dictionary_frag4,64}]
+(a@sam)19>
+ ]]></code>
+ </section>
+
+ <section>
+ <title>Fragmentation Properties</title>
+ <p>There is a table property called
+ <c>frag_properties</c> and may be read with
+ <c>mnesia:table_info(Tab, frag_properties)</c>. The
+ fragmentation properties is a list of tagged tuples with
+ the arity 2. By default the list is empty, but when it is
+ non-empty it triggers Mnesia to regard the table as
+ fragmented. The fragmentation properties are:
+ </p>
+ <taglist>
+ <tag><c>{n_fragments, Int}</c></tag>
+ <item>
+ <p><c>n_fragments</c> regulates how many fragments
+ that the table currently has. This property may explicitly
+ be set at table creation and later be changed with
+ <c>{add_frag, NodesOrDist}</c> or
+ <c>del_frag</c>. <c>n_fragment</c>s defaults to <c>1</c>.
+ </p>
+ </item>
+ <tag><c>{node_pool, List}</c></tag>
+ <item>
+ <p>The node pool contains a list of nodes and may
+ explicitly be set at table creation and later be changed
+ with <c>{add_node, Node}</c> or <c>{del_node, Node}</c>. At table creation Mnesia tries to distribute
+ the replicas of each fragment evenly over all the nodes in
+ the node pool. Hopefully all nodes will end up with the
+ same number of replicas. <c>node_pool</c> defaults to the
+ return value from <c>mnesia:system_info(db_nodes)</c>.
+ </p>
+ </item>
+ <tag><c>{n_ram_copies, Int}</c></tag>
+ <item>
+ <p>Regulates how many <c>ram_copies</c> replicas
+ that each fragment should have. This property may
+ explicitly be set at table creation. The default is
+ <c>0</c>, but if <c>n_disc_copies</c> and
+ <c>n_disc_only_copies</c> also are <c>0</c>,
+ <c>n_ram_copies</c>\011will default be set to <c>1</c>.
+ </p>
+ </item>
+ <tag><c>{n_disc_copies, Int}</c></tag>
+ <item>
+ <p>Regulates how many <c>disc_copies</c> replicas
+ that each fragment should have. This property may
+ explicitly be set at table creation. The default is <c>0</c>.
+ </p>
+ </item>
+ <tag><c>{n_disc_only_copies, Int}</c></tag>
+ <item>
+ <p>Regulates how many <c>disc_only_copies</c> replicas
+ that each fragment should have. This property may
+ explicitly be set at table creation. The default is <c>0</c>.
+ </p>
+ </item>
+ <tag><c>{foreign_key, ForeignKey}</c></tag>
+ <item>
+ <p><c>ForeignKey</c> may either be the atom
+ <c>undefined</c> or the tuple <c>{ForeignTab, Attr}</c>,
+ where <c>Attr</c> denotes an attribute which should be
+ interpreted as a key in another fragmented table named
+ <c>ForeignTab</c>. Mnesia will ensure that the number of
+ fragments in this table and in the foreign table are
+ always the same. When fragments are added or deleted
+ Mnesia will automatically propagate the operation to all
+ fragmented tables that has a foreign key referring to this
+ table. Instead of using the record key to determine which
+ fragment to access, the value of the <c>Attr</c> field is
+ used. This feature makes it possible to automatically
+ co-locate records in different tables to the same
+ node. <c>foreign_key</c> defaults to
+ <c>undefined</c>. However if the foreign key is set to
+ something else it will cause the default values of the
+ other fragmentation properties to be the same values as
+ the actual fragmentation properties of the foreign table.
+ </p>
+ </item>
+ <tag><c>{hash_module, Atom}</c></tag>
+ <item>
+ <p>Enables definition of an alternate hashing scheme.
+ The module must implement the <c>mnesia_frag_hash</c>
+ callback behaviour (see the reference manual). This
+ property may explicitly be set at table creation.
+ The default is <c>mnesia_frag_hash</c>.</p>
+ <p>Older tables that was created before the concept of
+ user defined hash modules was introduced, uses
+ the <c>mnesia_frag_old_hash</c> module in order to
+ be backwards compatible. The <c>mnesia_frag_old_hash</c>
+ is still using the poor deprecated <c>erlang:hash/1</c>
+ function.
+ </p>
+ </item>
+ <tag><c>{hash_state, Term}</c></tag>
+ <item>
+ <p>Enables a table specific parameterization
+ of a generic hash module. This property may explicitly
+ be set at table creation.
+ The default is <c>undefined</c>.</p>
+ <code type="none"><![CDATA[
+Eshell V4.7.3.3 (abort with ^G)
+(a@sam)1> mnesia:start().
+ok
+(a@sam)2> PrimProps = [{n_fragments, 7}, {node_pool, [node()]}].
+[{n_fragments,7},{node_pool,[a@sam]}]
+(a@sam)3> mnesia:create_table(prim_dict,
+ [{frag_properties, PrimProps},
+ {attributes,[prim_key,prim_val]}]).
+{atomic,ok}
+(a@sam)4> SecProps = [{foreign_key, {prim_dict, sec_val}}].
+[{foreign_key,{prim_dict,sec_val}}]
+(a@sam)5> mnesia:create_table(sec_dict,
+\011 [{frag_properties, SecProps},
+(a@sam)5> {attributes, [sec_key, sec_val]}]).
+{atomic,ok}
+(a@sam)6> Write = fun(Rec) -> mnesia:write(Rec) end.
+#Fun<erl_eval>
+(a@sam)7> PrimKey = 11.
+11
+(a@sam)8> SecKey = 42.
+42
+(a@sam)9> mnesia:activity(sync_dirty, Write,
+\011\011 [{prim_dict, PrimKey, -11}], mnesia_frag).
+ok
+(a@sam)10> mnesia:activity(sync_dirty, Write,
+\011\011 [{sec_dict, SecKey, PrimKey}], mnesia_frag).
+ok
+(a@sam)11> mnesia:change_table_frag(prim_dict, {add_frag, [node()]}).
+{atomic,ok}
+(a@sam)12> SecRead = fun(PrimKey, SecKey) ->
+\011\011 mnesia:read({sec_dict, PrimKey}, SecKey, read) end.
+#Fun<erl_eval>
+(a@sam)13> mnesia:activity(transaction, SecRead,
+\011\011 [PrimKey, SecKey], mnesia_frag).
+[{sec_dict,42,11}]
+(a@sam)14> Info = fun(Tab, Item) -> mnesia:table_info(Tab, Item) end.
+#Fun<erl_eval>
+(a@sam)15> mnesia:activity(sync_dirty, Info,
+\011\011 [prim_dict, frag_size], mnesia_frag).
+[{prim_dict,0},
+ {prim_dict_frag2,0},
+ {prim_dict_frag3,0},
+ {prim_dict_frag4,1},
+ {prim_dict_frag5,0},
+ {prim_dict_frag6,0},
+ {prim_dict_frag7,0},
+ {prim_dict_frag8,0}]
+(a@sam)16> mnesia:activity(sync_dirty, Info,
+\011\011 [sec_dict, frag_size], mnesia_frag).
+[{sec_dict,0},
+ {sec_dict_frag2,0},
+ {sec_dict_frag3,0},
+ {sec_dict_frag4,1},
+ {sec_dict_frag5,0},
+ {sec_dict_frag6,0},
+ {sec_dict_frag7,0},
+ {sec_dict_frag8,0}]
+(a@sam)17>
+ ]]></code>
+ </item>
+ </taglist>
+ </section>
+
+ <section>
+ <title>Management of Fragmented Tables</title>
+ <p>The function <c>mnesia:change_table_frag(Tab, Change)</c>
+ is intended to be used for reconfiguration of fragmented
+ tables. The <c>Change</c> argument should have one of the
+ following values:
+ </p>
+ <taglist>
+ <tag><c>{activate, FragProps}</c></tag>
+ <item>
+ <p>Activates the fragmentation properties of an
+ existing table. <c>FragProps</c> should either contain
+ <c>{node_pool, Nodes}</c> or be empty.
+ </p>
+ </item>
+ <tag><c>deactivate</c></tag>
+ <item>
+ <p>Deactivates the fragmentation properties of a
+ table. The number of fragments must be <c>1</c>. No other
+ tables may refer to this table in its foreign key.
+ </p>
+ </item>
+ <tag><c>{add_frag, NodesOrDist}</c></tag>
+ <item>
+ <p>Adds one new fragment to a fragmented table. All
+ records in one of the old fragments will be rehashed and
+ about half of them will be moved to the new (last)
+ fragment. All other fragmented tables, which refers to this
+ table in their foreign key, will automatically get a new
+ fragment, and their records will also be dynamically
+ rehashed in the same manner as for the main table.
+ </p>
+ <p>The <c>NodesOrDist</c> argument may either be a list
+ of nodes or the result from <c>mnesia:table_info(Tab, frag_dist)</c>. The <c>NodesOrDist</c> argument is
+ assumed to be a sorted list with the best nodes to
+ host new replicas first in the list. The new fragment
+ will get the same number of replicas as the first
+ fragment (see <c>n_ram_copies</c>, <c>n_disc_copies</c>
+ and <c>n_disc_only_copies</c>). The <c>NodesOrDist</c>
+ list must at least contain one element for each
+ replica that needs to be allocated.
+ </p>
+ </item>
+ <tag><c>del_frag</c></tag>
+ <item>
+ <p>Deletes one fragment from a fragmented table. All
+ records in the last fragment will be moved to one of the other
+ fragments. All other fragmented tables which refers to
+ this table in their foreign key, will automatically lose
+ their last fragment and their records will also be
+ dynamically rehashed in the same manner as for the main
+ table.
+ </p>
+ </item>
+ <tag><c>{add_node, Node}</c></tag>
+ <item>
+ <p>Adds a new node to the <c>node_pool</c>. The new
+ node pool will affect the list returned from
+ <c>mnesia:table_info(Tab, frag_dist)</c>.
+ </p>
+ </item>
+ <tag><c>{del_node, Node}</c></tag>
+ <item>
+ <p>Deletes a new node from the <c>node_pool</c>. The
+ new node pool will affect the list returned from
+ <c>mnesia:table_info(Tab, frag_dist)</c>.</p>
+ </item>
+ </taglist>
+ </section>
+
+ <section>
+ <title>Extensions of Existing Functions</title>
+ <p>The function <c>mnesia:create_table/2</c> is used to
+ create a brand new fragmented table, by setting the table
+ property <c>frag_properties</c> to some proper values.
+ </p>
+ <p>The function <c>mnesia:delete_table/1</c> is used to
+ delete a fragmented table including all its
+ fragments. There must however not exist any other
+ fragmented tables which refers to this table in their foreign key.
+ </p>
+ <p>The function <c>mnesia:table_info/2</c> now understands
+ the <c>frag_properties</c> item.
+ </p>
+ <p>If the function <c>mnesia:table_info/2</c> is invoked in
+ the activity context of the <c>mnesia_frag</c> module,
+ information of several new items may be obtained:
+ </p>
+ <taglist>
+ <tag><c>base_table</c></tag>
+ <item>
+ <p>the name of the fragmented table
+ </p>
+ </item>
+ <tag><c>n_fragments</c></tag>
+ <item>
+ <p>the actual number of fragments
+ </p>
+ </item>
+ <tag><c>node_pool</c></tag>
+ <item>
+ <p>the pool of nodes
+ </p>
+ </item>
+ <tag><c>n_ram_copies</c></tag>
+ <item></item>
+ <tag><c>n_disc_copies</c></tag>
+ <item></item>
+ <tag><c>n_disc_only_copies</c></tag>
+ <item>
+ <p>the number of replicas with storage type
+ <c>ram_copies</c>, <c>disc_copies</c> and <c>disc_only_copies</c>
+ respectively. The actual values are dynamically derived
+ from the first fragment. The first fragment serves as a
+ pro-type and when the actual values needs to be computed
+ (e.g. when adding new fragments) they are simply
+ determined by counting the number of each replicas for
+ each storage type. This means, when the functions
+ <c>mnesia:add_table_copy/3</c>,
+ <c>mnesia:del_table_copy/2</c> and<c>mnesia:change_table_copy_type/2</c> are applied on the
+ first fragment, it will affect the settings on
+ <c>n_ram_copies</c>, <c>n_disc_copies</c>, and
+ <c>n_disc_only_copies</c>.
+ </p>
+ </item>
+ <tag><c>foreign_key</c></tag>
+ <item>
+ <p>the foreign key.
+ </p>
+ </item>
+ <tag><c>foreigners</c></tag>
+ <item>
+ <p>all other tables that refers to this table in
+ their foreign key.
+ </p>
+ </item>
+ <tag><c>frag_names</c></tag>
+ <item>
+ <p>the names of all fragments.
+ </p>
+ </item>
+ <tag><c>frag_dist</c></tag>
+ <item>
+ <p>a sorted list of <c>{Node, Count}</c> tuples
+ which is sorted in increasing <c>Count</c> order. The
+ <c>Count</c> is the total number of replicas that this
+ fragmented table hosts on each <c>Node</c>. The list
+ always contains at least all nodes in the
+ <c>node_pool</c>. The nodes which not belongs to the
+ <c>node_pool</c> will be put last in the list even if
+ their <c>Count</c> is lower.
+ </p>
+ </item>
+ <tag><c>frag_size</c></tag>
+ <item>
+ <p>a list of <c>{Name, Size}</c> tuples where
+ <c>Name</c> is a fragment <c>Name</c> and <c>Size</c> is
+ how many records it contains.
+ </p>
+ </item>
+ <tag><c>frag_memory</c></tag>
+ <item>
+ <p>a list of <c>{Name, Memory}</c> tuples where
+ <c>Name</c> is a fragment <c>Name</c> and <c>Memory</c> is
+ how much memory it occupies.
+ </p>
+ </item>
+ <tag><c>size</c></tag>
+ <item>
+ <p>total size of all fragments
+ </p>
+ </item>
+ <tag><c>memory</c></tag>
+ <item>
+ <p>the total memory of all fragments</p>
+ </item>
+ </taglist>
+ </section>
+
+ <section>
+ <title>Load Balancing</title>
+ <p>There are several algorithms for distributing records
+ in a fragmented table evenly over a
+ pool of nodes. No one is best, it simply depends of the
+ application needs. Here follows some examples of
+ situations which may need some attention:
+ </p>
+ <p><c>permanent change of nodes</c> when a new permanent
+ <c>db_node</c> is introduced or dropped, it may be time to
+ change the pool of nodes and re-distribute the replicas
+ evenly over the new pool of nodes. It may also be time to
+ add or delete a fragment before the replicas are re-distributed.
+ </p>
+ <p><c>size/memory threshold</c> when the total size or
+ total memory of a fragmented table (or a single
+ fragment) exceeds some application specific threshold, it
+ may be time to dynamically add a new fragment in order
+ obtain a better distribution of records.
+ </p>
+ <p><c>temporary node down</c> when a node temporarily goes
+ down it may be time to compensate some fragments with new
+ replicas in order to keep the desired level of
+ redundancy. When the node comes up again it may be time to
+ remove the superfluous replica.
+ </p>
+ <p><c>overload threshold</c> when the load on some node is
+ exceeds some application specific threshold, it may be time to
+ either add or move some fragment replicas to nodes with lesser
+ load. Extra care should be taken if the table has a foreign
+ key relation to some other table. In order to avoid severe
+ performance penalties, the same re-distribution must be
+ performed for all of the related tables.
+ </p>
+ <p>Use <c>mnesia:change_table_frag/2</c> to add new fragments
+ and apply the usual schema manipulation functions (such as
+ <c>mnesia:add_table_copy/3</c>, <c>mnesia:del_table_copy/2</c>
+ and <c>mnesia:change_table_copy_type/2</c>) on each fragment
+ to perform the actual re-distribution.
+ </p>
+ </section>
+ </section>
+
+ <section>
+ <title>Local Content Tables</title>
+ <p>Replicated tables have the same content on all nodes where
+ they are replicated. However, it is sometimes advantageous to
+ have tables but different content on different nodes.
+ </p>
+ <p>If we specify the attribute <c>{local_content, true}</c> when
+ we create the table, the table will reside on the nodes where
+ we specify that the table shall exist, but the write operations on the
+ table will only be performed on the local copy.
+ </p>
+ <p>Furthermore, when the table is initialized at start-up, the
+ table will only be initialized locally, and the table
+ content will not be copied from another node.
+ </p>
+ </section>
+
+ <section>
+ <title>Disc-less Nodes</title>
+ <p>It is possible to run Mnesia on nodes that do not have a
+ disc. It is of course not possible to have replicas
+ of neither <c>disc_copies</c>, nor <c>disc_only_copies</c>
+ on such nodes. This especially troublesome for the
+ <c>schema</c> table since Mnesia need the schema in order
+ to initialize itself.
+ </p>
+ <p>The schema table may, as other tables, reside on one or
+ more nodes. The storage type of the schema table may either
+ be <c>disc_copies</c> or <c>ram_copies</c>
+ (not <c>disc_only_copies</c>). At
+ start-up Mnesia uses its schema to determine with which
+ nodes it should try to establish contact. If any
+ of the other nodes are already started, the starting node
+ merges its table definitions with the table definitions
+ brought from the other nodes. This also applies to the
+ definition of the schema table itself. The application
+ parameter <c>extra_db_nodes</c> contains a list of nodes which
+ Mnesia also should establish contact with besides the ones
+ found in the schema. The default value is the empty list
+ <c>[]</c>.
+ </p>
+ <p>Hence, when a disc-less node needs to find the schema
+ definitions from a remote node on the network, we need to supply
+ this information through the application parameter <c>-mnesia extra_db_nodes NodeList</c>. Without this
+ configuration parameter set, Mnesia will start as a single node
+ system. It is also possible to use <c>mnesia:change_config/2</c>
+ to assign a value to 'extra_db_nodes' and force a connection
+ after mnesia have been started, i.e.
+ mnesia:change_config(extra_db_nodes, NodeList).
+ </p>
+ <p>The application parameter schema_location controls where
+ Mnesia will search for its schema. The parameter may be one of
+ the following atoms:
+ </p>
+ <taglist>
+ <tag><c>disc</c></tag>
+ <item>
+ <p>Mandatory disc. The schema is assumed to be located
+ on the Mnesia directory. And if the schema cannot be found,
+ Mnesia refuses to start.
+ </p>
+ </item>
+ <tag><c>ram</c></tag>
+ <item>
+ <p>Mandatory ram. The schema resides in ram
+ only. At start-up a tiny new schema is generated. This
+ default schema contains just the definition of the schema
+ table and only resides on the local node. Since no other
+ nodes are found in the default schema, the configuration
+ parameter <c>extra_db_nodes</c> must be used in order to let the
+ node share its table definitions with other nodes. (The
+ <c>extra_db_nodes</c> parameter may also be used on disc-full nodes.)
+ </p>
+ </item>
+ <tag><c>opt_disc</c></tag>
+ <item>
+ <p>Optional disc. The schema may reside on either disc
+ or ram. If the schema is found on disc, Mnesia starts as a
+ disc-full node (the storage type of the schema table is
+ disc_copies). If no schema is found on disc, Mnesia starts
+ as a disc-less node (the storage type of the schema table is
+ ram_copies). The default value for the application parameter
+ is
+ <c>opt_disc</c>. </p>
+ </item>
+ </taglist>
+ <p>When the <c>schema_location</c> is set to opt_disc the
+ function <c>mnesia:change_table_copy_type/3</c> may be used to
+ change the storage type of the schema.
+ This is illustrated below:
+ </p>
+ <pre>
+ 1> mnesia:start().
+ ok
+ 2> mnesia:change_table_copy_type(schema, node(), disc_copies).
+ {atomic, ok}
+ </pre>
+ <p>Assuming that the call to <c>mnesia:start</c> did not
+ find any schema to read on the disc, then Mnesia has started
+ as a disc-less node, and then changed it to a node that
+ utilizes the disc to locally store the schema.
+ </p>
+ </section>
+
+ <section>
+ <title>More Schema Management</title>
+ <p>It is possible to add and remove nodes from a Mnesia system.
+ This can be done by adding a copy of the schema to those nodes.
+ </p>
+ <p>The functions <c>mnesia:add_table_copy/3</c> and
+ <c>mnesia:del_table_copy/2</c> may be used to add and delete
+ replicas of the schema table. Adding a node to the list
+ of nodes where the schema is replicated will affect two
+ things. First it allows other tables to be replicated to
+ this node. Secondly it will cause Mnesia to try to contact
+ the node at start-up of disc-full nodes.
+ </p>
+ <p>The function call <c>mnesia:del_table_copy(schema, mynode@host)</c> deletes the node 'mynode@host' from the
+ Mnesia system. The call fails if mnesia is running on
+ 'mynode@host'. The other mnesia nodes will never try to connect
+ to that node again. Note, if there is a disc
+ resident schema on the node 'mynode@host', the entire mnesia
+ directory should be deleted. This can be done with
+ <c>mnesia:delete_schema/1</c>. If
+ mnesia is started again on the the node 'mynode@host' and the
+ directory has not been cleared, mnesia's behaviour is undefined.
+ </p>
+ <p>If the storage type of the schema is ram_copies, i.e, we
+ have disc-less node, Mnesia
+ will not use the disc on that particular node. The disc
+ usage is enabled by changing the storage type of the table
+ <c>schema</c> to disc_copies.
+ </p>
+ <p>New schemas are
+ created explicitly with <c>mnesia:create_schema/1</c> or implicitly
+ by starting Mnesia without a disc resident schema. Whenever
+ a table (including the schema table) is created it is
+ assigned its own unique cookie. The schema table is not created with
+ <c>mnesia:create_table/2</c> as normal tables.
+ </p>
+ <p>At start-up Mnesia connects different nodes to each other,
+ then they exchange table definitions with each other and the
+ table definitions are merged. During the merge procedure Mnesia
+ performs a sanity test to ensure that the table definitions are
+ compatible with each other. If a table exists on several nodes
+ the cookie must be the same, otherwise Mnesia will shutdown one
+ of the nodes. This unfortunate situation will occur if a table
+ has been created on two nodes independently of each other while
+ they were disconnected. To solve the problem, one of the tables
+ must be deleted (as the cookies differ we regard it to be two
+ different tables even if they happen to have the same name).
+ </p>
+ <p>Merging different versions of the schema table, does not
+ always require the cookies to be the same. If the storage
+ type of the schema table is disc_copies, the cookie is
+ immutable, and all other db_nodes must have the same
+ cookie. When the schema is stored as type ram_copies,
+ its cookie can be replaced with a cookie from another node
+ (ram_copies or disc_copies). The cookie replacement (during
+ merge of the schema table definition) is performed each time
+ a RAM node connects to another node.
+ </p>
+ <p><c>mnesia:system_info(schema_location)</c> and
+ <c>mnesia:system_info(extra_db_nodes)</c> may be used to determine
+ the actual values of schema_location and extra_db_nodes
+ respectively. <c>mnesia:system_info(use_dir)</c> may be used to
+ determine whether Mnesia is actually using the Mnesia
+ directory. <c>use_dir</c> may be determined even before
+ Mnesia is started. The function <c>mnesia:info/0</c> may now be
+ used to printout some system information even before Mnesia
+ is started. When Mnesia is started the function prints out
+ more information.
+ </p>
+ <p>Transactions which update the definition of a table,
+ requires that Mnesia is started on all nodes where the
+ storage type of the schema is disc_copies. All replicas of
+ the table on these nodes must also be loaded. There are a
+ few exceptions to these availability rules. Tables may be
+ created and new replicas may be added without starting all
+ of the disc-full nodes. New replicas may be added before all
+ other replicas of the table have been loaded, it will suffice
+ when one other replica is active.
+ </p>
+ </section>
+
+ <section>
+ <title>Mnesia Event Handling</title>
+ <p>System events and table events are the two categories of events
+ that Mnesia will generate in various situations.
+ </p>
+ <p>It is possible for user process to subscribe on the
+ events generated by Mnesia.
+ We have the following two functions:</p>
+ <taglist>
+ <tag><c>mnesia:subscribe(Event-Category)</c></tag>
+ <item>
+ <p>Ensures that a copy of all events of type
+ <c>Event-Category</c> are sent to the calling process.
+ </p>
+ </item>
+ <tag><c>mnesia:unsubscribe(Event-Category)</c></tag>
+ <item>Removes the subscription on events of type
+ <c>Event-Category</c></item>
+ </taglist>
+ <p><c>Event-Category</c> may either be the atom <c>system</c>, or
+ one of the tuples <c>{table, Tab, simple}</c>, <c>{table, Tab, detailed}</c>. The old event-category <c>{table, Tab}</c> is the same
+ event-category as <c>{table, Tab, simple}</c>.
+ The subscribe functions activate a subscription
+ of events. The events are delivered as messages to the process
+ evaluating the <c>mnesia:subscribe/1</c> function. The syntax of
+ system events is <c>{mnesia_system_event, Event}</c> and
+ <c>{mnesia_table_event, Event}</c> for table events. What system
+ events and table events means is described below.
+ </p>
+ <p>All system events are subscribed by Mnesia's
+ gen_event handler. The default gen_event handler is
+ <c>mnesia_event</c>. But it may be changed by using the application
+ parameter <c>event_module</c>. The value of this parameter must be
+ the name of a module implementing a complete handler
+ as specified by the <c>gen_event</c> module in
+ STDLIB. <c>mnesia:system_info(subscribers)</c> and
+ <c>mnesia:table_info(Tab, subscribers)</c> may be used to determine
+ which processes are subscribed to various
+ events.
+ </p>
+
+ <section>
+ <title>System Events</title>
+ <p>The system events are detailed below:</p>
+ <taglist>
+ <tag><c>{mnesia_up, Node}</c></tag>
+ <item>
+ <p>Mnesia has been started on a node.
+ Node is the name of the node. By default this event is ignored.
+ </p>
+ </item>
+ <tag><c>{mnesia_down, Node}</c></tag>
+ <item>
+ <p>Mnesia has been stopped on a node.
+ Node is the name of the node. By default this event is
+ ignored.
+ </p>
+ </item>
+ <tag><c>{mnesia_checkpoint_activated, Checkpoint}</c></tag>
+ <item>
+ <p>a checkpoint with the name
+ <c>Checkpoint</c> has been activated and that the current node is
+ involved in the checkpoint. Checkpoints may be activated
+ explicitly with <c>mnesia:activate_checkpoint/1</c> or implicitly
+ at backup, adding table replicas, internal transfer of data
+ between nodes etc. By default this event is ignored.
+ </p>
+ </item>
+ <tag><c>{mnesia_checkpoint_deactivated, Checkpoint}</c></tag>
+ <item>
+ <p>A checkpoint with the name
+ <c>Checkpoint</c> has been deactivated and that the current node was
+ involved in the checkpoint. Checkpoints may explicitly be
+ deactivated with <c>mnesia:deactivate/1</c> or implicitly when the
+ last replica of a table (involved in the checkpoint)
+ becomes unavailable, e.g. at node down. By default this
+ event is ignored.
+ </p>
+ </item>
+ <tag><c>{mnesia_overload, Details}</c></tag>
+ <item>
+ <p>Mnesia on the current node is
+ overloaded and the subscriber should take action.
+ </p>
+ <p>A typical overload situation occurs when the
+ applications are performing more updates on disc
+ resident tables than Mnesia is able to handle. Ignoring
+ this kind of overload may lead into a situation where
+ the disc space is exhausted (regardless of the size of
+ the tables stored on disc).
+ <br></br>
+ Each update is appended to
+ the transaction log and occasionally(depending of how it
+ is configured) dumped to the tables files. The
+ table file storage is more compact than the transaction
+ log storage, especially if the same record is updated
+ over and over again. If the thresholds for dumping the
+ transaction log have been reached before the previous
+ dump was finished an overload event is triggered.
+ </p>
+ <p>Another typical overload situation is when the
+ transaction manager cannot commit transactions at the
+ same pace as the applications are performing updates of
+ disc resident tables. When this happens the message
+ queue of the transaction manager will continue to grow
+ until the memory is exhausted or the load
+ decreases.
+ </p>
+ <p>The same problem may occur for dirty updates. The overload
+ is detected locally on the current node, but its cause may
+ be on another node. Application processes may cause heavy
+ loads if any table are residing on other nodes (replicated or not). By default this event
+ is reported to the error_logger.
+ </p>
+ </item>
+ <tag><c>{inconsistent_database, Context, Node}</c></tag>
+ <item>
+ <p>Mnesia regards the database as
+ potential inconsistent and gives its applications a chance
+ to recover from the inconsistency, e.g. by installing a
+ consistent backup as fallback and then restart the system
+ or pick a <c>MasterNode</c> from <c>mnesia:system_info(db_nodes)</c>)
+ and invoke <c>mnesia:set_master_node([MasterNode])</c>. By default an
+ error is reported to the error logger.
+ </p>
+ </item>
+ <tag><c>{mnesia_fatal, Format, Args, BinaryCore}</c></tag>
+ <item>
+ <p>Mnesia has encountered a fatal error
+ and will (in a short period of time) be terminated. The reason for
+ the fatal error is explained in Format and Args which may
+ be given as input to <c>io:format/2</c> or sent to the
+ error_logger. By default it will be sent to the
+ error_logger. <c>BinaryCore</c> is a binary containing a summary of
+ Mnesia's internal state at the time the when the fatal error was
+ encountered. By default the binary is written to a
+ unique file name on current directory. On RAM nodes the
+ core is ignored.
+ </p>
+ </item>
+ <tag><c>{mnesia_info, Format, Args}</c></tag>
+ <item>
+ <p>Mnesia has detected something that
+ may be of interest when debugging the system. This is explained
+ in <c>Format</c> and <c>Args</c> which may appear
+ as input to <c>io:format/2</c> or sent to the error_logger. By
+ default this event is printed with <c>io:format/2</c>.
+ </p>
+ </item>
+ <tag><c>{mnesia_error, Format, Args}</c></tag>
+ <item>
+ <p>Mnesia has encountered an error. The
+ reason for the error is explained i <c>Format</c> and <c>Args</c>
+ which may be given as input to <c>io:format/2</c> or sent to the
+ error_logger. By default this event is reported to the error_logger.
+ </p>
+ </item>
+ <tag><c>{mnesia_user, Event}</c></tag>
+ <item>
+ <p>An application has invoked the
+ function <c>mnesia:report_event(Event)</c>. <c>Event</c> may be any Erlang
+ data structure. When tracing a system of Mnesia applications
+ it is useful to be able to interleave Mnesia's own events with
+ application related events that give information about the
+ application context. Whenever the application starts with
+ a new and demanding Mnesia activity or enters a
+ new and interesting phase in its execution it may be a good idea
+ to use <c>mnesia:report_event/1</c>. </p>
+ </item>
+ </taglist>
+ </section>
+
+ <section>
+ <title>Table Events</title>
+ <p>Another category of events are table events, which are
+ events related to table updates. There are two types of table
+ events simple and detailed.
+ </p>
+ <p>The simple table events are tuples looking like this:
+ <c>{Oper, Record, ActivityId}</c>. Where <c>Oper</c> is the
+ operation performed. <c>Record</c> is the record involved in the
+ operation and <c>ActivityId</c> is the identity of the
+ transaction performing the operation. Note that the name of the
+ record is the table name even when the <c>record_name</c> has
+ another setting. The various table related events that may
+ occur are:
+ </p>
+ <taglist>
+ <tag><c>{write, NewRecord, ActivityId}</c></tag>
+ <item>
+ <p>a new record has been written.
+ NewRecord contains the new value of the record.
+ </p>
+ </item>
+ <tag><c>{delete_object, OldRecord, ActivityId}</c></tag>
+ <item>
+ <p>a record has possibly been deleted
+ with <c>mnesia:delete_object/1</c>. <c>OldRecord</c>
+ contains the value of the old record as stated as argument
+ by the application. Note that, other records with the same
+ key may be remaining in the table if it is a bag.
+ </p>
+ </item>
+ <tag><c>{delete, {Tab, Key}, ActivityId}</c></tag>
+ <item>
+ <p>one or more records possibly has
+ been deleted. All records with the key Key in the table
+ <c>Tab</c> have been deleted. </p>
+ </item>
+ </taglist>
+ <p>The detailed table events are tuples looking like
+ this: <c>{Oper, Table, Data, [OldRecs], ActivityId}</c>.
+ Where <c>Oper</c> is the operation
+ performed. <c>Table</c> is the table involved in the operation,
+ <c>Data</c> is the record/oid written/deleted.
+ <c>OldRecs</c> is the contents before the operation.
+ and <c>ActivityId</c> is the identity of the transaction
+ performing the operation.
+ The various table related events that may occur are:
+ </p>
+ <taglist>
+ <tag><c>{write, Table, NewRecord, [OldRecords], ActivityId}</c></tag>
+ <item>
+ <p>a new record has been written.
+ NewRecord contains the new value of the record and OldRecords
+ contains the records before the operation is performed.
+ Note that the new content is dependent on the type of the table.</p>
+ </item>
+ <tag><c>{delete, Table, What, [OldRecords], ActivityId}</c></tag>
+ <item>
+ <p>records has possibly been deleted
+ <c>What</c> is either {Table, Key} or a record {RecordName, Key, ...}
+ that was deleted.
+ Note that the new content is dependent on the type of the table.</p>
+ </item>
+ </taglist>
+ </section>
+ </section>
+
+ <section>
+ <title>Debugging Mnesia Applications</title>
+ <p>Debugging a Mnesia application can be difficult due to a number of reasons, primarily related
+ to difficulties in understanding how the transaction
+ and table load mechanisms work. An other source of
+ confusion may be the semantics of nested transactions.
+ </p>
+ <p>We may set the debug level of Mnesia by calling:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:set_debug_level(Level)</c></item>
+ </list>
+ <p>Where the parameter <c>Level</c> is:
+ </p>
+ <taglist>
+ <tag><c>none</c></tag>
+ <item>
+ <p>no trace outputs at all. This is the default.
+ </p>
+ </item>
+ <tag><c>verbose</c></tag>
+ <item>
+ <p>activates tracing of important debug events. These
+ debug events will generate <c>{mnesia_info, Format, Args}</c>
+ system events. Processes may subscribe to these events with
+ <c>mnesia:subscribe/1</c>. The events are always sent to Mnesia's
+ event handler.
+ </p>
+ </item>
+ <tag><c>debug</c></tag>
+ <item>
+ <p>activates all events at the verbose level plus
+ traces of all debug events. These debug events will generate
+ <c>{mnesia_info, Format, Args}</c> system events. Processes may
+ subscribe to these events with <c>mnesia:subscribe/1</c>. The
+ events are always sent to Mnesia's event handler. On this
+ debug level Mnesia's event handler starts subscribing
+ updates in the schema table.
+ </p>
+ </item>
+ <tag><c>trace</c></tag>
+ <item>
+ <p>activates all events at the debug level. On this
+ debug level Mnesia's event handler starts subscribing
+ updates on all Mnesia tables. This level is only intended
+ for debugging small toy systems, since many large
+ events may be generated.</p>
+ </item>
+ <tag><c>false</c></tag>
+ <item>
+ <p>is an alias for none.</p>
+ </item>
+ <tag><c>true</c></tag>
+ <item>
+ <p>is an alias for debug.</p>
+ </item>
+ </taglist>
+ <p>The debug level of Mnesia itself, is also an application
+ parameter, thereby making it possible to start an Erlang system
+ in order to turn on Mnesia debug in the initial
+ start-up phase by using the following code:
+ </p>
+ <pre>
+ % erl -mnesia debug verbose
+ </pre>
+ </section>
+
+ <section>
+ <title>Concurrent Processes in Mnesia</title>
+ <p>Programming concurrent Erlang systems is the subject of
+ a separate book. However, it is worthwhile to draw attention to
+ the following features, which permit concurrent processes to
+ exist in a Mnesia system.
+ </p>
+ <p>A group of functions or processes can be called within a
+ transaction. A transaction may include statements that read,
+ write or delete data from the DBMS. A large number of such
+ transactions can run concurrently, and the programmer does not
+ have to explicitly synchronize the processes which manipulate
+ the data. All programs accessing the database through the
+ transaction system may be written as if they had sole access to
+ the data. This is a very desirable property since all
+ synchronization is taken care of by the transaction handler. If
+ a program reads or writes data, the system ensures that no other
+ program tries to manipulate the same data at the same time.
+ </p>
+ <p>It is possible to move tables, delete tables or reconfigure
+ the layout of a table in various ways. An important aspect of
+ the actual implementation of these functions is that it is
+ possible for user programs to continue to use a table while it
+ is being reconfigured. For example, it is possible to
+ simultaneously move a table and perform write operations to the
+ table . This is important for many applications that
+ require continuously available services. Refer to Chapter 4:
+ <seealso marker="Mnesia_chap4#trans_prop">Transactions and other access contexts</seealso> for more information.
+ </p>
+ </section>
+
+ <section>
+ <title>Prototyping</title>
+ <p>If and when we decide that we would like to start and manipulate
+ Mnesia, it is often easier to write the definitions and
+ data into an ordinary text file.
+ Initially, no tables and no data exist, or which
+ tables are required. At the initial stages of prototyping it
+ is prudent write all data into one file, process
+ that file and have the data in the file inserted into the database.
+ It is possible to initialize Mnesia with data read from a text file.
+ We have the following two functions to work with text files.
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>mnesia:load_textfile(Filename)</c> Which loads a
+ series of local table definitions and data found in the file
+ into Mnesia. This function also starts Mnesia and possibly
+ creates a new schema. The function only operates on the
+ local node.
+ </p>
+ </item>
+ <item>
+ <p><c>mnesia:dump_to_textfile(Filename)</c> Dumps
+ all local tables of a mnesia system into a text file which can
+ then be edited (by means of a normal text editor) and then
+ later reloaded.</p>
+ </item>
+ </list>
+ <p>These functions are of course much slower than the ordinary
+ store and load functions of Mnesia. However, this is mainly intended for minor experiments
+ and initial prototyping. The major advantages of these functions is that they are very easy
+ to use.
+ </p>
+ <p>The format of the text file is:
+ </p>
+ <pre>
+ {tables, [{Typename, [Options]},
+ {Typename2 ......}]}.
+
+ {Typename, Attribute1, Atrribute2 ....}.
+ {Typename, Attribute1, Atrribute2 ....}.
+ </pre>
+ <p><c>Options</c> is a list of <c>{Key,Value}</c> tuples conforming
+ to the options we could give to <c>mnesia:create_table/2</c>.
+ </p>
+ <p>For example, if we want to start playing with a small
+ database for healthy foods, we enter then following data into
+ the file <c>FRUITS</c>.
+ </p>
+ <codeinclude file="FRUITS" tag="%0" type="erl"></codeinclude>
+ <p>The following session with the Erlang shell then shows how
+ to load the fruits database.
+ </p>
+ <pre><![CDATA[
+ % erl
+ Erlang (BEAM) emulator version 4.9
+
+ Eshell V4.9 (abort with ^G)
+ 1> mnesia:load_textfile("FRUITS").
+ New table fruit
+ New table vegetable
+ {atomic,ok}
+ 2> mnesia:info().
+ ---> Processes holding locks <---
+ ---> Processes waiting for locks <---
+ ---> Pending (remote) transactions <---
+ ---> Active (local) transactions <---
+ ---> Uncertain transactions <---
+ ---> Active tables <---
+ vegetable : with 2 records occuping 299 words of mem
+ fruit : with 2 records occuping 291 words of mem
+ schema : with 3 records occuping 401 words of mem
+ ===> System info in version "1.1", debug level = none <===
+ opt_disc. Directory "/var/tmp/Mnesia.nonode@nohost" is used.
+ use fallback at restart = false
+ running db nodes = [nonode@nohost]
+ stopped db nodes = []
+ remote = []
+ ram_copies = [fruit,vegetable]
+ disc_copies = [schema]
+ disc_only_copies = []
+ [{nonode@nohost,disc_copies}] = [schema]
+ [{nonode@nohost,ram_copies}] = [fruit,vegetable]
+ 3 transactions committed, 0 aborted, 0 restarted, 2 logged to disc
+ 0 held locks, 0 in queue; 0 local transactions, 0 remote
+ 0 transactions waits for other nodes: []
+ ok
+ 3>
+ ]]></pre>
+ <p>Where we can see that the DBMS was initiated from a
+ regular text file.
+ </p>
+ </section>
+
+ <section>
+ <title>Object Based Programming with Mnesia</title>
+ <p>The Company database introduced in Chapter 2 has three tables
+ which store records (employee, dept, project), and three tables
+ which store relationships (manager, at_dep, in_proj). This is a
+ normalized data model, which has some advantages over a
+ non-normalized data model.
+ </p>
+ <p>It is more efficient to do a
+ generalized search in a normalized database. Some operations are
+ also easier to perform on a normalized data model. For example,
+ we can easily remove one project, as the following example
+ illustrates:
+ </p>
+ <codeinclude file="company.erl" tag="%13" type="erl"></codeinclude>
+ <p>In reality, data models are seldom fully normalized. A
+ realistic alternative to a normalized database model would be
+ a data model which is not even in first normal form. Mnesia
+ is very suitable for applications such as telecommunications,
+ because it is easy to organize data in a very flexible manner. A
+ Mnesia database is always organized as a set of tables. Each
+ table is filled with rows/objects/records. What sets Mnesia
+ apart is that individual fields in a record can contain any type
+ of compound data structures. An individual field in a record can
+ contain lists, tuples, functions, and even record code.
+ </p>
+ <p>Many telecommunications applications have unique requirements
+ on lookup times for certain types of records. If our Company
+ database had been a part of a telecommunications system, then it
+ could be that the lookup time of an employee <em>together</em>
+ with a list of the projects the employee is working on, should
+ be minimized. If this was the case, we might choose a
+ drastically different data model which has no direct
+ relationships. We would only have the records themselves, and
+ different records could contain either direct references to
+ other records, or they could contain other records which are not
+ part of the Mnesia schema.
+ </p>
+ <p>We could create the following record definitions:
+ </p>
+ <codeinclude file="company_o.hrl" tag="%0" type="erl"></codeinclude>
+ <p>An record which describes an employee might look like this:
+ </p>
+ <pre>
+ Me = #employee{emp_no= 104732,
+ name = klacke,
+ salary = 7,
+ sex = male,
+ phone = 99586,
+ room_no = {221, 015},
+ dept = 'B/SFR',
+ projects = [erlang, mnesia, otp],
+ manager = 114872},
+ </pre>
+ <p>This model only has three different tables, and the employee
+ records contain references to other records. We have the following
+ references in the record.
+ </p>
+ <list type="bulleted">
+ <item><c>'B/SFR'</c> refers to a <c>dept</c> record.
+ </item>
+ <item><c>[erlang, mnesia, otp]</c>. This is a list of three
+ direct references to three different <c>projects</c> records.
+ </item>
+ <item><c>114872</c>. This refers to another employee record.
+ </item>
+ </list>
+ <p>We could also use the Mnesia record identifiers (<c>{Tab, Key}</c>)
+ as references. In this case, the <c>dept</c> attribute would be
+ set to the value <c>{dept, 'B/SFR'}</c> instead of
+ <c>'B/SFR'</c>.
+ </p>
+ <p>With this data model, some operations execute considerably
+ faster than they do with the normalized data model in our
+ Company database. On the other hand, some other operations
+ become much more complicated. In particular, it becomes more
+ difficult to ensure that records do not contain dangling
+ pointers to other non-existent, or deleted, records.
+ </p>
+ <p>The following code exemplifies a search with a non-normalized
+ data model. To find all employees at department
+ <c>Dep</c> with a salary higher than <c>Salary</c>, use the following code:
+ </p>
+ <codeinclude file="company_o.erl" tag="%9" type="erl"></codeinclude>
+ <p>This code is not only easier to write and to understand, but it
+ also executes much faster.
+ </p>
+ <p>It is easy to show examples of code which executes faster if
+ we use a non-normalized data model, instead of a normalized
+ model. The main reason for this is that fewer tables are
+ required. For this reason, we can more easily combine data from
+ different tables in join operations. In the above example, the
+ <c>get_emps/2</c> function was transformed from a join operation
+ into a simple query which consists of a selection and a projection
+ on one single table.
+ </p>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
new file mode 100644
index 0000000000..7078499fbf
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc
@@ -0,0 +1,890 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Mnesia System Information</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_chap7.xml</file>
+ </header>
+
+ <section>
+ <title>Database Configuration Data</title>
+ <p>The following two functions can be used to retrieve system
+ information. They are described in detail in the reference manual.
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:table_info(Tab, Key) -></c><c>Info | exit({aborted, Reason})</c>.
+ Returns information about one table. Such as the
+ current size of the table, on which nodes it resides etc.
+ </item>
+ <item><c>mnesia:system_info(Key) -> </c><c>Info | exit({aborted, Reason})</c>.
+ Returns information about the Mnesia system. For example, transaction
+ statistics, db_nodes, configuration parameters etc.
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Core Dumps</title>
+ <p>If Mnesia malfunctions, system information is dumped to a file
+ named <c>MnesiaCore.Node.When</c>. The type of system
+ information contained in this file can also be generated with
+ the function <c>mnesia_lib:coredump()</c>. If a Mnesia system
+ behaves strangely, it is recommended that a Mnesia core dump
+ file be included in the bug report.</p>
+ </section>
+
+ <section>
+ <title>Dumping Tables</title>
+ <p>Tables of type <c>ram_copies</c> are by definition stored in
+ memory only. It is possible, however, to dump these tables to
+ disc, either at regular intervals, or before the system is
+ shutdown. The function <c>mnesia:dump_tables(TabList)</c> dumps
+ all replicas of a set of RAM tables to disc. The tables can be
+ accessed while being dumped to disc. To dump the tables to
+ disc all replicas must have the storage type <c>ram_copies</c>.
+ </p>
+ <p>The table content is placed in a .DCD file on the
+ disc. When the Mnesia system is started, the RAM table will
+ initially be loaded with data from its .DCD file.
+ </p>
+ </section>
+
+ <section>
+ <marker id="checkpoints"></marker>
+ <title>Checkpoints</title>
+ <p>A checkpoint is a transaction consistent state that spans over
+ one or more tables. When a checkpoint is activated, the system
+ will remember the current content of the set of tables. The
+ checkpoint retains a transaction consistent state of the tables,
+ allowing the tables to be read and updated while the checkpoint
+ is active. A checkpoint is typically used to
+ back up tables to external media, but they are also used
+ internally in Mnesia for other purposes. Each checkpoint is
+ independent and a table may be involved in several checkpoints
+ simultaneously.
+ </p>
+ <p>Each table retains its old contents in a checkpoint retainer
+ and for performance critical applications, it may be important
+ to realize the processing overhead associated with checkpoints.
+ In a worst case scenario, the checkpoint retainer will consume
+ even more memory than the table itself. Each update will also be
+ slightly slower on those nodes where checkpoint
+ retainers are attached to the tables.
+ </p>
+ <p>For each table it is possible to choose if there should be one
+ checkpoint retainer attached to all replicas of the table, or if
+ it is enough to have only one checkpoint retainer attached to a
+ single replica. With a single checkpoint retainer per table, the
+ checkpoint will consume less memory, but it will be vulnerable
+ to node crashes. With several redundant checkpoint retainers the
+ checkpoint will survive as long as there is at least one active
+ checkpoint retainer attached to each table.
+ </p>
+ <p>Checkpoints may be explicitly deactivated with the function
+ <c>mnesia:deactivate_checkpoint(Name)</c>, where <c>Name</c> is
+ the name of an active checkpoint. This function returns
+ <c>ok</c> if successful, or <c>{error, Reason}</c> in the case
+ of an error. All tables in a checkpoint must be attached to at
+ least one checkpoint retainer. The checkpoint is automatically
+ de-activated by Mnesia, when any table lacks a checkpoint
+ retainer. This may happen when a node goes down or when a
+ replica is deleted. Use the <c>min</c> and
+ <c>max</c> arguments described below, to control the degree of
+ checkpoint retainer redundancy.
+ </p>
+ <p>Checkpoints are activated with the function <marker id="mnesia:chkpt(Args)"></marker>
+<c>mnesia:activate_checkpoint(Args)</c>,
+ where <c>Args</c> is a list of the following tuples:
+ </p>
+ <list type="bulleted">
+ <item><c>{name,Name}</c>. <c>Name</c> specifies a temporary name
+ of the checkpoint. The name may be re-used when the checkpoint
+ has been de-activated. If no name is specified, a name is
+ generated automatically.
+ </item>
+ <item><c>{max,MaxTabs}</c>. <c>MaxTabs</c> is a list of tables
+ which will be included in the checkpoint. The default is
+ <c>[]</c> (an empty list). For these tables, the redundancy
+ will be maximized. The old contents of the table will be
+ retained in the checkpoint retainer when the main table is
+ updated by the applications. The checkpoint becomes more fault
+ tolerant if the tables have several replicas. When new
+ replicas are added by means of the schema manipulation
+ function <c>mnesia:add_table_copy/3</c>, it will also
+ attach a local checkpoint retainer.
+ </item>
+ <item><c>{min,MinTabs}</c>. <c>MinTabs</c> is a list of tables
+ that should be included in the checkpoint. The default is
+ <c>[]</c>. For these tables, the redundancy will be minimized,
+ and there will be a single checkpoint retainer per table,
+ preferably at the local node.
+ </item>
+ <item><c>{allow_remote,Bool}</c>. <c>false</c> means that all
+ checkpoint retainers must be local. If a table does not reside
+ locally, the checkpoint cannot be activated. <c>true</c>
+ allows checkpoint retainers to be allocated on any node. The
+ defaults is <c>true</c>.
+ </item>
+ <item><c>{ram_overrides_dump,Bool}</c>. This argument only
+ applies to tables of type <c>ram_copies</c>. <c>Bool</c>
+ specifies if the table state in RAM should override the table
+ state on disc. <c>true</c> means that the latest committed
+ records in RAM are included in the checkpoint retainer. These
+ are the records that the application accesses. <c>false</c>
+ means that the records on the disc .DAT file are
+ included in the checkpoint retainer. These are the records
+ that will be loaded on start-up. Default is <c>false</c>.</item>
+ </list>
+ <p>The <c>mnesia:activate_checkpoint(Args)</c> returns one of the
+ following values:
+ </p>
+ <list type="bulleted">
+ <item><c>{ok, Name, Nodes}</c></item>
+ <item><c>{error, Reason}</c>.</item>
+ </list>
+ <p><c>Name</c> is the name of the checkpoint, and <c>Nodes</c> are
+ the nodes where the checkpoint is known.
+ </p>
+ <p>A list of active checkpoints can be obtained with the following
+ functions:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:system_info(checkpoints)</c>. This function
+ returns all active checkpoints on the current node.</item>
+ <item><c>mnesia:table_info(Tab,checkpoints)</c>. This function
+ returns active checkpoints on a specific table.</item>
+ </list>
+ </section>
+
+ <section>
+ <title>Files</title>
+ <p>This section describes the internal files which are created and maintained by the Mnesia system,
+ in particular, the workings of the Mnesia log is described.
+ </p>
+
+ <section>
+ <title>Start-Up Files</title>
+ </section>
+ <p>In Chapter 3 we detailed the following pre-requisites for
+ starting Mnesia (refer Chapter 3: <seealso marker="Mnesia_chap3#start_mnesia">Starting Mnesia</seealso>:
+ </p>
+ <list type="bulleted">
+ <item>We must start an Erlang session and specify a Mnesia
+ directory for our database.
+ </item>
+ <item>We must initiate a database schema, using the function
+ <c>mnesia:create_schema/1</c>.
+ </item>
+ </list>
+ <p>The following example shows how these tasks are performed:
+ </p>
+ <list type="ordered">
+ <item>
+ <pre>
+% <input>erl -sname klacke -mnesia dir '"/ldisc/scratch/klacke"'</input> </pre>
+ </item>
+ <item>
+ <pre>
+Erlang (BEAM) emulator version 4.9
+
+Eshell V4.9 (abort with ^G)
+(klacke@gin)1> <input>mnesia:create_schema([node()]).</input>
+ok
+(klacke@gin)2>
+<input>^Z</input>
+Suspended </pre>
+ <p>We can inspect the Mnesia directory to see what files have been created. Enter the following command:
+ </p>
+ <pre>
+% <input>ls -l /ldisc/scratch/klacke</input>
+-rw-rw-r-- 1 klacke staff 247 Aug 12 15:06 FALLBACK.BUP </pre>
+ <p>The response shows that the file FALLBACK.BUP has been created. This is called a backup file, and it contains an initial schema. If we had specified more than one node in the <c>mnesia:create_schema/1</c> function, identical backup files would have been created on all nodes.
+ </p>
+ </item>
+ <item>
+ <p>Continue by starting Mnesia:</p>
+ <pre>
+(klacke@gin)3><input>mnesia:start( ).</input>
+ok </pre>
+ <p>We can now see the following listing in the Mnesia directory:
+ </p>
+ <pre>
+-rw-rw-r-- 1 klacke staff 86 May 26 19:03 LATEST.LOG
+-rw-rw-r-- 1 klacke staff 34507 May 26 19:03 schema.DAT </pre>
+ <p>The schema in the backup file FALLBACK.BUP has been used to generate the file <c>schema.DAT.</c> Since we have no other disc resident tables than the schema, no other data files were created. The file FALLBACK.BUP was removed after the successful "restoration". We also see a number of files that are for internal use by Mnesia.
+ </p>
+ </item>
+ <item>
+ <p>Enter the following command to create a table:</p>
+ <pre>
+(klacke@gin)4> <input>mnesia:create_table(foo,[{disc_copies, [node()]}]).</input>
+{atomic,ok} </pre>
+ <p>We can now see the following listing in the Mnesia directory:
+ </p>
+ <pre>
+% <input>ls -l /ldisc/scratch/klacke</input>
+-rw-rw-r-- 1 klacke staff 86 May 26 19:07 LATEST.LOG
+-rw-rw-r-- 1 klacke staff 94 May 26 19:07 foo.DCD
+-rw-rw-r-- 1 klacke staff 6679 May 26 19:07 schema.DAT </pre>
+ <p>Where a file <c>foo.DCD</c> has been created. This file will eventually store
+ all data that is written into the <c>foo</c> table.</p>
+ </item>
+ </list>
+
+ <section>
+ <title>The Log File</title>
+ <p>When starting Mnesia, a .LOG file called <c>LATEST.LOG</c>
+ was created and placed in the database directory. This file is
+ used by Mnesia to log disc based transactions. This includes all
+ transactions that write at least one record in a table which is
+ of storage type <c>disc_copies</c>, or
+ <c>disc_only_copies</c>. It also includes all operations which
+ manipulate the schema itself, such as creating new tables. The
+ format of the log can vary with different implementations of
+ Mnesia. The Mnesia log is currently implemented with the
+ standard library module <c>disc_log</c>.
+ </p>
+ <p>The log file will grow continuously and must be dumped at
+ regular intervals. "Dumping the log file" means that Mnesia will
+ perform all the operations listed in the log and place the
+ records in the corresponding .DAT, .DCD and .DCL data files. For
+ example, if the operation "write record <c>{foo, 4, elvis, 6}</c>"
+ is listed in the log, Mnesia inserts the operation into the
+ file <c>foo.DCL</c>, later when Mnesia thinks the .DCL has become to large
+ the data is moved to the .DCD file.
+ The dumping operation can be time consuming
+ if the log is very large. However, it is important to realize
+ that the Mnesia system continues to operate during log dumps.
+ </p>
+ <p>By default Mnesia either dumps the log whenever 100 records have
+ been written in the log or when 3 minutes have passed.
+ This is controlled by the two application parameters
+ <c>-mnesia dump_log_write_threshold WriteOperations</c> and
+ <c>-mnesia dump_log_time_threshold MilliSecs</c>.
+ </p>
+ <p>Before the log is dumped, the file <c>LATEST.LOG</c> is
+ renamed to <c>PREVIOUS.LOG</c>, and a new <c>LATEST.LOG</c> file
+ is created. Once the log has been successfully dumped, the file
+ <c>PREVIOUS.LOG</c> is deleted.
+ </p>
+ <p>The log is also dumped at start-up and whenever a schema
+ operation is performed.
+ </p>
+ </section>
+
+ <section>
+ <title>The Data Files</title>
+ <p>The directory listing also contains one .DAT file. This contain
+ the schema itself, contained in the <c>schema.DAT</c>
+ file. The DAT files are indexed files, and it is efficient to
+ insert and search for records in these files with a specific
+ key. The .DAT files are used for the schema and for <c>disc_only_copies</c>
+ tables. The Mnesia data files are currently implemented with the
+ standard library module <c>dets</c>, and all operations which
+ can be performed on <c>dets</c> files can also be performed on
+ the Mnesia data files. For example, <c>dets</c> contains a
+ function <c>dets:traverse/2</c> which can be used to view the
+ contents of a Mnesia DAT file. However, this can only be done
+ when Mnesia is not running. So, to view a our schema file, we
+ can: </p>
+ <pre>
+{ok, N} = dets:open_file(schema, [{file, "./schema.DAT"},{repair,false},
+{keypos, 2}]),
+F = fun(X) -> io:format("~p~n", [X]), continue end,
+dets:traverse(N, F),
+dets:close(N). </pre>
+ <note>
+ <p>Refer to the Reference Manual, <c>std_lib</c> for information about <c>dets</c>.</p>
+ </note>
+ <warning>
+ <p>The DAT files must always be opened with the <c>{repair, false}</c>
+ option. This ensures that these files are not
+ automatically repaired. Without this option, the database may
+ become inconsistent, because Mnesia may
+ believe that the files were properly closed. Refer to the reference
+ manual for information about the configuration parameter
+ <c>auto_repair</c>.</p>
+ </warning>
+ <warning>
+ <p>It is recommended that Data files are not tampered with while Mnesia is
+ running. While not prohibited, the behavior of Mnesia is unpredictable. </p>
+ </warning>
+ <p>The <c>disc_copies</c> tables are stored on disk with .DCL and .DCD files,
+ which are standard disk_log files.
+ </p>
+ </section>
+ </section>
+
+ <section>
+ <title>Loading of Tables at Start-up</title>
+ <p>At start-up Mnesia loads tables in order to make them accessible
+ for its applications. Sometimes Mnesia decides to load all tables
+ that reside locally, and sometimes the tables may not be
+ accessible until Mnesia brings a copy of the table
+ from another node.
+ </p>
+ <p>To understand the behavior of Mnesia at start-up it is
+ essential to understand how Mnesia reacts when it loses contact
+ with Mnesia on another node. At this stage, Mnesia cannot distinguish
+ between a communication failure and a "normal" node down. <br></br>
+
+ When this happens, Mnesia will assume that the other node is no longer running.
+ Whereas, in reality, the communication between the nodes has merely failed.
+ </p>
+ <p>To overcome this situation, simply try to restart the ongoing transactions that are
+ accessing tables on the failing node, and write a <c>mnesia_down</c> entry to a log file.
+ </p>
+ <p>At start-up, it must be noted that all tables residing on nodes
+ without a <c>mnesia_down</c> entry, may have fresher replicas.
+ Their replicas may have been updated after the termination
+ of Mnesia on the current node. In order to catch up with the latest
+ updates, transfer a copy of the table from one of these other
+ "fresh" nodes. If you are unlucky, other nodes may be down
+ and you must wait for the table to be
+ loaded on one of these nodes before receiving a fresh copy of
+ the table.
+ </p>
+ <p>Before an application makes its first access to a table,
+ <c>mnesia:wait_for_tables(TabList, Timeout)</c> ought to be executed
+ to ensure that the table is accessible from the local node. If
+ the function times out the application may choose to force a
+ load of the local replica with
+ <c>mnesia:force_load_table(Tab)</c> and deliberately lose all
+ updates that may have been performed on the other nodes while
+ the local node was down. If
+ Mnesia already has loaded the table on another node or intends
+ to do so, we will copy the table from that node in order to
+ avoid unnecessary inconsistency.
+ </p>
+ <warning>
+ <p>Keep in mind that it is only
+ one table that is loaded by <c>mnesia:force_load_table(Tab)</c>
+ and since committed transactions may have caused updates in
+ several tables, the tables may now become inconsistent due to
+ the forced load.</p>
+ </warning>
+ <p>The allowed <c>AccessMode</c> of a table may be defined to
+ either be <c>read_only</c> or <c>read_write</c>. And it may be
+ toggled with the function <c>mnesia:change_table_access_mode(Tab, AccessMode)</c> in runtime. <c>read_only</c> tables and
+ <c>local_content</c> tables will always be loaded locally, since
+ there are no need for copying the table from other nodes. Other
+ tables will primary be loaded remotely from active replicas on
+ other nodes if the table already has been loaded there, or if
+ the running Mnesia already has decided to load the table there.
+ </p>
+ <p>At start up, Mnesia will assume that its local replica is the
+ most recent version and load the table from disc if either
+ situation is detected:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia_down</c> is returned from all other nodes that holds a disc
+ resident replica of the table; or,</item>
+ <item>if all replicas are <c>ram_copies</c></item>
+ </list>
+ <p>This is normally a wise decision, but it may turn out to
+ be disastrous if the nodes have been disconnected due to a
+ communication failure, since Mnesia's normal table load
+ mechanism does not cope with communication failures.
+ </p>
+ <p>When Mnesia is loading many tables the default load
+ order. However, it is possible to
+ affect the load order by explicitly changing the
+ <c>load_order</c> property for the tables, with the function
+ <c>mnesia:change_table_load_order(Tab, LoadOrder)</c>. The
+ <c>LoadOrder</c> is by default <c>0</c> for all tables, but it
+ can be set to any integer. The table with the highest
+ <c>load_order</c> will be loaded first. Changing load order is
+ especially useful for applications that need to ensure early
+ availability of fundamental tables. Large peripheral
+ tables should have a low load order value, perhaps set
+ below 0.
+ </p>
+ </section>
+
+ <section>
+ <title>Recovery from Communication Failure</title>
+ <p>There are several occasions when Mnesia may detect that the
+ network has been partitioned due to a communication failure.
+ </p>
+ <p>One is when Mnesia already is up and running and the Erlang
+ nodes gain contact again. Then Mnesia will try to contact Mnesia
+ on the other node to see if it also thinks that the network has
+ been partitioned for a while. If Mnesia on both nodes has logged
+ <c>mnesia_down</c> entries from each other, Mnesia generates a
+ system event, called <c>{inconsistent_database, running_partitioned_network, Node}</c> which is sent to Mnesia's
+ event handler and other possible subscribers. The default event
+ handler reports an error to the error logger.
+ </p>
+ <p>Another occasion when Mnesia may detect that the network has
+ been partitioned due to a communication failure, is at start-up.
+ If Mnesia detects that both the local node and another node received
+ <c>mnesia_down</c> from each other it generates a
+ <c>{inconsistent_database, starting_partitioned_network, Node}</c> system event and acts as described above.
+ </p>
+ <p>If the application detects that there has been a communication
+ failure which may have caused an inconsistent database, it may
+ use the function <c>mnesia:set_master_nodes(Tab, Nodes)</c> to
+ pinpoint from which nodes each table may be loaded.</p>
+ <p>At start-up Mnesia's normal table load algorithm will be
+ bypassed and the table will be loaded from one of the master
+ nodes defined for the table, regardless of potential
+ <c>mnesia_down</c> entries in the log. The <c>Nodes</c> may only
+ contain nodes where the table has a replica and if it is empty,
+ the master node recovery mechanism for the particular table will
+ be reset and the normal load mechanism will be used when next
+ restarting.
+ </p>
+ <p>The function <c>mnesia:set_master_nodes(Nodes)</c> sets master
+ nodes for all tables. For each table it will determine its
+ replica nodes and invoke <c>mnesia:set_master_nodes(Tab, TabNodes)</c> with those replica nodes that are included in the
+ <c>Nodes</c> list (i.e. <c>TabNodes</c> is the intersection of
+ <c>Nodes</c> and the replica nodes of the table). If the
+ intersection is empty the master node recovery mechanism for the
+ particular table will be reset and the normal load mechanism
+ will be used at next restart.
+ </p>
+ <p>The functions <c>mnesia:system_info(master_node_tables)</c> and
+ <c>mnesia:table_info(Tab, master_nodes)</c> may be used to
+ obtain information about the potential master nodes.
+ </p>
+ <p>The function <c>mnesia:force_load_table(Tab)</c> may be used to
+ force load the table regardless of which table load mechanism
+ is activated.
+ </p>
+ </section>
+
+ <section>
+ <title>Recovery of Transactions</title>
+ <p>A Mnesia table may reside on one or more nodes. When a table is
+ updated, Mnesia will ensure that the updates will be replicated
+ to all nodes where the table resides. If a replica happens to be
+ inaccessible for some reason (e.g. due to a temporary node down),
+ Mnesia will then perform the replication later.
+ </p>
+ <p>On the node where the application is started, there will be a
+ transaction coordinator process. If the transaction is
+ distributed, there will also be a transaction participant process on
+ all the other nodes where commit work needs to be performed.
+ </p>
+ <p>Internally Mnesia uses several commit protocols. The selected
+ protocol depends on which table that has been updated in
+ the transaction. If all the involved tables are symmetrically
+ replicated, (i.e. they all have the same <c>ram_nodes</c>,
+ <c>disc_nodes</c> and <c>disc_only_nodes</c> currently
+ accessible from the coordinator node), a lightweight transaction
+ commit protocol is used.
+ </p>
+ <p>The number of messages that the
+ transaction coordinator and its participants needs to exchange
+ is few, since Mnesia's table load mechanism takes care of the
+ transaction recovery if the commit protocol gets
+ interrupted. Since all involved tables are replicated
+ symmetrically the transaction will automatically be recovered by
+ loading the involved tables from the same node at start-up of a
+ failing node. We do not really care if the transaction was
+ aborted or committed as long as we can ensure the ACID
+ properties. The lightweight commit protocol is non-blocking,
+ i.e. the surviving participants and their coordinator will
+ finish the transaction, regardless of some node crashes in the
+ middle of the commit protocol or not.
+ </p>
+ <p>If a node goes down in the middle of a dirty operation the
+ table load mechanism will ensure that the update will be
+ performed on all replicas or none. Both asynchronous dirty
+ updates and synchronous dirty updates use the same recovery
+ principle as lightweight transactions.
+ </p>
+ <p>If a transaction involves updates of asymmetrically replicated
+ tables or updates of the schema table, a heavyweight commit
+ protocol will be used. The heavyweight commit protocol is able
+ to finish the transaction regardless of how the tables are
+ replicated. The typical usage of a heavyweight transaction is
+ when we want to move a replica from one node to another. Then we
+ must ensure that the replica either is entirely moved or left as
+ it was. We must never end up in a situation with replicas on both
+ nodes or no node at all. Even if a node crashes in the middle of
+ the commit protocol, the transaction must be guaranteed to be
+ atomic. The heavyweight commit protocol involves more messages
+ between the transaction coordinator and its participants than
+ a lightweight protocol and it will perform recovery work at
+ start-up in order to finish the abort or commit work.
+ </p>
+ <p>The heavyweight commit protocol is also non-blocking,
+ which allows the surviving participants and their coordinator to
+ finish the transaction regardless (even if a node crashes in the
+ middle of the commit protocol). When a node fails at start-up,
+ Mnesia will determine the outcome of the transaction and
+ recover it. Lightweight protocols, heavyweight protocols and dirty updates, are
+ dependent on other nodes to be up and running in order to make the
+ correct heavyweight transaction recovery decision.
+ </p>
+ <p>If Mnesia has not started on some of the nodes that are involved in the
+ transaction AND neither the local node or any of the already
+ running nodes know the outcome of the transaction, Mnesia will
+ by default wait for one. In the worst case scenario all other
+ involved nodes must start before Mnesia can make the correct decision
+ about the transaction and finish its start-up.
+ </p>
+ <p>This means that Mnesia (on one node)may hang if a double fault occurs, i.e. when two nodes crash simultaneously
+ and one attempts to start when the other refuses to
+ start e.g. due to a hardware error.
+ </p>
+ <p>It is possible to specify the maximum time that Mnesia
+ will wait for other nodes to respond with a transaction
+ recovery decision. The configuration parameter
+ <c>max_wait_for_decision</c> defaults to infinity (which may
+ cause the indefinite hanging as mentioned above) but if it is
+ set to a definite time period (eg.three minutes), Mnesia will then enforce a
+ transaction recovery decision if needed, in order to allow
+ Mnesia to continue with its start-up procedure. </p>
+ <p>The downside of an enforced transaction recovery decision, is that the decision may be
+ incorrect, due to insufficient information regarding the other nodes'
+ recovery decisions. This may result in an
+ inconsistent database where Mnesia has committed the transaction
+ on some nodes but aborted it on others. </p>
+ <p>In fortunate cases the inconsistency will only appear in tables belonging to a specific
+ application, but if a schema transaction has been inconsistently
+ recovered due to the enforced transaction recovery decision, the
+ effects of the inconsistency can be fatal.
+ However, if the higher priority is availability rather than
+ consistency, then it may be worth the risk. </p>
+ <p>If Mnesia
+ encounters a inconsistent transaction decision a
+ <c>{inconsistent_database, bad_decision, Node}</c> system event
+ will be generated in order to give the application a chance to
+ install a fallback or other appropriate measures to resolve the inconsistency. The default
+ behavior of the Mnesia event handler is the same as if the
+ database became inconsistent as a result of partitioned network (see
+ above).
+ </p>
+ </section>
+
+ <section>
+ <title>Backup, Fallback, and Disaster Recovery</title>
+ <p>The following functions are used to backup data, to install a
+ backup as fallback, and for disaster recovery.
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:backup_checkpoint(Name, Opaque, [Mod])</c>. This
+ function performs a backup of the tables included in the
+ checkpoint.
+ </item>
+ <item><c>mnesia:backup(Opaque, [Mod])</c>. This function
+ activates a new checkpoint which covers all Mnesia tables and
+ performs a backup. It is performed with maximum degree of
+ redundancy (also refer to the function <seealso marker="#checkpoints">mnesia:activate_checkpoint(Args)</seealso>,
+ <c>{max, MaxTabs} and {min, MinTabs}).</c></item>
+ <item><c>mnesia:traverse_backup(Source,[SourceMod,]</c><c>Target,[TargetMod,]Fun,Ac)</c>. This function can be used
+ to read an existing backup, create a new backup from an
+ existing one, or to copy a backup from one type media to
+ another.
+ </item>
+ <item><c>mnesia:uninstall_fallback()</c>. This function removes
+ previously installed fallback files.
+ </item>
+ <item><c>mnesia:restore(Opaque, Args)</c>. This function
+ restores a set of tables from a previous backup.
+ </item>
+ <item><c>mnesia:install_fallback(Opaque, [Mod])</c>. This
+ function can be configured to restart the Mnesia and reload data
+ tables, and possibly schema tables, from an existing
+ backup. This function is typically used for disaster recovery
+ purposes, when data or schema tables are corrupted.</item>
+ </list>
+ <p>These functions are explained in the following
+ sub-sections. Also refer to the the section <seealso marker="#checkpoints">Checkpoints</seealso> in this chapter, which
+ describes the two functions used to activate and de-activate
+ checkpoints.
+ </p>
+
+ <section>
+ <title>Backup</title>
+ <p>Backup operation are performed with the following functions:
+ </p>
+ <list type="bulleted">
+ <item><c>mnesia:backup_checkpoint(Name, Opaque, [Mod])</c></item>
+ <item><c>mnesia:backup(Opaque, [Mod])</c></item>
+ <item><c>mnesia:traverse_backup(Source, [SourceMod,],</c><c>Target,[TargetMod,]Fun,Acc)</c>.</item>
+ </list>
+ <p>By default, the actual access to the backup media is
+ performed via the <c>mnesia_backup</c> module for both read
+ and write. Currently <c>mnesia_backup</c> is implemented with
+ the standard library module <c>disc_log</c>, but it is possible to write
+ your own module with the same interface as
+ <c>mnesia_backup</c> and configure Mnesia so the alternate
+ module performs the actual accesses to the backup media. This
+ means that the user may put the backup on medias that Mnesia
+ does not know about, possibly on hosts where Erlang is not
+ running. Use the configuration parameter <c><![CDATA[-mnesia backup_module <module>]]></c> for this purpose. </p>
+ <p>The source
+ for a backup is an activated checkpoint. The backup function
+ most commonly used is <c>mnesia:backup_checkpoint(Name, Opaque,[Mod])</c>. This function returns either <c>ok</c>, or
+ <c>{error,Reason}</c>. It has the following arguments:
+ </p>
+ <list type="bulleted">
+ <item><c>Name</c> is the name of an activated
+ checkpoint. Refer to the section <seealso marker="#checkpoints">Checkpoints</seealso> in this chapter, the
+ function <c>mnesia:activate_checkpoint(ArgList)</c> for
+ details on how to include table names in checkpoints.
+ </item>
+ <item><c>Opaque</c>. Mnesia does not interpret this argument,
+ but it is forwarded to the backup module. The Mnesia default
+ backup module, <c>mnesia_backup</c> interprets this argument
+ as a local file name.
+ </item>
+ <item><c>Mod</c>. The name of an alternate backup module.
+ </item>
+ </list>
+ <p>The function <c>mnesia:backup(Opaque[, Mod])</c> activates a
+ new checkpoint which covers all Mnesia tables with maximum
+ degree of redundancy and performs a backup. Maximum
+ redundancy means that each table replica has a checkpoint
+ retainer. Tables with the <c>local_contents</c> property are
+ backed up as they
+ look on the current node.
+ </p>
+ <p>It is possible to iterate over a backup, either for the
+ purpose of transforming it into a new backup, or just reading
+ it. The function <c>mnesia:traverse_backup(Source, [SourceMod,]</c><c>Target, [TargeMod,] Fun, Acc)</c> which normally returns <c>{ok, LastAcc}</c>, is used for both of these purposes.
+ </p>
+ <p>Before the traversal starts, the source backup media is
+ opened with <c>SourceMod:open_read(Source)</c>, and the target
+ backup media is opened with
+ <c>TargetMod:open_write(Target)</c>. The arguments are:
+ </p>
+ <list type="bulleted">
+ <item><c>SourceMod</c> and <c>TargetMod</c> are module names.
+ </item>
+ <item><c>Source</c> and <c>Target</c> are opaque data used
+ exclusively by the modules <c>SourceMod</c> and
+ <c>TargetMod</c> for the purpose of initializing the backup
+ medias.
+ </item>
+ <item><c>Acc</c> is an initial accumulator value.
+ </item>
+ <item><c>Fun(BackupItems, Acc)</c> is applied to each item in
+ the backup. The Fun must return a tuple <c>{ValGoodBackupItems, NewAcc}</c>, where <c>ValidBackupItems</c> is a list of valid
+ backup items, and <c>NewAcc</c> is a new accumulator value.
+ The <c>ValidBackupItems</c> are written to the target backup
+ with the function <c>TargetMod:write/2</c>.
+ </item>
+ <item><c>LastAcc</c> is the last accumulator value. I.e.
+ the last <c>NewAcc</c> value that was returned by <c>Fun</c>.
+ </item>
+ </list>
+ <p>It is also possible to perform a read-only traversal of the
+ source backup without updating a target backup. If
+ <c>TargetMod==read_only</c>, then no target backup is accessed
+ at all.
+ </p>
+ <p>By setting <c>SourceMod</c> and <c>TargetMod</c> to different
+ modules it is possible to copy a backup from one kind of backup
+ media to another.
+ </p>
+ <p>Valid <c>BackupItems</c> are the following tuples:
+ </p>
+ <list type="bulleted">
+ <item><c>{schema, Tab}</c> specifies a table to be deleted.
+ </item>
+ <item><c>{schema, Tab, CreateList}</c> specifies a table to be
+ created. See <c>mnesia_create_table/2</c> for more
+ information about <c>CreateList</c>.
+ </item>
+ <item><c>{Tab, Key}</c> specifies the full identity of a record
+ to be deleted.
+ </item>
+ <item><c>{Record}</c> specifies a record to be inserted. It
+ can be a tuple with <c>Tab</c> as first field. Note that the
+ record name is set to the table name regardless of what
+ <c>record_name</c> is set to.
+ </item>
+ </list>
+ <p>The backup data is divided into two sections. The first
+ section contains information related to the schema. All schema
+ related items are tuples where the first field equals the atom
+ schema. The second section is the record section. It is not
+ possible to mix schema records with other records and all schema
+ records must be located first in the backup.
+ </p>
+ <p>The schema itself is a table and will possibly be included in
+ the backup. All nodes where the schema table resides are
+ regarded as a <c>db_node</c>.
+ </p>
+ <p>The following example illustrates how
+ <c>mnesia:traverse_backup</c> can be used to rename a db_node in
+ a backup file:
+ </p>
+ <codeinclude file="bup.erl" tag="%0" type="erl"></codeinclude>
+ </section>
+
+ <section>
+ <title>Restore</title>
+ <p>Tables can be restored on-line from a backup without
+ restarting Mnesia. A restore is performed with the function
+ <c>mnesia:restore(Opaque,Args)</c>, where <c>Args</c> can
+ contain the following tuples:
+ </p>
+ <list type="bulleted">
+ <item><c>{module,Mod}</c>. The backup module <c>Mod</c> is
+ used to access the backup media. If omitted, the default
+ backup module will be used.</item>
+ <item><c>{skip_tables, TableList}</c> Where <c>TableList</c>
+ is a list of tables which should not be read from the backup.</item>
+ <item><c>{clear_tables, TableList}</c> Where <c>TableList</c>
+ is a list of tables which should be cleared, before the
+ records from the backup are inserted, i.e. all records in
+ the tables are deleted before the tables are restored.
+ Schema information about the tables is not cleared or read
+ from backup.</item>
+ <item><c>{keep_tables, TableList}</c> Where <c>TableList</c>
+ is a list of tables which should be not be cleared, before
+ the records from the backup are inserted, i.e. the records
+ in the backup will be added to the records in the table.
+ Schema information about the tables is not cleared or read
+ from backup.</item>
+ <item><c>{recreate_tables, TableList}</c> Where <c>TableList</c>
+ is a list of tables which should be re-created, before the
+ records from the backup are inserted. The tables are first
+ deleted and then created with the schema information from the
+ backup. All the nodes in the backup needs to be up and running.</item>
+ <item><c>{default_op, Operation}</c> Where <c>Operation</c> is
+ one of the following operations <c>skip_tables</c>,
+ <c>clear_tables</c>, <c>keep_tables</c> or
+ <c>recreate_tables</c>. The default operation specifies
+ which operation should be used on tables from the backup
+ which are not specified in any of the lists above.
+ If omitted, the operation <c>clear_tables</c> will be used. </item>
+ </list>
+ <p>The argument <c>Opaque</c> is forwarded to the backup module.
+ It returns <c>{atomic, TabList}</c> if successful, or the
+ tuple <c>{aborted, Reason}</c> in the case of an error.
+ <c>TabList</c> is a list of the restored tables. Tables which
+ are restored are write locked for the duration of the restore
+ operation. However, regardless of any lock conflict caused by
+ this, applications can continue to do their work during the
+ restore operation.
+ </p>
+ <p>The restoration is performed as a single transaction. If the
+ database is very large, it may not be possible to restore it
+ online. In such a case the old database must be restored by
+ installing a fallback, and then restart.
+ </p>
+ </section>
+
+ <section>
+ <title>Fallbacks</title>
+ <p>The function <c>mnesia:install_fallback(Opaque, [Mod])</c> is
+ used to install a backup as fallback. It uses the backup module
+ <c>Mod</c>, or the default backup module, to access the backup
+ media. This function returns <c>ok</c> if successful, or
+ <c>{error, Reason}</c> in the case of an error.
+ </p>
+ <p>Installing a fallback is a distributed operation that is
+ <em>only</em> performed on all <c>db_nodes</c>. The fallback
+ is used to restore the database the next time the system is
+ started. If a Mnesia node with a fallback installed detects that
+ Mnesia on another node has died for some reason, it will
+ unconditionally terminate itself.
+ </p>
+ <p>A fallback is typically used when a system upgrade is
+ performed. A system typically involves the installation of new
+ software versions, and Mnesia tables are often transformed into
+ new layouts. If the system crashes during an upgrade, it is
+ highly probable re-installation of the old
+ applications will be required and restoration of the database
+ to its previous state. This can be done if a backup is performed and
+ installed as a fallback before the system upgrade begins.
+ </p>
+ <p>If the system upgrade fails, Mnesia must be restarted on all
+ <c>db_nodes</c> in order to restore the old database. The
+ fallback will be automatically de-installed after a successful
+ start-up. The function <c>mnesia:uninstall_fallback()</c> may
+ also be used to de-install the fallback after a
+ successful system upgrade. Again, this is a distributed
+ operation that is either performed on all <c>db_nodes</c>, or
+ none. Both the installation and de-installation of fallbacks
+ require Erlang to be up and running on all <c>db_nodes</c>, but
+ it does not matter if Mnesia is running or not.
+ </p>
+ </section>
+
+ <section>
+ <title>Disaster Recovery</title>
+ <p>The system may become inconsistent as a result of a power
+ failure. The UNIX <c>fsck</c> feature can possibly repair the
+ file system, but there is no guarantee that the file contents
+ will be consistent.
+ </p>
+ <p>If Mnesia detects that a file has not been properly closed,
+ possibly as a result of a power failure, it will attempt to
+ repair the bad file in a similar manner. Data may be lost, but
+ Mnesia can be restarted even if the data is inconsistent. The
+ configuration parameter <c><![CDATA[-mnesia auto_repair <bool>]]></c> can be
+ used to control the behavior of Mnesia at start-up. If
+ <c><![CDATA[<bool>]]></c> has the value <c>true</c>, Mnesia will attempt to
+ repair the file; if <c><![CDATA[<bool>]]></c> has the value <c>false</c>,
+ Mnesia will not restart if it detects a suspect file. This
+ configuration parameter affects the repair behavior of log
+ files, DAT files, and the default backup media.
+ </p>
+ <p>The configuration parameter <c><![CDATA[-mnesia dump_log_update_in_place <bool>]]></c> controls the safety level of
+ the <c>mnesia:dump_log()</c> function. By default, Mnesia will
+ dump the transaction log directly into the DAT files. If a power
+ failure happens during the dump, this may cause the randomly
+ accessed DAT files to become corrupt. If the parameter is set to
+ <c>false</c>, Mnesia will copy the DAT files and target the dump
+ to the new temporary files. If the dump is successful, the
+ temporary files will be renamed to their normal DAT
+ suffixes. The possibility for unrecoverable inconsistencies in
+ the data files will be much smaller with this strategy. On the
+ other hand, the actual dumping of the transaction log will be
+ considerably slower. The system designer must decide whether
+ speed or safety is the higher priority.
+ </p>
+ <p>Replicas of type <c>disc_only_copies</c> will only be
+ affected by this parameter during the initial dump of the log
+ file at start-up. When designing applications which have
+ <em>very</em> high requirements, it may be appropriate not to
+ use <c>disc_only_copies</c> tables at all. The reason for this
+ is the random access nature of normal operating system files. If
+ a node goes down for reason for a reason such as a power
+ failure, these files may be corrupted because they are not
+ properly closed. The DAT files for <c>disc_only_copies</c> are
+ updated on a per transaction basis.
+ </p>
+ <p>If a disaster occurs and the Mnesia database has been
+ corrupted, it can be reconstructed from a backup. This should be
+ regarded as a last resort, since the backup contains old data. The
+ data is hopefully consistent, but data will definitely be lost
+ when an old backup is used to restore the database.
+ </p>
+ </section>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/Mnesia_chap8.xml b/lib/mnesia/doc/src/Mnesia_chap8.xml
new file mode 100644
index 0000000000..3d2e23cf57
--- /dev/null
+++ b/lib/mnesia/doc/src/Mnesia_chap8.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Combining Mnesia with SNMP</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file>Mnesia_chap8.xml</file>
+ </header>
+
+ <section>
+ <title>Combining Mnesia and SNMP </title>
+ <p>Many telecommunications applications must be controlled and
+ reconfigured remotely. It is sometimes an advantage to perform
+ this remote control with an open protocol such as the Simple
+ Network Management Protocol (SNMP). The alternatives to this would
+ be:
+ </p>
+ <list type="bulleted">
+ <item>Not being able to control the application remotely at all.
+ </item>
+ <item>Using a proprietary control protocol.
+ </item>
+ <item>Using a bridge which maps control messages in a
+ proprietary protocol to a standardized management protocol and
+ vice versa.
+ </item>
+ </list>
+ <p>All of these approaches have different advantages and
+ disadvantages. Mnesia applications can easily be opened to the
+ SNMP protocol. It is possible to establish a direct one-to-one
+ mapping between Mnesia tables and SNMP tables. This
+ means that a Mnesia table can be configured to be <em>both</em>
+ a Mnesia table and an SNMP table. A number of functions to
+ control this behavior are described in the Mnesia reference
+ manual.
+ </p>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/book.gif b/lib/mnesia/doc/src/book.gif
new file mode 100644
index 0000000000..94b3868792
--- /dev/null
+++ b/lib/mnesia/doc/src/book.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/book.xml b/lib/mnesia/doc/src/book.xml
new file mode 100644
index 0000000000..5389e615c7
--- /dev/null
+++ b/lib/mnesia/doc/src/book.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE book SYSTEM "book.dtd">
+
+<book xmlns:xi="http://www.w3.org/2001/XInclude">
+ <header titlestyle="normal">
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Mnesia</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <docno></docno>
+ <date>1997-05-27</date>
+ <rev>1.2</rev>
+ <file>book.sgml</file>
+ </header>
+ <insidecover>
+ </insidecover>
+ <pagetext>Mnesia DBMS</pagetext>
+ <preamble>
+ <contents level="2"></contents>
+ </preamble>
+ <parts lift="no">
+ <xi:include href="part.xml"/>
+ </parts>
+ <applications>
+ <xi:include href="ref_man.xml"/>
+ </applications>
+ <releasenotes>
+ <xi:include href="notes.xml"/>
+ </releasenotes>
+ <index></index>
+</book>
+
diff --git a/lib/mnesia/doc/src/bup.erl b/lib/mnesia/doc/src/bup.erl
new file mode 100644
index 0000000000..b9e541ad6a
--- /dev/null
+++ b/lib/mnesia/doc/src/bup.erl
@@ -0,0 +1,239 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-module(bup).
+-export([
+ change_node_name/5,
+ view/2,
+ test/0,
+ test/1
+ ]).
+
+-export([
+ count/1,
+ display/1
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Management of backups, a few demos
+
+%0
+change_node_name(Mod, From, To, Source, Target) ->
+ Switch =
+ fun(Node) when Node == From -> To;
+ (Node) when Node == To -> throw({error, already_exists});
+ (Node) -> Node
+ end,
+ Convert =
+ fun({schema, db_nodes, Nodes}, Acc) ->
+ {[{schema, db_nodes, lists:map(Switch,Nodes)}], Acc};
+ ({schema, version, Version}, Acc) ->
+ {[{schema, version, Version}], Acc};
+ ({schema, cookie, Cookie}, Acc) ->
+ {[{schema, cookie, Cookie}], Acc};
+ ({schema, Tab, CreateList}, Acc) ->
+ Keys = [ram_copies, disc_copies, disc_only_copies],
+ OptSwitch =
+ fun({Key, Val}) ->
+ case lists:member(Key, Keys) of
+ true -> {Key, lists:map(Switch, Val)};
+ false-> {Key, Val}
+ end
+ end,
+ {[{schema, Tab, lists:map(OptSwitch, CreateList)}], Acc};
+ (Other, Acc) ->
+ {[Other], Acc}
+ end,
+ mnesia:traverse_backup(Source, Mod, Target, Mod, Convert, switched).
+
+view(Source, Mod) ->
+ View = fun(Item, Acc) ->
+ io:format("~p.~n",[Item]),
+ {[Item], Acc + 1}
+ end,
+ mnesia:traverse_backup(Source, Mod, dummy, read_only, View, 0).
+%0
+
+-record(bup_rec, {key, val}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Test change of node name
+%%
+%% Assume db_nodes to be current node and on all other nodes but one
+%% Create new schema, start Mnesia on all db_nodes
+%% Create a table of disc_copies type which is replicated to all db_nodes
+%% Perform a backup and change current node to unused node in backup
+%% Start Mnesia on all nodes according to the new set of db_nodes
+test() ->
+ test(nodes()).
+
+test(Nodes)->
+ AllNodes = (Nodes -- [node()]) ++ [node()],
+ case length(AllNodes) of
+ Length when Length > 1 ->
+ OldBup = "old.BUP",
+ NewBup = "new.BUP",
+ Res = (catch test2(AllNodes, OldBup, NewBup)),
+ case Res of
+ {'EXIT', Reason} ->
+ file:delete(OldBup),
+ file:delete(NewBup),
+ {error, Reason};
+ ok ->
+ ok = count(NewBup),
+ file:delete(OldBup),
+ file:delete(NewBup),
+ ok
+ end;
+ _ ->
+ {error,{"Must run on at least one other node",AllNodes}}
+ end.
+
+test2(AllNodes, OldBup, NewBup) ->
+ ThisNode = node(),
+ OtherNode = hd(AllNodes -- [ThisNode]),
+ OldNodes = AllNodes -- [OtherNode],
+ NewNodes = AllNodes -- [ThisNode],
+ Mod = mnesia_backup, % Assume local files
+ file:delete(OldBup),
+ file:delete(NewBup),
+
+ %% Create old backup
+ rpc:multicall(AllNodes, mnesia, lkill, []),
+ ok = mnesia:delete_schema(AllNodes),
+ ok = mnesia:create_schema(OldNodes),
+ rpc:multicall(OldNodes, mnesia, start, []),
+ rpc:multicall(OldNodes, mnesia, wait_for_tables, [[schema], infinity]),
+
+ CreateList = [{disc_copies, OldNodes},
+ {attributes, record_info(fields, bup_rec)}],
+ {atomic, ok} = mnesia:create_table(bup_rec, CreateList),
+ rpc:multicall(OldNodes, mnesia, wait_for_tables, [[bup_rec], infinity]),
+ OldRecs = [#bup_rec{key = I, val = I * I} || I <- lists:seq(1, 10)],
+ lists:foreach(fun(R) -> ok = mnesia:dirty_write(R) end,OldRecs),
+ ok = mnesia:backup(OldBup, Mod),
+ ok = mnesia:dirty_write(#bup_rec{key = 4711, val = 4711}),
+ rpc:multicall(OldNodes, mnesia, stop, []),
+ {ok,_} = view(OldBup, Mod),
+
+ %% Change node name
+ {ok,_} = change_node_name(Mod, ThisNode, OtherNode, OldBup, NewBup),
+ ok = rpc:call(OtherNode, mnesia, install_fallback, [NewBup, Mod]),
+ {_NewStartRes,[]} = rpc:multicall(NewNodes, mnesia, start, []),
+ rpc:call(OtherNode, mnesia, wait_for_tables, [[bup_rec], infinity]),
+ Wild = rpc:call(OtherNode, mnesia, table_info, [bup_rec, wild_pattern]),
+ NewRecs = rpc:call(OtherNode, mnesia, dirty_match_object, [Wild]),
+ rpc:multicall(NewNodes, mnesia, stop, []),
+ {ok,_} = view(NewBup, Mod),
+
+ %% Sanity test
+ case {lists:sort(OldRecs), lists:sort(NewRecs)} of
+ {Same, Same} -> ok
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(state, {counter_tab, size_tab, acc_size = 0, n_records = 0}).
+
+
+%% Iterates over a backup file and shows some statistics
+%% The identity of ets table containing the counters is not removed
+count(BupFile) ->
+ CounterTab = ets:new(?MODULE, [set, public]),
+ SizeTab = ets:new(?MODULE, [set, public]),
+ Mod = mnesia:system_info(backup_module),
+ State = #state{counter_tab = CounterTab, size_tab = SizeTab},
+ case mnesia:traverse_backup(BupFile, Mod, dummy, read_only, fun incr/2, State) of
+ {ok, State2} ->
+ Res = display(State2),
+ ets:delete(CounterTab),
+ ets:delete(SizeTab),
+ Res;
+ {error, Reason} ->
+ ets:delete(CounterTab),
+ ets:delete(SizeTab),
+ {error, Reason}
+ end.
+
+incr(Rec, State) ->
+ Tab = element(1, Rec),
+ Key = element(2, Rec),
+ Oid = {Tab, Key},
+ incr_counter(State#state.counter_tab, Oid),
+ Size = size(term_to_binary(Rec)),
+ max_size(State#state.size_tab, Tab, Key, Size),
+ AccSize = State#state.acc_size,
+ N = State#state.n_records,
+ State2 = State#state{acc_size = AccSize + Size, n_records = N + 1},
+ {[Rec], State2}.
+
+incr_counter(T, Counter) ->
+ case catch ets:update_counter(T, Counter, 1) of
+ {'EXIT', _} ->
+ ets:insert(T, {Counter, 1});
+ _ ->
+ ignore
+ end.
+
+max_size(T, Tab, Key, Size) ->
+ case catch ets:lookup_element(T, Tab, 2) of
+ {'EXIT', _} ->
+ ets:insert(T, {Tab, Size, Key});
+ OldSize when OldSize < Size ->
+ ets:insert(T, {Tab, Size, Key});
+ _ ->
+ ignore
+ end.
+
+%% Displays the statistics found in the ets table
+display(State) ->
+ CounterTab = State#state.counter_tab,
+ Tabs = [T || {{_, T}, _} <- match_tab(CounterTab, schema)],
+ io:format("~w tables with totally: ~w records, ~w keys, ~w bytes~n",
+ [length(Tabs),
+ State#state.n_records,
+ ets:info(CounterTab, size),
+ State#state.acc_size]),
+ display(State, lists:sort(Tabs)).
+
+display(State, [Tab | Tabs]) ->
+ Counters = match_tab(State#state.counter_tab, Tab),
+ io:format("~-10w records in table ~w~n", [length(Counters), Tab]),
+ Fun = fun({_Oid, Val}) when Val < 5 ->
+ ignore;
+ ({Oid, Val}) ->
+ io:format("~-10w *** records with key ~w~n", [Val, Oid])
+ end,
+ lists:foreach(Fun, Counters),
+ display_size(State#state.size_tab, Tab),
+ display(State, Tabs);
+display(_CounterTab, []) ->
+ ok.
+
+match_tab(T, Tab) ->
+ ets:match_object(T, {{Tab, '_'}, '_'}).
+
+display_size(T, Tab) ->
+ case catch ets:lookup(T, Tab) of
+ [] ->
+ ignore;
+ [{_, Size, Key}] when Size > 1000 ->
+ io:format("~-10w --- bytes occupied by largest record ~w~n",
+ [Size, {Tab, Key}]);
+ [{_, _, _}] ->
+ ignore
+ end.
diff --git a/lib/mnesia/doc/src/company.erl b/lib/mnesia/doc/src/company.erl
new file mode 100644
index 0000000000..28c32ed513
--- /dev/null
+++ b/lib/mnesia/doc/src/company.erl
@@ -0,0 +1,373 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-module(company).
+
+-compile(export_all).
+
+%0
+
+-include_lib("stdlib/include/qlc.hrl").
+-include("company.hrl").
+
+init() ->
+ mnesia:create_table(employee,
+ [{attributes, record_info(fields, employee)}]),
+ mnesia:create_table(dept,
+ [{attributes, record_info(fields, dept)}]),
+ mnesia:create_table(project,
+ [{attributes, record_info(fields, project)}]),
+ mnesia:create_table(manager, [{type, bag},
+ {attributes, record_info(fields, manager)}]),
+ mnesia:create_table(at_dep,
+ [{attributes, record_info(fields, at_dep)}]),
+ mnesia:create_table(in_proj, [{type, bag},
+ {attributes, record_info(fields, in_proj)}]).
+
+%0
+
+%1
+
+insert_emp(Emp, DeptId, ProjNames) ->
+ Ename = Emp#employee.name,
+ Fun = fun() ->
+ mnesia:write(Emp),
+ AtDep = #at_dep{emp = Ename, dept_id = DeptId},
+ mnesia:write(AtDep),
+ mk_projs(Ename, ProjNames)
+ end,
+ mnesia:transaction(Fun).
+
+
+mk_projs(Ename, [ProjName|Tail]) ->
+ mnesia:write(#in_proj{emp = Ename, proj_name = ProjName}),
+ mk_projs(Ename, Tail);
+mk_projs(_, []) -> ok.
+
+
+%1
+
+%2
+females() ->
+ F = fun() ->
+ Q = qlc:q([E#employee.name || E <- mnesia:table(employee),
+ E#employee.sex == female]),
+ qlc:e(Q)
+ end,
+ mnesia:transaction(F).
+%2
+%20
+all_females() ->
+ F = fun() ->
+ Female = #employee{sex = female, name = '$1', _ = '_'},
+ mnesia:select(employee, [{Female, [], ['$1']}])
+ end,
+ mnesia:transaction(F).
+%20
+
+g() -> l.
+
+%3
+female_bosses() ->
+ Q = qlc:q( [{E#employee.name, Boss#employee.name} ||
+ E <- mnesia:table(employee),
+ Boss <- mnesia:table(employee),
+ Atdep <- mnesia:table(at_dep),
+ Mgr <- mnesia:table(manager),
+ E#employee.sex == female,
+ Atdep#at_dep.emp == E#employee.emp_no,
+ Mgr#manager.emp == Boss#employee.emp_no,
+ Atdep#at_dep.dept_id == Mgr#manager.dept]
+ ),
+ mnesia:transaction(fun() -> qlc:e(Q) end).
+%3
+
+%4
+raise_females(Amount) ->
+ F = fun() ->
+ Q = qlc:q([E || E <- mnesia:table(employee),
+ E#employee.sex == female]),
+ Fs = qlc:e(Q),
+ over_write(Fs, Amount)
+ end,
+ mnesia:transaction(F).
+
+over_write([E|Tail], Amount) ->
+ Salary = E#employee.salary + Amount,
+ New = E#employee{salary = Salary},
+ mnesia:write(New),
+ 1 + over_write(Tail, Amount);
+over_write([], _) ->
+ 0.
+%4
+
+%5
+raise(Eno, Raise) ->
+ F = fun() ->
+ [E] = mnesia:read(employee, Eno, write),
+ Salary = E#employee.salary + Raise,
+ New = E#employee{salary = Salary},
+ mnesia:write(New)
+ end,
+ mnesia:transaction(F).
+%5
+
+
+%6
+bad_raise(Eno, Raise) ->
+ F = fun() ->
+ [E] = mnesia:read({employee, Eno}),
+ Salary = E#employee.salary + Raise,
+ New = E#employee{salary = Salary},
+ io:format("Trying to write ... ~n", []),
+ mnesia:write(New)
+ end,
+ mnesia:transaction(F).
+%6
+
+%9
+get_emps(Salary, Dep) ->
+ Q = qlc:q(
+ [E || E <- mnesia:table(employee),
+ At <- mnesia:table(at_dep),
+ E#employee.salary > Salary,
+ E#employee.emp_no == At#at_dep.emp,
+ At#at_dep.dept_id == Dep]
+ ),
+ F = fun() -> qlc:e(Q) end,
+ mnesia:transaction(F).
+%9
+%10
+get_emps2(Salary, Dep) ->
+ Epat = mnesia:table_info(employee, wild_pattern),
+ Apat = mnesia:table_info(at_dep, wild_pattern),
+ F = fun() ->
+ All = mnesia:match_object(Epat),
+ High = filter(All, Salary),
+ Alldeps = mnesia:match_object(Apat),
+ filter_deps(High, Alldeps, Dep)
+ end,
+ mnesia:transaction(F).
+
+
+filter([E|Tail], Salary) ->
+ if
+ E#employee.salary > Salary ->
+ [E | filter(Tail, Salary)];
+ true ->
+ filter(Tail, Salary)
+ end;
+filter([], _) ->
+ [].
+
+filter_deps([E|Tail], Deps, Dep) ->
+ case search_deps(E#employee.name, Deps, Dep) of
+ true ->
+ [E | filter_deps(Tail, Deps, Dep)];
+ false ->
+ filter_deps(Tail, Deps, Dep)
+ end;
+filter_deps([], _,_) ->
+ [].
+
+
+search_deps(Name, [D|Tail], Dep) ->
+ if
+ D#at_dep.emp == Name,
+ D#at_dep.dept_id == Dep -> true;
+ true -> search_deps(Name, Tail, Dep)
+ end;
+search_deps(_Name, _Tail, _Dep) ->
+ false.
+
+%10
+
+
+
+%11
+bench1() ->
+ Me = #employee{emp_no= 104732,
+ name = klacke,
+ salary = 7,
+ sex = male,
+ phone = 99586,
+ room_no = {221, 015}},
+
+ F = fun() -> insert_emp(Me, 'B/DUR', [erlang, mnesia, otp]) end,
+ T1 = timer:tc(company, dotimes, [1000, F]),
+ mnesia:add_table_copy(employee, b@skeppet, ram_copies),
+ mnesia:add_table_copy(at_dep, b@skeppet, ram_copies),
+ mnesia:add_table_copy(in_proj, b@skeppet, ram_copies),
+ T2 = timer:tc(company, dotimes, [1000, F]),
+ {T1, T2}.
+
+dotimes(0, _) ->
+ ok;
+dotimes(I, F) ->
+ F(), dotimes(I-1, F).
+
+%11
+
+
+
+
+
+%12
+
+dist_init() ->
+ mnesia:create_table(employee,
+ [{ram_copies, [a@gin, b@skeppet]},
+ {attributes, record_info(fields,
+ employee)}]),
+ mnesia:create_table(dept,
+ [{ram_copies, [a@gin, b@skeppet]},
+ {attributes, record_info(fields, dept)}]),
+ mnesia:create_table(project,
+ [{ram_copies, [a@gin, b@skeppet]},
+ {attributes, record_info(fields, project)}]),
+ mnesia:create_table(manager, [{type, bag},
+ {ram_copies, [a@gin, b@skeppet]},
+ {attributes, record_info(fields,
+ manager)}]),
+ mnesia:create_table(at_dep,
+ [{ram_copies, [a@gin, b@skeppet]},
+ {attributes, record_info(fields, at_dep)}]),
+ mnesia:create_table(in_proj,
+ [{type, bag},
+ {ram_copies, [a@gin, b@skeppet]},
+ {attributes, record_info(fields, in_proj)}]).
+%12
+
+%13
+remove_proj(ProjName) ->
+ F = fun() ->
+ Ip = qlc:e(qlc:q([X || X <- mnesia:table(in_proj),
+ X#in_proj.proj_name == ProjName]
+ )),
+ mnesia:delete({project, ProjName}),
+ del_in_projs(Ip)
+ end,
+ mnesia:transaction(F).
+
+del_in_projs([Ip|Tail]) ->
+ mnesia:delete_object(Ip),
+ del_in_projs(Tail);
+del_in_projs([]) ->
+ done.
+%13
+
+%14
+sync() ->
+ case mnesia:wait_for_tables(tabs(), 10000) of
+ {timeout, RemainingTabs} ->
+ panic(RemainingTabs);
+ ok ->
+ synced
+ end.
+
+tabs() -> [employee, dept, project, at_dep, in_proj, manager].
+
+%14
+
+
+find_male_on_second_floor() ->
+ Select = fun() ->
+%21
+ MatchHead = #employee{name='$1', sex=male, room_no={'$2', '_'}, _='_'},
+ Guard = [{'>=', '$2', 220},{'<', '$2', 230}],
+ Result = '$1',
+ mnesia:select(employee,[{MatchHead, Guard, [Result]}])
+%21
+ end,
+ mnesia:transaction(Select).
+
+panic(X) -> exit({panic, X}).
+
+
+fill_tables() ->
+ Emps =
+ [
+ {employee, 104465, "Johnson Torbjorn", 1, male, 99184, {242,038}},
+ {employee, 107912, "Carlsson Tuula", 2, female,94556, {242,056}},
+ {employee, 114872, "Dacker Bjarne", 3, male, 99415, {221,035}},
+ {employee, 104531, "Nilsson Hans", 3, male, 99495, {222,026}},
+ {employee, 104659, "Tornkvist Torbjorn", 2, male, 99514, {222,022}},
+ {employee, 104732, "Wikstrom Claes", 2, male, 99586, {221,015}},
+ {employee, 117716, "Fedoriw Anna", 1, female,99143, {221,031}},
+ {employee, 115018, "Mattsson Hakan", 3, male, 99251, {203,348}}
+ ],
+
+ Dept = [
+ {dept, 'B/SF', "Open Telecom Platform"},
+ {dept, 'B/SFP', "OTP - Product Development"},
+ {dept, 'B/SFR', "Computer Science Laboratory"}
+ ],
+
+ Projects = [
+ {project, erlang, 1},
+ {project, otp, 2},
+ {project, beam, 3},
+ {project, mnesia, 5},
+ {project, wolf, 6},
+ {project, documentation, 7},
+ {project, www, 8}
+ ],
+
+ Manager = [
+ {manager, 104465, 'B/SF'},
+ {manager, 104465, 'B/SFP'},
+ {manager, 114872, 'B/SFR'}
+ ],
+
+ At_dep = [
+ {at_dep, 104465, 'B/SF'},
+ {at_dep, 107912, 'B/SF'},
+ {at_dep, 114872, 'B/SFR'},
+ {at_dep, 104531, 'B/SFR'},
+ {at_dep, 104659, 'B/SFR'},
+ {at_dep, 104732, 'B/SFR'},
+ {at_dep, 117716, 'B/SFP'},
+ {at_dep, 115018, 'B/SFP'}
+ ],
+
+ In_proj = [
+ {in_proj, 104465, otp},
+ {in_proj, 107912, otp},
+ {in_proj, 114872, otp},
+ {in_proj, 104531, otp},
+ {in_proj, 104531, mnesia},
+ {in_proj, 104545, wolf},
+ {in_proj, 104659, otp},
+ {in_proj, 104659, wolf},
+ {in_proj, 104732, otp},
+ {in_proj, 104732, mnesia},
+ {in_proj, 104732, erlang},
+ {in_proj, 117716, otp},
+ {in_proj, 117716, documentation},
+ {in_proj, 115018, otp},
+ {in_proj, 115018, mnesia}
+ ],
+
+ [mnesia:dirty_write(W) || W <- Emps],
+ [mnesia:dirty_write(W) || W <- Dept],
+ [mnesia:dirty_write(W) || W <- Projects],
+ %% Relations
+ [mnesia:dirty_write(W) || W <- Manager],
+ [mnesia:dirty_write(W) || W <- At_dep],
+ [mnesia:dirty_write(W) || W <- In_proj],
+
+ ok.
diff --git a/lib/mnesia/doc/src/company.fig b/lib/mnesia/doc/src/company.fig
new file mode 100644
index 0000000000..9d5fcab041
--- /dev/null
+++ b/lib/mnesia/doc/src/company.fig
@@ -0,0 +1,88 @@
+#FIG 3.1
+Portrait
+Center
+Inches
+1200 2
+6 8550 2700 10950 3150
+2 2 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5
+ 8550 2700 10950 2700 10950 3150 8550 3150 8550 2700
+4 0 -1 0 0 0 12 0.0000 4 180 495 8850 3000 Project\001
+-6
+6 4950 2700 7350 3150
+2 2 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5
+ 4950 2700 7350 2700 7350 3150 4950 3150 4950 2700
+4 0 -1 0 0 0 12 0.0000 4 180 705 5325 3000 Employee\001
+-6
+6 1275 2775 3675 3225
+2 2 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5
+ 1275 2775 3675 2775 3675 3225 1275 3225 1275 2775
+4 0 -1 0 0 0 12 0.0000 4 180 345 1650 3075 Dept\001
+-6
+6 600 4500 2325 5700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5
+ 600 5100 1425 5700 2325 5100 1425 4500 600 5100
+4 0 -1 0 0 0 12 0.0000 4 180 630 1125 5175 Manager\001
+-6
+6 9000 4500 10725 5700
+6 9600 5025 10125 5250
+6 9600 5025 10125 5250
+4 0 -1 0 0 0 12 0.0000 4 180 525 9600 5175 In_proj\001
+-6
+-6
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5
+ 9000 5100 9825 5700 10725 5100 9825 4500 9000 5100
+-6
+6 4125 2100 7950 2700
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 5287 2325 262 225 5025 2100 5550 2550
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 5812 2325 262 225 5550 2100 6075 2550
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 6337 2325 262 225 6075 2100 6600 2550
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 6862 2325 262 225 6600 2100 7125 2550
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 4612 2325 413 225 5025 2100 4200 2550
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 7537 2325 413 225 7950 2100 7125 2550
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 4800 2550 5925 2700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 5475 2550 5850 2700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 5850 2550 5850 2700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 6300 2550 5850 2700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 6750 2550 5850 2700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 7275 2550 5925 2700
+4 0 -1 0 0 0 12 0.0000 4 180 3540 4350 2400 emp_no name salary sex phone room_no\001
+-6
+6 3300 4500 5100 5775
+6 3900 5025 4425 5250
+4 0 -1 0 0 0 12 0.0000 4 180 525 3900 5175 At_dep\001
+-6
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5
+ 3323 5135 4148 5735 5048 5135 4148 4535 3323 5135
+-6
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 1875 2287 600 187 1275 2100 2475 2475
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 3075 2287 600 187 2475 2100 3675 2475
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 8850 2325 450 225 8400 2100 9300 2550
+1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 9750 2325 450 225 9300 2100 10200 2550
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 1575 3225 600 5100
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 2325 5100 5250 3150
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 5850 3150 5025 5175
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 3300 5100 2550 3225
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 7050 3150 9000 5100
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 10725 5100 9825 3150
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 1875 2475 2400 2775
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 3075 2475 2400 2775
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 9000 2550 9750 2700
+2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2
+ 9750 2550 9675 2700
+4 0 -1 0 0 0 12 0.0000 4 135 1500 1575 2325 id name\001
+4 0 -1 0 0 0 12 0.0000 4 135 1635 8775 2400 Name number \001
diff --git a/lib/mnesia/doc/src/company.gif b/lib/mnesia/doc/src/company.gif
new file mode 100644
index 0000000000..3cd0185e69
--- /dev/null
+++ b/lib/mnesia/doc/src/company.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/company.hrl b/lib/mnesia/doc/src/company.hrl
new file mode 100644
index 0000000000..85e0e6ff12
--- /dev/null
+++ b/lib/mnesia/doc/src/company.hrl
@@ -0,0 +1,50 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+
+
+%0
+-record(employee, {emp_no,
+ name,
+ salary,
+ sex,
+ phone,
+ room_no}).
+
+-record(dept, {id,
+ name}).
+
+-record(project, {name,
+ number}).
+
+
+-record(manager, {emp,
+ dept}).
+
+-record(at_dep, {emp,
+ dept_id}).
+
+-record(in_proj, {emp,
+ proj_name}).
+
+%0
+
+
+
+
+
+
diff --git a/lib/mnesia/doc/src/company.ps b/lib/mnesia/doc/src/company.ps
new file mode 100644
index 0000000000..64a45d07f3
--- /dev/null
+++ b/lib/mnesia/doc/src/company.ps
@@ -0,0 +1,213 @@
+%!PS-Adobe-2.0
+%%Title: company.fig
+%%Creator: fig2dev Version 3.1 Patchlevel 2
+%%CreationDate: Thu Oct 31 18:09:46 1996
+%%For: klacke@gin (Claes Wikstrom,EUA/SU)
+%Magnification: 0.70
+%%Orientation: Portrait
+%%BoundingBox: 79 343 516 498
+%%Pages: 1
+%%BeginSetup
+%%IncludeFeature: *PageSize A4
+%%EndSetup
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+55.0 585.5 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n 0 842 m 0 0 l 595 0 l 595 842 l cp clip
+ 0.04200 0.04200 sc
+%%Page: 1 1
+7.500 slw
+% Polyline
+n 8550 2700 m 10950 2700 l 10950 3150 l 8550 3150 l cp gs col-1 s gr
+/Times-Roman ff 180.00 scf sf
+8850 3000 m
+gs 1 -1 sc (Project) col-1 sh gr
+% Polyline
+n 4950 2700 m 7350 2700 l 7350 3150 l 4950 3150 l cp gs col-1 s gr
+/Times-Roman ff 180.00 scf sf
+5325 3000 m
+gs 1 -1 sc (Employee) col-1 sh gr
+% Polyline
+n 1275 2775 m 3675 2775 l 3675 3225 l 1275 3225 l cp gs col-1 s gr
+/Times-Roman ff 180.00 scf sf
+1650 3075 m
+gs 1 -1 sc (Dept) col-1 sh gr
+% Polyline
+n 600 5100 m 1425 5700 l 2325 5100 l 1425 4500 l 600 5100 l gs col-1 s gr
+/Times-Roman ff 180.00 scf sf
+1125 5175 m
+gs 1 -1 sc (Manager) col-1 sh gr
+/Times-Roman ff 180.00 scf sf
+9600 5175 m
+gs 1 -1 sc (In_proj) col-1 sh gr
+% Polyline
+n 9000 5100 m 9825 5700 l 10725 5100 l 9825 4500 l 9000 5100 l gs col-1 s gr
+% Ellipse
+n 5287 2325 262 225 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 5812 2325 262 225 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 6337 2325 262 225 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 6862 2325 262 225 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 4612 2325 413 225 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 7537 2325 413 225 0 360 DrawEllipse gs col-1 s gr
+
+% Polyline
+n 4800 2550 m 5925 2700 l gs col-1 s gr
+% Polyline
+n 5475 2550 m 5850 2700 l gs col-1 s gr
+% Polyline
+n 5850 2550 m 5850 2700 l gs col-1 s gr
+% Polyline
+n 6300 2550 m 5850 2700 l gs col-1 s gr
+% Polyline
+n 6750 2550 m 5850 2700 l gs col-1 s gr
+% Polyline
+n 7275 2550 m 5925 2700 l gs col-1 s gr
+/Times-Roman ff 180.00 scf sf
+4350 2400 m
+gs 1 -1 sc (emp_no name salary sex phone room_no) col-1 sh gr
+/Times-Roman ff 180.00 scf sf
+3900 5175 m
+gs 1 -1 sc (At_dep) col-1 sh gr
+% Polyline
+n 3323 5135 m 4148 5735 l 5048 5135 l 4148 4535 l 3323 5135 l gs col-1 s gr
+% Ellipse
+n 1875 2287 600 187 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 3075 2287 600 187 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 8850 2325 450 225 0 360 DrawEllipse gs col-1 s gr
+
+% Ellipse
+n 9750 2325 450 225 0 360 DrawEllipse gs col-1 s gr
+
+% Polyline
+n 1575 3225 m 600 5100 l gs col-1 s gr
+% Polyline
+n 2325 5100 m 5250 3150 l gs col-1 s gr
+% Polyline
+n 5850 3150 m 5025 5175 l gs col-1 s gr
+% Polyline
+n 3300 5100 m 2550 3225 l gs col-1 s gr
+% Polyline
+n 7050 3150 m 9000 5100 l gs col-1 s gr
+% Polyline
+n 10725 5100 m 9825 3150 l gs col-1 s gr
+% Polyline
+n 1875 2475 m 2400 2775 l gs col-1 s gr
+% Polyline
+n 3075 2475 m 2400 2775 l gs col-1 s gr
+% Polyline
+n 9000 2550 m 9750 2700 l gs col-1 s gr
+% Polyline
+n 9750 2550 m 9675 2700 l gs col-1 s gr
+/Times-Roman ff 180.00 scf sf
+1575 2325 m
+gs 1 -1 sc ( id name) col-1 sh gr
+/Times-Roman ff 180.00 scf sf
+8775 2400 m
+gs 1 -1 sc (Name number ) col-1 sh gr
+showpage
+$F2psEnd
+rs
diff --git a/lib/mnesia/doc/src/company_o.erl b/lib/mnesia/doc/src/company_o.erl
new file mode 100644
index 0000000000..3c7ad0d5e5
--- /dev/null
+++ b/lib/mnesia/doc/src/company_o.erl
@@ -0,0 +1,144 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-module(company_o).
+-compile(export_all).
+
+-import(mnesia, [transaction/1]).
+
+%0
+-include_lib("stdlib/include/qlc.hrl").
+-include("company_o.hrl").
+
+
+sinit() ->
+ mnesia:create_schema([node()]).
+
+init() ->
+ mnesia:create_table(employee,
+ [{attributes, record_info(fields, employee)}]),
+ mnesia:create_table(dept,
+ [{attributes, record_info(fields, dept)}]),
+ mnesia:create_table(project,
+ [{attributes, record_info(fields, project)}]).
+
+%0
+
+
+
+%1
+
+insert_emp(Emp, DeptId, ProjNames) ->
+ Fun = fun() ->
+ mnesia:write(Emp#employee{dept = DeptId,
+ projects = ProjNames})
+ end,
+ mnesia:transaction(Fun).
+
+
+%1
+
+%2
+females() ->
+ F = fun() ->
+ Q = qlc:q([E#employee.name || E <- mnesia:table(employee),
+ E#employee.sex == female]),
+ qlc:e(Q)
+ end,
+ mnesia:transaction(F).
+%2
+
+%3
+female_bosses() ->
+ F = fun() -> qlc:e(qlc:q(
+ [{E#employee.name, Boss#employee.name} ||
+ E <- mnesia:table(employee),
+ Boss <- mnesia:table(employee),
+ Boss#employee.emp_no == E#employee.manager,
+ E#employee.sex == female]
+ ))
+ end,
+ mnesia:transaction(F).
+
+
+%4
+raise_females(Amount) ->
+ F = fun() ->
+ Q = qlc:q([E || E <- mnesia:table(employee),
+ E#employee.sex == female]),
+ Fs = qlc:e(Q),
+ over_write(Fs, Amount)
+ end,
+ mnesia:transaction(F).
+
+over_write([E|Tail], Amount) ->
+ Salary = E#employee.salary + Amount,
+ New = E#employee{salary = Salary},
+ mnesia:write(New),
+ 1 + over_write(Tail, Amount);
+over_write([], _) ->
+ 0.
+%4
+
+%5
+raise(Eno, Raise) ->
+ F = fun() ->
+ [E] = mnesia:read({employee, Eno}),
+ Salary = E#employee.salary + Raise,
+ New = E#employee{salary = Salary},
+ mnesia:write(New)
+ end,
+ mnesia:transaction(F).
+%5
+
+
+%6
+bad_raise(Eno, Raise) ->
+ F = fun() ->
+ [E] = mnesia:read({employee, Eno}),
+ Salary = E#employee.salary + Raise,
+ New = E#employee{salary = Salary},
+ io:format("Trying to write ... ~n", []),
+ mnesia:write(New)
+ end,
+ mnesia:transaction(F).
+%6
+
+%9
+get_emps(Salary, Dep) ->
+ Q = qlc:q(
+ [E || E <- mnesia:table(employee),
+ E#employee.salary > Salary,
+ E#employee.dept == Dep]
+ ),
+ F = fun() -> qlc:e(Q) end,
+ transaction(F).
+%9
+
+%10
+get_emps2(Salary, Dep) ->
+ Epat0 = mnesia:table_info(employee, wild_pattern),
+ Epat = Epat0#employee{dept = Dep},
+ F = fun() ->
+ All = mnesia:match_object(Epat),
+ [E || E <-All, E#employee.salary > Salary ]
+ end,
+ mnesia:transaction(F).
+
+
+%10
+
diff --git a/lib/mnesia/doc/src/company_o.hrl b/lib/mnesia/doc/src/company_o.hrl
new file mode 100644
index 0000000000..d8b584c296
--- /dev/null
+++ b/lib/mnesia/doc/src/company_o.hrl
@@ -0,0 +1,38 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+
+%0
+-record(employee, {emp_no,
+ name,
+ salary,
+ sex,
+ phone,
+ room_no,
+ dept,
+ projects,
+ manager}).
+
+
+-record(dept, {id,
+ name}).
+
+-record(project, {name,
+ number,
+ location}).
+
+%0
diff --git a/lib/mnesia/doc/src/fascicules.xml b/lib/mnesia/doc/src/fascicules.xml
new file mode 100644
index 0000000000..0678195e07
--- /dev/null
+++ b/lib/mnesia/doc/src/fascicules.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE fascicules SYSTEM "fascicules.dtd">
+
+<fascicules>
+ <fascicule file="part" href="part_frame.html" entry="no">
+ User's Guide
+ </fascicule>
+ <fascicule file="ref_man" href="ref_man_frame.html" entry="yes">
+ Reference Manual
+ </fascicule>
+ <fascicule file="part_notes" href="part_notes_frame.html" entry="no">
+ Release Notes
+ </fascicule>
+ <fascicule file="" href="../../../../doc/print.html" entry="no">
+ Off-Print
+ </fascicule>
+</fascicules>
+
diff --git a/lib/mnesia/doc/src/make.dep b/lib/mnesia/doc/src/make.dep
new file mode 100644
index 0000000000..6e79484cb3
--- /dev/null
+++ b/lib/mnesia/doc/src/make.dep
@@ -0,0 +1,46 @@
+# ----------------------------------------------------
+# >>>> Do not edit this file <<<<
+# This file was automaticly generated by
+# /home/otp/bin/docdepend
+# ----------------------------------------------------
+
+
+# ----------------------------------------------------
+# TeX files that the DVI file depend on
+# ----------------------------------------------------
+
+book.dvi: Mnesia_App_A.tex Mnesia_App_B.tex Mnesia_App_C.tex \
+ Mnesia_App_D.tex Mnesia_chap1.tex Mnesia_chap2.tex \
+ Mnesia_chap3.tex Mnesia_chap4.tex Mnesia_chap5.tex \
+ Mnesia_chap7.tex Mnesia_chap8.tex book.tex \
+ mnesia.tex mnesia_frag_hash.tex mnesia_registry.tex \
+ part.tex ref_man.tex
+
+# ----------------------------------------------------
+# Source inlined when transforming from source to LaTeX
+# ----------------------------------------------------
+
+Mnesia_App_B.tex: ../../src/mnesia_backup.erl
+
+Mnesia_App_C.tex: ../../src/mnesia_frag.erl
+
+Mnesia_App_D.tex: ../../src/mnesia_frag_hash.erl
+
+Mnesia_chap2.tex: company.erl company.hrl
+
+Mnesia_chap3.tex: company.erl
+
+Mnesia_chap4.tex: company.erl
+
+Mnesia_chap5.tex: FRUITS company.erl company_o.erl company_o.hrl
+
+Mnesia_chap7.tex: bup.erl
+
+book.tex: ref_man.xml
+
+# ----------------------------------------------------
+# Pictures that the DVI file depend on
+# ----------------------------------------------------
+
+book.dvi: company.ps
+
diff --git a/lib/mnesia/doc/src/mnesia.gif b/lib/mnesia/doc/src/mnesia.gif
new file mode 100644
index 0000000000..fbbabee5aa
--- /dev/null
+++ b/lib/mnesia/doc/src/mnesia.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/mnesia.xml b/lib/mnesia/doc/src/mnesia.xml
new file mode 100644
index 0000000000..3484cd104a
--- /dev/null
+++ b/lib/mnesia/doc/src/mnesia.xml
@@ -0,0 +1,3100 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>1996</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>mnesia</title>
+ <prepared>Claes Wikstr&ouml;m and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date></date>
+ <rev></rev>
+ <file></file>
+ </header>
+ <module>mnesia</module>
+ <modulesummary>A Distributed Telecommunications DBMS </modulesummary>
+ <description>
+ <p><c>Mnesia</c> is a distributed DataBase Management System (DBMS),
+ appropriate for telecommunications applications and other Erlang
+ applications which require continuous operation and exhibit soft
+ real-time properties.
+ </p>
+ <p>Listed below are some of the most important and attractive capabilities, Mnesia provides:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>A relational/object hybrid data model which is
+ suitable for telecommunications applications.
+ </p>
+ </item>
+ <item>
+ <p>A specifically designed DBMS query language, QLC (as an add-on library).
+ </p>
+ </item>
+ <item>
+ <p>Persistence. Tables may be coherently kept on disc as
+ well as in main memory.
+ </p>
+ </item>
+ <item>
+ <p>Replication. Tables may be replicated at several nodes.
+ </p>
+ </item>
+ <item>
+ <p>Atomic transactions. A series of table manipulation
+ operations can be grouped into a single atomic
+ transaction.
+ </p>
+ </item>
+ <item>
+ <p>Location transparency. Programs can be written without
+ knowledge of the actual location of data.
+ </p>
+ </item>
+ <item>
+ <p>Extremely fast real time data searches.
+ </p>
+ </item>
+ <item>
+ <p>Schema manipulation routines. It is possible to
+ reconfigure the DBMS at runtime without stopping the
+ system.
+ </p>
+ </item>
+ </list>
+ <p>This Reference Manual describes the Mnesia API. This includes
+ functions used to define and manipulate Mnesia tables.
+ </p>
+ <p>All functions documented in these pages can be used in any
+ combination with queries using the list comprehension notation. The
+ query notation is described in the QLC's man page.
+ </p>
+ <p>Data in Mnesia is organized as a set of tables. Each table
+ has a name which must be an atom. Each table is made up of
+ Erlang records. The user is responsible for the record
+ definitions. Each table also has a set of properties. Below
+ are some of the properties that are associated with each
+ table:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>type</c>. Each table can either have 'set',
+ 'ordered_set' or 'bag' semantics. Note: currently 'ordered_set'
+ is not supported for 'disc_only_copies'. If a table is of type
+ 'set' it means that each key leads to either one or zero
+ records. <br></br>
+If a new item is inserted with the same key as
+ an existing record, the old record is overwritten. On the
+ other hand, if a table is of type 'bag', each key can map to
+ several records. However, all records in type bag tables are
+ unique, only the keys may be duplicated.
+ </p>
+ </item>
+ <item>
+ <p><c>record_name</c>. All records stored in a table must
+ have the same name. You may say that the records must be
+ instances of the same record type.
+ </p>
+ </item>
+ <item>
+ <p><c>ram_copies</c> A table can be replicated on a number
+ of Erlang nodes. The <c>ram_copies</c> property specifies a
+ list of Erlang nodes where RAM copies are kept. These
+ copies can be dumped to disc at regular intervals. However,
+ updates to these copies are not written to disc on a
+ transaction basis.
+ </p>
+ </item>
+ <item>
+ <p><c>disc_copies</c> The <c>disc_copies</c> property
+ specifies a list of Erlang nodes where the table is kept in
+ RAM as well as on disc. All updates of the table are
+ performed on the actual table and are also logged to disc.
+ If a table is of type <c>disc_copies</c> at a certain node,
+ it means that the entire table is resident in RAM memory as
+ well as on disc. Each transaction performed on the table is
+ appended to a LOG file as well as written into the RAM
+ table.
+ </p>
+ </item>
+ <item>
+ <p><c>disc_only_copies</c> Some, or all, table replicas
+ can be kept on disc only. These replicas are considerably
+ slower than the RAM based replicas.
+ </p>
+ </item>
+ <item>
+ <p><c>index</c> This is a list of attribute names, or
+ integers, which specify the tuple positions on which
+ Mnesia shall build and maintain an extra index table.
+ </p>
+ </item>
+ <item>
+ <p><c>local_content</c> When an application requires
+ tables whose contents is local to each node,
+ <c>local_content</c> tables may be used. The name of the
+ table is known to all Mnesia nodes, but its contents is
+ unique on each node. This means that access to such a table
+ must be done locally. Set the <c>local_content</c> field to
+ <c>true</c> if you want to enable the <c>local_content</c>
+ behavior. The default is <c>false</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>snmp</c> Each (set based) Mnesia table can be
+ automatically turned into an SNMP ordered table as well.
+ This property specifies the types of the SNMP keys.
+ </p>
+ </item>
+ <item>
+ <p><c>attributes</c>. The names of the attributes for the
+ records that are inserted in the table.
+ </p>
+ </item>
+ </list>
+ <p>See <c>mnesia:create_table/2</c> about the complete set of
+ table properties and their details.
+ </p>
+ <p>This document uses a table of persons to illustrate various
+ examples. The following record definition is assumed:
+ </p>
+ <code type="none">
+-record(person, {name,
+ age = 0,
+ address = unknown,
+ salary = 0,
+ children = []}),
+ </code>
+ <p>The first attribute of the record is the primary key, or key
+ for short.
+ </p>
+ <p>The function descriptions are sorted in alphabetic order. <em>Hint:</em>
+ start to read about <c>mnesia:create_table/2</c>,
+ <c>mnesia:lock/2</c> and <c>mnesia:activity/4</c> before you continue on
+ and learn about the rest.
+ </p>
+ <p>Writing or deleting in transaction context creates a local copy
+ of each modified record during the transaction. During iteration,
+ i.e. <c>mnesia:fold[lr]/4</c> <c>mnesia:next/2</c> <c>mnesia:prev/2</c>
+ <c>mnesia:snmp_get_next_index/2</c>, mnesia will compensate for
+ every written or deleted record, which may reduce the
+ performance. If possible avoid writing or deleting records in
+ the same transaction before iterating over the table.
+ </p>
+ </description>
+ <funcs>
+ <func>
+ <name>abort(Reason) -> transaction abort </name>
+ <fsummary>Abort the current transaction.</fsummary>
+ <desc>
+ <p>Makes the transaction silently
+ return the tuple <c>{aborted, Reason}</c>.
+ The abortion of a Mnesia transaction means that
+ an exception will be thrown to an enclosing <c>catch</c>.
+ Thus, the expression <c>catch mnesia:abort(x)</c> does
+ not abort the transaction. </p>
+ </desc>
+ </func>
+ <func>
+ <name>activate_checkpoint(Args) -> {ok,Name,Nodes} | {error,Reason}</name>
+ <fsummary>Activate a checkpoint.</fsummary>
+ <desc>
+ <p>A checkpoint is a consistent view of the system.
+ A checkpoint can be activated on a set of tables.
+ This checkpoint can then be traversed and will
+ present a view of the system as it existed at the time when
+ the checkpoint was activated, even if the tables are being or have been
+ manipulated.
+ </p>
+ <p><c>Args</c> is a list of the following tuples:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>{name,Name}</c>. <c>Name</c> of checkpoint. Each
+ checkpoint must have a name which is unique to the
+ associated nodes. The name can be reused only once the
+ checkpoint has been deactivated. By default, a name
+ which is probably unique is generated.
+ </p>
+ </item>
+ <item>
+ <p><c>{max,MaxTabs}</c><c>MaxTabs</c> is a list of
+ tables that should be included in the checkpoint. The
+ default is []. For these tables, the redundancy will be
+ maximized and checkpoint information will be retained together
+ with all replicas. The checkpoint becomes more fault
+ tolerant if the tables have several replicas. When a new
+ replica is added by means of the schema manipulation
+ function <c>mnesia:add_table_copy/3</c>, a retainer will
+ also be attached automatically.
+ </p>
+ </item>
+ <item>
+ <p><c>{min,MinTabs}</c>. <c>MinTabs</c> is a list of
+ tables that should be included in the checkpoint. The
+ default is []. For these tables, the redundancy will be
+ minimized and the checkpoint information will only be retained
+ with one replica, preferably on the local node.
+ </p>
+ </item>
+ <item>
+ <p><c>{allow_remote,Bool}</c>. <c>false</c> means that
+ all retainers must be local. The checkpoint cannot be
+ activated if a table does not reside locally.
+ <c>true</c> allows retainers to be allocated on any
+ node. Default is set to <c>true</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>{ram_overrides_dump,Bool} </c> Only applicable
+ for <c>ram_copies</c>. <c>Bool</c> allows you to choose
+ to backup the table state as it is in RAM, or as it is on
+ disc. <c>true</c> means that the latest committed
+ records in RAM should be included in the checkpoint.
+ These are the records that the application accesses.
+ <c>false</c> means that the records dumped to DAT files
+ should be included in the checkpoint. These are the
+ records that will be loaded at startup. Default is
+ <c>false</c>.
+ </p>
+ </item>
+ </list>
+ <p>Returns <c>{ok,Name,Nodes}</c> or <c>{error,Reason}</c>.
+ <c>Name</c> is the (possibly generated) name of the
+ checkpoint. <c>Nodes</c> are the nodes that
+ are involved in the checkpoint. Only nodes that keep a
+ checkpoint retainer know about the checkpoint.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name>activity(AccessContext, Fun [, Args]) -> ResultOfFun | exit(Reason)</name>
+ <fsummary>Execute <c>Fun</c>in <c>AccessContext</c>.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:activity(AccessContext, Fun, Args, AccessMod)</c> where <c>AccessMod</c> is the default
+ access callback module obtained by
+ <c>mnesia:system_info(access_module)</c>. <c>Args</c>
+ defaults to the empty list <c>[]</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>activity(AccessContext, Fun, Args, AccessMod) -> ResultOfFun | exit(Reason)</name>
+ <fsummary>Execute <c>Fun</c>in <c>AccessContext</c>.</fsummary>
+ <desc>
+ <p>This function executes the functional object <c>Fun</c>
+ with the arguments <c>Args</c>.
+ </p>
+ <p>The code which executes inside the activity can
+ consist of a series of table manipulation functions, which is
+ performed in a <c>AccessContext</c>. Currently, the following
+ access contexts are supported:
+ </p>
+ <taglist>
+ <tag><c>transaction</c></tag>
+ <item>
+ <p>Short for <c>{transaction, infinity}</c></p>
+ </item>
+ <tag><c>{transaction, Retries}</c></tag>
+ <item>
+ <p>Invokes <c>mnesia:transaction(Fun, Args, Retries)</c>. Note that the result from the <c>Fun</c> is
+ returned if the transaction was successful (atomic),
+ otherwise the function exits with an abort reason.
+ </p>
+ </item>
+ <tag><c>sync_transaction</c></tag>
+ <item>
+ <p>Short for <c>{sync_transaction, infinity}</c></p>
+ </item>
+ <tag><c>{sync_transaction, Retries}</c></tag>
+ <item>
+ <p>Invokes <c>mnesia:sync_transaction(Fun, Args, Retries)</c>. Note that the result from the <c>Fun</c> is
+ returned if the transaction was successful (atomic),
+ otherwise the function exits with an abort reason.
+ </p>
+ </item>
+ <tag><c>async_dirty</c></tag>
+ <item>
+ <p>Invokes <c>mnesia:async_dirty(Fun, Args)</c>.
+ </p>
+ </item>
+ <tag><c>sync_dirty</c></tag>
+ <item>
+ <p>Invokes <c>mnesia:sync_dirty(Fun, Args)</c>.
+ </p>
+ </item>
+ <tag><c>ets</c></tag>
+ <item>
+ <p>Invokes <c>mnesia:ets(Fun, Args)</c>.
+ </p>
+ </item>
+ </taglist>
+ <p>This function (<c>mnesia:activity/4</c>) differs in an
+ important aspect from the <c>mnesia:transaction</c>,
+ <c>mnesia:sync_transaction</c>,
+ <c>mnesia:async_dirty</c>, <c>mnesia:sync_dirty</c> and
+ <c>mnesia:ets</c> functions. The <c>AccessMod</c> argument
+ is the name of a callback module which implements the
+ <c>mnesia_access</c> behavior.
+ </p>
+ <p>Mnesia will forward calls to the following functions:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>mnesia:write/3 (write/1, s_write/1)</p>
+ </item>
+ <item>
+ <p>mnesia:delete/3 (delete/1, s_delete/1)</p>
+ </item>
+ <item>
+ <p>mnesia:delete_object/3 (delete_object/1, s_delete_object/1)</p>
+ </item>
+ <item>
+ <p>mnesia:read/3 (read/1, wread/1)</p>
+ </item>
+ <item>
+ <p>mnesia:match_object/3 (match_object/1)</p>
+ </item>
+ <item>
+ <p>mnesia:all_keys/1</p>
+ </item>
+ <item>
+ <p>mnesia:first/1</p>
+ </item>
+ <item>
+ <p>mnesia:last/1</p>
+ </item>
+ <item>
+ <p>mnesia:prev/2</p>
+ </item>
+ <item>
+ <p>mnesia:next/2</p>
+ </item>
+ <item>
+ <p>mnesia:index_match_object/4 (index_match_object/2)</p>
+ </item>
+ <item>
+ <p>mnesia:index_read/3</p>
+ </item>
+ <item>
+ <p>mnesia:lock/2 (read_lock_table/1, write_lock_table/1)</p>
+ </item>
+ <item>
+ <p>mnesia:table_info/2</p>
+ </item>
+ </list>
+ <p>to the corresponding:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>AccessMod:lock(ActivityId, Opaque, LockItem, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:write(ActivityId, Opaque, Tab, Rec, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:delete(ActivityId, Opaque, Tab, Key, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:delete_object(ActivityId, Opaque, Tab, RecXS, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:read(ActivityId, Opaque, Tab, Key, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:match_object(ActivityId, Opaque, Tab, Pattern, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:all_keys(ActivityId, Opaque, Tab, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:first(ActivityId, Opaque, Tab)</p>
+ </item>
+ <item>
+ <p>AccessMod:last(ActivityId, Opaque, Tab)</p>
+ </item>
+ <item>
+ <p>AccessMod:prev(ActivityId, Opaque, Tab, Key)</p>
+ </item>
+ <item>
+ <p>AccessMod:next(ActivityId, Opaque, Tab, Key)</p>
+ </item>
+ <item>
+ <p>AccessMod:index_match_object(ActivityId, Opaque, Tab, Pattern, Attr, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:index_read(ActivityId, Opaque, Tab, SecondaryKey, Attr, LockKind)</p>
+ </item>
+ <item>
+ <p>AccessMod:table_info(ActivityId, Opaque, Tab, InfoItem)</p>
+ </item>
+ </list>
+ <p>where <c>ActivityId</c> is a record which represents the
+ identity of the enclosing Mnesia activity. The first field
+ (obtained with <c>element(1, ActivityId)</c> contains an
+ atom which may be interpreted as the type of the activity:
+ <c>'ets'</c>, <c>'async_dirty'</c>, <c>'sync_dirty'</c> or
+ <c>'tid'</c>. <c>'tid'</c> means that the activity is a
+ transaction. The structure of the rest of the identity
+ record is internal to Mnesia.
+ </p>
+ <p><c>Opaque</c> is an opaque data structure which is internal
+ to Mnesia.</p>
+ </desc>
+ </func>
+ <func>
+ <name>add_table_copy(Tab, Node, Type) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Copy a table to a remote node.</fsummary>
+ <desc>
+ <p>This function makes another copy of a table at the
+ node <c>Node</c>. The <c>Type</c> argument must be
+ either of the atoms <c>ram_copies</c>, <c>disc_copies</c>,
+ or
+ <c>disc_only_copies</c>. For example, the following call
+ ensures that a disc replica of the <c>person</c> table also
+ exists at node <c>Node</c>.</p>
+ <code type="none">
+mnesia:add_table_copy(person, Node, disc_copies)
+ </code>
+ <p>This function can also be used to add a replica of the
+ table named <c>schema</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>add_table_index(Tab, AttrName) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Create an index for a table. </fsummary>
+ <desc>
+ <p>Table indices can and should be used whenever the user
+ wants to frequently use some other field than the key field
+ to look up records. If this other field has an index
+ associated with it, these lookups can occur in constant time
+ and space. For example, if our application wishes to use
+ the age field of persons to efficiently find all person with
+ a specific age, it might be a good idea to have an index on
+ the age field. This can be accomplished with the following
+ call:</p>
+ <code type="none">
+mnesia:add_table_index(person, age)
+ </code>
+ <p>Indices do not come free, they occupy space which is
+ proportional to the size of the table. They also cause insertions
+ into the table to execute slightly slower. </p>
+ </desc>
+ </func>
+ <func>
+ <name>all_keys(Tab) -> KeyList | transaction abort</name>
+ <fsummary>Return all keys in a table.</fsummary>
+ <desc>
+ <p>This function returns a list of all keys in the table
+ named <c>Tab</c>. The semantics of this function is context
+ sensitive. See <c>mnesia:activity/4</c> for more information. In
+ transaction context it acquires a read lock on the entire
+ table.</p>
+ </desc>
+ </func>
+ <func>
+ <name>async_dirty(Fun, [, Args]) -> ResultOfFun | exit(Reason)</name>
+ <fsummary>Call the Fun in a context which is not protected by a transaction.</fsummary>
+ <desc>
+ <p>Call the <c>Fun</c> in a context which is not protected
+ by a transaction. The Mnesia function calls performed in the
+ <c>Fun</c> are mapped to the corresponding dirty
+ functions. This still involves logging, replication and
+ subscriptions, but there is no locking, local transaction
+ storage, or commit protocols involved. Checkpoint retainers
+ and indices are updated, but they will be updated dirty. As
+ for normal mnesia:dirty_* operations, the operations are
+ performed semi-asynchronously. See
+ <c>mnesia:activity/4</c> and the Mnesia User's Guide for
+ more details.
+ </p>
+ <p>It is possible to manipulate the Mnesia tables without
+ using transactions. This has some serious disadvantages, but
+ is considerably faster since the transaction manager is not
+ involved and no locks are set. A dirty operation does,
+ however, guarantee a certain level of consistency and it is
+ not possible for the dirty operations to return garbled
+ records. All dirty operations provide location transparency
+ to the programmer and a program does not have to be aware of
+ the whereabouts of a certain table in order to function.
+ </p>
+ <p><em>Note:</em>It is more than 10 times more efficient to read records dirty
+ than within a transaction.
+ </p>
+ <p>Depending on the application, it may be a good idea to use
+ the dirty functions for certain operations. Almost all
+ Mnesia functions which can be called within transactions
+ have a dirty equivalent which is much more
+ efficient. However, it must be noted that it is possible for
+ the database to be left in an inconsistent state if dirty
+ operations are used to update it. Dirty operations should
+ only be used for performance reasons when it is absolutely
+ necessary. </p>
+ <p><em>Note:</em> Calling (nesting) a <c>mnesia:[a]sync_dirty</c>
+ inside a transaction context will inherit the transaction semantics.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name>backup(Opaque [, BackupMod]) -> ok | {error,Reason}</name>
+ <fsummary>Back up all tables in the database.</fsummary>
+ <desc>
+ <p>Activates a new checkpoint covering all Mnesia tables,
+ including the schema, with maximum degree of redundancy and
+ performs a backup using <c>backup_checkpoint/2/3</c>. The
+ default value of the backup callback module <c>BackupMod</c>
+ is obtained by <c>mnesia:system_info(backup_module)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>backup_checkpoint(Name, Opaque [, BackupMod]) -> ok | {error,Reason}</name>
+ <fsummary>Back up all tables in a checkpoint.</fsummary>
+ <desc>
+ <p>The tables are backed up to external media using the backup
+ module <c>BackupMod</c>. Tables with the local contents
+ property is being backed up as they exist on the current
+ node. <c>BackupMod</c> is the default backup callback
+ module obtained by
+ <c>mnesia:system_info(backup_module)</c>. See the User's
+ Guide about the exact callback interface (the
+ <c>mnesia_backup behavior</c>).</p>
+ </desc>
+ </func>
+ <func>
+ <name>change_config(Config, Value) -> {error, Reason} | {ok, ReturnValue}</name>
+ <fsummary>Change a configuration parameter.</fsummary>
+ <desc>
+ <p>The <c>Config</c> should be an atom of the following
+ configuration parameters: </p>
+ <taglist>
+ <tag><c>extra_db_nodes</c></tag>
+ <item>
+ <p><c>Value</c> is a list of nodes which Mnesia should try to connect to.
+ The <c>ReturnValue</c> will be those nodes in
+ <c>Value</c> that Mnesia are connected to.
+ <br></br>
+Note: This function shall only be used to connect to newly started ram nodes
+ (N.D.R.S.N.) with an empty schema. If for example it is used after the network
+ have been partitioned it may lead to inconsistent tables.
+ <br></br>
+Note: Mnesia may be connected to other nodes than those
+ returned in <c>ReturnValue</c>.</p>
+ </item>
+ <tag><c>dc_dump_limit</c></tag>
+ <item>
+ <p><c>Value</c> is a number. See description in
+ <c>Configuration Parameters</c> below.
+ The <c>ReturnValue</c> is the new value. Note this configuration parameter
+ is not persistent, it will be lost when mnesia stopped.</p>
+ </item>
+ </taglist>
+ </desc>
+ </func>
+ <func>
+ <name>change_table_access_mode(Tab, AccessMode) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Change the access mode for the table.</fsummary>
+ <desc>
+ <p>The <c>AcccessMode</c> is by default the atom
+ <c>read_write</c> but it may also be set to the atom
+ <c>read_only</c>. If the <c>AccessMode</c> is set to
+ <c>read_only</c>, it means that it is not possible to perform
+ updates to the table. At startup Mnesia always loads
+ <c>read_only</c> tables locally regardless of when and if
+ Mnesia was terminated on other nodes.</p>
+ </desc>
+ </func>
+ <func>
+ <name>change_table_copy_type(Tab, Node, To) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Change the storage type of a table.</fsummary>
+ <desc>
+ <p>For example:</p>
+ <code type="none">
+mnesia:change_table_copy_type(person, node(), disc_copies)
+ </code>
+ <p>Transforms our <c>person</c> table from a RAM table into
+ a disc based table at <c>Node</c>.
+ </p>
+ <p>This function can also be used to change the storage type of
+ the table named <c>schema</c>. The schema table can only
+ have <c>ram_copies</c> or <c>disc_copies</c> as the storage type. If the
+ storage type of the schema is <c>ram_copies</c>, no other table
+ can be disc resident on that node.</p>
+ </desc>
+ </func>
+ <func>
+ <name>change_table_load_order(Tab, LoadOrder) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Change the load order priority for the table.</fsummary>
+ <desc>
+ <p>The <c>LoadOrder</c> priority is by default <c>0</c> (zero)
+ but may be set to any integer. The tables with the highest
+ <c>LoadOrder</c> priority will be loaded first at startup.</p>
+ </desc>
+ </func>
+ <func>
+ <name>clear_table(Tab) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Deletes all entries in a table.</fsummary>
+ <desc>
+ <p>Deletes all entries in the table <c>Tab</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>create_schema(DiscNodes) -> ok | {error,Reason}</name>
+ <fsummary>Create a brand new schema on the specified nodes.</fsummary>
+ <desc>
+ <p>Creates a new database on disc. Various files are
+ created in the local Mnesia directory of each node. Note
+ that the directory must be unique for each node. Two nodes
+ may never share the same directory. If possible, use a local
+ disc device in order to improve performance.</p>
+ <p><c>mnesia:create_schema/1</c> fails if any of the
+ Erlang nodes given as <c>DiscNodes</c> are not alive, if
+ Mnesia is running on anyone of the nodes, or if anyone of
+ the nodes already has a schema. Use
+ <c>mnesia:delete_schema/1</c> to get rid of old faulty
+ schemas.
+ </p>
+ <p><em>Note:</em> Only nodes with disc should be
+ included in <c>DiscNodes</c>. Disc-less nodes, that is nodes
+ where all tables including the schema only resides in RAM,
+ may not be included.</p>
+ </desc>
+ </func>
+ <func>
+ <name>create_table(Name, TabDef) -> {atomic, ok} | {aborted, Reason}</name>
+ <fsummary>Create a Mnesia table called <c>Name</c>with properties as described by the argument <c>TabDef</c>.</fsummary>
+ <desc>
+ <p>This function creates a Mnesia table called <c>Name</c>
+ according to the
+ argument <c>TabDef</c>. This list must be a list of
+ <c>{Item, Value}</c> tuples, where the following values are
+ allowed:</p>
+ <list type="bulleted">
+ <item>
+ <p><c>{access_mode, Atom}</c>. The access mode is by
+ default the atom <c>read_write</c> but it may also be
+ set to the atom <c>read_only</c>. If the
+ <c>AccessMode</c> is set to <c>read_only</c>, it means
+ that it is not possible to perform updates to the table.
+ </p>
+ <p>At startup Mnesia always loads <c>read_only</c> tables
+ locally regardless of when and if Mnesia was terminated
+ on other nodes. This argument returns the access mode of
+ the table. The access mode may either be read_only or
+ read_write.
+ </p>
+ </item>
+ <item>
+ <p><c>{attributes, AtomList}</c> a list of the
+ attribute names for the records that are supposed to
+ populate the table. The default value is <c>[key, val]</c>. The table must have at least one extra
+ attribute in addition to the key.
+ </p>
+ <p>When accessing single attributes in a record, it is not
+ necessary, or even recommended, to hard code any
+ attribute names as atoms. Use the construct
+ <c>record_info(fields, RecordName)</c> instead. It can be
+ used for records of type <c>RecordName</c></p>
+ </item>
+ <item>
+ <p><c>{disc_copies, Nodelist}</c>, where
+ <c>Nodelist</c> is a list of the nodes where this table
+ is supposed to have disc copies. If a table replica is
+ of type <c>disc_copies</c>, all write operations on this
+ particular replica of the table are written to disc as
+ well as to the RAM copy of the table.
+ </p>
+ <p>It is possible
+ to have a replicated table of type <c>disc_copies</c>
+ on one node, and another type on another node. The
+ default value is <c>[]</c></p>
+ </item>
+ <item>
+ <p><c>{disc_only_copies, Nodelist}</c>, where
+ <c>Nodelist</c> is a list of the nodes where this table
+ is supposed to have <c>disc_only_copies</c>. A disc only
+ table replica is kept on disc only and unlike the other
+ replica types, the contents of the replica will not
+ reside in RAM. These replicas are considerably slower
+ than replicas held in RAM.
+ </p>
+ </item>
+ <item>
+ <p><c>{index, Intlist}</c>, where
+ <c>Intlist</c> is a list of attribute names (atoms) or
+ record fields for which Mnesia shall build and maintain
+ an extra index table. The <c>qlc</c> query compiler may
+ or may not utilize any additional indices while
+ processing queries on a table.
+ </p>
+ </item>
+ <item>
+ <p><c>{load_order, Integer}</c>. The load order
+ priority is by default <c>0</c> (zero) but may be set to
+ any integer. The tables with the highest load order
+ priority will be loaded first at startup.
+ </p>
+ </item>
+ <item>
+ <p><c>{ram_copies, Nodelist}</c>, where
+ <c>Nodelist</c> is a list of the nodes where this table
+ is supposed to have RAM copies. A table replica of type
+ <c>ram_copies</c> is obviously not written to disc on a
+ per transaction basis. It is possible to dump
+ <c>ram_copies</c> replicas to disc with the function
+ <c>mnesia:dump_tables(Tabs)</c>. The default value for
+ this attribute is <c>[node()]</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>{record_name, Name}</c>, where <c>Name</c> must
+ be an atom. All records, stored in the table, must have
+ this name as the first element. It defaults to the same
+ name as the name of the table.
+ </p>
+ </item>
+ <item>
+ <p><c>{snmp, SnmpStruct}</c>. See
+ <c>mnesia:snmp_open_table/2</c> for a description of
+ <c>SnmpStruct</c>. If this attribute is present in the
+ <c>ArgList</c> to <c>mnesia:create_table/2</c>, the
+ table is immediately accessible by means of the Simple
+ Network Management Protocol (SNMP). This means that
+ applications which use SNMP to manipulate and control
+ the system can be designed easily, since Mnesia provides
+ a direct mapping between the logical tables that make up
+ an SNMP control application and the physical data which
+ makes up a Mnesia table.
+ </p>
+ </item>
+ <item>
+ <p><c>{type, Type}</c>, where <c>Type</c> must be
+ either of the atoms <c>set</c>, <c>ordered_set</c> or
+ <c>bag</c>. The default value is <c>set</c>. In a
+ <c>set</c> all records have unique keys and in a
+ <c>bag</c> several records may have the same key, but
+ the record content is unique. If a non-unique record is
+ stored the old, conflicting record(s) will simply be
+ overwritten. Note: currently 'ordered_set'
+ is not supported for 'disc_only_copies'.
+ </p>
+ </item>
+ <item>
+ <p><c>{local_content, Bool}</c>, where <c>Bool</c> must be
+ either <c>true</c> or <c>false</c>. The default value is <c>false</c>.\011 </p>
+ </item>
+ </list>
+ <p>For example, the following call creates the <c>person</c> table
+ previously defined and replicates it on 2 nodes:
+ </p>
+ <code type="none">
+mnesia:create_table(person,
+ [{ram_copies, [N1, N2]},
+ {attributes, record_info(fields,person)}]).
+ </code>
+ <p>If it was required that Mnesia build and maintain an extra index
+ table on the <c>address</c> attribute of all the <c>person</c>
+ records that are inserted in the table, the following code would be issued:
+ </p>
+ <code type="none">
+mnesia:create_table(person,
+ [{ram_copies, [N1, N2]},
+ {index, [address]},
+ {attributes, record_info(fields,person)}]).
+ </code>
+ <p>The specification of <c>index</c> and <c>attributes</c> may be
+ hard coded as <c>{index, [2]}</c> and
+ <c>{attributes, [name, age, address, salary, children]}</c>
+ respectively.
+ </p>
+ <p><c>mnesia:create_table/2</c> writes records into the
+ <c>schema</c> table. This function, as well as all other
+ schema manipulation functions, are implemented with the
+ normal transaction management system. This guarantees that
+ schema updates are performed on all nodes in an atomic
+ manner.</p>
+ </desc>
+ </func>
+ <func>
+ <name>deactivate_checkpoint(Name) -> ok | {error, Reason}</name>
+ <fsummary>Deactivate a checkpoint.</fsummary>
+ <desc>
+ <p>The checkpoint is automatically deactivated when some of
+ the tables involved have no retainer attached to them. This may
+ happen when nodes go down or when a replica is deleted.
+ Checkpoints will also be deactivated with this function.
+ <c>Name</c> is the name of an active checkpoint.</p>
+ </desc>
+ </func>
+ <func>
+ <name>del_table_copy(Tab, Node) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Delete the replica of table <c>Tab</c>at node <c>Node</c>.</fsummary>
+ <desc>
+ <p>Deletes the replica of table <c>Tab</c> at node <c>Node</c>.
+ When the last replica is deleted with this
+ function, the table disappears entirely.
+ </p>
+ <p>This function may also be used to delete a replica of
+ the table named <c>schema</c>. Then the mnesia node will be removed.
+ Note: Mnesia must be stopped on the node first.</p>
+ </desc>
+ </func>
+ <func>
+ <name>del_table_index(Tab, AttrName) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Delete an index in a table. </fsummary>
+ <desc>
+ <p>This function deletes the index on attribute with name
+ <c>AttrName</c> in a table.</p>
+ </desc>
+ </func>
+ <func>
+ <name>delete({Tab, Key}) -> transaction abort | ok </name>
+ <fsummary>Delete all records in table <c>Tab</c>with the key <c>Key</c>.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:delete(Tab, Key, write)</c></p>
+ </desc>
+ </func>
+ <func>
+ <name>delete(Tab, Key, LockKind) -> transaction abort | ok </name>
+ <fsummary>Delete all records in table <c>Tab</c>with the key <c>Key</c>.</fsummary>
+ <desc>
+ <p>Deletes all records in table <c>Tab</c> with the key
+ <c>Key</c>.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a lock of type <c>LockKind</c> in the
+ record. Currently the lock types <c>write</c> and
+ <c>sticky_write</c> are supported.</p>
+ </desc>
+ </func>
+ <func>
+ <name>delete_object(Record) -> transaction abort | ok </name>
+ <fsummary>Delete a record</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:delete_object(Tab, Record, write)</c> where
+ <c>Tab</c> is <c>element(1, Record)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>delete_object(Tab, Record, LockKind) -> transaction abort | ok </name>
+ <fsummary>Delete a record</fsummary>
+ <desc>
+ <p>If a table is of type <c>bag</c>, we may sometimes
+ want to delete only some of the records with a certain
+ key. This can be done with the <c>delete_object/3</c>
+ function. A complete record must be supplied to this
+ function.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a lock of type <c>LockKind</c> on the
+ record. Currently the lock types <c>write</c> and
+ <c>sticky_write</c> are supported.</p>
+ </desc>
+ </func>
+ <func>
+ <name>delete_schema(DiscNodes) -> ok | {error,Reason}</name>
+ <fsummary>Delete the schema on the given nodes</fsummary>
+ <desc>
+ <p>Deletes a database created with
+ <c>mnesia:create_schema/1</c>.
+ <c>mnesia:delete_schema/1</c> fails if any of the Erlang
+ nodes given as <c>DiscNodes</c> is not alive, or if Mnesia
+ is running on any of the nodes.
+ </p>
+ <p>After the database has been deleted, it may still be
+ possible to start Mnesia as a disc-less node. This depends on
+ how the configuration parameter <c>schema_location</c> is set.
+ </p>
+ <warning>
+ <p>This function must be used with extreme
+ caution since it makes existing persistent data
+ obsolete. Think twice before using it. </p>
+ </warning>
+ </desc>
+ </func>
+ <func>
+ <name>delete_table(Tab) -> {aborted, Reason} | {atomic, ok} </name>
+ <fsummary>Delete permanently all replicas of table <c>Tab</c>.</fsummary>
+ <desc>
+ <p>Permanently deletes all replicas of table <c>Tab</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_all_keys(Tab) -> KeyList | exit({aborted, Reason}).</name>
+ <fsummary>Dirty search for all record keys in table.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:all_keys/1</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_delete({Tab, Key}) -> ok | exit({aborted, Reason}) </name>
+ <fsummary>Dirty delete of a record.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_delete(Tab, Key)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_delete(Tab, Key) -> ok | exit({aborted, Reason}) </name>
+ <fsummary>Dirty delete of a record. </fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:delete/3</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_delete_object(Record) </name>
+ <fsummary>Dirty delete of a record.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_delete_object(Tab, Record)</c>
+ where <c>Tab</c> is <c>element(1, Record)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_delete_object(Tab, Record) </name>
+ <fsummary>Dirty delete of a record. </fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:delete_object/3</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_first(Tab) -> Key | exit({aborted, Reason}) </name>
+ <fsummary>Return the key for the first record in a table.</fsummary>
+ <desc>
+ <p>Records in <c>set</c> or <c>bag</c> tables are not ordered.
+ However, there
+ is an ordering of the records which is not known
+ to the user. Accordingly, it is possible to traverse a table by means
+ of this function in conjunction with the <c>mnesia:dirty_next/2</c>
+ function.
+ </p>
+ <p>If there are no records at all in the table, this function
+ returns the atom <c>'$end_of_table'</c>. For this reason, it
+ is highly undesirable, but not disallowed, to use this atom
+ as the key for any user records.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_index_match_object(Pattern, Pos)</name>
+ <fsummary>Dirty pattern match using index.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_index_match_object(Tab, Pattern, Pos)</c> where <c>Tab</c> is <c>element(1, Pattern)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_index_match_object(Tab, Pattern, Pos)</name>
+ <fsummary>Dirty pattern match using index.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:index_match_object/4</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_index_read(Tab, SecondaryKey, Pos)</name>
+ <fsummary>Dirty read using index.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:index_read/3</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_last(Tab) -> Key | exit({aborted, Reason}) </name>
+ <fsummary>Return the key for the last record in a table.</fsummary>
+ <desc>
+ <p>This function works exactly
+ <c>mnesia:dirty_first/1</c> but returns the last object in
+ Erlang term order for the <c>ordered_set</c> table type. For
+ all other table types, <c>mnesia:dirty_first/1</c> and
+ <c>mnesia:dirty_last/1</c> are synonyms.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_match_object(Pattern) -> RecordList | exit({aborted, Reason}).</name>
+ <fsummary>Dirty pattern match pattern.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_match_object(Tab, Pattern)</c>
+ where <c>Tab</c> is <c>element(1, Pattern)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_match_object(Tab, Pattern) -> RecordList | exit({aborted, Reason}).</name>
+ <fsummary>Dirty pattern match pattern.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:match_object/3</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_next(Tab, Key) -> Key | exit({aborted, Reason}) </name>
+ <fsummary>Return the next key in a table. </fsummary>
+ <desc>
+ <p>This function makes it possible to traverse a table
+ and perform operations on all records in the table. When
+ the end of the table is reached, the special key
+ <c>'$end_of_table'</c> is returned. Otherwise, the function
+ returns a key which can be used to read the actual record.The
+ behavior is undefined if another Erlang process performs write
+ operations on the table while it is being traversed with the
+ <c>mnesia:dirty_next/2</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_prev(Tab, Key) -> Key | exit({aborted, Reason}) </name>
+ <fsummary>Return the previous key in a table. </fsummary>
+ <desc>
+ <p>This function works exactly
+ <c>mnesia:dirty_next/2</c> but returns the previous object in
+ Erlang term order for the ordered_set table type. For
+ all other table types, <c>mnesia:dirty_next/2</c> and
+ <c>mnesia:dirty_prev/2</c> are synonyms.\011 </p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_read({Tab, Key}) -> ValueList | exit({aborted, Reason}</name>
+ <fsummary>Dirty read of records.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_read(Tab, Key)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_read(Tab, Key) -> ValueList | exit({aborted, Reason}</name>
+ <fsummary>Dirty read of records.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:read/3</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_select(Tab, MatchSpec) -> ValueList | exit({aborted, Reason}</name>
+ <fsummary>Dirty match the objects in <c>Tab</c>against <c>MatchSpec</c>.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of the
+ <c>mnesia:select/2</c> function.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_slot(Tab, Slot) -> RecordList | exit({aborted, Reason})</name>
+ <fsummary>Return the list of records that are associated with Slot in a table.</fsummary>
+ <desc>
+ <p>This function can be used to traverse a table in a
+ manner similar to the <c>mnesia:dirty_next/2</c> function.
+ A table has a number of slots which range from 0 (zero) to some
+ unknown upper bound. The function
+ <c>mnesia:dirty_slot/2</c> returns the special atom
+ <c>'$end_of_table'</c> when the end of the table is reached.
+ The behavior of this function is undefined if a write
+ operation is performed on the table while it is being
+ traversed.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_update_counter({Tab, Key}, Incr) -> NewVal | exit({aborted, Reason})</name>
+ <fsummary>Dirty update of a counter record.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_update_counter(Tab, Key, Incr)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_update_counter(Tab, Key, Incr) -> NewVal | exit({aborted, Reason})</name>
+ <fsummary>Dirty update of a counter record.</fsummary>
+ <desc>
+ <p>There are no special counter records in Mnesia. However,
+ records of the form <c>{Tab, Key, Integer}</c> can be used
+ as (possibly disc resident) counters, when <c>Tab</c> is a
+ <c>set</c>. This function updates a counter with a
+ positive or negative number. However, counters can never become less
+ than zero. There are two significant differences between
+ this function and the action of first reading the record,
+ performing the arithmetics, and then writing the record:</p>
+ <list type="bulleted">
+ <item>It is much more efficient</item>
+ <item><c>mnesia:dirty_update_counter/3</c> is
+ performed as an atomic operation despite the fact that it is not
+ protected by a transaction.</item>
+ </list>
+ <p>If two processes perform <c>mnesia:dirty_update_counter/3</c>
+ simultaneously, both updates will take effect without the
+ risk of loosing one of the updates. The new value
+ <c>NewVal</c> of the counter is returned.</p>
+ <p>If <c>Key</c> don't exits, a new record is created with the value
+ <c>Incr</c> if it is larger than 0, otherwise it is set to 0.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_write(Record) -> ok | exit({aborted, Reason})</name>
+ <fsummary>Dirty write of a record.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:dirty_write(Tab, Record)</c>
+ where <c>Tab</c> is <c>element(1, Record)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dirty_write(Tab, Record) -> ok | exit({aborted, Reason})</name>
+ <fsummary>Dirty write of a record.</fsummary>
+ <desc>
+ <p>This is the dirty equivalent of <c>mnesia:write/3</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dump_log() -> dumped</name>
+ <fsummary>Perform a user initiated dump of the local log file.</fsummary>
+ <desc>
+ <p>Performs a user initiated dump of the local log file.
+ This is usually not necessary since Mnesia, by default,
+ manages this automatically.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dump_tables(TabList) -> {atomic, ok} | {aborted, Reason}</name>
+ <fsummary>Dump all RAM tables to disc.</fsummary>
+ <desc>
+ <p>This function dumps a set of <c>ram_copies</c> tables
+ to disc. The next time the system is started, these tables
+ are initiated with the data found in the files that are the
+ result of this dump. None of the tables may have disc
+ resident replicas.</p>
+ </desc>
+ </func>
+ <func>
+ <name>dump_to_textfile(Filename) </name>
+ <fsummary>Dump local tables into a text file.</fsummary>
+ <desc>
+ <p>Dumps all local tables of a mnesia system into a text file
+ which can then be edited (by means of a normal text editor)
+ and then later be reloaded with
+ <c>mnesia:load_textfile/1</c>. Only use this function for
+ educational purposes. Use other functions to deal with real
+ backups.</p>
+ </desc>
+ </func>
+ <func>
+ <name>error_description(Error) -> String </name>
+ <fsummary>Return a string describing a particular Mnesia error.</fsummary>
+ <desc>
+ <p>All Mnesia transactions, including all the schema
+ update functions, either return the value <c>{atomic, Val}</c> or the tuple <c>{aborted, Reason}</c>. The
+ <c>Reason</c> can be either of the following atoms. The
+ <c>error_description/1</c> function returns a descriptive
+ string which describes the error.
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>nested_transaction</c>. Nested transactions are
+ not allowed in this context.
+ </p>
+ </item>
+ <item>
+ <p><c>badarg</c>. Bad or invalid argument, possibly
+ bad type.
+ </p>
+ </item>
+ <item>
+ <p><c>no_transaction</c>. Operation not allowed
+ outside transactions.
+ </p>
+ </item>
+ <item>
+ <p><c>combine_error</c>. Table options were illegally
+ combined.
+ </p>
+ </item>
+ <item>
+ <p><c>bad_index</c>. Index already exists or was out
+ of bounds.
+ </p>
+ </item>
+ <item>
+ <p><c>already_exists</c>. Schema option is already set.
+ </p>
+ </item>
+ <item>
+ <p><c>index_exists</c>. Some operations cannot be performed on
+ tabs with index.
+ </p>
+ </item>
+ <item>
+ <p><c>no_exists</c>. Tried to perform operation on
+ non-existing, or not alive, item.
+ </p>
+ </item>
+ <item>
+ <p><c>system_limit</c>. Some system_limit was exhausted.
+ </p>
+ </item>
+ <item>
+ <p><c>mnesia_down</c>. A transaction involving
+ records at some remote node which died while
+ transaction was executing. Record(s) are no longer
+ available elsewhere in the network.
+ </p>
+ </item>
+ <item>
+ <p><c>not_a_db_node</c>. A node which does not exist
+ in the schema was mentioned.
+ </p>
+ </item>
+ <item>
+ <p><c>bad_type</c>. Bad type on some arguments.
+ </p>
+ </item>
+ <item>
+ <p><c>node_not_running</c>. Node not running.
+ </p>
+ </item>
+ <item>
+ <p><c>truncated_binary_file</c>. Truncated binary in file.
+ </p>
+ </item>
+ <item>
+ <p><c>active</c>. Some delete operations require that
+ all active records are removed.
+ </p>
+ </item>
+ <item>
+ <p><c>illegal</c>. Operation not supported on record.
+ </p>
+ </item>
+ </list>
+ <p>The <c>Error</c> may be <c>Reason</c>,
+ <c>{error, Reason}</c>, or <c>{aborted, Reason}</c>. The
+ <c>Reason</c> may be an atom or a tuple with <c>Reason</c>
+ as an atom in the first field.</p>
+ </desc>
+ </func>
+ <func>
+ <name>ets(Fun, [, Args]) -> ResultOfFun | exit(Reason)</name>
+ <fsummary>Call the Fun in a raw context which is not protected by a transaction.</fsummary>
+ <desc>
+ <p>Call the <c>Fun</c> in a raw context which is not protected by
+ a transaction. The Mnesia function call is performed in the
+ <c>Fun</c> are performed directly on the local <c>ets</c> tables on
+ the assumption that the local storage type is
+ <c>ram_copies</c> and the tables are not replicated to other
+ nodes. Subscriptions are not triggered and checkpoints are
+ not updated, but it is extremely fast. This function can
+ also be applied to <c>disc_copies</c> tables if all
+ operations are read only. See <c>mnesia:activity/4</c>
+ and the Mnesia User's Guide for more details.</p>
+ <p><em>Note:</em> Calling (nesting) a <c>mnesia:ets</c>
+ inside a transaction context will inherit the transaction semantics.</p>
+ </desc>
+ </func>
+ <func>
+ <name>first(Tab) -> Key | transaction abort </name>
+ <fsummary>Return the key for the first record in a table.</fsummary>
+ <desc>
+ <p>Records in <c>set</c> or <c>bag</c> tables are not ordered.
+ However, there
+ is an ordering of the records which is not known
+ to the user. Accordingly, it is possible to traverse a table by means
+ of this function in conjunction with the <c>mnesia:next/2</c>
+ function.
+ </p>
+ <p>If there are no records at all in the table, this function
+ returns the atom <c>'$end_of_table'</c>. For this reason, it
+ is highly undesirable, but not disallowed, to use this atom
+ as the key for any user records.</p>
+ </desc>
+ </func>
+ <func>
+ <name>foldl(Function, Acc, Table) -> NewAcc | transaction abort </name>
+ <fsummary>Call Function for each record in Table </fsummary>
+ <desc>
+ <p>Iterates over the table <c>Table</c> and calls
+ <c>Function(Record, NewAcc)</c> for each <c>Record</c> in the table.
+ The term returned from <c>Function</c> will be used as the second
+ argument in the next call to the <c>Function</c>.
+ </p>
+ <p><c>foldl</c> returns the same term as the last call to
+ <c>Function</c> returned.</p>
+ </desc>
+ </func>
+ <func>
+ <name>foldr(Function, Acc, Table) -> NewAcc | transaction abort </name>
+ <fsummary>Call Function for each record in Table </fsummary>
+ <desc>
+ <p>This function works exactly as
+ <c>foldl/3</c> but iterates the table in the opposite order
+ for the <c>ordered_set</c> table type. For
+ all other table types, <c>foldr/3</c> and
+ <c>foldl/3</c> are synonyms.</p>
+ </desc>
+ </func>
+ <func>
+ <name>force_load_table(Tab) -> yes | ErrorDescription </name>
+ <fsummary>Force a table to be loaded into the system </fsummary>
+ <desc>
+ <p>The Mnesia algorithm for table load might lead to a
+ situation where a table cannot be loaded. This situation
+ occurs when a node is started and Mnesia concludes, or
+ suspects, that another copy of the table was active after
+ this local copy became inactive due to a system crash.
+ </p>
+ <p>If this situation is not acceptable, this function can be
+ used to override the strategy of the Mnesia table load
+ algorithm. This could lead to a situation where some
+ transaction effects are lost with a inconsistent database as
+ result, but for some applications high availability is more
+ important than consistent data.</p>
+ </desc>
+ </func>
+ <func>
+ <name>index_match_object(Pattern, Pos) -> transaction abort | ObjList</name>
+ <fsummary>Match records and utilizes index information.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:index_match_object(Tab, Pattern, Pos, read)</c> where <c>Tab</c> is <c>element(1, Pattern)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>index_match_object(Tab, Pattern, Pos, LockKind) -> transaction abort | ObjList</name>
+ <fsummary>Match records and utilizes index information.</fsummary>
+ <desc>
+ <p>In a manner similar to the <c>mnesia:index_read/3</c>
+ function, we can also utilize any index information when we
+ try to match records. This function takes a pattern which
+ obeys the same rules as the <c>mnesia:match_object/3</c>
+ function with the exception that this function requires the
+ following conditions:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p>The table <c>Tab</c> must have an index on
+ position <c>Pos</c>.
+ </p>
+ </item>
+ <item>
+ <p>The element in position <c>Pos</c> in
+ <c>Pattern</c> must be bound. <c>Pos</c> may either be
+ an integer (#record.Field), or an attribute name.</p>
+ </item>
+ </list>
+ <p>The two index search functions described here are
+ automatically invoked when searching tables with <c>qlc</c>
+ list comprehensions and also when using the low level
+ <c>mnesia:[dirty_]match_object</c> functions.
+ </p>
+ <p></p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a lock of type <c>LockKind</c> on the
+ entire table or on a single record. Currently, the lock type
+ <c>read</c> is supported.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name>index_read(Tab, SecondaryKey, Pos) -> transaction abort | RecordList </name>
+ <fsummary>Read records via index table. </fsummary>
+ <desc>
+ <p>Assume there is an index on position <c>Pos</c> for a
+ certain record type. This function can be used to read the
+ records without knowing the actual key for the record. For
+ example, with an index in position 1 of the <c>person</c>
+ table, the call <c>mnesia:index_read(person, 36, #person.age)</c> returns a list of all persons with age
+ equal to 36. <c>Pos</c> may also be an attribute name
+ (atom), but if the notation <c>mnesia:index_read(person, 36, age)</c> is used, the field position will be searched for in
+ runtime, for each call.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a read lock on the entire table.</p>
+ </desc>
+ </func>
+ <func>
+ <name>info() -> ok </name>
+ <fsummary>Print some information about the system on the tty.</fsummary>
+ <desc>
+ <p>Prints some information about the system on the tty.
+ This function may be used even if Mnesia is not started.
+ However, more information will be displayed if Mnesia is
+ started.</p>
+ </desc>
+ </func>
+ <func>
+ <name>install_fallback(Opaque) -> ok | {error,Reason}</name>
+ <fsummary>Install a backup as fallback.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:install_fallback(Opaque, Args)</c> where
+ <c>Args</c> is <c>[{scope, global}]</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>install_fallback(Opaque), BackupMod) -> ok | {error,Reason}</name>
+ <fsummary>Install a backup as fallback.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:install_fallback(Opaque, Args)</c> where
+ <c>Args</c> is <c>[{scope, global}, {module, BackupMod}]</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>install_fallback(Opaque, Args) -> ok | {error,Reason}</name>
+ <fsummary>Install a backup as fallback.</fsummary>
+ <desc>
+ <p>This function is used to install a backup as fallback. The
+ fallback will be used to restore the database at the next
+ start-up. Installation of fallbacks requires Erlang to be up
+ and running on all the involved nodes, but it does not
+ matter if Mnesia is running or not. The installation of the
+ fallback will fail if the local node is not one of the disc
+ resident nodes in the backup.
+ </p>
+ <p><c>Args</c> is a list of the following tuples:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>{module, BackupMod}</c>.
+ All accesses of the backup media is performed via a
+ callback module named <c>BackupMod</c>. The
+ <c>Opaque</c> argument is forwarded to the callback
+ module which may interpret it as it wish. The default
+ callback module is called <c>mnesia_backup</c> and it
+ interprets the <c>Opaque</c> argument as a local
+ filename. The default for this module is also
+ configurable via the <c>-mnesia mnesia_backup</c>
+ configuration parameter. </p>
+ </item>
+ <item>
+ <p><c>{scope, Scope}</c>
+ The <c>Scope</c> of a fallback may either be
+ <c>global</c> for the entire database or <c>local</c>
+ for one node. By default, the installation of a fallback
+ is a global operation which either is performed all
+ nodes with disc resident schema or none. Which nodes
+ that are disc resident or not, is determined from the
+ schema info in the backup.</p>
+ <p>If the <c>Scope</c> of the operation is <c>local</c>
+ the fallback will only be installed on the local node.</p>
+ </item>
+ <item>
+ <p><c>{mnesia_dir, AlternateDir}</c>
+ This argument is only valid if the scope of the
+ installation is <c>local</c>. Normally the installation
+ of a fallback is targeted towards the Mnesia directory
+ as configured with the <c>-mnesia dir</c> configuration
+ parameter. But by explicitly supplying an
+ <c>AlternateDir</c> the fallback will be installed there
+ regardless of the Mnesia directory configuration
+ parameter setting. After installation of a fallback on
+ an alternate Mnesia directory that directory is fully
+ prepared for usage as an active Mnesia directory.
+ </p>
+ <p>This is a somewhat dangerous feature which must be
+ used with care. By unintentional mixing of directories
+ you may easily end up with a inconsistent database, if
+ the same backup is installed on more than one directory.</p>
+ </item>
+ </list>
+ </desc>
+ </func>
+ <func>
+ <name>is_transaction() -> boolean </name>
+ <fsummary>Check if code is running in a transaction.</fsummary>
+ <desc>
+ <p>When this function is executed inside a transaction context
+ it returns <c>true</c>, otherwise <c>false</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>last(Tab) -> Key | transaction abort </name>
+ <fsummary>Return the key for the last record in a table.</fsummary>
+ <desc>
+ <p>This function works exactly
+ <c>mnesia:first/1</c> but returns the last object in
+ Erlang term order for the <c>ordered_set</c> table type. For
+ all other table types, <c>mnesia:first/1</c> and
+ <c>mnesia:last/1</c> are synonyms.</p>
+ </desc>
+ </func>
+ <func>
+ <name>load_textfile(Filename)</name>
+ <fsummary>Load tables from a text file.</fsummary>
+ <desc>
+ <p>Loads a series of definitions and data found in the
+ text file (generated with <c>mnesia:dump_to_textfile/1</c>)
+ into Mnesia. This function also starts Mnesia and possibly
+ creates a new schema. This function is intended for
+ educational purposes only and using other functions to deal
+ with real backups, is recommended.</p>
+ </desc>
+ </func>
+ <func>
+ <name>lock(LockItem, LockKind) -> Nodes | ok | transaction abort</name>
+ <fsummary>Explicit grab lock.</fsummary>
+ <desc>
+ <p>Write locks are normally acquired on all nodes where a
+ replica of the table resides (and is active). Read locks are
+ acquired on one node (the local node if a local
+ replica exists). Most of the context sensitive access functions
+ acquire an implicit lock if they are invoked in a
+ transaction context. The granularity of a lock may either
+ be a single record or an entire table.
+ </p>
+ <p>The normal usage is to call the function without checking
+ the return value since it exits if it fails and the
+ transaction is restarted by the transaction manager. It
+ returns all the locked nodes if a write lock is acquired, and
+ <c>ok</c> if it was a read lock.
+ </p>
+ <p>This function <c>mnesia:lock/2</c> is intended to support
+ explicit locking on tables but also intended for situations
+ when locks need to be acquired regardless of how tables are
+ replicated. Currently, two <c>LockKind</c>'s are supported:
+ </p>
+ <taglist>
+ <tag><c>write</c></tag>
+ <item>
+ <p>Write locks are exclusive, which means that if one
+ transaction manages to acquire a write lock on an item,
+ no other transaction may acquire any kind of lock on the
+ same item.
+ </p>
+ </item>
+ <tag><c>read</c></tag>
+ <item>
+ <p>Read locks may be shared, which means that if one
+ transaction manages to acquire a read lock on an item,
+ other transactions may also acquire a read lock on the
+ same item. However, if someone has a read lock no one can
+ acquire a write lock at the same item. If some one has a
+ write lock no one can acquire a read lock nor
+ a write lock at the same item.</p>
+ </item>
+ </taglist>
+ <p>Conflicting lock requests are automatically queued if there
+ is no risk of a deadlock. Otherwise the transaction must be
+ aborted and executed again. Mnesia does this automatically
+ as long as the upper limit of maximum <c>retries</c> is not
+ reached. See <c>mnesia:transaction/3</c> for the details.
+ </p>
+ <p>For the sake of completeness sticky write locks will also
+ be described here even if a sticky write lock is not
+ supported by this particular function:
+ </p>
+ <taglist>
+ <tag><c>sticky_write</c></tag>
+ <item>
+ <p>Sticky write locks are a mechanism which can be used
+ to optimize write lock acquisition. If your application
+ uses replicated tables mainly for fault tolerance (as
+ opposed to read access optimization purpose), sticky
+ locks may be the best option available.
+ </p>
+ <p>When a sticky write lock is acquired, all nodes will be
+ informed which node is locked. Subsequently,
+ sticky lock requests from the same node will be
+ performed as a local operation without any
+ communication with other nodes. The sticky lock
+ lingers on the node even after the transaction has
+ ended. See the Mnesia User's Guide for more information.</p>
+ </item>
+ </taglist>
+ <p>Currently, two kinds of <c>LockItem</c>'s are supported by
+ this function:
+ </p>
+ <taglist>
+ <tag><c>{table, Tab}</c></tag>
+ <item>
+ <p>This acquires a lock of type <c>LockKind</c> on the
+ entire table <c>Tab</c>.
+ </p>
+ </item>
+ <tag><c>{global, GlobalKey, Nodes}</c></tag>
+ <item>
+ <p>This acquires a lock of type <c>LockKind</c> on the
+ global resource <c>GlobalKey</c>. The lock is acquired
+ on all active nodes in the <c>Nodes</c> list. </p>
+ </item>
+ </taglist>
+ <p>Locks are released when the outermost transaction ends.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires locks otherwise it just ignores the
+ request.</p>
+ </desc>
+ </func>
+ <func>
+ <name>match_object(Pattern) ->transaction abort | RecList </name>
+ <fsummary>Match <c>Pattern</c>for records. </fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:match_object(Tab, Pattern, read)</c> where
+ <c>Tab</c> is <c>element(1, Pattern)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>match_object(Tab, Pattern, LockKind) ->transaction abort | RecList </name>
+ <fsummary>Match <c>Pattern</c>for records. </fsummary>
+ <desc>
+ <p>This function takes a pattern with 'don't care' variables
+ denoted as a '_' parameter. This function returns a list of
+ records which matched the pattern. Since the second element
+ of a record in a table is considered to be the key for the
+ record, the performance of this function depends on whether
+ this key is bound or not.
+ </p>
+ <p>For example, the call <c>mnesia:match_object(person, {person, '_', 36, '_', '_'}, read)</c> returns a list of all person records with an
+ age field of thirty-six (36).
+ </p>
+ <p>The function <c>mnesia:match_object/3</c>
+ automatically uses indices if these exist. However, no
+ heuristics are performed in order to select the best
+ index.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a lock of type <c>LockKind</c> on the
+ entire table or a single record. Currently, the lock type
+ <c>read</c> is supported.</p>
+ </desc>
+ </func>
+ <func>
+ <name>move_table_copy(Tab, From, To) -> {aborted, Reason} | {atomic, ok}</name>
+ <fsummary>Move the copy of table <c>Tab</c>from node<c>From</c>to node <c>To</c>.</fsummary>
+ <desc>
+ <p>Moves the copy of table <c>Tab</c> from node
+ <c>From</c> to node <c>To</c>.
+ </p>
+ <p>The storage type is preserved. For example, a RAM table
+ moved from one node remains a RAM on the new node. It is
+ still possible for other transactions to read and write in
+ the table while it is being moved.
+ </p>
+ <p>This function cannot be used on <c>local_content</c> tables.</p>
+ </desc>
+ </func>
+ <func>
+ <name>next(Tab, Key) -> Key | transaction abort </name>
+ <fsummary>Return the next key in a table. </fsummary>
+ <desc>
+ <p>This function makes it possible to traverse a table
+ and perform operations on all records in the table. When
+ the end of the table is reached, the special key
+ <c>'$end_of_table'</c> is returned. Otherwise, the function
+ returns a key which can be used to read the actual record.</p>
+ </desc>
+ </func>
+ <func>
+ <name>prev(Tab, Key) -> Key | transaction abort </name>
+ <fsummary>Return the previous key in a table. </fsummary>
+ <desc>
+ <p>This function works exactly
+ <c>mnesia:next/2</c> but returns the previous object in
+ Erlang term order for the ordered_set table type. For
+ all other table types, <c>mnesia:next/2</c> and
+ <c>mnesia:prev/2</c> are synonyms.\011 </p>
+ </desc>
+ </func>
+ <func>
+ <name>read({Tab, Key}) -> transaction abort | RecordList </name>
+ <fsummary>Read records(s) with a given key. </fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:read(Tab, Key, read)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>read(Tab, Key) -> transaction abort | RecordList </name>
+ <fsummary>Read records(s) with a given key. </fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:read(Tab, Key, read)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>read(Tab, Key, LockKind) -> transaction abort | RecordList </name>
+ <fsummary>Read records(s) with a given key. </fsummary>
+ <desc>
+ <p>This function reads all records from table <c>Tab</c> with
+ key <c>Key</c>. This function has the same semantics
+ regardless of the location of <c>Tab</c>. If the table is
+ of type <c>bag</c>, the <c>mnesia:read(Tab, Key)</c> can
+ return an arbitrarily long list. If the table is of type
+ <c>set</c>, the list is either of length 1, or <c>[]</c>.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a lock of type
+ <c>LockKind</c>. Currently, the lock types <c>read</c>,
+ <c>write</c> and <c>sticky_write</c> are supported.
+ </p>
+ <p>If the user wants to update the record it is more efficient to
+ use <c>write/sticky_write</c> as the LockKind.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name>read_lock_table(Tab) -> ok | transaction abort</name>
+ <fsummary>Set a read lock on an entire table.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:lock({table, Tab}, read)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>report_event(Event) -> ok</name>
+ <fsummary>Report a user event to Mnesia's event handler.</fsummary>
+ <desc>
+ <p>When tracing a system of Mnesia applications it is useful
+ to be able to interleave Mnesia's own events with
+ application related events that give information about the
+ application context.
+ </p>
+ <p>Whenever the application begins a
+ new and demanding Mnesia task, or if it is entering a new
+ interesting phase in its execution, it may be a good idea to
+ use <c>mnesia:report_event/1</c>. The <c>Event</c> may be
+ any term and generates a <c>{mnesia_user, Event}</c> event
+ for any processes that subscribe to Mnesia system
+ events.</p>
+ </desc>
+ </func>
+ <func>
+ <name>restore(Opaque, Args) -> {atomic, RestoredTabs} |{aborted, Reason}</name>
+ <fsummary>Online restore of backup.</fsummary>
+ <desc>
+ <p>With this function, tables may be restored online from a
+ backup without restarting Mnesia. <c>Opaque</c> is forwarded
+ to the backup module. <c>Args</c> is a list of the following
+ tuples:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>{module,BackupMod}</c> The backup module
+ <c>BackupMod</c> will be used to access the backup
+ media. If omitted, the default backup module will be
+ used.
+ </p>
+ </item>
+ <item><c>{skip_tables, TabList}</c> Where <c>TabList</c>
+ is a list of tables which should not be read from the
+ backup.
+ </item>
+ <item><c>{clear_tables, TabList}</c> Where
+ <c>TabList</c> is a list of tables which should be
+ cleared, before the records from the backup are inserted,
+ ie. all records in the tables are deleted before the
+ tables are restored. Schema information about the tables
+ is not cleared or read from backup.
+ </item>
+ <item><c>{keep_tables, TabList}</c> Where <c>TabList</c>
+ is a list of tables which should be not be cleared, before
+ the records from the backup are inserted, i.e. the records
+ in the backup will be added to the records in the table.
+ Schema information about the tables is not cleared or read
+ from backup.
+ </item>
+ <item><c>{recreate_tables, TabList}</c> Where
+ <c>TabList</c> is a list of tables which should be
+ re-created, before the records from the backup are
+ inserted. The tables are first deleted and then created with
+ the schema information from the backup. All the nodes in the
+ backup needs to be up and running.
+ </item>
+ <item><c>{default_op, Operation}</c> Where <c>Operation</c> is
+ one of the following operations <c>skip_tables</c>,
+ <c>clear_tables</c>, <c>keep_tables</c> or
+ <c>recreate_tables</c>. The default operation specifies
+ which operation should be used on tables from the backup
+ which are not specified in any of the lists above. If
+ omitted, the operation <c>clear_tables</c> will be used.
+ </item>
+ </list>
+ <p>The affected tables are write locked during the
+ restoration, but regardless of the lock conflicts caused by
+ this, the applications can continue to do their work while
+ the restoration is being performed. The restoration is
+ performed as one single transaction.
+ </p>
+ <p>If the database is
+ huge, it may not be possible to restore it online. In such
+ cases, the old database must be restored by installing a
+ fallback and then restart.</p>
+ </desc>
+ </func>
+ <func>
+ <name>s_delete({Tab, Key}) -> ok | transaction abort </name>
+ <fsummary>Set sticky lock and delete records.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:delete(Tab, Key, sticky_write)</c></p>
+ </desc>
+ </func>
+ <func>
+ <name>s_delete_object(Record) -> ok | transaction abort </name>
+ <fsummary>Set sticky lock and delete record.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:delete_object(Tab, Record, sticky_write)</c> where <c>Tab</c> is <c>element(1, Record)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>s_write(Record) -> ok | transaction abort </name>
+ <fsummary>Write <c>Record</c>and sets stick lock.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:write(Tab, Record, sticky_write)</c>
+ where <c>Tab</c> is <c>element(1, Record)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>schema() -> ok </name>
+ <fsummary>Print information about all table definitions on the tty. </fsummary>
+ <desc>
+ <p>Prints information about all table definitions on the tty.</p>
+ </desc>
+ </func>
+ <func>
+ <name>schema(Tab) -> ok </name>
+ <fsummary>Print information about one table definition on the tty.</fsummary>
+ <desc>
+ <p>Prints information about one table definition on the tty.</p>
+ </desc>
+ </func>
+ <func>
+ <name>select(Tab, MatchSpec [, Lock]) -> transaction abort | [Object] </name>
+ <fsummary>Match the objects in <c>Tab</c>against <c>MatchSpec</c>.</fsummary>
+ <desc>
+ <p>Matches the objects in the table <c>Tab</c> using a
+ match_spec as described in the ERTS Users Guide. Optionally a lock
+ <c>read</c> or <c>write</c> can be given as the third
+ argument, default is <c>read</c>. The return value depends
+ on the <c>MatchSpec</c>.</p>
+ <p><em>Note:</em> for best performance <c>select</c> should
+ be used before any modifying operations are done on that table
+ in the same transaction, i.e. don't use <c>write</c> or <c>delete</c>
+ before a <c>select</c>.</p>
+ <p>In its simplest forms the match_spec's look like this:</p>
+ <list type="bulleted">
+ <item>MatchSpec = [MatchFunction]</item>
+ <item>MatchFunction = {MatchHead, [Guard], [Result]}</item>
+ <item>MatchHead = tuple() | record()</item>
+ <item>Guard = {"Guardtest name", ...}</item>
+ <item>Result = "Term construct"</item>
+ </list>
+ <p>See the ERTS Users Guide and <c>ets</c> documentation for a
+ complete description of the select.</p>
+ <p>For example to find the names of all male persons with an age over 30 in table
+ Tab do:</p>
+ <code type="none">
+\011 MatchHead = #person{name='$1', sex=male, age='$2', _='_'},
+\011 Guard = {'>', '$2', 30},
+\011 Result = '$1',
+\011 mnesia:select(Tab,[{MatchHead, [Guard], [Result]}]),
+ </code>
+ </desc>
+ </func>
+ <func>
+ <name>select(Tab, MatchSpec, NObjects, Lock) -> transaction abort | {[Object],Cont} | '$end_of_table'</name>
+ <fsummary>Match the objects in <c>Tab</c>against <c>MatchSpec</c>.</fsummary>
+ <desc>
+ <p>Matches the objects in the table <c>Tab</c> using a
+ match_spec as described in ERTS users guide, and returns
+ a chunk of terms and a continuation, the wanted number
+ of returned terms is specified by the <c>NObjects</c> argument.
+ The lock argument can be <c>read</c> or <c>write</c>.
+ The continuation should be used as argument to <c>mnesia:select/1</c>,
+ if more or all answers are needed.</p>
+ <p><em>Note:</em> for best performance <c>select</c> should
+ be used before any modifying operations are done on that
+ table in the same transaction, i.e. don't use
+ <c>mnesia:write</c> or <c>mnesia:delete</c> before a
+ <c>mnesia:select</c>. For efficiency the <c>NObjects</c> is
+ a recommendation only and the result may contain anything
+ from an empty list to all available results. </p>
+ </desc>
+ </func>
+ <func>
+ <name>select(Cont) -> transaction abort | {[Object],Cont} | '$end_of_table'</name>
+ <fsummary>Continues selecting objects. </fsummary>
+ <desc>
+ <p>Selects more objects with the match specification initiated
+ by <c>mnesia:select/4</c>.
+ </p>
+ <p><em>Note:</em> Any modifying operations, i.e. <c>mnesia:write</c>
+ or <c>mnesia:delete</c>, that are done between the <c>mnesia:select/4</c>
+ and <c>mnesia:select/1</c> calls will not be visible in the result.</p>
+ </desc>
+ </func>
+ <func>
+ <name>set_debug_level(Level) -> OldLevel</name>
+ <fsummary>Change the internal debug level of Mnesia</fsummary>
+ <desc>
+ <p>Changes the internal debug level of Mnesia. See the
+ chapter about configuration parameters for details.</p>
+ </desc>
+ </func>
+ <func>
+ <name>set_master_nodes(MasterNodes) -> ok | {error, Reason} </name>
+ <fsummary>Set the master nodes for all tables</fsummary>
+ <desc>
+ <p>For each table Mnesia will determine its replica nodes
+ (<c>TabNodes</c>) and invoke <c>mnesia:set_master_nodes(Tab, TabMasterNodes)</c> where <c>TabMasterNodes</c> is the
+ intersection of <c>MasterNodes</c> and <c>TabNodes</c>. See
+ <c>mnesia:set_master_nodes/2</c> about the semantics.</p>
+ </desc>
+ </func>
+ <func>
+ <name>set_master_nodes(Tab, MasterNodes) -> ok | {error, Reason} </name>
+ <fsummary>Set the master nodes for a table</fsummary>
+ <desc>
+ <p>If the application detects that there has been a
+ communication failure (in a potentially partitioned network) which
+ may have caused an inconsistent database, it may use the
+ function <c>mnesia:set_master_nodes(Tab, MasterNodes)</c> to
+ define from which nodes each table will be loaded.
+ At startup Mnesia's normal table load algorithm will be
+ bypassed and the table will be loaded from one of the master
+ nodes defined for the table, regardless of when and if Mnesia
+ was terminated on other nodes. The <c>MasterNodes</c> may only
+ contain nodes where the table has a replica and if the
+ <c>MasterNodes</c> list is empty, the master node recovery
+ mechanism for the particular table will be reset and the
+ normal load mechanism will be used at next restart.
+ </p>
+ <p>The master node setting is always local and it may be
+ changed regardless of whether Mnesia is started or not.
+ </p>
+ <p>The database may also become inconsistent if the
+ <c>max_wait_for_decision</c> configuration parameter is used
+ or if <c>mnesia:force_load_table/1</c> is used.</p>
+ </desc>
+ </func>
+ <func>
+ <name>snmp_close_table(Tab) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Remove the possibility for SNMP to manipulate the table.</fsummary>
+ <desc>
+ <p>Removes the possibility for SNMP to manipulate the
+ table.</p>
+ </desc>
+ </func>
+ <func>
+ <name>snmp_get_mnesia_key(Tab, RowIndex) -> {ok, Key} | undefined</name>
+ <fsummary>Get the corresponding Mnesia key from an SNMP index.</fsummary>
+ <type>
+ <v>Tab ::= atom()</v>
+ <v>RowIndex ::= [integer()]</v>
+ <v>Key ::= key() | {key(), key(), ...}</v>
+ <v>key() ::= integer() | string() | [integer()]</v>
+ </type>
+ <desc>
+ <p>Transforms an SNMP index to the corresponding Mnesia key.
+ If the SNMP table has multiple keys, the key is a tuple of
+ the key columns.</p>
+ </desc>
+ </func>
+ <func>
+ <name>snmp_get_next_index(Tab, RowIndex) -> {ok, NextIndex} | endOfTable</name>
+ <fsummary>Get the index of the next lexicographical row.</fsummary>
+ <type>
+ <v>Tab ::= atom()</v>
+ <v>RowIndex ::= [integer()]</v>
+ <v>NextIndex ::= [integer()]</v>
+ </type>
+ <desc>
+ <p>The <c>RowIndex</c> may specify a non-existing row.
+ Specifically, it might be the empty list. Returns the index
+ of the next lexicographical row. If <c>RowIndex</c> is the
+ empty list, this function will return the index of the first row
+ in the table.</p>
+ </desc>
+ </func>
+ <func>
+ <name>snmp_get_row(Tab, RowIndex) -> {ok, Row} | undefined</name>
+ <fsummary>Retrieve a row indexed by an SNMP index.</fsummary>
+ <type>
+ <v>Tab ::= atom()</v>
+ <v>RowIndex ::= [integer()]</v>
+ <v>Row ::= record(Tab)</v>
+ </type>
+ <desc>
+ <p>Makes it possible to read a row by its SNMP index. This
+ index is specified as an SNMP OBJECT IDENTIFIER, a list of
+ integers.</p>
+ </desc>
+ </func>
+ <func>
+ <name>snmp_open_table(Tab, SnmpStruct) -> {aborted, R} | {atomic, ok}</name>
+ <fsummary>Organize a Mnesia table as an SNMP table.</fsummary>
+ <type>
+ <v>Tab ::= atom()</v>
+ <v>SnmpStruct ::= [{key, type()}]</v>
+ <v>type() ::= type_spec() | {type_spec(), type_spec(), ...}</v>
+ <v>type_spec() ::= fix_string | string | integer</v>
+ </type>
+ <desc>
+ <p>It is possible to establish a direct one to one mapping
+ between Mnesia tables and SNMP tables. Many
+ telecommunication applications are controlled and monitored
+ by the SNMP protocol. This connection between Mnesia and
+ SNMP makes it simple and convenient to achieve this.
+ </p>
+ <p>The <c>SnmpStruct</c> argument is a list of SNMP
+ information. Currently, the only information needed is
+ information about the key types in the table. It is not
+ possible to handle multiple keys in Mnesia, but many SNMP
+ tables have multiple keys. Therefore, the following
+ convention is used: if a table has multiple keys, these must
+ always be stored as a tuple of the keys. Information about
+ the key types is specified as a tuple of atoms describing
+ the types. The only significant type is
+ <c>fix_string</c>. This means that a string has fixed
+ size. For example:
+ </p>
+ <code type="none">
+mnesia:snmp_open_table(person, [{key, string}])
+ </code>
+ <p>causes the <c>person</c> table to be ordered as an SNMP
+ table.
+ </p>
+ <p>Consider the following schema for a table of company
+ employees. Each employee is identified by department number
+ and name. The other table column stores the telephone number:
+ </p>
+ <code type="none">
+mnesia:create_table(employee,
+ [{snmp, [{key, {integer, string}}]},
+ {attributes, record_info(fields, employees)}]),
+ </code>
+ <p>The corresponding SNMP table would have three columns;
+ <c>department</c>, <c>name</c> and <c>telno</c>.
+ </p>
+ <p>It is possible to have table columns that are not visible
+ through the SNMP protocol. These columns must be the last
+ columns of the table. In the previous example, the SNMP
+ table could have columns <c>department</c> and <c>name</c>
+ only. The application could then use the <c>telno</c> column
+ internally, but it would not be visible to the SNMP
+ managers.
+ </p>
+ <p>In a table monitored by SNMP, all elements must be
+ integers, strings, or lists of integers.
+ </p>
+ <p>When a table is SNMP ordered, modifications are more
+ expensive than usual, O(logN). And more memory is used.
+ </p>
+ <p><em>Note:</em>Only the lexicographical SNMP ordering is
+ implemented in Mnesia, not the actual SNMP monitoring.</p>
+ </desc>
+ </func>
+ <func>
+ <name>start() -> ok | {error, Reason} </name>
+ <fsummary>Start a local Mnesia system.</fsummary>
+ <desc>
+ <p>The start-up procedure for a set of Mnesia nodes is a
+ fairly complicated operation. A Mnesia system consists of a set
+ of nodes, with Mnesia started locally on all
+ participating nodes. Normally, each node has a directory where
+ all the Mnesia files are written. This directory will be
+ referred to as the Mnesia directory. Mnesia may also be
+ started on disc-less nodes. See <c>mnesia:create_schema/1</c>
+ and the Mnesia User's Guide for more information about disc-less
+ nodes.
+ </p>
+ <p>The set of nodes which makes up a Mnesia system is kept in
+ a schema and it is possible to add and remove Mnesia nodes
+ from the schema. The initial schema is normally created on
+ disc with the function <c>mnesia:create_schema/1</c>. On
+ disc-less nodes, a tiny default schema is generated each time
+ Mnesia is started. During the start-up procedure, Mnesia
+ will exchange schema information between the nodes in order
+ to verify that the table definitions are compatible.
+ </p>
+ <p>Each schema has a unique cookie which may be regarded as a
+ unique schema identifier. The cookie must be the same on all
+ nodes where Mnesia is supposed to run. See the Mnesia
+ User's Guide for more information about these details.
+ </p>
+ <p>The schema file, as well as all other files which Mnesia
+ needs, are kept in the Mnesia directory. The command line
+ option <c>-mnesia dir Dir</c> can be used to specify the
+ location of this directory to the Mnesia system. If no such
+ command line option is found, the name of the directory
+ defaults to <c>Mnesia.Node</c>.
+ </p>
+ <p><c>application:start(mnesia)</c> may also be used.</p>
+ </desc>
+ </func>
+ <func>
+ <name>stop() -> stopped </name>
+ <fsummary>Stop Mnesia locally.</fsummary>
+ <desc>
+ <p>Stops Mnesia locally on the current node.
+ </p>
+ <p><c>application:stop(mnesia)</c> may also be used.</p>
+ </desc>
+ </func>
+ <func>
+ <name>subscribe(EventCategory)</name>
+ <fsummary>Subscribe to events of type <c>EventCategory</c>.</fsummary>
+ <desc>
+ <p>Ensures that a copy of all events of type
+ <c>EventCategory</c> are sent to the caller. The event
+ types available are described in the Mnesia User's Guide.</p>
+ </desc>
+ </func>
+ <func>
+ <name>sync_dirty(Fun, [, Args]) -> ResultOfFun | exit(Reason) </name>
+ <fsummary>Call the Fun in a context which is not protected by a transaction.</fsummary>
+ <desc>
+ <p>Call the <c>Fun</c> in a context which is not protected
+ by a transaction. The Mnesia function calls performed in the
+ <c>Fun</c> are mapped to the corresponding dirty functions.
+ It is performed in almost the same context as
+ <c>mnesia:async_dirty/1,2</c>. The difference is that the
+ operations are performed synchronously. The caller waits for
+ the updates to be performed on all active replicas before
+ the <c>Fun</c> returns. See <c>mnesia:activity/4</c> and the
+ Mnesia User's Guide for more details.</p>
+ </desc>
+ </func>
+ <func>
+ <name>sync_transaction(Fun, [[, Args], Retries]) -> {aborted, Reason} | {atomic, ResultOfFun} </name>
+ <fsummary>Synchronously execute a transaction.</fsummary>
+ <desc>
+ <p>This function waits until data have been committed and
+ logged to disk (if disk is used) on every involved node before
+ it returns, otherwise it behaves as
+ <c>mnesia:transaction/[1,2,3]</c>.</p>
+ <p>This functionality can be used to avoid that one process may overload
+ a database on another node.</p>
+ </desc>
+ </func>
+ <func>
+ <name>system_info(InfoKey) -> Info | exit({aborted, Reason})</name>
+ <fsummary>Return information about the Mnesia system</fsummary>
+ <desc>
+ <p>Returns information about the Mnesia system, such as
+ transaction statistics, db_nodes, and configuration parameters.
+ Valid keys are:</p>
+ <list type="bulleted">
+ <item>
+ <p><c>all</c>. This argument returns a list of all
+ local system information. Each element is a
+ <c>{InfoKey, InfoVal}</c> tuples.<em>Note:</em> New <c>InfoKey</c>'s may
+ be added and old undocumented <c>InfoKey</c>'s may be removed without
+ notice.</p>
+ </item>
+ <item>
+ <p><c>access_module</c>. This argument returns the name of
+ the module which is configured to be the activity access
+ callback module.
+ </p>
+ </item>
+ <item>
+ <p><c>auto_repair</c>. This argument returns
+ <c>true</c> or <c>false</c> to indicate if Mnesia is
+ configured to invoke the auto repair facility on corrupted
+ disc files.
+ </p>
+ </item>
+ <item>
+ <p><c>backup_module</c>. This argument returns the name of
+ the module which is configured to be the backup
+ callback module.
+ </p>
+ </item>
+ <item>
+ <p><c>checkpoints</c>. This argument
+ returns a list of the names of the
+ checkpoints currently active on this node.
+ </p>
+ </item>
+ <item>
+ <p><c>event_module</c>. This argument returns the name of
+ the module which is the event handler callback module.
+ </p>
+ </item>
+ <item>
+ <p><c>db_nodes</c>. This argument returns
+ the nodes which make up the persistent database. Disc
+ less nodes will only be included in the list of nodes if
+ they explicitly has been added to the schema, e.g. with
+ <c>mnesia:add_table_copy/3</c>. The function can be
+ invoked even if Mnesia is not yet running.
+ </p>
+ </item>
+ <item>
+ <p><c>debug</c>. This argument returns the current
+ debug level of Mnesia.
+ </p>
+ </item>
+ <item>
+ <p><c>directory</c>. This argument returns the name of
+ the Mnesia directory. It can be invoked even if Mnesia is
+ not yet running.
+ </p>
+ </item>
+ <item>
+ <p><c>dump_log_load_regulation</c>. This argument
+ returns a boolean which tells whether Mnesia is
+ configured to load regulate the dumper process or not.
+ This feature is temporary and will disappear in future
+ releases.
+ </p>
+ </item>
+ <item>
+ <p><c>dump_log_time_threshold</c>. This argument
+ returns the time threshold for transaction log dumps in
+ milliseconds.
+ </p>
+ </item>
+ <item>
+ <p><c>dump_log_update_in_place</c>. This argument
+ returns a boolean which tells whether Mnesia is
+ configured to perform the updates in the dets files
+ directly or if the updates should be performed in a copy
+ of the dets files.
+ </p>
+ </item>
+ <item>
+ <p><c>dump_log_write_threshold</c>. This argument
+ returns the write threshold for transaction log dumps as
+ the number of writes to the transaction log.
+ </p>
+ </item>
+ <item>
+ <p><c>extra_db_nodes</c>. This argument returns a list
+ of extra db_nodes to be contacted at start-up.
+ </p>
+ </item>
+ <item>
+ <p><c>fallback_activated</c>. This argument returns
+ true if a fallback is activated, otherwise false.
+ </p>
+ </item>
+ <item>
+ <p><c>held_locks</c>. This argument returns a list of
+ all locks held by the local Mnesia lock manager.
+ </p>
+ </item>
+ <item>
+ <p><c>is_running</c>. This argument returns <c>yes</c>
+ or <c>no</c> to indicate if Mnesia is running. It may
+ also return <c>starting</c> or <c>stopping</c>. Can be
+ invoked even if Mnesia is not yet running.
+ </p>
+ </item>
+ <item>
+ <p><c>local_tables</c>. This argument returns a list
+ of all tables which are configured to reside locally.
+ </p>
+ </item>
+ <item>
+ <p><c>lock_queue</c>. This argument returns a list of
+ all transactions that are queued for execution by the
+ local lock manager.
+ </p>
+ </item>
+ <item>
+ <p><c>log_version</c>. This argument returns the
+ version number of the Mnesia transaction log format.
+ </p>
+ </item>
+ <item>
+ <p><c>master_node_tables</c>. This argument returns a
+ list of all tables with at least one master node.
+ </p>
+ </item>
+ <item>
+ <p><c>protocol_version</c>. This argument
+ returns the version number
+ of the Mnesia inter-process communication protocol.
+ </p>
+ </item>
+ <item>
+ <p><c>running_db_nodes</c>. This argument returns a
+ list of nodes where Mnesia currently is running. This
+ function can be invoked even if Mnesia is not yet
+ running, but it will then have slightly different
+ semantics. If Mnesia is down on the local node, the
+ function will return those other <c>db_nodes</c> and
+ <c>extra_db_nodes</c> that for the moment are up and
+ running. If Mnesia is started, the function will return
+ those nodes that Mnesia on the local node is fully
+ connected to. Only those nodes that Mnesia has exchanged
+ schema information with are included as
+ <c>running_db_nodes</c>. After the merge of schemas, the
+ local Mnesia system is fully operable and applications
+ may perform access of remote replicas. Before the schema
+ merge Mnesia will only operate locally. Sometimes there
+ may be more nodes included in the
+ <c>running_db_nodes</c> list than all <c>db_nodes</c>
+ and <c>extra_db_nodes</c> together.
+ </p>
+ </item>
+ <item>
+ <p><c>schema_location</c>. This argument returns the
+ initial schema location.
+ </p>
+ </item>
+ <item>
+ <p><c>subscribers</c>. This argument returns a list of
+ local processes currently subscribing to system events.
+ </p>
+ </item>
+ <item>
+ <p><c>tables</c>. This argument returns a list of all
+ locally known tables.
+ </p>
+ </item>
+ <item>
+ <p><c>transactions</c>. This argument returns a list
+ of all currently active local transactions.
+ </p>
+ </item>
+ <item>
+ <p><c>transaction_failures</c>. This argument returns
+ a number which indicates how many transactions have
+ failed since Mnesia was started.
+ </p>
+ </item>
+ <item>
+ <p><c>transaction_commits</c>. This argument returns a
+ number which indicates how many transactions have
+ terminated successfully since Mnesia was started.
+ </p>
+ </item>
+ <item>
+ <p><c>transaction_restarts</c>. This argument returns
+ a number which indicates how many transactions have been
+ restarted since Mnesia was started.
+ </p>
+ </item>
+ <item>
+ <p><c>transaction_log_writes</c>. This argument
+ returns a number which indicates the number of write
+ operation that have been performed to the transaction
+ log since start-up.
+ </p>
+ </item>
+ <item>
+ <p><c>use_dir</c>. This argument returns a boolean
+ which indicates whether the Mnesia directory is used or
+ not. Can be invoked even if Mnesia is not yet running.
+ </p>
+ </item>
+ <item>
+ <p><c>version</c>. This argument returns the current
+ version number of Mnesia.
+ </p>
+ </item>
+ </list>
+ </desc>
+ </func>
+ <func>
+ <name>table(Tab [,[Option]]) -> QueryHandle </name>
+ <fsummary>Return a QLC query handle.</fsummary>
+ <desc>
+ <p> <marker id="qlc_table"></marker>
+Returns a QLC (Query List Comprehension) query handle, see
+ <seealso marker="stdlib:qlc">qlc(3)</seealso>.The module <c>qlc</c> implements a query language, it
+ can use mnesia tables as sources of data. Calling
+ <c>mnesia:table/1,2</c> is the means to make the <c>mnesia</c>
+ table <c>Tab</c> usable to QLC.</p>
+ <p>The list of Options may contain mnesia options or QLC
+ options, the following options are recognized by Mnesia:
+ <c>{traverse, SelectMethod},{lock, Lock},{n_objects,Number}</c>, any other option is forwarded
+ to QLC. The <c>lock</c> option may be <c>read</c> or
+ <c>write</c>, default is <c>read</c>. The option
+ <c>n_objects</c> specifies (roughly) the number of objects
+ returned from mnesia to QLC. Queries to remote tables may
+ need a larger chunks to reduce network overhead, default
+ <c>100</c> objects at a time are returned. The option
+ <c>traverse</c> determines the method to traverse the whole
+ table (if needed), the default method is <c>select</c>:</p>
+ <list type="bulleted">
+ <item>
+ <p><c>select</c>. The table is traversed by calling
+ <c>mnesia:select/4</c> and <c>mnesia:select/1</c>. The
+ match specification (the second argument of <c>select/3</c>)
+ is assembled by QLC: simple filters are
+ translated into equivalent match specifications while
+ more complicated filters have to be applied to all
+ objects returned by <c>select/3</c> given a match
+ specification that matches all objects.</p>
+ </item>
+ <item>
+ <p><c>{select, MatchSpec}</c>. As for <c>select</c>
+ the table is traversed by calling <c>mnesia:select/3</c> and
+ <c>mnesia:select/1</c>. The difference is that the match
+ specification is explicitly given. This is how to state
+ match specifications that cannot easily be expressed
+ within the syntax provided by QLC.</p>
+ </item>
+ </list>
+ </desc>
+ </func>
+ <func>
+ <name>table_info(Tab, InfoKey) -> Info | exit({aborted, Reason})</name>
+ <fsummary>Return local information about table.</fsummary>
+ <desc>
+ <p>The <c>table_info/2</c> function takes two arguments.
+ The first is the name of a Mnesia table, the second is one of
+ the following keys:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>all</c>. This argument returns a list of all
+ local table information. Each element is a <c>{InfoKey, ItemVal}</c> tuples. <em>Note:</em> New <c>InfoItem</c>'s may be
+ added and old undocumented <c>InfoItem</c>'s may be removed without
+ notice.</p>
+ </item>
+ <item>
+ <p><c>access_mode</c>. This argument returns the
+ access mode of the table. The access mode may either be
+ read_only or read_write.
+ </p>
+ </item>
+ <item>
+ <p><c>arity</c>. This argument returns the arity of
+ records in the table as specified in the schema.
+ </p>
+ </item>
+ <item>
+ <p><c>attributes</c>. This argument returns the table
+ attribute names which are specified in the schema.
+ </p>
+ </item>
+ <item>
+ <p><c>checkpoints</c>. This argument returns the names
+ of the currently active checkpoints which involves this
+ table on this node.
+ </p>
+ </item>
+ <item>
+ <p><c>cookie</c>. This argument returns a table cookie
+ which is a unique system generated identifier for the
+ table. The cookie is used internally to ensure that two
+ different table definitions using the same table name
+ cannot accidentally be intermixed. The cookie is
+ generated when the table is initially created.
+ </p>
+ </item>
+ <item>
+ <p><c>disc_copies</c>. This argument returns the nodes
+ where a disc_copy of the table resides according to the
+ schema.
+ </p>
+ </item>
+ <item>
+ <p><c>disc_only_copies </c>. This argument returns the
+ nodes where a disc_only_copy of the table resides
+ according to the schema.
+ </p>
+ </item>
+ <item>
+ <p><c>index</c>. This argument returns the list of
+ index position integers for the table.
+ </p>
+ </item>
+ <item>
+ <p><c>load_node</c>. This argument returns the name of
+ the node that Mnesia loaded the table from. The
+ structure of the returned value is unspecified but may
+ be useful for debugging purposes.
+ </p>
+ </item>
+ <item>
+ <p><c>load_order</c>. This argument returns the load
+ order priority of the table. It is an integer and
+ defaults to <c>0</c> (zero).
+ </p>
+ </item>
+ <item>
+ <p><c>load_reason</c>. This argument returns the
+ reason of why Mnesia decided to load the table. The
+ structure of the returned value is unspecified but may
+ be useful for debugging purposes.
+ </p>
+ </item>
+ <item>
+ <p><c>local_content</c>. This argument returns
+ <c>true</c> or <c>false</c> to indicate whether the
+ table is configured to have locally unique content on
+ each node.
+ </p>
+ </item>
+ <item>
+ <p><c>master_nodes</c>. This argument returns the
+ master nodes of a table.
+ </p>
+ </item>
+ <item>
+ <p><c>memory</c>. This argument returns the number of
+ words allocated to the table on this node.
+ </p>
+ </item>
+ <item>
+ <p><c>ram_copies</c>. This argument returns the nodes
+ where a ram_copy of the table resides according to the
+ schema.
+ </p>
+ </item>
+ <item>
+ <p><c>record_name</c>. This argument returns the
+ record name, common for all records in the table
+ </p>
+ </item>
+ <item>
+ <p><c>size</c>. This argument returns the number of
+ records inserted in the table.
+ </p>
+ </item>
+ <item>
+ <p><c>snmp</c>. This argument returns the SNMP struct.
+ <c>[]</c>meaning that the table currently has no SNMP
+ properties.
+ </p>
+ </item>
+ <item>
+ <p><c>storage_type</c>.This argument returns the local
+ storage type of the table. It can be <c>disc_copies</c>,
+ <c>ram_copies</c>, <c>disc_only_copies</c>, or the atom
+ <c>unknown</c>. <c>unknown</c> is returned for all
+ tables which only reside remotely.
+ </p>
+ </item>
+ <item>
+ <p><c>subscribers</c>. This argument returns a list
+ of local processes currently subscribing to local table
+ events which involve this table on this node.
+ </p>
+ </item>
+ <item>
+ <p><c>type</c>. This argument returns the table type,
+ which is either <c>bag</c>, <c>set</c> or <c>ordered_set</c>..
+ </p>
+ </item>
+ <item>
+ <p><c>user_properties</c>. This argument returns the
+ user associated table properties of the table. It is a
+ list of the stored property records.
+ </p>
+ </item>
+ <item>
+ <p><c>version</c>. This argument returns the current
+ version of the table definition. The table version is
+ incremented when the table definition is changed. The
+ table definition may be incremented directly when the
+ table definition has been changed in a schema
+ transaction, or when a committed table definition is
+ merged with table definitions from other nodes during
+ start-up.
+ </p>
+ </item>
+ <item>
+ <p><c>where_to_read</c>.This argument returns the node
+ where the table can be read. If the value <c>nowhere</c>
+ is returned, the table is not loaded, or it resides at a
+ remote node which is not running.
+ </p>
+ </item>
+ <item>
+ <p><c>where_to_write</c>. This argument returns a list
+ of the nodes that currently hold an active replica of
+ the table.
+ </p>
+ </item>
+ <item>
+ <p><c>wild_pattern</c>. This argument returns a
+ structure which can be given to the various match
+ functions for a certain table. A record tuple is where all
+ record fields have the value <c>'_'</c>.
+ </p>
+ </item>
+ </list>
+ </desc>
+ </func>
+ <func>
+ <name>transaction(Fun [[, Args], Retries]) -> {aborted, Reason} | {atomic, ResultOfFun}</name>
+ <fsummary>Execute a transaction.</fsummary>
+ <desc>
+ <p>This function executes the functional object <c>Fun</c>
+ with arguments <c>Args</c> as a transaction.
+ </p>
+ <p>The code which executes inside the transaction
+ can consist of a series of table manipulation functions.
+ If something goes wrong inside the transaction as a result of a
+ user error or a certain table not being available, the
+ entire transaction is aborted and the function
+ <c>transaction/1</c> returns the tuple
+ <c>{aborted, Reason}</c>.
+ </p>
+ <p>If all is well, <c>{atomic, ResultOfFun}</c> is returned where
+ <c>ResultOfFun</c> is the value of the last expression in
+ <c>Fun</c>.
+ </p>
+ <p>A function which adds a family to the database can be
+ written as follows if we have a structure <c>{family, Father, Mother, ChildrenList}</c>:
+ </p>
+ <code type="none">
+add_family({family, F, M, Children}) ->
+ ChildOids = lists:map(fun oid/1, Children),
+ Trans = fun() ->
+ mnesia:write(F#person{children = ChildOids},
+ mnesia:write(M#person{children = ChildOids},
+ Write = fun(Child) -> mnesia:write(Child) end,
+ lists:foreach(Write, Children)
+ end,
+ mnesia:transaction(Trans).
+
+oid(Rec) -> {element(1, Rec), element(2, Rec)}.
+ </code>
+ <p>This code adds a set of people to the database. Running this code
+ within one transaction will ensure that either the whole
+ family is added to the database, or the whole transaction
+ aborts. For example, if the last child is badly formatted,
+ or the executing process terminates due to an
+ <c>'EXIT'</c> signal while executing the family code, the
+ transaction aborts. Accordingly, the situation where half a
+ family is added can never occur.
+ </p>
+ <p>It is also useful to update the database within a transaction
+ if several processes concurrently update the same records.
+ For example, the function <c>raise(Name, Amount)</c>, which
+ adds <c>Amount</c> to the salary field of a person, should
+ be implemented as follows:
+ </p>
+ <code type="none">
+raise(Name, Amount) ->
+ mnesia:transaction(fun() ->
+ case mnesia:wread({person, Name}) of
+ [P] ->
+ Salary = Amount + P#person.salary,
+ P2 = P#person{salary = Salary},
+ mnesia:write(P2);
+ _ ->
+ mnesia:abort("No such person")
+ end
+ end).
+ </code>
+ <p>When this function executes within a transaction,
+ several processes running on different nodes can concurrently
+ execute the <c>raise/2</c> function without interfering
+ with each other.
+ </p>
+ <p>Since Mnesia detects deadlocks, a transaction can be
+ restarted any number of times. This function will attempt a restart as specified in
+ <c>Retries</c>. <c>Retries</c> must
+ be an integer greater than 0 or the atom <c>infinity</c>. Default is
+ <c>infinity</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>transform_table(Tab, Fun, NewAttributeList, NewRecordName) -> {aborted, R} | {atomic, ok} </name>
+ <fsummary>Change format on all records in table. <c>Tab</c></fsummary>
+ <desc>
+ <p>This function applies the argument <c>Fun</c> to all
+ records in the table. <c>Fun</c> is a function which takes a
+ record of the old type and returns a transformed record of the
+ new type. The <c>Fun</c> argument can also be the atom
+ <c>ignore</c>, it indicates that only the meta data about the table will
+ be updated. Usage of <c>ignore</c> is not recommended but included
+ as a possibility for the user do to his own transform.
+ <c>NewAttributeList</c> and <c>NewRecordName</c>
+ specifies the attributes and the new record type of converted
+ table. Table name will always remain unchanged, if the
+ record_name is changed only the mnesia functions which
+ uses table identifiers will work, e.g. <c>mnesia:write/3</c>
+ will work but <c>mnesia:write/1</c> will not.</p>
+ </desc>
+ </func>
+ <func>
+ <name>transform_table(Tab, Fun, NewAttributeList) -> {aborted, R} | {atomic, ok} </name>
+ <fsummary>Change format on all records in table. <c>Tab</c></fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:transform_table(Tab, Fun, NewAttributeList, RecName)</c>
+ where <c>RecName</c> is <c>mnesia:table_info(Tab, record_name)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>traverse_backup(Source, [SourceMod,] Target, [TargetMod,] Fun, Acc) -> {ok, LastAcc} | {error, Reason}</name>
+ <fsummary>Traversal of a backup.</fsummary>
+ <desc>
+ <p>With this function it is possible to iterate over a backup,
+ either for the purpose of transforming it into a new backup,
+ or just reading it. The arguments are explained briefly
+ below. See the Mnesia User's Guide for additional
+ details.
+ </p>
+ <list type="bulleted">
+ <item><c>SourceMod</c> and <c>TargetMod</c> are the names of
+ the modules which actually access the backup
+ media.
+ </item>
+ <item><c>Source</c> and <c>Target</c> are opaque data used
+ exclusively by the modules <c>SourceMod</c> and
+ <c>TargetMod</c> for the purpose of initializing the
+ backup media.
+ </item>
+ <item><c>Acc</c> is an initial accumulator value.
+ </item>
+ <item><c>Fun(BackupItems, Acc)</c> is applied to each item in
+ the backup. The Fun must return a tuple
+ <c>{BackupItems,NewAcc}</c>, where <c>BackupItems</c> is
+ a list of valid backup items, and <c>NewAcc</c> is a new
+ accumulator value. The returned backup items are written
+ in the target backup.
+ </item>
+ <item><c>LastAcc</c> is the last accumulator value. This is
+ the last <c>NewAcc</c> value that was returned by <c>Fun</c>.
+ </item>
+ </list>
+ </desc>
+ </func>
+ <func>
+ <name>uninstall_fallback() -> ok | {error,Reason}</name>
+ <fsummary>Uninstall a fallback.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:uninstall_fallback([{scope, global}])</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>uninstall_fallback(Args) -> ok | {error,Reason}</name>
+ <fsummary>Uninstall a fallback.</fsummary>
+ <desc>
+ <p>This function is used to de-install a fallback before it
+ has been used to restore the database. This is normally a
+ distributed operation that is either performed on all
+ nodes with disc resident schema or none. Uninstallation of
+ fallbacks requires Erlang to be up and running on all
+ involved nodes, but it does not matter if Mnesia is running
+ or not. Which nodes that are considered as disc-resident
+ nodes is determined from the schema info in the local
+ fallback.
+ </p>
+ <p><c>Args</c> is a list of the following tuples:
+ </p>
+ <list type="bulleted">
+ <item>
+ <p><c>{module, BackupMod}</c>.
+ See <c>mnesia:install_fallback/2</c> about the
+ semantics.</p>
+ </item>
+ <item>
+ <p><c>{scope, Scope}</c>
+ See <c>mnesia:install_fallback/2</c> about the
+ semantics.</p>
+ </item>
+ <item>
+ <p><c>{mnesia_dir, AlternateDir}</c>
+ See <c>mnesia:install_fallback/2</c> about the
+ semantics.</p>
+ </item>
+ </list>
+ </desc>
+ </func>
+ <func>
+ <name>unsubscribe(EventCategory)</name>
+ <fsummary>Subscribe to events of type <c>EventCategory</c>.</fsummary>
+ <desc>
+ <p>Stops sending events of type
+ <c>EventCategory</c> to the caller.</p>
+ </desc>
+ </func>
+ <func>
+ <name>wait_for_tables(TabList,Timeout) -> ok | {timeout, BadTabList} | {error, Reason} </name>
+ <fsummary>Wait for tables to be accessible.</fsummary>
+ <desc>
+ <p>Some applications need to wait for certain tables to
+ be accessible in order to do useful work.
+ <c>mnesia:wait_for_tables/2</c> hangs until all tables in the
+ <c>TabList</c> are accessible, or until <c>timeout</c> is
+ reached.</p>
+ </desc>
+ </func>
+ <func>
+ <name>wread({Tab, Key}) -> transaction abort | RecordList </name>
+ <fsummary>Read records with given key.</fsummary>
+ <desc>
+ <p>Invoke <c>mnesia:read(Tab, Key, write)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>write(Record) -> transaction abort | ok </name>
+ <fsummary>Writes a record into the database.</fsummary>
+ <desc>
+ <p>Invoke <c>mnesia:write(Tab, Record, write)</c> where
+ <c>Tab</c> is <c>element(1, Record)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>write(Tab, Record, LockKind) -> transaction abort | ok </name>
+ <fsummary>Write an record into the database.</fsummary>
+ <desc>
+ <p>Writes the record <c>Record</c> to the table <c>Tab</c>.
+ </p>
+ <p>The function returns <c>ok</c>, or aborts if an error
+ occurs. For example, the transaction aborts if no
+ <c>person</c> table exists.
+ </p>
+ <p>The semantics of this function is context sensitive. See
+ <c>mnesia:activity/4</c> for more information. In transaction
+ context it acquires a lock of type <c>LockKind</c>. The
+ following lock types are supported: <c>write</c> and
+ <c>sticky_write</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>write_lock_table(Tab) -> ok | transaction abort</name>
+ <fsummary>Set write lock on an entire table.</fsummary>
+ <desc>
+ <p>Invokes <c>mnesia:lock({table, Tab}, write)</c>.</p>
+ </desc>
+ </func>
+ </funcs>
+
+ <section>
+ <title>Configuration Parameters</title>
+ <p>Mnesia reads the following application configuration
+ parameters:</p>
+ <list type="bulleted">
+ <item>
+ <p><c>-mnesia access_module Module</c>. The
+ name of the Mnesia activity access callback module. The default is
+ <c>mnesia</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia auto_repair true | false</c>. This flag controls
+ whether Mnesia will try to automatically repair
+ files that have not been properly closed. The default is
+ <c>true</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia backup_module Module</c>. The
+ name of the Mnesia backup callback module. The default is
+ <c>mnesia_backup</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia debug Level</c>
+ Controls the debug level of Mnesia.
+ Possible values are:</p>
+ <taglist>
+ <tag><c>none</c></tag>
+ <item>
+ <p>No trace outputs at all. This is the default setting.
+ </p>
+ </item>
+ <tag><c>verbose</c></tag>
+ <item>
+ <p>Activates tracing of important debug events. These
+ debug events generate <c>{mnesia_info, Format, Args}</c>
+ system events. Processes may subscribe to these events with
+ <c>mnesia:subscribe/1</c>. The events are always sent to Mnesia's
+ event handler.
+ </p>
+ </item>
+ <tag><c>debug</c></tag>
+ <item>
+ <p>Activates all events at the verbose level plus full
+ trace of all debug events. These debug events generate
+ <c>{mnesia_info, Format, Args}</c> system events. Processes may
+ subscribe to these events with <c>mnesia:subscribe/1</c>. The
+ events are always sent to the Mnesia event handler. On this
+ debug level, the Mnesia event handler starts subscribing to
+ updates in the schema table.
+ </p>
+ </item>
+ <tag><c>trace</c></tag>
+ <item>
+ <p>Activates all events at the level debug. On this
+ debug level, the Mnesia event handler starts subscribing to
+ updates on all Mnesia tables. This level is only intended
+ for debugging small toy systems since many large
+ events may be generated.
+ </p>
+ </item>
+ <tag><c>false</c></tag>
+ <item>
+ <p>An alias for none.
+ </p>
+ </item>
+ <tag><c>true</c></tag>
+ <item>
+ <p>An alias for debug.
+ </p>
+ </item>
+ </taglist>
+ </item>
+ <item>
+ <p><c>-mnesia core_dir Directory</c>. The name of the
+ directory where Mnesia core files is stored or
+ false. Setting it implies that also ram only nodes, will
+ generate a core file if a crash occurs. </p>
+ </item>
+ <item>
+ <p><c>-mnesia dc_dump_limit Number</c>.
+ Controls how often <c>disc_copies</c> tables are dumped from memory.
+ Tables are dumped when
+ <c>filesize(Log) > (filesize(Tab)/Dc_dump_limit)</c>.
+ Lower values reduces cpu overhead but increases disk space and
+ startup times. The default is 4.</p>
+ </item>
+ <item>
+ <p><c>-mnesia dir Directory</c>. The name of the directory
+ where all Mnesia data is stored. The name of the directory must
+ be unique for the current node. Two nodes may, under no
+ circumstances, share the same Mnesia directory. The results are
+ totally unpredictable.</p>
+ </item>
+ <item>
+ <p><c>-mnesia dump_log_load_regulation true | false</c>.
+ Controls if the log dumps should be performed as fast as
+ possible or if the dumper should do its own load
+ regulation. This feature is temporary and will disappear in a
+ future release. The default is <c>false</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia dump_log_update_in_place true | false</c>.
+ Controls if log dumps are performed on a copy of
+ the original data file, or if the log dump is
+ performed on the original data file. The default is <c>true</c></p>
+ </item>
+ <item>
+ <p><c>-mnesia dump_log_write_threshold Max</c>, where
+ <c>Max</c> is an integer which specifies the maximum number of writes
+ allowed to the transaction log before a new dump of the log
+ is performed. It defaults to 100 log writes.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia dump_log_time_threshold Max</c>,
+ where <c>Max</c> is an integer which
+ specifies the dump log interval in milliseconds. It defaults
+ to 3 minutes. If a dump has not been performed within
+ <c>dump_log_time_threshold</c> milliseconds, then a new dump is
+ performed regardless of how many writes have been
+ performed.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia event_module Module</c>. The
+ name of the Mnesia event handler callback module. The default is
+ <c>mnesia_event</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia extra_db_nodes Nodes</c> specifies a list of
+ nodes, in addition to the ones found in the schema, with which
+ Mnesia should also establish contact. The default value
+ is the empty list <c>[]</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia fallback_error_function {UserModule, UserFunc}</c>
+ specifies a user supplied callback function
+ which will be called if a fallback is installed and mnesia
+ goes down on another node. Mnesia will call the function
+ with one argument the name of the dying node, e.g.
+ <c>UserModule:UserFunc(DyingNode)</c>.
+ Mnesia should be restarted or else
+ the database could be inconsistent.
+ The default behaviour is to terminate mnesia.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia max_wait_for_decision Timeout</c>. Specifies
+ how long Mnesia will wait for other nodes to share their
+ knowledge regarding the outcome of an unclear transaction. By
+ default the <c>Timeout</c> is set to the atom
+ <c>infinity</c>, which implies that if Mnesia upon startup
+ encounters a "heavyweight transaction" whose outcome is
+ unclear, the local Mnesia will wait until Mnesia is started
+ on some (in worst cases all) of the other nodes that were
+ involved in the interrupted transaction. This is a very rare
+ situation, but when/if it happens, Mnesia does not guess if
+ the transaction on the other nodes was committed or aborted.
+ Mnesia will wait until it knows the outcome and then act
+ accordingly.
+ </p>
+ <p>If <c>Timeout</c> is set to an integer value in
+ milliseconds, Mnesia will force "heavyweight transactions"
+ to be finished, even if the outcome of the transaction for
+ the moment is unclear. After <c>Timeout</c> milliseconds,
+ Mnesia will commit/abort the transaction and continue with
+ the startup. This may lead to a situation where the
+ transaction is committed on some nodes and aborted on other
+ nodes. If the transaction was a schema transaction, the
+ inconsistency may be fatal.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia no_table_loaders NUMBER</c> specifies the number of
+ parallel table loaders during start. More loaders can be good if the
+ network latency is high or if many tables contains few records.
+ The default value is <c>2</c>.
+ </p>
+ </item>
+ <item>
+ <p><c>-mnesia schema_location Loc</c> controls where
+ Mnesia will look for its schema. The parameter
+ <c>Loc</c> may be one of the following atoms: </p>
+ <taglist>
+ <tag><c>disc</c></tag>
+ <item>
+ <p>Mandatory disc. The schema is assumed to be located
+ in the Mnesia directory. If the schema cannot be found,
+ Mnesia refuses to start. This is the old behavior.
+ </p>
+ </item>
+ <tag><c>ram</c></tag>
+ <item>
+ <p>Mandatory RAM. The schema resides in RAM
+ only. At start-up, a tiny new schema is generated. This
+ default schema just contains the definition of the schema
+ table and only resides on the local node. Since no other
+ nodes are found in the default schema, the configuration
+ parameter <c>extra_db_nodes</c> must be used in
+ order to let the
+ node share its table definitions with other nodes. (The
+ <c>extra_db_nodes</c> parameter may also be used on disc based nodes.)
+ </p>
+ </item>
+ <tag><c>opt_disc</c></tag>
+ <item>
+ <p>Optional disc. The schema may reside either on disc
+ or in RAM. If the schema is found on disc, Mnesia starts as a
+ disc based node and the storage type of the schema table is
+ <c>disc_copies</c>. If no schema is found on disc, Mnesia starts
+ as a disc-less node and the storage type of the schema table is
+ <c>ram_copies</c>. The default value for the application parameter
+ is <c>opt_disc</c>.
+ </p>
+ </item>
+ </taglist>
+ </item>
+ </list>
+ <p>First the SASL application parameters are checked, then
+ the command line flags are checked, and finally, the default
+ value is chosen.
+ </p>
+ </section>
+
+ <section>
+ <title>See Also</title>
+ <p>mnesia_registry(3), mnesia_session(3), qlc(3),
+ dets(3), ets(3), disk_log(3), application(3)
+ </p>
+ </section>
+
+</erlref>
+
diff --git a/lib/mnesia/doc/src/mnesia_frag_hash.xml b/lib/mnesia/doc/src/mnesia_frag_hash.xml
new file mode 100644
index 0000000000..ca03327994
--- /dev/null
+++ b/lib/mnesia/doc/src/mnesia_frag_hash.xml
@@ -0,0 +1,166 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>2002</year>
+ <year>2007</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>mnesia_frag_hash</title>
+ <prepared>H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date>2002-09-10</date>
+ <rev>A</rev>
+ <file>mnesia_frag_hash.sgml</file>
+ </header>
+ <module>mnesia_frag_hash</module>
+ <modulesummary>Defines mnesia_frag_hash callback behaviour</modulesummary>
+ <description>
+ <p>The module <c>mnesia_frag_hash</c> defines a callback
+ behaviour for user defined hash functions of fragmented tables.</p>
+ <p>Which module that is selected to implement the <c>mnesia_frag_hash</c>
+ behaviour for a particular fragmented table is specified together
+ with the other <c>frag_properties</c>. The <c>hash_module</c> defines
+ the module name. The <c>hash_state</c> defines the initial hash state.</p>
+ <p>It implements dynamic hashing which is a kind of hashing
+ that grows nicely when new fragments are added. It is well
+ suited for scalable hash tables</p>
+ </description>
+ <funcs>
+ <func>
+ <name>init_state(Tab, State) -> NewState | abort(Reason)</name>
+ <fsummary>Initiate the hash state for a new table</fsummary>
+ <type>
+ <v>Tab = atom()</v>
+ <v>State = term()</v>
+ <v>NewState = term()</v>
+ <v>Reason = term()</v>
+ </type>
+ <desc>
+ <p>This function is invoked when a fragmented table is
+ created with <c>mnesia:create_table/2</c> or when a
+ normal (un-fragmented) table is converted to be a
+ fragmented table with <c>mnesia:change_table_frag/2</c>.</p>
+ <p>Note that the <c>add_frag/2</c> function will be invoked
+ one time each for the rest of the fragments (all but number 1)
+ as a part of the table creation procedure.</p>
+ <p><c>State</c> is the initial value of the <c>hash_state</c><c>frag_property</c>. The <c>NewState</c> will be stored as
+ <c>hash_state</c> among the other <c>frag_properties</c>.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name>add_frag(State) -> {NewState, IterFrags, AdditionalLockFrags} | abort(Reason)</name>
+ <fsummary>This function is invoked when a new fragment is added to a fragmented table</fsummary>
+ <type>
+ <v>State = term()</v>
+ <v>NewState = term()</v>
+ <v>IterFrags = [integer()]</v>
+ <v>AdditionalLockFrags = [integer()]</v>
+ <v>Reason = term()</v>
+ </type>
+ <desc>
+ <p>In order to scale well, it is a good idea ensure that the
+ records are evenly distributed over all fragments including
+ the new one.</p>
+ <p>The <c>NewState</c> will be stored as <c>hash_state</c> among the
+ other <c>frag_properties</c>.
+ </p>
+ <p>As a part of the <c>add_frag</c> procedure, Mnesia will iterate
+ over all fragments corresponding to the <c>IterFrags</c> numbers
+ and invoke <c>key_to_frag_number(NewState,RecordKey)</c> for
+ each record. If the new fragment differs from the old
+ fragment, the record will be moved to the new fragment.</p>
+ <p>As the <c>add_frag</c> procedure is a part of a schema
+ transaction Mnesia will acquire a write locks on the
+ affected tables. That is both the fragments corresponding
+ to <c>IterFrags</c> and those corresponding to
+ <c>AdditionalLockFrags</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>del_frag(State) -> {NewState, IterFrags, AdditionalLockFrags} | abort(Reason)</name>
+ <fsummary>This function is invoked when a fragment is deleted from a fragmented table</fsummary>
+ <type>
+ <v>State = term()</v>
+ <v>NewState = term()</v>
+ <v>IterFrags = [integer()]</v>
+ <v>AdditionalLockFrags = [integer()]</v>
+ <v>Reason = term()</v>
+ </type>
+ <desc>
+ <p>The <c>NewState</c> will be stored as <c>hash_state</c> among the
+ other <c>frag_properties</c>.
+ </p>
+ <p>As a part of the <c>del_frag</c> procedure, Mnesia will iterate
+ over all fragments corresponding to the <c>IterFrags</c> numbers
+ and invoke <c>key_to_frag_number(NewState,RecordKey)</c> for
+ each record. If the new fragment differs from the old
+ fragment, the record will be moved to the new fragment.</p>
+ <p>Note that all records in the last fragment must be moved to
+ another fragment as the entire fragment will be deleted.</p>
+ <p>As the <c>del_frag</c> procedure is a part of a schema
+ transaction Mnesia will acquire a write locks on the
+ affected tables. That is both the fragments corresponding
+ to <c>IterFrags</c> and those corresponding to
+ <c>AdditionalLockFrags</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>key_to_frag_number(State, Key) -> FragNum | abort(Reason)</name>
+ <fsummary>Resolves the key of a record into a fragment number</fsummary>
+ <type>
+ <v>FragNum = integer()()</v>
+ <v>Reason = term()</v>
+ </type>
+ <desc>
+ <p>This function is invoked whenever Mnesia needs to determine
+ which fragment a certain record belongs to. It is typically
+ invoked at read, write and delete.</p>
+ </desc>
+ </func>
+ <func>
+ <name>match_spec_to_frag_numbers(State, MatchSpec) -> FragNums | abort(Reason)</name>
+ <fsummary>Resolves a MatchSpec into a list of fragment numbers</fsummary>
+ <type>
+ <v>MatcSpec = ets_select_match_spec()</v>
+ <v>FragNums = [FragNum]</v>
+ <v>FragNum = integer()</v>
+ <v>Reason = term()</v>
+ </type>
+ <desc>
+ <p>This function is invoked whenever Mnesia needs to determine
+ which fragments that needs to be searched for a MatchSpec.
+ It is typically invoked at select and match_object.</p>
+ </desc>
+ </func>
+ </funcs>
+
+ <section>
+ <title>See Also</title>
+ <p>mnesia(3)
+ </p>
+ </section>
+
+</erlref>
+
diff --git a/lib/mnesia/doc/src/mnesia_registry.xml b/lib/mnesia/doc/src/mnesia_registry.xml
new file mode 100644
index 0000000000..966134d508
--- /dev/null
+++ b/lib/mnesia/doc/src/mnesia_registry.xml
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>1998</year>
+ <year>2007</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>mnesia_registry</title>
+ <prepared>Dan Gudmundsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date>98-04-24</date>
+ <rev>A</rev>
+ <file>mnesia_registry.sgml</file>
+ </header>
+ <module>mnesia_registry</module>
+ <modulesummary>Dump support for registries in erl_interface. </modulesummary>
+ <description>
+ <p>The module <c>mnesia_registry</c> is usually part of
+ <c>erl_interface</c>, but for the time being, it is a part of the
+ Mnesia application.
+ </p>
+ <p><c>mnesia_registry</c> is mainly an module intended for
+ internal usage within OTP, but it has two functions that
+ are exported for public use.
+ </p>
+ <p>On C-nodes <c>erl_interface</c> has support for registry
+ tables. These reside in RAM on the C-node but they may also be
+ dumped into Mnesia tables. By default, the dumping of registry
+ tables via <c>erl_interface</c> causes a corresponding Mnesia
+ table to be created with <c>mnesia_registry:create_table/1</c>
+ if necessary.
+ </p>
+ <p>The tables that are created with these functions can be
+ administered as all other Mnesia tables. They may be included in
+ backups or replicas may be added etc. The tables are in fact
+ normal Mnesia tables owned by the user of the corresponding
+ <c>erl_interface</c> registries.
+ </p>
+ </description>
+ <funcs>
+ <func>
+ <name>create_table(Tab) -> ok | exit(Reason)</name>
+ <fsummary>Creates a registry table in Mnesia.</fsummary>
+ <desc>
+ <p>This is a wrapper function for
+ <c>mnesia:create_table/2</c> which creates a table (if there is no existing table)
+ with an appropriate set of <c>attributes</c>. The table will
+ only reside on the local node and its storage type will be
+ the same as the <c>schema</c> table on the local
+ node, ie. <c>{ram_copies,[node()]}</c> or
+ <c>{disc_copies,[node()]}</c>.
+ </p>
+ <p>It is this function that is used by <c>erl_interface</c> to
+ create the Mnesia table if it did not already exist.</p>
+ </desc>
+ </func>
+ <func>
+ <name>create_table(Tab, TabDef) -> ok | exit(Reason)</name>
+ <fsummary>Creates a customized registry table in Mnesia. </fsummary>
+ <desc>
+ <p>This is a wrapper function for
+ <c>mnesia:create_table/2</c> which creates a table (if there is no existing table)
+ with an appropriate set of <c>attributes</c>. The attributes
+ and <c>TabDef</c> are forwarded to
+ <c>mnesia:create_table/2</c>. For example, if the table should
+ reside as <c>disc_only_copies</c> on all nodes a call would
+ look like:</p>
+ <code type="none">
+ TabDef = [{{disc_only_copies, node()|nodes()]}],
+ mnesia_registry:create_table(my_reg, TabDef)
+ </code>
+ </desc>
+ </func>
+ </funcs>
+
+ <section>
+ <title>See Also</title>
+ <p>mnesia(3), erl_interface(3)
+ </p>
+ </section>
+
+</erlref>
+
diff --git a/lib/mnesia/doc/src/note.gif b/lib/mnesia/doc/src/note.gif
new file mode 100644
index 0000000000..6fffe30419
--- /dev/null
+++ b/lib/mnesia/doc/src/note.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/notes.gif b/lib/mnesia/doc/src/notes.gif
new file mode 100644
index 0000000000..e000cca26a
--- /dev/null
+++ b/lib/mnesia/doc/src/notes.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/notes.xml b/lib/mnesia/doc/src/notes.xml
new file mode 100644
index 0000000000..69f2185cd8
--- /dev/null
+++ b/lib/mnesia/doc/src/notes.xml
@@ -0,0 +1,383 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>1996</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Mnesia Release Notes</title>
+ <prepared>Dan Gudmundsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date>05-01-26</date>
+ <rev>AE</rev>
+ <file>notes.xml</file>
+ </header>
+ <p>This document describes the changes made to the Mnesia system
+ from version to version. The intention of this document is to
+ list all incompatibilities as well as all enhancements and
+ bugfixes for every release of Mnesia. Each release of Mnesia
+ thus constitutes one section in this document. The title of each
+ section is the version number of Mnesia.</p>
+
+ <section><title>Mnesia 4.4.12</title>
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ The documentation is now built with open source tools
+ (xsltproc and fop) that exists on most platforms. One
+ visible change is that the frames are removed.</p>
+ <p>
+ Own Id: OTP-8250</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+ <section><title>Mnesia 4.4.11</title>
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ Fixed duplicate results with mnesia:index_read() on
+ ordered_set tables. Reported by Sam Bobroff. </p>
+ <p>
+ Fixed locking in mnesia:index_read() which now grabs a read
+ table lock to ensure correctness, this may slow down the
+ operation or block other processes trying to reach the
+ same table. </p>
+ <p>
+ Calling mnesia:dump_log() could crash mnesia,
+ Reported by Igor Ribeiro Sucupira.</p>
+ <p> Own Id: OTP-8074</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+ <section><title>Mnesia 4.4.10</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Mnesia crashed if a qlc query was running inside a
+ transaction when mnesia stopped at another node. Thanks
+ Teemu Antti-Poika.</p>
+ <p>
+ Own Id: OTP-7968</p>
+ </item>
+ <item>
+ <p>
+ Mnesia could crash when loading local_content tables.</p>
+ <p>
+ Own Id: OTP-8002 Aux Id: seq11277 </p>
+ </item>
+ </list>
+ </section>
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ Minor (smp) optimizations.</p>
+ <p>
+ Own Id: OTP-7928</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+
+
+ <section><title>Mnesia 4.4.9</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ mnesia:clear_table/1 crashed instead of returning
+ <c>{aborted,..}</c> if it was called inside a
+ transaction.</p>
+ <p>
+ Own Id: OTP-7911</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+
+ <section><title>Mnesia 4.4.8</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ With bad timing several api functions could return or
+ exit with a bad error message when mnesia was shutting
+ down.</p>
+ <p>
+ Own Id: OTP-7753 Aux Id: seq11179 </p>
+ </item>
+ <item>
+ <p>
+ <c>mnesia:clear_table/1</c> cleared all nodes table
+ content even if the table was <c>local_content</c> only
+ type.</p>
+ <p>
+ Own Id: OTP-7835</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+
+ <section><title>Mnesia 4.4.7</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Disallowed match patterns ('_', and '$n') as argument to
+ <c>mnesia:delete_object/1</c> and friends.</p>
+ <p>
+ Own Id: OTP-7524</p>
+ </item>
+ </list>
+ </section>
+
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ Introduced a few new functions in Mnesia: <c>mnesia:read/2</c>,
+ <c>mnesia:first/3</c>, <c>mnesia:last/3</c>, <c>mnesia:prev/4</c>,
+ <c>mnesia:next/4</c>, <c>mnesia_frag:first/1</c>, <c>mnesia_frag:last/1</c>,
+ <c>mnesia_frag:prev/2</c>, <c>mnesia_frag:next/2</c>.</p>
+ <p>
+ Own Id: OTP-7625</p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
+ <section><title>Mnesia 4.4.6</title>
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ <c>mnesia:restore/2</c> aborted if a <c>EXIT</c> message
+ appeared in the client message queue.</p>
+ <p>
+ Own Id: OTP-7585 Aux Id: seq11046 </p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section><title>Mnesia 4.4.5</title>
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ mnesia:clear_table/1 does not require that all
+ replicas of the table are available anymore.</p>
+ <p>
+ Own Id: OTP-7466 Aux Id: seq11015</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section><title>Mnesia 4.4.4</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Mnesia did not garbage collect transaction decisions on
+ disk based nodes if no transactions where made on the
+ local node.</p>
+ <p>
+ Own Id: OTP-7419</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+
+ <section><title>Mnesia 4.4.3</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Table referred to by foreign key did not have node_pool
+ properly cleaned up when a node was removed from the
+ schema. Thanks Paul Mineiro.</p>
+ <p>
+ Own Id: OTP-7340</p>
+ </item>
+ <item>
+ <p>
+ Mnesia crashed and generated a core dump if a
+ schema_transaction was running when mnesia stopped.</p>
+ <p>
+ Own Id: OTP-7378 Aux Id: seq10964 </p>
+ </item>
+ </list>
+ </section>
+
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ It is now possible to delete a db node even when other
+ disk resident nodes are down. Thanks Paul Mineiro.</p>
+ <p>
+ Own Id: OTP-7383</p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
+ <section><title>Mnesia 4.4.2</title>
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Sticky locks could lead to hanging transactions.</p>
+ <p>
+ Own Id: OTP-7205 Aux Id: seq10793 </p>
+ </item>
+ <item>
+ <p>
+ <c>mnesia:snmp_get_next_index/2</c> didn't work with
+ partial index keys. Argument checking is now done
+ according to documentation, in functions
+ <c>mnesia:snmp_get_row/2</c>,
+ <c>mnesia:snmp_get_mnesia_key/2</c> and
+ <c>mnesia:snmp_get_next_index/2</c>. These functions now
+ require that <c>RowIndex</c> is a list.</p>
+ <p>
+ *** POTENTIAL INCOMPATIBILITY ***</p>
+ <p>
+ Own Id: OTP-7208</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section><title>Mnesia 4.4.1</title>
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Snmp index tables was not initialized correctly in
+ <c>mnesia-4.4</c>.</p>
+ <p>
+ Own Id: OTP-7170 Aux Id: seq10870 </p>
+ </item>
+ </list>
+ </section>
+ <section><title>Known Bugs and Problems</title>
+ <list>
+ <item>
+ <p>
+ Rearranging fragmented tables is an O(N^2) operation.</p>
+ <p>
+ Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section><title>Mnesia 4.4</title>
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Mnesia ignored the module argument to
+ <c>mnesia:restore/2</c>. Thanks Paul Minerio.</p>
+ <p>
+ Own Id: OTP-6981</p>
+ </item>
+ </list>
+ </section>
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ Mnesia's snmp operations <c>snmp_get_row/2</c>,
+ <c>snmp_get_next_index/2</c> and
+ <c>snmp_get_mnesia_key/2</c> have been made context
+ aware, i.e. inside a transaction they will compensate for
+ table updates made in earlier in the same transaction.
+ This might cause a performance drop if a lot of updates
+ have been made before the invocation of these functions.</p>
+ <p>
+ *** POTENTIAL INCOMPATIBILITY ***</p>
+ <p>
+ Own Id: OTP-6856 Aux Id: seq10671 </p>
+ </item>
+ <item>
+ <p>
+ Introduced erlang:phash/2 as new default for fragmented
+ tables. Already existing tables will continue to use
+ whatever hash function they where using.</p>
+ <p>
+ Own Id: OTP-6923</p>
+ </item>
+ <item>
+ <p>
+ Introduced <c>mnesia:is_transaction/0</c>.</p>
+ <p>
+ Own Id: OTP-6995 Aux Id: seq10812 </p>
+ </item>
+ </list>
+ </section>
+
+ <section><title>Known Bugs and Problems</title>
+ <list>
+ <item>
+ <p>
+ Rearranging fragmented tables is an O(N^2) operation.</p>
+ <p>
+ Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <!-- section>
+ <title>Previous Notes</title>
+ <p>For information about older versions see <url href="part_notes_history_frame.html">release notes history</url>.</p>
+ </section -->
+</chapter>
+
diff --git a/lib/mnesia/doc/src/notes_history.xml b/lib/mnesia/doc/src/notes_history.xml
new file mode 100644
index 0000000000..0984e33376
--- /dev/null
+++ b/lib/mnesia/doc/src/notes_history.xml
@@ -0,0 +1,322 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2004</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Mnesia Release Notes</title>
+ <prepared>Dan Gudmundsson and H&aring;kan Mattsson</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date>04-08-22</date>
+ <rev>AE</rev>
+ <file>notes_history.sgml</file>
+ </header>
+ <p>This document describes the changes made to the Mnesia system
+ from version to version. The intention of this document is to
+ list all incompatibilities as well as all enhancements and
+ bugfixes for every release of Mnesia. Each release of Mnesia
+ thus constitutes one section in this document. The title of each
+ section is the version number of Mnesia.</p>
+
+ <section><title>Mnesia 4.3.7</title>
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Removed a memory leak on ram-only nodes, introduced in
+ <c>mnesia-4.3.6</c>.</p>
+ <p>
+ Own Id: OTP-6936 Aux Id: seq10786 </p>
+ </item>
+ </list>
+ </section>
+ <section>
+ <title>Known Bugs and Problems</title>
+ <list type="bulleted">
+ <item>
+ <p>Rearranging fragmented tables is an O(N^2)
+ operation.</p>
+ <p>Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+
+ </section>
+
+
+ <section>
+ <title>Mnesia 4.3.6</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ A bug causing lots of records to be lost at startup from
+ an installed fallback has been fixed. The bug did however
+ not show up when a backup file generated with
+ <c>mnesia:backup/1</c> or
+ <c>mnesia:backup_checkpoint/2</c> was installed as
+ fallback. In order to trigger the bug, the items in the
+ backup file had to be rearranged in such an order that
+ records from different tables were interleaved with each
+ other.</p>
+ <p>
+ Own Id: OTP-6903 Aux Id: seq10763 </p>
+ </item>
+ <item>
+ <p>
+ Mnesia sometimes failed to commit schema operations on
+ all nodes, this have been seen on smp machines but could
+ happen on single processor as well with some bad timing.</p>
+ <p>
+ Own Id: OTP-6904</p>
+ </item>
+ <item>
+ <p>
+ <c>mnesia:select/1</c> failed to return all matches on
+ remote nodes if something was written to the table
+ earlier in the same transaction.</p>
+ <p>
+ Own Id: OTP-6908</p>
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Known Bugs and Problems</title>
+ <list type="bulleted">
+ <item>
+ <p>Rearranging fragmented tables is an O(N^2)
+ operation.</p>
+ <p>Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia 4.3.5</title>
+
+ <section>
+ <title>Fixed Bugs and Malfunctions</title>
+ <list type="bulleted">
+ <item>
+ <p>The internal index tables on bag tables where not always
+ cleaned correctly. Thanks Christopher Faulet and Salazard
+ Remy.</p>
+ <p>Own Id: OTP-6587</p>
+ </item>
+ <item>
+ <p>Changing the copy type with
+ <c>mnesia:change_table_copy/3</c> on a node which was down
+ was not handled correctly, that caused an eternal table
+ lock on the alive nodes. Thanks Hal Snyder.</p>
+ <p>Own
+ Id: OTP-6709</p>
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Known Bugs and Problems</title>
+ <list type="bulleted">
+ <item>
+ <p>Rearranging fragmented tables is an O(N^2)
+ operation.</p>
+ <p>Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia 4.3.4</title>
+
+ <section>
+ <title>Fixed Bugs and Malfunctions</title>
+ <list type="bulleted">
+ <item>
+ <p>Adding fragments to ram_copies tables was allowed on
+ nodes that where down.</p>
+ <p>Own Id: OTP-6367</p>
+ </item>
+ <item>
+ <p>Mnesia leaked transaction decisions (memory and disk
+ space).</p>
+ <p>Own Id: OTP-6464</p>
+ </item>
+ <item>
+ <p><c>dirty_update_counter/3</c> did not work properly on
+ disc tables when the counter was not initiated (Thanks to
+ Sebastien Saint-Sevin).</p>
+ <p>Own Id: OTP-6545</p>
+ </item>
+ <item>
+ <p>Chunked <c>mnesia:select</c> on fragmented tables could
+ crash (Thanks to Primanathan Reddy).</p>
+ <p>Own Id:
+ OTP-6548</p>
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Improvements and New Features</title>
+ <list type="bulleted">
+ <item>
+ <p>Introduced a new configure parameter dc_dump_limit.</p>
+ <p>Removed dead code (dialyzer warnings) and debugging
+ features that called interpreter commands.</p>
+ <p>Minor
+ performance increase when a lot of simultaneous
+ transactions where active. </p>
+ <p>Thank you Scott Lystig
+ Fritchie for debugging and bug reports.</p>
+ <p>Own Id:
+ OTP-6478</p>
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Known Bugs and Problems</title>
+ <list type="bulleted">
+ <item>
+ <p>Rearranging fragmented tables is an O(N^2)
+ operation.</p>
+ <p>Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia 4.3.3</title>
+
+ <section>
+ <title>Fixed Bugs and Malfunctions</title>
+ <list type="bulleted">
+ <item>
+ <p>Mnesia could crash during startup when loading tables
+ from remote node. </p>
+ <p>Own Id: OTP-6298 Aux Id: seq10402 </p>
+ </item>
+ <item>
+ <p>Mnesia could fail to update all copies during
+ del_table_copy. </p>
+ <p>Own Id: OTP-6299</p>
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Known Bugs and Problems</title>
+ <list type="bulleted">
+ <item>
+ <p>Rearranging fragmented tables is an O(N^2) operation.</p>
+ <p>Own Id: OTP-6300</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia 4.3.2</title>
+
+ <section>
+ <title>Fixed Bugs and Malfunctions</title>
+ <list type="bulleted">
+ <item>
+ <p>Mnesia sometimes failed to remove [d]ets table fixation,
+ when using <c>mnesia:first/1</c>,<c>mnesia:next/2</c> or
+ <c>qlc</c> this could cause that deleted records are not
+ actually deleted in the [d]ets table and that
+ <c>mnesia:[dirty_]first/1</c> reported the wrong key. </p>
+ <p>Own Id: OTP-6193 Aux Id: seq10376</p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia 4.3.1</title>
+
+ <section>
+ <title>Fixed Bugs and Malfunctions</title>
+ <list type="bulleted">
+ <item>
+ <p>Mnesia could crash (bad match in mnesia_controller)
+ during start. </p>
+ <p>Own Id: OTP-6116 Aux Id: seq10305 </p>
+ </item>
+ </list>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia 4.3</title>
+
+ <section>
+ <title>Fixed Bugs and Malfunctions</title>
+ <list type="bulleted">
+ <item>
+ <p>Deleting tables during the start of mnesia on another
+ node caused problems. </p>
+ <p>Own Id: OTP-5928 Aux Id:
+ seq10111 </p>
+ </item>
+ <item>
+ <p>Killing processes that runs nested transactions could
+ crash mnesia. </p>
+ <p>Own Id: OTP-6027 Aux Id: seq10244 </p>
+ </item>
+ <item>
+ <p>Creating or deleting tables with a checkpoint activated
+ could crash mnesia </p>
+ <p>Own Id: OTP-6064</p>
+ </item>
+ <item>
+ <p>Table loading could be mixed with schema operations
+ which could cause troubles. </p>
+ <p>Own Id: OTP-6065 Aux Id:
+ seq10291 </p>
+ </item>
+ </list>
+ </section>
+
+ <section>
+ <title>Improvements and New Features</title>
+ <list type="bulleted">
+ <item>
+ <p>Added parallel table loaders to increase startup
+ performance when the system have many small tables. The
+ configuration variable <c>no_table_loaders</c> configures
+ the number of loaders, default is two. </p>
+ <p>Own Id:
+ OTP-6087</p>
+ </item>
+ </list>
+ </section>
+ </section>
+</chapter>
+
diff --git a/lib/mnesia/doc/src/part.xml b/lib/mnesia/doc/src/part.xml
new file mode 100644
index 0000000000..b9654a4207
--- /dev/null
+++ b/lib/mnesia/doc/src/part.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE part SYSTEM "part.dtd">
+
+<part xmlns:xi="http://www.w3.org/2001/XInclude">
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Mnesia User's Guide</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <docno></docno>
+ <date></date>
+ <rev></rev>
+ <file>part.sgml</file>
+ </header>
+ <description>
+ <p><em>Mnesia</em> is a distributed DataBase Management
+ System(DBMS), appropriate for telecommunications applications and other
+ Erlang applications which require continuous operation and exhibit soft
+ real-time properties.</p>
+ </description>
+ <xi:include href="Mnesia_chap1.xml"/>
+ <xi:include href="Mnesia_chap2.xml"/>
+ <xi:include href="Mnesia_chap3.xml"/>
+ <xi:include href="Mnesia_chap4.xml"/>
+ <xi:include href="Mnesia_chap5.xml"/>
+ <xi:include href="Mnesia_chap7.xml"/>
+ <xi:include href="Mnesia_chap8.xml"/>
+ <xi:include href="Mnesia_App_A.xml"/>
+ <xi:include href="Mnesia_App_B.xml"/>
+ <xi:include href="Mnesia_App_C.xml"/>
+ <xi:include href="Mnesia_App_D.xml"/>
+</part>
+
diff --git a/lib/mnesia/doc/src/part_notes.xml b/lib/mnesia/doc/src/part_notes.xml
new file mode 100644
index 0000000000..caa155585d
--- /dev/null
+++ b/lib/mnesia/doc/src/part_notes.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE part SYSTEM "part.dtd">
+
+<part xmlns:xi="http://www.w3.org/2001/XInclude">
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>MNESIA Release Notes</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <docno></docno>
+ <date>1997-05-27</date>
+ <rev>1.2</rev>
+ <file>part_notes.xml</file>
+ </header>
+ <description>
+ <p><em>Mnesia</em> is a Distributed DataBase Management
+ System (DBMS), appropriate for telecommunications applications and other
+ Erlang applications which require continuous operation and exhibit soft
+ real-time properties. </p>
+ <p>For information about older versions see
+ <url href="part_notes_history_frame.html">release notes history</url>.</p>
+ </description>
+ <xi:include href="notes.xml"/>
+</part>
+
diff --git a/lib/mnesia/doc/src/part_notes_history.xml b/lib/mnesia/doc/src/part_notes_history.xml
new file mode 100644
index 0000000000..177738623c
--- /dev/null
+++ b/lib/mnesia/doc/src/part_notes_history.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE part SYSTEM "part.dtd">
+
+<part>
+ <header>
+ <copyright>
+ <year>2004</year>
+ <year>2007</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>MNESIA Release Notes</title>
+ <prepared>Claes Wikstr&ouml;m, Hans Nilsson and H&aring;kan Mattsson</prepared>
+ <docno></docno>
+ <date>1997-05-27</date>
+ <rev>1.2</rev>
+ <file>part_notes_history.sgml</file>
+ </header>
+ <description>
+ <p><em>Mnesia</em> is a Distributed DataBase Management
+ System (DBMS), appropriate for telecommunications applications and other
+ Erlang applications which require continuous operation and exhibit soft
+ real-time properties. </p>
+ </description>
+ <include file="notes_history"></include>
+</part>
+
diff --git a/lib/mnesia/doc/src/ref_man.gif b/lib/mnesia/doc/src/ref_man.gif
new file mode 100644
index 0000000000..b13c4efd53
--- /dev/null
+++ b/lib/mnesia/doc/src/ref_man.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/ref_man.xml b/lib/mnesia/doc/src/ref_man.xml
new file mode 100644
index 0000000000..417423641d
--- /dev/null
+++ b/lib/mnesia/doc/src/ref_man.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE application SYSTEM "application.dtd">
+
+<application xmlns:xi="http://www.w3.org/2001/XInclude">
+ <header>
+ <copyright>
+ <year>1997</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Mnesia Reference Manual</title>
+ <prepared>H&aring;kan Mattsson, Hans Nilsson, Claes Wikstr&ouml;m</prepared>
+ <responsible></responsible>
+ <docno></docno>
+ <approved></approved>
+ <checked></checked>
+ <date>1998-04-24</date>
+ <rev>A</rev>
+ <file>refman.sgml</file>
+ </header>
+ <description>
+ <p><em>Mnesia</em> is a distributed DataBase Management
+ System (DBMS), appropriate for telecommunications applications and other
+ Erlang applications which require continuous operation and exhibit soft
+ real-time properties. </p>
+ </description>
+ <xi:include href="mnesia.xml"/>
+ <xi:include href="mnesia_frag_hash.xml"/>
+ <xi:include href="mnesia_registry.xml"/>
+</application>
+
diff --git a/lib/mnesia/doc/src/summary.html.src b/lib/mnesia/doc/src/summary.html.src
new file mode 100644
index 0000000000..2941a2f46a
--- /dev/null
+++ b/lib/mnesia/doc/src/summary.html.src
@@ -0,0 +1 @@
+A heavy duty real-time distributed database \ No newline at end of file
diff --git a/lib/mnesia/doc/src/user_guide.gif b/lib/mnesia/doc/src/user_guide.gif
new file mode 100644
index 0000000000..e6275a803d
--- /dev/null
+++ b/lib/mnesia/doc/src/user_guide.gif
Binary files differ
diff --git a/lib/mnesia/doc/src/warning.gif b/lib/mnesia/doc/src/warning.gif
new file mode 100644
index 0000000000..96af52360e
--- /dev/null
+++ b/lib/mnesia/doc/src/warning.gif
Binary files differ
diff --git a/lib/mnesia/ebin/.gitignore b/lib/mnesia/ebin/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/mnesia/ebin/.gitignore
diff --git a/lib/mnesia/examples/DATA b/lib/mnesia/examples/DATA
new file mode 120000
index 0000000000..2c2314b960
--- /dev/null
+++ b/lib/mnesia/examples/DATA
@@ -0,0 +1 @@
+../doc/src/DATA \ No newline at end of file
diff --git a/lib/mnesia/examples/Makefile b/lib/mnesia/examples/Makefile
new file mode 100644
index 0000000000..ff00ee76a5
--- /dev/null
+++ b/lib/mnesia/examples/Makefile
@@ -0,0 +1,103 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1996-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(MNESIA_VSN)
+
+# ----------------------------------------------------
+# Release Macros
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN)
+
+# ----------------------------------------------------
+# Common Macros
+# ----------------------------------------------------
+
+
+MODULES = \
+ company \
+ company_o \
+ bup \
+ mnesia_meter \
+ mnesia_tpcb
+
+ERL_FILES= $(MODULES:=.erl)
+
+HRL_FILES = \
+ company.hrl \
+ company_o.hrl
+
+DATA_FILES = \
+ DATA
+
+# TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR))
+TARGET_FILES =
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+ERL_COMPILE_FLAGS += -pa ../ebin
+EBIN = .
+
+# ----------------------------------------------------
+# Make Rules
+# ----------------------------------------------------
+debug opt: $(TARGET_FILES)
+
+clean:
+ rm -f $(TARGET_FILES) *~
+
+docs:
+
+# ----------------------------------------------------
+# Release Targets
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/examples
+ $(INSTALL_DATA) $(ERL_FILES) $(DATA_FILES) $(HRL_FILES) $(RELSYSDIR)/examples
+ $(INSTALL_DIR) $(RELSYSDIR)/examples/bench
+ (cd bench; $(INSTALL_DATA) \
+ Makefile \
+ README \
+ bench.erl \
+ bench.hrl \
+ bench_generate.erl \
+ bench_populate.erl \
+ bench_trans.erl \
+ bench.config1 \
+ bench.config2 \
+ bench.config3 \
+ bench.config4 \
+ bench.config5 \
+ bench.config6 \
+ bench.config7 \
+ $(RELSYSDIR)/examples/bench)
+ (cd bench; $(INSTALL_SCRIPT) bench.sh $(RELSYSDIR)/examples/bench)
+
+release_docs_spec:
+
diff --git a/lib/mnesia/examples/bench/Makefile b/lib/mnesia/examples/bench/Makefile
new file mode 100644
index 0000000000..55621e8cf4
--- /dev/null
+++ b/lib/mnesia/examples/bench/Makefile
@@ -0,0 +1,10 @@
+
+all:
+ erl -make
+
+clean:
+ rm *.beam
+
+test:
+ ./bench.sh bench.config*
+
diff --git a/lib/mnesia/examples/bench/README b/lib/mnesia/examples/bench/README
new file mode 100644
index 0000000000..5d31b5ba25
--- /dev/null
+++ b/lib/mnesia/examples/bench/README
@@ -0,0 +1,211 @@
+Author : Hakan Mattsson <[email protected]>
+Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+
+This is an implementation of a real-time database benchmark
+(LMC/UU-01:025), defined by Richard Trembley (LMC) and Miroslaw
+Zakrzewski (LMC) . The implementation runs the benchmark on the Mnesia
+DBMS which is a part of Erlang/OTP (www.erlang.org).
+
+The implementation is organized in the following parts:
+
+ bench.erl - main API, startup and configuration
+ bench.hrl - record definitions
+ bench_populate.erl - create database and populate it with records
+ bench_trans.erl - the actual transactions to be benchmarked
+ bench_generate.erl - request generator, statistics computation
+
+Compile the files with:
+
+ make all
+
+and run the benchmarks with:
+
+ make test
+
+================================================================
+
+The benchmark runs on a set of Erlang nodes which should reside on
+one processor each.
+
+There are many options when running the benchmark. Benchmark
+configuration parameters may either be stated in a configuration file
+or as command line arguments in the Erlang shell. Erlang nodes may
+either be started manually or automatically by the benchmark program.
+
+In its the most automated usage you only need to provide one or more
+configuration files and run the
+
+ bench.sh <ConfigFiles>
+
+script to start all Erlang nodes, populate the database and run the
+actual benchmark for each one of the configuration files. The
+benchmark results will be displayed at stdout.
+
+In order to be able to automatically start remote Erlang nodes,
+you need to:
+
+ - put the $ERL_TOP/bin directory in your path on all nodes
+ - bind IP adresses to hostnames (e.g via DNS or /etc/hosts)
+ - enable usage of rsh so it does not prompt for password
+
+If you cannot achieve this, it is possible to run the benchmark
+anyway, but it requires more manual work to be done for each
+execution of the benchmark.
+
+================================================================
+
+For each configuration file given to the bench.sh script:
+
+ - a brand new Erlang node is started
+ - the bench:run(['YourConfigFile']) function is invoked
+ - the Erlang node(s) are halted.
+
+Without arguments, the bench.sh simply starts an Erlang shell.
+In that shell you have the ability to invoke Erlang functions,
+such as bench:run/1.
+
+The bench:start_all/1 function analyzes the configuration, starts
+all Erlang nodes necessary to perform the benchmark and starts
+Mnesia on all these nodes.
+
+The bench:populate/1 function populates the database according
+to the configuration and assumes that Mnesia is up and running
+on all nodes.
+
+The bench:generate/1 function starts the actual benchmark
+according to the configuration and assumes that Mnesia is
+up and running and that the database is fully populated.
+Given some arguments such as
+
+ Args = ['YourConfigFile', {statistics_detail, debug}].
+
+the invokation of
+
+ bench:run(Args).
+
+is equivivalent with:
+
+ SlaveNodes = bench:start_all(Args).
+ bench:populate(Args).
+ bench:generate(Args).
+ bench:stop_slave_nodes(SlaveNodes).
+
+In case you cannot get the automatic start of remote Erlang nodes to
+work (implied by bench:start_all/1) , you may need to manually start
+an Erlang node on each host (e.g. with bench.sh without arguments) and
+then invoke bench:run/1 or its equivivalents on one of them.
+
+================================================================
+
+The following configuration parameters are valid:
+
+generator_profile
+
+ Selects the transaction profile of the benchmark. Must be one
+ of the following atoms: t1, t2, t3, t4, t5, ping, random.
+ Defaults to random which means that the t1 .. t5 transaction
+ types are randomly selected according to the benchmark spec.
+ The other choices means disables the random choice and selects
+ one particular transaction type to be run over and over again.
+
+generator_warmup
+
+ Defines how long the request generators should "warm up" the
+ DBMS before the actual measurements are performed. The unit
+ is milliseconds and defaults to 2000 (2 seconds).
+
+generator_duration
+
+ Defines the duration of the actual benchmark measurement activity.
+ The unit is milliseconds and defaults to 15000 (15 seconds).
+
+generator_cooldown
+
+ Defines how long the request generators should "cool down" the
+ DBMS after the actual measurements has been performed. The unit
+ is milliseconds and defaults to 2000 (2 seconds).
+
+generator_nodes
+
+ Defines which Erlang nodes that should host request generators.
+ The default is all connected nodes.
+
+n_generators_per_node
+
+ Defines how many generator processes that should be running on
+ each generator node. The default is 2.
+
+statistics_detail
+
+ Regulates the detail level of statistics. It must be one of the
+ following atoms: normal, debug and debug2. debug enables a
+ finer grain of statistics to be reported, but since it requires
+ more counters, to be updated by the generator processes it may
+ cause slightly worse benchmark performace figures than the brief
+ default case, that is normal. debug2 prints out the debug info
+ and formats it according to LMC's benchmark program.
+
+storage_type
+
+ Defines whether the database should be kept solely in primary
+ memory (ram_copies), solely on disc (disc_only_copies) or
+ in both (disc_copies). The default is ram_copies. Currently
+ the other choices requires a little bit of manual preparation.
+
+table_nodes
+
+ Defines which Erlang nodes that should host the tables.
+
+n_fragments
+
+ Defines how many fragments each table should be divided in.
+ Default is 100. The fragments are evenly distributed over
+ all table nodes. The group table not devided in fragments.
+
+n_replicas
+
+ Defines how many replicas that should be kept of each fragment.
+ The group table is replicated to all table nodes.
+
+n_subscribers
+
+ Defines the number of subscriber records. Default 25000.
+
+n_subscribers
+
+ Defines the number of subscriber records. Default 25000.
+
+n_groups
+
+ Defines the number of group records. Default 5.
+
+n_servers
+
+ Defines the number of server records. Default 1.
+
+write_lock_type
+
+ Defines whether the transactions should use ordinary
+ write locks or if they utilize sticky write locks.
+ Must be one of the following atoms: write, sticky_write.
+ Default is write.
+
+use_binary_subscriber_key
+
+ Defines whether the subscriber key should be represented
+ as a string (binary) or as an integer. Default is false.
+
+always_try_nearest_node
+
+ The benchmark was initially written to test scalability
+ when more nodes were added to the database and when the
+ (fragmented) tables were distributed over all nodes. In
+ such a system the transactions should be evenly distributed
+ over all nodes. When this option is set to true it is possible
+ to make fair measurements of master/slave configurations, when
+ all transactions are performed on on one node. Default is false.
+
+cookie
+
+ Defines which cookie the Erlang node should use in its
+ distribution protocol. Must be an atom, default is 'bench'.
diff --git a/lib/mnesia/examples/bench/bench.config1 b/lib/mnesia/examples/bench/bench.config1
new file mode 100644
index 0000000000..e53ce51f63
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config1
@@ -0,0 +1,21 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb1
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 1}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.config2 b/lib/mnesia/examples/bench/bench.config2
new file mode 100644
index 0000000000..f2f82f01fa
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config2
@@ -0,0 +1,21 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb2
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 1}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.config3 b/lib/mnesia/examples/bench/bench.config3
new file mode 100644
index 0000000000..c96e4531fd
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config3
@@ -0,0 +1,23 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb3,
+ bench@wppgpb4
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 2}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.config4 b/lib/mnesia/examples/bench/bench.config4
new file mode 100644
index 0000000000..e7c0bf2151
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config4
@@ -0,0 +1,23 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 2}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.config5 b/lib/mnesia/examples/bench/bench.config5
new file mode 100644
index 0000000000..623ec3fb73
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config5
@@ -0,0 +1,27 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2,
+ bench@wppgpb3,
+ bench@wppgpb4
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2,
+ bench@wppgpb3,
+ bench@wppgpb4
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 2}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.config6 b/lib/mnesia/examples/bench/bench.config6
new file mode 100644
index 0000000000..f056890ff4
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config6
@@ -0,0 +1,27 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2,
+ bench@wppgpb3,
+ bench@wppgpb4
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb5,
+ bench@wppgpb6,
+ bench@wppgpb7,
+ bench@wppgpb8
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 2}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.config7 b/lib/mnesia/examples/bench/bench.config7
new file mode 100644
index 0000000000..6a78570e71
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.config7
@@ -0,0 +1,35 @@
+{cookie, bench_cookie}.
+{generator_profile, random}.
+{statistics_detail, debug}.
+{generator_warmup, 120000}.
+{generator_duration, 900000}.
+{generator_cooldown, 120000}.
+{generator_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2,
+ bench@wppgpb3,
+ bench@wppgpb4,
+ bench@wppgpb5,
+ bench@wppgpb6,
+ bench@wppgpb7,
+ bench@wppgpb8
+ ]}.
+{use_binary_subscriber_key, false}.
+{n_generators_per_node, 2}.
+{write_lock_type, sticky_write}.
+{table_nodes,
+ [bench@wppgpb1,
+ bench@wppgpb2,
+ bench@wppgpb3,
+ bench@wppgpb4,
+ bench@wppgpb5,
+ bench@wppgpb6,
+ bench@wppgpb7,
+ bench@wppgpb8
+ ]}.
+{storage_type, ram_copies}.
+{n_replicas, 2}.
+{n_fragments, 100}.
+{n_subscribers, 500000}.
+{n_groups, 100}.
+{n_servers, 20}.
diff --git a/lib/mnesia/examples/bench/bench.erl b/lib/mnesia/examples/bench/bench.erl
new file mode 100644
index 0000000000..d191169296
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.erl
@@ -0,0 +1,327 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% File : bench.hrl
+%%% Author : Hakan Mattsson <[email protected]>
+%%% Purpose : Implement the Canadian database benchmark (LMC/UU-01:025)
+%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(bench).
+-author('[email protected]').
+
+-include("bench.hrl").
+
+-export([
+ run/0, run/1,
+
+ start_all/0, start_all/1,
+ populate/0, populate/1,
+ generate/0, generate/1,
+
+ args_to_config/1, verify_config/2,
+ start/0, start/1,
+ stop_slave_nodes/1,
+ bind_schedulers/0
+ ]).
+
+bind_schedulers() ->
+ try
+ %% Avoid first core and bind schedules to the remaining ones
+ Topo = erlang:system_info(cpu_topology),
+ erlang:system_flag(cpu_topology,lists:reverse(Topo)),
+ %% N = erlang:system_info(schedulers),
+ %% erlang:system_flag(schedulers_online, lists:max([N - 1, 1])),
+ erlang:system_flag(scheduler_bind_type, default_bind),
+ timer:sleep(timer:seconds(1)), % Wait for Rickard
+ erlang:system_info(scheduler_bindings)
+ catch _:_ ->
+ %% Ancient systems
+ ignore
+ end.
+
+%% Run the benchmark:
+%%
+%% - Start all necessary Erlang nodes
+%% - Populate the database
+%% - Start the traffic generators
+%% - Calculate benchmark statistics
+%% - Stop the temporary Erlang nodes
+run() ->
+ FileName = "bench.config",
+ run([FileName]).
+
+run(Args) ->
+ C = args_to_config(Args),
+ SlaveNodes = start_all(C),
+ bench_populate:start(C),
+ Result = bench_generate:start(C),
+ stop_slave_nodes(SlaveNodes),
+ Result.
+
+%% Start Mnesia on the local node
+start() ->
+ FileName = 'bench.config',
+ start([FileName]).
+
+start(Args) ->
+ C = args_to_config(Args),
+ erlang:set_cookie(node(), C#config.cookie),
+ Nodes = [node() | (((C#config.table_nodes -- C#config.generator_nodes) ++
+ C#config.generator_nodes) -- [node()])],
+ Extra = [{extra_db_nodes, Nodes}],
+ ?d("Starting Mnesia on node ~p...", [node()]),
+ case mnesia:start(Extra) of
+ ok ->
+ Tables = mnesia:system_info(tables),
+ io:format(" ok.~n" , []),
+ ?d("Waiting for ~p tables...", [length(Tables)]),
+ wait(Tables);
+ {error, Reason} ->
+ io:format(" FAILED: ~p~n", [Reason]),
+ {error, Reason}
+ end.
+
+wait(Tables) ->
+ case mnesia:wait_for_tables(Tables, timer:seconds(10)) of
+ ok ->
+ io:format(" loaded.~n", []),
+ ok;
+ {timeout, More} ->
+ io:format(" ~p...", [length(More)]),
+ wait(More)
+ end.
+
+%% Populate the database
+populate() ->
+ FileName = 'bench.config',
+ populate([FileName]).
+
+populate(Args) ->
+ C = args_to_config(Args),
+ bench_populate:start(C).
+
+%% Start the traffic generators
+generate() ->
+ FileName = 'bench.config',
+ generate([FileName]).
+
+generate(Args) ->
+ C = args_to_config(Args),
+ bench_generate:start(C).
+
+start_all() ->
+ FileName = 'bench.config',
+ start_all([FileName]).
+
+start_all(Args) ->
+ C = args_to_config(Args),
+ Nodes = [node() | (((C#config.table_nodes -- C#config.generator_nodes) ++
+ C#config.generator_nodes) -- [node()])],
+ erlang:set_cookie(node(), C#config.cookie),
+ ?d("Starting Erlang nodes...~n", []),
+ ?d("~n", []),
+ SlaveNodes = do_start_all(Nodes, [], C#config.cookie),
+ Extra = [{extra_db_nodes, Nodes}],
+ ?d("~n", []),
+ ?d("Starting Mnesia...", []),
+ case rpc:multicall(Nodes, mnesia, start, [Extra]) of
+ {Replies, []} ->
+ case [R || R <- Replies, R /= ok] of
+ [] ->
+ io:format(" ok~n", []),
+ SlaveNodes;
+ Bad ->
+ io:format(" FAILED: ~p~n", [Bad]),
+ exit({mnesia_start, Bad})
+ end;
+ Bad ->
+ io:format(" FAILED: ~p~n", [Bad]),
+ exit({mnesia_start, Bad})
+ end.
+
+do_start_all([Node | Nodes], Acc, Cookie) when is_atom(Node) ->
+ case string:tokens(atom_to_list(Node), [$@]) of
+ [Name, Host] ->
+ Arg = lists:concat(["-setcookie ", Cookie]),
+ ?d(" ~s", [left(Node)]),
+ case slave:start_link(Host, Name, Arg) of
+ {ok, Node} ->
+ load_modules(Node),
+ rpc:call(Node, ?MODULE, bind_schedulers, []),
+ io:format(" started~n", []),
+ do_start_all(Nodes, [Node | Acc], Cookie);
+ {error, {already_running, Node}} ->
+ rpc:call(Node, ?MODULE, bind_schedulers, []),
+ io:format(" already started~n", []),
+ do_start_all(Nodes, Acc, Cookie);
+ {error, Reason} ->
+ io:format(" FAILED:~p~n", [Reason]),
+ stop_slave_nodes(Acc),
+ exit({slave_start_failed, Reason})
+ end;
+ _ ->
+ ?d(" ~s FAILED: "
+ "Not valid as node name. Must be 'name@host'.~n",
+ [left(Node)]),
+ stop_slave_nodes(Acc),
+ exit({bad_node_name, Node})
+ end;
+do_start_all([], StartedNodes, _Cookie) ->
+ StartedNodes.
+
+load_modules(Node) ->
+ Fun =
+ fun(Mod) ->
+ case code:get_object_code(Mod) of
+ {_Module, Bin, Fname} ->
+ rpc:call(Node, code,load_binary,[Mod,Fname,Bin]);
+ Other ->
+ Other
+ end
+ end,
+ lists:foreach(Fun, [bench, bench_generate, bench_populate, bench_trans]).
+
+stop_slave_nodes([]) ->
+ ok;
+stop_slave_nodes(Nodes) ->
+ ?d("~n", []),
+ ?d("Stopping Erlang nodes...~n", []),
+ ?d("~n", []),
+ do_stop_slave_nodes(Nodes).
+
+do_stop_slave_nodes([Node | Nodes]) ->
+ ?d(" ~s", [left(Node)]),
+ Res = slave:stop(Node),
+ io:format(" ~p~n", [Res]),
+ do_stop_slave_nodes(Nodes);
+do_stop_slave_nodes([]) ->
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% The configuration
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+args_to_config(C) when is_record(C, config) ->
+ C;
+args_to_config(Args) when is_list(Args) ->
+ do_args_to_config(Args, []).
+
+do_args_to_config([{Key, Val} | Rest], Acc) when is_list(Acc) ->
+ do_args_to_config(Rest, Acc ++ [{Key, Val}]);
+do_args_to_config([FileName | Rest], Acc) when is_list(Acc) ->
+ io:nl(),
+ ?d("Reading configuration file ~p...", [FileName]),
+ case file:consult(FileName) of
+ {ok, Config} ->
+ io:format(" ok~n", []),
+ do_args_to_config(Rest, Acc ++ Config);
+ {error, Reason} ->
+ io:format(" FAILED: ~s~n",
+ [[lists:flatten(file:format_error( Reason))]]),
+ {error, {args_to_config, FileName, Reason}}
+ end;
+do_args_to_config([], Acc) when is_list(Acc) ->
+ verify_config(Acc, #config{}).
+
+verify_config([{Tag, Val} | T], C) ->
+ case Tag of
+ cookie when is_atom(Val) ->
+ verify_config(T, C#config{cookie = Val});
+ generator_profile when Val == random ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_profile when Val == t1 ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_profile when Val == t2 ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_profile when Val == t3 ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_profile when Val == t4 ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_profile when Val == t5 ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_profile when Val == ping ->
+ verify_config(T, C#config{generator_profile = Val});
+ generator_nodes when is_list(Val) ->
+ verify_config(T, C#config{generator_nodes = Val});
+ n_generators_per_node when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{n_generators_per_node = Val});
+ generator_warmup when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{generator_warmup = Val});
+ generator_duration when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{generator_duration = Val});
+ generator_cooldown when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{generator_cooldown = Val});
+ statistics_detail when Val == debug ->
+ verify_config(T, C#config{statistics_detail = Val});
+ statistics_detail when Val == debug2 ->
+ verify_config(T, C#config{statistics_detail = Val});
+ statistics_detail when Val == normal ->
+ verify_config(T, C#config{statistics_detail = Val});
+ table_nodes when is_list(Val) ->
+ verify_config(T, C#config{table_nodes = Val});
+ use_binary_subscriber_key when Val == true ->
+ verify_config(T, C#config{use_binary_subscriber_key = Val});
+ use_binary_subscriber_key when Val == false ->
+ verify_config(T, C#config{use_binary_subscriber_key = Val});
+ storage_type when is_atom(Val) ->
+ verify_config(T, C#config{storage_type = Val});
+ write_lock_type when Val == sticky_write ->
+ verify_config(T, C#config{write_lock_type = Val});
+ write_lock_type when Val == write ->
+ verify_config(T, C#config{write_lock_type = Val});
+ n_replicas when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{n_replicas = Val});
+ n_fragments when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{n_fragments = Val});
+ n_subscribers when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{n_subscribers = Val});
+ n_groups when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{n_groups = Val});
+ n_servers when is_integer(Val), Val >= 0 ->
+ verify_config(T, C#config{n_servers = Val});
+ always_try_nearest_node when Val == true; Val == false ->
+ verify_config(T, C#config{always_try_nearest_node = Val});
+ _ ->
+ ?e("Bad config value: ~p~n", [Tag, Val]),
+ exit({bad_config_value, {Tag, Val}})
+ end;
+verify_config([], C) ->
+ display_config(C),
+ C;
+verify_config(Config, _) ->
+ ?e("Bad config: ~p~n", [Config]),
+ exit({bad_config, Config}).
+
+display_config(C) when is_record(C, config) ->
+ ?d("~n", []),
+ ?d("Actual configuration...~n", []),
+ ?d("~n", []),
+ Fields = record_info(fields, config),
+ [config | Values] = tuple_to_list(C),
+ display_config(Fields, Values).
+
+display_config([F | Fields], [V | Values]) ->
+ ?d(" ~s ~p~n", [left(F), V]),
+ display_config(Fields, Values);
+display_config([], []) ->
+ ?d("~n", []),
+ ok.
+
+left(Term) ->
+ string:left(lists:flatten(io_lib:format("~p", [Term])), 27, $.).
diff --git a/lib/mnesia/examples/bench/bench.hrl b/lib/mnesia/examples/bench/bench.hrl
new file mode 100644
index 0000000000..7b0e0c1280
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.hrl
@@ -0,0 +1,107 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% File : bench.hrl
+%%% Author : Hakan Mattsson <[email protected]>
+%%% Purpose : Define various database records
+%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(config,
+ {
+ generator_profile = random,
+ generator_warmup = timer:seconds(2),
+ generator_duration = timer:seconds(15),
+ generator_cooldown = timer:seconds(2),
+ generator_nodes = [node() | nodes()],
+ statistics_detail = debug,
+ n_generators_per_node = 1,
+ write_lock_type = sticky_write,
+ table_nodes = [node() | nodes()],
+ storage_type = ram_copies,
+ n_subscribers = 25000,
+ n_groups = 5,
+ n_servers = 1,
+ n_replicas = 1,
+ n_fragments = 100,
+ use_binary_subscriber_key = false,
+ always_try_nearest_node = false,
+ cookie = 'bench'
+ }).
+
+-record(subscriber,
+ {
+ subscriber_number, % string (10 chars)
+ subscriber_name, % string (32 chars)
+ group_id, % integer (uint32)
+ location, % integer (uint32)
+ active_sessions, % array of 32 booleans (32 bits)
+ changed_by, % string (25 chars)
+ changed_time, % string (25 chars)
+ suffix
+ }).
+
+-record(group,
+ {
+ group_id, % integer (uint32)
+ group_name, % string (32 chars)
+ allow_read, % array of 32 booleans (32 bits)
+ allow_insert, % array of 32 booleans (32 bits)
+ allow_delete % array of 32 booleans (32 bits)
+ }).
+
+-record(server,
+ {
+ server_key, % {ServerId, SubscriberNumberSuffix}
+ server_name, % string (32 chars)
+ no_of_read, % integer (uint32)
+ no_of_insert, % integer (uint32)
+ no_of_delete, % integer (uint32)
+ suffix
+ }).
+
+-record(session,
+ {
+ session_key, % {SubscriberNumber, ServerId}
+ session_details, % string (4000 chars)
+ suffix
+ }).
+
+-define(d(Format, Args),
+ io:format("~s" ++ Format, [string:left(lists:flatten(io_lib:format("~p(~p):", [?MODULE, ?LINE])), 30, $ ) | Args])).
+
+-define(e(Format, Args),
+ begin
+ ok = error_logger:format("~p(~p): " ++ Format, [?MODULE, ?LINE | Args]),
+ timer:sleep(1000)
+ end).
+
+-define(ERROR(M, F, A, R),
+ ?e("~w:~w~p\n\t ->~p\n", [M, F, A, R])).
+
+-define(APPLY(M, F, A),
+ fun() ->
+ case catch apply(M, F, A) of
+ ok -> {ok, ok};
+ {atomic, R} -> {ok, R};
+ {ok, R} -> {ok, R};
+ {aborted, R} -> ?ERROR(M, F, A, R);
+ {error, R} -> ?ERROR(M, F, A, R);
+ R -> ?ERROR(M, F, A, R)
+ end
+ end()).
diff --git a/lib/mnesia/examples/bench/bench.sh b/lib/mnesia/examples/bench/bench.sh
new file mode 100755
index 0000000000..1f8b5eec52
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Author : Hakan Mattsson <[email protected]>
+# Purpose : Simplify benchmark execution
+# Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+######################################################################
+
+args="-pa .. -boot start_sasl -sasl errlog_type error -sname bench"
+set -x
+
+if [ $# -eq 0 ] ; then
+
+ erl $args
+
+else
+
+ while [ $# -gt 0 ]; do
+
+ erl $args -s bench run $1 -s erlang halt
+ shift
+ done
+
+fi
+
diff --git a/lib/mnesia/examples/bench/bench_generate.erl b/lib/mnesia/examples/bench/bench_generate.erl
new file mode 100644
index 0000000000..0fccc6c082
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench_generate.erl
@@ -0,0 +1,684 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% File : bench_generate.hrl
+%%% Author : Hakan Mattsson <[email protected]>
+%%% Purpose : Start request generators and collect statistics
+%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(bench_generate).
+-author('[email protected]').
+
+-include("bench.hrl").
+
+%% Public
+-export([start/1]).
+
+%% Internal
+-export([
+ monitor_init/2,
+ generator_init/2,
+ worker_init/1
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% The traffic generator
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% -------------------------------------------------------------------
+%% Start request generators
+%% -------------------------------------------------------------------
+
+start(C) when is_record(C, config) ->
+ MonPid = spawn_link(?MODULE, monitor_init, [C, self()]),
+ receive
+ {'EXIT', MonPid, Reason} ->
+ exit(Reason);
+ {monitor_done, MonPid, Res} ->
+ Res
+ end.
+
+monitor_init(C, Parent) when is_record(C, config) ->
+ process_flag(trap_exit, true),
+ %% net_kernel:monitor_nodes(true), %% BUGBUG: Needed in order to re-start generators
+ Nodes = C#config.generator_nodes,
+ PerNode = C#config.n_generators_per_node,
+ Timer = C#config.generator_warmup,
+ ?d("~n", []),
+ ?d("Start ~p request generators each at ~p nodes...~n",
+ [PerNode, length(Nodes)]),
+ ?d("~n", []),
+ warmup_sticky(C),
+ ?d(" ~p seconds warmup...~n", [Timer div 1000]),
+ Alive = spawn_generators(C, Nodes, PerNode),
+ erlang:send_after(Timer, self(), warmup_done),
+ monitor_loop(C, Parent, Alive, []).
+
+spawn_generators(C, Nodes, PerNode) ->
+ [spawn_link(Node, ?MODULE, generator_init, [self(), C]) ||
+ Node <- Nodes,
+ _ <- lists:seq(1, PerNode)].
+
+warmup_sticky(C) ->
+ %% Select one node per fragment as master node
+ Tabs = [subscriber, session, server, suffix],
+ Fun = fun(S) ->
+ {[Node | _], _, Wlock} = nearest_node(S, transaction, C),
+ Stick = fun() -> [mnesia:read({T, S}, S, Wlock) || T <- Tabs] end,
+ Args = [transaction, Stick, [], mnesia_frag],
+ rpc:call(Node, mnesia, activity, Args)
+ end,
+ Suffixes = lists:seq(0, C#config.n_fragments - 1), % Assume even distrib.
+ lists:foreach(Fun, Suffixes).
+
+%% Main loop for benchmark monitor
+monitor_loop(C, Parent, Alive, Deceased) ->
+ receive
+ warmup_done ->
+ multicall(Alive, reset_statistics),
+ Timer = C#config.generator_duration,
+ ?d(" ~p seconds actual benchmarking...~n", [Timer div 1000]),
+ erlang:send_after(Timer, self(), measurement_done),
+ monitor_loop(C, Parent, Alive, Deceased);
+ measurement_done ->
+ Stats = multicall(Alive, get_statistics),
+ Timer = C#config.generator_cooldown,
+ ?d(" ~p seconds cooldown...~n", [Timer div 1000]),
+ erlang:send_after(Timer, self(), {cooldown_done, Stats}),
+ monitor_loop(C, Parent, Alive, Deceased);
+ {cooldown_done, Stats} ->
+ multicall(Alive, stop),
+ display_statistics(Stats, C),
+ Parent ! {monitor_done, self(), ok},
+ unlink(Parent),
+ exit(monitor_done);
+ {nodedown, _Node} ->
+ monitor_loop(C, Parent, Alive, Deceased);
+ {nodeup, Node} ->
+ NeedsBirth = [N || N <- Deceased, N == Node],
+ Born = spawn_generators(C, NeedsBirth, 1),
+ monitor_loop(C, Parent, Born ++ Alive, Deceased -- NeedsBirth);
+ {'EXIT', Pid, Reason} when Pid == Parent ->
+ exit(Reason);
+ {'EXIT', Pid, Reason} ->
+ case lists:member(Pid, Alive) of
+ true ->
+ ?d("Generator on node ~p died: ~p~n", [node(Pid), Reason]),
+ monitor_loop(C, Parent, Alive -- [Pid], [node(Pid) | Deceased]);
+ false ->
+ monitor_loop(C, Parent, Alive, Deceased)
+ end
+ end.
+
+%% Send message to a set of processes and wait for their replies
+multicall(Pids, Message) ->
+ Send =
+ fun(Pid) ->
+ Ref = erlang:monitor(process, Pid),
+ Pid ! {self(), Ref, Message},
+ {Pid, Ref}
+ end,
+ PidRefs = lists:map(Send, Pids),
+ Collect =
+ fun({Pid, Ref}) ->
+ receive
+ {'DOWN', Ref, process, Pid, Reason} ->
+ {Pid, {'EXIT', Reason}};
+ {Pid, Ref, Reply} ->
+ erlang:demonitor(Ref),
+ {Pid, Reply}
+ end
+ end,
+ lists:map(Collect, PidRefs).
+
+%% Initialize a traffic generator
+generator_init(Monitor, C) ->
+ process_flag(trap_exit, true),
+ Tables = mnesia:system_info(tables),
+ ok = mnesia:wait_for_tables(Tables, infinity),
+ {_Mega, Sec, Micro} = erlang:now(),
+ Uniq = lists:sum(binary_to_list(term_to_binary(make_ref()))),
+ random:seed(Uniq, Sec, Micro),
+ Counters = reset_counters(C, C#config.statistics_detail),
+ SessionTab = ets:new(bench_sessions, [public, {keypos, 1}]),
+ generator_loop(Monitor, C, SessionTab, Counters).
+
+%% Main loop for traffic generator
+generator_loop(Monitor, C, SessionTab, Counters) ->
+ receive
+ {ReplyTo, Ref, get_statistics} ->
+ Stats = get_counters(C, Counters),
+ ReplyTo ! {self(), Ref, Stats},
+ generator_loop(Monitor, C, SessionTab, Counters);
+ {ReplyTo, Ref, reset_statistics} ->
+ Stats = get_counters(C, Counters),
+ Counters2 = reset_counters(C, Counters),
+ ReplyTo ! {self(), Ref, Stats},
+ generator_loop(Monitor, C, SessionTab, Counters2);
+ {_ReplyTo, _Ref, stop} ->
+ exit(shutdown);
+ {'EXIT', Pid, Reason} when Pid == Monitor ->
+ exit(Reason);
+ {'EXIT', Pid, Reason} ->
+ Node = node(Pid),
+ ?d("Worker on node ~p(~p) died: ~p~n", [Node, node(), Reason]),
+ Key = {worker,Node},
+ case get(Key) of
+ undefined -> ignore;
+ Pid -> erase(Key);
+ _ -> ignore
+ end,
+ generator_loop(Monitor, C, SessionTab, Counters)
+ after 0 ->
+ {Name, {Nodes, Activity, Wlock}, Fun, CommitSessions} =
+ gen_trans(C, SessionTab),
+ Before = erlang:now(),
+ Res = call_worker(Nodes, Activity, Fun, Wlock, mnesia_frag),
+ After = erlang:now(),
+ Elapsed = elapsed(Before, After),
+ post_eval(Monitor, C, Elapsed, Res, Name, CommitSessions, SessionTab, Counters)
+ end.
+
+%% Perform a transaction on a node near the data
+call_worker([Node | _], Activity, Fun, Wlock, Mod) when Node == node() ->
+ {Node, catch mnesia:activity(Activity, Fun, [Wlock], Mod)};
+call_worker([Node | _] = Nodes, Activity, Fun, Wlock, Mod) ->
+ Key = {worker,Node},
+ case get(Key) of
+ Pid when is_pid(Pid) ->
+ Args = [Activity, Fun, [Wlock], Mod],
+ Pid ! {activity, self(), Args},
+ receive
+ {'EXIT', Pid, Reason} ->
+ ?d("Worker on node ~p(~p) died: ~p~n", [Node, node(), Reason]),
+ erase(Key),
+ retry_worker(Nodes, Activity, Fun, Wlock, Mod, {'EXIT', Reason});
+ {activity_result, Pid, Result} ->
+ case Result of
+ {'EXIT', {aborted, {not_local, _}}} ->
+ retry_worker(Nodes, Activity, Fun, Wlock, Mod, Result);
+ _ ->
+ {Node, Result}
+ end
+ end;
+ undefined ->
+ GenPid = self(),
+ Pid = spawn_link(Node, ?MODULE, worker_init, [GenPid]),
+ put(Key, Pid),
+ call_worker(Nodes, Activity, Fun, Wlock, Mod)
+ end.
+
+retry_worker([], _Activity, _Fun, _Wlock, _Mod, Reason) ->
+ {node(), Reason};
+retry_worker([BadNode | SpareNodes], Activity, Fun, Wlock, Mod, Reason) ->
+ Nodes = SpareNodes -- [BadNode],
+ case Nodes of
+ [] ->
+ {BadNode, Reason};
+ [_] ->
+ call_worker(Nodes, Activity, Fun, write, Mod);
+ _ ->
+ call_worker(Nodes, Activity, Fun, Wlock, Mod)
+ end.
+
+worker_init(Parent) ->
+ Tables = mnesia:system_info(tables),
+ ok = mnesia:wait_for_tables(Tables, infinity),
+ worker_loop(Parent).
+
+%% Main loop for remote workers
+worker_loop(Parent) ->
+ receive
+ {activity, Parent, [Activity, Fun, Extra, Mod]} ->
+ Result = (catch mnesia:activity(Activity, Fun, Extra, Mod)),
+ Parent ! {activity_result, self(), Result},
+ worker_loop(Parent)
+ end.
+
+
+elapsed({Before1, Before2, Before3}, {After1, After2, After3}) ->
+ After = After1 * 1000000000000 + After2 * 1000000 + After3,
+ Before = Before1 * 1000000000000 + Before2 * 1000000 + Before3,
+ After - Before.
+
+%% Lookup counters
+get_counters(_C, {table, Tab}) ->
+ ets:match_object(Tab, '_');
+get_counters(_C, {NM, NC, NA, NB}) ->
+ Trans = any,
+ Node = somewhere,
+ [{{Trans, n_micros, Node}, NM},
+ {{Trans, n_commits, Node}, NC},
+ {{Trans, n_aborts, Node}, NA},
+ {{Trans, n_branches_executed, Node}, NB}].
+
+% Clear all counters
+reset_counters(_C, normal) ->
+ {0, 0, 0, 0};
+reset_counters(C, {_, _, _, _}) ->
+ reset_counters(C, normal);
+reset_counters(C, debug) ->
+ CounterTab = ets:new(bench_pending, [public, {keypos, 1}]),
+ reset_counters(C, {table, CounterTab});
+reset_counters(C, debug2) ->
+ CounterTab = ets:new(bench_pending, [public, {keypos, 1}]),
+ reset_counters(C, {table, CounterTab});
+reset_counters(C, {table, Tab} = Counters) ->
+ Names = [n_micros, n_commits, n_aborts, n_branches_executed],
+ Nodes = C#config.generator_nodes ++ C#config.table_nodes,
+ TransTypes = [t1, t2, t3, t4, t5, ping],
+ [ets:insert(Tab, {{Trans, Name, Node}, 0}) || Name <- Names,
+ Node <- Nodes,
+ Trans <- TransTypes],
+ Counters.
+
+%% Determine the outcome of a transaction and increment the counters
+post_eval(Monitor, C, Elapsed, {Node, Res}, Name, CommitSessions, SessionTab, {table, Tab} = Counters) ->
+ case Res of
+ {do_commit, BranchExecuted, _} ->
+ incr(Tab, {Name, n_micros, Node}, Elapsed),
+ incr(Tab, {Name, n_commits, Node}, 1),
+ case BranchExecuted of
+ true ->
+ incr(Tab, {Name, n_branches_executed, Node}, 1),
+ commit_session(CommitSessions),
+ generator_loop(Monitor, C, SessionTab, Counters);
+ false ->
+ generator_loop(Monitor, C, SessionTab, Counters)
+ end;
+ {'EXIT', {aborted, {do_rollback, BranchExecuted, _}}} ->
+ incr(Tab, {Name, n_micros, Node}, Elapsed),
+ incr(Tab, {Name, n_aborts, Node}, 1),
+ case BranchExecuted of
+ true ->
+ incr(Tab, {Name, n_branches_executed, Node}, 1),
+ generator_loop(Monitor, C, SessionTab, Counters);
+ false ->
+ generator_loop(Monitor, C, SessionTab, Counters)
+ end;
+ _ ->
+ ?d("Failed(~p): ~p~n", [Node, Res]),
+ incr(Tab, {Name, n_micros, Node}, Elapsed),
+ incr(Tab, {Name, n_aborts, Node}, 1),
+ generator_loop(Monitor, C, SessionTab, Counters)
+ end;
+post_eval(Monitor, C, Elapsed, {_Node, Res}, _Name, CommitSessions, SessionTab, {NM, NC, NA, NB}) ->
+ case Res of
+ {do_commit, BranchExecuted, _} ->
+ case BranchExecuted of
+ true ->
+ commit_session(CommitSessions),
+ generator_loop(Monitor, C, SessionTab, {NM + Elapsed, NC + 1, NA, NB + 1});
+ false ->
+ generator_loop(Monitor, C, SessionTab, {NM + Elapsed, NC + 1, NA, NB})
+ end;
+ {'EXIT', {aborted, {do_rollback, BranchExecuted, _}}} ->
+ case BranchExecuted of
+ true ->
+ generator_loop(Monitor, C, SessionTab, {NM + Elapsed, NC, NA + 1, NB + 1});
+ false ->
+ generator_loop(Monitor, C, SessionTab, {NM + Elapsed, NC, NA + 1, NB})
+ end;
+ _ ->
+ ?d("Failed: ~p~n", [Res]),
+ generator_loop(Monitor, C, SessionTab, {NM + Elapsed, NC, NA + 1, NB})
+ end.
+
+incr(Tab, Counter, Incr) ->
+ ets:update_counter(Tab, Counter, Incr).
+
+commit_session(no_fun) ->
+ ignore;
+commit_session(Fun) when is_function(Fun, 0) ->
+ Fun().
+
+%% Randlomly choose a transaction type according to benchmar spec
+gen_trans(C, SessionTab) when C#config.generator_profile == random ->
+ case random:uniform(100) of
+ Rand when Rand > 0, Rand =< 25 -> gen_t1(C, SessionTab);
+ Rand when Rand > 25, Rand =< 50 -> gen_t2(C, SessionTab);
+ Rand when Rand > 50, Rand =< 70 -> gen_t3(C, SessionTab);
+ Rand when Rand > 70, Rand =< 85 -> gen_t4(C, SessionTab);
+ Rand when Rand > 85, Rand =< 100 -> gen_t5(C, SessionTab)
+ end;
+gen_trans(C, SessionTab) ->
+ case C#config.generator_profile of
+ t1 -> gen_t1(C, SessionTab);
+ t2 -> gen_t2(C, SessionTab);
+ t3 -> gen_t3(C, SessionTab);
+ t4 -> gen_t4(C, SessionTab);
+ t5 -> gen_t5(C, SessionTab);
+ ping -> gen_ping(C, SessionTab)
+ end.
+
+gen_t1(C, _SessionTab) ->
+ SubscrId = random:uniform(C#config.n_subscribers) - 1,
+ SubscrKey = bench_trans:number_to_key(SubscrId, C),
+ Location = 4711,
+ ChangedBy = <<4711:(8*25)>>,
+ ChangedTime = <<4711:(8*25)>>,
+ {t1,
+ nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:update_current_location(Wlock, SubscrKey, Location, ChangedBy, ChangedTime) end,
+ no_fun
+ }.
+
+gen_t2(C, _SessionTab) ->
+ SubscrId = random:uniform(C#config.n_subscribers) - 1,
+ SubscrKey = bench_trans:number_to_key(SubscrId, C),
+ {t2,
+ nearest_node(SubscrId, sync_dirty, C),
+ %%nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:read_current_location(Wlock, SubscrKey) end,
+ no_fun
+ }.
+
+gen_t3(C, SessionTab) ->
+ case ets:first(SessionTab) of
+ '$end_of_table' ->
+ %% This generator does not have any session,
+ %% try reading someone elses session details
+ SubscrId = random:uniform(C#config.n_subscribers) - 1,
+ SubscrKey = bench_trans:number_to_key(SubscrId, C),
+ ServerId = random:uniform(C#config.n_servers) - 1,
+ ServerBit = 1 bsl ServerId,
+ {t3,
+ nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:read_session_details(Wlock, SubscrKey, ServerBit, ServerId) end,
+ no_fun
+ };
+ {SubscrId, SubscrKey, ServerId} ->
+ %% This generator do have a session,
+ %% read its session details
+ ServerBit = 1 bsl ServerId,
+ {t3,
+ nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:read_session_details(Wlock, SubscrKey, ServerBit, ServerId) end,
+ no_fun
+ }
+ end.
+
+gen_t4(C, SessionTab) ->
+ %% This generator may already have sessions,
+ %% create a new session and hope that no other
+ %% generator already has occupied it
+ SubscrId = random:uniform(C#config.n_subscribers) - 1,
+ SubscrKey = bench_trans:number_to_key(SubscrId, C),
+ ServerId = random:uniform(C#config.n_servers) - 1,
+ ServerBit = 1 bsl ServerId,
+ Details = <<4711:(8*2000)>>,
+ DoRollback = (random:uniform(100) =< 2),
+ Insert = fun() -> ets:insert(SessionTab, {{SubscrId, SubscrKey, ServerId}, self()}) end,
+ {t4,
+ nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:create_session_to_server(Wlock, SubscrKey, ServerBit, ServerId, Details, DoRollback) end,
+ Insert
+ }.
+
+gen_t5(C, SessionTab) ->
+ case ets:first(SessionTab) of
+ '$end_of_table' ->
+ %% This generator does not have any session,
+ %% try to delete someone elses session details
+ SubscrId = random:uniform(C#config.n_subscribers) - 1,
+ SubscrKey = bench_trans:number_to_key(SubscrId, C),
+ ServerId = random:uniform(C#config.n_servers) - 1,
+ ServerBit = 1 bsl ServerId,
+ DoRollback = (random:uniform(100) =< 2),
+ {t5,
+ nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:delete_session_from_server(Wlock, SubscrKey, ServerBit, ServerId, DoRollback) end,
+ no_fun
+ };
+ {SubscrId, SubscrKey, ServerId} ->
+ %% This generator do have at least one session,
+ %% delete it.
+ ServerBit = 1 bsl ServerId,
+ DoRollback = (random:uniform(100) =< 2),
+ Delete = fun() -> ets:delete(SessionTab, {SubscrId, SubscrKey, ServerId}) end,
+ {t5,
+ nearest_node(SubscrId, transaction, C),
+ fun(Wlock) -> bench_trans:delete_session_from_server(Wlock, SubscrKey, ServerBit, ServerId, DoRollback) end,
+ Delete
+ }
+ end.
+
+gen_ping(C, _SessionTab) ->
+ SubscrId = random:uniform(C#config.n_subscribers) - 1,
+ {ping,
+ nearest_node(SubscrId, transaction, C),
+ fun(_Wlock) -> {do_commit, true, []} end,
+ no_fun
+ }.
+
+%% Select a node as near as the subscriber data as possible
+nearest_node(SubscrId, Activity, C) ->
+ Suffix = bench_trans:number_to_suffix(SubscrId),
+ case mnesia_frag:table_info(t, s, {suffix, Suffix}, where_to_write) of
+ [] ->
+ {[node()], Activity, write};
+ [Node] ->
+ {[Node], Activity, write};
+ Nodes ->
+ Wlock = C#config.write_lock_type,
+ if
+ C#config.always_try_nearest_node; Wlock =:= write ->
+ case lists:member(node(), Nodes) of
+ true ->
+ {[node() | Nodes], Activity, Wlock};
+ false ->
+ Node = pick_node(Suffix, C, Nodes),
+ {[Node | Nodes], Activity, Wlock}
+ end;
+ Wlock == sticky_write ->
+ Node = pick_node(Suffix, C, Nodes),
+ {[Node | Nodes], Activity, Wlock}
+ end
+ end.
+
+pick_node(Suffix, C, Nodes) ->
+ Ordered = lists:sort(Nodes),
+ NumberOfActive = length(Ordered),
+ PoolSize = length(C#config.table_nodes),
+ Suffix2 =
+ case PoolSize rem NumberOfActive of
+ 0 -> Suffix div (PoolSize div NumberOfActive);
+ _ -> Suffix
+ end,
+ N = (Suffix2 rem NumberOfActive) + 1,
+ lists:nth(N, Ordered).
+
+display_statistics(Stats, C) ->
+ GoodStats = [{node(GenPid), GenStats} || {GenPid, GenStats} <- Stats,
+ is_list(GenStats)],
+ FlatStats = [{Type, Name, EvalNode, GenNode, Count} ||
+ {GenNode, GenStats} <- GoodStats,
+ {{Type, Name, EvalNode}, Count} <- GenStats],
+ TotalStats = calc_stats_per_tag(lists:keysort(2, FlatStats), 2, []),
+ {value, {n_aborts, 0, NA, 0, 0}} =
+ lists:keysearch(n_aborts, 1, TotalStats ++ [{n_aborts, 0, 0, 0, 0}]),
+ {value, {n_commits, NC, 0, 0, 0}} =
+ lists:keysearch(n_commits, 1, TotalStats ++ [{n_commits, 0, 0, 0, 0}]),
+ {value, {n_branches_executed, 0, 0, _NB, 0}} =
+ lists:keysearch(n_branches_executed, 1, TotalStats ++ [{n_branches_executed, 0, 0, 0, 0}]),
+ {value, {n_micros, 0, 0, 0, AccMicros}} =
+ lists:keysearch(n_micros, 1, TotalStats ++ [{n_micros, 0, 0, 0, 0}]),
+ NT = NA + NC,
+ NG = length(GoodStats),
+ NTN = length(C#config.table_nodes),
+ WallMicros = C#config.generator_duration * 1000 * NG,
+ Overhead = (catch (WallMicros - AccMicros) / WallMicros),
+ ?d("~n", []),
+ ?d("Benchmark result...~n", []),
+ ?d("~n", []),
+ ?d(" ~p transactions per second (TPS).~n", [catch ((NT * 1000000 * NG) div AccMicros)]),
+ ?d(" ~p TPS per table node.~n", [catch ((NT * 1000000 * NG) div (AccMicros * NTN))]),
+ ?d(" ~p micro seconds in average per transaction, including latency.~n",
+ [catch (AccMicros div NT)]),
+ ?d(" ~p transactions. ~f% generator overhead.~n", [NT, Overhead * 100]),
+
+ TypeStats = calc_stats_per_tag(lists:keysort(1, FlatStats), 1, []),
+ EvalNodeStats = calc_stats_per_tag(lists:keysort(3, FlatStats), 3, []),
+ GenNodeStats = calc_stats_per_tag(lists:keysort(4, FlatStats), 4, []),
+ if
+ C#config.statistics_detail == normal ->
+ ignore;
+ true ->
+ ?d("~n", []),
+ ?d("Statistics per transaction type...~n", []),
+ ?d("~n", []),
+ display_type_stats(" ", TypeStats, NT, AccMicros),
+
+ ?d("~n", []),
+ ?d("Transaction statistics per table node...~n", []),
+ ?d("~n", []),
+ display_calc_stats(" ", EvalNodeStats, NT, AccMicros),
+
+ ?d("~n", []),
+ ?d("Transaction statistics per generator node...~n", []),
+ ?d("~n", []),
+ display_calc_stats(" ", GenNodeStats, NT, AccMicros)
+ end,
+ if
+ C#config.statistics_detail /= debug2 ->
+ ignore;
+ true ->
+ io:format("~n", []),
+ io:format("------ Test Results ------~n", []),
+ io:format("Length : ~p sec~n", [C#config.generator_duration div 1000]),
+ Host = lists:nth(2, string:tokens(atom_to_list(node()), [$@])),
+ io:format("Processor : ~s~n", [Host]),
+ io:format("Number of Proc: ~p~n", [NG]),
+ io:format("~n", []),
+ display_trans_stats(" ", TypeStats, NT, AccMicros, NG),
+ io:format("~n", []),
+ io:format(" Overall Statistics~n", []),
+ io:format(" Transactions: ~p~n", [NT]),
+ io:format(" Inner : ~p TPS~n", [catch ((NT * 1000000 * NG) div AccMicros)]),
+ io:format(" Outer : ~p TPS~n", [catch ((NT * 1000000 * NG) div WallMicros)]),
+ io:format("~n", [])
+ end.
+
+
+display_calc_stats(Prefix, [{_Tag, 0, 0, 0, 0} | Rest], NT, Micros) ->
+ display_calc_stats(Prefix, Rest, NT, Micros);
+display_calc_stats(Prefix, [{Tag, NC, NA, _NB, NM} | Rest], NT, Micros) ->
+ ?d("~s~s n=~s%\ttime=~s%~n",
+ [Prefix, left(Tag), percent(NC + NA, NT), percent(NM, Micros)]),
+ display_calc_stats(Prefix, Rest, NT, Micros);
+display_calc_stats(_, [], _, _) ->
+ ok.
+
+display_type_stats(Prefix, [{_Tag, 0, 0, 0, 0} | Rest], NT, Micros) ->
+ display_type_stats(Prefix, Rest, NT, Micros);
+display_type_stats(Prefix, [{Tag, NC, NA, NB, NM} | Rest], NT, Micros) ->
+ ?d("~s~s n=~s%\ttime=~s%\tavg micros=~p~n",
+ [
+ Prefix,
+ left(Tag),
+ percent(NC + NA, NT),
+ percent(NM, Micros),
+ catch (NM div (NC + NA))
+ ]),
+ case NA /= 0 of
+ true -> ?d("~s ~s% aborted~n", [Prefix, percent(NA, NC + NA)]);
+ false -> ignore
+ end,
+ case NB /= 0 of
+ true -> ?d("~s ~s% branches executed~n", [Prefix, percent(NB, NC + NA)]);
+ false -> ignore
+ end,
+ display_type_stats(Prefix, Rest, NT, Micros);
+display_type_stats(_, [], _, _) ->
+ ok.
+
+left(Term) ->
+ string:left(lists:flatten(io_lib:format("~p", [Term])), 27, $.).
+
+percent(_Part, 0) -> "infinity";
+percent(Part, Total) -> io_lib:format("~8.4f", [(Part * 100) / Total]).
+
+calc_stats_per_tag([], _Pos, Acc) ->
+ lists:sort(Acc);
+calc_stats_per_tag([Tuple | _] = FlatStats, Pos, Acc) when size(Tuple) == 5 ->
+ Tag = element(Pos, Tuple),
+ do_calc_stats_per_tag(FlatStats, Pos, {Tag, 0, 0, 0, 0}, Acc).
+
+do_calc_stats_per_tag([Tuple | Rest], Pos, {Tag, NC, NA, NB, NM}, Acc)
+ when element(Pos, Tuple) == Tag ->
+ Val = element(5, Tuple),
+ case element(2, Tuple) of
+ n_commits ->
+ do_calc_stats_per_tag(Rest, Pos, {Tag, NC + Val, NA, NB, NM}, Acc);
+ n_aborts ->
+ do_calc_stats_per_tag(Rest, Pos, {Tag, NC, NA + Val, NB, NM}, Acc);
+ n_branches_executed ->
+ do_calc_stats_per_tag(Rest, Pos, {Tag, NC, NA, NB + Val, NM}, Acc);
+ n_micros ->
+ do_calc_stats_per_tag(Rest, Pos, {Tag, NC, NA, NB, NM + Val}, Acc)
+ end;
+do_calc_stats_per_tag(GenStats, Pos, CalcStats, Acc) ->
+ calc_stats_per_tag(GenStats, Pos, [CalcStats | Acc]).
+
+display_trans_stats(Prefix, [{_Tag, 0, 0, 0, 0} | Rest], NT, Micros, NG) ->
+ display_trans_stats(Prefix, Rest, NT, Micros, NG);
+display_trans_stats(Prefix, [{Tag, NC, NA, NB, NM} | Rest], NT, Micros, NG) ->
+ Common =
+ fun(Name) ->
+ Sec = NM / (1000000 * NG),
+ io:format(" ~s: ~p (~p%) Time: ~p sec TPS = ~p~n",
+ [Name,
+ NC + NA,
+ round(((NC + NA) * 100) / NT),
+ round(Sec),
+ round((NC + NA) / Sec)])
+ end,
+ Branch =
+ fun() ->
+ io:format(" Branches Executed: ~p (~p%)~n",
+ [NB, round((NB * 100) / (NC + NA))])
+ end,
+ Rollback =
+ fun() ->
+ io:format(" Rollback Executed: ~p (~p%)~n",
+ [NA, round((NA * 100) / (NC + NA))])
+ end,
+ case Tag of
+ t1 ->
+ Common("T1");
+ t2 ->
+ Common("T2");
+ t3 ->
+ Common("T3"),
+ Branch();
+ t4 ->
+ Common("T4"),
+ Branch(),
+ Rollback();
+ t5 ->
+ Common("T5"),
+ Branch(),
+ Rollback();
+ _ ->
+ Common(io_lib:format("~p", [Tag]))
+ end,
+ display_trans_stats(Prefix, Rest, NT, Micros, NG);
+display_trans_stats(_, [], _, _, _) ->
+ ok.
+
diff --git a/lib/mnesia/examples/bench/bench_populate.erl b/lib/mnesia/examples/bench/bench_populate.erl
new file mode 100644
index 0000000000..f82ee210b6
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench_populate.erl
@@ -0,0 +1,200 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% File : bench_populate.hrl
+%%% Author : Hakan Mattsson <[email protected]>
+%%% Purpose : Populate the database
+%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(bench_populate).
+-author('[email protected]').
+
+-include("bench.hrl").
+
+%% Public
+-export([start/1]).
+
+%% Populate the database
+start(C) when is_record(C, config) ->
+ ?d("~n",[]),
+ ?d("Populate database...~n",[]),
+ ?d("~n",[]),
+ create_tables(C),
+ Populate =
+ fun() ->
+ populate_subscriber(write, C),
+ populate_group(write, C),
+ populate_server(write, C)
+ end,
+ mnesia:activity(sync_dirty, Populate, [], mnesia_frag).
+
+%% -------------------------------------------------------------------
+%% Create the tables
+%% -------------------------------------------------------------------
+
+create_tables(C) ->
+ ?d(" Delete old tables...~n",[]),
+ mnesia:delete_table(group),
+ mnesia:delete_table(subscriber),
+ mnesia:delete_table(session),
+ mnesia:delete_table(server),
+ mnesia:delete_table(suffix),
+
+ ?d(" Creating ~p tables, with ~p replicas distributed over ~p nodes...~n",
+ [C#config.storage_type,
+ C#config.n_replicas,
+ length(C#config.table_nodes)]),
+
+ %% Create group table
+ GroupDef = [{C#config.storage_type, C#config.table_nodes},
+ {attributes, record_info(fields, group)}],
+ ?APPLY(mnesia, create_table, [group, GroupDef]),
+
+ %% Create suffix table
+ FragStorage =
+ case C#config.storage_type of
+ ram_copies -> n_ram_copies;
+ disc_copies -> n_disc_copies;
+ disc_only_copies -> n_disc_only_copies
+ end,
+ FragProps =
+ [{FragStorage, C#config.n_replicas},
+ {node_pool, C#config.table_nodes},
+ {n_fragments, C#config.n_fragments}],
+ SuffixDef = [{frag_properties, FragProps}],
+ ?APPLY(mnesia, create_table, [suffix, SuffixDef]),
+
+ %% Create subscriber table
+ SubscriberDef =
+ [{frag_properties, [{foreign_key, {suffix, #subscriber.suffix}} | FragProps]},
+ {attributes, record_info(fields, subscriber)}],
+ ?APPLY(mnesia, create_table, [subscriber, SubscriberDef]),
+
+ %% Create session table
+ SessionDef =
+ [{frag_properties, [{foreign_key, {suffix, #session.suffix}} | FragProps]},
+ {attributes, record_info(fields, session)}],
+ ?APPLY(mnesia, create_table, [session, SessionDef]),
+
+ %% Create server table
+ ServerDef =
+ [{frag_properties, [{foreign_key, {suffix, #server.suffix}} | FragProps]},
+ {attributes, record_info(fields, server)}],
+ ?APPLY(mnesia, create_table, [server, ServerDef]).
+
+%% -------------------------------------------------------------------
+%% Populate the subscriber table
+%% -------------------------------------------------------------------
+
+populate_subscriber(Wlock, C) ->
+ random:seed(),
+ N = C#config.n_subscribers,
+ ?d(" Populate ~p subscribers...", [N]),
+ do_populate_subscriber(Wlock, N - 1, C).
+
+do_populate_subscriber(Wlock, Id, C) when Id >= 0 ->
+ Suffix = bench_trans:number_to_suffix(Id),
+ SubscrId = bench_trans:number_to_key(Id, C),
+ Name = list_to_binary([random:uniform(26) + $A - 1]),
+ GroupId = random:uniform(C#config.n_groups) - 1,
+ Subscr = #subscriber{subscriber_number = SubscrId,
+ subscriber_name = Name,
+ group_id = GroupId,
+ location = 0,
+ active_sessions = 0,
+ changed_by = <<"">>,
+ changed_time = <<"">>,
+ suffix = Suffix},
+ ?APPLY(mnesia, write, [subscriber, Subscr, Wlock]),
+ do_populate_subscriber(Wlock, Id - 1, C);
+do_populate_subscriber(_Wlock, _, _) ->
+ io:format(" totally ~p bytes~n",
+ [mnesia:table_info(subscriber, memory) * 4]),
+ ok.
+
+%% -------------------------------------------------------------------
+%% Populate the group table
+%% -------------------------------------------------------------------
+
+populate_group(Wlock, C) ->
+ random:seed(),
+ N = C#config.n_groups,
+ ?d(" Populate ~p groups...", [N]),
+ do_populate_group(Wlock, N - 1, C).
+
+do_populate_group(Wlock, Id, C) when Id >= 0 ->
+ Name = list_to_binary(["-group ", integer_to_list(Id), "-"]),
+ Allow = init_allow(C),
+ Group = #group{group_id = Id,
+ group_name = Name,
+ allow_read = Allow,
+ allow_insert = Allow,
+ allow_delete = Allow},
+ ?APPLY(mnesia, write, [group, Group, Wlock]),
+ do_populate_group(Wlock, Id - 1, C);
+do_populate_group(_Wlock, _, _) ->
+ io:format(" totally ~p bytes~n",
+ [mnesia:table_info(group, memory) * 4]),
+ ok.
+
+init_allow(C) ->
+ do_init_allow(0, C#config.n_servers - 1).
+
+do_init_allow(Allow, NS) when NS >= 0 ->
+ case random:uniform(100) < (90 + 1) of
+ true ->
+ ServerBit = 1 bsl NS,
+ do_init_allow(Allow bor ServerBit, NS - 1);
+ false ->
+ do_init_allow(Allow, NS - 1)
+ end;
+do_init_allow(Allow, _) ->
+ Allow.
+
+%% -------------------------------------------------------------------
+%% Populate the server table
+%% -------------------------------------------------------------------
+
+populate_server(Wlock, C) ->
+ random:seed(),
+ N = C#config.n_servers,
+ ?d(" Populate ~p servers with 100 records each...", [N]),
+ do_populate_server(Wlock, N - 1).
+
+do_populate_server(Wlock, Id) when Id >= 0 ->
+ populate_server_suffixes(Wlock, Id, 99),
+ do_populate_server(Wlock, Id - 1);
+do_populate_server(_Wlock, _) ->
+ io:format(" totally ~p bytes~n",
+ [mnesia:table_info(server, memory) * 4]),
+ ok.
+
+populate_server_suffixes(Wlock, Id, Suffix) when Suffix >= 0 ->
+ Name = list_to_binary(["-server ", integer_to_list(Id), "-"]),
+ Server = #server{server_key = {Id, Suffix},
+ server_name = Name,
+ no_of_read = 0,
+ no_of_insert = 0,
+ no_of_delete = 0,
+ suffix = Suffix},
+ ?APPLY(mnesia, write, [server, Server, Wlock]),
+ populate_server_suffixes(Wlock, Id, Suffix - 1);
+populate_server_suffixes(_Wlock, _, _) ->
+ ok.
+
diff --git a/lib/mnesia/examples/bench/bench_trans.erl b/lib/mnesia/examples/bench/bench_trans.erl
new file mode 100644
index 0000000000..945715daae
--- /dev/null
+++ b/lib/mnesia/examples/bench/bench_trans.erl
@@ -0,0 +1,184 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% File : bench_trans.hrl
+%%% Author : Hakan Mattsson <[email protected]>
+%%% Purpose : Implement the transactions in Canadian database benchmark (LMC/UU-01:025)
+%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(bench_trans).
+-author('[email protected]').
+
+-include("bench.hrl").
+
+-export([
+ update_current_location/5,
+ read_current_location/2,
+ read_session_details/4,
+ create_session_to_server/6,
+ delete_session_from_server/5,
+ number_to_suffix/1,
+ number_to_key/2
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% The transactions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% -------------------------------------------------------------------
+%% T1
+%% -------------------------------------------------------------------
+
+update_current_location(Wlock, SubscrId, Location, ChangedBy, ChangedTime) ->
+ Suffix = number_to_suffix(SubscrId),
+ [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, Wlock),
+ Subscr2 = Subscr#subscriber{location = Location,
+ changed_by = ChangedBy,
+ changed_time = ChangedTime},
+ mnesia:write(subscriber, Subscr2, Wlock),
+ {do_commit, false, [ok]}.
+
+%% -------------------------------------------------------------------
+%% T2
+%% -------------------------------------------------------------------
+
+read_current_location(_Wlock, SubscrId) ->
+ Suffix = number_to_suffix(SubscrId),
+ [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, read),
+
+ Name = Subscr#subscriber.subscriber_name,
+ Location = Subscr#subscriber.location,
+ ChangedBy = Subscr#subscriber.changed_by,
+ ChangedTime = Subscr#subscriber.changed_time,
+ {do_commit, false, [Name, Location, ChangedBy, ChangedTime]}.
+
+%% -------------------------------------------------------------------
+%% T3
+%% -------------------------------------------------------------------
+
+read_session_details(Wlock, SubscrId, ServerBit, ServerId) ->
+ Suffix = number_to_suffix(SubscrId),
+ [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, read),
+ %%[Group] = mnesia:read(group, Subscr#subscriber.group_id, read),
+ [Group] = mnesia:dirty_read(group, Subscr#subscriber.group_id),
+
+ IsAllowed = ((Group#group.allow_read band ServerBit) == ServerBit),
+ IsActive = ((Subscr#subscriber.active_sessions band ServerBit) == ServerBit),
+ ExecuteBranch = (IsAllowed and IsActive),
+
+ case ExecuteBranch of
+ true ->
+ SessionKey = {SubscrId, ServerId},
+ [Session] = mnesia:read({session, Suffix}, SessionKey, read),
+
+ ServerKey = {ServerId, Suffix},
+ [Server] = mnesia:read({server, Suffix}, ServerKey, Wlock),
+ Server2 = Server#server{no_of_read = Server#server.no_of_read + 1},
+ mnesia:write(server, Server2, Wlock),
+ {do_commit, ExecuteBranch, [Session#session.session_details]};
+ false ->
+ {do_commit, ExecuteBranch, []}
+ end.
+
+%% -------------------------------------------------------------------
+%% T4
+%% -------------------------------------------------------------------
+
+create_session_to_server(Wlock, SubscrId, ServerBit, ServerId, Details, DoRollback) ->
+ Suffix = number_to_suffix(SubscrId),
+ [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, Wlock),
+ %%[Group] = mnesia:read(group, Subscr#subscriber.group_id, read),
+ [Group] = mnesia:dirty_read(group, Subscr#subscriber.group_id),
+
+ IsAllowed = ((Group#group.allow_insert band ServerBit) == ServerBit),
+ IsInactive = ((Subscr#subscriber.active_sessions band ServerBit) == 0),
+ ExecuteBranch = (IsAllowed and IsInactive),
+ case ExecuteBranch of
+ true ->
+ SessionKey = {SubscrId, ServerId},
+ Session = #session{session_key = SessionKey,
+ session_details = Details,
+ suffix = Suffix},
+ mnesia:write(session, Session, Wlock),
+ Active = (Subscr#subscriber.active_sessions bor ServerBit),
+ Subscr2 = Subscr#subscriber{active_sessions = Active},
+ mnesia:write(subscriber, Subscr2, Wlock),
+
+ ServerKey = {ServerId, Suffix},
+ [Server] = mnesia:read({server, Suffix}, ServerKey, Wlock),
+ Server2 = Server#server{no_of_insert = Server#server.no_of_insert + 1},
+ mnesia:write(server, Server2, Wlock);
+ false ->
+ ignore
+ end,
+ case DoRollback of
+ true ->
+ mnesia:abort({do_rollback, ExecuteBranch, []});
+ false ->
+ {do_commit, ExecuteBranch, []}
+ end.
+
+%% -------------------------------------------------------------------
+%% T5
+%% -------------------------------------------------------------------
+
+delete_session_from_server(Wlock, SubscrId, ServerBit, ServerId, DoRollback) ->
+ Suffix = number_to_suffix(SubscrId),
+ [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, Wlock),
+ %%[Group] = mnesia:read(group, Subscr#subscriber.group_id, read),
+ [Group] = mnesia:dirty_read(group, Subscr#subscriber.group_id),
+
+ IsAllowed = ((Group#group.allow_delete band ServerBit) == ServerBit),
+ IsActive = ((Subscr#subscriber.active_sessions band ServerBit) == ServerBit),
+ ExecuteBranch = (IsAllowed and IsActive),
+ case ExecuteBranch of
+ true ->
+ SessionKey = {SubscrId, ServerId},
+ mnesia:delete({session, Suffix}, SessionKey, Wlock),
+ Active = (Subscr#subscriber.active_sessions bxor ServerBit),
+ Subscr2 = Subscr#subscriber{active_sessions = Active},
+ mnesia:write(subscriber, Subscr2, Wlock),
+
+ ServerKey = {ServerId, Suffix},
+ [Server] = mnesia:read({server, Suffix}, ServerKey, Wlock),
+ Server2 = Server#server{no_of_delete = Server#server.no_of_delete + 1},
+ mnesia:write(server, Server2, Wlock);
+ false ->
+ ignore
+ end,
+ case DoRollback of
+ true ->
+ mnesia:abort({do_rollback, ExecuteBranch, []});
+ false ->
+ {do_commit, ExecuteBranch, []}
+ end.
+
+number_to_suffix(SubscrId) when is_integer(SubscrId) ->
+ SubscrId rem 100;
+number_to_suffix(<<_:8/binary, TimesTen:8/integer, TimesOne:8/integer>>) ->
+ ((TimesTen - $0) * 10) + (TimesOne - $0).
+
+number_to_key(Id, C) when is_integer(Id) ->
+ case C#config.use_binary_subscriber_key of
+ true ->
+ list_to_binary(string:right(integer_to_list(Id), 10, $0));
+ false ->
+ Id
+ end.
+
diff --git a/lib/mnesia/examples/bup.erl b/lib/mnesia/examples/bup.erl
new file mode 120000
index 0000000000..a25a785996
--- /dev/null
+++ b/lib/mnesia/examples/bup.erl
@@ -0,0 +1 @@
+../doc/src/bup.erl \ No newline at end of file
diff --git a/lib/mnesia/examples/company.erl b/lib/mnesia/examples/company.erl
new file mode 120000
index 0000000000..4b0c0b6bcc
--- /dev/null
+++ b/lib/mnesia/examples/company.erl
@@ -0,0 +1 @@
+../doc/src/company.erl \ No newline at end of file
diff --git a/lib/mnesia/examples/company.hrl b/lib/mnesia/examples/company.hrl
new file mode 120000
index 0000000000..95014d9781
--- /dev/null
+++ b/lib/mnesia/examples/company.hrl
@@ -0,0 +1 @@
+../doc/src/company.hrl \ No newline at end of file
diff --git a/lib/mnesia/examples/company_o.erl b/lib/mnesia/examples/company_o.erl
new file mode 120000
index 0000000000..f4a40b768a
--- /dev/null
+++ b/lib/mnesia/examples/company_o.erl
@@ -0,0 +1 @@
+../doc/src/company_o.erl \ No newline at end of file
diff --git a/lib/mnesia/examples/company_o.hrl b/lib/mnesia/examples/company_o.hrl
new file mode 120000
index 0000000000..bfa57e37ea
--- /dev/null
+++ b/lib/mnesia/examples/company_o.hrl
@@ -0,0 +1 @@
+../doc/src/company_o.hrl \ No newline at end of file
diff --git a/lib/mnesia/examples/mnesia_meter.erl b/lib/mnesia/examples/mnesia_meter.erl
new file mode 100644
index 0000000000..ea74d8691b
--- /dev/null
+++ b/lib/mnesia/examples/mnesia_meter.erl
@@ -0,0 +1,465 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Getting started:
+%%
+%% 1 Start one or more distributed Erlang nodes
+%% 2a Connect the nodes, e.g. with net_adm:ping/1
+%% 3a Run mnesia_meter:go()
+%% 3b Run mnesia_meter:go(ReplicaType)
+%% 3c Run mnesia_meter:go(ReplicaType, Nodes)
+
+-module(mnesia_meter).
+-author('[email protected]').
+-export([
+ go/0,
+ go/1,
+ go/2,
+ repeat_meter/2
+ ]).
+
+-record(person, {name, %% atomic, unique key
+ data, %% compound structure
+ married_to, %% name of partner or undefined
+ children}). %% list of children
+
+-record(meter, {desc, init, meter, micros}).
+
+-record(result, {desc, list}).
+
+-define(TIMES, 1000).
+
+go() ->
+ go(ram_copies).
+
+go(ReplicaType) ->
+ go(ReplicaType, [node() | nodes()]).
+
+go(ReplicaType, Nodes) ->
+ {ok, FunOverhead} = tc(fun(_) -> {atomic, ok} end, ?TIMES),
+ Size = size(term_to_binary(#person{})),
+ io:format("A fun apply costs ~p micro seconds. Record size is ~p bytes.~n",
+ [FunOverhead, Size]),
+ Res = go(ReplicaType, Nodes, [], FunOverhead, []),
+ NewRes = rearrange(Res, []),
+ DescHeader = lists:flatten(io_lib:format("~w on ~w", [ReplicaType, Nodes])),
+ ItemHeader = lists:seq(1, length(Nodes)),
+ Header = #result{desc = DescHeader, list = ItemHeader},
+ SepList = ['--------' || _ <- Nodes],
+ Separator = #result{desc = "", list = SepList},
+ display([Separator, Header, Separator | NewRes] ++ [Separator]).
+
+go(_ReplicaType, [], _Config, _FunOverhead, Acc) ->
+ Acc;
+go(ReplicaType, [H | T], OldNodes, FunOverhead, Acc) ->
+ Nodes = [H | OldNodes],
+ Config = [{ReplicaType, Nodes}],
+ Res = run(Nodes, Config, FunOverhead),
+ go(ReplicaType, T, Nodes, FunOverhead, [{ReplicaType, Nodes, Res} | Acc]).
+
+rearrange([{_ReplicaType, _Nodes, Meters} | Tail], Acc) ->
+ Acc2 = [add_meter(M, Acc) || M <- Meters],
+ rearrange(Tail, Acc2);
+rearrange([], Acc) ->
+ Acc.
+
+add_meter(M, Acc) ->
+ case lists:keysearch(M#meter.desc, #result.desc, Acc) of
+ {value, R} ->
+ R#result{list = [M#meter.micros | R#result.list]};
+ false ->
+ #result{desc = M#meter.desc, list = [M#meter.micros]}
+ end.
+
+display(Res) ->
+ MaxDesc = lists:max([length(R#result.desc) || R <- Res]),
+ Format = lists:concat(["! ~-", MaxDesc, "s"]),
+ display(Res, Format, MaxDesc).
+
+display([R | Res], Format, MaxDesc) ->
+ case R#result.desc of
+ "" ->
+ io:format(Format, [lists:duplicate(MaxDesc, "-")]);
+ Desc ->
+ io:format(Format, [Desc])
+ end,
+ display_items(R#result.list, R#result.desc),
+ io:format(" !~n", []),
+ display(Res, Format, MaxDesc);
+display([], _Format, _MaxDesc) ->
+ ok.
+
+display_items([_Item | Items], "") ->
+ io:format(" ! ~s", [lists:duplicate(10, $-)]),
+ display_items(Items, "");
+display_items([Micros | Items], Desc) ->
+ io:format(" ! ~10w", [Micros]),
+ display_items(Items, Desc);
+display_items([], _Desc) ->
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+meters() ->
+ [#meter{desc = "transaction update two records with read and write",
+ init = fun write_records/2,
+ meter = fun update_records/1},
+ #meter{desc = "transaction update two records with wread and write",
+ init = fun write_records/2,
+ meter = fun w_update_records/1},
+ #meter{desc = "transaction update two records with read and s_write",
+ init = fun s_write_records/2,
+ meter = fun s_update_records/1},
+ #meter{desc = "sync_dirty update two records with read and write",
+ init = fun sync_dirty_write_records/2,
+ meter = fun sync_dirty_update_records/1},
+ #meter{desc = "async_dirty update two records with read and write",
+ init = fun async_dirty_write_records/2,
+ meter = fun async_dirty_update_records/1},
+ #meter{desc = "plain fun update two records with dirty_read and dirty_write",
+ init = fun dirty_write_records/2,
+ meter = fun dirty_update_records/1},
+ #meter{desc = "ets update two records with read and write (local only)",
+ init = fun ets_opt_write_records/2,
+ meter = fun ets_update_records/1},
+ #meter{desc = "plain fun update two records with ets:lookup and ets:insert (local only)",
+ init = fun bif_opt_write_records/2,
+ meter = fun bif_update_records/1},
+ #meter{desc = "plain fun update two records with dets:lookup and dets:insert (local only)",
+ init = fun dets_opt_write_records/2,
+ meter = fun dets_update_records/1},
+
+ #meter{desc = "transaction write two records with write",
+ init = fun write_records/2,
+ meter = fun(X) -> write_records(X, 0-X) end},
+ #meter{desc = "transaction write two records with s_write",
+ init = fun s_write_records/2,
+ meter = fun(X) -> s_write_records(X, 0-X) end},
+ #meter{desc = "sync_dirty write two records with write",
+ init = fun sync_dirty_write_records/2,
+ meter = fun(X) -> sync_dirty_write_records(X, 0-X) end},
+ #meter{desc = "async_dirty write two records with write",
+ init = fun async_dirty_write_records/2,
+ meter = fun(X) -> async_dirty_write_records(X, 0-X) end},
+ #meter{desc = "plain fun write two records with dirty_write",
+ init = fun dirty_write_records/2,
+ meter = fun(X) -> dirty_write_records(X, 0-X) end},
+ #meter{desc = "ets write two records with write (local only)",
+ init = fun ets_opt_write_records/2,
+ meter = fun(X) -> ets_write_records(X, 0-X) end},
+ #meter{desc = "plain fun write two records with ets:insert (local only)",
+ init = fun bif_opt_write_records/2,
+ meter = fun(X) -> bif_write_records(X, 0-X) end},
+ #meter{desc = "plain fun write two records with dets:insert (local only)",
+ init = fun dets_opt_write_records/2,
+ meter = fun(X) -> dets_write_records(X, 0-X) end},
+
+ #meter{desc = "transaction read two records with read",
+ init = fun write_records/2,
+ meter = fun(X) -> read_records(X, 0-X) end},
+ #meter{desc = "sync_dirty read two records with read",
+ init = fun sync_dirty_write_records/2,
+ meter = fun(X) -> sync_dirty_read_records(X, 0-X) end},
+ #meter{desc = "async_dirty read two records with read",
+ init = fun async_dirty_write_records/2,
+ meter = fun(X) -> async_dirty_read_records(X, 0-X) end},
+ #meter{desc = "plain fun read two records with dirty_read",
+ init = fun dirty_write_records/2,
+ meter = fun(X) -> dirty_read_records(X, 0-X) end},
+ #meter{desc = "ets read two records with read",
+ init = fun ets_opt_write_records/2,
+ meter = fun(X) -> ets_read_records(X, 0-X) end},
+ #meter{desc = "plain fun read two records with ets:lookup",
+ init = fun bif_opt_write_records/2,
+ meter = fun(X) -> bif_read_records(X, 0-X) end},
+ #meter{desc = "plain fun read two records with dets:lookup",
+ init = fun dets_opt_write_records/2,
+ meter = fun(X) -> dets_read_records(X, 0-X) end}
+ ].
+
+update_fun(Name) ->
+ fun() ->
+ case mnesia:read({person, Name}) of
+ [] ->
+ mnesia:abort(no_such_person);
+ [Pers] ->
+ [Partner] = mnesia:read({person, Pers#person.married_to}),
+ mnesia:write(Pers#person{married_to = undefined}),
+ mnesia:write(Partner#person{married_to = undefined})
+ end
+ end.
+
+update_records(Name) ->
+ mnesia:transaction(update_fun(Name)).
+
+sync_dirty_update_records(Name) ->
+ {atomic, mnesia:sync_dirty(update_fun(Name))}.
+
+async_dirty_update_records(Name) ->
+ {atomic, mnesia:async_dirty(update_fun(Name))}.
+
+ets_update_records(Name) ->
+ {atomic, mnesia:ets(update_fun(Name))}.
+
+w_update_records(Name) ->
+ F = fun() ->
+ case mnesia:wread({person, Name}) of
+ [] ->
+ mnesia:abort(no_such_person);
+ [Pers] ->
+ [Partner] = mnesia:wread({person, Pers#person.married_to}),
+ mnesia:write(Pers#person{married_to = undefined}),
+ mnesia:write(Partner#person{married_to = undefined})
+ end
+ end,
+ mnesia:transaction(F).
+
+s_update_records(Name) ->
+ F = fun() ->
+ case mnesia:read({person, Name}) of
+ [] ->
+ mnesia:abort(no_such_person);
+ [Pers] ->
+ [Partner] = mnesia:read({person, Pers#person.married_to}),
+ mnesia:s_write(Pers#person{married_to = undefined}),
+ mnesia:s_write(Partner#person{married_to = undefined})
+ end
+ end,
+ mnesia:transaction(F).
+
+dirty_update_records(Name) ->
+ case mnesia:dirty_read({person, Name}) of
+ [] ->
+ mnesia:abort(no_such_person);
+ [Pers] ->
+ [Partner] = mnesia:dirty_read({person, Pers#person.married_to}),
+ mnesia:dirty_write(Pers#person{married_to = undefined}),
+ mnesia:dirty_write(Partner#person{married_to = undefined})
+ end,
+ {atomic, ok}.
+
+bif_update_records(Name) ->
+ case ets:lookup(person, Name) of
+ [] ->
+ mnesia:abort(no_such_person);
+ [Pers] ->
+ [Partner] = ets:lookup(person, Pers#person.married_to),
+ ets:insert(person, Pers#person{married_to = undefined}),
+ ets:insert(person, Partner#person{married_to = undefined})
+ end,
+ {atomic, ok}.
+
+dets_update_records(Name) ->
+ case dets:lookup(person, Name) of
+ [] ->
+ mnesia:abort(no_such_person);
+ [Pers] ->
+ [Partner] = dets:lookup(person, Pers#person.married_to),
+ dets:insert(person, Pers#person{married_to = undefined}),
+ dets:insert(person, Partner#person{married_to = undefined})
+ end,
+ {atomic, ok}.
+
+write_records_fun(Pers, Partner) ->
+ fun() ->
+ P = #person{children = [ulla, bella]},
+ mnesia:write(P#person{name = Pers, married_to = Partner}),
+ mnesia:write(P#person{name = Partner, married_to = Pers})
+ end.
+
+write_records(Pers, Partner) ->
+ mnesia:transaction(write_records_fun(Pers, Partner)).
+
+sync_dirty_write_records(Pers, Partner) ->
+ {atomic, mnesia:sync_dirty(write_records_fun(Pers, Partner))}.
+
+async_dirty_write_records(Pers, Partner) ->
+ {atomic, mnesia:async_dirty(write_records_fun(Pers, Partner))}.
+
+ets_write_records(Pers, Partner) ->
+ {atomic, mnesia:ets(write_records_fun(Pers, Partner))}.
+
+s_write_records(Pers, Partner) ->
+ F = fun() ->
+ P = #person{children = [ulla, bella]},
+ mnesia:s_write(P#person{name = Pers, married_to = Partner}),
+ mnesia:s_write(P#person{name = Partner, married_to = Pers})
+ end,
+ mnesia:transaction(F).
+
+dirty_write_records(Pers, Partner) ->
+ P = #person{children = [ulla, bella]},
+ mnesia:dirty_write(P#person{name = Pers, married_to = Partner}),
+ mnesia:dirty_write(P#person{name = Partner, married_to = Pers}),
+ {atomic, ok}.
+
+ets_opt_write_records(Pers, Partner) ->
+ case mnesia:table_info(person, where_to_commit) of
+ [{N, ram_copies}] when N == node() ->
+ ets_write_records(Pers, Partner);
+ _ ->
+ throw(skipped)
+ end.
+
+bif_opt_write_records(Pers, Partner) ->
+ case mnesia:table_info(person, where_to_commit) of
+ [{N, ram_copies}] when N == node() ->
+ bif_write_records(Pers, Partner);
+ _ ->
+ throw(skipped)
+ end.
+
+bif_write_records(Pers, Partner) ->
+ P = #person{children = [ulla, bella]},
+ ets:insert(person, P#person{name = Pers, married_to = Partner}),
+ ets:insert(person, P#person{name = Partner, married_to = Pers}),
+ {atomic, ok}.
+
+dets_opt_write_records(Pers, Partner) ->
+ case mnesia:table_info(person, where_to_commit) of
+ [{N, disc_only_copies}] when N == node() ->
+ dets_write_records(Pers, Partner);
+ _ ->
+ throw(skipped)
+ end.
+
+dets_write_records(Pers, Partner) ->
+ P = #person{children = [ulla, bella]},
+ dets:insert(person, P#person{name = Pers, married_to = Partner}),
+ dets:insert(person, P#person{name = Partner, married_to = Pers}),
+ {atomic, ok}.
+
+read_records_fun(Pers, Partner) ->
+ fun() ->
+ case {mnesia:read({person, Pers}),
+ mnesia:read({person, Partner})} of
+ {[_], [_]} ->
+ ok;
+ _ ->
+ mnesia:abort(no_such_person)
+ end
+ end.
+
+read_records(Pers, Partner) ->
+ mnesia:transaction(read_records_fun(Pers, Partner)).
+
+sync_dirty_read_records(Pers, Partner) ->
+ {atomic, mnesia:sync_dirty(read_records_fun(Pers, Partner))}.
+
+async_dirty_read_records(Pers, Partner) ->
+ {atomic, mnesia:async_dirty(read_records_fun(Pers, Partner))}.
+
+ets_read_records(Pers, Partner) ->
+ {atomic, mnesia:ets(read_records_fun(Pers, Partner))}.
+
+dirty_read_records(Pers, Partner) ->
+ case {mnesia:dirty_read({person, Pers}),
+ mnesia:dirty_read({person, Partner})} of
+ {[_], [_]} ->
+ {atomic, ok};
+ _ ->
+ mnesia:abort(no_such_person)
+ end.
+
+bif_read_records(Pers, Partner) ->
+ case {ets:lookup(person, Pers),
+ ets:lookup(person, Partner)} of
+ {[_], [_]} ->
+ {atomic, ok};
+ _ ->
+ mnesia:abort(no_such_person)
+ end.
+
+dets_read_records(Pers, Partner) ->
+ case {dets:lookup(person, Pers),
+ dets:lookup(person, Partner)} of
+ {[_], [_]} ->
+ {atomic, ok};
+ _ ->
+ mnesia:abort(no_such_person)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+run(Nodes, Config, FunOverhead) ->
+ Meters = meters(),
+ io:format("Run ~w meters with table config: ~w~n", [length(Meters), Config]),
+ rpc:multicall(Nodes, mnesia, lkill, []),
+ start(Nodes, Config),
+ Res = [run_meter(Data, Nodes, FunOverhead) || Data <- Meters],
+ stop(Nodes),
+ Res.
+
+run_meter(M, Nodes, FunOverhead) when record(M, meter) ->
+ io:format(".", []),
+ case catch init_records(M#meter.init, ?TIMES) of
+ {atomic, ok} ->
+ rpc:multicall(Nodes, mnesia, dump_log, []),
+ case tc(M#meter.meter, ?TIMES) of
+ {ok, Micros} ->
+ M#meter{micros = lists:max([0, Micros - FunOverhead])};
+ {error, Reason} ->
+ M#meter{micros = Reason}
+ end;
+ Res ->
+ M#meter{micros = Res}
+ end.
+
+start(Nodes, Config) ->
+ mnesia:delete_schema(Nodes),
+ ok = mnesia:create_schema(Nodes),
+ Args = [[{dump_log_write_threshold, ?TIMES div 2},
+ {dump_log_time_threshold, timer:hours(10)}]],
+ lists:foreach(fun(Node) -> rpc:call(Node, mnesia, start, Args) end, Nodes),
+ Attrs = record_info(fields, person),
+ TabDef = [{attributes, Attrs} | Config],
+ {atomic, _} = mnesia:create_table(person, TabDef).
+
+stop(Nodes) ->
+ rpc:multicall(Nodes, mnesia, stop, []).
+
+%% Generate some dummy persons
+init_records(_Fun, 0) ->
+ {atomic, ok};
+init_records(Fun, Times) ->
+ {atomic, ok} = Fun(Times, 0 - Times),
+ init_records(Fun, Times - 1).
+
+tc(Fun, Times) ->
+ case catch timer:tc(?MODULE, repeat_meter, [Fun, Times]) of
+ {Micros, ok} ->
+ {ok, Micros div Times};
+ {_Micros, {error, Reason}} ->
+ {error, Reason};
+ {'EXIT', Reason} ->
+ {error, Reason}
+ end.
+
+%% The meter must return {atomic, ok}
+repeat_meter(Meter, Times) ->
+ repeat_meter(Meter, {atomic, ok}, Times).
+
+repeat_meter(_, {atomic, ok}, 0) ->
+ ok;
+repeat_meter(Meter, {atomic, _Result}, Times) when Times > 0 ->
+ repeat_meter(Meter, Meter(Times), Times - 1);
+repeat_meter(_Meter, Reason, _Times) ->
+ {error, Reason}.
+
diff --git a/lib/mnesia/examples/mnesia_tpcb.erl b/lib/mnesia/examples/mnesia_tpcb.erl
new file mode 100644
index 0000000000..903c53a21c
--- /dev/null
+++ b/lib/mnesia/examples/mnesia_tpcb.erl
@@ -0,0 +1,1268 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% MODULE
+%%
+%% mnesia_tpcb - TPC-B benchmarking of Mnesia
+%%
+%% DESCRIPTION
+%%
+%% The metrics used in the TPC-B benchmark are throughput as measured
+%% in transactions per second (TPS). The benchmark uses a single,
+%% simple update-intensive transaction to load the database system.
+%% The single transaction type provides a simple, repeatable
+%% unit of work, and is designed to exercise the basic components of
+%% a database system.
+%%
+%% The definition of the TPC-B states lots of detailed rules and
+%% conditions that must be fullfilled, e.g. how the ACID (atomicity,
+%% consistency, isolation and durability) properties are verified,
+%% how the random numbers must be distributed, minimum sizes of
+%% the different types of records, minimum duration of the benchmark,
+%% formulas to calculate prices (dollars per tps), disclosure issues
+%% etc. Please, see http://www.tpc.org/ about the nitty gritty details.
+%%
+%% The TPC-B benchmark is stated in terms of a hypothetical bank. The
+%% bank has one or more branches. Each branch has multiple tellers. The
+%% bank has many customers, each with an account. The database represents
+%% the cash position of each entity (branch, teller and account) and a
+%% history of recent transactions run by the bank. The transaction
+%% represents the work done when a customer makes a deposit or a
+%% withdrawal against his account. The transaction is performed by a
+%% teller at some branch.
+%%
+%% Each process that performs TPC-B transactions is called a driver.
+%% Drivers generates teller_id, account_id and delta amount of
+%% money randomly. An account, a teller and a branch are read, their
+%% balances are adjusted and a history record is created. The driver
+%% measures the time for 3 reads, 3 writes and 1 create.
+%%
+%% GETTING STARTED
+%%
+%% Generate tables and run with default configuration:
+%%
+%% mnesia_tpcb:start().
+%%
+%% A little bit more advanced;
+%%
+%% spawn(mnesia_tpcb, start, [[[{n_drivers_per_node, 8}, {stop_after, infinity}]]),
+%% mnesia_tpcb:stop().
+%%
+%% Really advanced;
+%%
+%% mnesia_tpcb:init(([{n_branches, 8}, {replica_type, disc_only_copies}]),
+%% mnesia_tpcb:run(([{n_drivers_per_node, 8}]),
+%% mnesia_tpcb:run(([{n_drivers_per_node, 64}]).
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(mnesia_tpcb).
+-author('[email protected]').
+
+-export([
+ config/2,
+ count_balance/0,
+ driver_init/2,
+ init/1,
+ reporter_init/2,
+ run/1,
+ start/0,
+ start/1,
+ start/2,
+ stop/0,
+ real_trans/5,
+ verify_tabs/0,
+ reply_gen_branch/3,
+ frag_add_delta/7,
+
+ conflict_test/1,
+ dist_test/1,
+ replica_test/1,
+ sticky_replica_test/1,
+ remote_test/1,
+ remote_frag2_test/1
+ ]).
+
+-define(SECOND, 1000000).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Account record, total size must be at least 100 bytes
+
+-define(ACCOUNT_FILLER,
+ {123456789012345678901234567890123456789012345678901234567890,
+ 123456789012345678901234567890123456789012345678901234567890,
+ 123456789012345678901234567890123456789012345678901234}).
+
+-record(account,
+ {
+ id = 0, % Unique account id
+ branch_id = 0, % Branch where the account is held
+ balance = 0, % Account balance
+ filler = ?ACCOUNT_FILLER % Gap filler to ensure size >= 100 bytes
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Branch record, total size must be at least 100 bytes
+
+-define(BRANCH_FILLER,
+ {123456789012345678901234567890123456789012345678901234567890,
+ 123456789012345678901234567890123456789012345678901234567890,
+ 123456789012345678901234567890123456789012345678901234567890}).
+
+-record(branch,
+ {
+ id = 0, % Unique branch id
+ balance = 0, % Total balance of whole branch
+ filler = ?BRANCH_FILLER % Gap filler to ensure size >= 100 bytes
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Teller record, total size must be at least 100 bytes
+
+-define(TELLER_FILLER,
+ {123456789012345678901234567890123456789012345678901234567890,
+ 123456789012345678901234567890123456789012345678901234567890,
+ 1234567890123456789012345678901234567890123456789012345678}).
+
+-record(teller,
+ {
+ id = 0, % Unique teller id
+ branch_id = 0, % Branch where the teller is located
+ balance = 0, % Teller balance
+ filler = ?TELLER_FILLER % Gap filler to ensure size >= 100 bytes
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% History record, total size must be at least 50 bytes
+
+-define(HISTORY_FILLER, 1234567890).
+
+-record(history,
+ {
+ history_id = {0, 0}, % {DriverId, DriverLocalHistoryid}
+ time_stamp = now(), % Time point during active transaction
+ branch_id = 0, % Branch associated with teller
+ teller_id = 0, % Teller invlolved in transaction
+ account_id = 0, % Account updated by transaction
+ amount = 0, % Amount (delta) specified by transaction
+ filler = ?HISTORY_FILLER % Gap filler to ensure size >= 50 bytes
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-record(tab_config,
+ {
+ db_nodes = [node()],
+ n_replicas = 1, % Ignored for non-fragmented tables
+ replica_nodes = [node()],
+ replica_type = ram_copies,
+ use_running_mnesia = false,
+ n_fragments = 0,
+ n_branches = 1,
+ n_tellers_per_branch = 10, % Must be 10
+ n_accounts_per_branch = 100000, % Must be 100000
+ branch_filler = ?BRANCH_FILLER,
+ account_filler = ?ACCOUNT_FILLER,
+ teller_filler = ?TELLER_FILLER
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(run_config,
+ {
+ driver_nodes = [node()],
+ n_drivers_per_node = 1,
+ use_running_mnesia = false,
+ stop_after = timer:minutes(15), % Minimum 15 min
+ report_interval = timer:minutes(1),
+ use_sticky_locks = false,
+ spawn_near_branch = false,
+ activity_type = transaction,
+ reuse_history_id = false
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(time,
+ {
+ n_trans = 0,
+ min_n = 0,
+ max_n = 0,
+ acc_time = 0,
+ max_time = 0
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(driver_state,
+ {
+ driver_id,
+ driver_node,
+ seed,
+ n_local_branches,
+ local_branches,
+ tab_config,
+ run_config,
+ history_id,
+ time = #time{},
+ acc_time = #time{},
+ reuse_history_id
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(reporter_state,
+ {
+ driver_pids,
+ starter_pid,
+ n_iters = 0,
+ prev_tps = 0,
+ curr = #time{},
+ acc = #time{},
+ init_micros,
+ prev_micros,
+ run_config
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% One driver on each node, table not replicated
+
+config(frag_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {n_branches, length(Nodes)},
+ {n_fragments, length(Nodes)},
+ {replica_nodes, Nodes},
+ {db_nodes, Nodes},
+ {driver_nodes, Nodes},
+ {n_accounts_per_branch, 100},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% One driver on each node, table replicated to two nodes.
+
+config(frag2_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {n_branches, length(Nodes)},
+ {n_fragments, length(Nodes)},
+ {n_replicas, 2},
+ {replica_nodes, Nodes},
+ {db_nodes, Nodes},
+ {driver_nodes, Nodes},
+ {n_accounts_per_branch, 100},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% One driver on this node, table replicated to all nodes.
+
+config(replica_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {db_nodes, Nodes},
+ {driver_nodes, [Local]},
+ {replica_nodes, Nodes},
+ {n_accounts_per_branch, 100},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% One driver on this node, table replicated to all nodes.
+
+config(sticky_replica_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {db_nodes, Nodes},
+ {driver_nodes, [node()]},
+ {replica_nodes, Nodes},
+ {n_accounts_per_branch, 100},
+ {replica_type, ReplicaType},
+ {use_sticky_locks, true},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Ten drivers per node, tables replicated to all nodes, lots of branches
+
+config(dist_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {db_nodes, Nodes},
+ {driver_nodes, Nodes},
+ {replica_nodes, Nodes},
+ {n_drivers_per_node, 10},
+ {n_branches, 10 * length(Nodes) * 100},
+ {n_accounts_per_branch, 10},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Ten drivers per node, tables replicated to all nodes, single branch
+
+config(conflict_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {db_nodes, Nodes},
+ {driver_nodes, Nodes},
+ {replica_nodes, Nodes},
+ {n_drivers_per_node, 10},
+ {n_branches, 1},
+ {n_accounts_per_branch, 10},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% One driver on this node, table replicated to all other nodes.
+
+config(remote_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {db_nodes, Nodes},
+ {driver_nodes, [Local]},
+ {replica_nodes, Remote},
+ {n_accounts_per_branch, 100},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% One driver on this node, table replicated to two other nodes.
+
+config(remote_frag2_test, ReplicaType) ->
+ Remote = nodes(),
+ Local = node(),
+ Nodes = [Local | Remote],
+ [
+ {n_branches, length(Remote)},
+ {n_fragments, length(Remote)},
+ {n_replicas, 2},
+ {replica_nodes, Remote},
+ {db_nodes, Nodes},
+ {driver_nodes, [Local]},
+ {n_accounts_per_branch, 100},
+ {replica_type, ReplicaType},
+ {stop_after, timer:minutes(1)},
+ {report_interval, timer:seconds(10)},
+ {reuse_history_id, true}
+ ].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+start(What, ReplicaType) ->
+ spawn_link(?MODULE, start, [config(What, ReplicaType)]).
+
+replica_test(ReplicaType) ->
+ start(replica_test, ReplicaType).
+
+sticky_replica_test(ReplicaType) ->
+ start(sticky_replica_test, ReplicaType).
+
+dist_test(ReplicaType) ->
+ start(dist_test, ReplicaType).
+
+conflict_test(ReplicaType) ->
+ start(conflict_test, ReplicaType).
+
+remote_test(ReplicaType) ->
+ start(remote_test, ReplicaType).
+
+remote_frag2_test(ReplicaType) ->
+ start(remote_frag2_test, ReplicaType).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Args is a list of {Key, Val} tuples where Key is a field name
+%% in either the record tab_config or run_config. Unknown keys are ignored.
+
+start() ->
+ start([]).
+start(Args) ->
+ init(Args),
+ run(Args).
+
+list2rec(List, Fields, DefaultTuple) ->
+ [Name|Defaults] = tuple_to_list(DefaultTuple),
+ List2 = list2rec(List, Fields, Defaults, []),
+ list_to_tuple([Name] ++ List2).
+
+list2rec(_List, [], [], Acc) ->
+ Acc;
+list2rec(List, [F|Fields], [D|Defaults], Acc) ->
+ {Val, List2} =
+ case lists:keysearch(F, 1, List) of
+ false ->
+ {D, List};
+ {value, {F, NewVal}} ->
+ {NewVal, lists:keydelete(F, 1, List)}
+ end,
+ list2rec(List2, Fields, Defaults, Acc ++ [Val]).
+
+stop() ->
+ case whereis(mnesia_tpcb) of
+ undefined ->
+ {error, not_running};
+ Pid ->
+ sync_stop(Pid)
+ end.
+
+sync_stop(Pid) ->
+ Pid ! {self(), stop},
+ receive
+ {Pid, {stopped, Res}} -> Res
+ after timer:minutes(1) ->
+ exit(Pid, kill),
+ {error, brutal_kill}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Initialization
+
+%% Args is a list of {Key, Val} tuples where Key is a field name
+%% in the record tab_config, unknown keys are ignored.
+
+init(Args) ->
+ TabConfig0 = list2rec(Args, record_info(fields, tab_config), #tab_config{}),
+ TabConfig =
+ if
+ TabConfig0#tab_config.n_fragments =:= 0 ->
+ TabConfig0#tab_config{n_replicas = length(TabConfig0#tab_config.replica_nodes)};
+ true ->
+ TabConfig0
+ end,
+ Tags = record_info(fields, tab_config),
+ Fun = fun(F, Pos) -> {{F, element(Pos, TabConfig)}, Pos + 1} end,
+ {List, _} = lists:mapfoldl(Fun, 2, Tags),
+ io:format("TPC-B: Table config: ~p ~n", [List]),
+
+ DbNodes = TabConfig#tab_config.db_nodes,
+ stop(),
+ if
+ TabConfig#tab_config.use_running_mnesia =:= true ->
+ ignore;
+ true ->
+ rpc:multicall(DbNodes, mnesia, lkill, []),
+ case mnesia:delete_schema(DbNodes) of
+ ok ->
+ case mnesia:create_schema(DbNodes) of
+ ok ->
+ {Replies, BadNodes} =
+ rpc:multicall(DbNodes, mnesia, start, []),
+ case [Res || Res <- Replies, Res =/= ok] of
+ [] when BadNodes =:= [] ->
+ ok;
+ BadRes ->
+ io:format("TPC-B: <ERROR> "
+ "Failed to start ~p: ~p~n",
+ [BadNodes, BadRes]),
+ exit({start_failed, BadRes, BadNodes})
+ end;
+ {error, Reason} ->
+ io:format("TPC-B: <ERROR> "
+ "Failed to create schema on disc: ~p~n",
+ [Reason]),
+ exit({create_schema_failed, Reason})
+ end;
+ {error, Reason} ->
+ io:format("TPC-B: <ERROR> "
+ "Failed to delete schema on disc: ~p~n",
+ [Reason]),
+ exit({delete_schema_failed, Reason})
+ end
+ end,
+ gen_tabs(TabConfig).
+
+gen_tabs(TC) ->
+ create_tab(TC, branch, record_info(fields, branch),
+ undefined),
+ create_tab(TC, account, record_info(fields, account),
+ {branch, #account.branch_id}),
+ create_tab(TC, teller, record_info(fields, teller),
+ {branch, #teller.branch_id}),
+ create_tab(TC, history, record_info(fields, history),
+ {branch, #history.branch_id}),
+
+ NB = TC#tab_config.n_branches,
+ NT = TC#tab_config.n_tellers_per_branch,
+ NA = TC#tab_config.n_accounts_per_branch,
+ io:format("TPC-B: Generating ~p branches a ~p bytes~n",
+ [NB, size(term_to_binary(default_branch(TC)))]),
+ io:format("TPC-B: Generating ~p * ~p tellers a ~p bytes~n",
+ [NB, NT, size(term_to_binary(default_teller(TC)))]),
+ io:format("TPC-B: Generating ~p * ~p accounts a ~p bytes~n",
+ [NB, NA, size(term_to_binary(default_account(TC)))]),
+ io:format("TPC-B: Generating 0 history records a ~p bytes~n",
+ [size(term_to_binary(default_history(TC)))]),
+ gen_branches(TC),
+
+ case verify_tabs() of
+ ok ->
+ ignore;
+ {error, Reason} ->
+ io:format("TPC-B: <ERROR> Inconsistent tables: ~w~n",
+ [Reason]),
+ exit({inconsistent_tables, Reason})
+ end.
+
+create_tab(TC, Name, Attrs, _ForeignKey) when TC#tab_config.n_fragments =:= 0 ->
+ Nodes = TC#tab_config.replica_nodes,
+ Type = TC#tab_config.replica_type,
+ Def = [{Type, Nodes}, {attributes, Attrs}],
+ create_tab(Name, Def);
+create_tab(TC, Name, Attrs, ForeignKey) ->
+ NReplicas = TC#tab_config.n_replicas,
+ NodePool = TC#tab_config.replica_nodes,
+ Type = TC#tab_config.replica_type,
+ NF = TC#tab_config.n_fragments,
+ Props = [{n_fragments, NF},
+ {node_pool, NodePool},
+ {n_copies(Type), NReplicas},
+ {foreign_key, ForeignKey}],
+ Def = [{frag_properties, Props},
+ {attributes, Attrs}],
+ create_tab(Name, Def).
+
+create_tab(Name, Def) ->
+ mnesia:delete_table(Name),
+ case mnesia:create_table(Name, Def) of
+ {atomic, ok} ->
+ ok;
+ {aborted, Reason} ->
+ io:format("TPC-B: <ERROR> failed to create table ~w ~w: ~p~n",
+ [Name, Def, Reason]),
+ exit({create_table_failed, Reason})
+ end.
+
+n_copies(Type) ->
+ case Type of
+ ram_copies -> n_ram_copies;
+ disc_copies -> n_disc_copies;
+ disc_only_copies -> n_disc_only_copies
+ end.
+
+gen_branches(TC) ->
+ First = 0,
+ Last = First + TC#tab_config.n_branches - 1,
+ GenPids = gen_branches(TC, First, Last, []),
+ wait_for_gen(GenPids).
+
+wait_for_gen([]) ->
+ ok;
+wait_for_gen(Pids) ->
+ receive
+ {branch_generated, Pid} -> wait_for_gen(lists:delete(Pid, Pids));
+ Exit ->
+ exit({tpcb_failed, Exit})
+ end.
+
+gen_branches(TC, BranchId, Last, UsedNs) when BranchId =< Last ->
+ UsedNs2 = get_branch_nodes(BranchId, UsedNs),
+ Node = hd(UsedNs2),
+ Pid = spawn_link(Node, ?MODULE, reply_gen_branch,
+ [self(), TC, BranchId]),
+ [Pid | gen_branches(TC, BranchId + 1, Last, UsedNs2)];
+gen_branches(_, _, _, _) ->
+ [].
+
+reply_gen_branch(ReplyTo, TC, BranchId) ->
+ gen_branch(TC, BranchId),
+ ReplyTo ! {branch_generated, self()},
+ unlink(ReplyTo).
+
+%% Returns a new list of nodes with the best node as head
+get_branch_nodes(BranchId, UsedNs) ->
+ WriteNs = table_info({branch, BranchId}, where_to_write),
+ WeightedNs = [{n_duplicates(N, UsedNs, 0), N} || N <- WriteNs],
+ [{_, LeastUsed} | _ ] = lists:sort(WeightedNs),
+ [LeastUsed | UsedNs].
+
+n_duplicates(_N, [], Count) ->
+ Count;
+n_duplicates(N, [N | Tail], Count) ->
+ n_duplicates(N, Tail, Count + 1);
+n_duplicates(N, [_ | Tail], Count) ->
+ n_duplicates(N, Tail, Count).
+
+gen_branch(TC, BranchId) ->
+ A = default_account(TC),
+ NA = TC#tab_config.n_accounts_per_branch,
+ FirstA = BranchId * NA,
+ ArgsA = [FirstA, FirstA + NA - 1, BranchId, A],
+ ok = mnesia:activity(async_dirty, fun gen_accounts/4, ArgsA, mnesia_frag),
+
+ T = default_teller(TC),
+ NT = TC#tab_config.n_tellers_per_branch,
+ FirstT = BranchId * NT,
+ ArgsT = [FirstT, FirstT + NT - 1, BranchId, T],
+ ok = mnesia:activity(async_dirty, fun gen_tellers/4, ArgsT, mnesia_frag),
+
+ B = default_branch(TC),
+ FunB = fun() -> mnesia:write(branch, B#branch{id = BranchId}, write) end,
+ ok = mnesia:activity(sync_dirty, FunB, [], mnesia_frag).
+
+gen_tellers(Id, Last, BranchId, T) when Id =< Last ->
+ mnesia:write(teller, T#teller{id = Id, branch_id=BranchId}, write),
+ gen_tellers(Id + 1, Last, BranchId, T);
+gen_tellers(_, _, _, _) ->
+ ok.
+
+gen_accounts(Id, Last, BranchId, A) when Id =< Last ->
+ mnesia:write(account, A#account{id = Id, branch_id=BranchId}, write),
+ gen_accounts(Id + 1, Last, BranchId, A);
+gen_accounts(_, _, _, _) ->
+ ok.
+
+default_branch(TC) -> #branch{filler = TC#tab_config.branch_filler}.
+default_teller(TC) -> #teller{filler = TC#tab_config.teller_filler}.
+default_account(TC) -> #account{filler = TC#tab_config.account_filler}.
+default_history(_TC) -> #history{}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Run the benchmark
+
+%% Args is a list of {Key, Val} tuples where Key is a field name
+%% in the record run_config, unknown keys are ignored.
+run(Args) ->
+ RunConfig = list2rec(Args, record_info(fields, run_config), #run_config{}),
+ Tags = record_info(fields, run_config),
+ Fun = fun(F, Pos) -> {{F, element(Pos, RunConfig)}, Pos + 1} end,
+ {List, _} = lists:mapfoldl(Fun, 2, Tags),
+ io:format("TPC-B: Run config: ~p ~n", [List]),
+
+ Pid = spawn_link(?MODULE, reporter_init, [self(), RunConfig]),
+ receive
+ {Pid, {stopped, Res}} ->
+ Res; % Stopped by other process
+ Else ->
+ {tpcb_got, Else}
+ after RunConfig#run_config.stop_after ->
+ sync_stop(Pid)
+ end.
+
+reporter_init(Starter, RC) ->
+ register(mnesia_tpcb, self()),
+ process_flag(trap_exit, true),
+ DbNodes = mnesia:system_info(db_nodes),
+ if
+ RC#run_config.use_running_mnesia =:= true ->
+ ignore;
+ true ->
+ {Replies, BadNodes} =
+ rpc:multicall(DbNodes, mnesia, start, []),
+ case [Res || Res <- Replies, Res =/= ok] of
+ [] when BadNodes =:= [] ->
+ ok;
+ BadRes ->
+ io:format("TPC-B: <ERROR> "
+ "Failed to start ~w: ~p~n",
+ [BadNodes, BadRes]),
+ exit({start_failed, BadRes, BadNodes})
+ end,
+ verify_tabs()
+ end,
+
+ N = table_info(branch, size),
+ NT = table_info(teller, size) div N,
+ NA = table_info(account, size) div N,
+
+ {Type, NF, RepNodes} = table_storage(branch),
+ TC = #tab_config{n_fragments = NF,
+ n_branches = N,
+ n_tellers_per_branch = NT,
+ n_accounts_per_branch = NA,
+ db_nodes = DbNodes,
+ replica_nodes = RepNodes,
+ replica_type = Type
+ },
+ Drivers = start_drivers(RC, TC),
+ Now = now_to_micros(erlang:now()),
+ State = #reporter_state{driver_pids = Drivers,
+ run_config = RC,
+ starter_pid = Starter,
+ init_micros = Now,
+ prev_micros = Now
+ },
+ case catch reporter_loop(State) of
+ {'EXIT', Reason} ->
+ io:format("TPC-B: Abnormal termination: ~p~n", [Reason]),
+ if
+ RC#run_config.use_running_mnesia =:= true ->
+ ignore;
+ true ->
+ rpc:multicall(DbNodes, mnesia, lkill, [])
+ end,
+ unlink(Starter),
+ Starter ! {self(), {stopped, {error, Reason}}}, % To be sure
+ exit(shutdown);
+ {ok, Stopper, State2} ->
+ Time = State2#reporter_state.acc,
+ Res =
+ case verify_tabs() of
+ ok ->
+ {ok, Time};
+ {error, Reason} ->
+ io:format("TPC-B: <ERROR> Inconsistent tables, ~p~n",
+ [{error, Reason}]),
+ {error, Reason}
+ end,
+ if
+ RC#run_config.use_running_mnesia =:= true ->
+ ignore;
+ true ->
+ rpc:multicall(DbNodes, mnesia, stop, [])
+ end,
+ unlink(Starter),
+ Starter ! {self(), {stopped, Res}},
+ if
+ Stopper =/= Starter ->
+ Stopper ! {self(), {stopped, Res}};
+ true ->
+ ignore
+ end,
+ exit(shutdown)
+ end.
+
+table_info(Tab, Item) ->
+ Fun = fun() -> mnesia:table_info(Tab, Item) end,
+ mnesia:activity(sync_dirty, Fun, mnesia_frag).
+
+%% Returns {Storage, NFragments, ReplicaNodes}
+table_storage(Tab) ->
+ case mnesia:table_info(branch, frag_properties) of
+ [] ->
+ NFO = 0,
+ NR = length(mnesia:table_info(Tab, ram_copies)),
+ ND = length(mnesia:table_info(Tab, disc_copies)),
+ NDO = length(mnesia:table_info(Tab, disc_only_copies)),
+ if
+ NR =/= 0 -> {ram_copies, NFO, NR};
+ ND =/= 0 -> {disc_copies, NFO, ND};
+ NDO =/= 0 -> {disc_copies, NFO, NDO}
+ end;
+ Props ->
+ {value, NFO} = lists:keysearch(n_fragments, 1, Props),
+ NR = table_info(Tab, n_ram_copies),
+ ND = table_info(Tab, n_disc_copies),
+ NDO = table_info(Tab, n_disc_only_copies),
+ if
+ NR =/= 0 -> {ram_copies, NFO, NR};
+ ND =/= 0 -> {disc_copies, NFO, ND};
+ NDO =/= 0 -> {disc_copies, NFO, NDO}
+ end
+ end.
+
+reporter_loop(State) ->
+ RC = State#reporter_state.run_config,
+ receive
+ {From, stop} ->
+ {ok, From, call_drivers(State, stop)};
+ {'EXIT', Pid, Reason} when Pid =:= State#reporter_state.starter_pid ->
+ %% call_drivers(State, stop),
+ exit({starter_died, Pid, Reason})
+ after RC#run_config.report_interval ->
+ Iters = State#reporter_state.n_iters,
+ State2 = State#reporter_state{n_iters = Iters + 1},
+ case call_drivers(State2, report) of
+ State3 when State3#reporter_state.driver_pids =/= [] ->
+ State4 = State3#reporter_state{curr = #time{}},
+ reporter_loop(State4);
+ _ ->
+ exit(drivers_died)
+ end
+ end.
+
+call_drivers(State, Msg) ->
+ Drivers = State#reporter_state.driver_pids,
+ lists:foreach(fun(Pid) -> Pid ! {self(), Msg} end, Drivers),
+ State2 = show_report(calc_reports(Drivers, State)),
+ case Msg =:= stop of
+ true ->
+ Acc = State2#reporter_state.acc,
+ Init = State2#reporter_state.init_micros,
+ show_report(State2#reporter_state{n_iters = 0,
+ curr = Acc,
+ prev_micros = Init});
+ false ->
+ ignore
+ end,
+ State2.
+
+calc_reports([], State) ->
+ State;
+calc_reports([Pid|Drivers], State) ->
+ receive
+ {'EXIT', P, Reason} when P =:= State#reporter_state.starter_pid ->
+ exit({starter_died, P, Reason});
+ {'EXIT', Pid, Reason} ->
+ exit({driver_died, Pid, Reason});
+ {Pid, Time} when is_record(Time, time) ->
+ %% io:format("~w: ~w~n", [Pid, Time]),
+ A = add_time(State#reporter_state.acc, Time),
+ C = add_time(State#reporter_state.curr, Time),
+ State2 = State#reporter_state{acc = A, curr = C},
+ calc_reports(Drivers, State2)
+ end.
+
+add_time(Acc, New) ->
+ Acc#time{n_trans = New#time.n_trans + Acc#time.n_trans,
+ min_n = lists:min([New#time.n_trans, Acc#time.min_n] -- [0]),
+ max_n = lists:max([New#time.n_trans, Acc#time.max_n]),
+ acc_time = New#time.acc_time + Acc#time.acc_time,
+ max_time = lists:max([New#time.max_time, Acc#time.max_time])}.
+
+-define(AVOID_DIV_ZERO(_What_), try (_What_) catch _:_ -> 0 end).
+
+show_report(State) ->
+ Now = now_to_micros(erlang:now()),
+ Iters = State#reporter_state.n_iters,
+ Time = State#reporter_state.curr,
+ Max = Time#time.max_time,
+ N = Time#time.n_trans,
+ Avg = ?AVOID_DIV_ZERO(Time#time.acc_time div N),
+ AliveN = length(State#reporter_state.driver_pids),
+ Tps = ?AVOID_DIV_ZERO((?SECOND * AliveN) div Avg),
+ PrevTps= State#reporter_state.prev_tps,
+ {DiffSign, DiffTps} = signed_diff(Iters, Tps, PrevTps),
+ Unfairness = ?AVOID_DIV_ZERO(Time#time.max_n / Time#time.min_n),
+ BruttoAvg = ?AVOID_DIV_ZERO((Now - State#reporter_state.prev_micros) div N),
+%% io:format("n_iters=~p, n_trans=~p, n_drivers=~p, avg=~p, now=~p, prev=~p~n",
+%% [Iters, N, AliveN, BruttoAvg, Now, State#reporter_state.prev_micros]),
+ BruttoTps = ?AVOID_DIV_ZERO(?SECOND div BruttoAvg),
+ case Iters > 0 of
+ true ->
+ io:format("TPC-B: ~p iter ~s~p diff ~p (~p) tps ~p avg micros ~p max micros ~p unfairness~n",
+ [Iters, DiffSign, DiffTps, Tps, BruttoTps, Avg, Max, Unfairness]);
+ false ->
+ io:format("TPC-B: ~p (~p) transactions per second, "
+ "duration of longest transaction was ~p milliseconds~n",
+ [Tps, BruttoTps, Max div 1000])
+ end,
+ State#reporter_state{prev_tps = Tps, prev_micros = Now}.
+
+signed_diff(Iters, Curr, Prev) ->
+ case Iters > 1 of
+ true -> sign(Curr - Prev);
+ false -> sign(0)
+ end.
+
+sign(N) when N > 0 -> {"+", N};
+sign(N) -> {"", N}.
+
+now_to_micros({Mega, Secs, Micros}) ->
+ DT = calendar:now_to_datetime({Mega, Secs, 0}),
+ S = calendar:datetime_to_gregorian_seconds(DT),
+ (S * ?SECOND) + Micros.
+
+start_drivers(RC, TC) ->
+ LastHistoryId = table_info(history, size),
+ Reuse = RC#run_config.reuse_history_id,
+ DS = #driver_state{tab_config = TC,
+ run_config = RC,
+ n_local_branches = 0,
+ local_branches = [],
+ history_id = LastHistoryId,
+ reuse_history_id = Reuse},
+ Nodes = RC#run_config.driver_nodes,
+ NB = TC#tab_config.n_branches,
+ First = 0,
+ AllBranches = lists:seq(First, First + NB - 1),
+ ND = RC#run_config.n_drivers_per_node,
+ Spawn = fun(Spec) ->
+ Node = Spec#driver_state.driver_node,
+ spawn_link(Node, ?MODULE, driver_init, [Spec, AllBranches])
+ end,
+ Specs = [DS#driver_state{driver_id = Id, driver_node = N}
+ || N <- Nodes,
+ Id <- lists:seq(1, ND)],
+ Specs2 = lists:sort(lists:flatten(Specs)),
+ {Specs3, OrphanBranches} = alloc_local_branches(AllBranches, Specs2, []),
+ case length(OrphanBranches) of
+ N when N =< 10 ->
+ io:format("TPC-B: Orphan branches: ~p~n", [OrphanBranches]);
+ N ->
+ io:format("TPC-B: Orphan branches: ~p~n", [N])
+ end,
+ [Spawn(Spec) || Spec <- Specs3].
+
+alloc_local_branches([BranchId | Tail], Specs, OrphanBranches) ->
+ Nodes = table_info({branch, BranchId}, where_to_write),
+ LocalSpecs = [DS || DS <- Specs,
+ lists:member(DS#driver_state.driver_node, Nodes)],
+ case lists:keysort(#driver_state.n_local_branches, LocalSpecs) of
+ [] ->
+ alloc_local_branches(Tail, Specs, [BranchId | OrphanBranches]);
+ [DS | _] ->
+ LocalNB = DS#driver_state.n_local_branches + 1,
+ LocalBranches = [BranchId | DS#driver_state.local_branches],
+ DS2 = DS#driver_state{n_local_branches = LocalNB,
+ local_branches = LocalBranches},
+ Specs2 = Specs -- [DS],
+ Specs3 = [DS2 | Specs2],
+ alloc_local_branches(Tail, Specs3, OrphanBranches)
+ end;
+alloc_local_branches([], Specs, OrphanBranches) ->
+ {Specs, OrphanBranches}.
+
+driver_init(DS, AllBranches) ->
+ Seed = erlang:now(),
+ DS2 =
+ if
+ DS#driver_state.n_local_branches =:= 0 ->
+ DS#driver_state{seed = Seed,
+ n_local_branches = length(AllBranches),
+ local_branches = AllBranches};
+ true ->
+ DS#driver_state{seed = Seed}
+ end,
+ io:format("TPC-B: Driver ~p started as ~p on node ~p with ~p local branches~n",
+ [DS2#driver_state.driver_id, self(), node(), DS2#driver_state.n_local_branches]),
+ driver_loop(DS2).
+
+driver_loop(DS) ->
+ receive
+ {From, report} ->
+ From ! {self(), DS#driver_state.time},
+ Acc = add_time(DS#driver_state.time, DS#driver_state.acc_time),
+ DS2 = DS#driver_state{time=#time{}, acc_time = Acc}, % Reset timer
+ DS3 = calc_trans(DS2),
+ driver_loop(DS3);
+ {From, stop} ->
+ Acc = add_time(DS#driver_state.time, DS#driver_state.acc_time),
+ io:format("TPC-B: Driver ~p (~p) on node ~p stopped: ~w~n",
+ [DS#driver_state.driver_id, self(), node(self()), Acc]),
+ From ! {self(), DS#driver_state.time},
+ unlink(From),
+ exit(stopped)
+ after 0 ->
+ DS2 = calc_trans(DS),
+ driver_loop(DS2)
+ end.
+
+calc_trans(DS) ->
+ {Micros, DS2} = time_trans(DS),
+ Time = DS2#driver_state.time,
+ Time2 = Time#time{n_trans = Time#time.n_trans + 1,
+ acc_time = Time#time.acc_time + Micros,
+ max_time = lists:max([Micros, Time#time.max_time])
+ },
+ case DS#driver_state.reuse_history_id of
+ false ->
+ HistoryId = DS#driver_state.history_id + 1,
+ DS2#driver_state{time=Time2, history_id = HistoryId};
+ true ->
+ DS2#driver_state{time=Time2}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Generate teller_id, account_id and delta
+%% Time the TPC-B transaction
+time_trans(DS) ->
+ OldSeed = get(random_seed), % Avoid interference with Mnesia
+ put(random_seed, DS#driver_state.seed),
+ Random = random:uniform(),
+ NewSeed = get(random_seed),
+ case OldSeed of
+ undefined -> erase(random_seed);
+ _ -> put(random_seed, OldSeed)
+ end,
+
+ TC = DS#driver_state.tab_config,
+ RC = DS#driver_state.run_config,
+ {Branchid, Args} = random_to_args(Random, DS),
+ {Fun, Mod} = trans_type(TC, RC),
+ {Time, Res} = timer:tc(?MODULE, real_trans, [RC, Branchid, Fun, Args, Mod]),
+
+ case Res of
+ AccountBal when is_integer(AccountBal) ->
+ {Time, DS#driver_state{seed = NewSeed}};
+ Other ->
+ exit({crash, Other, Args, Random, DS})
+ end.
+
+random_to_args(Random, DS) ->
+ DriverId = DS#driver_state.driver_id,
+ TC = DS#driver_state.tab_config,
+ HistoryId = DS#driver_state.history_id,
+ Delta = trunc(Random * 1999998) - 999999, % -999999 <= Delta <= +999999
+
+ Branches = DS#driver_state.local_branches,
+ NB = DS#driver_state.n_local_branches,
+ NT = TC#tab_config.n_tellers_per_branch,
+ NA = TC#tab_config.n_accounts_per_branch,
+ Tmp = trunc(Random * NB * NT),
+ BranchPos = (Tmp div NT) + 1,
+ BranchId =
+ case TC#tab_config.n_fragments of
+ 0 -> BranchPos - 1;
+ _ -> lists:nth(BranchPos, Branches)
+ end,
+ RelativeTellerId = Tmp div NT,
+ TellerId = (BranchId * NT) + RelativeTellerId,
+ {AccountBranchId, AccountId} =
+ if
+ Random >= 0.85, NB > 1 ->
+ %% Pick from a remote account
+ TmpAccountId= trunc(Random * (NB - 1) * NA),
+ TmpAccountBranchId = TmpAccountId div NA,
+ if
+ TmpAccountBranchId =:= BranchId ->
+ {TmpAccountBranchId + 1, TmpAccountId + NA};
+ true ->
+ {TmpAccountBranchId, TmpAccountId}
+ end;
+ true ->
+ %% Pick from a local account
+ RelativeAccountId = trunc(Random * NA),
+ TmpAccountId = (BranchId * NA) + RelativeAccountId,
+ {BranchId, TmpAccountId}
+ end,
+
+ {BranchId, [DriverId, BranchId, TellerId, AccountBranchId, AccountId, HistoryId, Delta]}.
+
+real_trans(RC, BranchId, Fun, Args, Mod) ->
+ Type = RC#run_config.activity_type,
+ case RC#run_config.spawn_near_branch of
+ false ->
+ mnesia:activity(Type, Fun, Args, Mod);
+ true ->
+ Node = table_info({branch, BranchId}, where_to_read),
+ case rpc:call(Node, mnesia, activity, [Type, Fun, Args, Mod]) of
+ {badrpc, Reason} -> exit(Reason);
+ Other -> Other
+ end
+ end.
+
+trans_type(TC, RC) ->
+ if
+ TC#tab_config.n_fragments =:= 0,
+ RC#run_config.use_sticky_locks =:= false ->
+ {fun add_delta/7, mnesia};
+ TC#tab_config.n_fragments =:= 0,
+ RC#run_config.use_sticky_locks =:= true ->
+ {fun sticky_add_delta/7, mnesia};
+ TC#tab_config.n_fragments > 0,
+ RC#run_config.use_sticky_locks =:= false ->
+ {fun frag_add_delta/7, mnesia_frag}
+ end.
+
+%%
+%% Runs the TPC-B defined transaction and returns NewAccountBalance
+%%
+
+add_delta(DriverId, BranchId, TellerId, _AccountBranchId, AccountId, HistoryId, Delta) ->
+ %% Grab write lock already when the record is read
+
+ %% Add delta to branch balance
+ [B] = mnesia:read(branch, BranchId, write),
+ NewB = B#branch{balance = B#branch.balance + Delta},
+ ok = mnesia:write(branch, NewB, write),
+
+ %% Add delta to teller balance
+ [T] = mnesia:read(teller, TellerId, write),
+ NewT = T#teller{balance = T#teller.balance + Delta},
+ ok = mnesia:write(teller, NewT, write),
+
+ %% Add delta to account balance
+ [A] = mnesia:read(account, AccountId, write),
+ NewA = A#account{balance = A#account.balance + Delta},
+ ok = mnesia:write(account, NewA, write),
+
+ %% Append to history log
+ History = #history{history_id = {DriverId, HistoryId},
+ account_id = AccountId,
+ teller_id = TellerId,
+ branch_id = BranchId,
+ amount = Delta
+ },
+ ok = mnesia:write(history, History, write),
+
+ %% Return account balance
+ NewA#account.balance.
+
+sticky_add_delta(DriverId, BranchId, TellerId, _AccountBranchId, AccountId, HistoryId, Delta) ->
+ %% Grab orinary read lock when the record is read
+ %% Grab sticky write lock when the record is written
+ %% This transaction would benefit of an early stick_write lock at read
+
+ %% Add delta to branch balance
+ [B] = mnesia:read(branch, BranchId, read),
+ NewB = B#branch{balance = B#branch.balance + Delta},
+ ok = mnesia:write(branch, NewB, sticky_write),
+
+ %% Add delta to teller balance
+ [T] = mnesia:read(teller, TellerId, read),
+ NewT = T#teller{balance = T#teller.balance + Delta},
+ ok = mnesia:write(teller, NewT, sticky_write),
+
+ %% Add delta to account balance
+ [A] = mnesia:read(account, AccountId, read),
+ NewA = A#account{balance = A#account.balance + Delta},
+ ok = mnesia:write(account, NewA, sticky_write),
+
+ %% Append to history log
+ History = #history{history_id = {DriverId, HistoryId},
+ account_id = AccountId,
+ teller_id = TellerId,
+ branch_id = BranchId,
+ amount = Delta
+ },
+ ok = mnesia:write(history, History, sticky_write),
+
+ %% Return account balance
+ NewA#account.balance.
+
+frag_add_delta(DriverId, BranchId, TellerId, AccountBranchId, AccountId, HistoryId, Delta) ->
+ %% Access fragmented table
+ %% Grab write lock already when the record is read
+
+ %% Add delta to branch balance
+ [B] = mnesia:read(branch, BranchId, write),
+ NewB = B#branch{balance = B#branch.balance + Delta},
+ ok = mnesia:write(NewB),
+
+ %% Add delta to teller balance
+ [T] = mnesia:read({teller, BranchId}, TellerId, write),
+ NewT = T#teller{balance = T#teller.balance + Delta},
+ ok = mnesia:write(NewT),
+
+ %% Add delta to account balance
+ %%io:format("frag_add_delta(~p): ~p\n", [node(), {account, BranchId, AccountId}]),
+ [A] = mnesia:read({account, AccountBranchId}, AccountId, write),
+ NewA = A#account{balance = A#account.balance + Delta},
+ ok = mnesia:write(NewA),
+
+ %% Append to history log
+ History = #history{history_id = {DriverId, HistoryId},
+ account_id = AccountId,
+ teller_id = TellerId,
+ branch_id = BranchId,
+ amount = Delta
+ },
+ ok = mnesia:write(History),
+
+ %% Return account balance
+ NewA#account.balance.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Verify table consistency
+
+verify_tabs() ->
+ Nodes = mnesia:system_info(running_db_nodes),
+ case lists:member(node(), Nodes) of
+ true ->
+ Tabs = [branch, teller, account, history],
+ io:format("TPC-B: Verifying tables: ~w~n", [Tabs]),
+ rpc:multicall(Nodes, mnesia, wait_for_tables, [Tabs, infinity]),
+
+ Fun = fun() ->
+ mnesia:write_lock_table(branch),
+ mnesia:write_lock_table(teller),
+ mnesia:write_lock_table(account),
+ mnesia:write_lock_table(history),
+ {Res, BadNodes} =
+ rpc:multicall(Nodes, ?MODULE, count_balance, []),
+ check_balance(Res, BadNodes)
+ end,
+ case mnesia:transaction(Fun) of
+ {atomic, Res} -> Res;
+ {aborted, Reason} -> {error, Reason}
+ end;
+ false ->
+ {error, "Must be initiated from a running db_node"}
+ end.
+
+%% Returns a list of {Table, Node, Balance} tuples
+%% Assumes that no updates are performed
+
+-record(summary, {table, node, balance, size}).
+
+count_balance() ->
+ [count_balance(branch, #branch.balance),
+ count_balance(teller, #teller.balance),
+ count_balance(account, #account.balance)].
+
+count_balance(Tab, BalPos) ->
+ Frags = table_info(Tab, frag_names),
+ count_balance(Tab, Frags, 0, 0, BalPos).
+
+count_balance(Tab, [Frag | Frags], Bal, Size, BalPos) ->
+ First = mnesia:dirty_first(Frag),
+ {Bal2, Size2} = count_frag_balance(Frag, First, Bal, Size, BalPos),
+ count_balance(Tab, Frags, Bal2, Size2, BalPos);
+count_balance(Tab, [], Bal, Size, _BalPos) ->
+ #summary{table = Tab, node = node(), balance = Bal, size = Size}.
+
+count_frag_balance(_Frag, '$end_of_table', Bal, Size, _BalPos) ->
+ {Bal, Size};
+count_frag_balance(Frag, Key, Bal, Size, BalPos) ->
+ [Record] = mnesia:dirty_read({Frag, Key}),
+ Bal2 = Bal + element(BalPos, Record),
+ Next = mnesia:dirty_next(Frag, Key),
+ count_frag_balance(Frag, Next, Bal2, Size + 1, BalPos).
+
+check_balance([], []) ->
+ mnesia:abort({"No balance"});
+check_balance(Summaries, []) ->
+ [One | Rest] = lists:flatten(Summaries),
+ Balance = One#summary.balance,
+ %% Size = One#summary.size,
+ case [S || S <- Rest, S#summary.balance =/= Balance] of
+ [] ->
+ ok;
+ BadSummaries ->
+ mnesia:abort({"Bad balance", One, BadSummaries})
+ end;
+check_balance(_, BadNodes) ->
+ mnesia:abort({"Bad nodes", BadNodes}).
diff --git a/lib/mnesia/include/Makefile b/lib/mnesia/include/Makefile
new file mode 100644
index 0000000000..f9b7d72abe
--- /dev/null
+++ b/lib/mnesia/include/Makefile
@@ -0,0 +1,61 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1998-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(MNESIA_VSN)
+
+# ----------------------------------------------------
+# Release Macros
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN)
+
+# ----------------------------------------------------
+# Macros
+# ----------------------------------------------------
+
+INCLUDE_FILES =
+
+# ----------------------------------------------------
+# Make Rules
+# ----------------------------------------------------
+debug opt:
+
+clean:
+
+docs:
+
+
+# ----------------------------------------------------
+# Release Targets
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec:
+ $(INSTALL_DIR) $(RELSYSDIR)/include
+# $(INSTALL_DATA) $(INCLUDE_FILES) $(RELSYSDIR)/include
+
+release_docs_spec:
+
diff --git a/lib/mnesia/include/mnemosyne.hrl b/lib/mnesia/include/mnemosyne.hrl
new file mode 100644
index 0000000000..eb6ec53ae1
--- /dev/null
+++ b/lib/mnesia/include/mnemosyne.hrl
@@ -0,0 +1,18 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-compile({parse_transform,mnemosyne}).
diff --git a/lib/mnesia/info b/lib/mnesia/info
new file mode 100644
index 0000000000..bfd0816a62
--- /dev/null
+++ b/lib/mnesia/info
@@ -0,0 +1,2 @@
+group: dat Database Applications
+short: A heavy duty real-time distributed database
diff --git a/lib/mnesia/priv/.gitignore b/lib/mnesia/priv/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/mnesia/priv/.gitignore
diff --git a/lib/mnesia/src/Makefile b/lib/mnesia/src/Makefile
new file mode 100644
index 0000000000..e032f563fa
--- /dev/null
+++ b/lib/mnesia/src/Makefile
@@ -0,0 +1,139 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1996-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+#
+include $(ERL_TOP)/make/target.mk
+
+ifeq ($(TYPE),debug)
+ERL_COMPILE_FLAGS += -Ddebug -W
+endif
+
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(MNESIA_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+MODULES= \
+ mnesia \
+ mnesia_backup \
+ mnesia_bup \
+ mnesia_checkpoint \
+ mnesia_checkpoint_sup \
+ mnesia_controller \
+ mnesia_dumper\
+ mnesia_event \
+ mnesia_frag \
+ mnesia_frag_hash \
+ mnesia_frag_old_hash \
+ mnesia_index \
+ mnesia_kernel_sup \
+ mnesia_late_loader \
+ mnesia_lib\
+ mnesia_loader \
+ mnesia_locker \
+ mnesia_log \
+ mnesia_monitor \
+ mnesia_recover \
+ mnesia_registry \
+ mnesia_schema\
+ mnesia_snmp_hook \
+ mnesia_snmp_sup \
+ mnesia_subscr \
+ mnesia_sup \
+ mnesia_sp \
+ mnesia_text \
+ mnesia_tm
+
+HRL_FILES= mnesia.hrl
+
+ERL_FILES= $(MODULES:%=%.erl)
+
+TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET)
+
+APP_FILE= mnesia.app
+
+APP_SRC= $(APP_FILE).src
+APP_TARGET= $(EBIN)/$(APP_FILE)
+
+APPUP_FILE= mnesia.appup
+
+APPUP_SRC= $(APPUP_FILE).src
+APPUP_TARGET= $(EBIN)/$(APPUP_FILE)
+
+
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+ERL_COMPILE_FLAGS += \
+ +warn_unused_vars \
+ +'{parse_transform,sys_pre_attributes}' \
+ +'{attribute,insert,vsn,"mnesia_$(MNESIA_VSN)"}' \
+ -W
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+
+opt: $(TARGET_FILES)
+
+debug:
+ @${MAKE} TYPE=debug
+
+clean:
+ rm -f $(TARGET_FILES)
+ rm -f core
+
+docs:
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+
+$(APP_TARGET): $(APP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/src
+ $(INSTALL_DATA) $(HRL_FILES) $(ERL_FILES) $(RELSYSDIR)/src
+ $(INSTALL_DIR) $(RELSYSDIR)/ebin
+ $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+
+release_docs_spec:
+
diff --git a/lib/mnesia/src/mnesia.app.src b/lib/mnesia/src/mnesia.app.src
new file mode 100644
index 0000000000..3715488ec2
--- /dev/null
+++ b/lib/mnesia/src/mnesia.app.src
@@ -0,0 +1,52 @@
+{application, mnesia,
+ [{description, "MNESIA CXC 138 12"},
+ {vsn, "%VSN%"},
+ {modules, [
+ mnesia,
+ mnesia_backup,
+ mnesia_bup,
+ mnesia_checkpoint,
+ mnesia_checkpoint_sup,
+ mnesia_controller,
+ mnesia_dumper,
+ mnesia_event,
+ mnesia_frag,
+ mnesia_frag_hash,
+ mnesia_frag_old_hash,
+ mnesia_index,
+ mnesia_kernel_sup,
+ mnesia_late_loader,
+ mnesia_lib,
+ mnesia_loader,
+ mnesia_locker,
+ mnesia_log,
+ mnesia_monitor,
+ mnesia_recover,
+ mnesia_registry,
+ mnesia_schema,
+ mnesia_snmp_hook,
+ mnesia_snmp_sup,
+ mnesia_subscr,
+ mnesia_sup,
+ mnesia_sp,
+ mnesia_text,
+ mnesia_tm
+ ]},
+ {registered, [
+ mnesia_dumper_load_regulator,
+ mnesia_event,
+ mnesia_fallback,
+ mnesia_controller,
+ mnesia_kernel_sup,
+ mnesia_late_loader,
+ mnesia_locker,
+ mnesia_monitor,
+ mnesia_recover,
+ mnesia_substr,
+ mnesia_sup,
+ mnesia_tm
+ ]},
+ {applications, [kernel, stdlib]},
+ {mod, {mnesia_sup, []}}]}.
+
+
diff --git a/lib/mnesia/src/mnesia.appup.src b/lib/mnesia/src/mnesia.appup.src
new file mode 100644
index 0000000000..cad63bf8df
--- /dev/null
+++ b/lib/mnesia/src/mnesia.appup.src
@@ -0,0 +1,37 @@
+%% -*- erlang -*-
+{"%VSN%",
+ [
+ {"4.4.11",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.10",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.9", [{restart_application, mnesia}]},
+ {"4.4.8", [{restart_application, mnesia}]},
+ {"4.4.7", [{restart_application, mnesia}]}
+ ],
+ [
+ {"4.4.11",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.10",
+ [
+ {update, mnesia_locker, soft, soft_purge, soft_purge, []},
+ {update, mnesia_controller, soft, soft_purge, soft_purge, []}
+ ]
+ },
+ {"4.4.9", [{restart_application, mnesia}]},
+ {"4.4.8", [{restart_application, mnesia}]},
+ {"4.4.7", [{restart_application, mnesia}]}
+ ]
+}.
diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl
new file mode 100644
index 0000000000..9a630f18eb
--- /dev/null
+++ b/lib/mnesia/src/mnesia.erl
@@ -0,0 +1,2883 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% This module exports the public interface of the Mnesia DBMS engine
+
+-module(mnesia).
+%-behaviour(mnesia_access).
+
+-export([
+ %% Start, stop and debugging
+ start/0, start/1, stop/0, % Not for public use
+ set_debug_level/1, lkill/0, kill/0, % Not for public use
+ ms/0,
+ change_config/2,
+
+ %% Activity mgt
+ abort/1, transaction/1, transaction/2, transaction/3,
+ sync_transaction/1, sync_transaction/2, sync_transaction/3,
+ async_dirty/1, async_dirty/2, sync_dirty/1, sync_dirty/2, ets/1, ets/2,
+ activity/2, activity/3, activity/4, % Not for public use
+ is_transaction/0,
+
+ %% Access within an activity - Lock acquisition
+ lock/2, lock/4,
+ read_lock_table/1,
+ write_lock_table/1,
+
+ %% Access within an activity - Updates
+ write/1, s_write/1, write/3, write/5,
+ delete/1, s_delete/1, delete/3, delete/5,
+ delete_object/1, s_delete_object/1, delete_object/3, delete_object/5,
+
+ %% Access within an activity - Reads
+ read/1, read/2, wread/1, read/3, read/5,
+ match_object/1, match_object/3, match_object/5,
+ select/1,select/2,select/3,select/4,select/5,select/6,
+ all_keys/1, all_keys/4,
+ index_match_object/2, index_match_object/4, index_match_object/6,
+ index_read/3, index_read/6,
+ first/1, next/2, last/1, prev/2,
+ first/3, next/4, last/3, prev/4,
+
+ %% Iterators within an activity
+ foldl/3, foldl/4, foldr/3, foldr/4,
+
+ %% Dirty access regardless of activities - Updates
+ dirty_write/1, dirty_write/2,
+ dirty_delete/1, dirty_delete/2,
+ dirty_delete_object/1, dirty_delete_object/2,
+ dirty_update_counter/2, dirty_update_counter/3,
+
+ %% Dirty access regardless of activities - Read
+ dirty_read/1, dirty_read/2,
+ dirty_select/2,
+ dirty_match_object/1, dirty_match_object/2, dirty_all_keys/1,
+ dirty_index_match_object/2, dirty_index_match_object/3,
+ dirty_index_read/3, dirty_slot/2,
+ dirty_first/1, dirty_next/2, dirty_last/1, dirty_prev/2,
+
+ %% Info
+ table_info/2, table_info/4, schema/0, schema/1,
+ error_description/1, info/0, system_info/1,
+ system_info/0, % Not for public use
+
+ %% Database mgt
+ create_schema/1, delete_schema/1,
+ backup/1, backup/2, traverse_backup/4, traverse_backup/6,
+ install_fallback/1, install_fallback/2,
+ uninstall_fallback/0, uninstall_fallback/1,
+ activate_checkpoint/1, deactivate_checkpoint/1,
+ backup_checkpoint/2, backup_checkpoint/3, restore/2,
+
+ %% Table mgt
+ create_table/1, create_table/2, delete_table/1,
+ add_table_copy/3, del_table_copy/2, move_table_copy/3,
+ add_table_index/2, del_table_index/2,
+ transform_table/3, transform_table/4,
+ change_table_copy_type/3,
+ read_table_property/2, write_table_property/2, delete_table_property/2,
+ change_table_frag/2,
+ clear_table/1, clear_table/4,
+
+ %% Table load
+ dump_tables/1, wait_for_tables/2, force_load_table/1,
+ change_table_access_mode/2, change_table_load_order/2,
+ set_master_nodes/1, set_master_nodes/2,
+
+ %% Misc admin
+ dump_log/0, subscribe/1, unsubscribe/1, report_event/1,
+
+ %% Snmp
+ snmp_open_table/2, snmp_close_table/1,
+ snmp_get_row/2, snmp_get_next_index/2, snmp_get_mnesia_key/2,
+
+ %% Textfile access
+ load_textfile/1, dump_to_textfile/1,
+
+ %% QLC functions
+ table/1, table/2,
+
+ %% Mnemosyne exclusive
+ get_activity_id/0, put_activity_id/1, % Not for public use
+
+ %% Mnesia internal functions
+ dirty_rpc/4, % Not for public use
+ has_var/1, fun_select/7, fun_select/10, select_cont/3, dirty_sel_init/5,
+ foldl/6, foldr/6,
+
+ %% Module internal callback functions
+ raw_table_info/2, % Not for public use
+ remote_dirty_match_object/2, % Not for public use
+ remote_dirty_select/2 % Not for public use
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [verbose/2]).
+
+-define(DEFAULT_ACCESS, ?MODULE).
+
+%% Select
+-define(PATTERN_TO_OBJECT_MATCH_SPEC(Pat), [{Pat,[],['$_']}]).
+-define(PATTERN_TO_BINDINGS_MATCH_SPEC(Pat), [{Pat,[],['$$']}]).
+
+%% Local function in order to avoid external function call
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
+
+is_dollar_digits(Var) ->
+ case atom_to_list(Var) of
+ [$$ | Digs] ->
+ is_digits(Digs);
+ _ ->
+ false
+ end.
+
+is_digits([Dig | Tail]) ->
+ if
+ $0 =< Dig, Dig =< $9 ->
+ is_digits(Tail);
+ true ->
+ false
+ end;
+is_digits([]) ->
+ true.
+
+has_var(X) when is_atom(X) ->
+ if
+ X == '_' ->
+ true;
+ is_atom(X) ->
+ is_dollar_digits(X);
+ true ->
+ false
+ end;
+has_var(X) when is_tuple(X) ->
+ e_has_var(X, tuple_size(X));
+has_var([H|T]) ->
+ case has_var(H) of
+ false -> has_var(T);
+ Other -> Other
+ end;
+has_var(_) -> false.
+
+e_has_var(_, 0) -> false;
+e_has_var(X, Pos) ->
+ case has_var(element(Pos, X))of
+ false -> e_has_var(X, Pos-1);
+ Other -> Other
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Start and stop
+
+start() ->
+ {Time , Res} = timer:tc(application, start, [?APPLICATION, temporary]),
+
+ Secs = Time div 1000000,
+ case Res of
+ ok ->
+ verbose("Mnesia started, ~p seconds~n",[ Secs]),
+ ok;
+ {error, {already_started, mnesia}} ->
+ verbose("Mnesia already started, ~p seconds~n",[ Secs]),
+ ok;
+ {error, R} ->
+ verbose("Mnesia failed to start, ~p seconds: ~p~n",[ Secs, R]),
+ {error, R}
+ end.
+
+start(ExtraEnv) when is_list(ExtraEnv) ->
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ patched_start(ExtraEnv);
+ Error ->
+ Error
+ end;
+start(ExtraEnv) ->
+ {error, {badarg, ExtraEnv}}.
+
+patched_start([{Env, Val} | Tail]) when is_atom(Env) ->
+ case mnesia_monitor:patch_env(Env, Val) of
+ {error, Reason} ->
+ {error, Reason};
+ _NewVal ->
+ patched_start(Tail)
+ end;
+patched_start([Head | _]) ->
+ {error, {bad_type, Head}};
+patched_start([]) ->
+ start().
+
+stop() ->
+ case application:stop(?APPLICATION) of
+ ok -> stopped;
+ {error, {not_started, ?APPLICATION}} -> stopped;
+ Other -> Other
+ end.
+
+change_config(extra_db_nodes, Ns) when is_list(Ns) ->
+ mnesia_controller:connect_nodes(Ns);
+change_config(dc_dump_limit, N) when is_number(N), N > 0 ->
+ case mnesia_lib:is_running() of
+ yes ->
+ mnesia_lib:set(dc_dump_limit, N),
+ {ok, N};
+ _ ->
+ {error, {not_started, ?APPLICATION}}
+ end;
+change_config(BadKey, _BadVal) ->
+ {error, {badarg, BadKey}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Debugging
+
+set_debug_level(Level) ->
+ mnesia_subscr:set_debug_level(Level).
+
+lkill() ->
+ mnesia_sup:kill().
+
+kill() ->
+ rpc:multicall(mnesia_sup, kill, []).
+
+ms() ->
+ [
+ mnesia,
+ mnesia_backup,
+ mnesia_bup,
+ mnesia_checkpoint,
+ mnesia_checkpoint_sup,
+ mnesia_controller,
+ mnesia_dumper,
+ mnesia_loader,
+ mnesia_frag,
+ mnesia_frag_hash,
+ mnesia_frag_old_hash,
+ mnesia_index,
+ mnesia_kernel_sup,
+ mnesia_late_loader,
+ mnesia_lib,
+ mnesia_log,
+ mnesia_registry,
+ mnesia_schema,
+ mnesia_snmp_hook,
+ mnesia_snmp_sup,
+ mnesia_subscr,
+ mnesia_sup,
+ mnesia_text,
+ mnesia_tm,
+ mnesia_recover,
+ mnesia_locker,
+
+ %% Keep these last in the list, so
+ %% mnesia_sup kills these last
+ mnesia_monitor,
+ mnesia_event
+ ].
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Activity mgt
+
+-spec(abort/1 :: (_) -> no_return()).
+
+abort(Reason) ->
+ exit({aborted, Reason}).
+
+is_transaction() ->
+ case get(mnesia_activity_state) of
+ {_, Tid, _Ts} when element(1,Tid) == tid ->
+ true;
+ _ ->
+ false
+ end.
+
+transaction(Fun) ->
+ transaction(get(mnesia_activity_state), Fun, [], infinity, ?DEFAULT_ACCESS, async).
+transaction(Fun, Retries) when is_integer(Retries), Retries >= 0 ->
+ transaction(get(mnesia_activity_state), Fun, [], Retries, ?DEFAULT_ACCESS, async);
+transaction(Fun, Retries) when Retries == infinity ->
+ transaction(get(mnesia_activity_state), Fun, [], Retries, ?DEFAULT_ACCESS, async);
+transaction(Fun, Args) ->
+ transaction(get(mnesia_activity_state), Fun, Args, infinity, ?DEFAULT_ACCESS, async).
+transaction(Fun, Args, Retries) ->
+ transaction(get(mnesia_activity_state), Fun, Args, Retries, ?DEFAULT_ACCESS, async).
+
+sync_transaction(Fun) ->
+ transaction(get(mnesia_activity_state), Fun, [], infinity, ?DEFAULT_ACCESS, sync).
+sync_transaction(Fun, Retries) when is_integer(Retries), Retries >= 0 ->
+ transaction(get(mnesia_activity_state), Fun, [], Retries, ?DEFAULT_ACCESS, sync);
+sync_transaction(Fun, Retries) when Retries == infinity ->
+ transaction(get(mnesia_activity_state), Fun, [], Retries, ?DEFAULT_ACCESS, sync);
+sync_transaction(Fun, Args) ->
+ transaction(get(mnesia_activity_state), Fun, Args, infinity, ?DEFAULT_ACCESS, sync).
+sync_transaction(Fun, Args, Retries) ->
+ transaction(get(mnesia_activity_state), Fun, Args, Retries, ?DEFAULT_ACCESS, sync).
+
+
+transaction(State, Fun, Args, Retries, Mod, Kind)
+ when is_function(Fun), is_list(Args), Retries == infinity, is_atom(Mod) ->
+ mnesia_tm:transaction(State, Fun, Args, Retries, Mod, Kind);
+transaction(State, Fun, Args, Retries, Mod, Kind)
+ when is_function(Fun), is_list(Args), is_integer(Retries), Retries >= 0, is_atom(Mod) ->
+ mnesia_tm:transaction(State, Fun, Args, Retries, Mod, Kind);
+transaction(_State, Fun, Args, Retries, Mod, _Kind) ->
+ {aborted, {badarg, Fun, Args, Retries, Mod}}.
+
+non_transaction(State, Fun, Args, ActivityKind, Mod)
+ when is_function(Fun), is_list(Args), is_atom(Mod) ->
+ mnesia_tm:non_transaction(State, Fun, Args, ActivityKind, Mod);
+non_transaction(_State, Fun, Args, _ActivityKind, _Mod) ->
+ {aborted, {badarg, Fun, Args}}.
+
+async_dirty(Fun) ->
+ async_dirty(Fun, []).
+async_dirty(Fun, Args) ->
+ non_transaction(get(mnesia_activity_state), Fun, Args, async_dirty, ?DEFAULT_ACCESS).
+
+sync_dirty(Fun) ->
+ sync_dirty(Fun, []).
+sync_dirty(Fun, Args) ->
+ non_transaction(get(mnesia_activity_state), Fun, Args, sync_dirty, ?DEFAULT_ACCESS).
+
+ets(Fun) ->
+ ets(Fun, []).
+ets(Fun, Args) ->
+ non_transaction(get(mnesia_activity_state), Fun, Args, ets, ?DEFAULT_ACCESS).
+
+activity(Kind, Fun) ->
+ activity(Kind, Fun, []).
+activity(Kind, Fun, Args) when is_list(Args) ->
+ activity(Kind, Fun, Args, mnesia_monitor:get_env(access_module));
+activity(Kind, Fun, Mod) ->
+ activity(Kind, Fun, [], Mod).
+
+activity(Kind, Fun, Args, Mod) ->
+ State = get(mnesia_activity_state),
+ case Kind of
+ ets -> non_transaction(State, Fun, Args, Kind, Mod);
+ async_dirty -> non_transaction(State, Fun, Args, Kind, Mod);
+ sync_dirty -> non_transaction(State, Fun, Args, Kind, Mod);
+ transaction -> wrap_trans(State, Fun, Args, infinity, Mod, async);
+ {transaction, Retries} -> wrap_trans(State, Fun, Args, Retries, Mod, async);
+ sync_transaction -> wrap_trans(State, Fun, Args, infinity, Mod, sync);
+ {sync_transaction, Retries} -> wrap_trans(State, Fun, Args, Retries, Mod, sync);
+ _ -> {aborted, {bad_type, Kind}}
+ end.
+
+wrap_trans(State, Fun, Args, Retries, Mod, Kind) ->
+ case transaction(State, Fun, Args, Retries, Mod, Kind) of
+ {atomic, GoodRes} -> GoodRes;
+ BadRes -> exit(BadRes)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access within an activity - lock acquisition
+
+%% Grab a lock on an item in the global lock table
+%% Item may be any term. Lock may be write or read.
+%% write lock is set on all the given nodes
+%% read lock is only set on the first node
+%% Nodes may either be a list of nodes or one node as an atom
+%% Mnesia on all Nodes must be connected to each other, but
+%% it is not neccessary that they are up and running.
+
+lock(LockItem, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ lock(Tid, Ts, LockItem, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:lock(Tid, Ts, LockItem, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+lock(Tid, Ts, LockItem, LockKind) ->
+ case element(1, Tid) of
+ tid ->
+ case LockItem of
+ {record, Tab, Key} ->
+ lock_record(Tid, Ts, Tab, Key, LockKind);
+ {table, Tab} ->
+ lock_table(Tid, Ts, Tab, LockKind);
+ {global, GlobalKey, Nodes} ->
+ global_lock(Tid, Ts, GlobalKey, LockKind, Nodes);
+ _ ->
+ abort({bad_type, LockItem})
+ end;
+ _Protocol ->
+ []
+ end.
+
+%% Grab a read lock on a whole table
+read_lock_table(Tab) ->
+ lock({table, Tab}, read),
+ ok.
+
+%% Grab a write lock on a whole table
+write_lock_table(Tab) ->
+ lock({table, Tab}, write),
+ ok.
+
+lock_record(Tid, Ts, Tab, Key, LockKind) when is_atom(Tab) ->
+ Store = Ts#tidstore.store,
+ Oid = {Tab, Key},
+ case LockKind of
+ read ->
+ mnesia_locker:rlock(Tid, Store, Oid);
+ write ->
+ mnesia_locker:wlock(Tid, Store, Oid);
+ sticky_write ->
+ mnesia_locker:sticky_wlock(Tid, Store, Oid);
+ none ->
+ [];
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end;
+lock_record(_Tid, _Ts, Tab, _Key, _LockKind) ->
+ abort({bad_type, Tab}).
+
+lock_table(Tid, Ts, Tab, LockKind) when is_atom(Tab) ->
+ Store = Ts#tidstore.store,
+ case LockKind of
+ read ->
+ mnesia_locker:rlock_table(Tid, Store, Tab);
+ write ->
+ mnesia_locker:wlock_table(Tid, Store, Tab);
+ sticky_write ->
+ mnesia_locker:sticky_wlock_table(Tid, Store, Tab);
+ none ->
+ [];
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end;
+lock_table(_Tid, _Ts, Tab, _LockKind) ->
+ abort({bad_type, Tab}).
+
+global_lock(Tid, Ts, Item, Kind, Nodes) when is_list(Nodes) ->
+ case element(1, Tid) of
+ tid ->
+ Store = Ts#tidstore.store,
+ GoodNs = good_global_nodes(Nodes),
+ if
+ Kind /= read, Kind /= write ->
+ abort({bad_type, Kind});
+ true ->
+ mnesia_locker:global_lock(Tid, Store, Item, Kind, GoodNs)
+ end;
+ _Protocol ->
+ []
+ end;
+global_lock(_Tid, _Ts, _Item, _Kind, Nodes) ->
+ abort({bad_type, Nodes}).
+
+good_global_nodes(Nodes) ->
+ Recover = [node() | val(recover_nodes)],
+ mnesia_lib:intersect(Nodes, Recover).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access within an activity - updates
+
+write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+ Tab = element(1, Val),
+ write(Tab, Val, write);
+write(Val) ->
+ abort({bad_type, Val}).
+
+s_write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+ Tab = element(1, Val),
+ write(Tab, Val, sticky_write).
+
+write(Tab, Val, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ write(Tid, Ts, Tab, Val, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:write(Tid, Ts, Tab, Val, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+write(Tid, Ts, Tab, Val, LockKind)
+ when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_insert(Tab, Val),
+ ok;
+ tid ->
+ Store = Ts#tidstore.store,
+ Oid = {Tab, element(2, Val)},
+ case LockKind of
+ write ->
+ mnesia_locker:wlock(Tid, Store, Oid);
+ sticky_write ->
+ mnesia_locker:sticky_wlock(Tid, Store, Oid);
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end,
+ write_to_store(Tab, Store, Oid, Val);
+ Protocol ->
+ do_dirty_write(Protocol, Tab, Val)
+ end;
+write(_Tid, _Ts, Tab, Val, LockKind) ->
+ abort({bad_type, Tab, Val, LockKind}).
+
+write_to_store(Tab, Store, Oid, Val) ->
+ case ?catch_val({Tab, record_validation}) of
+ {RecName, Arity, Type}
+ when tuple_size(Val) == Arity, RecName == element(1, Val) ->
+ case Type of
+ bag ->
+ ?ets_insert(Store, {Oid, Val, write});
+ _ ->
+ ?ets_delete(Store, Oid),
+ ?ets_insert(Store, {Oid, Val, write})
+ end,
+ ok;
+ {'EXIT', _} ->
+ abort({no_exists, Tab});
+ _ ->
+ abort({bad_type, Val})
+ end.
+
+delete({Tab, Key}) ->
+ delete(Tab, Key, write);
+delete(Oid) ->
+ abort({bad_type, Oid}).
+
+s_delete({Tab, Key}) ->
+ delete(Tab, Key, sticky_write);
+s_delete(Oid) ->
+ abort({bad_type, Oid}).
+
+delete(Tab, Key, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ delete(Tid, Ts, Tab, Key, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:delete(Tid, Ts, Tab, Key, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+delete(Tid, Ts, Tab, Key, LockKind)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_delete(Tab, Key),
+ ok;
+ tid ->
+ Store = Ts#tidstore.store,
+ Oid = {Tab, Key},
+ case LockKind of
+ write ->
+ mnesia_locker:wlock(Tid, Store, Oid);
+ sticky_write ->
+ mnesia_locker:sticky_wlock(Tid, Store, Oid);
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end,
+ ?ets_delete(Store, Oid),
+ ?ets_insert(Store, {Oid, Oid, delete}),
+ ok;
+ Protocol ->
+ do_dirty_delete(Protocol, Tab, Key)
+ end;
+delete(_Tid, _Ts, Tab, _Key, _LockKind) ->
+ abort({bad_type, Tab}).
+
+delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+ Tab = element(1, Val),
+ delete_object(Tab, Val, write);
+delete_object(Val) ->
+ abort({bad_type, Val}).
+
+s_delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+ Tab = element(1, Val),
+ delete_object(Tab, Val, sticky_write);
+s_delete_object(Val) ->
+ abort({bad_type, Val}).
+
+delete_object(Tab, Val, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ delete_object(Tid, Ts, Tab, Val, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:delete_object(Tid, Ts, Tab, Val, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+delete_object(Tid, Ts, Tab, Val, LockKind)
+ when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+ case has_var(Val) of
+ false ->
+ do_delete_object(Tid, Ts, Tab, Val, LockKind);
+ true ->
+ abort({bad_type, Tab, Val})
+ end;
+delete_object(_Tid, _Ts, Tab, _Key, _LockKind) ->
+ abort({bad_type, Tab}).
+
+do_delete_object(Tid, Ts, Tab, Val, LockKind) ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_match_delete(Tab, Val),
+ ok;
+ tid ->
+ Store = Ts#tidstore.store,
+ Oid = {Tab, element(2, Val)},
+ case LockKind of
+ write ->
+ mnesia_locker:wlock(Tid, Store, Oid);
+ sticky_write ->
+ mnesia_locker:sticky_wlock(Tid, Store, Oid);
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end,
+ case val({Tab, setorbag}) of
+ bag ->
+ ?ets_match_delete(Store, {Oid, Val, '_'}),
+ ?ets_insert(Store, {Oid, Val, delete_object});
+ _ ->
+ case ?ets_match_object(Store, {Oid, '_', write}) of
+ [] ->
+ ?ets_match_delete(Store, {Oid, Val, '_'}),
+ ?ets_insert(Store, {Oid, Val, delete_object});
+ _ ->
+ ?ets_delete(Store, Oid),
+ ?ets_insert(Store, {Oid, Oid, delete})
+ end
+ end,
+ ok;
+ Protocol ->
+ do_dirty_delete_object(Protocol, Tab, Val)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access within an activity - read
+
+read(Tab, Key) ->
+ read(Tab, Key, read).
+
+read({Tab, Key}) ->
+ read(Tab, Key, read);
+read(Oid) ->
+ abort({bad_type, Oid}).
+
+wread({Tab, Key}) ->
+ read(Tab, Key, write);
+wread(Oid) ->
+ abort({bad_type, Oid}).
+
+read(Tab, Key, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ read(Tid, Ts, Tab, Key, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:read(Tid, Ts, Tab, Key, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+read(Tid, Ts, Tab, Key, LockKind)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_lookup(Tab, Key);
+ tid ->
+ Store = Ts#tidstore.store,
+ Oid = {Tab, Key},
+ Objs =
+ case LockKind of
+ read ->
+ mnesia_locker:rlock(Tid, Store, Oid);
+ write ->
+ mnesia_locker:rwlock(Tid, Store, Oid);
+ sticky_write ->
+ mnesia_locker:sticky_rwlock(Tid, Store, Oid);
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end,
+ add_written(?ets_lookup(Store, Oid), Tab, Objs);
+ _Protocol ->
+ dirty_read(Tab, Key)
+ end;
+read(_Tid, _Ts, Tab, _Key, _LockKind) ->
+ abort({bad_type, Tab}).
+
+first(Tab) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ first(Tid, Ts, Tab);
+ {Mod, Tid, Ts} ->
+ Mod:first(Tid, Ts, Tab);
+ _ ->
+ abort(no_transaction)
+ end.
+
+first(Tid, Ts, Tab)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_first(Tab);
+ tid ->
+ lock_table(Tid, Ts, Tab, read),
+ do_fixtable(Tab,Ts),
+ Key = dirty_first(Tab),
+ stored_keys(Tab,Key,'$end_of_table',Ts,next,
+ val({Tab, setorbag}));
+ _Protocol ->
+ dirty_first(Tab)
+ end;
+first(_Tid, _Ts,Tab) ->
+ abort({bad_type, Tab}).
+
+last(Tab) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ last(Tid, Ts, Tab);
+ {Mod, Tid, Ts} ->
+ Mod:last(Tid, Ts, Tab);
+ _ ->
+ abort(no_transaction)
+ end.
+
+last(Tid, Ts, Tab)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_last(Tab);
+ tid ->
+ lock_table(Tid, Ts, Tab, read),
+ do_fixtable(Tab,Ts),
+ Key = dirty_last(Tab),
+ stored_keys(Tab,Key,'$end_of_table',Ts,prev,
+ val({Tab, setorbag}));
+ _Protocol ->
+ dirty_last(Tab)
+ end;
+last(_Tid, _Ts,Tab) ->
+ abort({bad_type, Tab}).
+
+next(Tab,Key) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS,Tid,Ts} ->
+ next(Tid,Ts,Tab,Key);
+ {Mod,Tid,Ts} ->
+ Mod:next(Tid,Ts,Tab,Key);
+ _ ->
+ abort(no_transaction)
+ end.
+next(Tid,Ts,Tab,Key)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_next(Tab,Key);
+ tid ->
+ lock_table(Tid, Ts, Tab, read),
+ do_fixtable(Tab,Ts),
+ New = (catch dirty_next(Tab,Key)),
+ stored_keys(Tab,New,Key,Ts,next,
+ val({Tab, setorbag}));
+ _Protocol ->
+ dirty_next(Tab,Key)
+ end;
+next(_Tid, _Ts,Tab,_) ->
+ abort({bad_type, Tab}).
+
+prev(Tab,Key) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS,Tid,Ts} ->
+ prev(Tid,Ts,Tab,Key);
+ {Mod,Tid,Ts} ->
+ Mod:prev(Tid,Ts,Tab,Key);
+ _ ->
+ abort(no_transaction)
+ end.
+prev(Tid,Ts,Tab,Key)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ ?ets_prev(Tab,Key);
+ tid ->
+ lock_table(Tid, Ts, Tab, read),
+ do_fixtable(Tab,Ts),
+ New = (catch dirty_prev(Tab,Key)),
+ stored_keys(Tab,New,Key,Ts,prev,
+ val({Tab, setorbag}));
+ _Protocol ->
+ dirty_prev(Tab,Key)
+ end;
+prev(_Tid, _Ts,Tab,_) ->
+ abort({bad_type, Tab}).
+
+%% Compensate for transaction written and/or deleted records
+stored_keys(Tab,'$end_of_table',Prev,Ts,Op,Type) ->
+ case ts_keys(Ts#tidstore.store,Tab,Op,Type,[]) of
+ [] -> '$end_of_table';
+ Keys when Type == ordered_set->
+ get_ordered_tskey(Prev,Keys,Op);
+ Keys ->
+ get_next_tskey(Prev,Keys,Tab)
+ end;
+stored_keys(Tab,{'EXIT',{aborted,R={badarg,[Tab,Key]}}},
+ Key,#tidstore{store=Store},Op,Type) ->
+ %% Had to match on error, ouch..
+ case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
+ [] -> abort(R);
+ Ops ->
+ case lists:last(Ops) of
+ [delete] -> abort(R);
+ _ ->
+ case ts_keys(Store,Tab,Op,Type,[]) of
+ [] -> '$end_of_table';
+ Keys -> get_next_tskey(Key,Keys,Tab)
+ end
+ end
+ end;
+stored_keys(_,{'EXIT',{aborted,R}},_,_,_,_) ->
+ abort(R);
+stored_keys(Tab,Key,Prev,#tidstore{store=Store},Op,ordered_set) ->
+ case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
+ [] ->
+ Keys = ts_keys(Store,Tab,Op,ordered_set,[Key]),
+ get_ordered_tskey(Prev,Keys,Op);
+ Ops ->
+ case lists:last(Ops) of
+ [delete] ->
+ mnesia:Op(Tab,Key);
+ _ ->
+ Keys = ts_keys(Store,Tab,Op,ordered_set,[Key]),
+ get_ordered_tskey(Prev,Keys,Op)
+ end
+ end;
+stored_keys(Tab,Key,_,#tidstore{store=Store},Op,_) ->
+ case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
+ [] -> Key;
+ Ops ->
+ case lists:last(Ops) of
+ [delete] -> mnesia:Op(Tab,Key);
+ _ -> Key
+ end
+ end.
+
+get_ordered_tskey('$end_of_table', [First|_],_) -> First;
+get_ordered_tskey(Prev, [First|_], next) when Prev < First -> First;
+get_ordered_tskey(Prev, [First|_], prev) when Prev > First -> First;
+get_ordered_tskey(Prev, [_|R],Op) -> get_ordered_tskey(Prev,R,Op);
+get_ordered_tskey(_, [],_) -> '$end_of_table'.
+
+get_next_tskey(Key,Keys,Tab) ->
+ Next =
+ if Key == '$end_of_table' -> hd(Keys);
+ true ->
+ case lists:dropwhile(fun(A) -> A /= Key end, Keys) of
+ [] -> hd(Keys); %% First stored key
+ [Key] -> '$end_of_table';
+ [Key,Next2|_] -> Next2
+ end
+ end,
+ case Next of
+ '$end_of_table' -> '$end_of_table';
+ _ -> %% Really slow anybody got another solution??
+ case dirty_read(Tab, Next) of
+ [] -> Next;
+ _ ->
+ %% Updated value we already returned this key
+ get_next_tskey(Next,Keys,Tab)
+ end
+ end.
+
+ts_keys(Store, Tab, Op, Type, Def) ->
+ All = ?ets_match(Store, {{Tab,'$1'},'_','$2'}),
+ Keys = ts_keys_1(All, Def),
+ if
+ Type == ordered_set, Op == prev ->
+ lists:reverse(lists:sort(Keys));
+ Type == ordered_set ->
+ lists:sort(Keys);
+ Op == next ->
+ lists:reverse(Keys);
+ true ->
+ Keys
+ end.
+
+ts_keys_1([[Key, write]|R], []) ->
+ ts_keys_1(R, [Key]);
+ts_keys_1([[Key, write]|R], Acc=[Key|_]) ->
+ ts_keys_1(R, Acc);
+ts_keys_1([[Key, write]|R], Acc) ->
+ ts_keys_1(R, [Key|Acc]);
+ts_keys_1([[Key, delete]|R], [Key|Acc]) ->
+ ts_keys_1(R, Acc);
+ts_keys_1([_|R], Acc) ->
+ ts_keys_1(R, Acc);
+ts_keys_1([], Acc) ->
+ Acc.
+
+
+%%%%%%%%%%%%%%%%%%%%%
+%% Iterators
+
+foldl(Fun, Acc, Tab) ->
+ foldl(Fun, Acc, Tab, read).
+
+foldl(Fun, Acc, Tab, LockKind) when is_function(Fun) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ foldl(Tid, Ts, Fun, Acc, Tab, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:foldl(Tid, Ts, Fun, Acc, Tab, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+ {Type, Prev} = init_iteration(ActivityId, Opaque, Tab, LockKind),
+ Res = (catch do_foldl(ActivityId, Opaque, Tab, dirty_first(Tab), Fun, Acc, Type, Prev)),
+ close_iteration(Res, Tab).
+
+do_foldl(A, O, Tab, '$end_of_table', Fun, RAcc, _Type, Stored) ->
+ lists:foldl(fun(Key, Acc) ->
+ lists:foldl(Fun, Acc, read(A, O, Tab, Key, read))
+ end, RAcc, Stored);
+do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H == Key ->
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, NewAcc, ordered_set, Stored);
+do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H < Key ->
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, H, read)),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldl(Tid, Ts, Tab, Key, Fun, NewAcc, ordered_set, Stored);
+do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H > Key ->
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, NewAcc, ordered_set, [H |Stored]);
+do_foldl(A, O, Tab, Key, Fun, Acc, Type, Stored) -> %% Type is set or bag
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+ NewStored = ordsets:del_element(Key, Stored),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, NewAcc, Type, NewStored).
+
+foldr(Fun, Acc, Tab) ->
+ foldr(Fun, Acc, Tab, read).
+foldr(Fun, Acc, Tab, LockKind) when is_function(Fun) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ foldr(Tid, Ts, Fun, Acc, Tab, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:foldr(Tid, Ts, Fun, Acc, Tab, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+ {Type, TempPrev} = init_iteration(ActivityId, Opaque, Tab, LockKind),
+ Prev =
+ if
+ Type == ordered_set ->
+ lists:reverse(TempPrev);
+ true -> %% Order doesn't matter for set and bag
+ TempPrev %% Keep the order so we can use ordsets:del_element
+ end,
+ Res = (catch do_foldr(ActivityId, Opaque, Tab, dirty_last(Tab), Fun, Acc, Type, Prev)),
+ close_iteration(Res, Tab).
+
+do_foldr(A, O, Tab, '$end_of_table', Fun, RAcc, _Type, Stored) ->
+ lists:foldl(fun(Key, Acc) ->
+ lists:foldl(Fun, Acc, read(A, O, Tab, Key, read))
+ end, RAcc, Stored);
+do_foldr(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H == Key ->
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, NewAcc, ordered_set, Stored);
+do_foldr(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H > Key ->
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, H, read)),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldr(Tid, Ts, Tab, Key, Fun, NewAcc, ordered_set, Stored);
+do_foldr(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H < Key ->
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, NewAcc, ordered_set, [H |Stored]);
+do_foldr(A, O, Tab, Key, Fun, Acc, Type, Stored) -> %% Type is set or bag
+ NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
+ NewStored = ordsets:del_element(Key, Stored),
+ {_, Tid, Ts} = get(mnesia_activity_state),
+ do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, NewAcc, Type, NewStored).
+
+init_iteration(ActivityId, Opaque, Tab, LockKind) ->
+ lock(ActivityId, Opaque, {table, Tab}, LockKind),
+ Type = val({Tab, setorbag}),
+ Previous = add_previous(ActivityId, Opaque, Type, Tab),
+ St = val({Tab, storage_type}),
+ if
+ St == unknown ->
+ ignore;
+ true ->
+ mnesia_lib:db_fixtable(St, Tab, true)
+ end,
+ {Type, Previous}.
+
+close_iteration(Res, Tab) ->
+ case val({Tab, storage_type}) of
+ unknown ->
+ ignore;
+ St ->
+ mnesia_lib:db_fixtable(St, Tab, false)
+ end,
+ case Res of
+ {'EXIT', {aborted, What}} ->
+ abort(What);
+ {'EXIT', What} ->
+ abort(What);
+ _ ->
+ Res
+ end.
+
+add_previous(_ActivityId, non_transaction, _Type, _Tab) ->
+ [];
+add_previous(_Tid, Ts, _Type, Tab) ->
+ Previous = ?ets_match(Ts#tidstore.store, {{Tab, '$1'}, '_', write}),
+ lists:sort(lists:concat(Previous)).
+
+%% This routine fixes up the return value from read/1 so that
+%% it is correct with respect to what this particular transaction
+%% has already written, deleted .... etc
+
+add_written([], _Tab, Objs) ->
+ Objs; % standard normal fast case
+add_written(Written, Tab, Objs) ->
+ case val({Tab, setorbag}) of
+ bag ->
+ add_written_to_bag(Written, Objs, []);
+ _ ->
+ add_written_to_set(Written)
+ end.
+
+add_written_to_set(Ws) ->
+ case lists:last(Ws) of
+ {_, _, delete} -> [];
+ {_, Val, write} -> [Val];
+ {_, _, delete_object} -> []
+ end.
+
+add_written_to_bag([{_, Val, write} | Tail], Objs, Ack) ->
+ add_written_to_bag(Tail, lists:delete(Val, Objs), [Val | Ack]);
+add_written_to_bag([], Objs, Ack) ->
+ Objs ++ lists:reverse(Ack); %% Oldest write first as in ets
+add_written_to_bag([{_, _ , delete} | Tail], _Objs, _Ack) ->
+ %% This transaction just deleted all objects
+ %% with this key
+ add_written_to_bag(Tail, [], []);
+add_written_to_bag([{_, Val, delete_object} | Tail], Objs, Ack) ->
+ add_written_to_bag(Tail, lists:delete(Val, Objs), lists:delete(Val, Ack)).
+
+match_object(Pat) when is_tuple(Pat), tuple_size(Pat) > 2 ->
+ Tab = element(1, Pat),
+ match_object(Tab, Pat, read);
+match_object(Pat) ->
+ abort({bad_type, Pat}).
+
+match_object(Tab, Pat, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ match_object(Tid, Ts, Tab, Pat, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:match_object(Tid, Ts, Tab, Pat, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+match_object(Tid, Ts, Tab, Pat, LockKind)
+ when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+ case element(1, Tid) of
+ ets ->
+ mnesia_lib:db_match_object(ram_copies, Tab, Pat);
+ tid ->
+ Key = element(2, Pat),
+ case has_var(Key) of
+ false -> lock_record(Tid, Ts, Tab, Key, LockKind);
+ true -> lock_table(Tid, Ts, Tab, LockKind)
+ end,
+ Objs = dirty_match_object(Tab, Pat),
+ add_written_match(Ts#tidstore.store, Pat, Tab, Objs);
+ _Protocol ->
+ dirty_match_object(Tab, Pat)
+ end;
+match_object(_Tid, _Ts, Tab, Pat, _LockKind) ->
+ abort({bad_type, Tab, Pat}).
+
+add_written_match(S, Pat, Tab, Objs) ->
+ Ops = find_ops(S, Tab, Pat),
+ add_match(Ops, Objs, val({Tab, setorbag})).
+
+find_ops(S, Tab, Pat) ->
+ GetWritten = [{{{Tab, '_'}, Pat, write}, [], ['$_']},
+ {{{Tab, '_'}, '_', delete}, [], ['$_']},
+ {{{Tab, '_'}, Pat, delete_object}, [], ['$_']}],
+ ets:select(S, GetWritten).
+
+add_match([], Objs, _Type) ->
+ Objs;
+add_match(Written, Objs, ordered_set) ->
+ %% Must use keysort which is stable
+ add_ordered_match(lists:keysort(1,Written), Objs, []);
+add_match([{Oid, _, delete}|R], Objs, Type) ->
+ add_match(R, deloid(Oid, Objs), Type);
+add_match([{_Oid, Val, delete_object}|R], Objs, Type) ->
+ add_match(R, lists:delete(Val, Objs), Type);
+add_match([{_Oid, Val, write}|R], Objs, bag) ->
+ add_match(R, [Val | lists:delete(Val, Objs)], bag);
+add_match([{Oid, Val, write}|R], Objs, set) ->
+ add_match(R, [Val | deloid(Oid,Objs)],set).
+
+%% For ordered_set only !!
+add_ordered_match(Written = [{{_, Key}, _, _}|_], [Obj|Objs], Acc)
+ when Key > element(2, Obj) ->
+ add_ordered_match(Written, Objs, [Obj|Acc]);
+add_ordered_match([{{_, Key}, Val, write}|Rest], Objs =[Obj|_], Acc)
+ when Key < element(2, Obj) ->
+ add_ordered_match(Rest, [Val|Objs],Acc);
+add_ordered_match([{{_, Key}, _, _DelOP}|Rest], Objs =[Obj|_], Acc)
+ when Key < element(2, Obj) ->
+ add_ordered_match(Rest,Objs,Acc);
+%% Greater than last object
+add_ordered_match([{_, Val, write}|Rest], [], Acc) ->
+ add_ordered_match(Rest, [Val], Acc);
+add_ordered_match([_|Rest], [], Acc) ->
+ add_ordered_match(Rest, [], Acc);
+%% Keys are equal from here
+add_ordered_match([{_, Val, write}|Rest], [_Obj|Objs], Acc) ->
+ add_ordered_match(Rest, [Val|Objs], Acc);
+add_ordered_match([{_, _Val, delete}|Rest], [_Obj|Objs], Acc) ->
+ add_ordered_match(Rest, Objs, Acc);
+add_ordered_match([{_, Val, delete_object}|Rest], [Val|Objs], Acc) ->
+ add_ordered_match(Rest, Objs, Acc);
+add_ordered_match([{_, _, delete_object}|Rest], Objs, Acc) ->
+ add_ordered_match(Rest, Objs, Acc);
+add_ordered_match([], Objs, Acc) ->
+ lists:reverse(Acc, Objs).
+
+%% For select chunk
+add_sel_match(Sorted, Objs, ordered_set) ->
+ add_sel_ordered_match(Sorted, Objs, []);
+add_sel_match(Written, Objs, Type) ->
+ add_sel_match(Written, Objs, Type, []).
+
+add_sel_match([], Objs, _Type, Acc) ->
+ {Objs,lists:reverse(Acc)};
+add_sel_match([Op={Oid, _, delete}|R], Objs, Type, Acc) ->
+ case deloid(Oid, Objs) of
+ Objs ->
+ add_sel_match(R, Objs, Type, [Op|Acc]);
+ NewObjs when Type == set ->
+ add_sel_match(R, NewObjs, Type, Acc);
+ NewObjs -> %% If bag we may get more in next chunk
+ add_sel_match(R, NewObjs, Type, [Op|Acc])
+ end;
+add_sel_match([Op = {_Oid, Val, delete_object}|R], Objs, Type, Acc) ->
+ case lists:delete(Val, Objs) of
+ Objs ->
+ add_sel_match(R, Objs, Type, [Op|Acc]);
+ NewObjs when Type == set ->
+ add_sel_match(R, NewObjs, Type, Acc);
+ NewObjs ->
+ add_sel_match(R, NewObjs, Type, [Op|Acc])
+ end;
+add_sel_match([Op={Oid={_,Key}, Val, write}|R], Objs, bag, Acc) ->
+ case lists:keymember(Key, 2, Objs) of
+ true ->
+ add_sel_match(R,[Val|lists:delete(Val,Objs)],bag,
+ [{Oid,Val,delete_object}|Acc]);
+ false ->
+ add_sel_match(R,Objs,bag,[Op|Acc])
+ end;
+add_sel_match([Op={Oid, Val, write}|R], Objs, set, Acc) ->
+ case deloid(Oid,Objs) of
+ Objs ->
+ add_sel_match(R, Objs,set, [Op|Acc]);
+ NewObjs ->
+ add_sel_match(R, [Val | NewObjs],set, Acc)
+ end.
+
+%% For ordered_set only !!
+add_sel_ordered_match(Written = [{{_, Key}, _, _}|_], [Obj|Objs],Acc)
+ when Key > element(2, Obj) ->
+ add_sel_ordered_match(Written, Objs, [Obj|Acc]);
+add_sel_ordered_match([{{_, Key}, Val, write}|Rest], Objs =[Obj|_],Acc)
+ when Key < element(2, Obj) ->
+ add_sel_ordered_match(Rest,[Val|Objs],Acc);
+add_sel_ordered_match([{{_, Key}, _, _DelOP}|Rest], Objs =[Obj|_], Acc)
+ when Key < element(2, Obj) ->
+ add_sel_ordered_match(Rest,Objs,Acc);
+%% Greater than last object
+add_sel_ordered_match(Ops1, [], Acc) ->
+ {lists:reverse(Acc), Ops1};
+%% Keys are equal from here
+add_sel_ordered_match([{_, Val, write}|Rest], [_Obj|Objs], Acc) ->
+ add_sel_ordered_match(Rest, [Val|Objs], Acc);
+add_sel_ordered_match([{_, _Val, delete}|Rest], [_Obj|Objs], Acc) ->
+ add_sel_ordered_match(Rest, Objs, Acc);
+add_sel_ordered_match([{_, Val, delete_object}|Rest], [Val|Objs], Acc) ->
+ add_sel_ordered_match(Rest, Objs, Acc);
+add_sel_ordered_match([{_, _, delete_object}|Rest], Objs, Acc) ->
+ add_sel_ordered_match(Rest, Objs, Acc);
+add_sel_ordered_match([], Objs, Acc) ->
+ {lists:reverse(Acc, Objs),[]}.
+
+
+deloid(_Oid, []) ->
+ [];
+deloid({Tab, Key}, [H | T]) when element(2, H) == Key ->
+ deloid({Tab, Key}, T);
+deloid(Oid, [H | T]) ->
+ [H | deloid(Oid, T)].
+
+%%%%%%%%%%%%%%%%%%
+% select
+
+select(Tab, Pat) ->
+ select(Tab, Pat, read).
+select(Tab, Pat, LockKind)
+ when is_atom(Tab), Tab /= schema, is_list(Pat) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ select(Tid, Ts, Tab, Pat, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:select(Tid, Ts, Tab, Pat, LockKind);
+ _ ->
+ abort(no_transaction)
+ end;
+select(Tab, Pat, _Lock) ->
+ abort({badarg, Tab, Pat}).
+
+select(Tid, Ts, Tab, Spec, LockKind) ->
+ SelectFun = fun(FixedSpec) -> dirty_select(Tab, FixedSpec) end,
+ fun_select(Tid, Ts, Tab, Spec, LockKind, Tab, SelectFun).
+
+fun_select(Tid, Ts, Tab, Spec, LockKind, TabPat, SelectFun) ->
+ case element(1, Tid) of
+ ets ->
+ mnesia_lib:db_select(ram_copies, Tab, Spec);
+ tid ->
+ select_lock(Tid,Ts,LockKind,Spec,Tab),
+ Store = Ts#tidstore.store,
+ Written = ?ets_match_object(Store, {{TabPat, '_'}, '_', '_'}),
+ case Written of
+ [] ->
+ %% Nothing changed in the table during this transaction,
+ %% Simple case get results from [d]ets
+ SelectFun(Spec);
+ _ ->
+ %% Hard (slow case) records added or deleted earlier
+ %% in the transaction, have to cope with that.
+ Type = val({Tab, setorbag}),
+ FixedSpec = get_record_pattern(Spec),
+ TabRecs = SelectFun(FixedSpec),
+ FixedRes = add_match(Written, TabRecs, Type),
+ CMS = ets:match_spec_compile(Spec),
+ ets:match_spec_run(FixedRes, CMS)
+ end;
+ _Protocol ->
+ SelectFun(Spec)
+ end.
+
+select_lock(Tid,Ts,LockKind,Spec,Tab) ->
+ %% Avoid table lock if possible
+ case Spec of
+ [{HeadPat,_, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
+ Key = element(2, HeadPat),
+ case has_var(Key) of
+ false -> lock_record(Tid, Ts, Tab, Key, LockKind);
+ true -> lock_table(Tid, Ts, Tab, LockKind)
+ end;
+ _ ->
+ lock_table(Tid, Ts, Tab, LockKind)
+ end.
+
+%% Breakable Select
+select(Tab, Pat, NObjects, LockKind)
+ when is_atom(Tab), Tab /= schema, is_list(Pat), is_integer(NObjects) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ select(Tid, Ts, Tab, Pat, NObjects, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:select(Tid, Ts, Tab, Pat, NObjects, LockKind);
+ _ ->
+ abort(no_transaction)
+ end;
+select(Tab, Pat, NObjects, _Lock) ->
+ abort({badarg, Tab, Pat, NObjects}).
+
+select(Tid, Ts, Tab, Spec, NObjects, LockKind) ->
+ Where = val({Tab,where_to_read}),
+ Type = mnesia_lib:storage_type_at_node(Where,Tab),
+ InitFun = fun(FixedSpec) -> dirty_sel_init(Where,Tab,FixedSpec,NObjects,Type) end,
+ fun_select(Tid,Ts,Tab,Spec,LockKind,Tab,InitFun,NObjects,Where,Type).
+
+-record(mnesia_select, {tab,tid,node,storage,cont,written=[],spec,type,orig}).
+
+fun_select(Tid, Ts, Tab, Spec, LockKind, TabPat, Init, NObjects, Node, Storage) ->
+ Def = #mnesia_select{tid=Tid,node=Node,storage=Storage,tab=Tab,orig=Spec},
+ case element(1, Tid) of
+ ets ->
+ select_state(mnesia_lib:db_select_init(ram_copies,Tab,Spec,NObjects),Def);
+ tid ->
+ select_lock(Tid,Ts,LockKind,Spec,Tab),
+ Store = Ts#tidstore.store,
+ do_fixtable(Tab, Store),
+
+ Written0 = ?ets_match_object(Store, {{TabPat, '_'}, '_', '_'}),
+ case Written0 of
+ [] ->
+ %% Nothing changed in the table during this transaction,
+ %% Simple case get results from [d]ets
+ select_state(Init(Spec),Def);
+ _ ->
+ %% Hard (slow case) records added or deleted earlier
+ %% in the transaction, have to cope with that.
+ Type = val({Tab, setorbag}),
+ Written =
+ if Type == ordered_set -> %% Sort stable
+ lists:keysort(1,Written0);
+ true ->
+ Written0
+ end,
+ FixedSpec = get_record_pattern(Spec),
+ CMS = ets:match_spec_compile(Spec),
+ trans_select(Init(FixedSpec),
+ Def#mnesia_select{written=Written,spec=CMS,type=Type, orig=FixedSpec})
+ end;
+ _Protocol ->
+ select_state(Init(Spec),Def)
+ end.
+
+select(Cont) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ select_cont(Tid,Ts,Cont);
+ {Mod, Tid, Ts} ->
+ Mod:select_cont(Tid,Ts,Cont);
+ _ ->
+ abort(no_transaction)
+ end.
+
+select_cont(_Tid,_Ts,'$end_of_table') ->
+ '$end_of_table';
+select_cont(Tid,_Ts,State=#mnesia_select{tid=Tid,cont=Cont, orig=Ms})
+ when element(1,Tid) == ets ->
+ case Cont of
+ '$end_of_table' -> '$end_of_table';
+ _ -> select_state(mnesia_lib:db_select_cont(ram_copies,Cont,Ms),State)
+ end;
+select_cont(Tid,_,State=#mnesia_select{tid=Tid,written=[]}) ->
+ select_state(dirty_sel_cont(State),State);
+select_cont(Tid,_Ts,State=#mnesia_select{tid=Tid}) ->
+ trans_select(dirty_sel_cont(State), State);
+select_cont(_Tid2,_,#mnesia_select{tid=_Tid1}) -> % Missmatching tids
+ abort(wrong_transaction);
+select_cont(_,_,Cont) ->
+ abort({badarg, Cont}).
+
+trans_select('$end_of_table', #mnesia_select{written=Written0,spec=CMS,type=Type}) ->
+ Written = add_match(Written0, [], Type),
+ {ets:match_spec_run(Written, CMS), '$end_of_table'};
+trans_select({TabRecs,Cont}, State = #mnesia_select{written=Written0,spec=CMS,type=Type}) ->
+ {FixedRes,Written} = add_sel_match(Written0, TabRecs, Type),
+ select_state({ets:match_spec_run(FixedRes, CMS),Cont},
+ State#mnesia_select{written=Written}).
+
+select_state({Matches, Cont}, MS) ->
+ {Matches, MS#mnesia_select{cont=Cont}};
+select_state('$end_of_table',_) -> '$end_of_table'.
+
+get_record_pattern([]) -> [];
+get_record_pattern([{M,C,_B}|R]) ->
+ [{M,C,['$_']} | get_record_pattern(R)].
+
+all_keys(Tab) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ all_keys(Tid, Ts, Tab, read);
+ {Mod, Tid, Ts} ->
+ Mod:all_keys(Tid, Ts, Tab, read);
+ _ ->
+ abort(no_transaction)
+ end.
+
+all_keys(Tid, Ts, Tab, LockKind)
+ when is_atom(Tab), Tab /= schema ->
+ Pat0 = val({Tab, wild_pattern}),
+ Pat = setelement(2, Pat0, '$1'),
+ Keys = select(Tid, Ts, Tab, [{Pat, [], ['$1']}], LockKind),
+ case val({Tab, setorbag}) of
+ bag ->
+ mnesia_lib:uniq(Keys);
+ _ ->
+ Keys
+ end;
+all_keys(_Tid, _Ts, Tab, _LockKind) ->
+ abort({bad_type, Tab}).
+
+index_match_object(Pat, Attr) when is_tuple(Pat), tuple_size(Pat) > 2 ->
+ Tab = element(1, Pat),
+ index_match_object(Tab, Pat, Attr, read);
+index_match_object(Pat, _Attr) ->
+ abort({bad_type, Pat}).
+
+index_match_object(Tab, Pat, Attr, LockKind) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind);
+ {Mod, Tid, Ts} ->
+ Mod:index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind);
+ _ ->
+ abort(no_transaction)
+ end.
+
+index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind)
+ when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+ case element(1, Tid) of
+ ets ->
+ dirty_index_match_object(Tab, Pat, Attr); % Should be optimized?
+ tid ->
+ case mnesia_schema:attr_tab_to_pos(Tab, Attr) of
+ Pos when Pos =< tuple_size(Pat) ->
+ case LockKind of
+ read ->
+ Store = Ts#tidstore.store,
+ mnesia_locker:rlock_table(Tid, Store, Tab),
+ Objs = dirty_index_match_object(Tab, Pat, Attr),
+ add_written_match(Store, Pat, Tab, Objs);
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end;
+ BadPos ->
+ abort({bad_type, Tab, BadPos})
+ end;
+ _Protocol ->
+ dirty_index_match_object(Tab, Pat, Attr)
+ end;
+index_match_object(_Tid, _Ts, Tab, Pat, _Attr, _LockKind) ->
+ abort({bad_type, Tab, Pat}).
+
+index_read(Tab, Key, Attr) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ index_read(Tid, Ts, Tab, Key, Attr, read);
+ {Mod, Tid, Ts} ->
+ Mod:index_read(Tid, Ts, Tab, Key, Attr, read);
+ _ ->
+ abort(no_transaction)
+ end.
+
+index_read(Tid, Ts, Tab, Key, Attr, LockKind)
+ when is_atom(Tab), Tab /= schema ->
+ case element(1, Tid) of
+ ets ->
+ dirty_index_read(Tab, Key, Attr); % Should be optimized?
+ tid ->
+ Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
+ case LockKind of
+ read ->
+ case has_var(Key) of
+ false ->
+ Store = Ts#tidstore.store,
+ Objs = mnesia_index:read(Tid, Store, Tab, Key, Pos),
+ Pat = setelement(Pos, val({Tab, wild_pattern}), Key),
+ add_written_match(Store, Pat, Tab, Objs);
+ true ->
+ abort({bad_type, Tab, Attr, Key})
+ end;
+ _ ->
+ abort({bad_type, Tab, LockKind})
+ end;
+ _Protocol ->
+ dirty_index_read(Tab, Key, Attr)
+ end;
+index_read(_Tid, _Ts, Tab, _Key, _Attr, _LockKind) ->
+ abort({bad_type, Tab}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Dirty access regardless of activities - updates
+
+dirty_write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+ Tab = element(1, Val),
+ dirty_write(Tab, Val);
+dirty_write(Val) ->
+ abort({bad_type, Val}).
+
+dirty_write(Tab, Val) ->
+ do_dirty_write(async_dirty, Tab, Val).
+
+do_dirty_write(SyncMode, Tab, Val)
+ when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+ case ?catch_val({Tab, record_validation}) of
+ {RecName, Arity, _Type}
+ when tuple_size(Val) == Arity, RecName == element(1, Val) ->
+ Oid = {Tab, element(2, Val)},
+ mnesia_tm:dirty(SyncMode, {Oid, Val, write});
+ {'EXIT', _} ->
+ abort({no_exists, Tab});
+ _ ->
+ abort({bad_type, Val})
+ end;
+do_dirty_write(_SyncMode, Tab, Val) ->
+ abort({bad_type, Tab, Val}).
+
+dirty_delete({Tab, Key}) ->
+ dirty_delete(Tab, Key);
+dirty_delete(Oid) ->
+ abort({bad_type, Oid}).
+
+dirty_delete(Tab, Key) ->
+ do_dirty_delete(async_dirty, Tab, Key).
+
+do_dirty_delete(SyncMode, Tab, Key) when is_atom(Tab), Tab /= schema ->
+ Oid = {Tab, Key},
+ mnesia_tm:dirty(SyncMode, {Oid, Oid, delete});
+do_dirty_delete(_SyncMode, Tab, _Key) ->
+ abort({bad_type, Tab}).
+
+dirty_delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
+ Tab = element(1, Val),
+ dirty_delete_object(Tab, Val);
+dirty_delete_object(Val) ->
+ abort({bad_type, Val}).
+
+dirty_delete_object(Tab, Val) ->
+ do_dirty_delete_object(async_dirty, Tab, Val).
+
+do_dirty_delete_object(SyncMode, Tab, Val)
+ when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
+ Oid = {Tab, element(2, Val)},
+ case has_var(Val) of
+ false ->
+ mnesia_tm:dirty(SyncMode, {Oid, Val, delete_object});
+ true ->
+ abort({bad_type, Tab, Val})
+ end;
+
+do_dirty_delete_object(_SyncMode, Tab, Val) ->
+ abort({bad_type, Tab, Val}).
+
+%% A Counter is an Oid being {CounterTab, CounterName}
+
+dirty_update_counter({Tab, Key}, Incr) ->
+ dirty_update_counter(Tab, Key, Incr);
+dirty_update_counter(Counter, _Incr) ->
+ abort({bad_type, Counter}).
+
+dirty_update_counter(Tab, Key, Incr) ->
+ do_dirty_update_counter(async_dirty, Tab, Key, Incr).
+
+do_dirty_update_counter(SyncMode, Tab, Key, Incr)
+ when is_atom(Tab), Tab /= schema, is_integer(Incr) ->
+ case ?catch_val({Tab, record_validation}) of
+ {RecName, 3, set} ->
+ Oid = {Tab, Key},
+ mnesia_tm:dirty(SyncMode, {Oid, {RecName, Incr}, update_counter});
+ _ ->
+ abort({combine_error, Tab, update_counter})
+ end;
+do_dirty_update_counter(_SyncMode, Tab, _Key, Incr) ->
+ abort({bad_type, Tab, Incr}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Dirty access regardless of activities - read
+
+dirty_read({Tab, Key}) ->
+ dirty_read(Tab, Key);
+dirty_read(Oid) ->
+ abort({bad_type, Oid}).
+
+dirty_read(Tab, Key)
+ when is_atom(Tab), Tab /= schema ->
+%% case catch ?ets_lookup(Tab, Key) of
+%% {'EXIT', _} ->
+ %% Bad luck, we have to perform a real lookup
+ dirty_rpc(Tab, mnesia_lib, db_get, [Tab, Key]);
+%% Val ->
+%% Val
+%% end;
+dirty_read(Tab, _Key) ->
+ abort({bad_type, Tab}).
+
+dirty_match_object(Pat) when is_tuple(Pat), tuple_size(Pat) > 2 ->
+ Tab = element(1, Pat),
+ dirty_match_object(Tab, Pat);
+dirty_match_object(Pat) ->
+ abort({bad_type, Pat}).
+
+dirty_match_object(Tab, Pat)
+ when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+ dirty_rpc(Tab, ?MODULE, remote_dirty_match_object, [Tab, Pat]);
+dirty_match_object(Tab, Pat) ->
+ abort({bad_type, Tab, Pat}).
+
+remote_dirty_match_object(Tab, Pat) ->
+ Key = element(2, Pat),
+ case has_var(Key) of
+ false ->
+ mnesia_lib:db_match_object(Tab, Pat);
+ true ->
+ PosList = val({Tab, index}),
+ remote_dirty_match_object(Tab, Pat, PosList)
+ end.
+
+remote_dirty_match_object(Tab, Pat, [Pos | Tail]) when Pos =< tuple_size(Pat) ->
+ IxKey = element(Pos, Pat),
+ case has_var(IxKey) of
+ false ->
+ mnesia_index:dirty_match_object(Tab, Pat, Pos);
+ true ->
+ remote_dirty_match_object(Tab, Pat, Tail)
+ end;
+remote_dirty_match_object(Tab, Pat, []) ->
+ mnesia_lib:db_match_object(Tab, Pat);
+remote_dirty_match_object(Tab, Pat, _PosList) ->
+ abort({bad_type, Tab, Pat}).
+
+dirty_select(Tab, Spec) when is_atom(Tab), Tab /= schema, is_list(Spec) ->
+ dirty_rpc(Tab, ?MODULE, remote_dirty_select, [Tab, Spec]);
+dirty_select(Tab, Spec) ->
+ abort({bad_type, Tab, Spec}).
+
+remote_dirty_select(Tab, Spec) ->
+ case Spec of
+ [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
+ Key = element(2, HeadPat),
+ case has_var(Key) of
+ false ->
+ mnesia_lib:db_select(Tab, Spec);
+ true ->
+ PosList = val({Tab, index}),
+ remote_dirty_select(Tab, Spec, PosList)
+ end;
+ _ ->
+ mnesia_lib:db_select(Tab, Spec)
+ end.
+
+remote_dirty_select(Tab, [{HeadPat,_, _}] = Spec, [Pos | Tail])
+ when is_tuple(HeadPat), tuple_size(HeadPat) > 2, Pos =< tuple_size(HeadPat) ->
+ Key = element(Pos, HeadPat),
+ case has_var(Key) of
+ false ->
+ Recs = mnesia_index:dirty_select(Tab, HeadPat, Pos),
+ %% Returns the records without applying the match spec
+ %% The actual filtering is handled by the caller
+ CMS = ets:match_spec_compile(Spec),
+ case val({Tab, setorbag}) of
+ ordered_set ->
+ ets:match_spec_run(lists:sort(Recs), CMS);
+ _ ->
+ ets:match_spec_run(Recs, CMS)
+ end;
+ true ->
+ remote_dirty_select(Tab, Spec, Tail)
+ end;
+remote_dirty_select(Tab, Spec, _) ->
+ mnesia_lib:db_select(Tab, Spec).
+
+dirty_sel_init(Node,Tab,Spec,NObjects,Type) ->
+ do_dirty_rpc(Tab,Node,mnesia_lib,db_select_init,[Type,Tab,Spec,NObjects]).
+
+dirty_sel_cont(#mnesia_select{cont='$end_of_table'}) -> '$end_of_table';
+dirty_sel_cont(#mnesia_select{node=Node,tab=Tab,storage=Type,cont=Cont,orig=Ms}) ->
+ do_dirty_rpc(Tab,Node,mnesia_lib,db_select_cont,[Type,Cont,Ms]).
+
+dirty_all_keys(Tab) when is_atom(Tab), Tab /= schema ->
+ case ?catch_val({Tab, wild_pattern}) of
+ {'EXIT', _} ->
+ abort({no_exists, Tab});
+ Pat0 ->
+ Pat = setelement(2, Pat0, '$1'),
+ Keys = dirty_select(Tab, [{Pat, [], ['$1']}]),
+ case val({Tab, setorbag}) of
+ bag -> mnesia_lib:uniq(Keys);
+ _ -> Keys
+ end
+ end;
+dirty_all_keys(Tab) ->
+ abort({bad_type, Tab}).
+
+dirty_index_match_object(Pat, Attr) when is_tuple(Pat), tuple_size(Pat) > 2 ->
+ Tab = element(1, Pat),
+ dirty_index_match_object(Tab, Pat, Attr);
+dirty_index_match_object(Pat, _Attr) ->
+ abort({bad_type, Pat}).
+
+dirty_index_match_object(Tab, Pat, Attr)
+ when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
+ case mnesia_schema:attr_tab_to_pos(Tab, Attr) of
+ Pos when Pos =< tuple_size(Pat) ->
+ case has_var(element(2, Pat)) of
+ false ->
+ dirty_match_object(Tab, Pat);
+ true ->
+ Elem = element(Pos, Pat),
+ case has_var(Elem) of
+ false ->
+ dirty_rpc(Tab, mnesia_index, dirty_match_object,
+ [Tab, Pat, Pos]);
+ true ->
+ abort({bad_type, Tab, Attr, Elem})
+ end
+ end;
+ BadPos ->
+ abort({bad_type, Tab, BadPos})
+ end;
+dirty_index_match_object(Tab, Pat, _Attr) ->
+ abort({bad_type, Tab, Pat}).
+
+dirty_index_read(Tab, Key, Attr) when is_atom(Tab), Tab /= schema ->
+ Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
+ case has_var(Key) of
+ false ->
+ mnesia_index:dirty_read(Tab, Key, Pos);
+ true ->
+ abort({bad_type, Tab, Attr, Key})
+ end;
+dirty_index_read(Tab, _Key, _Attr) ->
+ abort({bad_type, Tab}).
+
+dirty_slot(Tab, Slot) when is_atom(Tab), Tab /= schema, is_integer(Slot) ->
+ dirty_rpc(Tab, mnesia_lib, db_slot, [Tab, Slot]);
+dirty_slot(Tab, Slot) ->
+ abort({bad_type, Tab, Slot}).
+
+dirty_first(Tab) when is_atom(Tab), Tab /= schema ->
+ dirty_rpc(Tab, mnesia_lib, db_first, [Tab]);
+dirty_first(Tab) ->
+ abort({bad_type, Tab}).
+
+dirty_last(Tab) when is_atom(Tab), Tab /= schema ->
+ dirty_rpc(Tab, mnesia_lib, db_last, [Tab]);
+dirty_last(Tab) ->
+ abort({bad_type, Tab}).
+
+dirty_next(Tab, Key) when is_atom(Tab), Tab /= schema ->
+ dirty_rpc(Tab, mnesia_lib, db_next_key, [Tab, Key]);
+dirty_next(Tab, _Key) ->
+ abort({bad_type, Tab}).
+
+dirty_prev(Tab, Key) when is_atom(Tab), Tab /= schema ->
+ dirty_rpc(Tab, mnesia_lib, db_prev_key, [Tab, Key]);
+dirty_prev(Tab, _Key) ->
+ abort({bad_type, Tab}).
+
+
+dirty_rpc(Tab, M, F, Args) ->
+ Node = val({Tab, where_to_read}),
+ do_dirty_rpc(Tab, Node, M, F, Args).
+
+do_dirty_rpc(_Tab, nowhere, _, _, Args) ->
+ mnesia:abort({no_exists, Args});
+do_dirty_rpc(Tab, Node, M, F, Args) ->
+ case rpc:call(Node, M, F, Args) of
+ {badrpc, Reason} ->
+ timer:sleep(20), %% Do not be too eager, and can't use yield on SMP
+ %% Sync with mnesia_monitor
+ try sys:get_status(mnesia_monitor) catch _:_ -> ok end,
+ case mnesia_controller:call({check_w2r, Node, Tab}) of % Sync
+ NewNode when NewNode =:= Node ->
+ ErrorTag = mnesia_lib:dirty_rpc_error_tag(Reason),
+ mnesia:abort({ErrorTag, Args});
+ NewNode ->
+ case get(mnesia_activity_state) of
+ {_Mod, Tid, _Ts} when is_record(Tid, tid) ->
+ %% In order to perform a consistent
+ %% retry of a transaction we need
+ %% to acquire the lock on the NewNode.
+ %% In this context we do neither know
+ %% the kind or granularity of the lock.
+ %% --> Abort the transaction
+ mnesia:abort({node_not_running, Node});
+ {error, {node_not_running, _}} ->
+ %% Mnesia is stopping
+ mnesia:abort({no_exists, Args});
+ _ ->
+ %% Splendid! A dirty retry is safe
+ %% 'Node' probably went down now
+ %% Let mnesia_controller get broken link message first
+ do_dirty_rpc(Tab, NewNode, M, F, Args)
+ end
+ end;
+ Other ->
+ Other
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Info
+
+%% Info about one table
+table_info(Tab, Item) ->
+ case get(mnesia_activity_state) of
+ undefined ->
+ any_table_info(Tab, Item);
+ {?DEFAULT_ACCESS, _Tid, _Ts} ->
+ any_table_info(Tab, Item);
+ {Mod, Tid, Ts} ->
+ Mod:table_info(Tid, Ts, Tab, Item);
+ _ ->
+ abort(no_transaction)
+ end.
+
+table_info(_Tid, _Ts, Tab, Item) ->
+ any_table_info(Tab, Item).
+
+
+any_table_info(Tab, Item) when is_atom(Tab) ->
+ case Item of
+ master_nodes ->
+ mnesia_recover:get_master_nodes(Tab);
+% checkpoints ->
+% case ?catch_val({Tab, commit_work}) of
+% [{checkpoints, List} | _] -> List;
+% No_chk when is_list(No_chk) -> [];
+% Else -> info_reply(Else, Tab, Item)
+% end;
+ size ->
+ raw_table_info(Tab, Item);
+ memory ->
+ raw_table_info(Tab, Item);
+ type ->
+ case ?catch_val({Tab, setorbag}) of
+ {'EXIT', _} ->
+ bad_info_reply(Tab, Item);
+ Val ->
+ Val
+ end;
+ all ->
+ case mnesia_schema:get_table_properties(Tab) of
+ [] ->
+ abort({no_exists, Tab, Item});
+ Props ->
+ lists:map(fun({setorbag, Type}) -> {type, Type};
+ (Prop) -> Prop end,
+ Props)
+ end;
+ name ->
+ Tab;
+ _ ->
+ case ?catch_val({Tab, Item}) of
+ {'EXIT', _} ->
+ bad_info_reply(Tab, Item);
+ Val ->
+ Val
+ end
+ end;
+any_table_info(Tab, _Item) ->
+ abort({bad_type, Tab}).
+
+raw_table_info(Tab, Item) ->
+ case ?catch_val({Tab, storage_type}) of
+ ram_copies ->
+ info_reply(catch ?ets_info(Tab, Item), Tab, Item);
+ disc_copies ->
+ info_reply(catch ?ets_info(Tab, Item), Tab, Item);
+ disc_only_copies ->
+ info_reply(catch dets:info(Tab, Item), Tab, Item);
+ unknown ->
+ bad_info_reply(Tab, Item);
+ {'EXIT', _} ->
+ bad_info_reply(Tab, Item)
+ end.
+
+info_reply({'EXIT', _Reason}, Tab, Item) ->
+ bad_info_reply(Tab, Item);
+info_reply({error, _Reason}, Tab, Item) ->
+ bad_info_reply(Tab, Item);
+info_reply(Val, _Tab, _Item) ->
+ Val.
+
+bad_info_reply(_Tab, size) -> 0;
+bad_info_reply(_Tab, memory) -> 0;
+bad_info_reply(Tab, Item) -> abort({no_exists, Tab, Item}).
+
+%% Raw info about all tables
+schema() ->
+ mnesia_schema:info().
+
+%% Raw info about one tables
+schema(Tab) ->
+ mnesia_schema:info(Tab).
+
+error_description(Err) ->
+ mnesia_lib:error_desc(Err).
+
+info() ->
+ case mnesia_lib:is_running() of
+ yes ->
+ TmInfo = mnesia_tm:get_info(10000),
+ Held = system_info(held_locks),
+ Queued = system_info(lock_queue),
+
+ io:format("---> Processes holding locks <--- ~n", []),
+ lists:foreach(fun(L) -> io:format("Lock: ~p~n", [L]) end,
+ Held),
+
+ io:format( "---> Processes waiting for locks <--- ~n", []),
+ lists:foreach(fun({Oid, Op, _Pid, Tid, OwnerTid}) ->
+ io:format("Tid ~p waits for ~p lock "
+ "on oid ~p owned by ~p ~n",
+ [Tid, Op, Oid, OwnerTid])
+ end, Queued),
+ mnesia_tm:display_info(group_leader(), TmInfo),
+
+ Pat = {'_', unclear, '_'},
+ Uncertain = ets:match_object(mnesia_decision, Pat),
+
+ io:format( "---> Uncertain transactions <--- ~n", []),
+ lists:foreach(fun({Tid, _, Nodes}) ->
+ io:format("Tid ~w waits for decision "
+ "from ~w~n",
+ [Tid, Nodes])
+ end, Uncertain),
+
+ mnesia_controller:info(),
+ display_system_info(Held, Queued, TmInfo, Uncertain);
+ _ ->
+ mini_info()
+ end,
+ ok.
+
+mini_info() ->
+ io:format("===> System info in version ~p, debug level = ~p <===~n",
+ [system_info(version), system_info(debug)]),
+ Not =
+ case system_info(use_dir) of
+ true -> "";
+ false -> "NOT "
+ end,
+
+ io:format("~w. Directory ~p is ~sused.~n",
+ [system_info(schema_location), system_info(directory), Not]),
+ io:format("use fallback at restart = ~w~n",
+ [system_info(fallback_activated)]),
+ Running = system_info(running_db_nodes),
+ io:format("running db nodes = ~w~n", [Running]),
+ All = mnesia_lib:all_nodes(),
+ io:format("stopped db nodes = ~w ~n", [All -- Running]).
+
+display_system_info(Held, Queued, TmInfo, Uncertain) ->
+ mini_info(),
+ display_tab_info(),
+ S = fun(Items) -> [system_info(I) || I <- Items] end,
+
+ io:format("~w transactions committed, ~w aborted, "
+ "~w restarted, ~w logged to disc~n",
+ S([transaction_commits, transaction_failures,
+ transaction_restarts, transaction_log_writes])),
+
+ {Active, Pending} =
+ case TmInfo of
+ {timeout, _} -> {infinity, infinity};
+ {info, P, A} -> {length(A), length(P)}
+ end,
+ io:format("~w held locks, ~w in queue; "
+ "~w local transactions, ~w remote~n",
+ [length(Held), length(Queued), Active, Pending]),
+
+ Ufold = fun({_, _, Ns}, {C, Old}) ->
+ New = [N || N <- Ns, not lists:member(N, Old)],
+ {C + 1, New ++ Old}
+ end,
+ {Ucount, Unodes} = lists:foldl(Ufold, {0, []}, Uncertain),
+ io:format("~w transactions waits for other nodes: ~p~n",
+ [Ucount, Unodes]).
+
+display_tab_info() ->
+ MasterTabs = mnesia_recover:get_master_node_tables(),
+ io:format("master node tables = ~p~n", [lists:sort(MasterTabs)]),
+
+ Tabs = system_info(tables),
+
+ {Unknown, Ram, Disc, DiscOnly} =
+ lists:foldl(fun storage_count/2, {[], [], [], []}, Tabs),
+
+ io:format("remote = ~p~n", [lists:sort(Unknown)]),
+ io:format("ram_copies = ~p~n", [lists:sort(Ram)]),
+ io:format("disc_copies = ~p~n", [lists:sort(Disc)]),
+ io:format("disc_only_copies = ~p~n", [lists:sort(DiscOnly)]),
+
+ Rfoldl = fun(T, Acc) ->
+ Rpat =
+ case val({T, access_mode}) of
+ read_only ->
+ lists:sort([{A, read_only} || A <- val({T, active_replicas})]);
+ read_write ->
+ table_info(T, where_to_commit)
+ end,
+ case lists:keysearch(Rpat, 1, Acc) of
+ {value, {_Rpat, Rtabs}} ->
+ lists:keyreplace(Rpat, 1, Acc, {Rpat, [T | Rtabs]});
+ false ->
+ [{Rpat, [T]} | Acc]
+ end
+ end,
+ Repl = lists:foldl(Rfoldl, [], Tabs),
+ Rdisp = fun({Rpat, Rtabs}) -> io:format("~p = ~p~n", [Rpat, Rtabs]) end,
+ lists:foreach(Rdisp, lists:sort(Repl)).
+
+storage_count(T, {U, R, D, DO}) ->
+ case table_info(T, storage_type) of
+ unknown -> {[T | U], R, D, DO};
+ ram_copies -> {U, [T | R], D, DO};
+ disc_copies -> {U, R, [T | D], DO};
+ disc_only_copies -> {U, R, D, [T | DO]}
+ end.
+
+system_info(Item) ->
+ case catch system_info2(Item) of
+ {'EXIT',Error} -> abort(Error);
+ Other -> Other
+ end.
+
+system_info2(all) ->
+ Items = system_info_items(mnesia_lib:is_running()),
+ [{I, system_info(I)} || I <- Items];
+
+system_info2(db_nodes) ->
+ DiscNs = ?catch_val({schema, disc_copies}),
+ RamNs = ?catch_val({schema, ram_copies}),
+ if
+ is_list(DiscNs), is_list(RamNs) ->
+ DiscNs ++ RamNs;
+ true ->
+ case mnesia_schema:read_nodes() of
+ {ok, Nodes} -> Nodes;
+ {error,Reason} -> exit(Reason)
+ end
+ end;
+system_info2(running_db_nodes) ->
+ case ?catch_val({current, db_nodes}) of
+ {'EXIT',_} ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ load_mnesia_or_abort(),
+ mnesia_lib:running_nodes();
+ Other ->
+ Other
+ end;
+
+system_info2(extra_db_nodes) ->
+ case ?catch_val(extra_db_nodes) of
+ {'EXIT',_} ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ load_mnesia_or_abort(),
+ mnesia_monitor:get_env(extra_db_nodes);
+ Other ->
+ Other
+ end;
+
+system_info2(directory) ->
+ case ?catch_val(directory) of
+ {'EXIT',_} ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ load_mnesia_or_abort(),
+ mnesia_monitor:get_env(dir);
+ Other ->
+ Other
+ end;
+
+system_info2(use_dir) ->
+ case ?catch_val(use_dir) of
+ {'EXIT',_} ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ load_mnesia_or_abort(),
+ mnesia_monitor:use_dir();
+ Other ->
+ Other
+ end;
+
+system_info2(schema_location) ->
+ case ?catch_val(schema_location) of
+ {'EXIT',_} ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ load_mnesia_or_abort(),
+ mnesia_monitor:get_env(schema_location);
+ Other ->
+ Other
+ end;
+
+system_info2(fallback_activated) ->
+ case ?catch_val(fallback_activated) of
+ {'EXIT',_} ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ load_mnesia_or_abort(),
+ mnesia_bup:fallback_exists();
+ Other ->
+ Other
+ end;
+
+system_info2(version) ->
+ case ?catch_val(version) of
+ {'EXIT', _} ->
+ Apps = application:loaded_applications(),
+ case lists:keysearch(?APPLICATION, 1, Apps) of
+ {value, {_Name, _Desc, Version}} ->
+ Version;
+ false ->
+ %% Ensure that it does not match
+ {mnesia_not_loaded, node(), now()}
+ end;
+ Version ->
+ Version
+ end;
+
+system_info2(access_module) -> mnesia_monitor:get_env(access_module);
+system_info2(auto_repair) -> mnesia_monitor:get_env(auto_repair);
+system_info2(is_running) -> mnesia_lib:is_running();
+system_info2(backup_module) -> mnesia_monitor:get_env(backup_module);
+system_info2(event_module) -> mnesia_monitor:get_env(event_module);
+system_info2(debug) -> mnesia_monitor:get_env(debug);
+system_info2(dump_log_load_regulation) -> mnesia_monitor:get_env(dump_log_load_regulation);
+system_info2(dump_log_write_threshold) -> mnesia_monitor:get_env(dump_log_write_threshold);
+system_info2(dump_log_time_threshold) -> mnesia_monitor:get_env(dump_log_time_threshold);
+system_info2(dump_log_update_in_place) ->
+ mnesia_monitor:get_env(dump_log_update_in_place);
+system_info2(max_wait_for_decision) -> mnesia_monitor:get_env(max_wait_for_decision);
+system_info2(embedded_mnemosyne) -> mnesia_monitor:get_env(embedded_mnemosyne);
+system_info2(ignore_fallback_at_startup) -> mnesia_monitor:get_env(ignore_fallback_at_startup);
+system_info2(fallback_error_function) -> mnesia_monitor:get_env(fallback_error_function);
+system_info2(log_version) -> mnesia_log:version();
+system_info2(protocol_version) -> mnesia_monitor:protocol_version();
+system_info2(schema_version) -> mnesia_schema:version(); %backward compatibility
+system_info2(tables) -> val({schema, tables});
+system_info2(local_tables) -> val({schema, local_tables});
+system_info2(master_node_tables) -> mnesia_recover:get_master_node_tables();
+system_info2(subscribers) -> mnesia_subscr:subscribers();
+system_info2(checkpoints) -> mnesia_checkpoint:checkpoints();
+system_info2(held_locks) -> mnesia_locker:get_held_locks();
+system_info2(lock_queue) -> mnesia_locker:get_lock_queue();
+system_info2(transactions) -> mnesia_tm:get_transactions();
+system_info2(transaction_failures) -> mnesia_lib:read_counter(trans_failures);
+system_info2(transaction_commits) -> mnesia_lib:read_counter(trans_commits);
+system_info2(transaction_restarts) -> mnesia_lib:read_counter(trans_restarts);
+system_info2(transaction_log_writes) -> mnesia_dumper:get_log_writes();
+system_info2(core_dir) -> mnesia_monitor:get_env(core_dir);
+system_info2(no_table_loaders) -> mnesia_monitor:get_env(no_table_loaders);
+system_info2(dc_dump_limit) -> mnesia_monitor:get_env(dc_dump_limit);
+
+system_info2(Item) -> exit({badarg, Item}).
+
+system_info_items(yes) ->
+ [
+ access_module,
+ auto_repair,
+ backup_module,
+ checkpoints,
+ db_nodes,
+ debug,
+ directory,
+ dump_log_load_regulation,
+ dump_log_time_threshold,
+ dump_log_update_in_place,
+ dump_log_write_threshold,
+ embedded_mnemosyne,
+ event_module,
+ extra_db_nodes,
+ fallback_activated,
+ held_locks,
+ ignore_fallback_at_startup,
+ fallback_error_function,
+ is_running,
+ local_tables,
+ lock_queue,
+ log_version,
+ master_node_tables,
+ max_wait_for_decision,
+ protocol_version,
+ running_db_nodes,
+ schema_location,
+ schema_version,
+ subscribers,
+ tables,
+ transaction_commits,
+ transaction_failures,
+ transaction_log_writes,
+ transaction_restarts,
+ transactions,
+ use_dir,
+ core_dir,
+ no_table_loaders,
+ dc_dump_limit,
+ version
+ ];
+system_info_items(no) ->
+ [
+ auto_repair,
+ backup_module,
+ db_nodes,
+ debug,
+ directory,
+ dump_log_load_regulation,
+ dump_log_time_threshold,
+ dump_log_update_in_place,
+ dump_log_write_threshold,
+ event_module,
+ extra_db_nodes,
+ ignore_fallback_at_startup,
+ fallback_error_function,
+ is_running,
+ log_version,
+ max_wait_for_decision,
+ protocol_version,
+ running_db_nodes,
+ schema_location,
+ schema_version,
+ use_dir,
+ core_dir,
+ version
+ ].
+
+system_info() ->
+ IsRunning = mnesia_lib:is_running(),
+ case IsRunning of
+ yes ->
+ TmInfo = mnesia_tm:get_info(10000),
+ Held = system_info(held_locks),
+ Queued = system_info(lock_queue),
+ Pat = {'_', unclear, '_'},
+ Uncertain = ets:match_object(mnesia_decision, Pat),
+ display_system_info(Held, Queued, TmInfo, Uncertain);
+ _ ->
+ mini_info()
+ end,
+ IsRunning.
+
+load_mnesia_or_abort() ->
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ ok;
+ {error, Reason} ->
+ abort(Reason)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Database mgt
+
+create_schema(Ns) ->
+ mnesia_bup:create_schema(Ns).
+
+delete_schema(Ns) ->
+ mnesia_schema:delete_schema(Ns).
+
+backup(Opaque) ->
+ mnesia_log:backup(Opaque).
+
+backup(Opaque, Mod) ->
+ mnesia_log:backup(Opaque, Mod).
+
+traverse_backup(S, T, Fun, Acc) ->
+ mnesia_bup:traverse_backup(S, T, Fun, Acc).
+
+traverse_backup(S, SM, T, TM, F, A) ->
+ mnesia_bup:traverse_backup(S, SM, T, TM, F, A).
+
+install_fallback(Opaque) ->
+ mnesia_bup:install_fallback(Opaque).
+
+install_fallback(Opaque, Mod) ->
+ mnesia_bup:install_fallback(Opaque, Mod).
+
+uninstall_fallback() ->
+ mnesia_bup:uninstall_fallback().
+
+uninstall_fallback(Args) ->
+ mnesia_bup:uninstall_fallback(Args).
+
+activate_checkpoint(Args) ->
+ mnesia_checkpoint:activate(Args).
+
+deactivate_checkpoint(Name) ->
+ mnesia_checkpoint:deactivate(Name).
+
+backup_checkpoint(Name, Opaque) ->
+ mnesia_log:backup_checkpoint(Name, Opaque).
+
+backup_checkpoint(Name, Opaque, Mod) ->
+ mnesia_log:backup_checkpoint(Name, Opaque, Mod).
+
+restore(Opaque, Args) ->
+ mnesia_schema:restore(Opaque, Args).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt
+
+create_table(Arg) ->
+ mnesia_schema:create_table(Arg).
+create_table(Name, Arg) when is_list(Arg) ->
+ mnesia_schema:create_table([{name, Name}| Arg]);
+create_table(Name, Arg) ->
+ {aborted, badarg, Name, Arg}.
+
+delete_table(Tab) ->
+ mnesia_schema:delete_table(Tab).
+
+add_table_copy(Tab, N, S) ->
+ mnesia_schema:add_table_copy(Tab, N, S).
+del_table_copy(Tab, N) ->
+ mnesia_schema:del_table_copy(Tab, N).
+
+move_table_copy(Tab, From, To) ->
+ mnesia_schema:move_table(Tab, From, To).
+
+add_table_index(Tab, Ix) ->
+ mnesia_schema:add_table_index(Tab, Ix).
+del_table_index(Tab, Ix) ->
+ mnesia_schema:del_table_index(Tab, Ix).
+
+transform_table(Tab, Fun, NewA) ->
+ case catch val({Tab, record_name}) of
+ {'EXIT', Reason} ->
+ mnesia:abort(Reason);
+ OldRN ->
+ mnesia_schema:transform_table(Tab, Fun, NewA, OldRN)
+ end.
+
+transform_table(Tab, Fun, NewA, NewRN) ->
+ mnesia_schema:transform_table(Tab, Fun, NewA, NewRN).
+
+change_table_copy_type(T, N, S) ->
+ mnesia_schema:change_table_copy_type(T, N, S).
+
+clear_table(Tab) ->
+ case get(mnesia_activity_state) of
+ State = {Mod, Tid, _Ts} when element(1, Tid) =/= tid ->
+ transaction(State, fun() -> do_clear_table(Tab) end, [], infinity, Mod, sync);
+ undefined ->
+ transaction(undefined, fun() -> do_clear_table(Tab) end, [], infinity, ?DEFAULT_ACCESS, sync);
+ _ -> %% Not allowed for clear_table
+ mnesia:abort({aborted, nested_transaction})
+ end.
+
+do_clear_table(Tab) ->
+ case get(mnesia_activity_state) of
+ {?DEFAULT_ACCESS, Tid, Ts} ->
+ clear_table(Tid, Ts, Tab, '_');
+ {Mod, Tid, Ts} ->
+ Mod:clear_table(Tid, Ts, Tab, '_');
+ _ ->
+ abort(no_transaction)
+ end.
+
+clear_table(Tid, Ts, Tab, Obj) when element(1, Tid) =:= tid ->
+ Store = Ts#tidstore.store,
+ mnesia_locker:wlock_table(Tid, Store, Tab),
+ Oid = {Tab, '_'},
+ ?ets_insert(Store, {Oid, Obj, clear_table}),
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt - user properties
+
+read_table_property(Tab, PropKey) ->
+ val({Tab, user_property, PropKey}).
+
+write_table_property(Tab, Prop) ->
+ mnesia_schema:write_table_property(Tab, Prop).
+
+delete_table_property(Tab, PropKey) ->
+ mnesia_schema:delete_table_property(Tab, PropKey).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt - user properties
+
+change_table_frag(Tab, FragProp) ->
+ mnesia_schema:change_table_frag(Tab, FragProp).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Table mgt - table load
+
+%% Dump a ram table to disc
+dump_tables(Tabs) ->
+ mnesia_schema:dump_tables(Tabs).
+
+%% allow the user to wait for some tables to be loaded
+wait_for_tables(Tabs, Timeout) ->
+ mnesia_controller:wait_for_tables(Tabs, Timeout).
+
+force_load_table(Tab) ->
+ case mnesia_controller:force_load_table(Tab) of
+ ok -> yes; % Backwards compatibility
+ Other -> Other
+ end.
+
+change_table_access_mode(T, Access) ->
+ mnesia_schema:change_table_access_mode(T, Access).
+
+change_table_load_order(T, O) ->
+ mnesia_schema:change_table_load_order(T, O).
+
+set_master_nodes(Nodes) when is_list(Nodes) ->
+ UseDir = system_info(use_dir),
+ IsRunning = system_info(is_running),
+ case IsRunning of
+ yes ->
+ CsPat = {{'_', cstruct}, '_'},
+ Cstructs0 = ?ets_match_object(mnesia_gvar, CsPat),
+ Cstructs = [Cs || {_, Cs} <- Cstructs0],
+ log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning);
+ _NotRunning ->
+ case UseDir of
+ true ->
+ mnesia_lib:lock_table(schema),
+ Res =
+ case mnesia_schema:read_cstructs_from_disc() of
+ {ok, Cstructs} ->
+ log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning);
+ {error, Reason} ->
+ {error, Reason}
+ end,
+ mnesia_lib:unlock_table(schema),
+ Res;
+ false ->
+ ok
+ end
+ end;
+set_master_nodes(Nodes) ->
+ {error, {bad_type, Nodes}}.
+
+log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning) ->
+ Fun = fun(Cs) ->
+ Copies = mnesia_lib:copy_holders(Cs),
+ Valid = mnesia_lib:intersect(Nodes, Copies),
+ {Cs#cstruct.name, Valid}
+ end,
+ Args = lists:map(Fun, Cstructs),
+ mnesia_recover:log_master_nodes(Args, UseDir, IsRunning).
+
+set_master_nodes(Tab, Nodes) when is_list(Nodes) ->
+ UseDir = system_info(use_dir),
+ IsRunning = system_info(is_running),
+ case IsRunning of
+ yes ->
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}};
+ Cs ->
+ case Nodes -- mnesia_lib:copy_holders(Cs) of
+ [] ->
+ Args = [{Tab , Nodes}],
+ mnesia_recover:log_master_nodes(Args, UseDir, IsRunning);
+ BadNodes ->
+ {error, {no_exists, Tab, BadNodes}}
+ end
+ end;
+ _NotRunning ->
+ case UseDir of
+ true ->
+ mnesia_lib:lock_table(schema),
+ Res =
+ case mnesia_schema:read_cstructs_from_disc() of
+ {ok, Cstructs} ->
+ case lists:keysearch(Tab, 2, Cstructs) of
+ {value, Cs} ->
+ case Nodes -- mnesia_lib:copy_holders(Cs) of
+ [] ->
+ Args = [{Tab , Nodes}],
+ mnesia_recover:log_master_nodes(Args, UseDir, IsRunning);
+ BadNodes ->
+ {error, {no_exists, Tab, BadNodes}}
+ end;
+ false ->
+ {error, {no_exists, Tab}}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end,
+ mnesia_lib:unlock_table(schema),
+ Res;
+ false ->
+ ok
+ end
+ end;
+set_master_nodes(Tab, Nodes) ->
+ {error, {bad_type, Tab, Nodes}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Misc admin
+
+dump_log() ->
+ mnesia_controller:sync_dump_log(user).
+
+subscribe(What) ->
+ mnesia_subscr:subscribe(self(), What).
+
+unsubscribe(What) ->
+ mnesia_subscr:unsubscribe(self(), What).
+
+report_event(Event) ->
+ mnesia_lib:report_system_event({mnesia_user, Event}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Snmp
+
+snmp_open_table(Tab, Us) ->
+ mnesia_schema:add_snmp(Tab, Us).
+
+snmp_close_table(Tab) ->
+ mnesia_schema:del_snmp(Tab).
+
+snmp_get_row(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
+ case get(mnesia_activity_state) of
+ {Mod, Tid, Ts=#tidstore{store=Store}} when element(1, Tid) =:= tid ->
+ case snmp_oid_to_mnesia_key(RowIndex, Tab) of
+ unknown -> %% Arrg contains fix_string
+ Ops = find_ops(Store, Tab, val({Tab, wild_pattern})),
+ SnmpType = val({Tab,snmp}),
+ Fix = fun({{_,Key},Row,Op}, Res) ->
+ case mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) of
+ RowIndex ->
+ case Op of
+ write -> {ok, Row};
+ _ ->
+ undefined
+ end;
+ _ ->
+ Res
+ end
+ end,
+ lists:foldl(Fix, undefined, Ops);
+ Key ->
+ case Mod:read(Tid, Ts, Tab, Key, read) of
+ [Row] ->
+ {ok, Row};
+ _ ->
+ undefined
+ end
+ end;
+ _ ->
+ dirty_rpc(Tab, mnesia_snmp_hook, get_row, [Tab, RowIndex])
+ end;
+snmp_get_row(Tab, _RowIndex) ->
+ abort({bad_type, Tab}).
+
+%%%%%%%%%%%%%
+
+snmp_get_next_index(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
+ {Next,OrigKey} = dirty_rpc(Tab, mnesia_snmp_hook, get_next_index, [Tab, RowIndex]),
+ case get(mnesia_activity_state) of
+ {_Mod, Tid, #tidstore{store=Store}} when element(1, Tid) =:= tid ->
+ case OrigKey of
+ undefined ->
+ snmp_order_keys(Store, Tab, RowIndex, []);
+ _ ->
+ case ?ets_match(Store, {{Tab,OrigKey}, '_', '$1'}) of
+ [] -> snmp_order_keys(Store,Tab,RowIndex,[OrigKey]);
+ Ops ->
+ case lists:last(Ops) of
+ [delete] -> snmp_get_next_index(Tab, Next);
+ _ -> snmp_order_keys(Store,Tab,RowIndex,[OrigKey])
+ end
+ end
+ end;
+ _ ->
+ case Next of
+ endOfTable -> endOfTable;
+ _ -> {ok, Next}
+ end
+ end;
+snmp_get_next_index(Tab, _RowIndex) ->
+ abort({bad_type, Tab}).
+
+snmp_order_keys(Store,Tab,RowIndex,Def) ->
+ All = ?ets_match(Store, {{Tab,'$1'},'_','$2'}),
+ SnmpType = val({Tab,snmp}),
+ Keys0 = [mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) ||
+ Key <- ts_keys_1(All, Def)],
+ Keys = lists:sort(Keys0),
+ get_ordered_snmp_key(RowIndex,Keys).
+
+get_ordered_snmp_key(Prev, [First|_]) when Prev < First -> {ok, First};
+get_ordered_snmp_key(Prev, [_|R]) ->
+ get_ordered_snmp_key(Prev, R);
+get_ordered_snmp_key(_, []) ->
+ endOfTable.
+
+%%%%%%%%%%
+
+snmp_get_mnesia_key(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
+ case get(mnesia_activity_state) of
+ {_Mod, Tid, Ts} when element(1, Tid) =:= tid ->
+ Res = dirty_rpc(Tab,mnesia_snmp_hook,get_mnesia_key,[Tab,RowIndex]),
+ snmp_filter_key(Res, RowIndex, Tab, Ts#tidstore.store);
+ _ ->
+ dirty_rpc(Tab, mnesia_snmp_hook, get_mnesia_key, [Tab, RowIndex])
+ end;
+snmp_get_mnesia_key(Tab, _RowIndex) ->
+ abort({bad_type, Tab}).
+
+snmp_oid_to_mnesia_key(RowIndex, Tab) ->
+ case mnesia_snmp_hook:oid_to_key(RowIndex, Tab) of
+ unknown -> %% Contains fix_string needs lookup
+ case dirty_rpc(Tab,mnesia_snmp_hook,get_mnesia_key,[Tab,RowIndex]) of
+ {ok, MnesiaKey} -> MnesiaKey;
+ undefined -> unknown
+ end;
+ MnesiaKey ->
+ MnesiaKey
+ end.
+
+snmp_filter_key(Res = {ok,Key}, _RowIndex, Tab, Store) ->
+ case ?ets_lookup(Store, {Tab,Key}) of
+ [] -> Res;
+ Ops ->
+ case lists:last(Ops) of
+ {_, _, write} -> Res;
+ _ -> undefined
+ end
+ end;
+snmp_filter_key(undefined, RowIndex, Tab, Store) ->
+ case mnesia_snmp_hook:oid_to_key(RowIndex, Tab) of
+ unknown -> %% Arrg contains fix_string
+ Ops = find_ops(Store, Tab, val({Tab, wild_pattern})),
+ SnmpType = val({Tab,snmp}),
+ Fix = fun({{_,Key},_,Op}, Res) ->
+ case mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) of
+ RowIndex ->
+ case Op of
+ write -> {ok, Key};
+ _ ->
+ undefined
+ end;
+ _ ->
+ Res
+ end
+ end,
+ lists:foldl(Fix, undefined, Ops);
+ Key ->
+ case ?ets_lookup(Store, {Tab,Key}) of
+ [] ->
+ undefined;
+ Ops ->
+ case lists:last(Ops) of
+ {_, _, write} -> {ok, Key};
+ _ -> undefined
+ end
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Textfile access
+
+load_textfile(F) ->
+ mnesia_text:load_textfile(F).
+dump_to_textfile(F) ->
+ mnesia_text:dump_to_textfile(F).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% QLC Handles
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+table(Tab) ->
+ table(Tab, []).
+table(Tab,Opts) ->
+ {[Trav,Lock,NObjects],QlcOptions0} =
+ qlc_opts(Opts,[{traverse,select},{lock,read},{n_objects,100}]),
+ TF = case Trav of
+ {select,Ms} ->
+ fun() -> qlc_select(select(Tab,Ms,NObjects,Lock)) end;
+ select ->
+ fun(Ms) -> qlc_select(select(Tab,Ms,NObjects,Lock)) end;
+ _ ->
+ erlang:error({badarg, {Trav,[Tab, Opts]}})
+ end,
+ Pre = fun(Arg) -> pre_qlc(Arg, Tab) end,
+ Post = fun() -> post_qlc(Tab) end,
+ Info = fun(Tag) -> qlc_info(Tab, Tag) end,
+ ParentFun = fun() ->
+ {mnesia_activity, mnesia:get_activity_id()}
+ end,
+ Lookup =
+ case Trav of
+ {select, _} -> [];
+ _ ->
+ LFun = fun(2, Keys) ->
+ Read = fun(Key) -> read(Tab,Key,Lock) end,
+ lists:flatmap(Read, Keys);
+ (Index,Keys) ->
+ IdxRead = fun(Key) -> index_read(Tab,Key,Index) end,
+ lists:flatmap(IdxRead, Keys)
+ end,
+ [{lookup_fun, LFun}]
+ end,
+ MFA = fun(Type) -> qlc_format(Type, Tab, NObjects, Lock, Opts) end,
+ QlcOptions = [{pre_fun, Pre}, {post_fun, Post},
+ {info_fun, Info}, {parent_fun, ParentFun},
+ {format_fun, MFA}|Lookup] ++ QlcOptions0,
+ qlc:table(TF, QlcOptions).
+
+pre_qlc(Opts, Tab) ->
+ {_,Tid,_} =
+ case get(mnesia_activity_state) of
+ undefined ->
+ case lists:keysearch(parent_value, 1, Opts) of
+ {value, {parent_value,{mnesia_activity,undefined}}} ->
+ abort(no_transaction);
+ {value, {parent_value,{mnesia_activity,Aid}}} ->
+ {value,{stop_fun,Stop}} =
+ lists:keysearch(stop_fun,1,Opts),
+ put_activity_id(Aid,Stop),
+ Aid;
+ _ ->
+ abort(no_transaction)
+ end;
+ Else ->
+ Else
+ end,
+ case element(1,Tid) of
+ tid -> ok;
+ _ ->
+ case ?catch_val({Tab, setorbag}) of
+ ordered_set -> ok;
+ _ ->
+ dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,true,self()]),
+ ok
+ end
+ end.
+
+post_qlc(Tab) ->
+ case catch get(mnesia_activity_state) of
+ {_,#tid{},_} -> ok;
+ _ ->
+ case ?catch_val({Tab, setorbag}) of
+ ordered_set ->
+ ok;
+ _ ->
+ dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,false,self()]),
+ ok
+ end
+ end.
+
+qlc_select('$end_of_table') -> [];
+qlc_select({[], Cont}) -> qlc_select(select(Cont));
+qlc_select({Objects, Cont}) ->
+ Objects ++ fun() -> qlc_select(select(Cont)) end.
+
+qlc_opts(Opts, Keys) when is_list(Opts) ->
+ qlc_opts(Opts, Keys, []);
+qlc_opts(Option, Keys) ->
+ qlc_opts([Option], Keys, []).
+
+qlc_opts(Opts, [{Key,Def}|Keys], Acc) ->
+ Opt = case lists:keysearch(Key,1, Opts) of
+ {value, {Key,Value}} ->
+ Value;
+ false ->
+ Def
+ end,
+ qlc_opts(lists:keydelete(Key,1,Opts),Keys,[Opt|Acc]);
+qlc_opts(Opts,[],Acc) -> {lists:reverse(Acc),Opts}.
+
+qlc_info(Tab, num_of_objects) ->
+ dirty_rpc(Tab, ?MODULE, raw_table_info, [Tab, size]);
+qlc_info(_, keypos) -> 2;
+qlc_info(_, is_unique_objects) -> true;
+qlc_info(Tab, is_unique_keys) ->
+ case val({Tab, type}) of
+ set -> true;
+ ordered_set -> true;
+ _ -> false
+ end;
+qlc_info(Tab, is_sorted_objects) ->
+ case val({Tab, type}) of
+ ordered_set ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ ascending;
+ _ -> %% Fragmented tables are not ordered
+ no
+ end;
+ _ -> no
+ end;
+qlc_info(Tab, indices) ->
+ val({Tab,index});
+qlc_info(_Tab, _) ->
+ undefined.
+
+qlc_format(all, Tab, NObjects, Lock, Opts) ->
+ {?MODULE, table, [Tab,[{n_objects, NObjects}, {lock,Lock}|Opts]]};
+qlc_format({match_spec, Ms}, Tab, NObjects, Lock, Opts) ->
+ {?MODULE, table, [Tab,[{traverse,{select,Ms}},{n_objects, NObjects}, {lock,Lock}|Opts]]};
+qlc_format({lookup, 2, Keys}, Tab, _, Lock, _) ->
+ io_lib:format("lists:flatmap(fun(V) -> "
+ "~w:read(~w, V, ~w) end, ~w)",
+ [?MODULE, Tab, Lock, Keys]);
+qlc_format({lookup, Index,Keys}, Tab, _, _, _) ->
+ io_lib:format("lists:flatmap(fun(V) -> "
+ "~w:index_read(~w, V, ~w) end, ~w)",
+ [?MODULE, Tab, Index, Keys]).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+do_fixtable(Tab, #tidstore{store=Store}) ->
+ do_fixtable(Tab,Store);
+do_fixtable(Tab, Store) ->
+ case ?catch_val({Tab, setorbag}) of
+ ordered_set ->
+ ok;
+ _ ->
+ case ?ets_match_object(Store, {fixtable, {Tab, '_'}}) of
+ [] ->
+ Node = dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,true,self()]),
+ ?ets_insert(Store, {fixtable, {Tab, Node}});
+ _ ->
+ ignore
+ end,
+ ok
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Mnemosyne exclusive
+
+get_activity_id() ->
+ get(mnesia_activity_state).
+
+put_activity_id(Activity) ->
+ mnesia_tm:put_activity_id(Activity).
+put_activity_id(Activity,Fun) ->
+ mnesia_tm:put_activity_id(Activity,Fun).
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl
new file mode 100644
index 0000000000..d488d9364a
--- /dev/null
+++ b/lib/mnesia/src/mnesia.hrl
@@ -0,0 +1,121 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+
+-define(APPLICATION, mnesia).
+
+-define(ets_lookup(Tab, Key), ets:lookup(Tab, Key)).
+-define(ets_lookup_element(Tab, Key, Pos), ets:lookup_element(Tab, Key, Pos)).
+-define(ets_insert(Tab, Rec), ets:insert(Tab, Rec)).
+-define(ets_delete(Tab, Key), ets:delete(Tab, Key)).
+-define(ets_match_delete(Tab, Pat), ets:match_delete(Tab, Pat)).
+-define(ets_match_object(Tab, Pat), ets:match_object(Tab, Pat)).
+-define(ets_match(Tab, Pat), ets:match(Tab, Pat)).
+-define(ets_info(Tab, Item), ets:info(Tab, Item)).
+-define(ets_update_counter(Tab, Key, Incr), ets:update_counter(Tab, Key, Incr)).
+-define(ets_first(Tab), ets:first(Tab)).
+-define(ets_next(Tab, Key), ets:next(Tab, Key)).
+-define(ets_last(Tab), ets:last(Tab)).
+-define(ets_prev(Tab, Key), ets:prev(Tab, Key)).
+-define(ets_slot(Tab, Pos), ets:slot(Tab, Pos)).
+-define(ets_new_table(Tab, Props), ets:new(Tab, Props)).
+-define(ets_delete_table(Tab), ets:delete(Tab)).
+-define(ets_fixtable(Tab, Bool), ets:fixtable(Tab, Bool)).
+
+-define(catch_val(Var), (catch ?ets_lookup_element(mnesia_gvar, Var, 2))).
+
+%% It's important that counter is first, since we compare tid's
+
+-record(tid,
+ {counter, %% serial no for tid
+ pid}). %% owner of tid
+
+
+-record(tidstore,
+ {store, %% current ets table for tid
+ up_stores = [], %% list of upper layer stores for nested trans
+ level = 1}). %% transaction level
+
+-define(unique_cookie, {erlang:now(), node()}).
+
+-record(cstruct, {name, % Atom
+ type = set, % set | bag
+ ram_copies = [], % [Node]
+ disc_copies = [], % [Node]
+ disc_only_copies = [], % [Node]
+ load_order = 0, % Integer
+ access_mode = read_write, % read_write | read_only
+ index = [], % [Integer]
+ snmp = [], % Snmp Ustruct
+ local_content = false, % true | false
+ record_name = {bad_record_name}, % Atom (Default = Name)
+ attributes = [key, val], % [Atom]
+ user_properties = [], % [Record]
+ frag_properties = [], % [{Key, Val]
+ cookie = ?unique_cookie, % Term
+ version = {{2, 0}, []}}). % {{Integer, Integer}, [Node]}
+
+%% Record for the head structure in Mnesia's log files
+%%
+%% The definition of this record may *NEVER* be changed
+%% since it may be written to very old backup files.
+%% By holding this record definition stable we can be
+%% able to comprahend backups from timepoint 0. It also
+%% allows us to use the backup format as an interchange
+%% format between Mnesia releases.
+
+-record(log_header,{log_kind,
+ log_version,
+ mnesia_version,
+ node,
+ now}).
+
+%% Commit records stored in the transaction log
+-record(commit, {node,
+ decision, % presume_commit | Decision
+ ram_copies = [],
+ disc_copies = [],
+ disc_only_copies = [],
+ snmp = [],
+ schema_ops = []
+ }).
+
+-record(decision, {tid,
+ outcome, % presume_abort | committed
+ disc_nodes,
+ ram_nodes}).
+
+%% Maybe cyclic wait
+-record(cyclic, {node = node(),
+ oid, % {Tab, Key}
+ op, % read | write
+ lock, % read | write
+ lucky
+ }).
+
+%% Managing conditional debug functions
+
+-ifdef(debug).
+ -define(eval_debug_fun(I, C),
+ mnesia_lib:eval_debug_fun(I, C, ?FILE, ?LINE)).
+-else.
+ -define(eval_debug_fun(I, C), ok).
+-endif.
+
diff --git a/lib/mnesia/src/mnesia_backup.erl b/lib/mnesia/src/mnesia_backup.erl
new file mode 100644
index 0000000000..f372ca0be5
--- /dev/null
+++ b/lib/mnesia/src/mnesia_backup.erl
@@ -0,0 +1,201 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+
+%%-behaviour(mnesia_backup).
+%0
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% This module contains one implementation of callback functions
+%% used by Mnesia at backup and restore. The user may however
+%% write an own module the same interface as mnesia_backup and
+%% configure Mnesia so the alternate module performs the actual
+%% accesses to the backup media. This means that the user may put
+%% the backup on medias that Mnesia does not know about, possibly
+%% on hosts where Erlang is not running.
+%%
+%% The OpaqueData argument is never interpreted by other parts of
+%% Mnesia. It is the property of this module. Alternate implementations
+%% of this module may have different interpretations of OpaqueData.
+%% The OpaqueData argument given to open_write/1 and open_read/1
+%% are forwarded directly from the user.
+%%
+%% All functions must return {ok, NewOpaqueData} or {error, Reason}.
+%%
+%% The NewOpaqueData arguments returned by backup callback functions will
+%% be given as input when the next backup callback function is invoked.
+%% If any return value does not match {ok, _} the backup will be aborted.
+%%
+%% The NewOpaqueData arguments returned by restore callback functions will
+%% be given as input when the next restore callback function is invoked
+%% If any return value does not match {ok, _} the restore will be aborted.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(mnesia_backup).
+
+-include_lib("kernel/include/file.hrl").
+
+-export([
+ %% Write access
+ open_write/1,
+ write/2,
+ commit_write/1,
+ abort_write/1,
+
+ %% Read access
+ open_read/1,
+ read/1,
+ close_read/1
+ ]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup callback interface
+-record(backup, {tmp_file, file, file_desc}).
+
+%% Opens backup media for write
+%%
+%% Returns {ok, OpaqueData} or {error, Reason}
+open_write(OpaqueData) ->
+ File = OpaqueData,
+ Tmp = lists:concat([File,".BUPTMP"]),
+ file:delete(Tmp),
+ file:delete(File),
+ case disk_log:open([{name, make_ref()},
+ {file, Tmp},
+ {repair, false},
+ {linkto, self()}]) of
+ {ok, Fd} ->
+ {ok, #backup{tmp_file = Tmp, file = File, file_desc = Fd}};
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+%% Writes BackupItems to the backup media
+%%
+%% Returns {ok, OpaqueData} or {error, Reason}
+write(OpaqueData, BackupItems) ->
+ B = OpaqueData,
+ case disk_log:log_terms(B#backup.file_desc, BackupItems) of
+ ok ->
+ {ok, B};
+ {error, Reason} ->
+ abort_write(B),
+ {error, Reason}
+ end.
+
+%% Closes the backup media after a successful backup
+%%
+%% Returns {ok, ReturnValueToUser} or {error, Reason}
+commit_write(OpaqueData) ->
+ B = OpaqueData,
+ case disk_log:sync(B#backup.file_desc) of
+ ok ->
+ case disk_log:close(B#backup.file_desc) of
+ ok ->
+ case file:rename(B#backup.tmp_file, B#backup.file) of
+ ok ->
+ {ok, B#backup.file};
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+%% Closes the backup media after an interrupted backup
+%%
+%% Returns {ok, ReturnValueToUser} or {error, Reason}
+abort_write(BackupRef) ->
+ Res = disk_log:close(BackupRef#backup.file_desc),
+ file:delete(BackupRef#backup.tmp_file),
+ case Res of
+ ok ->
+ {ok, BackupRef#backup.file};
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Restore callback interface
+
+-record(restore, {file, file_desc, cont}).
+
+%% Opens backup media for read
+%%
+%% Returns {ok, OpaqueData} or {error, Reason}
+open_read(OpaqueData) ->
+ File = OpaqueData,
+ case file:read_file_info(File) of
+ {error, Reason} ->
+ {error, Reason};
+ _FileInfo -> %% file exists
+ case disk_log:open([{file, File},
+ {name, make_ref()},
+ {repair, false},
+ {mode, read_only},
+ {linkto, self()}]) of
+ {ok, Fd} ->
+ {ok, #restore{file = File, file_desc = Fd, cont = start}};
+ {repaired, Fd, _, {badbytes, 0}} ->
+ {ok, #restore{file = File, file_desc = Fd, cont = start}};
+ {repaired, Fd, _, _} ->
+ {ok, #restore{file = File, file_desc = Fd, cont = start}};
+ {error, Reason} ->
+ {error, Reason}
+ end
+ end.
+
+%% Reads BackupItems from the backup media
+%%
+%% Returns {ok, OpaqueData, BackupItems} or {error, Reason}
+%%
+%% BackupItems == [] is interpreted as eof
+read(OpaqueData) ->
+ R = OpaqueData,
+ Fd = R#restore.file_desc,
+ case disk_log:chunk(Fd, R#restore.cont) of
+ {error, Reason} ->
+ {error, {"Possibly truncated", Reason}};
+ eof ->
+ {ok, R, []};
+ {Cont, []} ->
+ read(R#restore{cont = Cont});
+ {Cont, BackupItems, _BadBytes} ->
+ {ok, R#restore{cont = Cont}, BackupItems};
+ {Cont, BackupItems} ->
+ {ok, R#restore{cont = Cont}, BackupItems}
+ end.
+
+%% Closes the backup media after restore
+%%
+%% Returns {ok, ReturnValueToUser} or {error, Reason}
+close_read(OpaqueData) ->
+ R = OpaqueData,
+ case disk_log:close(R#restore.file_desc) of
+ ok -> {ok, R#restore.file};
+ {error, Reason} -> {error, Reason}
+ end.
+%0
+
diff --git a/lib/mnesia/src/mnesia_bup.erl b/lib/mnesia/src/mnesia_bup.erl
new file mode 100644
index 0000000000..37a8258d74
--- /dev/null
+++ b/lib/mnesia/src/mnesia_bup.erl
@@ -0,0 +1,1186 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_bup).
+-export([
+ %% Public interface
+ iterate/4,
+ read_schema/2,
+ fallback_bup/0,
+ fallback_exists/0,
+ tm_fallback_start/1,
+ create_schema/1,
+ install_fallback/1,
+ install_fallback/2,
+ uninstall_fallback/0,
+ uninstall_fallback/1,
+ traverse_backup/4,
+ traverse_backup/6,
+ make_initial_backup/3,
+ fallback_to_schema/0,
+ lookup_schema/2,
+ schema2bup/1,
+ refresh_cookie/2,
+
+ %% Internal
+ fallback_receiver/2,
+ install_fallback_master/2,
+ uninstall_fallback_master/2,
+ local_uninstall_fallback/2,
+ do_traverse_backup/7,
+ trav_apply/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [verbose/2, dbg_out/2]).
+
+-record(restore, {mode, bup_module, bup_data}).
+
+-record(fallback_args, {opaque,
+ scope = global,
+ module = mnesia_monitor:get_env(backup_module),
+ use_default_dir = true,
+ mnesia_dir,
+ fallback_bup,
+ fallback_tmp,
+ skip_tables = [],
+ keep_tables = [],
+ default_op = keep_tables
+ }).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup iterator
+
+%% Reads schema section and iterates over all records in a backup.
+%%
+%% Fun(BunchOfRecords, Header, Schema, Acc) is applied when a suitable amount
+%% of records has been collected.
+%%
+%% BunchOfRecords will be [] when the iteration is done.
+iterate(Mod, Fun, Opaque, Acc) ->
+ R = #restore{bup_module = Mod, bup_data = Opaque},
+ case catch read_schema_section(R) of
+ {error, Reason} ->
+ {error, Reason};
+ {R2, {Header, Schema, Rest}} ->
+ case catch iter(R2, Header, Schema, Fun, Acc, Rest) of
+ {ok, R3, Res} ->
+ catch safe_apply(R3, close_read, [R3#restore.bup_data]),
+ {ok, Res};
+ {error, Reason} ->
+ catch safe_apply(R2, close_read, [R2#restore.bup_data]),
+ {error, Reason};
+ {'EXIT', Pid, Reason} ->
+ catch safe_apply(R2, close_read, [R2#restore.bup_data]),
+ {error, {'EXIT', Pid, Reason}};
+ {'EXIT', Reason} ->
+ catch safe_apply(R2, close_read, [R2#restore.bup_data]),
+ {error, {'EXIT', Reason}}
+ end
+ end.
+
+iter(R, Header, Schema, Fun, Acc, []) ->
+ case safe_apply(R, read, [R#restore.bup_data]) of
+ {R2, []} ->
+ Res = Fun([], Header, Schema, Acc),
+ {ok, R2, Res};
+ {R2, BupItems} ->
+ iter(R2, Header, Schema, Fun, Acc, BupItems)
+ end;
+iter(R, Header, Schema, Fun, Acc, BupItems) ->
+ Acc2 = Fun(BupItems, Header, Schema, Acc),
+ iter(R, Header, Schema, Fun, Acc2, []).
+
+safe_apply(R, write, [_, Items]) when Items =:= [] ->
+ R;
+safe_apply(R, What, Args) ->
+ Abort = fun(Re) -> abort_restore(R, What, Args, Re) end,
+ Mod = R#restore.bup_module,
+ case catch apply(Mod, What, Args) of
+ {ok, Opaque, Items} when What =:= read ->
+ {R#restore{bup_data = Opaque}, Items};
+ {ok, Opaque} when What =/= read->
+ R#restore{bup_data = Opaque};
+ {error, Re} ->
+ Abort(Re);
+ Re ->
+ Abort(Re)
+ end.
+
+abort_restore(R, What, Args, Reason) ->
+ Mod = R#restore.bup_module,
+ Opaque = R#restore.bup_data,
+ dbg_out("Restore aborted. ~p:~p~p -> ~p~n",
+ [Mod, What, Args, Reason]),
+ catch apply(Mod, close_read, [Opaque]),
+ throw({error, Reason}).
+
+fallback_to_schema() ->
+ Fname = fallback_bup(),
+ fallback_to_schema(Fname).
+
+fallback_to_schema(Fname) ->
+ Mod = mnesia_backup,
+ case read_schema(Mod, Fname) of
+ {error, Reason} ->
+ {error, Reason};
+ Schema ->
+ case catch lookup_schema(schema, Schema) of
+ {error, _} ->
+ {error, "No schema in fallback"};
+ List ->
+ {ok, fallback, List}
+ end
+ end.
+
+%% Opens Opaque reads schema and then close
+read_schema(Mod, Opaque) ->
+ R = #restore{bup_module = Mod, bup_data = Opaque},
+ case catch read_schema_section(R) of
+ {error, Reason} ->
+ {error, Reason};
+ {R2, {_Header, Schema, _}} ->
+ catch safe_apply(R2, close_read, [R2#restore.bup_data]),
+ Schema
+ end.
+
+%% Open backup media and extract schema
+%% rewind backup media and leave it open
+%% Returns {R, {Header, Schema}}
+read_schema_section(R) ->
+ case catch do_read_schema_section(R) of
+ {'EXIT', Reason} ->
+ catch safe_apply(R, close_read, [R#restore.bup_data]),
+ {error, {'EXIT', Reason}};
+ {error, Reason} ->
+ catch safe_apply(R, close_read, [R#restore.bup_data]),
+ {error, Reason};
+ {R2, {H, Schema, Rest}} ->
+ Schema2 = convert_schema(H#log_header.log_version, Schema),
+ {R2, {H, Schema2, Rest}}
+ end.
+
+do_read_schema_section(R) ->
+ R2 = safe_apply(R, open_read, [R#restore.bup_data]),
+ {R3, RawSchema} = safe_apply(R2, read, [R2#restore.bup_data]),
+ do_read_schema_section(R3, verify_header(RawSchema), []).
+
+do_read_schema_section(R, {ok, B, C, []}, Acc) ->
+ case safe_apply(R, read, [R#restore.bup_data]) of
+ {R2, []} ->
+ {R2, {B, Acc, []}};
+ {R2, RawSchema} ->
+ do_read_schema_section(R2, {ok, B, C, RawSchema}, Acc)
+ end;
+
+do_read_schema_section(R, {ok, B, C, [Head | Tail]}, Acc)
+ when element(1, Head) =:= schema ->
+ do_read_schema_section(R, {ok, B, C, Tail}, Acc ++ [Head]);
+
+do_read_schema_section(R, {ok, B, _C, Rest}, Acc) ->
+ {R, {B, Acc, Rest}};
+
+do_read_schema_section(_R, {error, Reason}, _Acc) ->
+ {error, Reason}.
+
+verify_header([H | RawSchema]) when is_record(H, log_header) ->
+ Current = mnesia_log:backup_log_header(),
+ if
+ H#log_header.log_kind =:= Current#log_header.log_kind ->
+ Versions = ["0.1", "1.1", Current#log_header.log_version],
+ case lists:member(H#log_header.log_version, Versions) of
+ true ->
+ {ok, H, Current, RawSchema};
+ false ->
+ {error, {"Bad header version. Cannot be used as backup.", H}}
+ end;
+ true ->
+ {error, {"Bad kind of header. Cannot be used as backup.", H}}
+ end;
+verify_header(RawSchema) ->
+ {error, {"Missing header. Cannot be used as backup.", catch hd(RawSchema)}}.
+
+refresh_cookie(Schema, NewCookie) ->
+ case lists:keysearch(schema, 2, Schema) of
+ {value, {schema, schema, List}} ->
+ Cs = mnesia_schema:list2cs(List),
+ Cs2 = Cs#cstruct{cookie = NewCookie},
+ Item = {schema, schema, mnesia_schema:cs2list(Cs2)},
+ lists:keyreplace(schema, 2, Schema, Item);
+
+ false ->
+ Reason = "No schema found. Cannot be used as backup.",
+ throw({error, {Reason, Schema}})
+ end.
+
+%% Convert schema items from an external backup
+%% If backup format is the latest, no conversion is needed
+%% All supported backup formats should have their converters
+%% here as separate function clauses.
+convert_schema("0.1", Schema) ->
+ convert_0_1(Schema);
+convert_schema("1.1", Schema) ->
+ %% The new backup format is a pure extension of the old one
+ Current = mnesia_log:backup_log_header(),
+ convert_schema(Current#log_header.log_version, Schema);
+convert_schema(Latest, Schema) ->
+ H = mnesia_log:backup_log_header(),
+ if
+ H#log_header.log_version =:= Latest ->
+ Schema;
+ true ->
+ Reason = "Bad backup header version. Cannot convert schema.",
+ throw({error, {Reason, H}})
+ end.
+
+%% Backward compatibility for 0.1
+convert_0_1(Schema) ->
+ case lists:keysearch(schema, 2, Schema) of
+ {value, {schema, schema, List}} ->
+ Schema2 = lists:keydelete(schema, 2, Schema),
+ Cs = mnesia_schema:list2cs(List),
+ convert_0_1(Schema2, [], Cs);
+ false ->
+ List = mnesia_schema:get_initial_schema(disc_copies, [node()]),
+ Cs = mnesia_schema:list2cs(List),
+ convert_0_1(Schema, [], Cs)
+ end.
+
+convert_0_1([{schema, cookie, Cookie} | Schema], Acc, Cs) ->
+ convert_0_1(Schema, Acc, Cs#cstruct{cookie = Cookie});
+convert_0_1([{schema, db_nodes, DbNodes} | Schema], Acc, Cs) ->
+ convert_0_1(Schema, Acc, Cs#cstruct{disc_copies = DbNodes});
+convert_0_1([{schema, version, Version} | Schema], Acc, Cs) ->
+ convert_0_1(Schema, Acc, Cs#cstruct{version = Version});
+convert_0_1([{schema, Tab, Def} | Schema], Acc, Cs) ->
+ Head =
+ case lists:keysearch(index, 1, Def) of
+ {value, {index, PosList}} ->
+ %% Remove the snmp "index"
+ P = PosList -- [snmp],
+ Def2 = lists:keyreplace(index, 1, Def, {index, P}),
+ {schema, Tab, Def2};
+ false ->
+ {schema, Tab, Def}
+ end,
+ convert_0_1(Schema, [Head | Acc], Cs);
+convert_0_1([Head | Schema], Acc, Cs) ->
+ convert_0_1(Schema, [Head | Acc], Cs);
+convert_0_1([], Acc, Cs) ->
+ [schema2bup({schema, schema, Cs}) | Acc].
+
+%% Returns Val or throw error
+lookup_schema(Key, Schema) ->
+ case lists:keysearch(Key, 2, Schema) of
+ {value, {schema, Key, Val}} -> Val;
+ false -> throw({error, {"Cannot lookup", Key}})
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup compatibility
+
+%% Convert internal schema items to backup dito
+schema2bup({schema, Tab}) ->
+ {schema, Tab};
+schema2bup({schema, Tab, TableDef}) ->
+ {schema, Tab, mnesia_schema:cs2list(TableDef)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Create schema on the given nodes
+%% Requires that old schemas has been deleted
+%% Returns ok | {error, Reason}
+create_schema([]) ->
+ create_schema([node()]);
+create_schema(Ns) when is_list(Ns) ->
+ case is_set(Ns) of
+ true ->
+ create_schema(Ns, mnesia_schema:ensure_no_schema(Ns));
+ false ->
+ {error, {combine_error, Ns}}
+ end;
+create_schema(Ns) ->
+ {error, {badarg, Ns}}.
+
+is_set(List) when is_list(List) ->
+ ordsets:is_set(lists:sort(List));
+is_set(_) ->
+ false.
+
+create_schema(Ns, ok) ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ case mnesia_monitor:get_env(schema_location) of
+ ram ->
+ {error, {has_no_disc, node()}};
+ _ ->
+ case mnesia_schema:opt_create_dir(true, mnesia_lib:dir()) of
+ {error, What} ->
+ {error, What};
+ ok ->
+ Mod = mnesia_backup,
+ Str = mk_str(),
+ File = mnesia_lib:dir(Str),
+ file:delete(File),
+ case catch make_initial_backup(Ns, File, Mod) of
+ {ok, _Res} ->
+ case do_install_fallback(File, Mod) of
+ ok ->
+ file:delete(File),
+ ok;
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end
+ end
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end;
+create_schema(_Ns, {error, Reason}) ->
+ {error, Reason};
+create_schema(_Ns, Reason) ->
+ {error, Reason}.
+
+mk_str() ->
+ Now = [integer_to_list(I) || I <- tuple_to_list(now())],
+ lists:concat([node()] ++ Now ++ ".TMP").
+
+make_initial_backup(Ns, Opaque, Mod) ->
+ Schema = [{schema, schema, mnesia_schema:get_initial_schema(disc_copies, Ns)}],
+ O2 = do_apply(Mod, open_write, [Opaque], Opaque),
+ O3 = do_apply(Mod, write, [O2, [mnesia_log:backup_log_header()]], O2),
+ O4 = do_apply(Mod, write, [O3, Schema], O3),
+ O5 = do_apply(Mod, commit_write, [O4], O4),
+ {ok, O5}.
+
+do_apply(_, write, [_, Items], Opaque) when Items =:= [] ->
+ Opaque;
+do_apply(Mod, What, Args, _Opaque) ->
+ case catch apply(Mod, What, Args) of
+ {ok, Opaque2} -> Opaque2;
+ {error, Reason} -> throw({error, Reason});
+ {'EXIT', Reason} -> throw({error, {'EXIT', Reason}})
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Restore
+
+%% Restore schema and possibly other tables from a backup
+%% and replicate them to the necessary nodes
+%% Requires that old schemas has been deleted
+%% Returns ok | {error, Reason}
+install_fallback(Opaque) ->
+ install_fallback(Opaque, []).
+
+install_fallback(Opaque, Args) ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ do_install_fallback(Opaque, Args);
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+do_install_fallback(Opaque, Mod) when is_atom(Mod) ->
+ do_install_fallback(Opaque, [{module, Mod}]);
+do_install_fallback(Opaque, Args) when is_list(Args) ->
+ case check_fallback_args(Args, #fallback_args{opaque = Opaque}) of
+ {ok, FA} ->
+ do_install_fallback(FA);
+ {error, Reason} ->
+ {error, Reason}
+ end;
+do_install_fallback(_Opaque, Args) ->
+ {error, {badarg, Args}}.
+
+check_fallback_args([Arg | Tail], FA) ->
+ case catch check_fallback_arg_type(Arg, FA) of
+ {'EXIT', _Reason} ->
+ {error, {badarg, Arg}};
+ FA2 ->
+ check_fallback_args(Tail, FA2)
+ end;
+check_fallback_args([], FA) ->
+ {ok, FA}.
+
+check_fallback_arg_type(Arg, FA) ->
+ case Arg of
+ {scope, global} ->
+ FA#fallback_args{scope = global};
+ {scope, local} ->
+ FA#fallback_args{scope = local};
+ {module, Mod} ->
+ Mod2 = mnesia_monitor:do_check_type(backup_module, Mod),
+ FA#fallback_args{module = Mod2};
+ {mnesia_dir, Dir} ->
+ FA#fallback_args{mnesia_dir = Dir,
+ use_default_dir = false};
+ {keep_tables, Tabs} ->
+ atom_list(Tabs),
+ FA#fallback_args{keep_tables = Tabs};
+ {skip_tables, Tabs} ->
+ atom_list(Tabs),
+ FA#fallback_args{skip_tables = Tabs};
+ {default_op, keep_tables} ->
+ FA#fallback_args{default_op = keep_tables};
+ {default_op, skip_tables} ->
+ FA#fallback_args{default_op = skip_tables}
+ end.
+
+atom_list([H | T]) when is_atom(H) ->
+ atom_list(T);
+atom_list([]) ->
+ ok.
+
+do_install_fallback(FA) ->
+ Pid = spawn_link(?MODULE, install_fallback_master, [self(), FA]),
+ Res =
+ receive
+ {'EXIT', Pid, Reason} -> % if appl has trapped exit
+ {error, {'EXIT', Reason}};
+ {Pid, Res2} ->
+ case Res2 of
+ {ok, _} ->
+ ok;
+ {error, Reason} ->
+ {error, {"Cannot install fallback", Reason}}
+ end
+ end,
+ Res.
+
+install_fallback_master(ClientPid, FA) ->
+ process_flag(trap_exit, true),
+ State = {start, FA},
+ Opaque = FA#fallback_args.opaque,
+ Mod = FA#fallback_args.module,
+ Res = (catch iterate(Mod, fun restore_recs/4, Opaque, State)),
+ unlink(ClientPid),
+ ClientPid ! {self(), Res},
+ exit(shutdown).
+
+restore_recs(_, _, _, stop) ->
+ throw({error, "restore_recs already stopped"});
+
+restore_recs(Recs, Header, Schema, {start, FA}) ->
+ %% No records in backup
+ Schema2 = convert_schema(Header#log_header.log_version, Schema),
+ CreateList = lookup_schema(schema, Schema2),
+ case catch mnesia_schema:list2cs(CreateList) of
+ {'EXIT', Reason} ->
+ throw({error, {"Bad schema in restore_recs", Reason}});
+ Cs ->
+ Ns = get_fallback_nodes(FA, Cs#cstruct.disc_copies),
+ global:set_lock({{mnesia_table_lock, schema}, self()}, Ns, infinity),
+ Args = [self(), FA],
+ Pids = [spawn_link(N, ?MODULE, fallback_receiver, Args) || N <- Ns],
+ send_fallback(Pids, {start, Header, Schema2}),
+ Res = restore_recs(Recs, Header, Schema2, Pids),
+ global:del_lock({{mnesia_table_lock, schema}, self()}, Ns),
+ Res
+ end;
+
+restore_recs([], _Header, _Schema, Pids) ->
+ send_fallback(Pids, swap),
+ send_fallback(Pids, stop),
+ stop;
+
+restore_recs(Recs, _, _, Pids) ->
+ send_fallback(Pids, {records, Recs}),
+ Pids.
+
+get_fallback_nodes(FA, Ns) ->
+ This = node(),
+ case lists:member(This, Ns) of
+ true ->
+ case FA#fallback_args.scope of
+ global -> Ns;
+ local -> [This]
+ end;
+ false ->
+ throw({error, {"No disc resident schema on local node", Ns}})
+ end.
+
+send_fallback(Pids, Msg) when is_list(Pids), Pids =/= [] ->
+ lists:foreach(fun(Pid) -> Pid ! {self(), Msg} end, Pids),
+ rec_answers(Pids, []).
+
+rec_answers([], Acc) ->
+ case {lists:keysearch(error, 1, Acc), mnesia_lib:uniq(Acc)} of
+ {{value, {error, Val}}, _} -> throw({error, Val});
+ {_, [SameAnswer]} -> SameAnswer;
+ {_, Other} -> throw({error, {"Different answers", Other}})
+ end;
+rec_answers(Pids, Acc) ->
+ receive
+ {'EXIT', Pid, stopped} ->
+ Pids2 = lists:delete(Pid, Pids),
+ rec_answers(Pids2, [stopped|Acc]);
+ {'EXIT', Pid, Reason} ->
+ Pids2 = lists:delete(Pid, Pids),
+ rec_answers(Pids2, [{error, {'EXIT', Pid, Reason}}|Acc]);
+ {Pid, Reply} ->
+ Pids2 = lists:delete(Pid, Pids),
+ rec_answers(Pids2, [Reply|Acc])
+ end.
+
+fallback_exists() ->
+ Fname = fallback_bup(),
+ fallback_exists(Fname).
+
+fallback_exists(Fname) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_lib:exists(Fname);
+ false ->
+ case ?catch_val(active_fallback) of
+ {'EXIT', _} -> false;
+ Bool -> Bool
+ end
+ end.
+
+fallback_name() -> "FALLBACK.BUP".
+fallback_bup() -> mnesia_lib:dir(fallback_name()).
+
+fallback_tmp_name() -> "FALLBACK.TMP".
+%% fallback_full_tmp_name() -> mnesia_lib:dir(fallback_tmp_name()).
+
+fallback_receiver(Master, FA) ->
+ process_flag(trap_exit, true),
+
+ case catch register(mnesia_fallback, self()) of
+ {'EXIT', _} ->
+ Reason = {already_exists, node()},
+ local_fallback_error(Master, Reason);
+ true ->
+ FA2 = check_fallback_dir(Master, FA),
+ Bup = FA2#fallback_args.fallback_bup,
+ case mnesia_lib:exists(Bup) of
+ true ->
+ Reason2 = {already_exists, node()},
+ local_fallback_error(Master, Reason2);
+ false ->
+ Mod = mnesia_backup,
+ Tmp = FA2#fallback_args.fallback_tmp,
+ R = #restore{mode = replace,
+ bup_module = Mod,
+ bup_data = Tmp},
+ file:delete(Tmp),
+ case catch fallback_receiver_loop(Master, R, FA2, schema) of
+ {error, Reason} ->
+ local_fallback_error(Master, Reason);
+ Other ->
+ exit(Other)
+ end
+ end
+ end.
+
+local_fallback_error(Master, Reason) ->
+ Master ! {self(), {error, Reason}},
+ unlink(Master),
+ exit(Reason).
+
+check_fallback_dir(Master, FA) ->
+ case mnesia:system_info(schema_location) of
+ ram ->
+ Reason = {has_no_disc, node()},
+ local_fallback_error(Master, Reason);
+ _ ->
+ Dir = check_fallback_dir_arg(Master, FA),
+ Bup = filename:join([Dir, fallback_name()]),
+ Tmp = filename:join([Dir, fallback_tmp_name()]),
+ FA#fallback_args{fallback_bup = Bup,
+ fallback_tmp = Tmp,
+ mnesia_dir = Dir}
+ end.
+
+check_fallback_dir_arg(Master, FA) ->
+ case FA#fallback_args.use_default_dir of
+ true ->
+ mnesia_lib:dir();
+ false when FA#fallback_args.scope =:= local ->
+ Dir = FA#fallback_args.mnesia_dir,
+ case catch mnesia_monitor:do_check_type(dir, Dir) of
+ {'EXIT', _R} ->
+ Reason = {badarg, {dir, Dir}, node()},
+ local_fallback_error(Master, Reason);
+ AbsDir->
+ AbsDir
+ end;
+ false when FA#fallback_args.scope =:= global ->
+ Reason = {combine_error, global, dir, node()},
+ local_fallback_error(Master, Reason)
+ end.
+
+fallback_receiver_loop(Master, R, FA, State) ->
+ receive
+ {Master, {start, Header, Schema}} when State =:= schema ->
+ Dir = FA#fallback_args.mnesia_dir,
+ throw_bad_res(ok, mnesia_schema:opt_create_dir(true, Dir)),
+ R2 = safe_apply(R, open_write, [R#restore.bup_data]),
+ R3 = safe_apply(R2, write, [R2#restore.bup_data, [Header]]),
+ BupSchema = [schema2bup(S) || S <- Schema],
+ R4 = safe_apply(R3, write, [R3#restore.bup_data, BupSchema]),
+ Master ! {self(), ok},
+ fallback_receiver_loop(Master, R4, FA, records);
+
+ {Master, {records, Recs}} when State =:= records ->
+ R2 = safe_apply(R, write, [R#restore.bup_data, Recs]),
+ Master ! {self(), ok},
+ fallback_receiver_loop(Master, R2, FA, records);
+
+ {Master, swap} when State =/= schema ->
+ ?eval_debug_fun({?MODULE, fallback_receiver_loop, pre_swap}, []),
+ safe_apply(R, commit_write, [R#restore.bup_data]),
+ Bup = FA#fallback_args.fallback_bup,
+ Tmp = FA#fallback_args.fallback_tmp,
+ throw_bad_res(ok, file:rename(Tmp, Bup)),
+ catch mnesia_lib:set(active_fallback, true),
+ ?eval_debug_fun({?MODULE, fallback_receiver_loop, post_swap}, []),
+ Master ! {self(), ok},
+ fallback_receiver_loop(Master, R, FA, stop);
+
+ {Master, stop} when State =:= stop ->
+ stopped;
+
+ Msg ->
+ safe_apply(R, abort_write, [R#restore.bup_data]),
+ Tmp = FA#fallback_args.fallback_tmp,
+ file:delete(Tmp),
+ throw({error, "Unexpected msg fallback_receiver_loop", Msg})
+ end.
+
+throw_bad_res(Expected, Expected) -> Expected;
+throw_bad_res(_Expected, {error, Actual}) -> throw({error, Actual});
+throw_bad_res(_Expected, Actual) -> throw({error, Actual}).
+
+-record(local_tab, {name,
+ storage_type,
+ open,
+ add,
+ close,
+ swap,
+ record_name,
+ opened}).
+
+tm_fallback_start(IgnoreFallback) ->
+ mnesia_schema:lock_schema(),
+ Res = do_fallback_start(fallback_exists(), IgnoreFallback),
+ mnesia_schema: unlock_schema(),
+ case Res of
+ ok -> ok;
+ {error, Reason} -> exit(Reason)
+ end.
+
+do_fallback_start(false, _IgnoreFallback) ->
+ ok;
+do_fallback_start(true, true) ->
+ verbose("Ignoring fallback at startup, but leaving it active...~n", []),
+ mnesia_lib:set(active_fallback, true),
+ ok;
+do_fallback_start(true, false) ->
+ verbose("Starting from fallback...~n", []),
+
+ BupFile = fallback_bup(),
+ Mod = mnesia_backup,
+ LocalTabs = ?ets_new_table(mnesia_local_tables, [set, public, {keypos, 2}]),
+ case catch iterate(Mod, fun restore_tables/4, BupFile, {start, LocalTabs}) of
+ {ok, _Res} ->
+ catch dets:close(schema),
+ TmpSchema = mnesia_lib:tab2tmp(schema),
+ DatSchema = mnesia_lib:tab2dat(schema),
+ AllLT = ?ets_match_object(LocalTabs, '_'),
+ ?ets_delete_table(LocalTabs),
+ case file:rename(TmpSchema, DatSchema) of
+ ok ->
+ [(LT#local_tab.swap)(LT#local_tab.name, LT) ||
+ LT <- AllLT, LT#local_tab.name =/= schema],
+ file:delete(BupFile),
+ ok;
+ {error, Reason} ->
+ file:delete(TmpSchema),
+ {error, {"Cannot start from fallback. Rename error.", Reason}}
+ end;
+ {error, Reason} ->
+ {error, {"Cannot start from fallback", Reason}};
+ {'EXIT', Reason} ->
+ {error, {"Cannot start from fallback", Reason}}
+ end.
+
+restore_tables(All=[Rec | Recs], Header, Schema, State={local, LocalTabs, LT}) ->
+ Tab = element(1, Rec),
+ if
+ Tab =:= LT#local_tab.name ->
+ Key = element(2, Rec),
+ (LT#local_tab.add)(Tab, Key, Rec, LT),
+ restore_tables(Recs, Header, Schema, State);
+ true ->
+ NewState = {new, LocalTabs},
+ restore_tables(All, Header, Schema, NewState)
+ end;
+restore_tables(All=[Rec | Recs], Header, Schema, {new, LocalTabs}) ->
+ Tab = element(1, Rec),
+ case ?ets_lookup(LocalTabs, Tab) of
+ [] ->
+ State = {not_local, LocalTabs, Tab},
+ restore_tables(Recs, Header, Schema, State);
+ [LT] when is_record(LT, local_tab) ->
+ State = {local, LocalTabs, LT},
+ case LT#local_tab.opened of
+ true -> ignore;
+ false ->
+ (LT#local_tab.open)(Tab, LT),
+ ?ets_insert(LocalTabs,LT#local_tab{opened=true})
+ end,
+ restore_tables(All, Header, Schema, State)
+ end;
+restore_tables(All=[Rec | Recs], Header, Schema, S = {not_local, LocalTabs, PrevTab}) ->
+ Tab = element(1, Rec),
+ if
+ Tab =:= PrevTab ->
+ restore_tables(Recs, Header, Schema, S);
+ true ->
+ State = {new, LocalTabs},
+ restore_tables(All, Header, Schema, State)
+ end;
+restore_tables(Recs, Header, Schema, {start, LocalTabs}) ->
+ Dir = mnesia_lib:dir(),
+ OldDir = filename:join([Dir, "OLD_DIR"]),
+ mnesia_schema:purge_dir(OldDir, []),
+ mnesia_schema:purge_dir(Dir, [fallback_name()]),
+ init_dat_files(Schema, LocalTabs),
+ State = {new, LocalTabs},
+ restore_tables(Recs, Header, Schema, State);
+restore_tables([], _Header, _Schema, State) ->
+ State.
+
+%% Creates all neccessary dat files and inserts
+%% the table definitions in the schema table
+%%
+%% Returns a list of local_tab tuples for all local tables
+init_dat_files(Schema, LocalTabs) ->
+ TmpFile = mnesia_lib:tab2tmp(schema),
+ Args = [{file, TmpFile}, {keypos, 2}, {type, set}],
+ case dets:open_file(schema, Args) of % Assume schema lock
+ {ok, _} ->
+ create_dat_files(Schema, LocalTabs),
+ ok = dets:close(schema),
+ LocalTab = #local_tab{name = schema,
+ storage_type = disc_copies,
+ open = undefined,
+ add = undefined,
+ close = undefined,
+ swap = undefined,
+ record_name = schema,
+ opened = false},
+ ?ets_insert(LocalTabs, LocalTab);
+ {error, Reason} ->
+ throw({error, {"Cannot open file", schema, Args, Reason}})
+ end.
+
+create_dat_files([{schema, schema, TabDef} | Tail], LocalTabs) ->
+ ok = dets:insert(schema, {schema, schema, TabDef}),
+ create_dat_files(Tail, LocalTabs);
+create_dat_files([{schema, Tab, TabDef} | Tail], LocalTabs) ->
+ TmpFile = mnesia_lib:tab2tmp(Tab),
+ DatFile = mnesia_lib:tab2dat(Tab),
+ DclFile = mnesia_lib:tab2dcl(Tab),
+ DcdFile = mnesia_lib:tab2dcd(Tab),
+ Expunge = fun() ->
+ file:delete(DatFile),
+ file:delete(DclFile),
+ file:delete(DcdFile)
+ end,
+
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFile),
+ Cs = mnesia_schema:list2cs(TabDef),
+ ok = dets:insert(schema, {schema, Tab, TabDef}),
+ RecName = Cs#cstruct.record_name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ if
+ Storage =:= unknown ->
+ ok = dets:delete(schema, {schema, Tab}),
+ create_dat_files(Tail, LocalTabs);
+ Storage =:= disc_only_copies ->
+ Args = [{file, TmpFile}, {keypos, 2},
+ {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)}],
+ Open = fun(T, LT) when T =:= LT#local_tab.name ->
+ case mnesia_lib:dets_sync_open(T, Args) of
+ {ok, _} ->
+ ok;
+ {error, Reason} ->
+ throw({error, {"Cannot open file", T, Args, Reason}})
+ end
+ end,
+ Add = fun(T, Key, Rec, LT) when T =:= LT#local_tab.name ->
+ case Rec of
+ {_T, Key} ->
+ ok = dets:delete(T, Key);
+ (Rec) when T =:= RecName ->
+ ok = dets:insert(Tab, Rec);
+ (Rec) ->
+ Rec2 = setelement(1, Rec, RecName),
+ ok = dets:insert(T, Rec2)
+ end
+ end,
+ Close = fun(T, LT) when T =:= LT#local_tab.name ->
+ mnesia_lib:dets_sync_close(T)
+ end,
+ Swap = fun(T, LT) when T =:= LT#local_tab.name ->
+ Expunge(),
+ case LT#local_tab.opened of
+ true ->
+ Close(T,LT);
+ false ->
+ Open(T,LT),
+ Close(T,LT)
+ end,
+ case file:rename(TmpFile, DatFile) of
+ ok ->
+ ok;
+ {error, Reason} ->
+ mnesia_lib:fatal("Cannot rename file ~p -> ~p: ~p~n",
+ [TmpFile, DatFile, Reason])
+ end
+ end,
+ LocalTab = #local_tab{name = Tab,
+ storage_type = Storage,
+ open = Open,
+ add = Add,
+ close = Close,
+ swap = Swap,
+ record_name = RecName,
+ opened = false},
+ ?ets_insert(LocalTabs, LocalTab),
+ create_dat_files(Tail, LocalTabs);
+ Storage =:= ram_copies; Storage =:= disc_copies ->
+ Open = fun(T, LT) when T =:= LT#local_tab.name ->
+ mnesia_log:open_log({?MODULE, T},
+ mnesia_log:dcl_log_header(),
+ TmpFile,
+ false,
+ false,
+ read_write)
+ end,
+ Add = fun(T, Key, Rec, LT) when T =:= LT#local_tab.name ->
+ Log = {?MODULE, T},
+ case Rec of
+ {_T, Key} ->
+ mnesia_log:append(Log, {{T, Key}, {T, Key}, delete});
+ (Rec) when T =:= RecName ->
+ mnesia_log:append(Log, {{T, Key}, Rec, write});
+ (Rec) ->
+ Rec2 = setelement(1, Rec, RecName),
+ mnesia_log:append(Log, {{T, Key}, Rec2, write})
+ end
+ end,
+ Close = fun(T, LT) when T =:= LT#local_tab.name ->
+ mnesia_log:close_log({?MODULE, T})
+ end,
+ Swap = fun(T, LT) when T =:= LT#local_tab.name ->
+ Expunge(),
+ if
+ Storage =:= ram_copies, LT#local_tab.opened =:= false ->
+ ok;
+ true ->
+ Log = mnesia_log:open_log(fallback_tab,
+ mnesia_log:dcd_log_header(),
+ DcdFile,
+ false),
+ mnesia_log:close_log(Log),
+ case LT#local_tab.opened of
+ true ->
+ Close(T,LT);
+ false ->
+ Open(T,LT),
+ Close(T,LT)
+ end,
+ case file:rename(TmpFile, DclFile) of
+ ok ->
+ ok;
+ {error, Reason} ->
+ mnesia_lib:fatal("Cannot rename file ~p -> ~p: ~p~n",
+ [TmpFile, DclFile, Reason])
+ end
+ end
+ end,
+ LocalTab = #local_tab{name = Tab,
+ storage_type = Storage,
+ open = Open,
+ add = Add,
+ close = Close,
+ swap = Swap,
+ record_name = RecName,
+ opened = false
+ },
+ ?ets_insert(LocalTabs, LocalTab),
+ create_dat_files(Tail, LocalTabs)
+ end;
+create_dat_files([{schema, Tab} | Tail], LocalTabs) ->
+ ?ets_delete(LocalTabs, Tab),
+ ok = dets:delete(schema, {schema, Tab}),
+ TmpFile = mnesia_lib:tab2tmp(Tab),
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFile),
+ create_dat_files(Tail, LocalTabs);
+create_dat_files([], _LocalTabs) ->
+ ok.
+
+uninstall_fallback() ->
+ uninstall_fallback([{scope, global}]).
+
+uninstall_fallback(Args) ->
+ case check_fallback_args(Args, #fallback_args{}) of
+ {ok, FA} ->
+ do_uninstall_fallback(FA);
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+do_uninstall_fallback(FA) ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ Pid = spawn_link(?MODULE, uninstall_fallback_master, [self(), FA]),
+ receive
+ {'EXIT', Pid, Reason} -> % if appl has trapped exit
+ {error, {'EXIT', Reason}};
+ {Pid, Res} ->
+ Res
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+uninstall_fallback_master(ClientPid, FA) ->
+ process_flag(trap_exit, true),
+
+ FA2 = check_fallback_dir(ClientPid, FA), % May exit
+ Bup = FA2#fallback_args.fallback_bup,
+ case fallback_to_schema(Bup) of
+ {ok, fallback, List} ->
+ Cs = mnesia_schema:list2cs(List),
+ case catch get_fallback_nodes(FA, Cs#cstruct.disc_copies) of
+ Ns when is_list(Ns) ->
+ do_uninstall(ClientPid, Ns, FA);
+ {error, Reason} ->
+ local_fallback_error(ClientPid, Reason)
+ end;
+ {error, Reason} ->
+ local_fallback_error(ClientPid, Reason)
+ end.
+
+do_uninstall(ClientPid, Ns, FA) ->
+ Args = [self(), FA],
+ global:set_lock({{mnesia_table_lock, schema}, self()}, Ns, infinity),
+ Pids = [spawn_link(N, ?MODULE, local_uninstall_fallback, Args) || N <- Ns],
+ Res = do_uninstall(ClientPid, Pids, [], [], ok),
+ global:del_lock({{mnesia_table_lock, schema}, self()}, Ns),
+ ClientPid ! {self(), Res},
+ unlink(ClientPid),
+ exit(shutdown).
+
+do_uninstall(ClientPid, [Pid | Pids], GoodPids, BadNodes, Res) ->
+ receive
+ %% {'EXIT', ClientPid, _} ->
+ %% client_exit;
+ {'EXIT', Pid, Reason} ->
+ BadNode = node(Pid),
+ BadRes = {error, {"Uninstall fallback", BadNode, Reason}},
+ do_uninstall(ClientPid, Pids, GoodPids, [BadNode | BadNodes], BadRes);
+ {Pid, {error, Reason}} ->
+ BadNode = node(Pid),
+ BadRes = {error, {"Uninstall fallback", BadNode, Reason}},
+ do_uninstall(ClientPid, Pids, GoodPids, [BadNode | BadNodes], BadRes);
+ {Pid, started} ->
+ do_uninstall(ClientPid, Pids, [Pid | GoodPids], BadNodes, Res)
+ end;
+do_uninstall(ClientPid, [], GoodPids, [], ok) ->
+ lists:foreach(fun(Pid) -> Pid ! {self(), do_uninstall} end, GoodPids),
+ rec_uninstall(ClientPid, GoodPids, ok);
+do_uninstall(_ClientPid, [], GoodPids, BadNodes, BadRes) ->
+ lists:foreach(fun(Pid) -> exit(Pid, shutdown) end, GoodPids),
+ {error, {node_not_running, BadNodes, BadRes}}.
+
+local_uninstall_fallback(Master, FA) ->
+ %% Don't trap exit
+
+ register(mnesia_fallback, self()), % May exit
+ FA2 = check_fallback_dir(Master, FA), % May exit
+ Master ! {self(), started},
+
+ receive
+ {Master, do_uninstall} ->
+ ?eval_debug_fun({?MODULE, uninstall_fallback2, pre_delete}, []),
+ catch mnesia_lib:set(active_fallback, false),
+ Tmp = FA2#fallback_args.fallback_tmp,
+ Bup = FA2#fallback_args.fallback_bup,
+ file:delete(Tmp),
+ Res =
+ case fallback_exists(Bup) of
+ true -> file:delete(Bup);
+ false -> ok
+ end,
+ ?eval_debug_fun({?MODULE, uninstall_fallback2, post_delete}, []),
+ Master ! {self(), Res},
+ unlink(Master),
+ exit(normal)
+ end.
+
+rec_uninstall(ClientPid, [Pid | Pids], AccRes) ->
+ receive
+ %% {'EXIT', ClientPid, _} ->
+ %% exit(shutdown);
+ {'EXIT', Pid, R} ->
+ Reason = {node_not_running, {node(Pid), R}},
+ rec_uninstall(ClientPid, Pids, {error, Reason});
+ {Pid, ok} ->
+ rec_uninstall(ClientPid, Pids, AccRes);
+ {Pid, BadRes} ->
+ rec_uninstall(ClientPid, Pids, BadRes)
+ end;
+rec_uninstall(ClientPid, [], Res) ->
+ ClientPid ! {self(), Res},
+ unlink(ClientPid),
+ exit(normal).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup traversal
+
+%% Iterate over a backup and produce a new backup.
+%% Fun(BackupItem, Acc) is applied for each BackupItem.
+%%
+%% Valid BackupItems are:
+%%
+%% {schema, Tab} Table to be deleted
+%% {schema, Tab, CreateList} Table to be created, CreateList may be empty
+%% {schema, db_nodes, DbNodes}List of nodes, defaults to [node()] OLD
+%% {schema, version, Version} Schema version OLD
+%% {schema, cookie, Cookie} Unique schema cookie OLD
+%% {Tab, Key} Oid for record to be deleted
+%% Record Record to be inserted.
+%%
+%% The Fun must return a tuple {BackupItems, NewAcc}
+%% where BackupItems is a list of valid BackupItems and
+%% NewAcc is a new accumulator value. Once BackupItems
+%% that not are schema related has been returned, no more schema
+%% items may be returned. The schema related items must always be
+%% first in the backup.
+%%
+%% If TargetMod =:= read_only, no new backup will be created.
+%%
+%% Opening of the source media will be performed by
+%% to SourceMod:open_read(Source)
+%%
+%% Opening of the target media will be performed by
+%% to TargetMod:open_write(Target)
+traverse_backup(Source, Target, Fun, Acc) ->
+ Mod = mnesia_monitor:get_env(backup_module),
+ traverse_backup(Source, Mod, Target, Mod, Fun, Acc).
+
+traverse_backup(Source, SourceMod, Target, TargetMod, Fun, Acc) ->
+ Args = [self(), Source, SourceMod, Target, TargetMod, Fun, Acc],
+ Pid = spawn_link(?MODULE, do_traverse_backup, Args),
+ receive
+ {'EXIT', Pid, Reason} ->
+ {error, {"Backup traversal crashed", Reason}};
+ {iter_done, Pid, Res} ->
+ Res
+ end.
+
+do_traverse_backup(ClientPid, Source, SourceMod, Target, TargetMod, Fun, Acc) ->
+ process_flag(trap_exit, true),
+ Iter =
+ if
+ TargetMod =/= read_only ->
+ case catch do_apply(TargetMod, open_write, [Target], Target) of
+ {error, Error} ->
+ unlink(ClientPid),
+ ClientPid ! {iter_done, self(), {error, Error}},
+ exit(Error);
+ Else -> Else
+ end;
+ true ->
+ ignore
+ end,
+ A = {start, Fun, Acc, TargetMod, Iter},
+ Res =
+ case iterate(SourceMod, fun trav_apply/4, Source, A) of
+ {ok, {iter, _, Acc2, _, Iter2}} when TargetMod =/= read_only ->
+ case catch do_apply(TargetMod, commit_write, [Iter2], Iter2) of
+ {error, Reason} ->
+ {error, Reason};
+ _ ->
+ {ok, Acc2}
+ end;
+ {ok, {iter, _, Acc2, _, _}} ->
+ {ok, Acc2};
+ {error, Reason} when TargetMod =/= read_only->
+ catch do_apply(TargetMod, abort_write, [Iter], Iter),
+ {error, {"Backup traversal failed", Reason}};
+ {error, Reason} ->
+ {error, {"Backup traversal failed", Reason}}
+ end,
+ unlink(ClientPid),
+ ClientPid ! {iter_done, self(), Res}.
+
+trav_apply(Recs, _Header, _Schema, {iter, Fun, Acc, Mod, Iter}) ->
+ {NewRecs, Acc2} = filter_foldl(Fun, Acc, Recs),
+ if
+ Mod =/= read_only, NewRecs =/= [] ->
+ Iter2 = do_apply(Mod, write, [Iter, NewRecs], Iter),
+ {iter, Fun, Acc2, Mod, Iter2};
+ true ->
+ {iter, Fun, Acc2, Mod, Iter}
+ end;
+trav_apply(Recs, Header, Schema, {start, Fun, Acc, Mod, Iter}) ->
+ Iter2 =
+ if
+ Mod =/= read_only ->
+ do_apply(Mod, write, [Iter, [Header]], Iter);
+ true ->
+ Iter
+ end,
+ TravAcc = trav_apply(Schema, Header, Schema, {iter, Fun, Acc, Mod, Iter2}),
+ trav_apply(Recs, Header, Schema, TravAcc).
+
+filter_foldl(Fun, Acc, [Head|Tail]) ->
+ case Fun(Head, Acc) of
+ {HeadItems, HeadAcc} when is_list(HeadItems) ->
+ {TailItems, TailAcc} = filter_foldl(Fun, HeadAcc, Tail),
+ {HeadItems ++ TailItems, TailAcc};
+ Other ->
+ throw({error, {"Fun must return a list", Other}})
+ end;
+filter_foldl(_Fun, Acc, []) ->
+ {[], Acc}.
+
diff --git a/lib/mnesia/src/mnesia_checkpoint.erl b/lib/mnesia/src/mnesia_checkpoint.erl
new file mode 100644
index 0000000000..eb8fe38908
--- /dev/null
+++ b/lib/mnesia/src/mnesia_checkpoint.erl
@@ -0,0 +1,1295 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_checkpoint).
+
+%% TM callback interface
+-export([
+ tm_add_copy/2,
+ tm_change_table_copy_type/3,
+ tm_del_copy/2,
+ tm_mnesia_down/1,
+ tm_prepare/1,
+ tm_retain/4,
+ tm_retain/5,
+ tm_enter_pending/1,
+ tm_enter_pending/3,
+ tm_exit_pending/1,
+ convert_cp_record/1
+ ]).
+
+%% Public interface
+-export([
+ activate/1,
+ checkpoints/0,
+ deactivate/1,
+ deactivate/2,
+ iterate/6,
+ most_local_node/2,
+ really_retain/2,
+ stop/0,
+ stop_iteration/1,
+ tables_and_cookie/1
+ ]).
+
+%% Internal
+-export([
+ call/2,
+ cast/2,
+ init/1,
+ remote_deactivate/1,
+ start/1
+ ]).
+
+%% sys callback interface
+-export([
+ system_code_change/4,
+ system_continue/3,
+ system_terminate/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [add/2, del/2, set/2, unset/1]).
+-import(mnesia_lib, [dbg_out/2]).
+
+-record(checkpoint_args, {name = {now(), node()},
+ allow_remote = true,
+ ram_overrides_dump = false,
+ nodes = [],
+ node = node(),
+ now = now(),
+ cookie = ?unique_cookie,
+ min = [],
+ max = [],
+ pending_tab,
+ wait_for_old, % Initially undefined then List
+ is_activated = false,
+ ignore_new = [],
+ retainers = [],
+ iterators = [],
+ supervisor,
+ pid
+ }).
+
+%% Old record definition
+-record(checkpoint, {name,
+ allow_remote,
+ ram_overrides_dump,
+ nodes,
+ node,
+ now,
+ min,
+ max,
+ pending_tab,
+ wait_for_old,
+ is_activated,
+ ignore_new,
+ retainers,
+ iterators,
+ supervisor,
+ pid
+ }).
+
+-record(retainer, {cp_name, tab_name, store, writers = [], really_retain = true}).
+
+-record(iter, {tab_name, oid_tab, main_tab, retainer_tab, source, val, pid}).
+
+-record(pending, {tid, disc_nodes = [], ram_nodes = []}).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% TM callback functions
+
+stop() ->
+ lists:foreach(fun(Name) -> call(Name, stop) end,
+ checkpoints()),
+ ok.
+
+tm_prepare(Cp) when is_record(Cp, checkpoint_args) ->
+ Name = Cp#checkpoint_args.name,
+ case lists:member(Name, checkpoints()) of
+ false ->
+ start_retainer(Cp);
+ true ->
+ {error, {already_exists, Name, node()}}
+ end;
+tm_prepare(Cp) when is_record(Cp, checkpoint) ->
+ %% Node with old protocol sent an old checkpoint record
+ %% and we have to convert it
+ case convert_cp_record(Cp) of
+ {ok, NewCp} ->
+ tm_prepare(NewCp);
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+tm_mnesia_down(Node) ->
+ lists:foreach(fun(Name) -> cast(Name, {mnesia_down, Node}) end,
+ checkpoints()).
+
+%% Returns pending
+tm_enter_pending(Tid, DiscNs, RamNs) ->
+ Pending = #pending{tid = Tid, disc_nodes = DiscNs, ram_nodes = RamNs},
+ tm_enter_pending(Pending).
+
+tm_enter_pending(Pending) ->
+ PendingTabs = val(pending_checkpoints),
+ tm_enter_pending(PendingTabs, Pending).
+
+tm_enter_pending([], Pending) ->
+ Pending;
+tm_enter_pending([Tab | Tabs], Pending) ->
+ catch ?ets_insert(Tab, Pending),
+ tm_enter_pending(Tabs, Pending).
+
+tm_exit_pending(Tid) ->
+ Pids = val(pending_checkpoint_pids),
+ tm_exit_pending(Pids, Tid).
+
+tm_exit_pending([], Tid) ->
+ Tid;
+tm_exit_pending([Pid | Pids], Tid) ->
+ Pid ! {self(), {exit_pending, Tid}},
+ tm_exit_pending(Pids, Tid).
+
+enter_still_pending([Tid | Tids], Tab) ->
+ ?ets_insert(Tab, #pending{tid = Tid}),
+ enter_still_pending(Tids, Tab);
+enter_still_pending([], _Tab) ->
+ ok.
+
+
+%% Looks up checkpoints for functions in mnesia_tm.
+tm_retain(Tid, Tab, Key, Op) ->
+ case val({Tab, commit_work}) of
+ [{checkpoints, Checkpoints} | _ ] ->
+ tm_retain(Tid, Tab, Key, Op, Checkpoints);
+ _ ->
+ undefined
+ end.
+
+tm_retain(Tid, Tab, Key, Op, Checkpoints) ->
+ case Op of
+ clear_table ->
+ OldRecs = mnesia_lib:db_match_object(Tab, '_'),
+ send_group_retain(OldRecs, Checkpoints, Tid, Tab, []),
+ OldRecs;
+ _ ->
+ OldRecs = mnesia_lib:db_get(Tab, Key),
+ send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
+ OldRecs
+ end.
+
+send_group_retain([Rec | Recs], Checkpoints, Tid, Tab, [PrevRec | PrevRecs])
+ when element(2, Rec) /= element(2, PrevRec) ->
+ Key = element(2, PrevRec),
+ OldRecs = lists:reverse([PrevRec | PrevRecs]),
+ send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
+ send_group_retain(Recs, Checkpoints, Tid, Tab, [Rec]);
+send_group_retain([Rec | Recs], Checkpoints, Tid, Tab, Acc) ->
+ send_group_retain(Recs, Checkpoints, Tid, Tab, [Rec | Acc]);
+send_group_retain([], Checkpoints, Tid, Tab, [PrevRec | PrevRecs]) ->
+ Key = element(2, PrevRec),
+ OldRecs = lists:reverse([PrevRec | PrevRecs]),
+ send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
+ ok;
+send_group_retain([], _Checkpoints, _Tid, _Tab, []) ->
+ ok.
+
+send_retain([Name | Names], Msg) ->
+ cast(Name, Msg),
+ send_retain(Names, Msg);
+send_retain([], _Msg) ->
+ ok.
+
+tm_add_copy(Tab, Node) when Node /= node() ->
+ case val({Tab, commit_work}) of
+ [{checkpoints, Checkpoints} | _ ] ->
+ Fun = fun(Name) -> call(Name, {add_copy, Tab, Node}) end,
+ map_call(Fun, Checkpoints, ok);
+ _ ->
+ ok
+ end.
+
+tm_del_copy(Tab, Node) when Node == node() ->
+ mnesia_subscr:unsubscribe_table(Tab),
+ case val({Tab, commit_work}) of
+ [{checkpoints, Checkpoints} | _ ] ->
+ Fun = fun(Name) -> call(Name, {del_copy, Tab, Node}) end,
+ map_call(Fun, Checkpoints, ok);
+ _ ->
+ ok
+ end.
+
+tm_change_table_copy_type(Tab, From, To) ->
+ case val({Tab, commit_work}) of
+ [{checkpoints, Checkpoints} | _ ] ->
+ Fun = fun(Name) -> call(Name, {change_copy, Tab, From, To}) end,
+ map_call(Fun, Checkpoints, ok);
+ _ ->
+ ok
+ end.
+
+map_call(Fun, [Name | Names], Res) ->
+ case Fun(Name) of
+ ok ->
+ map_call(Fun, Names, Res);
+ {error, {no_exists, Name}} ->
+ map_call(Fun, Names, Res);
+ {error, Reason} ->
+ %% BUGBUG: We may end up with some checkpoint retainers
+ %% too much in the add_copy case. How do we remove them?
+ map_call(Fun, Names, {error, Reason})
+ end;
+map_call(_Fun, [], Res) ->
+ Res.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Public functions
+
+deactivate(Name) ->
+ case call(Name, get_checkpoint) of
+ {error, Reason} ->
+ {error, Reason};
+ Cp ->
+ deactivate(Cp#checkpoint_args.nodes, Name)
+ end.
+
+deactivate(Nodes, Name) ->
+ rpc:multicall(Nodes, ?MODULE, remote_deactivate, [Name]),
+ ok.
+
+remote_deactivate(Name) ->
+ call(Name, deactivate).
+
+checkpoints() -> val(checkpoints).
+
+tables_and_cookie(Name) ->
+ case call(Name, get_checkpoint) of
+ {error, Reason} ->
+ {error, Reason};
+ Cp ->
+ Tabs = Cp#checkpoint_args.min ++ Cp#checkpoint_args.max,
+ Cookie = Cp#checkpoint_args.cookie,
+ {ok, Tabs, Cookie}
+ end.
+
+most_local_node(Name, Tab) ->
+ case ?catch_val({Tab, {retainer, Name}}) of
+ {'EXIT', _} ->
+ {error, {"No retainer attached to table", [Tab, Name]}};
+ R ->
+ Writers = R#retainer.writers,
+ LocalWriter = lists:member(node(), Writers),
+ if
+ LocalWriter == true ->
+ {ok, node()};
+ Writers /= [] ->
+ {ok, hd(Writers)};
+ true ->
+ {error, {"No retainer attached to table", [Tab, Name]}}
+ end
+ end.
+
+really_retain(Name, Tab) ->
+ R = val({Tab, {retainer, Name}}),
+ R#retainer.really_retain.
+
+%% Activate a checkpoint.
+%%
+%% A checkpoint is a transaction consistent state that may be used to
+%% perform a distributed backup or to rollback the involved tables to
+%% their old state. Backups may also be used to restore tables to
+%% their old state. Args is a list of the following tuples:
+%%
+%% {name, Name}
+%% Name of checkpoint. Each checkpoint must have a name which
+%% is unique on the reachable nodes. The name may be reused when
+%% the checkpoint has been deactivated.
+%% By default a probably unique name is generated.
+%% Multiple checkpoints may be set on the same table.
+%%
+%% {allow_remote, Bool}
+%% false means that all retainers must be local. If the
+%% table does not reside locally, the checkpoint fails.
+%% true allows retainers on other nodes.
+%%
+%% {min, MinTabs}
+%% Minimize redundancy and only keep checkpoint info together with
+%% one replica, preferrably at the local node. If any node involved
+%% the checkpoint goes down, the checkpoint is deactivated.
+%%
+%% {max, MaxTabs}
+%% Maximize redundancy and keep checkpoint info together with all
+%% replicas. The checkpoint becomes more fault tolerant if the
+%% tables has several replicas. When new replicas are added, they
+%% will also get a retainer attached to them.
+%%
+%% {ram_overrides_dump, Bool}
+%% {ram_overrides_dump, Tabs}
+%% Only applicable for ram_copies. Bool controls which versions of
+%% the records that should be included in the checkpoint state.
+%% true means that the latest comitted records in ram (i.e. the
+%% records that the application accesses) should be included
+%% in the checkpoint. false means that the records dumped to
+%% dat-files (the records that will be loaded at startup) should
+%% be included in the checkpoint. Tabs is a list of tables.
+%% Default is false.
+%%
+%% {ignore_new, TidList}
+%% Normally we wait for all pending transactions to complete
+%% before we allow iteration over the checkpoint. But in order
+%% to cope with checkpoint activation inside a transaction that
+%% currently prepares commit (mnesia_init:get_net_work_copy) we
+%% need to have the ability to ignore the enclosing transaction.
+%% We do not wait for the transactions in TidList to end. The
+%% transactions in TidList are regarded as newer than the checkpoint.
+
+activate(Args) ->
+ case args2cp(Args) of
+ {ok, Cp} ->
+ do_activate(Cp);
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+args2cp(Args) when is_list(Args)->
+ case catch lists:foldl(fun check_arg/2, #checkpoint_args{}, Args) of
+ {'EXIT', Reason} ->
+ {error, Reason};
+ Cp ->
+ case check_tables(Cp) of
+ {error, Reason} ->
+ {error, Reason};
+ {ok, Overriders, AllTabs} ->
+ arrange_retainers(Cp, Overriders, AllTabs)
+ end
+ end;
+args2cp(Args) ->
+ {error, {badarg, Args}}.
+
+check_arg({name, Name}, Cp) ->
+ case lists:member(Name, checkpoints()) of
+ true ->
+ exit({already_exists, Name});
+ false ->
+ case catch tab2retainer({foo, Name}) of
+ List when is_list(List) ->
+ Cp#checkpoint_args{name = Name};
+ _ ->
+ exit({badarg, Name})
+ end
+ end;
+check_arg({allow_remote, true}, Cp) ->
+ Cp#checkpoint_args{allow_remote = true};
+check_arg({allow_remote, false}, Cp) ->
+ Cp#checkpoint_args{allow_remote = false};
+check_arg({ram_overrides_dump, true}, Cp) ->
+ Cp#checkpoint_args{ram_overrides_dump = true};
+check_arg({ram_overrides_dump, false}, Cp) ->
+ Cp#checkpoint_args{ram_overrides_dump = false};
+check_arg({ram_overrides_dump, Tabs}, Cp) when is_list(Tabs) ->
+ Cp#checkpoint_args{ram_overrides_dump = Tabs};
+check_arg({min, Tabs}, Cp) when is_list(Tabs) ->
+ Cp#checkpoint_args{min = Tabs};
+check_arg({max, Tabs}, Cp) when is_list(Tabs) ->
+ Cp#checkpoint_args{max = Tabs};
+check_arg({ignore_new, Tids}, Cp) when is_list(Tids) ->
+ Cp#checkpoint_args{ignore_new = Tids};
+check_arg(Arg, _) ->
+ exit({badarg, Arg}).
+
+check_tables(Cp) ->
+ Min = Cp#checkpoint_args.min,
+ Max = Cp#checkpoint_args.max,
+ AllTabs = Min ++ Max,
+ DoubleTabs = [T || T <- Min, lists:member(T, Max)],
+ Overriders = Cp#checkpoint_args.ram_overrides_dump,
+ if
+ DoubleTabs /= [] ->
+ {error, {combine_error, Cp#checkpoint_args.name,
+ [{min, DoubleTabs}, {max, DoubleTabs}]}};
+ Min == [], Max == [] ->
+ {error, {combine_error, Cp#checkpoint_args.name,
+ [{min, Min}, {max, Max}]}};
+ Overriders == false ->
+ {ok, [], AllTabs};
+ Overriders == true ->
+ {ok, AllTabs, AllTabs};
+ is_list(Overriders) ->
+ case [T || T <- Overriders, not lists:member(T, Min)] of
+ [] ->
+ case [T || T <- Overriders, not lists:member(T, Max)] of
+ [] ->
+ {ok, Overriders, AllTabs};
+ Outsiders ->
+ {error, {combine_error, Cp#checkpoint_args.name,
+ [{ram_overrides_dump, Outsiders},
+ {max, Outsiders}]}}
+ end;
+ Outsiders ->
+ {error, {combine_error, Cp#checkpoint_args.name,
+ [{ram_overrides_dump, Outsiders},
+ {min, Outsiders}]}}
+ end
+ end.
+
+arrange_retainers(Cp, Overriders, AllTabs) ->
+ R = #retainer{cp_name = Cp#checkpoint_args.name},
+ case catch [R#retainer{tab_name = Tab,
+ writers = select_writers(Cp, Tab)}
+ || Tab <- AllTabs] of
+ {'EXIT', Reason} ->
+ {error, Reason};
+ Retainers ->
+ {ok, Cp#checkpoint_args{ram_overrides_dump = Overriders,
+ retainers = Retainers,
+ nodes = writers(Retainers)}}
+ end.
+
+select_writers(Cp, Tab) ->
+ case filter_remote(Cp, val({Tab, active_replicas})) of
+ [] ->
+ exit({"Cannot prepare checkpoint (replica not available)",
+ [Tab, Cp#checkpoint_args.name]});
+ Writers ->
+ This = node(),
+ case {lists:member(Tab, Cp#checkpoint_args.max),
+ lists:member(This, Writers)} of
+ {true, _} -> Writers; % Max
+ {false, true} -> [This];
+ {false, false} -> [hd(Writers)]
+ end
+ end.
+
+filter_remote(Cp, Writers) when Cp#checkpoint_args.allow_remote == true ->
+ Writers;
+filter_remote(_Cp, Writers) ->
+ This = node(),
+ case lists:member(This, Writers) of
+ true -> [This];
+ false -> []
+ end.
+
+writers(Retainers) ->
+ Fun = fun(R, Acc) -> R#retainer.writers ++ Acc end,
+ Writers = lists:foldl(Fun, [], Retainers),
+ mnesia_lib:uniq(Writers).
+
+do_activate(Cp) ->
+ Name = Cp#checkpoint_args.name,
+ Nodes = Cp#checkpoint_args.nodes,
+ case mnesia_tm:prepare_checkpoint(Nodes, Cp) of
+ {Replies, []} ->
+ check_prep(Replies, Name, Nodes, Cp#checkpoint_args.ignore_new);
+ {_, BadNodes} ->
+ {error, {"Cannot prepare checkpoint (bad nodes)",
+ [Name, BadNodes]}}
+ end.
+
+check_prep([{ok, Name, IgnoreNew, _Node} | Replies], Name, Nodes, IgnoreNew) ->
+ check_prep(Replies, Name, Nodes, IgnoreNew);
+check_prep([{error, Reason} | _Replies], Name, _Nodes, _IgnoreNew) ->
+ {error, {"Cannot prepare checkpoint (bad reply)",
+ [Name, Reason]}};
+check_prep([{badrpc, Reason} | _Replies], Name, _Nodes, _IgnoreNew) ->
+ {error, {"Cannot prepare checkpoint (badrpc)",
+ [Name, Reason]}};
+check_prep([], Name, Nodes, IgnoreNew) ->
+ collect_pending(Name, Nodes, IgnoreNew).
+
+collect_pending(Name, Nodes, IgnoreNew) ->
+ case rpc:multicall(Nodes, ?MODULE, call, [Name, collect_pending]) of
+ {Replies, []} ->
+ case catch ?ets_new_table(mnesia_union, [bag]) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table pending union",
+ {error, {system_limit, Msg, Reason}};
+ UnionTab ->
+ compute_union(Replies, Nodes, Name, UnionTab, IgnoreNew)
+ end;
+ {_, BadNodes} ->
+ deactivate(Nodes, Name),
+ {error, {"Cannot collect from pending checkpoint", Name, BadNodes}}
+ end.
+
+compute_union([{ok, Pending} | Replies], Nodes, Name, UnionTab, IgnoreNew) ->
+ add_pending(Pending, UnionTab),
+ compute_union(Replies, Nodes, Name, UnionTab, IgnoreNew);
+compute_union([{error, Reason} | _Replies], Nodes, Name, UnionTab, _IgnoreNew) ->
+ deactivate(Nodes, Name),
+ ?ets_delete_table(UnionTab),
+ {error, Reason};
+compute_union([{badrpc, Reason} | _Replies], Nodes, Name, UnionTab, _IgnoreNew) ->
+ deactivate(Nodes, Name),
+ ?ets_delete_table(UnionTab),
+ {error, {badrpc, Reason}};
+compute_union([], Nodes, Name, UnionTab, IgnoreNew) ->
+ send_activate(Nodes, Nodes, Name, UnionTab, IgnoreNew).
+
+add_pending([P | Pending], UnionTab) ->
+ add_pending_node(P#pending.disc_nodes, P#pending.tid, UnionTab),
+ add_pending_node(P#pending.ram_nodes, P#pending.tid, UnionTab),
+ add_pending(Pending, UnionTab);
+add_pending([], _UnionTab) ->
+ ok.
+
+add_pending_node([Node | Nodes], Tid, UnionTab) ->
+ ?ets_insert(UnionTab, {Node, Tid}),
+ add_pending_node(Nodes, Tid, UnionTab);
+add_pending_node([], _Tid, _UnionTab) ->
+ ok.
+
+send_activate([Node | Nodes], AllNodes, Name, UnionTab, IgnoreNew) ->
+ Pending = [Tid || {_, Tid} <- ?ets_lookup(UnionTab, Node),
+ not lists:member(Tid, IgnoreNew)],
+ case rpc:call(Node, ?MODULE, call, [Name, {activate, Pending}]) of
+ activated ->
+ send_activate(Nodes, AllNodes, Name, UnionTab, IgnoreNew);
+ {badrpc, Reason} ->
+ deactivate(Nodes, Name),
+ ?ets_delete_table(UnionTab),
+ {error, {"Activation failed (bad node)", Name, Node, Reason}};
+ {error, Reason} ->
+ deactivate(Nodes, Name),
+ ?ets_delete_table(UnionTab),
+ {error, {"Activation failed", Name, Node, Reason}}
+ end;
+send_activate([], AllNodes, Name, UnionTab, _IgnoreNew) ->
+ ?ets_delete_table(UnionTab),
+ {ok, Name, AllNodes}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Checkpoint server
+
+cast(Name, Msg) ->
+ case ?catch_val({checkpoint, Name}) of
+ {'EXIT', _} ->
+ {error, {no_exists, Name}};
+
+ Pid when is_pid(Pid) ->
+ Pid ! {self(), Msg},
+ {ok, Pid}
+ end.
+
+call(Name, Msg) ->
+ case ?catch_val({checkpoint, Name}) of
+ {'EXIT', _} ->
+ {error, {no_exists, Name}};
+
+ Pid when is_pid(Pid) ->
+ Monitor = erlang:monitor(process, Pid), %catch link(Pid), % Always local
+ Pid ! {self(), Msg},
+ Self = self(),
+ receive
+ {'EXIT', Pid, Reason} ->
+ {error, {"Got exit", [Name, Reason]}};
+ {'DOWN', Monitor, _, Pid, Reason} ->
+ {error, {"Got exit", [Name, Reason]}};
+ {Name, Self, Reply} ->
+ erlang:demonitor(Monitor),
+ Reply
+ end;
+ Error ->
+ Error
+ end.
+
+abcast(Nodes, Name, Msg) ->
+ rpc:eval_everywhere(Nodes, ?MODULE, cast, [Name, Msg]).
+
+reply(nopid, _Name, _Reply) ->
+ ignore;
+reply(ReplyTo, Name, Reply) ->
+ ReplyTo ! {Name, ReplyTo, Reply}.
+
+%% Returns {ok, NewCp} or {error, Reason}
+start_retainer(Cp) ->
+ % Will never be restarted
+ Name = Cp#checkpoint_args.name,
+ case supervisor:start_child(mnesia_checkpoint_sup, [Cp]) of
+ {ok, _Pid} ->
+ {ok, Name, Cp#checkpoint_args.ignore_new, node()};
+ {error, Reason} ->
+ {error, {"Cannot create checkpoint retainer",
+ Name, node(), Reason}}
+ end.
+
+start(Cp) ->
+ Name = Cp#checkpoint_args.name,
+ Args = [Cp#checkpoint_args{supervisor = self()}],
+ mnesia_monitor:start_proc({?MODULE, Name}, ?MODULE, init, Args).
+
+init(Cp) ->
+ process_flag(trap_exit, true),
+ process_flag(priority, high), %% Needed dets files might starve the system
+ Name = Cp#checkpoint_args.name,
+ Props = [set, public, {keypos, 2}],
+ case catch ?ets_new_table(mnesia_pending_checkpoint, Props) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table for pending transactions",
+ Error = {error, {system_limit, Name, Msg, Reason}},
+ proc_lib:init_ack(Cp#checkpoint_args.supervisor, Error);
+ PendingTab ->
+ Rs = [prepare_tab(Cp, R) || R <- Cp#checkpoint_args.retainers],
+ Cp2 = Cp#checkpoint_args{retainers = Rs,
+ pid = self(),
+ pending_tab = PendingTab},
+ add(pending_checkpoint_pids, self()),
+ add(pending_checkpoints, PendingTab),
+ set({checkpoint, Name}, self()),
+ add(checkpoints, Name),
+ dbg_out("Checkpoint ~p (~p) started~n", [Name, self()]),
+ proc_lib:init_ack(Cp2#checkpoint_args.supervisor, {ok, self()}),
+ retainer_loop(Cp2)
+ end.
+
+prepare_tab(Cp, R) ->
+ Tab = R#retainer.tab_name,
+ prepare_tab(Cp, R, val({Tab, storage_type})).
+
+prepare_tab(Cp, R, Storage) ->
+ Tab = R#retainer.tab_name,
+ Name = R#retainer.cp_name,
+ case lists:member(node(), R#retainer.writers) of
+ true ->
+ R2 = retainer_create(Cp, R, Tab, Name, Storage),
+ set({Tab, {retainer, Name}}, R2),
+ %% Keep checkpoint info for table_info & mnesia_session
+ add({Tab, checkpoints}, Name),
+ add_chkp_info(Tab, Name),
+ R2;
+ false ->
+ set({Tab, {retainer, Name}}, R#retainer{store = undefined}),
+ R
+ end.
+
+add_chkp_info(Tab, Name) ->
+ case val({Tab, commit_work}) of
+ [{checkpoints, OldList} | CommitList] ->
+ case lists:member(Name, OldList) of
+ true ->
+ ok;
+ false ->
+ NewC = [{checkpoints, [Name | OldList]} | CommitList],
+ mnesia_lib:set({Tab, commit_work}, NewC)
+ end;
+ CommitList ->
+ Chkp = {checkpoints, [Name]},
+ %% OBS checkpoints needs to be first in the list!
+ mnesia_lib:set({Tab, commit_work}, [Chkp | CommitList])
+ end.
+
+tab2retainer({Tab, Name}) ->
+ FlatName = lists:flatten(io_lib:write(Name)),
+ mnesia_lib:dir(lists:concat([?MODULE, "_", Tab, "_", FlatName, ".RET"])).
+
+retainer_create(_Cp, R, Tab, Name, disc_only_copies) ->
+ Fname = tab2retainer({Tab, Name}),
+ file:delete(Fname),
+ Args = [{file, Fname}, {type, set}, {keypos, 2}, {repair, false}],
+ {ok, _} = mnesia_lib:dets_sync_open({Tab, Name}, Args),
+ dbg_out("Checkpoint retainer created ~p ~p~n", [Name, Tab]),
+ R#retainer{store = {dets, {Tab, Name}}, really_retain = true};
+retainer_create(Cp, R, Tab, Name, Storage) ->
+ T = ?ets_new_table(mnesia_retainer, [set, public, {keypos, 2}]),
+ Overriders = Cp#checkpoint_args.ram_overrides_dump,
+ ReallyR = R#retainer.really_retain,
+ ReallyCp = lists:member(Tab, Overriders),
+ ReallyR2 = prepare_ram_tab(Tab, T, Storage, ReallyR, ReallyCp),
+ dbg_out("Checkpoint retainer created ~p ~p~n", [Name, Tab]),
+ R#retainer{store = {ets, T}, really_retain = ReallyR2}.
+
+%% Copy the dumped table into retainer if needed
+%% If the really_retain flag already has been set to false,
+%% it should remain false even if we change storage type
+%% while the checkpoint is activated.
+prepare_ram_tab(Tab, T, ram_copies, true, false) ->
+ Fname = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Fname) of
+ true ->
+ Log = mnesia_log:open_log(prepare_ram_tab,
+ mnesia_log:dcd_log_header(),
+ Fname, true,
+ mnesia_monitor:get_env(auto_repair),
+ read_only),
+ Add = fun(Rec) ->
+ Key = element(2, Rec),
+ Recs =
+ case ?ets_lookup(T, Key) of
+ [] -> [];
+ [{_, _, Old}] -> Old
+ end,
+ ?ets_insert(T, {Tab, Key, [Rec | Recs]}),
+ continue
+ end,
+ traverse_dcd(mnesia_log:chunk_log(Log, start), Log, Add),
+ mnesia_log:close_log(Log);
+ false ->
+ ok
+ end,
+ false;
+prepare_ram_tab(_, _, _, ReallyRetain, _) ->
+ ReallyRetain.
+
+traverse_dcd({Cont, [LogH | Rest]}, Log, Fun)
+ when is_record(LogH, log_header),
+ LogH#log_header.log_kind == dcd_log,
+ LogH#log_header.log_version >= "1.0" ->
+ traverse_dcd({Cont, Rest}, Log, Fun); %% BUGBUG Error handling repaired files
+traverse_dcd({Cont, Recs}, Log, Fun) -> %% trashed data??
+ lists:foreach(Fun, Recs),
+ traverse_dcd(mnesia_log:chunk_log(Log, Cont), Log, Fun);
+traverse_dcd(eof, _Log, _Fun) ->
+ ok.
+
+retainer_get({ets, Store}, Key) -> ?ets_lookup(Store, Key);
+retainer_get({dets, Store}, Key) -> dets:lookup(Store, Key).
+
+retainer_put({ets, Store}, Val) -> ?ets_insert(Store, Val);
+retainer_put({dets, Store}, Val) -> dets:insert(Store, Val).
+
+retainer_first({ets, Store}) -> ?ets_first(Store);
+retainer_first({dets, Store}) -> dets:first(Store).
+
+retainer_next({ets, Store}, Key) -> ?ets_next(Store, Key);
+retainer_next({dets, Store}, Key) -> dets:next(Store, Key).
+
+%% retainer_next_slot(Tab, Pos) ->
+%% case retainer_slot(Tab, Pos) of
+%% '$end_of_table' ->
+%% '$end_of_table';
+%% [] ->
+%% retainer_next_slot(Tab, Pos + 1);
+%% Recs when is_list(Recs) ->
+%% {Pos, Recs}
+%% end.
+%%
+%% retainer_slot({ets, Store}, Pos) -> ?ets_next(Store, Pos);
+%% retainer_slot({dets, Store}, Pos) -> dets:slot(Store, Pos).
+
+retainer_fixtable(Tab, Bool) when is_atom(Tab) ->
+ mnesia_lib:db_fixtable(val({Tab, storage_type}), Tab, Bool);
+retainer_fixtable({ets, Tab}, Bool) ->
+ mnesia_lib:db_fixtable(ram_copies, Tab, Bool);
+retainer_fixtable({dets, Tab}, Bool) ->
+ mnesia_lib:db_fixtable(disc_only_copies, Tab, Bool).
+
+retainer_delete({ets, Store}) ->
+ ?ets_delete_table(Store);
+retainer_delete({dets, Store}) ->
+ mnesia_lib:dets_sync_close(Store),
+ Fname = tab2retainer(Store),
+ file:delete(Fname).
+
+retainer_loop(Cp) ->
+ Name = Cp#checkpoint_args.name,
+ receive
+ {_From, {retain, Tid, Tab, Key, OldRecs}}
+ when Cp#checkpoint_args.wait_for_old == [] ->
+ R = val({Tab, {retainer, Name}}),
+ PendingTab = Cp#checkpoint_args.pending_tab,
+ case R#retainer.really_retain of
+ true when PendingTab =:= undefined ->
+ Store = R#retainer.store,
+ case retainer_get(Store, Key) of
+ [] -> retainer_put(Store, {Tab, Key, OldRecs});
+ _ -> already_retained
+ end;
+ true ->
+ case ets:member(PendingTab, Tid) of
+ true -> ignore;
+ false ->
+ Store = R#retainer.store,
+ case retainer_get(Store, Key) of
+ [] -> retainer_put(Store, {Tab, Key, OldRecs});
+ _ -> already_retained
+ end
+ end;
+ false ->
+ ignore
+ end,
+ retainer_loop(Cp);
+
+ %% Adm
+ {From, deactivate} ->
+ do_stop(Cp),
+ reply(From, Name, deactivated),
+ unlink(From),
+ exit(shutdown);
+
+ {'EXIT', Parent, _} when Parent == Cp#checkpoint_args.supervisor ->
+ %% do_stop(Cp),
+ %% assume that entire Mnesia is terminating
+ exit(shutdown);
+
+ {_From, {mnesia_down, Node}} ->
+ Cp2 = do_del_retainers(Cp, Node),
+ retainer_loop(Cp2);
+ {From, get_checkpoint} ->
+ reply(From, Name, Cp),
+ retainer_loop(Cp);
+ {From, {add_copy, Tab, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
+ {Res, Cp2} = do_add_copy(Cp, Tab, Node),
+ reply(From, Name, Res),
+ retainer_loop(Cp2);
+ {From, {del_copy, Tab, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
+ Cp2 = do_del_copy(Cp, Tab, Node),
+ reply(From, Name, ok),
+ retainer_loop(Cp2);
+ {From, {change_copy, Tab, From, To}} when Cp#checkpoint_args.wait_for_old == [] ->
+ Cp2 = do_change_copy(Cp, Tab, From, To),
+ reply(From, Name, ok),
+ retainer_loop(Cp2);
+ {_From, {add_retainer, R, Node}} ->
+ Cp2 = do_add_retainer(Cp, R, Node),
+ retainer_loop(Cp2);
+ {_From, {del_retainer, R, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
+ Cp2 = do_del_retainer(Cp, R, Node),
+ retainer_loop(Cp2);
+
+ %% Iteration
+ {From, {iter_begin, Iter}} when Cp#checkpoint_args.wait_for_old == [] ->
+ Cp2 = iter_begin(Cp, From, Iter),
+ retainer_loop(Cp2);
+
+ {From, {iter_end, Iter}} when Cp#checkpoint_args.wait_for_old == [] ->
+ retainer_fixtable(Iter#iter.oid_tab, false),
+ Iters = Cp#checkpoint_args.iterators -- [Iter],
+ reply(From, Name, ok),
+ retainer_loop(Cp#checkpoint_args{iterators = Iters});
+
+ {_From, {exit_pending, Tid}}
+ when is_list(Cp#checkpoint_args.wait_for_old) ->
+ StillPending = lists:delete(Tid, Cp#checkpoint_args.wait_for_old),
+ Cp2 = Cp#checkpoint_args{wait_for_old = StillPending},
+ Cp3 = maybe_activate(Cp2),
+ retainer_loop(Cp3);
+
+ {From, collect_pending} ->
+ PendingTab = Cp#checkpoint_args.pending_tab,
+ del(pending_checkpoints, PendingTab),
+ Pending = ?ets_match_object(PendingTab, '_'),
+ reply(From, Name, {ok, Pending}),
+ retainer_loop(Cp);
+
+ {From, {activate, Pending}} ->
+ StillPending = mnesia_recover:still_pending(Pending),
+ enter_still_pending(StillPending, Cp#checkpoint_args.pending_tab),
+ Cp2 = maybe_activate(Cp#checkpoint_args{wait_for_old = StillPending}),
+ reply(From, Name, activated),
+ retainer_loop(Cp2);
+
+ {'EXIT', From, _Reason} ->
+ Iters = [Iter || Iter <- Cp#checkpoint_args.iterators,
+ check_iter(From, Iter)],
+ retainer_loop(Cp#checkpoint_args{iterators = Iters});
+
+ {system, From, Msg} ->
+ dbg_out("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
+ sys:handle_system_msg(Msg, From, no_parent, ?MODULE, [], Cp)
+ end.
+
+maybe_activate(Cp)
+ when Cp#checkpoint_args.wait_for_old == [],
+ Cp#checkpoint_args.is_activated == false ->
+ Cp#checkpoint_args{pending_tab = undefined, is_activated = true};
+maybe_activate(Cp) ->
+ Cp.
+
+iter_begin(Cp, From, Iter) ->
+ Name = Cp#checkpoint_args.name,
+ R = val({Iter#iter.tab_name, {retainer, Name}}),
+ Iter2 = init_tabs(R, Iter),
+ Iter3 = Iter2#iter{pid = From},
+ retainer_fixtable(Iter3#iter.oid_tab, true),
+ Iters = [Iter3 | Cp#checkpoint_args.iterators],
+ reply(From, Name, {ok, Iter3, self()}),
+ Cp#checkpoint_args{iterators = Iters}.
+
+do_stop(Cp) ->
+ Name = Cp#checkpoint_args.name,
+ del(pending_checkpoints, Cp#checkpoint_args.pending_tab),
+ del(pending_checkpoint_pids, self()),
+ del(checkpoints, Name),
+ unset({checkpoint, Name}),
+ lists:foreach(fun deactivate_tab/1, Cp#checkpoint_args.retainers),
+ Iters = Cp#checkpoint_args.iterators,
+ lists:foreach(fun(I) -> retainer_fixtable(I#iter.oid_tab, false) end, Iters).
+
+deactivate_tab(R) ->
+ Name = R#retainer.cp_name,
+ Tab = R#retainer.tab_name,
+ try
+ Active = lists:member(node(), R#retainer.writers),
+ case R#retainer.store of
+ undefined ->
+ ignore;
+ Store when Active == true ->
+ retainer_delete(Store);
+ _ ->
+ ignore
+ end,
+ unset({Tab, {retainer, Name}}),
+ del({Tab, checkpoints}, Name), %% Keep checkpoint info for table_info & mnesia_session
+ del_chkp_info(Tab, Name)
+ catch _:_ -> ignore
+ end.
+
+del_chkp_info(Tab, Name) ->
+ case val({Tab, commit_work}) of
+ [{checkpoints, ChkList} | Rest] ->
+ case lists:delete(Name, ChkList) of
+ [] ->
+ %% The only checkpoint was deleted
+ mnesia_lib:set({Tab, commit_work}, Rest);
+ NewList ->
+ mnesia_lib:set({Tab, commit_work},
+ [{checkpoints, NewList} | Rest])
+ end;
+ _ -> ignore
+ end.
+
+do_del_retainers(Cp, Node) ->
+ Rs = [do_del_retainer2(Cp, R, Node) || R <- Cp#checkpoint_args.retainers],
+ Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.
+
+do_del_retainer2(Cp, R, Node) ->
+ Writers = R#retainer.writers -- [Node],
+ R2 = R#retainer{writers = Writers},
+ set({R2#retainer.tab_name, {retainer, R2#retainer.cp_name}}, R2),
+ if
+ Writers == [] ->
+ Event = {mnesia_checkpoint_deactivated, Cp#checkpoint_args.name},
+ mnesia_lib:report_system_event(Event),
+ do_stop(Cp),
+ exit(shutdown);
+ Node == node() ->
+ deactivate_tab(R), % Avoids unnecessary tm_retain accesses
+ set({R2#retainer.tab_name, {retainer, R2#retainer.cp_name}}, R2),
+ R2;
+ true ->
+ R2
+ end.
+
+do_del_retainer(Cp, R0, Node) ->
+ {R, Rest} = find_retainer(R0, Cp#checkpoint_args.retainers, []),
+ R2 = do_del_retainer2(Cp, R, Node),
+ Rs = [R2|Rest],
+ Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.
+
+do_del_copy(Cp, Tab, ThisNode) when ThisNode == node() ->
+ Name = Cp#checkpoint_args.name,
+ Others = Cp#checkpoint_args.nodes -- [ThisNode],
+ R = val({Tab, {retainer, Name}}),
+ abcast(Others, Name, {del_retainer, R, ThisNode}),
+ do_del_retainer(Cp, R, ThisNode).
+
+do_add_copy(Cp, Tab, Node) when Node /= node()->
+ case lists:member(Tab, Cp#checkpoint_args.max) of
+ false ->
+ {ok, Cp};
+ true ->
+ Name = Cp#checkpoint_args.name,
+ R0 = val({Tab, {retainer, Name}}),
+ W = R0#retainer.writers,
+ R = R0#retainer{writers = W ++ [Node]},
+
+ case lists:member(Node, Cp#checkpoint_args.nodes) of
+ true ->
+ send_retainer(Cp, R, Node);
+ false ->
+ case tm_remote_prepare(Node, Cp) of
+ {ok, Name, _IgnoreNew, Node} ->
+ case lists:member(schema, Cp#checkpoint_args.max) of
+ true ->
+ %% We need to send schema retainer somewhere
+ RS0 = val({schema, {retainer, Name}}),
+ WS = RS0#retainer.writers,
+ RS1 = RS0#retainer{writers = WS ++ [Node]},
+ {ok, Cp1} = send_retainer(Cp, RS1, Node),
+ send_retainer(Cp1, R, Node);
+ false ->
+ send_retainer(Cp, R, Node)
+ end;
+ {badrpc, Reason} ->
+ {{error, {badrpc, Reason}}, Cp};
+ {error, Reason} ->
+ {{error, Reason}, Cp}
+ end
+ end
+ end.
+
+tm_remote_prepare(Node, Cp) ->
+ rpc:call(Node, ?MODULE, tm_prepare, [Cp]).
+
+do_add_retainer(Cp, R0, Node) ->
+ Writers = R0#retainer.writers,
+ {R, Rest} = find_retainer(R0, Cp#checkpoint_args.retainers, []),
+ NewRet =
+ if
+ Node == node() ->
+ prepare_tab(Cp, R#retainer{writers = Writers});
+ true ->
+ R#retainer{writers = Writers}
+ end,
+ Rs = [NewRet | Rest],
+ set({NewRet#retainer.tab_name, {retainer, NewRet#retainer.cp_name}}, NewRet),
+ Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.
+
+find_retainer(#retainer{cp_name = CP, tab_name = Tab},
+ [Ret = #retainer{cp_name = CP, tab_name = Tab} | R], Acc) ->
+ {Ret, R ++ Acc};
+find_retainer(Ret, [H|R], Acc) ->
+ find_retainer(Ret, R, [H|Acc]).
+
+send_retainer(Cp, R, Node) ->
+ Name = Cp#checkpoint_args.name,
+ Nodes0 = Cp#checkpoint_args.nodes -- [Node],
+ Nodes = Nodes0 -- [node()],
+ Msg = {add_retainer, R, Node},
+ abcast(Nodes, Name, Msg),
+ {ok, _} = rpc:call(Node, ?MODULE, cast, [Name, Msg]),
+ Store = R#retainer.store,
+ send_retainer2(Node, Name, Store, retainer_first(Store)),
+ Cp2 = do_add_retainer(Cp, R, Node),
+ {ok, Cp2}.
+
+send_retainer2(_, _, _, '$end_of_table') ->
+ ok;
+%%send_retainer2(Node, Name, Store, {Slot, Records}) ->
+send_retainer2(Node, Name, Store, Key) ->
+ [{Tab, _, Records}] = retainer_get(Store, Key),
+ abcast([Node], Name, {retain, {dirty, send_retainer}, Tab, Key, Records}),
+ send_retainer2(Node, Name, Store, retainer_next(Store, Key)).
+
+do_change_copy(Cp, Tab, FromType, ToType) ->
+ Name = Cp#checkpoint_args.name,
+ R = val({Tab, {retainer, Name}}),
+ R2 = prepare_tab(Cp, R, ToType),
+ {_, Old} = R#retainer.store,
+ {_, New} = R2#retainer.store,
+
+ Fname = tab2retainer({Tab, Name}),
+ if
+ FromType == disc_only_copies ->
+ mnesia_lib:dets_sync_close(Old),
+ loaded = mnesia_lib:dets_to_ets(Old, New, Fname, set, no, yes),
+ ok = file:delete(Fname);
+ ToType == disc_only_copies ->
+ TabSize = ?ets_info(Old, size),
+ Props = [{file, Fname},
+ {type, set},
+ {keypos, 2},
+%% {ram_file, true},
+ {estimated_no_objects, TabSize + 256},
+ {repair, false}],
+ {ok, _} = mnesia_lib:dets_sync_open(New, Props),
+ ok = mnesia_dumper:raw_dump_table(New, Old),
+ ?ets_delete_table(Old);
+ true ->
+ ignore
+ end,
+ Pos = #retainer.tab_name,
+ Rs = lists:keyreplace(Tab, Pos, Cp#checkpoint_args.retainers, R2),
+ Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.
+
+check_iter(From, Iter) when Iter#iter.pid == From ->
+ retainer_fixtable(Iter#iter.oid_tab, false),
+ false;
+check_iter(_From, _Iter) ->
+ true.
+
+init_tabs(R, Iter) ->
+ {Kind, _} = Store = R#retainer.store,
+ Main = {Kind, Iter#iter.tab_name},
+ Ret = Store,
+ Iter2 = Iter#iter{main_tab = Main, retainer_tab = Ret},
+ case Iter#iter.source of
+ table -> Iter2#iter{oid_tab = Main};
+ retainer -> Iter2#iter{oid_tab = Ret}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Iteration
+%%
+%% Iterates over a table and applies Fun(ListOfRecords)
+%% with a suitable amount of records, e.g. 1000 or so.
+%% ListOfRecords is [] when the iteration is over.
+%%
+%% OidKind affects which internal table to be iterated over and
+%% ValKind affects which table to pick the actual records from. Legal
+%% values for OidKind and ValKind is the atom table or the atom
+%% retainer.
+%%
+%% The iteration may either be performed over the main table (which
+%% contains the latest values of the records, i.e. the values that
+%% are visible to the applications) or over the checkpoint retainer
+%% (which contains the values as the looked like the timepoint when
+%% the checkpoint was activated).
+%%
+%% It is possible to iterate over the main table and pick values
+%% from the retainer and vice versa.
+
+iterate(Name, Tab, Fun, Acc, Source, Val) ->
+ Iter0 = #iter{tab_name = Tab, source = Source, val = Val},
+ case call(Name, {iter_begin, Iter0}) of
+ {error, Reason} ->
+ {error, Reason};
+ {ok, Iter, Pid} ->
+ link(Pid), % We don't want any pending fixtable's
+ Res = (catch iter(Fun, Acc, Iter)),
+ unlink(Pid),
+ call(Name, {iter_end, Iter}),
+ case Res of
+ {'EXIT', Reason} -> {error, Reason};
+ {error, Reason} -> {error, Reason};
+ Acc2 -> {ok, Acc2}
+ end
+ end.
+
+iter(Fun, Acc, Iter)->
+ iter(Fun, Acc, Iter, retainer_first(Iter#iter.oid_tab)).
+
+iter(Fun, Acc, Iter, Key) ->
+ case get_records(Iter, Key) of
+ {'$end_of_table', []} ->
+ Fun([], Acc);
+ {'$end_of_table', Records} ->
+ Acc2 = Fun(Records, Acc),
+ Fun([], Acc2);
+ {Next, Records} ->
+ Acc2 = Fun(Records, Acc),
+ iter(Fun, Acc2, Iter, Next)
+ end.
+
+stop_iteration(Reason) ->
+ throw({error, {stopped, Reason}}).
+
+get_records(Iter, Key) ->
+ get_records(Iter, Key, 500, []). % 500 keys
+
+get_records(_Iter, Key, 0, Acc) ->
+ {Key, lists:append(lists:reverse(Acc))};
+get_records(_Iter, '$end_of_table', _I, Acc) ->
+ {'$end_of_table', lists:append(lists:reverse(Acc))};
+get_records(Iter, Key, I, Acc) ->
+ Recs = get_val(Iter, Key),
+ Next = retainer_next(Iter#iter.oid_tab, Key),
+ get_records(Iter, Next, I-1, [Recs | Acc]).
+
+get_val(Iter, Key) when Iter#iter.val == latest ->
+ get_latest_val(Iter, Key);
+get_val(Iter, Key) when Iter#iter.val == checkpoint ->
+ get_checkpoint_val(Iter, Key).
+
+get_latest_val(Iter, Key) when Iter#iter.source == table ->
+ retainer_get(Iter#iter.main_tab, Key);
+get_latest_val(Iter, Key) when Iter#iter.source == retainer ->
+ DeleteOid = {Iter#iter.tab_name, Key},
+ [DeleteOid | retainer_get(Iter#iter.main_tab, Key)].
+
+get_checkpoint_val(Iter, Key) when Iter#iter.source == table ->
+ retainer_get(Iter#iter.main_tab, Key);
+get_checkpoint_val(Iter, Key) when Iter#iter.source == retainer ->
+ DeleteOid = {Iter#iter.tab_name, Key},
+ case retainer_get(Iter#iter.retainer_tab, Key) of
+ [{_, _, []}] -> [DeleteOid];
+ [{_, _, Records}] -> [DeleteOid | Records]
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+system_continue(_Parent, _Debug, Cp) ->
+ retainer_loop(Cp).
+
+system_terminate(_Reason, _Parent,_Debug, Cp) ->
+ do_stop(Cp).
+
+system_code_change(Cp, _Module, _OldVsn, _Extra) ->
+ {ok, Cp}.
+
+convert_cp_record(Cp) when is_record(Cp, checkpoint) ->
+ ROD =
+ case Cp#checkpoint.ram_overrides_dump of
+ true -> Cp#checkpoint.min ++ Cp#checkpoint.max;
+ false -> []
+ end,
+
+ {ok, #checkpoint_args{name = Cp#checkpoint.name,
+ allow_remote = Cp#checkpoint.name,
+ ram_overrides_dump = ROD,
+ nodes = Cp#checkpoint.nodes,
+ node = Cp#checkpoint.node,
+ now = Cp#checkpoint.now,
+ cookie = ?unique_cookie,
+ min = Cp#checkpoint.min,
+ max = Cp#checkpoint.max,
+ pending_tab = Cp#checkpoint.pending_tab,
+ wait_for_old = Cp#checkpoint.wait_for_old,
+ is_activated = Cp#checkpoint.is_activated,
+ ignore_new = Cp#checkpoint.ignore_new,
+ retainers = Cp#checkpoint.retainers,
+ iterators = Cp#checkpoint.iterators,
+ supervisor = Cp#checkpoint.supervisor,
+ pid = Cp#checkpoint.pid
+ }};
+convert_cp_record(Cp) when is_record(Cp, checkpoint_args) ->
+ AllTabs = Cp#checkpoint_args.min ++ Cp#checkpoint_args.max,
+ ROD = case Cp#checkpoint_args.ram_overrides_dump of
+ [] ->
+ false;
+ AllTabs ->
+ true;
+ _ ->
+ error
+ end,
+ if
+ ROD == error ->
+ {error, {"Old node cannot handle new checkpoint protocol",
+ ram_overrides_dump}};
+ true ->
+ {ok, #checkpoint{name = Cp#checkpoint_args.name,
+ allow_remote = Cp#checkpoint_args.name,
+ ram_overrides_dump = ROD,
+ nodes = Cp#checkpoint_args.nodes,
+ node = Cp#checkpoint_args.node,
+ now = Cp#checkpoint_args.now,
+ min = Cp#checkpoint_args.min,
+ max = Cp#checkpoint_args.max,
+ pending_tab = Cp#checkpoint_args.pending_tab,
+ wait_for_old = Cp#checkpoint_args.wait_for_old,
+ is_activated = Cp#checkpoint_args.is_activated,
+ ignore_new = Cp#checkpoint_args.ignore_new,
+ retainers = Cp#checkpoint_args.retainers,
+ iterators = Cp#checkpoint_args.iterators,
+ supervisor = Cp#checkpoint_args.supervisor,
+ pid = Cp#checkpoint_args.pid
+ }}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
diff --git a/lib/mnesia/src/mnesia_checkpoint_sup.erl b/lib/mnesia/src/mnesia_checkpoint_sup.erl
new file mode 100644
index 0000000000..2fe8df52f7
--- /dev/null
+++ b/lib/mnesia/src/mnesia_checkpoint_sup.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_checkpoint_sup).
+
+-behaviour(supervisor).
+
+-export([start/0, init/1]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% top supervisor callback functions
+
+start() ->
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% sub supervisor callback functions
+
+init([]) ->
+ Flags = {simple_one_for_one, 0, timer:hours(24)}, % Trust the top supervisor
+ MFA = {mnesia_checkpoint, start, []},
+ Modules = [?MODULE, mnesia_checkpoint, supervisor],
+ KillAfter = mnesia_kernel_sup:supervisor_timeout(timer:seconds(3)),
+ Workers = [{?MODULE, MFA, transient, KillAfter, worker, Modules}],
+ {ok, {Flags, Workers}}.
diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
new file mode 100644
index 0000000000..9bc480e619
--- /dev/null
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -0,0 +1,2182 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% The mnesia_init process loads tables from local disc or from
+%% another nodes. It also coordinates updates of the info about
+%% where we can read and write tables.
+%%
+%% Tables may need to be loaded initially at startup of the local
+%% node or when other nodes announces that they already have loaded
+%% tables that we also want.
+%%
+%% Initially we set the load request queue to those tables that we
+%% safely can load locally, i.e. tables where we have the last
+%% consistent replica and we have received mnesia_down from all
+%% other nodes holding the table. Then we let the mnesia_init
+%% process enter its normal working state.
+%%
+%% When we need to load a table we append a request to the load
+%% request queue. All other requests are regarded as high priority
+%% and are processed immediately (e.g. update table whereabouts).
+%% We processes the load request queue as a "background" job..
+
+-module(mnesia_controller).
+
+-behaviour(gen_server).
+
+%% Mnesia internal stuff
+-export([
+ start/0,
+ i_have_tab/1,
+ info/0,
+ get_info/1,
+ get_workers/1,
+ force_load_table/1,
+ async_dump_log/1,
+ sync_dump_log/1,
+ connect_nodes/1,
+ wait_for_schema_commit_lock/0,
+ release_schema_commit_lock/0,
+ create_table/1,
+ get_disc_copy/1,
+ get_cstructs/0,
+ sync_and_block_table_whereabouts/4,
+ sync_del_table_copy_whereabouts/2,
+ block_table/1,
+ unblock_table/1,
+ block_controller/0,
+ unblock_controller/0,
+ unannounce_add_table_copy/2,
+ master_nodes_updated/2,
+ mnesia_down/1,
+ add_active_replica/2,
+ add_active_replica/3,
+ add_active_replica/4,
+ update/1,
+ change_table_access_mode/1,
+ del_active_replica/2,
+ wait_for_tables/2,
+ get_network_copy/2,
+ merge_schema/0,
+ start_remote_sender/4,
+ schedule_late_disc_load/2
+ ]).
+
+%% gen_server callbacks
+-export([init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3]).
+
+%% Module internal stuff
+-export([call/1,
+ cast/1,
+ dump_and_reply/2,
+ load_and_reply/2,
+ send_and_reply/2,
+ wait_for_tables_init/2,
+ connect_nodes2/2
+ ]).
+
+-import(mnesia_lib, [set/2, add/2]).
+-import(mnesia_lib, [fatal/2, error/2, verbose/2, dbg_out/2]).
+
+-include("mnesia.hrl").
+
+-define(SERVER_NAME, ?MODULE).
+
+-record(state, {supervisor,
+ schema_is_merged = false,
+ early_msgs = [],
+ loader_pid = [], %% Was Pid is now [{Pid,Work}|..]
+ loader_queue, %% Was list is now gb_tree
+ sender_pid = [], %% Was a pid or undef is now [{Pid,Work}|..]
+ sender_queue = [],
+ late_loader_queue, %% Was list is now gb_tree
+ dumper_pid, %% Dumper or schema commit pid
+ dumper_queue = [], %% Dumper or schema commit queue
+ others = [], %% Processes that needs the copier_done msg
+ dump_log_timer_ref,
+ is_stopping = false
+ }).
+%% Backwards Comp. Sender_pid is now a list of senders..
+get_senders(#state{sender_pid = Pids}) when is_list(Pids) -> Pids.
+%% Backwards Comp. loader_pid is now a list of loaders..
+get_loaders(#state{loader_pid = Pids}) when is_list(Pids) -> Pids.
+max_loaders() ->
+ case ?catch_val(no_table_loaders) of
+ {'EXIT', _} ->
+ mnesia_lib:set(no_table_loaders,1),
+ 1;
+ Val -> Val
+ end.
+
+-record(schema_commit_lock, {owner}).
+-record(block_controller, {owner}).
+
+-record(dump_log, {initiated_by,
+ opt_reply_to
+ }).
+
+-record(net_load, {table,
+ reason,
+ opt_reply_to,
+ cstruct = unknown
+ }).
+
+-record(send_table, {table,
+ receiver_pid,
+ remote_storage
+ }).
+
+-record(disc_load, {table,
+ reason,
+ opt_reply_to
+ }).
+
+-record(late_load, {table,
+ reason,
+ opt_reply_to,
+ loaders
+ }).
+
+-record(loader_done, {worker_pid,
+ is_loaded,
+ table_name,
+ needs_announce,
+ needs_sync,
+ needs_reply,
+ reply_to,
+ reply}).
+
+-record(sender_done, {worker_pid,
+ worker_res,
+ table_name
+ }).
+
+-record(dumper_done, {worker_pid,
+ worker_res
+ }).
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
+
+start() ->
+ gen_server:start_link({local, ?SERVER_NAME}, ?MODULE, [self()],
+ [{timeout, infinity}
+ %% ,{debug, [trace]}
+ ]).
+
+sync_dump_log(InitBy) ->
+ call({sync_dump_log, InitBy}).
+
+async_dump_log(InitBy) ->
+ ?SERVER_NAME ! {async_dump_log, InitBy}.
+
+%% Wait for tables to be active
+%% If needed, we will wait for Mnesia to start
+%% If Mnesia stops, we will wait for Mnesia to restart
+%% We will wait even if the list of tables is empty
+%%
+wait_for_tables(Tabs, Timeout) when is_list(Tabs), Timeout == infinity ->
+ do_wait_for_tables(Tabs, Timeout);
+wait_for_tables(Tabs, Timeout) when is_list(Tabs),
+ is_integer(Timeout), Timeout >= 0 ->
+ do_wait_for_tables(Tabs, Timeout);
+wait_for_tables(Tabs, Timeout) ->
+ {error, {badarg, Tabs, Timeout}}.
+
+do_wait_for_tables(Tabs, 0) ->
+ reply_wait(Tabs);
+do_wait_for_tables(Tabs, Timeout) ->
+ Pid = spawn_link(?MODULE, wait_for_tables_init, [self(), Tabs]),
+ receive
+ {?SERVER_NAME, Pid, Res} ->
+ Res;
+ {'EXIT', Pid, _} ->
+ reply_wait(Tabs)
+ after Timeout ->
+ unlink(Pid),
+ exit(Pid, timeout),
+ reply_wait(Tabs)
+ end.
+
+reply_wait(Tabs) ->
+ case catch mnesia_lib:active_tables() of
+ {'EXIT', _} ->
+ {error, {node_not_running, node()}};
+ Active when is_list(Active) ->
+ case Tabs -- Active of
+ [] ->
+ ok;
+ BadTabs ->
+ {timeout, BadTabs}
+ end
+ end.
+
+wait_for_tables_init(From, Tabs) ->
+ process_flag(trap_exit, true),
+ Res = wait_for_init(From, Tabs, whereis(?SERVER_NAME)),
+ From ! {?SERVER_NAME, self(), Res},
+ unlink(From),
+ exit(normal).
+
+wait_for_init(From, Tabs, Init) ->
+ case catch link(Init) of
+ {'EXIT', _} ->
+ %% Mnesia is not started
+ {error, {node_not_running, node()}};
+ true when is_pid(Init) ->
+ cast({sync_tabs, Tabs, self()}),
+ rec_tabs(Tabs, Tabs, From, Init)
+ end.
+
+sync_reply(Waiter, Tab) ->
+ Waiter ! {?SERVER_NAME, {tab_synced, Tab}}.
+
+rec_tabs([Tab | Tabs], AllTabs, From, Init) ->
+ receive
+ {?SERVER_NAME, {tab_synced, Tab}} ->
+ rec_tabs(Tabs, AllTabs, From, Init);
+
+ {'EXIT', From, _} ->
+ %% This will trigger an exit signal
+ %% to mnesia_init
+ exit(wait_for_tables_timeout);
+
+ {'EXIT', Init, _} ->
+ %% Oops, mnesia_init stopped,
+ exit(mnesia_stopped)
+ end;
+rec_tabs([], _, _, Init) ->
+ unlink(Init),
+ ok.
+
+get_cstructs() ->
+ call(get_cstructs).
+
+update(Fun) ->
+ call({update,Fun}).
+
+
+mnesia_down(Node) ->
+ case cast({mnesia_down, Node}) of
+ {error, _} -> mnesia_monitor:mnesia_down(?SERVER_NAME, Node);
+ _Pid -> ok
+ end.
+wait_for_schema_commit_lock() ->
+ link(whereis(?SERVER_NAME)),
+ unsafe_call(wait_for_schema_commit_lock).
+
+block_controller() ->
+ call(block_controller).
+
+unblock_controller() ->
+ cast(unblock_controller).
+
+release_schema_commit_lock() ->
+ cast({release_schema_commit_lock, self()}),
+ unlink(whereis(?SERVER_NAME)).
+
+%% Special for preparation of add table copy
+get_network_copy(Tab, Cs) ->
+% We can't let the controller queue this one
+% because that may cause a deadlock between schema_operations
+% and initial tableloadings which both takes schema locks.
+% But we have to get copier_done msgs when the other side
+% goes down.
+ call({add_other, self()}),
+ Reason = {dumper,add_table_copy},
+ Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
+ %% I'll need this cause it's linked trough the subscriber
+ %% might be solved by using monitor in subscr instead.
+ process_flag(trap_exit, true),
+ Load = load_table_fun(Work),
+ Res = (catch Load()),
+ process_flag(trap_exit, false),
+ call({del_other, self()}),
+ case Res of
+ #loader_done{is_loaded = true} ->
+ Tab = Res#loader_done.table_name,
+ case Res#loader_done.needs_announce of
+ true ->
+ i_have_tab(Tab);
+ false ->
+ ignore
+ end,
+ Res#loader_done.reply;
+ #loader_done{} ->
+ Res#loader_done.reply;
+ Else ->
+ {not_loaded, Else}
+ end.
+
+%% This functions is invoked from the dumper
+%%
+%% There are two cases here:
+%% startup ->
+%% no need for sync, since mnesia_controller not started yet
+%% schema_trans ->
+%% already synced with mnesia_controller since the dumper
+%% is syncronously started from mnesia_controller
+
+create_table(Tab) ->
+ {loaded, ok} = mnesia_loader:disc_load_table(Tab, {dumper,create_table}).
+
+get_disc_copy(Tab) ->
+ disc_load_table(Tab, {dumper,change_table_copy_type}, undefined).
+
+%% Returns ok instead of yes
+force_load_table(Tab) when is_atom(Tab), Tab /= schema ->
+ case ?catch_val({Tab, storage_type}) of
+ ram_copies ->
+ do_force_load_table(Tab);
+ disc_copies ->
+ do_force_load_table(Tab);
+ disc_only_copies ->
+ do_force_load_table(Tab);
+ unknown ->
+ set({Tab, load_by_force}, true),
+ cast({force_load_updated, Tab}),
+ wait_for_tables([Tab], infinity);
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}}
+ end;
+force_load_table(Tab) ->
+ {error, {bad_type, Tab}}.
+
+do_force_load_table(Tab) ->
+ Loaded = ?catch_val({Tab, load_reason}),
+ case Loaded of
+ unknown ->
+ set({Tab, load_by_force}, true),
+ mnesia_late_loader:async_late_disc_load(node(), [Tab], forced_by_user),
+ wait_for_tables([Tab], infinity);
+ {'EXIT', _} ->
+ set({Tab, load_by_force}, true),
+ mnesia_late_loader:async_late_disc_load(node(), [Tab], forced_by_user),
+ wait_for_tables([Tab], infinity);
+ _ ->
+ ok
+ end.
+master_nodes_updated(schema, _Masters) ->
+ ignore;
+master_nodes_updated(Tab, Masters) ->
+ cast({master_nodes_updated, Tab, Masters}).
+
+schedule_late_disc_load(Tabs, Reason) ->
+ MsgTag = late_disc_load,
+ try_schedule_late_disc_load(Tabs, Reason, MsgTag).
+
+try_schedule_late_disc_load(Tabs, _Reason, MsgTag)
+ when Tabs == [], MsgTag /= schema_is_merged ->
+ ignore;
+try_schedule_late_disc_load(Tabs, Reason, MsgTag) ->
+ GetIntents =
+ fun() ->
+ Item = mnesia_late_disc_load,
+ Nodes = val({current, db_nodes}),
+ mnesia:lock({global, Item, Nodes}, write),
+ case multicall(Nodes -- [node()], disc_load_intents) of
+ {Replies, []} ->
+ call({MsgTag, Tabs, Reason, Replies}),
+ done;
+ {_, BadNodes} ->
+ %% Some nodes did not respond, lets try again
+ {retry, BadNodes}
+ end
+ end,
+ case mnesia:transaction(GetIntents) of
+ {atomic, done} ->
+ done;
+ {atomic, {retry, BadNodes}} ->
+ verbose("Retry late_load_tables because bad nodes: ~p~n",
+ [BadNodes]),
+ try_schedule_late_disc_load(Tabs, Reason, MsgTag);
+ {aborted, AbortReason} ->
+ fatal("Cannot late_load_tables~p: ~p~n",
+ [[Tabs, Reason, MsgTag], AbortReason])
+ end.
+
+connect_nodes(Ns) ->
+ case mnesia:system_info(is_running) of
+ no ->
+ {error, {node_not_running, node()}};
+ yes ->
+ Pid = spawn_link(?MODULE,connect_nodes2,[self(),Ns]),
+ receive
+ {?MODULE, Pid, Res, New} ->
+ case Res of
+ ok ->
+ mnesia_lib:add_list(extra_db_nodes, New),
+ {ok, New};
+ {aborted, {throw, Str}} when is_list(Str) ->
+ %%mnesia_recover:disconnect_nodes(New),
+ {error, {merge_schema_failed, lists:flatten(Str)}};
+ Else ->
+ {error, Else}
+ end;
+ {'EXIT', Pid, Reason} ->
+ {error, Reason}
+ end
+ end.
+
+connect_nodes2(Father, Ns) ->
+ Current = val({current, db_nodes}),
+ abcast([node()|Ns], {merging_schema, node()}),
+ {NewC, OldC} = mnesia_recover:connect_nodes(Ns),
+ Connected = NewC ++OldC,
+ New1 = mnesia_lib:intersect(Ns, Connected),
+ New = New1 -- Current,
+ process_flag(trap_exit, true),
+ Res = try_merge_schema(New),
+ Msg = {schema_is_merged, [], late_merge, []},
+ multicall([node()|Ns], Msg),
+ After = val({current, db_nodes}),
+ Father ! {?MODULE, self(), Res, mnesia_lib:intersect(Ns,After)},
+ unlink(Father),
+ ok.
+
+%% Merge the local schema with the schema on other nodes.
+%% But first we must let all processes that want to force
+%% load tables wait until the schema merge is done.
+
+merge_schema() ->
+ AllNodes = mnesia_lib:all_nodes(),
+ case try_merge_schema(AllNodes) of
+ ok ->
+ schema_is_merged();
+ {aborted, {throw, Str}} when is_list(Str) ->
+ fatal("Failed to merge schema: ~s~n", [Str]);
+ Else ->
+ fatal("Failed to merge schema: ~p~n", [Else])
+ end.
+
+try_merge_schema(Nodes) ->
+ case mnesia_schema:merge_schema() of
+ {atomic, not_merged} ->
+ %% No more nodes that we need to merge the schema with
+ ok;
+ {atomic, {merged, OldFriends, NewFriends}} ->
+ %% Check if new nodes has been added to the schema
+ Diff = mnesia_lib:all_nodes() -- [node() | Nodes],
+ mnesia_recover:connect_nodes(Diff),
+
+ %% Tell everybody to adopt orphan tables
+ im_running(OldFriends, NewFriends),
+ im_running(NewFriends, OldFriends),
+
+ try_merge_schema(Nodes);
+ {atomic, {"Cannot get cstructs", Node, Reason}} ->
+ dbg_out("Cannot get cstructs, Node ~p ~p~n", [Node, Reason]),
+ timer:sleep(1000), % Avoid a endless loop look alike
+ try_merge_schema(Nodes);
+ Other ->
+ Other
+ end.
+
+im_running(OldFriends, NewFriends) ->
+ abcast(OldFriends, {im_running, node(), NewFriends}).
+
+schema_is_merged() ->
+ MsgTag = schema_is_merged,
+ SafeLoads = initial_safe_loads(),
+
+ %% At this point we do not know anything about
+ %% which tables that the other nodes already
+ %% has loaded and therefore we let the normal
+ %% processing of the loader_queue take care
+ %% of it, since we at that time point will
+ %% know the whereabouts. We rely on the fact
+ %% that all nodes tells each other directly
+ %% when they have loaded a table and are
+ %% willing to share it.
+
+ try_schedule_late_disc_load(SafeLoads, initial, MsgTag).
+
+
+cast(Msg) ->
+ case whereis(?SERVER_NAME) of
+ undefined ->{error, {node_not_running, node()}};
+ Pid -> gen_server:cast(Pid, Msg)
+ end.
+
+abcast(Nodes, Msg) ->
+ gen_server:abcast(Nodes, ?SERVER_NAME, Msg).
+
+unsafe_call(Msg) ->
+ case whereis(?SERVER_NAME) of
+ undefined -> {error, {node_not_running, node()}};
+ Pid -> gen_server:call(Pid, Msg, infinity)
+ end.
+
+call(Msg) ->
+ case whereis(?SERVER_NAME) of
+ undefined ->
+ {error, {node_not_running, node()}};
+ Pid ->
+ link(Pid),
+ Res = gen_server:call(Pid, Msg, infinity),
+ unlink(Pid),
+
+ %% We get an exit signal if server dies
+ receive
+ {'EXIT', Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ Res
+ end
+ end.
+
+remote_call(Node, Func, Args) ->
+ case catch gen_server:call({?MODULE, Node}, {Func, Args, self()}, infinity) of
+ {'EXIT', Error} ->
+ {error, Error};
+ Else ->
+ Else
+ end.
+
+multicall(Nodes, Msg) ->
+ {Good, Bad} = gen_server:multi_call(Nodes, ?MODULE, Msg, infinity),
+ PatchedGood = [Reply || {_Node, Reply} <- Good],
+ {PatchedGood, Bad}. %% Make the replies look like rpc:multicalls..
+%% rpc:multicall(Nodes, ?MODULE, call, [Msg]).
+
+%%%----------------------------------------------------------------------
+%%% Callback functions from gen_server
+%%%----------------------------------------------------------------------
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) ->
+ process_flag(trap_exit, true),
+ mnesia_lib:verbose("~p starting: ~p~n", [?SERVER_NAME, self()]),
+
+ %% Handshake and initialize transaction recovery
+ %% for new nodes detected in the schema
+ All = mnesia_lib:all_nodes(),
+ Diff = All -- [node() | val(original_nodes)],
+ mnesia_lib:unset(original_nodes),
+ mnesia_recover:connect_nodes(Diff),
+
+ Interval = mnesia_monitor:get_env(dump_log_time_threshold),
+ Msg = {async_dump_log, time_threshold},
+ {ok, Ref} = timer:send_interval(Interval, Msg),
+ mnesia_dumper:start_regulator(),
+
+ Empty = gb_trees:empty(),
+ {ok, #state{supervisor = Parent, dump_log_timer_ref = Ref,
+ loader_queue = Empty,
+ late_loader_queue = Empty}}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%% {stop, Reason, Reply, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_call({sync_dump_log, InitBy}, From, State) ->
+ Worker = #dump_log{initiated_by = InitBy,
+ opt_reply_to = From
+ },
+ State2 = add_worker(Worker, State),
+ noreply(State2);
+
+handle_call(wait_for_schema_commit_lock, From, State) ->
+ Worker = #schema_commit_lock{owner = From},
+ State2 = add_worker(Worker, State),
+ noreply(State2);
+
+handle_call(block_controller, From, State) ->
+ Worker = #block_controller{owner = From},
+ State2 = add_worker(Worker, State),
+ noreply(State2);
+
+handle_call({update,Fun}, From, State) ->
+ Res = (catch Fun()),
+ reply(From, Res),
+ noreply(State);
+
+handle_call(get_cstructs, From, State) ->
+ Tabs = val({schema, tables}),
+ Cstructs = [val({T, cstruct}) || T <- Tabs],
+ Running = val({current, db_nodes}),
+ reply(From, {cstructs, Cstructs, Running}),
+ noreply(State);
+
+handle_call({schema_is_merged, [], late_merge, []}, From,
+ State = #state{schema_is_merged = Merged}) ->
+ case Merged of
+ {false, Node} when Node == node(From) ->
+ Msgs = State#state.early_msgs,
+ State1 = State#state{early_msgs = [], schema_is_merged = true},
+ handle_early_msgs(lists:reverse(Msgs), State1);
+ _ ->
+ %% Ooops this came to early, before we have merged :-)
+ %% or it came to late or from a node we don't care about
+ reply(From, ignore),
+ noreply(State)
+ end;
+
+handle_call({schema_is_merged, TabsR, Reason, RemoteLoaders}, From, State) ->
+ State2 = late_disc_load(TabsR, Reason, RemoteLoaders, From, State),
+
+ %% Handle early messages
+ Msgs = State2#state.early_msgs,
+ State3 = State2#state{early_msgs = [], schema_is_merged = true},
+ handle_early_msgs(lists:reverse(Msgs), State3);
+
+handle_call(disc_load_intents,From,State = #state{loader_queue=LQ,late_loader_queue=LLQ}) ->
+ LQTabs = gb_trees:keys(LQ),
+ LLQTabs = gb_trees:keys(LLQ),
+ ActiveTabs = lists:sort(mnesia_lib:local_active_tables()),
+ reply(From, {ok, node(), ordsets:union([LQTabs,LLQTabs,ActiveTabs])}),
+ noreply(State);
+
+handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State) ->
+ Current = val({current, db_nodes}),
+ Res =
+ case lists:member(AddNode, Current) and
+ (State#state.schema_is_merged == true) of
+ true ->
+ mnesia_lib:add_lsort({Tab, where_to_write}, AddNode);
+ false ->
+ ignore
+ end,
+ {reply, Res, State};
+
+handle_call({add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
+ ReplyTo, State) ->
+ KnownNode = lists:member(ToNode, val({current, db_nodes})),
+ Merged = State#state.schema_is_merged,
+ if
+ KnownNode == false ->
+ reply(ReplyTo, ignore),
+ noreply(State);
+ Merged == true ->
+ Res = case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} -> %% Tab deleted
+ deleted;
+ _ ->
+ add_active_replica(Tab, ToNode, RemoteS, AccessMode)
+ end,
+ reply(ReplyTo, Res),
+ noreply(State);
+ true -> %% Schema is not merged
+ Msg = {add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
+ Msgs = State#state.early_msgs,
+ reply(ReplyTo, ignore), %% Reply ignore and add data after schema merge
+ noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
+ end;
+
+handle_call({unannounce_add_table_copy, [Tab, Node], From}, ReplyTo, State) ->
+ KnownNode = lists:member(node(From), val({current, db_nodes})),
+ Merged = State#state.schema_is_merged,
+ if
+ KnownNode == false ->
+ reply(ReplyTo, ignore),
+ noreply(State);
+ Merged == true ->
+ Res = unannounce_add_table_copy(Tab, Node),
+ reply(ReplyTo, Res),
+ noreply(State);
+ true -> %% Schema is not merged
+ Msg = {unannounce_add_table_copy, [Tab, Node], From},
+ Msgs = State#state.early_msgs,
+ reply(ReplyTo, ignore), %% Reply ignore and add data after schema merge
+ %% Set ReplyTO to undefined so we don't reply twice
+ noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
+ end;
+
+handle_call({net_load, Tab, Cs}, From, State) ->
+ State2 =
+ case State#state.schema_is_merged of
+ true ->
+ Worker = #net_load{table = Tab,
+ opt_reply_to = From,
+ reason = {dumper,add_table_copy},
+ cstruct = Cs
+ },
+ add_worker(Worker, State);
+ false ->
+ reply(From, {not_loaded, schema_not_merged}),
+ State
+ end,
+ noreply(State2);
+
+handle_call(Msg, From, State) when State#state.schema_is_merged /= true ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ noreply(State#state{early_msgs = [{call, Msg, From} | Msgs]});
+
+handle_call({late_disc_load, Tabs, Reason, RemoteLoaders}, From, State) ->
+ State2 = late_disc_load(Tabs, Reason, RemoteLoaders, From, State),
+ noreply(State2);
+
+handle_call({unblock_table, Tab}, _Dummy, State) ->
+ Var = {Tab, where_to_commit},
+ case val(Var) of
+ {blocked, List} ->
+ set(Var, List); % where_to_commit
+ _ ->
+ ignore
+ end,
+ {reply, ok, State};
+
+handle_call({block_table, [Tab], From}, _Dummy, State) ->
+ case lists:member(node(From), val({current, db_nodes})) of
+ true ->
+ block_table(Tab);
+ false ->
+ ignore
+ end,
+ {reply, ok, State};
+
+handle_call({check_w2r, _Node, Tab}, _From, State) ->
+ {reply, val({Tab, where_to_read}), State};
+
+handle_call({add_other, Who}, _From, State = #state{others=Others0}) ->
+ Others = [Who|Others0],
+ {reply, ok, State#state{others=Others}};
+handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
+ Others = lists:delete(Who, Others0),
+ {reply, ok, State#state{others=Others}};
+
+handle_call(Msg, _From, State) ->
+ error("~p got unexpected call: ~p~n", [?SERVER_NAME, Msg]),
+ noreply(State).
+
+late_disc_load(TabsR, Reason, RemoteLoaders, From,
+ State = #state{loader_queue = LQ, late_loader_queue = LLQ}) ->
+ verbose("Intend to load tables: ~p~n", [TabsR]),
+ ?eval_debug_fun({?MODULE, late_disc_load},
+ [{tabs, TabsR},
+ {reason, Reason},
+ {loaders, RemoteLoaders}]),
+
+ reply(From, queued),
+ %% RemoteLoaders is a list of {ok, Node, Tabs} tuples
+
+ %% Remove deleted tabs and queued/loaded
+ LocalTabs = gb_sets:from_ordset(lists:sort(mnesia_lib:val({schema,local_tables}))),
+ Filter = fun(TabInfo0, Acc) ->
+ TabInfo = {Tab,_} =
+ case TabInfo0 of
+ {_,_} -> TabInfo0;
+ TabN -> {TabN,Reason}
+ end,
+ case gb_sets:is_member(Tab, LocalTabs) of
+ true ->
+ case ?catch_val({Tab, where_to_read}) == node() of
+ true -> Acc;
+ false ->
+ case gb_trees:is_defined(Tab,LQ) of
+ true -> Acc;
+ false -> [TabInfo | Acc]
+ end
+ end;
+ false -> Acc
+ end
+ end,
+
+ Tabs = lists:foldl(Filter, [], TabsR),
+
+ Nodes = val({current, db_nodes}),
+ LateQueue = late_loaders(Tabs, RemoteLoaders, Nodes, LLQ),
+ State#state{late_loader_queue = LateQueue}.
+
+late_loaders([{Tab, Reason} | Tabs], RemoteLoaders, Nodes, LLQ) ->
+ case gb_trees:is_defined(Tab, LLQ) of
+ false ->
+ LoadNodes = late_load_filter(RemoteLoaders, Tab, Nodes, []),
+ case LoadNodes of
+ [] -> cast({disc_load, Tab, Reason}); % Ugly cast
+ _ -> ignore
+ end,
+ LateLoad = #late_load{table=Tab,loaders=LoadNodes,reason=Reason},
+ late_loaders(Tabs, RemoteLoaders, Nodes, gb_trees:insert(Tab,LateLoad,LLQ));
+ true ->
+ late_loaders(Tabs, RemoteLoaders, Nodes, LLQ)
+ end;
+late_loaders([], _RemoteLoaders, _Nodes, LLQ) ->
+ LLQ.
+
+late_load_filter([{error, _} | RemoteLoaders], Tab, Nodes, Acc) ->
+ late_load_filter(RemoteLoaders, Tab, Nodes, Acc);
+late_load_filter([{badrpc, _} | RemoteLoaders], Tab, Nodes, Acc) ->
+ late_load_filter(RemoteLoaders, Tab, Nodes, Acc);
+late_load_filter([RL | RemoteLoaders], Tab, Nodes, Acc) ->
+ {ok, Node, Intents} = RL,
+ Access = val({Tab, access_mode}),
+ LocalC = val({Tab, local_content}),
+ StillActive = lists:member(Node, Nodes),
+ RemoteIntent = lists:member(Tab, Intents),
+ if
+ Access == read_write,
+ LocalC == false,
+ StillActive == true,
+ RemoteIntent == true ->
+ Masters = mnesia_recover:get_master_nodes(Tab),
+ case lists:member(Node, Masters) of
+ true ->
+ %% The other node is master node for
+ %% the table, accept his load intent
+ late_load_filter(RemoteLoaders, Tab, Nodes, [Node | Acc]);
+ false when Masters == [] ->
+ %% The table has no master nodes
+ %% accept his load intent
+ late_load_filter(RemoteLoaders, Tab, Nodes, [Node | Acc]);
+ false ->
+ %% Some one else is master node for
+ %% the table, ignore his load intent
+ late_load_filter(RemoteLoaders, Tab, Nodes, Acc)
+ end;
+ true ->
+ late_load_filter(RemoteLoaders, Tab, Nodes, Acc)
+ end;
+late_load_filter([], _Tab, _Nodes, Acc) ->
+ Acc.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_cast({release_schema_commit_lock, _Owner}, State) ->
+ if
+ State#state.is_stopping == true ->
+ {stop, shutdown, State};
+ true ->
+ case State#state.dumper_queue of
+ [#schema_commit_lock{}|Rest] ->
+ [_Worker | Rest] = State#state.dumper_queue,
+ State2 = State#state{dumper_pid = undefined,
+ dumper_queue = Rest},
+ State3 = opt_start_worker(State2),
+ noreply(State3);
+ _ ->
+ noreply(State)
+ end
+ end;
+
+handle_cast(unblock_controller, State) ->
+ if
+ State#state.is_stopping == true ->
+ {stop, shutdown, State};
+ is_record(hd(State#state.dumper_queue), block_controller) ->
+ [_Worker | Rest] = State#state.dumper_queue,
+ State2 = State#state{dumper_pid = undefined,
+ dumper_queue = Rest},
+ State3 = opt_start_worker(State2),
+ noreply(State3)
+ end;
+
+handle_cast({mnesia_down, Node}, State) ->
+ maybe_log_mnesia_down(Node),
+ mnesia_lib:del({current, db_nodes}, Node),
+ mnesia_checkpoint:tm_mnesia_down(Node),
+ Alltabs = val({schema, tables}),
+ reconfigure_tables(Node, Alltabs),
+ %% Done from (external point of view)
+ mnesia_monitor:mnesia_down(?SERVER_NAME, Node),
+
+ %% Fix if we are late_merging against the node that went down
+ case State#state.schema_is_merged of
+ {false, Node} ->
+ spawn(?MODULE, call, [{schema_is_merged, [], late_merge, []}]);
+ _ ->
+ ignore
+ end,
+
+ %% Fix internal stuff
+ LateQ = remove_loaders(Alltabs, Node, State#state.late_loader_queue),
+
+ case get_senders(State) ++ get_loaders(State) of
+ [] -> ignore;
+ Senders ->
+ lists:foreach(fun({Pid,_}) -> Pid ! {copier_done, Node} end,
+ Senders)
+ end,
+ lists:foreach(fun(Pid) -> Pid ! {copier_done,Node} end,
+ State#state.others),
+
+ Remove = fun(ST) ->
+ node(ST#send_table.receiver_pid) /= Node
+ end,
+ NewSenders = lists:filter(Remove, State#state.sender_queue),
+ Early = remove_early_messages(State#state.early_msgs, Node),
+ noreply(State#state{sender_queue = NewSenders,
+ early_msgs = Early,
+ late_loader_queue = LateQ
+ });
+
+handle_cast({merging_schema, Node}, State) ->
+ case State#state.schema_is_merged of
+ false ->
+ %% This comes from dynamic connect_nodes which are made
+ %% after mnesia:start() and the schema_merge.
+ ImANewKidInTheBlock =
+ (val({schema, storage_type}) == ram_copies)
+ andalso (mnesia_lib:val({schema, local_tables}) == [schema]),
+ case ImANewKidInTheBlock of
+ true -> %% I'm newly started ram_node..
+ noreply(State#state{schema_is_merged = {false, Node}});
+ false ->
+ noreply(State)
+ end;
+ _ -> %% Already merging schema.
+ noreply(State)
+ end;
+
+handle_cast(Msg, State) when State#state.schema_is_merged /= true ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ noreply(State#state{early_msgs = [{cast, Msg} | Msgs]});
+
+%% This must be done after schema_is_merged otherwise adopt_orphan
+%% might trigger a table load from wrong nodes as a result of that we don't
+%% know which tables we can load safly first.
+handle_cast({im_running, _Node, NewFriends}, State) ->
+ LocalTabs = mnesia_lib:local_active_tables() -- [schema],
+ RemoveLocalOnly = fun(Tab) -> not val({Tab, local_content}) end,
+ Tabs = lists:filter(RemoveLocalOnly, LocalTabs),
+ Ns = mnesia_lib:intersect(NewFriends, val({current, db_nodes})),
+ abcast(Ns, {adopt_orphans, node(), Tabs}),
+ noreply(State);
+
+handle_cast({disc_load, Tab, Reason}, State) ->
+ Worker = #disc_load{table = Tab, reason = Reason},
+ State2 = add_worker(Worker, State),
+ noreply(State2);
+
+handle_cast(Worker = #send_table{}, State) ->
+ State2 = add_worker(Worker, State),
+ noreply(State2);
+
+handle_cast({sync_tabs, Tabs, From}, State) ->
+ %% user initiated wait_for_tables
+ handle_sync_tabs(Tabs, From),
+ noreply(State);
+
+handle_cast({i_have_tab, Tab, Node}, State) ->
+ case lists:member(Node, val({current, db_nodes})) of
+ true ->
+ State2 = node_has_tabs([Tab], Node, State),
+ noreply(State2);
+ false ->
+ noreply(State)
+ end;
+
+handle_cast({force_load_updated, Tab}, State) ->
+ case val({Tab, active_replicas}) of
+ [] ->
+ %% No valid replicas
+ noreply(State);
+ [SomeNode | _] ->
+ State2 = node_has_tabs([Tab], SomeNode, State),
+ noreply(State2)
+ end;
+
+handle_cast({master_nodes_updated, Tab, Masters}, State) ->
+ Active = val({Tab, active_replicas}),
+ Valid =
+ case val({Tab, load_by_force}) of
+ true ->
+ Active;
+ false ->
+ if
+ Masters == [] ->
+ Active;
+ true ->
+ mnesia_lib:intersect(Masters, Active)
+ end
+ end,
+ case Valid of
+ [] ->
+ %% No valid replicas
+ noreply(State);
+ [SomeNode | _] ->
+ State2 = node_has_tabs([Tab], SomeNode, State),
+ noreply(State2)
+ end;
+
+handle_cast({adopt_orphans, Node, Tabs}, State) ->
+
+ State2 = node_has_tabs(Tabs, Node, State),
+
+ %% Register the other node as up and running
+ mnesia_recover:log_mnesia_up(Node),
+ verbose("Logging mnesia_up ~w~n",[Node]),
+ mnesia_lib:report_system_event({mnesia_up, Node}),
+
+ %% Load orphan tables
+ LocalTabs = val({schema, local_tables}) -- [schema],
+ Nodes = val({current, db_nodes}),
+ {LocalOrphans, RemoteMasters} =
+ orphan_tables(LocalTabs, Node, Nodes, [], []),
+ Reason = {adopt_orphan, node()},
+ mnesia_late_loader:async_late_disc_load(node(), LocalOrphans, Reason),
+
+ Fun =
+ fun(N) ->
+ RemoteOrphans =
+ [Tab || {Tab, Ns} <- RemoteMasters,
+ lists:member(N, Ns)],
+ mnesia_late_loader:maybe_async_late_disc_load(N, RemoteOrphans, Reason)
+ end,
+ lists:foreach(Fun, Nodes),
+ noreply(State2);
+
+handle_cast(Msg, State) ->
+ error("~p got unexpected cast: ~p~n", [?SERVER_NAME, Msg]),
+ noreply(State).
+
+handle_sync_tabs([Tab | Tabs], From) ->
+ case val({Tab, where_to_read}) of
+ nowhere ->
+ case get({sync_tab, Tab}) of
+ undefined ->
+ put({sync_tab, Tab}, [From]);
+ Pids ->
+ put({sync_tab, Tab}, [From | Pids])
+ end;
+ _ ->
+ sync_reply(From, Tab)
+ end,
+ handle_sync_tabs(Tabs, From);
+handle_sync_tabs([], _From) ->
+ ok.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_info({async_dump_log, InitBy}, State) ->
+ Worker = #dump_log{initiated_by = InitBy},
+ State2 = add_worker(Worker, State),
+ noreply(State2);
+
+handle_info(#dumper_done{worker_pid=Pid, worker_res=Res}, State) ->
+ if
+ State#state.is_stopping == true ->
+ {stop, shutdown, State};
+ Res == dumped, Pid == State#state.dumper_pid ->
+ [Worker | Rest] = State#state.dumper_queue,
+ reply(Worker#dump_log.opt_reply_to, Res),
+ State2 = State#state{dumper_pid = undefined,
+ dumper_queue = Rest},
+ State3 = opt_start_worker(State2),
+ noreply(State3);
+ true ->
+ fatal("Dumper failed: ~p~n state: ~p~n", [Res, State]),
+ {stop, fatal, State}
+ end;
+
+handle_info(Done = #loader_done{worker_pid=WPid, table_name=Tab}, State0) ->
+ LateQueue0 = State0#state.late_loader_queue,
+ State1 = State0#state{loader_pid = lists:keydelete(WPid,1,get_loaders(State0))},
+
+ State2 =
+ case Done#loader_done.is_loaded of
+ true ->
+ %% Optional table announcement
+ if
+ Done#loader_done.needs_announce == true,
+ Done#loader_done.needs_reply == true ->
+ i_have_tab(Tab),
+ %% Should be {dumper,add_table_copy} only
+ reply(Done#loader_done.reply_to,
+ Done#loader_done.reply);
+ Done#loader_done.needs_reply == true ->
+ %% Should be {dumper,add_table_copy} only
+ reply(Done#loader_done.reply_to,
+ Done#loader_done.reply);
+ Done#loader_done.needs_announce == true, Tab == schema ->
+ i_have_tab(Tab);
+ Done#loader_done.needs_announce == true ->
+ i_have_tab(Tab),
+ %% Local node needs to perform user_sync_tab/1
+ Ns = val({current, db_nodes}),
+ abcast(Ns, {i_have_tab, Tab, node()});
+ Tab == schema ->
+ ignore;
+ true ->
+ %% Local node needs to perform user_sync_tab/1
+ Ns = val({current, db_nodes}),
+ AlreadyKnows = val({Tab, active_replicas}),
+ abcast(Ns -- AlreadyKnows, {i_have_tab, Tab, node()})
+ end,
+ %% Optional user sync
+ case Done#loader_done.needs_sync of
+ true -> user_sync_tab(Tab);
+ false -> ignore
+ end,
+ State1#state{late_loader_queue=gb_trees:delete_any(Tab, LateQueue0)};
+ false ->
+ %% Either the node went down or table was not
+ %% loaded remotly yet
+ case Done#loader_done.needs_reply of
+ true ->
+ reply(Done#loader_done.reply_to,
+ Done#loader_done.reply);
+ false ->
+ ignore
+ end,
+ case ?catch_val({Tab, active_replicas}) of
+ [_|_] -> % still available elsewhere
+ {value,{_,Worker}} = lists:keysearch(WPid,1,get_loaders(State0)),
+ add_loader(Tab,Worker,State1);
+ _ ->
+ State1
+ end
+ end,
+ State3 = opt_start_worker(State2),
+ noreply(State3);
+
+handle_info(#sender_done{worker_pid=Pid, worker_res=Res}, State) ->
+ Senders = get_senders(State),
+ {value, {Pid,_Worker}} = lists:keysearch(Pid, 1, Senders),
+ if
+ Res == ok ->
+ State2 = State#state{sender_pid = lists:keydelete(Pid, 1, Senders)},
+ State3 = opt_start_worker(State2),
+ noreply(State3);
+ true ->
+ %% No need to send any message to the table receiver
+ %% since it will soon get a mnesia_down anyway
+ fatal("Sender failed: ~p~n state: ~p~n", [Res, State]),
+ {stop, fatal, State}
+ end;
+
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
+ catch set(mnesia_status, stopping),
+ case State#state.dumper_pid of
+ undefined ->
+ dbg_out("~p was ~p~n", [?SERVER_NAME, R]),
+ {stop, shutdown, State};
+ _ ->
+ noreply(State#state{is_stopping = true})
+ end;
+
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.dumper_pid ->
+ case State#state.dumper_queue of
+ [#schema_commit_lock{}|Workers] -> %% Schema trans crashed or was killed
+ dbg_out("WARNING: Dumper ~p exited ~p~n", [Pid, R]),
+ State2 = State#state{dumper_queue = Workers, dumper_pid = undefined},
+ State3 = opt_start_worker(State2),
+ noreply(State3);
+ _Other ->
+ fatal("Dumper or schema commit crashed: ~p~n state: ~p~n", [R, State]),
+ {stop, fatal, State}
+ end;
+
+handle_info(Msg = {'EXIT', Pid, R}, State) when R /= wait_for_tables_timeout ->
+ case lists:keymember(Pid, 1, get_senders(State)) of
+ true ->
+ %% No need to send any message to the table receiver
+ %% since it will soon get a mnesia_down anyway
+ fatal("Sender crashed: ~p~n state: ~p~n", [{Pid,R}, State]),
+ {stop, fatal, State};
+ false ->
+ case lists:keymember(Pid, 1, get_loaders(State)) of
+ true ->
+ fatal("Loader crashed: ~p~n state: ~p~n", [R, State]),
+ {stop, fatal, State};
+ false ->
+ error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
+ noreply(State)
+ end
+ end;
+
+handle_info({From, get_state}, State) ->
+ From ! {?SERVER_NAME, State},
+ noreply(State);
+
+%% No real need for buffering
+handle_info(Msg, State) when State#state.schema_is_merged /= true ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ noreply(State#state{early_msgs = [{info, Msg} | Msgs]});
+
+handle_info({'EXIT', Pid, wait_for_tables_timeout}, State) ->
+ sync_tab_timeout(Pid, get()),
+ noreply(State);
+
+handle_info(Msg, State) ->
+ error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
+ noreply(State).
+
+sync_tab_timeout(Pid, [{{sync_tab, Tab}, Pids} | Tail]) ->
+ case lists:delete(Pid, Pids) of
+ [] ->
+ erase({sync_tab, Tab});
+ Pids2 ->
+ put({sync_tab, Tab}, Pids2)
+ end,
+ sync_tab_timeout(Pid, Tail);
+sync_tab_timeout(Pid, [_ | Tail]) ->
+ sync_tab_timeout(Pid, Tail);
+sync_tab_timeout(_Pid, []) ->
+ ok.
+
+%% Pick the load record that has the highest load order
+%% Returns {BestLoad, RemainingQueue} or {none, []} if queue is empty
+pick_next(Queue) ->
+ List = gb_trees:values(Queue),
+ case pick_next(List, none, none) of
+ none -> {none, gb_trees:empty()};
+ {Tab, Worker} -> {Worker, gb_trees:delete(Tab,Queue)}
+ end.
+
+pick_next([Head = #net_load{table=Tab}| Tail], Load, Order) ->
+ select_best(Head, Tail, ?catch_val({Tab, load_order}), Load, Order);
+pick_next([Head = #disc_load{table=Tab}| Tail], Load, Order) ->
+ select_best(Head, Tail, ?catch_val({Tab, load_order}), Load, Order);
+pick_next([], none, _Order) ->
+ none;
+pick_next([], Load, _Order) ->
+ {element(2,Load), Load}.
+
+select_best(_Head, Tail, {'EXIT', _WHAT}, Load, Order) ->
+ %% Table have been deleted drop it.
+ pick_next(Tail, Load, Order);
+select_best(Load, Tail, Order, none, none) ->
+ pick_next(Tail, Load, Order);
+select_best(Load, Tail, Order, _OldLoad, OldOrder) when Order > OldOrder ->
+ pick_next(Tail, Load, Order);
+select_best(_Load, Tail, _Order, OldLoad, OldOrder) ->
+ pick_next(Tail, OldLoad, OldOrder).
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+terminate(Reason, State) ->
+ mnesia_monitor:terminate_proc(?SERVER_NAME, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+code_change(_OldVsn, State0, _Extra) ->
+ %% Loader Queue
+ State1 = case State0#state.loader_pid of
+ Pids when is_list(Pids) -> State0;
+ undefined -> State0#state{loader_pid = [],loader_queue=gb_trees:empty()};
+ Pid when is_pid(Pid) ->
+ [Loader|Rest] = State0#state.loader_queue,
+ LQ0 = [{element(2,Rec),Rec} || Rec <- Rest],
+ LQ1 = lists:sort(LQ0),
+ LQ = gb_trees:from_orddict(LQ1),
+ State0#state{loader_pid=[{Pid,Loader}], loader_queue=LQ}
+ end,
+ %% LateLoaderQueue
+ State = if is_list(State1#state.late_loader_queue) ->
+ LLQ0 = State1#state.late_loader_queue,
+ LLQ1 = lists:sort([{element(2,Rec),Rec} || Rec <- LLQ0]),
+ LLQ = gb_trees:from_orddict(LLQ1),
+ State1#state{late_loader_queue=LLQ};
+ true ->
+ State1
+ end,
+ {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+maybe_log_mnesia_down(N) ->
+ %% We use mnesia_down when deciding which tables to load locally,
+ %% so if we are not running (i.e haven't decided which tables
+ %% to load locally), don't log mnesia_down yet.
+ case mnesia_lib:is_running() of
+ yes ->
+ verbose("Logging mnesia_down ~w~n", [N]),
+ mnesia_recover:log_mnesia_down(N),
+ ok;
+ _ ->
+ Filter = fun(Tab) ->
+ inactive_copy_holders(Tab, N)
+ end,
+ HalfLoadedTabs = lists:any(Filter, val({schema, local_tables}) -- [schema]),
+ if
+ HalfLoadedTabs == true ->
+ verbose("Logging mnesia_down ~w~n", [N]),
+ mnesia_recover:log_mnesia_down(N),
+ ok;
+ true ->
+ %% Unfortunately we have not loaded some common
+ %% tables yet, so we cannot rely on the nodedown
+ log_later %% BUGBUG handle this case!!!
+ end
+ end.
+
+inactive_copy_holders(Tab, Node) ->
+ Cs = val({Tab, cstruct}),
+ case mnesia_lib:cs_to_storage_type(Node, Cs) of
+ unknown ->
+ false;
+ _Storage ->
+ mnesia_lib:not_active_here(Tab)
+ end.
+
+orphan_tables([Tab | Tabs], Node, Ns, Local, Remote) ->
+ Cs = val({Tab, cstruct}),
+ CopyHolders = mnesia_lib:copy_holders(Cs),
+ RamCopyHolders = Cs#cstruct.ram_copies,
+ DiscCopyHolders = CopyHolders -- RamCopyHolders,
+ DiscNodes = val({schema, disc_copies}),
+ LocalContent = Cs#cstruct.local_content,
+ RamCopyHoldersOnDiscNodes = mnesia_lib:intersect(RamCopyHolders, DiscNodes),
+ Active = val({Tab, active_replicas}),
+ BeingCreated = (?catch_val({Tab, create_table}) == true),
+ Read = val({Tab, where_to_read}),
+ case lists:member(Node, DiscCopyHolders) of
+ _ when BeingCreated == true ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote);
+ _ when Read == node() -> %% Allready loaded
+ orphan_tables(Tabs, Node, Ns, Local, Remote);
+ true when Active == [] ->
+ case DiscCopyHolders -- Ns of
+ [] ->
+ %% We're last up and the other nodes have not
+ %% loaded the table. Lets load it if we are
+ %% the smallest node.
+ case lists:min(DiscCopyHolders) of
+ Min when Min == node() ->
+ case mnesia_recover:get_master_nodes(Tab) of
+ [] ->
+ L = [Tab | Local],
+ orphan_tables(Tabs, Node, Ns, L, Remote);
+ Masters ->
+ R = [{Tab, Masters} | Remote],
+ orphan_tables(Tabs, Node, Ns, Local, R)
+ end;
+ _ ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote)
+ end;
+ _ ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote)
+ end;
+ false when Active == [], DiscCopyHolders == [], RamCopyHoldersOnDiscNodes == [] ->
+ %% Special case when all replicas resides on disc less nodes
+ orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
+ _ when LocalContent == true ->
+ orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
+ _ ->
+ orphan_tables(Tabs, Node, Ns, Local, Remote)
+ end;
+orphan_tables([], _, _, LocalOrphans, RemoteMasters) ->
+ {LocalOrphans, RemoteMasters}.
+
+node_has_tabs([Tab | Tabs], Node, State) when Node /= node() ->
+ State2 =
+ case catch update_whereabouts(Tab, Node, State) of
+ State1 = #state{} -> State1;
+ {'EXIT', R} -> %% Tab was just deleted?
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} -> State; % yes
+ _ -> erlang:error(R)
+ end
+ end,
+ node_has_tabs(Tabs, Node, State2);
+node_has_tabs([Tab | Tabs], Node, State) ->
+ user_sync_tab(Tab),
+ node_has_tabs(Tabs, Node, State);
+node_has_tabs([], _Node, State) ->
+ State.
+
+update_whereabouts(Tab, Node, State) ->
+ Storage = val({Tab, storage_type}),
+ Read = val({Tab, where_to_read}),
+ LocalC = val({Tab, local_content}),
+ BeingCreated = (?catch_val({Tab, create_table}) == true),
+ Masters = mnesia_recover:get_master_nodes(Tab),
+ ByForce = val({Tab, load_by_force}),
+ GoGetIt =
+ if
+ ByForce == true ->
+ true;
+ Masters == [] ->
+ true;
+ true ->
+ lists:member(Node, Masters)
+ end,
+
+ dbg_out("Table ~w is loaded on ~w. s=~w, r=~w, lc=~w, f=~w, m=~w~n",
+ [Tab, Node, Storage, Read, LocalC, ByForce, GoGetIt]),
+ if
+ LocalC == true ->
+ %% Local contents, don't care about other node
+ State;
+ BeingCreated == true ->
+ %% The table is currently being created
+ %% It will be handled elsewhere
+ State;
+ Storage == unknown, Read == nowhere ->
+ %% No own copy, time to read remotely
+ %% if the other node is a good node
+ add_active_replica(Tab, Node),
+ case GoGetIt of
+ true ->
+ set({Tab, where_to_read}, Node),
+ user_sync_tab(Tab),
+ State;
+ false ->
+ State
+ end;
+ Storage == unknown ->
+ %% No own copy, continue to read remotely
+ add_active_replica(Tab, Node),
+ NodeST = mnesia_lib:storage_type_at_node(Node, Tab),
+ ReadST = mnesia_lib:storage_type_at_node(Read, Tab),
+ if %% Avoid reading from disc_only_copies
+ NodeST == disc_only_copies ->
+ ignore;
+ ReadST == disc_only_copies ->
+ mnesia_lib:set_remote_where_to_read(Tab);
+ true ->
+ ignore
+ end,
+ user_sync_tab(Tab),
+ State;
+ Read == nowhere ->
+ %% Own copy, go and get a copy of the table
+ %% if the other node is master or if there
+ %% are no master at all
+ add_active_replica(Tab, Node),
+ case GoGetIt of
+ true ->
+ Worker = #net_load{table = Tab,
+ reason = {active_remote, Node}},
+ add_worker(Worker, State);
+ false ->
+ State
+ end;
+ true ->
+ %% We already have an own copy
+ add_active_replica(Tab, Node),
+ user_sync_tab(Tab),
+ State
+ end.
+
+initial_safe_loads() ->
+ case val({schema, storage_type}) of
+ ram_copies ->
+ Downs = [],
+ Tabs = val({schema, local_tables}) -- [schema],
+ LastC = fun(T) -> last_consistent_replica(T, Downs) end,
+ lists:zf(LastC, Tabs);
+
+ disc_copies ->
+ Downs = mnesia_recover:get_mnesia_downs(),
+ dbg_out("mnesia_downs = ~p~n", [Downs]),
+
+ Tabs = val({schema, local_tables}) -- [schema],
+ LastC = fun(T) -> last_consistent_replica(T, Downs) end,
+ lists:zf(LastC, Tabs)
+ end.
+
+last_consistent_replica(Tab, Downs) ->
+ Cs = val({Tab, cstruct}),
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ Ram = Cs#cstruct.ram_copies,
+ Disc = Cs#cstruct.disc_copies,
+ DiscOnly = Cs#cstruct.disc_only_copies,
+ BetterCopies0 = mnesia_lib:remote_copy_holders(Cs) -- Downs,
+ BetterCopies = BetterCopies0 -- Ram,
+ AccessMode = Cs#cstruct.access_mode,
+ Copies = mnesia_lib:copy_holders(Cs),
+ Masters = mnesia_recover:get_master_nodes(Tab),
+ LocalMaster0 = lists:member(node(), Masters),
+ LocalContent = Cs#cstruct.local_content,
+ RemoteMaster =
+ if
+ Masters == [] -> false;
+ true -> not LocalMaster0
+ end,
+ LocalMaster =
+ if
+ Masters == [] -> false;
+ true -> LocalMaster0
+ end,
+ if
+ Copies == [node()] ->
+ %% Only one copy holder and it is local.
+ %% It may also be a local contents table
+ {true, {Tab, local_only}};
+ LocalContent == true ->
+ {true, {Tab, local_content}};
+ LocalMaster == true ->
+ %% We have a local master
+ {true, {Tab, local_master}};
+ RemoteMaster == true ->
+ %% Wait for remote master copy
+ false;
+ Storage == ram_copies ->
+ if
+ Disc == [], DiscOnly == [] ->
+ %% Nobody has copy on disc
+ {true, {Tab, ram_only}};
+ true ->
+ %% Some other node has copy on disc
+ false
+ end;
+ AccessMode == read_only ->
+ %% No one has been able to update the table,
+ %% i.e. all disc resident copies are equal
+ {true, {Tab, read_only}};
+ BetterCopies /= [], Masters /= [node()] ->
+ %% There are better copies on other nodes
+ %% and we do not have the only master copy
+ false;
+ true ->
+ {true, {Tab, initial}}
+ end.
+
+reconfigure_tables(N, [Tab |Tail]) ->
+ del_active_replica(Tab, N),
+ case val({Tab, where_to_read}) of
+ N -> mnesia_lib:set_remote_where_to_read(Tab);
+ _ -> ignore
+ end,
+ reconfigure_tables(N, Tail);
+reconfigure_tables(_, []) ->
+ ok.
+
+remove_loaders([Tab| Tabs], N, Loaders) ->
+ LateQ = drop_loaders(Tab, N, Loaders),
+ remove_loaders(Tabs, N, LateQ);
+remove_loaders([],_, LateQ) -> LateQ.
+
+remove_early_messages([], _Node) ->
+ [];
+remove_early_messages([{call, {add_active_replica, [_, Node, _, _], _}, _}|R], Node) ->
+ remove_early_messages(R, Node); %% Does a reply before queuing
+remove_early_messages([{call, {block_table, _, From}, ReplyTo}|R], Node)
+ when node(From) == Node ->
+ reply(ReplyTo, ok), %% Remove gen:server waits..
+ remove_early_messages(R, Node);
+remove_early_messages([{cast, {i_have_tab, _Tab, Node}}|R], Node) ->
+ remove_early_messages(R, Node);
+remove_early_messages([{cast, {adopt_orphans, Node, _Tabs}}|R], Node) ->
+ remove_early_messages(R, Node);
+remove_early_messages([M|R],Node) ->
+ [M|remove_early_messages(R,Node)].
+
+%% Drop loader from late load queue and possibly trigger a disc_load
+drop_loaders(Tab, Node, LLQ) ->
+ case gb_trees:lookup(Tab,LLQ) of
+ none ->
+ LLQ;
+ {value, H} ->
+ %% Check if it is time to issue a disc_load request
+ case H#late_load.loaders of
+ [Node] ->
+ Reason = {H#late_load.reason, last_loader_down, Node},
+ cast({disc_load, Tab, Reason}); % Ugly cast
+ _ ->
+ ignore
+ end,
+ %% Drop the node from the list of loaders
+ H2 = H#late_load{loaders = H#late_load.loaders -- [Node]},
+ gb_trees:update(Tab, H2, LLQ)
+ end.
+
+add_active_replica(Tab, Node) ->
+ add_active_replica(Tab, Node, val({Tab, cstruct})).
+
+add_active_replica(Tab, Node, Cs = #cstruct{}) ->
+ Storage = mnesia_lib:schema_cs_to_storage_type(Node, Cs),
+ AccessMode = Cs#cstruct.access_mode,
+ add_active_replica(Tab, Node, Storage, AccessMode).
+
+%% Block table primitives
+
+block_table(Tab) ->
+ Var = {Tab, where_to_commit},
+ Old = val(Var),
+ New = {blocked, Old},
+ set(Var, New). % where_to_commit
+
+unblock_table(Tab) ->
+ call({unblock_table, Tab}).
+
+is_tab_blocked(W2C) when is_list(W2C) ->
+ {false, W2C};
+is_tab_blocked({blocked, W2C}) when is_list(W2C) ->
+ {true, W2C}.
+
+mark_blocked_tab(true, Value) ->
+ {blocked, Value};
+mark_blocked_tab(false, Value) ->
+ Value.
+
+%%
+
+add_active_replica(Tab, Node, Storage, AccessMode) ->
+ Var = {Tab, where_to_commit},
+ {Blocked, Old} = is_tab_blocked(val(Var)),
+ Del = lists:keydelete(Node, 1, Old),
+ case AccessMode of
+ read_write ->
+ New = lists:sort([{Node, Storage} | Del]),
+ set(Var, mark_blocked_tab(Blocked, New)), % where_to_commit
+ mnesia_lib:add_lsort({Tab, where_to_write}, Node);
+ read_only ->
+ set(Var, mark_blocked_tab(Blocked, Del)),
+ mnesia_lib:del({Tab, where_to_write}, Node)
+ end,
+ add({Tab, active_replicas}, Node).
+
+del_active_replica(Tab, Node) ->
+ Var = {Tab, where_to_commit},
+ {Blocked, Old} = is_tab_blocked(val(Var)),
+ Del = lists:keydelete(Node, 1, Old),
+ New = lists:sort(Del),
+ set(Var, mark_blocked_tab(Blocked, New)), % where_to_commit
+ mnesia_lib:del({Tab, active_replicas}, Node),
+ mnesia_lib:del({Tab, where_to_write}, Node).
+
+change_table_access_mode(Cs) ->
+ W = fun() ->
+ Tab = Cs#cstruct.name,
+ lists:foreach(fun(N) -> add_active_replica(Tab, N, Cs) end,
+ val({Tab, active_replicas}))
+ end,
+ update(W).
+
+
+%% node To now has tab loaded, but this must be undone
+%% This code is rpc:call'ed from the tab_copier process
+%% when it has *not* released it's table lock
+unannounce_add_table_copy(Tab, To) ->
+ catch del_active_replica(Tab, To),
+ case catch val({Tab , where_to_read}) of
+ To ->
+ mnesia_lib:set_remote_where_to_read(Tab);
+ _ ->
+ ignore
+ end.
+
+user_sync_tab(Tab) ->
+ case val(debug) of
+ trace ->
+ mnesia_subscr:subscribe(whereis(mnesia_event), {table, Tab});
+ _ ->
+ ignore
+ end,
+
+ case erase({sync_tab, Tab}) of
+ undefined ->
+ ok;
+ Pids ->
+ lists:foreach(fun(Pid) -> sync_reply(Pid, Tab) end, Pids)
+ end.
+
+i_have_tab(Tab) ->
+ case val({Tab, local_content}) of
+ true ->
+ mnesia_lib:set_local_content_whereabouts(Tab);
+ false ->
+ set({Tab, where_to_read}, node())
+ end,
+ add_active_replica(Tab, node()).
+
+sync_and_block_table_whereabouts(Tab, ToNode, RemoteS, AccessMode) when Tab /= schema ->
+ Current = val({current, db_nodes}),
+ Ns =
+ case lists:member(ToNode, Current) of
+ true -> Current -- [ToNode];
+ false -> Current
+ end,
+ remote_call(ToNode, block_table, [Tab]),
+ [remote_call(Node, add_active_replica, [Tab, ToNode, RemoteS, AccessMode]) ||
+ Node <- [ToNode | Ns]],
+ ok.
+
+sync_del_table_copy_whereabouts(Tab, ToNode) when Tab /= schema ->
+ Current = val({current, db_nodes}),
+ Ns =
+ case lists:member(ToNode, Current) of
+ true -> Current;
+ false -> [ToNode | Current]
+ end,
+ Args = [Tab, ToNode],
+ [remote_call(Node, unannounce_add_table_copy, Args) || Node <- Ns],
+ ok.
+
+get_info(Timeout) ->
+ case whereis(?SERVER_NAME) of
+ undefined ->
+ {timeout, Timeout};
+ Pid ->
+ Pid ! {self(), get_state},
+ receive
+ {?SERVER_NAME, State = #state{loader_queue=LQ,late_loader_queue=LLQ}} ->
+ {info,State#state{loader_queue=gb_trees:to_list(LQ),
+ late_loader_queue=gb_trees:to_list(LLQ)}}
+ after Timeout ->
+ {timeout, Timeout}
+ end
+ end.
+
+get_workers(Timeout) ->
+ case whereis(?SERVER_NAME) of
+ undefined ->
+ {timeout, Timeout};
+ Pid ->
+ Pid ! {self(), get_state},
+ receive
+ {?SERVER_NAME, State = #state{}} ->
+ {workers, get_loaders(State), get_senders(State), State#state.dumper_pid}
+ after Timeout ->
+ {timeout, Timeout}
+ end
+ end.
+
+info() ->
+ Tabs = mnesia_lib:local_active_tables(),
+ io:format( "---> Active tables <--- ~n", []),
+ info(Tabs).
+
+info([Tab | Tail]) ->
+ case val({Tab, storage_type}) of
+ disc_only_copies ->
+ info_format(Tab,
+ dets:info(Tab, size),
+ dets:info(Tab, file_size),
+ "bytes on disc");
+ _ ->
+ info_format(Tab,
+ ?ets_info(Tab, size),
+ ?ets_info(Tab, memory),
+ "words of mem")
+ end,
+ info(Tail);
+info([]) -> ok.
+
+
+info_format(Tab, Size, Mem, Media) ->
+ StrT = mnesia_lib:pad_name(atom_to_list(Tab), 15, []),
+ StrS = mnesia_lib:pad_name(integer_to_list(Size), 8, []),
+ StrM = mnesia_lib:pad_name(integer_to_list(Mem), 8, []),
+ io:format("~s: with ~s records occupying ~s ~s~n",
+ [StrT, StrS, StrM, Media]).
+
+%% Handle early arrived messages
+handle_early_msgs([Msg | Msgs], State) ->
+ %% The messages are in reverse order
+ case handle_early_msg(Msg, State) of
+%% {stop, Reason, Reply, State2} -> % Will not happen according to dialyzer
+%% {stop, Reason, Reply, State2};
+ {stop, Reason, State2} ->
+ {stop, Reason, State2};
+ {noreply, State2} ->
+ handle_early_msgs(Msgs, State2);
+ {reply, Reply, State2} ->
+ {call, _Call, From} = Msg,
+ reply(From, Reply),
+ handle_early_msgs(Msgs, State2)
+ end;
+handle_early_msgs([], State) ->
+ noreply(State).
+
+handle_early_msg({call, Msg, From}, State) ->
+ handle_call(Msg, From, State);
+handle_early_msg({cast, Msg}, State) ->
+ handle_cast(Msg, State);
+handle_early_msg({info, Msg}, State) ->
+ handle_info(Msg, State).
+
+noreply(State) ->
+ {noreply, State}.
+
+reply(undefined, Reply) ->
+ Reply;
+reply(ReplyTo, Reply) ->
+ gen_server:reply(ReplyTo, Reply),
+ Reply.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Worker management
+
+%% Returns new State
+add_worker(Worker = #dump_log{}, State) ->
+ InitBy = Worker#dump_log.initiated_by,
+ Queue = State#state.dumper_queue,
+ case lists:keymember(InitBy, #dump_log.initiated_by, Queue) of
+ true when Worker#dump_log.opt_reply_to == undefined ->
+ %% The same threshold has been exceeded again,
+ %% before we have had the possibility to
+ %% process the older one.
+ DetectedBy = {dump_log, InitBy},
+ Event = {mnesia_overload, DetectedBy},
+ mnesia_lib:report_system_event(Event);
+ _ ->
+ ignore
+ end,
+ Queue2 = Queue ++ [Worker],
+ State2 = State#state{dumper_queue = Queue2},
+ opt_start_worker(State2);
+add_worker(Worker = #schema_commit_lock{}, State) ->
+ Queue = State#state.dumper_queue,
+ Queue2 = Queue ++ [Worker],
+ State2 = State#state{dumper_queue = Queue2},
+ opt_start_worker(State2);
+add_worker(Worker = #net_load{}, State) ->
+ opt_start_worker(add_loader(Worker#net_load.table,Worker,State));
+add_worker(Worker = #send_table{}, State) ->
+ Queue = State#state.sender_queue,
+ State2 = State#state{sender_queue = Queue ++ [Worker]},
+ opt_start_worker(State2);
+add_worker(Worker = #disc_load{}, State) ->
+ opt_start_worker(add_loader(Worker#disc_load.table,Worker,State));
+% Block controller should be used for upgrading mnesia.
+add_worker(Worker = #block_controller{}, State) ->
+ Queue = State#state.dumper_queue,
+ Queue2 = [Worker | Queue],
+ State2 = State#state{dumper_queue = Queue2},
+ opt_start_worker(State2).
+
+add_loader(Tab,Worker,State = #state{loader_queue=LQ0}) ->
+ case gb_trees:is_defined(Tab, LQ0) of
+ true -> State;
+ false ->
+ LQ=gb_trees:insert(Tab, Worker, LQ0),
+ State#state{loader_queue=LQ}
+ end.
+
+%% Optionally start a worker
+%%
+%% Dumpers and loaders may run simultaneously
+%% but neither of them may run during schema commit.
+%% Loaders may not start if a schema commit is enqueued.
+opt_start_worker(State) when State#state.is_stopping == true ->
+ State;
+opt_start_worker(State) ->
+ %% Prioritize dumper and schema commit
+ %% by checking them first
+ case State#state.dumper_queue of
+ [Worker | _Rest] when State#state.dumper_pid == undefined ->
+ %% Great, a worker in queue and neither
+ %% a schema transaction is being
+ %% committed and nor a dumper is running
+
+ %% Start worker but keep him in the queue
+ if
+ is_record(Worker, schema_commit_lock) ->
+ ReplyTo = Worker#schema_commit_lock.owner,
+ reply(ReplyTo, granted),
+ {Owner, _Tag} = ReplyTo,
+ opt_start_loader(State#state{dumper_pid = Owner});
+
+ is_record(Worker, dump_log) ->
+ Pid = spawn_link(?MODULE, dump_and_reply, [self(), Worker]),
+ State2 = State#state{dumper_pid = Pid},
+
+ %% If the worker was a dumper we may
+ %% possibly be able to start a loader
+ %% or sender
+ State3 = opt_start_sender(State2),
+ opt_start_loader(State3);
+
+ is_record(Worker, block_controller) ->
+ case {get_senders(State), get_loaders(State)} of
+ {[], []} ->
+ ReplyTo = Worker#block_controller.owner,
+ reply(ReplyTo, granted),
+ {Owner, _Tag} = ReplyTo,
+ State#state{dumper_pid = Owner};
+ _ ->
+ State
+ end
+ end;
+ _ ->
+ %% Bad luck, try with a loader or sender instead
+ State2 = opt_start_sender(State),
+ opt_start_loader(State2)
+ end.
+
+opt_start_sender(State) ->
+ case State#state.sender_queue of
+ []-> State; %% No need
+ SenderQ ->
+ {NewS,Kept} = opt_start_sender2(SenderQ, get_senders(State),
+ [], get_loaders(State)),
+ State#state{sender_pid = NewS, sender_queue = Kept}
+ end.
+
+opt_start_sender2([], Pids,Kept, _) -> {Pids,Kept};
+opt_start_sender2([Sender|R], Pids, Kept, LoaderQ) ->
+ Tab = Sender#send_table.table,
+ Active = val({Tab, active_replicas}),
+ IgotIt = lists:member(node(), Active),
+ IsLoading = lists:any(fun({_Pid,Loader}) ->
+ Tab == element(#net_load.table, Loader)
+ end, LoaderQ),
+ if
+ IgotIt, IsLoading ->
+ %% I'm currently finishing loading the table let him wait
+ opt_start_sender2(R,Pids, [Sender|Kept], LoaderQ);
+ IgotIt ->
+ %% Start worker but keep him in the queue
+ Pid = spawn_link(?MODULE, send_and_reply,[self(), Sender]),
+ opt_start_sender2(R,[{Pid,Sender}|Pids],Kept,LoaderQ);
+ true ->
+ verbose("Send table failed ~p not active on this node ~n", [Tab]),
+ Sender#send_table.receiver_pid ! {copier_done, node()},
+ opt_start_sender2(R,Pids, Kept, LoaderQ)
+ end.
+
+opt_start_loader(State = #state{loader_queue = LoaderQ}) ->
+ Current = get_loaders(State),
+ Max = max_loaders(),
+ case gb_trees:is_empty(LoaderQ) of
+ true ->
+ State;
+ _ when length(Current) >= Max ->
+ State;
+ false ->
+ SchemaQueue = State#state.dumper_queue,
+ case lists:keymember(schema_commit_lock, 1, SchemaQueue) of
+ false ->
+ case pick_next(LoaderQ) of
+ {none,Rest} ->
+ State#state{loader_queue=Rest};
+ {Worker,Rest} ->
+ case already_loading(Worker, get_loaders(State)) of
+ true ->
+ opt_start_loader(State#state{loader_queue = Rest});
+ false ->
+ %% Start worker but keep him in the queue
+ Pid = load_and_reply(self(), Worker),
+ State#state{loader_pid=[{Pid,Worker}|get_loaders(State)],
+ loader_queue = Rest}
+ end
+ end;
+ true ->
+ %% Bad luck, we must wait for the schema commit
+ State
+ end
+ end.
+
+already_loading(#net_load{table=Tab},Loaders) ->
+ already_loading2(Tab,Loaders);
+already_loading(#disc_load{table=Tab},Loaders) ->
+ already_loading2(Tab,Loaders).
+
+already_loading2(Tab, [{_,#net_load{table=Tab}}|_]) -> true;
+already_loading2(Tab, [{_,#disc_load{table=Tab}}|_]) -> true;
+already_loading2(Tab, [_|Rest]) -> already_loading2(Tab,Rest);
+already_loading2(_,[]) -> false.
+
+start_remote_sender(Node, Tab, Receiver, Storage) ->
+ Msg = #send_table{table = Tab,
+ receiver_pid = Receiver,
+ remote_storage = Storage},
+ gen_server:cast({?SERVER_NAME, Node}, Msg).
+
+dump_and_reply(ReplyTo, Worker) ->
+ %% No trap_exit, die intentionally instead
+ Res = mnesia_dumper:opt_dump_log(Worker#dump_log.initiated_by),
+ ReplyTo ! #dumper_done{worker_pid = self(),
+ worker_res = Res},
+ unlink(ReplyTo),
+ exit(normal).
+
+send_and_reply(ReplyTo, Worker) ->
+ %% No trap_exit, die intentionally instead
+ Res = mnesia_loader:send_table(Worker#send_table.receiver_pid,
+ Worker#send_table.table,
+ Worker#send_table.remote_storage),
+ ReplyTo ! #sender_done{worker_pid = self(),
+ worker_res = Res},
+ unlink(ReplyTo),
+ exit(normal).
+
+load_and_reply(ReplyTo, Worker) ->
+ Load = load_table_fun(Worker),
+ SendAndReply =
+ fun() ->
+ process_flag(trap_exit, true),
+ Done = Load(),
+ ReplyTo ! Done#loader_done{worker_pid = self()},
+ unlink(ReplyTo),
+ exit(normal)
+ end,
+ spawn_link(SendAndReply).
+
+%% Now it is time to load the table
+%% but first we must check if it still is neccessary
+load_table_fun(#net_load{cstruct=Cs, table=Tab, reason=Reason, opt_reply_to=ReplyTo}) ->
+ LocalC = val({Tab, local_content}),
+ AccessMode = val({Tab, access_mode}),
+ ReadNode = val({Tab, where_to_read}),
+ Active = filter_active(Tab),
+ Done = #loader_done{is_loaded = true,
+ table_name = Tab,
+ needs_announce = false,
+ needs_sync = false,
+ needs_reply = (ReplyTo /= undefined),
+ reply_to = ReplyTo,
+ reply = {loaded, ok}
+ },
+ if
+ ReadNode == node() ->
+ %% Already loaded locally
+ fun() -> Done end;
+ LocalC == true ->
+ fun() ->
+ Res = mnesia_loader:disc_load_table(Tab, load_local_content),
+ Done#loader_done{reply = Res, needs_announce = true, needs_sync = true}
+ end;
+ AccessMode == read_only, Reason /= {dumper,add_table_copy} ->
+ fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
+ true ->
+ fun() ->
+ %% Either we cannot read the table yet
+ %% or someone is moving a replica between
+ %% two nodes
+ Res = mnesia_loader:net_load_table(Tab, Reason, Active, Cs),
+ case Res of
+ {loaded, ok} ->
+ Done#loader_done{needs_sync = true,
+ reply = Res};
+ {not_loaded, _} ->
+ Done#loader_done{is_loaded = false,
+ reply = Res}
+ end
+ end
+ end;
+load_table_fun(#disc_load{table=Tab, reason=Reason, opt_reply_to=ReplyTo}) ->
+ ReadNode = val({Tab, where_to_read}),
+ Active = filter_active(Tab),
+ Done = #loader_done{is_loaded = true,
+ table_name = Tab,
+ needs_announce = false,
+ needs_sync = false,
+ needs_reply = false
+ },
+ if
+ Active == [], ReadNode == nowhere ->
+ %% Not loaded anywhere, lets load it from disc
+ fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
+ ReadNode == nowhere ->
+ %% Already loaded on other node, lets get it
+ Cs = val({Tab, cstruct}),
+ fun() ->
+ case mnesia_loader:net_load_table(Tab, Reason, Active, Cs) of
+ {loaded, ok} ->
+ Done#loader_done{needs_sync = true};
+ {not_loaded, storage_unknown} ->
+ Done#loader_done{is_loaded = false};
+ {not_loaded, ErrReason} ->
+ Done#loader_done{is_loaded = false,
+ reply = {not_loaded,ErrReason}}
+ end
+ end;
+ true ->
+ %% Already readable, do not worry be happy
+ fun() -> Done end
+ end.
+
+disc_load_table(Tab, Reason, ReplyTo) ->
+ Done = #loader_done{is_loaded = true,
+ table_name = Tab,
+ needs_announce = false,
+ needs_sync = false,
+ needs_reply = ReplyTo /= undefined,
+ reply_to = ReplyTo,
+ reply = {loaded, ok}
+ },
+ Res = mnesia_loader:disc_load_table(Tab, Reason),
+ if
+ Res == {loaded, ok} ->
+ Done#loader_done{needs_announce = true,
+ needs_sync = true,
+ reply = Res};
+ ReplyTo /= undefined ->
+ Done#loader_done{is_loaded = false,
+ reply = Res};
+ true ->
+ fatal("Cannot load table ~p from disc: ~p~n", [Tab, Res])
+ end.
+
+filter_active(Tab) ->
+ ByForce = val({Tab, load_by_force}),
+ Active = val({Tab, active_replicas}),
+ Masters = mnesia_recover:get_master_nodes(Tab),
+ Ns = do_filter_active(ByForce, Active, Masters),
+ %% Reorder the so that we load from fastest first
+ LS = ?catch_val({Tab, storage_type}),
+ DOC = val({Tab, disc_only_copies}),
+ {Good,Worse} =
+ case LS of
+ disc_only_copies ->
+ G = mnesia_lib:intersect(Ns, DOC),
+ {G,Ns--G};
+ _ ->
+ G = Ns -- DOC,
+ {G,Ns--G}
+ end,
+ %% Pick a random node of the fastest
+ Len = length(Good),
+ if
+ Len > 0 ->
+ R = erlang:phash(node(), Len+1),
+ random(R-1,Good,Worse);
+ true ->
+ Worse
+ end.
+
+random(N, [H|R], Acc) when N > 0 ->
+ random(N-1,R, [H|Acc]);
+random(0, L, Acc) ->
+ L ++ Acc.
+
+do_filter_active(true, Active, _Masters) ->
+ Active;
+do_filter_active(false, Active, []) ->
+ Active;
+do_filter_active(false, Active, Masters) ->
+ mnesia_lib:intersect(Active, Masters).
+
+
diff --git a/lib/mnesia/src/mnesia_dumper.erl b/lib/mnesia/src/mnesia_dumper.erl
new file mode 100644
index 0000000000..f669d009c6
--- /dev/null
+++ b/lib/mnesia/src/mnesia_dumper.erl
@@ -0,0 +1,1218 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_dumper).
+
+%% The InitBy arg may be one of the following:
+%% scan_decisions Initial scan for decisions
+%% startup Initial dump during startup
+%% schema_prepare Dump initiated during schema transaction preparation
+%% schema_update Dump initiated during schema transaction commit
+%% fast_schema_update A schema_update, but ignores the log file
+%% user Dump initiated by user
+%% write_threshold Automatic dump caused by too many log writes
+%% time_threshold Automatic dump caused by timeout
+
+%% Public interface
+-export([
+ get_log_writes/0,
+ incr_log_writes/0,
+ raw_dump_table/2,
+ raw_named_dump_table/2,
+ start_regulator/0,
+ opt_dump_log/1,
+ update/3
+ ]).
+
+ %% Internal stuff
+-export([regulator_init/1]).
+
+-include("mnesia.hrl").
+-include_lib("kernel/include/file.hrl").
+
+-import(mnesia_lib, [fatal/2, dbg_out/2]).
+
+-define(REGULATOR_NAME, mnesia_dumper_load_regulator).
+-define(DumpToEtsMultiplier, 4).
+
+get_log_writes() ->
+ Max = mnesia_monitor:get_env(dump_log_write_threshold),
+ Prev = mnesia_lib:read_counter(trans_log_writes),
+ Left = mnesia_lib:read_counter(trans_log_writes_left),
+ Diff = Max - Left,
+ Prev + Diff.
+
+incr_log_writes() ->
+ Left = mnesia_lib:incr_counter(trans_log_writes_left, -1),
+ if
+ Left > 0 ->
+ ignore;
+ true ->
+ adjust_log_writes(true)
+ end.
+
+adjust_log_writes(DoCast) ->
+ Token = {mnesia_adjust_log_writes, self()},
+ case global:set_lock(Token, [node()], 1) of
+ false ->
+ ignore; %% Somebody else is sending a dump request
+ true ->
+ case DoCast of
+ false ->
+ ignore;
+ true ->
+ mnesia_controller:async_dump_log(write_threshold)
+ end,
+ Max = mnesia_monitor:get_env(dump_log_write_threshold),
+ Left = mnesia_lib:read_counter(trans_log_writes_left),
+ %% Don't care if we lost a few writes
+ mnesia_lib:set_counter(trans_log_writes_left, Max),
+ Diff = Max - Left,
+ mnesia_lib:incr_counter(trans_log_writes, Diff),
+ global:del_lock(Token, [node()])
+ end.
+
+%% Returns 'ok' or exits
+opt_dump_log(InitBy) ->
+ Reg = case whereis(?REGULATOR_NAME) of
+ undefined ->
+ nopid;
+ Pid when is_pid(Pid) ->
+ Pid
+ end,
+ perform_dump(InitBy, Reg).
+
+%% Scan for decisions
+perform_dump(InitBy, Regulator) when InitBy == scan_decisions ->
+ ?eval_debug_fun({?MODULE, perform_dump}, [InitBy]),
+
+ dbg_out("Transaction log dump initiated by ~w~n", [InitBy]),
+ scan_decisions(mnesia_log:previous_log_file(), InitBy, Regulator),
+ scan_decisions(mnesia_log:latest_log_file(), InitBy, Regulator);
+
+%% Propagate the log into the DAT-files
+perform_dump(InitBy, Regulator) ->
+ ?eval_debug_fun({?MODULE, perform_dump}, [InitBy]),
+ LogState = mnesia_log:prepare_log_dump(InitBy),
+ dbg_out("Transaction log dump initiated by ~w: ~w~n",
+ [InitBy, LogState]),
+ adjust_log_writes(false),
+ case LogState of
+ already_dumped ->
+ mnesia_recover:allow_garb(),
+ dumped;
+ {needs_dump, Diff} ->
+ U = mnesia_monitor:get_env(dump_log_update_in_place),
+ Cont = mnesia_log:init_log_dump(),
+ mnesia_recover:sync(),
+ case catch do_perform_dump(Cont, U, InitBy, Regulator, undefined) of
+ ok ->
+ ?eval_debug_fun({?MODULE, post_dump}, [InitBy]),
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_recover:dump_decision_tab();
+ false ->
+ mnesia_log:purge_some_logs()
+ end,
+ mnesia_recover:allow_garb(),
+ %% And now to the crucial point...
+ mnesia_log:confirm_log_dump(Diff);
+ {error, Reason} ->
+ {error, Reason};
+ {'EXIT', {Desc, Reason}} ->
+ case mnesia_monitor:get_env(auto_repair) of
+ true ->
+ mnesia_lib:important(Desc, Reason),
+ %% Ignore rest of the log
+ mnesia_log:confirm_log_dump(Diff);
+ false ->
+ fatal(Desc, Reason)
+ end
+ end;
+ {error, Reason} ->
+ {error, {"Cannot prepare log dump", Reason}}
+ end.
+
+scan_decisions(Fname, InitBy, Regulator) ->
+ Exists = mnesia_lib:exists(Fname),
+ case Exists of
+ false ->
+ ok;
+ true ->
+ Header = mnesia_log:trans_log_header(),
+ Name = previous_log,
+ mnesia_log:open_log(Name, Header, Fname, Exists,
+ mnesia_monitor:get_env(auto_repair), read_only),
+ Cont = start,
+ Res = (catch do_perform_dump(Cont, false, InitBy, Regulator, undefined)),
+ mnesia_log:close_log(Name),
+ case Res of
+ ok -> ok;
+ {'EXIT', Reason} -> {error, Reason}
+ end
+ end.
+
+do_perform_dump(Cont, InPlace, InitBy, Regulator, OldVersion) ->
+ case mnesia_log:chunk_log(Cont) of
+ {C2, Recs} ->
+ case catch insert_recs(Recs, InPlace, InitBy, Regulator, OldVersion) of
+ {'EXIT', R} ->
+ Reason = {"Transaction log dump error: ~p~n", [R]},
+ close_files(InPlace, {error, Reason}, InitBy),
+ exit(Reason);
+ Version ->
+ do_perform_dump(C2, InPlace, InitBy, Regulator, Version)
+ end;
+ eof ->
+ close_files(InPlace, ok, InitBy),
+ erase(mnesia_dumper_dets),
+ ok
+ end.
+
+insert_recs([Rec | Recs], InPlace, InitBy, Regulator, LogV) ->
+ regulate(Regulator),
+ case insert_rec(Rec, InPlace, InitBy, LogV) of
+ LogH when is_record(LogH, log_header) ->
+ insert_recs(Recs, InPlace, InitBy, Regulator, LogH#log_header.log_version);
+ _ ->
+ insert_recs(Recs, InPlace, InitBy, Regulator, LogV)
+ end;
+
+insert_recs([], _InPlace, _InitBy, _Regulator, Version) ->
+ Version.
+
+insert_rec(Rec, _InPlace, scan_decisions, _LogV) ->
+ if
+ is_record(Rec, commit) ->
+ ignore;
+ is_record(Rec, log_header) ->
+ ignore;
+ true ->
+ mnesia_recover:note_log_decision(Rec, scan_decisions)
+ end;
+insert_rec(Rec, InPlace, InitBy, LogV) when is_record(Rec, commit) ->
+ %% Determine the Outcome of the transaction and recover it
+ D = Rec#commit.decision,
+ case mnesia_recover:wait_for_decision(D, InitBy) of
+ {Tid, committed} ->
+ do_insert_rec(Tid, Rec, InPlace, InitBy, LogV);
+ {Tid, aborted} ->
+ mnesia_schema:undo_prepare_commit(Tid, Rec)
+ end;
+insert_rec(H, _InPlace, _InitBy, _LogV) when is_record(H, log_header) ->
+ CurrentVersion = mnesia_log:version(),
+ if
+ H#log_header.log_kind /= trans_log ->
+ exit({"Bad kind of transaction log", H});
+ H#log_header.log_version == CurrentVersion ->
+ ok;
+ H#log_header.log_version == "4.2" ->
+ ok;
+ H#log_header.log_version == "4.1" ->
+ ok;
+ H#log_header.log_version == "4.0" ->
+ ok;
+ true ->
+ fatal("Bad version of transaction log: ~p~n", [H])
+ end,
+ H;
+
+insert_rec(_Rec, _InPlace, _InitBy, _LogV) ->
+ ok.
+
+do_insert_rec(Tid, Rec, InPlace, InitBy, LogV) ->
+ case Rec#commit.schema_ops of
+ [] ->
+ ignore;
+ SchemaOps ->
+ case val({schema, storage_type}) of
+ ram_copies ->
+ insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy, LogV);
+ Storage ->
+ true = open_files(schema, Storage, InPlace, InitBy),
+ insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy, LogV)
+ end
+ end,
+ D = Rec#commit.disc_copies,
+ insert_ops(Tid, disc_copies, D, InPlace, InitBy, LogV),
+ case InitBy of
+ startup ->
+ DO = Rec#commit.disc_only_copies,
+ insert_ops(Tid, disc_only_copies, DO, InPlace, InitBy, LogV);
+ _ ->
+ ignore
+ end.
+
+
+update(_Tid, [], _DumperMode) ->
+ dumped;
+update(Tid, SchemaOps, DumperMode) ->
+ UseDir = mnesia_monitor:use_dir(),
+ Res = perform_update(Tid, SchemaOps, DumperMode, UseDir),
+ mnesia_controller:release_schema_commit_lock(),
+ Res.
+
+perform_update(_Tid, _SchemaOps, mandatory, true) ->
+ %% Force a dump of the transaction log in order to let the
+ %% dumper perform needed updates
+
+ InitBy = schema_update,
+ ?eval_debug_fun({?MODULE, dump_schema_op}, [InitBy]),
+ opt_dump_log(InitBy);
+perform_update(Tid, SchemaOps, _DumperMode, _UseDir) ->
+ %% No need for a full transaction log dump.
+ %% Ignore the log file and perform only perform
+ %% the corresponding updates.
+
+ InitBy = fast_schema_update,
+ InPlace = mnesia_monitor:get_env(dump_log_update_in_place),
+ ?eval_debug_fun({?MODULE, dump_schema_op}, [InitBy]),
+ case catch insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy,
+ mnesia_log:version()) of
+ {'EXIT', Reason} ->
+ Error = {error, {"Schema update error", Reason}},
+ close_files(InPlace, Error, InitBy),
+ fatal("Schema update error ~p ~p", [Reason, SchemaOps]);
+ _ ->
+ ?eval_debug_fun({?MODULE, post_dump}, [InitBy]),
+ close_files(InPlace, ok, InitBy),
+ ok
+ end.
+
+insert_ops(_Tid, _Storage, [], _InPlace, _InitBy, _) -> ok;
+insert_ops(Tid, Storage, [Op], InPlace, InitBy, Ver) when Ver >= "4.3"->
+ insert_op(Tid, Storage, Op, InPlace, InitBy),
+ ok;
+insert_ops(Tid, Storage, [Op | Ops], InPlace, InitBy, Ver) when Ver >= "4.3"->
+ insert_op(Tid, Storage, Op, InPlace, InitBy),
+ insert_ops(Tid, Storage, Ops, InPlace, InitBy, Ver);
+insert_ops(Tid, Storage, [Op | Ops], InPlace, InitBy, Ver) when Ver < "4.3" ->
+ insert_ops(Tid, Storage, Ops, InPlace, InitBy, Ver),
+ insert_op(Tid, Storage, Op, InPlace, InitBy).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Normal ops
+
+disc_insert(_Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy) ->
+ case open_files(Tab, Storage, InPlace, InitBy) of
+ true ->
+ case Storage of
+ disc_copies when Tab /= schema ->
+ mnesia_log:append({?MODULE,Tab}, {{Tab, Key}, Val, Op}),
+ ok;
+ _ ->
+ dets_insert(Op,Tab,Key,Val)
+ end;
+ false ->
+ ignore
+ end.
+
+%% To fix update_counter so that it behaves better.
+%% i.e. if nothing have changed in tab except update_counter
+%% trust that the value in the dets file is correct.
+%% Otherwise we will get a double increment.
+%% This is perfect but update_counter is a dirty op.
+
+dets_insert(Op,Tab,Key,Val) ->
+ case Op of
+ write ->
+ dets_updated(Tab,Key),
+ ok = dets:insert(Tab, Val);
+ delete ->
+ dets_updated(Tab,Key),
+ ok = dets:delete(Tab, Key);
+ update_counter ->
+ case dets_incr_counter(Tab,Key) of
+ true ->
+ {RecName, Incr} = Val,
+ case catch dets:update_counter(Tab, Key, Incr) of
+ CounterVal when is_integer(CounterVal) ->
+ ok;
+ _ when Incr < 0 ->
+ Zero = {RecName, Key, 0},
+ ok = dets:insert(Tab, Zero);
+ _ ->
+ Init = {RecName, Key, Incr},
+ ok = dets:insert(Tab, Init)
+ end;
+ false -> ok
+ end;
+ delete_object ->
+ dets_updated(Tab,Key),
+ ok = dets:delete_object(Tab, Val);
+ clear_table ->
+ dets_cleared(Tab),
+ ok = dets:match_delete(Tab, '_')
+ end.
+
+dets_updated(Tab,Key) ->
+ case get(mnesia_dumper_dets) of
+ undefined ->
+ Empty = gb_trees:empty(),
+ Tree = gb_trees:insert(Tab, gb_sets:singleton(Key), Empty),
+ put(mnesia_dumper_dets, Tree);
+ Tree ->
+ case gb_trees:lookup(Tab,Tree) of
+ {value, cleared} -> ignore;
+ {value, Set} ->
+ T = gb_trees:update(Tab, gb_sets:add(Key, Set), Tree),
+ put(mnesia_dumper_dets, T);
+ none ->
+ T = gb_trees:insert(Tab, gb_sets:singleton(Key), Tree),
+ put(mnesia_dumper_dets, T)
+ end
+ end.
+
+dets_incr_counter(Tab,Key) ->
+ case get(mnesia_dumper_dets) of
+ undefined -> false;
+ Tree ->
+ case gb_trees:lookup(Tab,Tree) of
+ {value, cleared} -> true;
+ {value, Set} -> gb_sets:is_member(Key, Set);
+ none -> false
+ end
+ end.
+
+dets_cleared(Tab) ->
+ case get(mnesia_dumper_dets) of
+ undefined ->
+ Empty = gb_trees:empty(),
+ Tree = gb_trees:insert(Tab, cleared, Empty),
+ put(mnesia_dumper_dets, Tree);
+ Tree ->
+ case gb_trees:lookup(Tab,Tree) of
+ {value, cleared} -> ignore;
+ _ ->
+ T = gb_trees:enter(Tab, cleared, Tree),
+ put(mnesia_dumper_dets, T)
+ end
+ end.
+
+insert(Tid, Storage, Tab, Key, [Val | Tail], Op, InPlace, InitBy) ->
+ insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy),
+ insert(Tid, Storage, Tab, Key, Tail, Op, InPlace, InitBy);
+
+insert(_Tid, _Storage, _Tab, _Key, [], _Op, _InPlace, _InitBy) ->
+ ok;
+
+insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy) ->
+ Item = {{Tab, Key}, Val, Op},
+ case InitBy of
+ startup ->
+ disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy);
+
+ _ when Storage == ram_copies ->
+ mnesia_tm:do_update_op(Tid, Storage, Item),
+ Snmp = mnesia_tm:prepare_snmp(Tab, Key, [Item]),
+ mnesia_tm:do_snmp(Tid, Snmp);
+
+ _ when Storage == disc_copies ->
+ disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy),
+ mnesia_tm:do_update_op(Tid, Storage, Item),
+ Snmp = mnesia_tm:prepare_snmp(Tab, Key, [Item]),
+ mnesia_tm:do_snmp(Tid, Snmp);
+
+ _ when Storage == disc_only_copies ->
+ mnesia_tm:do_update_op(Tid, Storage, Item),
+ Snmp = mnesia_tm:prepare_snmp(Tab, Key, [Item]),
+ mnesia_tm:do_snmp(Tid, Snmp);
+
+ _ when Storage == unknown ->
+ ignore
+ end.
+
+disc_delete_table(Tab, Storage) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ if
+ Storage == disc_only_copies; Tab == schema ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ file:delete(Dat);
+ true ->
+ DclFile = mnesia_lib:tab2dcl(Tab),
+ case get({?MODULE,Tab}) of
+ {opened_dumper, dcl} ->
+ del_opened_tab(Tab),
+ mnesia_log:unsafe_close_log(Tab);
+ _ ->
+ ok
+ end,
+ file:delete(DclFile),
+ DcdFile = mnesia_lib:tab2dcd(Tab),
+ file:delete(DcdFile),
+ ok
+ end,
+ erase({?MODULE, Tab});
+ false ->
+ ignore
+ end.
+
+disc_delete_indecies(_Tab, _Cs, Storage) when Storage /= disc_only_copies ->
+ ignore;
+disc_delete_indecies(Tab, Cs, disc_only_copies) ->
+ Indecies = Cs#cstruct.index,
+ mnesia_index:del_transient(Tab, Indecies, disc_only_copies).
+
+insert_op(Tid, Storage, {{Tab, Key}, Val, Op}, InPlace, InitBy) ->
+ %% Propagate to disc only
+ disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy);
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% NOTE that all operations below will only
+%% be performed if the dump is initiated by
+%% startup or fast_schema_update
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+insert_op(_Tid, schema_ops, _OP, _InPlace, Initby)
+ when Initby /= startup,
+ Initby /= fast_schema_update,
+ Initby /= schema_update ->
+ ignore;
+
+insert_op(Tid, _, {op, rec, Storage, Item}, InPlace, InitBy) ->
+ {{Tab, Key}, ValList, Op} = Item,
+ insert(Tid, Storage, Tab, Key, ValList, Op, InPlace, InitBy);
+
+insert_op(Tid, _, {op, change_table_copy_type, N, FromS, ToS, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Val = mnesia_schema:insert_cstruct(Tid, Cs, true), % Update ram only
+ {schema, Tab, _} = Val,
+ case lists:member(N, val({current, db_nodes})) of
+ true when InitBy /= startup ->
+ mnesia_controller:add_active_replica(Tab, N, Cs);
+ _ ->
+ ignore
+ end,
+ if
+ N == node() ->
+ Dmp = mnesia_lib:tab2dmp(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ Dcd = mnesia_lib:tab2dcd(Tab),
+ Dcl = mnesia_lib:tab2dcl(Tab),
+ case {FromS, ToS} of
+ {ram_copies, disc_copies} when Tab == schema ->
+ ok = ensure_rename(Dmp, Dat);
+ {ram_copies, disc_copies} ->
+ file:delete(Dcl),
+ ok = ensure_rename(Dmp, Dcd);
+ {disc_copies, ram_copies} when Tab == schema ->
+ mnesia_lib:set(use_dir, false),
+ mnesia_monitor:unsafe_close_dets(Tab),
+ file:delete(Dat);
+ {disc_copies, ram_copies} ->
+ file:delete(Dcl),
+ file:delete(Dcd);
+ {ram_copies, disc_only_copies} ->
+ ok = ensure_rename(Dmp, Dat),
+ true = open_files(Tab, disc_only_copies, InPlace, InitBy),
+ %% ram_delete_table must be done before init_indecies,
+ %% it uses info which is reset in init_indecies,
+ %% it doesn't matter, because init_indecies don't use
+ %% the ram replica of the table when creating the disc
+ %% index; Could be improved :)
+ mnesia_schema:ram_delete_table(Tab, FromS),
+ PosList = Cs#cstruct.index,
+ mnesia_index:init_indecies(Tab, disc_only_copies, PosList);
+ {disc_only_copies, ram_copies} ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ disc_delete_indecies(Tab, Cs, disc_only_copies),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ mnesia_controller:get_disc_copy(Tab)
+ end,
+ disc_delete_table(Tab, disc_only_copies);
+ {disc_copies, disc_only_copies} ->
+ ok = ensure_rename(Dmp, Dat),
+ true = open_files(Tab, disc_only_copies, InPlace, InitBy),
+ mnesia_schema:ram_delete_table(Tab, FromS),
+ PosList = Cs#cstruct.index,
+ mnesia_index:init_indecies(Tab, disc_only_copies, PosList),
+ file:delete(Dcl),
+ file:delete(Dcd);
+ {disc_only_copies, disc_copies} ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ disc_delete_indecies(Tab, Cs, disc_only_copies),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ mnesia_log:ets2dcd(Tab),
+ mnesia_controller:get_disc_copy(Tab),
+ disc_delete_table(Tab, disc_only_copies)
+ end
+ end;
+ true ->
+ ignore
+ end,
+ S = val({schema, storage_type}),
+ disc_insert(Tid, S, schema, Tab, Val, write, InPlace, InitBy);
+
+insert_op(Tid, _, {op, transform, _Fun, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ disc_copies ->
+ open_dcl(Cs#cstruct.name);
+ _ ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+%%% Operations below this are handled without using the logg.
+
+insert_op(Tid, _, {op, restore_recreate, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Type = Cs#cstruct.type,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ %% Delete all possibly existing files and tables
+ disc_delete_table(Tab, Storage),
+ disc_delete_indecies(Tab, Cs, Storage),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} -> ignore;
+ _ ->
+ mnesia_schema:ram_delete_table(Tab, Storage),
+ mnesia_checkpoint:tm_del_copy(Tab, node())
+ end
+ end,
+ %% And create new ones..
+ if
+ (InitBy == startup) or (Storage == unknown) ->
+ ignore;
+ Storage == ram_copies ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ mnesia_monitor:mktab(Tab, Args);
+ Storage == disc_copies ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ mnesia_monitor:mktab(Tab, Args),
+ File = mnesia_lib:tab2dcd(Tab),
+ FArg = [{file, File}, {name, {mnesia,create}},
+ {repair, false}, {mode, read_write}],
+ {ok, Log} = mnesia_monitor:open_log(FArg),
+ mnesia_monitor:unsafe_close_log(Log);
+ Storage == disc_only_copies ->
+ File = mnesia_lib:tab2dat(Tab),
+ file:delete(File),
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ mnesia_monitor:open_dets(Tab, Args)
+ end,
+ insert_op(Tid, ignore, {op, create_table, TabDef}, InPlace, InitBy);
+
+insert_op(Tid, _, {op, create_table, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, false, InPlace, InitBy),
+ Tab = Cs#cstruct.name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ case InitBy of
+ startup ->
+ case Storage of
+ unknown ->
+ ignore;
+ ram_copies ->
+ ignore;
+ disc_copies ->
+ Dcd = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dcd) of
+ true -> ignore;
+ false ->
+ mnesia_log:open_log(temp,
+ mnesia_log:dcl_log_header(),
+ Dcd,
+ false,
+ false,
+ read_write),
+ mnesia_log:unsafe_close_log(temp)
+ end;
+ _ ->
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ case mnesia_monitor:open_dets(Tab, Args) of
+ {ok, _} ->
+ mnesia_monitor:unsafe_close_dets(Tab);
+ {error, Error} ->
+ exit({"Failed to create dets table", Error})
+ end
+ end;
+ _ ->
+ Copies = mnesia_lib:copy_holders(Cs),
+ Active = mnesia_lib:intersect(Copies, val({current, db_nodes})),
+ [mnesia_controller:add_active_replica(Tab, N, Cs) || N <- Active],
+
+ case Storage of
+ unknown ->
+ mnesia_lib:unset({Tab, create_table}),
+ case Cs#cstruct.local_content of
+ true ->
+ ignore;
+ false ->
+ mnesia_lib:set_remote_where_to_read(Tab)
+ end;
+ _ ->
+ case Cs#cstruct.local_content of
+ true ->
+ mnesia_lib:set_local_content_whereabouts(Tab);
+ false ->
+ mnesia_lib:set({Tab, where_to_read}, node())
+ end,
+ case Storage of
+ ram_copies ->
+ ignore;
+ _ ->
+ %% Indecies are still created by loader
+ disc_delete_indecies(Tab, Cs, Storage)
+ %% disc_delete_table(Tab, Storage)
+ end,
+
+ %% Update whereabouts and create table
+ mnesia_controller:create_table(Tab),
+ mnesia_lib:unset({Tab, create_table})
+ end
+ end;
+
+insert_op(_Tid, _, {op, dump_table, Size, TabDef}, _InPlace, _InitBy) ->
+ case Size of
+ unknown ->
+ ignore;
+ _ ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Dmp = mnesia_lib:tab2dmp(Tab),
+ Dat = mnesia_lib:tab2dcd(Tab),
+ case Size of
+ 0 ->
+ %% Assume that table files already are closed
+ file:delete(Dmp),
+ file:delete(Dat);
+ _ ->
+ ok = ensure_rename(Dmp, Dat)
+ end
+ end;
+
+insert_op(Tid, _, {op, delete_table, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ ignore;
+ Storage ->
+ disc_delete_table(Tab, Storage),
+ disc_delete_indecies(Tab, Cs, Storage),
+ case InitBy of
+ startup ->
+ ignore;
+ _ ->
+ mnesia_schema:ram_delete_table(Tab, Storage),
+ mnesia_checkpoint:tm_del_copy(Tab, node())
+ end
+ end,
+ delete_cstruct(Tid, Cs, InPlace, InitBy);
+
+insert_op(Tid, _, {op, clear_table, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ ignore;
+ Storage ->
+ Oid = '_', %%val({Tab, wild_pattern}),
+ if Storage == disc_copies ->
+ open_dcl(Cs#cstruct.name);
+ true ->
+ ignore
+ end,
+ %% Need to catch this, it crashes on ram_copies if
+ %% the op comes before table is loaded at startup.
+ catch insert(Tid, Storage, Tab, '_', Oid, clear_table, InPlace, InitBy)
+ end;
+
+insert_op(Tid, _, {op, merge_schema, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case Cs#cstruct.name of
+ schema ->
+ %% If we bootstrap an empty (diskless) mnesia from another node
+ %% we might have changed the storage_type of schema.
+ %% I think this is a good place to do it.
+ Update = fun(NS = {Node,Storage}) ->
+ case mnesia_lib:cs_to_storage_type(Node, Cs) of
+ Storage -> NS;
+ disc_copies when Node == node() ->
+ Dir = mnesia_lib:dir(),
+ ok = mnesia_schema:opt_create_dir(true, Dir),
+ mnesia_schema:purge_dir(Dir, []),
+ mnesia_log:purge_all_logs(),
+
+ mnesia_lib:set(use_dir, true),
+ mnesia_log:init(),
+ Ns = val({current, db_nodes}),
+ F = fun(U) -> mnesia_recover:log_mnesia_up(U) end,
+ lists:foreach(F, Ns),
+ raw_named_dump_table(schema, dat),
+ temp_set_master_nodes(),
+ {Node,disc_copies};
+ CSstorage ->
+ {Node,CSstorage}
+ end
+ end,
+
+ W2C0 = val({schema, where_to_commit}),
+ W2C = case W2C0 of
+ {blocked, List} ->
+ {blocked,lists:map(Update,List)};
+ List ->
+ lists:map(Update,List)
+ end,
+ if W2C == W2C0 -> ignore;
+ true -> mnesia_lib:set({schema, where_to_commit}, W2C)
+ end;
+ _ ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, false, InPlace, InitBy);
+
+insert_op(Tid, _, {op, del_table_copy, Storage, Node, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ if
+ Tab == schema, Storage == ram_copies ->
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+ Tab /= schema ->
+ mnesia_controller:del_active_replica(Tab, Node),
+ mnesia_lib:del({Tab, Storage}, Node),
+ if
+ Node == node() ->
+ case Cs#cstruct.local_content of
+ true -> mnesia_lib:set({Tab, where_to_read}, nowhere);
+ false -> mnesia_lib:set_remote_where_to_read(Tab)
+ end,
+ mnesia_lib:del({schema, local_tables}, Tab),
+ mnesia_lib:set({Tab, storage_type}, unknown),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy),
+ disc_delete_table(Tab, Storage),
+ disc_delete_indecies(Tab, Cs, Storage),
+ mnesia_schema:ram_delete_table(Tab, Storage),
+ mnesia_checkpoint:tm_del_copy(Tab, Node);
+ true ->
+ case val({Tab, where_to_read}) of
+ Node ->
+ mnesia_lib:set_remote_where_to_read(Tab);
+ _ ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy)
+ end
+ end;
+
+insert_op(Tid, _, {op, add_table_copy, _Storage, _Node, TabDef}, InPlace, InitBy) ->
+ %% During prepare commit, the files was created
+ %% and the replica was announced
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, add_snmp, _Us, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, del_snmp, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ if
+ InitBy /= startup,
+ Storage /= unknown ->
+ case ?catch_val({Tab, {index, snmp}}) of
+ {'EXIT', _} ->
+ ignore;
+ Stab ->
+ mnesia_snmp_hook:delete_table(Tab, Stab),
+ mnesia_lib:unset({Tab, {index, snmp}})
+ end;
+ true ->
+ ignore
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, add_index, Pos, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = insert_cstruct(Tid, Cs, true, InPlace, InitBy),
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ case InitBy of
+ startup when Storage == disc_only_copies ->
+ true = open_files(Tab, Storage, InPlace, InitBy),
+ mnesia_index:init_indecies(Tab, Storage, [Pos]);
+ startup ->
+ ignore;
+ _ ->
+ mnesia_index:init_indecies(Tab, Storage, [Pos])
+ end;
+
+insert_op(Tid, _, {op, del_index, Pos, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ case InitBy of
+ startup when Storage == disc_only_copies ->
+ mnesia_index:del_index_table(Tab, Storage, Pos);
+ startup ->
+ ignore;
+ _ ->
+ mnesia_index:del_index_table(Tab, Storage, Pos)
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, change_table_access_mode,TabDef, _OldAccess, _Access}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ case InitBy of
+ startup -> ignore;
+ _ -> mnesia_controller:change_table_access_mode(Cs)
+ end,
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, change_table_load_order, TabDef, _OldLevel, _Level}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, delete_property, TabDef, PropKey}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_lib:unset({Tab, user_property, PropKey}),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, write_property, TabDef, _Prop}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy);
+
+insert_op(Tid, _, {op, change_table_frag, _Change, TabDef}, InPlace, InitBy) ->
+ Cs = mnesia_schema:list2cs(TabDef),
+ insert_cstruct(Tid, Cs, true, InPlace, InitBy).
+
+open_files(Tab, Storage, UpdateInPlace, InitBy)
+ when Storage /= unknown, Storage /= ram_copies ->
+ case get({?MODULE, Tab}) of
+ undefined ->
+ case ?catch_val({Tab, setorbag}) of
+ {'EXIT', _} ->
+ false;
+ Type ->
+ case Storage of
+ disc_copies when Tab /= schema ->
+ Bool = open_disc_copies(Tab, InitBy),
+ Bool;
+ _ ->
+ Fname = prepare_open(Tab, UpdateInPlace),
+ Args = [{file, Fname},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, mnesia_lib:disk_type(Tab, Type)}],
+ {ok, _} = mnesia_monitor:open_dets(Tab, Args),
+ put({?MODULE, Tab}, {opened_dumper, dat}),
+ true
+ end
+ end;
+ already_dumped ->
+ false;
+ {opened_dumper, _} ->
+ true
+ end;
+open_files(_Tab, _Storage, _UpdateInPlace, _InitBy) ->
+ false.
+
+open_disc_copies(Tab, InitBy) ->
+ DclF = mnesia_lib:tab2dcl(Tab),
+ DumpEts =
+ case file:read_file_info(DclF) of
+ {error, enoent} ->
+ false;
+ {ok, DclInfo} ->
+ DcdF = mnesia_lib:tab2dcd(Tab),
+ case file:read_file_info(DcdF) of
+ {error, Reason} ->
+ mnesia_lib:dbg_out("File ~p info_error ~p ~n",
+ [DcdF, Reason]),
+ true;
+ {ok, DcdInfo} ->
+ Mul = case ?catch_val(dc_dump_limit) of
+ {'EXIT', _} -> ?DumpToEtsMultiplier;
+ Val -> Val
+ end,
+ DcdInfo#file_info.size =< (DclInfo#file_info.size * Mul)
+ end
+ end,
+ if
+ DumpEts == false; InitBy == startup ->
+ mnesia_log:open_log({?MODULE,Tab},
+ mnesia_log:dcl_log_header(),
+ DclF,
+ mnesia_lib:exists(DclF),
+ mnesia_monitor:get_env(auto_repair),
+ read_write),
+ put({?MODULE, Tab}, {opened_dumper, dcl}),
+ true;
+ true ->
+ mnesia_log:ets2dcd(Tab),
+ put({?MODULE, Tab}, already_dumped),
+ false
+ end.
+
+%% Always opens the dcl file for writing overriding already_dumped
+%% mechanismen, used for schema transactions.
+open_dcl(Tab) ->
+ case get({?MODULE, Tab}) of
+ {opened_dumper, _} ->
+ true;
+ _ -> %% undefined or already_dumped
+ DclF = mnesia_lib:tab2dcl(Tab),
+ mnesia_log:open_log({?MODULE,Tab},
+ mnesia_log:dcl_log_header(),
+ DclF,
+ mnesia_lib:exists(DclF),
+ mnesia_monitor:get_env(auto_repair),
+ read_write),
+ put({?MODULE, Tab}, {opened_dumper, dcl}),
+ true
+ end.
+
+prepare_open(Tab, UpdateInPlace) ->
+ Dat = mnesia_lib:tab2dat(Tab),
+ case UpdateInPlace of
+ true ->
+ Dat;
+ false ->
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ case catch mnesia_lib:copy_file(Dat, Tmp) of
+ ok ->
+ Tmp;
+ Error ->
+ fatal("Cannot copy dets file ~p to ~p: ~p~n",
+ [Dat, Tmp, Error])
+ end
+ end.
+
+del_opened_tab(Tab) ->
+ erase({?MODULE, Tab}).
+
+close_files(UpdateInPlace, Outcome, InitBy) -> % Update in place
+ close_files(UpdateInPlace, Outcome, InitBy, get()).
+
+close_files(InPlace, Outcome, InitBy, [{{?MODULE, Tab}, already_dumped} | Tail]) ->
+ erase({?MODULE, Tab}),
+ close_files(InPlace, Outcome, InitBy, Tail);
+close_files(InPlace, Outcome, InitBy, [{{?MODULE, Tab}, {opened_dumper, Type}} | Tail]) ->
+ erase({?MODULE, Tab}),
+ case val({Tab, storage_type}) of
+ disc_only_copies when InitBy /= startup ->
+ ignore;
+ disc_copies when Tab /= schema ->
+ mnesia_log:close_log({?MODULE,Tab});
+ Storage ->
+ do_close(InPlace, Outcome, Tab, Type, Storage)
+ end,
+ close_files(InPlace, Outcome, InitBy, Tail);
+
+close_files(InPlace, Outcome, InitBy, [_ | Tail]) ->
+ close_files(InPlace, Outcome, InitBy, Tail);
+close_files(_, _, _InitBy, []) ->
+ ok.
+
+%% If storage is unknown during close clean up files, this can happen if timing
+%% is right and dirty_write conflicts with schema operations.
+do_close(_, _, Tab, dcl, unknown) ->
+ mnesia_log:close_log({?MODULE,Tab}),
+ file:delete(mnesia_lib:tab2dcl(Tab));
+do_close(_, _, Tab, dcl, _) -> %% To be safe, can it happen?
+ mnesia_log:close_log({?MODULE,Tab});
+
+do_close(InPlace, Outcome, Tab, dat, Storage) ->
+ mnesia_monitor:close_dets(Tab),
+ if
+ Storage == unknown, InPlace == true ->
+ file:delete(mnesia_lib:tab2dat(Tab));
+ InPlace == true ->
+ %% Update in place
+ ok;
+ Outcome == ok, Storage /= unknown ->
+ %% Success: swap tmp files with dat files
+ TabDat = mnesia_lib:tab2dat(Tab),
+ ok = file:rename(mnesia_lib:tab2tmp(Tab), TabDat);
+ true ->
+ file:delete(mnesia_lib:tab2tmp(Tab))
+ end.
+
+
+ensure_rename(From, To) ->
+ case mnesia_lib:exists(From) of
+ true ->
+ file:rename(From, To);
+ false ->
+ case mnesia_lib:exists(To) of
+ true ->
+ ok;
+ false ->
+ {error, {rename_failed, From, To}}
+ end
+ end.
+
+insert_cstruct(Tid, Cs, KeepWhereabouts, InPlace, InitBy) ->
+ Val = mnesia_schema:insert_cstruct(Tid, Cs, KeepWhereabouts),
+ {schema, Tab, _} = Val,
+ S = val({schema, storage_type}),
+ disc_insert(Tid, S, schema, Tab, Val, write, InPlace, InitBy),
+ Tab.
+
+delete_cstruct(Tid, Cs, InPlace, InitBy) ->
+ Val = mnesia_schema:delete_cstruct(Tid, Cs),
+ {schema, Tab, _} = Val,
+ S = val({schema, storage_type}),
+ disc_insert(Tid, S, schema, Tab, Val, delete, InPlace, InitBy),
+ Tab.
+
+
+temp_set_master_nodes() ->
+ Tabs = val({schema, local_tables}),
+ Masters = [{Tab, (val({Tab, disc_copies}) ++
+ val({Tab, ram_copies}) ++
+ val({Tab, disc_only_copies})) -- [node()]}
+ || Tab <- Tabs],
+ %% UseDir = false since we don't want to remember these
+ %% masternodes and we are running (really soon anyway) since we want this
+ %% to be known during table loading.
+ mnesia_recover:log_master_nodes(Masters, false, yes),
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Raw dump of table. Dumper must have unique access to the ets table.
+
+raw_named_dump_table(Tab, Ftype) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_lib:lock_table(Tab),
+ TmpFname = mnesia_lib:tab2tmp(Tab),
+ Fname =
+ case Ftype of
+ dat -> mnesia_lib:tab2dat(Tab);
+ dmp -> mnesia_lib:tab2dmp(Tab)
+ end,
+ file:delete(TmpFname),
+ file:delete(Fname),
+ TabSize = ?ets_info(Tab, size),
+ TabRef = Tab,
+ DiskType = mnesia_lib:disk_type(Tab),
+ Args = [{file, TmpFname},
+ {keypos, 2},
+ %% {ram_file, true},
+ {estimated_no_objects, TabSize + 256},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, DiskType}],
+ case mnesia_lib:dets_sync_open(TabRef, Args) of
+ {ok, TabRef} ->
+ Storage = ram_copies,
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+
+ case catch raw_dump_table(TabRef, Tab) of
+ {'EXIT', Reason} ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFname),
+ mnesia_lib:unlock_table(Tab),
+ exit({"Dump of table to disc failed", Reason});
+ ok ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_lib:dets_sync_close(Tab),
+ mnesia_lib:unlock_table(Tab),
+ ok = file:rename(TmpFname, Fname)
+ end;
+ {error, Reason} ->
+ mnesia_lib:unlock_table(Tab),
+ exit({"Open of file before dump to disc failed", Reason})
+ end;
+ false ->
+ exit({has_no_disc, node()})
+ end.
+
+raw_dump_table(DetsRef, EtsRef) ->
+ dets:from_ets(DetsRef, EtsRef).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Load regulator
+%%
+%% This is a poor mans substitute for a fair scheduler algorithm
+%% in the Erlang emulator. The mnesia_dumper process performs many
+%% costly BIF invokations and must pay for this. But since the
+%% Emulator does not handle this properly we must compensate for
+%% this with some form of load regulation of ourselves in order to
+%% not steal all computation power in the Erlang Emulator ans make
+%% other processes starve. Hopefully this is a temporary solution.
+
+start_regulator() ->
+ case mnesia_monitor:get_env(dump_log_load_regulation) of
+ false ->
+ nopid;
+ true ->
+ N = ?REGULATOR_NAME,
+ case mnesia_monitor:start_proc(N, ?MODULE, regulator_init, [self()]) of
+ {ok, Pid} ->
+ Pid;
+ {error, Reason} ->
+ fatal("Failed to start ~n: ~p~n", [N, Reason])
+ end
+ end.
+
+regulator_init(Parent) ->
+ %% No need for trapping exits.
+ %% Using low priority causes the regulation
+ process_flag(priority, low),
+ register(?REGULATOR_NAME, self()),
+ proc_lib:init_ack(Parent, {ok, self()}),
+ regulator_loop().
+
+regulator_loop() ->
+ receive
+ {regulate, From} ->
+ From ! {regulated, self()},
+ regulator_loop();
+ {stop, From} ->
+ From ! {stopped, self()},
+ exit(normal)
+ end.
+
+regulate(nopid) ->
+ ok;
+regulate(RegulatorPid) ->
+ RegulatorPid ! {regulate, self()},
+ receive
+ {regulated, RegulatorPid} -> ok
+ end.
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
diff --git a/lib/mnesia/src/mnesia_event.erl b/lib/mnesia/src/mnesia_event.erl
new file mode 100644
index 0000000000..ec6b99ecaa
--- /dev/null
+++ b/lib/mnesia/src/mnesia_event.erl
@@ -0,0 +1,260 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_event).
+
+-behaviour(gen_event).
+%-behaviour(mnesia_event).
+
+%% gen_event callback interface
+-export([init/1,
+ handle_event/2,
+ handle_call/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3]).
+
+-record(state, {nodes = [],
+ dumped_core = false, %% only dump fatal core once
+ args}).
+
+%%%----------------------------------------------------------------
+%%% Callback functions from gen_server
+%%%----------------------------------------------------------------
+
+%%-----------------------------------------------------------------
+%% init(Args) ->
+%% {ok, State} | Error
+%%-----------------------------------------------------------------
+
+init(Args) ->
+ {ok, #state{args = Args}}.
+
+%%-----------------------------------------------------------------
+%% handle_event(Event, State) ->
+%% {ok, NewState} | remove_handler |
+%% {swap_handler, Args1, State1, Mod2, Args2}
+%%-----------------------------------------------------------------
+
+handle_event(Event, State) ->
+ handle_any_event(Event, State).
+
+%%-----------------------------------------------------------------
+%% handle_info(Msg, State) ->
+%% {ok, NewState} | remove_handler |
+%% {swap_handler, Args1, State1, Mod2, Args2}
+%%-----------------------------------------------------------------
+
+handle_info(Msg, State) ->
+ handle_any_event(Msg, State),
+ {ok, State}.
+
+%%-----------------------------------------------------------------
+%% handle_call(Event, State) ->
+%% {ok, Reply, NewState} | {remove_handler, Reply} |
+%% {swap_handler, Reply, Args1, State1, Mod2, Args2}
+%%-----------------------------------------------------------------
+
+handle_call(Msg, State) ->
+ Reply = ok,
+ {ok, NewState} = handle_any_event(Msg, State),
+ {ok, Reply, NewState}.
+
+%%-----------------------------------------------------------------
+%% terminate(Reason, State) ->
+%% AnyVal
+%%-----------------------------------------------------------------
+
+terminate(_Reason, _State) ->
+ ok.
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%%-----------------------------------------------------------------
+%% Internal functions
+%%-----------------------------------------------------------------
+
+handle_any_event({mnesia_system_event, Event}, State) ->
+ handle_system_event(Event, State);
+handle_any_event({mnesia_table_event, Event}, State) ->
+ handle_table_event(Event, State);
+handle_any_event(Msg, State) ->
+ report_error("~p got unexpected event: ~p~n", [?MODULE, Msg]),
+ {ok, State}.
+
+handle_table_event({Oper, Record, TransId}, State) ->
+ report_info("~p performed by ~p on record:~n\t~p~n",
+ [Oper, TransId, Record]),
+ {ok, State}.
+
+handle_system_event({mnesia_checkpoint_activated, _Checkpoint}, State) ->
+ {ok, State};
+
+handle_system_event({mnesia_checkpoint_deactivated, _Checkpoint}, State) ->
+ {ok, State};
+
+handle_system_event({mnesia_up, Node}, State) ->
+ Nodes = [Node | State#state.nodes],
+ {ok, State#state{nodes = Nodes}};
+
+handle_system_event({mnesia_down, Node}, State) ->
+ case mnesia:system_info(fallback_activated) of
+ true ->
+ case mnesia_monitor:get_env(fallback_error_function) of
+ {mnesia, lkill} ->
+ Msg = "A fallback is installed and Mnesia "
+ "must be restarted. Forcing shutdown "
+ "after mnesia_down from ~p...~n",
+ report_fatal(Msg, [Node], nocore, State#state.dumped_core),
+ mnesia:lkill(),
+ exit(fatal);
+ {UserMod, UserFunc} ->
+ Msg = "Warning: A fallback is installed and Mnesia got mnesia_down "
+ "from ~p. ~n",
+ report_info(Msg, [Node]),
+ case catch apply(UserMod, UserFunc, [Node]) of
+ {'EXIT', {undef, _Reason}} ->
+ %% Backward compatibility
+ apply(UserMod, UserFunc, []);
+ {'EXIT', Reason} ->
+ exit(Reason);
+ _ ->
+ ok
+ end,
+ Nodes = lists:delete(Node, State#state.nodes),
+ {ok, State#state{nodes = Nodes}}
+ end;
+ false ->
+ Nodes = lists:delete(Node, State#state.nodes),
+ {ok, State#state{nodes = Nodes}}
+ end;
+
+handle_system_event({mnesia_overload, Details}, State) ->
+ report_warning("Mnesia is overloaded: ~p~n", [Details]),
+ {ok, State};
+
+handle_system_event({mnesia_info, Format, Args}, State) ->
+ report_info(Format, Args),
+ {ok, State};
+
+handle_system_event({mnesia_warning, Format, Args}, State) ->
+ report_warning(Format, Args),
+ {ok, State};
+
+handle_system_event({mnesia_error, Format, Args}, State) ->
+ report_error(Format, Args),
+ {ok, State};
+
+handle_system_event({mnesia_fatal, Format, Args, BinaryCore}, State) ->
+ report_fatal(Format, Args, BinaryCore, State#state.dumped_core),
+ {ok, State#state{dumped_core = true}};
+
+handle_system_event({inconsistent_database, Reason, Node}, State) ->
+ report_error("mnesia_event got {inconsistent_database, ~w, ~w}~n",
+ [Reason, Node]),
+ {ok, State};
+
+handle_system_event({mnesia_user, Event}, State) ->
+ report_info("User event: ~p~n", [Event]),
+ {ok, State};
+
+handle_system_event(Msg, State) ->
+ report_error("mnesia_event got unexpected system event: ~p~n", [Msg]),
+ {ok, State}.
+
+report_info(Format0, Args0) ->
+ Format = "Mnesia(~p): " ++ Format0,
+ Args = [node() | Args0],
+ case global:whereis_name(mnesia_global_logger) of
+ undefined ->
+ io:format(Format, Args);
+ Pid ->
+ io:format(Pid, Format, Args)
+ end.
+
+report_warning(Format0, Args0) ->
+ Format = "Mnesia(~p): ** WARNING ** " ++ Format0,
+ Args = [node() | Args0],
+ case erlang:function_exported(error_logger, warning_msg, 2) of
+ true ->
+ error_logger:warning_msg(Format, Args);
+ false ->
+ error_logger:format(Format, Args)
+ end,
+ case global:whereis_name(mnesia_global_logger) of
+ undefined ->
+ ok;
+ Pid ->
+ io:format(Pid, Format, Args)
+ end.
+
+report_error(Format0, Args0) ->
+ Format = "Mnesia(~p): ** ERROR ** " ++ Format0,
+ Args = [node() | Args0],
+ error_logger:format(Format, Args),
+ case global:whereis_name(mnesia_global_logger) of
+ undefined ->
+ ok;
+ Pid ->
+ io:format(Pid, Format, Args)
+ end.
+
+report_fatal(Format, Args, BinaryCore, CoreDumped) ->
+ UseDir = mnesia_monitor:use_dir(),
+ CoreDir = mnesia_monitor:get_env(core_dir),
+ if
+ is_list(CoreDir),CoreDumped == false, is_binary(BinaryCore) ->
+ core_file(CoreDir,BinaryCore,Format,Args);
+ (UseDir == true),CoreDumped == false, is_binary(BinaryCore) ->
+ core_file(CoreDir,BinaryCore,Format,Args);
+ true ->
+ report_error("(ignoring core) ** FATAL ** " ++ Format, Args)
+ end.
+
+core_file(CoreDir,BinaryCore,Format,Args) ->
+ %% Integers = tuple_to_list(date()) ++ tuple_to_list(time()),
+ Integers = tuple_to_list(now()),
+ Fun = fun(I) when I < 10 -> ["_0",I];
+ (I) -> ["_",I]
+ end,
+ List = lists:append([Fun(I) || I <- Integers]),
+ CoreFile = if is_list(CoreDir) ->
+ filename:absname(lists:concat(["MnesiaCore.", node()] ++ List),
+ CoreDir);
+ true ->
+ filename:absname(lists:concat(["MnesiaCore.", node()] ++ List))
+ end,
+ case file:write_file(CoreFile, BinaryCore) of
+ ok ->
+ report_error("(core dumped to file: ~p)~n ** FATAL ** " ++ Format,
+ [CoreFile] ++ Args);
+ {error, Reason} ->
+ report_error("(could not write core file: ~p)~n ** FATAL ** " ++ Format,
+ [Reason] ++ Args)
+ end.
+
+
+
diff --git a/lib/mnesia/src/mnesia_frag.erl b/lib/mnesia/src/mnesia_frag.erl
new file mode 100644
index 0000000000..a2958ab461
--- /dev/null
+++ b/lib/mnesia/src/mnesia_frag.erl
@@ -0,0 +1,1361 @@
+%%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%%
+%%%----------------------------------------------------------------------
+%%% Purpose : Support tables so large that they need
+%%% to be divided into several fragments.
+%%%----------------------------------------------------------------------
+
+%header_doc_include
+
+-module(mnesia_frag).
+
+%% Callback functions when accessed within an activity
+-export([
+ lock/4,
+ write/5, delete/5, delete_object/5,
+ read/5, match_object/5, all_keys/4,
+ select/5,select/6,select_cont/3,
+ index_match_object/6, index_read/6,
+ foldl/6, foldr/6, table_info/4,
+ first/3, next/4, prev/4, last/3,
+ clear_table/4
+ ]).
+
+%header_doc_include
+
+%% -behaviour(mnesia_access).
+
+-export([
+ change_table_frag/2,
+ remove_node/2,
+ expand_cstruct/1,
+ lookup_frag_hash/1,
+ lookup_foreigners/1,
+ frag_names/1,
+ set_frag_hash/2,
+ local_select/4,
+ remote_select/4
+ ]).
+
+-include("mnesia.hrl").
+
+-define(OLD_HASH_MOD, mnesia_frag_old_hash).
+-define(DEFAULT_HASH_MOD, mnesia_frag_hash).
+%%-define(DEFAULT_HASH_MOD, ?OLD_HASH_MOD). %% BUGBUG: New should be default
+
+-record(frag_state,
+ {foreign_key,
+ n_fragments,
+ hash_module,
+ hash_state}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Access functions
+
+%impl_doc_include
+
+%% Callback functions which provides transparent
+%% access of fragmented tables from any activity
+%% access context.
+
+lock(ActivityId, Opaque, {table , Tab}, LockKind) ->
+ case frag_names(Tab) of
+ [Tab] ->
+ mnesia:lock(ActivityId, Opaque, {table, Tab}, LockKind);
+ Frags ->
+ DeepNs = [mnesia:lock(ActivityId, Opaque, {table, F}, LockKind) ||
+ F <- Frags],
+ mnesia_lib:uniq(lists:append(DeepNs))
+ end;
+
+lock(ActivityId, Opaque, LockItem, LockKind) ->
+ mnesia:lock(ActivityId, Opaque, LockItem, LockKind).
+
+write(ActivityId, Opaque, Tab, Rec, LockKind) ->
+ Frag = record_to_frag_name(Tab, Rec),
+ mnesia:write(ActivityId, Opaque, Frag, Rec, LockKind).
+
+delete(ActivityId, Opaque, Tab, Key, LockKind) ->
+ Frag = key_to_frag_name(Tab, Key),
+ mnesia:delete(ActivityId, Opaque, Frag, Key, LockKind).
+
+delete_object(ActivityId, Opaque, Tab, Rec, LockKind) ->
+ Frag = record_to_frag_name(Tab, Rec),
+ mnesia:delete_object(ActivityId, Opaque, Frag, Rec, LockKind).
+
+read(ActivityId, Opaque, Tab, Key, LockKind) ->
+ Frag = key_to_frag_name(Tab, Key),
+ mnesia:read(ActivityId, Opaque, Frag, Key, LockKind).
+
+match_object(ActivityId, Opaque, Tab, HeadPat, LockKind) ->
+ MatchSpec = [{HeadPat, [], ['$_']}],
+ select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
+
+select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
+ do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
+
+
+select(ActivityId, Opaque, Tab, MatchSpec, Limit, LockKind) ->
+ init_select(ActivityId, Opaque, Tab, MatchSpec, Limit, LockKind).
+
+
+all_keys(ActivityId, Opaque, Tab, LockKind) ->
+ Match = [mnesia:all_keys(ActivityId, Opaque, Frag, LockKind)
+ || Frag <- frag_names(Tab)],
+ lists:append(Match).
+
+clear_table(ActivityId, Opaque, Tab, Obj) ->
+ [mnesia:clear_table(ActivityId, Opaque, Frag, Obj) || Frag <- frag_names(Tab)],
+ ok.
+
+index_match_object(ActivityId, Opaque, Tab, Pat, Attr, LockKind) ->
+ Match =
+ [mnesia:index_match_object(ActivityId, Opaque, Frag, Pat, Attr, LockKind)
+ || Frag <- frag_names(Tab)],
+ lists:append(Match).
+
+index_read(ActivityId, Opaque, Tab, Key, Attr, LockKind) ->
+ Match =
+ [mnesia:index_read(ActivityId, Opaque, Frag, Key, Attr, LockKind)
+ || Frag <- frag_names(Tab)],
+ lists:append(Match).
+
+foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+ Fun2 = fun(Frag, A) ->
+ mnesia:foldl(ActivityId, Opaque, Fun, A, Frag, LockKind)
+ end,
+ lists:foldl(Fun2, Acc, frag_names(Tab)).
+
+foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
+ Fun2 = fun(Frag, A) ->
+ mnesia:foldr(ActivityId, Opaque, Fun, A, Frag, LockKind)
+ end,
+ lists:foldr(Fun2, Acc, frag_names(Tab)).
+
+table_info(ActivityId, Opaque, {Tab, Key}, Item) ->
+ Frag = key_to_frag_name(Tab, Key),
+ table_info2(ActivityId, Opaque, Tab, Frag, Item);
+table_info(ActivityId, Opaque, Tab, Item) ->
+ table_info2(ActivityId, Opaque, Tab, Tab, Item).
+
+table_info2(ActivityId, Opaque, Tab, Frag, Item) ->
+ case Item of
+ size ->
+ SumFun = fun({_, Size}, Acc) -> Acc + Size end,
+ lists:foldl(SumFun, 0, frag_size(ActivityId, Opaque, Tab));
+ memory ->
+ SumFun = fun({_, Size}, Acc) -> Acc + Size end,
+ lists:foldl(SumFun, 0, frag_memory(ActivityId, Opaque, Tab));
+ base_table ->
+ lookup_prop(Tab, base_table);
+ node_pool ->
+ lookup_prop(Tab, node_pool);
+ n_fragments ->
+ FH = lookup_frag_hash(Tab),
+ FH#frag_state.n_fragments;
+ foreign_key ->
+ FH = lookup_frag_hash(Tab),
+ FH#frag_state.foreign_key;
+ foreigners ->
+ lookup_foreigners(Tab);
+ n_ram_copies ->
+ length(val({Tab, ram_copies}));
+ n_disc_copies ->
+ length(val({Tab, disc_copies}));
+ n_disc_only_copies ->
+ length(val({Tab, disc_only_copies}));
+
+ frag_names ->
+ frag_names(Tab);
+ frag_dist ->
+ frag_dist(Tab);
+ frag_size ->
+ frag_size(ActivityId, Opaque, Tab);
+ frag_memory ->
+ frag_memory(ActivityId, Opaque, Tab);
+ _ ->
+ mnesia:table_info(ActivityId, Opaque, Frag, Item)
+ end.
+
+first(ActivityId, Opaque, Tab) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:first(ActivityId, Opaque, Tab);
+ FH ->
+ FirstFrag = Tab,
+ case mnesia:first(ActivityId, Opaque, FirstFrag) of
+ '$end_of_table' ->
+ search_first(ActivityId, Opaque, Tab, 1, FH);
+ Next ->
+ Next
+ end
+ end.
+
+search_first(ActivityId, Opaque, Tab, N, FH) when N =< FH#frag_state.n_fragments ->
+ NextN = N + 1,
+ NextFrag = n_to_frag_name(Tab, NextN),
+ case mnesia:first(ActivityId, Opaque, NextFrag) of
+ '$end_of_table' ->
+ search_first(ActivityId, Opaque, Tab, NextN, FH);
+ Next ->
+ Next
+ end;
+search_first(_ActivityId, _Opaque, _Tab, _N, _FH) ->
+ '$end_of_table'.
+
+last(ActivityId, Opaque, Tab) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:last(ActivityId, Opaque, Tab);
+ FH ->
+ LastN = FH#frag_state.n_fragments,
+ search_last(ActivityId, Opaque, Tab, LastN, FH)
+ end.
+
+search_last(ActivityId, Opaque, Tab, N, FH) when N >= 1 ->
+ Frag = n_to_frag_name(Tab, N),
+ case mnesia:last(ActivityId, Opaque, Frag) of
+ '$end_of_table' ->
+ PrevN = N - 1,
+ search_last(ActivityId, Opaque, Tab, PrevN, FH);
+ Prev ->
+ Prev
+ end;
+search_last(_ActivityId, _Opaque, _Tab, _N, _FH) ->
+ '$end_of_table'.
+
+prev(ActivityId, Opaque, Tab, Key) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:prev(ActivityId, Opaque, Tab, Key);
+ FH ->
+ N = key_to_n(FH, Key),
+ Frag = n_to_frag_name(Tab, N),
+ case mnesia:prev(ActivityId, Opaque, Frag, Key) of
+ '$end_of_table' ->
+ search_prev(ActivityId, Opaque, Tab, N);
+ Prev ->
+ Prev
+ end
+ end.
+
+search_prev(ActivityId, Opaque, Tab, N) when N > 1 ->
+ PrevN = N - 1,
+ PrevFrag = n_to_frag_name(Tab, PrevN),
+ case mnesia:last(ActivityId, Opaque, PrevFrag) of
+ '$end_of_table' ->
+ search_prev(ActivityId, Opaque, Tab, PrevN);
+ Prev ->
+ Prev
+ end;
+search_prev(_ActivityId, _Opaque, _Tab, _N) ->
+ '$end_of_table'.
+
+next(ActivityId, Opaque, Tab, Key) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:next(ActivityId, Opaque, Tab, Key);
+ FH ->
+ N = key_to_n(FH, Key),
+ Frag = n_to_frag_name(Tab, N),
+ case mnesia:next(ActivityId, Opaque, Frag, Key) of
+ '$end_of_table' ->
+ search_next(ActivityId, Opaque, Tab, N, FH);
+ Prev ->
+ Prev
+ end
+ end.
+
+search_next(ActivityId, Opaque, Tab, N, FH) when N < FH#frag_state.n_fragments ->
+ NextN = N + 1,
+ NextFrag = n_to_frag_name(Tab, NextN),
+ case mnesia:first(ActivityId, Opaque, NextFrag) of
+ '$end_of_table' ->
+ search_next(ActivityId, Opaque, Tab, NextN, FH);
+ Next ->
+ Next
+ end;
+search_next(_ActivityId, _Opaque, _Tab, _N, _FH) ->
+ '$end_of_table'.
+
+%impl_doc_include
+
+frag_size(ActivityId, Opaque, Tab) ->
+ [{F, remote_table_info(ActivityId, Opaque, F, size)} || F <- frag_names(Tab)].
+
+frag_memory(ActivityId, Opaque, Tab) ->
+ [{F, remote_table_info(ActivityId, Opaque, F, memory)} || F <- frag_names(Tab)].
+
+remote_table_info(ActivityId, Opaque, Tab, Item) ->
+ N = val({Tab, where_to_read}),
+ case rpc:call(N, mnesia, table_info, [ActivityId, Opaque, Tab, Item]) of
+ {badrpc, _} ->
+ mnesia:abort({no_exists, Tab, Item});
+ Info ->
+ Info
+ end.
+
+init_select(Tid,Opaque,Tab,Pat,Limit,LockKind) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:select(Tid, Opaque, Tab, Pat, Limit,LockKind);
+ FH ->
+ FragNumbers = verify_numbers(FH,Pat),
+ Fun = fun(Num) ->
+ Name = n_to_frag_name(Tab, Num),
+ Node = val({Name, where_to_read}),
+ Storage = mnesia_lib:storage_type_at_node(Node, Name),
+ mnesia:lock(Tid, Opaque, {table, Name}, LockKind),
+ {Name, Node, Storage}
+ end,
+ [{FTab,Node,Type}|NameNodes] = lists:map(Fun, FragNumbers),
+ InitFun = fun(FixedSpec) -> mnesia:dirty_sel_init(Node,FTab,FixedSpec,Limit,Type) end,
+ Res = mnesia:fun_select(Tid,Opaque,FTab,Pat,LockKind,FTab,InitFun,Limit,Node,Type),
+ frag_sel_cont(Res, NameNodes, {Pat,LockKind,Limit})
+ end.
+
+select_cont(_Tid,_,{frag_cont, '$end_of_table', [],_}) -> '$end_of_table';
+select_cont(Tid,Ts,{frag_cont, '$end_of_table', [{Tab,Node,Type}|Rest],Args}) ->
+ {Spec,LockKind,Limit} = Args,
+ InitFun = fun(FixedSpec) -> mnesia:dirty_sel_init(Node,Tab,FixedSpec,Limit,Type) end,
+ Res = mnesia:fun_select(Tid,Ts,Tab,Spec,LockKind,Tab,InitFun,Limit,Node,Type),
+ frag_sel_cont(Res, Rest, Args);
+select_cont(Tid,Ts,{frag_cont, Cont, TabL, Args}) ->
+ frag_sel_cont(mnesia:select_cont(Tid,Ts,Cont),TabL,Args);
+select_cont(Tid,Ts,Else) -> %% Not a fragmented table
+ mnesia:select_cont(Tid,Ts,Else).
+
+frag_sel_cont('$end_of_table', [],_) ->
+ '$end_of_table';
+frag_sel_cont('$end_of_table', TabL,Args) ->
+ {[], {frag_cont, '$end_of_table', TabL,Args}};
+frag_sel_cont({Recs,Cont}, TabL,Args) ->
+ {Recs, {frag_cont, Cont, TabL,Args}}.
+
+do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ mnesia:select(ActivityId, Opaque, Tab, MatchSpec, LockKind);
+ FH ->
+ FragNumbers = verify_numbers(FH,MatchSpec),
+ Fun = fun(Num) ->
+ Name = n_to_frag_name(Tab, Num),
+ Node = val({Name, where_to_read}),
+ mnesia:lock(ActivityId, Opaque, {table, Name}, LockKind),
+ {Name, Node}
+ end,
+ NameNodes = lists:map(Fun, FragNumbers),
+ SelectAllFun =
+ fun(PatchedMatchSpec) ->
+ Match = [mnesia:dirty_select(Name, PatchedMatchSpec)
+ || {Name, _Node} <- NameNodes],
+ lists:append(Match)
+ end,
+ case [{Name, Node} || {Name, Node} <- NameNodes, Node /= node()] of
+ [] ->
+ %% All fragments are local
+ mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectAllFun);
+ RemoteNameNodes ->
+ Type = val({Tab,setorbag}),
+ SelectFun =
+ fun(PatchedMatchSpec) ->
+ Ref = make_ref(),
+ Args = [self(), Ref, RemoteNameNodes, PatchedMatchSpec],
+ Pid = spawn_link(?MODULE, local_select, Args),
+ LocalMatch0 = [mnesia:dirty_select(Name, PatchedMatchSpec)
+ || {Name, Node} <- NameNodes, Node == node()],
+ LocalMatch = case Type of
+ ordered_set -> lists:merge(LocalMatch0);
+ _ -> lists:append(LocalMatch0)
+ end,
+ OldSelectFun = fun() -> SelectAllFun(PatchedMatchSpec) end,
+ local_collect(Ref, Pid, Type, LocalMatch, OldSelectFun)
+ end,
+ mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectFun)
+ end
+ end.
+
+verify_numbers(FH,MatchSpec) ->
+ HashState = FH#frag_state.hash_state,
+ FragNumbers =
+ case FH#frag_state.hash_module of
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:match_spec_to_frag_numbers(HashState, MatchSpec);
+ HashMod ->
+ HashMod:match_spec_to_frag_numbers(HashState, MatchSpec)
+ end,
+ N = FH#frag_state.n_fragments,
+ VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
+ (_F) -> true
+ end,
+ case catch lists:filter(VerifyFun, FragNumbers) of
+ [] ->
+ FragNumbers;
+ BadFrags ->
+ mnesia:abort({"match_spec_to_frag_numbers: Fragment numbers out of range",
+ BadFrags, {range, 1, N}})
+ end.
+
+local_select(ReplyTo, Ref, RemoteNameNodes, MatchSpec) ->
+ RemoteNodes = mnesia_lib:uniq([Node || {_Name, Node} <- RemoteNameNodes]),
+ Args = [ReplyTo, Ref, RemoteNameNodes, MatchSpec],
+ {Replies, BadNodes} = rpc:multicall(RemoteNodes, ?MODULE, remote_select, Args),
+ case mnesia_lib:uniq(Replies) -- [ok] of
+ [] when BadNodes == [] ->
+ ReplyTo ! {local_select, Ref, ok};
+ _ when BadNodes /= [] ->
+ ReplyTo ! {local_select, Ref, {error, {node_not_running, hd(BadNodes)}}};
+ [{badrpc, {'EXIT', Reason}} | _] ->
+ ReplyTo ! {local_select, Ref, {error, Reason}};
+ [Reason | _] ->
+ ReplyTo ! {local_select, Ref, {error, Reason}}
+ end,
+ unlink(ReplyTo),
+ exit(normal).
+
+remote_select(ReplyTo, Ref, NameNodes, MatchSpec) ->
+ do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec).
+
+do_remote_select(ReplyTo, Ref, [{Name, Node} | NameNodes], MatchSpec) ->
+ if
+ Node == node() ->
+ Res = (catch {ok, mnesia:dirty_select(Name, MatchSpec)}),
+ ReplyTo ! {remote_select, Ref, Node, Res},
+ do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec);
+ true ->
+ do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec)
+ end;
+do_remote_select(_ReplyTo, _Ref, [], _MatchSpec) ->
+ ok.
+
+local_collect(Ref, Pid, Type, LocalMatch, OldSelectFun) ->
+ receive
+ {local_select, Ref, LocalRes} ->
+ remote_collect(Ref, Type, LocalRes, LocalMatch, OldSelectFun);
+ {'EXIT', Pid, Reason} ->
+ remote_collect(Ref, Type, {error, Reason}, [], OldSelectFun)
+ end.
+
+remote_collect(Ref, Type, LocalRes = ok, Acc, OldSelectFun) ->
+ receive
+ {remote_select, Ref, Node, RemoteRes} ->
+ case RemoteRes of
+ {ok, RemoteMatch} ->
+ Matches = case Type of
+ ordered_set -> lists:merge(RemoteMatch, Acc);
+ _ -> RemoteMatch ++ Acc
+ end,
+ remote_collect(Ref, Type, LocalRes, Matches, OldSelectFun);
+ _ ->
+ remote_collect(Ref, Type, {error, {node_not_running, Node}}, [], OldSelectFun)
+ end
+ after 0 ->
+ Acc
+ end;
+remote_collect(Ref, Type, LocalRes = {error, Reason}, _Acc, OldSelectFun) ->
+ receive
+ {remote_select, Ref, _Node, _RemoteRes} ->
+ remote_collect(Ref, Type, LocalRes, [], OldSelectFun)
+ after 0 ->
+ mnesia:abort(Reason)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Returns a list of cstructs
+
+expand_cstruct(Cs) ->
+ expand_cstruct(Cs, create).
+
+expand_cstruct(Cs, Mode) ->
+ Tab = Cs#cstruct.name,
+ Props = Cs#cstruct.frag_properties,
+ mnesia_schema:verify({alt, [nil, list]}, mnesia_lib:etype(Props),
+ {badarg, Tab, Props}),
+ %% Verify keys
+ ValidKeys = [foreign_key, n_fragments, node_pool,
+ n_ram_copies, n_disc_copies, n_disc_only_copies,
+ hash_module, hash_state],
+ Keys = mnesia_schema:check_keys(Tab, Props, ValidKeys),
+ mnesia_schema:check_duplicates(Tab, Keys),
+
+ %% Pick fragmentation props
+ ForeignKey = mnesia_schema:pick(Tab, foreign_key, Props, undefined),
+ {ForeignKey2, N, Pool, DefaultNR, DefaultND, DefaultNDO} =
+ pick_props(Tab, Cs, ForeignKey),
+
+ %% Verify node_pool
+ BadPool = {bad_type, Tab, {node_pool, Pool}},
+ mnesia_schema:verify(list, mnesia_lib:etype(Pool), BadPool),
+ NotAtom = fun(A) when is_atom(A) -> false;
+ (_A) -> true
+ end,
+ mnesia_schema:verify([], [P || P <- Pool, NotAtom(P)], BadPool),
+
+ NR = mnesia_schema:pick(Tab, n_ram_copies, Props, 0),
+ ND = mnesia_schema:pick(Tab, n_disc_copies, Props, 0),
+ NDO = mnesia_schema:pick(Tab, n_disc_only_copies, Props, 0),
+
+ PosInt = fun(I) when is_integer(I), I >= 0 -> true;
+ (_I) -> false
+ end,
+ mnesia_schema:verify(true, PosInt(NR),
+ {bad_type, Tab, {n_ram_copies, NR}}),
+ mnesia_schema:verify(true, PosInt(ND),
+ {bad_type, Tab, {n_disc_copies, ND}}),
+ mnesia_schema:verify(true, PosInt(NDO),
+ {bad_type, Tab, {n_disc_only_copies, NDO}}),
+
+ %% Verify n_fragments
+ Cs2 = verify_n_fragments(N, Cs, Mode),
+
+ %% Verify hash callback
+ HashMod = mnesia_schema:pick(Tab, hash_module, Props, ?DEFAULT_HASH_MOD),
+ HashState = mnesia_schema:pick(Tab, hash_state, Props, undefined),
+ HashState2 = HashMod:init_state(Tab, HashState), %% BUGBUG: Catch?
+
+ FH = #frag_state{foreign_key = ForeignKey2,
+ n_fragments = 1,
+ hash_module = HashMod,
+ hash_state = HashState2},
+ if
+ NR == 0, ND == 0, NDO == 0 ->
+ do_expand_cstruct(Cs2, FH, N, Pool, DefaultNR, DefaultND, DefaultNDO, Mode);
+ true ->
+ do_expand_cstruct(Cs2, FH, N, Pool, NR, ND, NDO, Mode)
+ end.
+
+do_expand_cstruct(Cs, FH, N, Pool, NR, ND, NDO, Mode) ->
+ Tab = Cs#cstruct.name,
+
+ LC = Cs#cstruct.local_content,
+ mnesia_schema:verify(false, LC,
+ {combine_error, Tab, {local_content, LC}}),
+
+ Snmp = Cs#cstruct.snmp,
+ mnesia_schema:verify([], Snmp,
+ {combine_error, Tab, {snmp, Snmp}}),
+
+ %% Add empty fragments
+ CommonProps = [{base_table, Tab}],
+ Cs2 = Cs#cstruct{frag_properties = lists:sort(CommonProps)},
+ expand_frag_cstructs(N, NR, ND, NDO, Cs2, Pool, Pool, FH, Mode).
+
+verify_n_fragments(N, Cs, Mode) when is_integer(N), N >= 1 ->
+ case Mode of
+ create ->
+ Cs#cstruct{ram_copies = [],
+ disc_copies = [],
+ disc_only_copies = []};
+ activate ->
+ Reason = {combine_error, Cs#cstruct.name, {n_fragments, N}},
+ mnesia_schema:verify(1, N, Reason),
+ Cs
+ end;
+verify_n_fragments(N, Cs, _Mode) ->
+ mnesia:abort({bad_type, Cs#cstruct.name, {n_fragments, N}}).
+
+pick_props(Tab, Cs, {ForeignTab, Attr}) ->
+ mnesia_schema:verify(true, ForeignTab /= Tab,
+ {combine_error, Tab, {ForeignTab, Attr}}),
+ Props = Cs#cstruct.frag_properties,
+ Attrs = Cs#cstruct.attributes,
+
+ ForeignKey = lookup_prop(ForeignTab, foreign_key),
+ ForeignN = lookup_prop(ForeignTab, n_fragments),
+ ForeignPool = lookup_prop(ForeignTab, node_pool),
+ N = mnesia_schema:pick(Tab, n_fragments, Props, ForeignN),
+ Pool = mnesia_schema:pick(Tab, node_pool, Props, ForeignPool),
+
+ mnesia_schema:verify(ForeignN, N,
+ {combine_error, Tab, {n_fragments, N},
+ ForeignTab, {n_fragments, ForeignN}}),
+
+ mnesia_schema:verify(ForeignPool, Pool,
+ {combine_error, Tab, {node_pool, Pool},
+ ForeignTab, {node_pool, ForeignPool}}),
+
+ mnesia_schema:verify(undefined, ForeignKey,
+ {combine_error, Tab,
+ "Multiple levels of foreign_key dependencies",
+ {ForeignTab, Attr}, ForeignKey}),
+
+ Key = {ForeignTab, mnesia_schema:attr_to_pos(Attr, Attrs)},
+ DefaultNR = length(val({ForeignTab, ram_copies})),
+ DefaultND = length(val({ForeignTab, disc_copies})),
+ DefaultNDO = length(val({ForeignTab, disc_only_copies})),
+ {Key, N, Pool, DefaultNR, DefaultND, DefaultNDO};
+pick_props(Tab, Cs, undefined) ->
+ Props = Cs#cstruct.frag_properties,
+ DefaultN = 1,
+ DefaultPool = mnesia:system_info(db_nodes),
+ N = mnesia_schema:pick(Tab, n_fragments, Props, DefaultN),
+ Pool = mnesia_schema:pick(Tab, node_pool, Props, DefaultPool),
+ DefaultNR = 1,
+ DefaultND = 0,
+ DefaultNDO = 0,
+ {undefined, N, Pool, DefaultNR, DefaultND, DefaultNDO};
+pick_props(Tab, _Cs, BadKey) ->
+ mnesia:abort({bad_type, Tab, {foreign_key, BadKey}}).
+
+expand_frag_cstructs(N, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode)
+ when N > 1, Mode == create ->
+ Frag = n_to_frag_name(CommonCs#cstruct.name, N),
+ Cs = CommonCs#cstruct{name = Frag},
+ {Cs2, RevModDist, RestDist} = set_frag_nodes(NR, ND, NDO, Cs, Dist, []),
+ ModDist = lists:reverse(RevModDist),
+ Dist2 = rearrange_dist(Cs, ModDist, RestDist, Pool),
+ %% Adjusts backwards, but it doesn't matter.
+ {FH2, _FromFrags, _AdditionalWriteFrags} = adjust_before_split(FH),
+ CsList = expand_frag_cstructs(N - 1, NR, ND, NDO, CommonCs, Dist2, Pool, FH2, Mode),
+ [Cs2 | CsList];
+expand_frag_cstructs(1, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode) ->
+ BaseProps = CommonCs#cstruct.frag_properties ++
+ [{foreign_key, FH#frag_state.foreign_key},
+ {hash_module, FH#frag_state.hash_module},
+ {hash_state, FH#frag_state.hash_state},
+ {n_fragments, FH#frag_state.n_fragments},
+ {node_pool, Pool}
+ ],
+ BaseCs = CommonCs#cstruct{frag_properties = lists:sort(BaseProps)},
+ case Mode of
+ activate ->
+ [BaseCs];
+ create ->
+ {BaseCs2, _, _} = set_frag_nodes(NR, ND, NDO, BaseCs, Dist, []),
+ [BaseCs2]
+ end.
+
+set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NR > 0 ->
+ Pos = #cstruct.ram_copies,
+ {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
+ set_frag_nodes(NR - 1, ND, NDO, Cs2, Tail, [Head2 | Acc]);
+set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when ND > 0 ->
+ Pos = #cstruct.disc_copies,
+ {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
+ set_frag_nodes(NR, ND - 1, NDO, Cs2, Tail, [Head2 | Acc]);
+set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NDO > 0 ->
+ Pos = #cstruct.disc_only_copies,
+ {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
+ set_frag_nodes(NR, ND, NDO - 1, Cs2, Tail, [Head2 | Acc]);
+set_frag_nodes(0, 0, 0, Cs, RestDist, ModDist) ->
+ {Cs, ModDist, RestDist};
+set_frag_nodes(_, _, _, Cs, [], _) ->
+ mnesia:abort({combine_error, Cs#cstruct.name, "Too few nodes in node_pool"}).
+
+set_frag_node(Cs, Pos, Head) ->
+ Ns = element(Pos, Cs),
+ {Node, Count2} =
+ case Head of
+ {N, Count} when is_atom(N), is_integer(Count), Count >= 0 ->
+ {N, Count + 1};
+ N when is_atom(N) ->
+ {N, 1};
+ BadNode ->
+ mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
+ end,
+ mnesia_schema:verify(true,
+ lists:member(Node, val({current,db_nodes})),
+ {not_active, Cs#cstruct.name, Node}),
+ Cs2 = setelement(Pos, Cs, [Node | Ns]),
+ {Cs2, {Node, Count2}}.
+
+rearrange_dist(Cs, [{Node, Count} | ModDist], Dist, Pool) ->
+ Dist2 = insert_dist(Cs, Node, Count, Dist, Pool),
+ rearrange_dist(Cs, ModDist, Dist2, Pool);
+rearrange_dist(_Cs, [], Dist, _) ->
+ Dist.
+
+insert_dist(Cs, Node, Count, [Head | Tail], Pool) ->
+ case Head of
+ {Node2, Count2} when is_atom(Node2), is_integer(Count2), Count2 >= 0 ->
+ case node_diff(Node, Count, Node2, Count2, Pool) of
+ less ->
+ [{Node, Count}, Head | Tail];
+ greater ->
+ [Head | insert_dist(Cs, Node, Count, Tail, Pool)]
+ end;
+ Node2 when is_atom(Node2) ->
+ insert_dist(Cs, Node, Count, [{Node2, 0} | Tail], Pool);
+ BadNode ->
+ mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
+ end;
+insert_dist(_Cs, Node, Count, [], _Pool) ->
+ [{Node, Count}];
+insert_dist(_Cs, _Node, _Count, Dist, _Pool) ->
+ mnesia:abort({bad_type, Dist}).
+
+node_diff(_Node, Count, _Node2, Count2, _Pool) when Count < Count2 ->
+ less;
+node_diff(Node, Count, Node2, Count2, Pool) when Count == Count2 ->
+ Pos = list_pos(Node, Pool, 1),
+ Pos2 = list_pos(Node2, Pool, 1),
+ if
+ Pos < Pos2 ->
+ less;
+ Pos > Pos2 ->
+ greater
+ end;
+node_diff(_Node, Count, _Node2, Count2, _Pool) when Count > Count2 ->
+ greater.
+
+%% Returns position of element in list
+list_pos(H, [H | _T], Pos) ->
+ Pos;
+list_pos(E, [_H | T], Pos) ->
+ list_pos(E, T, Pos + 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Switch function for changing of table fragmentation
+%%
+%% Returns a list of lists of schema ops
+
+change_table_frag(Tab, {activate, FragProps}) ->
+ make_activate(Tab, FragProps);
+change_table_frag(Tab, deactivate) ->
+ make_deactivate(Tab);
+change_table_frag(Tab, {add_frag, SortedNodes}) ->
+ make_multi_add_frag(Tab, SortedNodes);
+change_table_frag(Tab, del_frag) ->
+ make_multi_del_frag(Tab);
+change_table_frag(Tab, {add_node, Node}) ->
+ make_multi_add_node(Tab, Node);
+change_table_frag(Tab, {del_node, Node}) ->
+ make_multi_del_node(Tab, Node);
+change_table_frag(Tab, Change) ->
+ mnesia:abort({bad_type, Tab, Change}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Turn a normal table into a fragmented table
+%%
+%% The storage type must be the same on all nodes
+
+make_activate(Tab, Props) ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ case Cs#cstruct.frag_properties of
+ [] ->
+ Cs2 = Cs#cstruct{frag_properties = Props},
+ [Cs3] = expand_cstruct(Cs2, activate),
+ TabDef = mnesia_schema:cs2list(Cs3),
+ Op = {op, change_table_frag, activate, TabDef},
+ [[Op]];
+ BadProps ->
+ mnesia:abort({already_exists, Tab, {frag_properties, BadProps}})
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Turn a table into a normal defragmented table
+
+make_deactivate(Tab) ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ Foreigners = lookup_foreigners(Tab),
+ BaseTab = lookup_prop(Tab, base_table),
+ FH = lookup_frag_hash(Tab),
+ if
+ BaseTab /= Tab ->
+ mnesia:abort({combine_error, Tab, "Not a base table"});
+ Foreigners /= [] ->
+ mnesia:abort({combine_error, Tab, "Too many foreigners", Foreigners});
+ FH#frag_state.n_fragments > 1 ->
+ mnesia:abort({combine_error, Tab, "Too many fragments"});
+ true ->
+ Cs2 = Cs#cstruct{frag_properties = []},
+ TabDef = mnesia_schema:cs2list(Cs2),
+ Op = {op, change_table_frag, deactivate, TabDef},
+ [[Op]]
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Add a fragment to a fragmented table and fill it with half of
+%% the records from one of the old fragments
+
+make_multi_add_frag(Tab, SortedNs) when is_list(SortedNs) ->
+ verify_multi(Tab),
+ Ops = make_add_frag(Tab, SortedNs),
+
+ %% Propagate to foreigners
+ MoreOps = [make_add_frag(T, SortedNs) || T <- lookup_foreigners(Tab)],
+ [Ops | MoreOps];
+make_multi_add_frag(Tab, SortedNs) ->
+ mnesia:abort({bad_type, Tab, SortedNs}).
+
+verify_multi(Tab) ->
+ FH = lookup_frag_hash(Tab),
+ ForeignKey = FH#frag_state.foreign_key,
+ mnesia_schema:verify(undefined, ForeignKey,
+ {combine_error, Tab,
+ "Op only allowed via foreign table",
+ {foreign_key, ForeignKey}}).
+
+make_frag_names_and_acquire_locks(Tab, N, FragIndecies, DoNotLockN) ->
+ mnesia_schema:get_tid_ts_and_lock(Tab, write),
+ Fun = fun(Index, FN) ->
+ if
+ DoNotLockN == true, Index == N ->
+ Name = n_to_frag_name(Tab, Index),
+ setelement(Index, FN, Name);
+ true ->
+ Name = n_to_frag_name(Tab, Index),
+ mnesia_schema:get_tid_ts_and_lock(Name, write),
+ setelement(Index , FN, Name)
+ end
+ end,
+ FragNames = erlang:make_tuple(N, undefined),
+ lists:foldl(Fun, FragNames, FragIndecies).
+
+make_add_frag(Tab, SortedNs) ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ FH = lookup_frag_hash(Tab),
+ {FH2, FromIndecies, WriteIndecies} = adjust_before_split(FH),
+ N = FH2#frag_state.n_fragments,
+ FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, true),
+ NewFrag = element(N, FragNames),
+
+ NR = length(Cs#cstruct.ram_copies),
+ ND = length(Cs#cstruct.disc_copies),
+ NDO = length(Cs#cstruct.disc_only_copies),
+ NewCs = Cs#cstruct{name = NewFrag,
+ frag_properties = [{base_table, Tab}],
+ ram_copies = [],
+ disc_copies = [],
+ disc_only_copies = []},
+
+ {NewCs2, _, _} = set_frag_nodes(NR, ND, NDO, NewCs, SortedNs, []),
+ [NewOp] = mnesia_schema:make_create_table(NewCs2),
+
+ SplitOps = split(Tab, FH2, FromIndecies, FragNames, []),
+
+ Cs2 = replace_frag_hash(Cs, FH2),
+ TabDef = mnesia_schema:cs2list(Cs2),
+ BaseOp = {op, change_table_frag, {add_frag, SortedNs}, TabDef},
+
+ [BaseOp, NewOp | SplitOps].
+
+replace_frag_hash(Cs, FH) when is_record(FH, frag_state) ->
+ Fun = fun(Prop) ->
+ case Prop of
+ {n_fragments, _} ->
+ {true, {n_fragments, FH#frag_state.n_fragments}};
+ {hash_module, _} ->
+ {true, {hash_module, FH#frag_state.hash_module}};
+ {hash_state, _} ->
+ {true, {hash_state, FH#frag_state.hash_state}};
+ {next_n_to_split, _} ->
+ false;
+ {n_doubles, _} ->
+ false;
+ _ ->
+ true
+ end
+ end,
+ Props = lists:zf(Fun, Cs#cstruct.frag_properties),
+ Cs#cstruct{frag_properties = Props}.
+
+%% Adjust table info before split
+adjust_before_split(FH) ->
+ HashState = FH#frag_state.hash_state,
+ {HashState2, FromFrags, AdditionalWriteFrags} =
+ case FH#frag_state.hash_module of
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:add_frag(HashState);
+ HashMod ->
+ HashMod:add_frag(HashState)
+ end,
+ N = FH#frag_state.n_fragments + 1,
+ FromFrags2 = (catch lists:sort(FromFrags)),
+ UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
+ VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
+ (_F) -> true
+ end,
+ case catch lists:filter(VerifyFun, UnionFrags) of
+ [] ->
+ FH2 = FH#frag_state{n_fragments = N,
+ hash_state = HashState2},
+ {FH2, FromFrags2, UnionFrags};
+ BadFrags ->
+ mnesia:abort({"add_frag: Fragment numbers out of range",
+ BadFrags, {range, 1, N}})
+ end.
+
+split(Tab, FH, [SplitN | SplitNs], FragNames, Ops) ->
+ SplitFrag = element(SplitN, FragNames),
+ Pat = mnesia:table_info(SplitFrag, wild_pattern),
+ {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
+ Recs = mnesia:match_object(Tid, Ts, SplitFrag, Pat, read),
+ Ops2 = do_split(FH, SplitN, FragNames, Recs, Ops),
+ split(Tab, FH, SplitNs, FragNames, Ops2);
+split(_Tab, _FH, [], _FragNames, Ops) ->
+ Ops.
+
+%% Perform the split of the table
+do_split(FH, OldN, FragNames, [Rec | Recs], Ops) ->
+ Pos = key_pos(FH),
+ HashKey = element(Pos, Rec),
+ case key_to_n(FH, HashKey) of
+ NewN when NewN == OldN ->
+ %% Keep record in the same fragment. No need to move it.
+ do_split(FH, OldN, FragNames, Recs, Ops);
+ NewN ->
+ case element(NewN, FragNames) of
+ NewFrag when NewFrag /= undefined ->
+ OldFrag = element(OldN, FragNames),
+ Key = element(2, Rec),
+ NewOid = {NewFrag, Key},
+ OldOid = {OldFrag, Key},
+ Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
+ {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
+ do_split(FH, OldN, FragNames, Recs, Ops2);
+ _NewFrag ->
+ %% Tried to move record to fragment that not is locked
+ mnesia:abort({"add_frag: Fragment not locked", NewN})
+ end
+ end;
+do_split(_FH, _OldN, _FragNames, [], Ops) ->
+ Ops.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Delete a fragment from a fragmented table
+%% and merge its records with an other fragment
+
+make_multi_del_frag(Tab) ->
+ verify_multi(Tab),
+ Ops = make_del_frag(Tab),
+
+ %% Propagate to foreigners
+ MoreOps = [make_del_frag(T) || T <- lookup_foreigners(Tab)],
+ [Ops | MoreOps].
+
+make_del_frag(Tab) ->
+ FH = lookup_frag_hash(Tab),
+ case FH#frag_state.n_fragments of
+ N when N > 1 ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ {FH2, FromIndecies, WriteIndecies} = adjust_before_merge(FH),
+ FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, false),
+
+ MergeOps = merge(Tab, FH2, FromIndecies, FragNames, []),
+ LastFrag = element(N, FragNames),
+ [LastOp] = mnesia_schema:make_delete_table(LastFrag, single_frag),
+ Cs2 = replace_frag_hash(Cs, FH2),
+ TabDef = mnesia_schema:cs2list(Cs2),
+ BaseOp = {op, change_table_frag, del_frag, TabDef},
+ [BaseOp, LastOp | MergeOps];
+ _ ->
+ %% Cannot remove the last fragment
+ mnesia:abort({no_exists, Tab})
+ end.
+
+%% Adjust tab info before merge
+adjust_before_merge(FH) ->
+ HashState = FH#frag_state.hash_state,
+ {HashState2, FromFrags, AdditionalWriteFrags} =
+ case FH#frag_state.hash_module of
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:del_frag(HashState);
+ HashMod ->
+ HashMod:del_frag(HashState)
+ end,
+ N = FH#frag_state.n_fragments,
+ FromFrags2 = (catch lists:sort(FromFrags)),
+ UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
+ VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
+ (_F) -> true
+ end,
+ case catch lists:filter(VerifyFun, UnionFrags) of
+ [] ->
+ case lists:member(N, FromFrags2) of
+ true ->
+ FH2 = FH#frag_state{n_fragments = N - 1,
+ hash_state = HashState2},
+ {FH2, FromFrags2, UnionFrags};
+ false ->
+ mnesia:abort({"del_frag: Last fragment number not included", N})
+ end;
+ BadFrags ->
+ mnesia:abort({"del_frag: Fragment numbers out of range",
+ BadFrags, {range, 1, N}})
+ end.
+
+merge(Tab, FH, [FromN | FromNs], FragNames, Ops) ->
+ FromFrag = element(FromN, FragNames),
+ Pat = mnesia:table_info(FromFrag, wild_pattern),
+ {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
+ Recs = mnesia:match_object(Tid, Ts, FromFrag, Pat, read),
+ Ops2 = do_merge(FH, FromN, FragNames, Recs, Ops),
+ merge(Tab, FH, FromNs, FragNames, Ops2);
+merge(_Tab, _FH, [], _FragNames, Ops) ->
+ Ops.
+
+%% Perform the merge of the table
+do_merge(FH, OldN, FragNames, [Rec | Recs], Ops) ->
+ Pos = key_pos(FH),
+ LastN = FH#frag_state.n_fragments + 1,
+ HashKey = element(Pos, Rec),
+ case key_to_n(FH, HashKey) of
+ NewN when NewN == LastN ->
+ %% Tried to leave a record in the fragment that is to be deleted
+ mnesia:abort({"del_frag: Fragment number out of range",
+ NewN, {range, 1, LastN}});
+ NewN when NewN == OldN ->
+ %% Keep record in the same fragment. No need to move it.
+ do_merge(FH, OldN, FragNames, Recs, Ops);
+ NewN when OldN == LastN ->
+ %% Move record from the fragment that is to be deleted
+ %% No need to create a delete op for each record.
+ case element(NewN, FragNames) of
+ NewFrag when NewFrag /= undefined ->
+ Key = element(2, Rec),
+ NewOid = {NewFrag, Key},
+ Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}} | Ops],
+ do_merge(FH, OldN, FragNames, Recs, Ops2);
+ _NewFrag ->
+ %% Tried to move record to fragment that not is locked
+ mnesia:abort({"del_frag: Fragment not locked", NewN})
+ end;
+ NewN ->
+ case element(NewN, FragNames) of
+ NewFrag when NewFrag /= undefined ->
+ OldFrag = element(OldN, FragNames),
+ Key = element(2, Rec),
+ NewOid = {NewFrag, Key},
+ OldOid = {OldFrag, Key},
+ Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
+ {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
+ do_merge(FH, OldN, FragNames, Recs, Ops2);
+ _NewFrag ->
+ %% Tried to move record to fragment that not is locked
+ mnesia:abort({"del_frag: Fragment not locked", NewN})
+ end
+ end;
+ do_merge(_FH, _OldN, _FragNames, [], Ops) ->
+ Ops.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Add a node to the node pool of a fragmented table
+
+make_multi_add_node(Tab, Node) ->
+ verify_multi(Tab),
+ Ops = make_add_node(Tab, Node),
+
+ %% Propagate to foreigners
+ MoreOps = [make_add_node(T, Node) || T <- lookup_foreigners(Tab)],
+ [Ops | MoreOps].
+
+make_add_node(Tab, Node) when is_atom(Node) ->
+ Pool = lookup_prop(Tab, node_pool),
+ case lists:member(Node, Pool) of
+ false ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ Pool2 = Pool ++ [Node],
+ Props = Cs#cstruct.frag_properties,
+ Props2 = lists:keyreplace(node_pool, 1, Props, {node_pool, Pool2}),
+ Cs2 = Cs#cstruct{frag_properties = Props2},
+ TabDef = mnesia_schema:cs2list(Cs2),
+ Op = {op, change_table_frag, {add_node, Node}, TabDef},
+ [Op];
+ true ->
+ mnesia:abort({already_exists, Tab, Node})
+ end;
+make_add_node(Tab, Node) ->
+ mnesia:abort({bad_type, Tab, Node}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Delet a node from the node pool of a fragmented table
+
+make_multi_del_node(Tab, Node) ->
+ verify_multi(Tab),
+ Ops = make_del_node(Tab, Node),
+
+ %% Propagate to foreigners
+ MoreOps = [make_del_node(T, Node) || T <- lookup_foreigners(Tab)],
+ [Ops | MoreOps].
+
+make_del_node(Tab, Node) when is_atom(Node) ->
+ Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
+ mnesia_schema:ensure_active(Cs),
+ Pool = lookup_prop(Tab, node_pool),
+ case lists:member(Node, Pool) of
+ true ->
+ Pool2 = Pool -- [Node],
+ Props = lists:keyreplace(node_pool, 1, Cs#cstruct.frag_properties, {node_pool, Pool2}),
+ Cs2 = Cs#cstruct{frag_properties = Props},
+ TabDef = mnesia_schema:cs2list(Cs2),
+ Op = {op, change_table_frag, {del_node, Node}, TabDef},
+ [Op];
+ false ->
+ mnesia:abort({no_exists, Tab, Node})
+ end;
+make_del_node(Tab, Node) ->
+ mnesia:abort({bad_type, Tab, Node}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Special case used to remove all references to a node during
+%% mnesia:del_table_copy(schema, Node)
+
+remove_node(Node, Cs) ->
+ Tab = Cs#cstruct.name,
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ {Cs, false};
+ _ ->
+ Pool = lookup_prop(Tab, node_pool),
+ case lists:member(Node, Pool) of
+ true ->
+ Pool2 = Pool -- [Node],
+ Props = lists:keyreplace(node_pool, 1,
+ Cs#cstruct.frag_properties,
+ {node_pool, Pool2}),
+ {Cs#cstruct{frag_properties = Props}, true};
+ false ->
+ {Cs, false}
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Helpers
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
+
+set_frag_hash(Tab, Props) ->
+ case props_to_frag_hash(Tab, Props) of
+ FH when is_record(FH, frag_state) ->
+ mnesia_lib:set({Tab, frag_hash}, FH);
+ no_hash ->
+ mnesia_lib:unset({Tab, frag_hash})
+ end.
+
+props_to_frag_hash(_Tab, []) ->
+ no_hash;
+props_to_frag_hash(Tab, Props) ->
+ case mnesia_schema:pick(Tab, base_table, Props, undefined) of
+ T when T == Tab ->
+ Foreign = mnesia_schema:pick(Tab, foreign_key, Props, must),
+ N = mnesia_schema:pick(Tab, n_fragments, Props, must),
+
+ case mnesia_schema:pick(Tab, hash_module, Props, undefined) of
+ undefined ->
+ Split = mnesia_schema:pick(Tab, next_n_to_split, Props, must),
+ Doubles = mnesia_schema:pick(Tab, n_doubles, Props, must),
+ FH = {frag_hash, Foreign, N, Split, Doubles},
+ HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
+ #frag_state{foreign_key = Foreign,
+ n_fragments = N,
+ hash_module = ?OLD_HASH_MOD,
+ hash_state = HashState};
+ HashMod ->
+ HashState = mnesia_schema:pick(Tab, hash_state, Props, must),
+ #frag_state{foreign_key = Foreign,
+ n_fragments = N,
+ hash_module = HashMod,
+ hash_state = HashState}
+ %% Old style. Kept for backwards compatibility.
+ end;
+ _ ->
+ no_hash
+ end.
+
+lookup_prop(Tab, Prop) ->
+ Props = val({Tab, frag_properties}),
+ case lists:keysearch(Prop, 1, Props) of
+ {value, {Prop, Val}} ->
+ Val;
+ false ->
+ mnesia:abort({no_exists, Tab, Prop, {frag_properties, Props}})
+ end.
+
+lookup_frag_hash(Tab) ->
+ case ?catch_val({Tab, frag_hash}) of
+ FH when is_record(FH, frag_state) ->
+ FH;
+ {frag_hash, K, N, _S, _D} = FH ->
+ %% Old style. Kept for backwards compatibility.
+ HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
+ #frag_state{foreign_key = K,
+ n_fragments = N,
+ hash_module = ?OLD_HASH_MOD,
+ hash_state = HashState};
+ {'EXIT', _} ->
+ mnesia:abort({no_exists, Tab, frag_properties, frag_hash})
+ end.
+
+%% Returns a list of tables
+lookup_foreigners(Tab) ->
+ %% First field in HashPat is either frag_hash or frag_state
+ HashPat = {'_', {Tab, '_'}, '_', '_', '_'},
+ [T || [T] <- ?ets_match(mnesia_gvar, {{'$1', frag_hash}, HashPat})].
+
+%% Returns name of fragment table
+record_to_frag_name(Tab, Rec) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ Tab;
+ FH ->
+ Pos = key_pos(FH),
+ Key = element(Pos, Rec),
+ N = key_to_n(FH, Key),
+ n_to_frag_name(Tab, N)
+ end.
+
+key_pos(FH) ->
+ case FH#frag_state.foreign_key of
+ undefined ->
+ 2;
+ {_ForeignTab, Pos} ->
+ Pos
+ end.
+
+%% Returns name of fragment table
+key_to_frag_name({BaseTab, _} = Tab, Key) ->
+ N = key_to_frag_number(Tab, Key),
+ n_to_frag_name(BaseTab, N);
+key_to_frag_name(Tab, Key) ->
+ N = key_to_frag_number(Tab, Key),
+ n_to_frag_name(Tab, N).
+
+%% Returns name of fragment table
+n_to_frag_name(Tab, 1) ->
+ Tab;
+n_to_frag_name(Tab, N) when is_atom(Tab), is_integer(N) ->
+ list_to_atom(atom_to_list(Tab) ++ "_frag" ++ integer_to_list(N));
+n_to_frag_name(Tab, N) ->
+ mnesia:abort({bad_type, Tab, N}).
+
+%% Returns name of fragment table
+key_to_frag_number({Tab, ForeignKey}, _Key) ->
+ FH = val({Tab, frag_hash}),
+ case FH#frag_state.foreign_key of
+ {_ForeignTab, _Pos} ->
+ key_to_n(FH, ForeignKey);
+ undefined ->
+ mnesia:abort({combine_error, Tab, frag_properties,
+ {foreign_key, undefined}})
+ end;
+key_to_frag_number(Tab, Key) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ 1;
+ FH ->
+ key_to_n(FH, Key)
+ end.
+
+%% Returns fragment number
+key_to_n(FH, Key) ->
+ HashState = FH#frag_state.hash_state,
+ N =
+ case FH#frag_state.hash_module of
+ HashMod when HashMod == ?DEFAULT_HASH_MOD ->
+ ?DEFAULT_HASH_MOD:key_to_frag_number(HashState, Key);
+ HashMod ->
+ HashMod:key_to_frag_number(HashState, Key)
+ end,
+ if
+ is_integer(N), N >= 1, N =< FH#frag_state.n_fragments ->
+ N;
+ true ->
+ mnesia:abort({"key_to_frag_number: Fragment number out of range",
+ N, {range, 1, FH#frag_state.n_fragments}})
+ end.
+
+%% Returns a list of frament table names
+frag_names(Tab) ->
+ case ?catch_val({Tab, frag_hash}) of
+ {'EXIT', _} ->
+ [Tab];
+ FH ->
+ N = FH#frag_state.n_fragments,
+ frag_names(Tab, N, [])
+ end.
+
+frag_names(Tab, 1, Acc) ->
+ [Tab | Acc];
+frag_names(Tab, N, Acc) ->
+ Frag = n_to_frag_name(Tab, N),
+ frag_names(Tab, N - 1, [Frag | Acc]).
+
+%% Returns a list of {Node, FragCount} tuples
+%% sorted on FragCounts
+frag_dist(Tab) ->
+ Pool = lookup_prop(Tab, node_pool),
+ Dist = [{good, Node, 0} || Node <- Pool],
+ Dist2 = count_frag(frag_names(Tab), Dist),
+ sort_dist(Dist2).
+
+count_frag([Frag | Frags], Dist) ->
+ Dist2 = incr_nodes(val({Frag, ram_copies}), Dist),
+ Dist3 = incr_nodes(val({Frag, disc_copies}), Dist2),
+ Dist4 = incr_nodes(val({Frag, disc_only_copies}), Dist3),
+ count_frag(Frags, Dist4);
+count_frag([], Dist) ->
+ Dist.
+
+incr_nodes([Node | Nodes], Dist) ->
+ Dist2 = incr_node(Node, Dist),
+ incr_nodes(Nodes, Dist2);
+incr_nodes([], Dist) ->
+ Dist.
+
+incr_node(Node, [{Kind, Node, Count} | Tail]) ->
+ [{Kind, Node, Count + 1} | Tail];
+incr_node(Node, [Head | Tail]) ->
+ [Head | incr_node(Node, Tail)];
+incr_node(Node, []) ->
+ [{bad, Node, 1}].
+
+%% Sorts dist according in decreasing count order
+sort_dist(Dist) ->
+ Dist2 = deep_dist(Dist, []),
+ Dist3 = lists:keysort(1, Dist2),
+ shallow_dist(Dist3).
+
+deep_dist([Head | Tail], Deep) ->
+ {Kind, _Node, Count} = Head,
+ {Tag, Same, Other} = pick_count(Kind, Count, [Head | Tail]),
+ deep_dist(Other, [{Tag, Same} | Deep]);
+deep_dist([], Deep) ->
+ Deep.
+
+pick_count(Kind, Count, [{Kind2, Node2, Count2} | Tail]) ->
+ Head = {Node2, Count2},
+ {_, Same, Other} = pick_count(Kind, Count, Tail),
+ if
+ Kind == bad ->
+ {bad, [Head | Same], Other};
+ Kind2 == bad ->
+ {Count, Same, [{Kind2, Node2, Count2} | Other]};
+ Count == Count2 ->
+ {Count, [Head | Same], Other};
+ true ->
+ {Count, Same, [{Kind2, Node2, Count2} | Other]}
+ end;
+pick_count(_Kind, Count, []) ->
+ {Count, [], []}.
+
+shallow_dist([{_Tag, Shallow} | Deep]) ->
+ Shallow ++ shallow_dist(Deep);
+shallow_dist([]) ->
+ [].
diff --git a/lib/mnesia/src/mnesia_frag_hash.erl b/lib/mnesia/src/mnesia_frag_hash.erl
new file mode 100644
index 0000000000..610ba2535c
--- /dev/null
+++ b/lib/mnesia/src/mnesia_frag_hash.erl
@@ -0,0 +1,151 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2002-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%----------------------------------------------------------------------
+%%% Purpose : Implements hashing functionality for fragmented tables
+%%%----------------------------------------------------------------------
+
+%header_doc_include
+-module(mnesia_frag_hash).
+
+%% Fragmented Table Hashing callback functions
+-export([
+ init_state/2,
+ add_frag/1,
+ del_frag/1,
+ key_to_frag_number/2,
+ match_spec_to_frag_numbers/2
+ ]).
+
+%header_doc_include
+%%-behaviour(mnesia_frag_hash).
+
+%impl_doc_include
+-record(hash_state,
+ {n_fragments,
+ next_n_to_split,
+ n_doubles,
+ function}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init_state(_Tab, State) when State == undefined ->
+ #hash_state{n_fragments = 1,
+ next_n_to_split = 1,
+ n_doubles = 0,
+ function = phash2}.
+
+convert_old_state({hash_state, N, P, L}) ->
+ #hash_state{n_fragments = N,
+ next_n_to_split = P,
+ n_doubles = L,
+ function = phash}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+add_frag(#hash_state{next_n_to_split = SplitN, n_doubles = L, n_fragments = N} = State) ->
+ P = SplitN + 1,
+ NewN = N + 1,
+ State2 = case power2(L) + 1 of
+ P2 when P2 == P ->
+ State#hash_state{n_fragments = NewN,
+ n_doubles = L + 1,
+ next_n_to_split = 1};
+ _ ->
+ State#hash_state{n_fragments = NewN,
+ next_n_to_split = P}
+ end,
+ {State2, [SplitN], [NewN]};
+add_frag(OldState) ->
+ State = convert_old_state(OldState),
+ add_frag(State).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+del_frag(#hash_state{next_n_to_split = SplitN, n_doubles = L, n_fragments = N} = State) ->
+ P = SplitN - 1,
+ if
+ P < 1 ->
+ L2 = L - 1,
+ MergeN = power2(L2),
+ State2 = State#hash_state{n_fragments = N - 1,
+ next_n_to_split = MergeN,
+ n_doubles = L2},
+ {State2, [N], [MergeN]};
+ true ->
+ MergeN = P,
+ State2 = State#hash_state{n_fragments = N - 1,
+ next_n_to_split = MergeN},
+ {State2, [N], [MergeN]}
+ end;
+del_frag(OldState) ->
+ State = convert_old_state(OldState),
+ del_frag(State).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+key_to_frag_number(#hash_state{function = phash, next_n_to_split = SplitN, n_doubles = L}, Key) ->
+ P = SplitN,
+ A = erlang:phash(Key, power2(L)),
+ if
+ A < P ->
+ erlang:phash(Key, power2(L + 1));
+ true ->
+ A
+ end;
+key_to_frag_number(#hash_state{function = phash2, next_n_to_split = SplitN, n_doubles = L}, Key) ->
+ P = SplitN,
+ A = erlang:phash2(Key, power2(L)) + 1,
+ if
+ A < P ->
+ erlang:phash2(Key, power2(L + 1)) + 1;
+ true ->
+ A
+ end;
+key_to_frag_number(OldState, Key) ->
+ State = convert_old_state(OldState),
+ key_to_frag_number(State, Key).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+match_spec_to_frag_numbers(#hash_state{n_fragments = N} = State, MatchSpec) ->
+ case MatchSpec of
+ [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
+ KeyPat = element(2, HeadPat),
+ case has_var(KeyPat) of
+ false ->
+ [key_to_frag_number(State, KeyPat)];
+ true ->
+ lists:seq(1, N)
+ end;
+ _ ->
+ lists:seq(1, N)
+ end;
+match_spec_to_frag_numbers(OldState, MatchSpec) ->
+ State = convert_old_state(OldState),
+ match_spec_to_frag_numbers(State, MatchSpec).
+
+power2(Y) ->
+ 1 bsl Y. % trunc(math:pow(2, Y)).
+
+%impl_doc_include
+
+has_var(Pat) ->
+ mnesia:has_var(Pat).
diff --git a/lib/mnesia/src/mnesia_frag_old_hash.erl b/lib/mnesia/src/mnesia_frag_old_hash.erl
new file mode 100644
index 0000000000..817bb54eb1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_frag_old_hash.erl
@@ -0,0 +1,132 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2002-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%----------------------------------------------------------------------
+%%% Purpose : Implements hashing functionality for fragmented tables
+%%%----------------------------------------------------------------------
+
+-module(mnesia_frag_old_hash).
+%%-behaviour(mnesia_frag_hash).
+
+-compile({nowarn_deprecated_function, {erlang,hash,2}}).
+
+%% Hashing callback functions
+-export([
+ init_state/2,
+ add_frag/1,
+ del_frag/1,
+ key_to_frag_number/2,
+ match_spec_to_frag_numbers/2
+ ]).
+
+-record(old_hash_state,
+ {n_fragments,
+ next_n_to_split,
+ n_doubles}).
+
+%% Old style. Kept for backwards compatibility.
+-record(frag_hash,
+ {foreign_key,
+ n_fragments,
+ next_n_to_split,
+ n_doubles}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init_state(_Tab, InitialState) when InitialState == undefined ->
+ #old_hash_state{n_fragments = 1,
+ next_n_to_split = 1,
+ n_doubles = 0};
+init_state(_Tab, FH) when is_record(FH, frag_hash) ->
+ %% Old style. Kept for backwards compatibility.
+ #old_hash_state{n_fragments = FH#frag_hash.n_fragments,
+ next_n_to_split = FH#frag_hash.next_n_to_split,
+ n_doubles = FH#frag_hash.n_doubles}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+add_frag(State) when is_record(State, old_hash_state) ->
+ SplitN = State#old_hash_state.next_n_to_split,
+ P = SplitN + 1,
+ L = State#old_hash_state.n_doubles,
+ NewN = State#old_hash_state.n_fragments + 1,
+ State2 = case trunc(math:pow(2, L)) + 1 of
+ P2 when P2 == P ->
+ State#old_hash_state{n_fragments = NewN,
+ next_n_to_split = 1,
+ n_doubles = L + 1};
+ _ ->
+ State#old_hash_state{n_fragments = NewN,
+ next_n_to_split = P}
+ end,
+ {State2, [SplitN], [NewN]}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+del_frag(State) when is_record(State, old_hash_state) ->
+ P = State#old_hash_state.next_n_to_split - 1,
+ L = State#old_hash_state.n_doubles,
+ N = State#old_hash_state.n_fragments,
+ if
+ P < 1 ->
+ L2 = L - 1,
+ MergeN = trunc(math:pow(2, L2)),
+ State2 = State#old_hash_state{n_fragments = N - 1,
+ next_n_to_split = MergeN,
+ n_doubles = L2},
+ {State2, [N], [MergeN]};
+ true ->
+ MergeN = P,
+ State2 = State#old_hash_state{n_fragments = N - 1,
+ next_n_to_split = MergeN},
+ {State2, [N], [MergeN]}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+key_to_frag_number(State, Key) when is_record(State, old_hash_state) ->
+ L = State#old_hash_state.n_doubles,
+ A = erlang:hash(Key, trunc(math:pow(2, L))),
+ P = State#old_hash_state.next_n_to_split,
+ if
+ A < P ->
+ erlang:hash(Key, trunc(math:pow(2, L + 1)));
+ true ->
+ A
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+match_spec_to_frag_numbers(State, MatchSpec) when is_record(State, old_hash_state) ->
+ case MatchSpec of
+ [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
+ KeyPat = element(2, HeadPat),
+ case has_var(KeyPat) of
+ false ->
+ [key_to_frag_number(State, KeyPat)];
+ true ->
+ lists:seq(1, State#old_hash_state.n_fragments)
+ end;
+ _ ->
+ lists:seq(1, State#old_hash_state.n_fragments)
+ end.
+
+has_var(Pat) ->
+ mnesia:has_var(Pat).
diff --git a/lib/mnesia/src/mnesia_index.erl b/lib/mnesia/src/mnesia_index.erl
new file mode 100644
index 0000000000..4e6e8a997c
--- /dev/null
+++ b/lib/mnesia/src/mnesia_index.erl
@@ -0,0 +1,384 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Purpose: Handles index functionality in mnesia
+
+-module(mnesia_index).
+-export([read/5,
+ add_index/5,
+ delete_index/3,
+ del_object_index/5,
+ clear_index/4,
+ dirty_match_object/3,
+ dirty_select/3,
+ dirty_read/3,
+ dirty_read2/3,
+
+ db_put/2,
+ db_get/2,
+ db_match_erase/2,
+ get_index_table/2,
+ get_index_table/3,
+
+ tab2filename/2,
+ tab2tmp_filename/2,
+ init_index/2,
+ init_indecies/3,
+ del_transient/2,
+ del_transient/3,
+ del_index_table/3]).
+
+-import(mnesia_lib, [verbose/2]).
+-include("mnesia.hrl").
+
+-record(index, {setorbag, pos_list}).
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+%% read an object list throuh its index table
+%% we assume that table Tab has index on attribute number Pos
+
+read(Tid, Store, Tab, IxKey, Pos) ->
+ ResList = mnesia_locker:ixrlock(Tid, Store, Tab, IxKey, Pos),
+ %% Remove all tuples which don't include Ixkey, happens when Tab is a bag
+ case val({Tab, setorbag}) of
+ bag ->
+ mnesia_lib:key_search_all(IxKey, Pos, ResList);
+ _ ->
+ ResList
+ end.
+
+add_index(Index, Tab, Key, Obj, Old) ->
+ add_index2(Index#index.pos_list, Index#index.setorbag, Tab, Key, Obj, Old).
+
+add_index2([{Pos, Ixt} |Tail], bag, Tab, K, Obj, OldRecs) ->
+ db_put(Ixt, {element(Pos, Obj), K}),
+ add_index2(Tail, bag, Tab, K, Obj, OldRecs);
+add_index2([{Pos, Ixt} |Tail], Type, Tab, K, Obj, OldRecs) ->
+ %% Remove old tuples in index if Tab is updated
+ case OldRecs of
+ undefined ->
+ Old = mnesia_lib:db_get(Tab, K),
+ del_ixes(Ixt, Old, Pos, K);
+ Old ->
+ del_ixes(Ixt, Old, Pos, K)
+ end,
+ db_put(Ixt, {element(Pos, Obj), K}),
+ add_index2(Tail, Type, Tab, K, Obj, OldRecs);
+add_index2([], _, _Tab, _K, _Obj, _) -> ok.
+
+delete_index(Index, Tab, K) ->
+ delete_index2(Index#index.pos_list, Tab, K).
+
+delete_index2([{Pos, Ixt} | Tail], Tab, K) ->
+ DelObjs = mnesia_lib:db_get(Tab, K),
+ del_ixes(Ixt, DelObjs, Pos, K),
+ delete_index2(Tail, Tab, K);
+delete_index2([], _Tab, _K) -> ok.
+
+
+del_ixes(_Ixt, [], _Pos, _L) -> ok;
+del_ixes(Ixt, [Obj | Tail], Pos, Key) ->
+ db_match_erase(Ixt, {element(Pos, Obj), Key}),
+ del_ixes(Ixt, Tail, Pos, Key).
+
+del_object_index(Index, Tab, K, Obj, Old) ->
+ del_object_index2(Index#index.pos_list, Index#index.setorbag, Tab, K, Obj, Old).
+
+del_object_index2([], _, _Tab, _K, _Obj, _Old) -> ok;
+del_object_index2([{Pos, Ixt} | Tail], SoB, Tab, K, Obj, Old) ->
+ case SoB of
+ bag ->
+ del_object_bag(Tab, K, Obj, Pos, Ixt, Old);
+ _ -> %% If set remove the tuple in index table
+ del_ixes(Ixt, [Obj], Pos, K)
+ end,
+ del_object_index2(Tail, SoB, Tab, K, Obj, Old).
+
+del_object_bag(Tab, Key, Obj, Pos, Ixt, undefined) ->
+ IxKey = element(Pos, Obj),
+ Old = [X || X <- mnesia_lib:db_get(Tab, Key), element(Pos, X) =:= IxKey],
+ del_object_bag(Tab, Key, Obj, Pos, Ixt, Old);
+%% If Tab type is bag we need remove index identifier if Tab
+%% contains less than 2 elements.
+del_object_bag(_Tab, Key, Obj, Pos, Ixt, Old) when length(Old) < 2 ->
+ del_ixes(Ixt, [Obj], Pos, Key);
+del_object_bag(_Tab, _Key, _Obj, _Pos, _Ixt, _Old) -> ok.
+
+clear_index(Index, Tab, K, Obj) ->
+ clear_index2(Index#index.pos_list, Tab, K, Obj).
+
+clear_index2([], _Tab, _K, _Obj) -> ok;
+clear_index2([{_Pos, Ixt} | Tail], Tab, K, Obj) ->
+ db_match_erase(Ixt, Obj),
+ clear_index2(Tail, Tab, K, Obj).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+dirty_match_object(Tab, Pat, Pos) ->
+ %% Assume that we are on the node where the replica is
+ case element(2, Pat) of
+ '_' ->
+ IxKey = element(Pos, Pat),
+ RealKeys = realkeys(Tab, Pos, IxKey),
+ merge(RealKeys, Tab, Pat, []);
+ _Else ->
+ mnesia_lib:db_match_object(Tab, Pat)
+ end.
+
+merge([{_IxKey, RealKey} | Tail], Tab, Pat, Ack) ->
+ %% Assume that we are on the node where the replica is
+ Pat2 = setelement(2, Pat, RealKey),
+ Recs = mnesia_lib:db_match_object(Tab, Pat2),
+ merge(Tail, Tab, Pat, Recs ++ Ack);
+merge([], _, _, Ack) ->
+ Ack.
+
+realkeys(Tab, Pos, IxKey) ->
+ Index = get_index_table(Tab, Pos),
+ db_get(Index, IxKey). % a list on the form [{IxKey, RealKey1} , ....
+
+dirty_select(Tab, Spec, Pos) ->
+ %% Assume that we are on the node where the replica is
+ %% Returns the records without applying the match spec
+ %% The actual filtering is handled by the caller
+ IxKey = element(Pos, Spec),
+ RealKeys = realkeys(Tab, Pos, IxKey),
+ StorageType = val({Tab, storage_type}),
+ lists:append([mnesia_lib:db_get(StorageType, Tab, Key) || {_,Key} <- RealKeys]).
+
+dirty_read(Tab, IxKey, Pos) ->
+ ResList = mnesia:dirty_rpc(Tab, ?MODULE, dirty_read2,
+ [Tab, IxKey, Pos]),
+ case val({Tab, setorbag}) of
+ bag ->
+ %% Remove all tuples which don't include Ixkey
+ mnesia_lib:key_search_all(IxKey, Pos, ResList);
+ _ ->
+ ResList
+ end.
+
+dirty_read2(Tab, IxKey, Pos) ->
+ Ix = get_index_table(Tab, Pos),
+ Keys = db_match(Ix, {IxKey, '$1'}),
+ r_keys(Keys, Tab, []).
+
+r_keys([[H]|T],Tab,Ack) ->
+ V = mnesia_lib:db_get(Tab, H),
+ r_keys(T, Tab, V ++ Ack);
+r_keys([], _, Ack) ->
+ Ack.
+
+
+%%%%%%% Creation, Init and deletion routines for index tables
+%% We can have several indexes on the same table
+%% this can be a fairly costly operation if table is *very* large
+
+tab2filename(Tab, Pos) ->
+ mnesia_lib:dir(Tab) ++ "_" ++ integer_to_list(Pos) ++ ".DAT".
+
+tab2tmp_filename(Tab, Pos) ->
+ mnesia_lib:dir(Tab) ++ "_" ++ integer_to_list(Pos) ++ ".TMP".
+
+init_index(Tab, Storage) ->
+ PosList = val({Tab, index}),
+ init_indecies(Tab, Storage, PosList).
+
+init_indecies(Tab, Storage, PosList) ->
+ case Storage of
+ unknown ->
+ ignore;
+ disc_only_copies ->
+ init_disc_index(Tab, PosList);
+ ram_copies ->
+ make_ram_index(Tab, PosList);
+ disc_copies ->
+ make_ram_index(Tab, PosList)
+ end.
+
+%% works for both ram and disc indexes
+
+del_index_table(_, unknown, _) ->
+ ignore;
+del_index_table(Tab, Storage, Pos) ->
+ delete_transient_index(Tab, Pos, Storage),
+ mnesia_lib:del({Tab, index}, Pos).
+
+del_transient(Tab, Storage) ->
+ PosList = val({Tab, index}),
+ del_transient(Tab, PosList, Storage).
+
+del_transient(_, [], _) -> done;
+del_transient(Tab, [Pos | Tail], Storage) ->
+ delete_transient_index(Tab, Pos, Storage),
+ del_transient(Tab, Tail, Storage).
+
+delete_transient_index(Tab, Pos, disc_only_copies) ->
+ Tag = {Tab, index, Pos},
+ mnesia_monitor:unsafe_close_dets(Tag),
+ file:delete(tab2filename(Tab, Pos)),
+ del_index_info(Tab, Pos), %% Uses val(..)
+ mnesia_lib:unset({Tab, {index, Pos}});
+
+delete_transient_index(Tab, Pos, _Storage) ->
+ Ixt = val({Tab, {index, Pos}}),
+ ?ets_delete_table(Ixt),
+ del_index_info(Tab, Pos),
+ mnesia_lib:unset({Tab, {index, Pos}}).
+
+%%%%% misc functions for the index create/init/delete functions above
+
+%% assuming that the file exists.
+init_disc_index(_Tab, []) ->
+ done;
+init_disc_index(Tab, [Pos | Tail]) when is_integer(Pos) ->
+ Fn = tab2filename(Tab, Pos),
+ IxTag = {Tab, index, Pos},
+ file:delete(Fn),
+ Args = [{file, Fn}, {keypos, 1}, {type, bag}],
+ mnesia_monitor:open_dets(IxTag, Args),
+ Storage = disc_only_copies,
+ Key = mnesia_lib:db_first(Storage, Tab),
+ Recs = mnesia_lib:db_get(Storage, Tab, Key),
+ BinSize = size(term_to_binary(Recs)),
+ KeysPerChunk = (4000 div BinSize) + 1,
+ Init = {start, KeysPerChunk},
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+ ok = dets:init_table(IxTag, create_fun(Init, Tab, Pos)),
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_lib:set({Tab, {index, Pos}}, IxTag),
+ add_index_info(Tab, val({Tab, setorbag}), {Pos, {dets, IxTag}}),
+ init_disc_index(Tab, Tail).
+
+create_fun(Cont, Tab, Pos) ->
+ fun(read) ->
+ Data =
+ case Cont of
+ {start, KeysPerChunk} ->
+ mnesia_lib:db_init_chunk(disc_only_copies, Tab, KeysPerChunk);
+ '$end_of_table' ->
+ '$end_of_table';
+ _Else ->
+ mnesia_lib:db_chunk(disc_only_copies, Cont)
+ end,
+ case Data of
+ '$end_of_table' ->
+ end_of_input;
+ {Recs, Next} ->
+ IdxElems = [{element(Pos, Obj), element(2, Obj)} || Obj <- Recs],
+ {IdxElems, create_fun(Next, Tab, Pos)}
+ end;
+ (close) ->
+ ok
+ end.
+
+make_ram_index(_, []) ->
+ done;
+make_ram_index(Tab, [Pos | Tail]) ->
+ add_ram_index(Tab, Pos),
+ make_ram_index(Tab, Tail).
+
+add_ram_index(Tab, Pos) when is_integer(Pos) ->
+ verbose("Creating index for ~w ~n", [Tab]),
+ Index = mnesia_monitor:mktab(mnesia_index, [bag, public]),
+ Insert = fun(Rec, _Acc) ->
+ true = ?ets_insert(Index, {element(Pos, Rec), element(2, Rec)})
+ end,
+ mnesia_lib:db_fixtable(ram_copies, Tab, true),
+ true = ets:foldl(Insert, true, Tab),
+ mnesia_lib:db_fixtable(ram_copies, Tab, false),
+ mnesia_lib:set({Tab, {index, Pos}}, Index),
+ add_index_info(Tab, val({Tab, setorbag}), {Pos, {ram, Index}});
+add_ram_index(_Tab, snmp) ->
+ ok.
+
+add_index_info(Tab, Type, IxElem) ->
+ Commit = val({Tab, commit_work}),
+ case lists:keysearch(index, 1, Commit) of
+ false ->
+ Index = #index{setorbag = Type,
+ pos_list = [IxElem]},
+ %% Check later if mnesia_tm is sensative about the order
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit([Index | Commit]));
+ {value, Old} ->
+ %% We could check for consistency here
+ Index = Old#index{pos_list = [IxElem | Old#index.pos_list]},
+ NewC = lists:keyreplace(index, 1, Commit, Index),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC))
+ end.
+
+del_index_info(Tab, Pos) ->
+ Commit = val({Tab, commit_work}),
+ case lists:keysearch(index, 1, Commit) of
+ false ->
+ %% Something is wrong ignore
+ skip;
+ {value, Old} ->
+ case lists:keydelete(Pos, 1, Old#index.pos_list) of
+ [] ->
+ NewC = lists:keydelete(index, 1, Commit),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC));
+ New ->
+ Index = Old#index{pos_list = New},
+ NewC = lists:keyreplace(index, 1, Commit, Index),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC))
+ end
+ end.
+
+db_put({ram, Ixt}, V) ->
+ true = ?ets_insert(Ixt, V);
+db_put({dets, Ixt}, V) ->
+ ok = dets:insert(Ixt, V).
+
+db_get({ram, Ixt}, K) ->
+ ?ets_lookup(Ixt, K);
+db_get({dets, Ixt}, K) ->
+ dets:lookup(Ixt, K).
+
+db_match_erase({ram, Ixt}, Pat) ->
+ true = ?ets_match_delete(Ixt, Pat);
+db_match_erase({dets, Ixt}, Pat) ->
+ ok = dets:match_delete(Ixt, Pat).
+
+db_match({ram, Ixt}, Pat) ->
+ ?ets_match(Ixt, Pat);
+db_match({dets, Ixt}, Pat) ->
+ dets:match(Ixt, Pat).
+
+get_index_table(Tab, Pos) ->
+ get_index_table(Tab, val({Tab, storage_type}), Pos).
+
+get_index_table(Tab, ram_copies, Pos) ->
+ {ram, val({Tab, {index, Pos}})};
+get_index_table(Tab, disc_copies, Pos) ->
+ {ram, val({Tab, {index, Pos}})};
+get_index_table(Tab, disc_only_copies, Pos) ->
+ {dets, val({Tab, {index, Pos}})};
+get_index_table(_Tab, unknown, _Pos) ->
+ unknown.
+
diff --git a/lib/mnesia/src/mnesia_kernel_sup.erl b/lib/mnesia/src/mnesia_kernel_sup.erl
new file mode 100644
index 0000000000..08f6129fc0
--- /dev/null
+++ b/lib/mnesia/src/mnesia_kernel_sup.erl
@@ -0,0 +1,65 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_kernel_sup).
+
+-behaviour(supervisor).
+
+-export([start/0, init/1, supervisor_timeout/1]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% top supervisor callback functions
+
+start() ->
+ supervisor:start_link({local, mnesia_kernel_sup}, ?MODULE, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% sub supervisor callback functions
+
+init([]) ->
+ ProcLib = [mnesia_monitor, proc_lib],
+ Flags = {one_for_all, 0, timer:hours(24)}, % Trust the top supervisor
+ Workers = [worker_spec(mnesia_monitor, timer:seconds(3), [gen_server]),
+ worker_spec(mnesia_subscr, timer:seconds(3), [gen_server]),
+ worker_spec(mnesia_locker, timer:seconds(3), ProcLib),
+ worker_spec(mnesia_recover, timer:minutes(3), [gen_server]),
+ worker_spec(mnesia_tm, timer:seconds(30), ProcLib),
+ supervisor_spec(mnesia_checkpoint_sup),
+ supervisor_spec(mnesia_snmp_sup),
+ worker_spec(mnesia_controller, timer:seconds(3), [gen_server]),
+ worker_spec(mnesia_late_loader, timer:seconds(3), ProcLib)
+ ],
+ {ok, {Flags, Workers}}.
+
+worker_spec(Name, KillAfter, Modules) ->
+ KA = supervisor_timeout(KillAfter),
+ {Name, {Name, start, []}, permanent, KA, worker, [Name] ++ Modules}.
+
+supervisor_spec(Name) ->
+ {Name, {Name, start, []}, permanent, infinity, supervisor,
+ [Name, supervisor]}.
+
+-ifdef(debug_shutdown).
+supervisor_timeout(_KillAfter) -> timer:hours(24).
+-else.
+supervisor_timeout(KillAfter) -> KillAfter.
+-endif.
+
+
diff --git a/lib/mnesia/src/mnesia_late_loader.erl b/lib/mnesia/src/mnesia_late_loader.erl
new file mode 100644
index 0000000000..d09de3ca66
--- /dev/null
+++ b/lib/mnesia/src/mnesia_late_loader.erl
@@ -0,0 +1,108 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_late_loader).
+
+-export([
+ async_late_disc_load/3,
+ maybe_async_late_disc_load/3,
+ init/1,
+ start/0
+ ]).
+
+%% sys callback functions
+-export([
+ system_continue/3,
+ system_terminate/4,
+ system_code_change/4
+ ]).
+
+-define(SERVER_NAME, ?MODULE).
+
+-record(state, {supervisor}).
+
+async_late_disc_load(_, [], _) -> ok;
+async_late_disc_load(Node, Tabs, Reason) ->
+ Msg = {async_late_disc_load, Tabs, Reason},
+ catch ({?SERVER_NAME, Node} ! {self(), Msg}).
+
+maybe_async_late_disc_load(_, [], _) -> ok;
+maybe_async_late_disc_load(Node, Tabs, Reason) ->
+ Msg = {maybe_async_late_disc_load, Tabs, Reason},
+ catch ({?SERVER_NAME, Node} ! {self(), Msg}).
+
+start() ->
+ mnesia_monitor:start_proc(?SERVER_NAME, ?MODULE, init, [self()]).
+
+init(Parent) ->
+ %% Trap exit omitted intentionally
+ register(?SERVER_NAME, self()),
+ link(whereis(mnesia_controller)), %% We may not hang
+ mnesia_controller:merge_schema(),
+ unlink(whereis(mnesia_controller)),
+ mnesia_lib:set(mnesia_status, running),
+ proc_lib:init_ack(Parent, {ok, self()}),
+ loop(#state{supervisor = Parent}).
+
+loop(State) ->
+ receive
+ {_From, {async_late_disc_load, Tabs, Reason}} ->
+ mnesia_controller:schedule_late_disc_load(Tabs, Reason),
+ loop(State);
+
+ {_From, {maybe_async_late_disc_load, Tabs, Reason}} ->
+ CheckMaster =
+ fun(Tab, Good) ->
+ case mnesia_recover:get_master_nodes(Tab) of
+ [] -> [Tab|Good];
+ Masters ->
+ case lists:member(node(),Masters) of
+ true -> [Tab|Good];
+ false -> Good
+ end
+ end
+ end,
+ GoodTabs = lists:foldl(CheckMaster, [], Tabs),
+ mnesia_controller:schedule_late_disc_load(GoodTabs, Reason),
+ loop(State);
+
+ {system, From, Msg} ->
+ mnesia_lib:dbg_out("~p got {system, ~p, ~p}~n",
+ [?SERVER_NAME, From, Msg]),
+ Parent = State#state.supervisor,
+ sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State);
+
+ Msg ->
+ mnesia_lib:error("~p got unexpected message: ~p~n",
+ [?SERVER_NAME, Msg]),
+ loop(State)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+system_continue(_Parent, _Debug, State) ->
+ loop(State).
+
+system_terminate(Reason, _Parent, _Debug, _State) ->
+ exit(Reason).
+
+system_code_change(State, _Module, _OldVsn, _Extra) ->
+ {ok, State}.
diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl
new file mode 100644
index 0000000000..dba808e66e
--- /dev/null
+++ b/lib/mnesia/src/mnesia_lib.erl
@@ -0,0 +1,1306 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% This module contains all sorts of various which doesn't fit
+%% anywhere else. Basically everything is exported.
+
+-module(mnesia_lib).
+
+-include("mnesia.hrl").
+-include_lib("kernel/include/file.hrl").
+
+-export([core_file/0]).
+
+-export([
+ active_tables/0,
+ add/2,
+ add_list/2,
+ add_lsort/2,
+ all_nodes/0,
+%% catch_val/1,
+ copy_file/2,
+ copy_holders/1,
+ coredump/0,
+ coredump/1,
+ create_counter/1,
+ cs_to_nodes/1,
+ cs_to_storage_type/2,
+ dets_to_ets/6,
+ db_chunk/2,
+ db_init_chunk/1,
+ db_init_chunk/2,
+ db_init_chunk/3,
+ db_erase/2,
+ db_erase/3,
+ db_erase_tab/1,
+ db_erase_tab/2,
+ db_first/1,
+ db_first/2,
+ db_last/1,
+ db_last/2,
+ db_fixtable/3,
+ db_get/2,
+ db_get/3,
+ db_match_erase/2,
+ db_match_erase/3,
+ db_match_object/2,
+ db_match_object/3,
+ db_next_key/2,
+ db_next_key/3,
+ db_prev_key/2,
+ db_prev_key/3,
+ db_put/2,
+ db_put/3,
+ db_select/2,
+ db_select/3,
+ db_select_init/4,
+ db_select_cont/3,
+ db_slot/2,
+ db_slot/3,
+ db_update_counter/3,
+ db_update_counter/4,
+ dbg_out/2,
+ del/2,
+ dets_sync_close/1,
+ dets_sync_open/2,
+ dets_sync_open/3,
+ dir/0,
+ dir/1,
+ dir_info/0,
+ dirty_rpc_error_tag/1,
+ dist_coredump/0,
+ disk_type/1,
+ disk_type/2,
+ elems/2,
+ ensure_loaded/1,
+ error/2,
+ error_desc/1,
+ etype/1,
+ exists/1,
+ fatal/2,
+ get_node_number/0,
+ fix_error/1,
+ important/2,
+ incr_counter/1,
+ incr_counter/2,
+ intersect/2,
+ is_running/0,
+ is_running/1,
+ is_running_remote/0,
+ is_string/1,
+ key_search_delete/3,
+ key_search_all/3,
+ last_error/0,
+ local_active_tables/0,
+ lock_table/1,
+ mkcore/1,
+ not_active_here/1,
+ other_val/2,
+ pad_name/3,
+ random_time/2,
+ read_counter/1,
+ readable_indecies/1,
+ remote_copy_holders/1,
+ report_fatal/2,
+ report_system_event/1,
+ running_nodes/0,
+ running_nodes/1,
+ schema_cs_to_storage_type/2,
+ search_delete/2,
+ set/2,
+ set_counter/2,
+ set_local_content_whereabouts/1,
+ set_remote_where_to_read/1,
+ set_remote_where_to_read/2,
+ show/1,
+ show/2,
+ sort_commit/1,
+ storage_type_at_node/2,
+ tab2dat/1,
+ tab2dmp/1,
+ tab2tmp/1,
+ tab2dcd/1,
+ tab2dcl/1,
+ to_list/1,
+ union/2,
+ uniq/1,
+ unlock_table/1,
+ unset/1,
+ %% update_counter/2,
+ val/1,
+ vcore/0,
+ vcore/1,
+ verbose/2,
+ view/0,
+ view/1,
+ view/2,
+ warning/2,
+
+ is_debug_compiled/0,
+ activate_debug_fun/5,
+ deactivate_debug_fun/3,
+ eval_debug_fun/4,
+ scratch_debug_fun/0
+ ]).
+
+
+search_delete(Obj, List) ->
+ search_delete(Obj, List, [], none).
+search_delete(Obj, [Obj|Tail], Ack, _Res) ->
+ search_delete(Obj, Tail, Ack, Obj);
+search_delete(Obj, [H|T], Ack, Res) ->
+ search_delete(Obj, T, [H|Ack], Res);
+search_delete(_, [], Ack, Res) ->
+ {Res, Ack}.
+
+key_search_delete(Key, Pos, TupleList) ->
+ key_search_delete(Key, Pos, TupleList, none, []).
+key_search_delete(Key, Pos, [H|T], _Obj, Ack) when element(Pos, H) == Key ->
+ key_search_delete(Key, Pos, T, H, Ack);
+key_search_delete(Key, Pos, [H|T], Obj, Ack) ->
+ key_search_delete(Key, Pos, T, Obj, [H|Ack]);
+key_search_delete(_, _, [], Obj, Ack) ->
+ {Obj, Ack}.
+
+key_search_all(Key, Pos, TupleList) ->
+ key_search_all(Key, Pos, TupleList, []).
+key_search_all(Key, N, [H|T], Ack) when element(N, H) == Key ->
+ key_search_all(Key, N, T, [H|Ack]);
+key_search_all(Key, N, [_|T], Ack) ->
+ key_search_all(Key, N, T, Ack);
+key_search_all(_, _, [], Ack) -> Ack.
+
+intersect(L1, L2) ->
+ L2 -- (L2 -- L1).
+
+elems(I, [H|T]) ->
+ [element(I, H) | elems(I, T)];
+elems(_, []) ->
+ [].
+
+%% sort_commit see to that checkpoint info is always first in
+%% commit_work structure the other info don't need to be sorted.
+sort_commit(List) ->
+ sort_commit2(List, []).
+
+sort_commit2([{checkpoints, ChkpL}| Rest], Acc) ->
+ [{checkpoints, ChkpL}| Rest] ++ Acc;
+sort_commit2([H | R], Acc) ->
+ sort_commit2(R, [H | Acc]);
+sort_commit2([], Acc) -> Acc.
+
+is_string([H|T]) ->
+ if
+ 0 =< H, H < 256, is_integer(H) -> is_string(T);
+ true -> false
+ end;
+is_string([]) -> true.
+
+%%%
+
+union([H|L1], L2) ->
+ case lists:member(H, L2) of
+ true -> union(L1, L2);
+ false -> [H | union(L1, L2)]
+ end;
+union([], L2) -> L2.
+
+uniq([]) ->
+ [];
+uniq(List) ->
+ [H|T] = lists:sort(List),
+ uniq1(H, T, []).
+
+uniq1(H, [H|R], Ack) ->
+ uniq1(H, R, Ack);
+uniq1(Old, [H|R], Ack) ->
+ uniq1(H, R, [Old|Ack]);
+uniq1(Old, [], Ack) ->
+ [Old| Ack].
+
+to_list(X) when is_list(X) -> X;
+to_list(X) -> atom_to_list(X).
+
+all_nodes() ->
+ Ns = mnesia:system_info(db_nodes) ++
+ mnesia:system_info(extra_db_nodes),
+ mnesia_lib:uniq(Ns).
+
+running_nodes() ->
+ running_nodes(all_nodes()).
+
+running_nodes(Ns) ->
+ {Replies, _BadNs} = rpc:multicall(Ns, ?MODULE, is_running_remote, []),
+ [N || {GoodState, N} <- Replies, GoodState == true].
+
+is_running_remote() ->
+ IsRunning = is_running(),
+ {IsRunning == yes, node()}.
+
+is_running(Node) when is_atom(Node) ->
+ case rpc:call(Node, ?MODULE, is_running, []) of
+ {badrpc, _} -> no;
+ X -> X
+ end.
+
+is_running() ->
+ case ?catch_val(mnesia_status) of
+ {'EXIT', _} -> no;
+ running -> yes;
+ starting -> starting;
+ stopping -> stopping
+ end.
+
+show(X) ->
+ show(X, []).
+show(F, A) ->
+ io:format(user, F, A).
+
+
+pad_name([Char | Chars], Len, Tail) ->
+ [Char | pad_name(Chars, Len - 1, Tail)];
+pad_name([], Len, Tail) when Len =< 0 ->
+ Tail;
+pad_name([], Len, Tail) ->
+ [$ | pad_name([], Len - 1, Tail)].
+
+%% Some utility functions .....
+active_here(Tab) ->
+ case val({Tab, where_to_read}) of
+ Node when Node == node() -> true;
+ _ -> false
+ end.
+
+not_active_here(Tab) ->
+ not active_here(Tab).
+
+exists(Fname) ->
+ case file:open(Fname, [raw,read]) of
+ {ok, F} ->file:close(F), true;
+ _ -> false
+ end.
+
+dir() -> mnesia_monitor:get_env(dir).
+
+dir(Fname) ->
+ filename:join([dir(), to_list(Fname)]).
+
+tab2dat(Tab) -> %% DETS files
+ dir(lists:concat([Tab, ".DAT"])).
+
+tab2tmp(Tab) ->
+ dir(lists:concat([Tab, ".TMP"])).
+
+tab2dmp(Tab) -> %% Dumped ets tables
+ dir(lists:concat([Tab, ".DMP"])).
+
+tab2dcd(Tab) -> %% Disc copies data
+ dir(lists:concat([Tab, ".DCD"])).
+
+tab2dcl(Tab) -> %% Disc copies log
+ dir(lists:concat([Tab, ".DCL"])).
+
+storage_type_at_node(Node, Tab) ->
+ search_key(Node, [{disc_copies, val({Tab, disc_copies})},
+ {ram_copies, val({Tab, ram_copies})},
+ {disc_only_copies, val({Tab, disc_only_copies})}]).
+
+cs_to_storage_type(Node, Cs) ->
+ search_key(Node, [{disc_copies, Cs#cstruct.disc_copies},
+ {ram_copies, Cs#cstruct.ram_copies},
+ {disc_only_copies, Cs#cstruct.disc_only_copies}]).
+
+schema_cs_to_storage_type(Node, Cs) ->
+ case cs_to_storage_type(Node, Cs) of
+ unknown when Cs#cstruct.name == schema -> ram_copies;
+ Other -> Other
+ end.
+
+
+search_key(Key, [{Val, List} | Tail]) ->
+ case lists:member(Key, List) of
+ true -> Val;
+ false -> search_key(Key, Tail)
+ end;
+search_key(_Key, []) ->
+ unknown.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% ops, we've got some global variables here :-)
+
+%% They are
+%%
+%% {Tab, setorbag}, -> set | bag
+%% {Tab, storage_type} -> disc_copies |ram_copies | unknown (**)
+%% {Tab, disc_copies} -> node list (from schema)
+%% {Tab, ram_copies}, -> node list (from schema)
+%% {Tab, arity}, -> number
+%% {Tab, attributes}, -> atom list
+%% {Tab, wild_pattern}, -> record tuple with '_'s
+%% {Tab, {index, Pos}} -> ets table
+%% {Tab, index} -> integer list
+%% {Tab, cstruct} -> cstruct structure
+%%
+
+%% The following fields are dynamic according to the
+%% the current node/table situation
+
+%% {Tab, where_to_write} -> node list
+%% {Tab, where_to_read} -> node | nowhere
+%%
+%% {schema, tables} -> tab list
+%% {schema, local_tables} -> tab list (**)
+%%
+%% {current, db_nodes} -> node list
+%%
+%% dir -> directory path (**)
+%% mnesia_status -> status | running | stopping (**)
+%% (**) == (Different on all nodes)
+%%
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+set(Var, Val) ->
+ ?ets_insert(mnesia_gvar, {Var, Val}).
+
+unset(Var) ->
+ ?ets_delete(mnesia_gvar, Var).
+
+other_val(Var, Other) ->
+ case Var of
+ {_, where_to_read} -> nowhere;
+ {_, where_to_write} -> [];
+ {_, active_replicas} -> [];
+ _ ->
+ pr_other(Var, Other)
+ end.
+
+-spec(pr_other/2 :: (_,_) -> no_return()).
+
+pr_other(Var, Other) ->
+ Why =
+ case is_running() of
+ no -> {node_not_running, node()};
+ _ -> {no_exists, Var}
+ end,
+ verbose("~p (~p) val(mnesia_gvar, ~w) -> ~p ~p ~n",
+ [self(), process_info(self(), registered_name),
+ Var, Other, Why]),
+ case Other of
+ {badarg, [{ets, lookup_element, _}|_]} ->
+ exit(Why);
+ _ ->
+ erlang:error(Why)
+ end.
+
+%% Some functions for list valued variables
+add(Var, Val) ->
+ L = val(Var),
+ set(Var, [Val | lists:delete(Val, L)]).
+
+add_list(Var, List) ->
+ L = val(Var),
+ set(Var, union(L, List)).
+
+del(Var, Val) ->
+ L = val(Var),
+ set(Var, lists:delete(Val, L)).
+
+%% LSort -> [node()| Sorted] == Locker sorted
+
+add_lsort(Var, Val) when node() == Val ->
+ L = val(Var),
+ set(Var, [Val | lists:delete(Val, L)]);
+add_lsort(Var,Val) ->
+ case val(Var) of
+ [Head|Rest] when Head == node() ->
+ set(Var,[Head|lsort_add(Val,Rest)]);
+ List ->
+ set(Var,lsort_add(Val,List))
+ end.
+
+lsort_add(Val,List) ->
+ case ordsets:is_element(Val,List) of
+ true -> List;
+ false -> ordsets:add_element(Val,List)
+ end.
+
+%% This function is needed due to the fact
+%% that the application_controller enters
+%% a deadlock now and then. ac is implemented
+%% as a rather naive server.
+ensure_loaded(Appl) ->
+ case application_controller:get_loaded(Appl) of
+ {true, _} ->
+ ok;
+ false ->
+ case application:load(Appl) of
+ ok ->
+ ok;
+ {error, {already_loaded, Appl}} ->
+ ok;
+ {error, Reason} ->
+ {error, {application_load_error, Reason}}
+ end
+ end.
+
+local_active_tables() ->
+ Tabs = val({schema, local_tables}),
+ lists:zf(fun(Tab) -> active_here(Tab) end, Tabs).
+
+active_tables() ->
+ Tabs = val({schema, tables}),
+ F = fun(Tab) ->
+ case val({Tab, where_to_read}) of
+ nowhere -> false;
+ _ -> {true, Tab}
+ end
+ end,
+ lists:zf(F, Tabs).
+
+etype(X) when is_integer(X) -> integer;
+etype([]) -> nil;
+etype(X) when is_list(X) -> list;
+etype(X) when is_tuple(X) -> tuple;
+etype(X) when is_atom(X) -> atom;
+etype(_) -> othertype.
+
+remote_copy_holders(Cs) ->
+ copy_holders(Cs) -- [node()].
+
+copy_holders(Cs) when Cs#cstruct.local_content == false ->
+ cs_to_nodes(Cs);
+copy_holders(Cs) when Cs#cstruct.local_content == true ->
+ case lists:member(node(), cs_to_nodes(Cs)) of
+ true -> [node()];
+ false -> []
+ end.
+
+
+set_remote_where_to_read(Tab) ->
+ set_remote_where_to_read(Tab, []).
+
+set_remote_where_to_read(Tab, Ignore) ->
+ Active = val({Tab, active_replicas}),
+ Valid =
+ case mnesia_recover:get_master_nodes(Tab) of
+ [] -> Active;
+ Masters -> mnesia_lib:intersect(Masters, Active)
+ end,
+ Available = mnesia_lib:intersect(val({current, db_nodes}), Valid -- Ignore),
+ DiscOnlyC = val({Tab, disc_only_copies}),
+ Prefered = Available -- DiscOnlyC,
+ if
+ Prefered /= [] ->
+ set({Tab, where_to_read}, hd(Prefered));
+ Available /= [] ->
+ set({Tab, where_to_read}, hd(Available));
+ true ->
+ set({Tab, where_to_read}, nowhere)
+ end.
+
+%%% Local only
+set_local_content_whereabouts(Tab) ->
+ add({schema, local_tables}, Tab),
+ add({Tab, active_replicas}, node()),
+ set({Tab, where_to_write}, [node()]),
+ set({Tab, where_to_read}, node()).
+
+%%% counter routines
+
+create_counter(Name) ->
+ set_counter(Name, 0).
+
+set_counter(Name, Val) ->
+ ?ets_insert(mnesia_stats, {Name, Val}).
+
+incr_counter(Name) ->
+ ?ets_update_counter(mnesia_stats, Name, 1).
+
+incr_counter(Name, I) ->
+ ?ets_update_counter(mnesia_stats, Name, I).
+
+%% update_counter(Name, Val) ->
+%% ?ets_update_counter(mnesia_stats, Name, Val).
+
+read_counter(Name) ->
+ ?ets_lookup_element(mnesia_stats, Name, 2).
+
+cs_to_nodes(Cs) ->
+ Cs#cstruct.disc_only_copies ++
+ Cs#cstruct.disc_copies ++
+ Cs#cstruct.ram_copies.
+
+dist_coredump() ->
+ dist_coredump(all_nodes()).
+dist_coredump(Ns) ->
+ {Replies, _} = rpc:multicall(Ns, ?MODULE, coredump, []),
+ Replies.
+
+coredump() ->
+ coredump({crashinfo, {"user initiated~n", []}}).
+coredump(CrashInfo) ->
+ Core = mkcore(CrashInfo),
+ Out = core_file(),
+ important("Writing Mnesia core to file: ~p...~p~n", [Out, CrashInfo]),
+ file:write_file(Out, Core),
+ Out.
+
+core_file() ->
+ Integers = tuple_to_list(date()) ++ tuple_to_list(time()),
+ Fun = fun(I) when I < 10 -> ["_0", I];
+ (I) -> ["_", I]
+ end,
+ List = lists:append([Fun(I) || I <- Integers]),
+ case mnesia_monitor:get_env(core_dir) of
+ Dir when is_list(Dir) ->
+ filename:absname(lists:concat(["MnesiaCore.", node()] ++ List), Dir);
+ _ ->
+ filename:absname(lists:concat(["MnesiaCore.", node()] ++ List))
+ end.
+
+mkcore(CrashInfo) ->
+% dbg_out("Making a Mnesia core dump...~p~n", [CrashInfo]),
+ Nodes = [node() |nodes()],
+ %%TidLocks = (catch ets:tab2list(mnesia_tid_locks)),
+ HeldLocks = (catch mnesia:system_info(held_locks)),
+ Core = [
+ CrashInfo,
+ {time, {date(), time()}},
+ {self, catch process_info(self())},
+ {nodes, catch rpc:multicall(Nodes, ?MODULE, get_node_number, [])},
+ {applications, catch lists:sort(application:loaded_applications())},
+ {flags, catch init:get_arguments()},
+ {code_path, catch code:get_path()},
+ {code_loaded, catch lists:sort(code:all_loaded())},
+ {etsinfo, catch ets_info(ets:all())},
+
+ {version, catch mnesia:system_info(version)},
+ {schema, catch ets:tab2list(schema)},
+ {gvar, catch ets:tab2list(mnesia_gvar)},
+ {master_nodes, catch mnesia_recover:get_master_node_info()},
+
+ {processes, catch procs()},
+ {relatives, catch relatives()},
+ {workers, catch workers(mnesia_controller:get_workers(2000))},
+ {locking_procs, catch locking_procs(HeldLocks)},
+
+ {held_locks, HeldLocks},
+ {lock_queue, catch mnesia:system_info(lock_queue)},
+ {load_info, catch mnesia_controller:get_info(2000)},
+ {trans_info, catch mnesia_tm:get_info(2000)},
+
+ {schema_file, catch file:read_file(tab2dat(schema))},
+ {dir_info, catch dir_info()},
+ {logfile, catch {ok, read_log_files()}}
+ ],
+ term_to_binary(Core).
+
+procs() ->
+ Fun = fun(P) -> {P, (catch lists:zf(fun proc_info/1, process_info(P)))} end,
+ lists:map(Fun, processes()).
+
+proc_info({registered_name, Val}) -> {true, Val};
+proc_info({message_queue_len, Val}) -> {true, Val};
+proc_info({status, Val}) -> {true, Val};
+proc_info({current_function, Val}) -> {true, Val};
+proc_info(_) -> false.
+
+get_node_number() ->
+ {node(), self()}.
+
+read_log_files() ->
+ [{F, catch file:read_file(F)} || F <- mnesia_log:log_files()].
+
+dir_info() ->
+ {ok, Cwd} = file:get_cwd(),
+ Dir = dir(),
+ [{cwd, Cwd, file:read_file_info(Cwd)},
+ {mnesia_dir, Dir, file:read_file_info(Dir)}] ++
+ case file:list_dir(Dir) of
+ {ok, Files} ->
+ [{mnesia_file, F, catch file:read_file_info(dir(F))} || F <- Files];
+ Other ->
+ [Other]
+ end.
+
+ets_info([H|T]) ->
+ [{table, H, mk_info_tuple(ets:info(H))} | ets_info(T)];
+ets_info([]) -> [].
+
+mk_info_tuple(T) when is_list(T) ->
+ list_to_tuple(T);
+mk_info_tuple(T) -> T.
+
+relatives() ->
+ Info = fun(Name) ->
+ case whereis(Name) of
+ undefined -> false;
+ Pid -> {true, {Name, Pid, catch process_info(Pid)}}
+ end
+ end,
+ lists:zf(Info, mnesia:ms()).
+
+workers({workers, Loaders, Senders, Dumper}) ->
+ Info = fun({Pid, {send_table, Tab, _Receiver, _St}}) ->
+ case Pid of
+ undefined -> false;
+ Pid -> {true, {Pid, Tab, catch process_info(Pid)}}
+ end;
+ ({Pid, What}) when is_pid(Pid) ->
+ {true, {Pid, What, catch process_info(Pid)}};
+ ({Name, Pid}) ->
+ case Pid of
+ undefined -> false;
+ Pid -> {true, {Name, Pid, catch process_info(Pid)}}
+ end
+ end,
+ SInfo = lists:zf(Info, Senders),
+ Linfo = lists:zf(Info, Loaders),
+ [{senders, SInfo},{loader, Linfo}|lists:zf(Info, [{dumper, Dumper}])].
+
+locking_procs(LockList) when is_list(LockList) ->
+ Tids = [element(3, Lock) || Lock <- LockList],
+ UT = uniq(Tids),
+ Info = fun(Tid) ->
+ Pid = Tid#tid.pid,
+ case node(Pid) == node() of
+ true ->
+ {true, {Pid, catch process_info(Pid)}};
+ _ ->
+ false
+ end
+ end,
+ lists:zf(Info, UT).
+
+view() ->
+ Bin = mkcore({crashinfo, {"view only~n", []}}),
+ vcore(Bin).
+
+%% Displays a Mnesia file on the tty. The file may be repaired.
+view(File) ->
+ case suffix([".DAT", ".RET", ".DMP", ".TMP"], File) of
+ true ->
+ view(File, dat);
+ false ->
+ case suffix([".LOG", ".BUP", ".ETS"], File) of
+ true ->
+ view(File, log);
+ false ->
+ case lists:prefix("MnesiaCore.", File) of
+ true ->
+ view(File, core);
+ false ->
+ {error, "Unknown file name"}
+ end
+ end
+ end.
+
+view(File, dat) ->
+ dets:view(File);
+view(File, log) ->
+ mnesia_log:view(File);
+view(File, core) ->
+ vcore(File).
+
+suffix(Suffixes, File) ->
+ Fun = fun(S) -> lists:suffix(S, File) end,
+ lists:any(Fun, Suffixes).
+
+%% View a core file
+
+vcore() ->
+ Prefix = lists:concat(["MnesiaCore.", node()]),
+ Filter = fun(F) -> lists:prefix(Prefix, F) end,
+ {ok, Cwd} = file:get_cwd(),
+ case file:list_dir(Cwd) of
+ {ok, Files}->
+ CoreFiles = lists:sort(lists:zf(Filter, Files)),
+ show("Mnesia core files: ~p~n", [CoreFiles]),
+ vcore(lists:last(CoreFiles));
+ Error ->
+ Error
+ end.
+
+vcore(Bin) when is_binary(Bin) ->
+ Core = binary_to_term(Bin),
+ Fun = fun({Item, Info}) ->
+ show("***** ~p *****~n", [Item]),
+ case catch vcore_elem({Item, Info}) of
+ {'EXIT', Reason} ->
+ show("{'EXIT', ~p}~n", [Reason]);
+ _ -> ok
+ end
+ end,
+ lists:foreach(Fun, Core);
+
+vcore(File) ->
+ show("~n***** Mnesia core: ~p *****~n", [File]),
+ case file:read_file(File) of
+ {ok, Bin} ->
+ vcore(Bin);
+ _ ->
+ nocore
+ end.
+
+vcore_elem({schema_file, {ok, B}}) ->
+ Fname = "/tmp/schema.DAT",
+ file:write_file(Fname, B),
+ dets:view(Fname),
+ file:delete(Fname);
+
+vcore_elem({logfile, {ok, BinList}}) ->
+ Fun = fun({F, Info}) ->
+ show("----- logfile: ~p -----~n", [F]),
+ case Info of
+ {ok, B} ->
+ Fname = "/tmp/mnesia_vcore_elem.TMP",
+ file:write_file(Fname, B),
+ mnesia_log:view(Fname),
+ file:delete(Fname);
+ _ ->
+ show("~p~n", [Info])
+ end
+ end,
+ lists:foreach(Fun, BinList);
+
+vcore_elem({crashinfo, {Format, Args}}) ->
+ show(Format, Args);
+vcore_elem({gvar, L}) ->
+ show("~p~n", [lists:sort(L)]);
+vcore_elem({transactions, Info}) ->
+ mnesia_tm:display_info(user, Info);
+
+vcore_elem({_Item, Info}) ->
+ show("~p~n", [Info]).
+
+fix_error(X) ->
+ set(last_error, X), %% for debugabililty
+ case X of
+ {aborted, Reason} -> Reason;
+ {abort, Reason} -> Reason;
+ Y when is_atom(Y) -> Y;
+ {'EXIT', {_Reason, {Mod, _, _}}} when is_atom(Mod) ->
+ save(X),
+ case atom_to_list(Mod) of
+ [$m, $n, $e|_] -> badarg;
+ _ -> X
+ end;
+ _ -> X
+ end.
+
+last_error() ->
+ val(last_error).
+
+%% The following is a list of possible mnesia errors and what they
+%% actually mean
+
+error_desc(nested_transaction) -> "Nested transactions are not allowed";
+error_desc(badarg) -> "Bad or invalid argument, possibly bad type";
+error_desc(no_transaction) -> "Operation not allowed outside transactions";
+error_desc(combine_error) -> "Table options were ilegally combined";
+error_desc(bad_index) -> "Index already exists or was out of bounds";
+error_desc(already_exists) -> "Some schema option we try to set is already on";
+error_desc(index_exists)-> "Some ops can not be performed on tabs with index";
+error_desc(no_exists)-> "Tried to perform op on non-existing (non alive) item";
+error_desc(system_limit) -> "Some system_limit was exhausted";
+error_desc(mnesia_down) -> "A transaction involving objects at some remote "
+ "node which died while transaction was executing"
+ "*and* object(s) are no longer available elsewhere"
+ "in the network";
+error_desc(not_a_db_node) -> "A node which is non existant in "
+ "the schema was mentioned";
+error_desc(bad_type) -> "Bad type on some provided arguments";
+error_desc(node_not_running) -> "Node not running";
+error_desc(truncated_binary_file) -> "Truncated binary in file";
+error_desc(active) -> "Some delete ops require that "
+ "all active objects are removed";
+error_desc(illegal) -> "Operation not supported on object";
+error_desc({'EXIT', Reason}) ->
+ error_desc(Reason);
+error_desc({error, Reason}) ->
+ error_desc(Reason);
+error_desc({aborted, Reason}) ->
+ error_desc(Reason);
+error_desc(Reason) when is_tuple(Reason), size(Reason) > 0 ->
+ setelement(1, Reason, error_desc(element(1, Reason)));
+error_desc(Reason) ->
+ Reason.
+
+dirty_rpc_error_tag(Reason) ->
+ case Reason of
+ {'EXIT', _} -> badarg;
+ no_variable -> badarg;
+ _ -> no_exists
+ end.
+
+fatal(Format, Args) ->
+ catch set(mnesia_status, stopping),
+ Core = mkcore({crashinfo, {Format, Args}}),
+ report_fatal(Format, Args, Core),
+ timer:sleep(10000), % Enough to write the core dump to disc?
+ mnesia:lkill(),
+ exit(fatal).
+
+report_fatal(Format, Args) ->
+ report_fatal(Format, Args, nocore).
+
+report_fatal(Format, Args, Core) ->
+ report_system_event({mnesia_fatal, Format, Args, Core}),
+ catch exit(whereis(mnesia_monitor), fatal).
+
+%% We sleep longer and longer the more we try
+%% Made some testing and came up with the following constants
+random_time(Retries, _Counter0) ->
+% UpperLimit = 2000,
+% MaxIntv = trunc(UpperLimit * (1-(4/((Retries*Retries)+4)))),
+ UpperLimit = 500,
+ Dup = Retries * Retries,
+ MaxIntv = trunc(UpperLimit * (1-(50/((Dup)+50)))),
+
+ case get(random_seed) of
+ undefined ->
+ {X, Y, Z} = erlang:now(), %% time()
+ random:seed(X, Y, Z),
+ Time = Dup + random:uniform(MaxIntv),
+ %% dbg_out("---random_test rs ~w max ~w val ~w---~n", [Retries, MaxIntv, Time]),
+ Time;
+ _ ->
+ Time = Dup + random:uniform(MaxIntv),
+ %% dbg_out("---random_test rs ~w max ~w val ~w---~n", [Retries, MaxIntv, Time]),
+ Time
+ end.
+
+report_system_event(Event0) ->
+ Event = {mnesia_system_event, Event0},
+ report_system_event(catch_notify(Event), Event),
+ case ?catch_val(subscribers) of
+ {'EXIT', _} -> ignore;
+ Pids -> lists:foreach(fun(Pid) -> Pid ! Event end, Pids)
+ end,
+ ok.
+
+catch_notify(Event) ->
+ case whereis(mnesia_event) of
+ undefined ->
+ {'EXIT', {badarg, {mnesia_event, Event}}};
+ Pid ->
+ gen_event:notify(Pid, Event)
+ end.
+
+report_system_event({'EXIT', Reason}, Event) ->
+ Mod = mnesia_monitor:get_env(event_module),
+ case mnesia_sup:start_event() of
+ {ok, Pid} ->
+ link(Pid),
+ gen_event:call(mnesia_event, Mod, Event, infinity),
+ unlink(Pid),
+
+ %% We get an exit signal if server dies
+ receive
+ {'EXIT', Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ gen_event:stop(mnesia_event),
+ ok
+ end;
+
+ Error ->
+ Msg = "Mnesia(~p): Cannot report event ~p: ~p (~p)~n",
+ error_logger:format(Msg, [node(), Event, Reason, Error])
+ end;
+report_system_event(_Res, _Event) ->
+ ignore.
+
+%% important messages are reported regardless of debug level
+important(Format, Args) ->
+ save({Format, Args}),
+ report_system_event({mnesia_info, Format, Args}).
+
+%% Warning messages are reported regardless of debug level
+warning(Format, Args) ->
+ save({Format, Args}),
+ report_system_event({mnesia_warning, Format, Args}).
+
+%% error messages are reported regardless of debug level
+error(Format, Args) ->
+ save({Format, Args}),
+ report_system_event({mnesia_error, Format, Args}).
+
+%% verbose messages are reported if debug level == debug or verbose
+verbose(Format, Args) ->
+ case mnesia_monitor:get_env(debug) of
+ none -> save({Format, Args});
+ verbose -> important(Format, Args);
+ debug -> important(Format, Args);
+ trace -> important(Format, Args)
+ end.
+
+%% debug message are display if debug level == 2
+dbg_out(Format, Args) ->
+ case mnesia_monitor:get_env(debug) of
+ none -> ignore;
+ verbose -> save({Format, Args});
+ _ -> report_system_event({mnesia_info, Format, Args})
+ end.
+
+%% Keep the last 10 debug print outs
+save(DbgInfo) ->
+ catch save2(DbgInfo).
+
+save2(DbgInfo) ->
+ Key = {'$$$_report', current_pos},
+ P =
+ case ?ets_lookup_element(mnesia_gvar, Key, 2) of
+ 30 -> -1;
+ I -> I
+ end,
+ set({'$$$_report', current_pos}, P+1),
+ set({'$$$_report', P+1}, {date(), time(), DbgInfo}).
+
+copy_file(From, To) ->
+ case file:open(From, [raw, binary, read]) of
+ {ok, F} ->
+ case file:open(To, [raw, binary, write]) of
+ {ok, T} ->
+ Res = copy_file_loop(F, T, 8000),
+ file:close(F),
+ file:close(T),
+ Res;
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+copy_file_loop(F, T, ChunkSize) ->
+ case file:read(F, ChunkSize) of
+ {ok, Bin} ->
+ file:write(T, Bin),
+ copy_file_loop(F, T, ChunkSize);
+ eof ->
+ ok;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+
+%%%%%%%%%%%%
+%% versions of all the lowlevel db funcs that determine whether we
+%% shall go to disc or ram to do the actual operation.
+
+db_get(Tab, Key) ->
+ db_get(val({Tab, storage_type}), Tab, Key).
+db_get(ram_copies, Tab, Key) -> ?ets_lookup(Tab, Key);
+db_get(disc_copies, Tab, Key) -> ?ets_lookup(Tab, Key);
+db_get(disc_only_copies, Tab, Key) -> dets:lookup(Tab, Key).
+
+db_init_chunk(Tab) ->
+ db_init_chunk(val({Tab, storage_type}), Tab, 1000).
+db_init_chunk(Tab, N) ->
+ db_init_chunk(val({Tab, storage_type}), Tab, N).
+
+db_init_chunk(disc_only_copies, Tab, N) ->
+ dets:select(Tab, [{'_', [], ['$_']}], N);
+db_init_chunk(_, Tab, N) ->
+ ets:select(Tab, [{'_', [], ['$_']}], N).
+
+db_chunk(disc_only_copies, State) ->
+ dets:select(State);
+db_chunk(_, State) ->
+ ets:select(State).
+
+db_put(Tab, Val) ->
+ db_put(val({Tab, storage_type}), Tab, Val).
+
+db_put(ram_copies, Tab, Val) -> ?ets_insert(Tab, Val), ok;
+db_put(disc_copies, Tab, Val) -> ?ets_insert(Tab, Val), ok;
+db_put(disc_only_copies, Tab, Val) -> dets:insert(Tab, Val).
+
+db_match_object(Tab, Pat) ->
+ db_match_object(val({Tab, storage_type}), Tab, Pat).
+db_match_object(Storage, Tab, Pat) ->
+ db_fixtable(Storage, Tab, true),
+ Res = catch_match_object(Storage, Tab, Pat),
+ db_fixtable(Storage, Tab, false),
+ case Res of
+ {'EXIT', Reason} -> exit(Reason);
+ _ -> Res
+ end.
+
+catch_match_object(disc_only_copies, Tab, Pat) ->
+ catch dets:match_object(Tab, Pat);
+catch_match_object(_, Tab, Pat) ->
+ catch ets:match_object(Tab, Pat).
+
+db_select(Tab, Pat) ->
+ db_select(val({Tab, storage_type}), Tab, Pat).
+
+db_select(Storage, Tab, Pat) ->
+ db_fixtable(Storage, Tab, true),
+ Res = catch_select(Storage, Tab, Pat),
+ db_fixtable(Storage, Tab, false),
+ case Res of
+ {'EXIT', Reason} -> exit(Reason);
+ _ -> Res
+ end.
+
+catch_select(disc_only_copies, Tab, Pat) ->
+ catch dets:select(Tab, Pat);
+catch_select(_, Tab, Pat) ->
+ catch ets:select(Tab, Pat).
+
+db_select_init(disc_only_copies, Tab, Pat, Limit) ->
+ dets:select(Tab, Pat, Limit);
+db_select_init(_, Tab, Pat, Limit) ->
+ ets:select(Tab, Pat, Limit).
+
+db_select_cont(disc_only_copies, Cont0, Ms) ->
+ Cont = dets:repair_continuation(Cont0, Ms),
+ dets:select(Cont);
+db_select_cont(_, Cont0, Ms) ->
+ Cont = ets:repair_continuation(Cont0, Ms),
+ ets:select(Cont).
+
+db_fixtable(ets, Tab, Bool) ->
+ ets:safe_fixtable(Tab, Bool);
+db_fixtable(ram_copies, Tab, Bool) ->
+ ets:safe_fixtable(Tab, Bool);
+db_fixtable(disc_copies, Tab, Bool) ->
+ ets:safe_fixtable(Tab, Bool);
+db_fixtable(dets, Tab, Bool) ->
+ dets:safe_fixtable(Tab, Bool);
+db_fixtable(disc_only_copies, Tab, Bool) ->
+ dets:safe_fixtable(Tab, Bool).
+
+db_erase(Tab, Key) ->
+ db_erase(val({Tab, storage_type}), Tab, Key).
+db_erase(ram_copies, Tab, Key) -> ?ets_delete(Tab, Key), ok;
+db_erase(disc_copies, Tab, Key) -> ?ets_delete(Tab, Key), ok;
+db_erase(disc_only_copies, Tab, Key) -> dets:delete(Tab, Key).
+
+db_match_erase(Tab, Pat) ->
+ db_match_erase(val({Tab, storage_type}), Tab, Pat).
+db_match_erase(ram_copies, Tab, Pat) -> ?ets_match_delete(Tab, Pat), ok;
+db_match_erase(disc_copies, Tab, Pat) -> ?ets_match_delete(Tab, Pat), ok;
+db_match_erase(disc_only_copies, Tab, Pat) -> dets:match_delete(Tab, Pat).
+
+db_first(Tab) ->
+ db_first(val({Tab, storage_type}), Tab).
+db_first(ram_copies, Tab) -> ?ets_first(Tab);
+db_first(disc_copies, Tab) -> ?ets_first(Tab);
+db_first(disc_only_copies, Tab) -> dets:first(Tab).
+
+db_next_key(Tab, Key) ->
+ db_next_key(val({Tab, storage_type}), Tab, Key).
+db_next_key(ram_copies, Tab, Key) -> ?ets_next(Tab, Key);
+db_next_key(disc_copies, Tab, Key) -> ?ets_next(Tab, Key);
+db_next_key(disc_only_copies, Tab, Key) -> dets:next(Tab, Key).
+
+db_last(Tab) ->
+ db_last(val({Tab, storage_type}), Tab).
+db_last(ram_copies, Tab) -> ?ets_last(Tab);
+db_last(disc_copies, Tab) -> ?ets_last(Tab);
+db_last(disc_only_copies, Tab) -> dets:first(Tab). %% Dets don't have order
+
+db_prev_key(Tab, Key) ->
+ db_prev_key(val({Tab, storage_type}), Tab, Key).
+db_prev_key(ram_copies, Tab, Key) -> ?ets_prev(Tab, Key);
+db_prev_key(disc_copies, Tab, Key) -> ?ets_prev(Tab, Key);
+db_prev_key(disc_only_copies, Tab, Key) -> dets:next(Tab, Key). %% Dets don't have order
+
+db_slot(Tab, Pos) ->
+ db_slot(val({Tab, storage_type}), Tab, Pos).
+db_slot(ram_copies, Tab, Pos) -> ?ets_slot(Tab, Pos);
+db_slot(disc_copies, Tab, Pos) -> ?ets_slot(Tab, Pos);
+db_slot(disc_only_copies, Tab, Pos) -> dets:slot(Tab, Pos).
+
+db_update_counter(Tab, C, Val) ->
+ db_update_counter(val({Tab, storage_type}), Tab, C, Val).
+db_update_counter(ram_copies, Tab, C, Val) ->
+ ?ets_update_counter(Tab, C, Val);
+db_update_counter(disc_copies, Tab, C, Val) ->
+ ?ets_update_counter(Tab, C, Val);
+db_update_counter(disc_only_copies, Tab, C, Val) ->
+ dets:update_counter(Tab, C, Val).
+
+db_erase_tab(Tab) ->
+ db_erase_tab(val({Tab, storage_type}), Tab).
+db_erase_tab(ram_copies, Tab) -> ?ets_delete_table(Tab);
+db_erase_tab(disc_copies, Tab) -> ?ets_delete_table(Tab);
+db_erase_tab(disc_only_copies, _Tab) -> ignore.
+
+%% assuming that Tab is a valid ets-table
+dets_to_ets(Tabname, Tab, File, Type, Rep, Lock) ->
+ {Open, Close} = mkfuns(Lock),
+ case Open(Tabname, [{file, File}, {type, disk_type(Tab, Type)},
+ {keypos, 2}, {repair, Rep}]) of
+ {ok, Tabname} ->
+ Res = dets:to_ets(Tabname, Tab),
+ Close(Tabname),
+ trav_ret(Res, Tab);
+ Other ->
+ Other
+ end.
+
+trav_ret(Tabname, Tabname) -> loaded;
+trav_ret(Other, _Tabname) -> Other.
+
+mkfuns(yes) ->
+ {fun(Tab, Args) -> dets_sync_open(Tab, Args) end,
+ fun(Tab) -> dets_sync_close(Tab) end};
+mkfuns(no) ->
+ {fun(Tab, Args) -> dets:open_file(Tab, Args) end,
+ fun(Tab) -> dets:close(Tab) end}.
+
+disk_type(Tab) ->
+ disk_type(Tab, val({Tab, setorbag})).
+
+disk_type(_Tab, ordered_set) ->
+ set;
+disk_type(_, Type) ->
+ Type.
+
+dets_sync_open(Tab, Ref, File) ->
+ Args = [{file, File},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, disk_type(Tab)}],
+ dets_sync_open(Ref, Args).
+
+lock_table(Tab) ->
+ global:set_lock({{mnesia_table_lock, Tab}, self()}, [node()], infinity).
+% dbg_out("dets_sync_open: ~p ~p~n", [T, self()]),
+
+unlock_table(Tab) ->
+ global:del_lock({{mnesia_table_lock, Tab}, self()}, [node()]).
+% dbg_out("unlock_table: ~p ~p~n", [T, self()]),
+
+dets_sync_open(Tab, Args) ->
+ lock_table(Tab),
+ case dets:open_file(Tab, Args) of
+ {ok, Tab} ->
+ {ok, Tab};
+ Other ->
+ dets_sync_close(Tab),
+ Other
+ end.
+
+dets_sync_close(Tab) ->
+ catch dets:close(Tab),
+ unlock_table(Tab),
+ ok.
+
+readable_indecies(Tab) ->
+ val({Tab, index}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Managing conditional debug functions
+%%
+%% The main idea with the debug_fun's is to allow test programs
+%% to control the internal behaviour of Mnesia. This is needed
+%% to make the test programs independent of system load, swapping
+%% and other circumstances that may affect the behaviour of Mnesia.
+%%
+%% First should calls to ?eval_debug_fun be inserted at well
+%% defined places in Mnesia's code. E.g. in critical situations
+%% of startup, transaction commit, backups etc.
+%%
+%% Then compile Mnesia with the compiler option 'debug'.
+%%
+%% In test programs ?activate_debug_fun should be called
+%% in order to bind a fun to the debug identifier stated
+%% in the call to ?eval_debug_fun.
+%%
+%% If eval_debug_fun finds that the fun is activated it
+%% invokes the fun as NewContext = Fun(PreviousContext, EvalContext)
+%% and replaces the PreviousContext with the NewContext.
+%% The initial context of a debug_fun is given as argument to
+%% activate_debug_fun.
+
+-define(DEBUG_TAB, mnesia_debug).
+-record(debug_info, {id, function, context, file, line}).
+
+scratch_debug_fun() ->
+ dbg_out("scratch_debug_fun(): ~p~n", [?DEBUG_TAB]),
+ (catch ?ets_delete_table(?DEBUG_TAB)),
+ ?ets_new_table(?DEBUG_TAB, [set, public, named_table, {keypos, 2}]).
+
+activate_debug_fun(FunId, Fun, InitialContext, File, Line) ->
+ Info = #debug_info{id = FunId,
+ function = Fun,
+ context = InitialContext,
+ file = File,
+ line = Line
+ },
+ update_debug_info(Info).
+
+update_debug_info(Info) ->
+ case catch ?ets_insert(?DEBUG_TAB, Info) of
+ {'EXIT', _} ->
+ scratch_debug_fun(),
+ ?ets_insert(?DEBUG_TAB, Info);
+ _ ->
+ ok
+ end,
+ dbg_out("update_debug_info(~p)~n", [Info]),
+ ok.
+
+deactivate_debug_fun(FunId, _File, _Line) ->
+ catch ?ets_delete(?DEBUG_TAB, FunId),
+ ok.
+
+eval_debug_fun(FunId, EvalContext, EvalFile, EvalLine) ->
+ case catch ?ets_lookup(?DEBUG_TAB, FunId) of
+ [] ->
+ ok;
+ [Info] ->
+ OldContext = Info#debug_info.context,
+ dbg_out("~s(~p): ~w "
+ "activated in ~s(~p)~n "
+ "eval_debug_fun(~w, ~w)~n",
+ [filename:basename(EvalFile), EvalLine, Info#debug_info.id,
+ filename:basename(Info#debug_info.file), Info#debug_info.line,
+ OldContext, EvalContext]),
+ Fun = Info#debug_info.function,
+ NewContext = Fun(OldContext, EvalContext),
+
+ case catch ?ets_lookup(?DEBUG_TAB, FunId) of
+ [Info] when NewContext /= OldContext ->
+ NewInfo = Info#debug_info{context = NewContext},
+ update_debug_info(NewInfo);
+ _ ->
+ ok
+ end;
+ {'EXIT', _} -> ok
+ end.
+
+-ifdef(debug).
+ is_debug_compiled() -> true.
+-else.
+ is_debug_compiled() -> false.
+-endif.
+
+
diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl
new file mode 100644
index 0000000000..77c317abc5
--- /dev/null
+++ b/lib/mnesia/src/mnesia_loader.erl
@@ -0,0 +1,828 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%% Purpose : Loads tables from local disc or from remote node
+
+-module(mnesia_loader).
+
+%% Mnesia internal stuff
+-export([disc_load_table/2,
+ net_load_table/4,
+ send_table/3]).
+
+-export([old_node_init_table/6]). %% Spawned old node protocol conversion hack
+-export([spawned_receiver/8]). %% Spawned lock taking process
+
+-import(mnesia_lib, [set/2, fatal/2, verbose/2, dbg_out/2]).
+
+-include("mnesia.hrl").
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Load a table from local disc
+
+disc_load_table(Tab, Reason) ->
+ Storage = val({Tab, storage_type}),
+ Type = val({Tab, setorbag}),
+ dbg_out("Getting table ~p (~p) from disc: ~p~n",
+ [Tab, Storage, Reason]),
+ ?eval_debug_fun({?MODULE, do_get_disc_copy},
+ [{tab, Tab},
+ {reason, Reason},
+ {storage, Storage},
+ {type, Type}]),
+ do_get_disc_copy2(Tab, Reason, Storage, Type).
+
+do_get_disc_copy2(Tab, _Reason, Storage, _Type) when Storage == unknown ->
+ verbose("Local table copy of ~p has recently been deleted, ignored.~n",
+ [Tab]),
+ {loaded, ok}; %% ?
+do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == disc_copies ->
+ %% NOW we create the actual table
+ Repair = mnesia_monitor:get_env(auto_repair),
+ Args = [{keypos, 2}, public, named_table, Type],
+ case Reason of
+ {dumper, _} -> %% Resources allready allocated
+ ignore;
+ _ ->
+ mnesia_monitor:mktab(Tab, Args),
+ Count = mnesia_log:dcd2ets(Tab, Repair),
+ case ets:info(Tab, size) of
+ X when X < Count * 4 ->
+ ok = mnesia_log:ets2dcd(Tab);
+ _ ->
+ ignore
+ end
+ end,
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+
+do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == ram_copies ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ case Reason of
+ {dumper, _} -> %% Resources allready allocated
+ ignore;
+ _ ->
+ mnesia_monitor:mktab(Tab, Args),
+ Fname = mnesia_lib:tab2dcd(Tab),
+ Datname = mnesia_lib:tab2dat(Tab),
+ Repair = mnesia_monitor:get_env(auto_repair),
+ case mnesia_monitor:use_dir() of
+ true ->
+ case mnesia_lib:exists(Fname) of
+ true -> mnesia_log:dcd2ets(Tab, Repair);
+ false ->
+ case mnesia_lib:exists(Datname) of
+ true ->
+ mnesia_lib:dets_to_ets(Tab, Tab, Datname,
+ Type, Repair, no);
+ false ->
+ false
+ end
+ end;
+ false ->
+ false
+ end
+ end,
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+
+do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == disc_only_copies ->
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ case Reason of
+ {dumper, _} ->
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+ _ ->
+ case mnesia_monitor:open_dets(Tab, Args) of
+ {ok, _} ->
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ set({Tab, load_node}, node()),
+ set({Tab, load_reason}, Reason),
+ {loaded, ok};
+ {error, Error} ->
+ {not_loaded, {"Failed to create dets table", Error}}
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Load a table from a remote node
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Receiver Sender
+%% -------- ------
+%% Grab schema lock on table
+%% Determine table size
+%% Create empty pre-grown table
+%% Grab read lock on table
+%% Let receiver subscribe on updates done on sender node
+%% Disable rehashing of table
+%% Release read lock on table
+%% Send table to receiver in chunks
+%%
+%% Grab read lock on table
+%% Block dirty updates
+%% Update wherabouts
+%%
+%% Cancel the update subscription
+%% Process the subscription events
+%% Optionally dump to disc
+%% Unblock dirty updates
+%% Release read lock on table
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-define(MAX_TRANSFER_SIZE, 7500).
+-define(MAX_RAM_FILE_SIZE, 1000000).
+-define(MAX_RAM_TRANSFERS, (?MAX_RAM_FILE_SIZE div ?MAX_TRANSFER_SIZE) + 1).
+-define(MAX_NOPACKETS, 20).
+
+net_load_table(Tab, Reason, Ns, Cs)
+ when Reason == {dumper,add_table_copy} ->
+ try_net_load_table(Tab, Reason, Ns, Cs);
+net_load_table(Tab, Reason, Ns, _Cs) ->
+ try_net_load_table(Tab, Reason, Ns, val({Tab, cstruct})).
+
+try_net_load_table(Tab, _Reason, [], _Cs) ->
+ verbose("Copy failed. No active replicas of ~p are available.~n", [Tab]),
+ {not_loaded, none_active};
+try_net_load_table(Tab, Reason, Ns, Cs) ->
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ do_get_network_copy(Tab, Reason, Ns, Storage, Cs).
+
+do_get_network_copy(Tab, _Reason, _Ns, unknown, _Cs) ->
+ verbose("Local table copy of ~p has recently been deleted, ignored.~n", [Tab]),
+ {not_loaded, storage_unknown};
+do_get_network_copy(Tab, Reason, Ns, Storage, Cs) ->
+ [Node | Tail] = Ns,
+ case lists:member(Node,val({current, db_nodes})) of
+ true ->
+ dbg_out("Getting table ~p (~p) from node ~p: ~p~n",
+ [Tab, Storage, Node, Reason]),
+ ?eval_debug_fun({?MODULE, do_get_network_copy},
+ [{tab, Tab}, {reason, Reason},
+ {nodes, Ns}, {storage, Storage}]),
+ case init_receiver(Node, Tab, Storage, Cs, Reason) of
+ ok ->
+ set({Tab, load_node}, Node),
+ set({Tab, load_reason}, Reason),
+ mnesia_controller:i_have_tab(Tab),
+ dbg_out("Table ~p copied from ~p to ~p~n", [Tab, Node, node()]),
+ {loaded, ok};
+ Err = {error, _} when element(1, Reason) == dumper ->
+ {not_loaded,Err};
+ restart ->
+ try_net_load_table(Tab, Reason, Tail ++ [Node], Cs);
+ down ->
+ try_net_load_table(Tab, Reason, Tail, Cs)
+ end;
+ false ->
+ try_net_load_table(Tab, Reason, Tail, Cs)
+ end.
+
+snmpify(Tab, Storage) ->
+ do_snmpify(Tab, val({Tab, snmp}), Storage).
+
+do_snmpify(_Tab, [], _Storage) ->
+ ignore;
+do_snmpify(Tab, Us, Storage) ->
+ Snmp = mnesia_snmp_hook:create_table(Us, Tab, Storage),
+ set({Tab, {index, snmp}}, Snmp).
+
+%% Start the recieiver
+init_receiver(Node, Tab, Storage, Cs, Reas={dumper,add_table_copy}) ->
+ case start_remote_sender(Node, Tab, Storage) of
+ {SenderPid, TabSize, DetsData} ->
+ start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,Reas);
+ Else ->
+ Else
+ end;
+init_receiver(Node, Tab,Storage,Cs,Reason) ->
+ %% Grab a schema lock to avoid deadlock between table_loader and schema_commit dumping.
+ %% Both may grab tables-locks in different order.
+ Load =
+ fun() ->
+ {_,Tid,Ts} = get(mnesia_activity_state),
+ mnesia_locker:rlock(Tid, Ts#tidstore.store, {schema, Tab}),
+ %% Check that table still exists
+ Active = val({Tab, active_replicas}),
+ %% Check that we havn't loaded it already
+ case val({Tab,where_to_read}) == node() of
+ true -> ok;
+ _ ->
+ %% And that sender still got a copy
+ %% (something might have happend while
+ %% we where waiting for the lock)
+ true = lists:member(Node, Active),
+ {SenderPid, TabSize, DetsData} =
+ start_remote_sender(Node,Tab,Storage),
+ Init = table_init_fun(SenderPid),
+ Args = [self(),Tab,Storage,Cs,SenderPid,
+ TabSize,DetsData,Init],
+ Pid = spawn_link(?MODULE, spawned_receiver, Args),
+ put(mnesia_real_loader, Pid),
+ wait_on_load_complete(Pid)
+ end
+ end,
+ Res =
+ case mnesia:transaction(Load, 20) of
+ {atomic, {error,Result}} when
+ element(1,Reason) == dumper ->
+ {error,Result};
+ {atomic, {error,Result}} ->
+ fatal("Cannot create table ~p: ~p~n",
+ [[Tab, Storage], Result]);
+ {atomic, Result} -> Result;
+ {aborted, nomore} -> restart;
+ {aborted, _Reas} ->
+ verbose("Receiver failed on ~p from ~p:~nReason: ~p~n",
+ [Tab,Node,_Reas]),
+ down %% either this node or sender is dying
+ end,
+ unlink(whereis(mnesia_tm)), %% Avoid late unlink from tm
+ Res.
+
+start_remote_sender(Node,Tab,Storage) ->
+ mnesia_controller:start_remote_sender(Node, Tab, self(), Storage),
+ put(mnesia_table_sender_node, {Tab, Node}),
+ receive
+ {SenderPid, {first, TabSize}} ->
+ {SenderPid, TabSize, false};
+ {SenderPid, {first, TabSize, DetsData}} ->
+ {SenderPid, TabSize, DetsData};
+ %% Protocol conversion hack
+ {copier_done, Node} ->
+ verbose("Sender of table ~p crashed on node ~p ~n", [Tab, Node]),
+ down(Tab, Storage)
+ end.
+
+table_init_fun(SenderPid) ->
+ PConv = mnesia_monitor:needs_protocol_conversion(node(SenderPid)),
+ MeMyselfAndI = self(),
+ fun(read) ->
+ Receiver =
+ if
+ PConv == true ->
+ MeMyselfAndI ! {actual_tabrec, self()},
+ MeMyselfAndI; %% Old mnesia
+ PConv == false -> self()
+ end,
+ SenderPid ! {Receiver, more},
+ get_data(SenderPid, Receiver)
+ end.
+
+%% Add_table_copy get's it's own locks.
+start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,{dumper,add_table_copy}) ->
+ Init = table_init_fun(SenderPid),
+ case do_init_table(Tab,Storage,Cs,SenderPid,TabSize,DetsData,self(), Init) of
+ Err = {error, _} ->
+ SenderPid ! {copier_done, node()},
+ Err;
+ Else ->
+ Else
+ end.
+
+spawned_receiver(ReplyTo,Tab,Storage,Cs, SenderPid,TabSize,DetsData, Init) ->
+ process_flag(trap_exit, true),
+ Done = do_init_table(Tab,Storage,Cs,
+ SenderPid,TabSize,DetsData,
+ ReplyTo, Init),
+ ReplyTo ! {self(),Done},
+ unlink(ReplyTo),
+ unlink(whereis(mnesia_controller)),
+ exit(normal).
+
+wait_on_load_complete(Pid) ->
+ receive
+ {Pid, Res} ->
+ Res;
+ {'EXIT', Pid, Reason} ->
+ exit(Reason);
+ Else ->
+ Pid ! Else,
+ wait_on_load_complete(Pid)
+ end.
+
+do_init_table(Tab,Storage,Cs,SenderPid,
+ TabSize,DetsInfo,OrigTabRec,Init) ->
+ case create_table(Tab, TabSize, Storage, Cs) of
+ {Storage,Tab} ->
+ %% Debug info
+ Node = node(SenderPid),
+ put(mnesia_table_receiver, {Tab, Node, SenderPid}),
+ mnesia_tm:block_tab(Tab),
+ PConv = mnesia_monitor:needs_protocol_conversion(Node),
+
+ case init_table(Tab,Storage,Init,PConv,DetsInfo,SenderPid) of
+ ok ->
+ tab_receiver(Node,Tab,Storage,Cs,PConv,OrigTabRec);
+ Reason ->
+ Msg = "[d]ets:init table failed",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage)
+ end;
+ Error ->
+ Error
+ end.
+
+create_table(Tab, TabSize, Storage, Cs) ->
+ if
+ Storage == disc_only_copies ->
+ mnesia_lib:lock_table(Tab),
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ Size = lists:max([TabSize, 256]),
+ Args = [{file, Tmp},
+ {keypos, 2},
+%% {ram_file, true},
+ {estimated_no_objects, Size},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)}],
+ file:delete(Tmp),
+ case mnesia_lib:dets_sync_open(Tab, Args) of
+ {ok, _} ->
+ mnesia_lib:unlock_table(Tab),
+ {Storage, Tab};
+ Else ->
+ mnesia_lib:unlock_table(Tab),
+ Else
+ end;
+ (Storage == ram_copies) or (Storage == disc_copies) ->
+ Args = [{keypos, 2}, public, named_table, Cs#cstruct.type],
+ case mnesia_monitor:unsafe_mktab(Tab, Args) of
+ Tab ->
+ {Storage, Tab};
+ Else ->
+ Else
+ end
+ end.
+
+tab_receiver(Node, Tab, Storage, Cs, PConv, OrigTabRec) ->
+ receive
+ {SenderPid, {no_more, DatBin}} when PConv == false ->
+ finish_copy(Storage,Tab,Cs,SenderPid,DatBin,OrigTabRec);
+
+ %% Protocol conversion hack
+ {SenderPid, {no_more, DatBin}} when is_pid(PConv) ->
+ PConv ! {SenderPid, no_more},
+ receive
+ {old_init_table_complete, ok} ->
+ finish_copy(Storage, Tab, Cs, SenderPid, DatBin,OrigTabRec);
+ {old_init_table_complete, Reason} ->
+ Msg = "OLD: [d]ets:init table failed",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage)
+ end;
+
+ {actual_tabrec, Pid} ->
+ tab_receiver(Node, Tab, Storage, Cs, Pid,OrigTabRec);
+
+ {SenderPid, {more, [Recs]}} when is_pid(PConv) ->
+ PConv ! {SenderPid, {more, Recs}}, %% Forward Msg to OldNodes
+ tab_receiver(Node, Tab, Storage, Cs, PConv,OrigTabRec);
+
+ {'EXIT', PConv, Reason} -> %% [d]ets:init process crashed
+ Msg = "Receiver crashed",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage);
+
+ %% Protocol conversion hack
+ {copier_done, Node} ->
+ verbose("Sender of table ~p crashed on node ~p ~n", [Tab, Node]),
+ down(Tab, Storage);
+
+ {'EXIT', Pid, Reason} ->
+ handle_exit(Pid, Reason),
+ tab_receiver(Node, Tab, Storage, Cs, PConv,OrigTabRec)
+ end.
+
+make_table_fun(Pid, TabRec) ->
+ fun(close) ->
+ ok;
+ (read) ->
+ get_data(Pid, TabRec)
+ end.
+
+get_data(Pid, TabRec) ->
+ receive
+ {Pid, {more, Recs}} ->
+ Pid ! {TabRec, more},
+ {Recs, make_table_fun(Pid,TabRec)};
+ {Pid, no_more} ->
+ end_of_input;
+ {copier_done, Node} ->
+ case node(Pid) of
+ Node ->
+ {copier_done, Node};
+ _ ->
+ get_data(Pid, TabRec)
+ end;
+ {'EXIT', Pid, Reason} ->
+ handle_exit(Pid, Reason),
+ get_data(Pid, TabRec)
+ end.
+
+init_table(Tab, disc_only_copies, Fun, false, DetsInfo,Sender) ->
+ ErtsVer = erlang:system_info(version),
+ case DetsInfo of
+ {ErtsVer, DetsData} ->
+ Res = (catch dets:is_compatible_bchunk_format(Tab, DetsData)),
+ case Res of
+ {'EXIT',{undef,[{dets,_,_}|_]}} ->
+ Sender ! {self(), {old_protocol, Tab}},
+ dets:init_table(Tab, Fun); %% Old dets version
+ {'EXIT', What} ->
+ exit(What);
+ false ->
+ Sender ! {self(), {old_protocol, Tab}},
+ dets:init_table(Tab, Fun); %% Old dets version
+ true ->
+ dets:init_table(Tab, Fun, [{format, bchunk}])
+ end;
+ Old when Old /= false ->
+ Sender ! {self(), {old_protocol, Tab}},
+ dets:init_table(Tab, Fun); %% Old dets version
+ _ ->
+ dets:init_table(Tab, Fun)
+ end;
+init_table(Tab, _, Fun, false, _DetsInfo,_) ->
+ case catch ets:init_table(Tab, Fun) of
+ true ->
+ ok;
+ {'EXIT', Else} -> Else
+ end;
+init_table(Tab, Storage, Fun, true, _DetsInfo, Sender) -> %% Old Nodes
+ spawn_link(?MODULE, old_node_init_table,
+ [Tab, Storage, Fun, self(), false, Sender]),
+ ok.
+
+old_node_init_table(Tab, Storage, Fun, TabReceiver, DetsInfo,Sender) ->
+ Res = init_table(Tab, Storage, Fun, false, DetsInfo,Sender),
+ TabReceiver ! {old_init_table_complete, Res},
+ unlink(TabReceiver),
+ ok.
+
+finish_copy(Storage,Tab,Cs,SenderPid,DatBin,OrigTabRec) ->
+ TabRef = {Storage, Tab},
+ subscr_receiver(TabRef, Cs#cstruct.record_name),
+ case handle_last(TabRef, Cs#cstruct.type, DatBin) of
+ ok ->
+ mnesia_index:init_index(Tab, Storage),
+ snmpify(Tab, Storage),
+ %% OrigTabRec must not be the spawned tab-receiver
+ %% due to old protocol.
+ SenderPid ! {OrigTabRec, no_more},
+ mnesia_tm:unblock_tab(Tab),
+ ok;
+ {error, Reason} ->
+ Msg = "Failed to handle last",
+ verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
+ down(Tab, Storage)
+ end.
+
+subscr_receiver(TabRef = {_, Tab}, RecName) ->
+ receive
+ {mnesia_table_event, {Op, Val, _Tid}} ->
+ if
+ Tab == RecName ->
+ handle_event(TabRef, Op, Val);
+ true ->
+ handle_event(TabRef, Op, setelement(1, Val, RecName))
+ end,
+ subscr_receiver(TabRef, RecName);
+
+ {'EXIT', Pid, Reason} ->
+ handle_exit(Pid, Reason),
+ subscr_receiver(TabRef, RecName)
+ after 0 ->
+ ok
+ end.
+
+handle_event(TabRef, write, Rec) ->
+ db_put(TabRef, Rec);
+handle_event(TabRef, delete, {_Tab, Key}) ->
+ db_erase(TabRef, Key);
+handle_event(TabRef, delete_object, OldRec) ->
+ db_match_erase(TabRef, OldRec);
+handle_event(TabRef, clear_table, {_Tab, _Key}) ->
+ db_match_erase(TabRef, '_').
+
+handle_last({disc_copies, Tab}, _Type, nobin) ->
+ Ret = mnesia_log:ets2dcd(Tab),
+ Fname = mnesia_lib:tab2dat(Tab),
+ case mnesia_lib:exists(Fname) of
+ true -> %% Remove old .DAT files.
+ file:delete(Fname);
+ false ->
+ ok
+ end,
+ Ret;
+
+handle_last({disc_only_copies, Tab}, Type, nobin) ->
+ mnesia_lib:dets_sync_close(Tab),
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ case file:rename(Tmp, Dat) of
+ ok ->
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ mnesia_monitor:open_dets(Tab, Args),
+ ok;
+ {error, Reason} ->
+ {error, {"Cannot swap tmp files", Tab, Reason}}
+ end;
+
+handle_last({ram_copies, _Tab}, _Type, nobin) ->
+ ok;
+handle_last({ram_copies, Tab}, _Type, DatBin) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_lib:lock_table(Tab),
+ Tmp = mnesia_lib:tab2tmp(Tab),
+ ok = file:write_file(Tmp, DatBin),
+ ok = file:rename(Tmp, mnesia_lib:tab2dcd(Tab)),
+ mnesia_lib:unlock_table(Tab),
+ ok;
+ false ->
+ ok
+ end.
+
+down(Tab, Storage) ->
+ case Storage of
+ ram_copies ->
+ catch ?ets_delete_table(Tab);
+ disc_copies ->
+ catch ?ets_delete_table(Tab);
+ disc_only_copies ->
+ TmpFile = mnesia_lib:tab2tmp(Tab),
+ mnesia_lib:dets_sync_close(Tab),
+ file:delete(TmpFile)
+ end,
+ mnesia_checkpoint:tm_del_copy(Tab, node()),
+ mnesia_controller:sync_del_table_copy_whereabouts(Tab, node()),
+ mnesia_tm:unblock_tab(Tab),
+ flush_subcrs(),
+ down.
+
+flush_subcrs() ->
+ receive
+ {mnesia_table_event, _} ->
+ flush_subcrs();
+
+ {'EXIT', Pid, Reason} ->
+ handle_exit(Pid, Reason),
+ flush_subcrs()
+ after 0 ->
+ done
+ end.
+
+db_erase({ram_copies, Tab}, Key) ->
+ true = ?ets_delete(Tab, Key);
+db_erase({disc_copies, Tab}, Key) ->
+ true = ?ets_delete(Tab, Key);
+db_erase({disc_only_copies, Tab}, Key) ->
+ ok = dets:delete(Tab, Key).
+
+db_match_erase({ram_copies, Tab} , Pat) ->
+ true = ?ets_match_delete(Tab, Pat);
+db_match_erase({disc_copies, Tab} , Pat) ->
+ true = ?ets_match_delete(Tab, Pat);
+db_match_erase({disc_only_copies, Tab}, Pat) ->
+ ok = dets:match_delete(Tab, Pat).
+
+db_put({ram_copies, Tab}, Val) ->
+ true = ?ets_insert(Tab, Val);
+db_put({disc_copies, Tab}, Val) ->
+ true = ?ets_insert(Tab, Val);
+db_put({disc_only_copies, Tab}, Val) ->
+ ok = dets:insert(Tab, Val).
+
+%% This code executes at the remote site where the data is
+%% executes in a special copier process.
+
+calc_nokeys(Storage, Tab) ->
+ %% Calculate #keys per transfer
+ Key = mnesia_lib:db_first(Storage, Tab),
+ Recs = mnesia_lib:db_get(Storage, Tab, Key),
+ BinSize = size(term_to_binary(Recs)),
+ (?MAX_TRANSFER_SIZE div BinSize) + 1.
+
+send_table(Pid, Tab, RemoteS) ->
+ case ?catch_val({Tab, storage_type}) of
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}};
+ unknown ->
+ {error, {no_exists, Tab}};
+ Storage ->
+ %% Send first
+ TabSize = mnesia:table_info(Tab, size),
+ Pconvert = mnesia_monitor:needs_protocol_conversion(node(Pid)),
+ KeysPerTransfer = calc_nokeys(Storage, Tab),
+ ChunkData = dets:info(Tab, bchunk_format),
+
+ UseDetsChunk =
+ Storage == RemoteS andalso
+ Storage == disc_only_copies andalso
+ ChunkData /= undefined andalso
+ Pconvert == false,
+ if
+ UseDetsChunk == true ->
+ DetsInfo = erlang:system_info(version),
+ Pid ! {self(), {first, TabSize, {DetsInfo, ChunkData}}};
+ true ->
+ Pid ! {self(), {first, TabSize}}
+ end,
+
+ %% Debug info
+ put(mnesia_table_sender, {Tab, node(Pid), Pid}),
+ {Init, Chunk} = reader_funcs(UseDetsChunk, Tab, Storage, KeysPerTransfer),
+
+ SendIt = fun() ->
+ prepare_copy(Pid, Tab, Storage),
+ send_more(Pid, 1, Chunk, Init(), Tab, Pconvert),
+ finish_copy(Pid, Tab, Storage, RemoteS)
+ end,
+
+ case catch SendIt() of
+ receiver_died ->
+ cleanup_tab_copier(Pid, Storage, Tab),
+ unlink(whereis(mnesia_tm)),
+ ok;
+ {_, receiver_died} ->
+ unlink(whereis(mnesia_tm)),
+ ok;
+ {atomic, no_more} ->
+ unlink(whereis(mnesia_tm)),
+ ok;
+ Reason ->
+ cleanup_tab_copier(Pid, Storage, Tab),
+ unlink(whereis(mnesia_tm)),
+ {error, Reason}
+ end
+ end.
+
+prepare_copy(Pid, Tab, Storage) ->
+ Trans =
+ fun() ->
+ mnesia:write_lock_table(Tab),
+ mnesia_subscr:subscribe(Pid, {table, Tab}),
+ update_where_to_write(Tab, node(Pid)),
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+ ok
+ end,
+ case mnesia:transaction(Trans) of
+ {atomic, ok} ->
+ ok;
+ {aborted, Reason} ->
+ exit({tab_copier_prepare, Tab, Reason})
+ end.
+
+update_where_to_write(Tab, Node) ->
+ case val({Tab, access_mode}) of
+ read_only ->
+ ignore;
+ read_write ->
+ Current = val({current, db_nodes}),
+ Ns =
+ case lists:member(Node, Current) of
+ true -> Current;
+ false -> [Node | Current]
+ end,
+ update_where_to_write(Ns, Tab, Node)
+ end.
+
+update_where_to_write([], _, _) ->
+ ok;
+update_where_to_write([H|T], Tab, AddNode) ->
+ rpc:call(H, mnesia_controller, call,
+ [{update_where_to_write, [add, Tab, AddNode], self()}]),
+ update_where_to_write(T, Tab, AddNode).
+
+send_more(Pid, N, Chunk, DataState, Tab, OldNode) ->
+ receive
+ {NewPid, more} ->
+ case send_packet(N - 1, NewPid, Chunk, DataState, OldNode) of
+ New when is_integer(New) ->
+ New - 1;
+ NewData ->
+ send_more(NewPid, ?MAX_NOPACKETS, Chunk, NewData, Tab, OldNode)
+ end;
+ {_NewPid, {old_protocol, Tab}} ->
+ Storage = val({Tab, storage_type}),
+ {Init, NewChunk} =
+ reader_funcs(false, Tab, Storage, calc_nokeys(Storage, Tab)),
+ send_more(Pid, 1, NewChunk, Init(), Tab, OldNode);
+
+ {copier_done, Node} when Node == node(Pid)->
+ verbose("Receiver of table ~p crashed on ~p (more)~n", [Tab, Node]),
+ throw(receiver_died)
+ end.
+
+reader_funcs(UseDetsChunk, Tab, Storage, KeysPerTransfer) ->
+ case UseDetsChunk of
+ false ->
+ {fun() -> mnesia_lib:db_init_chunk(Storage, Tab, KeysPerTransfer) end,
+ fun(Cont) -> mnesia_lib:db_chunk(Storage, Cont) end};
+ true ->
+ {fun() -> dets_bchunk(Tab, start) end,
+ fun(Cont) -> dets_bchunk(Tab, Cont) end}
+ end.
+
+dets_bchunk(Tab, Chunk) -> %% Arrg
+ case dets:bchunk(Tab, Chunk) of
+ {Cont, Data} -> {Data, Cont};
+ Else -> Else
+ end.
+
+send_packet(N, Pid, _Chunk, '$end_of_table', OldNode) ->
+ case OldNode of
+ true -> ignore; %% Old nodes can't handle the new no_more
+ false -> Pid ! {self(), no_more}
+ end,
+ N;
+send_packet(N, Pid, Chunk, {[], Cont}, OldNode) ->
+ send_packet(N, Pid, Chunk, Chunk(Cont), OldNode);
+send_packet(N, Pid, Chunk, {Recs, Cont}, OldNode) when N < ?MAX_NOPACKETS ->
+ case OldNode of
+ true -> Pid ! {self(), {more, [Recs]}}; %% Old need's wrapping list
+ false -> Pid ! {self(), {more, Recs}}
+ end,
+ send_packet(N+1, Pid, Chunk, Chunk(Cont), OldNode);
+send_packet(_N, _Pid, _Chunk, DataState, _OldNode) ->
+ DataState.
+
+finish_copy(Pid, Tab, Storage, RemoteS) ->
+ RecNode = node(Pid),
+ DatBin = dat2bin(Tab, Storage, RemoteS),
+ Trans =
+ fun() ->
+ mnesia:read_lock_table(Tab),
+ A = val({Tab, access_mode}),
+ mnesia_controller:sync_and_block_table_whereabouts(Tab, RecNode, RemoteS, A),
+ cleanup_tab_copier(Pid, Storage, Tab),
+ mnesia_checkpoint:tm_add_copy(Tab, RecNode),
+ Pid ! {self(), {no_more, DatBin}},
+ receive
+ {Pid, no_more} -> % Dont bother about the spurious 'more' message
+ no_more;
+ {copier_done, Node} when Node == node(Pid)->
+ verbose("Tab receiver ~p crashed (more): ~p~n", [Tab, Node]),
+ receiver_died
+ end
+ end,
+ mnesia:transaction(Trans).
+
+cleanup_tab_copier(Pid, Storage, Tab) ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ mnesia_subscr:unsubscribe(Pid, {table, Tab}).
+
+dat2bin(Tab, ram_copies, ram_copies) ->
+ mnesia_lib:lock_table(Tab),
+ Res = file:read_file(mnesia_lib:tab2dcd(Tab)),
+ mnesia_lib:unlock_table(Tab),
+ case Res of
+ {ok, DatBin} -> DatBin;
+ _ -> nobin
+ end;
+dat2bin(_Tab, _LocalS, _RemoteS) ->
+ nobin.
+
+handle_exit(Pid, Reason) when node(Pid) == node() ->
+ exit(Reason);
+handle_exit(_Pid, _Reason) -> %% Not from our node, this will be handled by
+ ignore. %% mnesia_down soon.
diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl
new file mode 100644
index 0000000000..cfa3f171b2
--- /dev/null
+++ b/lib/mnesia/src/mnesia_locker.erl
@@ -0,0 +1,1196 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_locker).
+
+-export([
+ get_held_locks/0,
+ get_lock_queue/0,
+ global_lock/5,
+ ixrlock/5,
+ init/1,
+ mnesia_down/2,
+ release_tid/1,
+ async_release_tid/2,
+ send_release_tid/2,
+ receive_release_tid_acc/2,
+ rlock/3,
+ rlock_table/3,
+ rwlock/3,
+ sticky_rwlock/3,
+ start/0,
+ sticky_wlock/3,
+ sticky_wlock_table/3,
+ wlock/3,
+ wlock_no_exist/4,
+ wlock_table/3
+ ]).
+
+%% sys callback functions
+-export([system_continue/3,
+ system_terminate/4,
+ system_code_change/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [dbg_out/2, error/2, verbose/2]).
+
+-define(dbg(S,V), ok).
+%-define(dbg(S,V), dbg_out("~p:~p: " ++ S, [?MODULE, ?LINE] ++ V)).
+
+-define(ALL, '______WHOLETABLE_____').
+-define(STICK, '______STICK_____').
+-define(GLOBAL, '______GLOBAL_____').
+
+-record(state, {supervisor}).
+
+-record(queue, {oid, tid, op, pid, lucky}).
+
+%% mnesia_held_locks: contain {Oid, Op, Tid} entries (bag)
+-define(match_oid_held_locks(Oid), {Oid, '_', '_'}).
+%% mnesia_tid_locks: contain {Tid, Oid, Op} entries (bag)
+-define(match_oid_tid_locks(Tid), {Tid, '_', '_'}).
+%% mnesia_sticky_locks: contain {Oid, Node} entries and {Tab, Node} entries (set)
+-define(match_oid_sticky_locks(Oid),{Oid, '_'}).
+%% mnesia_lock_queue: contain {queue, Oid, Tid, Op, ReplyTo, WaitForTid} entries (bag)
+-define(match_oid_lock_queue(Oid), #queue{oid=Oid, tid='_', op = '_', pid = '_', lucky = '_'}).
+%% mnesia_lock_counter: {{write, Tab}, Number} &&
+%% {{read, Tab}, Number} entries (set)
+
+start() ->
+ mnesia_monitor:start_proc(?MODULE, ?MODULE, init, [self()]).
+
+init(Parent) ->
+ register(?MODULE, self()),
+ process_flag(trap_exit, true),
+ ?ets_new_table(mnesia_held_locks, [bag, private, named_table]),
+ ?ets_new_table(mnesia_tid_locks, [bag, private, named_table]),
+ ?ets_new_table(mnesia_sticky_locks, [set, private, named_table]),
+ ?ets_new_table(mnesia_lock_queue, [bag, private, named_table, {keypos, 2}]),
+
+ proc_lib:init_ack(Parent, {ok, self()}),
+ case ?catch_val(pid_sort_order) of
+ r9b_plain -> put(pid_sort_order, r9b_plain);
+ standard -> put(pid_sort_order, standard);
+ _ -> ignore
+ end,
+ loop(#state{supervisor = Parent}).
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+reply(From, R) ->
+ From ! {?MODULE, node(), R}.
+
+l_request(Node, X, Store) ->
+ {?MODULE, Node} ! {self(), X},
+ l_req_rec(Node, Store).
+
+l_req_rec(Node, Store) ->
+ ?ets_insert(Store, {nodes, Node}),
+ receive
+ {?MODULE, Node, Reply} ->
+ Reply;
+ {mnesia_down, Node} ->
+ {not_granted, {node_not_running, Node}}
+ end.
+
+release_tid(Tid) ->
+ ?MODULE ! {release_tid, Tid}.
+
+async_release_tid(Nodes, Tid) ->
+ rpc:abcast(Nodes, ?MODULE, {release_tid, Tid}).
+
+send_release_tid(Nodes, Tid) ->
+ rpc:abcast(Nodes, ?MODULE, {self(), {sync_release_tid, Tid}}).
+
+receive_release_tid_acc([Node | Nodes], Tid) ->
+ receive
+ {?MODULE, Node, {tid_released, Tid}} ->
+ receive_release_tid_acc(Nodes, Tid);
+ {mnesia_down, Node} ->
+ receive_release_tid_acc(Nodes, Tid)
+ end;
+receive_release_tid_acc([], _Tid) ->
+ ok.
+
+loop(State) ->
+ receive
+ {From, {write, Tid, Oid}} ->
+ try_sticky_lock(Tid, write, From, Oid),
+ loop(State);
+
+ %% If Key == ?ALL it's a request to lock the entire table
+ %%
+
+ {From, {read, Tid, Oid}} ->
+ try_sticky_lock(Tid, read, From, Oid),
+ loop(State);
+
+ %% Really do a read, but get hold of a write lock
+ %% used by mnesia:wread(Oid).
+
+ {From, {read_write, Tid, Oid}} ->
+ try_sticky_lock(Tid, read_write, From, Oid),
+ loop(State);
+
+ %% Tid has somehow terminated, clear up everything
+ %% and pass locks on to queued processes.
+ %% This is the purpose of the mnesia_tid_locks table
+
+ {release_tid, Tid} ->
+ do_release_tid(Tid),
+ loop(State);
+
+ %% stick lock, first tries this to the where_to_read Node
+ {From, {test_set_sticky, Tid, {Tab, _} = Oid, Lock}} ->
+ case ?ets_lookup(mnesia_sticky_locks, Tab) of
+ [] ->
+ reply(From, not_stuck),
+ loop(State);
+ [{_,Node}] when Node == node() ->
+ %% Lock is stuck here, see now if we can just set
+ %% a regular write lock
+ try_lock(Tid, Lock, From, Oid),
+ loop(State);
+ [{_,Node}] ->
+ reply(From, {stuck_elsewhere, Node}),
+ loop(State)
+ end;
+
+ %% If test_set_sticky fails, we send this to all nodes
+ %% after aquiring a real write lock on Oid
+
+ {stick, {Tab, _}, N} ->
+ ?ets_insert(mnesia_sticky_locks, {Tab, N}),
+ loop(State);
+
+ %% The caller which sends this message, must have first
+ %% aquired a write lock on the entire table
+ {unstick, Tab} ->
+ ?ets_delete(mnesia_sticky_locks, Tab),
+ loop(State);
+
+ {From, {ix_read, Tid, Tab, IxKey, Pos}} ->
+ case ?ets_lookup(mnesia_sticky_locks, Tab) of
+ [] ->
+ set_read_lock_on_all_keys(Tid,From,Tab,IxKey,Pos),
+ loop(State);
+ [{_,N}] when N == node() ->
+ set_read_lock_on_all_keys(Tid,From,Tab,IxKey,Pos),
+ loop(State);
+ [{_,N}] ->
+ Req = {From, {ix_read, Tid, Tab, IxKey, Pos}},
+ From ! {?MODULE, node(), {switch, N, Req}},
+ loop(State)
+ end;
+
+ {From, {sync_release_tid, Tid}} ->
+ do_release_tid(Tid),
+ reply(From, {tid_released, Tid}),
+ loop(State);
+
+ {release_remote_non_pending, Node, Pending} ->
+ release_remote_non_pending(Node, Pending),
+ mnesia_monitor:mnesia_down(?MODULE, Node),
+ loop(State);
+
+ {'EXIT', Pid, _} when Pid == State#state.supervisor ->
+ do_stop();
+
+ {system, From, Msg} ->
+ verbose("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
+ Parent = State#state.supervisor,
+ sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State);
+
+ {get_table, From, LockTable} ->
+ From ! {LockTable, ?ets_match_object(LockTable, '_')},
+ loop(State);
+
+ Msg ->
+ error("~p got unexpected message: ~p~n", [?MODULE, Msg]),
+ loop(State)
+ end.
+
+set_lock(Tid, Oid, Op) ->
+ ?dbg("Granted ~p ~p ~p~n", [Tid,Oid,Op]),
+ ?ets_insert(mnesia_held_locks, {Oid, Op, Tid}),
+ ?ets_insert(mnesia_tid_locks, {Tid, Oid, Op}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Acquire locks
+
+try_sticky_lock(Tid, Op, Pid, {Tab, _} = Oid) ->
+ case ?ets_lookup(mnesia_sticky_locks, Tab) of
+ [] ->
+ try_lock(Tid, Op, Pid, Oid);
+ [{_,N}] when N == node() ->
+ try_lock(Tid, Op, Pid, Oid);
+ [{_,N}] ->
+ Req = {Pid, {Op, Tid, Oid}},
+ Pid ! {?MODULE, node(), {switch, N, Req}}
+ end.
+
+try_lock(Tid, read_write, Pid, Oid) ->
+ try_lock(Tid, read_write, read, write, Pid, Oid);
+try_lock(Tid, Op, Pid, Oid) ->
+ try_lock(Tid, Op, Op, Op, Pid, Oid).
+
+try_lock(Tid, Op, SimpleOp, Lock, Pid, Oid) ->
+ case can_lock(Tid, Lock, Oid, {no, bad_luck}) of
+ yes ->
+ Reply = grant_lock(Tid, SimpleOp, Lock, Oid),
+ reply(Pid, Reply);
+ {no, Lucky} ->
+ C = #cyclic{op = SimpleOp, lock = Lock, oid = Oid, lucky = Lucky},
+ ?dbg("Rejected ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
+ reply(Pid, {not_granted, C});
+ {queue, Lucky} ->
+ ?dbg("Queued ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
+ %% Append to queue: Nice place for trace output
+ ?ets_insert(mnesia_lock_queue,
+ #queue{oid = Oid, tid = Tid, op = Op,
+ pid = Pid, lucky = Lucky}),
+ ?ets_insert(mnesia_tid_locks, {Tid, Oid, {queued, Op}})
+ end.
+
+grant_lock(Tid, read, Lock, Oid = {Tab, Key})
+ when Key /= ?ALL, Tab /= ?GLOBAL ->
+ case node(Tid#tid.pid) == node() of
+ true ->
+ set_lock(Tid, Oid, Lock),
+ {granted, lookup_in_client};
+ false ->
+ try
+ Val = mnesia_lib:db_get(Tab, Key), %% lookup as well
+ set_lock(Tid, Oid, Lock),
+ {granted, Val}
+ catch _:_Reason ->
+ %% Table has been deleted from this node,
+ %% restart the transaction.
+ C = #cyclic{op = read, lock = Lock, oid = Oid,
+ lucky = nowhere},
+ {not_granted, C}
+ end
+ end;
+grant_lock(Tid, {ix_read,IxKey,Pos}, Lock, Oid = {Tab, _}) ->
+ try
+ Res = ix_read_res(Tab, IxKey,Pos),
+ set_lock(Tid, Oid, Lock),
+ {granted, Res, [?ALL]}
+ catch _:_ ->
+ {not_granted, {no_exists, Tab, {index, [Pos]}}}
+ end;
+grant_lock(Tid, read, Lock, Oid) ->
+ set_lock(Tid, Oid, Lock),
+ {granted, ok};
+grant_lock(Tid, write, Lock, Oid) ->
+ set_lock(Tid, Oid, Lock),
+ granted.
+
+%% 1) Impose an ordering on all transactions favour old (low tid) transactions
+%% newer (higher tid) transactions may never wait on older ones,
+%% 2) When releasing the tids from the queue always begin with youngest (high tid)
+%% because of 1) it will avoid the deadlocks.
+%% 3) TabLocks is the problem :-) They should not starve and not deadlock
+%% handle tablocks in queue as they had locks on unlocked records.
+
+can_lock(Tid, read, {Tab, Key}, AlreadyQ) when Key /= ?ALL ->
+ %% The key is bound, no need for the other BIF
+ Oid = {Tab, Key},
+ ObjLocks = ?ets_match_object(mnesia_held_locks, {Oid, write, '_'}),
+ TabLocks = ?ets_match_object(mnesia_held_locks, {{Tab, ?ALL}, write, '_'}),
+ check_lock(Tid, Oid, ObjLocks, TabLocks, yes, AlreadyQ, read);
+
+can_lock(Tid, read, Oid, AlreadyQ) -> % Whole tab
+ Tab = element(1, Oid),
+ ObjLocks = ?ets_match_object(mnesia_held_locks, {{Tab, '_'}, write, '_'}),
+ check_lock(Tid, Oid, ObjLocks, [], yes, AlreadyQ, read);
+
+can_lock(Tid, write, {Tab, Key}, AlreadyQ) when Key /= ?ALL ->
+ Oid = {Tab, Key},
+ ObjLocks = ?ets_lookup(mnesia_held_locks, Oid),
+ TabLocks = ?ets_lookup(mnesia_held_locks, {Tab, ?ALL}),
+ check_lock(Tid, Oid, ObjLocks, TabLocks, yes, AlreadyQ, write);
+
+can_lock(Tid, write, Oid, AlreadyQ) -> % Whole tab
+ Tab = element(1, Oid),
+ ObjLocks = ?ets_match_object(mnesia_held_locks, ?match_oid_held_locks({Tab, '_'})),
+ check_lock(Tid, Oid, ObjLocks, [], yes, AlreadyQ, write).
+
+%% Check held locks for conflicting locks
+check_lock(Tid, Oid, [Lock | Locks], TabLocks, X, AlreadyQ, Type) ->
+ case element(3, Lock) of
+ Tid ->
+ check_lock(Tid, Oid, Locks, TabLocks, X, AlreadyQ, Type);
+ WaitForTid ->
+ Queue = allowed_to_be_queued(WaitForTid,Tid),
+ if Queue == true ->
+ check_lock(Tid, Oid, Locks, TabLocks, {queue, WaitForTid}, AlreadyQ, Type);
+ Tid#tid.pid == WaitForTid#tid.pid ->
+ dbg_out("Spurious lock conflict ~w ~w: ~w -> ~w~n",
+ [Oid, Lock, Tid, WaitForTid]),
+ %% Test..
+ {Tab, _Key} = Oid,
+ HaveQ = (ets:lookup(mnesia_lock_queue, Oid) /= [])
+ orelse (ets:lookup(mnesia_lock_queue,{Tab,?ALL}) /= []),
+ if
+ HaveQ ->
+ {no, WaitForTid};
+ true ->
+ check_lock(Tid,Oid,Locks,TabLocks,{queue,WaitForTid},AlreadyQ,Type)
+ end;
+ %%{no, WaitForTid}; Safe solution
+ true ->
+ {no, WaitForTid}
+ end
+ end;
+
+check_lock(_, _, [], [], X, {queue, bad_luck}, _) ->
+ X; %% The queue should be correct already no need to check it again
+
+check_lock(_, _, [], [], X = {queue, _Tid}, _AlreadyQ, _) ->
+ X;
+
+check_lock(Tid, Oid, [], [], X, AlreadyQ, Type) ->
+ {Tab, Key} = Oid,
+ if
+ Type == write ->
+ check_queue(Tid, Tab, X, AlreadyQ);
+ Key == ?ALL ->
+ %% hmm should be solvable by a clever select expr but not today...
+ check_queue(Tid, Tab, X, AlreadyQ);
+ true ->
+ %% If there is a queue on that object, read_lock shouldn't be granted
+ ObjLocks = ets:lookup(mnesia_lock_queue, Oid),
+ case max(ObjLocks) of
+ empty ->
+ check_queue(Tid, Tab, X, AlreadyQ);
+ ObjL ->
+ case allowed_to_be_queued(ObjL,Tid) of
+ false ->
+ %% Starvation Preemption (write waits for read)
+ {no, ObjL};
+ true ->
+ check_queue(Tid, Tab, {queue, ObjL}, AlreadyQ)
+ end
+ end
+ end;
+
+check_lock(Tid, Oid, [], TabLocks, X, AlreadyQ, Type) ->
+ check_lock(Tid, Oid, TabLocks, [], X, AlreadyQ, Type).
+
+%% True if WaitForTid > Tid -> % Important order
+allowed_to_be_queued(WaitForTid, Tid) ->
+ case get(pid_sort_order) of
+ undefined -> WaitForTid > Tid;
+ r9b_plain ->
+ cmp_tid(true, WaitForTid, Tid) =:= 1;
+ standard ->
+ cmp_tid(false, WaitForTid, Tid) =:= 1
+ end.
+
+%% Check queue for conflicting locks
+%% Assume that all queued locks belongs to other tid's
+
+check_queue(Tid, Tab, X, AlreadyQ) ->
+ TabLocks = ets:lookup(mnesia_lock_queue, {Tab,?ALL}),
+ Greatest = max(TabLocks),
+ case Greatest of
+ empty -> X;
+ Tid -> X;
+ WaitForTid ->
+ case allowed_to_be_queued(WaitForTid,Tid) of
+ true ->
+ {queue, WaitForTid};
+ false when AlreadyQ =:= {no, bad_luck} ->
+ {no, WaitForTid}
+ end
+ end.
+
+sort_queue(QL) ->
+ case get(pid_sort_order) of
+ undefined ->
+ lists:reverse(lists:keysort(#queue.tid, QL));
+ r9b_plain ->
+ lists:sort(fun(#queue{tid=X},#queue{tid=Y}) ->
+ cmp_tid(true, X, Y) == 1
+ end, QL);
+ standard ->
+ lists:sort(fun(#queue{tid=X},#queue{tid=Y}) ->
+ cmp_tid(false, X, Y) == 1
+ end, QL)
+ end.
+
+max([]) -> empty;
+max([#queue{tid=Max}]) -> Max;
+max(L) ->
+ [#queue{tid=Max}|_] = sort_queue(L),
+ Max.
+
+set_read_lock_on_all_keys(Tid, From, Tab, IxKey, Pos) ->
+ Oid = {Tab,?ALL},
+ Op = {ix_read,IxKey, Pos},
+ Lock = read,
+ case can_lock(Tid, Lock, Oid, {no, bad_luck}) of
+ yes ->
+ Reply = grant_lock(Tid, Op, Lock, Oid),
+ reply(From, Reply);
+ {no, Lucky} ->
+ C = #cyclic{op = Op, lock = Lock, oid = Oid, lucky = Lucky},
+ ?dbg("Rejected ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
+ reply(From, {not_granted, C});
+ {queue, Lucky} ->
+ ?dbg("Queued ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
+ %% Append to queue: Nice place for trace output
+ ?ets_insert(mnesia_lock_queue,
+ #queue{oid = Oid, tid = Tid, op = Op,
+ pid = From, lucky = Lucky}),
+ ?ets_insert(mnesia_tid_locks, {Tid, Oid, {queued, Op}})
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Release of locks
+
+%% Release remote non-pending nodes
+release_remote_non_pending(Node, Pending) ->
+ %% Clear the mnesia_sticky_locks table first, to avoid
+ %% unnecessary requests to the failing node
+ ?ets_match_delete(mnesia_sticky_locks, {'_' , Node}),
+
+ %% Then we have to release all locks held by processes
+ %% running at the failed node and also simply remove all
+ %% queue'd requests back to the failed node
+
+ AllTids = ?ets_match(mnesia_tid_locks, {'$1', '_', '_'}),
+ Tids = [T || [T] <- AllTids, Node == node(T#tid.pid), not lists:member(T, Pending)],
+ do_release_tids(Tids).
+
+do_release_tids([Tid | Tids]) ->
+ do_release_tid(Tid),
+ do_release_tids(Tids);
+do_release_tids([]) ->
+ ok.
+
+do_release_tid(Tid) ->
+ Locks = ?ets_lookup(mnesia_tid_locks, Tid),
+ ?dbg("Release ~p ~p ~n", [Tid, Locks]),
+ ?ets_delete(mnesia_tid_locks, Tid),
+ release_locks(Locks),
+ %% Removed queued locks which has had locks
+ UniqueLocks = keyunique(lists:sort(Locks),[]),
+ rearrange_queue(UniqueLocks).
+
+keyunique([{_Tid, Oid, _Op}|R], Acc = [{_, Oid, _}|_]) ->
+ keyunique(R, Acc);
+keyunique([H|R], Acc) ->
+ keyunique(R, [H|Acc]);
+keyunique([], Acc) ->
+ Acc.
+
+release_locks([Lock | Locks]) ->
+ release_lock(Lock),
+ release_locks(Locks);
+release_locks([]) ->
+ ok.
+
+release_lock({Tid, Oid, {queued, _}}) ->
+ ?ets_match_delete(mnesia_lock_queue, #queue{oid=Oid, tid = Tid, op = '_',
+ pid = '_', lucky = '_'});
+release_lock({Tid, Oid, Op}) ->
+ if
+ Op == write ->
+ ?ets_delete(mnesia_held_locks, Oid);
+ Op == read ->
+ ets:delete_object(mnesia_held_locks, {Oid, Op, Tid})
+ end.
+
+rearrange_queue([{_Tid, {Tab, Key}, _} | Locks]) ->
+ if
+ Key /= ?ALL->
+ Queue =
+ ets:lookup(mnesia_lock_queue, {Tab, ?ALL}) ++
+ ets:lookup(mnesia_lock_queue, {Tab, Key}),
+ case Queue of
+ [] ->
+ ok;
+ _ ->
+ Sorted = sort_queue(Queue),
+ try_waiters_obj(Sorted)
+ end;
+ true ->
+ Pat = ?match_oid_lock_queue({Tab, '_'}),
+ Queue = ?ets_match_object(mnesia_lock_queue, Pat),
+ Sorted = sort_queue(Queue),
+ try_waiters_tab(Sorted)
+ end,
+ ?dbg("RearrQ ~p~n", [Queue]),
+ rearrange_queue(Locks);
+rearrange_queue([]) ->
+ ok.
+
+try_waiters_obj([W | Waiters]) ->
+ case try_waiter(W) of
+ queued ->
+ no;
+ _ ->
+ try_waiters_obj(Waiters)
+ end;
+try_waiters_obj([]) ->
+ ok.
+
+try_waiters_tab([W | Waiters]) ->
+ case W#queue.oid of
+ {_Tab, ?ALL} ->
+ case try_waiter(W) of
+ queued ->
+ no;
+ _ ->
+ try_waiters_tab(Waiters)
+ end;
+ Oid ->
+ case try_waiter(W) of
+ queued ->
+ Rest = key_delete_all(Oid, #queue.oid, Waiters),
+ try_waiters_tab(Rest);
+ _ ->
+ try_waiters_tab(Waiters)
+ end
+ end;
+try_waiters_tab([]) ->
+ ok.
+
+try_waiter({queue, Oid, Tid, read_write, ReplyTo, _}) ->
+ try_waiter(Oid, read_write, read, write, ReplyTo, Tid);
+try_waiter({queue, Oid, Tid, IXR = {ix_read,_,_}, ReplyTo, _}) ->
+ try_waiter(Oid, IXR, IXR, read, ReplyTo, Tid);
+try_waiter({queue, Oid, Tid, Op, ReplyTo, _}) ->
+ try_waiter(Oid, Op, Op, Op, ReplyTo, Tid).
+
+try_waiter(Oid, Op, SimpleOp, Lock, ReplyTo, Tid) ->
+ case can_lock(Tid, Lock, Oid, {queue, bad_luck}) of
+ yes ->
+ %% Delete from queue: Nice place for trace output
+ ?ets_match_delete(mnesia_lock_queue,
+ #queue{oid=Oid, tid = Tid, op = Op,
+ pid = ReplyTo, lucky = '_'}),
+ Reply = grant_lock(Tid, SimpleOp, Lock, Oid),
+ reply(ReplyTo,Reply),
+ locked;
+ {queue, _Why} ->
+ ?dbg("Keep ~p ~p ~p ~p~n", [Tid, Oid, Lock, _Why]),
+ queued; % Keep waiter in queue
+ {no, Lucky} ->
+ C = #cyclic{op = SimpleOp, lock = Lock, oid = Oid, lucky = Lucky},
+ verbose("** WARNING ** Restarted transaction, possible deadlock in lock queue ~w: cyclic = ~w~n",
+ [Tid, C]),
+ ?ets_match_delete(mnesia_lock_queue,
+ #queue{oid=Oid, tid = Tid, op = Op,
+ pid = ReplyTo, lucky = '_'}),
+ Reply = {not_granted, C},
+ reply(ReplyTo,Reply),
+ removed
+ end.
+
+key_delete_all(Key, Pos, TupleList) ->
+ key_delete_all(Key, Pos, TupleList, []).
+key_delete_all(Key, Pos, [H|T], Ack) when element(Pos, H) == Key ->
+ key_delete_all(Key, Pos, T, Ack);
+key_delete_all(Key, Pos, [H|T], Ack) ->
+ key_delete_all(Key, Pos, T, [H|Ack]);
+key_delete_all(_, _, [], Ack) ->
+ lists:reverse(Ack).
+
+ix_read_res(Tab,IxKey,Pos) ->
+ Index = mnesia_index:get_index_table(Tab, Pos),
+ Rks = mnesia_lib:elems(2,mnesia_index:db_get(Index, IxKey)),
+ lists:append(lists:map(fun(Real) -> mnesia_lib:db_get(Tab, Real) end, Rks)).
+
+%% ********************* end server code ********************
+%% The following code executes at the client side of a transactions
+
+mnesia_down(N, Pending) ->
+ case whereis(?MODULE) of
+ undefined ->
+ %% Takes care of mnesia_down's in early startup
+ mnesia_monitor:mnesia_down(?MODULE, N);
+ Pid ->
+ %% Syncronously call needed in order to avoid
+ %% race with mnesia_tm's coordinator processes
+ %% that may restart and acquire new locks.
+ %% mnesia_monitor ensures the sync.
+ Pid ! {release_remote_non_pending, N, Pending}
+ end.
+
+%% Aquire a write lock, but do a read, used by
+%% mnesia:wread/1
+
+rwlock(Tid, Store, Oid) ->
+ {Tab, Key} = Oid,
+ case val({Tab, where_to_read}) of
+ nowhere ->
+ mnesia:abort({no_exists, Tab});
+ Node ->
+ Lock = write,
+ case need_lock(Store, Tab, Key, Lock) of
+ yes ->
+ Ns = w_nodes(Tab),
+ Res = get_rwlocks_on_nodes(Ns, rwlock, Node, Store, Tid, Oid),
+ ?ets_insert(Store, {{locks, Tab, Key}, Lock}),
+ Res;
+ no ->
+ if
+ Key == ?ALL ->
+ w_nodes(Tab);
+ Tab == ?GLOBAL ->
+ w_nodes(Tab);
+ true ->
+ dirty_rpc(Node, Tab, Key, Lock)
+ end
+ end
+ end.
+
+%% Return a list of nodes or abort transaction
+%% WE also insert any additional where_to_write nodes
+%% in the local store under the key == nodes
+
+w_nodes(Tab) ->
+ Nodes = ?catch_val({Tab, where_to_write}),
+ case Nodes of
+ [_ | _] -> Nodes;
+ _ -> mnesia:abort({no_exists, Tab})
+ end.
+
+%% aquire a sticky wlock, a sticky lock is a lock
+%% which remains at this node after the termination of the
+%% transaction.
+
+sticky_wlock(Tid, Store, Oid) ->
+ sticky_lock(Tid, Store, Oid, write).
+
+sticky_rwlock(Tid, Store, Oid) ->
+ sticky_lock(Tid, Store, Oid, read_write).
+
+sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
+ N = val({Tab, where_to_read}),
+ if
+ node() == N ->
+ case need_lock(Store, Tab, Key, write) of
+ yes ->
+ do_sticky_lock(Tid, Store, Oid, Lock);
+ no ->
+ dirty_sticky_lock(Tab, Key, [N], Lock)
+ end;
+ true ->
+ mnesia:abort({not_local, Tab})
+ end.
+
+do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
+ ?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
+ N = node(),
+ receive
+ {?MODULE, N, granted} ->
+ ?ets_insert(Store, {{locks, Tab, Key}, write}),
+ [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ granted;
+ {?MODULE, N, {granted, Val}} -> %% for rwlocks
+ case opt_lookup_in_client(Val, Oid, write) of
+ C = #cyclic{} ->
+ exit({aborted, C});
+ Val2 ->
+ ?ets_insert(Store, {{locks, Tab, Key}, write}),
+ [?ets_insert(Store, {nodes, Node}) || Node <- w_nodes(Tab)],
+ Val2
+ end;
+ {?MODULE, N, {not_granted, Reason}} ->
+ exit({aborted, Reason});
+ {?MODULE, N, not_stuck} ->
+ not_stuck(Tid, Store, Tab, Key, Oid, Lock, N),
+ dirty_sticky_lock(Tab, Key, [N], Lock);
+ {mnesia_down, Node} ->
+ EMsg = {aborted, {node_not_running, Node}},
+ flush_remaining([N], Node, EMsg);
+ {?MODULE, N, {stuck_elsewhere, _N2}} ->
+ stuck_elsewhere(Tid, Store, Tab, Key, Oid, Lock),
+ dirty_sticky_lock(Tab, Key, [N], Lock)
+ end.
+
+not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) ->
+ rlock(Tid, Store, {Tab, ?ALL}), %% needed?
+ wlock(Tid, Store, Oid), %% perfect sync
+ wlock(Tid, Store, {Tab, ?STICK}), %% max one sticker/table
+ Ns = val({Tab, where_to_write}),
+ rpc:abcast(Ns, ?MODULE, {stick, Oid, N}).
+
+stuck_elsewhere(Tid, Store, Tab, _Key, Oid, _Lock) ->
+ rlock(Tid, Store, {Tab, ?ALL}), %% needed?
+ wlock(Tid, Store, Oid), %% perfect sync
+ wlock(Tid, Store, {Tab, ?STICK}), %% max one sticker/table
+ Ns = val({Tab, where_to_write}),
+ rpc:abcast(Ns, ?MODULE, {unstick, Tab}).
+
+dirty_sticky_lock(Tab, Key, Nodes, Lock) ->
+ if
+ Lock == read_write ->
+ mnesia_lib:db_get(Tab, Key);
+ Key == ?ALL ->
+ Nodes;
+ Tab == ?GLOBAL ->
+ Nodes;
+ true ->
+ ok
+ end.
+
+sticky_wlock_table(Tid, Store, Tab) ->
+ sticky_lock(Tid, Store, {Tab, ?ALL}, write).
+
+%% aquire a wlock on Oid
+%% We store a {Tabname, write, Tid} in all locktables
+%% on all nodes containing a copy of Tabname
+%% We also store an item {{locks, Tab, Key}, write} in the
+%% local store when we have aquired the lock.
+%%
+wlock(Tid, Store, Oid) ->
+ {Tab, Key} = Oid,
+ case need_lock(Store, Tab, Key, write) of
+ yes ->
+ Ns = w_nodes(Tab),
+ Op = {self(), {write, Tid, Oid}},
+ ?ets_insert(Store, {{locks, Tab, Key}, write}),
+ get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid);
+ no when Key /= ?ALL, Tab /= ?GLOBAL ->
+ [];
+ no ->
+ w_nodes(Tab)
+ end.
+
+wlock_table(Tid, Store, Tab) ->
+ wlock(Tid, Store, {Tab, ?ALL}).
+
+%% Write lock even if the table does not exist
+
+wlock_no_exist(Tid, Store, Tab, Ns) ->
+ Oid = {Tab, ?ALL},
+ Op = {self(), {write, Tid, Oid}},
+ get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid).
+
+need_lock(Store, Tab, Key, LockPattern) ->
+ TabL = ?ets_match_object(Store, {{locks, Tab, ?ALL}, LockPattern}),
+ if
+ TabL == [] ->
+ KeyL = ?ets_match_object(Store, {{locks, Tab, Key}, LockPattern}),
+ if
+ KeyL == [] ->
+ yes;
+ true ->
+ no
+ end;
+ true ->
+ no
+ end.
+
+add_debug(Nodes) -> % Use process dictionary for debug info
+ put(mnesia_wlock_nodes, Nodes).
+
+del_debug() ->
+ erase(mnesia_wlock_nodes).
+
+%% We first send lock request to the local node if it is part of the lockers
+%% then the first sorted node then to the rest of the lockmanagers on all
+%% nodes holding a copy of the table
+
+get_wlocks_on_nodes([Node | Tail], Orig, Store, Request, Oid) ->
+ {?MODULE, Node} ! Request,
+ ?ets_insert(Store, {nodes, Node}),
+ receive_wlocks([Node], undefined, Store, Oid),
+ case node() of
+ Node -> %% Local done try one more
+ get_wlocks_on_nodes(Tail, Orig, Store, Request, Oid);
+ _ -> %% The first succeded cont with the rest
+ get_wlocks_on_nodes(Tail, Store, Request),
+ receive_wlocks(Tail, Orig, Store, Oid)
+ end;
+get_wlocks_on_nodes([], Orig, _Store, _Request, _Oid) ->
+ Orig.
+
+get_wlocks_on_nodes([Node | Tail], Store, Request) ->
+ {?MODULE, Node} ! Request,
+ ?ets_insert(Store,{nodes, Node}),
+ get_wlocks_on_nodes(Tail, Store, Request);
+get_wlocks_on_nodes([], _, _) ->
+ ok.
+
+get_rwlocks_on_nodes([ReadNode|Tail], _Res, ReadNode, Store, Tid, Oid) ->
+ Op = {self(), {read_write, Tid, Oid}},
+ {?MODULE, ReadNode} ! Op,
+ ?ets_insert(Store, {nodes, ReadNode}),
+ Res = receive_wlocks([ReadNode], undefined, Store, Oid),
+ case node() of
+ ReadNode ->
+ get_rwlocks_on_nodes(Tail, Res, ReadNode, Store, Tid, Oid);
+ _ ->
+ get_wlocks_on_nodes(Tail, Store, {self(), {write, Tid, Oid}}),
+ receive_wlocks(Tail, Res, Store, Oid)
+ end;
+get_rwlocks_on_nodes([Node | Tail], Res, ReadNode, Store, Tid, Oid) ->
+ Op = {self(), {write, Tid, Oid}},
+ {?MODULE, Node} ! Op,
+ ?ets_insert(Store, {nodes, Node}),
+ receive_wlocks([Node], undefined, Store, Oid),
+ if node() == Node ->
+ get_rwlocks_on_nodes(Tail, Res, ReadNode, Store, Tid, Oid);
+ Res == rwlock -> %% Hmm
+ Rest = lists:delete(ReadNode, Tail),
+ Op2 = {self(), {read_write, Tid, Oid}},
+ {?MODULE, ReadNode} ! Op2,
+ ?ets_insert(Store, {nodes, ReadNode}),
+ get_wlocks_on_nodes(Rest, Store, {self(), {write, Tid, Oid}}),
+ receive_wlocks([ReadNode|Rest], undefined, Store, Oid);
+ true ->
+ get_wlocks_on_nodes(Tail, Store, {self(), {write, Tid, Oid}}),
+ receive_wlocks(Tail, Res, Store, Oid)
+ end;
+get_rwlocks_on_nodes([],Res,_,_,_,_) ->
+ Res.
+
+receive_wlocks([], Res, _Store, _Oid) ->
+ del_debug(),
+ Res;
+receive_wlocks(Nodes = [This|Ns], Res, Store, Oid) ->
+ add_debug(Nodes),
+ receive
+ {?MODULE, Node, granted} ->
+ receive_wlocks(lists:delete(Node,Nodes), Res, Store, Oid);
+ {?MODULE, Node, {granted, Val}} -> %% for rwlocks
+ case opt_lookup_in_client(Val, Oid, write) of
+ C = #cyclic{} ->
+ flush_remaining(Nodes, Node, {aborted, C});
+ Val2 ->
+ receive_wlocks(lists:delete(Node,Nodes), Val2, Store, Oid)
+ end;
+ {?MODULE, Node, {not_granted, Reason}} ->
+ Reason1 = {aborted, Reason},
+ flush_remaining(Nodes,Node,Reason1);
+ {?MODULE, Node, {switch, Sticky, _Req}} -> %% for rwlocks
+ Tail = lists:delete(Node,Nodes),
+ Nonstuck = lists:delete(Sticky,Tail),
+ [?ets_insert(Store, {nodes, NSNode}) || NSNode <- Nonstuck],
+ case lists:member(Sticky,Tail) of
+ true ->
+ sticky_flush(Nonstuck,Store),
+ receive_wlocks([Sticky], Res, Store, Oid);
+ false ->
+ sticky_flush(Nonstuck,Store),
+ Res
+ end;
+ {mnesia_down, This} -> % Only look for down from Nodes in list
+ Reason1 = {aborted, {node_not_running, This}},
+ flush_remaining(Ns, This, Reason1)
+ end.
+
+sticky_flush([], _) ->
+ del_debug(),
+ ok;
+sticky_flush(Ns=[Node | Tail], Store) ->
+ add_debug(Ns),
+ receive
+ {?MODULE, Node, _} ->
+ sticky_flush(Tail, Store);
+ {mnesia_down, Node} ->
+ Reason1 = {aborted, {node_not_running, Node}},
+ flush_remaining(Tail, Node, Reason1)
+ end.
+
+flush_remaining([], _SkipNode, Res) ->
+ del_debug(),
+ exit(Res);
+flush_remaining([SkipNode | Tail ], SkipNode, Res) ->
+ flush_remaining(Tail, SkipNode, Res);
+flush_remaining(Ns=[Node | Tail], SkipNode, Res) ->
+ add_debug(Ns),
+ receive
+ {?MODULE, Node, _} ->
+ flush_remaining(Tail, SkipNode, Res);
+ {mnesia_down, Node} ->
+ flush_remaining(Tail, SkipNode, {aborted, {node_not_running, Node}})
+ end.
+
+opt_lookup_in_client(lookup_in_client, Oid, Lock) ->
+ {Tab, Key} = Oid,
+ case catch mnesia_lib:db_get(Tab, Key) of
+ {'EXIT', _} ->
+ %% Table has been deleted from this node,
+ %% restart the transaction.
+ #cyclic{op = read, lock = Lock, oid = Oid, lucky = nowhere};
+ Val ->
+ Val
+ end;
+opt_lookup_in_client(Val, _Oid, _Lock) ->
+ Val.
+
+return_granted_or_nodes({_, ?ALL} , Nodes) -> Nodes;
+return_granted_or_nodes({?GLOBAL, _}, Nodes) -> Nodes;
+return_granted_or_nodes(_ , _Nodes) -> granted.
+
+%% We store a {Tab, read, From} item in the
+%% locks table on the node where we actually do pick up the object
+%% and we also store an item {lock, Oid, read} in our local store
+%% so that we can release any locks we hold when we commit.
+%% This function not only aquires a read lock, but also reads the object
+
+%% Oid's are always {Tab, Key} tuples
+rlock(Tid, Store, Oid) ->
+ {Tab, Key} = Oid,
+ case val({Tab, where_to_read}) of
+ nowhere ->
+ mnesia:abort({no_exists, Tab});
+ Node ->
+ case need_lock(Store, Tab, Key, '_') of
+ yes ->
+ R = l_request(Node, {read, Tid, Oid}, Store),
+ rlock_get_reply(Node, Store, Oid, R);
+ no ->
+ if
+ Key == ?ALL ->
+ [Node];
+ Tab == ?GLOBAL ->
+ [Node];
+ true ->
+ dirty_rpc(Node, Tab, Key, read)
+ end
+ end
+ end.
+
+dirty_rpc(nowhere, Tab, Key, _Lock) ->
+ mnesia:abort({no_exists, {Tab, Key}});
+dirty_rpc(Node, _Tab, ?ALL, _Lock) ->
+ [Node];
+dirty_rpc(Node, ?GLOBAL, _Key, _Lock) ->
+ [Node];
+dirty_rpc(Node, Tab, Key, Lock) ->
+ Args = [Tab, Key],
+ case rpc:call(Node, mnesia_lib, db_get, Args) of
+ {badrpc, Reason} ->
+ case val({Tab, where_to_read}) of
+ Node ->
+ ErrorTag = mnesia_lib:dirty_rpc_error_tag(Reason),
+ mnesia:abort({ErrorTag, Args});
+ _NewNode ->
+ %% Table has been deleted from the node,
+ %% restart the transaction.
+ C = #cyclic{op = read, lock = Lock, oid = {Tab, Key}, lucky = nowhere},
+ exit({aborted, C})
+ end;
+ Other ->
+ Other
+ end.
+
+rlock_get_reply(Node, Store, Oid, {granted, V}) ->
+ {Tab, Key} = Oid,
+ ?ets_insert(Store, {{locks, Tab, Key}, read}),
+ ?ets_insert(Store, {nodes, Node}),
+ case opt_lookup_in_client(V, Oid, read) of
+ C = #cyclic{} ->
+ mnesia:abort(C);
+ Val ->
+ Val
+ end;
+rlock_get_reply(Node, Store, Oid, granted) ->
+ {Tab, Key} = Oid,
+ ?ets_insert(Store, {{locks, Tab, Key}, read}),
+ ?ets_insert(Store, {nodes, Node}),
+ return_granted_or_nodes(Oid, [Node]);
+rlock_get_reply(Node, Store, Tab, {granted, V, RealKeys}) ->
+ %% Kept for backwards compatibility, keep until no old nodes
+ %% are available
+ L = fun(K) -> ?ets_insert(Store, {{locks, Tab, K}, read}) end,
+ lists:foreach(L, RealKeys),
+ ?ets_insert(Store, {nodes, Node}),
+ V;
+rlock_get_reply(_Node, _Store, _Oid, {not_granted, Reason}) ->
+ exit({aborted, Reason});
+
+rlock_get_reply(_Node, Store, Oid, {switch, N2, Req}) ->
+ ?ets_insert(Store, {nodes, N2}),
+ {?MODULE, N2} ! Req,
+ rlock_get_reply(N2, Store, Oid, l_req_rec(N2, Store)).
+
+rlock_table(Tid, Store, Tab) ->
+ rlock(Tid, Store, {Tab, ?ALL}).
+
+ixrlock(Tid, Store, Tab, IxKey, Pos) ->
+ case val({Tab, where_to_read}) of
+ nowhere ->
+ mnesia:abort({no_exists, Tab});
+ Node ->
+ %%% Old code
+ %% R = l_request(Node, {ix_read, Tid, Tab, IxKey, Pos}, Store),
+ %% rlock_get_reply(Node, Store, Tab, R)
+
+ case need_lock(Store, Tab, ?ALL, read) of
+ no when Node =:= node() ->
+ ix_read_res(Tab,IxKey,Pos);
+ _ -> %% yes or need to get the result from other node
+ R = l_request(Node, {ix_read, Tid, Tab, IxKey, Pos}, Store),
+ rlock_get_reply(Node, Store, Tab, R)
+ end
+ end.
+
+%% Grabs the locks or exits
+global_lock(Tid, Store, Item, write, Ns) ->
+ Oid = {?GLOBAL, Item},
+ Op = {self(), {write, Tid, Oid}},
+ get_wlocks_on_nodes(Ns, Ns, Store, Op, Oid);
+global_lock(Tid, Store, Item, read, Ns) ->
+ Oid = {?GLOBAL, Item},
+ send_requests(Ns, {read, Tid, Oid}),
+ rec_requests(Ns, Oid, Store),
+ Ns.
+
+send_requests([Node | Nodes], X) ->
+ {?MODULE, Node} ! {self(), X},
+ send_requests(Nodes, X);
+send_requests([], _X) ->
+ ok.
+
+rec_requests([Node | Nodes], Oid, Store) ->
+ Res = l_req_rec(Node, Store),
+ case catch rlock_get_reply(Node, Store, Oid, Res) of
+ {'EXIT', Reason} ->
+ flush_remaining(Nodes, Node, Reason);
+ _ ->
+ rec_requests(Nodes, Oid, Store)
+ end;
+rec_requests([], _Oid, _Store) ->
+ ok.
+
+get_held_locks() ->
+ ?MODULE ! {get_table, self(), mnesia_held_locks},
+ receive {mnesia_held_locks, Locks} -> Locks end.
+
+get_lock_queue() ->
+ ?MODULE ! {get_table, self(), mnesia_lock_queue},
+ Q = receive {mnesia_lock_queue, Locks} -> Locks end,
+ [{Oid, Op, Pid, Tid, WFT} || {queue, Oid, Tid, Op, Pid, WFT} <- Q].
+
+do_stop() ->
+ exit(shutdown).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+system_continue(_Parent, _Debug, State) ->
+ loop(State).
+
+system_terminate(_Reason, _Parent, _Debug, _State) ->
+ do_stop().
+
+system_code_change(State, _Module, _OldVsn, _Extra) ->
+ {ok, State}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% AXD301 patch sort pids according to R9B sort order
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Om R9B == true, g�rs j�mf�relsen som i R9B plain.
+%% Om R9B == false, g�rs j�mf�relsen som i alla andra releaser.
+%% cmp_tid(T1, T2) returnerar -1 om T1 < T2, 0 om T1 = T2 och 1 om T1 > T2.
+
+-define(VERSION_MAGIC, 131).
+-define(ATOM_EXT, 100).
+-define(PID_EXT, 103).
+
+-record(pid_info, {serial, number, nodename, creation}).
+
+cmp_tid(R9B,
+ #tid{} = T,
+ #tid{} = T) when R9B == true; R9B == false ->
+ 0;
+cmp_tid(R9B,
+ #tid{counter = C, pid = Pid1},
+ #tid{counter = C, pid = Pid2}) when R9B == true; R9B == false ->
+ cmp_pid_info(R9B, pid_to_pid_info(Pid1), pid_to_pid_info(Pid2));
+cmp_tid(R9B,
+ #tid{counter = C1},
+ #tid{counter = C2}) when R9B == true; R9B == false ->
+ cmp(C1, C2).
+
+cmp_pid_info(_, #pid_info{} = PI, #pid_info{} = PI) ->
+ 0;
+cmp_pid_info(false,
+ #pid_info{serial = S, number = N, nodename = NN, creation = C1},
+ #pid_info{serial = S, number = N, nodename = NN, creation = C2}) ->
+ cmp(C1, C2);
+cmp_pid_info(false,
+ #pid_info{serial = S, number = N, nodename = NN1},
+ #pid_info{serial = S, number = N, nodename = NN2}) ->
+ cmp(NN1, NN2);
+cmp_pid_info(false,
+ #pid_info{serial = S, number = N1},
+ #pid_info{serial = S, number = N2}) ->
+ cmp(N1, N2);
+cmp_pid_info(false, #pid_info{serial = S1}, #pid_info{serial = S2}) ->
+ cmp(S1, S2);
+cmp_pid_info(true,
+ #pid_info{nodename = NN, creation = C, serial = S, number = N1},
+ #pid_info{nodename = NN, creation = C, serial = S, number = N2}) ->
+ cmp(N1, N2);
+cmp_pid_info(true,
+ #pid_info{nodename = NN, creation = C, serial = S1},
+ #pid_info{nodename = NN, creation = C, serial = S2}) ->
+ cmp(S1, S2);
+cmp_pid_info(true,
+ #pid_info{nodename = NN, creation = C1},
+ #pid_info{nodename = NN, creation = C2}) ->
+ cmp(C1, C2);
+cmp_pid_info(true, #pid_info{nodename = NN1}, #pid_info{nodename = NN2}) ->
+ cmp(NN1, NN2).
+
+cmp(X, X) -> 0;
+cmp(X1, X2) when X1 < X2 -> -1;
+cmp(_X1, _X2) -> 1.
+
+pid_to_pid_info(Pid) when is_pid(Pid) ->
+ [?VERSION_MAGIC, ?PID_EXT, ?ATOM_EXT, NNL1, NNL0 | Rest]
+ = binary_to_list(term_to_binary(Pid)),
+ [N3, N2, N1, N0, S3, S2, S1, S0, Creation] = drop(bytes2int(NNL1, NNL0),
+ Rest),
+ #pid_info{serial = bytes2int(S3, S2, S1, S0),
+ number = bytes2int(N3, N2, N1, N0),
+ nodename = node(Pid),
+ creation = Creation}.
+
+drop(0, L) -> L;
+drop(N, [_|L]) when is_integer(N), N > 0 -> drop(N-1, L);
+drop(N, []) when is_integer(N), N > 0 -> [].
+
+bytes2int(N1, N0) when 0 =< N1, N1 =< 255,
+ 0 =< N0, N0 =< 255 ->
+ (N1 bsl 8) bor N0.
+bytes2int(N3, N2, N1, N0) when 0 =< N3, N3 =< 255,
+ 0 =< N2, N2 =< 255,
+ 0 =< N1, N1 =< 255,
+ 0 =< N0, N0 =< 255 ->
+ (N3 bsl 24) bor (N2 bsl 16) bor (N1 bsl 8) bor N0.
+
diff --git a/lib/mnesia/src/mnesia_log.erl b/lib/mnesia/src/mnesia_log.erl
new file mode 100644
index 0000000000..00ec4740ee
--- /dev/null
+++ b/lib/mnesia/src/mnesia_log.erl
@@ -0,0 +1,1025 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% This module administers three kinds of log files:
+%%
+%% 1 The transaction log
+%% mnesia_tm appends to the log (via mnesia_log) at the
+%% end of each transaction (or dirty write) and
+%% mnesia_dumper reads the log and performs the ops in
+%% the dat files. The dump_log is done at startup and
+%% at intervals controlled by the user.
+%%
+%% 2 The mnesia_down log
+%% mnesia_tm appends to the log (via mnesia_log) when it
+%% realizes that mnesia goes up or down on another node.
+%% mnesia_init reads the log (via mnesia_log) at startup.
+%%
+%% 3 The backup log
+%% mnesia_schema produces one tiny log when the schema is
+%% initially created. mnesia_schema also reads the log
+%% when the user wants tables (possibly incl the schema)
+%% to be restored. mnesia_log appends to the log when the
+%% user wants to produce a real backup.
+%%
+%% The actual access to the backup media is performed via the
+%% mnesia_backup module for both read and write. mnesia_backup
+%% uses the disk_log (*), BUT the user may write an own module
+%% with the same interface as mnesia_backup and configure
+%% Mnesia so the alternate module performs the actual accesses
+%% to the backup media. This means that the user may put the
+%% backup on medias that Mnesia does not know about possibly on
+%% hosts where Erlang is not running.
+%%
+%% All these logs have to some extent a common structure.
+%% They are all using the disk_log module (*) for the basic
+%% file structure. The disk_log has a repair feature that
+%% can be used to skip erroneous log records if one comes to
+%% the conclusion that it is more important to reuse some
+%% of the log records than the risque of obtaining inconsistent
+%% data. If the data becomes inconsistent it is solely up to the
+%% application to make it consistent again. The automatic
+%% reparation of the disk_log is very powerful, but use it
+%% with extreme care.
+%%
+%% First in all Mnesia's log file is a mnesia log header.
+%% It contains a list with a log_header record as single
+%% element. The structure of the log_header may never be
+%% changed since it may be written to very old backup files.
+%% By holding this record definition stable we can be
+%% able to comprahend backups from timepoint 0. It also
+%% allows us to use the backup format as an interchange
+%% format between Mnesia releases.
+%%
+%% An op-list is a list of tuples with arity 3. Each tuple
+%% has this structure: {Oid, Recs, Op} where Oid is the tuple
+%% {Tab, Key}, Recs is a (possibly empty) list of records and
+%% Op is an atom.
+%%
+%% The log file structure for the transaction log is as follows.
+%%
+%% After the mnesia log section follows an extended record section
+%% containing op-lists. There are several values that Op may
+%% have, such as write, delete, update_counter, delete_object,
+%% and replace. There is no special end of section marker.
+%%
+%% +-----------------+
+%% | mnesia log head |
+%% +-----------------+
+%% | extended record |
+%% | section |
+%% +-----------------+
+%%
+%% The log file structure for the mnesia_down log is as follows.
+%%
+%% After the mnesia log section follows a mnesia_down section
+%% containg lists with yoyo records as single element.
+%%
+%% +-----------------+
+%% | mnesia log head |
+%% +-----------------+
+%% | mnesia_down |
+%% | section |
+%% +-----------------+
+%%
+%% The log file structure for the backup log is as follows.
+%%
+%% After the mnesia log section follows a schema section
+%% containing record lists. A record list is a list of tuples
+%% where {schema, Tab} is interpreted as a delete_table(Tab) and
+%% {schema, Tab, CreateList} are interpreted as create_table.
+%%
+%% The record section also contains record lists. In this section
+%% {Tab, Key} is interpreted as delete({Tab, Key}) and other tuples
+%% as write(Tuple). There is no special end of section marker.
+%%
+%% +-----------------+
+%% | mnesia log head |
+%% +-----------------+
+%% | schema section |
+%% +-----------------+
+%% | record section |
+%% +-----------------+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(mnesia_log).
+
+-export([
+ append/2,
+ backup/1,
+ backup/2,
+ backup_checkpoint/2,
+ backup_checkpoint/3,
+ backup_log_header/0,
+ backup_master/2,
+ chunk_decision_log/1,
+ chunk_decision_tab/1,
+ chunk_log/1,
+ chunk_log/2,
+ close_decision_log/0,
+ close_decision_tab/0,
+ close_log/1,
+ unsafe_close_log/1,
+ confirm_log_dump/1,
+ confirm_decision_log_dump/0,
+ previous_log_file/0,
+ previous_decision_log_file/0,
+ latest_log_file/0,
+ decision_log_version/0,
+ decision_log_file/0,
+ decision_tab_file/0,
+ decision_tab_version/0,
+ dcl_version/0,
+ dcd_version/0,
+ ets2dcd/1,
+ ets2dcd/2,
+ dcd2ets/1,
+ dcd2ets/2,
+ init/0,
+ init_log_dump/0,
+ log/1,
+ slog/1,
+ log_decision/1,
+ log_files/0,
+ open_decision_log/0,
+ trans_log_header/0,
+ open_decision_tab/0,
+ dcl_log_header/0,
+ dcd_log_header/0,
+ open_log/4,
+ open_log/6,
+ prepare_decision_log_dump/0,
+ prepare_log_dump/1,
+ save_decision_tab/1,
+ purge_all_logs/0,
+ purge_some_logs/0,
+ stop/0,
+ tab_copier/3,
+ version/0,
+ view/0,
+ view/1,
+ write_trans_log_header/0
+ ]).
+
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [val/1, dir/1]).
+-import(mnesia_lib, [exists/1, fatal/2, error/2, dbg_out/2]).
+
+trans_log_header() -> log_header(trans_log, version()).
+backup_log_header() -> log_header(backup_log, "1.2").
+decision_log_header() -> log_header(decision_log, decision_log_version()).
+decision_tab_header() -> log_header(decision_tab, decision_tab_version()).
+dcl_log_header() -> log_header(dcl_log, dcl_version()).
+dcd_log_header() -> log_header(dcd_log, dcd_version()).
+
+log_header(Kind, Version) ->
+ #log_header{log_version=Version,
+ log_kind=Kind,
+ mnesia_version=mnesia:system_info(version),
+ node=node(),
+ now=now()}.
+
+version() -> "4.3".
+
+decision_log_version() -> "3.0".
+
+decision_tab_version() -> "1.0".
+
+dcl_version() -> "1.0".
+dcd_version() -> "1.0".
+
+append(Log, Bin) when is_binary(Bin) ->
+ disk_log:balog(Log, Bin);
+append(Log, Term) ->
+ disk_log:alog(Log, Term).
+
+%% Synced append
+sappend(Log, Bin) when is_binary(Bin) ->
+ ok = disk_log:blog(Log, Bin);
+sappend(Log, Term) ->
+ ok = disk_log:log(Log, Term).
+
+%% Write commit records to the latest_log
+log(C) when C#commit.disc_copies == [],
+ C#commit.disc_only_copies == [],
+ C#commit.schema_ops == [] ->
+ ignore;
+log(C) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ if
+ is_record(C, commit) ->
+ C2 = C#commit{ram_copies = [], snmp = []},
+ append(latest_log, C2);
+ true ->
+ %% Either a commit record as binary
+ %% or some decision related info
+ append(latest_log, C)
+ end,
+ mnesia_dumper:incr_log_writes();
+ false ->
+ ignore
+ end.
+
+%% Synced
+
+slog(C) when C#commit.disc_copies == [],
+ C#commit.disc_only_copies == [],
+ C#commit.schema_ops == [] ->
+ ignore;
+slog(C) ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ if
+ is_record(C, commit) ->
+ C2 = C#commit{ram_copies = [], snmp = []},
+ sappend(latest_log, C2);
+ true ->
+ %% Either a commit record as binary
+ %% or some decision related info
+ sappend(latest_log, C)
+ end,
+ mnesia_dumper:incr_log_writes();
+ false ->
+ ignore
+ end.
+
+
+%% Stuff related to the file LOG
+
+%% Returns a list of logfiles. The oldest is first.
+log_files() -> [previous_log_file(),
+ latest_log_file(),
+ decision_tab_file()
+ ].
+
+latest_log_file() -> dir(latest_log_name()).
+
+previous_log_file() -> dir("PREVIOUS.LOG").
+
+decision_log_file() -> dir(decision_log_name()).
+
+decision_tab_file() -> dir(decision_tab_name()).
+
+previous_decision_log_file() -> dir("PDECISION.LOG").
+
+latest_log_name() -> "LATEST.LOG".
+
+decision_log_name() -> "DECISION.LOG".
+
+decision_tab_name() -> "DECISION_TAB.LOG".
+
+init() ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Prev = previous_log_file(),
+ verify_no_exists(Prev),
+
+ Latest = latest_log_file(),
+ verify_no_exists(Latest),
+
+ Header = trans_log_header(),
+ open_log(latest_log, Header, Latest);
+ false ->
+ ok
+ end.
+
+verify_no_exists(Fname) ->
+ case exists(Fname) of
+ false ->
+ ok;
+ true ->
+ fatal("Log file exists: ~p~n", [Fname])
+ end.
+
+open_log(Name, Header, Fname) ->
+ Exists = exists(Fname),
+ open_log(Name, Header, Fname, Exists).
+
+open_log(Name, Header, Fname, Exists) ->
+ Repair = mnesia_monitor:get_env(auto_repair),
+ open_log(Name, Header, Fname, Exists, Repair).
+
+open_log(Name, Header, Fname, Exists, Repair) ->
+ case Name == previous_log of
+ true ->
+ open_log(Name, Header, Fname, Exists, Repair, read_only);
+ false ->
+ open_log(Name, Header, Fname, Exists, Repair, read_write)
+ end.
+
+open_log(Name, Header, Fname, Exists, Repair, Mode) ->
+ Args = [{file, Fname}, {name, Name}, {repair, Repair}, {mode, Mode}],
+%% io:format("~p:open_log: ~p ~p~n", [?MODULE, Name, Fname]),
+ case mnesia_monitor:open_log(Args) of
+ {ok, Log} when Exists == true ->
+ Log;
+ {ok, Log} ->
+ write_header(Log, Header),
+ Log;
+ {repaired, Log, _, {badbytes, 0}} when Exists == true ->
+ Log;
+ {repaired, Log, _, {badbytes, 0}} ->
+ write_header(Log, Header),
+ Log;
+ {repaired, Log, _Recover, BadBytes} ->
+ mnesia_lib:important("Data may be missing, log ~p repaired: Lost ~p bytes~n",
+ [Fname, BadBytes]),
+ Log;
+ {error, Reason} when Repair == true ->
+ file:delete(Fname),
+ mnesia_lib:important("Data may be missing, Corrupt logfile deleted: ~p, ~p ~n",
+ [Fname, Reason]),
+ %% Create a new
+ open_log(Name, Header, Fname, false, false, read_write);
+ {error, Reason} ->
+ fatal("Cannot open log file ~p: ~p~n", [Fname, Reason])
+ end.
+
+write_header(Log, Header) ->
+ append(Log, Header).
+
+write_trans_log_header() ->
+ write_header(latest_log, trans_log_header()).
+
+stop() ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ close_log(latest_log);
+ false ->
+ ok
+ end.
+
+close_log(Log) ->
+%% io:format("mnesia_log:close_log ~p~n", [Log]),
+%% io:format("mnesia_log:close_log ~p~n", [Log]),
+ case disk_log:sync(Log) of
+ ok -> ok;
+ {error, {read_only_mode, Log}} ->
+ ok;
+ {error, Reason} ->
+ mnesia_lib:important("Failed syncing ~p to_disk reason ~p ~n",
+ [Log, Reason])
+ end,
+ mnesia_monitor:close_log(Log).
+
+unsafe_close_log(Log) ->
+%% io:format("mnesia_log:close_log ~p~n", [Log]),
+ mnesia_monitor:unsafe_close_log(Log).
+
+
+purge_some_logs() ->
+ mnesia_monitor:unsafe_close_log(latest_log),
+ file:delete(latest_log_file()),
+ file:delete(decision_tab_file()).
+
+purge_all_logs() ->
+ file:delete(previous_log_file()),
+ file:delete(latest_log_file()),
+ file:delete(decision_tab_file()).
+
+%% Prepare dump by renaming the open logfile if possible
+%% Returns a tuple on the following format: {Res, OpenLog}
+%% where OpenLog is the file descriptor to log file, ready for append
+%% and Res is one of the following: already_dumped, needs_dump or {error, Reason}
+prepare_log_dump(InitBy) ->
+ Diff = mnesia_dumper:get_log_writes() -
+ mnesia_lib:read_counter(trans_log_writes_prev),
+ if
+ Diff == 0, InitBy /= startup ->
+ already_dumped;
+ true ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Prev = previous_log_file(),
+ prepare_prev(Diff, InitBy, Prev, exists(Prev));
+ false ->
+ already_dumped
+ end
+ end.
+
+prepare_prev(Diff, _, _, true) ->
+ {needs_dump, Diff};
+prepare_prev(Diff, startup, Prev, false) ->
+ Latest = latest_log_file(),
+ case exists(Latest) of
+ true ->
+ case file:rename(Latest, Prev) of
+ ok ->
+ {needs_dump, Diff};
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ false ->
+ already_dumped
+ end;
+prepare_prev(Diff, _InitBy, Prev, false) ->
+ Head = trans_log_header(),
+ case mnesia_monitor:reopen_log(latest_log, Prev, Head) of
+ ok ->
+ {needs_dump, Diff};
+ {error, Reason} ->
+ Latest = latest_log_file(),
+ {error, {"Cannot rename log file",
+ [Latest, Prev, Reason]}}
+ end.
+
+%% Init dump and return PrevLogFileDesc or exit.
+init_log_dump() ->
+ Fname = previous_log_file(),
+ open_log(previous_log, trans_log_header(), Fname),
+ start.
+
+
+chunk_log(Cont) ->
+ chunk_log(previous_log, Cont).
+
+chunk_log(_Log, eof) ->
+ eof;
+chunk_log(Log, Cont) ->
+ case catch disk_log:chunk(Log, Cont) of
+ {error, Reason} ->
+ fatal("Possibly truncated ~p file: ~p~n",
+ [Log, Reason]);
+ {C2, Chunk, _BadBytes} ->
+ %% Read_only case, should we warn about the bad log file?
+ %% BUGBUG Should we crash if Repair == false ??
+ %% We got to check this !!
+ mnesia_lib:important("~p repaired, lost ~p bad bytes~n", [Log, _BadBytes]),
+ {C2, Chunk};
+ Other ->
+ Other
+ end.
+
+%% Confirms the dump by closing prev log and delete the file
+confirm_log_dump(Updates) ->
+ case mnesia_monitor:close_log(previous_log) of
+ ok ->
+ file:delete(previous_log_file()),
+ mnesia_lib:incr_counter(trans_log_writes_prev, Updates),
+ dumped;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Decision log
+
+open_decision_log() ->
+ Latest = decision_log_file(),
+ open_log(decision_log, decision_log_header(), Latest),
+ start.
+
+prepare_decision_log_dump() ->
+ Prev = previous_decision_log_file(),
+ prepare_decision_log_dump(exists(Prev), Prev).
+
+prepare_decision_log_dump(false, Prev) ->
+ Head = decision_log_header(),
+ case mnesia_monitor:reopen_log(decision_log, Prev, Head) of
+ ok ->
+ prepare_decision_log_dump(true, Prev);
+ {error, Reason} ->
+ fatal("Cannot rename decision log file ~p -> ~p: ~p~n",
+ [decision_log_file(), Prev, Reason])
+ end;
+prepare_decision_log_dump(true, Prev) ->
+ open_log(previous_decision_log, decision_log_header(), Prev),
+ start.
+
+chunk_decision_log(Cont) ->
+ %% dbg_out("chunk log ~p~n", [Cont]),
+ chunk_log(previous_decision_log, Cont).
+
+%% Confirms dump of the decision log
+confirm_decision_log_dump() ->
+ case mnesia_monitor:close_log(previous_decision_log) of
+ ok ->
+ file:delete(previous_decision_log_file());
+ {error, Reason} ->
+ fatal("Cannot confirm decision log dump: ~p~n",
+ [Reason])
+ end.
+
+save_decision_tab(Decisions) ->
+ Log = decision_tab,
+ Tmp = mnesia_lib:dir("DECISION_TAB.TMP"),
+ file:delete(Tmp),
+ open_log(Log, decision_tab_header(), Tmp),
+ append(Log, Decisions),
+ close_log(Log),
+ TabFile = decision_tab_file(),
+ ok = file:rename(Tmp, TabFile).
+
+open_decision_tab() ->
+ TabFile = decision_tab_file(),
+ open_log(decision_tab, decision_tab_header(), TabFile),
+ start.
+
+close_decision_tab() ->
+ close_log(decision_tab).
+
+chunk_decision_tab(Cont) ->
+ %% dbg_out("chunk tab ~p~n", [Cont]),
+ chunk_log(decision_tab, Cont).
+
+close_decision_log() ->
+ close_log(decision_log).
+
+log_decision(Decision) ->
+ append(decision_log, Decision).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Debug functions
+
+view() ->
+ lists:foreach(fun(F) -> view(F) end, log_files()).
+
+view(File) ->
+ mnesia_lib:show("***** ~p ***** ~n", [File]),
+ case exists(File) of
+ false ->
+ nolog;
+ true ->
+ N = view_only,
+ Args = [{file, File}, {name, N}, {mode, read_only}],
+ case disk_log:open(Args) of
+ {ok, N} ->
+ view_file(start, N);
+ {repaired, _, _, _} ->
+ view_file(start, N);
+ {error, Reason} ->
+ error("Cannot open log ~p: ~p~n", [File, Reason])
+ end
+ end.
+
+view_file(C, Log) ->
+ case disk_log:chunk(Log, C) of
+ {error, Reason} ->
+ error("** Possibly truncated FILE ~p~n", [Reason]),
+ error;
+ eof ->
+ disk_log:close(Log),
+ eof;
+ {C2, Terms, _BadBytes} ->
+ dbg_out("Lost ~p bytes in ~p ~n", [_BadBytes, Log]),
+ lists:foreach(fun(X) -> mnesia_lib:show("~p~n", [X]) end,
+ Terms),
+ view_file(C2, Log);
+ {C2, Terms} ->
+ lists:foreach(fun(X) -> mnesia_lib:show("~p~n", [X]) end,
+ Terms),
+ view_file(C2, Log)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Backup
+
+-record(backup_args, {name, module, opaque, scope, prev_name, tables, cookie}).
+
+backup(Opaque) ->
+ backup(Opaque, []).
+
+backup(Opaque, Mod) when is_atom(Mod) ->
+ backup(Opaque, [{module, Mod}]);
+backup(Opaque, Args) when is_list(Args) ->
+ %% Backup all tables with max redundancy
+ CpArgs = [{ram_overrides_dump, false}, {max, val({schema, tables})}],
+ case mnesia_checkpoint:activate(CpArgs) of
+ {ok, Name, _Nodes} ->
+ Res = backup_checkpoint(Name, Opaque, Args),
+ mnesia_checkpoint:deactivate(Name),
+ Res;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+backup_checkpoint(Name, Opaque) ->
+ backup_checkpoint(Name, Opaque, []).
+
+backup_checkpoint(Name, Opaque, Mod) when is_atom(Mod) ->
+ backup_checkpoint(Name, Opaque, [{module, Mod}]);
+backup_checkpoint(Name, Opaque, Args) when is_list(Args) ->
+ DefaultMod = mnesia_monitor:get_env(backup_module),
+ B = #backup_args{name = Name,
+ module = DefaultMod,
+ opaque = Opaque,
+ scope = global,
+ tables = all,
+ prev_name = Name},
+ case check_backup_args(Args, B) of
+ {ok, B2} ->
+ %% Decentralized backup
+ %% Incremental
+
+ Self = self(),
+ Pid = spawn_link(?MODULE, backup_master, [Self, B2]),
+ receive
+ {Pid, Self, Res} -> Res
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+check_backup_args([Arg | Tail], B) ->
+ case catch check_backup_arg_type(Arg, B) of
+ {'EXIT', _Reason} ->
+ {error, {badarg, Arg}};
+ B2 ->
+ check_backup_args(Tail, B2)
+ end;
+
+check_backup_args([], B) ->
+ {ok, B}.
+
+check_backup_arg_type(Arg, B) ->
+ case Arg of
+ {scope, global} ->
+ B#backup_args{scope = global};
+ {scope, local} ->
+ B#backup_args{scope = local};
+ {module, Mod} ->
+ Mod2 = mnesia_monitor:do_check_type(backup_module, Mod),
+ B#backup_args{module = Mod2};
+ {incremental, Name} ->
+ B#backup_args{prev_name = Name};
+ {tables, Tabs} when is_list(Tabs) ->
+ B#backup_args{tables = Tabs}
+ end.
+
+backup_master(ClientPid, B) ->
+ process_flag(trap_exit, true),
+ case catch do_backup_master(B) of
+ {'EXIT', Reason} ->
+ ClientPid ! {self(), ClientPid, {error, {'EXIT', Reason}}};
+ Res ->
+ ClientPid ! {self(), ClientPid, Res}
+ end,
+ unlink(ClientPid),
+ exit(normal).
+
+do_backup_master(B) ->
+ Name = B#backup_args.name,
+ B2 = safe_apply(B, open_write, [B#backup_args.opaque]),
+ B3 = safe_write(B2, [backup_log_header()]),
+ case mnesia_checkpoint:tables_and_cookie(Name) of
+ {ok, AllTabs, Cookie} ->
+ Tabs = select_tables(AllTabs, B3),
+ B4 = B3#backup_args{cookie = Cookie},
+ %% Always put schema first in backup file
+ B5 = backup_schema(B4, Tabs),
+ B6 = lists:foldl(fun backup_tab/2, B5, Tabs -- [schema]),
+ safe_apply(B6, commit_write, [B6#backup_args.opaque]),
+ ok;
+ {error, Reason} ->
+ abort_write(B3, {?MODULE, backup_master}, [B], {error, Reason})
+ end.
+
+select_tables(AllTabs, B) ->
+ Tabs =
+ case B#backup_args.tables of
+ all -> AllTabs;
+ SomeTabs when is_list(SomeTabs) -> SomeTabs
+ end,
+ case B#backup_args.scope of
+ global ->
+ Tabs;
+ local ->
+ Name = B#backup_args.name,
+ [T || T <- Tabs, mnesia_checkpoint:most_local_node(Name, T) == {ok, node()}]
+ end.
+
+safe_write(B, []) ->
+ B;
+safe_write(B, Recs) ->
+ safe_apply(B, write, [B#backup_args.opaque, Recs]).
+
+backup_schema(B, Tabs) ->
+ case lists:member(schema, Tabs) of
+ true ->
+ backup_tab(schema, B);
+ false ->
+ Defs = [{schema, T, mnesia_schema:get_create_list(T)} || T <- Tabs],
+ safe_write(B, Defs)
+ end.
+
+safe_apply(B, write, [_, Items]) when Items == [] ->
+ B;
+safe_apply(B, What, Args) ->
+ Abort = fun(R) -> abort_write(B, What, Args, R) end,
+ receive
+ {'EXIT', Pid, R} -> Abort({'EXIT', Pid, R})
+ after 0 ->
+ Mod = B#backup_args.module,
+ case catch apply(Mod, What, Args) of
+ {ok, Opaque} -> B#backup_args{opaque=Opaque};
+ {error, R} -> Abort(R);
+ R -> Abort(R)
+ end
+ end.
+
+abort_write(B, What, Args, Reason) ->
+ Mod = B#backup_args.module,
+ Opaque = B#backup_args.opaque,
+ dbg_out("Failed to perform backup. M=~p:F=~p:A=~p -> ~p~n",
+ [Mod, What, Args, Reason]),
+ case catch apply(Mod, abort_write, [Opaque]) of
+ {ok, _Res} ->
+ throw({error, Reason});
+ Other ->
+ error("Failed to abort backup. ~p:~p~p -> ~p~n",
+ [Mod, abort_write, [Opaque], Other]),
+ throw({error, Reason})
+ end.
+
+backup_tab(Tab, B) ->
+ Name = B#backup_args.name,
+ case mnesia_checkpoint:most_local_node(Name, Tab) of
+ {ok, Node} when Node == node() ->
+ tab_copier(self(), B, Tab);
+ {ok, Node} ->
+ RemoteB = B,
+ Pid = spawn_link(Node, ?MODULE, tab_copier, [self(), RemoteB, Tab]),
+ RecName = val({Tab, record_name}),
+ tab_receiver(Pid, B, Tab, RecName, 0);
+ {error, Reason} ->
+ abort_write(B, {?MODULE, backup_tab}, [Tab, B], {error, Reason})
+ end.
+
+tab_copier(Pid, B, Tab) when is_record(B, backup_args) ->
+ %% Intentional crash at exit
+ Name = B#backup_args.name,
+ PrevName = B#backup_args.prev_name,
+ {FirstName, FirstSource} = select_source(Tab, Name, PrevName),
+
+ ?eval_debug_fun({?MODULE, tab_copier, pre}, [{name, Name}, {tab, Tab}]),
+ Res = handle_more(Pid, B, Tab, FirstName, FirstSource, Name),
+ ?eval_debug_fun({?MODULE, tab_copier, post}, [{name, Name}, {tab, Tab}]),
+
+ handle_last(Pid, Res).
+
+select_source(Tab, Name, PrevName) ->
+ if
+ Tab == schema ->
+ %% Always full backup of schema
+ {Name, table};
+ Name == PrevName ->
+ %% Full backup
+ {Name, table};
+ true ->
+ %% Wants incremental backup
+ case mnesia_checkpoint:most_local_node(PrevName, Tab) of
+ {ok, Node} when Node == node() ->
+ %% Accept incremental backup
+ {PrevName, retainer};
+ _ ->
+ %% Do a full backup anyway
+ dbg_out("Incremental backup escalated to full backup: ~p~n", [Tab]),
+ {Name, table}
+ end
+ end.
+
+handle_more(Pid, B, Tab, FirstName, FirstSource, Name) ->
+ Acc = {0, B},
+ case {mnesia_checkpoint:really_retain(Name, Tab),
+ mnesia_checkpoint:really_retain(FirstName, Tab)} of
+ {true, true} ->
+ Acc2 = iterate(B, FirstName, Tab, Pid, FirstSource, latest, first, Acc),
+ iterate(B, Name, Tab, Pid, retainer, checkpoint, last, Acc2);
+ {false, false}->
+ %% Put the dumped file in the backup
+ %% instead of the ram table. Does
+ %% only apply to ram_copies.
+ iterate(B, Name, Tab, Pid, retainer, checkpoint, last, Acc);
+ Bad ->
+ Reason = {"Checkpoints for incremental backup must have same "
+ "setting of ram_overrides_dump",
+ Tab, Name, FirstName, Bad},
+ abort_write(B, {?MODULE, backup_tab}, [Tab, B], {error, Reason})
+ end.
+
+handle_last(Pid, {_Count, B}) when Pid == self() ->
+ B;
+handle_last(Pid, _Acc) ->
+ unlink(Pid),
+ Pid ! {self(), {last, {ok, dummy}}},
+ exit(normal).
+
+iterate(B, Name, Tab, Pid, Source, Age, Pass, Acc) ->
+ Fun =
+ if
+ Pid == self() ->
+ RecName = val({Tab, record_name}),
+ fun(Recs, A) -> copy_records(RecName, Tab, Recs, A) end;
+ true ->
+ fun(Recs, A) -> send_records(Pid, Tab, Recs, Pass, A) end
+ end,
+ case mnesia_checkpoint:iterate(Name, Tab, Fun, Acc, Source, Age) of
+ {ok, Acc2} ->
+ Acc2;
+ {error, Reason} ->
+ R = {error, {"Tab copier iteration failed", Reason}},
+ abort_write(B, {?MODULE, iterate}, [self(), B, Tab], R)
+ end.
+
+copy_records(_RecName, _Tab, [], Acc) ->
+ Acc;
+copy_records(RecName, Tab, Recs, {Count, B}) ->
+ Recs2 = rec_filter(B, Tab, RecName, Recs),
+ B2 = safe_write(B, Recs2),
+ {Count + 1, B2}.
+
+send_records(Pid, Tab, Recs, Pass, {Count, B}) ->
+ receive
+ {Pid, more, Count} ->
+ if
+ Pass == last, Recs == [] ->
+ {Count, B};
+ true ->
+ Next = Count + 1,
+ Pid ! {self(), {more, Next, Recs}},
+ {Next, B}
+ end;
+ Msg ->
+ exit({send_records_unexpected_msg, Tab, Msg})
+ end.
+
+tab_receiver(Pid, B, Tab, RecName, Slot) ->
+ Pid ! {self(), more, Slot},
+ receive
+ {Pid, {more, Next, Recs}} ->
+ Recs2 = rec_filter(B, Tab, RecName, Recs),
+ B2 = safe_write(B, Recs2),
+ tab_receiver(Pid, B2, Tab, RecName, Next);
+
+ {Pid, {last, {ok,_}}} ->
+ B;
+
+ {'EXIT', Pid, {error, R}} ->
+ Reason = {error, {"Tab copier crashed", R}},
+ abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], Reason);
+ {'EXIT', Pid, R} ->
+ Reason = {error, {"Tab copier crashed", {'EXIT', R}}},
+ abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], Reason);
+ Msg ->
+ R = {error, {"Tab receiver got unexpected msg", Msg}},
+ abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], R)
+ end.
+
+rec_filter(B, schema, _RecName, Recs) ->
+ case catch mnesia_bup:refresh_cookie(Recs, B#backup_args.cookie) of
+ Recs2 when is_list(Recs2) ->
+ Recs2;
+ {error, _Reason} ->
+ %% No schema table cookie
+ Recs
+ end;
+rec_filter(_B, Tab, Tab, Recs) ->
+ Recs;
+rec_filter(_B, Tab, _RecName, Recs) ->
+ [setelement(1, Rec, Tab) || Rec <- Recs].
+
+ets2dcd(Tab) ->
+ ets2dcd(Tab, dcd).
+
+ets2dcd(Tab, Ftype) ->
+ Fname =
+ case Ftype of
+ dcd -> mnesia_lib:tab2dcd(Tab);
+ dmp -> mnesia_lib:tab2dmp(Tab)
+ end,
+ TmpF = mnesia_lib:tab2tmp(Tab),
+ file:delete(TmpF),
+ Log = open_log({Tab, ets2dcd}, dcd_log_header(), TmpF, false),
+ mnesia_lib:db_fixtable(ram_copies, Tab, true),
+ ok = ets2dcd(mnesia_lib:db_init_chunk(ram_copies, Tab, 1000), Tab, Log),
+ mnesia_lib:db_fixtable(ram_copies, Tab, false),
+ close_log(Log),
+ ok = file:rename(TmpF, Fname),
+ %% Remove old log data which is now in the new dcd.
+ %% No one else should be accessing this file!
+ file:delete(mnesia_lib:tab2dcl(Tab)),
+ ok.
+
+ets2dcd('$end_of_table', _Tab, _Log) ->
+ ok;
+ets2dcd({Recs, Cont}, Tab, Log) ->
+ ok = disk_log:alog_terms(Log, Recs),
+ ets2dcd(mnesia_lib:db_chunk(ram_copies, Cont), Tab, Log).
+
+dcd2ets(Tab) ->
+ dcd2ets(Tab, mnesia_monitor:get_env(auto_repair)).
+
+dcd2ets(Tab, Rep) ->
+ Dcd = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dcd) of
+ true ->
+ Log = open_log({Tab, dcd2ets}, dcd_log_header(), Dcd,
+ true, Rep, read_only),
+ Data = chunk_log(Log, start),
+ ok = insert_dcdchunk(Data, Log, Tab),
+ close_log(Log),
+ load_dcl(Tab, Rep);
+ false -> %% Handle old dets files, and conversion from disc_only to disc.
+ Fname = mnesia_lib:tab2dat(Tab),
+ Type = val({Tab, setorbag}),
+ case mnesia_lib:dets_to_ets(Tab, Tab, Fname, Type, Rep, yes) of
+ loaded ->
+ ets2dcd(Tab),
+ file:delete(Fname),
+ 0;
+ {error, Error} ->
+ erlang:error({"Failed to load table from disc", [Tab, Error]})
+ end
+ end.
+
+insert_dcdchunk({Cont, [LogH | Rest]}, Log, Tab)
+ when is_record(LogH, log_header),
+ LogH#log_header.log_kind == dcd_log,
+ LogH#log_header.log_version >= "1.0" ->
+ insert_dcdchunk({Cont, Rest}, Log, Tab);
+
+insert_dcdchunk({Cont, Recs}, Log, Tab) ->
+ true = ets:insert(Tab, Recs),
+ insert_dcdchunk(chunk_log(Log, Cont), Log, Tab);
+insert_dcdchunk(eof, _Log, _Tab) ->
+ ok.
+
+load_dcl(Tab, Rep) ->
+ FName = mnesia_lib:tab2dcl(Tab),
+ case mnesia_lib:exists(FName) of
+ true ->
+ Name = {load_dcl,Tab},
+ open_log(Name,
+ dcl_log_header(),
+ FName,
+ true,
+ Rep,
+ read_only),
+ FirstChunk = chunk_log(Name, start),
+ N = insert_logchunk(FirstChunk, Name, 0),
+ close_log(Name),
+ N;
+ false ->
+ 0
+ end.
+
+insert_logchunk({C2, Recs}, Tab, C) ->
+ N = add_recs(Recs, C),
+ insert_logchunk(chunk_log(Tab, C2), Tab, C+N);
+insert_logchunk(eof, _Tab, C) ->
+ C.
+
+add_recs([{{Tab, _Key}, Val, write} | Rest], N) ->
+ true = ets:insert(Tab, Val),
+ add_recs(Rest, N+1);
+add_recs([{{Tab, Key}, _Val, delete} | Rest], N) ->
+ true = ets:delete(Tab, Key),
+ add_recs(Rest, N+1);
+add_recs([{{Tab, _Key}, Val, delete_object} | Rest], N) ->
+ true = ets:match_delete(Tab, Val),
+ add_recs(Rest, N+1);
+add_recs([{{Tab, Key}, Val, update_counter} | Rest], N) ->
+ {RecName, Incr} = Val,
+ case catch ets:update_counter(Tab, Key, Incr) of
+ CounterVal when is_integer(CounterVal) ->
+ ok;
+ _ when Incr < 0 ->
+ Zero = {RecName, Key, 0},
+ true = ets:insert(Tab, Zero);
+ _ ->
+ Zero = {RecName, Key, Incr},
+ true = ets:insert(Tab, Zero)
+ end,
+ add_recs(Rest, N+1);
+add_recs([LogH|Rest], N)
+ when is_record(LogH, log_header),
+ LogH#log_header.log_kind == dcl_log,
+ LogH#log_header.log_version >= "1.0" ->
+ add_recs(Rest, N);
+add_recs([{{Tab, _Key}, _Val, clear_table} | Rest], N) ->
+ true = ets:match_delete(Tab, '_'),
+ add_recs(Rest, N+ets:info(Tab, size));
+add_recs([], N) ->
+ N.
diff --git a/lib/mnesia/src/mnesia_monitor.erl b/lib/mnesia/src/mnesia_monitor.erl
new file mode 100644
index 0000000000..05ae943e3b
--- /dev/null
+++ b/lib/mnesia/src/mnesia_monitor.erl
@@ -0,0 +1,823 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_monitor).
+
+-behaviour(gen_server).
+
+%% Public exports
+-export([
+ close_dets/1,
+ close_log/1,
+ detect_inconcistency/2,
+ get_env/1,
+ init/0,
+ mktab/2,
+ unsafe_mktab/2,
+ mnesia_down/2,
+ needs_protocol_conversion/1,
+ negotiate_protocol/1,
+ disconnect/1,
+ open_dets/2,
+ unsafe_open_dets/2,
+ open_log/1,
+ patch_env/2,
+ protocol_version/0,
+ reopen_log/3,
+ set_env/2,
+ start/0,
+ start_proc/4,
+ terminate_proc/3,
+ unsafe_close_dets/1,
+ unsafe_close_log/1,
+ use_dir/0,
+ do_check_type/2
+ ]).
+
+%% gen_server callbacks
+-export([
+ init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3
+ ]).
+
+%% Internal exports
+-export([
+ call/1,
+ cast/1,
+ detect_partitioned_network/2,
+ has_remote_mnesia_down/1,
+ negotiate_protocol_impl/2
+ ]).
+
+-import(mnesia_lib, [dbg_out/2, verbose/2, error/2, fatal/2, set/2]).
+
+-include("mnesia.hrl").
+
+-record(state, {supervisor, pending_negotiators = [],
+ going_down = [], tm_started = false, early_connects = [],
+ connecting, mq = []}).
+
+-define(current_protocol_version, {7,6}).
+
+-define(previous_protocol_version, {7,5}).
+
+start() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE,
+ [self()], [{timeout, infinity}
+ %% ,{debug, [trace]}
+ ]).
+
+init() ->
+ call(init).
+
+mnesia_down(From, Node) ->
+ cast({mnesia_down, From, Node}).
+
+mktab(Tab, Args) ->
+ unsafe_call({mktab, Tab, Args}).
+unsafe_mktab(Tab, Args) ->
+ unsafe_call({unsafe_mktab, Tab, Args}).
+
+open_dets(Tab, Args) ->
+ unsafe_call({open_dets, Tab, Args}).
+unsafe_open_dets(Tab, Args) ->
+ unsafe_call({unsafe_open_dets, Tab, Args}).
+
+close_dets(Tab) ->
+ unsafe_call({close_dets, Tab}).
+
+unsafe_close_dets(Name) ->
+ unsafe_call({unsafe_close_dets, Name}).
+
+open_log(Args) ->
+ unsafe_call({open_log, Args}).
+
+reopen_log(Name, Fname, Head) ->
+ unsafe_call({reopen_log, Name, Fname, Head}).
+
+close_log(Name) ->
+ unsafe_call({close_log, Name}).
+
+unsafe_close_log(Name) ->
+ unsafe_call({unsafe_close_log, Name}).
+
+
+disconnect(Node) ->
+ cast({disconnect, Node}).
+
+%% Returns GoodNoodes
+%% Creates a link to each compatible monitor and
+%% protocol_version to agreed version upon success
+
+negotiate_protocol([]) -> [];
+negotiate_protocol(Nodes) ->
+ call({negotiate_protocol, Nodes}).
+
+negotiate_protocol_impl(Nodes, Requester) ->
+ Version = mnesia:system_info(version),
+ Protocols = acceptable_protocol_versions(),
+ MonitorPid = whereis(?MODULE),
+ Msg = {negotiate_protocol, MonitorPid, Version, Protocols},
+ {Replies, _BadNodes} = multicall(Nodes, Msg),
+ Res = check_protocol(Replies, Protocols),
+ ?MODULE ! {protocol_negotiated,Requester,Res},
+ unlink(whereis(?MODULE)),
+ ok.
+
+check_protocol([{Node, {accept, Mon, Version, Protocol}} | Tail], Protocols) ->
+ case lists:member(Protocol, Protocols) of
+ true ->
+ case Protocol == protocol_version() of
+ true ->
+ set({protocol, Node}, {Protocol, false});
+ false ->
+ set({protocol, Node}, {Protocol, true})
+ end,
+ [node(Mon) | check_protocol(Tail, Protocols)];
+ false ->
+ verbose("Failed to connect with ~p. ~p protocols rejected. "
+ "expected version = ~p, expected protocol = ~p~n",
+ [Node, Protocols, Version, Protocol]),
+ unlink(Mon), % Get rid of unneccessary link
+ check_protocol(Tail, Protocols)
+ end;
+check_protocol([{Node, {reject, _Mon, Version, Protocol}} | Tail], Protocols) ->
+ verbose("Failed to connect with ~p. ~p protocols rejected. "
+ "expected version = ~p, expected protocol = ~p~n",
+ [Node, Protocols, Version, Protocol]),
+ check_protocol(Tail, Protocols);
+check_protocol([{error, _Reason} | Tail], Protocols) ->
+ dbg_out("~p connect failed error: ~p~n", [?MODULE, _Reason]),
+ check_protocol(Tail, Protocols);
+check_protocol([{badrpc, _Reason} | Tail], Protocols) ->
+ dbg_out("~p connect failed badrpc: ~p~n", [?MODULE, _Reason]),
+ check_protocol(Tail, Protocols);
+check_protocol([], [Protocol | _Protocols]) ->
+ set(protocol_version, Protocol),
+ [].
+
+protocol_version() ->
+ case ?catch_val(protocol_version) of
+ {'EXIT', _} -> ?current_protocol_version;
+ Version -> Version
+ end.
+
+%% A sorted list of acceptable protocols the
+%% preferred protocols are first in the list
+acceptable_protocol_versions() ->
+ [protocol_version(), ?previous_protocol_version].
+
+needs_protocol_conversion(Node) ->
+ case {?catch_val({protocol, Node}), protocol_version()} of
+ {{'EXIT', _}, _} ->
+ false;
+ {{_, Bool}, ?current_protocol_version} ->
+ Bool;
+ {{_, Bool}, _} ->
+ not Bool
+ end.
+
+cast(Msg) ->
+ case whereis(?MODULE) of
+ undefined -> ignore;
+ Pid -> gen_server:cast(Pid, Msg)
+ end.
+
+unsafe_call(Msg) ->
+ case whereis(?MODULE) of
+ undefined -> {error, {node_not_running, node()}};
+ Pid -> gen_server:call(Pid, Msg, infinity)
+ end.
+
+call(Msg) ->
+ case whereis(?MODULE) of
+ undefined ->
+ {error, {node_not_running, node()}};
+ Pid ->
+ link(Pid),
+ Res = gen_server:call(Pid, Msg, infinity),
+ unlink(Pid),
+
+ %% We get an exit signal if server dies
+ receive
+ {'EXIT', Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ Res
+ end
+ end.
+
+multicall(Nodes, Msg) ->
+ rpc:multicall(Nodes, ?MODULE, call, [Msg]).
+
+start_proc(Who, Mod, Fun, Args) ->
+ Args2 = [Who, Mod, Fun, Args],
+ proc_lib:start_link(mnesia_sp, init_proc, Args2, infinity).
+
+terminate_proc(Who, R, State) when R /= shutdown, R /= killed ->
+ fatal("~p crashed: ~p state: ~p~n", [Who, R, State]);
+
+terminate_proc(Who, Reason, _State) ->
+ mnesia_lib:verbose("~p terminated: ~p~n", [Who, Reason]),
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Callback functions from gen_server
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) ->
+ process_flag(trap_exit, true),
+ ?ets_new_table(mnesia_gvar, [set, public, named_table]),
+ ?ets_new_table(mnesia_stats, [set, public, named_table]),
+ set(subscribers, []),
+ mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
+ Version = mnesia:system_info(version),
+ set(version, Version),
+ dbg_out("Version: ~p~n", [Version]),
+
+ case catch process_config_args(env()) of
+ ok ->
+ mnesia_lib:set({'$$$_report', current_pos}, 0),
+ Level = mnesia_lib:val(debug),
+ mnesia_lib:verbose("Mnesia debug level set to ~p\n", [Level]),
+ set(mnesia_status, starting), %% set start status
+ set({current, db_nodes}, [node()]),
+ set(use_dir, use_dir()),
+ mnesia_lib:create_counter(trans_aborts),
+ mnesia_lib:create_counter(trans_commits),
+ mnesia_lib:create_counter(trans_log_writes),
+ Left = get_env(dump_log_write_threshold),
+ mnesia_lib:set_counter(trans_log_writes_left, Left),
+ mnesia_lib:create_counter(trans_log_writes_prev),
+ mnesia_lib:create_counter(trans_restarts),
+ mnesia_lib:create_counter(trans_failures),
+ set(checkpoints, []),
+ set(pending_checkpoints, []),
+ set(pending_checkpoint_pids, []),
+
+ {ok, #state{supervisor = Parent}};
+ {'EXIT', Reason} ->
+ mnesia_lib:report_fatal("Bad configuration: ~p~n", [Reason]),
+ {stop, {bad_config, Reason}}
+ end.
+
+use_dir() ->
+ case ?catch_val(use_dir) of
+ {'EXIT', _} ->
+ case get_env(schema_location) of
+ disc -> true;
+ opt_disc -> non_empty_dir();
+ ram -> false
+ end;
+ Bool ->
+ Bool
+ end.
+
+%% Returns true if the Mnesia directory contains
+%% important files
+non_empty_dir() ->
+ mnesia_lib:exists(mnesia_bup:fallback_bup()) or
+ mnesia_lib:exists(mnesia_lib:tab2dmp(schema)) or
+ mnesia_lib:exists(mnesia_lib:tab2dat(schema)).
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_call({mktab, Tab, Args}, _From, State) ->
+ case catch ?ets_new_table(Tab, Args) of
+ {'EXIT', ExitReason} ->
+ Msg = "Cannot create ets table",
+ Reason = {system_limit, Msg, Tab, Args, ExitReason},
+ fatal("~p~n", [Reason]),
+ {noreply, State};
+ Reply ->
+ {reply, Reply, State}
+ end;
+
+handle_call({unsafe_mktab, Tab, Args}, _From, State) ->
+ case catch ?ets_new_table(Tab, Args) of
+ {'EXIT', ExitReason} ->
+ {reply, {error, ExitReason}, State};
+ Reply ->
+ {reply, Reply, State}
+ end;
+
+
+handle_call({open_dets, Tab, Args}, _From, State) ->
+ case mnesia_lib:dets_sync_open(Tab, Args) of
+ {ok, Tab} ->
+ {reply, {ok, Tab}, State};
+
+ {error, Reason} ->
+ Msg = "Cannot open dets table",
+ Error = {error, {Msg, Tab, Args, Reason}},
+ fatal("~p~n", [Error]),
+ {noreply, State}
+ end;
+
+handle_call({unsafe_open_dets, Tab, Args}, _From, State) ->
+ case mnesia_lib:dets_sync_open(Tab, Args) of
+ {ok, Tab} ->
+ {reply, {ok, Tab}, State};
+ {error, Reason} ->
+ {reply, {error,Reason}, State}
+ end;
+
+handle_call({close_dets, Tab}, _From, State) ->
+ ok = mnesia_lib:dets_sync_close(Tab),
+ {reply, ok, State};
+
+handle_call({unsafe_close_dets, Tab}, _From, State) ->
+ mnesia_lib:dets_sync_close(Tab),
+ {reply, ok, State};
+
+handle_call({open_log, Args}, _From, State) ->
+ Res = disk_log:open([{notify, true}|Args]),
+ {reply, Res, State};
+
+handle_call({reopen_log, Name, Fname, Head}, _From, State) ->
+ case disk_log:reopen(Name, Fname, Head) of
+ ok ->
+ {reply, ok, State};
+
+ {error, Reason} ->
+ Msg = "Cannot rename disk_log file",
+ Error = {error, {Msg, Name, Fname, Head, Reason}},
+ fatal("~p~n", [Error]),
+ {noreply, State}
+ end;
+
+handle_call({close_log, Name}, _From, State) ->
+ case disk_log:close(Name) of
+ ok ->
+ {reply, ok, State};
+
+ {error, Reason} ->
+ Msg = "Cannot close disk_log file",
+ Error = {error, {Msg, Name, Reason}},
+ fatal("~p~n", [Error]),
+ {noreply, State}
+ end;
+
+handle_call({unsafe_close_log, Name}, _From, State) ->
+ disk_log:close(Name),
+ {reply, ok, State};
+
+handle_call({negotiate_protocol, Mon, _Version, _Protocols}, _From, State)
+ when State#state.tm_started == false ->
+ State2 = State#state{early_connects = [node(Mon) | State#state.early_connects]},
+ {reply, {node(), {reject, self(), uninitialized, uninitialized}}, State2};
+
+%% From remote monitor..
+handle_call({negotiate_protocol, Mon, Version, Protocols}, From, State)
+ when node(Mon) /= node() ->
+ Protocol = protocol_version(),
+ MyVersion = mnesia:system_info(version),
+ case lists:member(Protocol, Protocols) of
+ true ->
+ accept_protocol(Mon, MyVersion, Protocol, From, State);
+ false ->
+ %% in this release we should be able to handle the previous
+ %% protocol
+ case hd(Protocols) of
+ ?previous_protocol_version ->
+ accept_protocol(Mon, MyVersion, ?previous_protocol_version, From, State);
+ _ ->
+ verbose("Connection with ~p rejected. "
+ "version = ~p, protocols = ~p, "
+ "expected version = ~p, expected protocol = ~p~n",
+ [node(Mon), Version, Protocols, MyVersion, Protocol]),
+ {reply, {node(), {reject, self(), MyVersion, Protocol}}, State}
+ end
+ end;
+
+%% Local request to negotiate with other monitors (nodes).
+handle_call({negotiate_protocol, Nodes}, From, State) ->
+ case mnesia_lib:intersect(State#state.going_down, Nodes) of
+ [] ->
+ spawn_link(?MODULE, negotiate_protocol_impl, [Nodes, From]),
+ {noreply, State#state{connecting={From,Nodes}}};
+ _ -> %% Cannot connect now, still processing mnesia down
+ {reply, busy, State}
+ end;
+
+handle_call(init, _From, State) ->
+ net_kernel:monitor_nodes(true),
+ EarlyNodes = State#state.early_connects,
+ State2 = State#state{tm_started = true},
+ {reply, EarlyNodes, State2};
+
+handle_call(Msg, _From, State) ->
+ error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+accept_protocol(Mon, Version, Protocol, From, State) ->
+ Reply = {node(), {accept, self(), Version, Protocol}},
+ Node = node(Mon),
+ Pending0 = State#state.pending_negotiators,
+ Pending = lists:keydelete(Node, 1, Pending0),
+ case lists:member(Node, State#state.going_down) of
+ true ->
+ %% Wait for the mnesia_down to be processed,
+ %% before we reply
+ P = Pending ++ [{Node, Mon, From, Reply}],
+ {noreply, State#state{pending_negotiators = P}};
+ false ->
+ %% No need for wait
+ link(Mon), %% link to remote Monitor
+ case Protocol == protocol_version() of
+ true ->
+ set({protocol, Node}, {Protocol, false});
+ false ->
+ set({protocol, Node}, {Protocol, true})
+ end,
+ {reply, Reply, State#state{pending_negotiators = Pending}}
+ end.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_cast({mnesia_down, mnesia_controller, Node}, State) ->
+ mnesia_tm:mnesia_down(Node),
+ {noreply, State};
+
+handle_cast({mnesia_down, mnesia_tm, {Node, Pending}}, State) ->
+ mnesia_locker:mnesia_down(Node, Pending),
+ {noreply, State};
+
+handle_cast({mnesia_down, mnesia_locker, Node}, State) ->
+ Down = {mnesia_down, Node},
+ mnesia_lib:report_system_event(Down),
+ GoingDown = lists:delete(Node, State#state.going_down),
+ State2 = State#state{going_down = GoingDown},
+ Pending = State#state.pending_negotiators,
+ case lists:keysearch(Node, 1, Pending) of
+ {value, {Node, Mon, ReplyTo, Reply}} ->
+ %% Late reply to remote monitor
+ link(Mon), %% link to remote Monitor
+ gen_server:reply(ReplyTo, Reply),
+ P2 = lists:keydelete(Node, 1,Pending),
+ State3 = State2#state{pending_negotiators = P2},
+ process_q(State3);
+ false ->
+ %% No pending remote monitors
+ {noreply, State2}
+ end;
+
+handle_cast({disconnect, Node}, State) ->
+ case rpc:call(Node, erlang, whereis, [?MODULE]) of
+ {badrpc, _} ->
+ ignore;
+ undefined ->
+ ignore;
+ RemoteMon when is_pid(RemoteMon) ->
+ unlink(RemoteMon)
+ end,
+ {noreply, State};
+
+handle_cast({inconsistent_database, Context, Node}, State) ->
+ Msg = {inconsistent_database, Context, Node},
+ mnesia_lib:report_system_event(Msg),
+ {noreply, State};
+
+handle_cast(Msg, State) ->
+ error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
+ dbg_out("~p was ~p by supervisor~n",[?MODULE, R]),
+ {stop, R, State};
+
+handle_info({'EXIT', Pid, fatal}, State) when node(Pid) == node() ->
+ dbg_out("~p got FATAL ERROR from: ~p~n",[?MODULE, Pid]),
+ exit(State#state.supervisor, shutdown),
+ {noreply, State};
+
+handle_info(Msg = {'EXIT',Pid,_}, State) ->
+ Node = node(Pid),
+ if
+ Node /= node(), State#state.connecting == undefined ->
+ %% Remotly linked process died, assume that it was a mnesia_monitor
+ mnesia_recover:mnesia_down(Node),
+ mnesia_controller:mnesia_down(Node),
+ {noreply, State#state{going_down = [Node | State#state.going_down]}};
+ Node /= node() ->
+ {noreply, State#state{mq = State#state.mq ++ [{info, Msg}]}};
+ true ->
+ %% We have probably got an exit signal from
+ %% disk_log or dets
+ Hint = "Hint: check that the disk still is writable",
+ fatal("~p got unexpected info: ~p; ~p~n",
+ [?MODULE, Msg, Hint])
+ end;
+
+handle_info({protocol_negotiated, From,Res}, State) ->
+ From = element(1,State#state.connecting),
+ gen_server:reply(From, Res),
+ process_q(State#state{connecting = undefined});
+
+handle_info({nodeup, Node}, State) ->
+ %% Ok, we are connected to yet another Erlang node
+ %% Let's check if Mnesia is running there in order
+ %% to detect if the network has been partitioned
+ %% due to communication failure.
+
+ HasDown = mnesia_recover:has_mnesia_down(Node),
+ ImRunning = mnesia_lib:is_running(),
+
+ if
+ %% If I'm not running the test will be made later.
+ HasDown == true, ImRunning == yes ->
+ spawn_link(?MODULE, detect_partitioned_network, [self(), Node]);
+ true ->
+ ignore
+ end,
+ {noreply, State};
+
+handle_info({nodedown, _Node}, State) ->
+ %% Ignore, we are only caring about nodeup's
+ {noreply, State};
+
+handle_info({disk_log, _Node, Log, Info}, State) ->
+ case Info of
+ {truncated, _No} ->
+ ok;
+ _ ->
+ mnesia_lib:important("Warning Log file ~p error reason ~s~n",
+ [Log, disk_log:format_error(Info)])
+ end,
+ {noreply, State};
+
+handle_info(Msg, State) ->
+ error("~p got unexpected info (~p): ~p~n", [?MODULE, State, Msg]).
+
+process_q(State = #state{mq=[]}) -> {noreply,State};
+process_q(State = #state{mq=[{info,Msg}|R]}) ->
+ handle_info(Msg, State#state{mq=R});
+process_q(State = #state{mq=[{cast,Msg}|R]}) ->
+ handle_cast(Msg, State#state{mq=R});
+process_q(State = #state{mq=[{call,From,Msg}|R]}) ->
+ handle_call(Msg, From, State#state{mq=R}).
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+terminate(Reason, State) ->
+ terminate_proc(?MODULE, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+
+
+code_change(_, {state, SUP, PN, GD, TMS, EC}, _) ->
+ {ok, #state{supervisor=SUP, pending_negotiators=PN,
+ going_down = GD, tm_started =TMS, early_connects = EC}};
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+process_config_args([]) ->
+ ok;
+process_config_args([C|T]) ->
+ V = get_env(C),
+ dbg_out("Env ~p: ~p~n", [C, V]),
+ mnesia_lib:set(C, V),
+ process_config_args(T).
+
+set_env(E,Val) ->
+ mnesia_lib:set(E, check_type(E,Val)),
+ ok.
+
+get_env(E) ->
+ case ?catch_val(E) of
+ {'EXIT', _} ->
+ case application:get_env(mnesia, E) of
+ {ok, Val} ->
+ check_type(E, Val);
+ undefined ->
+ check_type(E, default_env(E))
+ end;
+ Val ->
+ Val
+ end.
+
+env() ->
+ [
+ access_module,
+ auto_repair,
+ backup_module,
+ debug,
+ dir,
+ dump_log_load_regulation,
+ dump_log_time_threshold,
+ dump_log_update_in_place,
+ dump_log_write_threshold,
+ embedded_mnemosyne,
+ event_module,
+ extra_db_nodes,
+ ignore_fallback_at_startup,
+ fallback_error_function,
+ max_wait_for_decision,
+ schema_location,
+ core_dir,
+ pid_sort_order,
+ no_table_loaders,
+ dc_dump_limit
+ ].
+
+default_env(access_module) ->
+ mnesia;
+default_env(auto_repair) ->
+ true;
+default_env(backup_module) ->
+ mnesia_backup;
+default_env(debug) ->
+ none;
+default_env(dir) ->
+ Name = lists:concat(["Mnesia.", node()]),
+ filename:absname(Name);
+default_env(dump_log_load_regulation) ->
+ false;
+default_env(dump_log_time_threshold) ->
+ timer:minutes(3);
+default_env(dump_log_update_in_place) ->
+ true;
+default_env(dump_log_write_threshold) ->
+ 1000;
+default_env(embedded_mnemosyne) ->
+ false;
+default_env(event_module) ->
+ mnesia_event;
+default_env(extra_db_nodes) ->
+ [];
+default_env(ignore_fallback_at_startup) ->
+ false;
+default_env(fallback_error_function) ->
+ {mnesia, lkill};
+default_env(max_wait_for_decision) ->
+ infinity;
+default_env(schema_location) ->
+ opt_disc;
+default_env(core_dir) ->
+ false;
+default_env(pid_sort_order) ->
+ false;
+default_env(no_table_loaders) ->
+ 2;
+default_env(dc_dump_limit) ->
+ 4.
+
+check_type(Env, Val) ->
+ case catch do_check_type(Env, Val) of
+ {'EXIT', _Reason} ->
+ exit({bad_config, Env, Val});
+ NewVal ->
+ NewVal
+ end.
+
+do_check_type(access_module, A) when is_atom(A) -> A;
+do_check_type(auto_repair, B) -> bool(B);
+do_check_type(backup_module, B) when is_atom(B) -> B;
+do_check_type(debug, debug) -> debug;
+do_check_type(debug, false) -> none;
+do_check_type(debug, none) -> none;
+do_check_type(debug, trace) -> trace;
+do_check_type(debug, true) -> debug;
+do_check_type(debug, verbose) -> verbose;
+do_check_type(dir, V) -> filename:absname(V);
+do_check_type(dump_log_load_regulation, B) -> bool(B);
+do_check_type(dump_log_time_threshold, I) when is_integer(I), I > 0 -> I;
+do_check_type(dump_log_update_in_place, B) -> bool(B);
+do_check_type(dump_log_write_threshold, I) when is_integer(I), I > 0 -> I;
+do_check_type(event_module, A) when is_atom(A) -> A;
+do_check_type(ignore_fallback_at_startup, B) -> bool(B);
+do_check_type(fallback_error_function, {Mod, Func})
+ when is_atom(Mod), is_atom(Func) -> {Mod, Func};
+do_check_type(embedded_mnemosyne, B) -> bool(B);
+do_check_type(extra_db_nodes, L) when is_list(L) ->
+ Fun = fun(N) when N == node() -> false;
+ (A) when is_atom(A) -> true
+ end,
+ lists:filter(Fun, L);
+do_check_type(max_wait_for_decision, infinity) -> infinity;
+do_check_type(max_wait_for_decision, I) when is_integer(I), I > 0 -> I;
+do_check_type(schema_location, M) -> media(M);
+do_check_type(core_dir, "false") -> false;
+do_check_type(core_dir, false) -> false;
+do_check_type(core_dir, Dir) when is_list(Dir) -> Dir;
+do_check_type(pid_sort_order, r9b_plain) -> r9b_plain;
+do_check_type(pid_sort_order, "r9b_plain") -> r9b_plain;
+do_check_type(pid_sort_order, standard) -> standard;
+do_check_type(pid_sort_order, "standard") -> standard;
+do_check_type(pid_sort_order, _) -> false;
+do_check_type(no_table_loaders, N) when is_integer(N), N > 0 -> N;
+do_check_type(dc_dump_limit,N) when is_number(N), N > 0 -> N.
+
+bool(true) -> true;
+bool(false) -> false.
+
+media(disc) -> disc;
+media(opt_disc) -> opt_disc;
+media(ram) -> ram.
+
+patch_env(Env, Val) ->
+ case catch do_check_type(Env, Val) of
+ {'EXIT', _Reason} ->
+ {error, {bad_type, Env, Val}};
+ NewVal ->
+ application_controller:set_env(mnesia, Env, NewVal),
+ NewVal
+ end.
+
+detect_partitioned_network(Mon, Node) ->
+ detect_inconcistency([Node], running_partitioned_network),
+ unlink(Mon),
+ exit(normal).
+
+detect_inconcistency([], _Context) ->
+ ok;
+detect_inconcistency(Nodes, Context) ->
+ Downs = [N || N <- Nodes, mnesia_recover:has_mnesia_down(N)],
+ {Replies, _BadNodes} =
+ rpc:multicall(Downs, ?MODULE, has_remote_mnesia_down, [node()]),
+ report_inconsistency(Replies, Context, ok).
+
+has_remote_mnesia_down(Node) ->
+ HasDown = mnesia_recover:has_mnesia_down(Node),
+ Master = mnesia_recover:get_master_nodes(schema),
+ if
+ HasDown == true, Master == [] ->
+ {true, node()};
+ true ->
+ {false, node()}
+ end.
+
+report_inconsistency([{true, Node} | Replies], Context, _Status) ->
+ %% Oops, Mnesia is already running on the
+ %% other node AND we both regard each
+ %% other as down. The database is
+ %% potentially inconsistent and we has to
+ %% do tell the applications about it, so
+ %% they may perform some clever recovery
+ %% action.
+ Msg = {inconsistent_database, Context, Node},
+ mnesia_lib:report_system_event(Msg),
+ report_inconsistency(Replies, Context, inconsistent_database);
+report_inconsistency([{false, _Node} | Replies], Context, Status) ->
+ report_inconsistency(Replies, Context, Status);
+report_inconsistency([{badrpc, _Reason} | Replies], Context, Status) ->
+ report_inconsistency(Replies, Context, Status);
+report_inconsistency([], _Context, Status) ->
+ Status.
diff --git a/lib/mnesia/src/mnesia_recover.erl b/lib/mnesia/src/mnesia_recover.erl
new file mode 100644
index 0000000000..6c53c2e752
--- /dev/null
+++ b/lib/mnesia/src/mnesia_recover.erl
@@ -0,0 +1,1196 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_recover).
+
+-behaviour(gen_server).
+
+-export([
+ allow_garb/0,
+ call/1,
+ connect_nodes/1,
+ disconnect/1,
+ dump_decision_tab/0,
+ get_master_node_info/0,
+ get_master_node_tables/0,
+ get_master_nodes/1,
+ get_mnesia_downs/0,
+ has_mnesia_down/1,
+ incr_trans_tid_serial/0,
+ init/0,
+ log_decision/1,
+ log_master_nodes/3,
+ log_mnesia_down/1,
+ log_mnesia_up/1,
+ mnesia_down/1,
+ note_decision/2,
+ note_log_decision/2,
+ outcome/2,
+ start/0,
+ start_garb/0,
+ still_pending/1,
+ sync_trans_tid_serial/1,
+ sync/0,
+ wait_for_decision/2,
+ what_happened/3
+ ]).
+
+%% gen_server callbacks
+-export([init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3
+ ]).
+
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [set/2, verbose/2, error/2, fatal/2]).
+
+-record(state, {supervisor,
+ unclear_pid,
+ unclear_decision,
+ unclear_waitfor,
+ tm_queue_len = 0,
+ initiated = false,
+ early_msgs = []
+ }).
+
+%%-define(DBG(F, A), mnesia:report_event(list_to_atom(lists:flatten(io_lib:format(F, A))))).
+%%-define(DBG(F, A), io:format("DBG: " ++ F, A)).
+
+-record(transient_decision, {tid, outcome}).
+
+start() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [self()],
+ [{timeout, infinity}
+ %%, {debug, [trace]}
+ ]).
+
+init() ->
+ call(init).
+
+start_garb() ->
+ Pid = whereis(mnesia_recover),
+ {ok, _} = timer:send_interval(timer:minutes(2), Pid, garb_decisions),
+ {ok, _} = timer:send_interval(timer:seconds(10), Pid, check_overload).
+
+allow_garb() ->
+ cast(allow_garb).
+
+
+%% The transaction log has either been swiched (latest -> previous) or
+%% there is nothing to be dumped. This means that the previous
+%% transaction log only may contain commit records which refers to
+%% transactions noted in the last two of the 'Prev' tables. All other
+%% tables may now be garbed by 'garb_decisions' (after 2 minutes).
+%% Max 10 tables are kept.
+do_allow_garb() ->
+ %% The order of the following stuff is important!
+ Curr = val(latest_transient_decision),
+ %% Don't garb small tables, they are created on every
+ %% dump_log and may be small (empty) for schema transactions
+ %% which are dumped twice
+ case ets:info(Curr, size) > 20 of
+ true ->
+ Old = val(previous_transient_decisions),
+ Next = create_transient_decision(),
+ {Prev, ReallyOld} = sublist([Curr | Old], 10, []),
+ [?ets_delete_table(Tab) || Tab <- ReallyOld],
+ set(previous_transient_decisions, Prev),
+ set(latest_transient_decision, Next);
+ false ->
+ ignore
+ end.
+
+sublist([H|R], N, Acc) when N > 0 ->
+ sublist(R, N-1, [H| Acc]);
+sublist(List, _N, Acc) ->
+ {lists:reverse(Acc), List}.
+
+do_garb_decisions() ->
+ case val(previous_transient_decisions) of
+ [First, Second | Rest] ->
+ set(previous_transient_decisions, [First, Second]),
+ [?ets_delete_table(Tab) || Tab <- Rest];
+ _ ->
+ ignore
+ end.
+
+connect_nodes(Ns) ->
+ call({connect_nodes, Ns}).
+
+disconnect(Node) ->
+ call({disconnect, Node}).
+
+log_decision(D) ->
+ cast({log_decision, D}).
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
+
+call(Msg) ->
+ Pid = whereis(?MODULE),
+ case Pid of
+ undefined ->
+ {error, {node_not_running, node()}};
+ Pid ->
+ link(Pid),
+ Res = gen_server:call(Pid, Msg, infinity),
+ unlink(Pid),
+
+ %% We get an exit signal if server dies
+ receive
+ {'EXIT', Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ Res
+ end
+ end.
+
+multicall(Nodes, Msg) ->
+ rpc:multicall(Nodes, ?MODULE, call, [Msg]).
+
+cast(Msg) ->
+ case whereis(?MODULE) of
+ undefined -> ignore;
+ Pid -> gen_server:cast(Pid, Msg)
+ end.
+
+abcast(Nodes, Msg) ->
+ gen_server:abcast(Nodes, ?MODULE, Msg).
+
+note_decision(Tid, Outcome) ->
+ Tab = val(latest_transient_decision),
+ ?ets_insert(Tab, #transient_decision{tid = Tid, outcome = Outcome}).
+
+note_up(Node, _Date, _Time) ->
+ ?ets_delete(mnesia_decision, Node).
+
+note_down(Node, Date, Time) ->
+ ?ets_insert(mnesia_decision, {mnesia_down, Node, Date, Time}).
+
+note_master_nodes(Tab, []) ->
+ ?ets_delete(mnesia_decision, Tab);
+note_master_nodes(Tab, Nodes) when is_list(Nodes) ->
+ Master = {master_nodes, Tab, Nodes},
+ ?ets_insert(mnesia_decision, Master).
+
+note_outcome(D) when D#decision.disc_nodes == [] ->
+%% ?DBG("~w: note_tmp_decision: ~w~n", [node(), D]),
+ note_decision(D#decision.tid, filter_outcome(D#decision.outcome)),
+ ?ets_delete(mnesia_decision, D#decision.tid);
+note_outcome(D) when D#decision.disc_nodes /= [] ->
+%% ?DBG("~w: note_decision: ~w~n", [node(), D]),
+ ?ets_insert(mnesia_decision, D).
+
+do_log_decision(D) when D#decision.outcome /= unclear ->
+ OldD = decision(D#decision.tid),
+ MergedD = merge_decisions(node(), OldD, D),
+ do_log_decision(MergedD, true, D);
+do_log_decision(D) ->
+ do_log_decision(D, false, undefined).
+
+do_log_decision(D, DoTell, NodeD) ->
+ DiscNs = D#decision.disc_nodes -- [node()],
+ Outcome = D#decision.outcome,
+ D2 =
+ case Outcome of
+ aborted -> D#decision{disc_nodes = DiscNs};
+ committed -> D#decision{disc_nodes = DiscNs};
+ _ -> D
+ end,
+ note_outcome(D2),
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_log:append(latest_log, D2),
+ if
+ DoTell == true, Outcome /= unclear ->
+ tell_im_certain(NodeD#decision.disc_nodes--[node()],D2),
+ tell_im_certain(NodeD#decision.ram_nodes--[node()], D2);
+ true ->
+ ignore
+ end;
+ false ->
+ ignore
+ end.
+
+tell_im_certain([], _D) ->
+ ignore;
+tell_im_certain(Nodes, D) ->
+ Msg = {im_certain, node(), D},
+ %% mnesia_lib:verbose("~w: tell: ~w~n", [Msg, Nodes]),
+ abcast(Nodes, Msg).
+
+sync() ->
+ call(sync).
+
+log_mnesia_up(Node) ->
+ call({log_mnesia_up, Node}).
+
+log_mnesia_down(Node) ->
+ call({log_mnesia_down, Node}).
+
+get_mnesia_downs() ->
+ Tab = mnesia_decision,
+ Pat = {mnesia_down, '_', '_', '_'},
+ Downs = ?ets_match_object(Tab, Pat),
+ [Node || {mnesia_down, Node, _Date, _Time} <- Downs].
+
+%% Check if we have got a mnesia_down from Node
+has_mnesia_down(Node) ->
+ case ?ets_lookup(mnesia_decision, Node) of
+ [{mnesia_down, Node, _Date, _Time}] ->
+ true;
+ [] ->
+ false
+ end.
+
+mnesia_down(Node) ->
+ case ?catch_val(recover_nodes) of
+ {'EXIT', _} ->
+ %% Not started yet
+ ignore;
+ _ ->
+ mnesia_lib:del(recover_nodes, Node),
+ cast({mnesia_down, Node})
+ end.
+
+log_master_nodes(Args, UseDir, IsRunning) ->
+ if
+ IsRunning == yes ->
+ log_master_nodes2(Args, UseDir, IsRunning, ok);
+ UseDir == false ->
+ ok;
+ true ->
+ Name = latest_log,
+ Fname = mnesia_log:latest_log_file(),
+ Exists = mnesia_lib:exists(Fname),
+ Repair = mnesia:system_info(auto_repair),
+ OpenArgs = [{file, Fname}, {name, Name}, {repair, Repair}],
+ case disk_log:open(OpenArgs) of
+ {ok, Name} ->
+ log_master_nodes2(Args, UseDir, IsRunning, ok);
+ {repaired, Name, {recovered, _R}, {badbytes, _B}}
+ when Exists == true ->
+ log_master_nodes2(Args, UseDir, IsRunning, ok);
+ {repaired, Name, {recovered, _R}, {badbytes, _B}}
+ when Exists == false ->
+ mnesia_log:write_trans_log_header(),
+ log_master_nodes2(Args, UseDir, IsRunning, ok);
+ {error, Reason} ->
+ {error, Reason}
+ end
+ end.
+
+log_master_nodes2([{Tab, Nodes} | Tail], UseDir, IsRunning, WorstRes) ->
+ Res =
+ case IsRunning of
+ yes ->
+ R = call({log_master_nodes, Tab, Nodes, UseDir, IsRunning}),
+ mnesia_controller:master_nodes_updated(Tab, Nodes),
+ R;
+ _ ->
+ do_log_master_nodes(Tab, Nodes, UseDir, IsRunning)
+ end,
+ case Res of
+ ok ->
+ log_master_nodes2(Tail, UseDir, IsRunning, WorstRes);
+ {error, Reason} ->
+ log_master_nodes2(Tail, UseDir, IsRunning, {error, Reason})
+ end;
+log_master_nodes2([], _UseDir, IsRunning, WorstRes) ->
+ case IsRunning of
+ yes ->
+ WorstRes;
+ _ ->
+ disk_log:close(latest_log),
+ WorstRes
+ end.
+
+get_master_node_info() ->
+ Tab = mnesia_decision,
+ Pat = {master_nodes, '_', '_'},
+ case catch mnesia_lib:db_match_object(ram_copies,Tab, Pat) of
+ {'EXIT', _} ->
+ [];
+ Masters ->
+ Masters
+ end.
+
+get_master_node_tables() ->
+ Masters = get_master_node_info(),
+ [Tab || {master_nodes, Tab, _Nodes} <- Masters].
+
+get_master_nodes(Tab) ->
+ case catch ?ets_lookup_element(mnesia_decision, Tab, 3) of
+ {'EXIT', _} -> [];
+ Nodes -> Nodes
+ end.
+
+%% Determine what has happened to the transaction
+what_happened(Tid, Protocol, Nodes) ->
+ Default =
+ case Protocol of
+ asym_trans -> aborted;
+ _ -> unclear %% sym_trans and sync_sym_trans
+ end,
+ This = node(),
+ case lists:member(This, Nodes) of
+ true ->
+ {ok, Outcome} = call({what_happened, Default, Tid}),
+ Others = Nodes -- [This],
+ case filter_outcome(Outcome) of
+ unclear -> what_happened_remotely(Tid, Default, Others);
+ aborted -> aborted;
+ committed -> committed
+ end;
+ false ->
+ what_happened_remotely(Tid, Default, Nodes)
+ end.
+
+what_happened_remotely(Tid, Default, Nodes) ->
+ {Replies, _} = multicall(Nodes, {what_happened, Default, Tid}),
+ check_what_happened(Replies, 0, 0).
+
+check_what_happened([H | T], Aborts, Commits) ->
+ case H of
+ {ok, R} ->
+ case filter_outcome(R) of
+ committed ->
+ check_what_happened(T, Aborts, Commits + 1);
+ aborted ->
+ check_what_happened(T, Aborts + 1, Commits);
+ unclear ->
+ check_what_happened(T, Aborts, Commits)
+ end;
+ {error, _} ->
+ check_what_happened(T, Aborts, Commits);
+ {badrpc, _} ->
+ check_what_happened(T, Aborts, Commits)
+ end;
+check_what_happened([], Aborts, Commits) ->
+ if
+ Aborts == 0, Commits == 0 -> aborted; % None of the active nodes knows
+ Aborts > 0 -> aborted; % Someody has aborted
+ Aborts == 0, Commits > 0 -> committed % All has committed
+ end.
+
+%% Determine what has happened to the transaction
+%% and possibly wait forever for the decision.
+wait_for_decision(presume_commit, _InitBy) ->
+ %% sym_trans
+ {{presume_commit, self()}, committed};
+
+wait_for_decision(D, InitBy) when D#decision.outcome == presume_abort ->
+ wait_for_decision(D, InitBy, 0).
+
+wait_for_decision(D, InitBy, N) ->
+ %% asym_trans
+ Tid = D#decision.tid,
+ Max = 10,
+ Outcome = outcome(Tid, D#decision.outcome),
+ if
+ Outcome =:= committed -> {Tid, committed};
+ Outcome =:= aborted -> {Tid, aborted};
+ Outcome =:= presume_abort ->
+ case N > Max of
+ true -> {Tid, aborted};
+ false -> % busy loop for ets decision moving
+ timer:sleep(10),
+ wait_for_decision(D, InitBy, N+1)
+ end;
+ InitBy /= startup ->
+ %% Wait a while for active transactions
+ %% to end and try again
+ timer:sleep(100),
+ wait_for_decision(D, InitBy, N);
+ InitBy == startup ->
+ {ok, Res} = call({wait_for_decision, D}),
+ {Tid, Res}
+ end.
+
+still_pending([Tid | Pending]) ->
+ case filter_outcome(outcome(Tid, unclear)) of
+ unclear -> [Tid | still_pending(Pending)];
+ _ -> still_pending(Pending)
+ end;
+still_pending([]) ->
+ [].
+
+load_decision_tab() ->
+ Cont = mnesia_log:open_decision_tab(),
+ load_decision_tab(Cont, load_decision_tab),
+ mnesia_log:close_decision_tab().
+
+load_decision_tab(eof, _InitBy) ->
+ ok;
+load_decision_tab(Cont, InitBy) ->
+ case mnesia_log:chunk_decision_tab(Cont) of
+ {Cont2, Decisions} ->
+ note_log_decisions(Decisions, InitBy),
+ load_decision_tab(Cont2, InitBy);
+ eof ->
+ ok
+ end.
+
+%% Dumps DECISION.LOG and PDECISION.LOG and removes them.
+%% From now on all decisions are logged in the transaction log file
+convert_old() ->
+ HasOldStuff =
+ mnesia_lib:exists(mnesia_log:previous_decision_log_file()) or
+ mnesia_lib:exists(mnesia_log:decision_log_file()),
+ case HasOldStuff of
+ true ->
+ mnesia_log:open_decision_log(),
+ dump_decision_log(startup),
+ dump_decision_log(startup),
+ mnesia_log:close_decision_log(),
+ Latest = mnesia_log:decision_log_file(),
+ ok = file:delete(Latest);
+ false ->
+ ignore
+ end.
+
+dump_decision_log(InitBy) ->
+ %% Assumed to be run in transaction log dumper process
+ Cont = mnesia_log:prepare_decision_log_dump(),
+ perform_dump_decision_log(Cont, InitBy).
+
+perform_dump_decision_log(eof, _InitBy) ->
+ confirm_decision_log_dump();
+perform_dump_decision_log(Cont, InitBy) when InitBy == startup ->
+ case mnesia_log:chunk_decision_log(Cont) of
+ {Cont2, Decisions} ->
+ note_log_decisions(Decisions, InitBy),
+ perform_dump_decision_log(Cont2, InitBy);
+ eof ->
+ confirm_decision_log_dump()
+ end;
+perform_dump_decision_log(_Cont, _InitBy) ->
+ confirm_decision_log_dump().
+
+confirm_decision_log_dump() ->
+ dump_decision_tab(),
+ mnesia_log:confirm_decision_log_dump().
+
+dump_decision_tab() ->
+ Tab = mnesia_decision,
+ All = mnesia_lib:db_match_object(ram_copies,Tab, '_'),
+ mnesia_log:save_decision_tab({decision_list, All}).
+
+note_log_decisions([What | Tail], InitBy) ->
+ note_log_decision(What, InitBy),
+ note_log_decisions(Tail, InitBy);
+note_log_decisions([], _InitBy) ->
+ ok.
+
+note_log_decision(NewD, InitBy) when NewD#decision.outcome == pre_commit ->
+ note_log_decision(NewD#decision{outcome = unclear}, InitBy);
+
+note_log_decision(NewD, _InitBy) when is_record(NewD, decision) ->
+ Tid = NewD#decision.tid,
+ sync_trans_tid_serial(Tid),
+ note_outcome(NewD);
+note_log_decision({trans_tid, serial, _Serial}, startup) ->
+ ignore;
+note_log_decision({trans_tid, serial, Serial}, _InitBy) ->
+ sync_trans_tid_serial(Serial);
+note_log_decision({mnesia_up, Node, Date, Time}, _InitBy) ->
+ note_up(Node, Date, Time);
+note_log_decision({mnesia_down, Node, Date, Time}, _InitBy) ->
+ note_down(Node, Date, Time);
+note_log_decision({master_nodes, Tab, Nodes}, _InitBy) ->
+ note_master_nodes(Tab, Nodes);
+note_log_decision(H, _InitBy) when H#log_header.log_kind == decision_log ->
+ V = mnesia_log:decision_log_version(),
+ if
+ H#log_header.log_version == V->
+ ok;
+ H#log_header.log_version == "2.0" ->
+ verbose("Accepting an old version format of decision log: ~p~n",
+ [V]),
+ ok;
+ true ->
+ fatal("Bad version of decision log: ~p~n", [H])
+ end;
+note_log_decision(H, _InitBy) when H#log_header.log_kind == decision_tab ->
+ V = mnesia_log:decision_tab_version(),
+ if
+ V == H#log_header.log_version ->
+ ok;
+ true ->
+ fatal("Bad version of decision tab: ~p~n", [H])
+ end;
+note_log_decision({decision_list, ItemList}, InitBy) ->
+ note_log_decisions(ItemList, InitBy);
+note_log_decision(BadItem, InitBy) ->
+ exit({"Bad decision log item", BadItem, InitBy}).
+
+trans_tid_serial() ->
+ ?ets_lookup_element(mnesia_decision, serial, 3).
+
+set_trans_tid_serial(Val) ->
+ ?ets_insert(mnesia_decision, {trans_tid, serial, Val}).
+
+incr_trans_tid_serial() ->
+ ?ets_update_counter(mnesia_decision, serial, 1).
+
+sync_trans_tid_serial(ThatCounter) when is_integer(ThatCounter) ->
+ ThisCounter = trans_tid_serial(),
+ if
+ ThatCounter > ThisCounter ->
+ set_trans_tid_serial(ThatCounter + 1);
+ true ->
+ ignore
+ end;
+sync_trans_tid_serial(Tid) ->
+ sync_trans_tid_serial(Tid#tid.counter).
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Callback functions from gen_server
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) ->
+ process_flag(trap_exit, true),
+ mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
+ set(latest_transient_decision, create_transient_decision()),
+ set(previous_transient_decisions, []),
+ set(recover_nodes, []),
+ State = #state{supervisor = Parent},
+ {ok, State}.
+
+create_transient_decision() ->
+ ?ets_new_table(mnesia_transient_decision, [{keypos, 2}, set, public]).
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_call(init, From, State) when State#state.initiated == false ->
+ Args = [{keypos, 2}, set, public, named_table],
+ case mnesia_monitor:use_dir() of
+ true ->
+ ?ets_new_table(mnesia_decision, Args),
+ set_trans_tid_serial(0),
+ TabFile = mnesia_log:decision_tab_file(),
+ case mnesia_lib:exists(TabFile) of
+ true ->
+ load_decision_tab();
+ false ->
+ ignore
+ end,
+ convert_old(),
+ mnesia_dumper:opt_dump_log(scan_decisions);
+ false ->
+ ?ets_new_table(mnesia_decision, Args),
+ set_trans_tid_serial(0)
+ end,
+ handle_early_msgs(State, From);
+
+handle_call(Msg, From, State) when State#state.initiated == false ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ {noreply, State#state{early_msgs = [{call, Msg, From} | Msgs]}};
+
+handle_call({disconnect, Node}, _From, State) ->
+ mnesia_monitor:disconnect(Node),
+ mnesia_lib:del(recover_nodes, Node),
+ {reply, ok, State};
+
+handle_call({connect_nodes, Ns}, From, State) ->
+ %% Determine which nodes we should try to connect
+ AlreadyConnected = val(recover_nodes),
+ {_, Nodes} = mnesia_lib:search_delete(node(), Ns),
+ Check = Nodes -- AlreadyConnected,
+ case mnesia_monitor:negotiate_protocol(Check) of
+ busy ->
+ %% monitor is disconnecting some nodes retry
+ %% the req (to avoid deadlock).
+ erlang:send_after(2, self(), {connect_nodes,Ns,From}),
+ {noreply, State};
+ [] ->
+ %% No good noodes to connect to!
+ %% We can't use reply here because this function can be
+ %% called from handle_info
+ gen_server:reply(From, {[], AlreadyConnected}),
+ {noreply, State};
+ GoodNodes ->
+ %% Now we have agreed upon a protocol with some new nodes
+ %% and we may use them when we recover transactions
+ mnesia_lib:add_list(recover_nodes, GoodNodes),
+ cast({announce_all, GoodNodes}),
+ case get_master_nodes(schema) of
+ [] ->
+ Context = starting_partitioned_network,
+ mnesia_monitor:detect_inconcistency(GoodNodes, Context);
+ _ -> %% If master_nodes is set ignore old inconsistencies
+ ignore
+ end,
+ gen_server:reply(From, {GoodNodes, AlreadyConnected}),
+ {noreply,State}
+ end;
+
+handle_call({what_happened, Default, Tid}, _From, State) ->
+ sync_trans_tid_serial(Tid),
+ Outcome = outcome(Tid, Default),
+ {reply, {ok, Outcome}, State};
+
+handle_call({wait_for_decision, D}, From, State) ->
+ Recov = val(recover_nodes),
+ AliveRam = (mnesia_lib:intersect(D#decision.ram_nodes, Recov) -- [node()]),
+ RemoteDisc = D#decision.disc_nodes -- [node()],
+ if
+ AliveRam == [], RemoteDisc == [] ->
+ %% No more else to wait for and we may safely abort
+ {reply, {ok, aborted}, State};
+ true ->
+ verbose("Transaction ~p is unclear. "
+ "Wait for disc nodes: ~w ram: ~w~n",
+ [D#decision.tid, RemoteDisc, AliveRam]),
+ AliveDisc = mnesia_lib:intersect(RemoteDisc, Recov),
+ Msg = {what_decision, node(), D},
+ abcast(AliveRam, Msg),
+ abcast(AliveDisc, Msg),
+ case val(max_wait_for_decision) of
+ infinity ->
+ ignore;
+ MaxWait ->
+ ForceMsg = {force_decision, D#decision.tid},
+ {ok, _} = timer:send_after(MaxWait, ForceMsg)
+ end,
+ State2 = State#state{unclear_pid = From,
+ unclear_decision = D,
+ unclear_waitfor = (RemoteDisc ++ AliveRam)},
+ {noreply, State2}
+ end;
+
+handle_call({log_mnesia_up, Node}, _From, State) ->
+ do_log_mnesia_up(Node),
+ {reply, ok, State};
+
+handle_call({log_mnesia_down, Node}, _From, State) ->
+ do_log_mnesia_down(Node),
+ {reply, ok, State};
+
+handle_call({log_master_nodes, Tab, Nodes, UseDir, IsRunning}, _From, State) ->
+ do_log_master_nodes(Tab, Nodes, UseDir, IsRunning),
+ {reply, ok, State};
+
+handle_call(sync, _From, State) ->
+ {reply, ok, State};
+
+handle_call(Msg, _From, State) ->
+ error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+do_log_mnesia_up(Node) ->
+ Yoyo = {mnesia_up, Node, Date = date(), Time = time()},
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_log:append(latest_log, Yoyo),
+ disk_log:sync(latest_log);
+ false ->
+ ignore
+ end,
+ note_up(Node, Date, Time).
+
+do_log_mnesia_down(Node) ->
+ Yoyo = {mnesia_down, Node, Date = date(), Time = time()},
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_log:append(latest_log, Yoyo),
+ disk_log:sync(latest_log);
+ false ->
+ ignore
+ end,
+ note_down(Node, Date, Time).
+
+do_log_master_nodes(Tab, Nodes, UseDir, IsRunning) ->
+ Master = {master_nodes, Tab, Nodes},
+ Res =
+ case UseDir of
+ true ->
+ LogRes = mnesia_log:append(latest_log, Master),
+ disk_log:sync(latest_log),
+ LogRes;
+ false ->
+ ok
+ end,
+ case IsRunning of
+ yes ->
+ note_master_nodes(Tab, Nodes);
+ _NotRunning ->
+ ignore
+ end,
+ Res.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_cast(Msg, State) when State#state.initiated == false ->
+ %% Buffer early messages
+ Msgs = State#state.early_msgs,
+ {noreply, State#state{early_msgs = [{cast, Msg} | Msgs]}};
+
+handle_cast({im_certain, Node, NewD}, State) ->
+ OldD = decision(NewD#decision.tid),
+ MergedD = merge_decisions(Node, OldD, NewD),
+ do_log_decision(MergedD, false, undefined),
+ {noreply, State};
+
+handle_cast({log_decision, D}, State) ->
+ do_log_decision(D),
+ {noreply, State};
+
+handle_cast(allow_garb, State) ->
+ do_allow_garb(),
+ {noreply, State};
+
+handle_cast({decisions, Node, Decisions}, State) ->
+ mnesia_lib:add(recover_nodes, Node),
+ State2 = add_remote_decisions(Node, Decisions, State),
+ {noreply, State2};
+
+handle_cast({what_decision, Node, OtherD}, State) ->
+ Tid = OtherD#decision.tid,
+ sync_trans_tid_serial(Tid),
+ Decision =
+ case decision(Tid) of
+ no_decision -> OtherD;
+ MyD when is_record(MyD, decision) -> MyD
+ end,
+ announce([Node], [Decision], [], true),
+ {noreply, State};
+
+handle_cast({mnesia_down, Node}, State) ->
+ case State#state.unclear_decision of
+ undefined ->
+ {noreply, State};
+ D ->
+ case lists:member(Node, D#decision.ram_nodes) of
+ false ->
+ {noreply, State};
+ true ->
+ State2 = add_remote_decision(Node, D, State),
+ {noreply, State2}
+ end
+ end;
+
+handle_cast({announce_all, Nodes}, State) ->
+ announce_all(Nodes),
+ {noreply, State};
+
+handle_cast(Msg, State) ->
+ error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+%% No need for buffering
+%% handle_info(Msg, State) when State#state.initiated == false ->
+%% %% Buffer early messages
+%% Msgs = State#state.early_msgs,
+%% {noreply, State#state{early_msgs = [{info, Msg} | Msgs]}};
+
+handle_info({connect_nodes, Ns, From}, State) ->
+ handle_call({connect_nodes,Ns},From,State);
+
+handle_info(check_overload, S) ->
+ %% Time to check if mnesia_tm is overloaded
+ case whereis(mnesia_tm) of
+ Pid when is_pid(Pid) ->
+
+ Threshold = 100,
+ Prev = S#state.tm_queue_len,
+ {message_queue_len, Len} =
+ process_info(Pid, message_queue_len),
+ if
+ Len > Threshold, Prev > Threshold ->
+ What = {mnesia_tm, message_queue_len, [Prev, Len]},
+ mnesia_lib:report_system_event({mnesia_overload, What}),
+ {noreply, S#state{tm_queue_len = 0}};
+
+ Len > Threshold ->
+ {noreply, S#state{tm_queue_len = Len}};
+
+ true ->
+ {noreply, S#state{tm_queue_len = 0}}
+ end;
+ undefined ->
+ {noreply, S}
+ end;
+
+handle_info(garb_decisions, State) ->
+ do_garb_decisions(),
+ {noreply, State};
+
+handle_info({force_decision, Tid}, State) ->
+ %% Enforce a transaction recovery decision,
+ %% if we still are waiting for the outcome
+
+ case State#state.unclear_decision of
+ U when U#decision.tid == Tid ->
+ verbose("Decided to abort transaction ~p since "
+ "max_wait_for_decision has been exceeded~n",
+ [Tid]),
+ D = U#decision{outcome = aborted},
+ State2 = add_remote_decision(node(), D, State),
+ {noreply, State2};
+ _ ->
+ {noreply, State}
+ end;
+
+handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
+ mnesia_lib:dbg_out("~p was ~p~n",[?MODULE, R]),
+ {stop, shutdown, State};
+
+handle_info(Msg, State) ->
+ error("~p got unexpected info: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+
+terminate(Reason, State) ->
+ mnesia_monitor:terminate_proc(?MODULE, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+handle_early_msgs(State, From) ->
+ Res = do_handle_early_msgs(State#state.early_msgs,
+ State#state{early_msgs = [],
+ initiated = true}),
+ gen_server:reply(From, ok),
+ Res.
+
+do_handle_early_msgs([Msg | Msgs], State) ->
+ %% The messages are in reverted order
+ case do_handle_early_msgs(Msgs, State) of
+%% {stop, Reason, Reply, State2} ->
+%% {stop, Reason, Reply, State2};
+ {stop, Reason, State2} ->
+ {stop, Reason, State2};
+ {noreply, State2} ->
+ handle_early_msg(Msg, State2)
+ end;
+
+do_handle_early_msgs([], State) ->
+ {noreply, State}.
+
+handle_early_msg({call, Msg, From}, State) ->
+ case handle_call(Msg, From, State) of
+ {reply, R, S} ->
+ gen_server:reply(From, R),
+ {noreply, S};
+ Other ->
+ Other
+ end;
+handle_early_msg({cast, Msg}, State) ->
+ handle_cast(Msg, State);
+handle_early_msg({info, Msg}, State) ->
+ handle_info(Msg, State).
+
+tabs() ->
+ Curr = val(latest_transient_decision), % Do not miss any trans even
+ Prev = val(previous_transient_decisions), % if the tabs are switched
+ [Curr, mnesia_decision | Prev]. % Ordered by hit probability
+
+decision(Tid) ->
+ decision(Tid, tabs()).
+
+decision(Tid, [Tab | Tabs]) ->
+ case catch ?ets_lookup(Tab, Tid) of
+ [D] when is_record(D, decision) ->
+ D;
+ [C] when is_record(C, transient_decision) ->
+ #decision{tid = C#transient_decision.tid,
+ outcome = C#transient_decision.outcome,
+ disc_nodes = [],
+ ram_nodes = []
+ };
+ [] ->
+ decision(Tid, Tabs);
+ {'EXIT', _} ->
+ %% Recently switched transient decision table
+ decision(Tid, Tabs)
+ end;
+decision(_Tid, []) ->
+ no_decision.
+
+outcome(Tid, Default) ->
+ outcome(Tid, Default, tabs()).
+
+outcome(Tid, Default, [Tab | Tabs]) ->
+ case catch ?ets_lookup_element(Tab, Tid, 3) of
+ {'EXIT', _} ->
+ outcome(Tid, Default, Tabs);
+ Val ->
+ Val
+ end;
+outcome(_Tid, Default, []) ->
+ Default.
+
+filter_outcome(Val) ->
+ case Val of
+ unclear -> unclear;
+ aborted -> aborted;
+ presume_abort -> aborted;
+ committed -> committed;
+ pre_commit -> unclear
+ end.
+
+filter_aborted(D) when D#decision.outcome == presume_abort ->
+ D#decision{outcome = aborted};
+filter_aborted(D) ->
+ D.
+
+%% Merge old decision D with new (probably remote) decision
+merge_decisions(Node, D, NewD0) ->
+ NewD = filter_aborted(NewD0),
+ if
+ D == no_decision, node() /= Node ->
+ %% We did not know anything about this txn
+ NewD#decision{disc_nodes = []};
+ D == no_decision ->
+ NewD;
+ is_record(D, decision) ->
+ DiscNs = D#decision.disc_nodes -- ([node(), Node]),
+ OldD = filter_aborted(D#decision{disc_nodes = DiscNs}),
+%% mnesia_lib:dbg_out("merge ~w: NewD = ~w~n D = ~w~n OldD = ~w~n",
+%% [Node, NewD, D, OldD]),
+ if
+ OldD#decision.outcome == unclear,
+ NewD#decision.outcome == unclear ->
+ D;
+
+ OldD#decision.outcome == NewD#decision.outcome ->
+ %% We have come to the same decision
+ OldD;
+
+ OldD#decision.outcome == committed,
+ NewD#decision.outcome == aborted ->
+ %% Interesting! We have already committed,
+ %% but someone else has aborted. Now we
+ %% have a nice little inconcistency. The
+ %% other guy (or some one else) has
+ %% enforced a recovery decision when
+ %% max_wait_for_decision was exceeded.
+ %% We will pretend that we have obeyed
+ %% the forced recovery decision, but we
+ %% will also generate an event in case the
+ %% application wants to do something clever.
+ Msg = {inconsistent_database, bad_decision, Node},
+ mnesia_lib:report_system_event(Msg),
+ OldD#decision{outcome = aborted};
+
+ OldD#decision.outcome == aborted ->
+ %% aborted overrrides anything
+ OldD#decision{outcome = aborted};
+
+ NewD#decision.outcome == aborted ->
+ %% aborted overrrides anything
+ OldD#decision{outcome = aborted};
+
+ OldD#decision.outcome == committed,
+ NewD#decision.outcome == unclear ->
+ %% committed overrides unclear
+ OldD#decision{outcome = committed};
+
+ OldD#decision.outcome == unclear,
+ NewD#decision.outcome == committed ->
+ %% committed overrides unclear
+ OldD#decision{outcome = committed}
+ end
+ end.
+
+add_remote_decisions(Node, [D | Tail], State) when is_record(D, decision) ->
+ State2 = add_remote_decision(Node, D, State),
+ add_remote_decisions(Node, Tail, State2);
+
+add_remote_decisions(Node, [C | Tail], State)
+ when is_record(C, transient_decision) ->
+ D = #decision{tid = C#transient_decision.tid,
+ outcome = C#transient_decision.outcome,
+ disc_nodes = [],
+ ram_nodes = []},
+ State2 = add_remote_decision(Node, D, State),
+ add_remote_decisions(Node, Tail, State2);
+
+add_remote_decisions(Node, [{mnesia_down, _, _, _} | Tail], State) ->
+ add_remote_decisions(Node, Tail, State);
+
+add_remote_decisions(Node, [{trans_tid, serial, Serial} | Tail], State) ->
+ sync_trans_tid_serial(Serial),
+ case State#state.unclear_decision of
+ undefined ->
+ ignored;
+ D ->
+ case lists:member(Node, D#decision.ram_nodes) of
+ true ->
+ ignore;
+ false ->
+ abcast([Node], {what_decision, node(), D})
+ end
+ end,
+ add_remote_decisions(Node, Tail, State);
+
+add_remote_decisions(_Node, [], State) ->
+ State.
+
+add_remote_decision(Node, NewD, State) ->
+ Tid = NewD#decision.tid,
+ OldD = decision(Tid),
+ D = merge_decisions(Node, OldD, NewD),
+ do_log_decision(D, false, undefined),
+ Outcome = D#decision.outcome,
+ if
+ OldD == no_decision ->
+ ignore;
+ Outcome == unclear ->
+ ignore;
+ true ->
+ case lists:member(node(), NewD#decision.disc_nodes) or
+ lists:member(node(), NewD#decision.ram_nodes) of
+ true ->
+ tell_im_certain([Node], D);
+ false ->
+ ignore
+ end
+ end,
+ case State#state.unclear_decision of
+ U when U#decision.tid == Tid ->
+ WaitFor = State#state.unclear_waitfor -- [Node],
+ if
+ Outcome == unclear, WaitFor == [] ->
+ %% Everybody are uncertain, lets abort
+ NewOutcome = aborted,
+ CertainD = D#decision{outcome = NewOutcome,
+ disc_nodes = [],
+ ram_nodes = []},
+ tell_im_certain(D#decision.disc_nodes, CertainD),
+ tell_im_certain(D#decision.ram_nodes, CertainD),
+ do_log_decision(CertainD, false, undefined),
+ verbose("Decided to abort transaction ~p "
+ "since everybody are uncertain ~p~n",
+ [Tid, CertainD]),
+ gen_server:reply(State#state.unclear_pid, {ok, NewOutcome}),
+ State#state{unclear_pid = undefined,
+ unclear_decision = undefined,
+ unclear_waitfor = undefined};
+ Outcome /= unclear ->
+ verbose("~p told us that transaction ~p was ~p~n",
+ [Node, Tid, Outcome]),
+ gen_server:reply(State#state.unclear_pid, {ok, Outcome}),
+ State#state{unclear_pid = undefined,
+ unclear_decision = undefined,
+ unclear_waitfor = undefined};
+ Outcome == unclear ->
+ State#state{unclear_waitfor = WaitFor}
+ end;
+ _ ->
+ State
+ end.
+
+announce_all([]) ->
+ ok;
+announce_all(ToNodes) ->
+ Tid = trans_tid_serial(),
+ announce(ToNodes, [{trans_tid,serial,Tid}], [], false).
+
+announce(ToNodes, [Head | Tail], Acc, ForceSend) ->
+ Acc2 = arrange(ToNodes, Head, Acc, ForceSend),
+ announce(ToNodes, Tail, Acc2, ForceSend);
+
+announce(_ToNodes, [], Acc, _ForceSend) ->
+ send_decisions(Acc).
+
+send_decisions([{Node, Decisions} | Tail]) ->
+ abcast([Node], {decisions, node(), Decisions}),
+ send_decisions(Tail);
+send_decisions([]) ->
+ ok.
+
+arrange([To | ToNodes], D, Acc, ForceSend) when is_record(D, decision) ->
+ NeedsAdd = (ForceSend or
+ lists:member(To, D#decision.disc_nodes) or
+ lists:member(To, D#decision.ram_nodes)),
+ case NeedsAdd of
+ true ->
+ Acc2 = add_decision(To, D, Acc),
+ arrange(ToNodes, D, Acc2, ForceSend);
+ false ->
+ arrange(ToNodes, D, Acc, ForceSend)
+ end;
+
+arrange([To | ToNodes], {trans_tid, serial, Serial}, Acc, ForceSend) ->
+ %% Do the lamport thing plus release the others
+ %% from uncertainity.
+ Acc2 = add_decision(To, {trans_tid, serial, Serial}, Acc),
+ arrange(ToNodes, {trans_tid, serial, Serial}, Acc2, ForceSend);
+
+arrange([], _Decision, Acc, _ForceSend) ->
+ Acc.
+
+add_decision(Node, Decision, [{Node, Decisions} | Tail]) ->
+ [{Node, [Decision | Decisions]} | Tail];
+add_decision(Node, Decision, [Head | Tail]) ->
+ [Head | add_decision(Node, Decision, Tail)];
+add_decision(Node, Decision, []) ->
+ [{Node, [Decision]}].
+
diff --git a/lib/mnesia/src/mnesia_registry.erl b/lib/mnesia/src/mnesia_registry.erl
new file mode 100644
index 0000000000..9805d48697
--- /dev/null
+++ b/lib/mnesia/src/mnesia_registry.erl
@@ -0,0 +1,280 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_registry).
+
+%%%----------------------------------------------------------------------
+%%% File : mnesia_registry.erl
+%%% Purpose : Support dump and restore of a registry on a C-node
+%%% This is an OTP internal module and is not public available.
+%%%
+%%% Example : Dump some hardcoded records into the Mnesia table Tab
+%%%
+%%% case rpc:call(Node, mnesia_registry, start_dump, [Tab, self()]) of
+%%% Pid when pid(Pid) ->
+%%% Pid ! {write, key1, key_size1, val_type1, val_size1, val1},
+%%% Pid ! {delete, key3},
+%%% Pid ! {write, key2, key_size2, val_type2, val_size2, val2},
+%%% Pid ! {write, key4, key_size4, val_type4, val_size4, val4},
+%%% Pid ! {commit, self()},
+%%% receive
+%%% {ok, Pid} ->
+%%% ok;
+%%% {'EXIT', Pid, Reason} ->
+%%% exit(Reason)
+%%% end;
+%%% {badrpc, Reason} ->
+%%% exit(Reason)
+%%% end.
+%%%
+%%% Example : Restore the corresponding Mnesia table Tab
+%%%
+%%% case rpc:call(Node, mnesia_registry, start_restore, [Tab, self()]) of
+%%% {size, Pid, N, LargestKey, LargestVal} ->
+%%% Pid ! {send_records, self()},
+%%% Fun = fun() ->
+%%% receive
+%%% {restore, KeySize, ValSize, ValType, Key, Val} ->
+%%% {Key, Val};
+%%% {'EXIT', Pid, Reason} ->
+%%% exit(Reason)
+%%% end
+%%% end,
+%%% lists:map(Fun, lists:seq(1, N));
+%%% {badrpc, Reason} ->
+%%% exit(Reason)
+%%% end.
+%%%
+%%%----------------------------------------------------------------------
+
+%% External exports
+-export([start_dump/2, start_restore/2]).
+-export([create_table/1, create_table/2]).
+
+%% Internal exports
+-export([init/4]).
+
+-record(state, {table, ops = [], link_to}).
+
+-record(registry_entry, {key, key_size, val_type, val_size, val}).
+
+-record(size, {pid = self(), n_values = 0, largest_key = 0, largest_val = 0}).
+
+%%%----------------------------------------------------------------------
+%%% Client
+%%%----------------------------------------------------------------------
+
+start(Type, Tab, LinkTo) ->
+ Starter = self(),
+ Args = [Type, Starter, LinkTo, Tab],
+ Pid = spawn_link(?MODULE, init, Args),
+ %% The receiver process may unlink the current process
+ receive
+ {ok, Res} ->
+ Res;
+ {'EXIT', Pid, Reason} when LinkTo == Starter ->
+ exit(Reason)
+ end.
+
+%% Starts a receiver process and optionally creates a Mnesia table
+%% with suitable default values. Returns the Pid of the receiver process
+%%
+%% The receiver process accumulates Mnesia operations and performs
+%% all operations or none at commit. The understood messages are:
+%%
+%% {write, Key, KeySize, ValType, ValSize, Val} ->
+%% accumulates mnesia:write({Tab, Key, KeySize, ValType, ValSize, Val})
+%% (no reply)
+%% {delete, Key} ->
+%% accumulates mnesia:delete({Tab, Key}) (no reply)
+%% {commit, ReplyTo} ->
+%% commits all accumulated operations
+%% and stops the process (replies {ok, Pid})
+%% abort ->
+%% stops the process (no reply)
+%%
+%% The receiver process is linked to the process with the process identifier
+%% LinkTo. If some error occurs the receiver process will invoke exit(Reason)
+%% and it is up to he LinkTo process to act properly when it receives an exit
+%% signal.
+
+start_dump(Tab, LinkTo) ->
+ start(dump, Tab, LinkTo).
+
+%% Starts a sender process which sends restore messages back to the
+%% LinkTo process. But first are some statistics about the table
+%% determined and returned as a 5-tuple:
+%%
+%% {size, SenderPid, N, LargestKeySize, LargestValSize}
+%%
+%% where N is the number of records in the table. Then the sender process
+%% waits for a 2-tuple message:
+%%
+%% {send_records, ReplyTo}
+%%
+%% At last N 6-tuple messages is sent to the ReplyTo process:
+%%
+%% ReplyTo ! {restore, KeySize, ValSize, ValType, Key, Val}
+%%
+%% If some error occurs the receiver process will invoke exit(Reason)
+%% and it is up to he LinkTo process to act properly when it receives an
+%% exit signal.
+
+start_restore(Tab, LinkTo) ->
+ start(restore, Tab, LinkTo).
+
+
+%% Optionally creates the Mnesia table Tab with suitable default values.
+%% Returns ok or EXIT's
+create_table(Tab) ->
+ Storage = mnesia:table_info(schema, storage_type),
+ create_table(Tab, [{Storage, [node()]}]).
+
+create_table(Tab, TabDef) ->
+ Attrs = record_info(fields, registry_entry),
+ case mnesia:create_table(Tab, [{attributes, Attrs} | TabDef]) of
+ {atomic, ok} ->
+ ok;
+ {aborted, {already_exists, Tab}} ->
+ ok;
+ {aborted, Reason} ->
+ exit(Reason)
+ end.
+
+%%%----------------------------------------------------------------------
+%%% Server
+%%%----------------------------------------------------------------------
+
+init(Type, Starter, LinkTo, Tab) ->
+ if
+ LinkTo /= Starter ->
+ link(LinkTo),
+ unlink(Starter);
+ true ->
+ ignore
+ end,
+ case Type of
+ dump ->
+ Starter ! {ok, self()},
+ dump_loop(#state{table = Tab, link_to = LinkTo});
+ restore ->
+ restore_table(Tab, Starter, LinkTo)
+ end.
+
+%%%----------------------------------------------------------------------
+%%% Dump loop
+%%%----------------------------------------------------------------------
+
+dump_loop(S) ->
+ Tab = S#state.table,
+ Ops = S#state.ops,
+ receive
+ {write, Key, KeySize, ValType, ValSize, Val} ->
+ RE = #registry_entry{key = Key,
+ key_size = KeySize,
+ val_type = ValType,
+ val_size = ValSize,
+ val = Val},
+ dump_loop(S#state{ops = [{write, RE} | Ops]});
+ {delete, Key} ->
+ dump_loop(S#state{ops = [{delete, Key} | Ops]});
+ {commit, ReplyTo} ->
+ create_table(Tab),
+ RecName = mnesia:table_info(Tab, record_name),
+ %% The Ops are in reverse order, but there is no need
+ %% for reversing the list of accumulated operations
+ case mnesia:transaction(fun handle_ops/3, [Tab, RecName, Ops]) of
+ {atomic, ok} ->
+ ReplyTo ! {ok, self()},
+ stop(S#state.link_to);
+ {aborted, Reason} ->
+ exit({aborted, Reason})
+ end;
+ abort ->
+ stop(S#state.link_to);
+ BadMsg ->
+ exit({bad_message, BadMsg})
+ end.
+
+stop(LinkTo) ->
+ unlink(LinkTo),
+ exit(normal).
+
+%% Grab a write lock for the entire table
+%% and iterate over all accumulated operations
+handle_ops(Tab, RecName, Ops) ->
+ mnesia:write_lock_table(Tab),
+ do_handle_ops(Tab, RecName, Ops).
+
+do_handle_ops(Tab, RecName, [{write, RegEntry} | Ops]) ->
+ Record = setelement(1, RegEntry, RecName),
+ mnesia:write(Tab, Record, write),
+ do_handle_ops(Tab, RecName, Ops);
+do_handle_ops(Tab, RecName, [{delete, Key} | Ops]) ->
+ mnesia:delete(Tab, Key, write),
+ do_handle_ops(Tab, RecName, Ops);
+do_handle_ops(_Tab, _RecName, []) ->
+ ok.
+
+%%%----------------------------------------------------------------------
+%%% Restore table
+%%%----------------------------------------------------------------------
+
+restore_table(Tab, Starter, LinkTo) ->
+ Pat = mnesia:table_info(Tab, wild_pattern),
+ Fun = fun() -> mnesia:match_object(Tab, Pat, read) end,
+ case mnesia:transaction(Fun) of
+ {atomic, AllRecords} ->
+ Size = calc_size(AllRecords, #size{}),
+ Starter ! {ok, Size},
+ receive
+ {send_records, ReplyTo} ->
+ send_records(AllRecords, ReplyTo),
+ unlink(LinkTo),
+ exit(normal);
+ BadMsg ->
+ exit({bad_message, BadMsg})
+ end;
+ {aborted, Reason} ->
+ exit(Reason)
+ end.
+
+calc_size([H | T], S) ->
+ KeySize = max(element(#registry_entry.key_size, H), S#size.largest_key),
+ ValSize = max(element(#registry_entry.val_size, H), S#size.largest_val),
+ N = S#size.n_values + 1,
+ calc_size(T, S#size{n_values = N, largest_key = KeySize, largest_val = ValSize});
+calc_size([], Size) ->
+ Size.
+
+max(New, Old) when New > Old -> New;
+max(_New, Old) -> Old.
+
+send_records([H | T], ReplyTo) ->
+ KeySize = element(#registry_entry.key_size, H),
+ ValSize = element(#registry_entry.val_size, H),
+ ValType = element(#registry_entry.val_type, H),
+ Key = element(#registry_entry.key, H),
+ Val = element(#registry_entry.val, H),
+ ReplyTo ! {restore, KeySize, ValSize, ValType, Key, Val},
+ send_records(T, ReplyTo);
+send_records([], _ReplyTo) ->
+ ok.
+
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
new file mode 100644
index 0000000000..354431a296
--- /dev/null
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -0,0 +1,3027 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% In this module we provide a number of explicit functions
+%% to maninpulate the schema. All these functions are called
+%% within a special schema transaction.
+%%
+%% We also have an init/1 function defined here, this func is
+%% used by mnesia:start() to initialize the entire schema.
+
+-module(mnesia_schema).
+
+-export([
+ add_snmp/2,
+ add_table_copy/3,
+ add_table_index/2,
+ arrange_restore/3,
+ attr_tab_to_pos/2,
+ attr_to_pos/2,
+ change_table_copy_type/3,
+ change_table_access_mode/2,
+ change_table_load_order/2,
+ change_table_frag/2,
+ clear_table/1,
+ create_table/1,
+ cs2list/1,
+ del_snmp/1,
+ del_table_copy/2,
+ del_table_index/2,
+ delete_cstruct/2,
+ delete_schema/1,
+ delete_schema2/0,
+ delete_table/1,
+ delete_table_property/2,
+ dump_tables/1,
+ ensure_no_schema/1,
+ get_create_list/1,
+ get_initial_schema/2,
+ get_table_properties/1,
+ info/0,
+ info/1,
+ init/1,
+ insert_cstruct/3,
+ is_remote_member/1,
+ list2cs/1,
+ lock_schema/0,
+ merge_schema/0,
+ move_table/3,
+ opt_create_dir/2,
+ prepare_commit/3,
+ purge_dir/2,
+ purge_tmp_files/0,
+ ram_delete_table/2,
+% ram_delete_table/3,
+ read_cstructs_from_disc/0,
+ read_nodes/0,
+ remote_read_schema/0,
+ restore/1,
+ restore/2,
+ restore/3,
+ schema_coordinator/3,
+ set_where_to_read/3,
+ transform_table/4,
+ undo_prepare_commit/2,
+ unlock_schema/0,
+ version/0,
+ write_table_property/2
+ ]).
+
+%% Exports for mnesia_frag
+-export([
+ get_tid_ts_and_lock/2,
+ make_create_table/1,
+ ensure_active/1,
+ pick/4,
+ verify/3,
+ incr_version/1,
+ check_keys/3,
+ check_duplicates/2,
+ make_delete_table/2
+ ]).
+
+%% Needed outside to be able to use/set table_properties
+%% from user (not supported)
+-export([schema_transaction/1,
+ insert_schema_ops/2,
+ do_create_table/1,
+ do_delete_table/1,
+ do_read_table_property/2,
+ do_delete_table_property/2,
+ do_write_table_property/2]).
+
+-include("mnesia.hrl").
+-include_lib("kernel/include/file.hrl").
+
+-import(mnesia_lib, [set/2, del/2, verbose/2, dbg_out/2]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Here comes the init function which also resides in
+%% this module, it is called upon by the trans server
+%% at startup of the system
+%%
+%% We have a meta table which looks like
+%% {table, schema,
+%% {type, set},
+%% {disc_copies, all},
+%% {arity, 2}
+%% {attributes, [key, val]}
+%%
+%% This means that we have a series of {schema, Name, Cs} tuples
+%% in a table called schema !!
+
+init(IgnoreFallback) ->
+ Res = read_schema(true, IgnoreFallback),
+ {ok, Source, _CreateList} = exit_on_error(Res),
+ verbose("Schema initiated from: ~p~n", [Source]),
+ set({schema, tables}, []),
+ set({schema, local_tables}, []),
+ Tabs = set_schema(?ets_first(schema)),
+ lists:foreach(fun(Tab) -> clear_whereabouts(Tab) end, Tabs),
+ set({schema, where_to_read}, node()),
+ set({schema, load_node}, node()),
+ set({schema, load_reason}, initial),
+ mnesia_controller:add_active_replica(schema, node()).
+
+exit_on_error({error, Reason}) ->
+ exit(Reason);
+exit_on_error(GoodRes) ->
+ GoodRes.
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
+ Value -> Value
+ end.
+
+%% This function traverses all cstructs in the schema and
+%% sets all values in mnesia_gvar accordingly for each table/cstruct
+
+set_schema('$end_of_table') ->
+ [];
+set_schema(Tab) ->
+ do_set_schema(Tab),
+ [Tab | set_schema(?ets_next(schema, Tab))].
+
+get_create_list(Tab) ->
+ ?ets_lookup_element(schema, Tab, 3).
+
+do_set_schema(Tab) ->
+ List = get_create_list(Tab),
+ Cs = list2cs(List),
+ do_set_schema(Tab, Cs).
+
+do_set_schema(Tab, Cs) ->
+ Type = Cs#cstruct.type,
+ set({Tab, setorbag}, Type),
+ set({Tab, local_content}, Cs#cstruct.local_content),
+ set({Tab, ram_copies}, Cs#cstruct.ram_copies),
+ set({Tab, disc_copies}, Cs#cstruct.disc_copies),
+ set({Tab, disc_only_copies}, Cs#cstruct.disc_only_copies),
+ set({Tab, load_order}, Cs#cstruct.load_order),
+ set({Tab, access_mode}, Cs#cstruct.access_mode),
+ set({Tab, snmp}, Cs#cstruct.snmp),
+ set({Tab, user_properties}, Cs#cstruct.user_properties),
+ [set({Tab, user_property, element(1, P)}, P) || P <- Cs#cstruct.user_properties],
+ set({Tab, frag_properties}, Cs#cstruct.frag_properties),
+ mnesia_frag:set_frag_hash(Tab, Cs#cstruct.frag_properties),
+ set({Tab, attributes}, Cs#cstruct.attributes),
+ Arity = length(Cs#cstruct.attributes) + 1,
+ set({Tab, arity}, Arity),
+ RecName = Cs#cstruct.record_name,
+ set({Tab, record_name}, RecName),
+ set({Tab, record_validation}, {RecName, Arity, Type}),
+ set({Tab, wild_pattern}, wild(RecName, Arity)),
+ set({Tab, index}, Cs#cstruct.index),
+ %% create actual index tabs later
+ set({Tab, cookie}, Cs#cstruct.cookie),
+ set({Tab, version}, Cs#cstruct.version),
+ set({Tab, cstruct}, Cs),
+ Storage = mnesia_lib:schema_cs_to_storage_type(node(), Cs),
+ set({Tab, storage_type}, Storage),
+ mnesia_lib:add({schema, tables}, Tab),
+ Ns = mnesia_lib:cs_to_nodes(Cs),
+ case lists:member(node(), Ns) of
+ true ->
+ mnesia_lib:add({schema, local_tables}, Tab);
+ false when Tab == schema ->
+ mnesia_lib:add({schema, local_tables}, Tab);
+ false ->
+ ignore
+ end.
+
+wild(RecName, Arity) ->
+ Wp0 = list_to_tuple(lists:duplicate(Arity, '_')),
+ setelement(1, Wp0, RecName).
+
+%% Temporarily read the local schema and return a list
+%% of all nodes mentioned in the schema.DAT file
+read_nodes() ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ case read_schema(false) of
+ {ok, _Source, CreateList} ->
+ Cs = list2cs(CreateList),
+ {ok, Cs#cstruct.disc_copies ++ Cs#cstruct.ram_copies};
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+%% Returns Version from the tuple {Version,MasterNodes}
+version() ->
+ case read_schema(false) of
+ {ok, Source, CreateList} when Source /= default ->
+ Cs = list2cs(CreateList),
+ {Version, _Details} = Cs#cstruct.version,
+ Version;
+ _ ->
+ case dir_exists(mnesia_lib:dir()) of
+ true -> {1,0};
+ false -> {0,0}
+ end
+ end.
+
+%% Calculate next table version from old cstruct
+incr_version(Cs) ->
+ {{Major, Minor}, _} = Cs#cstruct.version,
+ Nodes = mnesia_lib:intersect(val({schema, disc_copies}),
+ mnesia_lib:cs_to_nodes(Cs)),
+ V =
+ case Nodes -- val({Cs#cstruct.name, active_replicas}) of
+ [] -> {Major + 1, 0}; % All replicas are active
+ _ -> {Major, Minor + 1} % Some replicas are inactive
+ end,
+ Cs#cstruct{version = {V, {node(), now()}}}.
+
+%% Returns table name
+insert_cstruct(Tid, Cs, KeepWhereabouts) ->
+ Tab = Cs#cstruct.name,
+ TabDef = cs2list(Cs),
+ Val = {schema, Tab, TabDef},
+ mnesia_checkpoint:tm_retain(Tid, schema, Tab, write),
+ mnesia_subscr:report_table_event(schema, Tid, Val, write),
+ Active = val({Tab, active_replicas}),
+
+ case KeepWhereabouts of
+ true ->
+ ignore;
+ false when Active == [] ->
+ clear_whereabouts(Tab);
+ false ->
+ %% Someone else has initiated table
+ ignore
+ end,
+ set({Tab, cstruct}, Cs),
+ ?ets_insert(schema, Val),
+ do_set_schema(Tab, Cs),
+ Val.
+
+clear_whereabouts(Tab) ->
+ set({Tab, checkpoints}, []),
+ set({Tab, subscribers}, []),
+ set({Tab, where_to_read}, nowhere),
+ set({Tab, active_replicas}, []),
+ set({Tab, commit_work}, []),
+ set({Tab, where_to_write}, []),
+ set({Tab, where_to_commit}, []),
+ set({Tab, load_by_force}, false),
+ set({Tab, load_node}, unknown),
+ set({Tab, load_reason}, unknown).
+
+%% Returns table name
+delete_cstruct(Tid, Cs) ->
+ Tab = Cs#cstruct.name,
+ TabDef = cs2list(Cs),
+ Val = {schema, Tab, TabDef},
+ mnesia_checkpoint:tm_retain(Tid, schema, Tab, delete),
+ mnesia_subscr:report_table_event(schema, Tid, Val, delete),
+ mnesia_controller:update(
+ fun() ->
+ ?ets_match_delete(mnesia_gvar, {{Tab, '_'}, '_'}),
+ ?ets_match_delete(mnesia_gvar, {{Tab, '_', '_'}, '_'}),
+ del({schema, local_tables}, Tab),
+ del({schema, tables}, Tab),
+ ?ets_delete(schema, Tab)
+ end),
+ Val.
+
+%% Delete the Mnesia directory on all given nodes
+%% Requires that Mnesia is not running anywhere
+%% Returns ok | {error,Reason}
+delete_schema(Ns) when is_list(Ns), Ns /= [] ->
+ RunningNs = mnesia_lib:running_nodes(Ns),
+ Reason = "Cannot delete schema on all nodes",
+ if
+ RunningNs == [] ->
+ case rpc:multicall(Ns, ?MODULE, delete_schema2, []) of
+ {Replies, []} ->
+ case [R || R <- Replies, R /= ok] of
+ [] ->
+ ok;
+ BadReplies ->
+ verbose("~s: ~p~n", [Reason, BadReplies]),
+ {error, {"All nodes not running", BadReplies}}
+ end;
+ {_Replies, BadNs} ->
+ verbose("~s: ~p~n", [Reason, BadNs]),
+ {error, {"All nodes not running", BadNs}}
+ end;
+ true ->
+ verbose("~s: ~p~n", [Reason, RunningNs]),
+ {error, {"Mnesia is not stopped everywhere", RunningNs}}
+ end;
+delete_schema(Ns) ->
+ {error, {badarg, Ns}}.
+
+delete_schema2() ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ case mnesia_lib:is_running() of
+ no ->
+ Dir = mnesia_lib:dir(),
+ purge_dir(Dir, []),
+ ok;
+ _ ->
+ {error, {"Mnesia still running", node()}}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+ensure_no_schema([H|T]) when is_atom(H) ->
+ case rpc:call(H, ?MODULE, remote_read_schema, []) of
+ {badrpc, Reason} ->
+ {H, {"All nodes not running", H, Reason}};
+ {ok,Source, _} when Source /= default ->
+ {H, {already_exists, H}};
+ _ ->
+ ensure_no_schema(T)
+ end;
+ensure_no_schema([H|_]) ->
+ {error,{badarg, H}};
+ensure_no_schema([]) ->
+ ok.
+
+remote_read_schema() ->
+ %% Ensure that we access the intended Mnesia
+ %% directory. This function may not be called
+ %% during startup since it will cause the
+ %% application_controller to get into deadlock
+ case mnesia_lib:ensure_loaded(?APPLICATION) of
+ ok ->
+ case mnesia_monitor:get_env(schema_location) of
+ opt_disc ->
+ read_schema(false);
+ _ ->
+ read_schema(false)
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+dir_exists(Dir) ->
+ dir_exists(Dir, mnesia_monitor:use_dir()).
+dir_exists(Dir, true) ->
+ case file:read_file_info(Dir) of
+ {ok, _} -> true;
+ _ -> false
+ end;
+dir_exists(_Dir, false) ->
+ false.
+
+opt_create_dir(UseDir, Dir) when UseDir == true->
+ case dir_exists(Dir, UseDir) of
+ true ->
+ check_can_write(Dir);
+ false ->
+ case file:make_dir(Dir) of
+ ok ->
+ verbose("Create Directory ~p~n", [Dir]),
+ ok;
+ {error, Reason} ->
+ verbose("Cannot create mnesia dir ~p~n", [Reason]),
+ {error, {"Cannot create Mnesia dir", Dir, Reason}}
+ end
+ end;
+opt_create_dir(false, _) ->
+ {error, {has_no_disc, node()}}.
+
+check_can_write(Dir) ->
+ case file:read_file_info(Dir) of
+ {ok, FI} when FI#file_info.type == directory,
+ FI#file_info.access == read_write ->
+ ok;
+ {ok, _} ->
+ {error, "Not allowed to write in Mnesia dir", Dir};
+ _ ->
+ {error, "Non existent Mnesia dir", Dir}
+ end.
+
+lock_schema() ->
+ mnesia_lib:lock_table(schema).
+
+unlock_schema() ->
+ mnesia_lib:unlock_table(schema).
+
+read_schema(Keep) ->
+ read_schema(Keep, false).
+
+%% The schema may be read for several reasons.
+%% If Mnesia is not already started the read intention
+%% we normally do not want the ets table named schema
+%% be left around.
+%% If Keep == true, the ets table schema is kept
+%% If Keep == false, the ets table schema is removed
+%%
+%% Returns {ok, Source, SchemaCstruct} or {error, Reason}
+%% Source may be: default | ram | disc | fallback
+
+read_schema(Keep, IgnoreFallback) ->
+ lock_schema(),
+ Res =
+ case mnesia:system_info(is_running) of
+ yes ->
+ {ok, ram, get_create_list(schema)};
+ _IsRunning ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ read_disc_schema(Keep, IgnoreFallback);
+ false when Keep == true ->
+ Args = [{keypos, 2}, public, named_table, set],
+ mnesia_monitor:mktab(schema, Args),
+ CreateList = get_initial_schema(ram_copies, []),
+ ?ets_insert(schema,{schema, schema, CreateList}),
+ {ok, default, CreateList};
+ false when Keep == false ->
+ CreateList = get_initial_schema(ram_copies, []),
+ {ok, default, CreateList}
+ end
+ end,
+ unlock_schema(),
+ Res.
+
+read_disc_schema(Keep, IgnoreFallback) ->
+ Running = mnesia:system_info(is_running),
+ case mnesia_bup:fallback_exists() of
+ true when IgnoreFallback == false, Running /= yes ->
+ mnesia_bup:fallback_to_schema();
+ _ ->
+ %% If we're running, we read the schema file even
+ %% if fallback exists
+ Dat = mnesia_lib:tab2dat(schema),
+ case mnesia_lib:exists(Dat) of
+ true ->
+ do_read_disc_schema(Dat, Keep);
+ false ->
+ Dmp = mnesia_lib:tab2dmp(schema),
+ case mnesia_lib:exists(Dmp) of
+ true ->
+ %% May only happen when toggling of
+ %% schema storage type has been
+ %% interrupted
+ do_read_disc_schema(Dmp, Keep);
+ false ->
+ {error, "No schema file exists"}
+ end
+ end
+ end.
+
+do_read_disc_schema(Fname, Keep) ->
+ T =
+ case Keep of
+ false ->
+ Args = [{keypos, 2}, public, set],
+ ?ets_new_table(schema, Args);
+ true ->
+ Args = [{keypos, 2}, public, named_table, set],
+ mnesia_monitor:mktab(schema, Args)
+ end,
+ Repair = mnesia_monitor:get_env(auto_repair),
+ Res = % BUGBUG Fixa till dcl!
+ case mnesia_lib:dets_to_ets(schema, T, Fname, set, Repair, no) of
+ loaded -> {ok, disc, ?ets_lookup_element(T, schema, 3)};
+ Other -> {error, {"Cannot read schema", Fname, Other}}
+ end,
+ case Keep of
+ true -> ignore;
+ false -> ?ets_delete_table(T)
+ end,
+ Res.
+
+get_initial_schema(SchemaStorage, Nodes) ->
+ Cs = #cstruct{name = schema,
+ record_name = schema,
+ attributes = [table, cstruct]},
+ Cs2 =
+ case SchemaStorage of
+ ram_copies -> Cs#cstruct{ram_copies = Nodes};
+ disc_copies -> Cs#cstruct{disc_copies = Nodes}
+ end,
+ cs2list(Cs2).
+
+read_cstructs_from_disc() ->
+ %% Assumptions:
+ %% - local schema lock in global
+ %% - use_dir is true
+ %% - Mnesia is not running
+ %% - Ignore fallback
+
+ Fname = mnesia_lib:tab2dat(schema),
+ case mnesia_lib:exists(Fname) of
+ true ->
+ Args = [{file, Fname},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)},
+ {type, set}],
+ case dets:open_file(make_ref(), Args) of
+ {ok, Tab} ->
+ Fun = fun({_, _, List}) ->
+ {continue, list2cs(List)}
+ end,
+ Cstructs = dets:traverse(Tab, Fun),
+ dets:close(Tab),
+ {ok, Cstructs};
+ {error, Reason} ->
+ {error, Reason}
+ end;
+ false ->
+ {error, "No schema file exists"}
+ end.
+
+%% We run a very special type of transactions when we
+%% we want to manipulate the schema.
+
+get_tid_ts_and_lock(Tab, Intent) ->
+ TidTs = get(mnesia_activity_state),
+ case TidTs of
+ {_Mod, Tid, Ts} when is_record(Ts, tidstore)->
+ Store = Ts#tidstore.store,
+ case Intent of
+ read -> mnesia_locker:rlock_table(Tid, Store, Tab);
+ write -> mnesia_locker:wlock_table(Tid, Store, Tab);
+ none -> ignore
+ end,
+ TidTs;
+ _ ->
+ mnesia:abort(no_transaction)
+ end.
+
+schema_transaction(Fun) ->
+ case get(mnesia_activity_state) of
+ undefined ->
+ Args = [self(), Fun, whereis(mnesia_controller)],
+ Pid = spawn_link(?MODULE, schema_coordinator, Args),
+ receive
+ {transaction_done, Res, Pid} -> Res;
+ {'EXIT', Pid, R} -> {aborted, {transaction_crashed, R}}
+ end;
+ _ ->
+ {aborted, nested_transaction}
+ end.
+
+%% This process may dump the transaction log, and should
+%% therefore not be run in an application process
+%%
+schema_coordinator(Client, _Fun, undefined) ->
+ Res = {aborted, {node_not_running, node()}},
+ Client ! {transaction_done, Res, self()},
+ unlink(Client);
+
+schema_coordinator(Client, Fun, Controller) when is_pid(Controller) ->
+ %% Do not trap exit in order to automatically die
+ %% when the controller dies
+
+ link(Controller),
+ unlink(Client),
+
+ %% Fulfull the transaction even if the client dies
+ Res = mnesia:transaction(Fun),
+ Client ! {transaction_done, Res, self()},
+ unlink(Controller), % Avoids spurious exit message
+ unlink(whereis(mnesia_tm)), % Avoids spurious exit message
+ exit(normal).
+
+%% The make* rotines return a list of ops, this function
+%% inserts em all in the Store and maintains the local order
+%% of ops.
+
+insert_schema_ops({_Mod, _Tid, Ts}, SchemaIOps) ->
+ do_insert_schema_ops(Ts#tidstore.store, SchemaIOps).
+
+do_insert_schema_ops(Store, [Head | Tail]) ->
+ ?ets_insert(Store, Head),
+ do_insert_schema_ops(Store, Tail);
+do_insert_schema_ops(_Store, []) ->
+ ok.
+
+cs2list(Cs) when is_record(Cs, cstruct) ->
+ Tags = record_info(fields, cstruct),
+ rec2list(Tags, 2, Cs);
+cs2list(CreateList) when is_list(CreateList) ->
+ CreateList.
+
+rec2list([Tag | Tags], Pos, Rec) ->
+ Val = element(Pos, Rec),
+ [{Tag, Val} | rec2list(Tags, Pos + 1, Rec)];
+rec2list([], _Pos, _Rec) ->
+ [].
+
+list2cs(List) when is_list(List) ->
+ Name = pick(unknown, name, List, must),
+ Type = pick(Name, type, List, set),
+ Rc0 = pick(Name, ram_copies, List, []),
+ Dc = pick(Name, disc_copies, List, []),
+ Doc = pick(Name, disc_only_copies, List, []),
+ Rc = case {Rc0, Dc, Doc} of
+ {[], [], []} -> [node()];
+ _ -> Rc0
+ end,
+ LC = pick(Name, local_content, List, false),
+ RecName = pick(Name, record_name, List, Name),
+ Attrs = pick(Name, attributes, List, [key, val]),
+ Snmp = pick(Name, snmp, List, []),
+ LoadOrder = pick(Name, load_order, List, 0),
+ AccessMode = pick(Name, access_mode, List, read_write),
+ UserProps = pick(Name, user_properties, List, []),
+ verify({alt, [nil, list]}, mnesia_lib:etype(UserProps),
+ {bad_type, Name, {user_properties, UserProps}}),
+ Cookie = pick(Name, cookie, List, ?unique_cookie),
+ Version = pick(Name, version, List, {{2, 0}, []}),
+ Ix = pick(Name, index, List, []),
+ verify({alt, [nil, list]}, mnesia_lib:etype(Ix),
+ {bad_type, Name, {index, [Ix]}}),
+ Ix2 = [attr_to_pos(I, Attrs) || I <- Ix],
+
+ Frag = pick(Name, frag_properties, List, []),
+ verify({alt, [nil, list]}, mnesia_lib:etype(Frag),
+ {badarg, Name, {frag_properties, Frag}}),
+
+ Keys = check_keys(Name, List, record_info(fields, cstruct)),
+ check_duplicates(Name, Keys),
+ #cstruct{name = Name,
+ ram_copies = Rc,
+ disc_copies = Dc,
+ disc_only_copies = Doc,
+ type = Type,
+ index = Ix2,
+ snmp = Snmp,
+ load_order = LoadOrder,
+ access_mode = AccessMode,
+ local_content = LC,
+ record_name = RecName,
+ attributes = Attrs,
+ user_properties = lists:sort(UserProps),
+ frag_properties = lists:sort(Frag),
+ cookie = Cookie,
+ version = Version};
+list2cs(Other) ->
+ mnesia:abort({badarg, Other}).
+
+pick(Tab, Key, List, Default) ->
+ case lists:keysearch(Key, 1, List) of
+ false when Default == must ->
+ mnesia:abort({badarg, Tab, "Missing key", Key, List});
+ false ->
+ Default;
+ {value, {Key, Value}} ->
+ Value;
+ {value, BadArg} ->
+ mnesia:abort({bad_type, Tab, BadArg})
+ end.
+
+%% Convert attribute name to integer if neccessary
+attr_tab_to_pos(_Tab, Pos) when is_integer(Pos) ->
+ Pos;
+attr_tab_to_pos(Tab, Attr) ->
+ attr_to_pos(Attr, val({Tab, attributes})).
+
+%% Convert attribute name to integer if neccessary
+attr_to_pos(Pos, _Attrs) when is_integer(Pos) ->
+ Pos;
+attr_to_pos(Attr, Attrs) when is_atom(Attr) ->
+ attr_to_pos(Attr, Attrs, 2);
+attr_to_pos(Attr, _) ->
+ mnesia:abort({bad_type, Attr}).
+
+attr_to_pos(Attr, [Attr | _Attrs], Pos) ->
+ Pos;
+attr_to_pos(Attr, [_ | Attrs], Pos) ->
+ attr_to_pos(Attr, Attrs, Pos + 1);
+attr_to_pos(Attr, _, _) ->
+ mnesia:abort({bad_type, Attr}).
+
+check_keys(Tab, [{Key, _Val} | Tail], Items) ->
+ case lists:member(Key, Items) of
+ true -> [Key | check_keys(Tab, Tail, Items)];
+ false -> mnesia:abort({badarg, Tab, Key})
+ end;
+check_keys(_, [], _) ->
+ [];
+check_keys(Tab, Arg, _) ->
+ mnesia:abort({badarg, Tab, Arg}).
+
+check_duplicates(Tab, Keys) ->
+ case has_duplicates(Keys) of
+ false -> ok;
+ true -> mnesia:abort({badarg, Tab, "Duplicate keys", Keys})
+ end.
+
+has_duplicates([H | T]) ->
+ case lists:member(H, T) of
+ true -> true;
+ false -> has_duplicates(T)
+ end;
+has_duplicates([]) ->
+ false.
+
+%% This is the only place where we check the validity of data
+verify_cstruct(Cs) when is_record(Cs, cstruct) ->
+ verify_nodes(Cs),
+
+ Tab = Cs#cstruct.name,
+ verify(atom, mnesia_lib:etype(Tab), {bad_type, Tab}),
+ Type = Cs#cstruct.type,
+ verify(true, lists:member(Type, [set, bag, ordered_set]),
+ {bad_type, Tab, {type, Type}}),
+
+ %% Currently ordered_set is not supported for disk_only_copies.
+ if
+ Type == ordered_set, Cs#cstruct.disc_only_copies /= [] ->
+ mnesia:abort({bad_type, Tab, {not_supported, Type, disc_only_copies}});
+ true ->
+ ok
+ end,
+
+ RecName = Cs#cstruct.record_name,
+ verify(atom, mnesia_lib:etype(RecName),
+ {bad_type, Tab, {record_name, RecName}}),
+
+ Attrs = Cs#cstruct.attributes,
+ verify(list, mnesia_lib:etype(Attrs),
+ {bad_type, Tab, {attributes, Attrs}}),
+
+ Arity = length(Attrs) + 1,
+ verify(true, Arity > 2, {bad_type, Tab, {attributes, Attrs}}),
+
+ lists:foldl(fun(Attr,_Other) when Attr == snmp ->
+ mnesia:abort({bad_type, Tab, {attributes, [Attr]}});
+ (Attr,Other) ->
+ verify(atom, mnesia_lib:etype(Attr),
+ {bad_type, Tab, {attributes, [Attr]}}),
+ verify(false, lists:member(Attr, Other),
+ {combine_error, Tab, {attributes, [Attr | Other]}}),
+ [Attr | Other]
+ end,
+ [],
+ Attrs),
+
+ Index = Cs#cstruct.index,
+ verify({alt, [nil, list]}, mnesia_lib:etype(Index),
+ {bad_type, Tab, {index, Index}}),
+
+ IxFun =
+ fun(Pos) ->
+ verify(true, fun() ->
+ if
+ is_integer(Pos),
+ Pos > 2,
+ Pos =< Arity ->
+ true;
+ true -> false
+ end
+ end,
+ {bad_type, Tab, {index, [Pos]}})
+ end,
+ lists:foreach(IxFun, Index),
+
+ LC = Cs#cstruct.local_content,
+ verify({alt, [true, false]}, LC,
+ {bad_type, Tab, {local_content, LC}}),
+ Access = Cs#cstruct.access_mode,
+ verify({alt, [read_write, read_only]}, Access,
+ {bad_type, Tab, {access_mode, Access}}),
+
+ Snmp = Cs#cstruct.snmp,
+ verify(true, mnesia_snmp_hook:check_ustruct(Snmp),
+ {badarg, Tab, {snmp, Snmp}}),
+
+ CheckProp = fun(Prop) when is_tuple(Prop), size(Prop) >= 1 -> ok;
+ (Prop) -> mnesia:abort({bad_type, Tab, {user_properties, [Prop]}})
+ end,
+ lists:foreach(CheckProp, Cs#cstruct.user_properties),
+
+ case Cs#cstruct.cookie of
+ {{MegaSecs, Secs, MicroSecs}, _Node}
+ when is_integer(MegaSecs), is_integer(Secs),
+ is_integer(MicroSecs), is_atom(node) ->
+ ok;
+ Cookie ->
+ mnesia:abort({bad_type, Tab, {cookie, Cookie}})
+ end,
+ case Cs#cstruct.version of
+ {{Major, Minor}, _Detail}
+ when is_integer(Major), is_integer(Minor) ->
+ ok;
+ Version ->
+ mnesia:abort({bad_type, Tab, {version, Version}})
+ end.
+
+verify_nodes(Cs) ->
+ Tab = Cs#cstruct.name,
+ Ram = Cs#cstruct.ram_copies,
+ Disc = Cs#cstruct.disc_copies,
+ DiscOnly = Cs#cstruct.disc_only_copies,
+ LoadOrder = Cs#cstruct.load_order,
+
+ verify({alt, [nil, list]}, mnesia_lib:etype(Ram),
+ {bad_type, Tab, {ram_copies, Ram}}),
+ verify({alt, [nil, list]}, mnesia_lib:etype(Disc),
+ {bad_type, Tab, {disc_copies, Disc}}),
+ case Tab of
+ schema ->
+ verify([], DiscOnly, {bad_type, Tab, {disc_only_copies, DiscOnly}});
+ _ ->
+ verify({alt, [nil, list]},
+ mnesia_lib:etype(DiscOnly),
+ {bad_type, Tab, {disc_only_copies, DiscOnly}})
+ end,
+ verify(integer, mnesia_lib:etype(LoadOrder),
+ {bad_type, Tab, {load_order, LoadOrder}}),
+
+ Nodes = Ram ++ Disc ++ DiscOnly,
+ verify(list, mnesia_lib:etype(Nodes),
+ {combine_error, Tab,
+ [{ram_copies, []}, {disc_copies, []}, {disc_only_copies, []}]}),
+ verify(false, has_duplicates(Nodes), {combine_error, Tab, Nodes}),
+ AtomCheck = fun(N) -> verify(atom, mnesia_lib:etype(N), {bad_type, Tab, N}) end,
+ lists:foreach(AtomCheck, Nodes).
+
+verify(Expected, Fun, Error) when is_function(Fun) ->
+ do_verify(Expected, catch Fun(), Error);
+verify(Expected, Actual, Error) ->
+ do_verify(Expected, Actual, Error).
+
+do_verify({alt, Values}, Value, Error) ->
+ case lists:member(Value, Values) of
+ true -> ok;
+ false -> mnesia:abort(Error)
+ end;
+do_verify(Value, Value, _) ->
+ ok;
+do_verify(_Value, _, Error) ->
+ mnesia:abort(Error).
+
+ensure_writable(Tab) ->
+ case val({Tab, where_to_write}) of
+ [] -> mnesia:abort({read_only, Tab});
+ _ -> ok
+ end.
+
+%% Ensure that all replicas on disk full nodes are active
+ensure_active(Cs) ->
+ ensure_active(Cs, active_replicas).
+
+ensure_active(Cs, What) ->
+ Tab = Cs#cstruct.name,
+ W = {Tab, What},
+ ensure_non_empty(W),
+ Nodes = mnesia_lib:intersect(val({schema, disc_copies}),
+ mnesia_lib:cs_to_nodes(Cs)),
+ case Nodes -- val(W) of
+ [] ->
+ ok;
+ Ns ->
+ Expl = "All replicas on diskfull nodes are not active yet",
+ case val({Tab, local_content}) of
+ true ->
+ case rpc:multicall(Ns, ?MODULE, is_remote_member, [W]) of
+ {Replies, []} ->
+ check_active(Replies, Expl, Tab);
+ {_Replies, BadNs} ->
+ mnesia:abort({not_active, Expl, Tab, BadNs})
+ end;
+ false ->
+ mnesia:abort({not_active, Expl, Tab, Ns})
+ end
+ end.
+
+ensure_non_empty({Tab, Vhat}) ->
+ case val({Tab, Vhat}) of
+ [] -> mnesia:abort({no_exists, Tab});
+ _ -> ok
+ end.
+
+ensure_not_active(Tab = schema, Node) ->
+ Active = val({Tab, active_replicas}),
+ case lists:member(Node, Active) of
+ false when Active =/= [] ->
+ ok;
+ false ->
+ mnesia:abort({no_exists, Tab});
+ true ->
+ Expl = "Mnesia is running",
+ mnesia:abort({active, Expl, Node})
+ end.
+
+is_remote_member(Key) ->
+ IsActive = lists:member(node(), val(Key)),
+ {IsActive, node()}.
+
+check_active([{true, _Node} | Replies], Expl, Tab) ->
+ check_active(Replies, Expl, Tab);
+check_active([{false, Node} | _Replies], Expl, Tab) ->
+ mnesia:abort({not_active, Expl, Tab, [Node]});
+check_active([{badrpc, Reason} | _Replies], Expl, Tab) ->
+ mnesia:abort({not_active, Expl, Tab, Reason});
+check_active([], _Expl, _Tab) ->
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Here's the real interface function to create a table
+
+create_table(TabDef) ->
+ schema_transaction(fun() -> do_multi_create_table(TabDef) end).
+
+%% And the corresponding do routines ....
+
+do_multi_create_table(TabDef) ->
+ get_tid_ts_and_lock(schema, write),
+ ensure_writable(schema),
+ Cs = list2cs(TabDef),
+ case Cs#cstruct.frag_properties of
+ [] ->
+ do_create_table(Cs);
+ _Props ->
+ CsList = mnesia_frag:expand_cstruct(Cs),
+ lists:foreach(fun do_create_table/1, CsList)
+ end,
+ ok.
+
+do_create_table(Cs) ->
+ {_Mod, _Tid, Ts} = get_tid_ts_and_lock(schema, none),
+ Store = Ts#tidstore.store,
+ do_insert_schema_ops(Store, make_create_table(Cs)).
+
+make_create_table(Cs) ->
+ Tab = Cs#cstruct.name,
+ verify(false, check_if_exists(Tab), {already_exists, Tab}),
+ unsafe_make_create_table(Cs).
+
+% unsafe_do_create_table(Cs) ->
+% {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, none),
+% Store = Ts#tidstore.store,
+% do_insert_schema_ops(Store, unsafe_make_create_table(Cs)).
+
+unsafe_make_create_table(Cs) ->
+ {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, none),
+ verify_cstruct(Cs),
+ Tab = Cs#cstruct.name,
+
+ %% Check that we have all disc replica nodes running
+ DiscNodes = Cs#cstruct.disc_copies ++ Cs#cstruct.disc_only_copies,
+ RunningNodes = val({current, db_nodes}),
+ CheckDisc = fun(N) ->
+ verify(true, lists:member(N, RunningNodes),
+ {not_active, Tab, N})
+ end,
+ lists:foreach(CheckDisc, DiscNodes),
+
+ Nodes = mnesia_lib:intersect(mnesia_lib:cs_to_nodes(Cs), RunningNodes),
+ Store = Ts#tidstore.store,
+ mnesia_locker:wlock_no_exist(Tid, Store, Tab, Nodes),
+ [{op, create_table, cs2list(Cs)}].
+
+check_if_exists(Tab) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ {_, _, Ts} = TidTs,
+ Store = Ts#tidstore.store,
+ ets:foldl(
+ fun({op, create_table, [{name, T}|_]}, _Acc) when T==Tab ->
+ true;
+ ({op, delete_table, [{name,T}|_]}, _Acc) when T==Tab ->
+ false;
+ (_Other, Acc) ->
+ Acc
+ end, existed_before(Tab), Store).
+
+existed_before(Tab) ->
+ ('EXIT' =/= element(1, ?catch_val({Tab,cstruct}))).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Delete a table entirely on all nodes.
+
+delete_table(Tab) ->
+ schema_transaction(fun() -> do_delete_table(Tab) end).
+
+do_delete_table(schema) ->
+ mnesia:abort({bad_type, schema});
+do_delete_table(Tab) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ ensure_writable(schema),
+ insert_schema_ops(TidTs, make_delete_table(Tab, whole_table)).
+
+make_delete_table(Tab, Mode) ->
+ case existed_before(Tab) of
+ false ->
+ %% Deleting a table that was created in this very
+ %% schema transaction. Delete all ops in the Store
+ %% that operate on this table. We cannot run a normal
+ %% delete operation, since that involves checking live
+ %% nodes etc.
+ TidTs = get_tid_ts_and_lock(schema, write),
+ {_, _, Ts} = TidTs,
+ Store = Ts#tidstore.store,
+ Deleted = ets:select_delete(
+ Store, [{{op,'$1',[{name,Tab}|'_']},
+ [{'or',
+ {'==','$1',create_table},
+ {'==','$1',delete_table}}], [true]}]),
+ ets:select_delete(
+ Store, [{{op,'$1',[{name,Tab}|'_'],'_'},
+ [{'or',
+ {'==','$1',write_table_property},
+ {'==','$1',delete_table_property}}],
+ [true]}]),
+ case Deleted of
+ 0 -> mnesia:abort({no_exists, Tab});
+ _ -> []
+ end;
+ true ->
+ case Mode of
+ whole_table ->
+ case val({Tab, frag_properties}) of
+ [] ->
+ [make_delete_table2(Tab)];
+ _Props ->
+ %% Check if it is a base table
+ mnesia_frag:lookup_frag_hash(Tab),
+
+ %% Check for foreigners
+ F = mnesia_frag:lookup_foreigners(Tab),
+ verify([], F, {combine_error,
+ Tab, "Too many foreigners", F}),
+ [make_delete_table2(T) ||
+ T <- mnesia_frag:frag_names(Tab)]
+ end;
+ single_frag ->
+ [make_delete_table2(Tab)]
+ end
+ end.
+
+make_delete_table2(Tab) ->
+ get_tid_ts_and_lock(Tab, write),
+ Cs = val({Tab, cstruct}),
+ ensure_active(Cs),
+ ensure_writable(Tab),
+ {op, delete_table, cs2list(Cs)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Change fragmentation of a table
+
+change_table_frag(Tab, Change) ->
+ schema_transaction(fun() -> do_change_table_frag(Tab, Change) end).
+
+do_change_table_frag(Tab, Change) when is_atom(Tab), Tab /= schema ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ Ops = mnesia_frag:change_table_frag(Tab, Change),
+ [insert_schema_ops(TidTs, Op) || Op <- Ops],
+ ok;
+do_change_table_frag(Tab, _Change) ->
+ mnesia:abort({bad_type, Tab}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Clear a table
+
+%% No need for a schema transaction
+clear_table(Tab) ->
+ schema_transaction(fun() -> do_clear_table(Tab) end).
+
+do_clear_table(schema) ->
+ mnesia:abort({bad_type, schema});
+do_clear_table(Tab) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, write),
+ insert_schema_ops(TidTs, make_clear_table(Tab)).
+
+make_clear_table(Tab) ->
+ Cs = val({Tab, cstruct}),
+ ensure_writable(Tab),
+ [{op, clear_table, cs2list(Cs)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+add_table_copy(Tab, Node, Storage) ->
+ schema_transaction(fun() -> do_add_table_copy(Tab, Node, Storage) end).
+
+do_add_table_copy(Tab, Node, Storage) when is_atom(Tab), is_atom(Node) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ insert_schema_ops(TidTs, make_add_table_copy(Tab, Node, Storage));
+do_add_table_copy(Tab,Node,_) ->
+ mnesia:abort({badarg, Tab, Node}).
+
+make_add_table_copy(Tab, Node, Storage) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ Ns = mnesia_lib:cs_to_nodes(Cs),
+ verify(false, lists:member(Node, Ns), {already_exists, Tab, Node}),
+ Cs2 = new_cs(Cs, Node, Storage, add),
+ verify_cstruct(Cs2),
+
+ %% Check storage and if node is running
+ IsRunning = lists:member(Node, val({current, db_nodes})),
+ if
+ Tab == schema ->
+ if
+ Storage /= ram_copies ->
+ mnesia:abort({badarg, Tab, Storage});
+ IsRunning == true ->
+ mnesia:abort({already_exists, Tab, Node});
+ true ->
+ ignore
+ end;
+ Storage == ram_copies ->
+ ignore;
+ IsRunning == true ->
+ ignore;
+ IsRunning == false ->
+ mnesia:abort({not_active, schema, Node})
+ end,
+ [{op, add_table_copy, Storage, Node, cs2list(Cs2)}].
+
+del_table_copy(Tab, Node) ->
+ schema_transaction(fun() -> do_del_table_copy(Tab, Node) end).
+
+do_del_table_copy(Tab, Node) when is_atom(Node) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+%% get_tid_ts_and_lock(Tab, write),
+ insert_schema_ops(TidTs, make_del_table_copy(Tab, Node));
+do_del_table_copy(Tab, Node) ->
+ mnesia:abort({badarg, Tab, Node}).
+
+make_del_table_copy(Tab, Node) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ Storage = mnesia_lib:schema_cs_to_storage_type(Node, Cs),
+ Cs2 = new_cs(Cs, Node, Storage, del),
+ case mnesia_lib:cs_to_nodes(Cs2) of
+ [] when Tab == schema ->
+ mnesia:abort({combine_error, Tab, "Last replica"});
+ [] ->
+ ensure_active(Cs),
+ dbg_out("Last replica deleted in table ~p~n", [Tab]),
+ make_delete_table(Tab, whole_table);
+ _ when Tab == schema ->
+ %% ensure_active(Cs2),
+ ensure_not_active(Tab, Node),
+ verify_cstruct(Cs2),
+ Ops = remove_node_from_tabs(val({schema, tables}), Node),
+ [{op, del_table_copy, ram_copies, Node, cs2list(Cs2)} | Ops];
+ _ ->
+ ensure_active(Cs),
+ verify_cstruct(Cs2),
+ [{op, del_table_copy, Storage, Node, cs2list(Cs2)}]
+ end.
+
+remove_node_from_tabs([], _Node) ->
+ [];
+remove_node_from_tabs([schema|Rest], Node) ->
+ remove_node_from_tabs(Rest, Node);
+remove_node_from_tabs([Tab|Rest], Node) ->
+ {Cs, IsFragModified} =
+ mnesia_frag:remove_node(Node, incr_version(val({Tab, cstruct}))),
+ case mnesia_lib:schema_cs_to_storage_type(Node, Cs) of
+ unknown ->
+ case IsFragModified of
+ true ->
+ [{op, change_table_frag, {del_node, Node}, cs2list(Cs)} |
+ remove_node_from_tabs(Rest, Node)];
+ false ->
+ remove_node_from_tabs(Rest, Node)
+ end;
+ Storage ->
+ Cs2 = new_cs(Cs, Node, Storage, del),
+ case mnesia_lib:cs_to_nodes(Cs2) of
+ [] ->
+ [{op, delete_table, cs2list(Cs)} |
+ remove_node_from_tabs(Rest, Node)];
+ _Ns ->
+ verify_cstruct(Cs2),
+ [{op, del_table_copy, ram_copies, Node, cs2list(Cs2)}|
+ remove_node_from_tabs(Rest, Node)]
+ end
+ end.
+
+new_cs(Cs, Node, ram_copies, add) ->
+ Cs#cstruct{ram_copies = opt_add(Node, Cs#cstruct.ram_copies)};
+new_cs(Cs, Node, disc_copies, add) ->
+ Cs#cstruct{disc_copies = opt_add(Node, Cs#cstruct.disc_copies)};
+new_cs(Cs, Node, disc_only_copies, add) ->
+ Cs#cstruct{disc_only_copies = opt_add(Node, Cs#cstruct.disc_only_copies)};
+new_cs(Cs, Node, ram_copies, del) ->
+ Cs#cstruct{ram_copies = lists:delete(Node , Cs#cstruct.ram_copies)};
+new_cs(Cs, Node, disc_copies, del) ->
+ Cs#cstruct{disc_copies = lists:delete(Node , Cs#cstruct.disc_copies)};
+new_cs(Cs, Node, disc_only_copies, del) ->
+ Cs#cstruct{disc_only_copies =
+ lists:delete(Node , Cs#cstruct.disc_only_copies)};
+new_cs(Cs, _Node, Storage, _Op) ->
+ mnesia:abort({badarg, Cs#cstruct.name, Storage}).
+
+
+opt_add(N, L) -> [N | lists:delete(N, L)].
+
+move_table(Tab, FromNode, ToNode) ->
+ schema_transaction(fun() -> do_move_table(Tab, FromNode, ToNode) end).
+
+do_move_table(schema, _FromNode, _ToNode) ->
+ mnesia:abort({bad_type, schema});
+do_move_table(Tab, FromNode, ToNode) when is_atom(FromNode), is_atom(ToNode) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ insert_schema_ops(TidTs, make_move_table(Tab, FromNode, ToNode));
+do_move_table(Tab, FromNode, ToNode) ->
+ mnesia:abort({badarg, Tab, FromNode, ToNode}).
+
+make_move_table(Tab, FromNode, ToNode) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ Ns = mnesia_lib:cs_to_nodes(Cs),
+ verify(false, lists:member(ToNode, Ns), {already_exists, Tab, ToNode}),
+ verify(true, lists:member(FromNode, val({Tab, where_to_write})),
+ {not_active, Tab, FromNode}),
+ verify(false, val({Tab,local_content}),
+ {"Cannot move table with local content", Tab}),
+ ensure_active(Cs),
+ Running = val({current, db_nodes}),
+ Storage = mnesia_lib:schema_cs_to_storage_type(FromNode, Cs),
+ verify(true, lists:member(ToNode, Running), {not_active, schema, ToNode}),
+
+ Cs2 = new_cs(Cs, ToNode, Storage, add),
+ Cs3 = new_cs(Cs2, FromNode, Storage, del),
+ verify_cstruct(Cs3),
+ [{op, add_table_copy, Storage, ToNode, cs2list(Cs2)},
+ {op, sync_trans},
+ {op, del_table_copy, Storage, FromNode, cs2list(Cs3)}].
+
+%% end of functions to add and delete nodes to tables
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+
+change_table_copy_type(Tab, Node, ToS) ->
+ schema_transaction(fun() -> do_change_table_copy_type(Tab, Node, ToS) end).
+
+do_change_table_copy_type(Tab, Node, ToS) when is_atom(Node) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, write), % ensure global sync
+ %% get_tid_ts_and_lock(Tab, read),
+ insert_schema_ops(TidTs, make_change_table_copy_type(Tab, Node, ToS));
+do_change_table_copy_type(Tab, Node, _ToS) ->
+ mnesia:abort({badarg, Tab, Node}).
+
+make_change_table_copy_type(Tab, Node, unknown) ->
+ make_del_table_copy(Tab, Node);
+make_change_table_copy_type(Tab, Node, ToS) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ FromS = mnesia_lib:storage_type_at_node(Node, Tab),
+
+ case compare_storage_type(false, FromS, ToS) of
+ {same, _} ->
+ mnesia:abort({already_exists, Tab, Node, ToS});
+ {diff, _} ->
+ ignore;
+ incompatible ->
+ ensure_active(Cs)
+ end,
+
+ Cs2 = new_cs(Cs, Node, FromS, del),
+ Cs3 = new_cs(Cs2, Node, ToS, add),
+ verify_cstruct(Cs3),
+
+ [{op, change_table_copy_type, Node, FromS, ToS, cs2list(Cs3)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% change index functions ....
+%% Pos is allready added by 1 in both of these functions
+
+add_table_index(Tab, Pos) ->
+ schema_transaction(fun() -> do_add_table_index(Tab, Pos) end).
+
+do_add_table_index(schema, _Attr) ->
+ mnesia:abort({bad_type, schema});
+do_add_table_index(Tab, Attr) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, read),
+ Pos = attr_tab_to_pos(Tab, Attr),
+ insert_schema_ops(TidTs, make_add_table_index(Tab, Pos)).
+
+make_add_table_index(Tab, Pos) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ Ix = Cs#cstruct.index,
+ verify(false, lists:member(Pos, Ix), {already_exists, Tab, Pos}),
+ Ix2 = lists:sort([Pos | Ix]),
+ Cs2 = Cs#cstruct{index = Ix2},
+ verify_cstruct(Cs2),
+ [{op, add_index, Pos, cs2list(Cs2)}].
+
+del_table_index(Tab, Pos) ->
+ schema_transaction(fun() -> do_del_table_index(Tab, Pos) end).
+
+do_del_table_index(schema, _Attr) ->
+ mnesia:abort({bad_type, schema});
+do_del_table_index(Tab, Attr) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, read),
+ Pos = attr_tab_to_pos(Tab, Attr),
+ insert_schema_ops(TidTs, make_del_table_index(Tab, Pos)).
+
+make_del_table_index(Tab, Pos) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ Ix = Cs#cstruct.index,
+ verify(true, lists:member(Pos, Ix), {no_exists, Tab, Pos}),
+ Cs2 = Cs#cstruct{index = lists:delete(Pos, Ix)},
+ verify_cstruct(Cs2),
+ [{op, del_index, Pos, cs2list(Cs2)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+add_snmp(Tab, Ustruct) ->
+ schema_transaction(fun() -> do_add_snmp(Tab, Ustruct) end).
+
+do_add_snmp(schema, _Ustruct) ->
+ mnesia:abort({bad_type, schema});
+do_add_snmp(Tab, Ustruct) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, read),
+ insert_schema_ops(TidTs, make_add_snmp(Tab, Ustruct)).
+
+make_add_snmp(Tab, Ustruct) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ verify([], Cs#cstruct.snmp, {already_exists, Tab, snmp}),
+ Error = {badarg, Tab, snmp, Ustruct},
+ verify(true, mnesia_snmp_hook:check_ustruct(Ustruct), Error),
+ Cs2 = Cs#cstruct{snmp = Ustruct},
+ verify_cstruct(Cs2),
+ [{op, add_snmp, Ustruct, cs2list(Cs2)}].
+
+del_snmp(Tab) ->
+ schema_transaction(fun() -> do_del_snmp(Tab) end).
+
+do_del_snmp(schema) ->
+ mnesia:abort({bad_type, schema});
+do_del_snmp(Tab) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, read),
+ insert_schema_ops(TidTs, make_del_snmp(Tab)).
+
+make_del_snmp(Tab) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ Cs2 = Cs#cstruct{snmp = []},
+ verify_cstruct(Cs2),
+ [{op, del_snmp, cs2list(Cs2)}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+
+transform_table(Tab, Fun, NewAttrs, NewRecName)
+ when is_function(Fun), is_list(NewAttrs), is_atom(NewRecName) ->
+ schema_transaction(fun() -> do_transform_table(Tab, Fun, NewAttrs, NewRecName) end);
+
+transform_table(Tab, ignore, NewAttrs, NewRecName)
+ when is_list(NewAttrs), is_atom(NewRecName) ->
+ schema_transaction(fun() -> do_transform_table(Tab, ignore, NewAttrs, NewRecName) end);
+
+transform_table(Tab, Fun, NewAttrs, NewRecName) ->
+ {aborted,{bad_type, Tab, Fun, NewAttrs, NewRecName}}.
+
+do_transform_table(schema, _Fun, _NewAttrs, _NewRecName) ->
+ mnesia:abort({bad_type, schema});
+do_transform_table(Tab, Fun, NewAttrs, NewRecName) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, write),
+ insert_schema_ops(TidTs, make_transform(Tab, Fun, NewAttrs, NewRecName)).
+
+make_transform(Tab, Fun, NewAttrs, NewRecName) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ ensure_writable(Tab),
+ case mnesia_lib:val({Tab, index}) of
+ [] ->
+ Cs2 = Cs#cstruct{attributes = NewAttrs, record_name = NewRecName},
+ verify_cstruct(Cs2),
+ [{op, transform, Fun, cs2list(Cs2)}];
+ PosList ->
+ DelIdx = fun(Pos, Ncs) ->
+ Ix = Ncs#cstruct.index,
+ Ncs1 = Ncs#cstruct{index = lists:delete(Pos, Ix)},
+ Op = {op, del_index, Pos, cs2list(Ncs1)},
+ {Op, Ncs1}
+ end,
+ AddIdx = fun(Pos, Ncs) ->
+ Ix = Ncs#cstruct.index,
+ Ix2 = lists:sort([Pos | Ix]),
+ Ncs1 = Ncs#cstruct{index = Ix2},
+ Op = {op, add_index, Pos, cs2list(Ncs1)},
+ {Op, Ncs1}
+ end,
+ {DelOps, Cs1} = lists:mapfoldl(DelIdx, Cs, PosList),
+ Cs2 = Cs1#cstruct{attributes = NewAttrs, record_name = NewRecName},
+ {AddOps, Cs3} = lists:mapfoldl(AddIdx, Cs2, PosList),
+ verify_cstruct(Cs3),
+ lists:flatten([DelOps, {op, transform, Fun, cs2list(Cs2)}, AddOps])
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+
+change_table_access_mode(Tab, Mode) ->
+ schema_transaction(fun() -> do_change_table_access_mode(Tab, Mode) end).
+
+do_change_table_access_mode(Tab, Mode) ->
+ {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, write),
+ Store = Ts#tidstore.store,
+ mnesia_locker:wlock_no_exist(Tid, Store, schema, val({schema, active_replicas})),
+ mnesia_locker:wlock_no_exist(Tid, Store, Tab, val({Tab, active_replicas})),
+ do_insert_schema_ops(Store, make_change_table_access_mode(Tab, Mode)).
+
+make_change_table_access_mode(Tab, Mode) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ OldMode = Cs#cstruct.access_mode,
+ verify(false, OldMode == Mode, {already_exists, Tab, Mode}),
+ Cs2 = Cs#cstruct{access_mode = Mode},
+ verify_cstruct(Cs2),
+ [{op, change_table_access_mode, cs2list(Cs2), OldMode, Mode}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+change_table_load_order(Tab, LoadOrder) ->
+ schema_transaction(fun() -> do_change_table_load_order(Tab, LoadOrder) end).
+
+do_change_table_load_order(schema, _LoadOrder) ->
+ mnesia:abort({bad_type, schema});
+do_change_table_load_order(Tab, LoadOrder) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ get_tid_ts_and_lock(Tab, none),
+ insert_schema_ops(TidTs, make_change_table_load_order(Tab, LoadOrder)).
+
+make_change_table_load_order(Tab, LoadOrder) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ OldLoadOrder = Cs#cstruct.load_order,
+ Cs2 = Cs#cstruct{load_order = LoadOrder},
+ verify_cstruct(Cs2),
+ [{op, change_table_load_order, cs2list(Cs2), OldLoadOrder, LoadOrder}].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+write_table_property(Tab, Prop) when is_tuple(Prop), size(Prop) >= 1 ->
+ schema_transaction(fun() -> do_write_table_property(Tab, Prop) end);
+write_table_property(Tab, Prop) ->
+ {aborted, {bad_type, Tab, Prop}}.
+do_write_table_property(Tab, Prop) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ {_, _, Ts} = TidTs,
+ Store = Ts#tidstore.store,
+ case change_prop_in_existing_op(Tab, Prop, write_property, Store) of
+ true ->
+ dbg_out("change_prop_in_existing_op"
+ "(~p,~p,write_property,Store) -> true~n",
+ [Tab,Prop]),
+ %% we have merged the table prop into the create_table op
+ ok;
+ false ->
+ dbg_out("change_prop_in_existing_op"
+ "(~p,~p,write_property,Store) -> false~n",
+ [Tab,Prop]),
+ %% this must be an existing table
+ get_tid_ts_and_lock(Tab, none),
+ insert_schema_ops(TidTs, make_write_table_properties(Tab, [Prop]))
+ end.
+
+make_write_table_properties(Tab, Props) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ make_write_table_properties(Tab, Props, Cs).
+
+make_write_table_properties(Tab, [Prop | Props], Cs) ->
+ OldProps = Cs#cstruct.user_properties,
+ PropKey = element(1, Prop),
+ DelProps = lists:keydelete(PropKey, 1, OldProps),
+ MergedProps = lists:merge(DelProps, [Prop]),
+ Cs2 = Cs#cstruct{user_properties = MergedProps},
+ verify_cstruct(Cs2),
+ [{op, write_property, cs2list(Cs2), Prop} |
+ make_write_table_properties(Tab, Props, Cs2)];
+make_write_table_properties(_Tab, [], _Cs) ->
+ [].
+
+change_prop_in_existing_op(Tab, Prop, How, Store) ->
+ Ops = ets:match_object(Store, '_'),
+ case update_existing_op(Ops, Tab, Prop, How, []) of
+ {true, Ops1} ->
+ ets:match_delete(Store, '_'),
+ [ets:insert(Store, Op) || Op <- Ops1],
+ true;
+ false ->
+ false
+ end.
+
+update_existing_op([{op, Op, L = [{name,Tab}|_], _OldProp}|Ops],
+ Tab, Prop, How, Acc) when Op == write_property;
+ Op == delete_property ->
+ %% Apparently, mnesia_dumper doesn't care about OldProp here -- just L,
+ %% so we will throw away OldProp (not that it matters...) and insert Prop.
+ %% as element 3.
+ L1 = insert_prop(Prop, L, How),
+ NewOp = {op, How, L1, Prop},
+ {true, lists:reverse(Acc) ++ [NewOp|Ops]};
+update_existing_op([Op = {op, create_table, L}|Ops], Tab, Prop, How, Acc) ->
+ case lists:keysearch(name, 1, L) of
+ {value, {_, Tab}} ->
+ %% Tab is being created here -- insert Prop into L
+ L1 = insert_prop(Prop, L, How),
+ {true, lists:reverse(Acc) ++ [{op, create_table, L1}|Ops]};
+ _ ->
+ update_existing_op(Ops, Tab, Prop, How, [Op|Acc])
+ end;
+update_existing_op([Op|Ops], Tab, Prop, How, Acc) ->
+ update_existing_op(Ops, Tab, Prop, How, [Op|Acc]);
+update_existing_op([], _, _, _, _) ->
+ false.
+
+do_read_table_property(Tab, Key) ->
+ TidTs = get_tid_ts_and_lock(schema, read),
+ {_, _, Ts} = TidTs,
+ Store = Ts#tidstore.store,
+ Props = ets:foldl(
+ fun({op, create_table, [{name, T}|Opts]}, _Acc)
+ when T==Tab ->
+ find_props(Opts);
+ ({op, Op, [{name,T}|Opts], _Prop}, _Acc)
+ when T==Tab, Op==write_property; Op==delete_property ->
+ find_props(Opts);
+ ({op, delete_table, [{name,T}|_]}, _Acc)
+ when T==Tab ->
+ [];
+ (_Other, Acc) ->
+ Acc
+ end, [], Store),
+ case lists:keysearch(Key, 1, Props) of
+ {value, Property} ->
+ Property;
+ false ->
+ undefined
+ end.
+
+
+%% perhaps a misnomer. How could also be delete_property... never mind.
+%% Returns the modified L.
+insert_prop(Prop, L, How) ->
+ Prev = find_props(L),
+ MergedProps = merge_with_previous(How, Prop, Prev),
+ replace_props(L, MergedProps).
+
+find_props([{user_properties, P}|_]) -> P;
+find_props([_H|T]) -> find_props(T).
+%% we shouldn't reach []
+
+replace_props([{user_properties, _}|T], P) -> [{user_properties, P}|T];
+replace_props([H|T], P) -> [H|replace_props(T, P)].
+%% again, we shouldn't reach []
+
+merge_with_previous(write_property, Prop, Prev) ->
+ Key = element(1, Prop),
+ Prev1 = lists:keydelete(Key, 1, Prev),
+ lists:sort([Prop|Prev1]);
+merge_with_previous(delete_property, PropKey, Prev) ->
+ lists:keydelete(PropKey, 1, Prev).
+
+delete_table_property(Tab, PropKey) ->
+ schema_transaction(fun() -> do_delete_table_property(Tab, PropKey) end).
+
+do_delete_table_property(Tab, PropKey) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ {_, _, Ts} = TidTs,
+ Store = Ts#tidstore.store,
+ case change_prop_in_existing_op(Tab, PropKey, delete_property, Store) of
+ true ->
+ dbg_out("change_prop_in_existing_op"
+ "(~p,~p,delete_property,Store) -> true~n",
+ [Tab,PropKey]),
+ %% we have merged the table prop into the create_table op
+ ok;
+ false ->
+ dbg_out("change_prop_in_existing_op"
+ "(~p,~p,delete_property,Store) -> false~n",
+ [Tab,PropKey]),
+ %% this must be an existing table
+ get_tid_ts_and_lock(Tab, none),
+ insert_schema_ops(TidTs,
+ make_delete_table_properties(Tab, [PropKey]))
+ end.
+
+make_delete_table_properties(Tab, PropKeys) ->
+ ensure_writable(schema),
+ Cs = incr_version(val({Tab, cstruct})),
+ ensure_active(Cs),
+ make_delete_table_properties(Tab, PropKeys, Cs).
+
+make_delete_table_properties(Tab, [PropKey | PropKeys], Cs) ->
+ OldProps = Cs#cstruct.user_properties,
+ Props = lists:keydelete(PropKey, 1, OldProps),
+ Cs2 = Cs#cstruct{user_properties = Props},
+ verify_cstruct(Cs2),
+ [{op, delete_property, cs2list(Cs2), PropKey} |
+ make_delete_table_properties(Tab, PropKeys, Cs2)];
+make_delete_table_properties(_Tab, [], _Cs) ->
+ [].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Ensure that the transaction can be committed even
+%% if the node crashes and Mnesia is restarted
+prepare_commit(Tid, Commit, WaitFor) ->
+ case Commit#commit.schema_ops of
+ [] ->
+ {false, Commit, optional};
+ OrigOps ->
+ {Modified, Ops, DumperMode} =
+ prepare_ops(Tid, OrigOps, WaitFor, false, [], optional),
+ InitBy = schema_prepare,
+ GoodRes = {Modified,
+ Commit#commit{schema_ops = lists:reverse(Ops)},
+ DumperMode},
+ case DumperMode of
+ optional ->
+ dbg_out("Transaction log dump skipped (~p): ~w~n",
+ [DumperMode, InitBy]);
+ mandatory ->
+ case mnesia_controller:sync_dump_log(InitBy) of
+ dumped ->
+ GoodRes;
+ {error, Reason} ->
+ mnesia:abort(Reason)
+ end
+ end,
+ case Ops of
+ [] ->
+ ignore;
+ _ ->
+ %% We need to grab a dumper lock here, the log may not
+ %% be dumped by others, during the schema commit phase.
+ mnesia_controller:wait_for_schema_commit_lock()
+ end,
+ GoodRes
+ end.
+
+prepare_ops(Tid, [Op | Ops], WaitFor, Changed, Acc, DumperMode) ->
+ case prepare_op(Tid, Op, WaitFor) of
+ {true, mandatory} ->
+ prepare_ops(Tid, Ops, WaitFor, Changed, [Op | Acc], mandatory);
+ {true, optional} ->
+ prepare_ops(Tid, Ops, WaitFor, Changed, [Op | Acc], DumperMode);
+ {true, Ops2, mandatory} ->
+ prepare_ops(Tid, Ops, WaitFor, true, Ops2 ++ Acc, mandatory);
+ {true, Ops2, optional} ->
+ prepare_ops(Tid, Ops, WaitFor, true, Ops2 ++ Acc, DumperMode);
+ {false, optional} ->
+ prepare_ops(Tid, Ops, WaitFor, true, Acc, DumperMode)
+ end;
+prepare_ops(_Tid, [], _WaitFor, Changed, Acc, DumperMode) ->
+ {Changed, Acc, DumperMode}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Prepare for commit
+%% returns true if Op should be included, i.e. unmodified
+%% {true, Operation} if NewRecs should be included, i.e. modified
+%% false if Op should NOT be included, i.e. modified
+%%
+prepare_op(_Tid, {op, rec, unknown, Rec}, _WaitFor) ->
+ {{Tab, Key}, Items, _Op} = Rec,
+ case val({Tab, storage_type}) of
+ unknown ->
+ {false, optional};
+ Storage ->
+ mnesia_tm:prepare_snmp(Tab, Key, Items), % May exit
+ {true, [{op, rec, Storage, Rec}], optional}
+ end;
+
+prepare_op(_Tid, {op, announce_im_running, Node, SchemaDef, Running, RemoteRunning}, _WaitFor) ->
+ SchemaCs = list2cs(SchemaDef),
+ if
+ Node == node() -> %% Announce has already run on local node
+ ignore; %% from do_merge_schema
+ true ->
+ NewNodes = mnesia_lib:uniq(Running++RemoteRunning) -- val({current,db_nodes}),
+ mnesia_lib:set(prepare_op, {announce_im_running,NewNodes}),
+ announce_im_running(NewNodes, SchemaCs)
+ end,
+ {false, optional};
+
+prepare_op(_Tid, {op, sync_trans}, {part, CoordPid}) ->
+ CoordPid ! {sync_trans, self()},
+ receive
+ {sync_trans, CoordPid} ->
+ {false, optional};
+ {mnesia_down, _Node} = Else ->
+ mnesia_lib:verbose("sync_op terminated due to ~p~n", [Else]),
+ mnesia:abort(Else);
+ {'EXIT', _, _} = Else ->
+ mnesia_lib:verbose("sync_op terminated due to ~p~n", [Else]),
+ mnesia:abort(Else)
+ end;
+
+prepare_op(_Tid, {op, sync_trans}, {coord, Nodes}) ->
+ case receive_sync(Nodes, []) of
+ {abort, Reason} ->
+ mnesia_lib:verbose("sync_op terminated due to ~p~n", [Reason]),
+ mnesia:abort(Reason);
+ Pids ->
+ [Pid ! {sync_trans, self()} || Pid <- Pids],
+ {false, optional}
+ end;
+prepare_op(Tid, {op, create_table, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
+ UseDir = mnesia_monitor:use_dir(),
+ Tab = Cs#cstruct.name,
+ case Storage of
+ disc_copies when UseDir == false ->
+ UseDirReason = {bad_type, Tab, Storage, node()},
+ mnesia:abort(UseDirReason);
+ disc_only_copies when UseDir == false ->
+ UseDirReason = {bad_type, Tab, Storage, node()},
+ mnesia:abort(UseDirReason);
+ ram_copies ->
+ mnesia_lib:set({Tab, create_table},true),
+ create_ram_table(Tab, Cs#cstruct.type),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional};
+ disc_copies ->
+ mnesia_lib:set({Tab, create_table},true),
+ create_ram_table(Tab, Cs#cstruct.type),
+ create_disc_table(Tab),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional};
+ disc_only_copies ->
+ mnesia_lib:set({Tab, create_table},true),
+ create_disc_only_table(Tab,Cs#cstruct.type),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional};
+ unknown -> %% No replica on this node
+ mnesia_lib:set({Tab, create_table},true),
+ insert_cstruct(Tid, Cs, false),
+ {true, optional}
+ end;
+
+prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+
+ if
+ Tab == schema ->
+ {true, optional};
+
+ Node == node() ->
+ case mnesia_lib:val({schema, storage_type}) of
+ ram_copies when Storage /= ram_copies ->
+ Error = {combine_error, Tab, "has no disc", Node},
+ mnesia:abort(Error);
+ _ ->
+ ok
+ end,
+ %% Tables are created by mnesia_loader get_network code
+ insert_cstruct(Tid, Cs, true),
+ case mnesia_controller:get_network_copy(Tab, Cs) of
+ {loaded, ok} ->
+ {true, optional};
+ {not_loaded, ErrReason} ->
+ Reason = {system_limit, Tab, {Node, ErrReason}},
+ mnesia:abort(Reason)
+ end;
+ Node /= node() ->
+ %% Verify that ram table not has been dumped to disc
+ if
+ Storage /= ram_copies ->
+ case mnesia_lib:schema_cs_to_storage_type(node(), Cs) of
+ ram_copies ->
+ Dat = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dat) of
+ true ->
+ mnesia:abort({combine_error, Tab, Storage,
+ "Table dumped to disc", node()});
+ false ->
+ ok
+ end;
+ _ ->
+ ok
+ end;
+ true ->
+ ok
+ end,
+ insert_cstruct(Tid, Cs, true),
+ {true, optional}
+ end;
+
+prepare_op(Tid, {op, del_table_copy, _Storage, Node, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+
+ if
+ %% Schema table lock is always required to run a schema op.
+ %% No need to look it.
+ node(Tid#tid.pid) == node(), Tab /= schema ->
+ Self = self(),
+ Pid = spawn_link(fun() -> lock_del_table(Tab, Node, Cs, Self) end),
+ put(mnesia_lock, Pid),
+ receive
+ {Pid, updated} ->
+ {true, optional};
+ {Pid, FailReason} ->
+ mnesia:abort(FailReason);
+ {'EXIT', Pid, Reason} ->
+ mnesia:abort(Reason)
+ end;
+ true ->
+ {true, optional}
+ end;
+
+prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef}, _WaitFor)
+ when N == node() ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+
+ NotActive = mnesia_lib:not_active_here(Tab),
+
+ if
+ NotActive == true ->
+ mnesia:abort({not_active, Tab, node()});
+
+ Tab == schema ->
+ case {FromS, ToS} of
+ {ram_copies, disc_copies} ->
+ case mnesia:system_info(schema_location) of
+ opt_disc ->
+ ignore;
+ _ ->
+ mnesia:abort({combine_error, Tab, node(),
+ "schema_location must be opt_disc"})
+ end,
+ Dir = mnesia_lib:dir(),
+ case opt_create_dir(true, Dir) of
+ ok ->
+ purge_dir(Dir, []),
+ mnesia_log:purge_all_logs(),
+ set(use_dir, true),
+ mnesia_log:init(),
+ Ns = val({current, db_nodes}), %mnesia_lib:running_nodes(),
+ F = fun(U) -> mnesia_recover:log_mnesia_up(U) end,
+ lists:foreach(F, Ns),
+
+ mnesia_dumper:raw_named_dump_table(Tab, dmp),
+ mnesia_checkpoint:tm_change_table_copy_type(Tab, FromS, ToS);
+ {error, Reason} ->
+ mnesia:abort(Reason)
+ end;
+ {disc_copies, ram_copies} ->
+ Ltabs = val({schema, local_tables}) -- [schema],
+ Dtabs = [L || L <- Ltabs,
+ val({L, storage_type}) /= ram_copies],
+ verify([], Dtabs, {"Disc resident tables", Dtabs, N});
+ _ ->
+ mnesia:abort({combine_error, Tab, ToS})
+ end;
+
+ FromS == ram_copies ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Dat = mnesia_lib:tab2dcd(Tab),
+ case mnesia_lib:exists(Dat) of
+ true ->
+ mnesia:abort({combine_error, Tab, node(),
+ "Table dump exists"});
+ false ->
+ case ToS of
+ disc_copies ->
+ mnesia_log:ets2dcd(Tab, dmp);
+ disc_only_copies ->
+ mnesia_dumper:raw_named_dump_table(Tab, dmp)
+ end,
+ mnesia_checkpoint:tm_change_table_copy_type(Tab, FromS, ToS)
+ end;
+ false ->
+ mnesia:abort({has_no_disc, node()})
+ end;
+
+ FromS == disc_copies, ToS == disc_only_copies ->
+ mnesia_dumper:raw_named_dump_table(Tab, dmp);
+ FromS == disc_only_copies ->
+ Type = Cs#cstruct.type,
+ create_ram_table(Tab, Type),
+ Datname = mnesia_lib:tab2dat(Tab),
+ Repair = mnesia_monitor:get_env(auto_repair),
+ case mnesia_lib:dets_to_ets(Tab, Tab, Datname, Type, Repair, no) of
+ loaded -> ok;
+ Reason ->
+ Err = "Failed to copy disc data to ram",
+ mnesia:abort({system_limit, Tab, {Err,Reason}})
+ end;
+ true ->
+ ignore
+ end,
+ {true, mandatory};
+
+prepare_op(_Tid, {op, change_table_copy_type, N, _FromS, _ToS, _TabDef}, _WaitFor)
+ when N /= node() ->
+ {true, mandatory};
+
+prepare_op(_Tid, {op, delete_table, _TabDef}, _WaitFor) ->
+ {true, mandatory};
+
+prepare_op(_Tid, {op, dump_table, unknown, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ case lists:member(node(), Cs#cstruct.ram_copies) of
+ true ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ mnesia_log:ets2dcd(Tab, dmp),
+ Size = mnesia:table_info(Tab, size),
+ {true, [{op, dump_table, Size, TabDef}], optional};
+ false ->
+ mnesia:abort({has_no_disc, node()})
+ end;
+ false ->
+ {false, optional}
+ end;
+
+prepare_op(_Tid, {op, add_snmp, Ustruct, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ {true, optional};
+ Storage ->
+ Tab = Cs#cstruct.name,
+ Stab = mnesia_snmp_hook:create_table(Ustruct, Tab, Storage),
+ mnesia_lib:set({Tab, {index, snmp}}, Stab),
+ {true, optional}
+ end;
+
+prepare_op(_Tid, {op, transform, ignore, _TabDef}, _WaitFor) ->
+ {true, mandatory}; %% Apply schema changes only.
+prepare_op(_Tid, {op, transform, Fun, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ {true, mandatory};
+ Storage ->
+ Tab = Cs#cstruct.name,
+ RecName = Cs#cstruct.record_name,
+ Type = Cs#cstruct.type,
+ NewArity = length(Cs#cstruct.attributes) + 1,
+ mnesia_lib:db_fixtable(Storage, Tab, true),
+ Key = mnesia_lib:db_first(Tab),
+ Op = {op, transform, Fun, TabDef},
+ case catch transform_objs(Fun, Tab, RecName,
+ Key, NewArity, Storage, Type, [Op]) of
+ {'EXIT', Reason} ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ exit({"Bad transform function", Tab, Fun, node(), Reason});
+ Objs ->
+ mnesia_lib:db_fixtable(Storage, Tab, false),
+ {true, Objs, mandatory}
+ end
+ end;
+
+prepare_op(_Tid, {op, merge_schema, TabDef}, _WaitFor) ->
+ Cs = list2cs(TabDef),
+ case verify_merge(Cs) of
+ ok ->
+ {true, optional};
+ Error ->
+ verbose("Merge_Schema ~p failed on ~p: ~p~n", [_Tid,node(),Error]),
+ mnesia:abort({bad_commit, Error})
+ end;
+prepare_op(_Tid, _Op, _WaitFor) ->
+ {true, optional}.
+
+create_ram_table(Tab, Type) ->
+ Args = [{keypos, 2}, public, named_table, Type],
+ case mnesia_monitor:unsafe_mktab(Tab, Args) of
+ Tab ->
+ ok;
+ {error,Reason} ->
+ Err = "Failed to create ets table",
+ mnesia:abort({system_limit, Tab, {Err,Reason}})
+ end.
+create_disc_table(Tab) ->
+ File = mnesia_lib:tab2dcd(Tab),
+ file:delete(File),
+ FArg = [{file, File}, {name, {mnesia,create}},
+ {repair, false}, {mode, read_write}],
+ case mnesia_monitor:open_log(FArg) of
+ {ok,Log} ->
+ mnesia_monitor:unsafe_close_log(Log),
+ ok;
+ {error,Reason} ->
+ Err = "Failed to create disc table",
+ mnesia:abort({system_limit, Tab, {Err,Reason}})
+ end.
+create_disc_only_table(Tab,Type) ->
+ File = mnesia_lib:tab2dat(Tab),
+ file:delete(File),
+ Args = [{file, mnesia_lib:tab2dat(Tab)},
+ {type, mnesia_lib:disk_type(Tab, Type)},
+ {keypos, 2},
+ {repair, mnesia_monitor:get_env(auto_repair)}],
+ case mnesia_monitor:unsafe_open_dets(Tab, Args) of
+ {ok, _} ->
+ ok;
+ {error,Reason} ->
+ Err = "Failed to create disc table",
+ mnesia:abort({system_limit, Tab, {Err,Reason}})
+ end.
+
+
+receive_sync([], Pids) ->
+ Pids;
+receive_sync(Nodes, Pids) ->
+ receive
+ {sync_trans, Pid} ->
+ Node = node(Pid),
+ receive_sync(lists:delete(Node, Nodes), [Pid | Pids]);
+ Else ->
+ {abort, Else}
+ end.
+
+lock_del_table(Tab, Node, Cs, Father) ->
+ Ns = val({schema, active_replicas}),
+ process_flag(trap_exit,true),
+ Lock = fun() ->
+ mnesia:write_lock_table(Tab),
+ {Res, []} = rpc:multicall(Ns, ?MODULE, set_where_to_read, [Tab, Node, Cs]),
+ Filter = fun(ok) ->
+ false;
+ ({badrpc, {'EXIT', {undef, _}}}) ->
+ %% This will be the case we talks with elder nodes
+ %% than 3.8.2, they will set where_to_read without
+ %% getting a lock.
+ false;
+ (_) ->
+ true
+ end,
+ case lists:filter(Filter, Res) of
+ [] ->
+ Father ! {self(), updated},
+ %% When transaction is commited the process dies
+ %% and the lock is released.
+ receive _ -> ok end;
+ Err ->
+ Father ! {self(), {bad_commit, Err}}
+ end,
+ ok
+ end,
+ case mnesia:transaction(Lock) of
+ {atomic, ok} -> ok;
+ {aborted, R} -> Father ! {self(), R}
+ end,
+ unlink(Father),
+ unlink(whereis(mnesia_tm)),
+ exit(normal).
+
+set_where_to_read(Tab, Node, Cs) ->
+ case mnesia_lib:val({Tab, where_to_read}) of
+ Node ->
+ case Cs#cstruct.local_content of
+ true ->
+ ok;
+ false ->
+ mnesia_lib:set_remote_where_to_read(Tab, [Node]),
+ ok
+ end;
+ _ ->
+ ok
+ end.
+
+%% Build up the list in reverse order.
+transform_objs(_Fun, _Tab, _RT, '$end_of_table', _NewArity, _Storage, _Type, Acc) ->
+ Acc;
+transform_objs(Fun, Tab, RecName, Key, A, Storage, Type, Acc) ->
+ Objs = mnesia_lib:db_get(Tab, Key),
+ NextKey = mnesia_lib:db_next_key(Tab, Key),
+ Oid = {Tab, Key},
+ NewObjs = {Ws, Ds} = transform_obj(Tab, RecName, Key, Fun, Objs, A, Type, [], []),
+ if
+ NewObjs == {[], []} ->
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type, Acc);
+ Type == bag ->
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ws, write}},
+ {op, rec, Storage, {Oid, [Oid], delete}} | Acc]);
+ Ds == [] ->
+ %% Type is set or ordered_set, no need to delete the record first
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ws, write}} | Acc]);
+ Ws == [] ->
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ds, write}} | Acc]);
+ true ->
+ transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
+ [{op, rec, Storage, {Oid, Ws, write}},
+ {op, rec, Storage, {Oid, Ds, delete}} | Acc])
+ end.
+
+transform_obj(Tab, RecName, Key, Fun, [Obj|Rest], NewArity, Type, Ws, Ds) ->
+ NewObj = Fun(Obj),
+ if
+ size(NewObj) /= NewArity ->
+ exit({"Bad arity", Obj, NewObj});
+ NewObj == Obj ->
+ transform_obj(Tab, RecName, Key, Fun, Rest, NewArity, Type, Ws, Ds);
+ RecName == element(1, NewObj), Key == element(2, NewObj) ->
+ transform_obj(Tab, RecName, Key, Fun, Rest, NewArity,
+ Type, [NewObj | Ws], Ds);
+ NewObj == delete ->
+ case Type of
+ bag -> %% Just don't write that object
+ transform_obj(Tab, RecName, Key, Fun, Rest,
+ NewArity, Type, Ws, Ds);
+ _ ->
+ transform_obj(Tab, RecName, Key, Fun, Rest, NewArity,
+ Type, Ws, [NewObj | Ds])
+ end;
+ true ->
+ exit({"Bad key or Record Name", Obj, NewObj})
+ end;
+transform_obj(_Tab, _RecName, _Key, _Fun, [], _NewArity, _Type, Ws, Ds) ->
+ {lists:reverse(Ws), lists:reverse(Ds)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Undo prepare of commit
+undo_prepare_commit(Tid, Commit) ->
+ case Commit#commit.schema_ops of
+ [] ->
+ ignore;
+ Ops ->
+ %% Catch to allow failure mnesia_controller may not be started
+ catch mnesia_controller:release_schema_commit_lock(),
+ undo_prepare_ops(Tid, Ops)
+ end,
+ Commit.
+
+%% Undo in reverse order
+undo_prepare_ops(Tid, [Op | Ops]) ->
+ case element(1, Op) of
+ TheOp when TheOp /= op, TheOp /= restore_op ->
+ undo_prepare_ops(Tid, Ops);
+ _ ->
+ undo_prepare_ops(Tid, Ops),
+ undo_prepare_op(Tid, Op)
+ end;
+undo_prepare_ops(_Tid, []) ->
+ [].
+
+undo_prepare_op(_Tid, {op, announce_im_running, _Node, _, _Running, _RemoteRunning}) ->
+ case ?catch_val(prepare_op) of
+ {announce_im_running, New} ->
+ unannounce_im_running(New);
+ _Else ->
+ ok
+ end;
+
+undo_prepare_op(_Tid, {op, sync_trans}) ->
+ ok;
+
+undo_prepare_op(Tid, {op, create_table, TabDef}) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_lib:unset({Tab, create_table}),
+ delete_cstruct(Tid, Cs),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ ok;
+ ram_copies ->
+ ram_delete_table(Tab, ram_copies);
+ disc_copies ->
+ ram_delete_table(Tab, disc_copies),
+ DcdFile = mnesia_lib:tab2dcd(Tab),
+ %% disc_delete_table(Tab, Storage),
+ file:delete(DcdFile);
+ disc_only_copies ->
+ mnesia_monitor:unsafe_close_dets(Tab),
+ Dat = mnesia_lib:tab2dat(Tab),
+ %% disc_delete_table(Tab, Storage),
+ file:delete(Dat)
+ end;
+
+undo_prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}) ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ if
+ Tab == schema ->
+ true; % Nothing to prepare
+ Node == node() ->
+ mnesia_checkpoint:tm_del_copy(Tab, Node),
+ mnesia_controller:unannounce_add_table_copy(Tab, Node),
+ if
+ Storage == disc_only_copies; Tab == schema ->
+ mnesia_monitor:close_dets(Tab),
+ file:delete(mnesia_lib:tab2dat(Tab));
+ true ->
+ file:delete(mnesia_lib:tab2dcd(Tab))
+ end,
+ ram_delete_table(Tab, Storage),
+ Cs2 = new_cs(Cs, Node, Storage, del),
+ insert_cstruct(Tid, Cs2, true); % Don't care about the version
+ Node /= node() ->
+ mnesia_controller:unannounce_add_table_copy(Tab, Node),
+ Cs2 = new_cs(Cs, Node, Storage, del),
+ insert_cstruct(Tid, Cs2, true) % Don't care about the version
+ end;
+
+undo_prepare_op(_Tid, {op, del_table_copy, _, Node, TabDef})
+ when Node == node() ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_lib:set({Tab, where_to_read}, Node);
+
+
+undo_prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef})
+ when N == node() ->
+ Cs = list2cs(TabDef),
+ Tab = Cs#cstruct.name,
+ mnesia_checkpoint:tm_change_table_copy_type(Tab, ToS, FromS),
+ Dmp = mnesia_lib:tab2dmp(Tab),
+
+ case {FromS, ToS} of
+ {ram_copies, disc_copies} when Tab == schema ->
+ file:delete(Dmp),
+ mnesia_log:purge_some_logs(),
+ set(use_dir, false);
+ {ram_copies, disc_copies} ->
+ file:delete(Dmp);
+ {ram_copies, disc_only_copies} ->
+ file:delete(Dmp);
+ {disc_only_copies, _} ->
+ ram_delete_table(Tab, ram_copies);
+ _ ->
+ ignore
+ end;
+
+undo_prepare_op(_Tid, {op, dump_table, _Size, TabDef}) ->
+ Cs = list2cs(TabDef),
+ case lists:member(node(), Cs#cstruct.ram_copies) of
+ true ->
+ Tab = Cs#cstruct.name,
+ Dmp = mnesia_lib:tab2dmp(Tab),
+ file:delete(Dmp);
+ false ->
+ ignore
+ end;
+
+undo_prepare_op(_Tid, {op, add_snmp, _Ustruct, TabDef}) ->
+ Cs = list2cs(TabDef),
+ case mnesia_lib:cs_to_storage_type(node(), Cs) of
+ unknown ->
+ true;
+ _Storage ->
+ Tab = Cs#cstruct.name,
+ case ?catch_val({Tab, {index, snmp}}) of
+ {'EXIT',_} ->
+ ignore;
+ Stab ->
+ mnesia_snmp_hook:delete_table(Tab, Stab),
+ mnesia_lib:unset({Tab, {index, snmp}})
+ end
+ end;
+
+undo_prepare_op(_Tid, _Op) ->
+ ignore.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ram_delete_table(Tab, Storage) ->
+ case Storage of
+ unknown ->
+ ignore;
+ disc_only_copies ->
+ ignore;
+ _Else ->
+ %% delete possible index files and data .....
+ %% Got to catch this since if no info has been set in the
+ %% mnesia_gvar it will crash
+ catch mnesia_index:del_transient(Tab, Storage),
+ case ?catch_val({Tab, {index, snmp}}) of
+ {'EXIT', _} ->
+ ignore;
+ Etab ->
+ catch mnesia_snmp_hook:delete_table(Tab, Etab)
+ end,
+ catch ?ets_delete_table(Tab)
+ end.
+
+purge_dir(Dir, KeepFiles) ->
+ Suffixes = known_suffixes(),
+ purge_dir(Dir, KeepFiles, Suffixes).
+
+purge_dir(Dir, KeepFiles, Suffixes) ->
+ case dir_exists(Dir) of
+ true ->
+ {ok, AllFiles} = file:list_dir(Dir),
+ purge_known_files(AllFiles, KeepFiles, Dir, Suffixes);
+ false ->
+ ok
+ end.
+
+purge_tmp_files() ->
+ case mnesia_monitor:use_dir() of
+ true ->
+ Dir = mnesia_lib:dir(),
+ KeepFiles = [],
+ Exists = mnesia_lib:exists(mnesia_lib:tab2dat(schema)),
+ case Exists of
+ true ->
+ Suffixes = tmp_suffixes(),
+ purge_dir(Dir, KeepFiles, Suffixes);
+ false ->
+ %% Interrupted change of storage type
+ %% for schema table
+ Suffixes = known_suffixes(),
+ purge_dir(Dir, KeepFiles, Suffixes),
+ mnesia_lib:set(use_dir, false)
+ end;
+
+ false ->
+ ok
+ end.
+
+purge_known_files([File | Tail], KeepFiles, Dir, Suffixes) ->
+ case lists:member(File, KeepFiles) of
+ true ->
+ ignore;
+ false ->
+ case has_known_suffix(File, Suffixes, false) of
+ false ->
+ ignore;
+ true ->
+ AbsFile = filename:join([Dir, File]),
+ file:delete(AbsFile)
+ end
+ end,
+ purge_known_files(Tail, KeepFiles, Dir, Suffixes);
+purge_known_files([], _KeepFiles, _Dir, _Suffixes) ->
+ ok.
+
+has_known_suffix(_File, _Suffixes, true) ->
+ true;
+has_known_suffix(File, [Suffix | Tail], false) ->
+ has_known_suffix(File, Tail, lists:suffix(Suffix, File));
+has_known_suffix(_File, [], Bool) ->
+ Bool.
+
+known_suffixes() -> real_suffixes() ++ tmp_suffixes().
+
+real_suffixes() -> [".DAT", ".LOG", ".BUP", ".DCL", ".DCD"].
+
+tmp_suffixes() -> [".TMP", ".BUPTMP", ".RET", ".DMP"].
+
+info() ->
+ Tabs = lists:sort(val({schema, tables})),
+ lists:foreach(fun(T) -> info(T) end, Tabs),
+ ok.
+
+info(Tab) ->
+ Props = get_table_properties(Tab),
+ io:format("-- Properties for ~w table --- ~n",[Tab]),
+ info2(Tab, Props).
+info2(Tab, [{cstruct, _V} | Tail]) -> % Ignore cstruct
+ info2(Tab, Tail);
+info2(Tab, [{frag_hash, _V} | Tail]) -> % Ignore frag_hash
+ info2(Tab, Tail);
+info2(Tab, [{P, V} | Tail]) ->
+ io:format("~-20w -> ~p~n",[P,V]),
+ info2(Tab, Tail);
+info2(_, []) ->
+ io:format("~n", []).
+
+get_table_properties(Tab) ->
+ case catch mnesia_lib:db_match_object(ram_copies,
+ mnesia_gvar, {{Tab, '_'}, '_'}) of
+ {'EXIT', _} ->
+ mnesia:abort({no_exists, Tab, all});
+ RawGvar ->
+ case [{Item, Val} || {{_Tab, Item}, Val} <- RawGvar] of
+ [] ->
+ [];
+ Gvar ->
+ Size = {size, mnesia:table_info(Tab, size)},
+ Memory = {memory, mnesia:table_info(Tab, memory)},
+ Master = {master_nodes, mnesia:table_info(Tab, master_nodes)},
+ lists:sort([Size, Memory, Master | Gvar])
+ end
+ end.
+
+%%%%%%%%%%% RESTORE %%%%%%%%%%%
+
+-record(r, {iter = schema,
+ module,
+ table_options = [],
+ default_op = clear_tables,
+ tables = [],
+ opaque,
+ insert_op = error_fun,
+ recs = error_recs
+ }).
+
+restore(Opaque) ->
+ restore(Opaque, [], mnesia_monitor:get_env(backup_module)).
+restore(Opaque, Args) when is_list(Args) ->
+ restore(Opaque, Args, mnesia_monitor:get_env(backup_module));
+restore(_Opaque, BadArg) ->
+ {aborted, {badarg, BadArg}}.
+restore(Opaque, Args, Module) when is_list(Args), is_atom(Module) ->
+ InitR = #r{opaque = Opaque, module = Module},
+ case catch lists:foldl(fun check_restore_arg/2, InitR, Args) of
+ R when is_record(R, r) ->
+ case mnesia_bup:read_schema(R#r.module, Opaque) of
+ {error, Reason} ->
+ {aborted, Reason};
+ BupSchema ->
+ schema_transaction(fun() -> do_restore(R, BupSchema) end)
+ end;
+ {'EXIT', Reason} ->
+ {aborted, Reason}
+ end;
+restore(_Opaque, Args, Module) ->
+ {aborted, {badarg, Args, Module}}.
+
+check_restore_arg({module, Mod}, R) when is_atom(Mod) ->
+ R#r{module = Mod};
+
+check_restore_arg({clear_tables, List}, R) when is_list(List) ->
+ case lists:member(schema, List) of
+ false ->
+ TableList = [{Tab, clear_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+ true ->
+ exit({badarg, {clear_tables, schema}})
+ end;
+check_restore_arg({recreate_tables, List}, R) when is_list(List) ->
+ case lists:member(schema, List) of
+ false ->
+ TableList = [{Tab, recreate_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+ true ->
+ exit({badarg, {recreate_tables, schema}})
+ end;
+check_restore_arg({keep_tables, List}, R) when is_list(List) ->
+ TableList = [{Tab, keep_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+check_restore_arg({skip_tables, List}, R) when is_list(List) ->
+ TableList = [{Tab, skip_tables} || Tab <- List],
+ R#r{table_options = R#r.table_options ++ TableList};
+check_restore_arg({default_op, Op}, R) ->
+ case Op of
+ clear_tables -> ok;
+ recreate_tables -> ok;
+ keep_tables -> ok;
+ skip_tables -> ok;
+ Else ->
+ exit({badarg, {bad_default_op, Else}})
+ end,
+ R#r{default_op = Op};
+
+check_restore_arg(BadArg,_) ->
+ exit({badarg, BadArg}).
+
+do_restore(R, BupSchema) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ R2 = restore_schema(BupSchema, R),
+ insert_schema_ops(TidTs, [{restore_op, R2}]),
+ [element(1, TabStruct) || TabStruct <- R2#r.tables].
+
+arrange_restore(R, Fun, Recs) ->
+ R2 = R#r{insert_op = Fun, recs = Recs},
+ case mnesia_bup:iterate(R#r.module, fun restore_items/4, R#r.opaque, R2) of
+ {ok, R3} -> R3#r.recs;
+ {error, Reason} -> mnesia:abort(Reason)
+ end.
+
+restore_items([Rec | Recs], Header, Schema, R) ->
+ Tab = element(1, Rec),
+ case lists:keysearch(Tab, 1, R#r.tables) of
+ {value, {Tab, Where0, Snmp, RecName}} ->
+ Where = case Where0 of
+ undefined ->
+ val({Tab, where_to_commit});
+ _ ->
+ Where0
+ end,
+ {Rest, NRecs} = restore_tab_items([Rec | Recs], Tab,
+ RecName, Where, Snmp,
+ R#r.recs, R#r.insert_op),
+ restore_items(Rest, Header, Schema, R#r{recs = NRecs});
+ false ->
+ Rest = skip_tab_items(Recs, Tab),
+ restore_items(Rest, Header, Schema, R)
+ end;
+
+restore_items([], _Header, _Schema, R) ->
+ R.
+
+restore_func(Tab, R) ->
+ case lists:keysearch(Tab, 1, R#r.table_options) of
+ {value, {Tab, OP}} ->
+ OP;
+ false ->
+ R#r.default_op
+ end.
+
+where_to_commit(Tab, CsList) ->
+ Ram = [{N, ram_copies} || N <- pick(Tab, ram_copies, CsList, [])],
+ Disc = [{N, disc_copies} || N <- pick(Tab, disc_copies, CsList, [])],
+ DiscO = [{N, disc_only_copies} || N <- pick(Tab, disc_only_copies, CsList, [])],
+ Ram ++ Disc ++ DiscO.
+
+%% Changes of the Meta info of schema itself is not allowed
+restore_schema([{schema, schema, _List} | Schema], R) ->
+ restore_schema(Schema, R);
+restore_schema([{schema, Tab, List} | Schema], R) ->
+ case restore_func(Tab, R) of
+ clear_tables ->
+ do_clear_table(Tab),
+ Snmp = val({Tab, snmp}),
+ RecName = val({Tab, record_name}),
+ R2 = R#r{tables = [{Tab, undefined, Snmp, RecName} | R#r.tables]},
+ restore_schema(Schema, R2);
+ recreate_tables ->
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} ->
+ TidTs = {_Mod, Tid, Ts} = get(mnesia_activity_state),
+ RunningNodes = val({current, db_nodes}),
+ Nodes = mnesia_lib:intersect(mnesia_lib:cs_to_nodes(list2cs(List)),
+ RunningNodes),
+ mnesia_locker:wlock_no_exist(Tid, Ts#tidstore.store, Tab, Nodes),
+ TidTs;
+ _ ->
+ TidTs = get_tid_ts_and_lock(Tab, write)
+ end,
+ NC = {cookie, ?unique_cookie},
+ List2 = lists:keyreplace(cookie, 1, List, NC),
+ Where = where_to_commit(Tab, List2),
+ Snmp = pick(Tab, snmp, List2, []),
+ RecName = pick(Tab, record_name, List2, Tab),
+ insert_schema_ops(TidTs, [{op, restore_recreate, List2}]),
+ R2 = R#r{tables = [{Tab, Where, Snmp, RecName} | R#r.tables]},
+ restore_schema(Schema, R2);
+ keep_tables ->
+ get_tid_ts_and_lock(Tab, write),
+ Snmp = val({Tab, snmp}),
+ RecName = val({Tab, record_name}),
+ R2 = R#r{tables = [{Tab, undefined, Snmp, RecName} | R#r.tables]},
+ restore_schema(Schema, R2);
+ skip_tables ->
+ restore_schema(Schema, R)
+ end;
+
+restore_schema([{schema, Tab} | Schema], R) ->
+ do_delete_table(Tab),
+ Tabs = lists:delete(Tab,R#r.tables),
+ restore_schema(Schema, R#r{tables = Tabs});
+restore_schema([], R) ->
+ R.
+
+restore_tab_items([Rec | Rest], Tab, RecName, Where, Snmp, Recs, Op)
+ when element(1, Rec) == Tab ->
+ NewRecs = Op(Rec, Recs, RecName, Where, Snmp),
+ restore_tab_items(Rest, Tab, RecName, Where, Snmp, NewRecs, Op);
+
+restore_tab_items(Rest, _Tab, _RecName, _Where, _Snmp, Recs, _Op) ->
+ {Rest, Recs}.
+
+skip_tab_items([Rec| Rest], Tab)
+ when element(1, Rec) == Tab ->
+ skip_tab_items(Rest, Tab);
+skip_tab_items(Recs, _) ->
+ Recs.
+
+%%%%%%%%% Dump tables %%%%%%%%%%%%%
+dump_tables(Tabs) when is_list(Tabs) ->
+ schema_transaction(fun() -> do_dump_tables(Tabs) end);
+dump_tables(Tabs) ->
+ {aborted, {bad_type, Tabs}}.
+
+do_dump_tables(Tabs) ->
+ TidTs = get_tid_ts_and_lock(schema, write),
+ insert_schema_ops(TidTs, make_dump_tables(Tabs)).
+
+make_dump_tables([schema | _Tabs]) ->
+ mnesia:abort({bad_type, schema});
+make_dump_tables([Tab | Tabs]) ->
+ get_tid_ts_and_lock(Tab, read),
+ TabDef = get_create_list(Tab),
+ DiscResident = val({Tab, disc_copies}) ++ val({Tab, disc_only_copies}),
+ verify([], DiscResident,
+ {"Only allowed on ram_copies", Tab, DiscResident}),
+ [{op, dump_table, unknown, TabDef} | make_dump_tables(Tabs)];
+make_dump_tables([]) ->
+ [].
+
+%% Merge the local schema with the schema on other nodes
+merge_schema() ->
+ schema_transaction(fun() -> do_merge_schema() end).
+
+do_merge_schema() ->
+ {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, write),
+ Connected = val(recover_nodes),
+ Running = val({current, db_nodes}),
+ Store = Ts#tidstore.store,
+ %% Verify that all nodes are locked that might not be the
+ %% case, if this trans where queued when new nodes where added.
+ case Running -- ets:lookup_element(Store, nodes, 2) of
+ [] -> ok; %% All known nodes are locked
+ Miss -> %% Abort! We don't want the sideeffects below to be executed
+ mnesia:abort({bad_commit, {missing_lock, Miss}})
+ end,
+ case Connected -- Running of
+ [Node | _] ->
+ %% Time for a schema merging party!
+ mnesia_locker:wlock_no_exist(Tid, Store, schema, [Node]),
+ case rpc:call(Node, mnesia_controller, get_cstructs, []) of
+ {cstructs, Cstructs, RemoteRunning1} ->
+ LockedAlready = Running ++ [Node],
+ {New, Old} = mnesia_recover:connect_nodes(RemoteRunning1),
+ RemoteRunning = mnesia_lib:intersect(New ++ Old, RemoteRunning1),
+ if
+ RemoteRunning /= RemoteRunning1 ->
+ mnesia_lib:error("Mnesia on ~p could not connect to node(s) ~p~n",
+ [node(), RemoteRunning1 -- RemoteRunning]);
+ true -> ok
+ end,
+ NeedsLock = RemoteRunning -- LockedAlready,
+ mnesia_locker:wlock_no_exist(Tid, Store, schema, NeedsLock),
+ {value, SchemaCs} =
+ lists:keysearch(schema, #cstruct.name, Cstructs),
+
+ %% Announce that Node is running
+ A = [{op, announce_im_running, node(),
+ cs2list(SchemaCs), Running, RemoteRunning}],
+ do_insert_schema_ops(Store, A),
+
+ %% Introduce remote tables to local node
+ do_insert_schema_ops(Store, make_merge_schema(Node, Cstructs)),
+
+ %% Introduce local tables to remote nodes
+ Tabs = val({schema, tables}),
+ Ops = [{op, merge_schema, get_create_list(T)}
+ || T <- Tabs,
+ not lists:keymember(T, #cstruct.name, Cstructs)],
+ do_insert_schema_ops(Store, Ops),
+
+ %% Ensure that the txn will be committed on all nodes
+ NewNodes = RemoteRunning -- Running,
+ mnesia_lib:set(prepare_op, {announce_im_running,NewNodes}),
+ announce_im_running(NewNodes, SchemaCs),
+ {merged, Running, RemoteRunning};
+ {error, Reason} ->
+ {"Cannot get cstructs", Node, Reason};
+ {badrpc, Reason} ->
+ {"Cannot get cstructs", Node, {badrpc, Reason}}
+ end;
+ [] ->
+ %% No more nodes to merge schema with
+ not_merged
+ end.
+
+make_merge_schema(Node, [Cs | Cstructs]) ->
+ Ops = do_make_merge_schema(Node, Cs),
+ Ops ++ make_merge_schema(Node, Cstructs);
+make_merge_schema(_Node, []) ->
+ [].
+
+%% Merge definitions of schema table
+do_make_merge_schema(Node, RemoteCs)
+ when RemoteCs#cstruct.name == schema ->
+ Cs = val({schema, cstruct}),
+ Masters = mnesia_recover:get_master_nodes(schema),
+ HasRemoteMaster = lists:member(Node, Masters),
+ HasLocalMaster = lists:member(node(), Masters),
+ Force = HasLocalMaster or HasRemoteMaster,
+ %% What is the storage types opinions?
+ StCsLocal = mnesia_lib:cs_to_storage_type(node(), Cs),
+ StRcsLocal = mnesia_lib:cs_to_storage_type(node(), RemoteCs),
+ StCsRemote = mnesia_lib:cs_to_storage_type(Node, Cs),
+ StRcsRemote = mnesia_lib:cs_to_storage_type(Node, RemoteCs),
+
+ if
+ Cs#cstruct.cookie == RemoteCs#cstruct.cookie,
+ Cs#cstruct.version == RemoteCs#cstruct.version ->
+ %% Great, we have the same cookie and version
+ %% and do not need to merge cstructs
+ [];
+
+ Cs#cstruct.cookie /= RemoteCs#cstruct.cookie,
+ Cs#cstruct.disc_copies /= [],
+ RemoteCs#cstruct.disc_copies /= [] ->
+ %% Both cstructs involves disc nodes
+ %% and we cannot merge them
+ if
+ HasLocalMaster == true,
+ HasRemoteMaster == false ->
+ %% Choose local cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(Cs)}];
+
+ HasRemoteMaster == true,
+ HasLocalMaster == false ->
+ %% Choose remote cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(RemoteCs)}];
+
+ true ->
+ Str = io_lib:format("Incompatible schema cookies. "
+ "Please, restart from old backup."
+ "~w = ~w, ~w = ~w~n",
+ [Node, cs2list(RemoteCs), node(), cs2list(Cs)]),
+ throw(Str)
+ end;
+
+ StCsLocal /= StRcsLocal, StRcsLocal /= unknown, StCsLocal /= ram_copies ->
+ Str = io_lib:format("Incompatible schema storage types (local). "
+ "on ~w storage ~w, on ~w storage ~w~n",
+ [node(), StCsLocal, Node, StRcsLocal]),
+ throw(Str);
+ StCsRemote /= StRcsRemote, StCsRemote /= unknown, StRcsRemote /= ram_copies ->
+ Str = io_lib:format("Incompatible schema storage types (remote). "
+ "on ~w cs ~w, on ~w rcs ~w~n",
+ [node(), cs2list(Cs), Node, cs2list(RemoteCs)]),
+ throw(Str);
+
+ Cs#cstruct.disc_copies /= [] ->
+ %% Choose local cstruct,
+ %% since it involves disc nodes
+ MergedCs = merge_cstructs(Cs, RemoteCs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ RemoteCs#cstruct.disc_copies /= [] ->
+ %% Choose remote cstruct,
+ %% since it involves disc nodes
+ MergedCs = merge_cstructs(RemoteCs, Cs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ Cs > RemoteCs ->
+ %% Choose remote cstruct
+ MergedCs = merge_cstructs(RemoteCs, Cs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ true ->
+ %% Choose local cstruct
+ MergedCs = merge_cstructs(Cs, RemoteCs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}]
+ end;
+
+%% Merge definitions of normal table
+do_make_merge_schema(Node, RemoteCs) ->
+ Tab = RemoteCs#cstruct.name,
+ Masters = mnesia_recover:get_master_nodes(schema),
+ HasRemoteMaster = lists:member(Node, Masters),
+ HasLocalMaster = lists:member(node(), Masters),
+ Force = HasLocalMaster or HasRemoteMaster,
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} ->
+ %% A completely new table, created while Node was down
+ [{op, merge_schema, cs2list(RemoteCs)}];
+ Cs when Cs#cstruct.cookie == RemoteCs#cstruct.cookie ->
+ if
+ Cs#cstruct.version == RemoteCs#cstruct.version ->
+ %% We have exactly the same version of the
+ %% table def
+ [];
+
+ Cs#cstruct.version > RemoteCs#cstruct.version ->
+ %% Oops, we have different versions
+ %% of the table def, lets merge them.
+ %% The only changes that may have occurred
+ %% is that new replicas may have been added.
+ MergedCs = merge_cstructs(Cs, RemoteCs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}];
+
+ Cs#cstruct.version < RemoteCs#cstruct.version ->
+ %% Oops, we have different versions
+ %% of the table def, lets merge them
+ MergedCs = merge_cstructs(RemoteCs, Cs, Force),
+ [{op, merge_schema, cs2list(MergedCs)}]
+ end;
+ Cs ->
+ %% Different cookies, not possible to merge
+ if
+ HasLocalMaster == true,
+ HasRemoteMaster == false ->
+ %% Choose local cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(Cs)}];
+
+ HasRemoteMaster == true,
+ HasLocalMaster == false ->
+ %% Choose remote cstruct,
+ %% since it's the master
+ [{op, merge_schema, cs2list(RemoteCs)}];
+
+ true ->
+ Str = io_lib:format("Bad cookie in table definition"
+ " ~w: ~w = ~w, ~w = ~w~n",
+ [Tab, node(), Cs, Node, RemoteCs]),
+ throw(Str)
+ end
+ end.
+
+%% Change of table definitions (cstructs) requires all replicas
+%% of the table to be active. New replicas, db_nodes and tables
+%% may however be added even if some replica is inactive. These
+%% invariants must be enforced in order to allow merge of cstructs.
+%%
+%% Returns a new cstruct or issues a fatal error
+merge_cstructs(Cs, RemoteCs, Force) ->
+ verify_cstruct(Cs),
+ case catch do_merge_cstructs(Cs, RemoteCs, Force) of
+ {'EXIT', {aborted, _Reason}} when Force == true ->
+ Cs;
+ {'EXIT', Reason} ->
+ exit(Reason);
+ MergedCs when is_record(MergedCs, cstruct) ->
+ MergedCs;
+ Other ->
+ throw(Other)
+ end.
+
+do_merge_cstructs(Cs, RemoteCs, Force) ->
+ verify_cstruct(RemoteCs),
+ Ns = mnesia_lib:uniq(mnesia_lib:cs_to_nodes(Cs) ++
+ mnesia_lib:cs_to_nodes(RemoteCs)),
+ {AnythingNew, MergedCs} =
+ merge_storage_type(Ns, false, Cs, RemoteCs, Force),
+ MergedCs2 = merge_versions(AnythingNew, MergedCs, RemoteCs, Force),
+ verify_cstruct(MergedCs2),
+ MergedCs2.
+
+merge_storage_type([N | Ns], AnythingNew, Cs, RemoteCs, Force) ->
+ Local = mnesia_lib:cs_to_storage_type(N, Cs),
+ Remote = mnesia_lib:cs_to_storage_type(N, RemoteCs),
+ case compare_storage_type(true, Local, Remote) of
+ {same, _Storage} ->
+ merge_storage_type(Ns, AnythingNew, Cs, RemoteCs, Force);
+ {diff, Storage} ->
+ Cs2 = change_storage_type(N, Storage, Cs),
+ merge_storage_type(Ns, true, Cs2, RemoteCs, Force);
+ incompatible when Force == true ->
+ merge_storage_type(Ns, AnythingNew, Cs, RemoteCs, Force);
+ Other ->
+ Str = io_lib:format("Cannot merge storage type for node ~w "
+ "in cstruct ~w with remote cstruct ~w (~w)~n",
+ [N, Cs, RemoteCs, Other]),
+ throw(Str)
+ end;
+merge_storage_type([], AnythingNew, MergedCs, _RemoteCs, _Force) ->
+ {AnythingNew, MergedCs}.
+
+compare_storage_type(_Retry, Any, Any) ->
+ {same, Any};
+compare_storage_type(_Retry, unknown, Any) ->
+ {diff, Any};
+compare_storage_type(_Retry, ram_copies, disc_copies) ->
+ {diff, disc_copies};
+compare_storage_type(_Retry, disc_copies, disc_only_copies) ->
+ {diff, disc_only_copies};
+compare_storage_type(true, One, Another) ->
+ compare_storage_type(false, Another, One);
+compare_storage_type(false, _One, _Another) ->
+ incompatible.
+
+change_storage_type(N, ram_copies, Cs) ->
+ Nodes = [N | Cs#cstruct.ram_copies],
+ Cs#cstruct{ram_copies = mnesia_lib:uniq(Nodes)};
+change_storage_type(N, disc_copies, Cs) ->
+ Nodes = [N | Cs#cstruct.disc_copies],
+ Cs#cstruct{disc_copies = mnesia_lib:uniq(Nodes)};
+change_storage_type(N, disc_only_copies, Cs) ->
+ Nodes = [N | Cs#cstruct.disc_only_copies],
+ Cs#cstruct{disc_only_copies = mnesia_lib:uniq(Nodes)}.
+
+%% BUGBUG: Verify match of frag info; equalit demanded for all but add_node
+
+merge_versions(AnythingNew, Cs, RemoteCs, Force) ->
+ if
+ Cs#cstruct.name == schema ->
+ ok;
+ Cs#cstruct.name /= schema,
+ Cs#cstruct.cookie == RemoteCs#cstruct.cookie ->
+ ok;
+ Force == true ->
+ ok;
+ true ->
+ Str = io_lib:format("Bad cookies. Cannot merge definitions of "
+ "table ~w. Local = ~w, Remote = ~w~n",
+ [Cs#cstruct.name, Cs, RemoteCs]),
+ throw(Str)
+ end,
+ if
+ Cs#cstruct.name == RemoteCs#cstruct.name,
+ Cs#cstruct.type == RemoteCs#cstruct.type,
+ Cs#cstruct.local_content == RemoteCs#cstruct.local_content,
+ Cs#cstruct.attributes == RemoteCs#cstruct.attributes,
+ Cs#cstruct.index == RemoteCs#cstruct.index,
+ Cs#cstruct.snmp == RemoteCs#cstruct.snmp,
+ Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode,
+ Cs#cstruct.load_order == RemoteCs#cstruct.load_order,
+ Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties ->
+ do_merge_versions(AnythingNew, Cs, RemoteCs);
+ Force == true ->
+ do_merge_versions(AnythingNew, Cs, RemoteCs);
+ true ->
+ Str1 = io_lib:format("Cannot merge definitions of "
+ "table ~w. Local = ~w, Remote = ~w~n",
+ [Cs#cstruct.name, Cs, RemoteCs]),
+ throw(Str1)
+ end.
+
+do_merge_versions(AnythingNew, MergedCs, RemoteCs) ->
+ {{Major1, Minor1}, _Detail1} = MergedCs#cstruct.version,
+ {{Major2, Minor2}, _Detail2} = RemoteCs#cstruct.version,
+ if
+ AnythingNew == false ->
+ MergedCs;
+ MergedCs#cstruct.version == RemoteCs#cstruct.version ->
+ V = {{Major1, Minor1}, dummy},
+ incr_version(MergedCs#cstruct{version = V});
+ Major1 == Major2 ->
+ Minor = lists:max([Minor1, Minor2]),
+ V = {{Major1, Minor}, dummy},
+ incr_version(MergedCs#cstruct{version = V});
+ Major1 /= Major2 ->
+ Major = lists:max([Major1, Major2]),
+ V = {{Major, 0}, dummy},
+ incr_version(MergedCs#cstruct{version = V})
+ end.
+
+%% Verify the basics
+verify_merge(RemoteCs) ->
+ Tab = RemoteCs#cstruct.name,
+ Masters = mnesia_recover:get_master_nodes(schema),
+ HasRemoteMaster = Masters /= [],
+ case ?catch_val({Tab, cstruct}) of
+ {'EXIT', _} ->
+ ok;
+ Cs ->
+ StCsLocal = mnesia_lib:cs_to_storage_type(node(), Cs),
+ StRcsLocal = mnesia_lib:cs_to_storage_type(node(), RemoteCs),
+ if
+ StCsLocal == StRcsLocal -> ok;
+ StCsLocal == unknown -> ok;
+ (StRcsLocal == unknown), (HasRemoteMaster == false) ->
+ {merge_error, Cs, RemoteCs};
+ %% Trust the merger
+ true -> ok
+ end
+ end.
+
+announce_im_running([N | Ns], SchemaCs) ->
+ {L1, L2} = mnesia_recover:connect_nodes([N]),
+ case lists:member(N, L1) or lists:member(N, L2) of
+ true ->
+ mnesia_lib:add({current, db_nodes}, N),
+ mnesia_controller:add_active_replica(schema, N, SchemaCs);
+ false ->
+ ignore
+ end,
+ announce_im_running(Ns, SchemaCs);
+announce_im_running([], _) ->
+ [].
+
+unannounce_im_running([N | Ns]) ->
+ mnesia_lib:del({current, db_nodes}, N),
+ mnesia_controller:del_active_replica(schema, N),
+ unannounce_im_running(Ns);
+unannounce_im_running([]) ->
+ ok.
+
diff --git a/lib/mnesia/src/mnesia_snmp_hook.erl b/lib/mnesia/src/mnesia_snmp_hook.erl
new file mode 100644
index 0000000000..8b4b5231e1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_snmp_hook.erl
@@ -0,0 +1,259 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_snmp_hook).
+
+%% Hooks (called from mnesia)
+-export([check_ustruct/1, create_table/3, delete_table/2,
+ key_to_oid/2, key_to_oid/3, oid_to_key/2,
+ update/1,
+ get_row/2, get_next_index/2, get_mnesia_key/2]).
+
+-export([key_to_oid_i/2, oid_to_key_1/2]). %% Test
+
+-include("mnesia.hrl").
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+check_ustruct([]) ->
+ true; %% default value, not SNMP'ified
+check_ustruct([{key, Types}]) ->
+ is_snmp_type(to_list(Types));
+check_ustruct(_) -> false.
+
+to_list(Tuple) when is_tuple(Tuple) -> tuple_to_list(Tuple);
+to_list(X) -> [X].
+
+is_snmp_type([integer | T]) -> is_snmp_type(T);
+is_snmp_type([string | T]) -> is_snmp_type(T);
+is_snmp_type([fix_string | T]) -> is_snmp_type(T);
+is_snmp_type([]) -> true;
+is_snmp_type(_) -> false.
+
+create_table([], MnesiaTab, _Storage) ->
+ mnesia:abort({badarg, MnesiaTab, {snmp, empty_snmpstruct}});
+
+create_table([{key, Us}], MnesiaTab, Storage) ->
+ Tree = b_new(MnesiaTab, Us),
+ mnesia_lib:db_fixtable(Storage, MnesiaTab, true),
+ First = mnesia_lib:db_first(Storage, MnesiaTab),
+ build_table(First, MnesiaTab, Tree, Us, Storage),
+ mnesia_lib:db_fixtable(Storage, MnesiaTab, false),
+ Tree.
+
+build_table(MnesiaKey, MnesiaTab, Tree, Us, Storage)
+ when MnesiaKey /= '$end_of_table' ->
+ %%update(write, Tree, MnesiaKey, MnesiaKey),
+ SnmpKey = key_to_oid_i(MnesiaKey, Us),
+ b_insert(Tree, SnmpKey, MnesiaKey),
+ Next = mnesia_lib:db_next_key(Storage, MnesiaTab, MnesiaKey),
+ build_table(Next, MnesiaTab, Tree, Us, Storage);
+build_table('$end_of_table', _MnesiaTab, _Tree, _Us, _Storage) ->
+ ok.
+
+delete_table(_MnesiaTab, Tree) ->
+ b_delete_tree(Tree),
+ ok.
+
+%%-----------------------------------------------------------------
+%% update({Op, MnesiaTab, MnesiaKey, SnmpKey})
+%%-----------------------------------------------------------------
+
+update({clear_table, MnesiaTab}) ->
+ Tree = val({MnesiaTab, {index, snmp}}),
+ b_clear(Tree),
+ ok;
+
+update({Op, MnesiaTab, MnesiaKey, SnmpKey}) ->
+ Tree = val({MnesiaTab, {index, snmp}}),
+ update(Op, Tree, MnesiaKey, SnmpKey).
+
+update(Op, Tree, MnesiaKey, SnmpKey) ->
+ case Op of
+ write ->
+ b_insert(Tree, SnmpKey, MnesiaKey);
+ update_counter ->
+ ignore;
+ delete ->
+ b_delete(Tree, SnmpKey);
+ delete_object ->
+ b_delete(Tree, SnmpKey)
+ end,
+ ok.
+
+%%-----------------------------------------------------------------
+%% Func: key_to_oid(Tab, Key, Ustruct)
+%% Args: Key ::= key()
+%% key() ::= int() | string() | {int() | string()}
+%% Type ::= {fix_string | term()}
+%% Make an OBJECT IDENTIFIER out of it.
+%% Variable length objects are prepended by their length.
+%% Ex. Key = {"pelle", 42} AND Type = {string, integer} =>
+%% OID [5, $p, $e, $l, $l, $e, 42]
+%% Key = {"pelle", 42} AND Type = {fix_string, integer} =>
+%% OID [$p, $e, $l, $l, $e, 42]
+%%-----------------------------------------------------------------
+
+key_to_oid(Tab,Key) ->
+ Types = val({Tab,snmp}),
+ key_to_oid(Tab, Key, Types).
+
+key_to_oid(Tab, Key, [{key, Types}]) ->
+ try key_to_oid_i(Key,Types)
+ catch _:_ ->
+ mnesia:abort({bad_snmp_key, {Tab,Key}, Types})
+ end.
+
+key_to_oid_i(Key, integer) when is_integer(Key) -> [Key];
+key_to_oid_i(Key, fix_string) when is_list(Key) -> Key;
+key_to_oid_i(Key, string) when is_list(Key) -> [length(Key) | Key];
+key_to_oid_i(Key, Types) -> keys_to_oid(size(Key), Key, [], Types).
+
+keys_to_oid(0, _Key, Oid, _Types) -> Oid;
+keys_to_oid(N, Key, Oid, Types) ->
+ Oid2 = lists:append(key_to_oid_i(element(N, Key), element(N, Types)), Oid),
+ keys_to_oid(N-1, Key, Oid2, Types).
+
+%%--------------------------------------------------
+%% The reverse of the above, i.e. snmp oid to mnesia key.
+%% This can be lookup up in tree but that might be on a remote node.
+%% It's probably faster to look it up, but use when it migth be remote
+oid_to_key(Oid, Tab) ->
+ [{key, Types}] = val({Tab,snmp}),
+ oid_to_key_1(Types, Oid).
+
+oid_to_key_1(integer, [Key]) -> Key;
+oid_to_key_1(fix_string, Key) -> Key;
+oid_to_key_1(string, [_|Key]) -> Key;
+oid_to_key_1(Tuple, Oid) ->
+ try
+ List = oid_to_key_2(1, size(Tuple), Tuple, Oid),
+ list_to_tuple(List)
+ catch
+ _:_ -> unknown
+ end.
+
+oid_to_key_2(N, Sz, Tuple, Oid0) when N =< Sz ->
+ case element(N, Tuple) of
+ integer ->
+ [Key|Oid] = Oid0,
+ [Key|oid_to_key_2(N+1, Sz, Tuple, Oid)];
+ fix_string when N =:= Sz ->
+ [Oid0];
+ fix_string ->
+ throw(fix_string);
+ string ->
+ [Len|Oid1] = Oid0,
+ {Str,Oid} = lists:split(Len, Oid1),
+ [Str|oid_to_key_2(N+1, Sz, Tuple, Oid)]
+ end;
+oid_to_key_2(N, Sz, _, []) when N =:= (Sz+1) ->
+ [].
+
+%%-----------------------------------------------------------------
+%% Func: get_row/2
+%% Args: Name is the name of the table (atom)
+%% RowIndex is an Oid
+%% Returns: {ok, Row} | undefined
+%% Note that the Row returned might contain columns that
+%% are not visible via SNMP. e.g. the first column may be
+%% ifIndex, and the last MFA ({ifIndex, col1, col2, MFA}).
+%% where ifIndex is used only as index (not as a real col),
+%% and MFA as extra info, used by the application.
+%%-----------------------------------------------------------------
+get_row(Name, RowIndex) ->
+ Tree = mnesia_lib:val({Name, {index, snmp}}),
+ case b_lookup(Tree, RowIndex) of
+ {ok, {_RowIndex, Key}} ->
+ [Row] = mnesia:dirty_read({Name, Key}),
+ {ok, Row};
+ _ ->
+ undefined
+ end.
+
+%%-----------------------------------------------------------------
+%% Func: get_next_index/2
+%% Args: Name is the name of the table (atom)
+%% RowIndex is an Oid
+%% Returns: {NextIndex,MnesiaKey} | {endOfTable, undefined}
+%%-----------------------------------------------------------------
+get_next_index(Name, RowIndex) ->
+ Tree = mnesia_lib:val({Name, {index, snmp}}),
+ case b_lookup_next(Tree, RowIndex) of
+ {ok, R} ->
+ R;
+ _ ->
+ {endOfTable,undefined}
+ end.
+
+%%-----------------------------------------------------------------
+%% Func: get_mnesia_key/2
+%% Purpose: Get the mnesia key corresponding to the RowIndex.
+%% Args: Name is the name of the table (atom)
+%% RowIndex is an Oid
+%% Returns: {ok, Key} | undefiend
+%%-----------------------------------------------------------------
+get_mnesia_key(Name, RowIndex) ->
+ Tree = mnesia_lib:val({Name, {index, snmp}}),
+ case b_lookup(Tree, RowIndex) of
+ {ok, {_RowIndex, Key}} ->
+ {ok, Key};
+ _ ->
+ undefined
+ end.
+
+
+%%-----------------------------------------------------------------
+%% Internal implementation, ordered_set ets.
+
+b_new(_Tab, _Us) ->
+ mnesia_monitor:unsafe_mktab(?MODULE, [public, ordered_set]).
+
+b_delete_tree(Tree) ->
+ ets:delete(Tree). %% Close via mnesia_monitor ?
+
+b_clear(Tree) ->
+ ets:delete_all_objects(Tree).
+
+b_insert(Tree, SnmpKey, MnesiaKey) ->
+ ets:insert(Tree, {SnmpKey, MnesiaKey}).
+
+b_delete(Tree, SnmpKey) ->
+ ets:delete(Tree, SnmpKey).
+
+b_lookup(Tree, RowIndex) ->
+ case ets:lookup(Tree, RowIndex) of
+ [X] ->
+ {ok, X};
+ _ ->
+ undefined
+ end.
+
+b_lookup_next(Tree,RowIndex) ->
+ case ets:next(Tree, RowIndex) of
+ '$end_of_table' ->
+ undefined;
+ Key ->
+ b_lookup(Tree, Key)
+ end.
diff --git a/lib/mnesia/src/mnesia_snmp_sup.erl b/lib/mnesia/src/mnesia_snmp_sup.erl
new file mode 100644
index 0000000000..7e86281428
--- /dev/null
+++ b/lib/mnesia/src/mnesia_snmp_sup.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_snmp_sup).
+
+-behaviour(supervisor).
+
+-export([start/0, init/1]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% top supervisor callback functions
+
+start() ->
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% sub supervisor callback functions
+
+init([]) ->
+ Flags = {simple_one_for_one, 0, timer:hours(24)}, % Trust the top supervisor
+ MFA = {mnesia_snmp_hook, start, []},
+ Modules = [?MODULE, mnesia_snmp_hook, supervisor],
+ KillAfter = mnesia_kernel_sup:supervisor_timeout(timer:seconds(3)),
+ Workers = [{?MODULE, MFA, transient, KillAfter, worker, Modules}],
+ {ok, {Flags, Workers}}.
diff --git a/lib/mnesia/src/mnesia_sp.erl b/lib/mnesia/src/mnesia_sp.erl
new file mode 100644
index 0000000000..58a177513f
--- /dev/null
+++ b/lib/mnesia/src/mnesia_sp.erl
@@ -0,0 +1,42 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1999-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+
+%% To able to generate nice crash reports we need a catch on the highest level.
+%% This code can't be purged so a code change is not possible.
+%% And hence this a simple module.
+
+-module(mnesia_sp).
+
+-export([init_proc/4]).
+
+init_proc(Who, Mod, Fun, Args) ->
+ mnesia_lib:verbose("~p starting: ~p~n", [Who, self()]),
+ case catch apply(Mod, Fun, Args) of
+ {'EXIT', Reason} ->
+ mnesia_monitor:terminate_proc(Who, Reason, Args),
+ exit(Reason);
+ Other ->
+ Other
+ end.
+
+
+
+
diff --git a/lib/mnesia/src/mnesia_subscr.erl b/lib/mnesia/src/mnesia_subscr.erl
new file mode 100644
index 0000000000..afd1704dec
--- /dev/null
+++ b/lib/mnesia/src/mnesia_subscr.erl
@@ -0,0 +1,494 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_subscr).
+
+-behaviour(gen_server).
+
+-export([start/0,
+ set_debug_level/1,
+ subscribe/2,
+ unsubscribe/2,
+ unsubscribe_table/1,
+ subscribers/0,
+ report_table_event/4,
+ report_table_event/5,
+ report_table_event/6
+ ]).
+
+%% gen_server callbacks
+-export([init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ terminate/2,
+ code_change/3
+ ]).
+
+-include("mnesia.hrl").
+
+-import(mnesia_lib, [error/2]).
+-record(state, {supervisor, pid_tab}).
+
+start() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [self()],
+ [{timeout, infinity}]).
+
+set_debug_level(Level) ->
+ OldEnv = application:get_env(mnesia, debug),
+ case mnesia_monitor:patch_env(debug, Level) of
+ {error, Reason} ->
+ {error, Reason};
+ NewLevel ->
+ set_debug_level(NewLevel, OldEnv)
+ end.
+
+set_debug_level(Level, OldEnv) ->
+ case mnesia:system_info(is_running) of
+ no when OldEnv == undefined ->
+ none;
+ no ->
+ {ok, E} = OldEnv,
+ E;
+ _ ->
+ Old = mnesia_lib:val(debug),
+ Local = mnesia:system_info(local_tables),
+ E = whereis(mnesia_event),
+ Sub = fun(Tab) -> subscribe(E, {table, Tab}) end,
+ UnSub = fun(Tab) -> unsubscribe(E, {table, Tab}) end,
+
+ case Level of
+ none ->
+ lists:foreach(UnSub, Local);
+ verbose ->
+ lists:foreach(UnSub, Local);
+ debug ->
+ lists:foreach(UnSub, Local -- [schema]),
+ Sub(schema);
+ trace ->
+ lists:foreach(Sub, Local)
+ end,
+ mnesia_lib:set(debug, Level),
+ Old
+ end.
+
+subscribe(ClientPid, system) ->
+ change_subscr(activate, ClientPid, system);
+subscribe(ClientPid, {table, Tab}) ->
+ change_subscr(activate, ClientPid, {table, Tab, simple});
+subscribe(ClientPid, {table, Tab, simple}) ->
+ change_subscr(activate, ClientPid, {table, Tab, simple});
+subscribe(ClientPid, {table, Tab, detailed}) ->
+ change_subscr(activate, ClientPid, {table, Tab, detailed});
+subscribe(_ClientPid, What) ->
+ {error, {badarg, What}}.
+
+unsubscribe(ClientPid, system) ->
+ change_subscr(deactivate, ClientPid, system);
+unsubscribe(ClientPid, {table, Tab}) ->
+ change_subscr(deactivate, ClientPid, {table, Tab, simple});
+unsubscribe(ClientPid, {table, Tab, simple}) ->
+ change_subscr(deactivate, ClientPid, {table, Tab, simple});
+unsubscribe(ClientPid, {table, Tab, detailed}) ->
+ change_subscr(deactivate, ClientPid, {table, Tab, detailed});
+unsubscribe(_ClientPid, What) ->
+ {error, {badarg, What}}.
+
+unsubscribe_table(Tab) ->
+ call({change, {deactivate_table, Tab}}).
+
+change_subscr(Kind, ClientPid, What) ->
+ call({change, {Kind, ClientPid, What}}).
+
+subscribers() ->
+ [whereis(mnesia_event) | mnesia_lib:val(subscribers)].
+
+report_table_event(Tab, Tid, Obj, Op) ->
+ case ?catch_val({Tab, commit_work}) of
+ {'EXIT', _} -> ok;
+ Commit ->
+ case lists:keysearch(subscribers, 1, Commit) of
+ false -> ok;
+ {value, Subs} ->
+ report_table_event(Subs, Tab, Tid, Obj, Op, undefined)
+ end
+ end.
+
+%% Backwards compatible for the moment when mnesia_tm get's updated!
+report_table_event(Subscr, Tab, Tid, Obj, Op) ->
+ report_table_event(Subscr, Tab, Tid, Obj, Op, undefined).
+
+report_table_event({subscribers, S1, S2}, Tab, Tid, _Obj, clear_table, _Old) ->
+ What = {delete, {schema, Tab}, Tid},
+ deliver(S1, {mnesia_table_event, What}),
+ TabDef = mnesia_schema:cs2list(?catch_val({Tab, cstruct})),
+ What2 = {write, {schema, Tab, TabDef}, Tid},
+ deliver(S1, {mnesia_table_event, What2}),
+ What3 = {delete, schema, {schema, Tab}, [{schema, Tab, TabDef}], Tid},
+ deliver(S2, {mnesia_table_event, What3}),
+ What4 = {write, schema, {schema, Tab, TabDef}, [], Tid},
+ deliver(S2, {mnesia_table_event, What4});
+
+report_table_event({subscribers, Subscr, []}, Tab, Tid, Obj, Op, _Old) ->
+ What = {Op, patch_record(Tab, Obj), Tid},
+ deliver(Subscr, {mnesia_table_event, What});
+
+report_table_event({subscribers, S1, S2}, Tab, Tid, Obj, Op, Old) ->
+ Standard = {Op, patch_record(Tab, Obj), Tid},
+ deliver(S1, {mnesia_table_event, Standard}),
+ Extended = what(Tab, Tid, Obj, Op, Old),
+ deliver(S2, Extended);
+
+%% Backwards compatible for the moment when mnesia_tm get's updated!
+report_table_event({subscribers, Subscr}, Tab, Tid, Obj, Op, Old) ->
+ report_table_event({subscribers, Subscr, []}, Tab, Tid, Obj, Op, Old).
+
+
+patch_record(Tab, Obj) ->
+ case Tab == element(1, Obj) of
+ true ->
+ Obj;
+ false ->
+ setelement(1, Obj, Tab)
+ end.
+
+what(Tab, Tid, {RecName, Key}, delete, undefined) ->
+ case catch mnesia_lib:db_get(Tab, Key) of
+ Old when is_list(Old) -> %% Op only allowed for set table.
+ {mnesia_table_event, {delete, Tab, {RecName, Key}, Old, Tid}};
+ _ ->
+ %% Record just deleted by a dirty_op or
+ %% the whole table has been deleted
+ ignore
+ end;
+what(Tab, Tid, Obj, delete, Old) ->
+ {mnesia_table_event, {delete, Tab, Obj, Old, Tid}};
+what(Tab, Tid, Obj, delete_object, _Old) ->
+ {mnesia_table_event, {delete, Tab, Obj, [Obj], Tid}};
+what(Tab, Tid, Obj, write, undefined) ->
+ case catch mnesia_lib:db_get(Tab, element(2, Obj)) of
+ Old when is_list(Old) ->
+ {mnesia_table_event, {write, Tab, Obj, Old, Tid}};
+ {'EXIT', _} ->
+ ignore
+ end.
+
+deliver(_, ignore) ->
+ ok;
+deliver([Pid | Pids], Msg) ->
+ Pid ! Msg,
+ deliver(Pids, Msg);
+deliver([], _Msg) ->
+ ok.
+
+call(Msg) ->
+ Pid = whereis(?MODULE),
+ case Pid of
+ undefined ->
+ {error, {node_not_running, node()}};
+ Pid ->
+ Res = gen_server:call(Pid, Msg, infinity),
+ %% We get an exit signal if server dies
+ receive
+ {'EXIT', _Pid, _Reason} ->
+ {error, {node_not_running, node()}}
+ after 0 ->
+ Res
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Callback functions from gen_server
+
+%%----------------------------------------------------------------------
+%% Func: init/1
+%% Returns: {ok, State} |
+%% {ok, State, Timeout} |
+%% {stop, Reason}
+%%----------------------------------------------------------------------
+init([Parent]) ->
+ process_flag(trap_exit, true),
+ ClientPid = whereis(mnesia_event),
+ link(ClientPid),
+ mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
+ Tab = ?ets_new_table(mnesia_subscr, [duplicate_bag, private]),
+ ?ets_insert(Tab, {ClientPid, system}),
+ {ok, #state{supervisor = Parent, pid_tab = Tab}}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_call/3
+%% Returns: {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} | (terminate/2 is called)
+%%----------------------------------------------------------------------
+handle_call({change, How}, _From, State) ->
+ Reply = do_change(How, State#state.pid_tab),
+ {reply, Reply, State};
+
+handle_call(Msg, _From, State) ->
+ error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_cast/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+handle_cast(Msg, State) ->
+ error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: handle_info/2
+%% Returns: {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State} (terminate/2 is called)
+%%----------------------------------------------------------------------
+
+handle_info({'EXIT', Pid, _R}, State) when Pid == State#state.supervisor ->
+ {stop, shutdown, State};
+
+handle_info({'EXIT', Pid, _Reason}, State) ->
+ handle_exit(Pid, State#state.pid_tab),
+ {noreply, State};
+
+handle_info(Msg, State) ->
+ error("~p got unexpected info: ~p~n", [?MODULE, Msg]),
+ {noreply, State}.
+
+%%----------------------------------------------------------------------
+%% Func: terminate/2
+%% Purpose: Shutdown the server
+%% Returns: any (ignored by gen_server)
+%%----------------------------------------------------------------------
+terminate(Reason, State) ->
+ prepare_stop(State#state.pid_tab),
+ mnesia_monitor:terminate_proc(?MODULE, Reason, State).
+
+%%----------------------------------------------------------------------
+%% Func: code_change/3
+%% Purpose: Upgrade process when its code is to be changed
+%% Returns: {ok, NewState}
+%%----------------------------------------------------------------------
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%%%----------------------------------------------------------------------
+%%% Internal functions
+%%%----------------------------------------------------------------------
+
+do_change({activate, ClientPid, system}, SubscrTab) when is_pid(ClientPid) ->
+ Var = subscribers,
+ activate(ClientPid, system, Var, subscribers(), SubscrTab);
+do_change({activate, ClientPid, {table, Tab, How}}, SubscrTab) when is_pid(ClientPid) ->
+ case ?catch_val({Tab, where_to_read}) of
+ Node when Node == node() ->
+ Var = {Tab, commit_work},
+ activate(ClientPid, {table, Tab, How}, Var, mnesia_lib:val(Var), SubscrTab);
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}};
+ _Node ->
+ {error, {not_active_local, Tab}}
+ end;
+do_change({deactivate, ClientPid, system}, SubscrTab) ->
+ Var = subscribers,
+ deactivate(ClientPid, system, Var, SubscrTab);
+do_change({deactivate, ClientPid, {table, Tab, How}}, SubscrTab) ->
+ Var = {Tab, commit_work},
+ deactivate(ClientPid, {table, Tab, How}, Var, SubscrTab);
+do_change({deactivate_table, Tab}, SubscrTab) ->
+ Var = {Tab, commit_work},
+ case ?catch_val(Var) of
+ {'EXIT', _} ->
+ {error, {no_exists, Tab}};
+ CommitWork ->
+ case lists:keysearch(subscribers, 1, CommitWork) of
+ false ->
+ ok;
+ {value, Subs} ->
+ Simple = {table, Tab, simple},
+ Detailed = {table, Tab, detailed},
+ Fs = fun(C) -> deactivate(C, Simple, Var, SubscrTab) end,
+ Fd = fun(C) -> deactivate(C, Detailed, Var, SubscrTab) end,
+ case Subs of
+ {subscribers, L1, L2} ->
+ lists:foreach(Fs, L1),
+ lists:foreach(Fd, L2);
+ {subscribers, L1} ->
+ lists:foreach(Fs, L1)
+ end
+ end,
+ {ok, node()}
+ end;
+do_change(_, _) ->
+ {error, badarg}.
+
+activate(ClientPid, What, Var, OldSubscribers, SubscrTab) ->
+ Old =
+ if Var == subscribers ->
+ OldSubscribers;
+ true ->
+ case lists:keysearch(subscribers, 1, OldSubscribers) of
+ false -> [];
+ {value, Subs} ->
+ case Subs of
+ {subscribers, L1, L2} ->
+ L1 ++ L2;
+ {subscribers, L1} ->
+ L1
+ end
+ end
+ end,
+ case lists:member(ClientPid, Old) of
+ false ->
+ %% Don't care about checking old links
+ case catch link(ClientPid) of
+ true ->
+ ?ets_insert(SubscrTab, {ClientPid, What}),
+ add_subscr(Var, What, ClientPid),
+ {ok, node()};
+ {'EXIT', _Reason} ->
+ {error, {no_exists, ClientPid}}
+ end;
+ true ->
+ {error, {already_exists, What}}
+ end.
+
+%%-record(subscribers, {pids = []}). Old subscriber record removed
+%% To solve backward compatibility, this code is a cludge..
+add_subscr(subscribers, _What, Pid) ->
+ mnesia_lib:add(subscribers, Pid),
+ {ok, node()};
+add_subscr({Tab, commit_work}, What, Pid) ->
+ Commit = mnesia_lib:val({Tab, commit_work}),
+ case lists:keysearch(subscribers, 1, Commit) of
+ false ->
+ Subscr =
+ case What of
+ {table, _, simple} ->
+ {subscribers, [Pid], []};
+ {table, _, detailed} ->
+ {subscribers, [], [Pid]}
+ end,
+ mnesia_lib:add({Tab, subscribers}, Pid),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit([Subscr | Commit]));
+ {value, Old} ->
+ {L1, L2} =
+ case Old of
+ {subscribers, L} -> %% Old Way
+ {L, []};
+ {subscribers, SL1, SL2} ->
+ {SL1, SL2}
+ end,
+ Subscr =
+ case What of
+ {table, _, simple} ->
+ {subscribers, [Pid | L1], L2};
+ {table, _, detailed} ->
+ {subscribers, L1, [Pid | L2]}
+ end,
+ NewC = lists:keyreplace(subscribers, 1, Commit, Subscr),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC)),
+ mnesia_lib:add({Tab, subscribers}, Pid)
+ end.
+
+deactivate(ClientPid, What, Var, SubscrTab) ->
+ ?ets_match_delete(SubscrTab, {ClientPid, What}),
+ case catch ?ets_lookup_element(SubscrTab, ClientPid, 1) of
+ List when is_list(List) ->
+ ignore;
+ {'EXIT', _} ->
+ unlink(ClientPid)
+ end,
+ del_subscr(Var, What, ClientPid),
+ {ok, node()}.
+
+del_subscr(subscribers, _What, Pid) ->
+ mnesia_lib:del(subscribers, Pid);
+del_subscr({Tab, commit_work}, What, Pid) ->
+ Commit = mnesia_lib:val({Tab, commit_work}),
+ case lists:keysearch(subscribers, 1, Commit) of
+ false ->
+ false;
+ {value, Old} ->
+ {L1, L2} =
+ case Old of
+ {subscribers, L} -> %% Old Way
+ {L, []};
+ {subscribers, SL1, SL2} ->
+ {SL1, SL2}
+ end,
+ Subscr =
+ case What of %% Ignore user error delete subscr from any list
+ {table, _, simple} ->
+ NewL1 = lists:delete(Pid, L1),
+ NewL2 = lists:delete(Pid, L2),
+ {subscribers, NewL1, NewL2};
+ {table, _, detailed} ->
+ NewL1 = lists:delete(Pid, L1),
+ NewL2 = lists:delete(Pid, L2),
+ {subscribers, NewL1, NewL2}
+ end,
+ case Subscr of
+ {subscribers, [], []} ->
+ NewC = lists:keydelete(subscribers, 1, Commit),
+ mnesia_lib:del({Tab, subscribers}, Pid),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC));
+ _ ->
+ NewC = lists:keyreplace(subscribers, 1, Commit, Subscr),
+ mnesia_lib:del({Tab, subscribers}, Pid),
+ mnesia_lib:set({Tab, commit_work},
+ mnesia_lib:sort_commit(NewC))
+ end
+ end.
+
+handle_exit(ClientPid, SubscrTab) ->
+ do_handle_exit(?ets_lookup(SubscrTab, ClientPid)),
+ ?ets_delete(SubscrTab, ClientPid).
+
+do_handle_exit([{ClientPid, What} | Tail]) ->
+ case What of
+ system ->
+ del_subscr(subscribers, What, ClientPid);
+ {_, Tab, _Level} ->
+ del_subscr({Tab, commit_work}, What, ClientPid)
+ end,
+ do_handle_exit(Tail);
+do_handle_exit([]) ->
+ ok.
+
+prepare_stop(SubscrTab) ->
+ mnesia_lib:report_system_event({mnesia_down, node()}),
+ do_prepare_stop(?ets_first(SubscrTab), SubscrTab).
+
+do_prepare_stop('$end_of_table', _SubscrTab) ->
+ ok;
+do_prepare_stop(ClientPid, SubscrTab) ->
+ Next = ?ets_next(SubscrTab, ClientPid),
+ handle_exit(ClientPid, SubscrTab),
+ unlink(ClientPid),
+ do_prepare_stop(Next, SubscrTab).
+
diff --git a/lib/mnesia/src/mnesia_sup.erl b/lib/mnesia/src/mnesia_sup.erl
new file mode 100644
index 0000000000..9ee4086f50
--- /dev/null
+++ b/lib/mnesia/src/mnesia_sup.erl
@@ -0,0 +1,131 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Supervisor for the entire Mnesia application
+
+-module(mnesia_sup).
+
+-behaviour(application).
+-behaviour(supervisor).
+
+-export([start/0, start/2, init/1, stop/1, start_event/0, kill/0]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% application and suprvisor callback functions
+
+start(normal, Args) ->
+ SupName = {local,?MODULE},
+ case supervisor:start_link(SupName, ?MODULE, [Args]) of
+ {ok, Pid} ->
+ {ok, Pid, {normal, Args}};
+ Error ->
+ Error
+ end;
+start(_, _) ->
+ {error, badarg}.
+
+start() ->
+ SupName = {local,?MODULE},
+ supervisor:start_link(SupName, ?MODULE, []).
+
+stop(_StartArgs) ->
+ ok.
+
+init([]) -> % Supervisor
+ init();
+init([[]]) -> % Application
+ init();
+init(BadArg) ->
+ {error, {badarg, BadArg}}.
+
+init() ->
+ Flags = {one_for_all, 0, 3600}, % Should be rest_for_one policy
+
+ Event = event_procs(),
+ Kernel = kernel_procs(),
+ Mnemosyne = mnemosyne_procs(),
+
+ {ok, {Flags, Event ++ Kernel ++ Mnemosyne}}.
+
+event_procs() ->
+ KillAfter = timer:seconds(30),
+ KA = mnesia_kernel_sup:supervisor_timeout(KillAfter),
+ E = mnesia_event,
+ [{E, {?MODULE, start_event, []}, permanent, KA, worker, [E, gen_event]}].
+
+kernel_procs() ->
+ K = mnesia_kernel_sup,
+ KA = infinity,
+ [{K, {K, start, []}, permanent, KA, supervisor, [K, supervisor]}].
+
+mnemosyne_procs() ->
+ case mnesia_monitor:get_env(embedded_mnemosyne) of
+ true ->
+ Q = mnemosyne_sup,
+ KA = infinity,
+ [{Q, {Q, start, []}, permanent, KA, supervisor, [Q, supervisor]}];
+ false ->
+ []
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% event handler
+
+start_event() ->
+ case gen_event:start_link({local, mnesia_event}) of
+ {ok, Pid} ->
+ case add_event_handler() of
+ ok ->
+ {ok, Pid};
+ Error ->
+ Error
+ end;
+ Error ->
+ Error
+ end.
+
+add_event_handler() ->
+ Handler = mnesia_monitor:get_env(event_module),
+ gen_event:add_handler(mnesia_event, Handler, []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% debug functions
+
+kill() ->
+ Mnesia = [mnesia_fallback | mnesia:ms()],
+ Kill = fun(Name) -> catch exit(whereis(Name), kill) end,
+ lists:foreach(Kill, Mnesia),
+ lists:foreach(fun ensure_dead/1, Mnesia),
+ timer:sleep(10),
+ case lists:keymember(mnesia, 1, application:which_applications()) of
+ true -> kill();
+ false -> ok
+ end.
+
+ensure_dead(Name) ->
+ case whereis(Name) of
+ undefined ->
+ ok;
+ Pid when is_pid(Pid) ->
+ exit(Pid, kill),
+ timer:sleep(10),
+ ensure_dead(Name)
+ end.
+
diff --git a/lib/mnesia/src/mnesia_text.erl b/lib/mnesia/src/mnesia_text.erl
new file mode 100644
index 0000000000..f1a28bf43d
--- /dev/null
+++ b/lib/mnesia/src/mnesia_text.erl
@@ -0,0 +1,194 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_text).
+
+-export([parse/1, file/1, load_textfile/1, dump_to_textfile/1]).
+
+load_textfile(File) ->
+ ensure_started(),
+ case parse(File) of
+ {ok, {Tabs, Data}} ->
+ Badtabs = make_tabs(lists:map(fun validate_tab/1, Tabs)),
+ load_data(del_data(Badtabs, Data, []));
+ Other ->
+ Other
+ end.
+
+dump_to_textfile(File) ->
+ dump_to_textfile(mnesia_lib:is_running(), file:open(File, [write])).
+dump_to_textfile(yes, {ok, F}) ->
+ Tabs = lists:delete(schema, mnesia_lib:local_active_tables()),
+ Defs = lists:map(fun(T) -> {T, [{record_name, mnesia_lib:val({T, record_name})},
+ {attributes, mnesia_lib:val({T, attributes})}]}
+ end,
+ Tabs),
+ io:format(F, "~p.~n", [{tables, Defs}]),
+ lists:foreach(fun(T) -> dump_tab(F, T) end, Tabs),
+ file:close(F);
+dump_to_textfile(_,_) -> error.
+
+
+dump_tab(F, T) ->
+ W = mnesia_lib:val({T, wild_pattern}),
+ {atomic,All} = mnesia:transaction(fun() -> mnesia:match_object(T, W, read) end),
+ lists:foreach(fun(Term) -> io:format(F,"~p.~n", [setelement(1, Term, T)]) end, All).
+
+
+ensure_started() ->
+ case mnesia_lib:is_running() of
+ yes ->
+ yes;
+ no ->
+ case mnesia_lib:exists(mnesia_lib:dir("schema.DAT")) of
+ true ->
+ mnesia:start();
+ false ->
+ mnesia:create_schema([node()]),
+ mnesia:start()
+ end
+ end.
+
+del_data(Bad, [H|T], Ack) ->
+ case lists:member(element(1, H), Bad) of
+ true -> del_data(Bad, T, Ack);
+ false -> del_data(Bad, T, [H|Ack])
+ end;
+del_data(_Bad, [], Ack) ->
+ lists:reverse(Ack).
+
+%% Tis the place to call the validate func in mnesia_schema
+validate_tab({Tabname, List}) ->
+ {Tabname, List};
+validate_tab({Tabname, RecName, List}) ->
+ {Tabname, RecName, List};
+validate_tab(_) -> error(badtab).
+
+make_tabs([{Tab, Def} | Tail]) ->
+ case catch mnesia:table_info(Tab, where_to_read) of
+ {'EXIT', _} -> %% non-existing table
+ case mnesia:create_table(Tab, Def) of
+ {aborted, Reason} ->
+ io:format("** Failed to create table ~w ~n"
+ "** Reason = ~w, Args = ~p~n",
+ [Tab, Reason, Def]),
+ [Tab | make_tabs(Tail)];
+ _ ->
+ io:format("New table ~w~n", [Tab]),
+ make_tabs(Tail)
+ end;
+ Node ->
+ io:format("** Table ~w already exists on ~p, just entering data~n",
+ [Tab, Node]),
+ make_tabs(Tail)
+ end;
+
+make_tabs([]) ->
+ [].
+
+load_data(L) ->
+ mnesia:transaction(fun() ->
+ F = fun(X) ->
+ Tab = element(1, X),
+ RN = mnesia:table_info(Tab, record_name),
+ Rec = setelement(1, X, RN),
+ mnesia:write(Tab, Rec, write) end,
+ lists:foreach(F, L)
+ end).
+
+parse(File) ->
+ case file(File) of
+ {ok, Terms} ->
+ case catch collect(Terms) of
+ {error, X} ->
+ {error, X};
+ Other ->
+ {ok, Other}
+ end;
+ Other ->
+ Other
+ end.
+
+collect([{_, {tables, Tabs}}|L]) ->
+ {Tabs, collect_data(Tabs, L)};
+
+collect(_) ->
+ io:format("No tables found\n", []),
+ error(bad_header).
+
+collect_data(Tabs, [{Line, Term} | Tail]) when is_tuple(Term) ->
+ case lists:keysearch(element(1, Term), 1, Tabs) of
+ {value, _} ->
+ [Term | collect_data(Tabs, Tail)];
+ _Other ->
+ io:format("Object:~p at line ~w unknown\n", [Term,Line]),
+ error(undefined_object)
+ end;
+collect_data(_Tabs, []) -> [];
+collect_data(_Tabs, [H|_T]) ->
+ io:format("Object:~p unknown\n", [H]),
+ error(undefined_object).
+
+error(What) -> throw({error, What}).
+
+file(File) ->
+ case file:open(File, [read]) of
+ {ok, Stream} ->
+ Res = read_terms(Stream, File, 1, []),
+ file:close(Stream),
+ Res;
+ _Other ->
+ {error, open}
+ end.
+
+read_terms(Stream, File, Line, L) ->
+ case read_term_from_stream(Stream, File, Line) of
+ {ok, Term, NextLine} ->
+ read_terms(Stream, File, NextLine, [Term|L]);
+ error ->
+ {error, read};
+ eof ->
+ {ok, lists:reverse(L)}
+ end.
+
+read_term_from_stream(Stream, File, Line) ->
+ R = io:request(Stream, {get_until,'',erl_scan,tokens,[Line]}),
+ case R of
+ {ok,Toks,EndLine} ->
+ case erl_parse:parse_term(Toks) of
+ {ok, Term} ->
+ {ok, {Line, Term}, EndLine};
+ {error, {NewLine,Mod,What}} ->
+ Str = Mod:format_error(What),
+ io:format("Error in line:~p of:~p ~s\n",
+ [NewLine, File, Str]),
+ error;
+ T ->
+ io:format("Error2 **~p~n",[T]),
+ error
+ end;
+ {eof,_EndLine} ->
+ eof;
+ Other ->
+ io:format("Error1 **~p~n",[Other]),
+ error
+ end.
+
+
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
new file mode 100644
index 0000000000..3f3a10a9c1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -0,0 +1,2301 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(mnesia_tm).
+
+-export([
+ start/0,
+ init/1,
+ non_transaction/5,
+ transaction/6,
+ commit_participant/5,
+ dirty/2,
+ display_info/2,
+ do_update_op/3,
+ get_info/1,
+ get_transactions/0,
+ info/1,
+ mnesia_down/1,
+ prepare_checkpoint/2,
+ prepare_checkpoint/1, % Internal
+ prepare_snmp/3,
+ do_snmp/2,
+ put_activity_id/1,
+ put_activity_id/2,
+ block_tab/1,
+ unblock_tab/1,
+ fixtable/3
+ ]).
+
+%% sys callback functions
+-export([system_continue/3,
+ system_terminate/4,
+ system_code_change/4
+ ]).
+
+-include("mnesia.hrl").
+-import(mnesia_lib, [set/2]).
+-import(mnesia_lib, [fatal/2, verbose/2, dbg_out/2]).
+
+-record(state, {coordinators = gb_trees:empty(), participants = gb_trees:empty(), supervisor,
+ blocked_tabs = [], dirty_queue = [], fixed_tabs = []}).
+%% Format on coordinators is [{Tid, EtsTabList} .....
+
+-record(prep, {protocol = sym_trans,
+ %% async_dirty | sync_dirty | sym_trans | sync_sym_trans | asym_trans
+ records = [],
+ prev_tab = [], % initiate to a non valid table name
+ prev_types,
+ prev_snmp,
+ types
+ }).
+
+-record(participant, {tid, pid, commit, disc_nodes = [],
+ ram_nodes = [], protocol = sym_trans}).
+
+start() ->
+ mnesia_monitor:start_proc(?MODULE, ?MODULE, init, [self()]).
+
+init(Parent) ->
+ register(?MODULE, self()),
+ process_flag(trap_exit, true),
+
+ %% Initialize the schema
+ IgnoreFallback = mnesia_monitor:get_env(ignore_fallback_at_startup),
+ mnesia_bup:tm_fallback_start(IgnoreFallback),
+ mnesia_schema:init(IgnoreFallback),
+
+ %% Handshake and initialize transaction recovery
+ mnesia_recover:init(),
+ Early = mnesia_monitor:init(),
+ AllOthers = mnesia_lib:uniq(Early ++ mnesia_lib:all_nodes()) -- [node()],
+ set(original_nodes, AllOthers),
+ mnesia_recover:connect_nodes(AllOthers),
+
+ %% Recover transactions, may wait for decision
+ case mnesia_monitor:use_dir() of
+ true ->
+ P = mnesia_dumper:opt_dump_log(startup), % previous log
+ L = mnesia_dumper:opt_dump_log(startup), % latest log
+ Msg = "Initial dump of log during startup: ~p~n",
+ mnesia_lib:verbose(Msg, [[P, L]]),
+ mnesia_log:init();
+ false ->
+ ignore
+ end,
+
+ mnesia_schema:purge_tmp_files(),
+ mnesia_recover:start_garb(),
+
+ ?eval_debug_fun({?MODULE, init}, [{nodes, AllOthers}]),
+
+ case val(debug) of
+ Debug when Debug /= debug, Debug /= trace ->
+ ignore;
+ _ ->
+ mnesia_subscr:subscribe(whereis(mnesia_event), {table, schema})
+ end,
+ proc_lib:init_ack(Parent, {ok, self()}),
+ doit_loop(#state{supervisor = Parent}).
+
+val(Var) ->
+ case ?catch_val(Var) of
+ {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
+ _VaLuE_ -> _VaLuE_
+ end.
+
+reply({From,Ref}, R) ->
+ From ! {?MODULE, Ref, R};
+reply(From, R) ->
+ From ! {?MODULE, node(), R}.
+
+reply(From, R, State) ->
+ reply(From, R),
+ doit_loop(State).
+
+req(R) ->
+ case whereis(?MODULE) of
+ undefined ->
+ {error, {node_not_running, node()}};
+ Pid ->
+ Ref = make_ref(),
+ Pid ! {{self(), Ref}, R},
+ rec(Pid, Ref)
+ end.
+
+rec() ->
+ rec(whereis(?MODULE)).
+
+rec(Pid) when is_pid(Pid) ->
+ receive
+ {?MODULE, _, Reply} ->
+ Reply;
+
+ {'EXIT', Pid, _} ->
+ {error, {node_not_running, node()}}
+ end;
+rec(undefined) ->
+ {error, {node_not_running, node()}}.
+
+rec(Pid, Ref) ->
+ receive
+ {?MODULE, Ref, Reply} ->
+ Reply;
+ {'EXIT', Pid, _} ->
+ {error, {node_not_running, node()}}
+ end.
+
+tmlink({From, Ref}) when is_reference(Ref) ->
+ link(From);
+tmlink(From) ->
+ link(From).
+tmpid({Pid, _Ref}) when is_pid(Pid) ->
+ Pid;
+tmpid(Pid) ->
+ Pid.
+
+%% Returns a list of participant transaction Tid's
+mnesia_down(Node) ->
+ %% Syncronously call needed in order to avoid
+ %% race with mnesia_tm's coordinator processes
+ %% that may restart and acquire new locks.
+ %% mnesia_monitor takes care of the sync
+ case whereis(?MODULE) of
+ undefined ->
+ mnesia_monitor:mnesia_down(?MODULE, {Node, []});
+ Pid ->
+ Pid ! {mnesia_down, Node}
+ end.
+
+prepare_checkpoint(Nodes, Cp) ->
+ rpc:multicall(Nodes, ?MODULE, prepare_checkpoint, [Cp]).
+
+prepare_checkpoint(Cp) ->
+ req({prepare_checkpoint,Cp}).
+
+block_tab(Tab) ->
+ req({block_tab, Tab}).
+
+unblock_tab(Tab) ->
+ req({unblock_tab, Tab}).
+
+doit_loop(#state{coordinators=Coordinators,participants=Participants,supervisor=Sup}=State) ->
+ receive
+ {_From, {async_dirty, Tid, Commit, Tab}} ->
+ case lists:member(Tab, State#state.blocked_tabs) of
+ false ->
+ do_async_dirty(Tid, Commit, Tab),
+ doit_loop(State);
+ true ->
+ Item = {async_dirty, Tid, Commit, Tab},
+ State2 = State#state{dirty_queue = [Item | State#state.dirty_queue]},
+ doit_loop(State2)
+ end;
+
+ {From, {sync_dirty, Tid, Commit, Tab}} ->
+ case lists:member(Tab, State#state.blocked_tabs) of
+ false ->
+ do_sync_dirty(From, Tid, Commit, Tab),
+ doit_loop(State);
+ true ->
+ Item = {sync_dirty, From, Tid, Commit, Tab},
+ State2 = State#state{dirty_queue = [Item | State#state.dirty_queue]},
+ doit_loop(State2)
+ end;
+
+ {From, start_outer} -> %% Create and associate ets_tab with Tid
+ case catch ?ets_new_table(mnesia_trans_store, [bag, public]) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table for the "
+ "local transaction store",
+ reply(From, {error, {system_limit, Msg, Reason}}, State);
+ Etab ->
+ tmlink(From),
+ C = mnesia_recover:incr_trans_tid_serial(),
+ ?ets_insert(Etab, {nodes, node()}),
+ Tid = #tid{pid = tmpid(From), counter = C},
+ A2 = gb_trees:insert(Tid,[Etab],Coordinators),
+ S2 = State#state{coordinators = A2},
+ reply(From, {new_tid, Tid, Etab}, S2)
+ end;
+
+ {From, {ask_commit, Protocol, Tid, Commit, DiscNs, RamNs}} ->
+ ?eval_debug_fun({?MODULE, doit_ask_commit},
+ [{tid, Tid}, {prot, Protocol}]),
+ mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ Pid =
+ case Protocol of
+ asym_trans when node(Tid#tid.pid) /= node() ->
+ Args = [tmpid(From), Tid, Commit, DiscNs, RamNs],
+ spawn_link(?MODULE, commit_participant, Args);
+ _ when node(Tid#tid.pid) /= node() -> %% *_sym_trans
+ reply(From, {vote_yes, Tid}),
+ nopid
+ end,
+ P = #participant{tid = Tid,
+ pid = Pid,
+ commit = Commit,
+ disc_nodes = DiscNs,
+ ram_nodes = RamNs,
+ protocol = Protocol},
+ State2 = State#state{participants = gb_trees:insert(Tid,P,Participants)},
+ doit_loop(State2);
+
+ {Tid, do_commit} ->
+ case gb_trees:lookup(Tid, Participants) of
+ none ->
+ verbose("Tried to commit a non participant transaction ~p~n",[Tid]),
+ doit_loop(State);
+ {value, P} ->
+ ?eval_debug_fun({?MODULE,do_commit,pre},[{tid,Tid},{participant,P}]),
+ case P#participant.pid of
+ nopid ->
+ Commit = P#participant.commit,
+ Member = lists:member(node(), P#participant.disc_nodes),
+ if Member == false ->
+ ignore;
+ P#participant.protocol == sym_trans ->
+ mnesia_log:log(Commit);
+ P#participant.protocol == sync_sym_trans ->
+ mnesia_log:slog(Commit)
+ end,
+ mnesia_recover:note_decision(Tid, committed),
+ do_commit(Tid, Commit),
+ if
+ P#participant.protocol == sync_sym_trans ->
+ Tid#tid.pid ! {?MODULE, node(), {committed, Tid}};
+ true ->
+ ignore
+ end,
+ mnesia_locker:release_tid(Tid),
+ transaction_terminated(Tid),
+ ?eval_debug_fun({?MODULE,do_commit,post},[{tid,Tid},{pid,nopid}]),
+ doit_loop(State#state{participants=
+ gb_trees:delete(Tid,Participants)});
+ Pid when is_pid(Pid) ->
+ Pid ! {Tid, committed},
+ ?eval_debug_fun({?MODULE, do_commit, post}, [{tid, Tid}, {pid, Pid}]),
+ doit_loop(State)
+ end
+ end;
+
+ {Tid, simple_commit} ->
+ mnesia_recover:note_decision(Tid, committed),
+ mnesia_locker:release_tid(Tid),
+ transaction_terminated(Tid),
+ doit_loop(State);
+
+ {Tid, {do_abort, Reason}} ->
+ ?eval_debug_fun({?MODULE, do_abort, pre}, [{tid, Tid}]),
+ case gb_trees:lookup(Tid, Participants) of
+ none ->
+ verbose("Tried to abort a non participant transaction ~p: ~p~n",
+ [Tid, Reason]),
+ mnesia_locker:release_tid(Tid),
+ doit_loop(State);
+ {value, P} ->
+ case P#participant.pid of
+ nopid ->
+ Commit = P#participant.commit,
+ mnesia_recover:note_decision(Tid, aborted),
+ do_abort(Tid, Commit),
+ if
+ P#participant.protocol == sync_sym_trans ->
+ Tid#tid.pid ! {?MODULE, node(), {aborted, Tid}};
+ true ->
+ ignore
+ end,
+ transaction_terminated(Tid),
+ mnesia_locker:release_tid(Tid),
+ ?eval_debug_fun({?MODULE, do_abort, post}, [{tid, Tid}, {pid, nopid}]),
+ doit_loop(State#state{participants=
+ gb_trees:delete(Tid,Participants)});
+ Pid when is_pid(Pid) ->
+ Pid ! {Tid, {do_abort, Reason}},
+ ?eval_debug_fun({?MODULE, do_abort, post},
+ [{tid, Tid}, {pid, Pid}]),
+ doit_loop(State)
+ end
+ end;
+
+ {From, {add_store, Tid}} -> %% new store for nested transaction
+ case catch ?ets_new_table(mnesia_trans_store, [bag, public]) of
+ {'EXIT', Reason} -> %% system limit
+ Msg = "Cannot create an ets table for a nested "
+ "local transaction store",
+ reply(From, {error, {system_limit, Msg, Reason}}, State);
+ Etab ->
+ A2 = add_coord_store(Coordinators, Tid, Etab),
+ reply(From, {new_store, Etab},
+ State#state{coordinators = A2})
+ end;
+
+ {From, {del_store, Tid, Current, Obsolete, PropagateStore}} ->
+ opt_propagate_store(Current, Obsolete, PropagateStore),
+ A2 = del_coord_store(Coordinators, Tid, Current, Obsolete),
+ reply(From, store_erased, State#state{coordinators = A2});
+
+ {'EXIT', Pid, Reason} ->
+ handle_exit(Pid, Reason, State);
+
+ {From, {restart, Tid, Store}} ->
+ A2 = restore_stores(Coordinators, Tid, Store),
+ clear_fixtable([Store]),
+ ?ets_match_delete(Store, '_'),
+ ?ets_insert(Store, {nodes, node()}),
+ reply(From, {restarted, Tid}, State#state{coordinators = A2});
+
+ {delete_transaction, Tid} ->
+ %% used to clear transactions which are committed
+ %% in coordinator or participant processes
+ case gb_trees:is_defined(Tid, Participants) of
+ false ->
+ case gb_trees:lookup(Tid, Coordinators) of
+ none ->
+ verbose("** ERROR ** Tried to delete a non transaction ~p~n",
+ [Tid]),
+ doit_loop(State);
+ {value, Etabs} ->
+ clear_fixtable(Etabs),
+ erase_ets_tabs(Etabs),
+ transaction_terminated(Tid),
+ doit_loop(State#state{coordinators =
+ gb_trees:delete(Tid,Coordinators)})
+ end;
+ true ->
+ transaction_terminated(Tid),
+ State2 = State#state{participants=gb_trees:delete(Tid,Participants)},
+ doit_loop(State2)
+ end;
+
+ {sync_trans_serial, Tid} ->
+ %% Do the Lamport thing here
+ mnesia_recover:sync_trans_tid_serial(Tid),
+ doit_loop(State);
+
+ {From, info} ->
+ reply(From, {info, gb_trees:values(Participants),
+ gb_trees:to_list(Coordinators)}, State);
+
+ {mnesia_down, N} ->
+ verbose("Got mnesia_down from ~p, reconfiguring...~n", [N]),
+ reconfigure_coordinators(N, gb_trees:to_list(Coordinators)),
+
+ Tids = gb_trees:keys(Participants),
+ reconfigure_participants(N, gb_trees:values(Participants)),
+ NewState = clear_fixtable(N, State),
+ mnesia_monitor:mnesia_down(?MODULE, {N, Tids}),
+ doit_loop(NewState);
+
+ {From, {unblock_me, Tab}} ->
+ case lists:member(Tab, State#state.blocked_tabs) of
+ false ->
+ verbose("Wrong dirty Op blocked on ~p ~p ~p",
+ [node(), Tab, From]),
+ reply(From, unblocked),
+ doit_loop(State);
+ true ->
+ Item = {Tab, unblock_me, From},
+ State2 = State#state{dirty_queue = [Item | State#state.dirty_queue]},
+ doit_loop(State2)
+ end;
+
+ {From, {block_tab, Tab}} ->
+ State2 = State#state{blocked_tabs = [Tab | State#state.blocked_tabs]},
+ reply(From, ok, State2);
+
+ {From, {unblock_tab, Tab}} ->
+ BlockedTabs2 = State#state.blocked_tabs -- [Tab],
+ case lists:member(Tab, BlockedTabs2) of
+ false ->
+ mnesia_controller:unblock_table(Tab),
+ Queue = process_dirty_queue(Tab, State#state.dirty_queue),
+ State2 = State#state{blocked_tabs = BlockedTabs2,
+ dirty_queue = Queue},
+ reply(From, ok, State2);
+ true ->
+ State2 = State#state{blocked_tabs = BlockedTabs2},
+ reply(From, ok, State2)
+ end;
+
+ {From, {prepare_checkpoint, Cp}} ->
+ Res = mnesia_checkpoint:tm_prepare(Cp),
+ case Res of
+ {ok, _Name, IgnoreNew, _Node} ->
+ prepare_pending_coordinators(gb_trees:to_list(Coordinators), IgnoreNew),
+ prepare_pending_participants(gb_trees:values(Participants), IgnoreNew);
+ {error, _Reason} ->
+ ignore
+ end,
+ reply(From, Res, State);
+ {From, {fixtable, [Tab,Lock,Requester]}} ->
+ case ?catch_val({Tab, storage_type}) of
+ {'EXIT', _} ->
+ reply(From, error, State);
+ Storage ->
+ mnesia_lib:db_fixtable(Storage,Tab,Lock),
+ NewState = manage_fixtable(Tab,Lock,Requester,State),
+ reply(From, node(), NewState)
+ end;
+
+ {system, From, Msg} ->
+ dbg_out("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
+ sys:handle_system_msg(Msg, From, Sup, ?MODULE, [], State);
+
+ Msg ->
+ verbose("** ERROR ** ~p got unexpected message: ~p~n", [?MODULE, Msg]),
+ doit_loop(State)
+ end.
+
+do_sync_dirty(From, Tid, Commit, _Tab) ->
+ ?eval_debug_fun({?MODULE, sync_dirty, pre}, [{tid, Tid}]),
+ Res = (catch do_dirty(Tid, Commit)),
+ ?eval_debug_fun({?MODULE, sync_dirty, post}, [{tid, Tid}]),
+ From ! {?MODULE, node(), {dirty_res, Res}}.
+
+do_async_dirty(Tid, Commit, _Tab) ->
+ ?eval_debug_fun({?MODULE, async_dirty, pre}, [{tid, Tid}]),
+ catch do_dirty(Tid, Commit),
+ ?eval_debug_fun({?MODULE, async_dirty, post}, [{tid, Tid}]).
+
+
+%% Process items in fifo order
+process_dirty_queue(Tab, [Item | Queue]) ->
+ Queue2 = process_dirty_queue(Tab, Queue),
+ case Item of
+ {async_dirty, Tid, Commit, Tab} ->
+ do_async_dirty(Tid, Commit, Tab),
+ Queue2;
+ {sync_dirty, From, Tid, Commit, Tab} ->
+ do_sync_dirty(From, Tid, Commit, Tab),
+ Queue2;
+ {Tab, unblock_me, From} ->
+ reply(From, unblocked),
+ Queue2;
+ _ ->
+ [Item | Queue2]
+ end;
+process_dirty_queue(_Tab, []) ->
+ [].
+
+prepare_pending_coordinators([{Tid, [Store | _Etabs]} | Coords], IgnoreNew) ->
+ case catch ?ets_lookup(Store, pending) of
+ [] ->
+ prepare_pending_coordinators(Coords, IgnoreNew);
+ [Pending] ->
+ case lists:member(Tid, IgnoreNew) of
+ false ->
+ mnesia_checkpoint:tm_enter_pending(Pending);
+ true ->
+ ignore
+ end,
+ prepare_pending_coordinators(Coords, IgnoreNew);
+ {'EXIT', _} ->
+ prepare_pending_coordinators(Coords, IgnoreNew)
+ end;
+prepare_pending_coordinators([], _IgnoreNew) ->
+ ok.
+
+prepare_pending_participants([Part | Parts], IgnoreNew) ->
+ Tid = Part#participant.tid,
+ D = Part#participant.disc_nodes,
+ R = Part#participant.ram_nodes,
+ case lists:member(Tid, IgnoreNew) of
+ false ->
+ mnesia_checkpoint:tm_enter_pending(Tid, D, R);
+ true ->
+ ignore
+ end,
+ prepare_pending_participants(Parts, IgnoreNew);
+prepare_pending_participants([], _IgnoreNew) ->
+ ok.
+
+handle_exit(Pid, _Reason, State) when node(Pid) /= node() ->
+ %% We got exit from a remote fool
+ doit_loop(State);
+
+handle_exit(Pid, _Reason, State) when Pid == State#state.supervisor ->
+ %% Our supervisor has died, time to stop
+ do_stop(State);
+
+handle_exit(Pid, Reason, State) ->
+ %% Check if it is a coordinator
+ case pid_search_delete(Pid, gb_trees:to_list(State#state.coordinators)) of
+ {none, _} ->
+ %% Check if it is a participant
+ Ps = gb_trees:values(State#state.participants),
+ case mnesia_lib:key_search_delete(Pid,#participant.pid,Ps) of
+ {none, _} ->
+ %% We got exit from a local fool
+ doit_loop(State);
+ {P = #participant{}, _RestP} ->
+ fatal("Participant ~p in transaction ~p died ~p~n",
+ [P#participant.pid, P#participant.tid, Reason]),
+ NewPs = gb_trees:delete(P#participant.tid,State#state.participants),
+ doit_loop(State#state{participants = NewPs})
+ end;
+
+ {{Tid, Etabs}, RestC} ->
+ %% A local coordinator has died and
+ %% we must determine the outcome of the
+ %% transaction and tell mnesia_tm on the
+ %% other nodes about it and then recover
+ %% locally.
+ recover_coordinator(Tid, Etabs),
+ doit_loop(State#state{coordinators = RestC})
+ end.
+
+recover_coordinator(Tid, Etabs) ->
+ verbose("Coordinator ~p in transaction ~p died.~n", [Tid#tid.pid, Tid]),
+
+ Store = hd(Etabs),
+ CheckNodes = get_elements(nodes,Store),
+ TellNodes = CheckNodes -- [node()],
+ case catch arrange(Tid, Store, async) of
+ {'EXIT', Reason} ->
+ dbg_out("Recovery of coordinator ~p failed:~n", [Tid, Reason]),
+ Protocol = asym_trans,
+ tell_outcome(Tid, Protocol, node(), CheckNodes, TellNodes);
+ {_N, Prep} ->
+ %% Tell the participants about the outcome
+ Protocol = Prep#prep.protocol,
+ Outcome = tell_outcome(Tid, Protocol, node(), CheckNodes, TellNodes),
+
+ %% Recover locally
+ CR = Prep#prep.records,
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ case lists:keysearch(node(), #commit.node, CR) of
+ {value, Local} ->
+ ?eval_debug_fun({?MODULE, recover_coordinator, pre},
+ [{tid, Tid}, {outcome, Outcome}, {prot, Protocol}]),
+ recover_coordinator(Tid, Protocol, Outcome, Local, DiscNs, RamNs),
+ ?eval_debug_fun({?MODULE, recover_coordinator, post},
+ [{tid, Tid}, {outcome, Outcome}, {prot, Protocol}]);
+ false -> %% When killed before store havn't been copied to
+ ok %% to the new nested trans store.
+ end
+ end,
+ erase_ets_tabs(Etabs),
+ transaction_terminated(Tid),
+ mnesia_locker:release_tid(Tid).
+
+recover_coordinator(Tid, sym_trans, committed, Local, _, _) ->
+ mnesia_recover:note_decision(Tid, committed),
+ do_dirty(Tid, Local);
+recover_coordinator(Tid, sym_trans, aborted, _Local, _, _) ->
+ mnesia_recover:note_decision(Tid, aborted);
+recover_coordinator(Tid, sync_sym_trans, committed, Local, _, _) ->
+ mnesia_recover:note_decision(Tid, committed),
+ do_dirty(Tid, Local);
+recover_coordinator(Tid, sync_sym_trans, aborted, _Local, _, _) ->
+ mnesia_recover:note_decision(Tid, aborted);
+
+recover_coordinator(Tid, asym_trans, committed, Local, DiscNs, RamNs) ->
+ D = #decision{tid = Tid, outcome = committed,
+ disc_nodes = DiscNs, ram_nodes = RamNs},
+ mnesia_recover:log_decision(D),
+ do_commit(Tid, Local);
+recover_coordinator(Tid, asym_trans, aborted, Local, DiscNs, RamNs) ->
+ D = #decision{tid = Tid, outcome = aborted,
+ disc_nodes = DiscNs, ram_nodes = RamNs},
+ mnesia_recover:log_decision(D),
+ do_abort(Tid, Local).
+
+restore_stores(Coords, Tid, Store) ->
+ Etstabs = gb_trees:get(Tid,Coords),
+ Remaining = lists:delete(Store, Etstabs),
+ erase_ets_tabs(Remaining),
+ gb_trees:update(Tid,[Store],Coords).
+
+add_coord_store(Coords, Tid, Etab) ->
+ Stores = gb_trees:get(Tid, Coords),
+ gb_trees:update(Tid, [Etab|Stores], Coords).
+
+del_coord_store(Coords, Tid, Current, Obsolete) ->
+ Stores = gb_trees:get(Tid, Coords),
+ Rest =
+ case Stores of
+ [Obsolete, Current | Tail] -> Tail;
+ [Current, Obsolete | Tail] -> Tail
+ end,
+ ?ets_delete_table(Obsolete),
+ gb_trees:update(Tid, [Current|Rest], Coords).
+
+erase_ets_tabs([H | T]) ->
+ ?ets_delete_table(H),
+ erase_ets_tabs(T);
+erase_ets_tabs([]) ->
+ ok.
+
+%% Clear one transactions all fixtables
+clear_fixtable([Store|_]) ->
+ Fixed = get_elements(fixtable, Store),
+ lists:foreach(fun({Tab,Node}) ->
+ rpc:cast(Node, ?MODULE, fixtable, [Tab,false,self()])
+ end, Fixed).
+
+%% Clear all fixtable Node have done
+clear_fixtable(Node, State=#state{fixed_tabs = FT0}) ->
+ case mnesia_lib:key_search_delete(Node, 1, FT0) of
+ {none, _Ft} ->
+ State;
+ {{Node,Tabs},FT} ->
+ lists:foreach(
+ fun(Tab) ->
+ case ?catch_val({Tab, storage_type}) of
+ {'EXIT', _} ->
+ ignore;
+ Storage ->
+ mnesia_lib:db_fixtable(Storage,Tab,false)
+ end
+ end, Tabs),
+ State#state{fixed_tabs=FT}
+ end.
+
+manage_fixtable(Tab,true,Requester,State=#state{fixed_tabs = FT0}) ->
+ Node = node(Requester),
+ case mnesia_lib:key_search_delete(Node, 1, FT0) of
+ {none, FT}->
+ State#state{fixed_tabs=[{Node, [Tab]}|FT]};
+ {{Node,Tabs},FT} ->
+ State#state{fixed_tabs=[{Node, [Tab|Tabs]}|FT]}
+ end;
+manage_fixtable(Tab,false,Requester,State = #state{fixed_tabs = FT0}) ->
+ Node = node(Requester),
+ case mnesia_lib:key_search_delete(Node, 1, FT0) of
+ {none,_FT} -> State; % Hmm? Safeguard
+ {{Node, Tabs0},FT} ->
+ case lists:delete(Tab, Tabs0) of
+ [] -> State#state{fixed_tabs=FT};
+ Tabs -> State#state{fixed_tabs=[{Node,Tabs}|FT]}
+ end
+ end.
+
+%% Deletes a pid from a list of participants
+%% or from a gb_trees of coordinators
+%% {none, All} or {Tr, Rest}
+pid_search_delete(Pid, Trs) ->
+ pid_search_delete(Pid, Trs, none, []).
+pid_search_delete(Pid, [Tr = {Tid, _Ts} | Trs], _Val, Ack) when Tid#tid.pid == Pid ->
+ pid_search_delete(Pid, Trs, Tr, Ack);
+pid_search_delete(Pid, [Tr | Trs], Val, Ack) ->
+ pid_search_delete(Pid, Trs, Val, [Tr | Ack]);
+
+pid_search_delete(_Pid, [], Val, Ack) ->
+ {Val, gb_trees:from_orddict(lists:reverse(Ack))}.
+
+transaction_terminated(Tid) ->
+ mnesia_checkpoint:tm_exit_pending(Tid),
+ Pid = Tid#tid.pid,
+ if
+ node(Pid) == node() ->
+ unlink(Pid);
+ true -> %% Do the Lamport thing here
+ mnesia_recover:sync_trans_tid_serial(Tid)
+ end.
+
+%% If there are an surrounding transaction, we inherit it's context
+non_transaction(OldState={_,_,Trans}, Fun, Args, ActivityKind, Mod)
+ when Trans /= non_transaction ->
+ Kind = case ActivityKind of
+ sync_dirty -> sync;
+ _ -> async
+ end,
+ case transaction(OldState, Fun, Args, infinity, Mod, Kind) of
+ {atomic, Res} ->
+ Res;
+ {aborted,Res} ->
+ exit(Res)
+ end;
+non_transaction(OldState, Fun, Args, ActivityKind, Mod) ->
+ Id = {ActivityKind, self()},
+ NewState = {Mod, Id, non_transaction},
+ put(mnesia_activity_state, NewState),
+ %% I Want something uniqe here, references are expensive
+ Ref = mNeSia_nOn_TrAnSacTioN,
+ RefRes = (catch {Ref, apply(Fun, Args)}),
+ case OldState of
+ undefined -> erase(mnesia_activity_state);
+ _ -> put(mnesia_activity_state, OldState)
+ end,
+ case RefRes of
+ {Ref, Res} ->
+ case Res of
+ {'EXIT', Reason} -> exit(Reason);
+ {aborted, Reason} -> mnesia:abort(Reason);
+ _ -> Res
+ end;
+ {'EXIT', Reason} ->
+ exit(Reason);
+ Throw ->
+ throw(Throw)
+ end.
+
+transaction(OldTidTs, Fun, Args, Retries, Mod, Type) ->
+ Factor = 1,
+ case OldTidTs of
+ undefined -> % Outer
+ execute_outer(Mod, Fun, Args, Factor, Retries, Type);
+ {_, _, non_transaction} -> % Transaction inside ?sync_dirty
+ Res = execute_outer(Mod, Fun, Args, Factor, Retries, Type),
+ put(mnesia_activity_state, OldTidTs),
+ Res;
+ {OldMod, Tid, Ts} -> % Nested
+ execute_inner(Mod, Tid, OldMod, Ts, Fun, Args, Factor, Retries, Type);
+ _ -> % Bad nesting
+ {aborted, nested_transaction}
+ end.
+
+execute_outer(Mod, Fun, Args, Factor, Retries, Type) ->
+ case req(start_outer) of
+ {error, Reason} ->
+ {aborted, Reason};
+ {new_tid, Tid, Store} ->
+ Ts = #tidstore{store = Store},
+ NewTidTs = {Mod, Tid, Ts},
+ put(mnesia_activity_state, NewTidTs),
+ execute_transaction(Fun, Args, Factor, Retries, Type)
+ end.
+
+execute_inner(Mod, Tid, OldMod, Ts, Fun, Args, Factor, Retries, Type) ->
+ case req({add_store, Tid}) of
+ {error, Reason} ->
+ {aborted, Reason};
+ {new_store, Ets} ->
+ copy_ets(Ts#tidstore.store, Ets),
+ Up = [{OldMod,Ts#tidstore.store} | Ts#tidstore.up_stores],
+ NewTs = Ts#tidstore{level = 1 + Ts#tidstore.level,
+ store = Ets,
+ up_stores = Up},
+ NewTidTs = {Mod, Tid, NewTs},
+ put(mnesia_activity_state, NewTidTs),
+ execute_transaction(Fun, Args, Factor, Retries, Type)
+ end.
+
+copy_ets(From, To) ->
+ do_copy_ets(?ets_first(From), From, To).
+do_copy_ets('$end_of_table', _,_) ->
+ ok;
+do_copy_ets(K, From, To) ->
+ Objs = ?ets_lookup(From, K),
+ insert_objs(Objs, To),
+ do_copy_ets(?ets_next(From, K), From, To).
+
+insert_objs([H|T], Tab) ->
+ ?ets_insert(Tab, H),
+ insert_objs(T, Tab);
+insert_objs([], _Tab) ->
+ ok.
+
+execute_transaction(Fun, Args, Factor, Retries, Type) ->
+ case catch apply_fun(Fun, Args, Type) of
+ {'EXIT', Reason} ->
+ check_exit(Fun, Args, Factor, Retries, Reason, Type);
+ {atomic, Value} ->
+ mnesia_lib:incr_counter(trans_commits),
+ erase(mnesia_activity_state),
+ %% no need to clear locks, already done by commit ...
+ %% Flush any un processed mnesia_down messages we might have
+ flush_downs(),
+ catch unlink(whereis(?MODULE)),
+ {atomic, Value};
+ {nested_atomic, Value} ->
+ mnesia_lib:incr_counter(trans_commits),
+ {atomic, Value};
+ Value -> %% User called throw
+ Reason = {aborted, {throw, Value}},
+ return_abort(Fun, Args, Reason)
+ end.
+
+apply_fun(Fun, Args, Type) ->
+ Result = apply(Fun, Args),
+ case t_commit(Type) of
+ do_commit ->
+ {atomic, Result};
+ do_commit_nested ->
+ {nested_atomic, Result};
+ {do_abort, {aborted, Reason}} ->
+ {'EXIT', {aborted, Reason}};
+ {do_abort, Reason} ->
+ {'EXIT', {aborted, Reason}}
+ end.
+
+check_exit(Fun, Args, Factor, Retries, Reason, Type) ->
+ case Reason of
+ {aborted, C = #cyclic{}} ->
+ maybe_restart(Fun, Args, Factor, Retries, Type, C);
+ {aborted, {node_not_running, N}} ->
+ maybe_restart(Fun, Args, Factor, Retries, Type, {node_not_running, N});
+ {aborted, {bad_commit, N}} ->
+ maybe_restart(Fun, Args, Factor, Retries, Type, {bad_commit, N});
+ _ ->
+ return_abort(Fun, Args, Reason)
+ end.
+
+maybe_restart(Fun, Args, Factor, Retries, Type, Why) ->
+ {Mod, Tid, Ts} = get(mnesia_activity_state),
+ case try_again(Retries) of
+ yes when Ts#tidstore.level == 1 ->
+ restart(Mod, Tid, Ts, Fun, Args, Factor, Retries, Type, Why);
+ yes ->
+ return_abort(Fun, Args, Why);
+ no ->
+ return_abort(Fun, Args, {aborted, nomore})
+ end.
+
+try_again(infinity) -> yes;
+try_again(X) when is_number(X) , X > 1 -> yes;
+try_again(_) -> no.
+
+%% We can only restart toplevel transactions.
+%% If a deadlock situation occurs in a nested transaction
+%% The whole thing including all nested transactions need to be
+%% restarted. The stack is thus popped by a consequtive series of
+%% exit({aborted, #cyclic{}}) calls
+
+restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
+ mnesia_lib:incr_counter(trans_restarts),
+ Retries = decr(Retries0),
+ case Why of
+ {bad_commit, _N} ->
+ return_abort(Fun, Args, Why),
+ Factor = 1,
+ SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
+ dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
+ timer:sleep(SleepTime),
+ execute_outer(Mod, Fun, Args, Factor, Retries, Type);
+ {node_not_running, _N} -> %% Avoids hanging in receive_release_tid_ack
+ return_abort(Fun, Args, Why),
+ Factor = 1,
+ SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
+ dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
+ timer:sleep(SleepTime),
+ execute_outer(Mod, Fun, Args, Factor, Retries, Type);
+ _ ->
+ SleepTime = mnesia_lib:random_time(Factor0, Tid#tid.counter),
+ dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
+
+ if
+ Factor0 /= 10 ->
+ ignore;
+ true ->
+ %% Our serial may be much larger than other nodes ditto
+ AllNodes = val({current, db_nodes}),
+ verbose("Sync serial ~p~n", [Tid]),
+ rpc:abcast(AllNodes, ?MODULE, {sync_trans_serial, Tid})
+ end,
+ intercept_friends(Tid, Ts),
+ Store = Ts#tidstore.store,
+ Nodes = get_elements(nodes,Store),
+ ?MODULE ! {self(), {restart, Tid, Store}},
+ mnesia_locker:send_release_tid(Nodes, Tid),
+ timer:sleep(SleepTime),
+ mnesia_locker:receive_release_tid_acc(Nodes, Tid),
+ case get_restarted(Tid) of
+ {restarted, Tid} ->
+ execute_transaction(Fun, Args, Factor0 + 1,
+ Retries, Type);
+ {error, Reason} ->
+ mnesia:abort(Reason)
+ end
+ end.
+
+get_restarted(Tid) ->
+ case Res = rec() of
+ {restarted, Tid} ->
+ Res;
+ {error,_} ->
+ Res;
+ _ -> %% We could get a couple of aborts to many.
+ get_restarted(Tid)
+ end.
+
+decr(infinity) -> infinity;
+decr(X) when is_integer(X), X > 1 -> X - 1;
+decr(_X) -> 0.
+
+return_abort(Fun, Args, Reason) ->
+ {_Mod, Tid, Ts} = get(mnesia_activity_state),
+ dbg_out("Transaction ~p calling ~p with ~p failed: ~n ~p~n",
+ [Tid, Fun, Args, Reason]),
+ OldStore = Ts#tidstore.store,
+ Nodes = get_elements(nodes, OldStore),
+ intercept_friends(Tid, Ts),
+ catch mnesia_lib:incr_counter(trans_failures),
+ Level = Ts#tidstore.level,
+ if
+ Level == 1 ->
+ mnesia_locker:async_release_tid(Nodes, Tid),
+ ?MODULE ! {delete_transaction, Tid},
+ erase(mnesia_activity_state),
+ flush_downs(),
+ catch unlink(whereis(?MODULE)),
+ {aborted, mnesia_lib:fix_error(Reason)};
+ true ->
+ %% Nested transaction
+ [{OldMod,NewStore} | Tail] = Ts#tidstore.up_stores,
+ req({del_store, Tid, NewStore, OldStore, true}),
+ Ts2 = Ts#tidstore{store = NewStore,
+ up_stores = Tail,
+ level = Level - 1},
+ NewTidTs = {OldMod, Tid, Ts2},
+ put(mnesia_activity_state, NewTidTs),
+ case Reason of
+ #cyclic{} ->
+ exit({aborted, Reason});
+ {node_not_running, _N} ->
+ exit({aborted, Reason});
+ {bad_commit, _N}->
+ exit({aborted, Reason});
+ _ ->
+ {aborted, mnesia_lib:fix_error(Reason)}
+ end
+ end.
+
+flush_downs() ->
+ receive
+ {?MODULE, _, _} -> flush_downs(); % Votes
+ {mnesia_down, _} -> flush_downs()
+ after 0 -> flushed
+ end.
+
+
+put_activity_id(MTT) ->
+ put_activity_id(MTT, undefined).
+put_activity_id(undefined,_) ->
+ erase_activity_id();
+put_activity_id({Mod, Tid = #tid{}, Ts = #tidstore{}},Fun) ->
+ flush_downs(),
+ Store = Ts#tidstore.store,
+ if
+ is_function(Fun) ->
+ ?ets_insert(Store, {friends, {stop,Fun}});
+ true ->
+ ?ets_insert(Store, {friends, self()})
+ end,
+ NewTidTs = {Mod, Tid, Ts},
+ put(mnesia_activity_state, NewTidTs);
+put_activity_id(SimpleState,_) ->
+ put(mnesia_activity_state, SimpleState).
+
+erase_activity_id() ->
+ flush_downs(),
+ erase(mnesia_activity_state).
+
+get_elements(Type,Store) ->
+ case catch ?ets_lookup(Store, Type) of
+ [] -> [];
+ [{_,Val}] -> [Val];
+ {'EXIT', _} -> [];
+ Vals -> [Val|| {_,Val} <- Vals]
+ end.
+
+opt_propagate_store(_Current, _Obsolete, false) ->
+ ok;
+opt_propagate_store(Current, Obsolete, true) ->
+ propagate_store(Current, nodes, get_elements(nodes,Obsolete)),
+ propagate_store(Current, fixtable, get_elements(fixtable,Obsolete)),
+ propagate_store(Current, friends, get_elements(friends, Obsolete)).
+
+propagate_store(Store, Var, [Val | Vals]) ->
+ ?ets_insert(Store, {Var, Val}),
+ propagate_store(Store, Var, Vals);
+propagate_store(_Store, _Var, []) ->
+ ok.
+
+%% Tell all processes that are cooperating with the current transaction
+intercept_friends(_Tid, Ts) ->
+ Friends = get_elements(friends,Ts#tidstore.store),
+ intercept_best_friend(Friends, false).
+
+intercept_best_friend([],_) -> ok;
+intercept_best_friend([{stop,Fun} | R],Ignore) ->
+ catch Fun(),
+ intercept_best_friend(R,Ignore);
+intercept_best_friend([Pid | R],false) ->
+ Pid ! {activity_ended, undefined, self()},
+ wait_for_best_friend(Pid, 0),
+ intercept_best_friend(R,true);
+intercept_best_friend([_|R],true) ->
+ intercept_best_friend(R,true).
+
+wait_for_best_friend(Pid, Timeout) ->
+ receive
+ {'EXIT', Pid, _} -> ok;
+ {activity_ended, _, Pid} -> ok
+ after Timeout ->
+ case my_process_is_alive(Pid) of
+ true -> wait_for_best_friend(Pid, 1000);
+ false -> ok
+ end
+ end.
+
+my_process_is_alive(Pid) ->
+ case catch erlang:is_process_alive(Pid) of % New BIF in R5
+ true ->
+ true;
+ false ->
+ false;
+ {'EXIT', _} -> % Pre R5 backward compatibility
+ case process_info(Pid, message_queue_len) of
+ undefined -> false;
+ _ -> true
+ end
+ end.
+
+dirty(Protocol, Item) ->
+ {{Tab, Key}, _Val, _Op} = Item,
+ Tid = {dirty, self()},
+ Prep = prepare_items(Tid, Tab, Key, [Item], #prep{protocol= Protocol}),
+ CR = Prep#prep.records,
+ case Protocol of
+ async_dirty ->
+ %% Send commit records to the other involved nodes,
+ %% but do only wait for one node to complete.
+ %% Preferrably, the local node if possible.
+
+ ReadNode = val({Tab, where_to_read}),
+ {WaitFor, FirstRes} = async_send_dirty(Tid, CR, Tab, ReadNode),
+ rec_dirty(WaitFor, FirstRes);
+
+ sync_dirty ->
+ %% Send commit records to the other involved nodes,
+ %% and wait for all nodes to complete
+ {WaitFor, FirstRes} = sync_send_dirty(Tid, CR, Tab, []),
+ rec_dirty(WaitFor, FirstRes);
+ _ ->
+ mnesia:abort({bad_activity, Protocol})
+ end.
+
+%% This is the commit function, The first thing it does,
+%% is to find out which nodes that have been participating
+%% in this particular transaction, all of the mnesia_locker:lock*
+%% functions insert the names of the nodes where it aquires locks
+%% into the local shadow Store
+%% This function exacutes in the context of the user process
+t_commit(Type) ->
+ {_Mod, Tid, Ts} = get(mnesia_activity_state),
+ Store = Ts#tidstore.store,
+ if
+ Ts#tidstore.level == 1 ->
+ intercept_friends(Tid, Ts),
+ %% N is number of updates
+ case arrange(Tid, Store, Type) of
+ {N, Prep} when N > 0 ->
+ multi_commit(Prep#prep.protocol,
+ Tid, Prep#prep.records, Store);
+ {0, Prep} ->
+ multi_commit(read_only, Tid, Prep#prep.records, Store)
+ end;
+ true ->
+ %% nested commit
+ Level = Ts#tidstore.level,
+ [{OldMod,Obsolete} | Tail] = Ts#tidstore.up_stores,
+ req({del_store, Tid, Store, Obsolete, false}),
+ NewTs = Ts#tidstore{store = Store,
+ up_stores = Tail,
+ level = Level - 1},
+ NewTidTs = {OldMod, Tid, NewTs},
+ put(mnesia_activity_state, NewTidTs),
+ do_commit_nested
+ end.
+
+%% This function arranges for all objects we shall write in S to be
+%% in a list of {Node, CommitRecord}
+%% Important function for the performance of mnesia.
+
+arrange(Tid, Store, Type) ->
+ %% The local node is always included
+ Nodes = get_elements(nodes,Store),
+ Recs = prep_recs(Nodes, []),
+ Key = ?ets_first(Store),
+ N = 0,
+ Prep =
+ case Type of
+ async -> #prep{protocol = sym_trans, records = Recs};
+ sync -> #prep{protocol = sync_sym_trans, records = Recs}
+ end,
+ case catch do_arrange(Tid, Store, Key, Prep, N) of
+ {'EXIT', Reason} ->
+ dbg_out("do_arrange failed ~p ~p~n", [Reason, Tid]),
+ case Reason of
+ {aborted, R} ->
+ mnesia:abort(R);
+ _ ->
+ mnesia:abort(Reason)
+ end;
+ {New, Prepared} ->
+ {New, Prepared#prep{records = reverse(Prepared#prep.records)}}
+ end.
+
+reverse([]) ->
+ [];
+reverse([H=#commit{ram_copies=Ram, disc_copies=DC,
+ disc_only_copies=DOC,snmp = Snmp}
+ |R]) ->
+ [
+ H#commit{
+ ram_copies = lists:reverse(Ram),
+ disc_copies = lists:reverse(DC),
+ disc_only_copies = lists:reverse(DOC),
+ snmp = lists:reverse(Snmp)
+ }
+ | reverse(R)].
+
+prep_recs([N | Nodes], Recs) ->
+ prep_recs(Nodes, [#commit{decision = presume_commit, node = N} | Recs]);
+prep_recs([], Recs) ->
+ Recs.
+
+%% storage_types is a list of {Node, Storage} tuples
+%% where each tuple represents an active replica
+do_arrange(Tid, Store, {Tab, Key}, Prep, N) ->
+ Oid = {Tab, Key},
+ Items = ?ets_lookup(Store, Oid), %% Store is a bag
+ P2 = prepare_items(Tid, Tab, Key, Items, Prep),
+ do_arrange(Tid, Store, ?ets_next(Store, Oid), P2, N + 1);
+do_arrange(Tid, Store, SchemaKey, Prep, N) when SchemaKey == op ->
+ Items = ?ets_lookup(Store, SchemaKey), %% Store is a bag
+ P2 = prepare_schema_items(Tid, Items, Prep),
+ do_arrange(Tid, Store, ?ets_next(Store, SchemaKey), P2, N + 1);
+do_arrange(Tid, Store, RestoreKey, Prep, N) when RestoreKey == restore_op ->
+ [{restore_op, R}] = ?ets_lookup(Store, RestoreKey),
+ Fun = fun({Tab, Key}, CommitRecs, _RecName, Where, Snmp) ->
+ Item = [{{Tab, Key}, {Tab, Key}, delete}],
+ do_prepare_items(Tid, Tab, Key, Where, Snmp, Item, CommitRecs);
+ (BupRec, CommitRecs, RecName, Where, Snmp) ->
+ Tab = element(1, BupRec),
+ Key = element(2, BupRec),
+ Item =
+ if
+ Tab == RecName ->
+ [{{Tab, Key}, BupRec, write}];
+ true ->
+ BupRec2 = setelement(1, BupRec, RecName),
+ [{{Tab, Key}, BupRec2, write}]
+ end,
+ do_prepare_items(Tid, Tab, Key, Where, Snmp, Item, CommitRecs)
+ end,
+ Recs2 = mnesia_schema:arrange_restore(R, Fun, Prep#prep.records),
+ P2 = Prep#prep{protocol = asym_trans, records = Recs2},
+ do_arrange(Tid, Store, ?ets_next(Store, RestoreKey), P2, N + 1);
+do_arrange(_Tid, _Store, '$end_of_table', Prep, N) ->
+ {N, Prep};
+do_arrange(Tid, Store, IgnoredKey, Prep, N) -> %% locks, nodes ... local atoms...
+ do_arrange(Tid, Store, ?ets_next(Store, IgnoredKey), Prep, N).
+
+%% Returns a prep record with all items in reverse order
+prepare_schema_items(Tid, Items, Prep) ->
+ Types = [{N, schema_ops} || N <- val({current, db_nodes})],
+ Recs = prepare_nodes(Tid, Types, Items, Prep#prep.records, schema),
+ Prep#prep{protocol = asym_trans, records = Recs}.
+
+%% Returns a prep record with all items in reverse order
+prepare_items(Tid, Tab, Key, Items, Prep) when Prep#prep.prev_tab == Tab ->
+ Types = Prep#prep.prev_types,
+ Snmp = Prep#prep.prev_snmp,
+ Recs = Prep#prep.records,
+ Recs2 = do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs),
+ Prep#prep{records = Recs2};
+
+prepare_items(Tid, Tab, Key, Items, Prep) ->
+ Types = val({Tab, where_to_commit}),
+ case Types of
+ [] -> mnesia:abort({no_exists, Tab});
+ {blocked, _} ->
+ unblocked = req({unblock_me, Tab}),
+ prepare_items(Tid, Tab, Key, Items, Prep);
+ _ ->
+ Snmp = val({Tab, snmp}),
+ Recs2 = do_prepare_items(Tid, Tab, Key, Types,
+ Snmp, Items, Prep#prep.records),
+ Prep2 = Prep#prep{records = Recs2, prev_tab = Tab,
+ prev_types = Types, prev_snmp = Snmp},
+ check_prep(Prep2, Types)
+ end.
+
+do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) ->
+ Recs2 = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit
+ prepare_nodes(Tid, Types, Items, Recs2, normal).
+
+prepare_snmp(Tab, Key, Items) ->
+ case val({Tab, snmp}) of
+ [] ->
+ [];
+ Ustruct when Key /= '_' ->
+ {_Oid, _Val, Op} = hd(Items),
+ %% Still making snmp oid (not used) because we want to catch errors here
+ %% And also it keeps backwards comp. with old nodes.
+ SnmpOid = mnesia_snmp_hook:key_to_oid(Tab, Key, Ustruct), % May exit
+ [{Op, Tab, Key, SnmpOid}];
+ _ ->
+ [{clear_table, Tab}]
+ end.
+
+prepare_snmp(_Tid, _Tab, _Key, _Types, [], _Items, Recs) ->
+ Recs;
+
+prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) ->
+ if Key /= '_' ->
+ {_Oid, _Val, Op} = hd(Items),
+ SnmpOid = mnesia_snmp_hook:key_to_oid(Tab, Key, Us), % May exit
+ prepare_nodes(Tid, Types, [{Op, Tab, Key, SnmpOid}], Recs, snmp);
+ Key == '_' ->
+ prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp)
+ end.
+
+check_prep(Prep, Types) when Prep#prep.types == Types ->
+ Prep;
+check_prep(Prep, Types) when Prep#prep.types == undefined ->
+ Prep#prep{types = Types};
+check_prep(Prep, _Types) ->
+ Prep#prep{protocol = asym_trans}.
+
+%% Returns a list of commit records
+prepare_nodes(Tid, [{Node, Storage} | Rest], Items, C, Kind) ->
+ {Rec, C2} = pick_node(Tid, Node, C, []),
+ Rec2 = prepare_node(Node, Storage, Items, Rec, Kind),
+ [Rec2 | prepare_nodes(Tid, Rest, Items, C2, Kind)];
+prepare_nodes(_Tid, [], _Items, CommitRecords, _Kind) ->
+ CommitRecords.
+
+pick_node(Tid, Node, [Rec | Rest], Done) ->
+ if
+ Rec#commit.node == Node ->
+ {Rec, Done ++ Rest};
+ true ->
+ pick_node(Tid, Node, Rest, [Rec | Done])
+ end;
+pick_node({dirty,_}, Node, [], Done) ->
+ {#commit{decision = presume_commit, node = Node}, Done};
+pick_node(_Tid, Node, [], _Done) ->
+ mnesia:abort({bad_commit, {missing_lock, Node}}).
+
+prepare_node(Node, Storage, [Item | Items], Rec, Kind) when Kind == snmp ->
+ Rec2 = Rec#commit{snmp = [Item | Rec#commit.snmp]},
+ prepare_node(Node, Storage, Items, Rec2, Kind);
+prepare_node(Node, Storage, [Item | Items], Rec, Kind) when Kind /= schema ->
+ Rec2 =
+ case Storage of
+ ram_copies ->
+ Rec#commit{ram_copies = [Item | Rec#commit.ram_copies]};
+ disc_copies ->
+ Rec#commit{disc_copies = [Item | Rec#commit.disc_copies]};
+ disc_only_copies ->
+ Rec#commit{disc_only_copies =
+ [Item | Rec#commit.disc_only_copies]}
+ end,
+ prepare_node(Node, Storage, Items, Rec2, Kind);
+prepare_node(_Node, _Storage, Items, Rec, Kind)
+ when Kind == schema, Rec#commit.schema_ops == [] ->
+ Rec#commit{schema_ops = Items};
+prepare_node(_Node, _Storage, [], Rec, _Kind) ->
+ Rec.
+
+%% multi_commit((Protocol, Tid, CommitRecords, Store)
+%% Local work is always performed in users process
+multi_commit(read_only, Tid, CR, _Store) ->
+ %% This featherweight commit protocol is used when no
+ %% updates has been performed in the transaction.
+
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ Msg = {Tid, simple_commit},
+ rpc:abcast(DiscNs -- [node()], ?MODULE, Msg),
+ rpc:abcast(RamNs -- [node()], ?MODULE, Msg),
+ mnesia_recover:note_decision(Tid, committed),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid},
+ do_commit;
+
+multi_commit(sym_trans, Tid, CR, Store) ->
+ %% This lightweight commit protocol is used when all
+ %% the involved tables are replicated symetrically.
+ %% Their storage types must match on each node.
+ %%
+ %% 1 Ask the other involved nodes if they want to commit
+ %% All involved nodes votes yes if they are up
+ %% 2a Somebody has voted no
+ %% Tell all yes voters to do_abort
+ %% 2b Everybody has voted yes
+ %% Tell everybody to do_commit. I.e. that they should
+ %% prepare the commit, log the commit record and
+ %% perform the updates.
+ %%
+ %% The outcome is kept 3 minutes in the transient decision table.
+ %%
+ %% Recovery:
+ %% If somebody dies before the coordinator has
+ %% broadcasted do_commit, the transaction is aborted.
+ %%
+ %% If a participant dies, the table load algorithm
+ %% ensures that the contents of the involved tables
+ %% are picked from another node.
+ %%
+ %% If the coordinator dies, each participants checks
+ %% the outcome with all the others. If all are uncertain
+ %% about the outcome, the transaction is aborted. If
+ %% somebody knows the outcome the others will follow.
+
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ ?ets_insert(Store, Pending),
+
+ {WaitFor, Local} = ask_commit(sym_trans, Tid, CR, DiscNs, RamNs),
+ {Outcome, []} = rec_all(WaitFor, Tid, do_commit, []),
+ ?eval_debug_fun({?MODULE, multi_commit_sym},
+ [{tid, Tid}, {outcome, Outcome}]),
+ rpc:abcast(DiscNs -- [node()], ?MODULE, {Tid, Outcome}),
+ rpc:abcast(RamNs -- [node()], ?MODULE, {Tid, Outcome}),
+ case Outcome of
+ do_commit ->
+ mnesia_recover:note_decision(Tid, committed),
+ do_dirty(Tid, Local),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid};
+ {do_abort, _Reason} ->
+ mnesia_recover:note_decision(Tid, aborted)
+ end,
+ ?eval_debug_fun({?MODULE, multi_commit_sym, post},
+ [{tid, Tid}, {outcome, Outcome}]),
+ Outcome;
+
+multi_commit(sync_sym_trans, Tid, CR, Store) ->
+ %% This protocol is the same as sym_trans except that it
+ %% uses syncronized calls to disk_log and syncronized commits
+ %% when several nodes are involved.
+
+ {DiscNs, RamNs} = commit_nodes(CR, [], []),
+ Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ ?ets_insert(Store, Pending),
+
+ {WaitFor, Local} = ask_commit(sync_sym_trans, Tid, CR, DiscNs, RamNs),
+ {Outcome, []} = rec_all(WaitFor, Tid, do_commit, []),
+ ?eval_debug_fun({?MODULE, multi_commit_sym_sync},
+ [{tid, Tid}, {outcome, Outcome}]),
+ rpc:abcast(DiscNs -- [node()], ?MODULE, {Tid, Outcome}),
+ rpc:abcast(RamNs -- [node()], ?MODULE, {Tid, Outcome}),
+ case Outcome of
+ do_commit ->
+ mnesia_recover:note_decision(Tid, committed),
+ mnesia_log:slog(Local),
+ do_commit(Tid, Local),
+ %% Just wait for completion result is ignore.
+ rec_all(WaitFor, Tid, ignore, []),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid};
+ {do_abort, _Reason} ->
+ mnesia_recover:note_decision(Tid, aborted)
+ end,
+ ?eval_debug_fun({?MODULE, multi_commit_sym, post},
+ [{tid, Tid}, {outcome, Outcome}]),
+ Outcome;
+
+multi_commit(asym_trans, Tid, CR, Store) ->
+ %% This more expensive commit protocol is used when
+ %% table definitions are changed (schema transactions).
+ %% It is also used when the involved tables are
+ %% replicated asymetrically. If the storage type differs
+ %% on at least one node this protocol is used.
+ %%
+ %% 1 Ask the other involved nodes if they want to commit.
+ %% All involved nodes prepares the commit, logs a presume_abort
+ %% commit record and votes yes or no depending of the
+ %% outcome of the prepare. The preparation is also performed
+ %% by the coordinator.
+ %%
+ %% 2a Somebody has died or voted no
+ %% Tell all yes voters to do_abort
+ %% 2b Everybody has voted yes
+ %% Put a unclear marker in the log.
+ %% Tell the others to pre_commit. I.e. that they should
+ %% put a unclear marker in the log and reply
+ %% acc_pre_commit when they are done.
+ %%
+ %% 3a Somebody died
+ %% Tell the remaining participants to do_abort
+ %% 3b Everybody has replied acc_pre_commit
+ %% Tell everybody to committed. I.e that they should
+ %% put a committed marker in the log, perform the updates
+ %% and reply done_commit when they are done. The coordinator
+ %% must wait with putting his committed marker inte the log
+ %% until the committed has been sent to all the others.
+ %% Then he performs local commit before collecting replies.
+ %%
+ %% 4 Everybody has either died or replied done_commit
+ %% Return to the caller.
+ %%
+ %% Recovery:
+ %% If the coordinator dies, the participants (and
+ %% the coordinator when he starts again) must do
+ %% the following:
+ %%
+ %% If we have no unclear marker in the log we may
+ %% safely abort, since we know that nobody may have
+ %% decided to commit yet.
+ %%
+ %% If we have a committed marker in the log we may
+ %% safely commit since we know that everybody else
+ %% also will come to this conclusion.
+ %%
+ %% If we have a unclear marker but no committed
+ %% in the log we are uncertain about the real outcome
+ %% of the transaction and must ask the others before
+ %% we can decide what to do. If someone knows the
+ %% outcome we will do the same. If nobody knows, we
+ %% will wait for the remaining involved nodes to come
+ %% up. When all involved nodes are up and uncertain,
+ %% we decide to commit (first put a committed marker
+ %% in the log, then do the updates).
+
+ D = #decision{tid = Tid, outcome = presume_abort},
+ {D2, CR2} = commit_decision(D, CR, [], []),
+ DiscNs = D2#decision.disc_nodes,
+ RamNs = D2#decision.ram_nodes,
+ Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
+ ?ets_insert(Store, Pending),
+ {WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs),
+ SchemaPrep = (catch mnesia_schema:prepare_commit(Tid, Local, {coord, WaitFor})),
+ {Votes, Pids} = rec_all(WaitFor, Tid, do_commit, []),
+
+ ?eval_debug_fun({?MODULE, multi_commit_asym_got_votes},
+ [{tid, Tid}, {votes, Votes}]),
+ case Votes of
+ do_commit ->
+ case SchemaPrep of
+ {_Modified, C = #commit{}, DumperMode} ->
+ mnesia_log:log(C), % C is not a binary
+ ?eval_debug_fun({?MODULE, multi_commit_asym_log_commit_rec},
+ [{tid, Tid}]),
+
+ D3 = C#commit.decision,
+ D4 = D3#decision{outcome = unclear},
+ mnesia_recover:log_decision(D4),
+ ?eval_debug_fun({?MODULE, multi_commit_asym_log_commit_dec},
+ [{tid, Tid}]),
+ tell_participants(Pids, {Tid, pre_commit}),
+ %% Now we are uncertain and we do not know
+ %% if all participants have logged that
+ %% they are uncertain or not
+ rec_acc_pre_commit(Pids, Tid, Store, {C,Local},
+ do_commit, DumperMode, [], []);
+ {'EXIT', Reason} ->
+ %% The others have logged the commit
+ %% record but they are not uncertain
+ mnesia_recover:note_decision(Tid, aborted),
+ ?eval_debug_fun({?MODULE, multi_commit_asym_prepare_exit},
+ [{tid, Tid}]),
+ tell_participants(Pids, {Tid, {do_abort, Reason}}),
+ do_abort(Tid, Local),
+ {do_abort, Reason}
+ end;
+
+ {do_abort, Reason} ->
+ %% The others have logged the commit
+ %% record but they are not uncertain
+ mnesia_recover:note_decision(Tid, aborted),
+ ?eval_debug_fun({?MODULE, multi_commit_asym_do_abort}, [{tid, Tid}]),
+ tell_participants(Pids, {Tid, {do_abort, Reason}}),
+ do_abort(Tid, Local),
+ {do_abort, Reason}
+ end.
+
+%% Returns do_commit or {do_abort, Reason}
+rec_acc_pre_commit([Pid | Tail], Tid, Store, Commit, Res, DumperMode,
+ GoodPids, SchemaAckPids) ->
+ receive
+ {?MODULE, _, {acc_pre_commit, Tid, Pid, true}} ->
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
+ [Pid | GoodPids], [Pid | SchemaAckPids]);
+
+ {?MODULE, _, {acc_pre_commit, Tid, Pid, false}} ->
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
+ [Pid | GoodPids], SchemaAckPids);
+
+ {?MODULE, _, {acc_pre_commit, Tid, Pid}} ->
+ %% Kept for backwards compatibility. Remove after Mnesia 4.x
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
+ [Pid | GoodPids], [Pid | SchemaAckPids]);
+ {?MODULE, _, {do_abort, Tid, Pid, _Reason}} ->
+ AbortRes = {do_abort, {bad_commit, node(Pid)}},
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, AbortRes, DumperMode,
+ GoodPids, SchemaAckPids);
+ {mnesia_down, Node} when Node == node(Pid) ->
+ AbortRes = {do_abort, {bad_commit, Node}},
+ catch Pid ! {Tid, AbortRes}, %% Tell him that he has died
+ rec_acc_pre_commit(Tail, Tid, Store, Commit, AbortRes, DumperMode,
+ GoodPids, SchemaAckPids)
+ end;
+rec_acc_pre_commit([], Tid, Store, {Commit,OrigC}, Res, DumperMode, GoodPids, SchemaAckPids) ->
+ D = Commit#commit.decision,
+ case Res of
+ do_commit ->
+ %% Now everybody knows that the others
+ %% has voted yes. We also know that
+ %% everybody are uncertain.
+ prepare_sync_schema_commit(Store, SchemaAckPids),
+ tell_participants(GoodPids, {Tid, committed}),
+ D2 = D#decision{outcome = committed},
+ mnesia_recover:log_decision(D2),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_log_commit},
+ [{tid, Tid}]),
+
+ %% Now we have safely logged committed
+ %% and we can recover without asking others
+ do_commit(Tid, Commit, DumperMode),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_done_commit},
+ [{tid, Tid}]),
+ sync_schema_commit(Tid, Store, SchemaAckPids),
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid};
+
+ {do_abort, Reason} ->
+ tell_participants(GoodPids, {Tid, {do_abort, Reason}}),
+ D2 = D#decision{outcome = aborted},
+ mnesia_recover:log_decision(D2),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_log_abort},
+ [{tid, Tid}]),
+ do_abort(Tid, OrigC),
+ ?eval_debug_fun({?MODULE, rec_acc_pre_commit_done_abort},
+ [{tid, Tid}])
+ end,
+ Res.
+
+%% Note all nodes in case of mnesia_down mgt
+prepare_sync_schema_commit(_Store, []) ->
+ ok;
+prepare_sync_schema_commit(Store, [Pid | Pids]) ->
+ ?ets_insert(Store, {waiting_for_commit_ack, node(Pid)}),
+ prepare_sync_schema_commit(Store, Pids).
+
+sync_schema_commit(_Tid, _Store, []) ->
+ ok;
+sync_schema_commit(Tid, Store, [Pid | Tail]) ->
+ receive
+ {?MODULE, _, {schema_commit, Tid, Pid}} ->
+ ?ets_match_delete(Store, {waiting_for_commit_ack, node(Pid)}),
+ sync_schema_commit(Tid, Store, Tail);
+
+ {mnesia_down, Node} when Node == node(Pid) ->
+ ?ets_match_delete(Store, {waiting_for_commit_ack, Node}),
+ sync_schema_commit(Tid, Store, Tail)
+ end.
+
+tell_participants([Pid | Pids], Msg) ->
+ Pid ! Msg,
+ tell_participants(Pids, Msg);
+tell_participants([], _Msg) ->
+ ok.
+
+%% Trap exit because we can get a shutdown from application manager
+commit_participant(Coord, Tid, Bin, DiscNs, RamNs) when is_binary(Bin) ->
+ process_flag(trap_exit, true),
+ Commit = binary_to_term(Bin),
+ commit_participant(Coord, Tid, Bin, Commit, DiscNs, RamNs);
+commit_participant(Coord, Tid, C = #commit{}, DiscNs, RamNs) ->
+ process_flag(trap_exit, true),
+ commit_participant(Coord, Tid, C, C, DiscNs, RamNs).
+
+commit_participant(Coord, Tid, Bin, C0, DiscNs, _RamNs) ->
+ ?eval_debug_fun({?MODULE, commit_participant, pre}, [{tid, Tid}]),
+ case catch mnesia_schema:prepare_commit(Tid, C0, {part, Coord}) of
+ {Modified, C = #commit{}, DumperMode} ->
+ %% If we can not find any local unclear decision
+ %% we should presume abort at startup recovery
+ case lists:member(node(), DiscNs) of
+ false ->
+ ignore;
+ true ->
+ case Modified of
+ false -> mnesia_log:log(Bin);
+ true -> mnesia_log:log(C)
+ end
+ end,
+ ?eval_debug_fun({?MODULE, commit_participant, vote_yes},
+ [{tid, Tid}]),
+ reply(Coord, {vote_yes, Tid, self()}),
+
+ receive
+ {Tid, pre_commit} ->
+ D = C#commit.decision,
+ mnesia_recover:log_decision(D#decision{outcome = unclear}),
+ ?eval_debug_fun({?MODULE, commit_participant, pre_commit},
+ [{tid, Tid}]),
+ Expect_schema_ack = C#commit.schema_ops /= [],
+ reply(Coord, {acc_pre_commit, Tid, self(), Expect_schema_ack}),
+
+ %% Now we are vulnerable for failures, since
+ %% we cannot decide without asking others
+ receive
+ {Tid, committed} ->
+ mnesia_recover:log_decision(D#decision{outcome = committed}),
+ ?eval_debug_fun({?MODULE, commit_participant, log_commit},
+ [{tid, Tid}]),
+ do_commit(Tid, C, DumperMode),
+ case Expect_schema_ack of
+ false -> ignore;
+ true -> reply(Coord, {schema_commit, Tid, self()})
+ end,
+ ?eval_debug_fun({?MODULE, commit_participant, do_commit},
+ [{tid, Tid}]);
+
+ {Tid, {do_abort, _Reason}} ->
+ mnesia_recover:log_decision(D#decision{outcome = aborted}),
+ ?eval_debug_fun({?MODULE, commit_participant, log_abort},
+ [{tid, Tid}]),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, undo_prepare},
+ [{tid, Tid}]);
+
+ {'EXIT', _, _} ->
+ mnesia_recover:log_decision(D#decision{outcome = aborted}),
+ ?eval_debug_fun({?MODULE, commit_participant, exit_log_abort},
+ [{tid, Tid}]),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, exit_undo_prepare},
+ [{tid, Tid}]);
+
+ Msg ->
+ verbose("** ERROR ** commit_participant ~p, got unexpected msg: ~p~n",
+ [Tid, Msg])
+ end;
+ {Tid, {do_abort, Reason}} ->
+ reply(Coord, {do_abort, Tid, self(), Reason}),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, pre_commit_undo_prepare},
+ [{tid, Tid}]);
+
+ {'EXIT', _, Reason} ->
+ reply(Coord, {do_abort, Tid, self(), {bad_commit,Reason}}),
+ mnesia_schema:undo_prepare_commit(Tid, C0),
+ ?eval_debug_fun({?MODULE, commit_participant, pre_commit_undo_prepare}, [{tid, Tid}]);
+
+ Msg ->
+ reply(Coord, {do_abort, Tid, self(), {bad_commit,internal}}),
+ verbose("** ERROR ** commit_participant ~p, got unexpected msg: ~p~n",
+ [Tid, Msg])
+ end;
+
+ {'EXIT', Reason} ->
+ ?eval_debug_fun({?MODULE, commit_participant, vote_no},
+ [{tid, Tid}]),
+ reply(Coord, {vote_no, Tid, Reason}),
+ mnesia_schema:undo_prepare_commit(Tid, C0)
+ end,
+ mnesia_locker:release_tid(Tid),
+ ?MODULE ! {delete_transaction, Tid},
+ unlink(whereis(?MODULE)),
+ exit(normal).
+
+do_abort(Tid, Bin) when is_binary(Bin) ->
+ %% Possible optimization:
+ %% If we want we could pass arround a flag
+ %% that tells us whether the binary contains
+ %% schema ops or not. Only if the binary
+ %% contains schema ops there are meningful
+ %% unpack the binary and perform
+ %% mnesia_schema:undo_prepare_commit/1.
+ do_abort(Tid, binary_to_term(Bin));
+do_abort(Tid, Commit) ->
+ mnesia_schema:undo_prepare_commit(Tid, Commit),
+ Commit.
+
+do_dirty(Tid, Commit) when Commit#commit.schema_ops == [] ->
+ mnesia_log:log(Commit),
+ do_commit(Tid, Commit).
+
+%% do_commit(Tid, CommitRecord)
+do_commit(Tid, Bin) when is_binary(Bin) ->
+ do_commit(Tid, binary_to_term(Bin));
+do_commit(Tid, C) ->
+ do_commit(Tid, C, optional).
+do_commit(Tid, Bin, DumperMode) when is_binary(Bin) ->
+ do_commit(Tid, binary_to_term(Bin), DumperMode);
+do_commit(Tid, C, DumperMode) ->
+ mnesia_dumper:update(Tid, C#commit.schema_ops, DumperMode),
+ R = do_snmp(Tid, C#commit.snmp),
+ R2 = do_update(Tid, ram_copies, C#commit.ram_copies, R),
+ R3 = do_update(Tid, disc_copies, C#commit.disc_copies, R2),
+ do_update(Tid, disc_only_copies, C#commit.disc_only_copies, R3).
+
+%% Update the items
+do_update(Tid, Storage, [Op | Ops], OldRes) ->
+ case catch do_update_op(Tid, Storage, Op) of
+ ok ->
+ do_update(Tid, Storage, Ops, OldRes);
+ {'EXIT', Reason} ->
+ %% This may only happen when we recently have
+ %% deleted our local replica, changed storage_type
+ %% or transformed table
+ %% BUGBUG: Updates may be lost if storage_type is changed.
+ %% Determine actual storage type and try again.
+ %% BUGBUG: Updates may be lost if table is transformed.
+
+ verbose("do_update in ~w failed: ~p -> {'EXIT', ~p}~n",
+ [Tid, Op, Reason]),
+ do_update(Tid, Storage, Ops, OldRes);
+ NewRes ->
+ do_update(Tid, Storage, Ops, NewRes)
+ end;
+do_update(_Tid, _Storage, [], Res) ->
+ Res.
+
+do_update_op(Tid, Storage, {{Tab, K}, Obj, write}) ->
+ commit_write(?catch_val({Tab, commit_work}), Tid,
+ Tab, K, Obj, undefined),
+ mnesia_lib:db_put(Storage, Tab, Obj);
+
+do_update_op(Tid, Storage, {{Tab, K}, Val, delete}) ->
+ commit_delete(?catch_val({Tab, commit_work}), Tid, Tab, K, Val, undefined),
+ mnesia_lib:db_erase(Storage, Tab, K);
+
+do_update_op(Tid, Storage, {{Tab, K}, {RecName, Incr}, update_counter}) ->
+ {NewObj, OldObjs} =
+ case catch mnesia_lib:db_update_counter(Storage, Tab, K, Incr) of
+ NewVal when is_integer(NewVal), NewVal >= 0 ->
+ {{RecName, K, NewVal}, [{RecName, K, NewVal - Incr}]};
+ _ when Incr > 0 ->
+ New = {RecName, K, Incr},
+ mnesia_lib:db_put(Storage, Tab, New),
+ {New, []};
+ _ ->
+ Zero = {RecName, K, 0},
+ mnesia_lib:db_put(Storage, Tab, Zero),
+ {Zero, []}
+ end,
+ commit_update(?catch_val({Tab, commit_work}), Tid, Tab,
+ K, NewObj, OldObjs),
+ element(3, NewObj);
+
+do_update_op(Tid, Storage, {{Tab, Key}, Obj, delete_object}) ->
+ commit_del_object(?catch_val({Tab, commit_work}),
+ Tid, Tab, Key, Obj, undefined),
+ mnesia_lib:db_match_erase(Storage, Tab, Obj);
+
+do_update_op(Tid, Storage, {{Tab, Key}, Obj, clear_table}) ->
+ commit_clear(?catch_val({Tab, commit_work}), Tid, Tab, Key, Obj),
+ mnesia_lib:db_match_erase(Storage, Tab, Obj).
+
+commit_write([], _, _, _, _, _) -> ok;
+commit_write([{checkpoints, CpList}|R], Tid, Tab, K, Obj, Old) ->
+ mnesia_checkpoint:tm_retain(Tid, Tab, K, write, CpList),
+ commit_write(R, Tid, Tab, K, Obj, Old);
+commit_write([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, write, Old),
+ commit_write(R, Tid, Tab, K, Obj, Old);
+commit_write([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:add_index(H, Tab, K, Obj, Old),
+ commit_write(R, Tid, Tab, K, Obj, Old).
+
+commit_update([], _, _, _, _, _) -> ok;
+commit_update([{checkpoints, CpList}|R], Tid, Tab, K, Obj, _) ->
+ Old = mnesia_checkpoint:tm_retain(Tid, Tab, K, write, CpList),
+ commit_update(R, Tid, Tab, K, Obj, Old);
+commit_update([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, write, Old),
+ commit_update(R, Tid, Tab, K, Obj, Old);
+commit_update([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:add_index(H, Tab, K, Obj, Old),
+ commit_update(R, Tid, Tab, K, Obj, Old).
+
+commit_delete([], _, _, _, _, _) -> ok;
+commit_delete([{checkpoints, CpList}|R], Tid, Tab, K, Obj, _) ->
+ Old = mnesia_checkpoint:tm_retain(Tid, Tab, K, delete, CpList),
+ commit_delete(R, Tid, Tab, K, Obj, Old);
+commit_delete([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, delete, Old),
+ commit_delete(R, Tid, Tab, K, Obj, Old);
+commit_delete([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:delete_index(H, Tab, K),
+ commit_delete(R, Tid, Tab, K, Obj, Old).
+
+commit_del_object([], _, _, _, _, _) -> ok;
+commit_del_object([{checkpoints, CpList}|R], Tid, Tab, K, Obj, _) ->
+ Old = mnesia_checkpoint:tm_retain(Tid, Tab, K, delete_object, CpList),
+ commit_del_object(R, Tid, Tab, K, Obj, Old);
+commit_del_object([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, delete_object, Old),
+ commit_del_object(R, Tid, Tab, K, Obj, Old);
+commit_del_object([H|R], Tid, Tab, K, Obj, Old)
+ when element(1, H) == index ->
+ mnesia_index:del_object_index(H, Tab, K, Obj, Old),
+ commit_del_object(R, Tid, Tab, K, Obj, Old).
+
+commit_clear([], _, _, _, _) -> ok;
+commit_clear([{checkpoints, CpList}|R], Tid, Tab, K, Obj) ->
+ mnesia_checkpoint:tm_retain(Tid, Tab, K, clear_table, CpList),
+ commit_clear(R, Tid, Tab, K, Obj);
+commit_clear([H|R], Tid, Tab, K, Obj)
+ when element(1, H) == subscribers ->
+ mnesia_subscr:report_table_event(H, Tab, Tid, Obj, clear_table, undefined),
+ commit_clear(R, Tid, Tab, K, Obj);
+commit_clear([H|R], Tid, Tab, K, Obj)
+ when element(1, H) == index ->
+ mnesia_index:clear_index(H, Tab, K, Obj),
+ commit_clear(R, Tid, Tab, K, Obj).
+
+do_snmp(_, []) -> ok;
+do_snmp(Tid, [Head | Tail]) ->
+ case catch mnesia_snmp_hook:update(Head) of
+ {'EXIT', Reason} ->
+ %% This should only happen when we recently have
+ %% deleted our local replica or recently deattached
+ %% the snmp table
+
+ verbose("do_snmp in ~w failed: ~p -> {'EXIT', ~p}~n",
+ [Tid, Head, Reason]);
+ ok ->
+ ignore
+ end,
+ do_snmp(Tid, Tail).
+
+commit_nodes([C | Tail], AccD, AccR)
+ when C#commit.disc_copies == [],
+ C#commit.disc_only_copies == [],
+ C#commit.schema_ops == [] ->
+ commit_nodes(Tail, AccD, [C#commit.node | AccR]);
+commit_nodes([C | Tail], AccD, AccR) ->
+ commit_nodes(Tail, [C#commit.node | AccD], AccR);
+commit_nodes([], AccD, AccR) ->
+ {AccD, AccR}.
+
+commit_decision(D, [C | Tail], AccD, AccR) ->
+ N = C#commit.node,
+ {D2, Tail2} =
+ case C#commit.schema_ops of
+ [] when C#commit.disc_copies == [],
+ C#commit.disc_only_copies == [] ->
+ commit_decision(D, Tail, AccD, [N | AccR]);
+ [] ->
+ commit_decision(D, Tail, [N | AccD], AccR);
+ Ops ->
+ case ram_only_ops(N, Ops) of
+ true ->
+ commit_decision(D, Tail, AccD, [N | AccR]);
+ false ->
+ commit_decision(D, Tail, [N | AccD], AccR)
+ end
+ end,
+ {D2, [C#commit{decision = D2} | Tail2]};
+commit_decision(D, [], AccD, AccR) ->
+ {D#decision{disc_nodes = AccD, ram_nodes = AccR}, []}.
+
+ram_only_ops(N, [{op, change_table_copy_type, N, _FromS, _ToS, Cs} | _Ops ]) ->
+ case lists:member({name, schema}, Cs) of
+ true ->
+ %% We always use disk if change type of the schema
+ false;
+ false ->
+ not lists:member(N, val({schema, disc_copies}))
+ end;
+
+ram_only_ops(N, _Ops) ->
+ not lists:member(N, val({schema, disc_copies})).
+
+%% Returns {WaitFor, Res}
+sync_send_dirty(Tid, [Head | Tail], Tab, WaitFor) ->
+ Node = Head#commit.node,
+ if
+ Node == node() ->
+ {WF, _} = sync_send_dirty(Tid, Tail, Tab, WaitFor),
+ Res = do_dirty(Tid, Head),
+ {WF, Res};
+ true ->
+ {?MODULE, Node} ! {self(), {sync_dirty, Tid, Head, Tab}},
+ sync_send_dirty(Tid, Tail, Tab, [Node | WaitFor])
+ end;
+sync_send_dirty(_Tid, [], _Tab, WaitFor) ->
+ {WaitFor, {'EXIT', {aborted, {node_not_running, WaitFor}}}}.
+
+%% Returns {WaitFor, Res}
+async_send_dirty(_Tid, _Nodes, Tab, nowhere) ->
+ {[], {'EXIT', {aborted, {no_exists, Tab}}}};
+async_send_dirty(Tid, Nodes, Tab, ReadNode) ->
+ async_send_dirty(Tid, Nodes, Tab, ReadNode, [], ok).
+
+async_send_dirty(Tid, [Head | Tail], Tab, ReadNode, WaitFor, Res) ->
+ Node = Head#commit.node,
+ if
+ ReadNode == Node, Node == node() ->
+ NewRes = do_dirty(Tid, Head),
+ async_send_dirty(Tid, Tail, Tab, ReadNode, WaitFor, NewRes);
+ ReadNode == Node ->
+ {?MODULE, Node} ! {self(), {sync_dirty, Tid, Head, Tab}},
+ NewRes = {'EXIT', {aborted, {node_not_running, Node}}},
+ async_send_dirty(Tid, Tail, Tab, ReadNode, [Node | WaitFor], NewRes);
+ true ->
+ {?MODULE, Node} ! {self(), {async_dirty, Tid, Head, Tab}},
+ async_send_dirty(Tid, Tail, Tab, ReadNode, WaitFor, Res)
+ end;
+async_send_dirty(_Tid, [], _Tab, _ReadNode, WaitFor, Res) ->
+ {WaitFor, Res}.
+
+rec_dirty([Node | Tail], Res) when Node /= node() ->
+ NewRes = get_dirty_reply(Node, Res),
+ rec_dirty(Tail, NewRes);
+rec_dirty([], Res) ->
+ Res.
+
+get_dirty_reply(Node, Res) ->
+ receive
+ {?MODULE, Node, {'EXIT', Reason}} ->
+ {'EXIT', {aborted, {badarg, Reason}}};
+ {?MODULE, Node, {dirty_res, ok}} ->
+ case Res of
+ {'EXIT', {aborted, {node_not_running, _Node}}} ->
+ ok;
+ _ ->
+ %% Prioritize bad results, but node_not_running
+ Res
+ end;
+ {?MODULE, Node, {dirty_res, Reply}} ->
+ Reply;
+ {mnesia_down, Node} ->
+ case get(mnesia_activity_state) of
+ {_, Tid, _Ts} when element(1,Tid) == tid ->
+ %% Hmm dirty called inside a transaction, to avoid
+ %% hanging transaction we need to restart the transaction
+ mnesia:abort({node_not_running, Node});
+ _ ->
+ %% It's ok to ignore mnesia_down's since we will make
+ %% the replicas consistent again when Node is started
+ Res
+ end
+ after 1000 ->
+ case lists:member(Node, val({current, db_nodes})) of
+ true ->
+ get_dirty_reply(Node, Res);
+ false ->
+ Res
+ end
+ end.
+
+%% Assume that CommitRecord is no binary
+%% Return {Res, Pids}
+ask_commit(Protocol, Tid, CR, DiscNs, RamNs) ->
+ ask_commit(Protocol, Tid, CR, DiscNs, RamNs, [], no_local).
+
+ask_commit(Protocol, Tid, [Head | Tail], DiscNs, RamNs, WaitFor, Local) ->
+ Node = Head#commit.node,
+ if
+ Node == node() ->
+ ask_commit(Protocol, Tid, Tail, DiscNs, RamNs, WaitFor, Head);
+ true ->
+ Bin = opt_term_to_binary(Protocol, Head, DiscNs++RamNs),
+ Msg = {ask_commit, Protocol, Tid, Bin, DiscNs, RamNs},
+ {?MODULE, Node} ! {self(), Msg},
+ ask_commit(Protocol, Tid, Tail, DiscNs, RamNs, [Node | WaitFor], Local)
+ end;
+ask_commit(_Protocol, _Tid, [], _DiscNs, _RamNs, WaitFor, Local) ->
+ {WaitFor, Local}.
+
+%% This used to test protocol conversion between mnesia-nodes
+%% but it is really dependent on the emulator version on the
+%% two nodes (if funs are sent which they are in transform table op).
+%% to be safe we let erts do the translation (many times maybe and thus
+%% slower but it works.
+% opt_term_to_binary(asym_trans, Head, Nodes) ->
+% opt_term_to_binary(Nodes, Head);
+opt_term_to_binary(_Protocol, Head, _Nodes) ->
+ Head.
+
+rec_all([Node | Tail], Tid, Res, Pids) ->
+ receive
+ {?MODULE, Node, {vote_yes, Tid}} ->
+ rec_all(Tail, Tid, Res, Pids);
+ {?MODULE, Node, {vote_yes, Tid, Pid}} ->
+ rec_all(Tail, Tid, Res, [Pid | Pids]);
+ {?MODULE, Node, {vote_no, Tid, Reason}} ->
+ rec_all(Tail, Tid, {do_abort, Reason}, Pids);
+ {?MODULE, Node, {committed, Tid}} ->
+ rec_all(Tail, Tid, Res, Pids);
+ {?MODULE, Node, {aborted, Tid}} ->
+ rec_all(Tail, Tid, Res, Pids);
+
+ {mnesia_down, Node} ->
+ %% Make sure that mnesia_tm knows it has died
+ %% it may have been restarted
+ Abort = {do_abort, {bad_commit, Node}},
+ catch {?MODULE, Node} ! {Tid, Abort},
+ rec_all(Tail, Tid, Abort, Pids)
+ end;
+rec_all([], _Tid, Res, Pids) ->
+ {Res, Pids}.
+
+get_transactions() ->
+ {info, Participant, Coordinator} = req(info),
+ lists:map(fun({Tid, _Tabs}) ->
+ Status = tr_status(Tid,Participant),
+ {Tid#tid.counter, Tid#tid.pid, Status}
+ end,Coordinator).
+
+tr_status(Tid,Participant) ->
+ case lists:keymember(Tid, 1, Participant) of
+ true -> participant;
+ false -> coordinator
+ end.
+
+get_info(Timeout) ->
+ case whereis(?MODULE) of
+ undefined ->
+ {timeout, Timeout};
+ Pid ->
+ Pid ! {self(), info},
+ receive
+ {?MODULE, _, {info, Part, Coord}} ->
+ {info, Part, Coord}
+ after Timeout ->
+ {timeout, Timeout}
+ end
+ end.
+
+display_info(Stream, {timeout, T}) ->
+ io:format(Stream, "---> No info about coordinator and participant transactions, "
+ "timeout ~p <--- ~n", [T]);
+
+display_info(Stream, {info, Part, Coord}) ->
+ io:format(Stream, "---> Participant transactions <--- ~n", []),
+ lists:foreach(fun(P) -> pr_participant(Stream, P) end, Part),
+ io:format(Stream, "---> Coordinator transactions <---~n", []),
+ lists:foreach(fun({Tid, _Tabs}) -> pr_tid(Stream, Tid) end, Coord).
+
+pr_participant(Stream, P) ->
+ Commit0 = P#participant.commit,
+ Commit =
+ if
+ is_binary(Commit0) -> binary_to_term(Commit0);
+ true -> Commit0
+ end,
+ pr_tid(Stream, P#participant.tid),
+ io:format(Stream, "with participant objects ~p~n", [Commit]).
+
+
+pr_tid(Stream, Tid) ->
+ io:format(Stream, "Tid: ~p (owned by ~p) ~n",
+ [Tid#tid.counter, Tid#tid.pid]).
+
+info(Serial) ->
+ io:format( "Info about transaction with serial == ~p~n", [Serial]),
+ {info, Participant, Trs} = req(info),
+ search_pr_participant(Serial, Participant),
+ search_pr_coordinator(Serial, Trs).
+
+
+search_pr_coordinator(_S, []) -> no;
+search_pr_coordinator(S, [{Tid, _Ts}|Tail]) ->
+ case Tid#tid.counter of
+ S ->
+ io:format( "Tid is coordinator, owner == \n", []),
+ display_pid_info(Tid#tid.pid),
+ search_pr_coordinator(S, Tail);
+ _ ->
+ search_pr_coordinator(S, Tail)
+ end.
+
+search_pr_participant(_S, []) ->
+ false;
+search_pr_participant(S, [ P | Tail]) ->
+ Tid = P#participant.tid,
+ Commit0 = P#participant.commit,
+ if
+ Tid#tid.counter == S ->
+ io:format( "Tid is participant to commit, owner == \n", []),
+ Pid = Tid#tid.pid,
+ display_pid_info(Pid),
+ io:format( "Tid wants to write objects \n",[]),
+ Commit =
+ if
+ is_binary(Commit0) -> binary_to_term(Commit0);
+ true -> Commit0
+ end,
+
+ io:format("~p~n", [Commit]),
+ search_pr_participant(S,Tail); %% !!!!!
+ true ->
+ search_pr_participant(S, Tail)
+ end.
+
+display_pid_info(Pid) ->
+ case rpc:pinfo(Pid) of
+ undefined ->
+ io:format( "Dead process \n");
+ Info ->
+ Call = fetch(initial_call, Info),
+ Curr = case fetch(current_function, Info) of
+ {Mod,F,Args} when is_list(Args) ->
+ {Mod,F,length(Args)};
+ Other ->
+ Other
+ end,
+ Reds = fetch(reductions, Info),
+ LM = length(fetch(messages, Info)),
+ pformat(io_lib:format("~p", [Pid]),
+ io_lib:format("~p", [Call]),
+ io_lib:format("~p", [Curr]), Reds, LM)
+ end.
+
+pformat(A1, A2, A3, A4, A5) ->
+ io:format( "~-12s ~-21s ~-21s ~9w ~4w~n", [A1,A2,A3,A4,A5]).
+
+fetch(Key, Info) ->
+ case lists:keysearch(Key, 1, Info) of
+ {value, {_, Val}} ->
+ Val;
+ _ ->
+ 0
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%% reconfigure stuff comes here ......
+%%%%%%%%%%%%%%%%%%%%%
+
+reconfigure_coordinators(N, [{Tid, [Store | _]} | Coordinators]) ->
+ case mnesia_recover:outcome(Tid, unknown) of
+ committed ->
+ WaitingNodes = ?ets_lookup(Store, waiting_for_commit_ack),
+ case lists:keymember(N, 2, WaitingNodes) of
+ false ->
+ ignore; % avoid spurious mnesia_down messages
+ true ->
+ send_mnesia_down(Tid, Store, N)
+ end;
+ aborted ->
+ ignore; % avoid spurious mnesia_down messages
+ _ ->
+ %% Tell the coordinator about the mnesia_down
+ send_mnesia_down(Tid, Store, N)
+ end,
+ reconfigure_coordinators(N, Coordinators);
+reconfigure_coordinators(_N, []) ->
+ ok.
+
+send_mnesia_down(Tid, Store, Node) ->
+ Msg = {mnesia_down, Node},
+ send_to_pids([Tid#tid.pid | get_elements(friends,Store)], Msg).
+
+send_to_pids([Pid | Pids], Msg) when is_pid(Pid) ->
+ Pid ! Msg,
+ send_to_pids(Pids, Msg);
+send_to_pids([_ | Pids], Msg) ->
+ send_to_pids(Pids, Msg);
+send_to_pids([], _Msg) ->
+ ok.
+
+reconfigure_participants(N, [P | Tail]) ->
+ case lists:member(N, P#participant.disc_nodes) or
+ lists:member(N, P#participant.ram_nodes) of
+ false ->
+ %% Ignore, since we are not a participant
+ %% in the transaction.
+ reconfigure_participants(N, Tail);
+
+ true ->
+ %% We are on a participant node, lets
+ %% check if the dead one was a
+ %% participant or a coordinator.
+ Tid = P#participant.tid,
+ if
+ node(Tid#tid.pid) /= N ->
+ %% Another participant node died. Ignore.
+ reconfigure_participants(N, Tail);
+
+ true ->
+ %% The coordinator node has died and
+ %% we must determine the outcome of the
+ %% transaction and tell mnesia_tm on all
+ %% nodes (including the local node) about it
+ verbose("Coordinator ~p in transaction ~p died~n",
+ [Tid#tid.pid, Tid]),
+
+ Nodes = P#participant.disc_nodes ++
+ P#participant.ram_nodes,
+ AliveNodes = Nodes -- [N],
+ Protocol = P#participant.protocol,
+ tell_outcome(Tid, Protocol, N, AliveNodes, AliveNodes),
+ reconfigure_participants(N, Tail)
+ end
+ end;
+reconfigure_participants(_, []) ->
+ [].
+
+%% We need to determine the outcome of the transaction and
+%% tell mnesia_tm on all involved nodes (including the local node)
+%% about the outcome.
+tell_outcome(Tid, Protocol, Node, CheckNodes, TellNodes) ->
+ Outcome = mnesia_recover:what_happened(Tid, Protocol, CheckNodes),
+ case Outcome of
+ aborted ->
+ rpc:abcast(TellNodes, ?MODULE, {Tid,{do_abort, {mnesia_down, Node}}});
+ committed ->
+ rpc:abcast(TellNodes, ?MODULE, {Tid, do_commit})
+ end,
+ Outcome.
+
+do_stop(#state{coordinators = Coordinators}) ->
+ Msg = {mnesia_down, node()},
+ lists:foreach(fun({Tid, _}) -> Tid#tid.pid ! Msg end, gb_trees:to_list(Coordinators)),
+ mnesia_checkpoint:stop(),
+ mnesia_log:stop(),
+ exit(shutdown).
+
+fixtable(Tab, Lock, Me) ->
+ case req({fixtable, [Tab,Lock,Me]}) of
+ error ->
+ exit({no_exists, Tab});
+ Else ->
+ Else
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% System upgrade
+
+system_continue(_Parent, _Debug, State) ->
+ doit_loop(State).
+
+system_terminate(_Reason, _Parent, _Debug, State) ->
+ do_stop(State).
+
+system_code_change(State=#state{coordinators=Cs0,participants=Ps0},_Module,_OldVsn,downgrade) ->
+ case is_tuple(Cs0) of
+ true ->
+ Cs = gb_trees:to_list(Cs0),
+ Ps = gb_trees:values(Ps0),
+ {ok, State#state{coordinators=Cs,participants=Ps}};
+ false ->
+ {ok, State}
+ end;
+
+system_code_change(State=#state{coordinators=Cs0,participants=Ps0},_Module,_OldVsn,_Extra) ->
+ case is_list(Cs0) of
+ true ->
+ Cs = gb_trees:from_orddict(lists:sort(Cs0)),
+ Ps1 = [{P#participant.tid,P}|| P <- Ps0],
+ Ps = gb_trees:from_orddict(lists:sort(Ps1)),
+ {ok, State#state{coordinators=Cs,participants=Ps}};
+ false ->
+ {ok, State}
+ end.
diff --git a/lib/mnesia/vsn.mk b/lib/mnesia/vsn.mk
new file mode 100644
index 0000000000..2de3658bf3
--- /dev/null
+++ b/lib/mnesia/vsn.mk
@@ -0,0 +1,15 @@
+
+MNESIA_VSN = 4.4.12
+
+TICKETS = OTP-8250
+#TICKETS_4.4.11 = OTP-8074
+#TICKETS_4.4.10 = OTP-7928 OTP-7968 OTP-8002
+#TICKETS_4.4.9 = OTP-7911
+#TICKETS_4.4.8 = OTP-7753 OTP-7835
+#TICKETS_4.4.7 = OTP-7524 OTP-7625
+#TICKETS_4.4.6 = OTP-7585
+#TICKETS_4.4.5 = OTP-7466
+#TICKETS_4.4.4 = OTP-7419
+#TICKETS_4.4.3 = OTP-7340 OTP-7378 OTP-7383
+#TICKETS_4.4.2 = OTP-7205 OTP-7208
+#TICKETS_4.4.1 = OTP-7170