diff options
Diffstat (limited to 'lib/mnesia')
114 files changed, 40387 insertions, 0 deletions
diff --git a/lib/mnesia/AUTHORS b/lib/mnesia/AUTHORS new file mode 100644 index 0000000000..70e87038a8 --- /dev/null +++ b/lib/mnesia/AUTHORS @@ -0,0 +1,5 @@ +Original Authors: + Claes Wikström Wrote the initial release. + Håkan Mattsson Rewrote a lot of it. + Dan Gudmundsson Current maintainer. +Contributors: diff --git a/lib/mnesia/Makefile b/lib/mnesia/Makefile new file mode 100644 index 0000000000..f687b0ecdb --- /dev/null +++ b/lib/mnesia/Makefile @@ -0,0 +1,39 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# + +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Macros +# ---------------------------------------------------- + +SUB_DIRECTORIES = src include examples doc/src + +include vsn.mk +VSN = $(MNESIA_VSN) + +SPECIAL_TARGETS = + +# ---------------------------------------------------- +# Default Subdir Targets +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_subdir.mk + diff --git a/lib/mnesia/doc/html/.gitignore b/lib/mnesia/doc/html/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/mnesia/doc/html/.gitignore diff --git a/lib/mnesia/doc/man3/.gitignore b/lib/mnesia/doc/man3/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/mnesia/doc/man3/.gitignore diff --git a/lib/mnesia/doc/man6/.gitignore b/lib/mnesia/doc/man6/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/mnesia/doc/man6/.gitignore diff --git a/lib/mnesia/doc/misc/Makefile b/lib/mnesia/doc/misc/Makefile new file mode 100644 index 0000000000..e5fa327f5b --- /dev/null +++ b/lib/mnesia/doc/misc/Makefile @@ -0,0 +1,64 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# + +# +OTP_MAKE_ROOT=/home/super/otp/otp_make +include $(OTP_MAKE_ROOT)/otp.mk + +# +# Release Macros +# + +# +# Common macros +# + + +SGML_FILES= release_notes.sgml + +FIG_FILES = +HTML_FILES= $(SGML_FILES:.sgml=.html) +TEX_FILES= $(SGML_FILES:.sgml=.tex) +DVI_FILES= $(SGML_FILES:.sgml=.dvi) +PSFIG_FILES= $(FIG_FILES:.fig=.ps) +PS_FILES= $(SGML_FILES:.sgml=.ps) +GIF_FILES= min_head.gif +ERL_FILES= +HRL_FILES= +DATA_FILES= + + +# +# Make Rules +# + +all $(DEFAULT_OPT_TARGETS) $(DEFAULT_DEBUG_TARGETS): $(HTML_FILES) $(TEX_FILES) $(PSFIG_FILES) $(DVI_FILES) $(PS_FILES) + +clean: + @rm -f *.toc *.log *.aux *.tex sgmls_output sgmls_errs $(HTML_FILES) $(TEX_FILES) $(DVI_FILES) $(PSFIG_FILES) $(PS_FILES) + +# +# Release Targets +# +include $(OTP_MAKE_ROOT)/otp_release_targets.mk + +release_variant: opt.$(TARGET) + $(MKPATH.$(TARGET)) $(RELEASE_PATH)/$(TARGET)/lib/mnesia-$(VSN)/doc/misc + $(INSTALLFILES.$(TARGET)) $(HTML_FILES) $(GIF_FILES) $(ERL_FILES) $(HRL_FILES) $(DATA_FILES) $(PS_FILES) $(RELEASE_PATH)/$(TARGET)/lib/mnesia-$(VSN)/doc/misc + diff --git a/lib/mnesia/doc/misc/implementation.txt b/lib/mnesia/doc/misc/implementation.txt new file mode 100644 index 0000000000..1b8369e466 --- /dev/null +++ b/lib/mnesia/doc/misc/implementation.txt @@ -0,0 +1,375 @@ + +Mnesia + +1 Introduction + +This document aims to give a brief introduction of the implementation +of mnesia, it's data and functions. + +H�kan has written other mnesia papers of interest, (see ~hakan/public_html/): +o Resource consumption (mnesia_consumption.txt) +o What to think about when changing mnesia (mnesia_upgrade_policy.txt) +o Mnesia internals course (mnesia_internals_slides.pdf) +o Mnesia overview (mnesia_overview.pdf) + +1.1. Basic concepts + +In a mnesia cluster all nodes are equal, there is no concept off +master or backup nodes. 
That said when mixing disc based (uses the +disc to store meta information) nodes and ram based (do not use disc +at all) nodes the disc based ones sometimes have precedence over ram +based nodes. + +2 Meta Data + +Mnesia has two types of global meta data, static and dynamic. +All the meta data is stored in the ets table mnesia_gvar. + +2.1 Static Meta Data +The static data is the schema information, usually kept in +'schema.DAT' file, the data is created with +mnesia:create_schema(Nodes) for disc nodes (i.e. nodes which use the +disc). Ram based mnesia nodes create an empty schema at startup. + +The static data i.e. schema, contains information about which nodes +are involved in the cluster and which type (ram or disc) they have. It +also contains information about which tables exist on which node and +so on. + +The schema information (static data) must always be the same on all +active nodes in the mnesia cluster. Schema information is updated via +schema functions, e.g. mnesia:add_table_copy/3, +mnesia:change_table_copy_type/3... + +2.2 Dynamic Meta Data + +The dynamic data is transient and is local to each mnesia node +in the cluster. Examples of dynamic data are: currently active mnesia +nodes, which tables are currently available and where they are +located. Dynamic data is updated internally by each mnesia during the +node's lifetime, i.e. when nodes go up and down or are added to or +deleted from the mnesia cluster. + +3 Processes and Files + +The most important processes in mnesia are mnesia_monitor, +mnesia_controller, mnesia_tm and mnesia_locker. + +Mnesia_monitor acts as supervisor and monitors all resources. It +listens for nodeup and nodedown and keeps links to other mnesia nodes, +if a node goes down it forwards the information to all the necessary +processes, e.g. mnesia_controller, mnesia_locker, mnesia_tm and all +transactions. During start it negotiates the protocol version with +the other nodes and keeps track of which nodes use which version. 
The +monitor process also detects and warns about partitioned networks, it is +then up to the user to deal with them. It is the owner of all open +files, ets tables and so on. + +The mnesia_controller process is responsible for loading tables, +keeping the dynamic meta data updated, and synchronizing dangerous work such +as schema transactions vs dump log operation vs table loading/sending. + +The last two processes are involved in all transactions, the +mnesia_locker process manages transaction locks, and mnesia_tm manages +all transaction work. + +4 Startup and table Loading + +The early startup is mostly driven by the mnesia_tm process/module, +logs are dumped (see log dumping), node-names of other nodes in the +cluster are retrieved from the static meta data or from environment +parameters and initial connections are made to the other mnesia +nodes. + +The rest of start up is driven by the mnesia_controller process where +the schema (static meta data) is merged between each node, this is +done to keep the schema consistent between all nodes in the +cluster. When the schema is merged all local tables are put in a +loading queue, tables which are only available locally or have local content +are loaded directly from disc or created if they are type ram_copies. + +The other tables are kept in the queue until mnesia decides whether to +load them from disk or from another node. If another mnesia node has +already loaded the table, i.e. got a copy in ram or an open dets file, +the table is always loaded from that node to keep the data consistent. +If no other node has a loaded copy of the table, some mnesia node has +to load it first, and the other nodes can copy the table from the +first node. Mnesia keeps information about when other nodes went down, +a starting mnesia will check which nodes have been down, if some of +the nodes have not been down the starting node will let those nodes +load the table first. 
If all other nodes have been down then the +starting mnesia will load the table. The node that is allowed to load +the table will load it and the other nodes will copy it from that node. + +If a node, which the starter node has not a 'mnesia_down' note from, +is down the starter node will have to wait until that node comes up +and decision can be taken, this behavior can be overruled by user +settings. The order of table loading could be described as: + +1. Mnesia downs, Normally decides from where mnesia should load tables. +2. Master nodes (overrides mnesia downs). +3. Force load (overrides Master nodes). + 1) If possible, load table from active master nodes + 2) if no master nodes is active load from any active nodes, + 3) if no active node has an active table get local copy + (if ram create empty one) + +Currently mnesia can handle one download and one upload at the same +time. Dumping and loading/sending may run simultaneously but neither +of them may run during schema commit. Loaders/senders may not start if +a schema commit is enqueued. That synchronization is made to prohibit +that the schema transaction modifies the meta data and the +prerequisites of the table loading changes. + +The actual loading of a table is implemented in 'mnesia_loader.erl'. 
+It currently works as follows: + +Receiver Sender +-------- ------ +Spawned +Find sender node +Queue sender request ----> + Spawned +*)Spawn real receiver <---- Send init info +*)Grab schema lock for Grab write table lock + that table to avoid Subscribe receiver + deadlock with schema transactions to table updates +Create table (ets or dets) Release Lock + +Get data (with ack ----> + as flow control) <---- Burst data to receiver + Send no_more +Apply subscription messages +Store copy on disc Grab read lock +Create index, snmp data Update meta data info +and checkpoints if needed cleanup +no_more ----> + Release lock + +*) Don't spawn or grab schema lock if operation is add_table_copy, + it's already a schema operation. + + +5 Transaction + +Transaction are normally driven from the client process, i.e. the +process that call 'mnesia:transaction'. The client first acquires a +globally unique transaction id (tid) and temporary transaction storage +(ts an ets table) from mnesia_tm and then executes the transaction +fun. Mnesia-api calls such as 'mnesia:write/1' and 'mnesia:read' +contains code for acquiring the needed locks. Intermediate database +states and acquired locks are kept in the transaction storage, and all +mnesia operations has to be "patched" against that store. I.e. a write +operation in a transaction should be seen within (and only within) +that transaction, if the same key is read after the write. +After the transaction fun is completed the ts is analyzed to see which +nodes are involved in the transaction, and what type of commit protocol +shall be used. Then the result is committed and additional work such as +snmp, checkpoints and index updates are performed. The transaction is +finish by releasing all resources. 
+ +An example: + +Example = fun(X) -> + {table1, key, Value} = mnesia:read(table1, key), + ok = mnesia:write(table1, {table1, key, Value+X}), + {table1, key, Updated} = mnesia:read(table1, key), + Updated + end, +mnesia:transaction(Example, [10]). + +A message overview of a simple successful asynchronous transaction + non local +Client Process mnesia_tm(local) mnesia_locker mnesia_tm +------------------------------------------------------------------------ +Get tid ----> + <--- Tid and ts +Get read lock +from available node -------------------------------> +Value <----------Value or restart trans--- +Patch value against ts + +Get write lock +from all nodes -------------------------------> + -------------------------------> +ok's <<---------ok's or restart trans--- +write data in ts + +Get read lock,already done. +Read data Value +'Patch' data with ts +Fun return Value+X. + +If everything is ok +commit transaction + +Find the nodes that the transaction +needs to be committed on and +collect every update from ts. + +Ask for commit -----------> + -----------------------------------------------> + +Ok's <<--------- ------------------------------ +Commit -----------------------------------------------> +log commit decision on disk +Commit locally + update snmp + update checkpoints + notify subscribers + update index + +Release locks -------------------------------> +Release transaction -----> + +Return trans result +---------------------- + +If all needed resources are available, i.e. the needed tables are +loaded somewhere in the cluster during the transaction, and the user +code doesn't crash, a transaction in mnesia won't fail. If something +happens in the mnesia cluster such as node down from the replica the +transaction was about to read from, or that a lock couldn't be +acquired and the transaction was not allowed to be queued on that +lock, the transaction is restarted, i.e. all resources are released +and the fun is called again. 
By default a transaction can be +restarted infinitely many times, but the user may choose to limit +the number of restarts. + +The dirty operations don't do any of the above, they just find out +where to write the data, log the operation to disk and cast (or call +in case of sync_dirty operation) the data to those nodes. Therefore +the dirty operations have the drawback that each write or delete sends +a message per operation to the involved nodes. + +There is also a synchronous variant of 2-phase commit protocol which +waits on an additional ack message after the transaction is committed +on every node. The intention is to provide the user with a way to +solve overloading problems. + +A 3-phase commit protocol is used for schema transactions or if the +transaction result is going to be committed in an asymmetrical way, +i.e. a transaction that writes to table a and b where table a and b +have replicas on different nodes. The outcomes of the transactions are +stored temporarily in an ets table and in the log file. + +6 Schema transactions + +Schema transactions are handled differently than ordinary +transactions, they are implemented in mnesia_schema (and in +mnesia_dumper). The schema operation is always spawned to protect +against the client process dying during the transaction. + +The actual transaction fun checks the pre-conditions and acquires the +needed locks and notes the operation in the transaction store. During +the commit, the schema transaction runs a schema prepare operation (on +every node) that does the needed prerequisite job. Then the operation +is logged to disc, and the actual commit work is done by dumping the +log. Every schema operation has a special clause in mnesia_dumper to +handle the finishing work. Every schema prepare operation has a +matching undo_prepare operation which needs to be invoked if the +transaction is aborted. 
+ +7 Locks + +"The locking algorithm is a traditional 'two-phase locking'* and the +deadlock prevention is 'wait-die'*, time stamps for the wait-die algorithm +is 'Lamport clock'* maintained by mnesia_tm. The Lamport clock is kept +when the transaction is restarted to avoid starving." + +* References can be found in the paper mnesia_overview.pdf + Klacke, Håkan and Hans wrote about mnesia. + +What the quote above means is that read locks are acquired on the +replica that mnesia reads from, write locks are acquired on all nodes +which have a replica. Several read locks can lock the same object, but +write locks are exclusive. The transaction identifier (tid) is an ever +increasing system unique counter which has the same sort order on every +node (a Lamport clock), which enables mnesia_locker to order the lock +requests. When a lock request arrives, mnesia_locker checks whether +the lock is available, if it is a 'granted' is sent back to the client +and the lock is noted as taken in an ets table. If the lock is already +occupied, its tid is compared with the tid of the transaction holding the +lock. If the tid of the holding transaction is greater than the tid of the +asking transaction it's allowed to be put in the lock queue (another +ets table) and no response is sent back until the lock is released, if +not the transaction will get a negative response and mnesia_tm will +restart the transaction after it has slept for a random time. + +Sticky locks work almost like write locks, the first time a sticky +lock is acquired a request is sent to all nodes. The lock is marked as +taken by the requesting node (not transaction), when the lock is later +released it's only released on the node that has the sticky lock, +thus the next time a transaction is requesting the lock it doesn't need +to ask the other nodes. If another node wants the lock it has to request +a lock release first, before it can acquire the lock. 
+ +8 Fragmented tables + +Fragmented tables are used to split a large table into smaller parts. +It is implemented as a layer between the client and mnesia which +extends the meta data with additional properties and maps a {table, +key} tuple to a table_fragment. + +The default mapping is erlang:phash() but the user may provide his own +mapping function to be able to predict which records are stored in +which table fragment, e.g. the client may want to steer where a +record generated from a certain device is placed. + +The foreign key is used to co-locate other tables to the same node. +The other additional table attributes are also used to distribute the +table fragments. + +9 Log Dumping + +All operations on disk tables are stored on a log 'LATEST.LOG' on +disk, so mnesia can redo the transactions if the node goes down. +Dumping the log means that mnesia moves the committed data from the +general log to the table specific disk storage. To avoid that the log +grows too large and uses a lot of disk space and makes the startup slow, +mnesia dumps the log during its uptime. There are two triggers that +start the log dumping, timeouts and the number of commits since last +dump, both are user configurable. + +Disc copies tables are implemented with two disk_log files, one +'table.DCD' (disc copies data) and one 'table.DCL' (disc copies log). +The dcd contains raw records, and the dcl contains operations on that +table, i.e. '{write, {table, key, value}}' or '{delete, {table, +key}}'. The first time a record for a specific table is found when +dumping the table, the sizes of both the dcd and the dcl files are +checked. And if the sizeof(dcl)/sizeof(dcd) is greater than a +threshold, the current ram table is dumped to file 'table.DCD' and the +corresponding dcl file is deleted, and all other records in the +general log that belong to that table are ignored. If the threshold +is not met then the operations in the general log to that table are +appended to the dcl file. 
On start up both files are read, first the +contents of the dcd are loaded to an ets table, then it's modified by +the operations stored in the corresponding dcl file. + +Disc only copies tables update the 'dets' file directly when +committing the data so those entries can be ignored during normal log +dumping, they are only added to the 'dets' file during startup when +mnesia doesn't know the state of the disk table. + +10 Checkpoints and backups + +Checkpoints are created to be able to take snapshots of the database, +which is pretty good when you want consistent backups, i.e. you don't +want half of a transaction in the backup. The checkpoint creates a +shadow table (called retainer) for each table involved in the +checkpoint. When a checkpoint is requested it will not start until all +ongoing transactions are completed. The new transactions will update +both the real table and update the shadow table with operations to +undo the changes on the real table, when a key is modified the first +time. I.e. when write operation '{table, a, 14}' is made, the shadow +table is checked if key 'a' has an undo operation, if it has, nothing +more is done. If not a {write, {table, a, OLD_VALUE}} is added to the +shadow table if the real table had an old value, if not a {delete, +{table, a}} operation is added to the shadow table. + +The backup is taken by copying every record in the real table and then +appending every operation in the shadow table to the backup, thus +undoing the changes that were made since the checkpoint was +started. 
+ + diff --git a/lib/mnesia/doc/pdf/.gitignore b/lib/mnesia/doc/pdf/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/mnesia/doc/pdf/.gitignore diff --git a/lib/mnesia/doc/src/DATA b/lib/mnesia/doc/src/DATA new file mode 100644 index 0000000000..5b4bedacaf --- /dev/null +++ b/lib/mnesia/doc/src/DATA @@ -0,0 +1,100 @@ +%S0 +{tables, + [{employee, [{attributes, [emp_no,name,salary,sex, phone,room_no]}]}, + {dept, [{attributes, [id, name]}]}, + {project, [{attributes, [name, number]}]}, + {manager, [{attributes, [emp, dept]}, + {type, bag}]}, + {at_dep, [{attributes, [emp, dept_id]}]}, + {in_proj, [{attributes, [emp, proj_name]}, + {type, bag}]} + ] +}. + +%E0 + + +%S1 +{employee, 104465, "Johnson Torbjorn", 1, male, 99184, {242,038}}. +{employee, 107912, "Carlsson Tuula", 2, female, 94556, {242,056}}. +{employee, 114872, "Dacker Bjarne", 3, male, 99415, {221,035}}. +{employee, 114849, "Armstrong Josef", 3, male, 99452, {221,023}}. +{employee, 114952, "Froberg Magnus", 5, male, 99469, {222,018}}. +{employee, 104531, "Nilsson Hans", 3, male, 99495, {222,026}}. +{employee, 104659, "Tornkvist Torbjorn", 2, male, 99514, {222,022}}. +{employee, 104732, "Wikstrom Claes", 2, male, 99586, {221,015}}. +{employee, 117716, "Fedoriw Anna", 1, female, 99143, {221,031}}. +{employee, 115020, "Hansson Catrin", 6, female, 99129, {222,072}}. +{employee, 115018, "Mattsson Hakan", 3, male, 99251, {203,348}}. +{employee, 113069, "Eriksson Morgan", 6, male, 99186, {241,543}}. +%E1 + +%S2 +%% departments +{dept, 'B/SF', "Open Telecom Platform"}. +{dept, 'B/SFP', "OTP - Product Development"}. +{dept, 'B/SFR', "Computer Science Laboratory"}. +%E2 + + +%% projects +%S3 +{project, erlang, 1}. +{project, otp, 2}. +{project, beam, 3}. +{project, mnesia, 5}. +{project, wolf, 6}. +{project, documentation, 7}. +{project, www, 8}. + +%E3 + + + +%% manager +%S4 +{manager, 104465, 'B/SF'}. +{manager, 104465, 'B/SFP'}. +{manager, 114872, 'B/SFR'}. 
+ +%E4 +%S5 +{at_dep, 104465, 'B/SF'}. +{at_dep, 107912, 'B/SF'}. +{at_dep, 114872, 'B/SFR'}. +{at_dep, 114849, 'B/SFR'}. +{at_dep, 114952, 'B/SFR'}. +{at_dep, 104531, 'B/SFR'}. +{at_dep, 104659, 'B/SFR'}. +{at_dep, 104732, 'B/SFR'}. +{at_dep, 117716, 'B/SFP'}. +{at_dep, 115020, 'B/SFP'}. +{at_dep, 115018, 'B/SFP'}. +{at_dep, 113069, 'B/SFP'}. + + +%E5 +%S6 +{in_proj, 104465, otp}. +{in_proj, 107912, otp}. +{in_proj, 114872, otp}. +{in_proj, 114849, otp}. +{in_proj, 114849, erlang}. +{in_proj, 114952, otp}. +{in_proj, 104531, otp}. +{in_proj, 104531, mnesia}. +{in_proj, 104545, wolf}. +{in_proj, 104659, otp}. +{in_proj, 104659, wolf}. +{in_proj, 104732, otp}. +{in_proj, 104732, mnesia}. +{in_proj, 104732, erlang}. +{in_proj, 117716, otp}. +{in_proj, 117716, documentation}. +{in_proj, 115020, otp}. +{in_proj, 115018, otp}. +{in_proj, 115018, mnesia}. +{in_proj, 113069, otp}. + +%E6 + diff --git a/lib/mnesia/doc/src/DATA2 b/lib/mnesia/doc/src/DATA2 new file mode 100644 index 0000000000..e547e84d99 --- /dev/null +++ b/lib/mnesia/doc/src/DATA2 @@ -0,0 +1,17 @@ + + +%S0 +{tables, + [{foo, [{attributes, [x,y,z]}]}]}. +%E0 + + +%S1 +{foo, a, benny, 18}. +{foo, b, elvis, 19}. +{foo, c, benny, 20}. +{foo, d, elvis, 21}. +{foo, e, klacke, 22}. +{foo, f, hans, 23}. +%E1 + diff --git a/lib/mnesia/doc/src/FRUITS b/lib/mnesia/doc/src/FRUITS new file mode 100644 index 0000000000..43d64f9d8c --- /dev/null +++ b/lib/mnesia/doc/src/FRUITS @@ -0,0 +1,12 @@ +%0 +{tables, + [{fruit, [{attributes, [name, color, taste]}]}, + {vegetable, [{attributes, [name, color, taste, price]}]}]}. + + +{fruit, orange, orange, sweet}. +{fruit, apple, green, sweet}. +{vegetable, carrot, orange, carrotish, 2.55}. +{vegetable, potato, yellow, none, 0.45}. +%0 + diff --git a/lib/mnesia/doc/src/Makefile b/lib/mnesia/doc/src/Makefile new file mode 100644 index 0000000000..f45b5137a3 --- /dev/null +++ b/lib/mnesia/doc/src/Makefile @@ -0,0 +1,240 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1997-2009. 
All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../../vsn.mk +VSN=$(MNESIA_VSN) +APPLICATION=mnesia + +# ---------------------------------------------------- +# Include dependency +# ---------------------------------------------------- + +ifndef DOCSUPPORT +include make.dep +endif + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/$(APPLICATION)-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +XML_APPLICATION_FILES = ref_man.xml +XML_REF3_FILES = \ + mnesia.xml \ + mnesia_frag_hash.xml \ + mnesia_registry.xml + +XML_PART_FILES = \ + part.xml \ + part_notes.xml \ + part_notes_history.xml + +XML_CHAPTER_FILES = \ + Mnesia_chap1.xml \ + Mnesia_chap2.xml \ + Mnesia_chap3.xml \ + Mnesia_chap4.xml \ + Mnesia_chap5.xml \ + Mnesia_chap7.xml \ + Mnesia_chap8.xml \ + Mnesia_App_A.xml \ + Mnesia_App_B.xml \ + Mnesia_App_C.xml \ + Mnesia_App_D.xml \ + notes.xml + +BOOK_FILES = book.xml + + +XML_FILES = \ + $(BOOK_FILES) $(XML_CHAPTER_FILES) \ + $(XML_PART_FILES) $(XML_REF3_FILES) 
$(XML_APPLICATION_FILES) + +GIF_FILES = \ + book.gif \ + company.gif \ + mnesia.gif \ + note.gif \ + notes.gif \ + ref_man.gif \ + user_guide.gif \ + warning.gif + +XML_HTML_FILES = \ + notes_history.xml + + +# ---------------------------------------------------- + +HTML_FILES = $(XML_APPLICATION_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_HTML_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_PART_FILES:%.xml=$(HTMLDIR)/%.html) + +INFO_FILE = ../../info +EXTRA_FILES = summary.html.src \ + $(DEFAULT_GIF_FILES) \ + $(DEFAULT_HTML_FILES) \ + $(XML_REF3_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_CHAPTER_FILES:%.xml=$(HTMLDIR)/%.html) + +MAN3_FILES = $(XML_REF3_FILES:%.xml=$(MAN3DIR)/%.3) + +ifdef DOCSUPPORT + +HTML_REF_MAN_FILE = $(HTMLDIR)/index.html + +TOP_PDF_FILE = $(PDFDIR)/$(APPLICATION)-$(VSN).pdf + +else +TEX_FILES_BOOK = \ + $(BOOK_FILES:%.xml=%.tex) +TEX_FILES_REF_MAN = $(XML_REF3_FILES:%.xml=%.tex) \ + $(XML_APPLICATION_FILES:%.xml=%.tex) +TEX_FILES_USERS_GUIDE = \ + $(XML_CHAPTER_FILES:%.xml=%.tex) + +TOP_PDF_FILE = $(APPLICATION)-$(VSN).pdf +TOP_PS_FILE = $(APPLICATION)-$(VSN).ps + +$(TOP_PDF_FILE): book.dvi ../../vsn.mk + $(DVI2PS) $(DVIPS_FLAGS) -f $< | $(DISTILL) $(DISTILL_FLAGS) > $@ + +$(TOP_PS_FILE): book.dvi ../../vsn.mk + $(DVI2PS) $(DVIPS_FLAGS) -f $< > $@ + +endif + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +XML_FLAGS += +DVIPS_FLAGS += + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- +$(HTMLDIR)/%.gif: %.gif + $(INSTALL_DATA) $< $@ + +ifdef DOCSUPPORT + +docs: pdf html man + +$(TOP_PDF_FILE): $(XML_FILES) + +pdf: $(TOP_PDF_FILE) + +html: gifs $(HTML_REF_MAN_FILE) + +clean clean_docs: + rm -rf $(HTMLDIR)/* + rm -f $(MAN3DIR)/* + rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) + rm -f errs core *~ + +else + +ifeq ($(DOCTYPE),pdf) +docs: pdf +else +ifeq ($(DOCTYPE),ps) +docs: ps +else +docs: html gifs man 
+endif +endif + +pdf: $(TOP_PDF_FILE) + +ps: $(TOP_PS_FILE) + +html: $(HTML_FILES) + + +clean clean_docs clean_tex: + rm -f $(TEX_FILES_USERS_GUIDE) $(TEX_FILES_REF_MAN) $(TEX_FILES_BOOK) + rm -f $(HTML_FILES) $(MAN3_FILES) + rm -f $(TOP_PDF_FILE) $(TOP_PS_FILE) + rm -f errs core *~ *xmls_output *xmls_errs $(LATEX_CLEAN) + +endif + +man: $(MAN3_FILES) + +gifs: $(GIF_FILES:%=$(HTMLDIR)/%) + +$(INDEX_TARGET): $(INDEX_SRC) ../../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +debug opt: + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +ifdef DOCSUPPORT + +release_docs_spec: docs + $(INSTALL_DIR) $(RELSYSDIR)/doc/pdf + $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELSYSDIR)/doc/pdf + $(INSTALL_DIR) $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(HTMLDIR)/* \ + $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR) + $(INSTALL_DIR) $(RELEASE_PATH)/man/man3 + $(INSTALL_DATA) $(MAN3_FILES) $(RELEASE_PATH)/man/man3 + +else + +ifeq ($(DOCTYPE),pdf) +release_docs_spec: pdf + $(INSTALL_DIR) $(RELEASE_PATH)/pdf + $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELEASE_PATH)/pdf +else +ifeq ($(DOCTYPE),ps) +release_docs_spec: ps + $(INSTALL_DIR) $(RELEASE_PATH)/ps + $(INSTALL_DATA) $(TOP_PS_FILE) $(RELEASE_PATH)/ps +else +release_docs_spec: docs + $(INSTALL_DIR) $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(GIF_FILES) $(EXTRA_FILES) $(HTML_FILES) \ + $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR) + $(INSTALL_DIR) $(RELEASE_PATH)/man/man3 + $(INSTALL_DATA) $(MAN3_FILES) $(RELEASE_PATH)/man/man3 +endif +endif + +endif + + +release_spec: + diff --git a/lib/mnesia/doc/src/Mnesia_App_A.xml b/lib/mnesia/doc/src/Mnesia_App_A.xml new file mode 100644 index 0000000000..86e5b7d03c --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_App_A.xml @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + 
<copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Appendix A: Mnesia Error Messages</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible>Bjarne Däcker</responsible> + <docno></docno> + <approved>Bjarne Däcker</approved> + <checked>Bjarne Däcker</checked> + <date>96-11-20</date> + <rev>B</rev> + <file>Mnesia_App_A.xml</file> + </header> + <p>Whenever an operation returns an error in Mnesia, a description + of the error is available. For example, the functions + <c>mnesia:transaction(Fun)</c>, or <c>mnesia:create_table(N,L)</c> + may return the tuple <c>{aborted, Reason}</c>, where <c>Reason</c> + is a term describing the error. The following function is used to + retrieve more detailed information about the error: + </p> + <list type="bulleted"> + <item><c>mnesia:error_description(Error)</c></item> + </list> + + <section> + <title>Errors in Mnesia</title> + <p>The following is a list of valid errors in Mnesia.</p> + <list type="bulleted"> + <item><c>badarg</c>. Bad or invalid argument, possibly bad type. + </item> + <item><c>no_transaction</c>. Operation not allowed outside transactions. + </item> + <item><c>combine_error</c>. Table options were illegally combined. + </item> + <item><c>bad_index</c>. Index already exists, or was out of bounds. 
+ </item> + <item><c>already_exists</c>. Schema option to be activated is already on. + </item> + <item><c>index_exists</c>. Some operations cannot be performed on tables with an index. + </item> + <item><c>no_exists</c>. Tried to perform operation on non-existing (non-alive) item. + </item> + <item><c>system_limit</c>. A system limit was exhausted. + </item> + <item><c>mnesia_down</c>. A transaction involves records on a + remote node which became unavailable before the transaction + was completed. Record(s) are no longer available elsewhere in + the network.</item> + <item><c>not_a_db_node</c>. A node was mentioned which does not exist in the schema.</item> + <item><c>bad_type</c>. Bad type specified in argument.</item> + <item><c>node_not_running</c>. Node is not running.</item> + <item><c>truncated_binary_file</c>. Truncated binary in file.</item> + <item><c>active</c>. Some delete operations require that all active records are removed.</item> + <item><c>illegal</c>. Operation not supported on this record.</item> + </list> + <p>The following example illustrates a function which returns an error, and the method to retrieve more detailed error information. + </p> + <p>The function <c>mnesia:create_table(bar, [{attributes, 3.14}])</c> will return the tuple <c>{aborted,Reason}</c>, where <c>Reason</c> is the tuple + <c>{bad_type,bar,3.14000}</c>. 
+ </p> + <p>The function <c>mnesia:error_description(Reason)</c>, returns the term + <c>{"Bad type on some provided arguments",bar,3.14000}</c> which is an error + description suitable + for display.</p> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_App_B.xmlsrc b/lib/mnesia/doc/src/Mnesia_App_B.xmlsrc new file mode 100644 index 0000000000..52f5e06d83 --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_App_B.xmlsrc @@ -0,0 +1,41 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Appendix B: The Backup Call Back Interface</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible>Bjarne Däcker</responsible> + <docno></docno> + <approved>Bjarne Däcker</approved> + <checked>Bjarne Däcker</checked> + <date>97-05-27</date> + <rev>C</rev> + <file>Mnesia_App_B.xml</file> + </header> + + <section> + <title>mnesia_backup callback behavior</title> + <p></p> + <codeinclude file="../../src/mnesia_backup.erl" tag="%0" type="erl"></codeinclude> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_App_C.xmlsrc b/lib/mnesia/doc/src/Mnesia_App_C.xmlsrc new file mode 100644 index 0000000000..d8916f25cb --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_App_C.xmlsrc @@ -0,0 +1,43 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1998</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Appendix C: The Activity Access Call Back Interface</title> + <prepared>Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_App_C.xml</file> + </header> + + <section> + <title>mnesia_access callback behavior</title> + <p></p> + <codeinclude file="../../src/mnesia_frag.erl" tag="%header_doc_include" type="erl"></codeinclude> + <p></p> + <codeinclude file="../../src/mnesia_frag.erl" tag="%impl_doc_include" type="erl"></codeinclude> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_App_D.xmlsrc b/lib/mnesia/doc/src/Mnesia_App_D.xmlsrc new file mode 100644 index 0000000000..d98680640d --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_App_D.xmlsrc @@ -0,0 +1,43 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2002</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Appendix D: The Fragmented Table Hashing Call Back Interface</title> + <prepared>Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_App_D.xml</file> + </header> + + <section> + <title>mnesia_frag_hash callback behavior</title> + <p></p> + <codeinclude file="../../src/mnesia_frag_hash.erl" tag="%header_doc_include" type="erl"></codeinclude> + <p></p> + <codeinclude file="../../src/mnesia_frag_hash.erl" tag="%impl_doc_include" type="erl"></codeinclude> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap1.xml b/lib/mnesia/doc/src/Mnesia_chap1.xml new file mode 100644 index 0000000000..9af81c85cb --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap1.xml @@ -0,0 +1,265 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Introduction</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible>Bjarne Däcker</responsible> + <docno></docno> + <approved>Bjarne Däcker</approved> + <checked>Bjarne Däcker</checked> + <date></date> + <rev>C</rev> + <file>Mnesia_chap1.xml</file> + </header> + <p>This book describes the Mnesia DataBase Management + System (DBMS). <em>Mnesia</em> is a distributed Database Management + System, appropriate for telecommunications applications and other + Erlang applications which require continuous operation and soft + real-time properties. It is one section of the Open Telecom Platform + (OTP), which is a control system platform for building + telecommunications applications.</p> + + <section> + <title>About Mnesia</title> + <p>The management of data in telecommunications systems has many + aspects whereof some, but not all, are addressed by traditional + commercial DBMSs (Data Base Management Systems). In particular the + very high level of fault tolerance which is required in many nonstop + systems, combined with requirements on the DBMS to run in the same + address space as the application, have led us to implement a brand new + DBMS, called Mnesia. Mnesia is implemented in, and very tightly + connected to, the programming language Erlang and it provides the + functionality that is necessary for the implementation of fault + tolerant telecommunications systems. Mnesia is a multiuser Distributed + DBMS specially made for industrial telecommunications applications + written in the symbolic programming language Erlang, which is also + the intended target language. Mnesia tries to address all of the data + management issues required for typical telecommunications systems and + it has a number of features that are not normally found in traditional + databases. <br></br> + + In telecommunications applications there are different needs + from the features provided by traditional DBMSs. 
The applications now + implemented in the Erlang language need a mixture of a broad range + of features, which generally are not satisfied by traditional DBMSs. + Mnesia is designed with requirements like the following in mind:</p> + <list type="ordered"> + <item>Fast real-time key/value lookup</item> + <item>Complicated non real-time queries mainly for + operation and maintenance</item> + <item>Distributed data due to distributed + applications</item> + <item>High fault tolerance</item> + <item>Dynamic re-configuration</item> + <item>Complex objects</item> + </list> + <p>What + sets Mnesia apart from most other DBMSs is that it is designed with + the typical data management problems of telecommunications applications + in mind. Hence Mnesia combines many concepts found in traditional + databases, such as transactions and queries with concepts found in data + management systems for telecommunications applications, such as very + fast real-time operations, configurable degree of fault tolerance (by + means of replication) and the ability to reconfigure the system without + stopping or suspending it. Mnesia is also interesting due to its tight + coupling to the programming language Erlang, thus almost turning Erlang + into a database programming language. This has many benefits, the + foremost is that + the impedance mismatch between data format used by the + DBMS and data format used by the programming language, which is used + to manipulate the data, completely disappears. <br></br> +</p> + </section> + + <section> + <title>The Mnesia DataBase Management System (DBMS)</title> + <p></p> + + <section> + <title>Features</title> + <p>Mnesia contains the following features which combine to produce a fault-tolerant, + distributed database management system written in Erlang: + </p> + <list type="bulleted"> + <item>Database schema can be dynamically reconfigured at runtime. 
+ </item> + <item>Tables can be declared to have properties such as location, + replication, and persistence. + </item> + <item>Tables can be moved or replicated to several nodes to improve + fault tolerance. The rest of the system can still access the tables + to read, write, and delete records. + </item> + <item>Table locations are transparent to the programmer. + Programs address table names and the system itself keeps track of + table locations. + </item> + <item>Database transactions can be distributed, and a large number of + functions can be called within one transaction. + </item> + <item>Several transactions can run concurrently, and their execution is + fully synchronized by the database management system. + Mnesia ensures that no two processes manipulate data simultaneously. + </item> + <item>Transactions can be assigned the property of being executed on + all nodes in the system, or on none. Transactions can also be bypassed + in favor of running so called "dirty operations", which reduce + overheads and run very fast. + </item> + </list> + <p>Details of these features are described in the following sections.</p> + </section> + <p></p> + + <section> + <title>Add-on Applications</title> + <p>QLC and Mnesia Session can be used in conjunction with Mnesia to produce + specialized functions which enhance the operational ability of Mnesia. + Both Mnesia Session and QLC have their own documentation as part + of the OTP documentation set. Below are the main features of Mnesia Session + and QLC when used in conjunction with Mnesia:</p> + <list type="bulleted"> + <item><em>QLC</em> has the ability to optimize the query + compiler for the Mnesia Database Management System, essentially making + the DBMS more efficient.</item> + <item><em>QLC</em>, can be used as a database programming + language for Mnesia. 
It includes a notation called "list + comprehensions" and can be used to make complex database + queries over a set of tables.</item> + <item><em>Mnesia Session</em> is an interface for the Mnesia Database + Management System</item> + <item><em>Mnesia Session</em> enables access to the + Mnesia DBMS from foreign programming languages (i.e. other + languages than Erlang).</item> + </list> + <p></p> + + <section> + <title>When to Use Mnesia</title> + <p>Use Mnesia with the following types of applications: + </p> + <list type="bulleted"> + <item>Applications that need to replicate data. + </item> + <item>Applications that perform complicated searches on data. + </item> + <item>Applications that need to use atomic transactions to + update several records simultaneously. + </item> + <item>Applications that use soft real-time characteristics. + </item> + </list> + <p>On the other hand, Mnesia may not be appropriate with the + following types of applications: + </p> + <list type="bulleted"> + <item>Programs that process plain text or binary data files + </item> + <item>Applications that merely need a look-up dictionary + which can be stored to disc can utilize the standard + library module <c>dets</c>, which is a disc based version + of the module <c>ets</c>. + </item> + <item>Applications which need disc logging facilities can + utilize the module <c>disc_log</c> by preference. + </item> + <item>Not suitable for hard real time systems. + </item> + </list> + </section> + </section> + + <section> + <title>Scope and Purpose</title> + <p>This manual is included in the OTP document set. It describes + how to build Mnesia database applications, and how to integrate + and utilize the Mnesia database management system with + OTP. Programming constructs are described, and numerous + programming examples are included to illustrate the use of + Mnesia. 
+ </p> + </section> + + <section> + <title>Prerequisites</title> + <p>Readers of this manual are assumed to be familiar with system + development principles and database management systems. Readers + are also assumed to be familiar with the Erlang programming + language.</p> + </section> + + <section> + <title>About This Book</title> + <p>This book contains the following chapters: + </p> + <list type="bulleted"> + <item>Chapter 2, "Getting Started with Mnesia", introduces + Mnesia with an example database. Examples are shown of how to + start an Erlang session, specify a Mnesia database directory, + initialize a database schema, start Mnesia, and create + tables. Initial prototyping of record definitions is also + discussed. + </item> + <item>Chapter 3, "Building a Mnesia Database", more formally + describes the steps introduced in Chapter 2, namely the Mnesia + functions which define a database schema, start Mnesia, and + create the required tables. + </item> + <item>Chapter 4, "Transactions and other access contexts", + describes the transactions properties which make Mnesia into a + fault tolerant, real-time distributed database management + system. This chapter also describes the concept of locking in + order to ensure consistency in tables, and so called "dirty + operations", or short cuts which bypass the transaction system + to improve speed and reduce overheads. + </item> + <item>Chapter 5, "Miscellaneous Mnesia Features", describes + features which enable the construction of more complex + database applications. These features includes indexing, + checkpoints, distribution and fault tolerance, disc-less + nodes, replication manipulation, local content tables, concurrency, + and object based programming in Mnesia. 
+ </item> + <item>Chapter 6, "Mnesia System Information", describes the + files contained in the Mnesia database directory, database + configuration data, core and table dumps, as well as the + important subject of backup, fall-back, and disaster recovery + principles. + </item> + <item>Chapter 7, "Combining Mnesia with SNMP", is a short + chapter which outlines Mnesia integrated with SNMP. + </item> + <item>Appendix A, "Mnesia Error Messages", lists Mnesia error + messages and their meanings. + </item> + <item>Appendix B, "The Backup Call Back Interface", is a + program listing of the default implementation of this facility. + </item> + <item>Appendix C, "The Activity Access Call Back Interface", + is a program outline of one possible implementation of this facility. + </item> + </list> + </section> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc new file mode 100644 index 0000000000..0714c7b645 --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc @@ -0,0 +1,647 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Getting Started with Mnesia</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev>C</rev> + <file>Mnesia_chap2.xml</file> + </header> + <p>This chapter introduces Mnesia. Following a brief discussion + about the first initial setup, a Mnesia database example is + demonstrated. This database example will be referenced in the + following chapters, where this example is modified in order to + illustrate various program constructs. In this chapter, the + following mandatory procedures are illustrated by examples: + </p> + <list type="bulleted"> + <item>Starting an Erlang session and specifying a directory for the + Mnesia database. + </item> + <item>Initializing a database schema. + </item> + <item>Starting Mnesia and creating the required tables.</item> + </list> + + <section> + <title>Starting Mnesia for the first time</title> + <p>Following is a simplified demonstration of a Mnesia system startup. This is the dialogue from the Erlang + shell: + </p> + <pre><![CDATA[ + unix> erl -mnesia dir '"/tmp/funky"' + Erlang (BEAM) emulator version 4.9 + + Eshell V4.9 (abort with ^G) + 1> + 1> mnesia:create_schema([node()]). + ok + 2> mnesia:start(). + ok + 3> mnesia:create_table(funky, []). + {atomic,ok} + 4> mnesia:info(). + ---> Processes holding locks <--- + ---> Processes waiting for locks <--- + ---> Pending (remote) transactions <--- + ---> Active (local) transactions <--- + ---> Uncertain transactions <--- + ---> Active tables <--- + funky : with 0 records occupying 269 words of mem + schema : with 2 records occupying 353 words of mem + ===> System info in version "1.0", debug level = none <=== + opt_disc. Directory "/tmp/funky" is used. 
+ use fall-back at restart = false + running db nodes = [nonode@nohost] + stopped db nodes = [] + remote = [] + ram_copies = [funky] + disc_copies = [schema] + disc_only_copies = [] + [{nonode@nohost,disc_copies}] = [schema] + [{nonode@nohost,ram_copies}] = [funky] + 1 transactions committed, 0 aborted, 0 restarted, 1 logged to disc + 0 held locks, 0 in queue; 0 local transactions, 0 remote + 0 transactions waits for other nodes: [] + ok + ]]></pre> + <p>In the example above the following actions were performed: + </p> + <list type="bulleted"> + <item>The Erlang system was started from the UNIX prompt + with a flag <c>-mnesia dir '"/tmp/funky"'</c>. This flag indicates + to Mnesia which directory will store the data. + </item> + <item>A new empty schema was initialized on the local node by evaluating + <c>mnesia:create_schema([node()]).</c> The schema contains + information about the database in general. This will be + thoroughly explained later on. + </item> + <item>The DBMS was started by evaluating <c>mnesia:start()</c>. + </item> + <item>A first table was created, called <c>funky</c> by evaluating + the expression <c>mnesia:create_table(funky, [])</c>. The table + was given default properties. + </item> + <item><c>mnesia:info()</c> was evaluated and subsequently displayed + information regarding the status of the database on the terminal. + </item> + </list> + </section> + + <section> + <title>An Introductory Example</title> + <p>A Mnesia database is organized as a set of tables. + Each table is populated with instances (Erlang records). + A table also has a number of properties, such as location and + persistence. + </p> + <p>In this example we shall: + </p> + <list type="bulleted"> + <item>Start an Erlang system, and specify the directory where + the database will be located. + </item> + <item>Initiate a new schema with an attribute that specifies + on which node, or nodes, the database will operate. + </item> + <item>Start Mnesia itself. 
+ </item> + <item>Create and populate the database tables. + </item> + </list> + + <section> + <title>The Example Database</title> + </section> + <p>In this database example, we will create the database and + relationships depicted in the following diagram. We will call this + database the <em>Company</em> database. + </p> + <image file="company.gif"> + <icaption>Company Entity-Relation Diagram</icaption> + </image> + <p>The database model looks as follows: + </p> + <list type="bulleted"> + <item>There are three entities: employee, project, and + department. + </item> + <item> + <p>There are three relationships between these entities:</p> + <list type="ordered"> + <item>A department is managed by an employee, hence the + <em>manager</em> relationship. + </item> + <item>An employee works at a department, hence the + <em>at_dep</em> relationship. + </item> + <item>Each employee works on a number of projects, hence + the <em>in_proj</em> relationship. + </item> + </list> + </item> + </list> + + <section> + <title>Defining Structure and Content</title> + <p>We first enter our record definitions into a text file + named <c>company.hrl</c>. This file defines the following + structure for our sample database: + </p> + <codeinclude file="company.hrl" tag="%0" type="erl"></codeinclude> + <p>The structure defines six tables in our database. In Mnesia, + the function <c>mnesia:create_table(Name, ArgList)</c> is + used to create tables. <c>Name</c> is the table + name. <em>Note:</em> The current version of Mnesia does + not require that the name of the table is the same as the record + name; see Chapter 4: + <seealso marker="Mnesia_chap4#recordnames_tablenames">Record Names Versus Table Names.</seealso></p> + <p>For example, the table + for employees will be created with the function + <c>mnesia:create_table(employee, [{attributes, record_info(fields, employee)}]).</c> The table + name <c>employee</c> matches the name for records specified + in <c>ArgList</c>. 
The expression <c>record_info(fields, RecordName)</c> is processed by the Erlang preprocessor and + evaluates to a list containing the names of the different + fields for a record. + </p> + </section> + + <section> + <title>The Program</title> + <p>The following shell interaction starts Mnesia and + initializes the schema for our <c>company</c> database: + </p> + <pre> + + % <input>erl -mnesia dir '"/ldisc/scratch/Mnesia.Company"'</input> + Erlang (BEAM) emulator version 4.9 + + Eshell V4.9 (abort with ^G) + 1> mnesia:create_schema([node()]). + ok + 2> mnesia:start(). + ok + </pre> + <p>The following program module creates and populates previously defined tables: + </p> + <codeinclude file="company.erl" tag="%0" type="erl"></codeinclude> + </section> + + <section> + <title>The Program Explained</title> + <p>The following commands and functions were used to initiate the + Company database: + </p> + <list type="bulleted"> + <item><c>% erl -mnesia dir '"/ldisc/scratch/Mnesia.Company"'.</c> This is a UNIX + command line entry which starts the Erlang system. The flag + <c>-mnesia dir Dir</c> specifies the location of the + database directory. The system responds and waits for + further input with the prompt <em>1></em>. + </item> + <item><c>mnesia:create_schema([node()]).</c> This function + has the format <c>mnesia:create_schema(DiscNodeList)</c> and + initiates a new schema. In this example, we have created a + non-distributed system using only one node. Schemas are fully + explained in Chapter 3: <seealso marker="Mnesia_chap3#def_schema">Defining a Schema</seealso>. + </item> + <item><c>mnesia:start().</c> This function starts + Mnesia. This function is fully explained in Chapter 3: + <seealso marker="Mnesia_chap3#start_mnesia">Starting Mnesia</seealso>. + </item> + </list> + <p>Continuing the dialogue with the Erlang shell will produce + the following: + </p> + <pre><![CDATA[ + 3> company:init(). + {atomic,ok} + 4> mnesia:info(). 
+ ---> Processes holding locks <--- + ---> Processes waiting for locks <--- + ---> Pending (remote) transactions <--- + ---> Active (local) transactions <--- + ---> Uncertain transactions <--- + ---> Active tables <--- + in_proj : with 0 records occuping 269 words of mem + at_dep : with 0 records occuping 269 words of mem + manager : with 0 records occuping 269 words of mem + project : with 0 records occuping 269 words of mem + dept : with 0 records occuping 269 words of mem + employee : with 0 records occuping 269 words of mem + schema : with 7 records occuping 571 words of mem + ===> System info in version "1.0", debug level = none <=== + opt_disc. Directory "/ldisc/scratch/Mnesia.Company" is used. + use fall-back at restart = false + running db nodes = [nonode@nohost] + stopped db nodes = [] + remote = [] + ram_copies = + [at_dep,dept,employee,in_proj,manager,project] + disc_copies = [schema] + disc_only_copies = [] + [{nonode@nohost,disc_copies}] = [schema] + [{nonode@nohost,ram_copies}] = + [employee,dept,project,manager,at_dep,in_proj] + 6 transactions committed, 0 aborted, 0 restarted, 6 logged to disc + 0 held locks, 0 in queue; 0 local transactions, 0 remote + 0 transactions waits for other nodes: [] + ok + ]]></pre> + <p>A set of tables is created: + </p> + <list type="bulleted"> + <item><c>mnesia:create_table(Name,ArgList)</c>. This + function is used to create the required database tables. The + options available with <c>ArgList</c> are explained in + Chapter 3: <seealso marker="Mnesia_chap3#create_tables">Creating New Tables</seealso>. </item> + </list> + <p>The <c>company:init/0</c> function creates our tables. Two tables are + of type <c>bag</c>. These are the <c>manager</c> relation as well as + the <c>in_proj</c> relation. This shall be interpreted as: An + employee can be manager over several departments, and an employee + can participate in several projects. 
However, the <c>at_dep</c> + relation is <c>set</c> because an employee can only work in one department. + In this data model we have examples of relations that are one-to-one (<c>set</c>), + as well as one-to-many (<c>bag</c>). + </p> + <p><c>mnesia:info()</c> now indicates that the database + has seven local tables, of which six are our + user defined tables and one is the schema. + Six transactions have been committed, as six successful transactions were run when + creating the tables. + </p> + <p>To write a function which inserts an employee record into the database, there must be an + <c>at_dep</c> record and a set of <c>in_proj</c> records inserted. Examine the following + code used to complete this action: + </p> + <codeinclude file="company.erl" tag="%1" type="erl"></codeinclude> + <list type="bulleted"> + <item> + <p><c>insert_emp(Emp, DeptId, ProjNames) -></c>. The + <c>insert_emp/3</c> arguments are:</p> + <list type="ordered"> + <item><c>Emp</c> is an employee record. + </item> + <item><c>DeptId</c> is the identity of the department where the employee is working. + </item> + <item><c>ProjNames</c> is a list of the names of the projects where the employee is working.</item> + </list> + </item> + </list> + <p>The <c>insert_emp(Emp, DeptId, ProjNames) -></c> function + creates a <em>functional object</em>. Functional objects + are identified by the term <c>Fun</c>. The Fun is passed + as a single argument to the function + <c>mnesia:transaction(Fun)</c>. This means that Fun is + run as a transaction with the following properties: + </p> + <list type="bulleted"> + <item>Fun either succeeds or fails completely. + </item> + <item>Code which manipulates the same data records can be + run concurrently without the different processes interfering + with each other. 
+ </item> + </list> + <p>The function can be used as:</p> + <code type="none"> + Emp = #employee{emp_no= 104732, + name = klacke, + salary = 7, + sex = male, + phone = 98108, + room_no = {221, 015}}, + insert_emp(Emp, 'B/SFR', ['Erlang', mnesia, otp]). + </code> + <note> + <p>Functional Objects (Funs) are described in the + Erlang Reference Manual, "Fun Expressions". + </p> + </note> + </section> + + <section> + <title>Initial Database Content</title> + <p>After the insertion of the employee named <c>klacke</c> + we have the following records in the database: + </p> + <marker id="table2_1"></marker> + <table> + <row> + <cell align="left" valign="middle">emp_no</cell> + <cell align="left" valign="middle">name</cell> + <cell align="left" valign="middle">salary</cell> + <cell align="left" valign="middle">sex</cell> + <cell align="left" valign="middle">phone</cell> + <cell align="left" valign="middle">room_no</cell> + </row> + <row> + <cell align="left" valign="middle">104732</cell> + <cell align="left" valign="middle">klacke</cell> + <cell align="left" valign="middle">7</cell> + <cell align="left" valign="middle">male</cell> + <cell align="left" valign="middle">98108</cell> + <cell align="left" valign="middle">{221, 015}</cell> + </row> + <tcaption> +Employee</tcaption> + </table> + <p>An employee record has the following Erlang record/tuple + representation: <c>{employee, 104732, klacke, 7, male, 98108, {221, 015}}</c></p> + <marker id="table2_2"></marker> + <table> + <row> + <cell align="left" valign="middle">emp</cell> + <cell align="left" valign="middle">dept_name</cell> + </row> + <row> + <cell align="left" valign="middle">klacke</cell> + <cell align="left" valign="middle">B/SFR</cell> + </row> + <tcaption> +At_dep</tcaption> + </table> + <p>At_dep has the following Erlang tuple representation: + <c>{at_dep, klacke, 'B/SFR'}</c>. 
+ </p> + <marker id="table3_3"></marker> + <table> + <row> + <cell align="left" valign="middle">emp</cell> + <cell align="left" valign="middle">proj_name</cell> + </row> + <row> + <cell align="left" valign="middle">klacke</cell> + <cell align="left" valign="middle">Erlang</cell> + </row> + <row> + <cell align="left" valign="middle">klacke</cell> + <cell align="left" valign="middle">otp</cell> + </row> + <row> + <cell align="left" valign="middle">klacke</cell> + <cell align="left" valign="middle">mnesia</cell> + </row> + <tcaption> +In_proj</tcaption> + </table> + <p>In_proj has the following Erlang tuple representation, one tuple per row: + <c>{in_proj, klacke, 'Erlang'}</c>, <c>{in_proj, klacke, otp}</c>, and <c>{in_proj, klacke, mnesia}</c></p> + <p>There is no difference between rows in a table and Mnesia + records. Both concepts are the same and will be used + interchangeably throughout this book. + </p> + <p>A Mnesia table is populated by Mnesia records. For example, + the tuple <c>{boss, klacke, bjarne}</c> is a record. The + second element in this tuple is the key. In order to uniquely + identify a table row both the key and the table name are + needed. The term <em>object identifier</em>, + (oid) is sometimes used for the arity two tuple {Tab, Key}. The oid for + the <c>{boss, klacke, bjarne}</c> record is the arity two + tuple <c>{boss, klacke}</c>. The first element of the tuple is + the type of the record and the second element is the key. An + oid can lead to zero, one, or more records depending on + whether the table type is <c>set</c> or <c>bag</c>. + </p> + <p>We were also able to insert the <c>{boss, klacke, bjarne}</c> record which contains an implicit reference to + another employee which does not yet exist in the + database. Mnesia does not enforce such referential integrity. 
+ </p> + </section> + + <section> + <title>Adding Records and Relationships to the Database</title> + <p>After adding additional record to the Company database, we + may end up with the following records: + </p> + <p><em>Employees</em></p> + <code type="none"> + {employee, 104465, "Johnson Torbjorn", 1, male, 99184, {242,038}}. + {employee, 107912, "Carlsson Tuula", 2, female,94556, {242,056}}. + {employee, 114872, "Dacker Bjarne", 3, male, 99415, {221,035}}. + {employee, 104531, "Nilsson Hans", 3, male, 99495, {222,026}}. + {employee, 104659, "Tornkvist Torbjorn", 2, male, 99514, {222,022}}. + {employee, 104732, "Wikstrom Claes", 2, male, 99586, {221,015}}. + {employee, 117716, "Fedoriw Anna", 1, female,99143, {221,031}}. + {employee, 115018, "Mattsson Hakan", 3, male, 99251, {203,348}}. + </code> + <p><em>Dept</em></p> + <code type="none"> + + {dept, 'B/SF', "Open Telecom Platform"}. + {dept, 'B/SFP', "OTP - Product Development"}. + {dept, 'B/SFR', "Computer Science Laboratory"}. + </code> + <p><em>Projects</em></p> + <code type="none"> + %% projects + {project, erlang, 1}. + {project, otp, 2}. + {project, beam, 3}. + {project, mnesia, 5}. + {project, wolf, 6}. + {project, documentation, 7}. + {project, www, 8}. + </code> + <p>The above three tables, titled <c>employees</c>, + <c>dept</c>, and <c>projects</c>, are the tables which are + made up of real records. The following database content is + stored in the tables which is built on + relationships. These tables are titled <c>manager</c>, + <c>at_dep</c>, and <c>in_proj</c>. + </p> + <p><em>Manager</em></p> + <code type="none"> + + {manager, 104465, 'B/SF'}. + {manager, 104465, 'B/SFP'}. + {manager, 114872, 'B/SFR'}. + </code> + <p><em>At_dep</em></p> + <code type="none"> + {at_dep, 104465, 'B/SF'}. + {at_dep, 107912, 'B/SF'}. + {at_dep, 114872, 'B/SFR'}. + {at_dep, 104531, 'B/SFR'}. + {at_dep, 104659, 'B/SFR'}. + {at_dep, 104732, 'B/SFR'}. + {at_dep, 117716, 'B/SFP'}. + {at_dep, 115018, 'B/SFP'}. 
+ </code> + <p><em>In_proj</em></p> + <code type="none"> + {in_proj, 104465, otp}. + {in_proj, 107912, otp}. + {in_proj, 114872, otp}. + {in_proj, 104531, otp}. + {in_proj, 104531, mnesia}. + {in_proj, 104545, wolf}. + {in_proj, 104659, otp}. + {in_proj, 104659, wolf}. + {in_proj, 104732, otp}. + {in_proj, 104732, mnesia}. + {in_proj, 104732, erlang}. + {in_proj, 117716, otp}. + {in_proj, 117716, documentation}. + {in_proj, 115018, otp}. + {in_proj, 115018, mnesia}. + </code> + <p>The room number is an attribute of the employee + record. This is a structured attribute which consists of a + tuple. The first element of the tuple identifies a corridor, + and the second element identifies the actual room in the + corridor. We could have chosen to represent this as a record + <c>-record(room, {corr, no}).</c> instead of an anonymous + tuple representation. + </p> + <p>The Company database is now initialized and contains + data. </p> + </section> + + <section> + <title>Writing Queries</title> + <p>Retrieving data from DBMS should usually be done with <c>mnesia:read/3</c> or + <c>mnesia:read/1</c> functions. The following function raises the salary:</p> + <codeinclude file="company.erl" tag="%5" type="erl"></codeinclude> + <p>Since we want to update the record using <c>mnesia:write/1</c> after we have + increased the salary we acquire a write lock (third argument to read) when we read the + record from the table. + </p> + <p>It is not always the case that we can directly read the values from the table, + we might need to search the table or several tables to get the data we want, this + is done by writing database queries. 
Queries are always more expensive operations + than direct lookups done with <c>mnesia:read</c> and should be avoided in performance + critical code.</p> + <p>There are two methods for writing database queries: + </p> + <list type="bulleted"> + <item>Mnesia functions + </item> + <item>QLC</item> + </list> + + <section> + <title>Mnesia functions </title> + <p></p> + <p>The following function extracts the names of the female employees + stored in the database: + </p> + <pre> +\011 mnesia:select(employee, [{#employee{sex = female, name = '$1', _ = '_'},[], ['$1']}]). + </pre> + <p>Select must always run within an activity such as a + transaction. To be able to call from the shell we might + construct a function as: + </p> + <codeinclude file="company.erl" tag="%20" type="erl"></codeinclude> + <p>The select expression matches all entries in table employee with + the field sex set to female. + </p> + <p>This function can be called from the shell as follows: + </p> + <pre> + (klacke@gin)1> <input>company:all_females().</input> + {atomic, ["Carlsson Tuula", "Fedoriw Anna"]} + </pre> + <p>See also the <seealso marker="Mnesia_chap4#matching">Pattern Matching </seealso> + chapter for a description of select and its syntax. + </p> + </section> + + <section> + <title>Using QLC </title> + <p>This section contains simple introductory examples + only. Refer to <em>QLC reference manual</em> for a + full description of the QLC query language. Using QLC + might be more expensive than using Mnesia functions directly but + offers a nice syntax. + </p> + <p>The following function extracts a list of female employees + from the database: + </p> + <pre> + Q = qlc:q([E#employee.name || E <![CDATA[<-]]> mnesia:table(employee), +\011 E#employee.sex == female]), +\011 qlc:e(Q), + </pre> + <p>Accessing mnesia tables from a QLC list comprehension must + always be done within a transaction. 
Consider the following + function: + </p> + <codeinclude file="company.erl" tag="%2" type="erl"></codeinclude> + <p>This function can be called from the shell as follows: + </p> + <pre> + (klacke@gin)1> <input>company:females().</input> + {atomic, ["Carlsson Tuula", "Fedoriw Anna"]} + </pre> + <p>In traditional relational database terminology, the above + operation would be called a selection, followed by a projection. + </p> + <p>The list comprehension expression shown above contains a + number of syntactical elements. + </p> + <list type="bulleted"> + <item>the first <c>[</c> bracket should be read as "build the + list" + </item> + <item>the <c>||</c> should be read as "such that" and the arrow <c><![CDATA[<-]]></c> should + be read as "taken from" + </item> + </list> + <p>Hence, the above list comprehension demonstrates the + formation of the list <c>E#employee.name</c> such that <c>E</c> is + taken from the table of employees and the <c>sex</c> attribute + of each record is equal to the atom <c>female</c>. + </p> + <p>The whole list comprehension must be given to the + <c>qlc:q/1</c> function. + </p> + <p>It is possible to combine list comprehensions with low + level Mnesia functions in the same transaction. If we want to + raise the salary of all female employees we execute: + </p> + <codeinclude file="company.erl" tag="%4" type="erl"></codeinclude> + <p>The function <c>raise_females/1</c> returns the tuple + <c>{atomic, Number}</c>, where <c>Number</c> is the number of + female employees who received a salary increase. Should an error + occur, the value <c>{aborted, Reason}</c> is returned. In the + case of an error, Mnesia guarantees that the salary is not + raised for any employees at all. 
+ </p> + <pre> + + 33><input>company:raise_females(33).</input> + {atomic,2} + </pre> + </section> + </section> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap3.xml b/lib/mnesia/doc/src/Mnesia_chap3.xml new file mode 100644 index 0000000000..9a382bcb5a --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap3.xml @@ -0,0 +1,556 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Building A Mnesia Database</title> + <prepared></prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_chap3.xml</file> + </header> + <p>This chapter details the basic steps involved when designing + a Mnesia database and the programming constructs which make different + solutions available to the programmer. The chapter includes the following + sections: + </p> + <list type="bulleted"> + <item>defining a schema</item> + <item>the datamodel</item> + <item>starting Mnesia</item> + <item>creating new tables.</item> + </list> + + <section> + <marker id="def_schema"></marker> + <title>Defining a Schema</title> + <p>The configuration of a Mnesia system is described in the + schema. 
The schema is a special table which contains information + such as the table names and each table's + storage type, (i.e. whether a table should be stored in RAM, + on disc or possibly on both, as well as its location). + </p> + <p>Unlike data tables, information contained in schema tables can only be + accessed and modified by using the schema related functions + described in this section. + </p> + <p>Mnesia has various functions for defining the + database schema. It is possible to move tables, delete tables, + or reconfigure the layout of tables. + </p> + <p>An important aspect of these functions is that the system can access a + table while it is being reconfigured. For example, it is possible to move a + table and simultaneously perform write operations to the same + table. This feature is essential for applications that require + continuous service. + </p> + <p>The following section describes the functions available for schema management, + all of which return a tuple: + </p> + <list type="bulleted"> + <item><c>{atomic, ok}</c>; or, + </item> + <item><c>{aborted, Reason}</c> if unsuccessful.</item> + </list> + + <section> + <title>Schema Functions</title> + <list type="bulleted"> + <item><c>mnesia:create_schema(NodeList)</c>. This function is + used to initialize a new, empty schema. This is a mandatory + requirement before Mnesia can be started. Mnesia is a truly + distributed DBMS and the schema is a system table that is + replicated on all nodes in a Mnesia system. + The function will fail if a schema is already present on any of + the nodes in <c>NodeList</c>. This function requires Mnesia + to be stopped on the all + <c>db_nodes</c> contained in the parameter <c>NodeList</c>. + Applications call this function only once, + since it is usually a one-time activity to initialize a new + database. + </item> + <item><c>mnesia:delete_schema(DiscNodeList)</c>. This function + erases any old schemas on the nodes in + <c>DiscNodeList</c>. 
It also removes all old tables together + with all data. This function requires Mnesia to be stopped + on all <c>db_nodes</c>. + </item> + <item><c>mnesia:delete_table(Tab)</c>. This function + permanently deletes all replicas of table <c>Tab</c>. + </item> + <item><c>mnesia:clear_table(Tab)</c>. This function + permanently deletes all entries in table <c>Tab</c>. + </item> + <item><c>mnesia:move_table_copy(Tab, From, To)</c>. This + function moves the copy of table <c>Tab</c> from node + <c>From</c> to node <c>To</c>. The table storage type, + <c>{type}</c> is preserved, so if a RAM table is moved from + one node to another node, it remains a RAM table on the new + node. It is still possible for other transactions to perform + read and write operations on the table while it is being + moved. + </item> + <item><c>mnesia:add_table_copy(Tab, Node, Type)</c>. This + function creates a replica of the table <c>Tab</c> at node + <c>Node</c>. The <c>Type</c> argument must be one of the + atoms <c>ram_copies</c>, <c>disc_copies</c>, or + <c>disc_only_copies</c>. If we add a copy of the system + table <c>schema</c> to a node, this means that we want the + Mnesia schema to reside there as well. This action then + extends the set of nodes that comprise this particular + Mnesia system. + </item> + <item><c>mnesia:del_table_copy(Tab, Node)</c>. This function + deletes the replica of table <c>Tab</c> at node <c>Node</c>. + When the last replica of a table is removed, the table is + deleted. + </item> + <item> + <p><c>mnesia:transform_table(Tab, Fun, NewAttributeList, NewRecordName)</c>. This + function changes the format of all records in table + <c>Tab</c>. It applies the argument <c>Fun</c> to all + records in the table. <c>Fun</c> shall be a function which + takes a record of the old type, and returns the record of the new + type. The table key may not be changed.</p> + <code type="none"> +-record(old, {key, val}). +-record(new, {key, val, extra}). 
+ +Transformer = + fun(X) when record(X, old) -> + #new{key = X#old.key, + val = X#old.val, + extra = 42} + end, +{atomic, ok} = mnesia:transform_table(foo, Transformer, + record_info(fields, new), + new), + </code> + <p>The <c>Fun</c> argument can also be the atom + <c>ignore</c>, it indicates that only the meta data about the table will + be updated. Usage of <c>ignore</c> is not recommended (since it creates + inconsistencies between the meta data and the actual data) but included + as a possibility for the user do to his own (off-line) transform.</p> + </item> + <item><c>change_table_copy_type(Tab, Node, ToType)</c>. This + function changes the storage type of a table. For example, a + RAM table is changed to a disc_table at the node specified + as <c>Node</c>.</item> + </list> + </section> + </section> + + <section> + <title>The Data Model</title> + <p>The data model employed by Mnesia is an extended + relational data model. Data is organized as a set of + tables and relations between different data records can + be modeled as additional tables describing the actual + relationships. + Each table contains instances of Erlang records + and records are represented as Erlang tuples. + </p> + <p>Object identifiers, also known as oid, are made up of a table name and a key. + For example, if we have an employee record represented by the tuple + <c>{employee, 104732, klacke, 7, male, 98108, {221, 015}}</c>. + This record has an object id, (Oid) which is the tuple + <c>{employee, 104732}</c>. + </p> + <p>Thus, each table is made up of records, where the first element + is a record name and the second element of the table is a key + which identifies the particular record in that table. The + combination of the table name and a key, is an arity two tuple + <c>{Tab, Key}</c> called the Oid. 
See Chapter 4:<seealso marker="Mnesia_chap4#recordnames_tablenames">Record Names Versus Table Names</seealso>, for more information + regarding the relationship between the record name and the table + name. + </p> + <p>What makes the Mnesia data model an extended relational model + is the ability to store arbitrary Erlang terms in the attribute + fields. One attribute value could for example be a whole tree of + oids leading to other terms in other tables. This + type of record is hard to model in traditional relational + DBMSs.</p> + </section> + + <section> + <marker id="start_mnesia"></marker> + <title>Starting Mnesia</title> + <p>Before we can start Mnesia, we must initialize an empty schema + on all the participating nodes. + </p> + <list type="bulleted"> + <item>The Erlang system must be started. + </item> + <item>Nodes with disc database schema must be defined and + implemented with the function <c>create_schema(NodeList).</c></item> + </list> + <p>When running a distributed system, with two or more + participating nodes, then the <c>mnesia:start()</c> function + must be executed on each participating node. Typically this would + be part of the boot script in an embedded environment. + In a test environment or an interactive environment, + <c>mnesia:start()</c> can also be used either from the + Erlang shell, or another program. + </p> + + <section> + <title>Initializing a Schema and Starting Mnesia</title> + <p>To use a known example, we illustrate how to run the + Company database described in Chapter 2 on two separate nodes, + which we call <c>a@gin</c> and <c>b@skeppet</c>. Each of these + nodes must have a Mnesia directory as well as an + initialized schema before Mnesia can be started. There are two + ways to specify the Mnesia directory to be used: + </p> + <list type="bulleted"> + <item> + <p>Specify the Mnesia directory by providing an application + parameter either when starting the Erlang shell or in the + application script. 
Previously the following example was used + to create the directory for our Company database:</p> + <pre> +%<input>erl -mnesia dir '"/ldisc/scratch/Mnesia.Company"'</input> + </pre> + </item> + <item>If no command line flag is entered, then the Mnesia + directory will be the current working directory on the node + where the Erlang shell is started.</item> + </list> + <p>To start our Company database and get it running on the two + specified nodes, we enter the following commands: + </p> + <list type="ordered"> + <item> + <p>On the node called gin:</p> + <pre> + gin %<input>erl -sname a -mnesia dir '"/ldisc/scratch/Mnesia.company"'</input> + </pre> + </item> + <item> + <p>On the node called skeppet:</p> + <pre> +skeppet %<input>erl -sname b -mnesia dir '"/ldisc/scratch/Mnesia.company"'</input> + </pre> + </item> + <item> + <p>On one of the two nodes:</p> + <pre> +(a@gin1)><input>mnesia:create_schema([a@gin, b@skeppet]).</input> + </pre> + </item> + <item>The function <c>mnesia:start()</c> is called on both + nodes. + </item> + <item> + <p>To initialize the database, execute the following + code on one of the two nodes.</p> + <codeinclude file="company.erl" tag="%12" type="erl"></codeinclude> + </item> + </list> + <p>As illustrated above, the two directories reside on different nodes, because the + <c>/ldisc/scratch</c> (the "local" disc) exists on the two different + nodes. + </p> + <p>By executing these commands we have configured two Erlang + nodes to run the Company database, and therefore, initialize the + database. This is required only once when setting up, the next time the + system is started <c>mnesia:start()</c> is called + on both nodes, to initialize the system from disc. + </p> + <p>In a system of Mnesia nodes, every node is aware of the + current location of all tables. In this example, data is + replicated on both nodes and functions which manipulate the + data in our tables can be executed on either of the two nodes. 
+ Code which manipulates Mnesia data behaves identically + regardless of where the data resides. + </p> + <p>The function <c>mnesia:stop()</c> stops Mnesia on the node + where the function is executed. Both the <c>start/0</c> and + the <c>stop/0</c> functions work on the "local" Mnesia system, + and there are no functions which start or stop a set of nodes. + </p> + </section> + + <section> + <title>The Start-Up Procedure</title> + <p>Mnesia is started by calling the following function: + </p> + <code type="none"> + mnesia:start(). + </code> + <p>This function initiates the DBMS locally. </p> + <p>The choice of configuration will alter the location and load + order of the tables. The alternatives are listed below: <br></br> +</p> + <list type="ordered"> + <item>Tables that are stored locally only, are initialized + from the local Mnesia directory. + </item> + <item>Replicated tables that reside locally + as well as somewhere else are either initiated from disc or + by copying the entire table from the other node depending on + which of the different replicas is the most recent. Mnesia + determines which of the tables is the most recent. + </item> + <item>Tables that reside on remote nodes are available to other nodes as soon + as they are loaded.</item> + </list> + <p>Table initialization is asynchronous: the function + call <c>mnesia:start()</c> returns the atom <c>ok</c> and + then starts to initialize the different tables. Depending on + the size of the database, this may take some time, and the + application programmer must wait for the tables that the + application needs before they can be used. This is achieved by using + the function:</p> + <list type="bulleted"> + <item><c>mnesia:wait_for_tables(TabList, Timeout)</c></item> + </list> + <p>This function suspends the caller until all tables + specified in <c>TabList</c> are properly initiated. 
+ </p> + <p>A problem can arise if a replicated table on one node is + initiated, but Mnesia deduces that another (remote) + replica is more recent than the replica existing on + the local node: the initialization procedure will not proceed. + In this situation, a call to + <c>mnesia:wait_for_tables/2</c> suspends the caller until the + remote node has initiated the table from its local disc and + the node has copied the table over the network to the local node. + </p> + <p>This procedure can be time consuming; however, the shortcut function + shown below will load all the tables from disc at a faster rate: + </p> + <list type="bulleted"> + <item><c>mnesia:force_load_table(Tab)</c>. This function forces + tables to be loaded from disc regardless of the network + situation.</item> + </list> + <p>Thus, we can assume that if an application + wishes to use tables <c>a</c> and <c>b</c>, then the + application must perform some action similar to the below code before it can utilize the tables. + </p> + <pre> + case mnesia:wait_for_tables([a, b], 20000) of + {timeout, RemainingTabs} -> + panic(RemainingTabs); + ok -> + synced + end. + </pre> + <warning> + <p>When tables are forcefully loaded from the local disc, + all operations that were performed on the replicated table + while the local node was down, and the remote replica was + alive, are lost. This can cause the database to become + inconsistent.</p> + </warning> + <p>If the start-up procedure fails, the + <c>mnesia:start()</c> function returns the cryptic tuple + <c>{error,{shutdown, {mnesia_sup,start,[normal,[]]}}}</c>. + Use command line arguments -boot start_sasl as argument to + the erl script in order to get more information + about the start failure. + </p> + </section> + </section> + + <section> + <marker id="create_tables"></marker> + <title>Creating New Tables</title> + <p>Mnesia provides one function to create new tables. 
This + function is: <c>mnesia:create_table(Name, ArgList).</c></p> + <p>When executing this function, it returns one of the following + responses: + </p> + <list type="bulleted"> + <item><c>{atomic, ok}</c> if the function executes + successfully + </item> + <item><c>{aborted, Reason}</c> if the function fails. + </item> + </list> + <p>The function arguments are: + </p> + <list type="bulleted"> + <item><c>Name</c> is the atomic name of the table. It is + usually the same name as the name of the records that + constitute the table. (See <c>record_name</c> for more + details.) + </item> + <item> + <p><c>ArgList</c> is a list of <c>{Key,Value}</c> tuples. + The following arguments are valid: + </p> + <list type="bulleted"> + <item> + <p><c>{type, Type}</c> where <c>Type</c> must be either of the + atoms <c>set</c>, <c>ordered_set</c> or <c>bag</c>. + The default value is + <c>set</c>. Note: currently 'ordered_set' + is not supported for 'disc_only_copies' tables. + A table of type <c>set</c> or <c>ordered_set</c> has either zero or + one record per key. Whereas a table of type <c>bag</c> can + have an arbitrary number of records per key. The key for + each record is always the first attribute of the record.</p> + <p>The following example illustrates the difference between + type <c>set</c> and <c>bag</c>: </p> + <pre> + f() -> F = fun() -> +\011 mnesia:write({foo, 1, 2}), mnesia:write({foo, 1, 3}), +\011 mnesia:read({foo, 1}) end, mnesia:transaction(F). </pre> + <p>This transaction will return the list <c>[{foo,1,3}]</c> if + the <c>foo</c> table is of type <c>set</c>. However, list + <c>[{foo,1,2}, {foo,1,3}]</c> will return if the table is + of type <c>bag</c>. Note the use of <c>bag</c> and + <c>set</c> table types. </p> + <p>Mnesia tables can never contain + duplicates of the same record in the same table. Duplicate + records have attributes with the same contents and key. 
+ </p> + </item> + <item> + <p><c>{disc_copies, NodeList}</c>, where <c>NodeList</c> is a + list of the nodes where this table will reside on disc.</p> + <p>Write operations to a table replica of type + <c>disc_copies</c> will write data to the disc copy as well + as to the RAM copy of the table. </p> + <p>It is possible to have a + replicated table of type <c>disc_copies</c> on one node, and + the same table stored as a different type on another node. + The default value is <c>[]</c>. This arrangement is + desirable if the following operational + characteristics are required:</p> + <list type="ordered"> + <item>read operations must be very fast and performed in RAM + </item> + <item>all write operations must be written to persistent + storage.</item> + </list> + <p>A write operation on a <c>disc_copies</c> table + replica will be performed in two steps. First the write + operation is appended to a log file, then the actual + operation is performed in RAM. + </p> + </item> + <item> + <p><c>{ram_copies, NodeList}</c>, where <c>NodeList</c> is a + list of the nodes where this table is stored in RAM. The + default value for <c>NodeList</c> is <c>[node()]</c>. If the + default value is used to create a new table, it will be + located on the local node only. </p> + <p>Table replicas of type + <c>ram_copies</c> can be dumped to disc with the function + <c>mnesia:dump_tables(TabList)</c>. + </p> + </item> + <item><c>{disc_only_copies, NodeList}</c>. These table + replicas are stored on disc only and are therefore slower to + access. However, a disc only replica consumes less memory than + a table replica of the other two storage types. + </item> + <item><c>{index, AttributeNameList}</c>, where + <c>AttributeNameList</c> is a list of atoms specifying the + names of the attributes Mnesia shall build and maintain. An + index table will exist for every element in the list. The + first field of a Mnesia record is the key and thus needs no + extra index. 
+ <br></br> +The first field of a record is the second element of the + tuple, which is the representation of the record. + </item> + <item><c>{snmp, SnmpStruct}</c>. <c>SnmpStruct</c> is + described in the SNMP User Guide. Basically, if this attribute + is present in <c>ArgList</c> of <c>mnesia:create_table/2</c>, + the table is immediately accessible by means of the Simple + Network Management Protocol (SNMP). + <br></br> +It is easy to design applications which use SNMP to + manipulate and control the system. Mnesia provides a direct + mapping between the logical tables that make up an SNMP + control application and the physical data which make up a + Mnesia table. <c>[]</c> + is default. + </item> + <item><c>{local_content, true}</c> When an application needs a + table whose contents should be locally unique on each + node, + <c>local_content</c> tables may be used. The name of the + table is known to all Mnesia nodes, but its contents is + unique for each node. Access to this type of table must be + done locally. </item> + <item> + <p><c>{attributes, AtomList}</c> is a list of the attribute + names for the records that are supposed to populate the + table. The default value is the list <c>[key, val]</c>. The + table must at least have one extra attribute besides the + key. When accessing single attributes in a record, it is not + recommended to hard code the attribute names as atoms. Use + the construct <c>record_info(fields,record_name)</c> + instead. The expression + <c>record_info(fields,record_name)</c> is processed by the + Erlang macro pre-processor and returns a list of the + record's field names. With the record definition + <c>-record(foo, {x,y,z}).</c> the expression + <c>record_info(fields,foo)</c> is expanded to the list + <c>[x,y,z]</c>. Accordingly, it is possible to provide the + attribute names yourself, or to use the <c>record_info/2</c> + notation. 
</p> + <p>It is recommended that + the <c>record_info/2</c> notation be used as it is easier to + maintain the program and it will be more robust with regards + to future record changes. + </p> + </item> + <item> + <p><c>{record_name, Atom}</c> specifies the common name of + all records stored in the table. All records, stored in + the table, must have this name as their first element. + The <c>record_name</c> defaults to the name of the + table. For more information see Chapter 4:<seealso marker="Mnesia_chap4#recordnames_tablenames">Record Names Versus Table Names</seealso>.</p> + </item> + </list> + </item> + </list> + <p>As an example, assume we have the record definition:</p> + <pre> + -record(funky, {x, y}). + </pre> + <p>The below call would create a table which is replicated on two + nodes, has an additional index on the <c>y</c> attribute, and is + of type + <c>bag</c>.</p> + <pre> + mnesia:create_table(funky, [{disc_copies, [N1, N2]}, {index, + [y]}, {type, bag}, {attributes, record_info(fields, funky)}]). + </pre> + <p>Whereas a call to the below default code values: </p> + <pre> +mnesia:create_table(stuff, []) </pre> + <p>would return a table with a RAM copy on the + local node, no additional indexes and the attributes defaulted to + the list <c>[key,val]</c>.</p> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc new file mode 100644 index 0000000000..7d89c1b0dd --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc @@ -0,0 +1,1171 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. 
You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Transactions and Other Access Contexts</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_chap4.xml</file> + </header> + <p>This chapter describes the Mnesia transaction system and the + transaction properties which make Mnesia a fault tolerant, + distributed database management system. + </p> + <p>Also covered in this chapter are the locking functions, + including table locks and sticky locks, as well as alternative + functions which bypass the transaction system in favor of improved + speed and reduced overheads. These functions are called "dirty + operations". We also describe the usage of nested transactions. + This chapter contains the following sections: + </p> + <list type="bulleted"> + <item>transaction properties, which include atomicity, + consistency, isolation, and durability + </item> + <item>Locking + </item> + <item>Dirty operations + </item> + <item>Record names vs table names + </item> + <item>Activity concept and various access contexts + </item> + <item>Nested transactions + </item> + <item>Pattern matching + </item> + <item>Iteration + </item> + </list> + + <section> + <marker id="trans_prop"></marker> + <title>Transaction Properties</title> + <p>Transactions are an important tool when designing fault + tolerant, distributed systems. A Mnesia transaction is a mechanism + by which a series of database operations can be executed as one + functional block. 
The functional block which is run as a + transaction is called a Functional Object (Fun), and this code can + read, write, or delete Mnesia records. The Fun is evaluated as a + transaction which either commits, or aborts. If a transaction + succeeds in executing Fun it will replicate the action on all nodes + involved, or abort if an error occurs. + </p> + <p>The following example shows a transaction which raises the + salary of certain employee numbers. + </p> + <codeinclude file="company.erl" tag="%5" type="erl"></codeinclude> + <p>The transaction <c>raise(Eno, Raise) -></c> contains a Fun + made up of four lines of code. This Fun is called by the statement + <c>mnesia:transaction(F)</c> and returns a value. + </p> + <p>The Mnesia transaction system facilitates the construction of + reliable, distributed systems by providing the following important + properties: + </p> + <list type="bulleted"> + <item>The transaction handler ensures that a Fun which is placed + inside a transaction does not interfere with operations embedded + in other transactions when it executes a series of operations on + tables. + </item> + <item>The transaction handler ensures that either all operations + in the transaction are performed successfully on all nodes + atomically, or the transaction fails without permanent effect on + any of the nodes. + </item> + <item>The Mnesia transactions have four important properties, + which we call <em>A</em>tomicity, + <em>C</em>onsistency, <em>I</em>solation, and + <em>D</em>urability, or ACID for short. These properties are + described in the following sub-sections.</item> + </list> + + <section> + <title>Atomicity</title> + <p><em>Atomicity</em> means that database changes which are + executed by a transaction take effect on all nodes involved, or + on none of the nodes. In other words, the transaction either + succeeds entirely, or it fails entirely.
+ </p> + <p>Atomicity is particularly important when we want to + atomically write more than one record in the same + transaction. The <c>raise/2</c> function, shown as an example + above, writes one record only. The <c>insert_emp/3</c> function, + shown in the program listing in Chapter 2, writes the record + <c>employee</c> as well as employee relations such as + <c>at_dep</c> and <c>in_proj</c> into the database. If we run + this latter code inside a transaction, then the transaction + handler ensures that the transaction either succeeds completely, + or not at all. + </p> + <p>Mnesia is a distributed DBMS where data can be replicated on + several nodes. In many such applications, it is important that a + series of write operations are performed atomically inside a + transaction. The atomicity property ensures that a transaction + takes effect on all nodes, or none at all. </p> + </section> + + <section> + <title>Consistency</title> + <p><em>Consistency</em>. This transaction property ensures that + a transaction always leaves the DBMS in a consistent state. For + example, Mnesia ensures that inconsistencies will not occur if + Erlang, Mnesia or the computer crashes while a write operation + is in progress. + </p> + </section> + + <section> + <title>Isolation</title> + <p><em>Isolation</em>. This transaction property ensures that + transactions which execute on different nodes in a network, and + access and manipulate the same data records, will not interfere + with each other. + </p> + <p>The isolation property makes it possible to concurrently execute + the <c>raise/2</c> function. A classical problem in concurrency control + theory is the so called "lost update problem". + </p> + <p>The isolation property is extremely useful in the following + situation: two processes (P1 and P2) are concurrently trying to + raise the salary for the same employee (with employee number + 123).
The initial value of the + employee's salary is, for example, 5. Process P1 then starts to execute; + it reads the employee record and adds 2 to the salary. At this + point in time, process P1 is for some reason preempted and + process P2 has the opportunity to run. P2 reads the record, adds 3 + to the salary, and finally writes a new employee record with + the salary set to 8. Now, process P1 starts to run again and + writes its employee record with salary set to 7, thus + effectively overwriting and undoing the work performed by + process P2. The update performed by P2 is lost. + </p> + <p>A transaction system makes it possible to concurrently + execute two or more processes which manipulate the same + record. The programmer does not need to check that the + updates are synchronous; this is overseen by the + transaction handler. All programs accessing the database through + the transaction system may be written as if they had sole access + to the data. + </p> + </section> + + <section> + <title>Durability</title> + <p><em>Durability</em>. This transaction property ensures that + changes made to the DBMS by a transaction are permanent. Once a + transaction has been committed, all changes made to the database + are durable - i.e. they are written safely to disc and will not + be corrupted or disappear. + </p> + <note> + <p>The durability feature described does not entirely apply to + situations where Mnesia is configured as a "pure" primary memory + database. + </p> + </note> + </section> + </section> + + <section> + <title>Locking</title> + <p>Different transaction managers employ different strategies to + satisfy the isolation property. Mnesia uses the standard technique + of two-phase locking. This means that locks are set on records + before they are read or written. Mnesia uses five different kinds + of locks. + </p> + <list type="bulleted"> + <item><em>Read locks</em>. A read lock is set on one replica of + a record before it can be read.
+ </item> + <item><em>Write locks</em>. Whenever a transaction writes to a + record, write locks are first set on all replicas of that + particular record. + </item> + <item><em>Read table locks</em>. If a transaction traverses an + entire table in search of a record which satisfies some + particular property, it is most inefficient to set read locks on + the records, one by one. It is also very memory consuming, since + the read locks themselves may take up considerable space if the + table is very large. For this reason, Mnesia can set a read lock + on an entire table. + </item> + <item><em>Write table locks</em>. If a transaction writes a + large number of records to one table, it is possible to set a + write lock on the entire table. + </item> + <item><em>Sticky locks</em>. These are write locks that stay in + place at a node after the transaction which initiated the lock + has terminated. </item> + </list> + <p>Mnesia employs a strategy whereby functions such as + <c>mnesia:read/1</c> acquire the necessary locks dynamically as + the transactions execute. Mnesia automatically sets and releases + the locks and the programmer does not have to code these + operations. + </p> + <p>Deadlocks can occur when concurrent processes set and release + locks on the same records. Mnesia employs a "wait-die" strategy to + resolve these situations. If Mnesia suspects that a deadlock can + occur when a transaction tries to set a lock, the transaction is + forced to release all its locks and sleep for a while. The + Fun in the transaction will then be evaluated again. + </p> + <p>For this reason, it is important that the code inside the Fun given to + <c>mnesia:transaction/1</c> is pure. Some strange results can + occur if, for example, messages are sent by the transaction + Fun. The following example illustrates this situation: + </p> + <codeinclude file="company.erl" tag="%6" type="erl"></codeinclude> + <p>This transaction could write the text <c>"Trying to write ...
"</c> a thousand times to the terminal. Mnesia does guarantee, + however, that each and every transaction will eventually run. As a + result, Mnesia is not only deadlock free, but also livelock + free. + </p> + <p>The Mnesia programmer cannot prioritize one particular + transaction to execute before other transactions which are waiting + to execute. As a result, the Mnesia DBMS transaction system is not + suitable for hard real time applications. However, Mnesia contains + other features that have real time properties. + </p> + <p>Mnesia dynamically sets and releases locks as + transactions execute, therefore, it is very dangerous to execute code with + transaction side-effects. In particular, a <c>receive</c> + statement inside a transaction can lead to a situation where the + transaction hangs and never returns, which in turn can cause locks + not to release. This situation could bring the whole system to a + standstill since other transactions which execute in other + processes, or on other nodes, are forced to wait for the defective + transaction. + </p> + <p>If a transaction terminates abnormally, Mnesia will + automatically release the locks held by the transaction. + </p> + <p>We have shown examples of a number of functions that can be + used inside a transaction. The following list shows the + <em>simplest</em> Mnesia functions that work with transactions. It + is important to realize that these functions must be embedded in a + transaction. If no enclosing transaction (or other enclosing + Mnesia activity) exists, they will all fail. + </p> + <list type="bulleted"> + <item><c>mnesia:transaction(Fun) -> {aborted, Reason} |{atomic, Value}</c>. This function executes one transaction with the + functional object <c>Fun</c> as the single parameter. + </item> + <item><c>mnesia:read({Tab, Key}) -> transaction abort | RecordList</c>. This function reads all records with <c>Key</c> + as key from table <c>Tab</c>. 
This function has the same semantics + regardless of the location of <c>Tab</c>. If the table is of + type <c>bag</c>, <c>read({Tab, Key})</c> can return an arbitrarily + long list. If the table is of type <c>set</c>, the list is + either of length one, or <c>[]</c>. + </item> + <item><c>mnesia:wread({Tab, Key}) -> transaction abort | RecordList</c>. This function behaves the same way as the + previously listed <c>read/1</c> function, except that it + acquires a write lock instead of a read lock. If we execute a + transaction which reads a record, modifies the record, and then + writes the record, it is slightly more efficient to set the + write lock immediately. In cases where we issue a + <c>mnesia:read/1</c>, followed by a <c>mnesia:write/1</c>, the + first read lock must be upgraded to a write lock when the write + operation is executed. + </item> + <item><c>mnesia:write(Record) -> transaction abort | ok</c>. This function writes a record into the database. The + <c>Record</c> argument is an instance of a record. The function + returns <c>ok</c>, or aborts the transaction if an error should + occur. + </item> + <item><c>mnesia:delete({Tab, Key}) -> transaction abort | ok</c>. This + function deletes all records with the given key. + </item> + <item><c>mnesia:delete_object(Record) -> transaction abort | ok</c>. This function deletes records with object id + <c>Record</c>. This function is used when we want to delete only + some records in a table of type <c>bag</c>. </item> + </list> + + <section> + <title>Sticky Locks</title> + <p>As previously stated, the locking strategy used by Mnesia is + to lock one replica of a record when we read a record, and lock all replicas + of a record when we write a record. However, there are + applications which use Mnesia mainly for its fault-tolerant + qualities, and these applications may be configured with one + node doing all the heavy work, and a standby node which is ready + to take over in case the main node fails.
Such applications may + benefit from using sticky locks instead of the normal locking + scheme. + </p> + <p>A sticky lock is a lock which stays in place at a node after + the transaction which first acquired the lock has terminated. To + illustrate this, assume that we execute the following + transaction: + </p> + <code type="none"> + F = fun() -> + mnesia:write(#foo{a = kalle}) + end, + mnesia:transaction(F). + </code> + <p>The <c>foo</c> table is replicated on the two nodes <c>N1</c> + and <c>N2</c>. + <br></br> +Normal locking requires: + </p> + <list type="bulleted"> + <item>one network rpc (2 messages) to acquire the write lock + </item> + <item>three network messages to execute the two-phase commit protocol. + </item> + </list> + <p>If we use sticky locks, we must first change the code as follows: + </p> + <code type="none"> + + F = fun() -> + mnesia:s_write(#foo{a = kalle}) + end, + mnesia:transaction(F). + </code> + <p>This code uses the <c>s_write/1</c> function instead of the + <c>write/1</c> function. The <c>s_write/1</c> function sets a + sticky lock instead of a normal lock. If the table is not + replicated, sticky locks have no special effect. If the table is + replicated, and we set a sticky lock on node <c>N1</c>, this + lock will then stick to node <c>N1</c>. The next time we try to + set a sticky lock on the same record at node <c>N1</c>, Mnesia + will see that the lock is already set and will not do a network + operation in order to acquire the lock. + </p> + <p>It is much more efficient to set a local lock than it is to set + a networked lock, and for this reason sticky locks can benefit + application that use a replicated table and perform most of the + work on only one of the nodes. + </p> + <p>If a record is stuck at node <c>N1</c> and we try to set a + sticky lock for the record on node <c>N2</c>, the record must be + unstuck. This operation is expensive and will reduce performance. 
The unsticking is + done automatically if we issue <c>s_write/1</c> requests at + <c>N2</c>. + </p> + </section> + + <section> + <title>Table Locks</title> + <p>Mnesia supports read and write locks on whole tables as a + complement to the normal locks on single records. As previously + stated, Mnesia sets and releases locks automatically, and the + programmer does not have to code these operations. However, + transactions which read and write a large number of records in a + specific table will execute more efficiently if we start the + transaction by setting a table lock on this table. This will + block other concurrent transactions from the table. The + following two function are used to set explicit table locks for + read and write operations: + </p> + <list type="bulleted"> + <item><c>mnesia:read_lock_table(Tab)</c> Sets a read lock on + the table <c>Tab</c></item> + <item><c>mnesia:write_lock_table(Tab)</c> Sets a write lock on + the table <c>Tab</c></item> + </list> + <p>Alternate syntax for acquisition of table locks is as follows: + </p> + <code type="none"> + mnesia:lock({table, Tab}, read) + mnesia:lock({table, Tab}, write) + </code> + <p>The matching operations in Mnesia may either lock the entire + table or just a single record (when the key is bound in the + pattern). + </p> + </section> + + <section> + <title>Global Locks</title> + <p>Write locks are normally acquired on all nodes where a + replica of the table resides (and is active). Read locks are + acquired on one node (the local one if a local + replica exists). + </p> + <p>The function <c>mnesia:lock/2</c> is intended to support + table locks (as mentioned previously) + but also for situations when locks need to be + acquired regardless of how tables have been replicated: + </p> + <code type="none"> + mnesia:lock({global, GlobalKey, Nodes}, LockKind) + + LockKind ::= read | write | ... 
+ </code> + <p>The lock is acquired on the LockItem on all Nodes in the + nodes list.</p> + </section> + </section> + + <section> + <title>Dirty Operations</title> + <p>In many applications, the overhead of processing a transaction + may result in a loss of performance. Dirty operations are short + cuts which bypass much of the processing and increase the speed + of the operation. + </p> + <p>Dirty operations are useful in many situations, for example in a datagram routing + application where Mnesia stores the routing table, and it is time + consuming to start a whole transaction every time a packet is + received. For this reason, Mnesia has functions which manipulate + tables without using transactions. This alternative + to transaction processing is known as a dirty operation. However, it is important to + realize the trade-off in avoiding the overhead of transaction + processing: + </p> + <list type="bulleted"> + <item>The atomicity and the isolation properties of Mnesia are lost. + </item> + <item>The isolation property is compromised, because other + Erlang processes, which use transactions to manipulate the data, + do not get the benefit of isolation if we simultaneously use + dirty operations to read and write records from the same table. + </item> + </list> + <p>The major advantage of dirty operations is that they execute + much faster than equivalent operations that are processed as + functional objects within a transaction. + </p> + <p>Dirty operations + are written to disc if they are performed on a table of type + <c>disc_copies</c>, or type <c>disc_only_copies</c>. Mnesia also + ensures that all replicas of a table are updated if a + dirty write operation is performed on a table. + </p> + <p>A dirty operation will ensure a certain level of consistency. + For example, it is not possible for dirty operations to return + garbled records. Hence, each individual read or write operation + is performed in an atomic manner.
+ </p> + <p>All dirty functions execute a call to <c>exit({aborted, Reason})</c> on failure. Even if the following functions are + executed inside a transaction no locks will be acquired. The + following functions are available: + </p> + <list type="bulleted"> + <item><c>mnesia:dirty_read({Tab, Key})</c>. This function reads + record(s) from Mnesia. + </item> + <item><c>mnesia:dirty_write(Record)</c>. This function writes + the record <c>Record</c></item> + <item><c>mnesia:dirty_delete({Tab, Key})</c>. This function deletes + record(s) with the key <c>Key</c>. + </item> + <item><c>mnesia:dirty_delete_object(Record)</c> This function is + the dirty operation alternative to the function + <c>delete_object/1</c></item> + <item> + <p><c>mnesia:dirty_first(Tab)</c>. This function returns the + "first" key in the table <c>Tab</c>. </p> + <p>Records in <c>set</c> or <c>bag</c> tables are not sorted. + However, there is + a record order which is not known to the user. + This means that it is possible to traverse a table by means of + this function in conjunction with the <c>dirty_next/2</c> + function. + </p> + <p>If there are no records at all in the table, this function + will return the atom <c>'$end_of_table'</c>. It is not + recommended to use this atom as the key for any user + records. + </p> + </item> + <item><c>mnesia:dirty_next(Tab, Key)</c>. This function returns + the "next" key in the table <c>Tab</c>. This function makes it + possible to traverse a table and perform some operation on all + records in the table. When the end of the table is reached the + special key <c>'$end_of_table'</c> is returned. Otherwise, the + function returns a key which can be used to read the actual + record. + <br></br> +The behavior is undefined if any process perform a write + operation on the table while we traverse the table with the + <c>dirty_next/2</c> function. 
This is because <c>write</c> + operations on a Mnesia table may lead to internal reorganizations + of the table itself. This is an implementation detail, but remember + the dirty functions are low level functions. + </item> + <item><c>mnesia:dirty_last(Tab)</c> This function works exactly as + <c>mnesia:dirty_first/1</c> but returns the last key in + Erlang term order for the <c>ordered_set</c> table type. For + all other table types, <c>mnesia:dirty_first/1</c> and + <c>mnesia:dirty_last/1</c> are synonyms. + </item> + <item><c>mnesia:dirty_prev(Tab, Key)</c> This function works exactly as + <c>mnesia:dirty_next/2</c> but returns the previous key in + Erlang term order for the <c>ordered_set</c> table type. For + all other table types, <c>mnesia:dirty_next/2</c> and + <c>mnesia:dirty_prev/2</c> are synonyms. + </item> + <item> + <p><c>mnesia:dirty_slot(Tab, Slot)</c></p> + <p>Returns the list of records that are associated with Slot + in a table. It can be used to traverse a table in a manner + similar to the <c>dirty_next/2</c> function. A table has a + number of slots that range from zero to some unknown upper + bound. The function <c>dirty_slot/2</c> returns the special + atom <c>'$end_of_table'</c> when the end of the table is + reached. + <br></br> +The behavior of this function is undefined if the + table is written to while being + traversed. <c>mnesia:read_lock_table(Tab)</c> may be used to + ensure that no transaction protected writes are performed + during the iteration. + </p> + </item> + <item> + <p><c>mnesia:dirty_update_counter({Tab,Key}, Val)</c>. </p> + <p>Counters are integers with a value greater than or + equal to zero. Updating a counter will add <c>Val</c> to + the counter, where <c>Val</c> is a positive or negative integer. + <br></br> + There exist no special counter records in + Mnesia. However, records of the form <c>{TabName, Key, Integer}</c> can be used as counters, and can be + persistent.
+ </p> + <p>It is not possible to have transaction protected updates of + counter records. + </p> + <p>There are two significant differences when using this + function instead of reading the record, performing the + arithmetic, and writing the record: + </p> + <list type="ordered"> + <item>it is much more efficient + </item> + <item>the <c>dirty_update_counter/2</c> function is + performed as an atomic operation although it is not protected by + a transaction. Accordingly, no table update is lost if two + processes simultaneously execute the + <c>dirty_update_counter/2</c> function. + </item> + </list> + </item> + <item><c>mnesia:dirty_match_object(Pat)</c>. This function is + the dirty equivalent of <c>mnesia:match_object/1</c>. + </item> + <item><c>mnesia:dirty_select(Tab, Pat)</c>. This function is + the dirty equivalent of <c>mnesia:select/2</c>. + </item> + <item><c>mnesia:dirty_index_match_object(Pat, Pos)</c>. This + function is the dirty equivalent of + <c>mnesia:index_match_object/2</c>. + </item> + <item><c>mnesia:dirty_index_read(Tab, SecondaryKey, Pos)</c>. This + function is the dirty equivalent of <c>mnesia:index_read/3</c>. + </item> + <item><c>mnesia:dirty_all_keys(Tab)</c>. This function is the + dirty equivalent of <c>mnesia:all_keys/1</c>. + </item> + </list> + </section> + + <section> + <marker id="recordnames_tablenames"></marker> + <title>Record Names versus Table Names</title> + <p>In Mnesia, all records in a table must have the same name. All + the records must be instances of the same + record type. The record name does not, however, necessarily have to be + the same as the table name, even though that is the case in + most of the examples in this document.
If a table is created + without the <c>record_name</c> property the code below will + ensure all records in the table have the same name as the table: + </p> + <code type="none"> + mnesia:create_table(subscriber, []) + </code> + <p>However, if the tables are created with an explicit record name + as argument, as shown below, it is possible to store subscriber + records in both of the tables regardless of the table names: + </p> + <code type="none"> + TabDef = [{record_name, subscriber}], + mnesia:create_table(my_subscriber, TabDef), + mnesia:create_table(your_subscriber, TabDef). + </code> + <p>In order to access such + tables it is not possible to use the simplified access functions + as described earlier in the document. For example, + writing a subscriber record into a table requires a + <c>mnesia:write/3</c> function instead of the simplified functions + <c>mnesia:write/1</c> and <c>mnesia:s_write/1</c>: + </p> + <code type="none"> + mnesia:write(subscriber, #subscriber{}, write) + mnesia:write(my_subscriber, #subscriber{}, sticky_write) + mnesia:write(your_subscriber, #subscriber{}, write) + </code> + <p>The following simplified piece of code illustrates the + relationship between the simplified access functions used in + most examples and their more flexible counterparts: + </p> + <code type="none"> + mnesia:dirty_write(Record) -> + Tab = element(1, Record), + mnesia:dirty_write(Tab, Record). + + mnesia:dirty_delete({Tab, Key}) -> + mnesia:dirty_delete(Tab, Key). + + mnesia:dirty_delete_object(Record) -> + Tab = element(1, Record), + mnesia:dirty_delete_object(Tab, Record). + + mnesia:dirty_update_counter({Tab, Key}, Incr) -> + mnesia:dirty_update_counter(Tab, Key, Incr). + + mnesia:dirty_read({Tab, Key}) -> + mnesia:dirty_read(Tab, Key). + + mnesia:dirty_match_object(Pattern) -> + Tab = element(1, Pattern), + mnesia:dirty_match_object(Tab, Pattern).
+ + mnesia:dirty_index_match_object(Pattern, Attr) -> + Tab = element(1, Pattern), + mnesia:dirty_index_match_object(Tab, Pattern, Attr). + + mnesia:write(Record) -> + Tab = element(1, Record), + mnesia:write(Tab, Record, write). + + mnesia:s_write(Record) -> + Tab = element(1, Record), + mnesia:write(Tab, Record, sticky_write). + + mnesia:delete({Tab, Key}) -> + mnesia:delete(Tab, Key, write). + + mnesia:s_delete({Tab, Key}) -> + mnesia:delete(Tab, Key, sticky_write). + + mnesia:delete_object(Record) -> + Tab = element(1, Record), + mnesia:delete_object(Tab, Record, write). + + mnesia:s_delete_object(Record) -> + Tab = element(1, Record), + mnesia:delete_object(Tab, Record, sticky_write). + + mnesia:read({Tab, Key}) -> + mnesia:read(Tab, Key, read). + + mnesia:wread({Tab, Key}) -> + mnesia:read(Tab, Key, write). + + mnesia:match_object(Pattern) -> + Tab = element(1, Pattern), + mnesia:match_object(Tab, Pattern, read). + + mnesia:index_match_object(Pattern, Attr) -> + Tab = element(1, Pattern), + mnesia:index_match_object(Tab, Pattern, Attr, read).
+ </code> + </section> + + <section> + <title>Activity Concept and Various Access Contexts</title> + <p>As previously described, a functional object (Fun) performing + table access operations as listed below may be + passed as an argument to the function + <c>mnesia:transaction/1,2,3</c>: + </p> + <list type="bulleted"> + <item> + <p>mnesia:write/3 (write/1, s_write/1)</p> + </item> + <item> + <p>mnesia:delete/3 (delete/1, s_delete/1)</p> + </item> + <item> + <p>mnesia:delete_object/3 (delete_object/1, s_delete_object/1)</p> + </item> + <item> + <p>mnesia:read/3 (read/1, wread/1)</p> + </item> + <item> + <p>mnesia:match_object/3 (match_object/1)</p> + </item> + <item> + <p>mnesia:select/3 (select/2)</p> + </item> + <item> + <p>mnesia:foldl/3 (foldl/4, foldr/3, foldr/4)</p> + </item> + <item> + <p>mnesia:all_keys/1</p> + </item> + <item> + <p>mnesia:index_match_object/4 (index_match_object/2)</p> + </item> + <item> + <p>mnesia:index_read/3</p> + </item> + <item> + <p>mnesia:lock/2 (read_lock_table/1, write_lock_table/1)</p> + </item> + <item> + <p>mnesia:table_info/2</p> + </item> + </list> + <p>These functions will be performed in a + transaction context involving mechanisms like locking, logging, + replication, checkpoints, subscriptions, commit protocols + etc. However, the same function may also be + evaluated in other activity contexts. + <br></br> +The following activity access contexts are currently supported: + </p> + <list type="bulleted"> + <item> + <p>transaction </p> + </item> + <item> + <p>sync_transaction</p> + </item> + <item> + <p>async_dirty</p> + </item> + <item> + <p>sync_dirty</p> + </item> + <item> + <p>ets</p> + </item> + </list> + <p>By passing the same "fun" as argument to the function + <c>mnesia:sync_transaction(Fun [, Args])</c> it will be performed + in synced transaction context. Synced transactions wait until all + active replicas have committed the transaction (to disc) before + returning from the mnesia:sync_transaction call.
Using + sync_transaction is useful for applications that are executing on + several nodes and want to be sure that the update is performed on + the remote nodes before a remote process is spawned or a message + is sent to a remote process, and also when combining transaction + writes with dirty_reads. This is also useful in situations where + an application performs frequent or voluminous updates which may + overload Mnesia on other nodes. + </p> + <p>By passing the same "fun" as argument to the function + <c>mnesia:async_dirty(Fun [, Args])</c> it will be performed in + dirty context. The function calls will be mapped to the + corresponding dirty functions. This will still involve logging, + replication and subscriptions but there will be no locking, + local transaction storage or commit protocols involved. + Checkpoint retainers will be updated but will be updated + "dirty". Thus, they will be updated asynchronously. The + functions will wait for the operation to be performed on one + node but not the others. If the table resides locally no waiting + will occur. + </p> + <p>By passing the same "fun" as an argument to the function + <c>mnesia:sync_dirty(Fun [, Args])</c> it will be performed in + almost the same context as <c>mnesia:async_dirty/1,2</c>. The + difference is that the operations are performed + synchronously. The caller will wait for the updates to be + performed on all active replicas. Using sync_dirty is useful for + applications that are executing on several nodes and want to be + sure that the update is performed on the remote nodes before a remote + process is spawned or a message is sent to a remote process. This + is also useful in situations where an application performs frequent or + voluminous updates which may overload Mnesia on other + nodes. 
+ </p> + <p>You can check if your code is executed within a transaction with + <c>mnesia:is_transaction/0</c>; it returns <c>true</c> when called + inside a transaction context and <c>false</c> otherwise.</p> + + <p>Mnesia tables with storage type RAM_copies and disc_copies + are implemented internally as "ets-tables" and + it is possible for applications to access these tables + directly. This is only recommended if all options have been weighed + and the possible outcomes are understood. By passing the earlier + mentioned "fun" to the function + <c>mnesia:ets(Fun [, Args])</c> it will be performed but in a very raw + context. The operations will be performed directly on the + local ets tables assuming that the local storage type is + RAM_copies and that the table is not replicated on other + nodes. Subscriptions will not be triggered nor + checkpoints updated, but this operation is blindingly fast. Disc resident + tables should not be updated with the ets-function since the + disc will not be updated. + </p> + <p>The Fun may also be passed as an argument to the function + <c>mnesia:activity/2,3,4</c> which enables usage of customized + activity access callback modules. It can either be obtained + directly by stating the module name as argument or implicitly + by usage of the <c>access_module</c> configuration parameter. A + customized callback module may be used for several purposes, + such as providing triggers, integrity constraints, run time + statistics, or virtual tables. + <br></br> + The callback module does + not have to access real Mnesia tables, it is free to do whatever + it likes as long as the callback interface is fulfilled. + <br></br> + In Appendix C "The Activity Access Call Back Interface" the source + code for one alternate implementation is provided + (mnesia_frag.erl). The context sensitive function + <c>mnesia:table_info/2</c> may be used to provide virtual + information about a table.
One usage of this is to perform + <c>QLC</c> queries within an activity context with a + customized callback module. By providing table information about + table indices and other <c>QLC</c> requirements, + <c>QLC</c> may be used as a generic query language to + access virtual tables. + </p> + <p>QLC queries may be performed in all these activity + contexts (transaction, sync_transaction, async_dirty, sync_dirty + and ets). The ets activity will only work if the table has no + indices. + </p> + <note> + <p>The <c>mnesia:dirty_*</c> functions always execute with + async_dirty semantics regardless of which activity access context + they are invoked in. They may even be invoked without any + enclosing activity access context.</p> + </note> + </section> + + <section> + <title>Nested transactions</title> + <p>Transactions may be nested in an arbitrary fashion. A child transaction + must run in the same process as its parent. When a child transaction + aborts, the caller of the child transaction will get the + return value <c>{aborted, Reason}</c> and any work performed + by the child will be erased. If a child transaction commits, the + records written by the child will be propagated to the parent. + </p> + <p>No locks are released when child transactions terminate. Locks + created by a sequence of nested transactions are kept until + the topmost transaction terminates. Furthermore, any updates + performed by a nested transaction are only propagated + in such a manner that the parent of the nested transaction + sees the updates. No final commitment will be done until + the top level transaction is terminated. + So, although a nested transaction returns <c>{atomic, Val}</c>, + if the enclosing parent transaction is aborted, the entire + nested operation is aborted. + </p> + <p>The ability to have nested transactions with the same semantics + as top level transactions makes it easier to write + library functions that manipulate mnesia tables.
+ </p> + <p>Say for example that we have a function that adds a + new subscriber to a telephony system:</p> + <pre> + add_subscriber(S) -> + mnesia:transaction(fun() -> + case mnesia:read( .......... + </pre> + <p>This function needs to be called as a transaction. + Now assume that we wish to write a function that + both calls the <c>add_subscriber/1</c> function and + is in itself protected by the context of a transaction. + By simply calling <c>add_subscriber/1</c> from within + another transaction, a nested transaction is created. + </p> + <p>It is also possible to mix different activity access contexts while nesting, + but the dirty ones (async_dirty, sync_dirty and ets) will inherit the transaction + semantics if they are called inside a transaction and thus they will grab locks and + use two or three phase commit. + </p> + <pre> + add_subscriber(S) -> + mnesia:transaction(fun() -> + %% Transaction context + mnesia:read({some_tab, some_data}), + mnesia:sync_dirty(fun() -> + %% Still in a transaction context. + case mnesia:read( ..) ..end), end). + add_subscriber2(S) -> + mnesia:sync_dirty(fun() -> + %% In dirty context + mnesia:read({some_tab, some_data}), + mnesia:transaction(fun() -> + %% In a transaction context. + case mnesia:read( ..) ..end), end). + </pre> + </section> + + <section> + <title>Pattern Matching</title> + <marker id="matching"></marker> + <p>When it is not possible to use <c>mnesia:read/3</c> Mnesia + provides the programmer with several functions for matching + records against a pattern.
The most useful of these functions are: + </p> + <code type="none"> + mnesia:select(Tab, MatchSpecification, LockKind) -> + transaction abort | [ObjectList] + mnesia:select(Tab, MatchSpecification, NObjects, Lock) -> + transaction abort | {[Object],Continuation} | '$end_of_table' + mnesia:select(Cont) -> + transaction abort | {[Object],Continuation} | '$end_of_table' + mnesia:match_object(Tab, Pattern, LockKind) -> + transaction abort | RecordList + </code> + <p>These functions match a <c>Pattern</c> against all records in + table <c>Tab</c>. In a <c>mnesia:select</c> call <c>Pattern</c> is + a part of <c>MatchSpecification</c> described below. It is not + necessarily performed as an exhaustive search of the entire + table. By utilizing indices and bound values in the key of the + pattern, the actual work done by the function may be condensed + into a few hash lookups. Using <c>ordered_set</c> tables may reduce the + search space if the keys are partially bound. + </p> + <p>The pattern provided to the functions must be a valid record, + and the first element of the provided tuple must be the + <c>record_name</c> of the table. The special element <c>'_'</c> + matches any data structure in Erlang (also known as an Erlang + term). The special elements <c><![CDATA['$<number>']]></c> behave as Erlang + variables, i.e. they match anything, bind the first occurrence, and + match the subsequent occurrences of that variable against the bound value. + </p> + <p>Use the function <c>mnesia:table_info(Tab, wild_pattern)</c> + to obtain a basic pattern which matches all records in a table + or use the default value in record creation. + Do not make the pattern hard coded since it will make your code more + vulnerable to future changes of the record definition.
+ </p> + <code type="none"> + Wildpattern = mnesia:table_info(employee, wild_pattern), + %% Or use + Wildpattern = #employee{_ = '_'}, + </code> + <p>For the employee table the wild pattern will look like:</p> + <code type="none"> + {employee, '_', '_', '_', '_', '_', '_'}. + </code> + <p>In order to constrain the match you must replace some + of the <c>'_'</c> elements. The code for matching out + all female employees looks like: + </p> + <code type="none"> + Pat = #employee{sex = female, _ = '_'}, + F = fun() -> mnesia:match_object(Pat) end, + Females = mnesia:transaction(F). + </code> + <p>It is also possible to use the match function if we want to + check the equality of different attributes. Assume that we want + to find all employees who happen to have an employee number + which is equal to their room number: + </p> + <code type="none"> + Pat = #employee{emp_no = '$1', room_no = '$1', _ = '_'}, + F = fun() -> mnesia:match_object(Pat) end, + Odd = mnesia:transaction(F). + </code> + <p>The function <c>mnesia:match_object/3</c> lacks some important + features that <c>mnesia:select/3</c> has. For example + <c>mnesia:match_object/3</c> can only return the matching records, + and it cannot express constraints other than equality. + If we want to find the names of the male employees on the second floor + we could write: + </p> + <codeinclude file="company.erl" tag="%21" type="erl"></codeinclude> + <p>Select can be used to add additional constraints and create + output which can not be done with <c>mnesia:match_object/3</c>. </p> + <p>The second argument to select is a <c>MatchSpecification</c>. + A <c>MatchSpecification</c> is a list of <c>MatchFunctions</c>, where + each <c>MatchFunction</c> consists of a tuple containing + <c>{MatchHead, MatchCondition, MatchBody}</c>. <c>MatchHead</c> + is the same pattern used in <c>mnesia:match_object/3</c> + described above.
<c>MatchCondition</c> is a list of additional + constraints applied to each record, and <c>MatchBody</c> is used + to construct the return values. + </p> + <p>A detailed explanation of match specifications can be found in + the <em>Erts users guide: Match specifications in Erlang </em>, + and the ets/dets documentation may provide some additional + information. + </p> + <p>The functions <c>select/4</c> and <c>select/1</c> are used to + get a limited number of results, where the <c>Continuation</c> + is used to get the next chunk of results. Mnesia uses the + <c>NObjects</c> as a recommendation only, thus more or fewer + results than specified with <c>NObjects</c> may be returned in + the result list, even the empty list may be returned although there + are more results to collect. + </p> + <warning> + <p>There is a severe performance penalty in using + <c>mnesia:select/[1|2|3|4]</c> after any modifying operations + are done on that table in the same transaction, i.e. avoid using + <c>mnesia:write/1</c> or <c>mnesia:delete/1</c> before a + <c>mnesia:select</c> in the same transaction.</p> + </warning> + <p>If the key attribute is bound in a pattern, the match operation + is very efficient. However, if the key attribute in a pattern is + given as <c>'_'</c>, or <c>'$1'</c>, the whole <c>employee</c> + table must be searched for records that match. Hence if the table is + large, this can become a time consuming operation, but it can be + remedied with indices (refer to Chapter 5: <seealso marker="Mnesia_chap5#indexing">Indexing</seealso>) if + <c>mnesia:match_object</c> is used. + </p> + <p>QLC queries can also be used to search Mnesia tables. By + using <c>mnesia:table/[1|2]</c> as the generator inside a QLC + query you let the query operate on a mnesia table. Mnesia + specific options to <c>mnesia:table/2</c> are {lock, Lock}, + {n_objects,Integer} and {traverse, SelMethod}.
The <c>lock</c> + option specifies whether mnesia should acquire a read or write + lock on the table, and <c>n_objects</c> specifies how many + results should be returned in each chunk to QLC. The last option is + <c>traverse</c> and it specifies which function mnesia should + use to traverse the table. By default <c>select</c> is used, but by using + <c>{traverse, {select, MatchSpecification}}</c> as an option to + <c>mnesia:table/2</c> the user can specify its own view of the + table. + </p> + <p>If no options are specified a read lock will be acquired and 100 + results will be returned in each chunk, and select will be used + to traverse the table, i.e.: + </p> + <code type="none"> + mnesia:table(Tab) -> + mnesia:table(Tab, [{n_objects,100},{lock, read}, {traverse, select}]). + </code> + <p>The function <c>mnesia:all_keys(Tab)</c> returns all keys in a + table.</p> + </section> + + <section> + <title>Iteration</title> + <marker id="iteration"></marker> + <p>Mnesia provides a couple of functions which iterate over all + the records in a table. + </p> + <code type="none"> + mnesia:foldl(Fun, Acc0, Tab) -> NewAcc | transaction abort + mnesia:foldr(Fun, Acc0, Tab) -> NewAcc | transaction abort + mnesia:foldl(Fun, Acc0, Tab, LockType) -> NewAcc | transaction abort + mnesia:foldr(Fun, Acc0, Tab, LockType) -> NewAcc | transaction abort + </code> + <p>These functions iterate over the mnesia table <c>Tab</c> and + apply the function <c>Fun</c> to each record. The <c>Fun</c> + takes two arguments, the first argument is a record from the + table and the second argument is the accumulator. The + <c>Fun</c> returns a new accumulator. </p> + <p>The first time the <c>Fun</c> is applied <c>Acc0</c> will + be the second argument. The next time the <c>Fun</c> is called + the return value from the previous call, will be used as the + second argument. The term the last call to the Fun returns + will be the return value of the <c>fold[lr]</c> function.
+ </p> + <p>The difference between <c>foldl</c> and <c>foldr</c> is the + order the table is accessed for <c>ordered_set</c> tables, + for every other table type the functions are equivalent. + </p> + <p><c>LockType</c> specifies what type of lock shall be + acquired for the iteration, default is <c>read</c>. If + records are written or deleted during the iteration a write + lock should be acquired. </p> + <p>These functions might be used to find records in a table + when it is impossible to write constraints for + <c>mnesia:match_object/3</c>, or when you want to perform + some action on certain records. + </p> + <p>For example finding all the employees who have a salary + below 10 could look like:</p> + <code type="none"><![CDATA[ + find_low_salaries() -> + Constraint = + fun(Emp, Acc) when Emp#employee.salary < 10 -> + [Emp | Acc]; + (_, Acc) -> + Acc + end, + Find = fun() -> mnesia:foldl(Constraint, [], employee) end, + mnesia:transaction(Find). + ]]></code> + <p>Raising the salary to 10 for everyone with a salary below 10 + and returning the sum of all raises:</p> + <code type="none"><![CDATA[ + increase_low_salaries() -> + Increase = + fun(Emp, Acc) when Emp#employee.salary < 10 -> + OldS = Emp#employee.salary, + ok = mnesia:write(Emp#employee{salary = 10}), + Acc + 10 - OldS; + (_, Acc) -> + Acc + end, + IncLow = fun() -> mnesia:foldl(Increase, 0, employee, write) end, + mnesia:transaction(IncLow). + ]]></code> + <p>A lot of nice things can be done with the iterator functions + but some caution should be taken about performance and memory + utilization for large tables. </p> + <p>Call these iteration functions on nodes that contain a replica of the + table. Each call to the function <c>Fun</c> accesses the table and if the table + resides on another node it will generate a lot of unnecessary + network traffic. </p> + <p>Mnesia also provides some functions that make it possible for + the user to iterate over the table.
The order of the + iteration is unspecified if the table is not of the <c>ordered_set</c> + type. </p> + <code type="none"> + mnesia:first(Tab) -> Key | transaction abort + mnesia:last(Tab) -> Key | transaction abort + mnesia:next(Tab,Key) -> Key | transaction abort + mnesia:prev(Tab,Key) -> Key | transaction abort + mnesia:snmp_get_next_index(Tab,Index) -> {ok, NextIndex} | endOfTable + </code> + <p>The order of first/last and next/prev is only valid for + <c>ordered_set</c> tables, for all other tables, they are synonyms. + When the end of the table is reached the special key + <c>'$end_of_table'</c> is returned.</p> + <p>If records are written and deleted during the traversal, use + <c>mnesia:fold[lr]/4</c> with a <c>write</c> lock, or + <c>mnesia:write_lock_table/1</c> when using first and next.</p> + <p>Writing or deleting in transaction context creates a local copy + of each modified record, so modifying each record in a large + table uses a lot of memory. Mnesia will compensate for every + written or deleted record during the iteration in a transaction + context, which may reduce the performance. If possible avoid writing + or deleting records in the same transaction before iterating over the + table.</p> + <p>In dirty context, i.e. <c>sync_dirty</c> or <c>async_dirty</c>, + the modified records are not stored in a local copy; instead, + each record is updated separately. This generates a lot of + network traffic if the table has a replica on another node and + has all the other drawbacks that dirty operations + have. Especially for the <c>mnesia:first/1</c> and + <c>mnesia:next/2</c> commands, the same drawbacks as described + above for <c>dirty_first</c> and <c>dirty_next</c> apply, i.e.
+ no writes to the table should be done during iteration.</p> + <p></p> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc new file mode 100644 index 0000000000..3ec0aa37f5 --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc @@ -0,0 +1,1398 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Miscellaneous Mnesia Features</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_chap5.xml</file> + </header> + <p>The earlier chapters of this User Guide described how to get + started with Mnesia, and how to build a Mnesia database. In this + chapter, we will describe the more advanced features available + when building a distributed, fault tolerant Mnesia database. This + chapter contains the following sections: + </p> + <list type="bulleted"> + <item>Indexing + </item> + <item>Distribution and Fault Tolerance + </item> + <item>Table fragmentation. + </item> + <item>Local content tables. + </item> + <item>Disc-less nodes. 
+ </item> + <item>More about schema management + </item> + <item>Debugging a Mnesia application + </item> + <item>Concurrent Processes in Mnesia + </item> + <item>Prototyping + </item> + <item>Object Based Programming with Mnesia. + </item> + </list> + + <section> + <marker id="indexing"></marker> + <title>Indexing</title> + <p>Data retrieval and matching can be performed very efficiently + if we know the key for the record. Conversely, if the key is not + known, all records in a table must be searched. The larger the + table the more time consuming it will become. To remedy this + problem Mnesia's indexing capabilities are used to improve data + retrieval and matching of records. + </p> + <p>The following two functions manipulate indexes on existing tables: + </p> + <list type="bulleted"> + <item><c>mnesia:add_table_index(Tab, AttributeName) -> {aborted, R} |{atomic, ok}</c></item> + <item><c>mnesia:del_table_index(Tab, AttributeName) -> {aborted, R} |{atomic, ok}</c></item> + </list> + <p>These functions create or delete a table index on the field + defined by <c>AttributeName</c>. To illustrate this, add an + index to the table definition <c>(employee, {emp_no, name, salary, sex, phone, room_no})</c>, which is the example table + from the Company database. The function + which adds an index on the element <c>salary</c> can be expressed in + the following way: + </p> + <list type="ordered"> + <item><c>mnesia:add_table_index(employee, salary)</c></item> + </list> + <p>The indexing capabilities of Mnesia are utilized with the + following three functions, which retrieve and match records on the + basis of index entries in the database. + </p> + <list type="bulleted"> + <item><c>mnesia:index_read(Tab, SecondaryKey, AttributeName) -> transaction abort | RecordList</c>. + Avoids an exhaustive search of the entire table, by looking up + the <c>SecondaryKey</c> in the index to find the primary keys.
+ </item> + <item><c>mnesia:index_match_object(Pattern, AttributeName) -> transaction abort | RecordList</c> + Avoids an exhaustive search of the entire table, by looking up + the secondary key in the index to find the primary keys. + The secondary key is found in the <c>AttributeName</c> field of + the <c>Pattern</c>. The secondary key must be bound. + </item> + <item><c>mnesia:match_object(Pattern) -> transaction abort | RecordList</c> + Uses indices to avoid exhaustive search of the entire table. + Unlike the other functions above, this function may utilize + any index as long as the secondary key is bound.</item> + </list> + <p>These functions are further described and exemplified in + Chapter 4: <seealso marker="Mnesia_chap4#matching">Pattern matching</seealso>. + </p> + </section> + + <section> + <title>Distribution and Fault Tolerance</title> + <p>Mnesia is a distributed, fault tolerant DBMS. It is possible + to replicate tables on different Erlang nodes in a variety of + ways. The Mnesia programmer does not have to state + where the different tables reside, only the names of the + different tables are specified in the program code. This is + known as "location transparency" and it is an important + concept. In particular: + </p> + <list type="bulleted"> + <item>A program will work regardless of the + location of the data. It makes no difference whether the data + resides on the local node, or on a remote node. <em>Note:</em> The program + will run slower if the data is located on a remote node. + </item> + <item>The database can be reconfigured, and tables can be + moved between nodes. These operations do not affect the user + programs. + </item> + </list> + <p>We have previously seen that each table has a number of + system attributes, such as <c>index</c> and + <c>type</c>. + </p> + <p>Table attributes are specified when the table is created.
For + example, the following function will create a new table with two + RAM replicas: + </p> + <pre> + mnesia:create_table(foo, + [{ram_copies, [N1, N2]}, + {attributes, record_info(fields, foo)}]). + </pre> + <p>Tables can also have the following properties, + where each attribute has a list of Erlang nodes as its value. + </p> + <list type="bulleted"> + <item> + <p><c>ram_copies</c>. The value of the node list is a list of + Erlang nodes, and a RAM replica of the table will reside on + each node in the list. This is a RAM replica, and it is + important to realize that no disc operations are performed when + a program executes write operations to these replicas. However, + should permanent RAM replicas be a requirement, then the + following alternatives are available:</p> + <list type="ordered"> + <item>The <c>mnesia:dump_tables/1</c> function can be used + to dump RAM table replicas to disc. + </item> + <item>The table replicas can be backed up; either from + RAM, or from disc if dumped there with the above + function. + </item> + </list> + </item> + <item><c>disc_copies</c>. The value of the attribute is a list + of Erlang nodes, and a replica of the table will reside both + in RAM and on disc on each node in the list. Write operations + addressed to the table will address both the RAM and the disc + copy of the table. + </item> + <item><c>disc_only_copies</c>. The value of the attribute is a + list of Erlang nodes, and a replica of the table will reside + only as a disc copy on each node in the list. The major + disadvantage of this type of table replica is the access + speed. The major advantage is that the table does not occupy + space in memory. + </item> + </list> + <p>It is also possible to set and change table properties on + existing tables. Refer to Chapter 3: <seealso marker="Mnesia_chap3#def_schema">Defining the Schema</seealso> for full + details. 
+ </p> + <p>There are basically two reasons for using more than one table + replica: fault tolerance, or speed. It is worthwhile to note + that table replication provides a solution to both of these + system requirements. + </p> + <p>If we have two active table replicas, all information is + still available if one of the replicas fails. This can be a very + important property in many applications. Furthermore, if a table + replica exists at two specific nodes, applications which execute + at either of these nodes can read data from the table without + accessing the network. Network operations are considerably + slower and consume more resources than local operations. + </p> + <p>It can be advantageous to create table replicas for a + distributed application which reads data often, but writes data + seldom, in order to achieve fast read operations on the local + node. The major disadvantage with replication is the increased + time to write data. If a table has two replicas, every write + operation must access both table replicas. Since one of these + write operations must be a network operation, it is considerably + more expensive to perform a write operation to a replicated + table than to a non-replicated table. + </p> + </section> + + <section> + <title>Table Fragmentation</title> + + <section> + <title>The Concept</title> + <p>A concept of table fragmentation has been introduced in + order to cope with very large tables. The idea is to split a + table into several more manageable fragments. Each fragment + is implemented as a first class Mnesia table and may be + replicated, have indices etc. as any other table. But the + tables may neither have <c>local_content</c> nor have the + <c>snmp</c> connection activated. + </p> + <p>In order to be able to access a record in a fragmented + table, Mnesia must determine to which fragment the + actual record belongs.
This is done by the + <c>mnesia_frag</c> module, which implements the + <c>mnesia_access</c> callback behaviour. Please read the + documentation about <c>mnesia:activity/4</c> to see how + <c>mnesia_frag</c> can be used as a <c>mnesia_access</c> + callback module. + </p> + <p>At each record access <c>mnesia_frag</c> first computes + a hash value from the record key. Secondly the name of the + table fragment is determined from the hash value. And + finally the actual table access is performed by the same + functions as for non-fragmented tables. When the key is + not known beforehand, all fragments are searched for + matching records. Note: In <c>ordered_set</c> tables + the records will be ordered per fragment, and the + order is undefined in results returned by select and + match_object. + </p> + <p>The following piece of code illustrates + how an existing Mnesia table is converted to be a + fragmented table and how more fragments are added later on. + </p> + <code type="none"><![CDATA[ +Eshell V4.7.3.3 (abort with ^G) +(a@sam)1> mnesia:start(). +ok +(a@sam)2> mnesia:system_info(running_db_nodes). +[b@sam,c@sam,a@sam] +(a@sam)3> Tab = dictionary. +dictionary +(a@sam)4> mnesia:create_table(Tab, [{ram_copies, [a@sam, b@sam]}]). +{atomic,ok} +(a@sam)5> Write = fun(Keys) -> [mnesia:write({Tab,K,-K}) || K <- Keys], ok end. +#Fun<erl_eval> +(a@sam)6> mnesia:activity(sync_dirty, Write, [lists:seq(1, 256)], mnesia_frag). +ok +(a@sam)7> mnesia:change_table_frag(Tab, {activate, []}). +{atomic,ok} +(a@sam)8> mnesia:table_info(Tab, frag_properties). +[{base_table,dictionary}, + {foreign_key,undefined}, + {n_doubles,0}, + {n_fragments,1}, + {next_n_to_split,1}, + {node_pool,[a@sam,b@sam,c@sam]}] +(a@sam)9> Info = fun(Item) -> mnesia:table_info(Tab, Item) end. +#Fun<erl_eval> +(a@sam)10> Dist = mnesia:activity(sync_dirty, Info, [frag_dist], mnesia_frag). +[{c@sam,0},{a@sam,1},{b@sam,1}] +(a@sam)11> mnesia:change_table_frag(Tab, {add_frag, Dist}).
+{atomic,ok} +(a@sam)12> Dist2 = mnesia:activity(sync_dirty, Info, [frag_dist], mnesia_frag). +[{b@sam,1},{c@sam,1},{a@sam,2}] +(a@sam)13> mnesia:change_table_frag(Tab, {add_frag, Dist2}). +{atomic,ok} +(a@sam)14> Dist3 = mnesia:activity(sync_dirty, Info, [frag_dist], mnesia_frag). +[{a@sam,2},{b@sam,2},{c@sam,2}] +(a@sam)15> mnesia:change_table_frag(Tab, {add_frag, Dist3}). +{atomic,ok} +(a@sam)16> Read = fun(Key) -> mnesia:read({Tab, Key}) end. +#Fun<erl_eval> +(a@sam)17> mnesia:activity(transaction, Read, [12], mnesia_frag). +[{dictionary,12,-12}] +(a@sam)18> mnesia:activity(sync_dirty, Info, [frag_size], mnesia_frag). +[{dictionary,64}, + {dictionary_frag2,64}, + {dictionary_frag3,64}, + {dictionary_frag4,64}] +(a@sam)19> + ]]></code> + </section> + + <section> + <title>Fragmentation Properties</title> + <p>There is a table property called + <c>frag_properties</c> and may be read with + <c>mnesia:table_info(Tab, frag_properties)</c>. The + fragmentation properties is a list of tagged tuples with + the arity 2. By default the list is empty, but when it is + non-empty it triggers Mnesia to regard the table as + fragmented. The fragmentation properties are: + </p> + <taglist> + <tag><c>{n_fragments, Int}</c></tag> + <item> + <p><c>n_fragments</c> regulates how many fragments + that the table currently has. This property may explicitly + be set at table creation and later be changed with + <c>{add_frag, NodesOrDist}</c> or + <c>del_frag</c>. <c>n_fragment</c>s defaults to <c>1</c>. + </p> + </item> + <tag><c>{node_pool, List}</c></tag> + <item> + <p>The node pool contains a list of nodes and may + explicitly be set at table creation and later be changed + with <c>{add_node, Node}</c> or <c>{del_node, Node}</c>. At table creation Mnesia tries to distribute + the replicas of each fragment evenly over all the nodes in + the node pool. Hopefully all nodes will end up with the + same number of replicas. 
<c>node_pool</c> defaults to the + return value from <c>mnesia:system_info(db_nodes)</c>. + </p> + </item> + <tag><c>{n_ram_copies, Int}</c></tag> + <item> + <p>Regulates how many <c>ram_copies</c> replicas + that each fragment should have. This property may + explicitly be set at table creation. The default is + <c>0</c>, but if <c>n_disc_copies</c> and + <c>n_disc_only_copies</c> also are <c>0</c>, + <c>n_ram_copies</c> will by default be set to <c>1</c>. + </p> + </item> + <tag><c>{n_disc_copies, Int}</c></tag> + <item> + <p>Regulates how many <c>disc_copies</c> replicas + that each fragment should have. This property may + explicitly be set at table creation. The default is <c>0</c>. + </p> + </item> + <tag><c>{n_disc_only_copies, Int}</c></tag> + <item> + <p>Regulates how many <c>disc_only_copies</c> replicas + that each fragment should have. This property may + explicitly be set at table creation. The default is <c>0</c>. + </p> + </item> + <tag><c>{foreign_key, ForeignKey}</c></tag> + <item> + <p><c>ForeignKey</c> may either be the atom + <c>undefined</c> or the tuple <c>{ForeignTab, Attr}</c>, + where <c>Attr</c> denotes an attribute which should be + interpreted as a key in another fragmented table named + <c>ForeignTab</c>. Mnesia will ensure that the number of + fragments in this table and in the foreign table are + always the same. When fragments are added or deleted + Mnesia will automatically propagate the operation to all + fragmented tables that have a foreign key referring to this + table. Instead of using the record key to determine which + fragment to access, the value of the <c>Attr</c> field is + used. This feature makes it possible to automatically + co-locate records in different tables to the same + node. <c>foreign_key</c> defaults to + <c>undefined</c>.
However if the foreign key is set to + something else it will cause the default values of the + other fragmentation properties to be the same values as + the actual fragmentation properties of the foreign table. + </p> + </item> + <tag><c>{hash_module, Atom}</c></tag> + <item> + <p>Enables definition of an alternate hashing scheme. + The module must implement the <c>mnesia_frag_hash</c> + callback behaviour (see the reference manual). This + property may explicitly be set at table creation. + The default is <c>mnesia_frag_hash</c>.</p> + <p>Older tables that was created before the concept of + user defined hash modules was introduced, uses + the <c>mnesia_frag_old_hash</c> module in order to + be backwards compatible. The <c>mnesia_frag_old_hash</c> + is still using the poor deprecated <c>erlang:hash/1</c> + function. + </p> + </item> + <tag><c>{hash_state, Term}</c></tag> + <item> + <p>Enables a table specific parameterization + of a generic hash module. This property may explicitly + be set at table creation. + The default is <c>undefined</c>.</p> + <code type="none"><![CDATA[ +Eshell V4.7.3.3 (abort with ^G) +(a@sam)1> mnesia:start(). +ok +(a@sam)2> PrimProps = [{n_fragments, 7}, {node_pool, [node()]}]. +[{n_fragments,7},{node_pool,[a@sam]}] +(a@sam)3> mnesia:create_table(prim_dict, + [{frag_properties, PrimProps}, + {attributes,[prim_key,prim_val]}]). +{atomic,ok} +(a@sam)4> SecProps = [{foreign_key, {prim_dict, sec_val}}]. +[{foreign_key,{prim_dict,sec_val}}] +(a@sam)5> mnesia:create_table(sec_dict, +\011 [{frag_properties, SecProps}, +(a@sam)5> {attributes, [sec_key, sec_val]}]). +{atomic,ok} +(a@sam)6> Write = fun(Rec) -> mnesia:write(Rec) end. +#Fun<erl_eval> +(a@sam)7> PrimKey = 11. +11 +(a@sam)8> SecKey = 42. +42 +(a@sam)9> mnesia:activity(sync_dirty, Write, +\011\011 [{prim_dict, PrimKey, -11}], mnesia_frag). +ok +(a@sam)10> mnesia:activity(sync_dirty, Write, +\011\011 [{sec_dict, SecKey, PrimKey}], mnesia_frag). 
+ok +(a@sam)11> mnesia:change_table_frag(prim_dict, {add_frag, [node()]}). +{atomic,ok} +(a@sam)12> SecRead = fun(PrimKey, SecKey) -> +\011\011 mnesia:read({sec_dict, PrimKey}, SecKey, read) end. +#Fun<erl_eval> +(a@sam)13> mnesia:activity(transaction, SecRead, +\011\011 [PrimKey, SecKey], mnesia_frag). +[{sec_dict,42,11}] +(a@sam)14> Info = fun(Tab, Item) -> mnesia:table_info(Tab, Item) end. +#Fun<erl_eval> +(a@sam)15> mnesia:activity(sync_dirty, Info, +\011\011 [prim_dict, frag_size], mnesia_frag). +[{prim_dict,0}, + {prim_dict_frag2,0}, + {prim_dict_frag3,0}, + {prim_dict_frag4,1}, + {prim_dict_frag5,0}, + {prim_dict_frag6,0}, + {prim_dict_frag7,0}, + {prim_dict_frag8,0}] +(a@sam)16> mnesia:activity(sync_dirty, Info, +\011\011 [sec_dict, frag_size], mnesia_frag). +[{sec_dict,0}, + {sec_dict_frag2,0}, + {sec_dict_frag3,0}, + {sec_dict_frag4,1}, + {sec_dict_frag5,0}, + {sec_dict_frag6,0}, + {sec_dict_frag7,0}, + {sec_dict_frag8,0}] +(a@sam)17> + ]]></code> + </item> + </taglist> + </section> + + <section> + <title>Management of Fragmented Tables</title> + <p>The function <c>mnesia:change_table_frag(Tab, Change)</c> + is intended to be used for reconfiguration of fragmented + tables. The <c>Change</c> argument should have one of the + following values: + </p> + <taglist> + <tag><c>{activate, FragProps}</c></tag> + <item> + <p>Activates the fragmentation properties of an + existing table. <c>FragProps</c> should either contain + <c>{node_pool, Nodes}</c> or be empty. + </p> + </item> + <tag><c>deactivate</c></tag> + <item> + <p>Deactivates the fragmentation properties of a + table. The number of fragments must be <c>1</c>. No other + tables may refer to this table in its foreign key. + </p> + </item> + <tag><c>{add_frag, NodesOrDist}</c></tag> + <item> + <p>Adds one new fragment to a fragmented table. All + records in one of the old fragments will be rehashed and + about half of them will be moved to the new (last) + fragment. 
All other fragmented tables, which refer to this + table in their foreign key, will automatically get a new + fragment, and their records will also be dynamically + rehashed in the same manner as for the main table. + </p> + <p>The <c>NodesOrDist</c> argument may either be a list + of nodes or the result from <c>mnesia:table_info(Tab, frag_dist)</c>. The <c>NodesOrDist</c> argument is + assumed to be a sorted list with the best nodes to + host new replicas first in the list. The new fragment + will get the same number of replicas as the first + fragment (see <c>n_ram_copies</c>, <c>n_disc_copies</c> + and <c>n_disc_only_copies</c>). The <c>NodesOrDist</c> + list must at least contain one element for each + replica that needs to be allocated. + </p> + </item> + <tag><c>del_frag</c></tag> + <item> + <p>Deletes one fragment from a fragmented table. All + records in the last fragment will be moved to one of the other + fragments. All other fragmented tables which refer to + this table in their foreign key, will automatically lose + their last fragment and their records will also be + dynamically rehashed in the same manner as for the main + table. + </p> + </item> + <tag><c>{add_node, Node}</c></tag> + <item> + <p>Adds a new node to the <c>node_pool</c>. The new + node pool will affect the list returned from + <c>mnesia:table_info(Tab, frag_dist)</c>. + </p> + </item> + <tag><c>{del_node, Node}</c></tag> + <item> + <p>Deletes a node from the <c>node_pool</c>. The + new node pool will affect the list returned from + <c>mnesia:table_info(Tab, frag_dist)</c>.</p> + </item> + </taglist> + </section> + + <section> + <title>Extensions of Existing Functions</title> + <p>The function <c>mnesia:create_table/2</c> is used to + create a brand new fragmented table, by setting the table + property <c>frag_properties</c> to some proper values. + </p> + <p>The function <c>mnesia:delete_table/1</c> is used to + delete a fragmented table including all its + fragments.
There must however not exist any other + fragmented tables which refer to this table in their foreign key. + </p> + <p>The function <c>mnesia:table_info/2</c> now understands + the <c>frag_properties</c> item. + </p> + <p>If the function <c>mnesia:table_info/2</c> is invoked in + the activity context of the <c>mnesia_frag</c> module, + information about several new items may be obtained: + </p> + <taglist> + <tag><c>base_table</c></tag> + <item> + <p>the name of the fragmented table + </p> + </item> + <tag><c>n_fragments</c></tag> + <item> + <p>the actual number of fragments + </p> + </item> + <tag><c>node_pool</c></tag> + <item> + <p>the pool of nodes + </p> + </item> + <tag><c>n_ram_copies</c></tag> + <item></item> + <tag><c>n_disc_copies</c></tag> + <item></item> + <tag><c>n_disc_only_copies</c></tag> + <item> + <p>the number of replicas with storage type + <c>ram_copies</c>, <c>disc_copies</c> and <c>disc_only_copies</c> + respectively. The actual values are dynamically derived + from the first fragment. The first fragment serves as a + prototype and when the actual values need to be computed + (e.g. when adding new fragments) they are simply + determined by counting the number of replicas of + each storage type. This means, when the functions + <c>mnesia:add_table_copy/3</c>, + <c>mnesia:del_table_copy/2</c> and <c>mnesia:change_table_copy_type/3</c> are applied on the + first fragment, it will affect the settings on + <c>n_ram_copies</c>, <c>n_disc_copies</c>, and + <c>n_disc_only_copies</c>. + </p> + </item> + <tag><c>foreign_key</c></tag> + <item> + <p>the foreign key. + </p> + </item> + <tag><c>foreigners</c></tag> + <item> + <p>all other tables that refer to this table in + their foreign key. + </p> + </item> + <tag><c>frag_names</c></tag> + <item> + <p>the names of all fragments. + </p> + </item> + <tag><c>frag_dist</c></tag> + <item> + <p>a sorted list of <c>{Node, Count}</c> tuples + which is sorted in increasing <c>Count</c> order.
The + <c>Count</c> is the total number of replicas that this + fragmented table hosts on each <c>Node</c>. The list + always contains at least all nodes in the + <c>node_pool</c>. The nodes which do not belong to the + <c>node_pool</c> will be put last in the list even if + their <c>Count</c> is lower. + </p> + </item> + <tag><c>frag_size</c></tag> + <item> + <p>a list of <c>{Name, Size}</c> tuples where + <c>Name</c> is a fragment <c>Name</c> and <c>Size</c> is + how many records it contains. + </p> + </item> + <tag><c>frag_memory</c></tag> + <item> + <p>a list of <c>{Name, Memory}</c> tuples where + <c>Name</c> is a fragment <c>Name</c> and <c>Memory</c> is + how much memory it occupies. + </p> + </item> + <tag><c>size</c></tag> + <item> + <p>total size of all fragments + </p> + </item> + <tag><c>memory</c></tag> + <item> + <p>the total memory of all fragments</p> + </item> + </taglist> + </section> + + <section> + <title>Load Balancing</title> + <p>There are several algorithms for distributing records + in a fragmented table evenly over a + pool of nodes. None of them is best; it simply depends on the + application needs. Here follow some examples of + situations which may need some attention: + </p> + <p><c>permanent change of nodes</c> when a new permanent + <c>db_node</c> is introduced or dropped, it may be time to + change the pool of nodes and re-distribute the replicas + evenly over the new pool of nodes. It may also be time to + add or delete a fragment before the replicas are re-distributed. + </p> + <p><c>size/memory threshold</c> when the total size or + total memory of a fragmented table (or a single + fragment) exceeds some application specific threshold, it + may be time to dynamically add a new fragment in order + to obtain a better distribution of records. + </p> + <p><c>temporary node down</c> when a node temporarily goes + down it may be time to compensate some fragments with new + replicas in order to keep the desired level of + redundancy.
When the node comes up again it may be time to + remove the superfluous replica. + </p> + <p><c>overload threshold</c> when the load on some node + exceeds some application specific threshold, it may be time to + either add or move some fragment replicas to nodes with lesser + load. Extra care should be taken if the table has a foreign + key relation to some other table. In order to avoid severe + performance penalties, the same re-distribution must be + performed for all of the related tables. + </p> + <p>Use <c>mnesia:change_table_frag/2</c> to add new fragments + and apply the usual schema manipulation functions (such as + <c>mnesia:add_table_copy/3</c>, <c>mnesia:del_table_copy/2</c> + and <c>mnesia:change_table_copy_type/3</c>) on each fragment + to perform the actual re-distribution. + </p> + </section> + </section> + + <section> + <title>Local Content Tables</title> + <p>Replicated tables have the same content on all nodes where + they are replicated. However, it is sometimes advantageous to + have tables with different content on different nodes. + </p> + <p>If we specify the attribute <c>{local_content, true}</c> when + we create the table, the table will reside on the nodes where + we specify that the table shall exist, but the write operations on the + table will only be performed on the local copy. + </p> + <p>Furthermore, when the table is initialized at start-up, the + table will only be initialized locally, and the table + content will not be copied from another node. + </p> + </section> + + <section> + <title>Disc-less Nodes</title> + <p>It is possible to run Mnesia on nodes that do not have a + disc. It is of course not possible to have replicas + of either <c>disc_copies</c> or <c>disc_only_copies</c> + on such nodes. This is especially troublesome for the + <c>schema</c> table since Mnesia needs the schema in order + to initialize itself. + </p> + <p>The schema table may, as other tables, reside on one or + more nodes.
The storage type of the schema table may either + be <c>disc_copies</c> or <c>ram_copies</c> + (not <c>disc_only_copies</c>). At + start-up Mnesia uses its schema to determine with which + nodes it should try to establish contact. If any + of the other nodes are already started, the starting node + merges its table definitions with the table definitions + brought from the other nodes. This also applies to the + definition of the schema table itself. The application + parameter <c>extra_db_nodes</c> contains a list of nodes which + Mnesia also should establish contact with besides the ones + found in the schema. The default value is the empty list + <c>[]</c>. + </p> + <p>Hence, when a disc-less node needs to find the schema + definitions from a remote node on the network, we need to supply + this information through the application parameter <c>-mnesia extra_db_nodes NodeList</c>. Without this + configuration parameter set, Mnesia will start as a single node + system. It is also possible to use <c>mnesia:change_config/2</c> + to assign a value to 'extra_db_nodes' and force a connection + after mnesia have been started, i.e. + mnesia:change_config(extra_db_nodes, NodeList). + </p> + <p>The application parameter schema_location controls where + Mnesia will search for its schema. The parameter may be one of + the following atoms: + </p> + <taglist> + <tag><c>disc</c></tag> + <item> + <p>Mandatory disc. The schema is assumed to be located + on the Mnesia directory. And if the schema cannot be found, + Mnesia refuses to start. + </p> + </item> + <tag><c>ram</c></tag> + <item> + <p>Mandatory ram. The schema resides in ram + only. At start-up a tiny new schema is generated. This + default schema contains just the definition of the schema + table and only resides on the local node. Since no other + nodes are found in the default schema, the configuration + parameter <c>extra_db_nodes</c> must be used in order to let the + node share its table definitions with other nodes. 
(The + <c>extra_db_nodes</c> parameter may also be used on disc-full nodes.) + </p> + </item> + <tag><c>opt_disc</c></tag> + <item> + <p>Optional disc. The schema may reside on either disc + or ram. If the schema is found on disc, Mnesia starts as a + disc-full node (the storage type of the schema table is + disc_copies). If no schema is found on disc, Mnesia starts + as a disc-less node (the storage type of the schema table is + ram_copies). The default value for the application parameter + is + <c>opt_disc</c>. </p> + </item> + </taglist> + <p>When the <c>schema_location</c> is set to opt_disc the + function <c>mnesia:change_table_copy_type/3</c> may be used to + change the storage type of the schema. + This is illustrated below: + </p> + <pre> + 1> mnesia:start(). + ok + 2> mnesia:change_table_copy_type(schema, node(), disc_copies). + {atomic, ok} + </pre> + <p>Assuming that the call to <c>mnesia:start</c> did not + find any schema to read on the disc, then Mnesia has started + as a disc-less node, and then changed it to a node that + utilizes the disc to locally store the schema. + </p> + </section> + + <section> + <title>More Schema Management</title> + <p>It is possible to add and remove nodes from a Mnesia system. + This can be done by adding a copy of the schema to those nodes. + </p> + <p>The functions <c>mnesia:add_table_copy/3</c> and + <c>mnesia:del_table_copy/2</c> may be used to add and delete + replicas of the schema table. Adding a node to the list + of nodes where the schema is replicated will affect two + things. First it allows other tables to be replicated to + this node. Secondly it will cause Mnesia to try to contact + the node at start-up of disc-full nodes. + </p> + <p>The function call <c>mnesia:del_table_copy(schema, mynode@host)</c> deletes the node 'mynode@host' from the + Mnesia system. The call fails if mnesia is running on + 'mynode@host'. The other mnesia nodes will never try to connect + to that node again. 
Note, if there is a disc + resident schema on the node 'mynode@host', the entire mnesia + directory should be deleted. This can be done with + <c>mnesia:delete_schema/1</c>. If + mnesia is started again on the node 'mynode@host' and the + directory has not been cleared, mnesia's behaviour is undefined. + </p> + <p>If the storage type of the schema is ram_copies, i.e. we + have a disc-less node, Mnesia + will not use the disc on that particular node. The disc + usage is enabled by changing the storage type of the table + <c>schema</c> to disc_copies. + </p> + <p>New schemas are + created explicitly with <c>mnesia:create_schema/1</c> or implicitly + by starting Mnesia without a disc resident schema. Whenever + a table (including the schema table) is created it is + assigned its own unique cookie. The schema table is not created with + <c>mnesia:create_table/2</c> as normal tables are. + </p> + <p>At start-up Mnesia connects different nodes to each other, + then they exchange table definitions with each other and the + table definitions are merged. During the merge procedure Mnesia + performs a sanity test to ensure that the table definitions are + compatible with each other. If a table exists on several nodes + the cookie must be the same, otherwise Mnesia will shut down one + of the nodes. This unfortunate situation will occur if a table + has been created on two nodes independently of each other while + they were disconnected. To solve the problem, one of the tables + must be deleted (as the cookies differ we regard it to be two + different tables even if they happen to have the same name). + </p> + <p>Merging different versions of the schema table, does not + always require the cookies to be the same. If the storage + type of the schema table is disc_copies, the cookie is + immutable, and all other db_nodes must have the same + cookie.
When the schema is stored as type ram_copies, + its cookie can be replaced with a cookie from another node + (ram_copies or disc_copies). The cookie replacement (during + merge of the schema table definition) is performed each time + a RAM node connects to another node. + </p> + <p><c>mnesia:system_info(schema_location)</c> and + <c>mnesia:system_info(extra_db_nodes)</c> may be used to determine + the actual values of schema_location and extra_db_nodes + respectively. <c>mnesia:system_info(use_dir)</c> may be used to + determine whether Mnesia is actually using the Mnesia + directory. <c>use_dir</c> may be determined even before + Mnesia is started. The function <c>mnesia:info/0</c> may now be + used to printout some system information even before Mnesia + is started. When Mnesia is started the function prints out + more information. + </p> + <p>Transactions which update the definition of a table, + requires that Mnesia is started on all nodes where the + storage type of the schema is disc_copies. All replicas of + the table on these nodes must also be loaded. There are a + few exceptions to these availability rules. Tables may be + created and new replicas may be added without starting all + of the disc-full nodes. New replicas may be added before all + other replicas of the table have been loaded, it will suffice + when one other replica is active. + </p> + </section> + + <section> + <title>Mnesia Event Handling</title> + <p>System events and table events are the two categories of events + that Mnesia will generate in various situations. + </p> + <p>It is possible for user process to subscribe on the + events generated by Mnesia. + We have the following two functions:</p> + <taglist> + <tag><c>mnesia:subscribe(Event-Category)</c></tag> + <item> + <p>Ensures that a copy of all events of type + <c>Event-Category</c> are sent to the calling process. 
+ </p> + </item> + <tag><c>mnesia:unsubscribe(Event-Category)</c></tag> + <item>Removes the subscription on events of type + <c>Event-Category</c></item> + </taglist> + <p><c>Event-Category</c> may either be the atom <c>system</c>, or + one of the tuples <c>{table, Tab, simple}</c>, <c>{table, Tab, detailed}</c>. The old event-category <c>{table, Tab}</c> is the same + event-category as <c>{table, Tab, simple}</c>. + The subscribe functions activate a subscription + of events. The events are delivered as messages to the process + evaluating the <c>mnesia:subscribe/1</c> function. The syntax of + system events is <c>{mnesia_system_event, Event}</c> and + <c>{mnesia_table_event, Event}</c> for table events. What system + events and table events means is described below. + </p> + <p>All system events are subscribed by Mnesia's + gen_event handler. The default gen_event handler is + <c>mnesia_event</c>. But it may be changed by using the application + parameter <c>event_module</c>. The value of this parameter must be + the name of a module implementing a complete handler + as specified by the <c>gen_event</c> module in + STDLIB. <c>mnesia:system_info(subscribers)</c> and + <c>mnesia:table_info(Tab, subscribers)</c> may be used to determine + which processes are subscribed to various + events. + </p> + + <section> + <title>System Events</title> + <p>The system events are detailed below:</p> + <taglist> + <tag><c>{mnesia_up, Node}</c></tag> + <item> + <p>Mnesia has been started on a node. + Node is the name of the node. By default this event is ignored. + </p> + </item> + <tag><c>{mnesia_down, Node}</c></tag> + <item> + <p>Mnesia has been stopped on a node. + Node is the name of the node. By default this event is + ignored. + </p> + </item> + <tag><c>{mnesia_checkpoint_activated, Checkpoint}</c></tag> + <item> + <p>a checkpoint with the name + <c>Checkpoint</c> has been activated and that the current node is + involved in the checkpoint. 
Checkpoints may be activated + explicitly with <c>mnesia:activate_checkpoint/1</c> or implicitly + at backup, adding table replicas, internal transfer of data + between nodes etc. By default this event is ignored. + </p> + </item> + <tag><c>{mnesia_checkpoint_deactivated, Checkpoint}</c></tag> + <item> + <p>A checkpoint with the name + <c>Checkpoint</c> has been deactivated and the current node was + involved in the checkpoint. Checkpoints may explicitly be + deactivated with <c>mnesia:deactivate_checkpoint/1</c> or implicitly when the + last replica of a table (involved in the checkpoint) + becomes unavailable, e.g. at node down. By default this + event is ignored. + </p> + </item> + <tag><c>{mnesia_overload, Details}</c></tag> + <item> + <p>Mnesia on the current node is + overloaded and the subscriber should take action. + </p> + <p>A typical overload situation occurs when the + applications are performing more updates on disc + resident tables than Mnesia is able to handle. Ignoring + this kind of overload may lead to a situation where + the disc space is exhausted (regardless of the size of + the tables stored on disc). + <br></br> + Each update is appended to + the transaction log and occasionally (depending on how it + is configured) dumped to the table files. The + table file storage is more compact than the transaction + log storage, especially if the same record is updated + over and over again. If the thresholds for dumping the + transaction log have been reached before the previous + dump was finished an overload event is triggered. + </p> + <p>Another typical overload situation is when the + transaction manager cannot commit transactions at the + same pace as the applications are performing updates of + disc resident tables. When this happens the message + queue of the transaction manager will continue to grow + until the memory is exhausted or the load + decreases. + </p> + <p>The same problem may occur for dirty updates.
The overload + is detected locally on the current node, but its cause may + be on another node. Application processes may cause heavy + loads if any tables are residing on other nodes (replicated or not). By default this event + is reported to the error_logger. + </p> + </item> + <tag><c>{inconsistent_database, Context, Node}</c></tag> + <item> + <p>Mnesia regards the database as + potentially inconsistent and gives its applications a chance + to recover from the inconsistency, e.g. by installing a + consistent backup as fallback and then restarting the system + or by picking a <c>MasterNode</c> from <c>mnesia:system_info(db_nodes)</c> + and invoking <c>mnesia:set_master_nodes([MasterNode])</c>. By default an + error is reported to the error logger. + </p> + </item> + <tag><c>{mnesia_fatal, Format, Args, BinaryCore}</c></tag> + <item> + <p>Mnesia has encountered a fatal error + and will (in a short period of time) be terminated. The reason for + the fatal error is explained in Format and Args which may + be given as input to <c>io:format/2</c> or sent to the + error_logger. By default it will be sent to the + error_logger. <c>BinaryCore</c> is a binary containing a summary of + Mnesia's internal state at the time when the fatal error was + encountered. By default the binary is written to a + unique file name in the current directory. On RAM nodes the + core is ignored. + </p> + </item> + <tag><c>{mnesia_info, Format, Args}</c></tag> + <item> + <p>Mnesia has detected something that + may be of interest when debugging the system. This is explained + in <c>Format</c> and <c>Args</c> which may appear + as input to <c>io:format/2</c> or sent to the error_logger. By + default this event is printed with <c>io:format/2</c>. + </p> + </item> + <tag><c>{mnesia_error, Format, Args}</c></tag> + <item> + <p>Mnesia has encountered an error. The + reason for the error is explained in <c>Format</c> and <c>Args</c> + which may be given as input to <c>io:format/2</c> or sent to the + error_logger.
By default this event is reported to the error_logger. + </p> + </item> + <tag><c>{mnesia_user, Event}</c></tag> + <item> + <p>An application has invoked the + function <c>mnesia:report_event(Event)</c>. <c>Event</c> may be any Erlang + data structure. When tracing a system of Mnesia applications + it is useful to be able to interleave Mnesia's own events with + application related events that give information about the + application context. Whenever the application starts with + a new and demanding Mnesia activity or enters a + new and interesting phase in its execution it may be a good idea + to use <c>mnesia:report_event/1</c>. </p> + </item> + </taglist> + </section> + + <section> + <title>Table Events</title> + <p>Another category of events are table events, which are + events related to table updates. There are two types of table + events simple and detailed. + </p> + <p>The simple table events are tuples looking like this: + <c>{Oper, Record, ActivityId}</c>. Where <c>Oper</c> is the + operation performed. <c>Record</c> is the record involved in the + operation and <c>ActivityId</c> is the identity of the + transaction performing the operation. Note that the name of the + record is the table name even when the <c>record_name</c> has + another setting. The various table related events that may + occur are: + </p> + <taglist> + <tag><c>{write, NewRecord, ActivityId}</c></tag> + <item> + <p>a new record has been written. + NewRecord contains the new value of the record. + </p> + </item> + <tag><c>{delete_object, OldRecord, ActivityId}</c></tag> + <item> + <p>a record has possibly been deleted + with <c>mnesia:delete_object/1</c>. <c>OldRecord</c> + contains the value of the old record as stated as argument + by the application. Note that, other records with the same + key may be remaining in the table if it is a bag. + </p> + </item> + <tag><c>{delete, {Tab, Key}, ActivityId}</c></tag> + <item> + <p>one or more records possibly has + been deleted. 
All records with the key Key in the table + <c>Tab</c> have been deleted. </p> + </item> + </taglist> + <p>The detailed table events are tuples looking like + this: <c>{Oper, Table, Data, [OldRecs], ActivityId}</c>. + Where <c>Oper</c> is the operation + performed. <c>Table</c> is the table involved in the operation, + <c>Data</c> is the record/oid written/deleted. + <c>OldRecs</c> is the contents before the operation. + and <c>ActivityId</c> is the identity of the transaction + performing the operation. + The various table related events that may occur are: + </p> + <taglist> + <tag><c>{write, Table, NewRecord, [OldRecords], ActivityId}</c></tag> + <item> + <p>a new record has been written. + NewRecord contains the new value of the record and OldRecords + contains the records before the operation is performed. + Note that the new content is dependent on the type of the table.</p> + </item> + <tag><c>{delete, Table, What, [OldRecords], ActivityId}</c></tag> + <item> + <p>records has possibly been deleted + <c>What</c> is either {Table, Key} or a record {RecordName, Key, ...} + that was deleted. + Note that the new content is dependent on the type of the table.</p> + </item> + </taglist> + </section> + </section> + + <section> + <title>Debugging Mnesia Applications</title> + <p>Debugging a Mnesia application can be difficult due to a number of reasons, primarily related + to difficulties in understanding how the transaction + and table load mechanisms work. An other source of + confusion may be the semantics of nested transactions. + </p> + <p>We may set the debug level of Mnesia by calling: + </p> + <list type="bulleted"> + <item><c>mnesia:set_debug_level(Level)</c></item> + </list> + <p>Where the parameter <c>Level</c> is: + </p> + <taglist> + <tag><c>none</c></tag> + <item> + <p>no trace outputs at all. This is the default. + </p> + </item> + <tag><c>verbose</c></tag> + <item> + <p>activates tracing of important debug events. 
These + debug events will generate <c>{mnesia_info, Format, Args}</c> + system events. Processes may subscribe to these events with + <c>mnesia:subscribe/1</c>. The events are always sent to Mnesia's + event handler. + </p> + </item> + <tag><c>debug</c></tag> + <item> + <p>activates all events at the verbose level plus + traces of all debug events. These debug events will generate + <c>{mnesia_info, Format, Args}</c> system events. Processes may + subscribe to these events with <c>mnesia:subscribe/1</c>. The + events are always sent to Mnesia's event handler. On this + debug level Mnesia's event handler starts subscribing + updates in the schema table. + </p> + </item> + <tag><c>trace</c></tag> + <item> + <p>activates all events at the debug level. On this + debug level Mnesia's event handler starts subscribing + updates on all Mnesia tables. This level is only intended + for debugging small toy systems, since many large + events may be generated.</p> + </item> + <tag><c>false</c></tag> + <item> + <p>is an alias for none.</p> + </item> + <tag><c>true</c></tag> + <item> + <p>is an alias for debug.</p> + </item> + </taglist> + <p>The debug level of Mnesia itself, is also an application + parameter, thereby making it possible to start an Erlang system + in order to turn on Mnesia debug in the initial + start-up phase by using the following code: + </p> + <pre> + % erl -mnesia debug verbose + </pre> + </section> + + <section> + <title>Concurrent Processes in Mnesia</title> + <p>Programming concurrent Erlang systems is the subject of + a separate book. However, it is worthwhile to draw attention to + the following features, which permit concurrent processes to + exist in a Mnesia system. + </p> + <p>A group of functions or processes can be called within a + transaction. A transaction may include statements that read, + write or delete data from the DBMS. 
A large number of such + transactions can run concurrently, and the programmer does not + have to explicitly synchronize the processes which manipulate + the data. All programs accessing the database through the + transaction system may be written as if they had sole access to + the data. This is a very desirable property since all + synchronization is taken care of by the transaction handler. If + a program reads or writes data, the system ensures that no other + program tries to manipulate the same data at the same time. + </p> + <p>It is possible to move tables, delete tables or reconfigure + the layout of a table in various ways. An important aspect of + the actual implementation of these functions is that it is + possible for user programs to continue to use a table while it + is being reconfigured. For example, it is possible to + simultaneously move a table and perform write operations to the + table. This is important for many applications that + require continuously available services. Refer to Chapter 4: + <seealso marker="Mnesia_chap4#trans_prop">Transactions and other access contexts</seealso> for more information. + </p> + </section> + + <section> + <title>Prototyping</title> + <p>If and when we decide that we would like to start and manipulate + Mnesia, it is often easier to write the definitions and + data into an ordinary text file. + Initially, no tables and no data exist, and it may not yet be known which + tables are required. At the initial stages of prototyping it + is prudent to write all data into one file, process + that file and have the data in the file inserted into the database. + It is possible to initialize Mnesia with data read from a text file. + We have the following two functions to work with text files. + </p> + <list type="bulleted"> + <item> + <p><c>mnesia:load_textfile(Filename)</c> loads a + series of local table definitions and data found in the file + into Mnesia. This function also starts Mnesia and possibly + creates a new schema.
The function only operates on the + local node. + </p> + </item> + <item> + <p><c>mnesia:dump_to_textfile(Filename)</c> Dumps + all local tables of a mnesia system into a text file which can + then be edited (by means of a normal text editor) and then + later reloaded.</p> + </item> + </list> + <p>These functions are of course much slower than the ordinary + store and load functions of Mnesia. However, this is mainly intended for minor experiments + and initial prototyping. The major advantages of these functions is that they are very easy + to use. + </p> + <p>The format of the text file is: + </p> + <pre> + {tables, [{Typename, [Options]}, + {Typename2 ......}]}. + + {Typename, Attribute1, Atrribute2 ....}. + {Typename, Attribute1, Atrribute2 ....}. + </pre> + <p><c>Options</c> is a list of <c>{Key,Value}</c> tuples conforming + to the options we could give to <c>mnesia:create_table/2</c>. + </p> + <p>For example, if we want to start playing with a small + database for healthy foods, we enter then following data into + the file <c>FRUITS</c>. + </p> + <codeinclude file="FRUITS" tag="%0" type="erl"></codeinclude> + <p>The following session with the Erlang shell then shows how + to load the fruits database. + </p> + <pre><![CDATA[ + % erl + Erlang (BEAM) emulator version 4.9 + + Eshell V4.9 (abort with ^G) + 1> mnesia:load_textfile("FRUITS"). + New table fruit + New table vegetable + {atomic,ok} + 2> mnesia:info(). + ---> Processes holding locks <--- + ---> Processes waiting for locks <--- + ---> Pending (remote) transactions <--- + ---> Active (local) transactions <--- + ---> Uncertain transactions <--- + ---> Active tables <--- + vegetable : with 2 records occuping 299 words of mem + fruit : with 2 records occuping 291 words of mem + schema : with 3 records occuping 401 words of mem + ===> System info in version "1.1", debug level = none <=== + opt_disc. Directory "/var/tmp/Mnesia.nonode@nohost" is used. 
+ use fallback at restart = false + running db nodes = [nonode@nohost] + stopped db nodes = [] + remote = [] + ram_copies = [fruit,vegetable] + disc_copies = [schema] + disc_only_copies = [] + [{nonode@nohost,disc_copies}] = [schema] + [{nonode@nohost,ram_copies}] = [fruit,vegetable] + 3 transactions committed, 0 aborted, 0 restarted, 2 logged to disc + 0 held locks, 0 in queue; 0 local transactions, 0 remote + 0 transactions waits for other nodes: [] + ok + 3> + ]]></pre> + <p>Where we can see that the DBMS was initiated from a + regular text file. + </p> + </section> + + <section> + <title>Object Based Programming with Mnesia</title> + <p>The Company database introduced in Chapter 2 has three tables + which store records (employee, dept, project), and three tables + which store relationships (manager, at_dep, in_proj). This is a + normalized data model, which has some advantages over a + non-normalized data model. + </p> + <p>It is more efficient to do a + generalized search in a normalized database. Some operations are + also easier to perform on a normalized data model. For example, + we can easily remove one project, as the following example + illustrates: + </p> + <codeinclude file="company.erl" tag="%13" type="erl"></codeinclude> + <p>In reality, data models are seldom fully normalized. A + realistic alternative to a normalized database model would be + a data model which is not even in first normal form. Mnesia + is very suitable for applications such as telecommunications, + because it is easy to organize data in a very flexible manner. A + Mnesia database is always organized as a set of tables. Each + table is filled with rows/objects/records. What sets Mnesia + apart is that individual fields in a record can contain any type + of compound data structures. An individual field in a record can + contain lists, tuples, functions, and even record code. 
+ </p> + <p>Many telecommunications applications have unique requirements + on lookup times for certain types of records. If our Company + database had been a part of a telecommunications system, then it + could be that the lookup time of an employee <em>together</em> + with a list of the projects the employee is working on, should + be minimized. If this was the case, we might choose a + drastically different data model which has no direct + relationships. We would only have the records themselves, and + different records could contain either direct references to + other records, or they could contain other records which are not + part of the Mnesia schema. + </p> + <p>We could create the following record definitions: + </p> + <codeinclude file="company_o.hrl" tag="%0" type="erl"></codeinclude> + <p>A record which describes an employee might look like this: + </p> + <pre> + Me = #employee{emp_no= 104732, + name = klacke, + salary = 7, + sex = male, + phone = 99586, + room_no = {221, 015}, + dept = 'B/SFR', + projects = [erlang, mnesia, otp], + manager = 114872}, + </pre> + <p>This model only has three different tables, and the employee + records contain references to other records. We have the following + references in the record. + </p> + <list type="bulleted"> + <item><c>'B/SFR'</c> refers to a <c>dept</c> record. + </item> + <item><c>[erlang, mnesia, otp]</c>. This is a list of three + direct references to three different <c>projects</c> records. + </item> + <item><c>114872</c>. This refers to another employee record. + </item> + </list> + <p>We could also use the Mnesia record identifiers (<c>{Tab, Key}</c>) + as references. In this case, the <c>dept</c> attribute would be + set to the value <c>{dept, 'B/SFR'}</c> instead of + <c>'B/SFR'</c>. + </p> + <p>With this data model, some operations execute considerably + faster than they do with the normalized data model in our + Company database. 
On the other hand, some other operations + become much more complicated. In particular, it becomes more + difficult to ensure that records do not contain dangling + pointers to other non-existent, or deleted, records. + </p> + <p>The following code exemplifies a search with a non-normalized + data model. To find all employees at department + <c>Dep</c> with a salary higher than <c>Salary</c>, use the following code: + </p> + <codeinclude file="company_o.erl" tag="%9" type="erl"></codeinclude> + <p>This code is not only easier to write and to understand, but it + also executes much faster. + </p> + <p>It is easy to show examples of code which executes faster if + we use a non-normalized data model, instead of a normalized + model. The main reason for this is that fewer tables are + required. For this reason, we can more easily combine data from + different tables in join operations. In the above example, the + <c>get_emps/2</c> function was transformed from a join operation + into a simple query which consists of a selection and a projection + on one single table. + </p> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc new file mode 100644 index 0000000000..7078499fbf --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap7.xmlsrc @@ -0,0 +1,890 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. 
+ + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Mnesia System Information</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_chap7.xml</file> + </header> + + <section> + <title>Database Configuration Data</title> + <p>The following two functions can be used to retrieve system + information. They are described in detail in the reference manual. + </p> + <list type="bulleted"> + <item><c>mnesia:table_info(Tab, Key) -></c><c>Info | exit({aborted, Reason})</c>. + Returns information about one table. Such as the + current size of the table, on which nodes it resides etc. + </item> + <item><c>mnesia:system_info(Key) -> </c><c>Info | exit({aborted, Reason})</c>. + Returns information about the Mnesia system. For example, transaction + statistics, db_nodes, configuration parameters etc. + </item> + </list> + </section> + + <section> + <title>Core Dumps</title> + <p>If Mnesia malfunctions, system information is dumped to a file + named <c>MnesiaCore.Node.When</c>. The type of system + information contained in this file can also be generated with + the function <c>mnesia_lib:coredump()</c>. If a Mnesia system + behaves strangely, it is recommended that a Mnesia core dump + file be included in the bug report.</p> + </section> + + <section> + <title>Dumping Tables</title> + <p>Tables of type <c>ram_copies</c> are by definition stored in + memory only. It is possible, however, to dump these tables to + disc, either at regular intervals, or before the system is + shutdown. The function <c>mnesia:dump_tables(TabList)</c> dumps + all replicas of a set of RAM tables to disc. 
The tables can be + accessed while being dumped to disc. To dump the tables to + disc all replicas must have the storage type <c>ram_copies</c>. + </p> + <p>The table content is placed in a .DCD file on the + disc. When the Mnesia system is started, the RAM table will + initially be loaded with data from its .DCD file. + </p> + </section> + + <section> + <marker id="checkpoints"></marker> + <title>Checkpoints</title> + <p>A checkpoint is a transaction consistent state that spans over + one or more tables. When a checkpoint is activated, the system + will remember the current content of the set of tables. The + checkpoint retains a transaction consistent state of the tables, + allowing the tables to be read and updated while the checkpoint + is active. A checkpoint is typically used to + back up tables to external media, but they are also used + internally in Mnesia for other purposes. Each checkpoint is + independent and a table may be involved in several checkpoints + simultaneously. + </p> + <p>Each table retains its old contents in a checkpoint retainer + and for performance critical applications, it may be important + to realize the processing overhead associated with checkpoints. + In a worst case scenario, the checkpoint retainer will consume + even more memory than the table itself. Each update will also be + slightly slower on those nodes where checkpoint + retainers are attached to the tables. + </p> + <p>For each table it is possible to choose if there should be one + checkpoint retainer attached to all replicas of the table, or if + it is enough to have only one checkpoint retainer attached to a + single replica. With a single checkpoint retainer per table, the + checkpoint will consume less memory, but it will be vulnerable + to node crashes. With several redundant checkpoint retainers the + checkpoint will survive as long as there is at least one active + checkpoint retainer attached to each table. 
+ </p> + <p>Checkpoints may be explicitly deactivated with the function + <c>mnesia:deactivate_checkpoint(Name)</c>, where <c>Name</c> is + the name of an active checkpoint. This function returns + <c>ok</c> if successful, or <c>{error, Reason}</c> in the case + of an error. All tables in a checkpoint must be attached to at + least one checkpoint retainer. The checkpoint is automatically + de-activated by Mnesia, when any table lacks a checkpoint + retainer. This may happen when a node goes down or when a + replica is deleted. Use the <c>min</c> and + <c>max</c> arguments described below, to control the degree of + checkpoint retainer redundancy. + </p> + <p>Checkpoints are activated with the function <marker id="mnesia:chkpt(Args)"></marker> +<c>mnesia:activate_checkpoint(Args)</c>, + where <c>Args</c> is a list of the following tuples: + </p> + <list type="bulleted"> + <item><c>{name,Name}</c>. <c>Name</c> specifies a temporary name + of the checkpoint. The name may be re-used when the checkpoint + has been de-activated. If no name is specified, a name is + generated automatically. + </item> + <item><c>{max,MaxTabs}</c>. <c>MaxTabs</c> is a list of tables + which will be included in the checkpoint. The default is + <c>[]</c> (an empty list). For these tables, the redundancy + will be maximized. The old contents of the table will be + retained in the checkpoint retainer when the main table is + updated by the applications. The checkpoint becomes more fault + tolerant if the tables have several replicas. When new + replicas are added by means of the schema manipulation + function <c>mnesia:add_table_copy/3</c>, it will also + attach a local checkpoint retainer. + </item> + <item><c>{min,MinTabs}</c>. <c>MinTabs</c> is a list of tables + that should be included in the checkpoint. The default is + <c>[]</c>. For these tables, the redundancy will be minimized, + and there will be a single checkpoint retainer per table, + preferably at the local node. 
+ </item> + <item><c>{allow_remote,Bool}</c>. <c>false</c> means that all + checkpoint retainers must be local. If a table does not reside + locally, the checkpoint cannot be activated. <c>true</c> + allows checkpoint retainers to be allocated on any node. The + default is <c>true</c>. + </item> + <item><c>{ram_overrides_dump,Bool}</c>. This argument only + applies to tables of type <c>ram_copies</c>. <c>Bool</c> + specifies if the table state in RAM should override the table + state on disc. <c>true</c> means that the latest committed + records in RAM are included in the checkpoint retainer. These + are the records that the application accesses. <c>false</c> + means that the records on the disc .DAT file are + included in the checkpoint retainer. These are the records + that will be loaded on start-up. Default is <c>false</c>.</item> + </list> + <p>The function <c>mnesia:activate_checkpoint(Args)</c> returns one of the + following values: + </p> + <list type="bulleted"> + <item><c>{ok, Name, Nodes}</c></item> + <item><c>{error, Reason}</c>.</item> + </list> + <p><c>Name</c> is the name of the checkpoint, and <c>Nodes</c> are + the nodes where the checkpoint is known. + </p> + <p>A list of active checkpoints can be obtained with the following + functions: + </p> + <list type="bulleted"> + <item><c>mnesia:system_info(checkpoints)</c>. This function + returns all active checkpoints on the current node.</item> + <item><c>mnesia:table_info(Tab,checkpoints)</c>. This function + returns active checkpoints on a specific table.</item> + </list> + </section> + + <section> + <title>Files</title> + <p>This section describes the internal files which are created and maintained by the Mnesia system; + in particular, the workings of the Mnesia log are described. 
+ </p> + + <section> + <title>Start-Up Files</title> + </section> + <p>In Chapter 3 we detailed the following pre-requisites for + starting Mnesia (refer to Chapter 3: <seealso marker="Mnesia_chap3#start_mnesia">Starting Mnesia</seealso>): + </p> + <list type="bulleted"> + <item>We must start an Erlang session and specify a Mnesia + directory for our database. + </item> + <item>We must initiate a database schema, using the function + <c>mnesia:create_schema/1</c>. + </item> + </list> + <p>The following example shows how these tasks are performed: + </p> + <list type="ordered"> + <item> + <pre> +% <input>erl -sname klacke -mnesia dir '"/ldisc/scratch/klacke"'</input> </pre> + </item> + <item> + <pre> +Erlang (BEAM) emulator version 4.9 + +Eshell V4.9 (abort with ^G) +(klacke@gin)1> <input>mnesia:create_schema([node()]).</input> +ok +(klacke@gin)2> +<input>^Z</input> +Suspended </pre> + <p>We can inspect the Mnesia directory to see what files have been created. Enter the following command: + </p> + <pre> +% <input>ls -l /ldisc/scratch/klacke</input> +-rw-rw-r-- 1 klacke staff 247 Aug 12 15:06 FALLBACK.BUP </pre> + <p>The response shows that the file FALLBACK.BUP has been created. This is called a backup file, and it contains an initial schema. If we had specified more than one node in the <c>mnesia:create_schema/1</c> function, identical backup files would have been created on all nodes. + </p> + </item> + <item> + <p>Continue by starting Mnesia:</p> + <pre> +(klacke@gin)3><input>mnesia:start( ).</input> +ok </pre> + <p>We can now see the following listing in the Mnesia directory: + </p> + <pre> +-rw-rw-r-- 1 klacke staff 86 May 26 19:03 LATEST.LOG +-rw-rw-r-- 1 klacke staff 34507 May 26 19:03 schema.DAT </pre> + <p>The schema in the backup file FALLBACK.BUP has been used to generate the file <c>schema.DAT</c>. Since we have no other disc resident tables than the schema, no other data files were created. 
The file FALLBACK.BUP was removed after the successful "restoration". We also see a number of files that are for internal use by Mnesia. + </p> + </item> + <item> + <p>Enter the following command to create a table:</p> + <pre> +(klacke@gin)4> <input>mnesia:create_table(foo,[{disc_copies, [node()]}]).</input> +{atomic,ok} </pre> + <p>We can now see the following listing in the Mnesia directory: + </p> + <pre> +% <input>ls -l /ldisc/scratch/klacke</input> +-rw-rw-r-- 1 klacke staff 86 May 26 19:07 LATEST.LOG +-rw-rw-r-- 1 klacke staff 94 May 26 19:07 foo.DCD +-rw-rw-r-- 1 klacke staff 6679 May 26 19:07 schema.DAT </pre> + <p>Here a file <c>foo.DCD</c> has been created. This file will eventually store + all data that is written into the <c>foo</c> table.</p> + </item> + </list> + + <section> + <title>The Log File</title> + <p>When starting Mnesia, a .LOG file called <c>LATEST.LOG</c> + was created and placed in the database directory. This file is + used by Mnesia to log disc based transactions. This includes all + transactions that write at least one record in a table which is + of storage type <c>disc_copies</c>, or + <c>disc_only_copies</c>. It also includes all operations which + manipulate the schema itself, such as creating new tables. The + format of the log can vary with different implementations of + Mnesia. The Mnesia log is currently implemented with the + standard library module <c>disk_log</c>. + </p> + <p>The log file will grow continuously and must be dumped at + regular intervals. "Dumping the log file" means that Mnesia will + perform all the operations listed in the log and place the + records in the corresponding .DAT, .DCD and .DCL data files. For + example, if the operation "write record <c>{foo, 4, elvis, 6}</c>" + is listed in the log, Mnesia inserts the operation into the + file <c>foo.DCL</c>. Later, when Mnesia thinks the .DCL has become too large, + the data is moved to the .DCD file. 
+ The dumping operation can be time consuming + if the log is very large. However, it is important to realize + that the Mnesia system continues to operate during log dumps. + </p> + <p>By default Mnesia either dumps the log whenever 100 records have + been written in the log or when 3 minutes have passed. + This is controlled by the two application parameters + <c>-mnesia dump_log_write_threshold WriteOperations</c> and + <c>-mnesia dump_log_time_threshold MilliSecs</c>. + </p> + <p>Before the log is dumped, the file <c>LATEST.LOG</c> is + renamed to <c>PREVIOUS.LOG</c>, and a new <c>LATEST.LOG</c> file + is created. Once the log has been successfully dumped, the file + <c>PREVIOUS.LOG</c> is deleted. + </p> + <p>The log is also dumped at start-up and whenever a schema + operation is performed. + </p> + </section> + + <section> + <title>The Data Files</title> + <p>The directory listing also contains one .DAT file. This contains + the schema itself, stored in the <c>schema.DAT</c> + file. The DAT files are indexed files, and it is efficient to + insert and search for records in these files with a specific + key. The .DAT files are used for the schema and for <c>disc_only_copies</c> + tables. The Mnesia data files are currently implemented with the + standard library module <c>dets</c>, and all operations which + can be performed on <c>dets</c> files can also be performed on + the Mnesia data files. For example, <c>dets</c> contains a + function <c>dets:traverse/2</c> which can be used to view the + contents of a Mnesia DAT file. However, this can only be done + when Mnesia is not running. So, to view our schema file, we + can do the following: </p> + <pre> +{ok, N} = dets:open_file(schema, [{file, "./schema.DAT"},{repair,false}, +{keypos, 2}]), +F = fun(X) -> io:format("~p~n", [X]), continue end, +dets:traverse(N, F), +dets:close(N). 
</pre> + <note> + <p>Refer to the Reference Manual, <c>std_lib</c> for information about <c>dets</c>.</p> + </note> + <warning> + <p>The DAT files must always be opened with the <c>{repair, false}</c> + option. This ensures that these files are not + automatically repaired. Without this option, the database may + become inconsistent, because Mnesia may + believe that the files were properly closed. Refer to the reference + manual for information about the configuration parameter + <c>auto_repair</c>.</p> + </warning> + <warning> + <p>It is recommended that Data files are not tampered with while Mnesia is + running. While not prohibited, the behavior of Mnesia is unpredictable. </p> + </warning> + <p>The <c>disc_copies</c> tables are stored on disk with .DCL and .DCD files, + which are standard disk_log files. + </p> + </section> + </section> + + <section> + <title>Loading of Tables at Start-up</title> + <p>At start-up Mnesia loads tables in order to make them accessible + for its applications. Sometimes Mnesia decides to load all tables + that reside locally, and sometimes the tables may not be + accessible until Mnesia brings a copy of the table + from another node. + </p> + <p>To understand the behavior of Mnesia at start-up it is + essential to understand how Mnesia reacts when it loses contact + with Mnesia on another node. At this stage, Mnesia cannot distinguish + between a communication failure and a "normal" node down. <br></br> + + When this happens, Mnesia will assume that the other node is no longer running. + Whereas, in reality, the communication between the nodes has merely failed. + </p> + <p>To overcome this situation, simply try to restart the ongoing transactions that are + accessing tables on the failing node, and write a <c>mnesia_down</c> entry to a log file. + </p> + <p>At start-up, it must be noted that all tables residing on nodes + without a <c>mnesia_down</c> entry, may have fresher replicas. 
+ Their replicas may have been updated after the termination + of Mnesia on the current node. In order to catch up with the latest + updates, transfer a copy of the table from one of these other + "fresh" nodes. If you are unlucky, other nodes may be down + and you must wait for the table to be + loaded on one of these nodes before receiving a fresh copy of + the table. + </p> + <p>Before an application makes its first access to a table, + <c>mnesia:wait_for_tables(TabList, Timeout)</c> ought to be executed + to ensure that the table is accessible from the local node. If + the function times out the application may choose to force a + load of the local replica with + <c>mnesia:force_load_table(Tab)</c> and deliberately lose all + updates that may have been performed on the other nodes while + the local node was down. If + Mnesia already has loaded the table on another node or intends + to do so, we will copy the table from that node in order to + avoid unnecessary inconsistency. + </p> + <warning> + <p>Keep in mind that it is only + one table that is loaded by <c>mnesia:force_load_table(Tab)</c> + and since committed transactions may have caused updates in + several tables, the tables may now become inconsistent due to + the forced load.</p> + </warning> + <p>The allowed <c>AccessMode</c> of a table may be defined to + either be <c>read_only</c> or <c>read_write</c>. It may be + toggled with the function <c>mnesia:change_table_access_mode(Tab, AccessMode)</c> at runtime. <c>read_only</c> tables and + <c>local_content</c> tables will always be loaded locally, since + there is no need for copying the table from other nodes. Other + tables will primarily be loaded remotely from active replicas on + other nodes if the table already has been loaded there, or if + the running Mnesia already has decided to load the table there. 
+ </p> + <p>At start up, Mnesia will assume that its local replica is the + most recent version and load the table from disc if either of the following + situations is detected: + </p> + <list type="bulleted"> + <item><c>mnesia_down</c> is returned from all other nodes that hold a disc + resident replica of the table; or,</item> + <item>if all replicas are <c>ram_copies</c></item> + </list> + <p>This is normally a wise decision, but it may turn out to + be disastrous if the nodes have been disconnected due to a + communication failure, since Mnesia's normal table load + mechanism does not cope with communication failures. + </p> + <p>When Mnesia is loading many tables it uses the default load + order. However, it is possible to + affect the load order by explicitly changing the + <c>load_order</c> property for the tables, with the function + <c>mnesia:change_table_load_order(Tab, LoadOrder)</c>. The + <c>LoadOrder</c> is by default <c>0</c> for all tables, but it + can be set to any integer. The table with the highest + <c>load_order</c> will be loaded first. Changing load order is + especially useful for applications that need to ensure early + availability of fundamental tables. Large peripheral + tables should have a low load order value, perhaps set + below 0. + </p> + </section> + + <section> + <title>Recovery from Communication Failure</title> + <p>There are several occasions when Mnesia may detect that the + network has been partitioned due to a communication failure. + </p> + <p>One is when Mnesia already is up and running and the Erlang + nodes gain contact again. Then Mnesia will try to contact Mnesia + on the other node to see if it also thinks that the network has + been partitioned for a while. If Mnesia on both nodes has logged + <c>mnesia_down</c> entries from each other, Mnesia generates a + system event, called <c>{inconsistent_database, running_partitioned_network, Node}</c> which is sent to Mnesia's + event handler and other possible subscribers. 
The default event + handler reports an error to the error logger. + </p> + <p>Another occasion when Mnesia may detect that the network has + been partitioned due to a communication failure, is at start-up. + If Mnesia detects that both the local node and another node received + <c>mnesia_down</c> from each other it generates a + <c>{inconsistent_database, starting_partitioned_network, Node}</c> system event and acts as described above. + </p> + <p>If the application detects that there has been a communication + failure which may have caused an inconsistent database, it may + use the function <c>mnesia:set_master_nodes(Tab, Nodes)</c> to + pinpoint from which nodes each table may be loaded.</p> + <p>At start-up Mnesia's normal table load algorithm will be + bypassed and the table will be loaded from one of the master + nodes defined for the table, regardless of potential + <c>mnesia_down</c> entries in the log. The <c>Nodes</c> may only + contain nodes where the table has a replica and if it is empty, + the master node recovery mechanism for the particular table will + be reset and the normal load mechanism will be used when next + restarting. + </p> + <p>The function <c>mnesia:set_master_nodes(Nodes)</c> sets master + nodes for all tables. For each table it will determine its + replica nodes and invoke <c>mnesia:set_master_nodes(Tab, TabNodes)</c> with those replica nodes that are included in the + <c>Nodes</c> list (i.e. <c>TabNodes</c> is the intersection of + <c>Nodes</c> and the replica nodes of the table). If the + intersection is empty the master node recovery mechanism for the + particular table will be reset and the normal load mechanism + will be used at next restart. + </p> + <p>The functions <c>mnesia:system_info(master_node_tables)</c> and + <c>mnesia:table_info(Tab, master_nodes)</c> may be used to + obtain information about the potential master nodes. 
+ </p> + <p>The function <c>mnesia:force_load_table(Tab)</c> may be used to + force load the table regardless of which table load mechanism + is activated. + </p> + </section> + + <section> + <title>Recovery of Transactions</title> + <p>A Mnesia table may reside on one or more nodes. When a table is + updated, Mnesia will ensure that the updates will be replicated + to all nodes where the table resides. If a replica happens to be + inaccessible for some reason (e.g. due to a temporary node down), + Mnesia will then perform the replication later. + </p> + <p>On the node where the application is started, there will be a + transaction coordinator process. If the transaction is + distributed, there will also be a transaction participant process on + all the other nodes where commit work needs to be performed. + </p> + <p>Internally Mnesia uses several commit protocols. The selected + protocol depends on which tables have been updated in + the transaction. If all the involved tables are symmetrically + replicated (i.e. they all have the same <c>ram_nodes</c>, + <c>disc_nodes</c> and <c>disc_only_nodes</c> currently + accessible from the coordinator node), a lightweight transaction + commit protocol is used. + </p> + <p>The number of messages that the + transaction coordinator and its participants need to exchange + is small, since Mnesia's table load mechanism takes care of the + transaction recovery if the commit protocol gets + interrupted. Since all involved tables are replicated + symmetrically the transaction will automatically be recovered by + loading the involved tables from the same node at start-up of a + failing node. We do not really care if the transaction was + aborted or committed as long as we can ensure the ACID + properties. The lightweight commit protocol is non-blocking, + i.e. the surviving participants and their coordinator will + finish the transaction, regardless of whether some node crashes in the + middle of the commit protocol or not. 
+ </p> + <p>If a node goes down in the middle of a dirty operation the + table load mechanism will ensure that the update will be + performed on all replicas or none. Both asynchronous dirty + updates and synchronous dirty updates use the same recovery + principle as lightweight transactions. + </p> + <p>If a transaction involves updates of asymmetrically replicated + tables or updates of the schema table, a heavyweight commit + protocol will be used. The heavyweight commit protocol is able + to finish the transaction regardless of how the tables are + replicated. The typical usage of a heavyweight transaction is + when we want to move a replica from one node to another. Then we + must ensure that the replica either is entirely moved or left as + it was. We must never end up in a situation with replicas on both + nodes or no node at all. Even if a node crashes in the middle of + the commit protocol, the transaction must be guaranteed to be + atomic. The heavyweight commit protocol involves more messages + between the transaction coordinator and its participants than + a lightweight protocol and it will perform recovery work at + start-up in order to finish the abort or commit work. + </p> + <p>The heavyweight commit protocol is also non-blocking, + which allows the surviving participants and their coordinator to + finish the transaction regardless (even if a node crashes in the + middle of the commit protocol). When a node fails at start-up, + Mnesia will determine the outcome of the transaction and + recover it. Lightweight protocols, heavyweight protocols and dirty updates, are + dependent on other nodes to be up and running in order to make the + correct heavyweight transaction recovery decision. + </p> + <p>If Mnesia has not started on some of the nodes that are involved in the + transaction AND neither the local node or any of the already + running nodes know the outcome of the transaction, Mnesia will + by default wait for one. 
In the worst case scenario all other + involved nodes must start before Mnesia can make the correct decision + about the transaction and finish its start-up. + </p> + <p>This means that Mnesia (on one node) may hang if a double fault occurs, i.e. when two nodes crash simultaneously + and one attempts to start when the other refuses to + start, e.g. due to a hardware error. + </p> + <p>It is possible to specify the maximum time that Mnesia + will wait for other nodes to respond with a transaction + recovery decision. The configuration parameter + <c>max_wait_for_decision</c> defaults to infinity (which may + cause the indefinite hanging as mentioned above) but if it is + set to a definite time period (e.g. three minutes), Mnesia will then enforce a + transaction recovery decision if needed, in order to allow + Mnesia to continue with its start-up procedure. </p> + <p>The downside of an enforced transaction recovery decision is that the decision may be + incorrect, due to insufficient information regarding the other nodes' + recovery decisions. This may result in an + inconsistent database where Mnesia has committed the transaction + on some nodes but aborted it on others. </p> + <p>In fortunate cases the inconsistency will only appear in tables belonging to a specific + application, but if a schema transaction has been inconsistently + recovered due to the enforced transaction recovery decision, the + effects of the inconsistency can be fatal. + However, if the higher priority is availability rather than + consistency, then it may be worth the risk. </p> + <p>If Mnesia + encounters an inconsistent transaction decision, a + <c>{inconsistent_database, bad_decision, Node}</c> system event + will be generated in order to give the application a chance to + install a fallback or take other appropriate measures to resolve the inconsistency. 
The default + behavior of the Mnesia event handler is the same as if the + database became inconsistent as a result of a partitioned network (see + above). + </p> + </section> + + <section> + <title>Backup, Fallback, and Disaster Recovery</title> + <p>The following functions are used to back up data, to install a + backup as fallback, and for disaster recovery. + </p> + <list type="bulleted"> + <item><c>mnesia:backup_checkpoint(Name, Opaque, [Mod])</c>. This + function performs a backup of the tables included in the + checkpoint. + </item> + <item><c>mnesia:backup(Opaque, [Mod])</c>. This function + activates a new checkpoint which covers all Mnesia tables and + performs a backup. It is performed with maximum degree of + redundancy (also refer to the function <seealso marker="#checkpoints">mnesia:activate_checkpoint(Args)</seealso>, + <c>{max, MaxTabs}</c> and <c>{min, MinTabs}</c>).</item> + <item><c>mnesia:traverse_backup(Source,[SourceMod,]</c><c>Target,[TargetMod,]Fun,Acc)</c>. This function can be used + to read an existing backup, create a new backup from an + existing one, or to copy a backup from one type of media to + another. + </item> + <item><c>mnesia:uninstall_fallback()</c>. This function removes + previously installed fallback files. + </item> + <item><c>mnesia:restore(Opaque, Args)</c>. This function + restores a set of tables from a previous backup. + </item> + <item><c>mnesia:install_fallback(Opaque, [Mod])</c>. This + function can be configured to restart Mnesia and reload data + tables, and possibly schema tables, from an existing + backup. This function is typically used for disaster recovery + purposes, when data or schema tables are corrupted.</item> + </list> + <p>These functions are explained in the following + sub-sections. Also refer to the section <seealso marker="#checkpoints">Checkpoints</seealso> in this chapter, which + describes the two functions used to activate and de-activate + checkpoints. 
+ </p> + + <section> + <title>Backup</title> + <p>Backup operations are performed with the following functions: + </p> + <list type="bulleted"> + <item><c>mnesia:backup_checkpoint(Name, Opaque, [Mod])</c></item> + <item><c>mnesia:backup(Opaque, [Mod])</c></item> + <item><c>mnesia:traverse_backup(Source, [SourceMod,]</c><c>Target,[TargetMod,]Fun,Acc)</c>.</item> + </list> + <p>By default, the actual access to the backup media is + performed via the <c>mnesia_backup</c> module for both read + and write. Currently <c>mnesia_backup</c> is implemented with + the standard library module <c>disc_log</c>, but it is possible to write + your own module with the same interface as + <c>mnesia_backup</c> and configure Mnesia so the alternate + module performs the actual accesses to the backup media. This + means that the user may put the backup on media that Mnesia + does not know about, possibly on hosts where Erlang is not + running. Use the configuration parameter <c><![CDATA[-mnesia backup_module <module>]]></c> for this purpose. </p> + <p>The source + for a backup is an activated checkpoint. The backup function + most commonly used is <c>mnesia:backup_checkpoint(Name, Opaque,[Mod])</c>. This function returns either <c>ok</c>, or + <c>{error,Reason}</c>. It has the following arguments: + </p> + <list type="bulleted"> + <item><c>Name</c> is the name of an activated + checkpoint. Refer to the section <seealso marker="#checkpoints">Checkpoints</seealso> in this chapter, the + function <c>mnesia:activate_checkpoint(ArgList)</c> for + details on how to include table names in checkpoints. + </item> + <item><c>Opaque</c>. Mnesia does not interpret this argument, + but it is forwarded to the backup module. The Mnesia default + backup module, <c>mnesia_backup</c>, interprets this argument + as a local file name. + </item> + <item><c>Mod</c>. The name of an alternate backup module. 
+ </item> + </list> + <p>The function <c>mnesia:backup(Opaque[, Mod])</c> activates a + new checkpoint which covers all Mnesia tables with maximum + degree of redundancy and performs a backup. Maximum + redundancy means that each table replica has a checkpoint + retainer. Tables with the <c>local_contents</c> property are + backed up as they + look on the current node. + </p> + <p>It is possible to iterate over a backup, either for the + purpose of transforming it into a new backup, or just reading + it. The function <c>mnesia:traverse_backup(Source, [SourceMod,]</c><c>Target, [TargetMod,] Fun, Acc)</c>, which normally returns <c>{ok, LastAcc}</c>, is used for both of these purposes. + </p> + <p>Before the traversal starts, the source backup media is + opened with <c>SourceMod:open_read(Source)</c>, and the target + backup media is opened with + <c>TargetMod:open_write(Target)</c>. The arguments are: + </p> + <list type="bulleted"> + <item><c>SourceMod</c> and <c>TargetMod</c> are module names. + </item> + <item><c>Source</c> and <c>Target</c> are opaque data used + exclusively by the modules <c>SourceMod</c> and + <c>TargetMod</c> for the purpose of initializing the backup + media. + </item> + <item><c>Acc</c> is an initial accumulator value. + </item> + <item><c>Fun(BackupItems, Acc)</c> is applied to each item in + the backup. The Fun must return a tuple <c>{ValidBackupItems, NewAcc}</c>, where <c>ValidBackupItems</c> is a list of valid + backup items, and <c>NewAcc</c> is a new accumulator value. + The <c>ValidBackupItems</c> are written to the target backup + with the function <c>TargetMod:write/2</c>. + </item> + <item><c>LastAcc</c> is the last accumulator value, i.e. + the last <c>NewAcc</c> value that was returned by <c>Fun</c>. + </item> + </list> + <p>It is also possible to perform a read-only traversal of the + source backup without updating a target backup. If + <c>TargetMod==read_only</c>, then no target backup is accessed + at all. 
+ </p> + <p>By setting <c>SourceMod</c> and <c>TargetMod</c> to different + modules it is possible to copy a backup from one kind of backup + media to another. + </p> + <p>Valid <c>BackupItems</c> are the following tuples: + </p> + <list type="bulleted"> + <item><c>{schema, Tab}</c> specifies a table to be deleted. + </item> + <item><c>{schema, Tab, CreateList}</c> specifies a table to be + created. See <c>mnesia:create_table/2</c> for more + information about <c>CreateList</c>. + </item> + <item><c>{Tab, Key}</c> specifies the full identity of a record + to be deleted. + </item> + <item><c>{Record}</c> specifies a record to be inserted. It + can be a tuple with <c>Tab</c> as its first field. Note that the + record name is set to the table name regardless of what + <c>record_name</c> is set to. + </item> + </list> + <p>The backup data is divided into two sections. The first + section contains information related to the schema. All schema + related items are tuples where the first field equals the atom + <c>schema</c>. The second section is the record section. It is not + possible to mix schema records with other records and all schema + records must be located first in the backup. + </p> + <p>The schema itself is a table and will possibly be included in + the backup. All nodes where the schema table resides are + regarded as a <c>db_node</c>. + </p> + <p>The following example illustrates how + <c>mnesia:traverse_backup</c> can be used to rename a db_node in + a backup file: + </p> + <codeinclude file="bup.erl" tag="%0" type="erl"></codeinclude> + </section> + + <section> + <title>Restore</title> + <p>Tables can be restored on-line from a backup without + restarting Mnesia. A restore is performed with the function + <c>mnesia:restore(Opaque,Args)</c>, where <c>Args</c> can + contain the following tuples: + </p> + <list type="bulleted"> + <item><c>{module,Mod}</c>. The backup module <c>Mod</c> is + used to access the backup media. 
If omitted, the default + backup module will be used.</item> + <item><c>{skip_tables, TableList}</c> Where <c>TableList</c> + is a list of tables which should not be read from the backup.</item> + <item><c>{clear_tables, TableList}</c> Where <c>TableList</c> + is a list of tables which should be cleared, before the + records from the backup are inserted, i.e. all records in + the tables are deleted before the tables are restored. + Schema information about the tables is not cleared or read + from backup.</item> + <item><c>{keep_tables, TableList}</c> Where <c>TableList</c> + is a list of tables which should not be cleared, before + the records from the backup are inserted, i.e. the records + in the backup will be added to the records in the table. + Schema information about the tables is not cleared or read + from backup.</item> + <item><c>{recreate_tables, TableList}</c> Where <c>TableList</c> + is a list of tables which should be re-created, before the + records from the backup are inserted. The tables are first + deleted and then created with the schema information from the + backup. All the nodes in the backup need to be up and running.</item> + <item><c>{default_op, Operation}</c> Where <c>Operation</c> is + one of the following operations: <c>skip_tables</c>, + <c>clear_tables</c>, <c>keep_tables</c> or + <c>recreate_tables</c>. The default operation specifies + which operation should be used on tables from the backup + which are not specified in any of the lists above. + If omitted, the operation <c>clear_tables</c> will be used. </item> + </list> + <p>The argument <c>Opaque</c> is forwarded to the backup module. + The function returns <c>{atomic, TabList}</c> if successful, or the + tuple <c>{aborted, Reason}</c> in the case of an error. + <c>TabList</c> is a list of the restored tables. Tables which + are restored are write locked for the duration of the restore + operation. 
However, regardless of any lock conflict caused by + this, applications can continue to do their work during the + restore operation. + </p> + <p>The restoration is performed as a single transaction. If the + database is very large, it may not be possible to restore it + online. In such a case the old database must be restored by + installing a fallback, and then restarting. + </p> + </section> + + <section> + <title>Fallbacks</title> + <p>The function <c>mnesia:install_fallback(Opaque, [Mod])</c> is + used to install a backup as fallback. It uses the backup module + <c>Mod</c>, or the default backup module, to access the backup + media. This function returns <c>ok</c> if successful, or + <c>{error, Reason}</c> in the case of an error. + </p> + <p>Installing a fallback is a distributed operation that is + <em>only</em> performed on all <c>db_nodes</c>. The fallback + is used to restore the database the next time the system is + started. If a Mnesia node with a fallback installed detects that + Mnesia on another node has died for some reason, it will + unconditionally terminate itself. + </p> + <p>A fallback is typically used when a system upgrade is + performed. A system upgrade typically involves the installation of new + software versions, and Mnesia tables are often transformed into + new layouts. If the system crashes during an upgrade, it is + highly probable that re-installation of the old + applications and restoration of the database + to its previous state will be required. This can be done if a backup is performed and + installed as a fallback before the system upgrade begins. + </p> + <p>If the system upgrade fails, Mnesia must be restarted on all + <c>db_nodes</c> in order to restore the old database. The + fallback will be automatically de-installed after a successful + start-up. The function <c>mnesia:uninstall_fallback()</c> may + also be used to de-install the fallback after a + successful system upgrade. 
Again, this is a distributed + operation that is either performed on all <c>db_nodes</c>, or + none. Both the installation and de-installation of fallbacks + require Erlang to be up and running on all <c>db_nodes</c>, but + it does not matter if Mnesia is running or not. + </p> + </section> + + <section> + <title>Disaster Recovery</title> + <p>The system may become inconsistent as a result of a power + failure. The UNIX <c>fsck</c> feature can possibly repair the + file system, but there is no guarantee that the file contents + will be consistent. + </p> + <p>If Mnesia detects that a file has not been properly closed, + possibly as a result of a power failure, it will attempt to + repair the bad file in a similar manner. Data may be lost, but + Mnesia can be restarted even if the data is inconsistent. The + configuration parameter <c><![CDATA[-mnesia auto_repair <bool>]]></c> can be + used to control the behavior of Mnesia at start-up. If + <c><![CDATA[<bool>]]></c> has the value <c>true</c>, Mnesia will attempt to + repair the file; if <c><![CDATA[<bool>]]></c> has the value <c>false</c>, + Mnesia will not restart if it detects a suspect file. This + configuration parameter affects the repair behavior of log + files, DAT files, and the default backup media. + </p> + <p>The configuration parameter <c><![CDATA[-mnesia dump_log_update_in_place <bool>]]></c> controls the safety level of + the <c>mnesia:dump_log()</c> function. By default, Mnesia will + dump the transaction log directly into the DAT files. If a power + failure happens during the dump, this may cause the randomly + accessed DAT files to become corrupt. If the parameter is set to + <c>false</c>, Mnesia will copy the DAT files and target the dump + to the new temporary files. If the dump is successful, the + temporary files will be renamed to their normal DAT + suffixes. The possibility for unrecoverable inconsistencies in + the data files will be much smaller with this strategy. 
On the + other hand, the actual dumping of the transaction log will be + considerably slower. The system designer must decide whether + speed or safety is the higher priority. + </p> + <p>Replicas of type <c>disc_only_copies</c> will only be + affected by this parameter during the initial dump of the log + file at start-up. When designing applications which have + <em>very</em> high requirements, it may be appropriate not to + use <c>disc_only_copies</c> tables at all. The reason for this + is the random access nature of normal operating system files. If + a node goes down for a reason such as a power + failure, these files may be corrupted because they are not + properly closed. The DAT files for <c>disc_only_copies</c> are + updated on a per transaction basis. + </p> + <p>If a disaster occurs and the Mnesia database has been + corrupted, it can be reconstructed from a backup. This should be + regarded as a last resort, since the backup contains old data. The + data is hopefully consistent, but data will definitely be lost + when an old backup is used to restore the database. + </p> + </section> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/Mnesia_chap8.xml b/lib/mnesia/doc/src/Mnesia_chap8.xml new file mode 100644 index 0000000000..3d2e23cf57 --- /dev/null +++ b/lib/mnesia/doc/src/Mnesia_chap8.xml @@ -0,0 +1,64 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. 
+ + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Combining Mnesia with SNMP</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file>Mnesia_chap8.xml</file> + </header> + + <section> + <title>Combining Mnesia and SNMP </title> + <p>Many telecommunications applications must be controlled and + reconfigured remotely. It is sometimes an advantage to perform + this remote control with an open protocol such as the Simple + Network Management Protocol (SNMP). The alternatives to this would + be: + </p> + <list type="bulleted"> + <item>Not being able to control the application remotely at all. + </item> + <item>Using a proprietary control protocol. + </item> + <item>Using a bridge which maps control messages in a + proprietary protocol to a standardized management protocol and + vice versa. + </item> + </list> + <p>All of these approaches have different advantages and + disadvantages. Mnesia applications can easily be opened to the + SNMP protocol. It is possible to establish a direct one-to-one + mapping between Mnesia tables and SNMP tables. This + means that a Mnesia table can be configured to be <em>both</em> + a Mnesia table and an SNMP table. A number of functions to + control this behavior are described in the Mnesia reference + manual. 
+ </p> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/book.gif b/lib/mnesia/doc/src/book.gif Binary files differnew file mode 100644 index 0000000000..94b3868792 --- /dev/null +++ b/lib/mnesia/doc/src/book.gif diff --git a/lib/mnesia/doc/src/book.xml b/lib/mnesia/doc/src/book.xml new file mode 100644 index 0000000000..5389e615c7 --- /dev/null +++ b/lib/mnesia/doc/src/book.xml @@ -0,0 +1,48 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE book SYSTEM "book.dtd"> + +<book xmlns:xi="http://www.w3.org/2001/XInclude"> + <header titlestyle="normal"> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Mnesia</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <docno></docno> + <date>1997-05-27</date> + <rev>1.2</rev> + <file>book.sgml</file> + </header> + <insidecover> + </insidecover> + <pagetext>Mnesia DBMS</pagetext> + <preamble> + <contents level="2"></contents> + </preamble> + <parts lift="no"> + <xi:include href="part.xml"/> + </parts> + <applications> + <xi:include href="ref_man.xml"/> + </applications> + <releasenotes> + <xi:include href="notes.xml"/> + </releasenotes> + <index></index> +</book> + diff --git a/lib/mnesia/doc/src/bup.erl b/lib/mnesia/doc/src/bup.erl new file mode 100644 index 0000000000..b9e541ad6a --- /dev/null +++ b/lib/mnesia/doc/src/bup.erl @@ -0,0 +1,239 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%% +-module(bup). +-export([ + change_node_name/5, + view/2, + test/0, + test/1 + ]). + +-export([ + count/1, + display/1 + ]). 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Management of backups, a few demos
+
+%0
+%% Traverses the backup Source and writes a converted copy to Target,
+%% using the backup module Mod for both. Every occurrence of the node
+%% name From in the db_nodes item and in the replica lists of the table
+%% definitions is replaced by To. Returns whatever
+%% mnesia:traverse_backup/6 returns.
+change_node_name(Mod, From, To, Source, Target) ->
+    %% From becomes To, any other node is kept. Meeting To among the
+    %% existing names aborts the traversal with a throw.
+    Rename =
+        fun(Node) when Node == From -> To;
+           (Node) when Node == To -> throw({error, already_exists});
+           (Node) -> Node
+        end,
+    %% Only the three replica options carry node lists; every other
+    %% table option passes through untouched.
+    RenameOpt =
+        fun({Key, Val}) when Key =:= ram_copies;
+                             Key =:= disc_copies;
+                             Key =:= disc_only_copies ->
+                {Key, [Rename(N) || N <- Val]};
+           ({Key, Val}) ->
+                {Key, Val}
+        end,
+    %% The version and cookie clauses must precede the generic
+    %% {schema, Tab, CreateList} clause, which would otherwise match them.
+    Convert =
+        fun({schema, db_nodes, Nodes}, Acc) ->
+                {[{schema, db_nodes, [Rename(N) || N <- Nodes]}], Acc};
+           ({schema, version, _} = Item, Acc) ->
+                {[Item], Acc};
+           ({schema, cookie, _} = Item, Acc) ->
+                {[Item], Acc};
+           ({schema, Tab, CreateList}, Acc) ->
+                {[{schema, Tab, [RenameOpt(Opt) || Opt <- CreateList]}], Acc};
+           (Item, Acc) ->
+                {[Item], Acc}
+        end,
+    mnesia:traverse_backup(Source, Mod, Target, Mod, Convert, switched).
+
+%% Prints every item of the backup Source (read through Mod) and counts
+%% them; no target backup is written since the target module is read_only.
+view(Source, Mod) ->
+    PrintAndCount =
+        fun(Item, Count) ->
+                io:format("~p.~n",[Item]),
+                {[Item], Count + 1}
+        end,
+    mnesia:traverse_backup(Source, Mod, dummy, read_only, PrintAndCount, 0).
+%0
+
+-record(bup_rec, {key, val}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Test change of node name
+%%
+%% Assume db_nodes to be current node and on all other nodes but one
+%% Create new schema, start Mnesia on all db_nodes
+%% Create a table of disc_copies type which is replicated to all db_nodes
+%% Perform a backup and change current node to unused node in backup
+%% Start Mnesia on all nodes according to the new set of db_nodes
+%%
+%% test/0 runs the test against the nodes currently connected to this one.
+test() ->
+    ConnectedNodes = nodes(),
+    test(ConnectedNodes).
+
+%% Runs the node rename test on Nodes plus the current node.
+%% Returns ok on success, {error, Reason} if test2/3 exits, or
+%% {error, {String, AllNodes}} when fewer than two nodes are available.
+%% The two temporary backup files are removed on every path, including
+%% when count/1 does not return ok and the assertion below crashes.
+test(Nodes)->
+    AllNodes = (Nodes -- [node()]) ++ [node()],
+    case AllNodes of
+        [_, _ | _] ->
+            OldBup = "old.BUP",
+            NewBup = "new.BUP",
+            try
+                case (catch test2(AllNodes, OldBup, NewBup)) of
+                    {'EXIT', Reason} ->
+                        {error, Reason};
+                    ok ->
+                        ok = count(NewBup)
+                end
+            after
+                file:delete(OldBup),
+                file:delete(NewBup)
+            end;
+        _ ->
+            {error,{"Must run on at least one other node",AllNodes}}
+    end.
+
+%% Creates a backup on the old node set, renames the current node to an
+%% unused node inside the backup, installs the result as a fallback on
+%% that node and checks that the records survived. Crashes on failure.
+test2(AllNodes, OldBup, NewBup) ->
+    ThisNode = node(),
+    OtherNode = hd(AllNodes -- [ThisNode]),
+    OldNodes = AllNodes -- [OtherNode],
+    NewNodes = AllNodes -- [ThisNode],
+    Mod = mnesia_backup, % Assume local files
+    file:delete(OldBup),
+    file:delete(NewBup),
+
+    %% Create old backup
+    rpc:multicall(AllNodes, mnesia, lkill, []),
+    ok = mnesia:delete_schema(AllNodes),
+    ok = mnesia:create_schema(OldNodes),
+    rpc:multicall(OldNodes, mnesia, start, []),
+    rpc:multicall(OldNodes, mnesia, wait_for_tables, [[schema], infinity]),
+
+    CreateList = [{disc_copies, OldNodes},
+                  {attributes, record_info(fields, bup_rec)}],
+    {atomic, ok} = mnesia:create_table(bup_rec, CreateList),
+    rpc:multicall(OldNodes, mnesia, wait_for_tables, [[bup_rec], infinity]),
+    OldRecs = [#bup_rec{key = I, val = I * I} || I <- lists:seq(1, 10)],
+    lists:foreach(fun(R) -> ok = mnesia:dirty_write(R) end,OldRecs),
+    ok = mnesia:backup(OldBup, Mod),
+    %% Written after the backup, so it must not show up in NewRecs below.
+    ok = mnesia:dirty_write(#bup_rec{key = 4711, val = 4711}),
+    rpc:multicall(OldNodes, mnesia, stop, []),
+    {ok,_} = view(OldBup, Mod),
+
+    %% Change node name
+    {ok,_} = change_node_name(Mod, ThisNode, OtherNode, OldBup, NewBup),
+    ok = rpc:call(OtherNode, mnesia, install_fallback, [NewBup, Mod]),
+    {_NewStartRes,[]} = rpc:multicall(NewNodes, mnesia, start, []),
+    rpc:call(OtherNode, mnesia, wait_for_tables, [[bup_rec], infinity]),
+    Wild = rpc:call(OtherNode, mnesia, table_info, [bup_rec, wild_pattern]),
+    NewRecs = rpc:call(OtherNode, mnesia, dirty_match_object, [Wild]),
+    rpc:multicall(NewNodes, mnesia, stop, []),
+    {ok,_} = view(NewBup, Mod),
+
+    %% Sanity test
+    case {lists:sort(OldRecs), lists:sort(NewRecs)} of
+        {Same, Same} -> ok
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-record(state, {counter_tab, size_tab, acc_size = 0, n_records = 0}).
+
+
+%% Iterates over a backup file and shows some statistics.
+%% Two temporary ets tables hold the counters and the largest record
+%% size per table. Both are deleted before the function returns, also
+%% when the traversal or the display raises an exception.
+%% Returns the result of display/1, or {error, Reason} if the traversal
+%% fails.
+count(BupFile) ->
+    CounterTab = ets:new(?MODULE, [set, public]),
+    SizeTab = ets:new(?MODULE, [set, public]),
+    try
+        Mod = mnesia:system_info(backup_module),
+        State = #state{counter_tab = CounterTab, size_tab = SizeTab},
+        case mnesia:traverse_backup(BupFile, Mod, dummy, read_only, fun incr/2, State) of
+            {ok, State2} ->
+                display(State2);
+            {error, Reason} ->
+                {error, Reason}
+        end
+    after
+        ets:delete(CounterTab),
+        ets:delete(SizeTab)
+    end.
+
+%% Traversal callback: accounts for one backup item and passes it on
+%% unchanged. The first two elements of the item are used as its
+%% {Tab, Key} identity.
+incr(Rec, State) ->
+    Tab = element(1, Rec),
+    Key = element(2, Rec),
+    Oid = {Tab, Key},
+    incr_counter(State#state.counter_tab, Oid),
+    Size = byte_size(term_to_binary(Rec)),
+    max_size(State#state.size_tab, Tab, Key, Size),
+    AccSize = State#state.acc_size,
+    N = State#state.n_records,
+    State2 = State#state{acc_size = AccSize + Size, n_records = N + 1},
+    {[Rec], State2}.
+
+%% Bumps the counter stored under Counter in table T, creating it with
+%% the value 1 when it does not exist yet (update_counter raises badarg
+%% for a missing key).
+incr_counter(T, Counter) ->
+    try ets:update_counter(T, Counter, 1) of
+        _ ->
+            ignore
+    catch
+        error:badarg ->
+            ets:insert(T, {Counter, 1})
+    end.
+
+%% Remembers, per table Tab, the size and key of the largest record
+%% seen so far. An entry is written when none exists or when Size is
+%% strictly larger than the stored one.
+max_size(T, Tab, Key, Size) ->
+    case ets:lookup(T, Tab) of
+        [{_, OldSize, _}] when OldSize >= Size ->
+            ignore;
+        _ ->
+            ets:insert(T, {Tab, Size, Key})
+    end.
+
+%% Prints the statistics collected in the ets tables referenced by State:
+%% first a summary line, then one section per table in sorted order.
+%% The table names are taken from the {schema, Tab} counter entries.
+display(#state{counter_tab = CounterTab,
+               n_records = NoOfRecords,
+               acc_size = Bytes} = State) ->
+    SchemaCounters = match_tab(CounterTab, schema),
+    Tabs = [T || {{_, T}, _} <- SchemaCounters],
+    io:format("~w tables with totally: ~w records, ~w keys, ~w bytes~n",
+              [length(Tabs), NoOfRecords, ets:info(CounterTab, size), Bytes]),
+    display(State, lists:sort(Tabs)).
+
+%% Prints the section for each table in the list: number of distinct
+%% keys, every key that occurred five times or more, and the largest
+%% record if display_size/2 finds it worth reporting. Returns ok.
+display(State, Tabs) ->
+    PrintTab =
+        fun(Tab) ->
+                Counters = match_tab(State#state.counter_tab, Tab),
+                io:format("~-10w records in table ~w~n", [length(Counters), Tab]),
+                PrintFrequent =
+                    fun({Oid, Val}) ->
+                            case Val < 5 of
+                                true ->
+                                    ignore;
+                                false ->
+                                    io:format("~-10w *** records with key ~w~n", [Val, Oid])
+                            end
+                    end,
+                lists:foreach(PrintFrequent, Counters),
+                display_size(State#state.size_tab, Tab)
+        end,
+    lists:foreach(PrintTab, Tabs),
+    ok.
+
+%% Returns all {{Tab, Key}, Count} entries of table T that belong to Tab.
+match_tab(T, Tab) ->
+    Pattern = {{Tab, '_'}, '_'},
+    ets:match_object(T, Pattern).
+
+%% Reports the largest record of Tab, but only when it exceeds 1000 bytes.
+display_size(T, Tab) ->
+    case catch ets:lookup(T, Tab) of
+        [{_, Size, Key}] when Size > 1000 ->
+            io:format("~-10w --- bytes occupied by largest record ~w~n",
+                      [Size, {Tab, Key}]);
+        [{_, _, _}] ->
+            ignore;
+        [] ->
+            ignore
+    end.
diff --git a/lib/mnesia/doc/src/company.erl b/lib/mnesia/doc/src/company.erl new file mode 100644 index 0000000000..28c32ed513 --- /dev/null +++ b/lib/mnesia/doc/src/company.erl @@ -0,0 +1,373 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-module(company).
+
+-compile(export_all).
+
+%0
+
+-include_lib("stdlib/include/qlc.hrl").
+-include("company.hrl").
+
+%% Creates the six tables of the company example, in the order listed.
+%% Only the attributes option (and the bag type for manager and in_proj)
+%% is given. Returns the result of the last mnesia:create_table/2 call.
+init() ->
+    Specs =
+        [{employee, [{attributes, record_info(fields, employee)}]},
+         {dept,     [{attributes, record_info(fields, dept)}]},
+         {project,  [{attributes, record_info(fields, project)}]},
+         {manager,  [{type, bag},
+                     {attributes, record_info(fields, manager)}]},
+         {at_dep,   [{attributes, record_info(fields, at_dep)}]},
+         {in_proj,  [{type, bag},
+                     {attributes, record_info(fields, in_proj)}]}],
+    Create = fun({Tab, Opts}, _Previous) -> mnesia:create_table(Tab, Opts) end,
+    lists:foldl(Create, ok, Specs).
+
+%0
+
+%1
+
+%% Stores the employee record Emp together with its department relation
+%% and one project relation per name in ProjNames, in one transaction.
+%% NOTE(review): the relations are keyed by the employee *name* here,
+%% while fill_tables/0 and the qlc queries in this file use emp_no for
+%% at_dep.emp and in_proj.emp -- confirm which key is intended.
+insert_emp(Emp, DeptId, ProjNames) ->
+    Ename = Emp#employee.name,
+    mnesia:transaction(
+      fun() ->
+              mnesia:write(Emp),
+              mnesia:write(#at_dep{emp = Ename, dept_id = DeptId}),
+              mk_projs(Ename, ProjNames)
+      end).
+
+
+%% Writes one in_proj record per project name. Returns ok.
+mk_projs(Ename, ProjNames) ->
+    WriteProj =
+        fun(ProjName) ->
+                mnesia:write(#in_proj{emp = Ename, proj_name = ProjName})
+        end,
+    lists:foreach(WriteProj, ProjNames).
+
+
+%1
+
+%2
+%% Names of all female employees, found with a qlc query.
+females() ->
+    mnesia:transaction(
+      fun() ->
+              qlc:e(qlc:q([E#employee.name || E <- mnesia:table(employee),
+                                              E#employee.sex == female]))
+      end).
+%2
+%20
+%% Same result as females/0, expressed as a match specification.
+all_females() ->
+    mnesia:transaction(
+      fun() ->
+              Pattern = #employee{sex = female, name = '$1', _ = '_'},
+              MatchSpec = [{Pattern, [], ['$1']}],
+              mnesia:select(employee, MatchSpec)
+      end).
+%20
+
+g() -> l.
+
+%3
+%% Pairs each female employee's name with the name of every manager of
+%% the department she works at.
+female_bosses() ->
+    Query = qlc:q( [{E#employee.name, Boss#employee.name} ||
+                       E <- mnesia:table(employee),
+                       Boss <- mnesia:table(employee),
+                       Atdep <- mnesia:table(at_dep),
+                       Mgr <- mnesia:table(manager),
+                       E#employee.sex == female,
+                       Atdep#at_dep.emp == E#employee.emp_no,
+                       Mgr#manager.emp == Boss#employee.emp_no,
+                       Atdep#at_dep.dept_id == Mgr#manager.dept]
+                  ),
+    Evaluate = fun() -> qlc:e(Query) end,
+    mnesia:transaction(Evaluate).
+%3
+
+%4
+%% Adds Amount to the salary of every female employee inside one
+%% transaction. The transaction result is the number of updated records.
+raise_females(Amount) ->
+    mnesia:transaction(
+      fun() ->
+              Females = qlc:e(qlc:q([E || E <- mnesia:table(employee),
+                                          E#employee.sex == female])),
+              over_write(Females, Amount)
+      end).
+
+%% Writes back each employee with the salary increased by Amount and
+%% returns how many records were written.
+over_write(Employees, Amount) ->
+    WriteRaised =
+        fun(E, Count) ->
+                mnesia:write(E#employee{salary = E#employee.salary + Amount}),
+                Count + 1
+        end,
+    lists:foldl(WriteRaised, 0, Employees).
+%4
+
+%5
+%% Raises the salary of employee Eno by Raise. The record is read with
+%% a write lock before it is updated.
+raise(Eno, Raise) ->
+    mnesia:transaction(
+      fun() ->
+              [E] = mnesia:read(employee, Eno, write),
+              mnesia:write(E#employee{salary = E#employee.salary + Raise})
+      end).
+%5
+
+
+%6
+%% Variant of raise/2 that reads without requesting a write lock and
+%% prints from inside the transaction fun.
+bad_raise(Eno, Raise) ->
+    mnesia:transaction(
+      fun() ->
+              [E] = mnesia:read({employee, Eno}),
+              Raised = E#employee{salary = E#employee.salary + Raise},
+              io:format("Trying to write ... ~n", []),
+              mnesia:write(Raised)
+      end).
+%6
+
+%9
+%% Employees at department Dep earning more than Salary (qlc version).
+get_emps(Salary, Dep) ->
+    Query = qlc:q(
+              [E || E <- mnesia:table(employee),
+                    At <- mnesia:table(at_dep),
+                    E#employee.salary > Salary,
+                    E#employee.emp_no == At#at_dep.emp,
+                    At#at_dep.dept_id == Dep]
+             ),
+    mnesia:transaction(fun() -> qlc:e(Query) end).
+%9
+%10
+%% Same question as get_emps/2, answered by fetching both tables with
+%% match_object and filtering by hand.
+get_emps2(Salary, Dep) ->
+    EmployeePattern = mnesia:table_info(employee, wild_pattern),
+    AtDepPattern = mnesia:table_info(at_dep, wild_pattern),
+    mnesia:transaction(
+      fun() ->
+              AllEmployees = mnesia:match_object(EmployeePattern),
+              WellPaid = filter(AllEmployees, Salary),
+              AllAtDeps = mnesia:match_object(AtDepPattern),
+              filter_deps(WellPaid, AllAtDeps, Dep)
+      end).
+
+
+%% Keeps the employees whose salary is greater than Salary.
+filter(Employees, Salary) ->
+    [E || E <- Employees, E#employee.salary > Salary].
+
+%% Keeps the employees for which search_deps/3 finds an at_dep record
+%% placing them in department Dep.
+%% NOTE(review): the lookup is made with the employee *name*, whereas
+%% get_emps/2 compares at_dep.emp with emp_no -- confirm which key is
+%% intended.
+filter_deps(Employees, Deps, Dep) ->
+    [E || E <- Employees, search_deps(E#employee.name, Deps, Dep)].
+
+
+%% True if some at_dep record in the list has emp equal to Name and
+%% dept_id equal to Dep. Elements that do not match are skipped, and
+%% anything but a non-empty list yields false.
+search_deps(Name, [#at_dep{emp = Emp, dept_id = DeptId} | _Tail], Dep)
+  when Emp == Name, DeptId == Dep ->
+    true;
+search_deps(Name, [_NoMatch | Tail], Dep) ->
+    search_deps(Name, Tail, Dep);
+search_deps(_Name, _Tail, _Dep) ->
+    false.
+
+%10
+
+
+
+%11
+%% Times 1000 insert_emp/3 calls, adds ram_copies replicas of the three
+%% affected tables on b@skeppet, and times the same 1000 calls again.
+%% Returns {T1, T2} with the two timer:tc/3 results.
+bench1() ->
+    Me = #employee{emp_no= 104732,
+                   name = klacke,
+                   salary = 7,
+                   sex = male,
+                   phone = 99586,
+                   room_no = {221, 015}},
+
+    Insert = fun() -> insert_emp(Me, 'B/DUR', [erlang, mnesia, otp]) end,
+    Before = timer:tc(company, dotimes, [1000, Insert]),
+    lists:foreach(fun(Tab) ->
+                          mnesia:add_table_copy(Tab, b@skeppet, ram_copies)
+                  end,
+                  [employee, at_dep, in_proj]),
+    After = timer:tc(company, dotimes, [1000, Insert]),
+    {Before, After}.
+
+%% Calls F() N times and returns ok.
+dotimes(0, _) ->
+    ok;
+dotimes(N, F) ->
+    F(),
+    dotimes(N - 1, F).
+
+%11
+
+
+
+
+
+%12
+
+%% Like init/0, but every table gets ram_copies replicas on a@gin and
+%% b@skeppet. Returns the result of the last mnesia:create_table/2 call.
+dist_init() ->
+    Copies = {ram_copies, [a@gin, b@skeppet]},
+    Specs =
+        [{employee, [Copies,
+                     {attributes, record_info(fields, employee)}]},
+         {dept,     [Copies,
+                     {attributes, record_info(fields, dept)}]},
+         {project,  [Copies,
+                     {attributes, record_info(fields, project)}]},
+         {manager,  [{type, bag},
+                     Copies,
+                     {attributes, record_info(fields, manager)}]},
+         {at_dep,   [Copies,
+                     {attributes, record_info(fields, at_dep)}]},
+         {in_proj,  [{type, bag},
+                     Copies,
+                     {attributes, record_info(fields, in_proj)}]}],
+    Create = fun({Tab, Opts}, _Previous) -> mnesia:create_table(Tab, Opts) end,
+    lists:foldl(Create, ok, Specs).
+%12
+
+%13
+%% Deletes the project ProjName and every in_proj record that refers
+%% to it, in one transaction.
+remove_proj(ProjName) ->
+    mnesia:transaction(
+      fun() ->
+              Members = qlc:e(qlc:q([X || X <- mnesia:table(in_proj),
+                                          X#in_proj.proj_name == ProjName]
+                                   )),
+              mnesia:delete({project, ProjName}),
+              del_in_projs(Members)
+      end).
+
+%% Deletes each given in_proj record. Returns done.
+del_in_projs(InProjs) ->
+    lists:foreach(fun(Ip) -> mnesia:delete_object(Ip) end, InProjs),
+    done.
+%13
+
+%14
+%% Waits up to 10000 ms for all company tables to become accessible.
+%% Returns synced, or exits through panic/1 with the tables that were
+%% still missing at the timeout.
+sync() ->
+    case mnesia:wait_for_tables(tabs(), 10000) of
+        {timeout, RemainingTabs} ->
+            panic(RemainingTabs);
+        ok ->
+            synced
+    end.
+
+%% The names of all tables used by the company example.
+tabs() -> [employee, dept, project, at_dep, in_proj, manager].
+
+%14
+
+
+%% Selects the names of all male employees whose room_no has a first
+%% element in the range 220..229.
+find_male_on_second_floor() ->
+    Select = fun() ->
+%21
+                     MatchHead = #employee{name='$1', sex=male, room_no={'$2', '_'}, _='_'},
+                     Guard = [{'>=', '$2', 220},{'<', '$2', 230}],
+                     Result = '$1',
+                     mnesia:select(employee,[{MatchHead, Guard, [Result]}])
+%21
+             end,
+    mnesia:transaction(Select).
+
+%% Terminates the calling process with the exit reason {panic, X}.
+panic(X) -> exit({panic, X}).
+
+
+%% Populates all six tables with sample data using dirty writes.
+%% Returns ok.
+fill_tables() ->
+    Emps =
+        [
+         {employee, 104465, "Johnson Torbjorn", 1, male, 99184, {242,038}},
+         {employee, 107912, "Carlsson Tuula", 2, female,94556, {242,056}},
+         {employee, 114872, "Dacker Bjarne", 3, male, 99415, {221,035}},
+         {employee, 104531, "Nilsson Hans", 3, male, 99495, {222,026}},
+         {employee, 104659, "Tornkvist Torbjorn", 2, male, 99514, {222,022}},
+         {employee, 104732, "Wikstrom Claes", 2, male, 99586, {221,015}},
+         {employee, 117716, "Fedoriw Anna", 1, female,99143, {221,031}},
+         {employee, 115018, "Mattsson Hakan", 3, male, 99251, {203,348}}
+        ],
+
+    Dept = [
+            {dept, 'B/SF', "Open Telecom Platform"},
+            {dept, 'B/SFP', "OTP - Product Development"},
+            {dept, 'B/SFR', "Computer Science Laboratory"}
+           ],
+
+    Projects = [
+                {project, erlang, 1},
+                {project, otp, 2},
+                {project, beam, 3},
+                {project, mnesia, 5},
+                {project, wolf, 6},
+                {project, documentation, 7},
+                {project, www, 8}
+               ],
+
+    Manager = [
+               {manager, 104465, 'B/SF'},
+               {manager, 104465, 'B/SFP'},
+               {manager, 114872, 'B/SFR'}
+              ],
+
+    At_dep = [
+              {at_dep, 104465, 'B/SF'},
+              {at_dep, 107912, 'B/SF'},
+              {at_dep, 114872, 'B/SFR'},
+              {at_dep, 104531, 'B/SFR'},
+              {at_dep, 104659, 'B/SFR'},
+              {at_dep, 104732, 'B/SFR'},
+              {at_dep, 117716, 'B/SFP'},
+              {at_dep, 115018, 'B/SFP'}
+             ],
+
+    %% NOTE(review): 104545 below does not occur as an emp_no in Emps
+    %% above (104531 does) -- possibly a typo; confirm before relying
+    %% on this row.
+    In_proj = [
+               {in_proj, 104465, otp},
+               {in_proj, 107912, otp},
+               {in_proj, 114872, otp},
+               {in_proj, 104531, otp},
+               {in_proj, 104531, mnesia},
+               {in_proj, 104545, wolf},
+               {in_proj, 104659, otp},
+               {in_proj, 104659, wolf},
+               {in_proj, 104732, otp},
+               {in_proj, 104732, mnesia},
+               {in_proj, 104732, erlang},
+               {in_proj, 117716, otp},
+               {in_proj, 117716, documentation},
+               {in_proj, 115018, otp},
+               {in_proj, 115018, mnesia}
+              ],
+
+    %% The comprehensions are evaluated for their side effect only;
+    %% the resulting lists are discarded.
+    [mnesia:dirty_write(W) || W <- Emps],
+    [mnesia:dirty_write(W) || W <- Dept],
+    [mnesia:dirty_write(W) || W <- Projects],
+    %% Relations
+    [mnesia:dirty_write(W) || W <- Manager],
+    [mnesia:dirty_write(W) || W <- At_dep],
+    [mnesia:dirty_write(W) || W <- In_proj],
+
+    ok.
diff --git a/lib/mnesia/doc/src/company.fig b/lib/mnesia/doc/src/company.fig new file mode 100644 index 0000000000..9d5fcab041 --- /dev/null +++ b/lib/mnesia/doc/src/company.fig @@ -0,0 +1,88 @@ +#FIG 3.1 +Portrait +Center +Inches +1200 2 +6 8550 2700 10950 3150 +2 2 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5 + 8550 2700 10950 2700 10950 3150 8550 3150 8550 2700 +4 0 -1 0 0 0 12 0.0000 4 180 495 8850 3000 Project\001 +-6 +6 4950 2700 7350 3150 +2 2 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5 + 4950 2700 7350 2700 7350 3150 4950 3150 4950 2700 +4 0 -1 0 0 0 12 0.0000 4 180 705 5325 3000 Employee\001 +-6 +6 1275 2775 3675 3225 +2 2 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5 + 1275 2775 3675 2775 3675 3225 1275 3225 1275 2775 +4 0 -1 0 0 0 12 0.0000 4 180 345 1650 3075 Dept\001 +-6 +6 600 4500 2325 5700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5 + 600 5100 1425 5700 2325 5100 1425 4500 600 5100 +4 0 -1 0 0 0 12 0.0000 4 180 630 1125 5175 Manager\001 +-6 +6 9000 4500 10725 5700 +6 9600 5025 10125 5250 +6 9600 5025 10125 5250 +4 0 -1 0 0 0 12 0.0000 4 180 525 9600 5175 In_proj\001 +-6 +-6 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5 + 9000 5100 9825 5700 10725 5100 9825 4500 9000 5100 +-6 +6 4125 2100 7950 2700 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 5287 2325 262 225 5025 2100 5550 2550 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 5812 2325 262 225 5550 2100 6075 2550 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 6337 2325 262 225 6075 2100 6600 2550 +1 2 0 1 -1 7 0 0 -1 0.000 1
0.0000 6862 2325 262 225 6600 2100 7125 2550 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 4612 2325 413 225 5025 2100 4200 2550 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 7537 2325 413 225 7950 2100 7125 2550 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 4800 2550 5925 2700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 5475 2550 5850 2700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 5850 2550 5850 2700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 6300 2550 5850 2700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 6750 2550 5850 2700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 7275 2550 5925 2700 +4 0 -1 0 0 0 12 0.0000 4 180 3540 4350 2400 emp_no name salary sex phone room_no\001 +-6 +6 3300 4500 5100 5775 +6 3900 5025 4425 5250 +4 0 -1 0 0 0 12 0.0000 4 180 525 3900 5175 At_dep\001 +-6 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 5 + 3323 5135 4148 5735 5048 5135 4148 4535 3323 5135 +-6 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 1875 2287 600 187 1275 2100 2475 2475 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 3075 2287 600 187 2475 2100 3675 2475 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 8850 2325 450 225 8400 2100 9300 2550 +1 2 0 1 -1 7 0 0 -1 0.000 1 0.0000 9750 2325 450 225 9300 2100 10200 2550 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 1575 3225 600 5100 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 2325 5100 5250 3150 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 5850 3150 5025 5175 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 3300 5100 2550 3225 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 7050 3150 9000 5100 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 10725 5100 9825 3150 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 1875 2475 2400 2775 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 3075 2475 2400 2775 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 9000 2550 9750 2700 +2 1 0 1 -1 7 0 0 -1 0.000 0 0 -1 0 0 2 + 9750 2550 9675 2700 +4 0 -1 0 0 0 12 0.0000 4 135 1500 1575 2325 id name\001 +4 0 -1 0 0 0 12 0.0000 4 135 1635 8775 2400 Name number \001 diff --git a/lib/mnesia/doc/src/company.gif b/lib/mnesia/doc/src/company.gif Binary files differnew file 
mode 100644 index 0000000000..3cd0185e69 --- /dev/null +++ b/lib/mnesia/doc/src/company.gif diff --git a/lib/mnesia/doc/src/company.hrl b/lib/mnesia/doc/src/company.hrl new file mode 100644 index 0000000000..85e0e6ff12 --- /dev/null +++ b/lib/mnesia/doc/src/company.hrl @@ -0,0 +1,50 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%%
+
+%% Record definitions for the company example database.
+%%
+%% employee, dept and project describe entities.  manager, at_dep and
+%% in_proj are relation records: in the sample data written by
+%% fill_tables/0 their emp field holds an employee's emp_no and their
+%% second field holds a dept id or a project name respectively.
+%% The first field of each record is the table key.
+
+%0
+-record(employee, {emp_no,
+                   name,
+                   salary,
+                   sex,
+                   phone,
+                   room_no}).
+
+-record(dept, {id,
+               name}).
+
+-record(project, {name,
+                  number}).
+
+
+-record(manager, {emp,
+                  dept}).
+
+-record(at_dep, {emp,
+                 dept_id}).
+
+-record(in_proj, {emp,
+                  proj_name}).
+ +%0 + + + + + + diff --git a/lib/mnesia/doc/src/company.ps b/lib/mnesia/doc/src/company.ps new file mode 100644 index 0000000000..64a45d07f3 --- /dev/null +++ b/lib/mnesia/doc/src/company.ps @@ -0,0 +1,213 @@ +%!PS-Adobe-2.0 +%%Title: company.fig +%%Creator: fig2dev Version 3.1 Patchlevel 2 +%%CreationDate: Thu Oct 31 18:09:46 1996 +%%For: klacke@gin (Claes Wikstrom,EUA/SU) +%Magnification: 0.70 +%%Orientation: Portrait +%%BoundingBox: 79 343 516 498 +%%Pages: 1 +%%BeginSetup +%%IncludeFeature: *PageSize A4 +%%EndSetup +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +55.0 585.5 
translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def + /DrawEllipse { + /endangle exch def + /startangle exch def + /yrad exch def + /xrad exch def + /y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n 0 842 m 0 0 l 595 0 l 595 842 l cp clip + 0.04200 0.04200 sc +%%Page: 1 1 +7.500 slw +% Polyline +n 8550 2700 m 10950 2700 l 10950 3150 l 8550 3150 l cp gs col-1 s gr +/Times-Roman ff 180.00 scf sf +8850 3000 m +gs 1 -1 sc (Project) col-1 sh gr +% Polyline +n 4950 2700 m 7350 2700 l 7350 3150 l 4950 3150 l cp gs col-1 s gr +/Times-Roman ff 180.00 scf sf +5325 3000 m +gs 1 -1 sc (Employee) col-1 sh gr +% Polyline +n 1275 2775 m 3675 2775 l 3675 3225 l 1275 3225 l cp gs col-1 s gr +/Times-Roman ff 180.00 scf sf +1650 3075 m +gs 1 -1 sc (Dept) col-1 sh gr +% Polyline +n 600 5100 m 1425 5700 l 2325 5100 l 1425 4500 l 600 5100 l gs col-1 s gr +/Times-Roman ff 180.00 scf sf +1125 5175 m +gs 1 
-1 sc (Manager) col-1 sh gr +/Times-Roman ff 180.00 scf sf +9600 5175 m +gs 1 -1 sc (In_proj) col-1 sh gr +% Polyline +n 9000 5100 m 9825 5700 l 10725 5100 l 9825 4500 l 9000 5100 l gs col-1 s gr +% Ellipse +n 5287 2325 262 225 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 5812 2325 262 225 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 6337 2325 262 225 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 6862 2325 262 225 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 4612 2325 413 225 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 7537 2325 413 225 0 360 DrawEllipse gs col-1 s gr + +% Polyline +n 4800 2550 m 5925 2700 l gs col-1 s gr +% Polyline +n 5475 2550 m 5850 2700 l gs col-1 s gr +% Polyline +n 5850 2550 m 5850 2700 l gs col-1 s gr +% Polyline +n 6300 2550 m 5850 2700 l gs col-1 s gr +% Polyline +n 6750 2550 m 5850 2700 l gs col-1 s gr +% Polyline +n 7275 2550 m 5925 2700 l gs col-1 s gr +/Times-Roman ff 180.00 scf sf +4350 2400 m +gs 1 -1 sc (emp_no name salary sex phone room_no) col-1 sh gr +/Times-Roman ff 180.00 scf sf +3900 5175 m +gs 1 -1 sc (At_dep) col-1 sh gr +% Polyline +n 3323 5135 m 4148 5735 l 5048 5135 l 4148 4535 l 3323 5135 l gs col-1 s gr +% Ellipse +n 1875 2287 600 187 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 3075 2287 600 187 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 8850 2325 450 225 0 360 DrawEllipse gs col-1 s gr + +% Ellipse +n 9750 2325 450 225 0 360 DrawEllipse gs col-1 s gr + +% Polyline +n 1575 3225 m 600 5100 l gs col-1 s gr +% Polyline +n 2325 5100 m 5250 3150 l gs col-1 s gr +% Polyline +n 5850 3150 m 5025 5175 l gs col-1 s gr +% Polyline +n 3300 5100 m 2550 3225 l gs col-1 s gr +% Polyline +n 7050 3150 m 9000 5100 l gs col-1 s gr +% Polyline +n 10725 5100 m 9825 3150 l gs col-1 s gr +% Polyline +n 1875 2475 m 2400 2775 l gs col-1 s gr +% Polyline +n 3075 2475 m 2400 2775 l gs col-1 s gr +% Polyline +n 9000 2550 m 9750 2700 l gs col-1 s gr +% Polyline +n 9750 2550 m 9675 2700 l gs col-1 s gr +/Times-Roman ff 
180.00 scf sf +1575 2325 m +gs 1 -1 sc ( id name) col-1 sh gr +/Times-Roman ff 180.00 scf sf +8775 2400 m +gs 1 -1 sc (Name number ) col-1 sh gr +showpage +$F2psEnd +rs diff --git a/lib/mnesia/doc/src/company_o.erl b/lib/mnesia/doc/src/company_o.erl new file mode 100644 index 0000000000..3c7ad0d5e5 --- /dev/null +++ b/lib/mnesia/doc/src/company_o.erl @@ -0,0 +1,144 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%%
+-module(company_o).
+-compile(export_all).
+
+-import(mnesia, [transaction/1]).
+
+%0
+-include_lib("stdlib/include/qlc.hrl").
+-include("company_o.hrl").
+
+
+%% Create a Mnesia schema on the local node only.
+sinit() ->
+    LocalNode = node(),
+    mnesia:create_schema([LocalNode]).
+
+%% Create the employee, dept and project tables, passing only the
+%% attributes option.  Returns the result of the last
+%% mnesia:create_table/2 call.
+init() ->
+    EmployeeOpts = [{attributes, record_info(fields, employee)}],
+    DeptOpts = [{attributes, record_info(fields, dept)}],
+    ProjectOpts = [{attributes, record_info(fields, project)}],
+    mnesia:create_table(employee, EmployeeOpts),
+    mnesia:create_table(dept, DeptOpts),
+    mnesia:create_table(project, ProjectOpts).
+
+%0
+
+
+
+%1
+
+%% Store Emp in a transaction after setting its dept field to DeptId
+%% and its projects field to ProjNames.  The record update is done
+%% inside the transaction fun, so a bad Emp aborts the transaction.
+insert_emp(Emp, DeptId, ProjNames) ->
+    Write = fun() ->
+                    Updated = Emp#employee{dept = DeptId,
+                                           projects = ProjNames},
+                    mnesia:write(Updated)
+            end,
+    mnesia:transaction(Write).
+
+
+%1
+
+%2
+%% Run a transaction that returns the names of all employees whose
+%% sex field is female.
+females() ->
+    Query = fun() ->
+                    qlc:e(qlc:q([Emp#employee.name ||
+                                    Emp <- mnesia:table(employee),
+                                    Emp#employee.sex == female]))
+            end,
+    mnesia:transaction(Query).
+%2
+
+%3
+%% Run a transaction returning {Name, BossName} for every female
+%% employee E and every employee Boss whose emp_no equals E's manager
+%% field.
+female_bosses() ->
+    F = fun() -> qlc:e(qlc:q(
+                         [{E#employee.name, Boss#employee.name} ||
+                             E <- mnesia:table(employee),
+                             Boss <- mnesia:table(employee),
+                             Boss#employee.emp_no == E#employee.manager,
+                             E#employee.sex == female]
+                        ))
+        end,
+    mnesia:transaction(F).
+%3
+
+
+%4
+%% Add Amount to the salary of every female employee inside one
+%% transaction.  The value computed by the transaction is the number
+%% of records written.
+raise_females(Amount) ->
+    F = fun() ->
+                Q = qlc:q([E || E <- mnesia:table(employee),
+                                E#employee.sex == female]),
+                Fs = qlc:e(Q),
+                over_write(Fs, Amount)
+        end,
+    mnesia:transaction(F).
+
+%% Write back each employee record with Amount added to its salary
+%% and return how many records were written.
+over_write([E|Tail], Amount) ->
+    Salary = E#employee.salary + Amount,
+    New = E#employee{salary = Salary},
+    mnesia:write(New),
+    1 + over_write(Tail, Amount);
+over_write([], _) ->
+    0.
+%4
+
+%5
+%% Add Raise to the salary of the employee with key Eno.  The
+%% [E] match fails, and thereby aborts the transaction, unless exactly
+%% one record is found.
+raise(Eno, Raise) ->
+    F = fun() ->
+                [E] = mnesia:read({employee, Eno}),
+                Salary = E#employee.salary + Raise,
+                New = E#employee{salary = Salary},
+                mnesia:write(New)
+        end,
+    mnesia:transaction(F).
+%5
+
+
+%6
+%% Same update as raise/2, but prints from inside the transaction fun.
+%% NOTE(review): presumably named "bad" because a side effect inside
+%% the fun is repeated if Mnesia restarts the transaction -- confirm
+%% against the User's Guide text that includes this snippet.
+bad_raise(Eno, Raise) ->
+    F = fun() ->
+                [E] = mnesia:read({employee, Eno}),
+                Salary = E#employee.salary + Raise,
+                New = E#employee{salary = Salary},
+                io:format("Trying to write ... ~n", []),
+                mnesia:write(New)
+        end,
+    mnesia:transaction(F).
+%6
+
+%9
+%% Return, via a transaction, the employees in department Dep whose
+%% salary is greater than Salary.  The query handle is built outside
+%% the fun and only evaluated inside it.
+get_emps(Salary, Dep) ->
+    Q = qlc:q(
+          [E || E <- mnesia:table(employee),
+                E#employee.salary > Salary,
+                E#employee.dept == Dep]
+         ),
+    F = fun() -> qlc:e(Q) end,
+    %% Fully qualified, like every other call in this module, instead
+    %% of relying on the -import(mnesia, [transaction/1]) attribute.
+    mnesia:transaction(F).
+%9
+
+%10
+%% Alternative to get_emps/2: match on the table's wild pattern with
+%% dept bound to Dep, then filter on salary with a list comprehension.
+get_emps2(Salary, Dep) ->
+    Epat0 = mnesia:table_info(employee, wild_pattern),
+    Epat = Epat0#employee{dept = Dep},
+    F = fun() ->
+                All = mnesia:match_object(Epat),
+                [E || E <- All, E#employee.salary > Salary]
+        end,
+    mnesia:transaction(F).
+
+
+%10
+
diff --git a/lib/mnesia/doc/src/company_o.hrl b/lib/mnesia/doc/src/company_o.hrl new file mode 100644 index 0000000000..d8b584c296 --- /dev/null +++ b/lib/mnesia/doc/src/company_o.hrl @@ -0,0 +1,38 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License.
You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%%
+
+%% Record definitions for the company_o example module.
+%%
+%% Here the relations are stored as fields of employee rather than in
+%% separate records: insert_emp/3 fills in dept and projects, and
+%% female_bosses/0 compares the manager field with another employee's
+%% emp_no.
+
+%0
+-record(employee, {emp_no,
+                   name,
+                   salary,
+                   sex,
+                   phone,
+                   room_no,
+                   dept,
+                   projects,
+                   manager}).
+
+
+-record(dept, {id,
+               name}).
+
+-record(project, {name,
+                  number,
+                  location}).
+
+%0
diff --git a/lib/mnesia/doc/src/fascicules.xml b/lib/mnesia/doc/src/fascicules.xml new file mode 100644 index 0000000000..0678195e07 --- /dev/null +++ b/lib/mnesia/doc/src/fascicules.xml @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE fascicules SYSTEM "fascicules.dtd"> + +<fascicules> + <fascicule file="part" href="part_frame.html" entry="no"> + User's Guide + </fascicule> + <fascicule file="ref_man" href="ref_man_frame.html" entry="yes"> + Reference Manual + </fascicule> + <fascicule file="part_notes" href="part_notes_frame.html" entry="no"> + Release Notes + </fascicule> + <fascicule file="" href="../../../../doc/print.html" entry="no"> + Off-Print + </fascicule> +</fascicules> + diff --git a/lib/mnesia/doc/src/make.dep b/lib/mnesia/doc/src/make.dep new file mode 100644 index 0000000000..6e79484cb3 --- /dev/null +++ b/lib/mnesia/doc/src/make.dep @@ -0,0 +1,46 @@ +# ---------------------------------------------------- +# >>>> Do not edit this file <<<< +# This file was automaticly generated by +# /home/otp/bin/docdepend +# ---------------------------------------------------- + + +# ----------------------------------------------------
+# TeX files that the DVI file depend on +# ---------------------------------------------------- + +book.dvi: Mnesia_App_A.tex Mnesia_App_B.tex Mnesia_App_C.tex \ + Mnesia_App_D.tex Mnesia_chap1.tex Mnesia_chap2.tex \ + Mnesia_chap3.tex Mnesia_chap4.tex Mnesia_chap5.tex \ + Mnesia_chap7.tex Mnesia_chap8.tex book.tex \ + mnesia.tex mnesia_frag_hash.tex mnesia_registry.tex \ + part.tex ref_man.tex + +# ---------------------------------------------------- +# Source inlined when transforming from source to LaTeX +# ---------------------------------------------------- + +Mnesia_App_B.tex: ../../src/mnesia_backup.erl + +Mnesia_App_C.tex: ../../src/mnesia_frag.erl + +Mnesia_App_D.tex: ../../src/mnesia_frag_hash.erl + +Mnesia_chap2.tex: company.erl company.hrl + +Mnesia_chap3.tex: company.erl + +Mnesia_chap4.tex: company.erl + +Mnesia_chap5.tex: FRUITS company.erl company_o.erl company_o.hrl + +Mnesia_chap7.tex: bup.erl + +book.tex: ref_man.xml + +# ---------------------------------------------------- +# Pictures that the DVI file depend on +# ---------------------------------------------------- + +book.dvi: company.ps + diff --git a/lib/mnesia/doc/src/mnesia.gif b/lib/mnesia/doc/src/mnesia.gif Binary files differnew file mode 100644 index 0000000000..fbbabee5aa --- /dev/null +++ b/lib/mnesia/doc/src/mnesia.gif diff --git a/lib/mnesia/doc/src/mnesia.xml b/lib/mnesia/doc/src/mnesia.xml new file mode 100644 index 0000000000..3484cd104a --- /dev/null +++ b/lib/mnesia/doc/src/mnesia.xml @@ -0,0 +1,3100 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>1996</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. 
You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>mnesia</title> + <prepared>Claes Wikström and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date></date> + <rev></rev> + <file></file> + </header> + <module>mnesia</module> + <modulesummary>A Distributed Telecommunications DBMS </modulesummary> + <description> + <p><c>Mnesia</c> is a distributed DataBase Management System (DBMS), + appropriate for telecommunications applications and other Erlang + applications which require continuous operation and exhibit soft + real-time properties. + </p> + <p>Listed below are some of the most important and attractive capabilities, Mnesia provides: + </p> + <list type="bulleted"> + <item> + <p>A relational/object hybrid data model which is + suitable for telecommunications applications. + </p> + </item> + <item> + <p>A specifically designed DBMS query language, QLC (as an add-on library). + </p> + </item> + <item> + <p>Persistence. Tables may be coherently kept on disc as + well as in main memory. + </p> + </item> + <item> + <p>Replication. Tables may be replicated at several nodes. + </p> + </item> + <item> + <p>Atomic transactions. A series of table manipulation + operations can be grouped into a single atomic + transaction. + </p> + </item> + <item> + <p>Location transparency. Programs can be written without + knowledge of the actual location of data. + </p> + </item> + <item> + <p>Extremely fast real time data searches. + </p> + </item> + <item> + <p>Schema manipulation routines. 
It is possible to + reconfigure the DBMS at runtime without stopping the + system. + </p> + </item> + </list> + <p>This Reference Manual describes the Mnesia API. This includes + functions used to define and manipulate Mnesia tables. + </p> + <p>All functions documented in these pages can be used in any + combination with queries using the list comprehension notation. The + query notation is described in the QLC's man page. + </p> + <p>Data in Mnesia is organized as a set of tables. Each table + has a name which must be an atom. Each table is made up of + Erlang records. The user is responsible for the record + definitions. Each table also has a set of properties. Below + are some of the properties that are associated with each + table: + </p> + <list type="bulleted"> + <item> + <p><c>type</c>. Each table can either have 'set', + 'ordered_set' or 'bag' semantics. Note: currently 'ordered_set' + is not supported for 'disc_only_copies'. If a table is of type + 'set' it means that each key leads to either one or zero + records. <br></br> +If a new item is inserted with the same key as + an existing record, the old record is overwritten. On the + other hand, if a table is of type 'bag', each key can map to + several records. However, all records in type bag tables are + unique, only the keys may be duplicated. + </p> + </item> + <item> + <p><c>record_name</c>. All records stored in a table must + have the same name. You may say that the records must be + instances of the same record type. + </p> + </item> + <item> + <p><c>ram_copies</c> A table can be replicated on a number + of Erlang nodes. The <c>ram_copies</c> property specifies a + list of Erlang nodes where RAM copies are kept. These + copies can be dumped to disc at regular intervals. However, + updates to these copies are not written to disc on a + transaction basis. 
+ </p> + </item> + <item> + <p><c>disc_copies</c> The <c>disc_copies</c> property + specifies a list of Erlang nodes where the table is kept in + RAM as well as on disc. All updates of the table are + performed on the actual table and are also logged to disc. + If a table is of type <c>disc_copies</c> at a certain node, + it means that the entire table is resident in RAM memory as + well as on disc. Each transaction performed on the table is + appended to a LOG file as well as written into the RAM + table. + </p> + </item> + <item> + <p><c>disc_only_copies</c> Some, or all, table replicas + can be kept on disc only. These replicas are considerably + slower than the RAM based replicas. + </p> + </item> + <item> + <p><c>index</c> This is a list of attribute names, or + integers, which specify the tuple positions on which + Mnesia shall build and maintain an extra index table. + </p> + </item> + <item> + <p><c>local_content</c> When an application requires + tables whose contents is local to each node, + <c>local_content</c> tables may be used. The name of the + table is known to all Mnesia nodes, but its contents is + unique on each node. This means that access to such a table + must be done locally. Set the <c>local_content</c> field to + <c>true</c> if you want to enable the <c>local_content</c> + behavior. The default is <c>false</c>. + </p> + </item> + <item> + <p><c>snmp</c> Each (set based) Mnesia table can be + automatically turned into an SNMP ordered table as well. + This property specifies the types of the SNMP keys. + </p> + </item> + <item> + <p><c>attributes</c>. The names of the attributes for the + records that are inserted in the table. + </p> + </item> + </list> + <p>See <c>mnesia:create_table/2</c> about the complete set of + table properties and their details. + </p> + <p>This document uses a table of persons to illustrate various + examples. 
The following record definition is assumed: + </p> + <code type="none"> +-record(person, {name, + age = 0, + address = unknown, + salary = 0, + children = []}). + </code> + <p>The first attribute of the record is the primary key, or key + for short. + </p> + <p>The function descriptions are sorted in alphabetical order. <em>Hint:</em> + start by reading about <c>mnesia:create_table/2</c>, + <c>mnesia:lock/2</c> and <c>mnesia:activity/4</c> before you continue on + and learn about the rest. + </p> + <p>Writing or deleting in transaction context creates a local copy + of each modified record during the transaction. During iteration, + i.e. <c>mnesia:fold[lr]/4</c>, <c>mnesia:next/2</c>, <c>mnesia:prev/2</c> and + <c>mnesia:snmp_get_next_index/2</c>, mnesia will compensate for + every written or deleted record, which may reduce the + performance. If possible avoid writing or deleting records in + the same transaction before iterating over the table. + </p> + </description> + <funcs> + <func> + <name>abort(Reason) -> transaction abort </name> + <fsummary>Abort the current transaction.</fsummary> + <desc> + <p>Makes the transaction silently + return the tuple <c>{aborted, Reason}</c>. + The abortion of a Mnesia transaction means that + an exception will be thrown to an enclosing <c>catch</c>. + Thus, the expression <c>catch mnesia:abort(x)</c> does + not abort the transaction. </p> + </desc> + </func> + <func> + <name>activate_checkpoint(Args) -> {ok,Name,Nodes} | {error,Reason}</name> + <fsummary>Activate a checkpoint.</fsummary> + <desc> + <p>A checkpoint is a consistent view of the system. + A checkpoint can be activated on a set of tables. + This checkpoint can then be traversed and will + present a view of the system as it existed at the time when + the checkpoint was activated, even if the tables are being or have been + manipulated. + </p> + <p><c>Args</c> is a list of the following tuples: + </p> + <list type="bulleted"> + <item> + <p><c>{name,Name}</c>.
<c>Name</c> of checkpoint. Each + checkpoint must have a name which is unique to the + associated nodes. The name can be reused only once the + checkpoint has been deactivated. By default, a name + which is probably unique is generated. + </p> + </item> + <item> + <p><c>{max,MaxTabs}</c><c>MaxTabs</c> is a list of + tables that should be included in the checkpoint. The + default is []. For these tables, the redundancy will be + maximized and checkpoint information will be retained together + with all replicas. The checkpoint becomes more fault + tolerant if the tables have several replicas. When a new + replica is added by means of the schema manipulation + function <c>mnesia:add_table_copy/3</c>, a retainer will + also be attached automatically. + </p> + </item> + <item> + <p><c>{min,MinTabs}</c>. <c>MinTabs</c> is a list of + tables that should be included in the checkpoint. The + default is []. For these tables, the redundancy will be + minimized and the checkpoint information will only be retained + with one replica, preferably on the local node. + </p> + </item> + <item> + <p><c>{allow_remote,Bool}</c>. <c>false</c> means that + all retainers must be local. The checkpoint cannot be + activated if a table does not reside locally. + <c>true</c> allows retainers to be allocated on any + node. Default is set to <c>true</c>. + </p> + </item> + <item> + <p><c>{ram_overrides_dump,Bool} </c> Only applicable + for <c>ram_copies</c>. <c>Bool</c> allows you to choose + to backup the table state as it is in RAM, or as it is on + disc. <c>true</c> means that the latest committed + records in RAM should be included in the checkpoint. + These are the records that the application accesses. + <c>false</c> means that the records dumped to DAT files + should be included in the checkpoint. These are the + records that will be loaded at startup. Default is + <c>false</c>. + </p> + </item> + </list> + <p>Returns <c>{ok,Name,Nodes}</c> or <c>{error,Reason}</c>. 
+ <c>Name</c> is the (possibly generated) name of the + checkpoint. <c>Nodes</c> are the nodes that + are involved in the checkpoint. Only nodes that keep a + checkpoint retainer know about the checkpoint. + </p> + </desc> + </func> + <func> + <name>activity(AccessContext, Fun [, Args]) -> ResultOfFun | exit(Reason)</name> + <fsummary>Execute <c>Fun</c> in <c>AccessContext</c>.</fsummary> + <desc> + <p>Invokes <c>mnesia:activity(AccessContext, Fun, Args, AccessMod)</c> where <c>AccessMod</c> is the default + access callback module obtained by + <c>mnesia:system_info(access_module)</c>. <c>Args</c> + defaults to the empty list <c>[]</c>.</p> + </desc> + </func> + <func> + <name>activity(AccessContext, Fun, Args, AccessMod) -> ResultOfFun | exit(Reason)</name> + <fsummary>Execute <c>Fun</c> in <c>AccessContext</c>.</fsummary> + <desc> + <p>This function executes the functional object <c>Fun</c> + with the arguments <c>Args</c>. + </p> + <p>The code which executes inside the activity can + consist of a series of table manipulation functions, which are + performed in an <c>AccessContext</c>. Currently, the following + access contexts are supported: + </p> + <taglist> + <tag><c>transaction</c></tag> + <item> + <p>Short for <c>{transaction, infinity}</c></p> + </item> + <tag><c>{transaction, Retries}</c></tag> + <item> + <p>Invokes <c>mnesia:transaction(Fun, Args, Retries)</c>. Note that the result from the <c>Fun</c> is + returned if the transaction was successful (atomic), + otherwise the function exits with an abort reason. + </p> + </item> + <tag><c>sync_transaction</c></tag> + <item> + <p>Short for <c>{sync_transaction, infinity}</c></p> + </item> + <tag><c>{sync_transaction, Retries}</c></tag> + <item> + <p>Invokes <c>mnesia:sync_transaction(Fun, Args, Retries)</c>. Note that the result from the <c>Fun</c> is + returned if the transaction was successful (atomic), + otherwise the function exits with an abort reason.
+ </p> + </item> + <tag><c>async_dirty</c></tag> + <item> + <p>Invokes <c>mnesia:async_dirty(Fun, Args)</c>. + </p> + </item> + <tag><c>sync_dirty</c></tag> + <item> + <p>Invokes <c>mnesia:sync_dirty(Fun, Args)</c>. + </p> + </item> + <tag><c>ets</c></tag> + <item> + <p>Invokes <c>mnesia:ets(Fun, Args)</c>. + </p> + </item> + </taglist> + <p>This function (<c>mnesia:activity/4</c>) differs in an + important aspect from the <c>mnesia:transaction</c>, + <c>mnesia:sync_transaction</c>, + <c>mnesia:async_dirty</c>, <c>mnesia:sync_dirty</c> and + <c>mnesia:ets</c> functions. The <c>AccessMod</c> argument + is the name of a callback module which implements the + <c>mnesia_access</c> behavior. + </p> + <p>Mnesia will forward calls to the following functions: + </p> + <list type="bulleted"> + <item> + <p>mnesia:write/3 (write/1, s_write/1)</p> + </item> + <item> + <p>mnesia:delete/3 (delete/1, s_delete/1)</p> + </item> + <item> + <p>mnesia:delete_object/3 (delete_object/1, s_delete_object/1)</p> + </item> + <item> + <p>mnesia:read/3 (read/1, wread/1)</p> + </item> + <item> + <p>mnesia:match_object/3 (match_object/1)</p> + </item> + <item> + <p>mnesia:all_keys/1</p> + </item> + <item> + <p>mnesia:first/1</p> + </item> + <item> + <p>mnesia:last/1</p> + </item> + <item> + <p>mnesia:prev/2</p> + </item> + <item> + <p>mnesia:next/2</p> + </item> + <item> + <p>mnesia:index_match_object/4 (index_match_object/2)</p> + </item> + <item> + <p>mnesia:index_read/3</p> + </item> + <item> + <p>mnesia:lock/2 (read_lock_table/1, write_lock_table/1)</p> + </item> + <item> + <p>mnesia:table_info/2</p> + </item> + </list> + <p>to the corresponding: + </p> + <list type="bulleted"> + <item> + <p>AccessMod:lock(ActivityId, Opaque, LockItem, LockKind)</p> + </item> + <item> + <p>AccessMod:write(ActivityId, Opaque, Tab, Rec, LockKind)</p> + </item> + <item> + <p>AccessMod:delete(ActivityId, Opaque, Tab, Key, LockKind)</p> + </item> + <item> + <p>AccessMod:delete_object(ActivityId, Opaque, Tab, 
RecXS, LockKind)</p> + </item> + <item> + <p>AccessMod:read(ActivityId, Opaque, Tab, Key, LockKind)</p> + </item> + <item> + <p>AccessMod:match_object(ActivityId, Opaque, Tab, Pattern, LockKind)</p> + </item> + <item> + <p>AccessMod:all_keys(ActivityId, Opaque, Tab, LockKind)</p> + </item> + <item> + <p>AccessMod:first(ActivityId, Opaque, Tab)</p> + </item> + <item> + <p>AccessMod:last(ActivityId, Opaque, Tab)</p> + </item> + <item> + <p>AccessMod:prev(ActivityId, Opaque, Tab, Key)</p> + </item> + <item> + <p>AccessMod:next(ActivityId, Opaque, Tab, Key)</p> + </item> + <item> + <p>AccessMod:index_match_object(ActivityId, Opaque, Tab, Pattern, Attr, LockKind)</p> + </item> + <item> + <p>AccessMod:index_read(ActivityId, Opaque, Tab, SecondaryKey, Attr, LockKind)</p> + </item> + <item> + <p>AccessMod:table_info(ActivityId, Opaque, Tab, InfoItem)</p> + </item> + </list> + <p>where <c>ActivityId</c> is a record which represents the + identity of the enclosing Mnesia activity. The first field + (obtained with <c>element(1, ActivityId)</c>) contains an + atom which may be interpreted as the type of the activity: + <c>'ets'</c>, <c>'async_dirty'</c>, <c>'sync_dirty'</c> or + <c>'tid'</c>. <c>'tid'</c> means that the activity is a + transaction. The structure of the rest of the identity + record is internal to Mnesia. + </p> + <p><c>Opaque</c> is an opaque data structure which is internal + to Mnesia.</p> + </desc> + </func> + <func> + <name>add_table_copy(Tab, Node, Type) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Copy a table to a remote node.</fsummary> + <desc> + <p>This function makes another copy of a table at the + node <c>Node</c>. The <c>Type</c> argument must be + either of the atoms <c>ram_copies</c>, <c>disc_copies</c>, + or + <c>disc_only_copies</c>. 
For example, the following call + ensures that a disc replica of the <c>person</c> table also + exists at node <c>Node</c>.</p> + <code type="none"> +mnesia:add_table_copy(person, Node, disc_copies) + </code> + <p>This function can also be used to add a replica of the + table named <c>schema</c>.</p> + </desc> + </func> + <func> + <name>add_table_index(Tab, AttrName) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Create an index for a table. </fsummary> + <desc> + <p>Table indices can and should be used whenever the user + wants to frequently use some other field than the key field + to look up records. If this other field has an index + associated with it, these lookups can occur in constant time + and space. For example, if our application wishes to use + the age field of persons to efficiently find all person with + a specific age, it might be a good idea to have an index on + the age field. This can be accomplished with the following + call:</p> + <code type="none"> +mnesia:add_table_index(person, age) + </code> + <p>Indices do not come free, they occupy space which is + proportional to the size of the table. They also cause insertions + into the table to execute slightly slower. </p> + </desc> + </func> + <func> + <name>all_keys(Tab) -> KeyList | transaction abort</name> + <fsummary>Return all keys in a table.</fsummary> + <desc> + <p>This function returns a list of all keys in the table + named <c>Tab</c>. The semantics of this function is context + sensitive. See <c>mnesia:activity/4</c> for more information. In + transaction context it acquires a read lock on the entire + table.</p> + </desc> + </func> + <func> + <name>async_dirty(Fun, [, Args]) -> ResultOfFun | exit(Reason)</name> + <fsummary>Call the Fun in a context which is not protected by a transaction.</fsummary> + <desc> + <p>Call the <c>Fun</c> in a context which is not protected + by a transaction. 
The Mnesia function calls performed in the + <c>Fun</c> are mapped to the corresponding dirty + functions. This still involves logging, replication and + subscriptions, but there is no locking, local transaction + storage, or commit protocols involved. Checkpoint retainers + and indices are updated, but they will be updated dirty. As + for normal mnesia:dirty_* operations, the operations are + performed semi-asynchronously. See + <c>mnesia:activity/4</c> and the Mnesia User's Guide for + more details. + </p> + <p>It is possible to manipulate the Mnesia tables without + using transactions. This has some serious disadvantages, but + is considerably faster since the transaction manager is not + involved and no locks are set. A dirty operation does, + however, guarantee a certain level of consistency and it is + not possible for the dirty operations to return garbled + records. All dirty operations provide location transparency + to the programmer and a program does not have to be aware of + the whereabouts of a certain table in order to function. + </p> + <p><em>Note:</em>It is more than 10 times more efficient to read records dirty + than within a transaction. + </p> + <p>Depending on the application, it may be a good idea to use + the dirty functions for certain operations. Almost all + Mnesia functions which can be called within transactions + have a dirty equivalent which is much more + efficient. However, it must be noted that it is possible for + the database to be left in an inconsistent state if dirty + operations are used to update it. Dirty operations should + only be used for performance reasons when it is absolutely + necessary. </p> + <p><em>Note:</em> Calling (nesting) a <c>mnesia:[a]sync_dirty</c> + inside a transaction context will inherit the transaction semantics. 
+ </p> + </desc> + </func> + <func> + <name>backup(Opaque [, BackupMod]) -> ok | {error,Reason}</name> + <fsummary>Back up all tables in the database.</fsummary> + <desc> + <p>Activates a new checkpoint covering all Mnesia tables, + including the schema, with maximum degree of redundancy and + performs a backup using <c>backup_checkpoint/2/3</c>. The + default value of the backup callback module <c>BackupMod</c> + is obtained by <c>mnesia:system_info(backup_module)</c>.</p> + </desc> + </func> + <func> + <name>backup_checkpoint(Name, Opaque [, BackupMod]) -> ok | {error,Reason}</name> + <fsummary>Back up all tables in a checkpoint.</fsummary> + <desc> + <p>The tables are backed up to external media using the backup + module <c>BackupMod</c>. Tables with the local contents + property are backed up as they exist on the current + node. <c>BackupMod</c> is the default backup callback + module obtained by + <c>mnesia:system_info(backup_module)</c>. See the User's + Guide about the exact callback interface (the + <c>mnesia_backup behavior</c>).</p> + </desc> + </func> + <func> + <name>change_config(Config, Value) -> {error, Reason} | {ok, ReturnValue}</name> + <fsummary>Change a configuration parameter.</fsummary> + <desc> + <p>The <c>Config</c> should be one of the following + configuration parameters: </p> + <taglist> + <tag><c>extra_db_nodes</c></tag> + <item> + <p><c>Value</c> is a list of nodes which Mnesia should try to connect to. + The <c>ReturnValue</c> will be those nodes in + <c>Value</c> that Mnesia is connected to. + <br></br> +Note: This function shall only be used to connect to newly started ram nodes + (N.D.R.S.N.) with an empty schema. If for example it is used after the network + has been partitioned it may lead to inconsistent tables. + <br></br> +Note: Mnesia may be connected to other nodes than those + returned in <c>ReturnValue</c>.</p> + </item> + <tag><c>dc_dump_limit</c></tag> + <item> + <p><c>Value</c> is a number. 
See description in + <c>Configuration Parameters</c> below. + The <c>ReturnValue</c> is the new value. Note this configuration parameter + is not persistent, it will be lost when Mnesia is stopped.</p> + </item> + </taglist> + </desc> + </func> + <func> + <name>change_table_access_mode(Tab, AccessMode) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Change the access mode for the table.</fsummary> + <desc> + <p>The <c>AccessMode</c> is by default the atom + <c>read_write</c> but it may also be set to the atom + <c>read_only</c>. If the <c>AccessMode</c> is set to + <c>read_only</c>, it means that it is not possible to perform + updates to the table. At startup Mnesia always loads + <c>read_only</c> tables locally regardless of when and if + Mnesia was terminated on other nodes.</p> + </desc> + </func> + <func> + <name>change_table_copy_type(Tab, Node, To) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Change the storage type of a table.</fsummary> + <desc> + <p>For example:</p> + <code type="none"> +mnesia:change_table_copy_type(person, node(), disc_copies) + </code> + <p>Transforms our <c>person</c> table from a RAM table into + a disc based table at <c>Node</c>. + </p> + <p>This function can also be used to change the storage type of + the table named <c>schema</c>. The schema table can only + have <c>ram_copies</c> or <c>disc_copies</c> as the storage type. If the + storage type of the schema is <c>ram_copies</c>, no other table + can be disc resident on that node.</p> + </desc> + </func> + <func> + <name>change_table_load_order(Tab, LoadOrder) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Change the load order priority for the table.</fsummary> + <desc> + <p>The <c>LoadOrder</c> priority is by default <c>0</c> (zero) + but may be set to any integer. 
The tables with the highest + <c>LoadOrder</c> priority will be loaded first at startup.</p> + </desc> + </func> + <func> + <name>clear_table(Tab) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Deletes all entries in a table.</fsummary> + <desc> + <p>Deletes all entries in the table <c>Tab</c>.</p> + </desc> + </func> + <func> + <name>create_schema(DiscNodes) -> ok | {error,Reason}</name> + <fsummary>Create a brand new schema on the specified nodes.</fsummary> + <desc> + <p>Creates a new database on disc. Various files are + created in the local Mnesia directory of each node. Note + that the directory must be unique for each node. Two nodes + may never share the same directory. If possible, use a local + disc device in order to improve performance.</p> + <p><c>mnesia:create_schema/1</c> fails if any of the + Erlang nodes given as <c>DiscNodes</c> are not alive, if + Mnesia is running on anyone of the nodes, or if anyone of + the nodes already has a schema. Use + <c>mnesia:delete_schema/1</c> to get rid of old faulty + schemas. + </p> + <p><em>Note:</em> Only nodes with disc should be + included in <c>DiscNodes</c>. Disc-less nodes, that is nodes + where all tables including the schema only resides in RAM, + may not be included.</p> + </desc> + </func> + <func> + <name>create_table(Name, TabDef) -> {atomic, ok} | {aborted, Reason}</name> + <fsummary>Create a Mnesia table called <c>Name</c>with properties as described by the argument <c>TabDef</c>.</fsummary> + <desc> + <p>This function creates a Mnesia table called <c>Name</c> + according to the + argument <c>TabDef</c>. This list must be a list of + <c>{Item, Value}</c> tuples, where the following values are + allowed:</p> + <list type="bulleted"> + <item> + <p><c>{access_mode, Atom}</c>. The access mode is by + default the atom <c>read_write</c> but it may also be + set to the atom <c>read_only</c>. 
If the + <c>AccessMode</c> is set to <c>read_only</c>, it means + that it is not possible to perform updates to the table. + </p> + <p>At startup Mnesia always loads <c>read_only</c> tables + locally regardless of when and if Mnesia was terminated + on other nodes. This argument returns the access mode of + the table. The access mode may either be read_only or + read_write. + </p> + </item> + <item> + <p><c>{attributes, AtomList}</c> a list of the + attribute names for the records that are supposed to + populate the table. The default value is <c>[key, val]</c>. The table must have at least one extra + attribute in addition to the key. + </p> + <p>When accessing single attributes in a record, it is not + necessary, or even recommended, to hard code any + attribute names as atoms. Use the construct + <c>record_info(fields, RecordName)</c> instead. It can be + used for records of type <c>RecordName</c></p> + </item> + <item> + <p><c>{disc_copies, Nodelist}</c>, where + <c>Nodelist</c> is a list of the nodes where this table + is supposed to have disc copies. If a table replica is + of type <c>disc_copies</c>, all write operations on this + particular replica of the table are written to disc as + well as to the RAM copy of the table. + </p> + <p>It is possible + to have a replicated table of type <c>disc_copies</c> + on one node, and another type on another node. The + default value is <c>[]</c></p> + </item> + <item> + <p><c>{disc_only_copies, Nodelist}</c>, where + <c>Nodelist</c> is a list of the nodes where this table + is supposed to have <c>disc_only_copies</c>. A disc only + table replica is kept on disc only and unlike the other + replica types, the contents of the replica will not + reside in RAM. These replicas are considerably slower + than replicas held in RAM. 
+ </p> + </item> + <item> + <p><c>{index, Intlist}</c>, where + <c>Intlist</c> is a list of attribute names (atoms) or + record fields for which Mnesia shall build and maintain + an extra index table. The <c>qlc</c> query compiler may + or may not utilize any additional indices while + processing queries on a table. + </p> + </item> + <item> + <p><c>{load_order, Integer}</c>. The load order + priority is by default <c>0</c> (zero) but may be set to + any integer. The tables with the highest load order + priority will be loaded first at startup. + </p> + </item> + <item> + <p><c>{ram_copies, Nodelist}</c>, where + <c>Nodelist</c> is a list of the nodes where this table + is supposed to have RAM copies. A table replica of type + <c>ram_copies</c> is obviously not written to disc on a + per transaction basis. It is possible to dump + <c>ram_copies</c> replicas to disc with the function + <c>mnesia:dump_tables(Tabs)</c>. The default value for + this attribute is <c>[node()]</c>. + </p> + </item> + <item> + <p><c>{record_name, Name}</c>, where <c>Name</c> must + be an atom. All records, stored in the table, must have + this name as the first element. It defaults to the same + name as the name of the table. + </p> + </item> + <item> + <p><c>{snmp, SnmpStruct}</c>. See + <c>mnesia:snmp_open_table/2</c> for a description of + <c>SnmpStruct</c>. If this attribute is present in the + <c>ArgList</c> to <c>mnesia:create_table/2</c>, the + table is immediately accessible by means of the Simple + Network Management Protocol (SNMP). This means that + applications which use SNMP to manipulate and control + the system can be designed easily, since Mnesia provides + a direct mapping between the logical tables that make up + an SNMP control application and the physical data which + makes up a Mnesia table. + </p> + </item> + <item> + <p><c>{type, Type}</c>, where <c>Type</c> must be + either of the atoms <c>set</c>, <c>ordered_set</c> or + <c>bag</c>. The default value is <c>set</c>. 
In a + <c>set</c> all records have unique keys and in a + <c>bag</c> several records may have the same key, but + the record content is unique. If a non-unique record is + stored the old, conflicting record(s) will simply be + overwritten. Note: currently 'ordered_set' + is not supported for 'disc_only_copies'. + </p> + </item> + <item> + <p><c>{local_content, Bool}</c>, where <c>Bool</c> must be + either <c>true</c> or <c>false</c>. The default value is <c>false</c>.\011 </p> + </item> + </list> + <p>For example, the following call creates the <c>person</c> table + previously defined and replicates it on 2 nodes: + </p> + <code type="none"> +mnesia:create_table(person, + [{ram_copies, [N1, N2]}, + {attributes, record_info(fields,person)}]). + </code> + <p>If it was required that Mnesia build and maintain an extra index + table on the <c>address</c> attribute of all the <c>person</c> + records that are inserted in the table, the following code would be issued: + </p> + <code type="none"> +mnesia:create_table(person, + [{ram_copies, [N1, N2]}, + {index, [address]}, + {attributes, record_info(fields,person)}]). + </code> + <p>The specification of <c>index</c> and <c>attributes</c> may be + hard coded as <c>{index, [2]}</c> and + <c>{attributes, [name, age, address, salary, children]}</c> + respectively. + </p> + <p><c>mnesia:create_table/2</c> writes records into the + <c>schema</c> table. This function, as well as all other + schema manipulation functions, are implemented with the + normal transaction management system. This guarantees that + schema updates are performed on all nodes in an atomic + manner.</p> + </desc> + </func> + <func> + <name>deactivate_checkpoint(Name) -> ok | {error, Reason}</name> + <fsummary>Deactivate a checkpoint.</fsummary> + <desc> + <p>The checkpoint is automatically deactivated when some of + the tables involved have no retainer attached to them. This may + happen when nodes go down or when a replica is deleted. 
+ Checkpoints will also be deactivated with this function. + <c>Name</c> is the name of an active checkpoint.</p> + </desc> + </func> + <func> + <name>del_table_copy(Tab, Node) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Delete the replica of table <c>Tab</c> at node <c>Node</c>.</fsummary> + <desc> + <p>Deletes the replica of table <c>Tab</c> at node <c>Node</c>. + When the last replica is deleted with this + function, the table disappears entirely. + </p> + <p>This function may also be used to delete a replica of + the table named <c>schema</c>. Then the mnesia node will be removed. + Note: Mnesia must be stopped on the node first.</p> + </desc> + </func> + <func> + <name>del_table_index(Tab, AttrName) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Delete an index in a table. </fsummary> + <desc> + <p>This function deletes the index on attribute with name + <c>AttrName</c> in a table.</p> + </desc> + </func> + <func> + <name>delete({Tab, Key}) -> transaction abort | ok </name> + <fsummary>Delete all records in table <c>Tab</c> with the key <c>Key</c>.</fsummary> + <desc> + <p>Invokes <c>mnesia:delete(Tab, Key, write)</c></p> + </desc> + </func> + <func> + <name>delete(Tab, Key, LockKind) -> transaction abort | ok </name> + <fsummary>Delete all records in table <c>Tab</c> with the key <c>Key</c>.</fsummary> + <desc> + <p>Deletes all records in table <c>Tab</c> with the key + <c>Key</c>. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires a lock of type <c>LockKind</c> on the + record. 
Currently the lock types <c>write</c> and + <c>sticky_write</c> are supported.</p> + </desc> + </func> + <func> + <name>delete_object(Record) -> transaction abort | ok </name> + <fsummary>Delete a record</fsummary> + <desc> + <p>Invokes <c>mnesia:delete_object(Tab, Record, write)</c> where + <c>Tab</c> is <c>element(1, Record)</c>.</p> + </desc> + </func> + <func> + <name>delete_object(Tab, Record, LockKind) -> transaction abort | ok </name> + <fsummary>Delete a record</fsummary> + <desc> + <p>If a table is of type <c>bag</c>, we may sometimes + want to delete only some of the records with a certain + key. This can be done with the <c>delete_object/3</c> + function. A complete record must be supplied to this + function. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires a lock of type <c>LockKind</c> on the + record. Currently the lock types <c>write</c> and + <c>sticky_write</c> are supported.</p> + </desc> + </func> + <func> + <name>delete_schema(DiscNodes) -> ok | {error,Reason}</name> + <fsummary>Delete the schema on the given nodes</fsummary> + <desc> + <p>Deletes a database created with + <c>mnesia:create_schema/1</c>. + <c>mnesia:delete_schema/1</c> fails if any of the Erlang + nodes given as <c>DiscNodes</c> is not alive, or if Mnesia + is running on any of the nodes. + </p> + <p>After the database has been deleted, it may still be + possible to start Mnesia as a disc-less node. This depends on + how the configuration parameter <c>schema_location</c> is set. + </p> + <warning> + <p>This function must be used with extreme + caution since it makes existing persistent data + obsolete. Think twice before using it. 
</p> + </warning> + </desc> + </func> + <func> + <name>delete_table(Tab) -> {aborted, Reason} | {atomic, ok} </name> + <fsummary>Delete permanently all replicas of table <c>Tab</c>.</fsummary> + <desc> + <p>Permanently deletes all replicas of table <c>Tab</c>.</p> + </desc> + </func> + <func> + <name>dirty_all_keys(Tab) -> KeyList | exit({aborted, Reason}).</name> + <fsummary>Dirty search for all record keys in table.</fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:all_keys/1</c> function.</p> + </desc> + </func> + <func> + <name>dirty_delete({Tab, Key}) -> ok | exit({aborted, Reason}) </name> + <fsummary>Dirty delete of a record.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_delete(Tab, Key)</c>.</p> + </desc> + </func> + <func> + <name>dirty_delete(Tab, Key) -> ok | exit({aborted, Reason}) </name> + <fsummary>Dirty delete of a record. </fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:delete/3</c> function.</p> + </desc> + </func> + <func> + <name>dirty_delete_object(Record) </name> + <fsummary>Dirty delete of a record.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_delete_object(Tab, Record)</c> + where <c>Tab</c> is <c>element(1, Record)</c>.</p> + </desc> + </func> + <func> + <name>dirty_delete_object(Tab, Record) </name> + <fsummary>Dirty delete of a record. </fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:delete_object/3</c> function.</p> + </desc> + </func> + <func> + <name>dirty_first(Tab) -> Key | exit({aborted, Reason}) </name> + <fsummary>Return the key for the first record in a table.</fsummary> + <desc> + <p>Records in <c>set</c> or <c>bag</c> tables are not ordered. + However, there + is an ordering of the records which is not known + to the user. Accordingly, it is possible to traverse a table by means + of this function in conjunction with the <c>mnesia:dirty_next/2</c> + function. 
+ </p> + <p>If there are no records at all in the table, this function + returns the atom <c>'$end_of_table'</c>. For this reason, it + is highly undesirable, but not disallowed, to use this atom + as the key for any user records.</p> + </desc> + </func> + <func> + <name>dirty_index_match_object(Pattern, Pos)</name> + <fsummary>Dirty pattern match using index.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_index_match_object(Tab, Pattern, Pos)</c> where <c>Tab</c> is <c>element(1, Pattern)</c>.</p> + </desc> + </func> + <func> + <name>dirty_index_match_object(Tab, Pattern, Pos)</name> + <fsummary>Dirty pattern match using index.</fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:index_match_object/4</c> function.</p> + </desc> + </func> + <func> + <name>dirty_index_read(Tab, SecondaryKey, Pos)</name> + <fsummary>Dirty read using index.</fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:index_read/3</c> function.</p> + </desc> + </func> + <func> + <name>dirty_last(Tab) -> Key | exit({aborted, Reason}) </name> + <fsummary>Return the key for the last record in a table.</fsummary> + <desc> + <p>This function works exactly + <c>mnesia:dirty_first/1</c> but returns the last object in + Erlang term order for the <c>ordered_set</c> table type. 
For + all other table types, <c>mnesia:dirty_first/1</c> and + <c>mnesia:dirty_last/1</c> are synonyms.</p> + </desc> + </func> + <func> + <name>dirty_match_object(Pattern) -> RecordList | exit({aborted, Reason}).</name> + <fsummary>Dirty pattern match pattern.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_match_object(Tab, Pattern)</c> + where <c>Tab</c> is <c>element(1, Pattern)</c>.</p> + </desc> + </func> + <func> + <name>dirty_match_object(Tab, Pattern) -> RecordList | exit({aborted, Reason}).</name> + <fsummary>Dirty pattern match pattern.</fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:match_object/3</c> function.</p> + </desc> + </func> + <func> + <name>dirty_next(Tab, Key) -> Key | exit({aborted, Reason}) </name> + <fsummary>Return the next key in a table. </fsummary> + <desc> + <p>This function makes it possible to traverse a table + and perform operations on all records in the table. When + the end of the table is reached, the special key + <c>'$end_of_table'</c> is returned. Otherwise, the function + returns a key which can be used to read the actual record. The + behavior is undefined if another Erlang process performs write + operations on the table while it is being traversed with the + <c>mnesia:dirty_next/2</c> function.</p> + </desc> + </func> + <func> + <name>dirty_prev(Tab, Key) -> Key | exit({aborted, Reason}) </name> + <fsummary>Return the previous key in a table. </fsummary> + <desc> + <p>This function works exactly like + <c>mnesia:dirty_next/2</c> but returns the previous object in + Erlang term order for the ordered_set table type. 
For + all other table types, <c>mnesia:dirty_next/2</c> and + <c>mnesia:dirty_prev/2</c> are synonyms.</p> + </desc> + </func> + <func> + <name>dirty_read({Tab, Key}) -> ValueList | exit({aborted, Reason})</name> + <fsummary>Dirty read of records.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_read(Tab, Key)</c>.</p> + </desc> + </func> + <func> + <name>dirty_read(Tab, Key) -> ValueList | exit({aborted, Reason})</name> + <fsummary>Dirty read of records.</fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:read/3</c> function.</p> + </desc> + </func> + <func> + <name>dirty_select(Tab, MatchSpec) -> ValueList | exit({aborted, Reason})</name> + <fsummary>Dirty match the objects in <c>Tab</c> against <c>MatchSpec</c>.</fsummary> + <desc> + <p>This is the dirty equivalent of the + <c>mnesia:select/2</c> function.</p> + </desc> + </func> + <func> + <name>dirty_slot(Tab, Slot) -> RecordList | exit({aborted, Reason})</name> + <fsummary>Return the list of records that are associated with Slot in a table.</fsummary> + <desc> + <p>This function can be used to traverse a table in a + manner similar to the <c>mnesia:dirty_next/2</c> function. + A table has a number of slots which range from 0 (zero) to some + unknown upper bound. The function + <c>mnesia:dirty_slot/2</c> returns the special atom + <c>'$end_of_table'</c> when the end of the table is reached. 
+ The behavior of this function is undefined if a write + operation is performed on the table while it is being + traversed.</p> + </desc> + </func> + <func> + <name>dirty_update_counter({Tab, Key}, Incr) -> NewVal | exit({aborted, Reason})</name> + <fsummary>Dirty update of a counter record.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_update_counter(Tab, Key, Incr)</c>.</p> + </desc> + </func> + <func> + <name>dirty_update_counter(Tab, Key, Incr) -> NewVal | exit({aborted, Reason})</name> + <fsummary>Dirty update of a counter record.</fsummary> + <desc> + <p>There are no special counter records in Mnesia. However, + records of the form <c>{Tab, Key, Integer}</c> can be used + as (possibly disc resident) counters, when <c>Tab</c> is a + <c>set</c>. This function updates a counter with a + positive or negative number. However, counters can never become less + than zero. There are two significant differences between + this function and the action of first reading the record, + performing the arithmetic, and then writing the record:</p> + <list type="bulleted"> + <item>It is much more efficient</item> + <item><c>mnesia:dirty_update_counter/3</c> is + performed as an atomic operation despite the fact that it is not + protected by a transaction.</item> + </list> + <p>If two processes perform <c>mnesia:dirty_update_counter/3</c> + simultaneously, both updates will take effect without the + risk of losing one of the updates. 
The new value + <c>NewVal</c> of the counter is returned.</p> + <p>If <c>Key</c> does not exist, a new record is created with the value + <c>Incr</c> if it is larger than 0, otherwise it is set to 0.</p> + </desc> + </func> + <func> + <name>dirty_write(Record) -> ok | exit({aborted, Reason})</name> + <fsummary>Dirty write of a record.</fsummary> + <desc> + <p>Invokes <c>mnesia:dirty_write(Tab, Record)</c> + where <c>Tab</c> is <c>element(1, Record)</c>.</p> + </desc> + </func> + <func> + <name>dirty_write(Tab, Record) -> ok | exit({aborted, Reason})</name> + <fsummary>Dirty write of a record.</fsummary> + <desc> + <p>This is the dirty equivalent of <c>mnesia:write/3</c>.</p> + </desc> + </func> + <func> + <name>dump_log() -> dumped</name> + <fsummary>Perform a user initiated dump of the local log file.</fsummary> + <desc> + <p>Performs a user initiated dump of the local log file. + This is usually not necessary since Mnesia, by default, + manages this automatically.</p> + </desc> + </func> + <func> + <name>dump_tables(TabList) -> {atomic, ok} | {aborted, Reason}</name> + <fsummary>Dump all RAM tables to disc.</fsummary> + <desc> + <p>This function dumps a set of <c>ram_copies</c> tables + to disc. The next time the system is started, these tables + are initiated with the data found in the files that are the + result of this dump. None of the tables may have disc + resident replicas.</p> + </desc> + </func> + <func> + <name>dump_to_textfile(Filename) </name> + <fsummary>Dump local tables into a text file.</fsummary> + <desc> + <p>Dumps all local tables of a mnesia system into a text file + which can then be edited (by means of a normal text editor) + and then later be reloaded with + <c>mnesia:load_textfile/1</c>. Only use this function for + educational purposes. 
Use other functions to deal with real + backups.</p> + </desc> + </func> + <func> + <name>error_description(Error) -> String </name> + <fsummary>Return a string describing a particular Mnesia error.</fsummary> + <desc> + <p>All Mnesia transactions, including all the schema + update functions, either return the value <c>{atomic, Val}</c> or the tuple <c>{aborted, Reason}</c>. The + <c>Reason</c> can be either of the following atoms. The + <c>error_description/1</c> function returns a descriptive + string which describes the error. + </p> + <list type="bulleted"> + <item> + <p><c>nested_transaction</c>. Nested transactions are + not allowed in this context. + </p> + </item> + <item> + <p><c>badarg</c>. Bad or invalid argument, possibly + bad type. + </p> + </item> + <item> + <p><c>no_transaction</c>. Operation not allowed + outside transactions. + </p> + </item> + <item> + <p><c>combine_error</c>. Table options were illegally + combined. + </p> + </item> + <item> + <p><c>bad_index</c>. Index already exists or was out + of bounds. + </p> + </item> + <item> + <p><c>already_exists</c>. Schema option is already set. + </p> + </item> + <item> + <p><c>index_exists</c>. Some operations cannot be performed on + tabs with index. + </p> + </item> + <item> + <p><c>no_exists</c>. Tried to perform operation on + non-existing, or not alive, item. + </p> + </item> + <item> + <p><c>system_limit</c>. Some system_limit was exhausted. + </p> + </item> + <item> + <p><c>mnesia_down</c>. A transaction involving + records at some remote node which died while + transaction was executing. Record(s) are no longer + available elsewhere in the network. + </p> + </item> + <item> + <p><c>not_a_db_node</c>. A node which does not exist + in the schema was mentioned. + </p> + </item> + <item> + <p><c>bad_type</c>. Bad type on some arguments. + </p> + </item> + <item> + <p><c>node_not_running</c>. Node not running. + </p> + </item> + <item> + <p><c>truncated_binary_file</c>. Truncated binary in file. 
+ </p> + </item> + <item> + <p><c>active</c>. Some delete operations require that + all active records are removed. + </p> + </item> + <item> + <p><c>illegal</c>. Operation not supported on record. + </p> + </item> + </list> + <p>The <c>Error</c> may be <c>Reason</c>, + <c>{error, Reason}</c>, or <c>{aborted, Reason}</c>. The + <c>Reason</c> may be an atom or a tuple with <c>Reason</c> + as an atom in the first field.</p> + </desc> + </func> + <func> + <name>ets(Fun, [, Args]) -> ResultOfFun | exit(Reason)</name> + <fsummary>Call the Fun in a raw context which is not protected by a transaction.</fsummary> + <desc> + <p>Call the <c>Fun</c> in a raw context which is not protected by + a transaction. The Mnesia function calls performed in the + <c>Fun</c> are performed directly on the local <c>ets</c> tables on + the assumption that the local storage type is + <c>ram_copies</c> and the tables are not replicated to other + nodes. Subscriptions are not triggered and checkpoints are + not updated, but it is extremely fast. This function can + also be applied to <c>disc_copies</c> tables if all + operations are read only. See <c>mnesia:activity/4</c> + and the Mnesia User's Guide for more details.</p> + <p><em>Note:</em> Calling (nesting) a <c>mnesia:ets</c> + inside a transaction context will inherit the transaction semantics.</p> + </desc> + </func> + <func> + <name>first(Tab) -> Key | transaction abort </name> + <fsummary>Return the key for the first record in a table.</fsummary> + <desc> + <p>Records in <c>set</c> or <c>bag</c> tables are not ordered. + However, there + is an ordering of the records which is not known + to the user. Accordingly, it is possible to traverse a table by means + of this function in conjunction with the <c>mnesia:next/2</c> + function. + </p> + <p>If there are no records at all in the table, this function + returns the atom <c>'$end_of_table'</c>. 
For this reason, it + is highly undesirable, but not disallowed, to use this atom + as the key for any user records.</p> + </desc> + </func> + <func> + <name>foldl(Function, Acc, Table) -> NewAcc | transaction abort </name> + <fsummary>Call Function for each record in Table </fsummary> + <desc> + <p>Iterates over the table <c>Table</c> and calls + <c>Function(Record, NewAcc)</c> for each <c>Record</c> in the table. + The term returned from <c>Function</c> will be used as the second + argument in the next call to the <c>Function</c>. + </p> + <p><c>foldl</c> returns the same term as the last call to + <c>Function</c> returned.</p> + </desc> + </func> + <func> + <name>foldr(Function, Acc, Table) -> NewAcc | transaction abort </name> + <fsummary>Call Function for each record in Table </fsummary> + <desc> + <p>This function works exactly as + <c>foldl/3</c> but iterates the table in the opposite order + for the <c>ordered_set</c> table type. For + all other table types, <c>foldr/3</c> and + <c>foldl/3</c> are synonyms.</p> + </desc> + </func> + <func> + <name>force_load_table(Tab) -> yes | ErrorDescription </name> + <fsummary>Force a table to be loaded into the system </fsummary> + <desc> + <p>The Mnesia algorithm for table load might lead to a + situation where a table cannot be loaded. This situation + occurs when a node is started and Mnesia concludes, or + suspects, that another copy of the table was active after + this local copy became inactive due to a system crash. + </p> + <p>If this situation is not acceptable, this function can be + used to override the strategy of the Mnesia table load + algorithm. 
This could lead to a situation where some + transaction effects are lost with an inconsistent database as + result, but for some applications high availability is more + important than consistent data.</p> + </desc> + </func> + <func> + <name>index_match_object(Pattern, Pos) -> transaction abort | ObjList</name> + <fsummary>Match records and utilize index information.</fsummary> + <desc> + <p>Invokes <c>mnesia:index_match_object(Tab, Pattern, Pos, read)</c> where <c>Tab</c> is <c>element(1, Pattern)</c>.</p> + </desc> + </func> + <func> + <name>index_match_object(Tab, Pattern, Pos, LockKind) -> transaction abort | ObjList</name> + <fsummary>Match records and utilize index information.</fsummary> + <desc> + <p>In a manner similar to the <c>mnesia:index_read/3</c> + function, we can also utilize any index information when we + try to match records. This function takes a pattern which + obeys the same rules as the <c>mnesia:match_object/3</c> + function with the exception that this function requires the + following conditions: + </p> + <list type="bulleted"> + <item> + <p>The table <c>Tab</c> must have an index on + position <c>Pos</c>. + </p> + </item> + <item> + <p>The element in position <c>Pos</c> in + <c>Pattern</c> must be bound. <c>Pos</c> may either be + an integer (#record.Field), or an attribute name.</p> + </item> + </list> + <p>The two index search functions described here are + automatically invoked when searching tables with <c>qlc</c> + list comprehensions and also when using the low level + <c>mnesia:[dirty_]match_object</c> functions. + </p> + <p></p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires a lock of type <c>LockKind</c> on the + entire table or on a single record. Currently, the lock type + <c>read</c> is supported.
+ </p> + </desc> + </func> + <func> + <name>index_read(Tab, SecondaryKey, Pos) -> transaction abort | RecordList </name> + <fsummary>Read records via index table. </fsummary> + <desc> + <p>Assume there is an index on position <c>Pos</c> for a + certain record type. This function can be used to read the + records without knowing the actual key for the record. For + example, with an index in position 1 of the <c>person</c> + table, the call <c>mnesia:index_read(person, 36, #person.age)</c> returns a list of all persons with age + equal to 36. <c>Pos</c> may also be an attribute name + (atom), but if the notation <c>mnesia:index_read(person, 36, age)</c> is used, the field position will be searched for in + runtime, for each call. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires a read lock on the entire table.</p> + </desc> + </func> + <func> + <name>info() -> ok </name> + <fsummary>Print some information about the system on the tty.</fsummary> + <desc> + <p>Prints some information about the system on the tty. + This function may be used even if Mnesia is not started. 
+ However, more information will be displayed if Mnesia is + started.</p> + </desc> + </func> + <func> + <name>install_fallback(Opaque) -> ok | {error,Reason}</name> + <fsummary>Install a backup as fallback.</fsummary> + <desc> + <p>Invokes <c>mnesia:install_fallback(Opaque, Args)</c> where + <c>Args</c> is <c>[{scope, global}]</c>.</p> + </desc> + </func> + <func> + <name>install_fallback(Opaque, BackupMod) -> ok | {error,Reason}</name> + <fsummary>Install a backup as fallback.</fsummary> + <desc> + <p>Invokes <c>mnesia:install_fallback(Opaque, Args)</c> where + <c>Args</c> is <c>[{scope, global}, {module, BackupMod}]</c>.</p> + </desc> + </func> + <func> + <name>install_fallback(Opaque, Args) -> ok | {error,Reason}</name> + <fsummary>Install a backup as fallback.</fsummary> + <desc> + <p>This function is used to install a backup as fallback. The + fallback will be used to restore the database at the next + start-up. Installation of fallbacks requires Erlang to be up + and running on all the involved nodes, but it does not + matter if Mnesia is running or not. The installation of the + fallback will fail if the local node is not one of the disc + resident nodes in the backup. + </p> + <p><c>Args</c> is a list of the following tuples: + </p> + <list type="bulleted"> + <item> + <p><c>{module, BackupMod}</c>. + All access to the backup media is performed via a + callback module named <c>BackupMod</c>. The + <c>Opaque</c> argument is forwarded to the callback + module which may interpret it as it wishes. The default + callback module is called <c>mnesia_backup</c> and it + interprets the <c>Opaque</c> argument as a local + filename. The default for this module is also + configurable via the <c>-mnesia mnesia_backup</c> + configuration parameter. </p> + </item> + <item> + <p><c>{scope, Scope}</c> + The <c>Scope</c> of a fallback may either be + <c>global</c> for the entire database or <c>local</c> + for one node.
By default, the installation of a fallback + is a global operation which is either performed on all + nodes with disc resident schema or none. Which nodes + are disc resident is determined from the + schema info in the backup.</p> + <p>If the <c>Scope</c> of the operation is <c>local</c> + the fallback will only be installed on the local node.</p> + </item> + <item> + <p><c>{mnesia_dir, AlternateDir}</c> + This argument is only valid if the scope of the + installation is <c>local</c>. Normally the installation + of a fallback is targeted towards the Mnesia directory + as configured with the <c>-mnesia dir</c> configuration + parameter. But by explicitly supplying an + <c>AlternateDir</c> the fallback will be installed there + regardless of the Mnesia directory configuration + parameter setting. After installation of a fallback on + an alternate Mnesia directory that directory is fully + prepared for usage as an active Mnesia directory. + </p> + <p>This is a somewhat dangerous feature which must be + used with care. By unintentional mixing of directories + you may easily end up with an inconsistent database, if + the same backup is installed on more than one directory.</p> + </item> + </list> + </desc> + </func> + <func> + <name>is_transaction() -> boolean </name> + <fsummary>Check if code is running in a transaction.</fsummary> + <desc> + <p>When this function is executed inside a transaction context + it returns <c>true</c>, otherwise <c>false</c>.</p> + </desc> + </func> + <func> + <name>last(Tab) -> Key | transaction abort </name> + <fsummary>Return the key for the last record in a table.</fsummary> + <desc> + <p>This function works exactly like + <c>mnesia:first/1</c> but returns the last object in + Erlang term order for the <c>ordered_set</c> table type.
For + all other table types, <c>mnesia:first/1</c> and + <c>mnesia:last/1</c> are synonyms.</p> + </desc> + </func> + <func> + <name>load_textfile(Filename)</name> + <fsummary>Load tables from a text file.</fsummary> + <desc> + <p>Loads a series of definitions and data found in the + text file (generated with <c>mnesia:dump_to_textfile/1</c>) + into Mnesia. This function also starts Mnesia and possibly + creates a new schema. This function is intended for + educational purposes only and using other functions to deal + with real backups, is recommended.</p> + </desc> + </func> + <func> + <name>lock(LockItem, LockKind) -> Nodes | ok | transaction abort</name> + <fsummary>Explicit grab lock.</fsummary> + <desc> + <p>Write locks are normally acquired on all nodes where a + replica of the table resides (and is active). Read locks are + acquired on one node (the local node if a local + replica exists). Most of the context sensitive access functions + acquire an implicit lock if they are invoked in a + transaction context. The granularity of a lock may either + be a single record or an entire table. + </p> + <p>The normal usage is to call the function without checking + the return value since it exits if it fails and the + transaction is restarted by the transaction manager. It + returns all the locked nodes if a write lock is acquired, and + <c>ok</c> if it was a read lock. + </p> + <p>This function <c>mnesia:lock/2</c> is intended to support + explicit locking on tables but also intended for situations + when locks need to be acquired regardless of how tables are + replicated. Currently, two <c>LockKind</c>'s are supported: + </p> + <taglist> + <tag><c>write</c></tag> + <item> + <p>Write locks are exclusive, which means that if one + transaction manages to acquire a write lock on an item, + no other transaction may acquire any kind of lock on the + same item. 
+ </p> + </item> + <tag><c>read</c></tag> + <item> + <p>Read locks may be shared, which means that if one + transaction manages to acquire a read lock on an item, + other transactions may also acquire a read lock on the + same item. However, if someone has a read lock no one can + acquire a write lock at the same item. If some one has a + write lock no one can acquire a read lock nor + a write lock at the same item.</p> + </item> + </taglist> + <p>Conflicting lock requests are automatically queued if there + is no risk of a deadlock. Otherwise the transaction must be + aborted and executed again. Mnesia does this automatically + as long as the upper limit of maximum <c>retries</c> is not + reached. See <c>mnesia:transaction/3</c> for the details. + </p> + <p>For the sake of completeness sticky write locks will also + be described here even if a sticky write lock is not + supported by this particular function: + </p> + <taglist> + <tag><c>sticky_write</c></tag> + <item> + <p>Sticky write locks are a mechanism which can be used + to optimize write lock acquisition. If your application + uses replicated tables mainly for fault tolerance (as + opposed to read access optimization purpose), sticky + locks may be the best option available. + </p> + <p>When a sticky write lock is acquired, all nodes will be + informed which node is locked. Subsequently, + sticky lock requests from the same node will be + performed as a local operation without any + communication with other nodes. The sticky lock + lingers on the node even after the transaction has + ended. See the Mnesia User's Guide for more information.</p> + </item> + </taglist> + <p>Currently, two kinds of <c>LockItem</c>'s are supported by + this function: + </p> + <taglist> + <tag><c>{table, Tab}</c></tag> + <item> + <p>This acquires a lock of type <c>LockKind</c> on the + entire table <c>Tab</c>. 
+ </p> + </item> + <tag><c>{global, GlobalKey, Nodes}</c></tag> + <item> + <p>This acquires a lock of type <c>LockKind</c> on the + global resource <c>GlobalKey</c>. The lock is acquired + on all active nodes in the <c>Nodes</c> list. </p> + </item> + </taglist> + <p>Locks are released when the outermost transaction ends. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires locks otherwise it just ignores the + request.</p> + </desc> + </func> + <func> + <name>match_object(Pattern) ->transaction abort | RecList </name> + <fsummary>Match <c>Pattern</c>for records. </fsummary> + <desc> + <p>Invokes <c>mnesia:match_object(Tab, Pattern, read)</c> where + <c>Tab</c> is <c>element(1, Pattern)</c>.</p> + </desc> + </func> + <func> + <name>match_object(Tab, Pattern, LockKind) ->transaction abort | RecList </name> + <fsummary>Match <c>Pattern</c>for records. </fsummary> + <desc> + <p>This function takes a pattern with 'don't care' variables + denoted as a '_' parameter. This function returns a list of + records which matched the pattern. Since the second element + of a record in a table is considered to be the key for the + record, the performance of this function depends on whether + this key is bound or not. + </p> + <p>For example, the call <c>mnesia:match_object(person, {person, '_', 36, '_', '_'}, read)</c> returns a list of all person records with an + age field of thirty-six (36). + </p> + <p>The function <c>mnesia:match_object/3</c> + automatically uses indices if these exist. However, no + heuristics are performed in order to select the best + index. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires a lock of type <c>LockKind</c> on the + entire table or a single record. 
Currently, the lock type + <c>read</c> is supported.</p> + </desc> + </func> + <func> + <name>move_table_copy(Tab, From, To) -> {aborted, Reason} | {atomic, ok}</name> + <fsummary>Move the copy of table <c>Tab</c> from node <c>From</c> to node <c>To</c>.</fsummary> + <desc> + <p>Moves the copy of table <c>Tab</c> from node + <c>From</c> to node <c>To</c>. + </p> + <p>The storage type is preserved. For example, a RAM table + moved from one node remains a RAM table on the new node. It is + still possible for other transactions to read and write in + the table while it is being moved. + </p> + <p>This function cannot be used on <c>local_content</c> tables.</p> + </desc> + </func> + <func> + <name>next(Tab, Key) -> Key | transaction abort </name> + <fsummary>Return the next key in a table. </fsummary> + <desc> + <p>This function makes it possible to traverse a table + and perform operations on all records in the table. When + the end of the table is reached, the special key + <c>'$end_of_table'</c> is returned. Otherwise, the function + returns a key which can be used to read the actual record.</p> + </desc> + </func> + <func> + <name>prev(Tab, Key) -> Key | transaction abort </name> + <fsummary>Return the previous key in a table. </fsummary> + <desc> + <p>This function works exactly like + <c>mnesia:next/2</c> but returns the previous object in + Erlang term order for the <c>ordered_set</c> table type. For + all other table types, <c>mnesia:next/2</c> and + <c>mnesia:prev/2</c> are synonyms.</p> + </desc> + </func> + <func> + <name>read({Tab, Key}) -> transaction abort | RecordList </name> + <fsummary>Read record(s) with a given key. </fsummary> + <desc> + <p>Invokes <c>mnesia:read(Tab, Key, read)</c>.</p> + </desc> + </func> + <func> + <name>read(Tab, Key) -> transaction abort | RecordList </name> + <fsummary>Read record(s) with a given key.
</fsummary> + <desc> + <p>Invokes <c>mnesia:read(Tab, Key, read)</c>.</p> + </desc> + </func> + <func> + <name>read(Tab, Key, LockKind) -> transaction abort | RecordList </name> + <fsummary>Read records(s) with a given key. </fsummary> + <desc> + <p>This function reads all records from table <c>Tab</c> with + key <c>Key</c>. This function has the same semantics + regardless of the location of <c>Tab</c>. If the table is + of type <c>bag</c>, the <c>mnesia:read(Tab, Key)</c> can + return an arbitrarily long list. If the table is of type + <c>set</c>, the list is either of length 1, or <c>[]</c>. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. In transaction + context it acquires a lock of type + <c>LockKind</c>. Currently, the lock types <c>read</c>, + <c>write</c> and <c>sticky_write</c> are supported. + </p> + <p>If the user wants to update the record it is more efficient to + use <c>write/sticky_write</c> as the LockKind. + </p> + </desc> + </func> + <func> + <name>read_lock_table(Tab) -> ok | transaction abort</name> + <fsummary>Set a read lock on an entire table.</fsummary> + <desc> + <p>Invokes <c>mnesia:lock({table, Tab}, read)</c>.</p> + </desc> + </func> + <func> + <name>report_event(Event) -> ok</name> + <fsummary>Report a user event to Mnesia's event handler.</fsummary> + <desc> + <p>When tracing a system of Mnesia applications it is useful + to be able to interleave Mnesia's own events with + application related events that give information about the + application context. + </p> + <p>Whenever the application begins a + new and demanding Mnesia task, or if it is entering a new + interesting phase in its execution, it may be a good idea to + use <c>mnesia:report_event/1</c>. 
The <c>Event</c> may be + any term and generates a <c>{mnesia_user, Event}</c> event + for any processes that subscribe to Mnesia system + events.</p> + </desc> + </func> + <func> + <name>restore(Opaque, Args) -> {atomic, RestoredTabs} | {aborted, Reason}</name> + <fsummary>Online restore of backup.</fsummary> + <desc> + <p>With this function, tables may be restored online from a + backup without restarting Mnesia. <c>Opaque</c> is forwarded + to the backup module. <c>Args</c> is a list of the following + tuples: + </p> + <list type="bulleted"> + <item> + <p><c>{module,BackupMod}</c> The backup module + <c>BackupMod</c> will be used to access the backup + media. If omitted, the default backup module will be + used. + </p> + </item> + <item><c>{skip_tables, TabList}</c> Where <c>TabList</c> + is a list of tables which should not be read from the + backup. + </item> + <item><c>{clear_tables, TabList}</c> Where + <c>TabList</c> is a list of tables which should be + cleared, before the records from the backup are inserted, + i.e. all records in the tables are deleted before the + tables are restored. Schema information about the tables + is not cleared or read from backup. + </item> + <item><c>{keep_tables, TabList}</c> Where <c>TabList</c> + is a list of tables which should not be cleared, before + the records from the backup are inserted, i.e. the records + in the backup will be added to the records in the table. + Schema information about the tables is not cleared or read + from backup. + </item> + <item><c>{recreate_tables, TabList}</c> Where + <c>TabList</c> is a list of tables which should be + re-created, before the records from the backup are + inserted. The tables are first deleted and then created with + the schema information from the backup. All the nodes in the + backup need to be up and running.
+ </item> + <item><c>{default_op, Operation}</c> Where <c>Operation</c> is + one of the following operations <c>skip_tables</c>, + <c>clear_tables</c>, <c>keep_tables</c> or + <c>recreate_tables</c>. The default operation specifies + which operation should be used on tables from the backup + which are not specified in any of the lists above. If + omitted, the operation <c>clear_tables</c> will be used. + </item> + </list> + <p>The affected tables are write locked during the + restoration, but regardless of the lock conflicts caused by + this, the applications can continue to do their work while + the restoration is being performed. The restoration is + performed as one single transaction. + </p> + <p>If the database is + huge, it may not be possible to restore it online. In such + cases, the old database must be restored by installing a + fallback and then restarting.</p> + </desc> + </func> + <func> + <name>s_delete({Tab, Key}) -> ok | transaction abort </name> + <fsummary>Set sticky lock and delete records.</fsummary> + <desc> + <p>Invokes <c>mnesia:delete(Tab, Key, sticky_write)</c>.</p> + </desc> + </func> + <func> + <name>s_delete_object(Record) -> ok | transaction abort </name> + <fsummary>Set sticky lock and delete record.</fsummary> + <desc> + <p>Invokes <c>mnesia:delete_object(Tab, Record, sticky_write)</c> where <c>Tab</c> is <c>element(1, Record)</c>.</p> + </desc> + </func> + <func> + <name>s_write(Record) -> ok | transaction abort </name> + <fsummary>Write <c>Record</c> and set sticky lock.</fsummary> + <desc> + <p>Invokes <c>mnesia:write(Tab, Record, sticky_write)</c> + where <c>Tab</c> is <c>element(1, Record)</c>.</p> + </desc> + </func> + <func> + <name>schema() -> ok </name> + <fsummary>Print information about all table definitions on the tty.
</fsummary> + <desc> + <p>Prints information about all table definitions on the tty.</p> + </desc> + </func> + <func> + <name>schema(Tab) -> ok </name> + <fsummary>Print information about one table definition on the tty.</fsummary> + <desc> + <p>Prints information about one table definition on the tty.</p> + </desc> + </func> + <func> + <name>select(Tab, MatchSpec [, Lock]) -> transaction abort | [Object] </name> + <fsummary>Match the objects in <c>Tab</c>against <c>MatchSpec</c>.</fsummary> + <desc> + <p>Matches the objects in the table <c>Tab</c> using a + match_spec as described in the ERTS Users Guide. Optionally a lock + <c>read</c> or <c>write</c> can be given as the third + argument, default is <c>read</c>. The return value depends + on the <c>MatchSpec</c>.</p> + <p><em>Note:</em> for best performance <c>select</c> should + be used before any modifying operations are done on that table + in the same transaction, i.e. don't use <c>write</c> or <c>delete</c> + before a <c>select</c>.</p> + <p>In its simplest forms the match_spec's look like this:</p> + <list type="bulleted"> + <item>MatchSpec = [MatchFunction]</item> + <item>MatchFunction = {MatchHead, [Guard], [Result]}</item> + <item>MatchHead = tuple() | record()</item> + <item>Guard = {"Guardtest name", ...}</item> + <item>Result = "Term construct"</item> + </list> + <p>See the ERTS Users Guide and <c>ets</c> documentation for a + complete description of the select.</p> + <p>For example to find the names of all male persons with an age over 30 in table + Tab do:</p> + <code type="none"> +\011 MatchHead = #person{name='$1', sex=male, age='$2', _='_'}, +\011 Guard = {'>', '$2', 30}, +\011 Result = '$1', +\011 mnesia:select(Tab,[{MatchHead, [Guard], [Result]}]), + </code> + </desc> + </func> + <func> + <name>select(Tab, MatchSpec, NObjects, Lock) -> transaction abort | {[Object],Cont} | '$end_of_table'</name> + <fsummary>Match the objects in <c>Tab</c>against <c>MatchSpec</c>.</fsummary> + <desc> + 
<p>Matches the objects in the table <c>Tab</c> using a + match_spec as described in ERTS users guide, and returns + a chunk of terms and a continuation, the wanted number + of returned terms is specified by the <c>NObjects</c> argument. + The lock argument can be <c>read</c> or <c>write</c>. + The continuation should be used as argument to <c>mnesia:select/1</c>, + if more or all answers are needed.</p> + <p><em>Note:</em> for best performance <c>select</c> should + be used before any modifying operations are done on that + table in the same transaction, i.e. don't use + <c>mnesia:write</c> or <c>mnesia:delete</c> before a + <c>mnesia:select</c>. For efficiency the <c>NObjects</c> is + a recommendation only and the result may contain anything + from an empty list to all available results. </p> + </desc> + </func> + <func> + <name>select(Cont) -> transaction abort | {[Object],Cont} | '$end_of_table'</name> + <fsummary>Continues selecting objects. </fsummary> + <desc> + <p>Selects more objects with the match specification initiated + by <c>mnesia:select/4</c>. + </p> + <p><em>Note:</em> Any modifying operations, i.e. <c>mnesia:write</c> + or <c>mnesia:delete</c>, that are done between the <c>mnesia:select/4</c> + and <c>mnesia:select/1</c> calls will not be visible in the result.</p> + </desc> + </func> + <func> + <name>set_debug_level(Level) -> OldLevel</name> + <fsummary>Change the internal debug level of Mnesia</fsummary> + <desc> + <p>Changes the internal debug level of Mnesia. See the + chapter about configuration parameters for details.</p> + </desc> + </func> + <func> + <name>set_master_nodes(MasterNodes) -> ok | {error, Reason} </name> + <fsummary>Set the master nodes for all tables</fsummary> + <desc> + <p>For each table Mnesia will determine its replica nodes + (<c>TabNodes</c>) and invoke <c>mnesia:set_master_nodes(Tab, TabMasterNodes)</c> where <c>TabMasterNodes</c> is the + intersection of <c>MasterNodes</c> and <c>TabNodes</c>. 
See + <c>mnesia:set_master_nodes/2</c> about the semantics.</p> + </desc> + </func> + <func> + <name>set_master_nodes(Tab, MasterNodes) -> ok | {error, Reason} </name> + <fsummary>Set the master nodes for a table</fsummary> + <desc> + <p>If the application detects that there has been a + communication failure (in a potentially partitioned network) which + may have caused an inconsistent database, it may use the + function <c>mnesia:set_master_nodes(Tab, MasterNodes)</c> to + define from which nodes each table will be loaded. + At startup Mnesia's normal table load algorithm will be + bypassed and the table will be loaded from one of the master + nodes defined for the table, regardless of when and if Mnesia + was terminated on other nodes. The <c>MasterNodes</c> may only + contain nodes where the table has a replica and if the + <c>MasterNodes</c> list is empty, the master node recovery + mechanism for the particular table will be reset and the + normal load mechanism will be used at next restart. + </p> + <p>The master node setting is always local and it may be + changed regardless of whether Mnesia is started or not. + </p> + <p>The database may also become inconsistent if the + <c>max_wait_for_decision</c> configuration parameter is used + or if <c>mnesia:force_load_table/1</c> is used.</p> + </desc> + </func> + <func> + <name>snmp_close_table(Tab) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Remove the possibility for SNMP to manipulate the table.</fsummary> + <desc> + <p>Removes the possibility for SNMP to manipulate the + table.</p> + </desc> + </func> + <func> + <name>snmp_get_mnesia_key(Tab, RowIndex) -> {ok, Key} | undefined</name> + <fsummary>Get the corresponding Mnesia key from an SNMP index.</fsummary> + <type> + <v>Tab ::= atom()</v> + <v>RowIndex ::= [integer()]</v> + <v>Key ::= key() | {key(), key(), ...}</v> + <v>key() ::= integer() | string() | [integer()]</v> + </type> + <desc> + <p>Transforms an SNMP index to the corresponding Mnesia key. 
+ If the SNMP table has multiple keys, the key is a tuple of + the key columns.</p> + </desc> + </func> + <func> + <name>snmp_get_next_index(Tab, RowIndex) -> {ok, NextIndex} | endOfTable</name> + <fsummary>Get the index of the next lexicographical row.</fsummary> + <type> + <v>Tab ::= atom()</v> + <v>RowIndex ::= [integer()]</v> + <v>NextIndex ::= [integer()]</v> + </type> + <desc> + <p>The <c>RowIndex</c> may specify a non-existing row. + Specifically, it might be the empty list. Returns the index + of the next lexicographical row. If <c>RowIndex</c> is the + empty list, this function will return the index of the first row + in the table.</p> + </desc> + </func> + <func> + <name>snmp_get_row(Tab, RowIndex) -> {ok, Row} | undefined</name> + <fsummary>Retrieve a row indexed by an SNMP index.</fsummary> + <type> + <v>Tab ::= atom()</v> + <v>RowIndex ::= [integer()]</v> + <v>Row ::= record(Tab)</v> + </type> + <desc> + <p>Makes it possible to read a row by its SNMP index. This + index is specified as an SNMP OBJECT IDENTIFIER, a list of + integers.</p> + </desc> + </func> + <func> + <name>snmp_open_table(Tab, SnmpStruct) -> {aborted, R} | {atomic, ok}</name> + <fsummary>Organize a Mnesia table as an SNMP table.</fsummary> + <type> + <v>Tab ::= atom()</v> + <v>SnmpStruct ::= [{key, type()}]</v> + <v>type() ::= type_spec() | {type_spec(), type_spec(), ...}</v> + <v>type_spec() ::= fix_string | string | integer</v> + </type> + <desc> + <p>It is possible to establish a direct one to one mapping + between Mnesia tables and SNMP tables. Many + telecommunication applications are controlled and monitored + by the SNMP protocol. This connection between Mnesia and + SNMP makes it simple and convenient to achieve this. + </p> + <p>The <c>SnmpStruct</c> argument is a list of SNMP + information. Currently, the only information needed is + information about the key types in the table. 
It is not + possible to handle multiple keys in Mnesia, but many SNMP + tables have multiple keys. Therefore, the following + convention is used: if a table has multiple keys, these must + always be stored as a tuple of the keys. Information about + the key types is specified as a tuple of atoms describing + the types. The only significant type is + <c>fix_string</c>. This means that a string has fixed + size. For example: + </p> + <code type="none"> +mnesia:snmp_open_table(person, [{key, string}]) + </code> + <p>causes the <c>person</c> table to be ordered as an SNMP + table. + </p> + <p>Consider the following schema for a table of company + employees. Each employee is identified by department number + and name. The other table column stores the telephone number: + </p> + <code type="none"> +mnesia:create_table(employee, + [{snmp, [{key, {integer, string}}]}, + {attributes, record_info(fields, employees)}]), + </code> + <p>The corresponding SNMP table would have three columns; + <c>department</c>, <c>name</c> and <c>telno</c>. + </p> + <p>It is possible to have table columns that are not visible + through the SNMP protocol. These columns must be the last + columns of the table. In the previous example, the SNMP + table could have columns <c>department</c> and <c>name</c> + only. The application could then use the <c>telno</c> column + internally, but it would not be visible to the SNMP + managers. + </p> + <p>In a table monitored by SNMP, all elements must be + integers, strings, or lists of integers. + </p> + <p>When a table is SNMP ordered, modifications are more + expensive than usual, O(logN). And more memory is used. 
+ </p> + <p><em>Note:</em>Only the lexicographical SNMP ordering is + implemented in Mnesia, not the actual SNMP monitoring.</p> + </desc> + </func> + <func> + <name>start() -> ok | {error, Reason} </name> + <fsummary>Start a local Mnesia system.</fsummary> + <desc> + <p>The start-up procedure for a set of Mnesia nodes is a + fairly complicated operation. A Mnesia system consists of a set + of nodes, with Mnesia started locally on all + participating nodes. Normally, each node has a directory where + all the Mnesia files are written. This directory will be + referred to as the Mnesia directory. Mnesia may also be + started on disc-less nodes. See <c>mnesia:create_schema/1</c> + and the Mnesia User's Guide for more information about disc-less + nodes. + </p> + <p>The set of nodes which makes up a Mnesia system is kept in + a schema and it is possible to add and remove Mnesia nodes + from the schema. The initial schema is normally created on + disc with the function <c>mnesia:create_schema/1</c>. On + disc-less nodes, a tiny default schema is generated each time + Mnesia is started. During the start-up procedure, Mnesia + will exchange schema information between the nodes in order + to verify that the table definitions are compatible. + </p> + <p>Each schema has a unique cookie which may be regarded as a + unique schema identifier. The cookie must be the same on all + nodes where Mnesia is supposed to run. See the Mnesia + User's Guide for more information about these details. + </p> + <p>The schema file, as well as all other files which Mnesia + needs, are kept in the Mnesia directory. The command line + option <c>-mnesia dir Dir</c> can be used to specify the + location of this directory to the Mnesia system. If no such + command line option is found, the name of the directory + defaults to <c>Mnesia.Node</c>. 
+ </p> + <p><c>application:start(mnesia)</c> may also be used.</p> + </desc> + </func> + <func> + <name>stop() -> stopped </name> + <fsummary>Stop Mnesia locally.</fsummary> + <desc> + <p>Stops Mnesia locally on the current node. + </p> + <p><c>application:stop(mnesia)</c> may also be used.</p> + </desc> + </func> + <func> + <name>subscribe(EventCategory)</name> + <fsummary>Subscribe to events of type <c>EventCategory</c>.</fsummary> + <desc> + <p>Ensures that a copy of all events of type + <c>EventCategory</c> is sent to the caller. The event + types available are described in the Mnesia User's Guide.</p> + </desc> + </func> + <func> + <name>sync_dirty(Fun [, Args]) -> ResultOfFun | exit(Reason) </name> + <fsummary>Call the Fun in a context which is not protected by a transaction.</fsummary> + <desc> + <p>Call the <c>Fun</c> in a context which is not protected + by a transaction. The Mnesia function calls performed in the + <c>Fun</c> are mapped to the corresponding dirty functions. + It is performed in almost the same context as + <c>mnesia:async_dirty/1,2</c>. The difference is that the + operations are performed synchronously. The caller waits for + the updates to be performed on all active replicas before + the <c>Fun</c> returns. 
See <c>mnesia:activity/4</c> and the + Mnesia User's Guide for more details.</p> + </desc> + </func> + <func> + <name>sync_transaction(Fun [[, Args], Retries]) -> {aborted, Reason} | {atomic, ResultOfFun} </name> + <fsummary>Synchronously execute a transaction.</fsummary> + <desc> + <p>This function waits until data have been committed and + logged to disk (if disk is used) on every involved node before + it returns; otherwise it behaves as + <c>mnesia:transaction/[1,2,3]</c>.</p> + <p>This functionality can be used to prevent one process from overloading + a database on another node.</p> + </desc> + </func> + <func> + <name>system_info(InfoKey) -> Info | exit({aborted, Reason})</name> + <fsummary>Return information about the Mnesia system.</fsummary> + <desc> + <p>Returns information about the Mnesia system, such as + transaction statistics, db_nodes, and configuration parameters. + Valid keys are:</p> + <list type="bulleted"> + <item> + <p><c>all</c>. This argument returns a list of all + local system information. Each element is a + <c>{InfoKey, InfoVal}</c> tuple. <em>Note:</em> New <c>InfoKey</c>'s may + be added and old undocumented <c>InfoKey</c>'s may be removed without + notice.</p> + </item> + <item> + <p><c>access_module</c>. This argument returns the name of + the module which is configured to be the activity access + callback module. + </p> + </item> + <item> + <p><c>auto_repair</c>. This argument returns + <c>true</c> or <c>false</c> to indicate if Mnesia is + configured to invoke the auto repair facility on corrupted + disc files. + </p> + </item> + <item> + <p><c>backup_module</c>. This argument returns the name of + the module which is configured to be the backup + callback module. + </p> + </item> + <item> + <p><c>checkpoints</c>. This argument + returns a list of the names of the + checkpoints currently active on this node. + </p> + </item> + <item> + <p><c>event_module</c>. 
This argument returns the name of + the module which is the event handler callback module. + </p> + </item> + <item> + <p><c>db_nodes</c>. This argument returns + the nodes which make up the persistent database. Disc-less + nodes will only be included in the list of nodes if + they have explicitly been added to the schema, e.g. with + <c>mnesia:add_table_copy/3</c>. The function can be + invoked even if Mnesia is not yet running. + </p> + </item> + <item> + <p><c>debug</c>. This argument returns the current + debug level of Mnesia. + </p> + </item> + <item> + <p><c>directory</c>. This argument returns the name of + the Mnesia directory. It can be invoked even if Mnesia is + not yet running. + </p> + </item> + <item> + <p><c>dump_log_load_regulation</c>. This argument + returns a boolean which tells whether Mnesia is + configured to load regulate the dumper process or not. + This feature is temporary and will disappear in future + releases. + </p> + </item> + <item> + <p><c>dump_log_time_threshold</c>. This argument + returns the time threshold for transaction log dumps in + milliseconds. + </p> + </item> + <item> + <p><c>dump_log_update_in_place</c>. This argument + returns a boolean which tells whether Mnesia is + configured to perform the updates in the dets files + directly or if the updates should be performed in a copy + of the dets files. + </p> + </item> + <item> + <p><c>dump_log_write_threshold</c>. This argument + returns the write threshold for transaction log dumps as + the number of writes to the transaction log. + </p> + </item> + <item> + <p><c>extra_db_nodes</c>. This argument returns a list + of extra db_nodes to be contacted at start-up. + </p> + </item> + <item> + <p><c>fallback_activated</c>. This argument returns + true if a fallback is activated, otherwise false. + </p> + </item> + <item> + <p><c>held_locks</c>. This argument returns a list of + all locks held by the local Mnesia lock manager. + </p> + </item> + <item> + <p><c>is_running</c>. 
This argument returns <c>yes</c> + or <c>no</c> to indicate if Mnesia is running. It may + also return <c>starting</c> or <c>stopping</c>. Can be + invoked even if Mnesia is not yet running. + </p> + </item> + <item> + <p><c>local_tables</c>. This argument returns a list + of all tables which are configured to reside locally. + </p> + </item> + <item> + <p><c>lock_queue</c>. This argument returns a list of + all transactions that are queued for execution by the + local lock manager. + </p> + </item> + <item> + <p><c>log_version</c>. This argument returns the + version number of the Mnesia transaction log format. + </p> + </item> + <item> + <p><c>master_node_tables</c>. This argument returns a + list of all tables with at least one master node. + </p> + </item> + <item> + <p><c>protocol_version</c>. This argument + returns the version number + of the Mnesia inter-process communication protocol. + </p> + </item> + <item> + <p><c>running_db_nodes</c>. This argument returns a + list of nodes where Mnesia currently is running. This + function can be invoked even if Mnesia is not yet + running, but it will then have slightly different + semantics. If Mnesia is down on the local node, the + function will return those other <c>db_nodes</c> and + <c>extra_db_nodes</c> that for the moment are up and + running. If Mnesia is started, the function will return + those nodes that Mnesia on the local node is fully + connected to. Only those nodes that Mnesia has exchanged + schema information with are included as + <c>running_db_nodes</c>. After the merge of schemas, the + local Mnesia system is fully operable and applications + may perform access of remote replicas. Before the schema + merge Mnesia will only operate locally. Sometimes there + may be more nodes included in the + <c>running_db_nodes</c> list than all <c>db_nodes</c> + and <c>extra_db_nodes</c> together. + </p> + </item> + <item> + <p><c>schema_location</c>. This argument returns the + initial schema location. 
+ </p> + </item> + <item> + <p><c>subscribers</c>. This argument returns a list of + local processes currently subscribing to system events. + </p> + </item> + <item> + <p><c>tables</c>. This argument returns a list of all + locally known tables. + </p> + </item> + <item> + <p><c>transactions</c>. This argument returns a list + of all currently active local transactions. + </p> + </item> + <item> + <p><c>transaction_failures</c>. This argument returns + a number which indicates how many transactions have + failed since Mnesia was started. + </p> + </item> + <item> + <p><c>transaction_commits</c>. This argument returns a + number which indicates how many transactions have + terminated successfully since Mnesia was started. + </p> + </item> + <item> + <p><c>transaction_restarts</c>. This argument returns + a number which indicates how many transactions have been + restarted since Mnesia was started. + </p> + </item> + <item> + <p><c>transaction_log_writes</c>. This argument + returns a number which indicates the number of write + operations that have been performed to the transaction + log since start-up. + </p> + </item> + <item> + <p><c>use_dir</c>. This argument returns a boolean + which indicates whether the Mnesia directory is used or + not. Can be invoked even if Mnesia is not yet running. + </p> + </item> + <item> + <p><c>version</c>. This argument returns the current + version number of Mnesia. + </p> + </item> + </list> + </desc> + </func> + <func> + <name>table(Tab [,[Option]]) -> QueryHandle </name> + <fsummary>Return a QLC query handle.</fsummary> + <desc> + <p> <marker id="qlc_table"></marker> +Returns a QLC (Query List Comprehension) query handle, see + <seealso marker="stdlib:qlc">qlc(3)</seealso>. The module <c>qlc</c> implements a query language; it + can use mnesia tables as sources of data. 
Calling + <c>mnesia:table/1,2</c> is the means to make the <c>mnesia</c> + table <c>Tab</c> usable to QLC.</p> + <p>The list of Options may contain mnesia options or QLC + options. The following options are recognized by Mnesia: + <c>{traverse, SelectMethod},{lock, Lock},{n_objects,Number}</c>; any other option is forwarded + to QLC. The <c>lock</c> option may be <c>read</c> or + <c>write</c>; the default is <c>read</c>. The option + <c>n_objects</c> specifies (roughly) the number of objects + returned from mnesia to QLC. Queries to remote tables may + need larger chunks to reduce network overhead; by default, + <c>100</c> objects at a time are returned. The option + <c>traverse</c> determines the method to traverse the whole + table (if needed); the default method is <c>select</c>:</p> + <list type="bulleted"> + <item> + <p><c>select</c>. The table is traversed by calling + <c>mnesia:select/4</c> and <c>mnesia:select/1</c>. The + match specification (the second argument of <c>select/3</c>) + is assembled by QLC: simple filters are + translated into equivalent match specifications while + more complicated filters have to be applied to all + objects returned by <c>select/3</c> given a match + specification that matches all objects.</p> + </item> + <item> + <p><c>{select, MatchSpec}</c>. As for <c>select</c> + the table is traversed by calling <c>mnesia:select/3</c> and + <c>mnesia:select/1</c>. The difference is that the match + specification is explicitly given. This is how to state + match specifications that cannot easily be expressed + within the syntax provided by QLC.</p> + </item> + </list> + </desc> + </func> + <func> + <name>table_info(Tab, InfoKey) -> Info | exit({aborted, Reason})</name> + <fsummary>Return local information about table.</fsummary> + <desc> + <p>The <c>table_info/2</c> function takes two arguments. + The first is the name of a Mnesia table, the second is one of + the following keys: + </p> + <list type="bulleted"> + <item> + <p><c>all</c>. 
This argument returns a list of all + local table information. Each element is a <c>{InfoKey, ItemVal}</c> tuple. <em>Note:</em> New <c>InfoKey</c>'s may be + added and old undocumented <c>InfoKey</c>'s may be removed without + notice.</p> + </item> + <item> + <p><c>access_mode</c>. This argument returns the + access mode of the table. The access mode may either be + read_only or read_write. + </p> + </item> + <item> + <p><c>arity</c>. This argument returns the arity of + records in the table as specified in the schema. + </p> + </item> + <item> + <p><c>attributes</c>. This argument returns the table + attribute names which are specified in the schema. + </p> + </item> + <item> + <p><c>checkpoints</c>. This argument returns the names + of the currently active checkpoints which involve this + table on this node. + </p> + </item> + <item> + <p><c>cookie</c>. This argument returns a table cookie + which is a unique system generated identifier for the + table. The cookie is used internally to ensure that two + different table definitions using the same table name + cannot accidentally be intermixed. The cookie is + generated when the table is initially created. + </p> + </item> + <item> + <p><c>disc_copies</c>. This argument returns the nodes + where a disc_copy of the table resides according to the + schema. + </p> + </item> + <item> + <p><c>disc_only_copies</c>. This argument returns the + nodes where a disc_only_copy of the table resides + according to the schema. + </p> + </item> + <item> + <p><c>index</c>. This argument returns the list of + index position integers for the table. + </p> + </item> + <item> + <p><c>load_node</c>. This argument returns the name of + the node that Mnesia loaded the table from. The + structure of the returned value is unspecified but may + be useful for debugging purposes. + </p> + </item> + <item> + <p><c>load_order</c>. This argument returns the load + order priority of the table. 
It is an integer and + defaults to <c>0</c> (zero). + </p> + </item> + <item> + <p><c>load_reason</c>. This argument returns the + reason why Mnesia decided to load the table. The + structure of the returned value is unspecified but may + be useful for debugging purposes. + </p> + </item> + <item> + <p><c>local_content</c>. This argument returns + <c>true</c> or <c>false</c> to indicate whether the + table is configured to have locally unique content on + each node. + </p> + </item> + <item> + <p><c>master_nodes</c>. This argument returns the + master nodes of a table. + </p> + </item> + <item> + <p><c>memory</c>. This argument returns the number of + words allocated to the table on this node. + </p> + </item> + <item> + <p><c>ram_copies</c>. This argument returns the nodes + where a ram_copy of the table resides according to the + schema. + </p> + </item> + <item> + <p><c>record_name</c>. This argument returns the + record name, common for all records in the table. + </p> + </item> + <item> + <p><c>size</c>. This argument returns the number of + records inserted in the table. + </p> + </item> + <item> + <p><c>snmp</c>. This argument returns the SNMP struct. + <c>[]</c> means that the table currently has no SNMP + properties. + </p> + </item> + <item> + <p><c>storage_type</c>. This argument returns the local + storage type of the table. It can be <c>disc_copies</c>, + <c>ram_copies</c>, <c>disc_only_copies</c>, or the atom + <c>unknown</c>. <c>unknown</c> is returned for all + tables which only reside remotely. + </p> + </item> + <item> + <p><c>subscribers</c>. This argument returns a list + of local processes currently subscribing to local table + events which involve this table on this node. + </p> + </item> + <item> + <p><c>type</c>. This argument returns the table type, + which is either <c>bag</c>, <c>set</c> or <c>ordered_set</c>. + </p> + </item> + <item> + <p><c>user_properties</c>. 
This argument returns the + user associated table properties of the table. It is a + list of the stored property records. + </p> + </item> + <item> + <p><c>version</c>. This argument returns the current + version of the table definition. The table version is + incremented when the table definition is changed. The + table version may be incremented directly when the + table definition has been changed in a schema + transaction, or when a committed table definition is + merged with table definitions from other nodes during + start-up. + </p> + </item> + <item> + <p><c>where_to_read</c>. This argument returns the node + where the table can be read. If the value <c>nowhere</c> + is returned, the table is not loaded, or it resides at a + remote node which is not running. + </p> + </item> + <item> + <p><c>where_to_write</c>. This argument returns a list + of the nodes that currently hold an active replica of + the table. + </p> + </item> + <item> + <p><c>wild_pattern</c>. This argument returns a + structure which can be given to the various match + functions for a certain table. It is a record tuple where all + record fields have the value <c>'_'</c>. + </p> + </item> + </list> + </desc> + </func> + <func> + <name>transaction(Fun [[, Args], Retries]) -> {aborted, Reason} | {atomic, ResultOfFun}</name> + <fsummary>Execute a transaction.</fsummary> + <desc> + <p>This function executes the functional object <c>Fun</c> + with arguments <c>Args</c> as a transaction. + </p> + <p>The code which executes inside the transaction + can consist of a series of table manipulation functions. + If something goes wrong inside the transaction as a result of a + user error or a certain table not being available, the + entire transaction is aborted and the function + <c>transaction/1</c> returns the tuple + <c>{aborted, Reason}</c>. + </p> + <p>If all is well, <c>{atomic, ResultOfFun}</c> is returned where + <c>ResultOfFun</c> is the value of the last expression in + <c>Fun</c>. 
+ </p> + <p>A function which adds a family to the database can be + written as follows if we have a structure <c>{family, Father, Mother, ChildrenList}</c>: + </p> + <code type="none"> +add_family({family, F, M, Children}) -> + ChildOids = lists:map(fun oid/1, Children), + Trans = fun() -> + mnesia:write(F#person{children = ChildOids}), + mnesia:write(M#person{children = ChildOids}), + Write = fun(Child) -> mnesia:write(Child) end, + lists:foreach(Write, Children) + end, + mnesia:transaction(Trans). + +oid(Rec) -> {element(1, Rec), element(2, Rec)}. + </code> + <p>This code adds a set of people to the database. Running this code + within one transaction will ensure that either the whole + family is added to the database, or the whole transaction + aborts. For example, if the last child is badly formatted, + or the executing process terminates due to an + <c>'EXIT'</c> signal while executing the family code, the + transaction aborts. Accordingly, the situation where half a + family is added can never occur. + </p> + <p>It is also useful to update the database within a transaction + if several processes concurrently update the same records. + For example, the function <c>raise(Name, Amount)</c>, which + adds <c>Amount</c> to the salary field of a person, should + be implemented as follows: + </p> + <code type="none"> +raise(Name, Amount) -> + mnesia:transaction(fun() -> + case mnesia:wread({person, Name}) of + [P] -> + Salary = Amount + P#person.salary, + P2 = P#person{salary = Salary}, + mnesia:write(P2); + _ -> + mnesia:abort("No such person") + end + end). + </code> + <p>When this function executes within a transaction, + several processes running on different nodes can concurrently + execute the <c>raise/2</c> function without interfering + with each other. + </p> + <p>Since Mnesia detects deadlocks, a transaction can be + restarted any number of times. This function will attempt a restart as specified in + <c>Retries</c>. 
<c>Retries</c> must + be an integer greater than 0 or the atom <c>infinity</c>. Default is + <c>infinity</c>.</p> + </desc> + </func> + <func> + <name>transform_table(Tab, Fun, NewAttributeList, NewRecordName) -> {aborted, R} | {atomic, ok} </name> + <fsummary>Change format on all records in table <c>Tab</c>.</fsummary> + <desc> + <p>This function applies the argument <c>Fun</c> to all + records in the table. <c>Fun</c> is a function which takes a + record of the old type and returns a transformed record of the + new type. The <c>Fun</c> argument can also be the atom + <c>ignore</c>, which indicates that only the meta data about the table will + be updated. Usage of <c>ignore</c> is not recommended but included + as a possibility for the user to do their own transform. + <c>NewAttributeList</c> and <c>NewRecordName</c> + specify the attributes and the new record type of the converted + table. The table name will always remain unchanged; if the + record_name is changed, only the mnesia functions which + use table identifiers will work, e.g. <c>mnesia:write/3</c> + will work but <c>mnesia:write/1</c> will not.</p> + </desc> + </func> + <func> + <name>transform_table(Tab, Fun, NewAttributeList) -> {aborted, R} | {atomic, ok} </name> + <fsummary>Change format on all records in table <c>Tab</c>.</fsummary> + <desc> + <p>Invokes <c>mnesia:transform_table(Tab, Fun, NewAttributeList, RecName)</c> + where <c>RecName</c> is <c>mnesia:table_info(Tab, record_name)</c>.</p> + </desc> + </func> + <func> + <name>traverse_backup(Source, [SourceMod,] Target, [TargetMod,] Fun, Acc) -> {ok, LastAcc} | {error, Reason}</name> + <fsummary>Traversal of a backup.</fsummary> + <desc> + <p>With this function it is possible to iterate over a backup, + either for the purpose of transforming it into a new backup, + or just reading it. The arguments are explained briefly + below. See the Mnesia User's Guide for additional + details. 
+ </p> + <list type="bulleted"> + <item><c>SourceMod</c> and <c>TargetMod</c> are the names of + the modules which actually access the backup + media. + </item> + <item><c>Source</c> and <c>Target</c> are opaque data used + exclusively by the modules <c>SourceMod</c> and + <c>TargetMod</c> for the purpose of initializing the + backup media. + </item> + <item><c>Acc</c> is an initial accumulator value. + </item> + <item><c>Fun(BackupItems, Acc)</c> is applied to each item in + the backup. The Fun must return a tuple + <c>{BackupItems,NewAcc}</c>, where <c>BackupItems</c> is + a list of valid backup items, and <c>NewAcc</c> is a new + accumulator value. The returned backup items are written + in the target backup. + </item> + <item><c>LastAcc</c> is the last accumulator value. This is + the last <c>NewAcc</c> value that was returned by <c>Fun</c>. + </item> + </list> + </desc> + </func> + <func> + <name>uninstall_fallback() -> ok | {error,Reason}</name> + <fsummary>Uninstall a fallback.</fsummary> + <desc> + <p>Invokes <c>mnesia:uninstall_fallback([{scope, global}])</c>.</p> + </desc> + </func> + <func> + <name>uninstall_fallback(Args) -> ok | {error,Reason}</name> + <fsummary>Uninstall a fallback.</fsummary> + <desc> + <p>This function is used to de-install a fallback before it + has been used to restore the database. This is normally a + distributed operation that is either performed on all + nodes with disc resident schema or none. Uninstallation of + fallbacks requires Erlang to be up and running on all + involved nodes, but it does not matter if Mnesia is running + or not. Which nodes that are considered as disc-resident + nodes is determined from the schema info in the local + fallback. + </p> + <p><c>Args</c> is a list of the following tuples: + </p> + <list type="bulleted"> + <item> + <p><c>{module, BackupMod}</c>. 
+ See <c>mnesia:install_fallback/2</c> about the + semantics.</p> + </item> + <item> + <p><c>{scope, Scope}</c>. + See <c>mnesia:install_fallback/2</c> about the + semantics.</p> + </item> + <item> + <p><c>{mnesia_dir, AlternateDir}</c>. + See <c>mnesia:install_fallback/2</c> about the + semantics.</p> + </item> + </list> + </desc> + </func> + <func> + <name>unsubscribe(EventCategory)</name> + <fsummary>Unsubscribe from events of type <c>EventCategory</c>.</fsummary> + <desc> + <p>Stops sending events of type + <c>EventCategory</c> to the caller.</p> + </desc> + </func> + <func> + <name>wait_for_tables(TabList,Timeout) -> ok | {timeout, BadTabList} | {error, Reason} </name> + <fsummary>Wait for tables to be accessible.</fsummary> + <desc> + <p>Some applications need to wait for certain tables to + be accessible in order to do useful work. + <c>mnesia:wait_for_tables/2</c> hangs until all tables in the + <c>TabList</c> are accessible, or until <c>timeout</c> is + reached.</p> + </desc> + </func> + <func> + <name>wread({Tab, Key}) -> transaction abort | RecordList </name> + <fsummary>Read records with given key.</fsummary> + <desc> + <p>Invoke <c>mnesia:read(Tab, Key, write)</c>.</p> + </desc> + </func> + <func> + <name>write(Record) -> transaction abort | ok </name> + <fsummary>Writes a record into the database.</fsummary> + <desc> + <p>Invoke <c>mnesia:write(Tab, Record, write)</c> where + <c>Tab</c> is <c>element(1, Record)</c>.</p> + </desc> + </func> + <func> + <name>write(Tab, Record, LockKind) -> transaction abort | ok </name> + <fsummary>Write a record into the database.</fsummary> + <desc> + <p>Writes the record <c>Record</c> to the table <c>Tab</c>. + </p> + <p>The function returns <c>ok</c>, or aborts if an error + occurs. For example, the transaction aborts if no + <c>person</c> table exists. + </p> + <p>The semantics of this function is context sensitive. See + <c>mnesia:activity/4</c> for more information. 
In transaction + context it acquires a lock of type <c>LockKind</c>. The + following lock types are supported: <c>write</c> and + <c>sticky_write</c>.</p> + </desc> + </func> + <func> + <name>write_lock_table(Tab) -> ok | transaction abort</name> + <fsummary>Set write lock on an entire table.</fsummary> + <desc> + <p>Invokes <c>mnesia:lock({table, Tab}, write)</c>.</p> + </desc> + </func> + </funcs> + + <section> + <title>Configuration Parameters</title> + <p>Mnesia reads the following application configuration + parameters:</p> + <list type="bulleted"> + <item> + <p><c>-mnesia access_module Module</c>. The + name of the Mnesia activity access callback module. The default is + <c>mnesia</c>. + </p> + </item> + <item> + <p><c>-mnesia auto_repair true | false</c>. This flag controls + whether Mnesia will try to automatically repair + files that have not been properly closed. The default is + <c>true</c>. + </p> + </item> + <item> + <p><c>-mnesia backup_module Module</c>. The + name of the Mnesia backup callback module. The default is + <c>mnesia_backup</c>. + </p> + </item> + <item> + <p><c>-mnesia debug Level</c> + Controls the debug level of Mnesia. + Possible values are:</p> + <taglist> + <tag><c>none</c></tag> + <item> + <p>No trace outputs at all. This is the default setting. + </p> + </item> + <tag><c>verbose</c></tag> + <item> + <p>Activates tracing of important debug events. These + debug events generate <c>{mnesia_info, Format, Args}</c> + system events. Processes may subscribe to these events with + <c>mnesia:subscribe/1</c>. The events are always sent to Mnesia's + event handler. + </p> + </item> + <tag><c>debug</c></tag> + <item> + <p>Activates all events at the verbose level plus full + trace of all debug events. These debug events generate + <c>{mnesia_info, Format, Args}</c> system events. Processes may + subscribe to these events with <c>mnesia:subscribe/1</c>. The + events are always sent to the Mnesia event handler. 
On this + debug level, the Mnesia event handler starts subscribing to + updates in the schema table. + </p> + </item> + <tag><c>trace</c></tag> + <item> + <p>Activates all events at the level debug. On this + debug level, the Mnesia event handler starts subscribing to + updates on all Mnesia tables. This level is only intended + for debugging small toy systems since many large + events may be generated. + </p> + </item> + <tag><c>false</c></tag> + <item> + <p>An alias for none. + </p> + </item> + <tag><c>true</c></tag> + <item> + <p>An alias for debug. + </p> + </item> + </taglist> + </item> + <item> + <p><c>-mnesia core_dir Directory</c>. The name of the + directory where Mnesia core files are stored, or + false. Setting it implies that also RAM-only nodes will + generate a core file if a crash occurs. </p> + </item> + <item> + <p><c>-mnesia dc_dump_limit Number</c>. + Controls how often <c>disc_copies</c> tables are dumped from memory. + Tables are dumped when + <c>filesize(Log) > (filesize(Tab)/Dc_dump_limit)</c>. + Lower values reduce CPU overhead but increase disk space and + startup times. The default is 4.</p> + </item> + <item> + <p><c>-mnesia dir Directory</c>. The name of the directory + where all Mnesia data is stored. The name of the directory must + be unique for the current node. Two nodes may, under no + circumstances, share the same Mnesia directory. The results are + totally unpredictable.</p> + </item> + <item> + <p><c>-mnesia dump_log_load_regulation true | false</c>. + Controls if the log dumps should be performed as fast as + possible or if the dumper should do its own load + regulation. This feature is temporary and will disappear in a + future release. The default is <c>false</c>. + </p> + </item> + <item> + <p><c>-mnesia dump_log_update_in_place true | false</c>. + Controls if log dumps are performed on a copy of + the original data file, or if the log dump is + performed on the original data file. 
The default is <c>true</c>.</p> + </item> + <item> + <p><c>-mnesia dump_log_write_threshold Max</c>, where + <c>Max</c> is an integer which specifies the maximum number of writes + allowed to the transaction log before a new dump of the log + is performed. It defaults to 100 log writes. + </p> + </item> + <item> + <p><c>-mnesia dump_log_time_threshold Max</c>, + where <c>Max</c> is an integer which + specifies the dump log interval in milliseconds. It defaults + to 3 minutes. If a dump has not been performed within + <c>dump_log_time_threshold</c> milliseconds, then a new dump is + performed regardless of how many writes have been + performed. + </p> + </item> + <item> + <p><c>-mnesia event_module Module</c>. The + name of the Mnesia event handler callback module. The default is + <c>mnesia_event</c>. + </p> + </item> + <item> + <p><c>-mnesia extra_db_nodes Nodes</c> specifies a list of + nodes, in addition to the ones found in the schema, with which + Mnesia should also establish contact. The default value + is the empty list <c>[]</c>. + </p> + </item> + <item> + <p><c>-mnesia fallback_error_function {UserModule, UserFunc}</c> + specifies a user supplied callback function + which will be called if a fallback is installed and mnesia + goes down on another node. Mnesia will call the function + with one argument, the name of the dying node, e.g. + <c>UserModule:UserFunc(DyingNode)</c>. + Mnesia should be restarted, or else + the database could be inconsistent. + The default behaviour is to terminate mnesia. + </p> + </item> + <item> + <p><c>-mnesia max_wait_for_decision Timeout</c>. Specifies + how long Mnesia will wait for other nodes to share their + knowledge regarding the outcome of an unclear transaction. 
By + default the <c>Timeout</c> is set to the atom + <c>infinity</c>, which implies that if Mnesia upon startup + encounters a "heavyweight transaction" whose outcome is + unclear, the local Mnesia will wait until Mnesia is started + on some (in worst cases all) of the other nodes that were + involved in the interrupted transaction. This is a very rare + situation, but when/if it happens, Mnesia does not guess if + the transaction on the other nodes was committed or aborted. + Mnesia will wait until it knows the outcome and then act + accordingly. + </p> + <p>If <c>Timeout</c> is set to an integer value in + milliseconds, Mnesia will force "heavyweight transactions" + to be finished, even if the outcome of the transaction for + the moment is unclear. After <c>Timeout</c> milliseconds, + Mnesia will commit/abort the transaction and continue with + the startup. This may lead to a situation where the + transaction is committed on some nodes and aborted on other + nodes. If the transaction was a schema transaction, the + inconsistency may be fatal. + </p> + </item> + <item> + <p><c>-mnesia no_table_loaders NUMBER</c> specifies the number of + parallel table loaders during start. More loaders can be good if the + network latency is high or if many tables contains few records. + The default value is <c>2</c>. + </p> + </item> + <item> + <p><c>-mnesia schema_location Loc</c> controls where + Mnesia will look for its schema. The parameter + <c>Loc</c> may be one of the following atoms: </p> + <taglist> + <tag><c>disc</c></tag> + <item> + <p>Mandatory disc. The schema is assumed to be located + in the Mnesia directory. If the schema cannot be found, + Mnesia refuses to start. This is the old behavior. + </p> + </item> + <tag><c>ram</c></tag> + <item> + <p>Mandatory RAM. The schema resides in RAM + only. At start-up, a tiny new schema is generated. This + default schema just contains the definition of the schema + table and only resides on the local node. 
Since no other + nodes are found in the default schema, the configuration + parameter <c>extra_db_nodes</c> must be used in + order to let the + node share its table definitions with other nodes. (The + <c>extra_db_nodes</c> parameter may also be used on disc based nodes.) + </p> + </item> + <tag><c>opt_disc</c></tag> + <item> + <p>Optional disc. The schema may reside either on disc + or in RAM. If the schema is found on disc, Mnesia starts as a + disc based node and the storage type of the schema table is + <c>disc_copies</c>. If no schema is found on disc, Mnesia starts + as a disc-less node and the storage type of the schema table is + <c>ram_copies</c>. The default value for the application parameter + is <c>opt_disc</c>. + </p> + </item> + </taglist> + </item> + </list> + <p>First the SASL application parameters are checked, then + the command line flags are checked, and finally, the default + value is chosen. + </p> + </section> + + <section> + <title>See Also</title> + <p>mnesia_registry(3), mnesia_session(3), qlc(3), + dets(3), ets(3), disk_log(3), application(3) + </p> + </section> + +</erlref> + diff --git a/lib/mnesia/doc/src/mnesia_frag_hash.xml b/lib/mnesia/doc/src/mnesia_frag_hash.xml new file mode 100644 index 0000000000..ca03327994 --- /dev/null +++ b/lib/mnesia/doc/src/mnesia_frag_hash.xml @@ -0,0 +1,166 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2002</year> + <year>2007</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. 
+ + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + The Initial Developer of the Original Code is Ericsson AB. + </legalnotice> + + <title>mnesia_frag_hash</title> + <prepared>Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>2002-09-10</date> + <rev>A</rev> + <file>mnesia_frag_hash.sgml</file> + </header> + <module>mnesia_frag_hash</module> + <modulesummary>Defines mnesia_frag_hash callback behaviour</modulesummary> + <description> + <p>The module <c>mnesia_frag_hash</c> defines a callback + behaviour for user defined hash functions of fragmented tables.</p> + <p>Which module that is selected to implement the <c>mnesia_frag_hash</c> + behaviour for a particular fragmented table is specified together + with the other <c>frag_properties</c>. The <c>hash_module</c> defines + the module name. The <c>hash_state</c> defines the initial hash state.</p> + <p>It implements dynamic hashing which is a kind of hashing + that grows nicely when new fragments are added. 
It is well + suited for scalable hash tables.</p> + </description> + <funcs> + <func> + <name>init_state(Tab, State) -> NewState | abort(Reason)</name> + <fsummary>Initiate the hash state for a new table</fsummary> + <type> + <v>Tab = atom()</v> + <v>State = term()</v> + <v>NewState = term()</v> + <v>Reason = term()</v> + </type> + <desc> + <p>This function is invoked when a fragmented table is + created with <c>mnesia:create_table/2</c> or when a + normal (un-fragmented) table is converted to be a + fragmented table with <c>mnesia:change_table_frag/2</c>.</p> + <p>Note that the <c>add_frag/1</c> function will be invoked + one time each for the rest of the fragments (all but number 1) + as a part of the table creation procedure.</p> + <p><c>State</c> is the initial value of the <c>hash_state</c> <c>frag_property</c>. The <c>NewState</c> will be stored as + <c>hash_state</c> among the other <c>frag_properties</c>. + </p> + </desc> + </func> + <func> + <name>add_frag(State) -> {NewState, IterFrags, AdditionalLockFrags} | abort(Reason)</name> + <fsummary>This function is invoked when a new fragment is added to a fragmented table</fsummary> + <type> + <v>State = term()</v> + <v>NewState = term()</v> + <v>IterFrags = [integer()]</v> + <v>AdditionalLockFrags = [integer()]</v> + <v>Reason = term()</v> + </type> + <desc> + <p>In order to scale well, it is a good idea to ensure that the + records are evenly distributed over all fragments including + the new one.</p> + <p>The <c>NewState</c> will be stored as <c>hash_state</c> among the + other <c>frag_properties</c>. + </p> + <p>As a part of the <c>add_frag</c> procedure, Mnesia will iterate + over all fragments corresponding to the <c>IterFrags</c> numbers + and invoke <c>key_to_frag_number(NewState,RecordKey)</c> for + each record. 
If the new fragment differs from the old + fragment, the record will be moved to the new fragment.</p> + <p>As the <c>add_frag</c> procedure is a part of a schema + transaction, Mnesia will acquire write locks on the + affected tables. That is both the fragments corresponding + to <c>IterFrags</c> and those corresponding to + <c>AdditionalLockFrags</c>.</p> + </desc> + </func> + <func> + <name>del_frag(State) -> {NewState, IterFrags, AdditionalLockFrags} | abort(Reason)</name> + <fsummary>This function is invoked when a fragment is deleted from a fragmented table</fsummary> + <type> + <v>State = term()</v> + <v>NewState = term()</v> + <v>IterFrags = [integer()]</v> + <v>AdditionalLockFrags = [integer()]</v> + <v>Reason = term()</v> + </type> + <desc> + <p>The <c>NewState</c> will be stored as <c>hash_state</c> among the + other <c>frag_properties</c>. + </p> + <p>As a part of the <c>del_frag</c> procedure, Mnesia will iterate + over all fragments corresponding to the <c>IterFrags</c> numbers + and invoke <c>key_to_frag_number(NewState,RecordKey)</c> for + each record. If the new fragment differs from the old + fragment, the record will be moved to the new fragment.</p> + <p>Note that all records in the last fragment must be moved to + another fragment as the entire fragment will be deleted.</p> + <p>As the <c>del_frag</c> procedure is a part of a schema + transaction, Mnesia will acquire write locks on the + affected tables. That is both the fragments corresponding + to <c>IterFrags</c> and those corresponding to + <c>AdditionalLockFrags</c>.</p> + </desc> + </func> + <func> + <name>key_to_frag_number(State, Key) -> FragNum | abort(Reason)</name> + <fsummary>Resolves the key of a record into a fragment number</fsummary> + <type> + <v>FragNum = integer()</v> + <v>Reason = term()</v> + </type> + <desc> + <p>This function is invoked whenever Mnesia needs to determine + which fragment a certain record belongs to. 
It is typically + invoked at read, write and delete.</p> + </desc> + </func> + <func> + <name>match_spec_to_frag_numbers(State, MatchSpec) -> FragNums | abort(Reason)</name> + <fsummary>Resolves a MatchSpec into a list of fragment numbers</fsummary> + <type> + <v>MatchSpec = ets_select_match_spec()</v> + <v>FragNums = [FragNum]</v> + <v>FragNum = integer()</v> + <v>Reason = term()</v> + </type> + <desc> + <p>This function is invoked whenever Mnesia needs to determine + which fragments need to be searched for a MatchSpec. + It is typically invoked at select and match_object.</p> + </desc> + </func> + </funcs> + + <section> + <title>See Also</title> + <p>mnesia(3) + </p> + </section> + +</erlref> + diff --git a/lib/mnesia/doc/src/mnesia_registry.xml b/lib/mnesia/doc/src/mnesia_registry.xml new file mode 100644 index 0000000000..966134d508 --- /dev/null +++ b/lib/mnesia/doc/src/mnesia_registry.xml @@ -0,0 +1,104 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>1998</year> + <year>2007</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + The Initial Developer of the Original Code is Ericsson AB. 
+ </legalnotice> + + <title>mnesia_registry</title> + <prepared>Dan Gudmundsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>98-04-24</date> + <rev>A</rev> + <file>mnesia_registry.sgml</file> + </header> + <module>mnesia_registry</module> + <modulesummary>Dump support for registries in erl_interface. </modulesummary> + <description> + <p>The module <c>mnesia_registry</c> is usually part of + <c>erl_interface</c>, but for the time being, it is a part of the + Mnesia application. + </p> + <p><c>mnesia_registry</c> is mainly a module intended for + internal usage within OTP, but it has two functions that + are exported for public use. + </p> + <p>On C-nodes <c>erl_interface</c> has support for registry + tables. These reside in RAM on the C-node but they may also be + dumped into Mnesia tables. By default, the dumping of registry + tables via <c>erl_interface</c> causes a corresponding Mnesia + table to be created with <c>mnesia_registry:create_table/1</c> + if necessary. + </p> + <p>The tables that are created with these functions can be + administered as all other Mnesia tables. They may be included in + backups or replicas may be added etc. The tables are in fact + normal Mnesia tables owned by the user of the corresponding + <c>erl_interface</c> registries. + </p> + </description> + <funcs> + <func> + <name>create_table(Tab) -> ok | exit(Reason)</name> + <fsummary>Creates a registry table in Mnesia.</fsummary> + <desc> + <p>This is a wrapper function for + <c>mnesia:create_table/2</c> which creates a table (if there is no existing table) + with an appropriate set of <c>attributes</c>. The table will + only reside on the local node and its storage type will be + the same as the <c>schema</c> table on the local + node, i.e. <c>{ram_copies,[node()]}</c> or + <c>{disc_copies,[node()]}</c>. 
+ </p> + <p>It is this function that is used by <c>erl_interface</c> to + create the Mnesia table if it did not already exist.</p> + </desc> + </func> + <func> + <name>create_table(Tab, TabDef) -> ok | exit(Reason)</name> + <fsummary>Creates a customized registry table in Mnesia. </fsummary> + <desc> + <p>This is a wrapper function for + <c>mnesia:create_table/2</c> which creates a table (if there is no existing table) + with an appropriate set of <c>attributes</c>. The attributes + and <c>TabDef</c> are forwarded to + <c>mnesia:create_table/2</c>. For example, if the table should + reside as <c>disc_only_copies</c> on all nodes, a call would + look like:</p> + <code type="none"> + TabDef = [{disc_only_copies, [node()|nodes()]}], + mnesia_registry:create_table(my_reg, TabDef) + </code> + </desc> + </func> + </funcs> + + <section> + <title>See Also</title> + <p>mnesia(3), erl_interface(3) + </p> + </section> + +</erlref> + diff --git a/lib/mnesia/doc/src/note.gif b/lib/mnesia/doc/src/note.gif Binary files differnew file mode 100644 index 0000000000..6fffe30419 --- /dev/null +++ b/lib/mnesia/doc/src/note.gif diff --git a/lib/mnesia/doc/src/notes.gif b/lib/mnesia/doc/src/notes.gif Binary files differnew file mode 100644 index 0000000000..e000cca26a --- /dev/null +++ b/lib/mnesia/doc/src/notes.gif diff --git a/lib/mnesia/doc/src/notes.xml b/lib/mnesia/doc/src/notes.xml new file mode 100644 index 0000000000..69f2185cd8 --- /dev/null +++ b/lib/mnesia/doc/src/notes.xml @@ -0,0 +1,383 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1996</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. 
You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Mnesia Release Notes</title> + <prepared>Dan Gudmundsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>05-01-26</date> + <rev>AE</rev> + <file>notes.xml</file> + </header> + <p>This document describes the changes made to the Mnesia system + from version to version. The intention of this document is to + list all incompatibilities as well as all enhancements and + bugfixes for every release of Mnesia. Each release of Mnesia + thus constitutes one section in this document. The title of each + section is the version number of Mnesia.</p> + + <section><title>Mnesia 4.4.12</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + The documentation is now built with open source tools + (xsltproc and fop) that exists on most platforms. One + visible change is that the frames are removed.</p> + <p> + Own Id: OTP-8250</p> + </item> + </list> + </section> + + </section> + <section><title>Mnesia 4.4.11</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + Fixed duplicate results with mnesia:index_read() on + ordered_set tables. Reported by Sam Bobroff. </p> + <p> + Fixed locking in mnesia:index_read() which now grabs a read + table lock to ensure correctness, this may slow down the + operation or block other processes trying to reach the + same table. 
</p> + <p> + Calling mnesia:dump_log() could crash mnesia, + Reported by Igor Ribeiro Sucupira.</p> + <p> Own Id: OTP-8074</p> + </item> + </list> + </section> + + </section> + <section><title>Mnesia 4.4.10</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Mnesia crashed if a qlc query was running inside a + transaction when mnesia stopped at another node. Thanks + Teemu Antti-Poika.</p> + <p> + Own Id: OTP-7968</p> + </item> + <item> + <p> + Mnesia could crash when loading local_content tables.</p> + <p> + Own Id: OTP-8002 Aux Id: seq11277 </p> + </item> + </list> + </section> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + Minor (smp) optimizations.</p> + <p> + Own Id: OTP-7928</p> + </item> + </list> + </section> + + </section> + + + <section><title>Mnesia 4.4.9</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + mnesia:clear_table/1 crashed instead of returning + <c>{aborted,..}</c> if it was called inside a + transaction.</p> + <p> + Own Id: OTP-7911</p> + </item> + </list> + </section> + + </section> + + <section><title>Mnesia 4.4.8</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + With bad timing several api functions could return or + exit with a bad error message when mnesia was shutting + down.</p> + <p> + Own Id: OTP-7753 Aux Id: seq11179 </p> + </item> + <item> + <p> + <c>mnesia:clear_table/1</c> cleared all nodes table + content even if the table was <c>local_content</c> only + type.</p> + <p> + Own Id: OTP-7835</p> + </item> + </list> + </section> + + </section> + + <section><title>Mnesia 4.4.7</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Disallowed match patterns ('_', and '$n') as argument to + <c>mnesia:delete_object/1</c> and friends.</p> + <p> + Own Id: OTP-7524</p> + </item> + </list> + </section> + + + <section><title>Improvements and New 
Features</title> + <list> + <item> + <p> + Introduced a few new functions in Mnesia: <c>mnesia:read/2</c>, + <c>mnesia:first/3</c>, <c>mnesia:last/3</c>, <c>mnesia:prev/4</c>, + <c>mnesia:next/4</c>, <c>mnesia_frag:first/1</c>, <c>mnesia_frag:last/1</c>, + <c>mnesia_frag:prev/2</c>, <c>mnesia_frag:next/2</c>.</p> + <p> + Own Id: OTP-7625</p> + </item> + </list> + </section> + +</section> + + <section><title>Mnesia 4.4.6</title> + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + <c>mnesia:restore/2</c> aborted if an <c>EXIT</c> message + appeared in the client message queue.</p> + <p> + Own Id: OTP-7585 Aux Id: seq11046 </p> + </item> + </list> + </section> + </section> + + <section><title>Mnesia 4.4.5</title> + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + mnesia:clear_table/1 does not require that all + replicas of the table are available anymore.</p> + <p> + Own Id: OTP-7466 Aux Id: seq11015</p> + </item> + </list> + </section> + </section> + + <section><title>Mnesia 4.4.4</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Mnesia did not garbage collect transaction decisions on + disk based nodes if no transactions were made on the + local node.</p> + <p> + Own Id: OTP-7419</p> + </item> + </list> + </section> + + </section> + + <section><title>Mnesia 4.4.3</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Table referred to by foreign key did not have node_pool + properly cleaned up when a node was removed from the + schema. 
Thanks Paul Mineiro.</p> + <p> + Own Id: OTP-7340</p> + </item> + <item> + <p> + Mnesia crashed and generated a core dump if a + schema_transaction was running when mnesia stopped.</p> + <p> + Own Id: OTP-7378 Aux Id: seq10964 </p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + It is now possible to delete a db node even when other + disk resident nodes are down. Thanks Paul Mineiro.</p> + <p> + Own Id: OTP-7383</p> + </item> + </list> + </section> + +</section> + + <section><title>Mnesia 4.4.2</title> + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Sticky locks could lead to hanging transactions.</p> + <p> + Own Id: OTP-7205 Aux Id: seq10793 </p> + </item> + <item> + <p> + <c>mnesia:snmp_get_next_index/2</c> didn't work with + partial index keys. Argument checking is now done + according to documentation, in functions + <c>mnesia:snmp_get_row/2</c>, + <c>mnesia:snmp_get_mnesia_key/2</c> and + <c>mnesia:snmp_get_next_index/2</c>. These functions now + require that <c>RowIndex</c> is a list.</p> + <p> + *** POTENTIAL INCOMPATIBILITY ***</p> + <p> + Own Id: OTP-7208</p> + </item> + </list> + </section> + </section> + + <section><title>Mnesia 4.4.1</title> + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Snmp index tables was not initialized correctly in + <c>mnesia-4.4</c>.</p> + <p> + Own Id: OTP-7170 Aux Id: seq10870 </p> + </item> + </list> + </section> + <section><title>Known Bugs and Problems</title> + <list> + <item> + <p> + Rearranging fragmented tables is an O(N^2) operation.</p> + <p> + Own Id: OTP-6300</p> + </item> + </list> + </section> + </section> + + <section><title>Mnesia 4.4</title> + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Mnesia ignored the module argument to + <c>mnesia:restore/2</c>. 
Thanks Paul Mineiro.</p> + <p> + Own Id: OTP-6981</p> + </item> + </list> + </section> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + Mnesia's snmp operations <c>snmp_get_row/2</c>, + <c>snmp_get_next_index/2</c> and + <c>snmp_get_mnesia_key/2</c> have been made context + aware, i.e. inside a transaction they will compensate for + table updates made earlier in the same transaction. + This might cause a performance drop if a lot of updates + have been made before the invocation of these functions.</p> + <p> + *** POTENTIAL INCOMPATIBILITY ***</p> + <p> + Own Id: OTP-6856 Aux Id: seq10671 </p> + </item> + <item> + <p> + Introduced erlang:phash/2 as new default for fragmented + tables. Already existing tables will continue to use + whatever hash function they were using.</p> + <p> + Own Id: OTP-6923</p> + </item> + <item> + <p> + Introduced <c>mnesia:is_transaction/0</c>.</p> + <p> + Own Id: OTP-6995 Aux Id: seq10812 </p> + </item> + </list> + </section> + + <section><title>Known Bugs and Problems</title> + <list> + <item> + <p> + Rearranging fragmented tables is an O(N^2) operation.</p> + <p> + Own Id: OTP-6300</p> + </item> + </list> + </section> + </section> + + <!-- section> + <title>Previous Notes</title> + <p>For information about older versions see <url href="part_notes_history_frame.html">release notes history</url>.</p> + </section --> +</chapter> + diff --git a/lib/mnesia/doc/src/notes_history.xml b/lib/mnesia/doc/src/notes_history.xml new file mode 100644 index 0000000000..0984e33376 --- /dev/null +++ b/lib/mnesia/doc/src/notes_history.xml @@ -0,0 +1,322 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2004</year><year>2009</year> + <holder>Ericsson AB. 
All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Mnesia Release Notes</title> + <prepared>Dan Gudmundsson and Håkan Mattsson</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>04-08-22</date> + <rev>AE</rev> + <file>notes_history.sgml</file> + </header> + <p>This document describes the changes made to the Mnesia system + from version to version. The intention of this document is to + list all incompatibilities as well as all enhancements and + bugfixes for every release of Mnesia. Each release of Mnesia + thus constitutes one section in this document. 
The title of each + section is the version number of Mnesia.</p> + + <section><title>Mnesia 4.3.7</title> + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Removed a memory leak on ram-only nodes, introduced in + <c>mnesia-4.3.6</c>.</p> + <p> + Own Id: OTP-6936 Aux Id: seq10786 </p> + </item> + </list> + </section> + <section> + <title>Known Bugs and Problems</title> + <list type="bulleted"> + <item> + <p>Rearranging fragmented tables is an O(N^2) + operation.</p> + <p>Own Id: OTP-6300</p> + </item> + </list> + </section> + + </section> + + + <section> + <title>Mnesia 4.3.6</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + A bug causing lots of records to be lost at startup from + an installed fallback has been fixed. The bug did however + not show up when a backup file generated with + <c>mnesia:backup/1</c> or + <c>mnesia:backup_checkpoint/2</c> was installed as + fallback. In order to trigger the bug, the items in the + backup file had to be rearranged in such an order that + records from different tables were interleaved with each + other.</p> + <p> + Own Id: OTP-6903 Aux Id: seq10763 </p> + </item> + <item> + <p> + Mnesia sometimes failed to commit schema operations on + all nodes, this have been seen on smp machines but could + happen on single processor as well with some bad timing.</p> + <p> + Own Id: OTP-6904</p> + </item> + <item> + <p> + <c>mnesia:select/1</c> failed to return all matches on + remote nodes if something was written to the table + earlier in the same transaction.</p> + <p> + Own Id: OTP-6908</p> + </item> + </list> + </section> + + <section> + <title>Known Bugs and Problems</title> + <list type="bulleted"> + <item> + <p>Rearranging fragmented tables is an O(N^2) + operation.</p> + <p>Own Id: OTP-6300</p> + </item> + </list> + </section> + </section> + + <section> + <title>Mnesia 4.3.5</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list 
type="bulleted"> + <item> + <p>The internal index tables on bag tables where not always + cleaned correctly. Thanks Christopher Faulet and Salazard + Remy.</p> + <p>Own Id: OTP-6587</p> + </item> + <item> + <p>Changing the copy type with + <c>mnesia:change_table_copy/3</c> on a node which was down + was not handled correctly, that caused an eternal table + lock on the alive nodes. Thanks Hal Snyder.</p> + <p>Own + Id: OTP-6709</p> + </item> + </list> + </section> + + <section> + <title>Known Bugs and Problems</title> + <list type="bulleted"> + <item> + <p>Rearranging fragmented tables is an O(N^2) + operation.</p> + <p>Own Id: OTP-6300</p> + </item> + </list> + </section> + </section> + + <section> + <title>Mnesia 4.3.4</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>Adding fragments to ram_copies tables was allowed on + nodes that where down.</p> + <p>Own Id: OTP-6367</p> + </item> + <item> + <p>Mnesia leaked transaction decisions (memory and disk + space).</p> + <p>Own Id: OTP-6464</p> + </item> + <item> + <p><c>dirty_update_counter/3</c> did not work properly on + disc tables when the counter was not initiated (Thanks to + Sebastien Saint-Sevin).</p> + <p>Own Id: OTP-6545</p> + </item> + <item> + <p>Chunked <c>mnesia:select</c> on fragmented tables could + crash (Thanks to Primanathan Reddy).</p> + <p>Own Id: + OTP-6548</p> + </item> + </list> + </section> + + <section> + <title>Improvements and New Features</title> + <list type="bulleted"> + <item> + <p>Introduced a new configure parameter dc_dump_limit.</p> + <p>Removed dead code (dialyzer warnings) and debugging + features that called interpreter commands.</p> + <p>Minor + performance increase when a lot of simultaneous + transactions where active. 
</p> + <p>Thank you Scott Lystig + Fritchie for debugging and bug reports.</p> + <p>Own Id: + OTP-6478</p> + </item> + </list> + </section> + + <section> + <title>Known Bugs and Problems</title> + <list type="bulleted"> + <item> + <p>Rearranging fragmented tables is an O(N^2) + operation.</p> + <p>Own Id: OTP-6300</p> + </item> + </list> + </section> + </section> + + <section> + <title>Mnesia 4.3.3</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>Mnesia could crash during startup when loading tables + from remote node. </p> + <p>Own Id: OTP-6298 Aux Id: seq10402 </p> + </item> + <item> + <p>Mnesia could fail to update all copies during + del_table_copy. </p> + <p>Own Id: OTP-6299</p> + </item> + </list> + </section> + + <section> + <title>Known Bugs and Problems</title> + <list type="bulleted"> + <item> + <p>Rearranging fragmented tables is an O(N^2) operation.</p> + <p>Own Id: OTP-6300</p> + </item> + </list> + </section> + </section> + + <section> + <title>Mnesia 4.3.2</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>Mnesia sometimes failed to remove [d]ets table fixation, + when using <c>mnesia:first/1</c>,<c>mnesia:next/2</c> or + <c>qlc</c> this could cause that deleted records are not + actually deleted in the [d]ets table and that + <c>mnesia:[dirty_]first/1</c> reported the wrong key. </p> + <p>Own Id: OTP-6193 Aux Id: seq10376</p> + </item> + </list> + </section> + </section> + + <section> + <title>Mnesia 4.3.1</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>Mnesia could crash (bad match in mnesia_controller) + during start. 
</p> + <p>Own Id: OTP-6116 Aux Id: seq10305 </p> + </item> + </list> + </section> + </section> + + <section> + <title>Mnesia 4.3</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>Deleting tables during the start of mnesia on another + node caused problems. </p> + <p>Own Id: OTP-5928 Aux Id: + seq10111 </p> + </item> + <item> + <p>Killing processes that runs nested transactions could + crash mnesia. </p> + <p>Own Id: OTP-6027 Aux Id: seq10244 </p> + </item> + <item> + <p>Creating or deleting tables with a checkpoint activated + could crash mnesia </p> + <p>Own Id: OTP-6064</p> + </item> + <item> + <p>Table loading could be mixed with schema operations + which could cause troubles. </p> + <p>Own Id: OTP-6065 Aux Id: + seq10291 </p> + </item> + </list> + </section> + + <section> + <title>Improvements and New Features</title> + <list type="bulleted"> + <item> + <p>Added parallel table loaders to increase startup + performance when the system have many small tables. The + configuration variable <c>no_table_loaders</c> configures + the number of loaders, default is two. </p> + <p>Own Id: + OTP-6087</p> + </item> + </list> + </section> + </section> +</chapter> + diff --git a/lib/mnesia/doc/src/part.xml b/lib/mnesia/doc/src/part.xml new file mode 100644 index 0000000000..b9654a4207 --- /dev/null +++ b/lib/mnesia/doc/src/part.xml @@ -0,0 +1,49 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE part SYSTEM "part.dtd"> + +<part xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. 
If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Mnesia User's Guide</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <docno></docno> + <date></date> + <rev></rev> + <file>part.sgml</file> + </header> + <description> + <p><em>Mnesia</em> is a distributed DataBase Management + System(DBMS), appropriate for telecommunications applications and other + Erlang applications which require continuous operation and exhibit soft + real-time properties.</p> + </description> + <xi:include href="Mnesia_chap1.xml"/> + <xi:include href="Mnesia_chap2.xml"/> + <xi:include href="Mnesia_chap3.xml"/> + <xi:include href="Mnesia_chap4.xml"/> + <xi:include href="Mnesia_chap5.xml"/> + <xi:include href="Mnesia_chap7.xml"/> + <xi:include href="Mnesia_chap8.xml"/> + <xi:include href="Mnesia_App_A.xml"/> + <xi:include href="Mnesia_App_B.xml"/> + <xi:include href="Mnesia_App_C.xml"/> + <xi:include href="Mnesia_App_D.xml"/> +</part> + diff --git a/lib/mnesia/doc/src/part_notes.xml b/lib/mnesia/doc/src/part_notes.xml new file mode 100644 index 0000000000..caa155585d --- /dev/null +++ b/lib/mnesia/doc/src/part_notes.xml @@ -0,0 +1,41 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE part SYSTEM "part.dtd"> + +<part xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. 
If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>MNESIA Release Notes</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <docno></docno> + <date>1997-05-27</date> + <rev>1.2</rev> + <file>part_notes.xml</file> + </header> + <description> + <p><em>Mnesia</em> is a Distributed DataBase Management + System (DBMS), appropriate for telecommunications applications and other + Erlang applications which require continuous operation and exhibit soft + real-time properties. </p> + <p>For information about older versions see + <url href="part_notes_history_frame.html">release notes history</url>.</p> + </description> + <xi:include href="notes.xml"/> +</part> + diff --git a/lib/mnesia/doc/src/part_notes_history.xml b/lib/mnesia/doc/src/part_notes_history.xml new file mode 100644 index 0000000000..177738623c --- /dev/null +++ b/lib/mnesia/doc/src/part_notes_history.xml @@ -0,0 +1,41 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE part SYSTEM "part.dtd"> + +<part> + <header> + <copyright> + <year>2004</year> + <year>2007</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + The Initial Developer of the Original Code is Ericsson AB. + </legalnotice> + + <title>MNESIA Release Notes</title> + <prepared>Claes Wikström, Hans Nilsson and Håkan Mattsson</prepared> + <docno></docno> + <date>1997-05-27</date> + <rev>1.2</rev> + <file>part_notes_history.sgml</file> + </header> + <description> + <p><em>Mnesia</em> is a Distributed DataBase Management + System (DBMS), appropriate for telecommunications applications and other + Erlang applications which require continuous operation and exhibit soft + real-time properties. </p> + </description> + <include file="notes_history"></include> +</part> + diff --git a/lib/mnesia/doc/src/ref_man.gif b/lib/mnesia/doc/src/ref_man.gif Binary files differnew file mode 100644 index 0000000000..b13c4efd53 --- /dev/null +++ b/lib/mnesia/doc/src/ref_man.gif diff --git a/lib/mnesia/doc/src/ref_man.xml b/lib/mnesia/doc/src/ref_man.xml new file mode 100644 index 0000000000..417423641d --- /dev/null +++ b/lib/mnesia/doc/src/ref_man.xml @@ -0,0 +1,44 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE application SYSTEM "application.dtd"> + +<application xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Mnesia Reference Manual</title> + <prepared>Håkan Mattsson, Hans Nilsson, Claes Wikström</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>1998-04-24</date> + <rev>A</rev> + <file>refman.sgml</file> + </header> + <description> + <p><em>Mnesia</em> is a distributed DataBase Management + System (DBMS), appropriate for telecommunications applications and other + Erlang applications which require continuous operation and exhibit soft + real-time properties. </p> + </description> + <xi:include href="mnesia.xml"/> + <xi:include href="mnesia_frag_hash.xml"/> + <xi:include href="mnesia_registry.xml"/> +</application> + diff --git a/lib/mnesia/doc/src/summary.html.src b/lib/mnesia/doc/src/summary.html.src new file mode 100644 index 0000000000..2941a2f46a --- /dev/null +++ b/lib/mnesia/doc/src/summary.html.src @@ -0,0 +1 @@ +A heavy duty real-time distributed database
\ No newline at end of file diff --git a/lib/mnesia/doc/src/user_guide.gif b/lib/mnesia/doc/src/user_guide.gif Binary files differnew file mode 100644 index 0000000000..e6275a803d --- /dev/null +++ b/lib/mnesia/doc/src/user_guide.gif diff --git a/lib/mnesia/doc/src/warning.gif b/lib/mnesia/doc/src/warning.gif Binary files differnew file mode 100644 index 0000000000..96af52360e --- /dev/null +++ b/lib/mnesia/doc/src/warning.gif diff --git a/lib/mnesia/ebin/.gitignore b/lib/mnesia/ebin/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/mnesia/ebin/.gitignore diff --git a/lib/mnesia/examples/DATA b/lib/mnesia/examples/DATA new file mode 120000 index 0000000000..2c2314b960 --- /dev/null +++ b/lib/mnesia/examples/DATA @@ -0,0 +1 @@ +../doc/src/DATA
\ No newline at end of file diff --git a/lib/mnesia/examples/Makefile b/lib/mnesia/examples/Makefile new file mode 100644 index 0000000000..ff00ee76a5 --- /dev/null +++ b/lib/mnesia/examples/Makefile @@ -0,0 +1,103 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(MNESIA_VSN) + +# ---------------------------------------------------- +# Release Macros +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN) + +# ---------------------------------------------------- +# Common Macros +# ---------------------------------------------------- + + +MODULES = \ + company \ + company_o \ + bup \ + mnesia_meter \ + mnesia_tpcb + +ERL_FILES= $(MODULES:=.erl) + +HRL_FILES = \ + company.hrl \ + company_o.hrl + +DATA_FILES = \ + DATA + +# TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +TARGET_FILES = + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +ERL_COMPILE_FLAGS += -pa ../ebin +EBIN = . 
+ +# ---------------------------------------------------- +# Make Rules +# ---------------------------------------------------- +debug opt: $(TARGET_FILES) + +clean: + rm -f $(TARGET_FILES) *~ + +docs: + +# ---------------------------------------------------- +# Release Targets +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/examples + $(INSTALL_DATA) $(ERL_FILES) $(DATA_FILES) $(HRL_FILES) $(RELSYSDIR)/examples + $(INSTALL_DIR) $(RELSYSDIR)/examples/bench + (cd bench; $(INSTALL_DATA) \ + Makefile \ + README \ + bench.erl \ + bench.hrl \ + bench_generate.erl \ + bench_populate.erl \ + bench_trans.erl \ + bench.config1 \ + bench.config2 \ + bench.config3 \ + bench.config4 \ + bench.config5 \ + bench.config6 \ + bench.config7 \ + $(RELSYSDIR)/examples/bench) + (cd bench; $(INSTALL_SCRIPT) bench.sh $(RELSYSDIR)/examples/bench) + +release_docs_spec: + diff --git a/lib/mnesia/examples/bench/Makefile b/lib/mnesia/examples/bench/Makefile new file mode 100644 index 0000000000..55621e8cf4 --- /dev/null +++ b/lib/mnesia/examples/bench/Makefile @@ -0,0 +1,10 @@ + +all: + erl -make + +clean: + rm *.beam + +test: + ./bench.sh bench.config* + diff --git a/lib/mnesia/examples/bench/README b/lib/mnesia/examples/bench/README new file mode 100644 index 0000000000..5d31b5ba25 --- /dev/null +++ b/lib/mnesia/examples/bench/README @@ -0,0 +1,211 @@ +Author : Hakan Mattsson <[email protected]> +Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> + +This is an implementation of a real-time database benchmark +(LMC/UU-01:025), defined by Richard Trembley (LMC) and Miroslaw +Zakrzewski (LMC) . The implementation runs the benchmark on the Mnesia +DBMS which is a part of Erlang/OTP (www.erlang.org). 
+ +The implementation is organized in the following parts: + + bench.erl - main API, startup and configuration + bench.hrl - record definitions + bench_populate.erl - create database and populate it with records + bench_trans.erl - the actual transactions to be benchmarked + bench_generate.erl - request generator, statistics computation + +Compile the files with: + + make all + +and run the benchmarks with: + + make test + +================================================================ + +The benchmark runs on a set of Erlang nodes which should reside on +one processor each. + +There are many options when running the benchmark. Benchmark +configuration parameters may either be stated in a configuration file +or as command line arguments in the Erlang shell. Erlang nodes may +either be started manually or automatically by the benchmark program. + +In its most automated usage you only need to provide one or more +configuration files and run the + + bench.sh <ConfigFiles> + +script to start all Erlang nodes, populate the database and run the +actual benchmark for each one of the configuration files. The +benchmark results will be displayed at stdout. + +In order to be able to automatically start remote Erlang nodes, +you need to: + + - put the $ERL_TOP/bin directory in your path on all nodes + - bind IP addresses to hostnames (e.g. via DNS or /etc/hosts) + - enable usage of rsh so it does not prompt for password + +If you cannot achieve this, it is possible to run the benchmark +anyway, but it requires more manual work to be done for each +execution of the benchmark. + +================================================================ + +For each configuration file given to the bench.sh script: + + - a brand new Erlang node is started + - the bench:run(['YourConfigFile']) function is invoked + - the Erlang node(s) are halted. + +Without arguments, the bench.sh simply starts an Erlang shell.
+In that shell you have the ability to invoke Erlang functions, +such as bench:run/1. + +The bench:start_all/1 function analyzes the configuration, starts +all Erlang nodes necessary to perform the benchmark and starts +Mnesia on all these nodes. + +The bench:populate/1 function populates the database according +to the configuration and assumes that Mnesia is up and running +on all nodes. + +The bench:generate/1 function starts the actual benchmark +according to the configuration and assumes that Mnesia is +up and running and that the database is fully populated. +Given some arguments such as + + Args = ['YourConfigFile', {statistics_detail, debug}]. + +the invocation of + + bench:run(Args). + +is equivalent to: + + SlaveNodes = bench:start_all(Args). + bench:populate(Args). + bench:generate(Args). + bench:stop_slave_nodes(SlaveNodes). + +In case you cannot get the automatic start of remote Erlang nodes to +work (implied by bench:start_all/1), you may need to manually start +an Erlang node on each host (e.g. with bench.sh without arguments) and +then invoke bench:run/1 or its equivalents on one of them. + +================================================================ + +The following configuration parameters are valid: + +generator_profile + + Selects the transaction profile of the benchmark. Must be one + of the following atoms: t1, t2, t3, t4, t5, ping, random. + Defaults to random which means that the t1 .. t5 transaction + types are randomly selected according to the benchmark spec. + The other choices disable the random choice and select + one particular transaction type to be run over and over again. + +generator_warmup + + Defines how long the request generators should "warm up" the + DBMS before the actual measurements are performed. The unit + is milliseconds and defaults to 2000 (2 seconds). + +generator_duration + + Defines the duration of the actual benchmark measurement activity.
+ The unit is milliseconds and defaults to 15000 (15 seconds). + +generator_cooldown + + Defines how long the request generators should "cool down" the + DBMS after the actual measurements have been performed. The unit + is milliseconds and defaults to 2000 (2 seconds). + +generator_nodes + + Defines which Erlang nodes that should host request generators. + The default is all connected nodes. + +n_generators_per_node + + Defines how many generator processes that should be running on + each generator node. The default is 1. + +statistics_detail + + Regulates the detail level of statistics. It must be one of the + following atoms: normal, debug and debug2. debug enables a + finer grain of statistics to be reported, but since it requires + more counters to be updated by the generator processes, it may + cause slightly worse benchmark performance figures than the brief + normal case. The default is debug. debug2 prints out the debug info + and formats it according to LMC's benchmark program. + +storage_type + + Defines whether the database should be kept solely in primary + memory (ram_copies), solely on disc (disc_only_copies) or + in both (disc_copies). The default is ram_copies. Currently + the other choices require a little bit of manual preparation. + +table_nodes + + Defines which Erlang nodes that should host the tables. + +n_fragments + + Defines how many fragments each table should be divided in. + Default is 100. The fragments are evenly distributed over + all table nodes. The group table is not divided into fragments. + +n_replicas + + Defines how many replicas that should be kept of each fragment. + The group table is replicated to all table nodes. + +n_subscribers + + Defines the number of subscriber records. Default 25000. + +n_subscribers + + Defines the number of subscriber records. Default 25000. + +n_groups + + Defines the number of group records. Default 5. + +n_servers + + Defines the number of server records. Default 1.
+ +write_lock_type + + Defines whether the transactions should use ordinary + write locks or if they utilize sticky write locks. + Must be one of the following atoms: write, sticky_write. + Default is sticky_write. + +use_binary_subscriber_key + + Defines whether the subscriber key should be represented + as a string (binary) or as an integer. Default is false. + +always_try_nearest_node + + The benchmark was initially written to test scalability + when more nodes were added to the database and when the + (fragmented) tables were distributed over all nodes. In + such a system the transactions should be evenly distributed + over all nodes. When this option is set to true it is possible + to make fair measurements of master/slave configurations, when + all transactions are performed on one node. Default is false. + +cookie + + Defines which cookie the Erlang node should use in its + distribution protocol. Must be an atom, default is 'bench'. diff --git a/lib/mnesia/examples/bench/bench.config1 b/lib/mnesia/examples/bench/bench.config1 new file mode 100644 index 0000000000..e53ce51f63 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config1 @@ -0,0 +1,21 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}. +{generator_duration, 900000}. +{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb1 + ]}. +{storage_type, ram_copies}. +{n_replicas, 1}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. diff --git a/lib/mnesia/examples/bench/bench.config2 b/lib/mnesia/examples/bench/bench.config2 new file mode 100644 index 0000000000..f2f82f01fa --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config2 @@ -0,0 +1,21 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}.
+{generator_duration, 900000}. +{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb2 + ]}. +{storage_type, ram_copies}. +{n_replicas, 1}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. diff --git a/lib/mnesia/examples/bench/bench.config3 b/lib/mnesia/examples/bench/bench.config3 new file mode 100644 index 0000000000..c96e4531fd --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config3 @@ -0,0 +1,23 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}. +{generator_duration, 900000}. +{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1, + bench@wppgpb2 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb3, + bench@wppgpb4 + ]}. +{storage_type, ram_copies}. +{n_replicas, 2}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. diff --git a/lib/mnesia/examples/bench/bench.config4 b/lib/mnesia/examples/bench/bench.config4 new file mode 100644 index 0000000000..e7c0bf2151 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config4 @@ -0,0 +1,23 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}. +{generator_duration, 900000}. +{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1, + bench@wppgpb2 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb1, + bench@wppgpb2 + ]}. +{storage_type, ram_copies}. +{n_replicas, 2}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. 
diff --git a/lib/mnesia/examples/bench/bench.config5 b/lib/mnesia/examples/bench/bench.config5 new file mode 100644 index 0000000000..623ec3fb73 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config5 @@ -0,0 +1,27 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}. +{generator_duration, 900000}. +{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1, + bench@wppgpb2, + bench@wppgpb3, + bench@wppgpb4 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb1, + bench@wppgpb2, + bench@wppgpb3, + bench@wppgpb4 + ]}. +{storage_type, ram_copies}. +{n_replicas, 2}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. diff --git a/lib/mnesia/examples/bench/bench.config6 b/lib/mnesia/examples/bench/bench.config6 new file mode 100644 index 0000000000..f056890ff4 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config6 @@ -0,0 +1,27 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}. +{generator_duration, 900000}. +{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1, + bench@wppgpb2, + bench@wppgpb3, + bench@wppgpb4 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb5, + bench@wppgpb6, + bench@wppgpb7, + bench@wppgpb8 + ]}. +{storage_type, ram_copies}. +{n_replicas, 2}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. diff --git a/lib/mnesia/examples/bench/bench.config7 b/lib/mnesia/examples/bench/bench.config7 new file mode 100644 index 0000000000..6a78570e71 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.config7 @@ -0,0 +1,35 @@ +{cookie, bench_cookie}. +{generator_profile, random}. +{statistics_detail, debug}. +{generator_warmup, 120000}. +{generator_duration, 900000}. 
+{generator_cooldown, 120000}. +{generator_nodes, + [bench@wppgpb1, + bench@wppgpb2, + bench@wppgpb3, + bench@wppgpb4, + bench@wppgpb5, + bench@wppgpb6, + bench@wppgpb7, + bench@wppgpb8 + ]}. +{use_binary_subscriber_key, false}. +{n_generators_per_node, 2}. +{write_lock_type, sticky_write}. +{table_nodes, + [bench@wppgpb1, + bench@wppgpb2, + bench@wppgpb3, + bench@wppgpb4, + bench@wppgpb5, + bench@wppgpb6, + bench@wppgpb7, + bench@wppgpb8 + ]}. +{storage_type, ram_copies}. +{n_replicas, 2}. +{n_fragments, 100}. +{n_subscribers, 500000}. +{n_groups, 100}. +{n_servers, 20}. diff --git a/lib/mnesia/examples/bench/bench.erl b/lib/mnesia/examples/bench/bench.erl new file mode 100644 index 0000000000..d191169296 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.erl @@ -0,0 +1,327 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% File : bench.erl +%%% Author : Hakan Mattsson <[email protected]> +%%% Purpose : Implement the Canadian database benchmark (LMC/UU-01:025) +%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(bench). +-author('[email protected]'). + +-include("bench.hrl").
+ +-export([ + run/0, run/1, + + start_all/0, start_all/1, + populate/0, populate/1, + generate/0, generate/1, + + args_to_config/1, verify_config/2, + start/0, start/1, + stop_slave_nodes/1, + bind_schedulers/0 + ]). + +bind_schedulers() -> + try + %% Avoid first core and bind schedules to the remaining ones + Topo = erlang:system_info(cpu_topology), + erlang:system_flag(cpu_topology,lists:reverse(Topo)), + %% N = erlang:system_info(schedulers), + %% erlang:system_flag(schedulers_online, lists:max([N - 1, 1])), + erlang:system_flag(scheduler_bind_type, default_bind), + timer:sleep(timer:seconds(1)), % Wait for Rickard + erlang:system_info(scheduler_bindings) + catch _:_ -> + %% Ancient systems + ignore + end. + +%% Run the benchmark: +%% +%% - Start all necessary Erlang nodes +%% - Populate the database +%% - Start the traffic generators +%% - Calculate benchmark statistics +%% - Stop the temporary Erlang nodes +run() -> + FileName = "bench.config", + run([FileName]). + +run(Args) -> + C = args_to_config(Args), + SlaveNodes = start_all(C), + bench_populate:start(C), + Result = bench_generate:start(C), + stop_slave_nodes(SlaveNodes), + Result. + +%% Start Mnesia on the local node +start() -> + FileName = 'bench.config', + start([FileName]). + +start(Args) -> + C = args_to_config(Args), + erlang:set_cookie(node(), C#config.cookie), + Nodes = [node() | (((C#config.table_nodes -- C#config.generator_nodes) ++ + C#config.generator_nodes) -- [node()])], + Extra = [{extra_db_nodes, Nodes}], + ?d("Starting Mnesia on node ~p...", [node()]), + case mnesia:start(Extra) of + ok -> + Tables = mnesia:system_info(tables), + io:format(" ok.~n" , []), + ?d("Waiting for ~p tables...", [length(Tables)]), + wait(Tables); + {error, Reason} -> + io:format(" FAILED: ~p~n", [Reason]), + {error, Reason} + end. 
+ +wait(Tables) -> + case mnesia:wait_for_tables(Tables, timer:seconds(10)) of + ok -> + io:format(" loaded.~n", []), + ok; + {timeout, More} -> + io:format(" ~p...", [length(More)]), + wait(More) + end. + +%% Populate the database +populate() -> + FileName = 'bench.config', + populate([FileName]). + +populate(Args) -> + C = args_to_config(Args), + bench_populate:start(C). + +%% Start the traffic generators +generate() -> + FileName = 'bench.config', + generate([FileName]). + +generate(Args) -> + C = args_to_config(Args), + bench_generate:start(C). + +start_all() -> + FileName = 'bench.config', + start_all([FileName]). + +start_all(Args) -> + C = args_to_config(Args), + Nodes = [node() | (((C#config.table_nodes -- C#config.generator_nodes) ++ + C#config.generator_nodes) -- [node()])], + erlang:set_cookie(node(), C#config.cookie), + ?d("Starting Erlang nodes...~n", []), + ?d("~n", []), + SlaveNodes = do_start_all(Nodes, [], C#config.cookie), + Extra = [{extra_db_nodes, Nodes}], + ?d("~n", []), + ?d("Starting Mnesia...", []), + case rpc:multicall(Nodes, mnesia, start, [Extra]) of + {Replies, []} -> + case [R || R <- Replies, R /= ok] of + [] -> + io:format(" ok~n", []), + SlaveNodes; + Bad -> + io:format(" FAILED: ~p~n", [Bad]), + exit({mnesia_start, Bad}) + end; + Bad -> + io:format(" FAILED: ~p~n", [Bad]), + exit({mnesia_start, Bad}) + end. 
+ +do_start_all([Node | Nodes], Acc, Cookie) when is_atom(Node) -> + case string:tokens(atom_to_list(Node), [$@]) of + [Name, Host] -> + Arg = lists:concat(["-setcookie ", Cookie]), + ?d(" ~s", [left(Node)]), + case slave:start_link(Host, Name, Arg) of + {ok, Node} -> + load_modules(Node), + rpc:call(Node, ?MODULE, bind_schedulers, []), + io:format(" started~n", []), + do_start_all(Nodes, [Node | Acc], Cookie); + {error, {already_running, Node}} -> + rpc:call(Node, ?MODULE, bind_schedulers, []), + io:format(" already started~n", []), + do_start_all(Nodes, Acc, Cookie); + {error, Reason} -> + io:format(" FAILED:~p~n", [Reason]), + stop_slave_nodes(Acc), + exit({slave_start_failed, Reason}) + end; + _ -> + ?d(" ~s FAILED: " + "Not valid as node name. Must be 'name@host'.~n", + [left(Node)]), + stop_slave_nodes(Acc), + exit({bad_node_name, Node}) + end; +do_start_all([], StartedNodes, _Cookie) -> + StartedNodes. + +load_modules(Node) -> + Fun = + fun(Mod) -> + case code:get_object_code(Mod) of + {_Module, Bin, Fname} -> + rpc:call(Node, code,load_binary,[Mod,Fname,Bin]); + Other -> + Other + end + end, + lists:foreach(Fun, [bench, bench_generate, bench_populate, bench_trans]). + +stop_slave_nodes([]) -> + ok; +stop_slave_nodes(Nodes) -> + ?d("~n", []), + ?d("Stopping Erlang nodes...~n", []), + ?d("~n", []), + do_stop_slave_nodes(Nodes). + +do_stop_slave_nodes([Node | Nodes]) -> + ?d(" ~s", [left(Node)]), + Res = slave:stop(Node), + io:format(" ~p~n", [Res]), + do_stop_slave_nodes(Nodes); +do_stop_slave_nodes([]) -> + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The configuration +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +args_to_config(C) when is_record(C, config) -> + C; +args_to_config(Args) when is_list(Args) -> + do_args_to_config(Args, []). 
+ +do_args_to_config([{Key, Val} | Rest], Acc) when is_list(Acc) -> + do_args_to_config(Rest, Acc ++ [{Key, Val}]); +do_args_to_config([FileName | Rest], Acc) when is_list(Acc) -> + io:nl(), + ?d("Reading configuration file ~p...", [FileName]), + case file:consult(FileName) of + {ok, Config} -> + io:format(" ok~n", []), + do_args_to_config(Rest, Acc ++ Config); + {error, Reason} -> + io:format(" FAILED: ~s~n", + [[lists:flatten(file:format_error( Reason))]]), + {error, {args_to_config, FileName, Reason}} + end; +do_args_to_config([], Acc) when is_list(Acc) -> + verify_config(Acc, #config{}). + +verify_config([{Tag, Val} | T], C) -> + case Tag of + cookie when is_atom(Val) -> + verify_config(T, C#config{cookie = Val}); + generator_profile when Val == random -> + verify_config(T, C#config{generator_profile = Val}); + generator_profile when Val == t1 -> + verify_config(T, C#config{generator_profile = Val}); + generator_profile when Val == t2 -> + verify_config(T, C#config{generator_profile = Val}); + generator_profile when Val == t3 -> + verify_config(T, C#config{generator_profile = Val}); + generator_profile when Val == t4 -> + verify_config(T, C#config{generator_profile = Val}); + generator_profile when Val == t5 -> + verify_config(T, C#config{generator_profile = Val}); + generator_profile when Val == ping -> + verify_config(T, C#config{generator_profile = Val}); + generator_nodes when is_list(Val) -> + verify_config(T, C#config{generator_nodes = Val}); + n_generators_per_node when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{n_generators_per_node = Val}); + generator_warmup when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{generator_warmup = Val}); + generator_duration when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{generator_duration = Val}); + generator_cooldown when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{generator_cooldown = Val}); + statistics_detail when Val == debug -> + verify_config(T, 
C#config{statistics_detail = Val}); + statistics_detail when Val == debug2 -> + verify_config(T, C#config{statistics_detail = Val}); + statistics_detail when Val == normal -> + verify_config(T, C#config{statistics_detail = Val}); + table_nodes when is_list(Val) -> + verify_config(T, C#config{table_nodes = Val}); + use_binary_subscriber_key when Val == true -> + verify_config(T, C#config{use_binary_subscriber_key = Val}); + use_binary_subscriber_key when Val == false -> + verify_config(T, C#config{use_binary_subscriber_key = Val}); + storage_type when is_atom(Val) -> + verify_config(T, C#config{storage_type = Val}); + write_lock_type when Val == sticky_write -> + verify_config(T, C#config{write_lock_type = Val}); + write_lock_type when Val == write -> + verify_config(T, C#config{write_lock_type = Val}); + n_replicas when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{n_replicas = Val}); + n_fragments when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{n_fragments = Val}); + n_subscribers when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{n_subscribers = Val}); + n_groups when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{n_groups = Val}); + n_servers when is_integer(Val), Val >= 0 -> + verify_config(T, C#config{n_servers = Val}); + always_try_nearest_node when Val == true; Val == false -> + verify_config(T, C#config{always_try_nearest_node = Val}); + _ -> + ?e("Bad config value: ~p~n", [Tag, Val]), + exit({bad_config_value, {Tag, Val}}) + end; +verify_config([], C) -> + display_config(C), + C; +verify_config(Config, _) -> + ?e("Bad config: ~p~n", [Config]), + exit({bad_config, Config}). + +display_config(C) when is_record(C, config) -> + ?d("~n", []), + ?d("Actual configuration...~n", []), + ?d("~n", []), + Fields = record_info(fields, config), + [config | Values] = tuple_to_list(C), + display_config(Fields, Values). 
+ +display_config([F | Fields], [V | Values]) -> + ?d(" ~s ~p~n", [left(F), V]), + display_config(Fields, Values); +display_config([], []) -> + ?d("~n", []), + ok. + +left(Term) -> + string:left(lists:flatten(io_lib:format("~p", [Term])), 27, $.). diff --git a/lib/mnesia/examples/bench/bench.hrl b/lib/mnesia/examples/bench/bench.hrl new file mode 100644 index 0000000000..7b0e0c1280 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.hrl @@ -0,0 +1,107 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
+%% +%% %CopyrightEnd% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% File : bench.hrl +%%% Author : Hakan Mattsson <[email protected]> +%%% Purpose : Define various database records +%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(config, + { + generator_profile = random, + generator_warmup = timer:seconds(2), + generator_duration = timer:seconds(15), + generator_cooldown = timer:seconds(2), + generator_nodes = [node() | nodes()], + statistics_detail = debug, + n_generators_per_node = 1, + write_lock_type = sticky_write, + table_nodes = [node() | nodes()], + storage_type = ram_copies, + n_subscribers = 25000, + n_groups = 5, + n_servers = 1, + n_replicas = 1, + n_fragments = 100, + use_binary_subscriber_key = false, + always_try_nearest_node = false, + cookie = 'bench' + }). + +-record(subscriber, + { + subscriber_number, % string (10 chars) + subscriber_name, % string (32 chars) + group_id, % integer (uint32) + location, % integer (uint32) + active_sessions, % array of 32 booleans (32 bits) + changed_by, % string (25 chars) + changed_time, % string (25 chars) + suffix + }). + +-record(group, + { + group_id, % integer (uint32) + group_name, % string (32 chars) + allow_read, % array of 32 booleans (32 bits) + allow_insert, % array of 32 booleans (32 bits) + allow_delete % array of 32 booleans (32 bits) + }). + +-record(server, + { + server_key, % {ServerId, SubscriberNumberSuffix} + server_name, % string (32 chars) + no_of_read, % integer (uint32) + no_of_insert, % integer (uint32) + no_of_delete, % integer (uint32) + suffix + }). + +-record(session, + { + session_key, % {SubscriberNumber, ServerId} + session_details, % string (4000 chars) + suffix + }). + +-define(d(Format, Args), + io:format("~s" ++ Format, [string:left(lists:flatten(io_lib:format("~p(~p):", [?MODULE, ?LINE])), 30, $ ) | Args])). 
+ +-define(e(Format, Args), + begin + ok = error_logger:format("~p(~p): " ++ Format, [?MODULE, ?LINE | Args]), + timer:sleep(1000) + end). + +-define(ERROR(M, F, A, R), + ?e("~w:~w~p\n\t ->~p\n", [M, F, A, R])). + +-define(APPLY(M, F, A), + fun() -> + case catch apply(M, F, A) of + ok -> {ok, ok}; + {atomic, R} -> {ok, R}; + {ok, R} -> {ok, R}; + {aborted, R} -> ?ERROR(M, F, A, R); + {error, R} -> ?ERROR(M, F, A, R); + R -> ?ERROR(M, F, A, R) + end + end()). diff --git a/lib/mnesia/examples/bench/bench.sh b/lib/mnesia/examples/bench/bench.sh new file mode 100755 index 0000000000..1f8b5eec52 --- /dev/null +++ b/lib/mnesia/examples/bench/bench.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# Author : Hakan Mattsson <[email protected]> +# Purpose : Simplify benchmark execution +# Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> +###################################################################### + +args="-pa .. -boot start_sasl -sasl errlog_type error -sname bench" +set -x + +if [ $# -eq 0 ] ; then + + erl $args + +else + + while [ $# -gt 0 ]; do + + erl $args -s bench run $1 -s erlang halt + shift + done + +fi + diff --git a/lib/mnesia/examples/bench/bench_generate.erl b/lib/mnesia/examples/bench/bench_generate.erl new file mode 100644 index 0000000000..0fccc6c082 --- /dev/null +++ b/lib/mnesia/examples/bench/bench_generate.erl @@ -0,0 +1,684 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% File : bench_generate.hrl +%%% Author : Hakan Mattsson <[email protected]> +%%% Purpose : Start request generators and collect statistics +%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(bench_generate). +-author('[email protected]'). + +-include("bench.hrl"). + +%% Public +-export([start/1]). + +%% Internal +-export([ + monitor_init/2, + generator_init/2, + worker_init/1 + ]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The traffic generator +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% ------------------------------------------------------------------- +%% Start request generators +%% ------------------------------------------------------------------- + +start(C) when is_record(C, config) -> + MonPid = spawn_link(?MODULE, monitor_init, [C, self()]), + receive + {'EXIT', MonPid, Reason} -> + exit(Reason); + {monitor_done, MonPid, Res} -> + Res + end. + +monitor_init(C, Parent) when is_record(C, config) -> + process_flag(trap_exit, true), + %% net_kernel:monitor_nodes(true), %% BUGBUG: Needed in order to re-start generators + Nodes = C#config.generator_nodes, + PerNode = C#config.n_generators_per_node, + Timer = C#config.generator_warmup, + ?d("~n", []), + ?d("Start ~p request generators each at ~p nodes...~n", + [PerNode, length(Nodes)]), + ?d("~n", []), + warmup_sticky(C), + ?d(" ~p seconds warmup...~n", [Timer div 1000]), + Alive = spawn_generators(C, Nodes, PerNode), + erlang:send_after(Timer, self(), warmup_done), + monitor_loop(C, Parent, Alive, []). + +spawn_generators(C, Nodes, PerNode) -> + [spawn_link(Node, ?MODULE, generator_init, [self(), C]) || + Node <- Nodes, + _ <- lists:seq(1, PerNode)]. 
%% Read one record per fragment and table on the node selected by
%% nearest_node/3, using the write lock type that nearest_node/3 returns.
%% The result of each rpc:call/4 is ignored.
warmup_sticky(C) ->
    %% Select one node per fragment as master node
    Tabs = [subscriber, session, server, suffix],
    Fun = fun(S) ->
                  {[Node | _], _, Wlock} = nearest_node(S, transaction, C),
                  Stick = fun() -> [mnesia:read({T, S}, S, Wlock) || T <- Tabs] end,
                  Args = [transaction, Stick, [], mnesia_frag],
                  rpc:call(Node, mnesia, activity, Args)
          end,
    Suffixes = lists:seq(0, C#config.n_fragments - 1), % Assume even distrib.
    lists:foreach(Fun, Suffixes).

%% Main loop for benchmark monitor
%%
%% Alive    - pids of the running generators
%% Deceased - nodes on which a generator has died
%%
%% The phases are driven by timer messages sent to self():
%% warmup_done -> measurement_done -> {cooldown_done, Stats}.
monitor_loop(C, Parent, Alive, Deceased) ->
    receive
        warmup_done ->
            multicall(Alive, reset_statistics),
            Timer = C#config.generator_duration,
            ?d(" ~p seconds actual benchmarking...~n", [Timer div 1000]),
            erlang:send_after(Timer, self(), measurement_done),
            monitor_loop(C, Parent, Alive, Deceased);
        measurement_done ->
            Stats = multicall(Alive, get_statistics),
            Timer = C#config.generator_cooldown,
            ?d(" ~p seconds cooldown...~n", [Timer div 1000]),
            erlang:send_after(Timer, self(), {cooldown_done, Stats}),
            monitor_loop(C, Parent, Alive, Deceased);
        {cooldown_done, Stats} ->
            multicall(Alive, stop),
            display_statistics(Stats, C),
            Parent ! {monitor_done, self(), ok},
            unlink(Parent),
            exit(monitor_done);
        {nodedown, _Node} ->
            monitor_loop(C, Parent, Alive, Deceased);
        {nodeup, Node} ->
            %% Restart one generator per recorded death on the returning node
            NeedsBirth = [N || N <- Deceased, N == Node],
            Born = spawn_generators(C, NeedsBirth, 1),
            monitor_loop(C, Parent, Born ++ Alive, Deceased -- NeedsBirth);
        {'EXIT', Pid, Reason} when Pid == Parent ->
            exit(Reason);
        {'EXIT', Pid, Reason} ->
            case lists:member(Pid, Alive) of
                true ->
                    ?d("Generator on node ~p died: ~p~n", [node(Pid), Reason]),
                    monitor_loop(C, Parent, Alive -- [Pid], [node(Pid) | Deceased]);
                false ->
                    monitor_loop(C, Parent, Alive, Deceased)
            end
    end.

%% Send message to a set of processes and wait for their replies
%%
%% Returns [{Pid, Reply}] in the same order as Pids. A process that
%% terminates before replying is reported as {Pid, {'EXIT', Reason}}.
multicall(Pids, Message) ->
    Send =
        fun(Pid) ->
                Ref = erlang:monitor(process, Pid),
                Pid ! {self(), Ref, Message},
                {Pid, Ref}
        end,
    PidRefs = lists:map(Send, Pids),
    Collect =
        fun({Pid, Ref}) ->
                receive
                    {'DOWN', Ref, process, Pid, Reason} ->
                        {Pid, {'EXIT', Reason}};
                    {Pid, Ref, Reply} ->
                        %% flush: drop a 'DOWN' that may already have been
                        %% delivered, since monitor_loop/4 has no clause
                        %% that would ever remove it from the mailbox.
                        erlang:demonitor(Ref, [flush]),
                        {Pid, Reply}
                end
        end,
    lists:map(Collect, PidRefs).

%% Initialize a traffic generator
%%
%% Waits for all mnesia tables, seeds the random generator, creates the
%% counters and the ets table of sessions, then enters generator_loop/4.
generator_init(Monitor, C) ->
    process_flag(trap_exit, true),
    Tables = mnesia:system_info(tables),
    ok = mnesia:wait_for_tables(Tables, infinity),
    {_Mega, Sec, Micro} = erlang:now(),
    Uniq = lists:sum(binary_to_list(term_to_binary(make_ref()))),
    random:seed(Uniq, Sec, Micro),
    Counters = reset_counters(C, C#config.statistics_detail),
    SessionTab = ets:new(bench_sessions, [public, {keypos, 1}]),
    generator_loop(Monitor, C, SessionTab, Counters).

%% Main loop for traffic generator
%%
%% Pending control messages are served first; when the mailbox holds
%% none ("after 0") one transaction is generated, executed and timed,
%% and post_eval/8 continues the loop.
generator_loop(Monitor, C, SessionTab, Counters) ->
    receive
        {ReplyTo, Ref, get_statistics} ->
            Stats = get_counters(C, Counters),
            ReplyTo ! {self(), Ref, Stats},
            generator_loop(Monitor, C, SessionTab, Counters);
        {ReplyTo, Ref, reset_statistics} ->
            Stats = get_counters(C, Counters),
            Counters2 = reset_counters(C, Counters),
            ReplyTo ! {self(), Ref, Stats},
            generator_loop(Monitor, C, SessionTab, Counters2);
        {_ReplyTo, _Ref, stop} ->
            exit(shutdown);
        {'EXIT', Pid, Reason} when Pid == Monitor ->
            exit(Reason);
        {'EXIT', Pid, Reason} ->
            Node = node(Pid),
            ?d("Worker on node ~p(~p) died: ~p~n", [Node, node(), Reason]),
            %% Forget the worker only if it is the one currently registered
            Key = {worker,Node},
            case get(Key) of
                undefined -> ignore;
                Pid -> erase(Key);
                _ -> ignore
            end,
            generator_loop(Monitor, C, SessionTab, Counters)
    after 0 ->
            {Name, {Nodes, Activity, Wlock}, Fun, CommitSessions} =
                gen_trans(C, SessionTab),
            Before = erlang:now(),
            Res = call_worker(Nodes, Activity, Fun, Wlock, mnesia_frag),
            After = erlang:now(),
            Elapsed = elapsed(Before, After),
            post_eval(Monitor, C, Elapsed, Res, Name, CommitSessions, SessionTab, Counters)
    end.
%% Perform a transaction on a node near the data
%%
%% Returns {Node, Result}. When the first candidate is the local node
%% the activity runs in the calling process; otherwise it is handed to
%% a linked worker on that node, whose pid is cached in the process
%% dictionary under {worker, Node}.
call_worker([Node | _], Activity, Fun, Wlock, Mod) when Node == node() ->
    {Node, catch mnesia:activity(Activity, Fun, [Wlock], Mod)};
call_worker([Node | _] = Nodes, Activity, Fun, Wlock, Mod) ->
    Key = {worker,Node},
    case get(Key) of
        undefined ->
            %% No worker on that node yet: start one and try again
            Worker = spawn_link(Node, ?MODULE, worker_init, [self()]),
            put(Key, Worker),
            call_worker(Nodes, Activity, Fun, Wlock, Mod);
        Worker when is_pid(Worker) ->
            Worker ! {activity, self(), [Activity, Fun, [Wlock], Mod]},
            await_worker(Worker, Key, Nodes, Activity, Fun, Wlock, Mod)
    end.

%% Wait for the outcome of a request sent to a remote worker
await_worker(Worker, Key, [Node | _] = Nodes, Activity, Fun, Wlock, Mod) ->
    receive
        {'EXIT', Worker, Reason} ->
            ?d("Worker on node ~p(~p) died: ~p~n", [Node, node(), Reason]),
            erase(Key),
            retry_worker(Nodes, Activity, Fun, Wlock, Mod, {'EXIT', Reason});
        {activity_result, Worker, {'EXIT', {aborted, {not_local, _}}} = Result} ->
            retry_worker(Nodes, Activity, Fun, Wlock, Mod, Result);
        {activity_result, Worker, Result} ->
            {Node, Result}
    end.

%% Drop the failed head node and retry on the remaining candidates.
%% With a single candidate left the lock type is forced to write.
retry_worker([], _Activity, _Fun, _Wlock, _Mod, Reason) ->
    {node(), Reason};
retry_worker([BadNode | SpareNodes], Activity, Fun, Wlock, Mod, Reason) ->
    case SpareNodes -- [BadNode] of
        [] ->
            {BadNode, Reason};
        [_] = Last ->
            call_worker(Last, Activity, Fun, write, Mod);
        Remaining ->
            call_worker(Remaining, Activity, Fun, Wlock, Mod)
    end.

%% Entry point of a remote worker: wait for the tables, then serve
worker_init(Parent) ->
    ok = mnesia:wait_for_tables(mnesia:system_info(tables), infinity),
    worker_loop(Parent).

%% Main loop for remote workers
worker_loop(Parent) ->
    receive
        {activity, Parent, [Activity, Fun, Extra, Mod]} ->
            Parent ! {activity_result,
                      self(),
                      (catch mnesia:activity(Activity, Fun, Extra, Mod))},
            worker_loop(Parent)
    end.


%% Micro seconds between two erlang:now() style timestamps
elapsed({_, _, _} = Before, {_, _, _} = After) ->
    timer:now_diff(After, Before).
%% Lookup counters
%%
%% Counters kept in an ets table are returned as stored; the plain
%% 4-tuple {Micros, Commits, Aborts, Branches} is reported under the
%% placeholder transaction type 'any' and node 'somewhere'.
get_counters(_C, {table, Tab}) ->
    ets:match_object(Tab, '_');
get_counters(_C, {NM, NC, NA, NB}) ->
    [{{any, Name, somewhere}, Value} ||
        {Name, Value} <- [{n_micros, NM},
                          {n_commits, NC},
                          {n_aborts, NA},
                          {n_branches_executed, NB}]].

% Clear all counters
%
% The second argument is either the configured statistics detail
% (normal | debug | debug2), which creates fresh counters, or the
% current counters, which are zeroed.
reset_counters(_C, normal) ->
    {0, 0, 0, 0};
reset_counters(_C, {_, _, _, _}) ->
    {0, 0, 0, 0};
reset_counters(C, Detail) when Detail == debug; Detail == debug2 ->
    reset_counters(C, {table, ets:new(bench_pending, [public, {keypos, 1}])});
reset_counters(C, {table, Tab} = Counters) ->
    Names = [n_micros, n_commits, n_aborts, n_branches_executed],
    Nodes = C#config.generator_nodes ++ C#config.table_nodes,
    TransTypes = [t1, t2, t3, t4, t5, ping],
    Keys = [{Trans, Name, Node} || Name <- Names,
                                   Node <- Nodes,
                                   Trans <- TransTypes],
    lists:foreach(fun(Key) -> ets:insert(Tab, {Key, 0}) end, Keys),
    Counters.
%% Determine the outcome of a transaction and increment the counters
%%
%% Res is the result delivered by call_worker/5. Anything that is
%% neither a do_commit result nor an aborted do_rollback is logged and
%% counted as an abort. Always continues with generator_loop/4.
post_eval(Monitor, C, Elapsed, {Node, Res}, Name, CommitSessions, SessionTab, {table, Tab} = Counters) ->
    %% Add the elapsed time and bump either n_commits or n_aborts
    Tally = fun(Outcome) ->
                    incr(Tab, {Name, n_micros, Node}, Elapsed),
                    incr(Tab, {Name, Outcome, Node}, 1)
            end,
    case Res of
        {do_commit, BranchExecuted, _} ->
            Tally(n_commits),
            case BranchExecuted of
                true ->
                    incr(Tab, {Name, n_branches_executed, Node}, 1),
                    commit_session(CommitSessions);
                false ->
                    ok
            end;
        {'EXIT', {aborted, {do_rollback, BranchExecuted, _}}} ->
            Tally(n_aborts),
            case BranchExecuted of
                true ->
                    incr(Tab, {Name, n_branches_executed, Node}, 1);
                false ->
                    ok
            end;
        _ ->
            ?d("Failed(~p): ~p~n", [Node, Res]),
            Tally(n_aborts)
    end,
    generator_loop(Monitor, C, SessionTab, Counters);
post_eval(Monitor, C, Elapsed, {_Node, Res}, _Name, CommitSessions, SessionTab, {NM, NC, NA, NB}) ->
    Micros = NM + Elapsed,
    Counters2 =
        case Res of
            {do_commit, BranchExecuted, _} ->
                case BranchExecuted of
                    true ->
                        commit_session(CommitSessions),
                        {Micros, NC + 1, NA, NB + 1};
                    false ->
                        {Micros, NC + 1, NA, NB}
                end;
            {'EXIT', {aborted, {do_rollback, BranchExecuted, _}}} ->
                case BranchExecuted of
                    true ->
                        {Micros, NC, NA + 1, NB + 1};
                    false ->
                        {Micros, NC, NA + 1, NB}
                end;
            _ ->
                ?d("Failed: ~p~n", [Res]),
                {Micros, NC, NA + 1, NB}
        end,
    generator_loop(Monitor, C, SessionTab, Counters2).

%% Add Incr to an existing counter in the ets table
incr(Tab, Counter, Incr) ->
    ets:update_counter(Tab, Counter, Incr).
%% Run the session book-keeping fun of a committed transaction, if any
commit_session(no_fun) ->
    ignore;
commit_session(Fun) when is_function(Fun, 0) ->
    Fun().

%% Randomly choose a transaction type according to benchmark spec
%%
%% With the 'random' profile the mix is t1 25%, t2 25%, t3 20%,
%% t4 15% and t5 15%; any other profile names the single transaction
%% type to generate.
gen_trans(C, SessionTab) when C#config.generator_profile == random ->
    gen_trans_of(weighted_type(random:uniform(100)), C, SessionTab);
gen_trans(C, SessionTab) ->
    gen_trans_of(C#config.generator_profile, C, SessionTab).

%% Map a number in 1..100 to a transaction type
weighted_type(Rand) when Rand > 0, Rand =< 25 -> t1;
weighted_type(Rand) when Rand > 25, Rand =< 50 -> t2;
weighted_type(Rand) when Rand > 50, Rand =< 70 -> t3;
weighted_type(Rand) when Rand > 70, Rand =< 85 -> t4;
weighted_type(Rand) when Rand > 85, Rand =< 100 -> t5.

%% Build a request of the given transaction type
gen_trans_of(Type, C, SessionTab) ->
    case Type of
        t1 -> gen_t1(C, SessionTab);
        t2 -> gen_t2(C, SessionTab);
        t3 -> gen_t3(C, SessionTab);
        t4 -> gen_t4(C, SessionTab);
        t5 -> gen_t5(C, SessionTab);
        ping -> gen_ping(C, SessionTab)
    end.

%% T1: update the current location of a random subscriber
gen_t1(C, _SessionTab) ->
    Id = random:uniform(C#config.n_subscribers) - 1,
    Key = bench_trans:number_to_key(Id, C),
    Where = nearest_node(Id, transaction, C),
    Trans =
        fun(Wlock) ->
                bench_trans:update_current_location(Wlock,
                                                    Key,
                                                    4711,
                                                    <<4711:(8*25)>>,
                                                    <<4711:(8*25)>>)
        end,
    {t1, Where, Trans, no_fun}.

%% T2: read the current location of a random subscriber.
%% Runs as sync_dirty, not as a transaction.
gen_t2(C, _SessionTab) ->
    Id = random:uniform(C#config.n_subscribers) - 1,
    Key = bench_trans:number_to_key(Id, C),
    Where = nearest_node(Id, sync_dirty, C),
    Trans = fun(Wlock) -> bench_trans:read_current_location(Wlock, Key) end,
    {t2, Where, Trans, no_fun}.
%% T3: read session details
gen_t3(C, SessionTab) ->
    {SubscrId, SubscrKey, ServerId} =
        case ets:first(SessionTab) of
            '$end_of_table' ->
                %% This generator does not have any session,
                %% try reading someone else's session details
                RandId = random:uniform(C#config.n_subscribers) - 1,
                RandKey = bench_trans:number_to_key(RandId, C),
                RandServer = random:uniform(C#config.n_servers) - 1,
                {RandId, RandKey, RandServer};
            {_, _, _} = OwnSession ->
                %% This generator does have a session,
                %% read its session details
                OwnSession
        end,
    ServerBit = 1 bsl ServerId,
    Where = nearest_node(SubscrId, transaction, C),
    Trans =
        fun(Wlock) ->
                bench_trans:read_session_details(Wlock, SubscrKey, ServerBit, ServerId)
        end,
    {t3, Where, Trans, no_fun}.

%% T4: create a session
gen_t4(C, SessionTab) ->
    %% This generator may already have sessions,
    %% create a new session and hope that no other
    %% generator already has occupied it
    SubscrId = random:uniform(C#config.n_subscribers) - 1,
    SubscrKey = bench_trans:number_to_key(SubscrId, C),
    ServerId = random:uniform(C#config.n_servers) - 1,
    ServerBit = 1 bsl ServerId,
    Details = <<4711:(8*2000)>>,
    %% Ask for a rollback when the draw from 1..100 is at most 2
    DoRollback = (random:uniform(100) =< 2),
    Where = nearest_node(SubscrId, transaction, C),
    Trans =
        fun(Wlock) ->
                bench_trans:create_session_to_server(Wlock, SubscrKey, ServerBit,
                                                     ServerId, Details, DoRollback)
        end,
    %% Executed by the caller when the transaction has committed
    Remember =
        fun() ->
                ets:insert(SessionTab, {{SubscrId, SubscrKey, ServerId}, self()})
        end,
    {t4, Where, Trans, Remember}.
%% T5: delete a session
gen_t5(C, SessionTab) ->
    {SubscrId, SubscrKey, ServerId, Forget} =
        case ets:first(SessionTab) of
            '$end_of_table' ->
                %% This generator does not have any session,
                %% try to delete someone else's session details
                RandId = random:uniform(C#config.n_subscribers) - 1,
                RandKey = bench_trans:number_to_key(RandId, C),
                RandServer = random:uniform(C#config.n_servers) - 1,
                {RandId, RandKey, RandServer, no_fun};
            {OwnId, OwnKey, OwnServer} = Session ->
                %% This generator does have at least one session,
                %% delete it.
                {OwnId, OwnKey, OwnServer,
                 fun() -> ets:delete(SessionTab, Session) end}
        end,
    ServerBit = 1 bsl ServerId,
    %% Ask for a rollback when the draw from 1..100 is at most 2
    DoRollback = (random:uniform(100) =< 2),
    Where = nearest_node(SubscrId, transaction, C),
    Trans =
        fun(Wlock) ->
                bench_trans:delete_session_from_server(Wlock, SubscrKey, ServerBit,
                                                       ServerId, DoRollback)
        end,
    {t5, Where, Trans, Forget}.

%% Ping: a transaction that does nothing but commit
gen_ping(C, _SessionTab) ->
    SubscrId = random:uniform(C#config.n_subscribers) - 1,
    Where = nearest_node(SubscrId, transaction, C),
    {ping, Where, fun(_Wlock) -> {do_commit, true, []} end, no_fun}.

%% Select a node as near as the subscriber data as possible
%%
%% Returns {Nodes, Activity, WriteLockType}, with the preferred node
%% first in Nodes.
nearest_node(SubscrId, Activity, C) ->
    Suffix = bench_trans:number_to_suffix(SubscrId),
    Writers = mnesia_frag:table_info(t, s, {suffix, Suffix}, where_to_write),
    nearest_among(Writers, Suffix, Activity, C).

nearest_among([], _Suffix, Activity, _C) ->
    {[node()], Activity, write};
nearest_among([Single], _Suffix, Activity, _C) ->
    {[Single], Activity, write};
nearest_among(Nodes, Suffix, Activity, C) ->
    Wlock = C#config.write_lock_type,
    PreferLocal =
        (C#config.always_try_nearest_node =:= true) orelse (Wlock =:= write),
    First =
        if
            PreferLocal ->
                %% Use the local node when it holds a replica
                case lists:member(node(), Nodes) of
                    true -> node();
                    false -> pick_node(Suffix, C, Nodes)
                end;
            Wlock == sticky_write ->
                pick_node(Suffix, C, Nodes)
        end,
    {[First | Nodes], Activity, Wlock}.
%% Pick one of Nodes from the suffix, so that the same suffix always
%% maps to the same node for a given set of nodes.
pick_node(Suffix, C, Nodes) ->
    Ordered = lists:sort(Nodes),
    NumberOfActive = length(Ordered),
    PoolSize = length(C#config.table_nodes),
    Suffix2 =
        case PoolSize rem NumberOfActive of
            0 -> Suffix div (PoolSize div NumberOfActive);
            _ -> Suffix
        end,
    N = (Suffix2 rem NumberOfActive) + 1,
    lists:nth(N, Ordered).

%% Print the benchmark result
%%
%% Stats is the reply list from multicall/2: [{GenPid, GenStats}].
%% Entries whose GenStats is not a list (e.g. {'EXIT', Reason} from a
%% dead generator) are left out of the calculations.
display_statistics(Stats, C) ->
    GoodStats = [{node(GenPid), GenStats} || {GenPid, GenStats} <- Stats,
                                             is_list(GenStats)],
    FlatStats = [{Type, Name, EvalNode, GenNode, Count} ||
                    {GenNode, GenStats} <- GoodStats,
                    {{Type, Name, EvalNode}, Count} <- GenStats],
    TotalStats = calc_stats_per_tag(lists:keysort(2, FlatStats), 2, []),
    %% A zero entry is appended so that each lookup succeeds even when
    %% the counter is absent from the statistics.
    {value, {n_aborts, 0, NA, 0, 0}} =
        lists:keysearch(n_aborts, 1, TotalStats ++ [{n_aborts, 0, 0, 0, 0}]),
    {value, {n_commits, NC, 0, 0, 0}} =
        lists:keysearch(n_commits, 1, TotalStats ++ [{n_commits, 0, 0, 0, 0}]),
    {value, {n_branches_executed, 0, 0, _NB, 0}} =
        lists:keysearch(n_branches_executed, 1, TotalStats ++ [{n_branches_executed, 0, 0, 0, 0}]),
    {value, {n_micros, 0, 0, 0, AccMicros}} =
        lists:keysearch(n_micros, 1, TotalStats ++ [{n_micros, 0, 0, 0, 0}]),
    NT = NA + NC,
    NG = length(GoodStats),
    NTN = length(C#config.table_nodes),
    WallMicros = C#config.generator_duration * 1000 * NG,
    %% WallMicros is 0 when no generator delivered statistics (or the
    %% duration is 0). Report no overhead then, instead of crashing on
    %% arithmetic with a caught 'EXIT' tuple further down.
    Overhead =
        case WallMicros of
            0 -> 0.0;
            _ -> (WallMicros - AccMicros) / WallMicros
        end,
    ?d("~n", []),
    ?d("Benchmark result...~n", []),
    ?d("~n", []),
    ?d(" ~p transactions per second (TPS).~n", [catch ((NT * 1000000 * NG) div AccMicros)]),
    ?d(" ~p TPS per table node.~n", [catch ((NT * 1000000 * NG) div (AccMicros * NTN))]),
    ?d(" ~p micro seconds in average per transaction, including latency.~n",
       [catch (AccMicros div NT)]),
    ?d(" ~p transactions. ~f% generator overhead.~n", [NT, Overhead * 100]),

    TypeStats = calc_stats_per_tag(lists:keysort(1, FlatStats), 1, []),
    EvalNodeStats = calc_stats_per_tag(lists:keysort(3, FlatStats), 3, []),
    GenNodeStats = calc_stats_per_tag(lists:keysort(4, FlatStats), 4, []),
    if
        C#config.statistics_detail == normal ->
            ignore;
        true ->
            ?d("~n", []),
            ?d("Statistics per transaction type...~n", []),
            ?d("~n", []),
            display_type_stats(" ", TypeStats, NT, AccMicros),

            ?d("~n", []),
            ?d("Transaction statistics per table node...~n", []),
            ?d("~n", []),
            display_calc_stats(" ", EvalNodeStats, NT, AccMicros),

            ?d("~n", []),
            ?d("Transaction statistics per generator node...~n", []),
            ?d("~n", []),
            display_calc_stats(" ", GenNodeStats, NT, AccMicros)
    end,
    if
        C#config.statistics_detail /= debug2 ->
            ignore;
        true ->
            io:format("~n", []),
            io:format("------ Test Results ------~n", []),
            io:format("Length : ~p sec~n", [C#config.generator_duration div 1000]),
            Host = lists:nth(2, string:tokens(atom_to_list(node()), [$@])),
            io:format("Processor : ~s~n", [Host]),
            io:format("Number of Proc: ~p~n", [NG]),
            io:format("~n", []),
            display_trans_stats(" ", TypeStats, NT, AccMicros, NG),
            io:format("~n", []),
            io:format(" Overall Statistics~n", []),
            io:format(" Transactions: ~p~n", [NT]),
            io:format(" Inner : ~p TPS~n", [catch ((NT * 1000000 * NG) div AccMicros)]),
            io:format(" Outer : ~p TPS~n", [catch ((NT * 1000000 * NG) div WallMicros)]),
            io:format("~n", [])
    end.


%% Print, per tag, its share of the transactions and of the time.
%% Entries where all four counters are zero are skipped.
display_calc_stats(Prefix, [{_Tag, 0, 0, 0, 0} | Rest], NT, Micros) ->
    display_calc_stats(Prefix, Rest, NT, Micros);
display_calc_stats(Prefix, [{Tag, NC, NA, _NB, NM} | Rest], NT, Micros) ->
    ?d("~s~s n=~s%\ttime=~s%~n",
       [Prefix, left(Tag), percent(NC + NA, NT), percent(NM, Micros)]),
    display_calc_stats(Prefix, Rest, NT, Micros);
display_calc_stats(_, [], _, _) ->
    ok.
%% Print, per transaction type, its share of the transactions and of
%% the time, the average time, and the aborted / branch-executed
%% shares when they are non-zero. Entries where all four counters are
%% zero are skipped.
display_type_stats(Prefix, [{_Tag, 0, 0, 0, 0} | Rest], NT, Micros) ->
    display_type_stats(Prefix, Rest, NT, Micros);
display_type_stats(Prefix, [{Tag, NC, NA, NB, NM} | Rest], NT, Micros) ->
    ?d("~s~s n=~s%\ttime=~s%\tavg micros=~p~n",
       [
        Prefix,
        left(Tag),
        percent(NC + NA, NT),
        percent(NM, Micros),
        average(NM, NC + NA)
       ]),
    case NA /= 0 of
        true -> ?d("~s ~s% aborted~n", [Prefix, percent(NA, NC + NA)]);
        false -> ignore
    end,
    case NB /= 0 of
        true -> ?d("~s ~s% branches executed~n", [Prefix, percent(NB, NC + NA)]);
        false -> ignore
    end,
    display_type_stats(Prefix, Rest, NT, Micros);
display_type_stats(_, [], _, _) ->
    ok.

%% Integer average; 0 when there is nothing to average over, so that
%% the report never contains an 'EXIT' tuple from a division by zero.
average(_Sum, 0) -> 0;
average(Sum, Count) -> Sum div Count.

%% Print Term with ~p, left-adjusted and padded with dots to 27 chars
left(Term) ->
    string:left(lists:flatten(io_lib:format("~p", [Term])), 27, $.).

%% Part as a percentage of Total, formatted as chardata
percent(_Part, 0) -> "infinity";
percent(Part, Total) -> io_lib:format("~8.4f", [(Part * 100) / Total]).

%% Sum the counters per tag
%%
%% FlatStats is a list of 5-tuples {Type, Name, EvalNode, GenNode, Count}
%% that must already be sorted on element Pos. Returns a sorted list of
%% {Tag, NCommits, NAborts, NBranches, NMicros}, one per distinct value
%% of element Pos.
calc_stats_per_tag([], _Pos, Acc) ->
    lists:sort(Acc);
calc_stats_per_tag([Tuple | _] = FlatStats, Pos, Acc) when tuple_size(Tuple) == 5 ->
    Tag = element(Pos, Tuple),
    do_calc_stats_per_tag(FlatStats, Pos, {Tag, 0, 0, 0, 0}, Acc).

%% Consume the leading run of entries that carry the current tag. The
%% counter name in element 2 selects which sum receives the count.
do_calc_stats_per_tag([Tuple | Rest], Pos, {Tag, NC, NA, NB, NM}, Acc)
  when element(Pos, Tuple) == Tag ->
    Val = element(5, Tuple),
    case element(2, Tuple) of
        n_commits ->
            do_calc_stats_per_tag(Rest, Pos, {Tag, NC + Val, NA, NB, NM}, Acc);
        n_aborts ->
            do_calc_stats_per_tag(Rest, Pos, {Tag, NC, NA + Val, NB, NM}, Acc);
        n_branches_executed ->
            do_calc_stats_per_tag(Rest, Pos, {Tag, NC, NA, NB + Val, NM}, Acc);
        n_micros ->
            do_calc_stats_per_tag(Rest, Pos, {Tag, NC, NA, NB, NM + Val}, Acc)
    end;
do_calc_stats_per_tag(GenStats, Pos, CalcStats, Acc) ->
    %% Another tag, or end of list: store the sums and start over
    calc_stats_per_tag(GenStats, Pos, [CalcStats | Acc]).
+ +display_trans_stats(Prefix, [{_Tag, 0, 0, 0, 0} | Rest], NT, Micros, NG) -> + display_trans_stats(Prefix, Rest, NT, Micros, NG); +display_trans_stats(Prefix, [{Tag, NC, NA, NB, NM} | Rest], NT, Micros, NG) -> + Common = + fun(Name) -> + Sec = NM / (1000000 * NG), + io:format(" ~s: ~p (~p%) Time: ~p sec TPS = ~p~n", + [Name, + NC + NA, + round(((NC + NA) * 100) / NT), + round(Sec), + round((NC + NA) / Sec)]) + end, + Branch = + fun() -> + io:format(" Branches Executed: ~p (~p%)~n", + [NB, round((NB * 100) / (NC + NA))]) + end, + Rollback = + fun() -> + io:format(" Rollback Executed: ~p (~p%)~n", + [NA, round((NA * 100) / (NC + NA))]) + end, + case Tag of + t1 -> + Common("T1"); + t2 -> + Common("T2"); + t3 -> + Common("T3"), + Branch(); + t4 -> + Common("T4"), + Branch(), + Rollback(); + t5 -> + Common("T5"), + Branch(), + Rollback(); + _ -> + Common(io_lib:format("~p", [Tag])) + end, + display_trans_stats(Prefix, Rest, NT, Micros, NG); +display_trans_stats(_, [], _, _, _) -> + ok. + diff --git a/lib/mnesia/examples/bench/bench_populate.erl b/lib/mnesia/examples/bench/bench_populate.erl new file mode 100644 index 0000000000..f82ee210b6 --- /dev/null +++ b/lib/mnesia/examples/bench/bench_populate.erl @@ -0,0 +1,200 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
+%% +%% %CopyrightEnd% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% File : bench_populate.hrl +%%% Author : Hakan Mattsson <[email protected]> +%%% Purpose : Populate the database +%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(bench_populate). +-author('[email protected]'). + +-include("bench.hrl"). + +%% Public +-export([start/1]). + +%% Populate the database +start(C) when is_record(C, config) -> + ?d("~n",[]), + ?d("Populate database...~n",[]), + ?d("~n",[]), + create_tables(C), + Populate = + fun() -> + populate_subscriber(write, C), + populate_group(write, C), + populate_server(write, C) + end, + mnesia:activity(sync_dirty, Populate, [], mnesia_frag). + +%% ------------------------------------------------------------------- +%% Create the tables +%% ------------------------------------------------------------------- + +create_tables(C) -> + ?d(" Delete old tables...~n",[]), + mnesia:delete_table(group), + mnesia:delete_table(subscriber), + mnesia:delete_table(session), + mnesia:delete_table(server), + mnesia:delete_table(suffix), + + ?d(" Creating ~p tables, with ~p replicas distributed over ~p nodes...~n", + [C#config.storage_type, + C#config.n_replicas, + length(C#config.table_nodes)]), + + %% Create group table + GroupDef = [{C#config.storage_type, C#config.table_nodes}, + {attributes, record_info(fields, group)}], + ?APPLY(mnesia, create_table, [group, GroupDef]), + + %% Create suffix table + FragStorage = + case C#config.storage_type of + ram_copies -> n_ram_copies; + disc_copies -> n_disc_copies; + disc_only_copies -> n_disc_only_copies + end, + FragProps = + [{FragStorage, C#config.n_replicas}, + {node_pool, C#config.table_nodes}, + {n_fragments, C#config.n_fragments}], + SuffixDef = [{frag_properties, FragProps}], + ?APPLY(mnesia, create_table, [suffix, SuffixDef]), + + %% Create subscriber table + SubscriberDef = + 
[{frag_properties, [{foreign_key, {suffix, #subscriber.suffix}} | FragProps]}, + {attributes, record_info(fields, subscriber)}], + ?APPLY(mnesia, create_table, [subscriber, SubscriberDef]), + + %% Create session table + SessionDef = + [{frag_properties, [{foreign_key, {suffix, #session.suffix}} | FragProps]}, + {attributes, record_info(fields, session)}], + ?APPLY(mnesia, create_table, [session, SessionDef]), + + %% Create server table + ServerDef = + [{frag_properties, [{foreign_key, {suffix, #server.suffix}} | FragProps]}, + {attributes, record_info(fields, server)}], + ?APPLY(mnesia, create_table, [server, ServerDef]). + +%% ------------------------------------------------------------------- +%% Populate the subscriber table +%% ------------------------------------------------------------------- + +populate_subscriber(Wlock, C) -> + random:seed(), + N = C#config.n_subscribers, + ?d(" Populate ~p subscribers...", [N]), + do_populate_subscriber(Wlock, N - 1, C). + +do_populate_subscriber(Wlock, Id, C) when Id >= 0 -> + Suffix = bench_trans:number_to_suffix(Id), + SubscrId = bench_trans:number_to_key(Id, C), + Name = list_to_binary([random:uniform(26) + $A - 1]), + GroupId = random:uniform(C#config.n_groups) - 1, + Subscr = #subscriber{subscriber_number = SubscrId, + subscriber_name = Name, + group_id = GroupId, + location = 0, + active_sessions = 0, + changed_by = <<"">>, + changed_time = <<"">>, + suffix = Suffix}, + ?APPLY(mnesia, write, [subscriber, Subscr, Wlock]), + do_populate_subscriber(Wlock, Id - 1, C); +do_populate_subscriber(_Wlock, _, _) -> + io:format(" totally ~p bytes~n", + [mnesia:table_info(subscriber, memory) * 4]), + ok. + +%% ------------------------------------------------------------------- +%% Populate the group table +%% ------------------------------------------------------------------- + +populate_group(Wlock, C) -> + random:seed(), + N = C#config.n_groups, + ?d(" Populate ~p groups...", [N]), + do_populate_group(Wlock, N - 1, C). 
%% Write the groups Id, Id - 1, ..., 0 and then print the table size
do_populate_group(Wlock, Id, C) when Id >= 0 ->
    Name = list_to_binary(["-group ", integer_to_list(Id), "-"]),
    Allow = init_allow(C),
    Group = #group{group_id = Id,
                   group_name = Name,
                   allow_read = Allow,
                   allow_insert = Allow,
                   allow_delete = Allow},
    ?APPLY(mnesia, write, [group, Group, Wlock]),
    do_populate_group(Wlock, Id - 1, C);
do_populate_group(_Wlock, _, _) ->
    %% Scale by the emulator's actual word size rather than assuming
    %% 4-byte words.
    %% NOTE(review): presumes table_info(Tab, memory) is in words for
    %% the storage type in use -- confirm for disc_only_copies.
    io:format(" totally ~p bytes~n",
              [mnesia:table_info(group, memory) * erlang:system_info(wordsize)]),
    ok.

%% Build a bit mask with one bit per server; each bit is set when a
%% draw from 1..100 is at most 90.
init_allow(C) ->
    do_init_allow(0, C#config.n_servers - 1).

do_init_allow(Allow, NS) when NS >= 0 ->
    case random:uniform(100) < (90 + 1) of
        true ->
            ServerBit = 1 bsl NS,
            do_init_allow(Allow bor ServerBit, NS - 1);
        false ->
            do_init_allow(Allow, NS - 1)
    end;
do_init_allow(Allow, _) ->
    Allow.

%% -------------------------------------------------------------------
%% Populate the server table
%% -------------------------------------------------------------------

populate_server(Wlock, C) ->
    random:seed(),
    N = C#config.n_servers,
    ?d(" Populate ~p servers with 100 records each...", [N]),
    do_populate_server(Wlock, N - 1).

%% Write the records of servers Id, Id - 1, ..., 0 and then print the
%% table size
do_populate_server(Wlock, Id) when Id >= 0 ->
    populate_server_suffixes(Wlock, Id, 99),
    do_populate_server(Wlock, Id - 1);
do_populate_server(_Wlock, _) ->
    %% Same word size scaling and caveat as for the group table
    io:format(" totally ~p bytes~n",
              [mnesia:table_info(server, memory) * erlang:system_info(wordsize)]),
    ok.

%% Write one server record per suffix Suffix, Suffix - 1, ..., 0
populate_server_suffixes(Wlock, Id, Suffix) when Suffix >= 0 ->
    Name = list_to_binary(["-server ", integer_to_list(Id), "-"]),
    Server = #server{server_key = {Id, Suffix},
                     server_name = Name,
                     no_of_read = 0,
                     no_of_insert = 0,
                     no_of_delete = 0,
                     suffix = Suffix},
    ?APPLY(mnesia, write, [server, Server, Wlock]),
    populate_server_suffixes(Wlock, Id, Suffix - 1);
populate_server_suffixes(_Wlock, _, _) ->
    ok.
+ diff --git a/lib/mnesia/examples/bench/bench_trans.erl b/lib/mnesia/examples/bench/bench_trans.erl new file mode 100644 index 0000000000..945715daae --- /dev/null +++ b/lib/mnesia/examples/bench/bench_trans.erl @@ -0,0 +1,184 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% File : bench_trans.erl +%%% Author : Hakan Mattsson <[email protected]> +%%% Purpose : Implement the transactions in Canadian database benchmark (LMC/UU-01:025) +%%% Created : 21 Jun 2001 by Hakan Mattsson <[email protected]> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(bench_trans). +-author('[email protected]'). + +-include("bench.hrl"). + +-export([ + update_current_location/5, + read_current_location/2, + read_session_details/4, + create_session_to_server/6, + delete_session_from_server/5, + number_to_suffix/1, + number_to_key/2 + ]). 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The transactions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% -------------------------------------------------------------------
%% T1
%% -------------------------------------------------------------------

%% Read the subscriber from its fragment with the Wlock lock kind,
%% overwrite location / changed_by / changed_time and write it back.
update_current_location(Wlock, SubscrId, Location, ChangedBy, ChangedTime) ->
    Frag = {subscriber, number_to_suffix(SubscrId)},
    [Old] = mnesia:read(Frag, SubscrId, Wlock),
    mnesia:write(subscriber,
                 Old#subscriber{location     = Location,
                                changed_by   = ChangedBy,
                                changed_time = ChangedTime},
                 Wlock),
    {do_commit, false, [ok]}.

%% -------------------------------------------------------------------
%% T2
%% -------------------------------------------------------------------

%% Read the subscriber with a read lock and return its name, location,
%% changed_by and changed_time. The Wlock argument is not used.
read_current_location(_Wlock, SubscrId) ->
    Frag = {subscriber, number_to_suffix(SubscrId)},
    [Subscr] = mnesia:read(Frag, SubscrId, read),
    Reply = [Subscr#subscriber.subscriber_name,
             Subscr#subscriber.location,
             Subscr#subscriber.changed_by,
             Subscr#subscriber.changed_time],
    {do_commit, false, Reply}.

%% -------------------------------------------------------------------
%% T3
%% -------------------------------------------------------------------

%% Return the session details for {SubscrId, ServerId} and bump the
%% server's no_of_read counter, but only when the subscriber's group
%% allows reads on ServerBit and the subscriber has that session active.
read_session_details(Wlock, SubscrId, ServerBit, ServerId) ->
    Suffix = number_to_suffix(SubscrId),
    [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, read),
    %% The group is fetched with a dirty read rather than
    %% mnesia:read(group, GroupId, read).
    [Group] = mnesia:dirty_read(group, Subscr#subscriber.group_id),

    MayRead = (Group#group.allow_read band ServerBit) == ServerBit,
    HasSession = (Subscr#subscriber.active_sessions band ServerBit) == ServerBit,
    ExecuteBranch = MayRead andalso HasSession,

    case ExecuteBranch of
        false ->
            {do_commit, ExecuteBranch, []};
        true ->
            [Session] = mnesia:read({session, Suffix}, {SubscrId, ServerId}, read),
            [Server] = mnesia:read({server, Suffix}, {ServerId, Suffix}, Wlock),
            Reads = Server#server.no_of_read + 1,
            mnesia:write(server, Server#server{no_of_read = Reads}, Wlock),
            {do_commit, ExecuteBranch, [Session#session.session_details]}
    end.

%% -------------------------------------------------------------------
%% T4
%% -------------------------------------------------------------------

%% Create a session for {SubscrId, ServerId} when the group allows inserts
%% on ServerBit and the subscriber has no such session yet. The subscriber's
%% active_sessions mask and the server's no_of_insert counter are updated
%% in the same step. With DoRollback =:= true the transaction is aborted
%% afterwards with a do_rollback reason.
create_session_to_server(Wlock, SubscrId, ServerBit, ServerId, Details, DoRollback) ->
    Suffix = number_to_suffix(SubscrId),
    [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, Wlock),
    %% The group is fetched with a dirty read rather than
    %% mnesia:read(group, GroupId, read).
    [Group] = mnesia:dirty_read(group, Subscr#subscriber.group_id),

    MayInsert = (Group#group.allow_insert band ServerBit) == ServerBit,
    ActiveMask = Subscr#subscriber.active_sessions,
    NotActive = (ActiveMask band ServerBit) == 0,
    ExecuteBranch = MayInsert andalso NotActive,
    if
        ExecuteBranch ->
            NewSession = #session{session_key     = {SubscrId, ServerId},
                                  session_details = Details,
                                  suffix          = Suffix},
            mnesia:write(session, NewSession, Wlock),
            mnesia:write(subscriber,
                         Subscr#subscriber{active_sessions = ActiveMask bor ServerBit},
                         Wlock),

            [Server] = mnesia:read({server, Suffix}, {ServerId, Suffix}, Wlock),
            Inserts = Server#server.no_of_insert + 1,
            mnesia:write(server, Server#server{no_of_insert = Inserts}, Wlock);
        true ->
            ignore
    end,
    case DoRollback of
        true ->
            mnesia:abort({do_rollback, ExecuteBranch, []});
        false ->
            {do_commit, ExecuteBranch, []}
    end.

%% -------------------------------------------------------------------
%% T5
%% -------------------------------------------------------------------

%% Delete the session for {SubscrId, ServerId} when the group allows
%% deletes on ServerBit and the subscriber has that session active. The
%% subscriber's active_sessions mask and the server's no_of_delete counter
%% are updated in the same step. With DoRollback =:= true the transaction
%% is aborted afterwards with a do_rollback reason.
delete_session_from_server(Wlock, SubscrId, ServerBit, ServerId, DoRollback) ->
    Suffix = number_to_suffix(SubscrId),
    [Subscr] = mnesia:read({subscriber, Suffix}, SubscrId, Wlock),
    %% The group is fetched with a dirty read rather than
    %% mnesia:read(group, GroupId, read).
    [Group] = mnesia:dirty_read(group, Subscr#subscriber.group_id),

    MayDelete = (Group#group.allow_delete band ServerBit) == ServerBit,
    ActiveMask = Subscr#subscriber.active_sessions,
    HasSession = (ActiveMask band ServerBit) == ServerBit,
    ExecuteBranch = MayDelete andalso HasSession,
    if
        ExecuteBranch ->
            mnesia:delete({session, Suffix}, {SubscrId, ServerId}, Wlock),
            %% The bit is known to be set here, so bxor clears it.
            mnesia:write(subscriber,
                         Subscr#subscriber{active_sessions = ActiveMask bxor ServerBit},
                         Wlock),

            [Server] = mnesia:read({server, Suffix}, {ServerId, Suffix}, Wlock),
            Deletes = Server#server.no_of_delete + 1,
            mnesia:write(server, Server#server{no_of_delete = Deletes}, Wlock);
        true ->
            ignore
    end,
    case DoRollback of
        true ->
            mnesia:abort({do_rollback, ExecuteBranch, []});
        false ->
            {do_commit, ExecuteBranch, []}
    end.

%% Suffix of a subscriber id: the id modulo 100 for integers, or the number
%% formed by the last two ASCII digits of a 10-byte binary key.
number_to_suffix(<<_:8/binary, Tens:8/integer, Ones:8/integer>>) ->
    ((Tens - $0) * 10) + (Ones - $0);
number_to_suffix(SubscrId) when is_integer(SubscrId) ->
    SubscrId rem 100.

%% Subscriber key for Id: a zero-padded 10-character binary when
%% use_binary_subscriber_key is true, otherwise the integer itself.
number_to_key(Id, C) when is_integer(Id) ->
    case C#config.use_binary_subscriber_key of
        false ->
            Id;
        true ->
            Digits = integer_to_list(Id),
            list_to_binary(string:right(Digits, 10, $0))
    end.

diff --git a/lib/mnesia/examples/bup.erl b/lib/mnesia/examples/bup.erl new file mode 120000 index 0000000000..a25a785996 --- /dev/null +++ b/lib/mnesia/examples/bup.erl @@ -0,0 +1 @@ +../doc/src/bup.erl
\ No newline at end of file diff --git a/lib/mnesia/examples/company.erl b/lib/mnesia/examples/company.erl new file mode 120000 index 0000000000..4b0c0b6bcc --- /dev/null +++ b/lib/mnesia/examples/company.erl @@ -0,0 +1 @@ +../doc/src/company.erl
\ No newline at end of file diff --git a/lib/mnesia/examples/company.hrl b/lib/mnesia/examples/company.hrl new file mode 120000 index 0000000000..95014d9781 --- /dev/null +++ b/lib/mnesia/examples/company.hrl @@ -0,0 +1 @@ +../doc/src/company.hrl
\ No newline at end of file diff --git a/lib/mnesia/examples/company_o.erl b/lib/mnesia/examples/company_o.erl new file mode 120000 index 0000000000..f4a40b768a --- /dev/null +++ b/lib/mnesia/examples/company_o.erl @@ -0,0 +1 @@ +../doc/src/company_o.erl
\ No newline at end of file diff --git a/lib/mnesia/examples/company_o.hrl b/lib/mnesia/examples/company_o.hrl new file mode 120000 index 0000000000..bfa57e37ea --- /dev/null +++ b/lib/mnesia/examples/company_o.hrl @@ -0,0 +1 @@ +../doc/src/company_o.hrl
\ No newline at end of file diff --git a/lib/mnesia/examples/mnesia_meter.erl b/lib/mnesia/examples/mnesia_meter.erl new file mode 100644 index 0000000000..ea74d8691b --- /dev/null +++ b/lib/mnesia/examples/mnesia_meter.erl @@ -0,0 +1,465 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
%% Getting started:
%%
%% 1  Start one or more distributed Erlang nodes
%% 2  Connect the nodes, e.g. with net_adm:ping/1
%% 3a Run mnesia_meter:go()
%% 3b Run mnesia_meter:go(ReplicaType)
%% 3c Run mnesia_meter:go(ReplicaType, Nodes)

-module(mnesia_meter).
-author('[email protected]').
-export([
         go/0,
         go/1,
         go/2,
         repeat_meter/2
        ]).

-record(person, {name,        %% atomic, unique key
                 data,        %% compound structure
                 married_to,  %% name of partner or undefined
                 children}).  %% list of children

-record(meter, {desc, init, meter, micros}).

-record(result, {desc, list}).

-define(TIMES, 1000).

%% Run all meters with ram_copies replicas on this node and all
%% connected nodes.
go() ->
    go(ram_copies).

%% Run all meters with the given replica type on this node and all
%% connected nodes.
go(ReplicaType) ->
    go(ReplicaType, [node() | nodes()]).

%% Run all meters once per prefix of Nodes (1 node, 2 nodes, ...) and
%% print one table row per meter with one column per run.
go(ReplicaType, Nodes) ->
    %% Cost of applying an empty fun, subtracted from every meter later on.
    {ok, FunOverhead} = tc(fun(_) -> {atomic, ok} end, ?TIMES),
    RecordSize = byte_size(term_to_binary(#person{})),
    io:format("A fun apply costs ~p micro seconds. Record size is ~p bytes.~n",
              [FunOverhead, RecordSize]),
    Rows = rearrange(go(ReplicaType, Nodes, [], FunOverhead, []), []),
    Title = lists:flatten(io_lib:format("~w on ~w", [ReplicaType, Nodes])),
    Header = #result{desc = Title, list = lists:seq(1, length(Nodes))},
    %% An empty desc marks a separator row; see display/3.
    Separator = #result{desc = "", list = ['--------' || _ <- Nodes]},
    display([Separator, Header, Separator | Rows] ++ [Separator]).

%% Add one node at a time to the replica set and run the meters for each
%% resulting configuration. Results are accumulated newest first.
go(_ReplicaType, [], _Config, _FunOverhead, Acc) ->
    Acc;
go(ReplicaType, [Node | Rest], OldNodes, FunOverhead, Acc) ->
    Nodes = [Node | OldNodes],
    Res = run(Nodes, [{ReplicaType, Nodes}], FunOverhead),
    go(ReplicaType, Rest, Nodes, FunOverhead, [{ReplicaType, Nodes, Res} | Acc]).

%% Turn the per-run meter lists into one #result{} per meter description.
rearrange([], Acc) ->
    Acc;
rearrange([{_ReplicaType, _Nodes, Meters} | Tail], Acc) ->
    rearrange(Tail, [add_meter(Meter, Acc) || Meter <- Meters]).

%% Prepend the meter's micros to the row with the same description, or
%% start a new row when there is none yet.
add_meter(M, Acc) ->
    Micros = M#meter.micros,
    case lists:keysearch(M#meter.desc, #result.desc, Acc) of
        false ->
            #result{desc = M#meter.desc, list = [Micros]};
        {value, Row} ->
            Row#result{list = [Micros | Row#result.list]}
    end.

%% Print all rows, left-aligning descriptions to the widest one.
display(Res) ->
    MaxDesc = lists:max([length(Row#result.desc) || Row <- Res]),
    Format = lists:concat(["! ~-", MaxDesc, "s"]),
    display(Res, Format, MaxDesc).

display(Rows, Format, MaxDesc) ->
    lists:foreach(
      fun(Row) ->
              Label = case Row#result.desc of
                          ""   -> lists:duplicate(MaxDesc, "-");
                          Desc -> Desc
                      end,
              io:format(Format, [Label]),
              display_items(Row#result.list, Row#result.desc),
              io:format(" !~n", [])
      end,
      Rows).

%% Print one column per item; separator rows (empty desc) print dashes.
display_items(Items, "") ->
    lists:foreach(fun(_Item) ->
                          io:format(" ! ~s", [lists:duplicate(10, $-)])
                  end,
                  Items);
display_items(Items, _Desc) ->
    lists:foreach(fun(Micros) -> io:format(" ! ~10w", [Micros]) end, Items).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% All meters as #meter{} records. Each spec is {Description, Init, Meter}:
%% Init/2 is used to create the records beforehand and Meter/1 is the fun
%% that gets timed.
meters() ->
    Specs =
        [%% Update meters
         {"transaction update two records with read and write",
          fun write_records/2,
          fun update_records/1},
         {"transaction update two records with wread and write",
          fun write_records/2,
          fun w_update_records/1},
         {"transaction update two records with read and s_write",
          fun s_write_records/2,
          fun s_update_records/1},
         {"sync_dirty update two records with read and write",
          fun sync_dirty_write_records/2,
          fun sync_dirty_update_records/1},
         {"async_dirty update two records with read and write",
          fun async_dirty_write_records/2,
          fun async_dirty_update_records/1},
         {"plain fun update two records with dirty_read and dirty_write",
          fun dirty_write_records/2,
          fun dirty_update_records/1},
         {"ets update two records with read and write (local only)",
          fun ets_opt_write_records/2,
          fun ets_update_records/1},
         {"plain fun update two records with ets:lookup and ets:insert (local only)",
          fun bif_opt_write_records/2,
          fun bif_update_records/1},
         {"plain fun update two records with dets:lookup and dets:insert (local only)",
          fun dets_opt_write_records/2,
          fun dets_update_records/1},

         %% Write meters
         {"transaction write two records with write",
          fun write_records/2,
          fun(X) -> write_records(X, 0 - X) end},
         {"transaction write two records with s_write",
          fun s_write_records/2,
          fun(X) -> s_write_records(X, 0 - X) end},
         {"sync_dirty write two records with write",
          fun sync_dirty_write_records/2,
          fun(X) -> sync_dirty_write_records(X, 0 - X) end},
         {"async_dirty write two records with write",
          fun async_dirty_write_records/2,
          fun(X) -> async_dirty_write_records(X, 0 - X) end},
         {"plain fun write two records with dirty_write",
          fun dirty_write_records/2,
          fun(X) -> dirty_write_records(X, 0 - X) end},
         {"ets write two records with write (local only)",
          fun ets_opt_write_records/2,
          fun(X) -> ets_write_records(X, 0 - X) end},
         {"plain fun write two records with ets:insert (local only)",
          fun bif_opt_write_records/2,
          fun(X) -> bif_write_records(X, 0 - X) end},
         {"plain fun write two records with dets:insert (local only)",
          fun dets_opt_write_records/2,
          fun(X) -> dets_write_records(X, 0 - X) end},

         %% Read meters
         {"transaction read two records with read",
          fun write_records/2,
          fun(X) -> read_records(X, 0 - X) end},
         {"sync_dirty read two records with read",
          fun sync_dirty_write_records/2,
          fun(X) -> sync_dirty_read_records(X, 0 - X) end},
         {"async_dirty read two records with read",
          fun async_dirty_write_records/2,
          fun(X) -> async_dirty_read_records(X, 0 - X) end},
         {"plain fun read two records with dirty_read",
          fun dirty_write_records/2,
          fun(X) -> dirty_read_records(X, 0 - X) end},
         {"ets read two records with read",
          fun ets_opt_write_records/2,
          fun(X) -> ets_read_records(X, 0 - X) end},
         {"plain fun read two records with ets:lookup",
          fun bif_opt_write_records/2,
          fun(X) -> bif_read_records(X, 0 - X) end},
         {"plain fun read two records with dets:lookup",
          fun dets_opt_write_records/2,
          fun(X) -> dets_read_records(X, 0 - X) end}
        ],
    [#meter{desc = Desc, init = Init, meter = Meter} || {Desc, Init, Meter} <- Specs].

%% Fun that clears married_to on the named person and on its partner,
%% using mnesia:read/1 and mnesia:write/1. Aborts with no_such_person
%% when the person does not exist.
update_fun(Name) ->
    fun() ->
            case mnesia:read({person, Name}) of
                [Pers] ->
                    [Partner] = mnesia:read({person, Pers#person.married_to}),
                    mnesia:write(Pers#person{married_to = undefined}),
                    mnesia:write(Partner#person{married_to = undefined});
                [] ->
                    mnesia:abort(no_such_person)
            end
    end.

update_records(Name) ->
    Fun = update_fun(Name),
    mnesia:transaction(Fun).

%% The dirty/ets variants wrap the result in {atomic, _} so that they
%% return the same shape as mnesia:transaction/1.
sync_dirty_update_records(Name) ->
    Fun = update_fun(Name),
    {atomic, mnesia:sync_dirty(Fun)}.

async_dirty_update_records(Name) ->
    Fun = update_fun(Name),
    {atomic, mnesia:async_dirty(Fun)}.

ets_update_records(Name) ->
    Fun = update_fun(Name),
    {atomic, mnesia:ets(Fun)}.

%% Same update as update_fun/1, but reading with mnesia:wread/1.
w_update_records(Name) ->
    mnesia:transaction(
      fun() ->
              case mnesia:wread({person, Name}) of
                  [Pers] ->
                      [Partner] = mnesia:wread({person, Pers#person.married_to}),
                      mnesia:write(Pers#person{married_to = undefined}),
                      mnesia:write(Partner#person{married_to = undefined});
                  [] ->
                      mnesia:abort(no_such_person)
              end
      end).

%% Same update as update_fun/1, but writing with mnesia:s_write/1.
s_update_records(Name) ->
    mnesia:transaction(
      fun() ->
              case mnesia:read({person, Name}) of
                  [Pers] ->
                      [Partner] = mnesia:read({person, Pers#person.married_to}),
                      mnesia:s_write(Pers#person{married_to = undefined}),
                      mnesia:s_write(Partner#person{married_to = undefined});
                  [] ->
                      mnesia:abort(no_such_person)
              end
      end).

%% Same update using dirty_read/dirty_write outside any mnesia activity.
dirty_update_records(Name) ->
    case mnesia:dirty_read({person, Name}) of
        [Pers] ->
            [Partner] = mnesia:dirty_read({person, Pers#person.married_to}),
            mnesia:dirty_write(Pers#person{married_to = undefined}),
            mnesia:dirty_write(Partner#person{married_to = undefined});
        [] ->
            mnesia:abort(no_such_person)
    end,
    {atomic, ok}.

%% Same update going straight to the ets table named person.
bif_update_records(Name) ->
    case ets:lookup(person, Name) of
        [Pers] ->
            [Partner] = ets:lookup(person, Pers#person.married_to),
            ets:insert(person, Pers#person{married_to = undefined}),
            ets:insert(person, Partner#person{married_to = undefined});
        [] ->
            mnesia:abort(no_such_person)
    end,
    {atomic, ok}.

%% Clear married_to on the named person and its partner, going straight
%% to the dets table named person.
dets_update_records(Name) ->
    case dets:lookup(person, Name) of
        [Pers] ->
            [Partner] = dets:lookup(person, Pers#person.married_to),
            dets:insert(person, Pers#person{married_to = undefined}),
            dets:insert(person, Partner#person{married_to = undefined});
        [] ->
            mnesia:abort(no_such_person)
    end,
    {atomic, ok}.

%% Fun that writes two persons married to each other with mnesia:write/1.
write_records_fun(Pers, Partner) ->
    fun() ->
            Template = #person{children = [ulla, bella]},
            mnesia:write(Template#person{name = Pers, married_to = Partner}),
            mnesia:write(Template#person{name = Partner, married_to = Pers})
    end.

write_records(Pers, Partner) ->
    Fun = write_records_fun(Pers, Partner),
    mnesia:transaction(Fun).

%% The dirty/ets variants wrap the result in {atomic, _} so that they
%% return the same shape as mnesia:transaction/1.
sync_dirty_write_records(Pers, Partner) ->
    Fun = write_records_fun(Pers, Partner),
    {atomic, mnesia:sync_dirty(Fun)}.

async_dirty_write_records(Pers, Partner) ->
    Fun = write_records_fun(Pers, Partner),
    {atomic, mnesia:async_dirty(Fun)}.

ets_write_records(Pers, Partner) ->
    Fun = write_records_fun(Pers, Partner),
    {atomic, mnesia:ets(Fun)}.

%% Same pair of writes as write_records_fun/2, but with mnesia:s_write/1.
s_write_records(Pers, Partner) ->
    mnesia:transaction(
      fun() ->
              Template = #person{children = [ulla, bella]},
              mnesia:s_write(Template#person{name = Pers, married_to = Partner}),
              mnesia:s_write(Template#person{name = Partner, married_to = Pers})
      end).

dirty_write_records(Pers, Partner) ->
    Template = #person{children = [ulla, bella]},
    mnesia:dirty_write(Template#person{name = Pers, married_to = Partner}),
    mnesia:dirty_write(Template#person{name = Partner, married_to = Pers}),
    {atomic, ok}.

%% Write through mnesia:ets/1 only when the person table is committed to
%% exactly one ram_copies replica on this node; otherwise throw skipped.
ets_opt_write_records(Pers, Partner) ->
    Here = node(),
    case mnesia:table_info(person, where_to_commit) of
        [{Here, ram_copies}] ->
            ets_write_records(Pers, Partner);
        _ ->
            throw(skipped)
    end.

%% Write through ets:insert/2 only when the person table is committed to
%% exactly one ram_copies replica on this node; otherwise throw skipped.
bif_opt_write_records(Pers, Partner) ->
    Here = node(),
    case mnesia:table_info(person, where_to_commit) of
        [{Here, ram_copies}] ->
            bif_write_records(Pers, Partner);
        _ ->
            throw(skipped)
    end.

%% Write two persons married to each other straight into the ets table
%% named person.
bif_write_records(Pers, Partner) ->
    Template = #person{children = [ulla, bella]},
    ets:insert(person, Template#person{name = Pers, married_to = Partner}),
    ets:insert(person, Template#person{name = Partner, married_to = Pers}),
    {atomic, ok}.

%% Write through dets:insert/2 only when the person table is committed to
%% exactly one disc_only_copies replica on this node; otherwise throw
%% skipped.
dets_opt_write_records(Pers, Partner) ->
    Here = node(),
    case mnesia:table_info(person, where_to_commit) of
        [{Here, disc_only_copies}] ->
            dets_write_records(Pers, Partner);
        _ ->
            throw(skipped)
    end.

dets_write_records(Pers, Partner) ->
    Template = #person{children = [ulla, bella]},
    dets:insert(person, Template#person{name = Pers, married_to = Partner}),
    dets:insert(person, Template#person{name = Partner, married_to = Pers}),
    {atomic, ok}.

%% Fun that reads both persons with mnesia:read/1 and aborts with
%% no_such_person unless each read yields exactly one record.
read_records_fun(Pers, Partner) ->
    fun() ->
            PersRes = mnesia:read({person, Pers}),
            PartnerRes = mnesia:read({person, Partner}),
            case {PersRes, PartnerRes} of
                {[_], [_]} ->
                    ok;
                _ ->
                    mnesia:abort(no_such_person)
            end
    end.

read_records(Pers, Partner) ->
    Fun = read_records_fun(Pers, Partner),
    mnesia:transaction(Fun).

%% The dirty/ets variants wrap the result in {atomic, _} so that they
%% return the same shape as mnesia:transaction/1.
sync_dirty_read_records(Pers, Partner) ->
    Fun = read_records_fun(Pers, Partner),
    {atomic, mnesia:sync_dirty(Fun)}.

async_dirty_read_records(Pers, Partner) ->
    Fun = read_records_fun(Pers, Partner),
    {atomic, mnesia:async_dirty(Fun)}.

ets_read_records(Pers, Partner) ->
    Fun = read_records_fun(Pers, Partner),
    {atomic, mnesia:ets(Fun)}.

dirty_read_records(Pers, Partner) ->
    PersRes = mnesia:dirty_read({person, Pers}),
    PartnerRes = mnesia:dirty_read({person, Partner}),
    case {PersRes, PartnerRes} of
        {[_], [_]} ->
            {atomic, ok};
        _ ->
            mnesia:abort(no_such_person)
    end.

bif_read_records(Pers, Partner) ->
    PersRes = ets:lookup(person, Pers),
    PartnerRes = ets:lookup(person, Partner),
    case {PersRes, PartnerRes} of
        {[_], [_]} ->
            {atomic, ok};
        _ ->
            mnesia:abort(no_such_person)
    end.

dets_read_records(Pers, Partner) ->
    PersRes = dets:lookup(person, Pers),
    PartnerRes = dets:lookup(person, Partner),
    case {PersRes, PartnerRes} of
        {[_], [_]} ->
            {atomic, ok};
        _ ->
            mnesia:abort(no_such_person)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Restart Mnesia on Nodes with a fresh schema and a person table built
%% from Config, run every meter once and stop Mnesia again.
%% Returns the list of #meter{} records with micros filled in.
run(Nodes, Config, FunOverhead) ->
    Meters = meters(),
    io:format("Run ~w meters with table config: ~w~n", [length(Meters), Config]),
    rpc:multicall(Nodes, mnesia, lkill, []),
    start(Nodes, Config),
    Res = [run_meter(Data, Nodes, FunOverhead) || Data <- Meters],
    stop(Nodes),
    Res.

%% Initialise the records with the meter's init fun and then time its
%% meter fun. micros becomes the average time per call minus FunOverhead
%% (never below 0), or the failure reason when init or timing fails
%% (for example skipped, thrown by the *_opt_* init funs).
%%
%% The guard uses is_record/2; the old-style record/2 guard test is
%% obsolete.
run_meter(M, Nodes, FunOverhead) when is_record(M, meter) ->
    io:format(".", []),
    case catch init_records(M#meter.init, ?TIMES) of
        {atomic, ok} ->
            %% Dump the log on all nodes before the measurement starts.
            rpc:multicall(Nodes, mnesia, dump_log, []),
            case tc(M#meter.meter, ?TIMES) of
                {ok, Micros} ->
                    M#meter{micros = lists:max([0, Micros - FunOverhead])};
                {error, Reason} ->
                    M#meter{micros = Reason}
            end;
        Res ->
            M#meter{micros = Res}
    end.

%% Recreate the schema on Nodes, start Mnesia on each of them with the
%% dump-log thresholds below, and create the person table.
start(Nodes, Config) ->
    mnesia:delete_schema(Nodes),
    ok = mnesia:create_schema(Nodes),
    Args = [[{dump_log_write_threshold, ?TIMES div 2},
             {dump_log_time_threshold, timer:hours(10)}]],
    lists:foreach(fun(Node) -> rpc:call(Node, mnesia, start, Args) end, Nodes),
    Attrs = record_info(fields, person),
    TabDef = [{attributes, Attrs} | Config],
    {atomic, _} = mnesia:create_table(person, TabDef).

stop(Nodes) ->
    rpc:multicall(Nodes, mnesia, stop, []).

%% Generate some dummy persons: Fun(N, -N) is called for N = Times .. 1
%% and must return {atomic, ok} every time.
init_records(_Fun, 0) ->
    {atomic, ok};
init_records(Fun, Times) ->
    {atomic, ok} = Fun(Times, 0 - Times),
    init_records(Fun, Times - 1).

%% Time Times calls of Fun via repeat_meter/2.
%% Returns {ok, AverageMicrosPerCall} or {error, Reason}.
tc(Fun, Times) ->
    case catch timer:tc(?MODULE, repeat_meter, [Fun, Times]) of
        {Micros, ok} ->
            {ok, Micros div Times};
        {_Micros, {error, Reason}} ->
            {error, Reason};
        {'EXIT', Reason} ->
            {error, Reason}
    end.

%% The meter must return {atomic, ok}
repeat_meter(Meter, Times) ->
    repeat_meter(Meter, {atomic, ok}, Times).

%% Call Meter(Times), Meter(Times - 1), ..., Meter(1) for as long as the
%% previous result was {atomic, _}. The final result must be {atomic, ok}.
repeat_meter(_, {atomic, ok}, 0) ->
    ok;
repeat_meter(Meter, {atomic, _Result}, Times) when Times > 0 ->
    repeat_meter(Meter, Meter(Times), Times - 1);
repeat_meter(_Meter, Reason, _Times) ->
    {error, Reason}.
+ diff --git a/lib/mnesia/examples/mnesia_tpcb.erl b/lib/mnesia/examples/mnesia_tpcb.erl new file mode 100644 index 0000000000..903c53a21c --- /dev/null +++ b/lib/mnesia/examples/mnesia_tpcb.erl @@ -0,0 +1,1268 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% MODULE +%% +%% mnesia_tpcb - TPC-B benchmarking of Mnesia +%% +%% DESCRIPTION +%% +%% The metrics used in the TPC-B benchmark are throughput as measured +%% in transactions per second (TPS). The benchmark uses a single, +%% simple update-intensive transaction to load the database system. +%% The single transaction type provides a simple, repeatable +%% unit of work, and is designed to exercise the basic components of +%% a database system. +%% +%% The definition of the TPC-B states lots of detailed rules and +%% conditions that must be fulfilled, e.g. how the ACID (atomicity, +%% consistency, isolation and durability) properties are verified, +%% how the random numbers must be distributed, minimum sizes of +%% the different types of records, minimum duration of the benchmark, +%% formulas to calculate prices (dollars per tps), disclosure issues +%% etc. Please, see http://www.tpc.org/ about the nitty gritty details. +%% +%% The TPC-B benchmark is stated in terms of a hypothetical bank.
The +%% bank has one or more branches. Each branch has multiple tellers. The +%% bank has many customers, each with an account. The database represents +%% the cash position of each entity (branch, teller and account) and a +%% history of recent transactions run by the bank. The transaction +%% represents the work done when a customer makes a deposit or a +%% withdrawal against his account. The transaction is performed by a +%% teller at some branch. +%% +%% Each process that performs TPC-B transactions is called a driver. +%% Drivers generate teller_id, account_id and delta amount of +%% money randomly. An account, a teller and a branch are read, their +%% balances are adjusted and a history record is created. The driver +%% measures the time for 3 reads, 3 writes and 1 create. +%% +%% GETTING STARTED +%% +%% Generate tables and run with default configuration: +%% +%% mnesia_tpcb:start(). +%% +%% A little bit more advanced; +%% +%% spawn(mnesia_tpcb, start, [[{n_drivers_per_node, 8}, {stop_after, infinity}]]), +%% mnesia_tpcb:stop(). +%% +%% Really advanced; +%% +%% mnesia_tpcb:init([{n_branches, 8}, {replica_type, disc_only_copies}]), +%% mnesia_tpcb:run([{n_drivers_per_node, 8}]), +%% mnesia_tpcb:run([{n_drivers_per_node, 64}]). +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(mnesia_tpcb). +-author('[email protected]'). + +-export([ + config/2, + count_balance/0, + driver_init/2, + init/1, + reporter_init/2, + run/1, + start/0, + start/1, + start/2, + stop/0, + real_trans/5, + verify_tabs/0, + reply_gen_branch/3, + frag_add_delta/7, + + conflict_test/1, + dist_test/1, + replica_test/1, + sticky_replica_test/1, + remote_test/1, + remote_frag2_test/1 + ]). + +-define(SECOND, 1000000). 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Account record, total size must be at least 100 bytes + +-define(ACCOUNT_FILLER, + {123456789012345678901234567890123456789012345678901234567890, + 123456789012345678901234567890123456789012345678901234567890, + 123456789012345678901234567890123456789012345678901234}). + +-record(account, + { + id = 0, % Unique account id + branch_id = 0, % Branch where the account is held + balance = 0, % Account balance + filler = ?ACCOUNT_FILLER % Gap filler to ensure size >= 100 bytes + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Branch record, total size must be at least 100 bytes + +-define(BRANCH_FILLER, + {123456789012345678901234567890123456789012345678901234567890, + 123456789012345678901234567890123456789012345678901234567890, + 123456789012345678901234567890123456789012345678901234567890}). + +-record(branch, + { + id = 0, % Unique branch id + balance = 0, % Total balance of whole branch + filler = ?BRANCH_FILLER % Gap filler to ensure size >= 100 bytes + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Teller record, total size must be at least 100 bytes + +-define(TELLER_FILLER, + {123456789012345678901234567890123456789012345678901234567890, + 123456789012345678901234567890123456789012345678901234567890, + 1234567890123456789012345678901234567890123456789012345678}). + +-record(teller, + { + id = 0, % Unique teller id + branch_id = 0, % Branch where the teller is located + balance = 0, % Teller balance + filler = ?TELLER_FILLER % Gap filler to ensure size >= 100 bytes + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% History record, total size must be at least 50 bytes + +-define(HISTORY_FILLER, 1234567890). 
+ +-record(history, + { + history_id = {0, 0}, % {DriverId, DriverLocalHistoryid} + time_stamp = now(), % Time point during active transaction + branch_id = 0, % Branch associated with teller + teller_id = 0, % Teller involved in transaction + account_id = 0, % Account updated by transaction + amount = 0, % Amount (delta) specified by transaction + filler = ?HISTORY_FILLER % Gap filler to ensure size >= 50 bytes + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-record(tab_config, + { + db_nodes = [node()], + n_replicas = 1, % Ignored for non-fragmented tables + replica_nodes = [node()], + replica_type = ram_copies, + use_running_mnesia = false, + n_fragments = 0, + n_branches = 1, + n_tellers_per_branch = 10, % Must be 10 + n_accounts_per_branch = 100000, % Must be 100000 + branch_filler = ?BRANCH_FILLER, + account_filler = ?ACCOUNT_FILLER, + teller_filler = ?TELLER_FILLER + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(run_config, + { + driver_nodes = [node()], + n_drivers_per_node = 1, + use_running_mnesia = false, + stop_after = timer:minutes(15), % Minimum 15 min + report_interval = timer:minutes(1), + use_sticky_locks = false, + spawn_near_branch = false, + activity_type = transaction, + reuse_history_id = false + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(time, + { + n_trans = 0, + min_n = 0, + max_n = 0, + acc_time = 0, + max_time = 0 + }). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(driver_state, + { + driver_id, + driver_node, + seed, + n_local_branches, + local_branches, + tab_config, + run_config, + history_id, + time = #time{}, + acc_time = #time{}, + reuse_history_id + }). 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Loop state of the reporter process (see reporter_init/2).
-record(reporter_state, {
          driver_pids,       % Pids of the spawned drivers
          starter_pid,       % Process that started the run
          n_iters = 0,       % Number of report intervals elapsed
          prev_tps = 0,      % Tps figure of the previous report
          curr = #time{},    % Statistics of the current interval
          acc = #time{},     % Statistics accumulated over the run
          init_micros,       % Time when the run started (microseconds)
          prev_micros,       % Time of the previous report (microseconds)
          run_config         % #run_config{}
         }).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% config(TestName, ReplicaType) -> [{Key, Val}]
%%
%% Canned argument lists for start/1. Every test runs for one minute,
%% reports every ten seconds and reuses the history ids.

%% One driver on each node, table not replicated
config(frag_test, ReplicaType) ->
    {_Local, _Remote, Nodes} = config_nodes(),
    [{n_branches, length(Nodes)},
     {n_fragments, length(Nodes)},
     {replica_nodes, Nodes},
     {db_nodes, Nodes},
     {driver_nodes, Nodes},
     {n_accounts_per_branch, 100},
     {replica_type, ReplicaType}
     | config_short_run()];

%% One driver on each node, table replicated to two nodes.
config(frag2_test, ReplicaType) ->
    {_Local, _Remote, Nodes} = config_nodes(),
    [{n_branches, length(Nodes)},
     {n_fragments, length(Nodes)},
     {n_replicas, 2},
     {replica_nodes, Nodes},
     {db_nodes, Nodes},
     {driver_nodes, Nodes},
     {n_accounts_per_branch, 100},
     {replica_type, ReplicaType}
     | config_short_run()];

%% One driver on this node, table replicated to all nodes.
config(replica_test, ReplicaType) ->
    {Local, _Remote, Nodes} = config_nodes(),
    [{db_nodes, Nodes},
     {driver_nodes, [Local]},
     {replica_nodes, Nodes},
     {n_accounts_per_branch, 100},
     {replica_type, ReplicaType}
     | config_short_run()];

%% One driver on this node, table replicated to all nodes,
%% sticky locks in use.
config(sticky_replica_test, ReplicaType) ->
    {Local, _Remote, Nodes} = config_nodes(),
    [{db_nodes, Nodes},
     {driver_nodes, [Local]},
     {replica_nodes, Nodes},
     {n_accounts_per_branch, 100},
     {replica_type, ReplicaType},
     {use_sticky_locks, true}
     | config_short_run()];

%% Ten drivers per node, tables replicated to all nodes, lots of branches
config(dist_test, ReplicaType) ->
    {_Local, _Remote, Nodes} = config_nodes(),
    [{db_nodes, Nodes},
     {driver_nodes, Nodes},
     {replica_nodes, Nodes},
     {n_drivers_per_node, 10},
     {n_branches, 10 * length(Nodes) * 100},
     {n_accounts_per_branch, 10},
     {replica_type, ReplicaType}
     | config_short_run()];

%% Ten drivers per node, tables replicated to all nodes, single branch
config(conflict_test, ReplicaType) ->
    {_Local, _Remote, Nodes} = config_nodes(),
    [{db_nodes, Nodes},
     {driver_nodes, Nodes},
     {replica_nodes, Nodes},
     {n_drivers_per_node, 10},
     {n_branches, 1},
     {n_accounts_per_branch, 10},
     {replica_type, ReplicaType}
     | config_short_run()];

%% One driver on this node, table replicated to all other nodes.
config(remote_test, ReplicaType) ->
    {Local, Remote, Nodes} = config_nodes(),
    [{db_nodes, Nodes},
     {driver_nodes, [Local]},
     {replica_nodes, Remote},
     {n_accounts_per_branch, 100},
     {replica_type, ReplicaType}
     | config_short_run()];

%% One driver on this node, table replicated to two other nodes.
config(remote_frag2_test, ReplicaType) ->
    {Local, Remote, Nodes} = config_nodes(),
    [{n_branches, length(Remote)},
     {n_fragments, length(Remote)},
     {n_replicas, 2},
     {replica_nodes, Remote},
     {db_nodes, Nodes},
     {driver_nodes, [Local]},
     {n_accounts_per_branch, 100},
     {replica_type, ReplicaType}
     | config_short_run()].

%% Returns {Local, Remote, [Local | Remote]} for the connected nodes.
config_nodes() ->
    Remote = nodes(),
    Local = node(),
    {Local, Remote, [Local | Remote]}.

%% Options shared by all canned tests; always last in the list.
config_short_run() ->
    [{stop_after, timer:minutes(1)},
     {report_interval, timer:seconds(10)},
     {reuse_history_id, true}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Runs the canned test What in a process linked to the caller.
start(What, ReplicaType) ->
    Args = config(What, ReplicaType),
    spawn_link(?MODULE, start, [Args]).

replica_test(ReplicaType)        -> start(replica_test, ReplicaType).

sticky_replica_test(ReplicaType) -> start(sticky_replica_test, ReplicaType).

dist_test(ReplicaType)           -> start(dist_test, ReplicaType).

conflict_test(ReplicaType)       -> start(conflict_test, ReplicaType).

remote_test(ReplicaType)         -> start(remote_test, ReplicaType).

remote_frag2_test(ReplicaType)   -> start(remote_frag2_test, ReplicaType).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Args is a list of {Key, Val} tuples where Key is a field name
%% in either the record tab_config or run_config. Unknown keys are ignored.

start() ->
    start([]).

%% Generates the database and then runs the benchmark against it.
start(Args) ->
    init(Args),
    run(Args).

%% Builds a record from a {Key, Val} list. DefaultTuple is a record
%% holding the defaults, Fields its field names in record order.
list2rec(List, Fields, DefaultTuple) ->
    [Tag | Defaults] = tuple_to_list(DefaultTuple),
    Values = list2rec(List, Fields, Defaults, []),
    list_to_tuple([Tag | Values]).
%% list2rec(List, Fields, Defaults, Acc) -> Acc ++ Values
%%
%% For each field name (paired with its default) the value found in the
%% {Key, Val} list List is used, or the default when the key is absent.
%% The values are produced in one pass and appended to Acc once,
%% instead of re-copying the accumulator for every field.
list2rec(List, Fields, Defaults, Acc) ->
    Acc ++ [list2rec_value(F, D, List) || {F, D} <- lists:zip(Fields, Defaults)].

%% Value of Field in List, or Default when List has no such key.
list2rec_value(Field, Default, List) ->
    case lists:keysearch(Field, 1, List) of
        false ->
            Default;
        {value, {Field, Val}} ->
            Val
    end.

%% Stops a running benchmark, if any, via its registered reporter.
stop() ->
    case whereis(mnesia_tpcb) of
        undefined ->
            {error, not_running};
        Pid ->
            sync_stop(Pid)
    end.

%% Asks the reporter Pid to stop and waits for its result.
%% The reporter is killed if it has not answered within one minute.
sync_stop(Pid) ->
    Pid ! {self(), stop},
    receive
        {Pid, {stopped, Res}} -> Res
    after timer:minutes(1) ->
            exit(Pid, kill),
            {error, brutal_kill}
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Initialization

%% Args is a list of {Key, Val} tuples where Key is a field name
%% in the record tab_config, unknown keys are ignored.
%%
%% Prints the effective table config, stops any running benchmark,
%% restarts Mnesia on a fresh schema (unless use_running_mnesia is
%% true) and generates the tables.
init(Args) ->
    Fields = record_info(fields, tab_config),
    TabConfig0 = list2rec(Args, Fields, #tab_config{}),
    TabConfig =
        if
            TabConfig0#tab_config.n_fragments =:= 0 ->
                %% Non-fragmented tables get one replica per replica node
                TabConfig0#tab_config{n_replicas = length(TabConfig0#tab_config.replica_nodes)};
            true ->
                TabConfig0
        end,
    %% Field values start at tuple position 2, after the record tag
    Fun = fun(F, Pos) -> {{F, element(Pos, TabConfig)}, Pos + 1} end,
    {List, _} = lists:mapfoldl(Fun, 2, Fields),
    io:format("TPC-B: Table config: ~p ~n", [List]),

    DbNodes = TabConfig#tab_config.db_nodes,
    stop(),
    if
        TabConfig#tab_config.use_running_mnesia =:= true ->
            ignore;
        true ->
            init_fresh_mnesia(DbNodes)
    end,
    gen_tabs(TabConfig).

%% Kills Mnesia on DbNodes, replaces the schema on disc and starts
%% Mnesia again. Exits the calling process if any step fails.
init_fresh_mnesia(DbNodes) ->
    rpc:multicall(DbNodes, mnesia, lkill, []),
    case mnesia:delete_schema(DbNodes) of
        ok ->
            ok;
        {error, DeleteReason} ->
            io:format("TPC-B: <ERROR> "
                      "Failed to delete schema on disc: ~p~n",
                      [DeleteReason]),
            exit({delete_schema_failed, DeleteReason})
    end,
    case mnesia:create_schema(DbNodes) of
        ok ->
            ok;
        {error, CreateReason} ->
            io:format("TPC-B: <ERROR> "
                      "Failed to create schema on disc: ~p~n",
                      [CreateReason]),
            exit({create_schema_failed, CreateReason})
    end,
    {Replies, BadNodes} = rpc:multicall(DbNodes, mnesia, start, []),
    case [Res || Res <- Replies, Res =/= ok] of
        [] when BadNodes =:= [] ->
            ok;
        BadRes ->
            io:format("TPC-B: <ERROR> "
                      "Failed to start ~p: ~p~n",
                      [BadNodes, BadRes]),
            exit({start_failed, BadRes, BadNodes})
    end.

%% Creates the four tables, fills branch, teller and account with
%% default records and verifies the result. Exits on inconsistency.
gen_tabs(TC) ->
    create_tab(TC, branch, record_info(fields, branch),
               undefined),
    create_tab(TC, account, record_info(fields, account),
               {branch, #account.branch_id}),
    create_tab(TC, teller, record_info(fields, teller),
               {branch, #teller.branch_id}),
    create_tab(TC, history, record_info(fields, history),
               {branch, #history.branch_id}),

    NB = TC#tab_config.n_branches,
    NT = TC#tab_config.n_tellers_per_branch,
    NA = TC#tab_config.n_accounts_per_branch,
    io:format("TPC-B: Generating ~p branches a ~p bytes~n",
              [NB, size(term_to_binary(default_branch(TC)))]),
    io:format("TPC-B: Generating ~p * ~p tellers a ~p bytes~n",
              [NB, NT, size(term_to_binary(default_teller(TC)))]),
    io:format("TPC-B: Generating ~p * ~p accounts a ~p bytes~n",
              [NB, NA, size(term_to_binary(default_account(TC)))]),
    io:format("TPC-B: Generating 0 history records a ~p bytes~n",
              [size(term_to_binary(default_history(TC)))]),
    gen_branches(TC),

    case verify_tabs() of
        ok ->
            ignore;
        {error, Reason} ->
            io:format("TPC-B: <ERROR> Inconsistent tables: ~w~n",
                      [Reason]),
            exit({inconsistent_tables, Reason})
    end.

%% create_tab(TC, Name, Attrs, ForeignKey)
%%
%% Creates table Name with attributes Attrs. With n_fragments = 0 an
%% ordinary table is replicated to all replica_nodes; otherwise a
%% fragmented table is created and ForeignKey is passed on as its
%% foreign_key fragmentation property.
create_tab(TC, Name, Attrs, _ForeignKey) when TC#tab_config.n_fragments =:= 0 ->
    Nodes = TC#tab_config.replica_nodes,
    Type = TC#tab_config.replica_type,
    Def = [{Type, Nodes}, {attributes, Attrs}],
    create_tab(Name, Def);
create_tab(TC, Name, Attrs, ForeignKey) ->
    NReplicas = TC#tab_config.n_replicas,
    NodePool = TC#tab_config.replica_nodes,
    Type = TC#tab_config.replica_type,
    NF = TC#tab_config.n_fragments,
    Props = [{n_fragments, NF},
             {node_pool, NodePool},
             {n_copies(Type), NReplicas},
             {foreign_key, ForeignKey}],
    Def = [{frag_properties, Props},
           {attributes, Attrs}],
    create_tab(Name, Def).
%% Drops any existing table Name and creates it anew from Def.
%% Exits the calling process if the table cannot be created.
create_tab(Name, Def) ->
    mnesia:delete_table(Name),
    Outcome = mnesia:create_table(Name, Def),
    case Outcome of
        {atomic, ok} ->
            ok;
        {aborted, Why} ->
            io:format("TPC-B: <ERROR> failed to create table ~w ~w: ~p~n",
                      [Name, Def, Why]),
            exit({create_table_failed, Why})
    end.

%% Maps a storage type to the matching fragmentation property name.
n_copies(Type) ->
    case Type of
        ram_copies       -> n_ram_copies;
        disc_copies      -> n_disc_copies;
        disc_only_copies -> n_disc_only_copies
    end.

%% Spawns one generator process per branch and waits for all of them.
gen_branches(TC) ->
    FirstId = 0,
    LastId = FirstId + TC#tab_config.n_branches - 1,
    wait_for_gen(gen_branches(TC, FirstId, LastId, [])).

%% Waits until every generator in Pids has reported completion.
%% Any other message in the mailbox aborts the run.
wait_for_gen([]) ->
    ok;
wait_for_gen(Pending) ->
    receive
        {branch_generated, Done} ->
            wait_for_gen(lists:delete(Done, Pending));
        Unexpected ->
            exit({tpcb_failed, Unexpected})
    end.

%% gen_branches(TC, BranchId, Last, UsedNs) -> [Pid]
%%
%% Spawns a linked generator for each branch id in BranchId..Last, in
%% increasing order, each on the node that get_branch_nodes/2 ranks
%% best. Returns the generator pids in spawn order.
gen_branches(TC, BranchId, Last, UsedNs) ->
    spawn_branch_gens(TC, BranchId, Last, UsedNs, []).

spawn_branch_gens(TC, BranchId, Last, UsedNs, Spawned) when BranchId =< Last ->
    UsedNs2 = get_branch_nodes(BranchId, UsedNs),
    Pid = spawn_link(hd(UsedNs2), ?MODULE, reply_gen_branch,
                     [self(), TC, BranchId]),
    spawn_branch_gens(TC, BranchId + 1, Last, UsedNs2, [Pid | Spawned]);
spawn_branch_gens(_TC, _BranchId, _Last, _UsedNs, Spawned) ->
    lists:reverse(Spawned).

%% Body of a generator process: fills one branch and tells ReplyTo.
reply_gen_branch(ReplyTo, TC, BranchId) ->
    gen_branch(TC, BranchId),
    ReplyTo ! {branch_generated, self()},
    unlink(ReplyTo).

%% Returns a new list of nodes with the best node as head.
%% Best is the writable node for the branch that occurs the fewest
%% times in UsedNs; ties are broken by node name order.
get_branch_nodes(BranchId, UsedNs) ->
    Candidates = table_info({branch, BranchId}, where_to_write),
    Ranked = lists:sort([{n_duplicates(Node, UsedNs, 0), Node}
                         || Node <- Candidates]),
    [{_Uses, Best} | _] = Ranked,
    [Best | UsedNs].

%% Count plus the number of occurrences of N in the given list.
n_duplicates(N, UsedNs, Count) ->
    Count + length([hit || Used <- UsedNs, Used =:= N]).
%% Fills one branch: first its accounts, then its tellers (both
%% async_dirty) and finally the branch record itself (sync_dirty).
%% Account and teller ids are numbered consecutively per branch,
%% starting at BranchId * N.
gen_branch(TC, BranchId) ->
    NumAccounts = TC#tab_config.n_accounts_per_branch,
    AccountLow = BranchId * NumAccounts,
    AccountArgs = [AccountLow, AccountLow + NumAccounts - 1,
                   BranchId, default_account(TC)],
    ok = mnesia:activity(async_dirty, fun gen_accounts/4, AccountArgs, mnesia_frag),

    NumTellers = TC#tab_config.n_tellers_per_branch,
    TellerLow = BranchId * NumTellers,
    TellerArgs = [TellerLow, TellerLow + NumTellers - 1,
                  BranchId, default_teller(TC)],
    ok = mnesia:activity(async_dirty, fun gen_tellers/4, TellerArgs, mnesia_frag),

    Branch = (default_branch(TC))#branch{id = BranchId},
    WriteBranch = fun() -> mnesia:write(branch, Branch, write) end,
    ok = mnesia:activity(sync_dirty, WriteBranch, [], mnesia_frag).

%% Writes copies of teller T with ids Id..Last, all owned by BranchId.
gen_tellers(Id, Last, _BranchId, _T) when Id > Last ->
    ok;
gen_tellers(Id, Last, BranchId, T) ->
    mnesia:write(teller, T#teller{id = Id, branch_id = BranchId}, write),
    gen_tellers(Id + 1, Last, BranchId, T).

%% Writes copies of account A with ids Id..Last, all owned by BranchId.
gen_accounts(Id, Last, _BranchId, _A) when Id > Last ->
    ok;
gen_accounts(Id, Last, BranchId, A) ->
    mnesia:write(account, A#account{id = Id, branch_id = BranchId}, write),
    gen_accounts(Id + 1, Last, BranchId, A).

%% Template records; only the filler is taken from the table config.
default_branch(TC) ->
    #branch{filler = TC#tab_config.branch_filler}.

default_teller(TC) ->
    #teller{filler = TC#tab_config.teller_filler}.

default_account(TC) ->
    #account{filler = TC#tab_config.account_filler}.

default_history(_TC) ->
    #history{}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Run the benchmark

%% Args is a list of {Key, Val} tuples where Key is a field name
%% in the record run_config, unknown keys are ignored.
%% Prints the effective run config, starts the reporter and blocks
%% until the run is stopped by another process or stop_after has
%% elapsed. Any unrelated message received meanwhile ends the wait
%% and is returned as {tpcb_got, Msg}.
run(Args) ->
    RunConfig = list2rec(Args, record_info(fields, run_config), #run_config{}),
    Tags = record_info(fields, run_config),
    %% Field values start at tuple position 2, after the record tag
    Fun = fun(F, Pos) -> {{F, element(Pos, RunConfig)}, Pos + 1} end,
    {List, _} = lists:mapfoldl(Fun, 2, Tags),
    io:format("TPC-B: Run config: ~p ~n", [List]),

    Pid = spawn_link(?MODULE, reporter_init, [self(), RunConfig]),
    receive
        {Pid, {stopped, Res}} ->
            Res; % Stopped by other process
        Else ->
            {tpcb_got, Else}
    after RunConfig#run_config.stop_after ->
            sync_stop(Pid)
    end.

%% Body of the reporter process, registered as mnesia_tpcb.
%%
%% Starts Mnesia (unless use_running_mnesia is true), derives the
%% table layout from the existing tables, starts the drivers and
%% enters reporter_loop/1. When the loop ends the result is sent to
%% Starter (and to the stopping process if that is someone else) as
%% {self(), {stopped, Result}} and the reporter exits with shutdown.
reporter_init(Starter, RC) ->
    register(mnesia_tpcb, self()),
    process_flag(trap_exit, true),
    DbNodes = mnesia:system_info(db_nodes),
    if
        RC#run_config.use_running_mnesia =:= true ->
            ignore;
        true ->
            {Replies, BadNodes} =
                rpc:multicall(DbNodes, mnesia, start, []),
            case [Res || Res <- Replies, Res =/= ok] of
                [] when BadNodes =:= [] ->
                    ok;
                BadRes ->
                    io:format("TPC-B: <ERROR> "
                              "Failed to start ~w: ~p~n",
                              [BadNodes, BadRes]),
                    exit({start_failed, BadRes, BadNodes})
            end,
            verify_tabs()
    end,

    N  = table_info(branch, size),
    NT = table_info(teller, size) div N,
    NA = table_info(account, size) div N,

    {Type, NF, RepNodes} = table_storage(branch),
    %% NOTE(review): RepNodes is a replica count, not a node list, yet
    %% it is stored in replica_nodes. Nothing visible in this module
    %% reads that field during the run phase - confirm before relying
    %% on it.
    TC = #tab_config{n_fragments = NF,
                     n_branches = N,
                     n_tellers_per_branch = NT,
                     n_accounts_per_branch = NA,
                     db_nodes = DbNodes,
                     replica_nodes = RepNodes,
                     replica_type = Type
                    },
    Drivers = start_drivers(RC, TC),
    Now = now_to_micros(erlang:now()),
    State = #reporter_state{driver_pids = Drivers,
                            run_config = RC,
                            starter_pid = Starter,
                            init_micros = Now,
                            prev_micros = Now
                           },
    case catch reporter_loop(State) of
        {'EXIT', Reason} ->
            io:format("TPC-B: Abnormal termination: ~p~n", [Reason]),
            if
                RC#run_config.use_running_mnesia =:= true ->
                    ignore;
                true ->
                    rpc:multicall(DbNodes, mnesia, lkill, [])
            end,
            unlink(Starter),
            Starter ! {self(), {stopped, {error, Reason}}}, % To be sure
            exit(shutdown);
        {ok, Stopper, State2} ->
            Time = State2#reporter_state.acc,
            Res =
                case verify_tabs() of
                    ok ->
                        {ok, Time};
                    {error, Reason} ->
                        io:format("TPC-B: <ERROR> Inconsistent tables, ~p~n",
                                  [{error, Reason}]),
                        {error, Reason}
                end,
            if
                RC#run_config.use_running_mnesia =:= true ->
                    ignore;
                true ->
                    rpc:multicall(DbNodes, mnesia, stop, [])
            end,
            unlink(Starter),
            Starter ! {self(), {stopped, Res}},
            if
                Stopper =/= Starter ->
                    Stopper ! {self(), {stopped, Res}};
                true ->
                    ignore
            end,
            exit(shutdown)
    end.

%% mnesia:table_info/2 evaluated with the mnesia_frag access module,
%% so that fragmented tables are handled as one logical table.
table_info(Tab, Item) ->
    Fun = fun() -> mnesia:table_info(Tab, Item) end,
    mnesia:activity(sync_dirty, Fun, mnesia_frag).

%% Returns {Storage, NFragments, NReplicas} for table Tab.
%%
%% Storage is the first of ram_copies, disc_copies and
%% disc_only_copies that Tab has replicas of, and NReplicas the
%% number of replicas of that kind. NFragments is 0 for a
%% non-fragmented table. Crashes if Tab has no replicas at all.
table_storage(Tab) ->
    case mnesia:table_info(Tab, frag_properties) of
        [] ->
            NR  = length(mnesia:table_info(Tab, ram_copies)),
            ND  = length(mnesia:table_info(Tab, disc_copies)),
            NDO = length(mnesia:table_info(Tab, disc_only_copies)),
            table_storage_summary(0, NR, ND, NDO);
        Props ->
            %% keysearch hands back the whole property tuple
            {value, {n_fragments, NF}} =
                lists:keysearch(n_fragments, 1, Props),
            NR  = table_info(Tab, n_ram_copies),
            ND  = table_info(Tab, n_disc_copies),
            NDO = table_info(Tab, n_disc_only_copies),
            table_storage_summary(NF, NR, ND, NDO)
    end.

%% Picks the storage type that has replicas, in priority order.
table_storage_summary(NF, NR, _ND, _NDO) when NR =/= 0 ->
    {ram_copies, NF, NR};
table_storage_summary(NF, _NR, ND, _NDO) when ND =/= 0 ->
    {disc_copies, NF, ND};
table_storage_summary(NF, _NR, _ND, NDO) when NDO =/= 0 ->
    {disc_only_copies, NF, NDO}.
%% Main loop of the reporter. Every report_interval the drivers are
%% polled and a progress line is printed. A {From, stop} message ends
%% the loop with {ok, From, FinalState}; the loop exits if the starter
%% dies or no drivers are left.
reporter_loop(State) ->
    #reporter_state{run_config = RC,
                    starter_pid = Starter,
                    n_iters = Iters} = State,
    receive
        {From, stop} ->
            {ok, From, call_drivers(State, stop)};
        {'EXIT', Starter, Reason} ->
            %% call_drivers(State, stop),
            exit({starter_died, Starter, Reason})
    after RC#run_config.report_interval ->
            Polled = call_drivers(State#reporter_state{n_iters = Iters + 1},
                                  report),
            case Polled#reporter_state.driver_pids of
                [] ->
                    exit(drivers_died);
                _ ->
                    %% Start the next interval with fresh statistics
                    reporter_loop(Polled#reporter_state{curr = #time{}})
            end
    end.

%% Sends Msg (report | stop) to every driver, collects their
%% statistics and prints a report. On stop a second report covering
%% the whole run is printed as well. Returns the updated state.
call_drivers(State, Msg) ->
    Drivers = State#reporter_state.driver_pids,
    Self = self(),
    _ = [Driver ! {Self, Msg} || Driver <- Drivers],
    Reported = show_report(calc_reports(Drivers, State)),
    case Msg of
        stop ->
            %% n_iters = 0 selects the summary layout in show_report/1
            show_report(Reported#reporter_state{
                          n_iters = 0,
                          curr = Reported#reporter_state.acc,
                          prev_micros = Reported#reporter_state.init_micros});
        _ ->
            ignore
    end,
    Reported.

%% Receives one #time{} answer from each listed driver, in list order,
%% and adds it to both the interval and the accumulated statistics.
%% Exits if the starter or the awaited driver terminates meanwhile.
calc_reports([], State) ->
    State;
calc_reports([Pid | Drivers], State) ->
    Starter = State#reporter_state.starter_pid,
    receive
        {'EXIT', Starter, Reason} ->
            exit({starter_died, Starter, Reason});
        {'EXIT', Pid, Reason} ->
            exit({driver_died, Pid, Reason});
        {Pid, #time{} = Time} ->
            %% io:format("~w: ~w~n", [Pid, Time]),
            #reporter_state{acc = Acc, curr = Curr} = State,
            calc_reports(Drivers,
                         State#reporter_state{acc = add_time(Acc, Time),
                                              curr = add_time(Curr, Time)})
    end.

%% Merges the statistics New into Acc. Counts and times are summed,
%% max_n/max_time keep the largest value seen and min_n the smallest
%% non-zero n_trans seen (0 while none has been seen).
add_time(Acc, New) ->
    #time{n_trans = NewN, acc_time = NewAcc, max_time = NewMax} = New,
    #time{n_trans = AccN, min_n = AccMinN, max_n = AccMaxN,
          acc_time = AccAcc, max_time = AccMax} = Acc,
    Acc#time{n_trans  = NewN + AccN,
             min_n    = lists:min([NewN, AccMinN] -- [0]),
             max_n    = lists:max([NewN, AccMaxN]),
             acc_time = NewAcc + AccAcc,
             max_time = lists:max([NewMax, AccMax])}.

%% Evaluates _What_, yielding 0 if the evaluation raises anything.
-define(AVOID_DIV_ZERO(_What_), try (_What_) catch _:_ -> 0 end).
%% Prints one report line based on the interval statistics in curr.
%% With n_iters > 0 the per-interval layout is used, otherwise the
%% final summary layout. Returns the state with prev_tps and
%% prev_micros updated.
show_report(State) ->
    Now = now_to_micros(erlang:now()),
    #reporter_state{n_iters = Iters,
                    curr = Stats,
                    prev_tps = PrevTps,
                    prev_micros = PrevMicros,
                    driver_pids = DriverPids} = State,
    #time{n_trans = N,
          acc_time = AccTime,
          max_time = Max,
          max_n = MaxN,
          min_n = MinN} = Stats,

    %% Net figures: based on the time spent inside transactions
    Avg = ?AVOID_DIV_ZERO(AccTime div N),
    AliveN = length(DriverPids),
    Tps = ?AVOID_DIV_ZERO((?SECOND * AliveN) div Avg),
    {DiffSign, DiffTps} = signed_diff(Iters, Tps, PrevTps),
    Unfairness = ?AVOID_DIV_ZERO(MaxN / MinN),

    %% Gross figures: based on the elapsed time since the last report
    BruttoAvg = ?AVOID_DIV_ZERO((Now - PrevMicros) div N),
%%    io:format("n_iters=~p, n_trans=~p, n_drivers=~p, avg=~p, now=~p, prev=~p~n",
%%              [Iters, N, AliveN, BruttoAvg, Now, State#reporter_state.prev_micros]),
    BruttoTps = ?AVOID_DIV_ZERO(?SECOND div BruttoAvg),

    if
        Iters > 0 ->
            io:format("TPC-B: ~p iter ~s~p diff ~p (~p) tps ~p avg micros ~p max micros ~p unfairness~n",
                      [Iters, DiffSign, DiffTps, Tps, BruttoTps, Avg, Max, Unfairness]);
        true ->
            io:format("TPC-B: ~p (~p) transactions per second, "
                      "duration of longest transaction was ~p milliseconds~n",
                      [Tps, BruttoTps, Max div 1000])
    end,
    State#reporter_state{prev_tps = Tps, prev_micros = Now}.

%% {SignString, Diff} of Curr relative to Prev. There is nothing to
%% compare with before the second iteration, so the diff is 0 then.
signed_diff(Iters, Curr, Prev) when Iters > 1 ->
    sign(Curr - Prev);
signed_diff(_Iters, _Curr, _Prev) ->
    sign(0).

%% Pairs N with an explicit "+" for positive numbers; negative
%% numbers already print their own sign.
sign(N) ->
    if
        N > 0 -> {"+", N};
        true  -> {"", N}
    end.

%% Converts a {Mega, Secs, Micros} timestamp into microseconds,
%% counted from the start of the Gregorian calendar.
now_to_micros({Mega, Secs, Micros}) ->
    DateTime = calendar:now_to_datetime({Mega, Secs, 0}),
    WholeSecs = calendar:datetime_to_gregorian_seconds(DateTime),
    WholeSecs * ?SECOND + Micros.
%% Spawns n_drivers_per_node linked drivers on each driver node and
%% returns their pids. Before spawning, every branch is assigned to a
%% driver on a node where the branch is writable; branches without
%% such a driver are reported as orphans.
start_drivers(RC, TC) ->
    Template = #driver_state{tab_config = TC,
                             run_config = RC,
                             n_local_branches = 0,
                             local_branches = [],
                             history_id = table_info(history, size),
                             reuse_history_id = RC#run_config.reuse_history_id},
    FirstBranch = 0,
    AllBranches = lists:seq(FirstBranch,
                            FirstBranch + TC#tab_config.n_branches - 1),
    DriverIds = lists:seq(1, RC#run_config.n_drivers_per_node),
    Unassigned = [Template#driver_state{driver_id = Id, driver_node = Node}
                  || Node <- RC#run_config.driver_nodes,
                     Id <- DriverIds],
    Sorted = lists:sort(lists:flatten(Unassigned)),
    {Assigned, Orphans} = alloc_local_branches(AllBranches, Sorted, []),
    %% Print the orphans themselves only when the list is short
    NumOrphans = length(Orphans),
    if
        NumOrphans =< 10 ->
            io:format("TPC-B: Orphan branches: ~p~n", [Orphans]);
        true ->
            io:format("TPC-B: Orphan branches: ~p~n", [NumOrphans])
    end,
    [spawn_link(Spec#driver_state.driver_node, ?MODULE, driver_init,
                [Spec, AllBranches])
     || Spec <- Assigned].

%% alloc_local_branches(BranchIds, Specs, Orphans) -> {Specs2, Orphans2}
%%
%% Gives each branch to the driver spec with the fewest local branches
%% among those whose node can write the branch. The updated spec is
%% moved to the front of the spec list. Branches that no driver node
%% can write are collected as orphans.
alloc_local_branches([], Specs, OrphanBranches) ->
    {Specs, OrphanBranches};
alloc_local_branches([BranchId | Tail], Specs, OrphanBranches) ->
    Writers = table_info({branch, BranchId}, where_to_write),
    Eligible = [Spec || Spec <- Specs,
                        lists:member(Spec#driver_state.driver_node, Writers)],
    case lists:keysort(#driver_state.n_local_branches, Eligible) of
        [] ->
            alloc_local_branches(Tail, Specs, [BranchId | OrphanBranches]);
        [LeastLoaded | _] ->
            #driver_state{n_local_branches = Count,
                          local_branches = Owned} = LeastLoaded,
            Updated = LeastLoaded#driver_state{
                        n_local_branches = Count + 1,
                        local_branches = [BranchId | Owned]},
            alloc_local_branches(Tail,
                                 [Updated | Specs -- [LeastLoaded]],
                                 OrphanBranches)
    end.
%% Entry point of a driver process. A driver that was given no local
%% branches works against all branches instead.
driver_init(DS, AllBranches) ->
    Seed = erlang:now(),
    Seeded =
        case DS#driver_state.n_local_branches of
            0 ->
                DS#driver_state{seed = Seed,
                                n_local_branches = length(AllBranches),
                                local_branches = AllBranches};
            _ ->
                DS#driver_state{seed = Seed}
        end,
    io:format("TPC-B: Driver ~p started as ~p on node ~p with ~p local branches~n",
              [Seeded#driver_state.driver_id, self(), node(),
               Seeded#driver_state.n_local_branches]),
    driver_loop(Seeded).

%% Runs transactions back to back. Between transactions the mailbox
%% is polled (zero timeout) for report and stop requests, both of
%% which are answered with the statistics gathered since the last
%% report.
driver_loop(DS) ->
    receive
        {Reporter, report} ->
            Interval = DS#driver_state.time,
            Reporter ! {self(), Interval},
            Total = add_time(Interval, DS#driver_state.acc_time),
            %% Reset timer
            Reset = DS#driver_state{time = #time{}, acc_time = Total},
            driver_loop(calc_trans(Reset));
        {Reporter, stop} ->
            Interval = DS#driver_state.time,
            Total = add_time(Interval, DS#driver_state.acc_time),
            io:format("TPC-B: Driver ~p (~p) on node ~p stopped: ~w~n",
                      [DS#driver_state.driver_id, self(), node(self()), Total]),
            Reporter ! {self(), Interval},
            unlink(Reporter),
            exit(stopped)
    after 0 ->
            driver_loop(calc_trans(DS))
    end.

%% Executes one timed transaction and books it in the interval
%% statistics. The history id is advanced unless ids are reused.
calc_trans(DS) ->
    {Micros, DS2} = time_trans(DS),
    Stats = DS2#driver_state.time,
    #time{n_trans = Count, acc_time = Spent, max_time = Longest} = Stats,
    Stats2 = Stats#time{n_trans = Count + 1,
                        acc_time = Spent + Micros,
                        max_time = lists:max([Micros, Longest])},
    case DS#driver_state.reuse_history_id of
        true ->
            DS2#driver_state{time = Stats2};
        false ->
            NextId = DS#driver_state.history_id + 1,
            DS2#driver_state{time = Stats2, history_id = NextId}
    end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Generate teller_id, account_id and delta
%% Time the TPC-B transaction
%%
%% Returns {Micros, DS2} where DS2 carries the advanced random seed.
%% Exits with {crash, ...} unless the transaction returns an integer
%% account balance.
time_trans(DS) ->
    %% The driver keeps its own seed; the process dictionary entry
    %% used by the random module is only borrowed for one draw.
    OldSeed = get(random_seed), % Avoid interference with Mnesia
    put(random_seed, DS#driver_state.seed),
    Random = random:uniform(),
    NewSeed = get(random_seed),
    case OldSeed of
        undefined -> erase(random_seed);
        _         -> put(random_seed, OldSeed)
    end,

    TC = DS#driver_state.tab_config,
    RC = DS#driver_state.run_config,
    {Branchid, Args} = random_to_args(Random, DS),
    {Fun, Mod} = trans_type(TC, RC),
    {Time, Res} = timer:tc(?MODULE, real_trans, [RC, Branchid, Fun, Args, Mod]),

    case Res of
        AccountBal when is_integer(AccountBal) ->
            {Time, DS#driver_state{seed = NewSeed}};
        Other ->
            exit({crash, Other, Args, Random, DS})
    end.

%% random_to_args(Random, DS) -> {BranchId, TransactionArgs}
%%
%% Derives all parameters of one transaction from the single random
%% float Random. TransactionArgs is the argument list of the
%% add_delta/7 family:
%%   [DriverId, BranchId, TellerId, AccountBranchId, AccountId,
%%    HistoryId, Delta]
random_to_args(Random, DS) ->
    DriverId = DS#driver_state.driver_id,
    TC = DS#driver_state.tab_config,
    HistoryId = DS#driver_state.history_id,
    %% 1999999 distinct values, so that +999999 is reachable too
    Delta = trunc(Random * 1999999) - 999999, % -999999 <= Delta <= +999999

    Branches = DS#driver_state.local_branches,
    NB = DS#driver_state.n_local_branches,
    NT = TC#tab_config.n_tellers_per_branch,
    NA = TC#tab_config.n_accounts_per_branch,
    %% Tmp enumerates every {branch position, teller} combination
    Tmp = trunc(Random * NB * NT),
    BranchPos = (Tmp div NT) + 1,
    BranchId =
        case TC#tab_config.n_fragments of
            0 -> BranchPos - 1;
            _ -> lists:nth(BranchPos, Branches)
        end,
    %% The teller index within the branch is the remainder; using the
    %% quotient would repeat the branch position and could address a
    %% teller that belongs to another branch.
    RelativeTellerId = Tmp rem NT,
    TellerId = (BranchId * NT) + RelativeTellerId,
    {AccountBranchId, AccountId} =
        if
            Random >= 0.85, NB > 1 ->
                %% Pick from a remote account
                TmpAccountId = trunc(Random * (NB - 1) * NA),
                TmpAccountBranchId = TmpAccountId div NA,
                if
                    TmpAccountBranchId =:= BranchId ->
                        {TmpAccountBranchId + 1, TmpAccountId + NA};
                    true ->
                        {TmpAccountBranchId, TmpAccountId}
                end;
            true ->
                %% Pick from a local account
                RelativeAccountId = trunc(Random * NA),
                LocalAccountId = (BranchId * NA) + RelativeAccountId,
                {BranchId, LocalAccountId}
        end,

    {BranchId, [DriverId, BranchId, TellerId, AccountBranchId, AccountId, HistoryId, Delta]}.

%% Runs Fun(Args) as a Mnesia activity of the configured type, either
%% locally or, with spawn_near_branch, via rpc on the node from which
%% the branch record is read.
real_trans(RC, BranchId, Fun, Args, Mod) ->
    Type = RC#run_config.activity_type,
    case RC#run_config.spawn_near_branch of
        false ->
            mnesia:activity(Type, Fun, Args, Mod);
        true ->
            Node = table_info({branch, BranchId}, where_to_read),
            case rpc:call(Node, mnesia, activity, [Type, Fun, Args, Mod]) of
                {badrpc, Reason} -> exit(Reason);
                Other            -> Other
            end
    end.

%% Selects {TransactionFun, AccessModule} for the configuration.
%% Fragmented tables combined with sticky locks have no matching
%% branch, so that combination crashes with if_clause.
trans_type(TC, RC) ->
    if
        TC#tab_config.n_fragments =:= 0,
        RC#run_config.use_sticky_locks =:= false ->
            {fun add_delta/7, mnesia};
        TC#tab_config.n_fragments =:= 0,
        RC#run_config.use_sticky_locks =:= true ->
            {fun sticky_add_delta/7, mnesia};
        TC#tab_config.n_fragments > 0,
        RC#run_config.use_sticky_locks =:= false ->
            {fun frag_add_delta/7, mnesia_frag}
    end.

%%
%% Runs the TPC-B defined transaction and returns NewAccountBalance
%%

add_delta(DriverId, BranchId, TellerId, _AccountBranchId, AccountId, HistoryId, Delta) ->
    %% Grab write lock already when the record is read

    %% Add delta to branch balance
    [B]  = mnesia:read(branch, BranchId, write),
    NewB = B#branch{balance = B#branch.balance + Delta},
    ok = mnesia:write(branch, NewB, write),

    %% Add delta to teller balance
    [T]  = mnesia:read(teller, TellerId, write),
    NewT = T#teller{balance = T#teller.balance + Delta},
    ok = mnesia:write(teller, NewT, write),

    %% Add delta to account balance
    [A]  = mnesia:read(account, AccountId, write),
    NewA = A#account{balance = A#account.balance + Delta},
    ok = mnesia:write(account, NewA, write),

    %% Append to history log
    History = #history{history_id = {DriverId, HistoryId},
                       account_id = AccountId,
                       teller_id  = TellerId,
                       branch_id  = BranchId,
                       amount     = Delta
                      },
    ok = mnesia:write(history, History, write),

    %% Return account balance
    NewA#account.balance.
%% Sticky lock variant of the TPC-B transaction; returns the new
%% account balance.
%%
%% Records are read under an ordinary read lock and written under a
%% sticky write lock. This transaction would benefit from taking the
%% sticky_write lock already at read time.
sticky_add_delta(DriverId, BranchId, TellerId, _AccountBranchId, AccountId, HistoryId, Delta) ->
    %% Branch balance
    [Branch] = mnesia:read(branch, BranchId, read),
    BranchBal = Branch#branch.balance + Delta,
    ok = mnesia:write(branch, Branch#branch{balance = BranchBal}, sticky_write),

    %% Teller balance
    [Teller] = mnesia:read(teller, TellerId, read),
    TellerBal = Teller#teller.balance + Delta,
    ok = mnesia:write(teller, Teller#teller{balance = TellerBal}, sticky_write),

    %% Account balance
    [Account] = mnesia:read(account, AccountId, read),
    AccountBal = Account#account.balance + Delta,
    ok = mnesia:write(account, Account#account{balance = AccountBal}, sticky_write),

    %% History log entry
    Entry = #history{history_id = {DriverId, HistoryId},
                     account_id = AccountId,
                     teller_id  = TellerId,
                     branch_id  = BranchId,
                     amount     = Delta},
    ok = mnesia:write(history, Entry, sticky_write),

    %% The new balance of the account is the result
    AccountBal.
%% Fragmented table variant of the TPC-B transaction; returns the new
%% account balance.
%%
%% Write locks are taken already when the records are read. Tellers
%% and accounts are read with {Table, BranchId} as table argument,
%% naming the branch that owns the record.
frag_add_delta(DriverId, BranchId, TellerId, AccountBranchId, AccountId, HistoryId, Delta) ->
    %% Branch balance
    [Branch] = mnesia:read(branch, BranchId, write),
    ok = mnesia:write(Branch#branch{balance = Branch#branch.balance + Delta}),

    %% Teller balance
    [Teller] = mnesia:read({teller, BranchId}, TellerId, write),
    ok = mnesia:write(Teller#teller{balance = Teller#teller.balance + Delta}),

    %% Account balance
    %%io:format("frag_add_delta(~p): ~p\n", [node(), {account, BranchId, AccountId}]),
    [Account] = mnesia:read({account, AccountBranchId}, AccountId, write),
    AccountBal = Account#account.balance + Delta,
    ok = mnesia:write(Account#account{balance = AccountBal}),

    %% History log entry
    Entry = #history{history_id = {DriverId, HistoryId},
                     account_id = AccountId,
                     teller_id  = TellerId,
                     branch_id  = BranchId,
                     amount     = Delta},
    ok = mnesia:write(Entry),

    %% The new balance of the account is the result
    AccountBal.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Verify table consistency
%%
%% Returns ok when the balance sums agree for all tables on all
%% running db nodes, otherwise {error, Reason}. Must be called on a
%% node where Mnesia is running.

verify_tabs() ->
    RunningNodes = mnesia:system_info(running_db_nodes),
    case lists:member(node(), RunningNodes) of
        false ->
            {error, "Must be initiated from a running db_node"};
        true ->
            verify_running_tabs(RunningNodes)
    end.

%% Waits for the tables on Nodes, then compares the balance sums that
%% every node computes while all four tables are write locked.
verify_running_tabs(Nodes) ->
    Tabs = [branch, teller, account, history],
    io:format("TPC-B: Verifying tables: ~w~n", [Tabs]),
    rpc:multicall(Nodes, mnesia, wait_for_tables, [Tabs, infinity]),

    Check =
        fun() ->
                lists:foreach(fun(Tab) -> mnesia:write_lock_table(Tab) end,
                              Tabs),
                {Summaries, BadNodes} =
                    rpc:multicall(Nodes, ?MODULE, count_balance, []),
                check_balance(Summaries, BadNodes)
        end,
    case mnesia:transaction(Check) of
        {atomic, Verdict} -> Verdict;
        {aborted, Why}    -> {error, Why}
    end.
%% Balance sum and record count of one table as seen from one node.
-record(summary, {table, node, balance, size}).

%% Returns a list of #summary{} records, one each for the branch,
%% teller and account tables, computed on the calling node.
%% Assumes that no updates are performed
count_balance() ->
    [count_balance(Tab, BalPos)
     || {Tab, BalPos} <- [{branch,  #branch.balance},
                          {teller,  #teller.balance},
                          {account, #account.balance}]].

%% Summary of table Tab, taken over all of its fragments. BalPos is
%% the tuple position of the balance field.
count_balance(Tab, BalPos) ->
    count_balance(Tab, table_info(Tab, frag_names), 0, 0, BalPos).

%% Walks the fragments one by one, threading the running balance and
%% record count through.
count_balance(Tab, [], Bal, Size, _BalPos) ->
    #summary{table = Tab, node = node(), balance = Bal, size = Size};
count_balance(Tab, [Frag | Frags], Bal, Size, BalPos) ->
    FirstKey = mnesia:dirty_first(Frag),
    {Bal2, Size2} = count_frag_balance(Frag, FirstKey, Bal, Size, BalPos),
    count_balance(Tab, Frags, Bal2, Size2, BalPos).

%% Dirty scan of one fragment starting at Key; returns the updated
%% {Balance, Size}.
count_frag_balance(_Frag, '$end_of_table', Bal, Size, _BalPos) ->
    {Bal, Size};
count_frag_balance(Frag, Key, Bal, Size, BalPos) ->
    [Record] = mnesia:dirty_read({Frag, Key}),
    NextKey = mnesia:dirty_next(Frag, Key),
    count_frag_balance(Frag, NextKey,
                       Bal + element(BalPos, Record), Size + 1, BalPos).

%% check_balance(Replies, BadNodes)
%%
%% Returns ok if every summary in Replies carries the same balance.
%% Aborts the surrounding transaction if there are no replies, if
%% some balance differs from the first one, or if any node failed to
%% answer.
check_balance([], []) ->
    mnesia:abort({"No balance"});
check_balance(Summaries, []) ->
    [One | Rest] = lists:flatten(Summaries),
    Expected = One#summary.balance,
    %% Size = One#summary.size,
    Differs = fun(S) -> S#summary.balance =/= Expected end,
    case lists:filter(Differs, Rest) of
        [] ->
            ok;
        BadSummaries ->
            mnesia:abort({"Bad balance", One, BadSummaries})
    end;
check_balance(_, BadNodes) ->
    mnesia:abort({"Bad nodes", BadNodes}).
diff --git a/lib/mnesia/include/Makefile b/lib/mnesia/include/Makefile new file mode 100644 index 0000000000..f9b7d72abe --- /dev/null +++ b/lib/mnesia/include/Makefile @@ -0,0 +1,61 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1998-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. 
You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(MNESIA_VSN) + +# ---------------------------------------------------- +# Release Macros +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN) + +# ---------------------------------------------------- +# Macros +# ---------------------------------------------------- + +INCLUDE_FILES = + +# ---------------------------------------------------- +# Make Rules +# ---------------------------------------------------- +debug opt: + +clean: + +docs: + + +# ---------------------------------------------------- +# Release Targets +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: + $(INSTALL_DIR) $(RELSYSDIR)/include +# $(INSTALL_DATA) $(INCLUDE_FILES) $(RELSYSDIR)/include + +release_docs_spec: + diff --git a/lib/mnesia/include/mnemosyne.hrl b/lib/mnesia/include/mnemosyne.hrl new file mode 100644 index 0000000000..eb6ec53ae1 --- /dev/null +++ b/lib/mnesia/include/mnemosyne.hrl @@ -0,0 +1,18 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. 
If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%% +-compile({parse_transform,mnemosyne}). diff --git a/lib/mnesia/info b/lib/mnesia/info new file mode 100644 index 0000000000..bfd0816a62 --- /dev/null +++ b/lib/mnesia/info @@ -0,0 +1,2 @@ +group: dat Database Applications +short: A heavy duty real-time distributed database diff --git a/lib/mnesia/priv/.gitignore b/lib/mnesia/priv/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/mnesia/priv/.gitignore diff --git a/lib/mnesia/src/Makefile b/lib/mnesia/src/Makefile new file mode 100644 index 0000000000..e032f563fa --- /dev/null +++ b/lib/mnesia/src/Makefile @@ -0,0 +1,139 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# + +# +include $(ERL_TOP)/make/target.mk + +ifeq ($(TYPE),debug) +ERL_COMPILE_FLAGS += -Ddebug -W +endif + +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(MNESIA_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/mnesia-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES= \ + mnesia \ + mnesia_backup \ + mnesia_bup \ + mnesia_checkpoint \ + mnesia_checkpoint_sup \ + mnesia_controller \ + mnesia_dumper\ + mnesia_event \ + mnesia_frag \ + mnesia_frag_hash \ + mnesia_frag_old_hash \ + mnesia_index \ + mnesia_kernel_sup \ + mnesia_late_loader \ + mnesia_lib\ + mnesia_loader \ + mnesia_locker \ + mnesia_log \ + mnesia_monitor \ + mnesia_recover \ + mnesia_registry \ + mnesia_schema\ + mnesia_snmp_hook \ + mnesia_snmp_sup \ + mnesia_subscr \ + mnesia_sup \ + mnesia_sp \ + mnesia_text \ + mnesia_tm + +HRL_FILES= mnesia.hrl + +ERL_FILES= $(MODULES:%=%.erl) + +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) + +APP_FILE= mnesia.app + +APP_SRC= $(APP_FILE).src +APP_TARGET= $(EBIN)/$(APP_FILE) + +APPUP_FILE= mnesia.appup + +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + + + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +ERL_COMPILE_FLAGS += \ + +warn_unused_vars \ + +'{parse_transform,sys_pre_attributes}' \ + +'{attribute,insert,vsn,"mnesia_$(MNESIA_VSN)"}' \ + -W + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +opt: $(TARGET_FILES) + +debug: + @${MAKE} TYPE=debug + 
+clean: + rm -f $(TARGET_FILES) + rm -f core + +docs: + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/src + $(INSTALL_DATA) $(HRL_FILES) $(ERL_FILES) $(RELSYSDIR)/src + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + diff --git a/lib/mnesia/src/mnesia.app.src b/lib/mnesia/src/mnesia.app.src new file mode 100644 index 0000000000..3715488ec2 --- /dev/null +++ b/lib/mnesia/src/mnesia.app.src @@ -0,0 +1,52 @@ +{application, mnesia, + [{description, "MNESIA CXC 138 12"}, + {vsn, "%VSN%"}, + {modules, [ + mnesia, + mnesia_backup, + mnesia_bup, + mnesia_checkpoint, + mnesia_checkpoint_sup, + mnesia_controller, + mnesia_dumper, + mnesia_event, + mnesia_frag, + mnesia_frag_hash, + mnesia_frag_old_hash, + mnesia_index, + mnesia_kernel_sup, + mnesia_late_loader, + mnesia_lib, + mnesia_loader, + mnesia_locker, + mnesia_log, + mnesia_monitor, + mnesia_recover, + mnesia_registry, + mnesia_schema, + mnesia_snmp_hook, + mnesia_snmp_sup, + mnesia_subscr, + mnesia_sup, + mnesia_sp, + mnesia_text, + mnesia_tm + ]}, + {registered, [ + mnesia_dumper_load_regulator, + mnesia_event, + mnesia_fallback, + mnesia_controller, + mnesia_kernel_sup, + mnesia_late_loader, + mnesia_locker, + mnesia_monitor, + mnesia_recover, + mnesia_subscr, + mnesia_sup, + mnesia_tm + ]}, + {applications, [kernel, stdlib]}, + {mod, {mnesia_sup, []}}]}. 
+ + diff --git a/lib/mnesia/src/mnesia.appup.src b/lib/mnesia/src/mnesia.appup.src new file mode 100644 index 0000000000..cad63bf8df --- /dev/null +++ b/lib/mnesia/src/mnesia.appup.src @@ -0,0 +1,37 @@ +%% -*- erlang -*- +{"%VSN%", + [ + {"4.4.11", + [ + {update, mnesia_locker, soft, soft_purge, soft_purge, []}, + {update, mnesia_controller, soft, soft_purge, soft_purge, []} + ] + }, + {"4.4.10", + [ + {update, mnesia_locker, soft, soft_purge, soft_purge, []}, + {update, mnesia_controller, soft, soft_purge, soft_purge, []} + ] + }, + {"4.4.9", [{restart_application, mnesia}]}, + {"4.4.8", [{restart_application, mnesia}]}, + {"4.4.7", [{restart_application, mnesia}]} + ], + [ + {"4.4.11", + [ + {update, mnesia_locker, soft, soft_purge, soft_purge, []}, + {update, mnesia_controller, soft, soft_purge, soft_purge, []} + ] + }, + {"4.4.10", + [ + {update, mnesia_locker, soft, soft_purge, soft_purge, []}, + {update, mnesia_controller, soft, soft_purge, soft_purge, []} + ] + }, + {"4.4.9", [{restart_application, mnesia}]}, + {"4.4.8", [{restart_application, mnesia}]}, + {"4.4.7", [{restart_application, mnesia}]} + ] +}. diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl new file mode 100644 index 0000000000..9a630f18eb --- /dev/null +++ b/lib/mnesia/src/mnesia.erl @@ -0,0 +1,2883 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
+%% +%% %CopyrightEnd% +%% + +%% +%% This module exports the public interface of the Mnesia DBMS engine + +-module(mnesia). +%-behaviour(mnesia_access). + +-export([ + %% Start, stop and debugging + start/0, start/1, stop/0, % Not for public use + set_debug_level/1, lkill/0, kill/0, % Not for public use + ms/0, + change_config/2, + + %% Activity mgt + abort/1, transaction/1, transaction/2, transaction/3, + sync_transaction/1, sync_transaction/2, sync_transaction/3, + async_dirty/1, async_dirty/2, sync_dirty/1, sync_dirty/2, ets/1, ets/2, + activity/2, activity/3, activity/4, % Not for public use + is_transaction/0, + + %% Access within an activity - Lock acquisition + lock/2, lock/4, + read_lock_table/1, + write_lock_table/1, + + %% Access within an activity - Updates + write/1, s_write/1, write/3, write/5, + delete/1, s_delete/1, delete/3, delete/5, + delete_object/1, s_delete_object/1, delete_object/3, delete_object/5, + + %% Access within an activity - Reads + read/1, read/2, wread/1, read/3, read/5, + match_object/1, match_object/3, match_object/5, + select/1,select/2,select/3,select/4,select/5,select/6, + all_keys/1, all_keys/4, + index_match_object/2, index_match_object/4, index_match_object/6, + index_read/3, index_read/6, + first/1, next/2, last/1, prev/2, + first/3, next/4, last/3, prev/4, + + %% Iterators within an activity + foldl/3, foldl/4, foldr/3, foldr/4, + + %% Dirty access regardless of activities - Updates + dirty_write/1, dirty_write/2, + dirty_delete/1, dirty_delete/2, + dirty_delete_object/1, dirty_delete_object/2, + dirty_update_counter/2, dirty_update_counter/3, + + %% Dirty access regardless of activities - Read + dirty_read/1, dirty_read/2, + dirty_select/2, + dirty_match_object/1, dirty_match_object/2, dirty_all_keys/1, + dirty_index_match_object/2, dirty_index_match_object/3, + dirty_index_read/3, dirty_slot/2, + dirty_first/1, dirty_next/2, dirty_last/1, dirty_prev/2, + + %% Info + table_info/2, table_info/4, schema/0, schema/1, + 
error_description/1, info/0, system_info/1, + system_info/0, % Not for public use + + %% Database mgt + create_schema/1, delete_schema/1, + backup/1, backup/2, traverse_backup/4, traverse_backup/6, + install_fallback/1, install_fallback/2, + uninstall_fallback/0, uninstall_fallback/1, + activate_checkpoint/1, deactivate_checkpoint/1, + backup_checkpoint/2, backup_checkpoint/3, restore/2, + + %% Table mgt + create_table/1, create_table/2, delete_table/1, + add_table_copy/3, del_table_copy/2, move_table_copy/3, + add_table_index/2, del_table_index/2, + transform_table/3, transform_table/4, + change_table_copy_type/3, + read_table_property/2, write_table_property/2, delete_table_property/2, + change_table_frag/2, + clear_table/1, clear_table/4, + + %% Table load + dump_tables/1, wait_for_tables/2, force_load_table/1, + change_table_access_mode/2, change_table_load_order/2, + set_master_nodes/1, set_master_nodes/2, + + %% Misc admin + dump_log/0, subscribe/1, unsubscribe/1, report_event/1, + + %% Snmp + snmp_open_table/2, snmp_close_table/1, + snmp_get_row/2, snmp_get_next_index/2, snmp_get_mnesia_key/2, + + %% Textfile access + load_textfile/1, dump_to_textfile/1, + + %% QLC functions + table/1, table/2, + + %% Mnemosyne exclusive + get_activity_id/0, put_activity_id/1, % Not for public use + + %% Mnesia internal functions + dirty_rpc/4, % Not for public use + has_var/1, fun_select/7, fun_select/10, select_cont/3, dirty_sel_init/5, + foldl/6, foldr/6, + + %% Module internal callback functions + raw_table_info/2, % Not for public use + remote_dirty_match_object/2, % Not for public use + remote_dirty_select/2 % Not for public use + ]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-include("mnesia.hrl"). +-import(mnesia_lib, [verbose/2]). + +-define(DEFAULT_ACCESS, ?MODULE). + +%% Select +-define(PATTERN_TO_OBJECT_MATCH_SPEC(Pat), [{Pat,[],['$_']}]). +-define(PATTERN_TO_BINDINGS_MATCH_SPEC(Pat), [{Pat,[],['$$']}]). 

%% Local function in order to avoid external function call.
%% Looks Var up through the ?catch_val macro; if the lookup exits, the
%% exit reason is handed to mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%% True when the atom is spelled '$' followed by at least one decimal
%% digit.  A lone '$' is a literal atom in ets match patterns, not a
%% variable, so it must not be reported as one.
is_dollar_digits(Var) ->
    case atom_to_list(Var) of
        [$$, Dig | Digs] ->
            is_digits([Dig | Digs]);
        _ ->
            false
    end.

%% True when every character in the list is a decimal digit
%% (the empty list counts as true).
is_digits([Dig | Tail]) when $0 =< Dig, Dig =< $9 ->
    is_digits(Tail);
is_digits([_ | _]) ->
    false;
is_digits([]) ->
    true.

%% has_var(Term) -> boolean()
%% Tells whether Term contains a match variable: the wildcard '_' or an
%% atom of the form '$<digits>'.  Tuples and lists are searched
%% recursively; every other term yields false.
has_var('_') ->
    true;
has_var(X) when is_atom(X) ->
    is_dollar_digits(X);
has_var(X) when is_tuple(X) ->
    e_has_var(X, tuple_size(X));
has_var([H|T]) ->
    case has_var(H) of
        false -> has_var(T);
        Other -> Other
    end;
has_var(_) -> false.

%% Scans the elements of tuple X from position Pos down to 1.
e_has_var(_, 0) -> false;
e_has_var(X, Pos) ->
    case has_var(element(Pos, X)) of
        false -> e_has_var(X, Pos-1);
        Other -> Other
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Start and stop

%% Starts the application as a temporary application and reports the
%% elapsed time through mnesia_lib:verbose/2.  An already started
%% application is treated as success.
start() ->
    {Time, Res} = timer:tc(application, start, [?APPLICATION, temporary]),

    Secs = Time div 1000000,
    case Res of
        ok ->
            mnesia_lib:verbose("Mnesia started, ~p seconds~n",[ Secs]),
            ok;
        {error, {already_started, mnesia}} ->
            mnesia_lib:verbose("Mnesia already started, ~p seconds~n",[ Secs]),
            ok;
        {error, R} ->
            mnesia_lib:verbose("Mnesia failed to start, ~p seconds: ~p~n",[ Secs, R]),
            {error, R}
    end.

%% Starts the application after patching the environment with the given
%% {Env, Val} pairs.  A non-list argument gives {error, {badarg, Arg}}.
start(ExtraEnv) when is_list(ExtraEnv) ->
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        ok ->
            patched_start(ExtraEnv);
        Error ->
            Error
    end;
start(ExtraEnv) ->
    {error, {badarg, ExtraEnv}}.

%% Applies the environment patches one by one via
%% mnesia_monitor:patch_env/2 and stops at the first error; once the
%% list is exhausted the application is started.
patched_start([{Env, Val} | Tail]) when is_atom(Env) ->
    case mnesia_monitor:patch_env(Env, Val) of
        {error, Reason} ->
            {error, Reason};
        _NewVal ->
            patched_start(Tail)
    end;
patched_start([Head | _]) ->
    {error, {bad_type, Head}};
patched_start([]) ->
    start().

%% Stops the application.  Returns stopped both when it was stopped now
%% and when it was not running; any other result is passed through.
stop() ->
    case application:stop(?APPLICATION) of
        ok -> stopped;
        {error, {not_started, ?APPLICATION}} -> stopped;
        Other -> Other
    end.

%% change_config(Key, Value)
%% extra_db_nodes: hands the node list to mnesia_controller:connect_nodes/1.
%% dc_dump_limit:  stores the positive number with mnesia_lib:set/2, but
%%                 only while mnesia_lib:is_running() answers yes.
%% Any other key (or a value failing the guards) gives {error, {badarg, Key}}.
change_config(extra_db_nodes, Nodes) when is_list(Nodes) ->
    mnesia_controller:connect_nodes(Nodes);
change_config(dc_dump_limit, Limit) when is_number(Limit), Limit > 0 ->
    apply_dc_dump_limit(mnesia_lib:is_running(), Limit);
change_config(BadKey, _BadVal) ->
    {error, {badarg, BadKey}}.

%% Helper for change_config/2: first argument is the running state.
apply_dc_dump_limit(yes, Limit) ->
    mnesia_lib:set(dc_dump_limit, Limit),
    {ok, Limit};
apply_dc_dump_limit(_NotRunning, _Limit) ->
    {error, {not_started, ?APPLICATION}}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Debugging

%% Forwards the debug level to mnesia_subscr.
set_debug_level(Level) ->
    mnesia_subscr:set_debug_level(Level).

%% Local kill: calls mnesia_sup:kill/0 on this node.
lkill() ->
    mnesia_sup:kill().

%% Calls mnesia_sup:kill/0 through rpc:multicall/3.
kill() ->
    rpc:multicall(mnesia_sup, kill, []).

%% Returns the list of Mnesia module names.
ms() ->
    Leading =
        [mnesia,
         mnesia_backup,
         mnesia_bup,
         mnesia_checkpoint,
         mnesia_checkpoint_sup,
         mnesia_controller,
         mnesia_dumper,
         mnesia_loader,
         mnesia_frag,
         mnesia_frag_hash,
         mnesia_frag_old_hash,
         mnesia_index,
         mnesia_kernel_sup,
         mnesia_late_loader,
         mnesia_lib,
         mnesia_log,
         mnesia_registry,
         mnesia_schema,
         mnesia_snmp_hook,
         mnesia_snmp_sup,
         mnesia_subscr,
         mnesia_sup,
         mnesia_text,
         mnesia_tm,
         mnesia_recover,
         mnesia_locker],
    %% Keep these last in the list, so
    %% mnesia_sup kills these last
    Leading ++ [mnesia_monitor, mnesia_event].


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Activity mgt

-spec(abort/1 :: (_) -> no_return()).

%% Terminates the calling process with exit reason {aborted, Reason}.
abort(Reason) ->
    exit({aborted, Reason}).

%% True when the activity state kept in the process dictionary is a
%% 3-tuple whose second element is tagged tid.
is_transaction() ->
    case get(mnesia_activity_state) of
        {_Mod, Tid, _Ts} when element(1,Tid) == tid -> true;
        _Other -> false
    end.

%% Runs Fun as an async transaction with no arguments and infinite retries.
transaction(Fun) ->
    State = get(mnesia_activity_state),
    transaction(State, Fun, [], infinity, ?DEFAULT_ACCESS, async).
%% transaction(Fun, RetriesOrArgs)
%% A non-negative integer or the atom infinity is taken as the retry
%% count; anything else is taken as the argument list of Fun.
transaction(Fun, Retries)
  when is_integer(Retries), Retries >= 0;
       Retries == infinity ->
    transaction(get(mnesia_activity_state), Fun, [], Retries, ?DEFAULT_ACCESS, async);
transaction(Fun, Args) ->
    transaction(get(mnesia_activity_state), Fun, Args, infinity, ?DEFAULT_ACCESS, async).

transaction(Fun, Args, Retries) ->
    transaction(get(mnesia_activity_state), Fun, Args, Retries, ?DEFAULT_ACCESS, async).

%% The sync_transaction family mirrors transaction/1,2,3 but passes the
%% kind sync instead of async.
sync_transaction(Fun) ->
    transaction(get(mnesia_activity_state), Fun, [], infinity, ?DEFAULT_ACCESS, sync).

sync_transaction(Fun, Retries)
  when is_integer(Retries), Retries >= 0;
       Retries == infinity ->
    transaction(get(mnesia_activity_state), Fun, [], Retries, ?DEFAULT_ACCESS, sync);
sync_transaction(Fun, Args) ->
    transaction(get(mnesia_activity_state), Fun, Args, infinity, ?DEFAULT_ACCESS, sync).

sync_transaction(Fun, Args, Retries) ->
    transaction(get(mnesia_activity_state), Fun, Args, Retries, ?DEFAULT_ACCESS, sync).


%% Validates the arguments and delegates to mnesia_tm:transaction/6.
%% Retries must be infinity or a non-negative integer; otherwise (or when
%% Fun, Args or Mod have the wrong type) {aborted, {badarg, ...}} is
%% returned.
transaction(State, Fun, Args, Retries, Mod, Kind)
  when is_function(Fun), is_list(Args), Retries == infinity, is_atom(Mod);
       is_function(Fun), is_list(Args), is_integer(Retries), Retries >= 0, is_atom(Mod) ->
    mnesia_tm:transaction(State, Fun, Args, Retries, Mod, Kind);
transaction(_State, Fun, Args, Retries, Mod, _Kind) ->
    {aborted, {badarg, Fun, Args, Retries, Mod}}.

%% Validates the arguments and delegates to mnesia_tm:non_transaction/5.
non_transaction(State, Fun, Args, ActivityKind, Mod)
  when is_function(Fun), is_list(Args), is_atom(Mod) ->
    mnesia_tm:non_transaction(State, Fun, Args, ActivityKind, Mod);
non_transaction(_State, Fun, Args, _ActivityKind, _Mod) ->
    {aborted, {badarg, Fun, Args}}.

%% async_dirty, sync_dirty and ets run Fun (optionally with Args) as a
%% non-transaction activity of the corresponding kind, using the default
%% access module.
async_dirty(Fun) ->
    async_dirty(Fun, []).
async_dirty(Fun, Args) ->
    State = get(mnesia_activity_state),
    non_transaction(State, Fun, Args, async_dirty, ?DEFAULT_ACCESS).

sync_dirty(Fun) ->
    sync_dirty(Fun, []).
sync_dirty(Fun, Args) ->
    State = get(mnesia_activity_state),
    non_transaction(State, Fun, Args, sync_dirty, ?DEFAULT_ACCESS).

ets(Fun) ->
    ets(Fun, []).
ets(Fun, Args) ->
    State = get(mnesia_activity_state),
    non_transaction(State, Fun, Args, ets, ?DEFAULT_ACCESS).

%% activity(Kind, Fun [, Args] [, Mod])
%% With three arguments, a list is taken as Args (the access module then
%% comes from mnesia_monitor:get_env(access_module)); anything else is
%% taken as the access module.
activity(Kind, Fun) ->
    activity(Kind, Fun, []).
activity(Kind, Fun, Args) when is_list(Args) ->
    activity(Kind, Fun, Args, mnesia_monitor:get_env(access_module));
activity(Kind, Fun, Mod) ->
    activity(Kind, Fun, [], Mod).

%% Dispatches on Kind.  Dirty kinds go to non_transaction/5; transaction
%% kinds go through wrap_trans/6, which returns the bare result or exits.
%% An unknown Kind gives {aborted, {bad_type, Kind}}.
activity(Kind, Fun, Args, Mod) ->
    State = get(mnesia_activity_state),
    case Kind of
        Dirty when Dirty == ets; Dirty == async_dirty; Dirty == sync_dirty ->
            non_transaction(State, Fun, Args, Kind, Mod);
        transaction ->
            wrap_trans(State, Fun, Args, infinity, Mod, async);
        {transaction, Retries} ->
            wrap_trans(State, Fun, Args, Retries, Mod, async);
        sync_transaction ->
            wrap_trans(State, Fun, Args, infinity, Mod, sync);
        {sync_transaction, Retries} ->
            wrap_trans(State, Fun, Args, Retries, Mod, sync);
        _ ->
            {aborted, {bad_type, Kind}}
    end.

%% Runs the transaction and unwraps {atomic, Res} to Res; every other
%% outcome becomes the exit reason of the calling process.
wrap_trans(State, Fun, Args, Retries, Mod, Kind) ->
    Outcome = transaction(State, Fun, Args, Retries, Mod, Kind),
    case Outcome of
        {atomic, GoodRes} -> GoodRes;
        BadRes -> exit(BadRes)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Access within an activity - lock acquisition

%% Grab a lock on an item in the global lock table
%% Item may be any term. Lock may be write or read.
%% write lock is set on all the given nodes
%% read lock is only set on the first node
%% Nodes may either be a list of nodes or one node as an atom
%% Mnesia on all Nodes must be connected to each other, but
%% it is not necessary that they are up and running.

%% lock(LockItem, LockKind)
%% Acquires a lock within the current activity.  With the default access
%% module lock/4 is called directly, otherwise the call is routed to the
%% access module found in the activity state.  Aborts with no_transaction
%% when there is no activity state.
lock(LockItem, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} -> lock(Tid, Ts, LockItem, LockKind);
        {Mod, Tid, Ts}             -> Mod:lock(Tid, Ts, LockItem, LockKind);
        _                          -> abort(no_transaction)
    end.

%% Locks are only acquired when Tid is tagged tid; for every other
%% activity the call is a no-op returning [].
lock(Tid, Ts, LockItem, LockKind) ->
    case element(1, Tid) of
        tid       -> do_lock_item(LockItem, Tid, Ts, LockKind);
        _Protocol -> []
    end.

%% Dispatch on the shape of the lock item.
do_lock_item({record, Tab, Key}, Tid, Ts, LockKind) ->
    lock_record(Tid, Ts, Tab, Key, LockKind);
do_lock_item({table, Tab}, Tid, Ts, LockKind) ->
    lock_table(Tid, Ts, Tab, LockKind);
do_lock_item({global, GlobalKey, Nodes}, Tid, Ts, LockKind) ->
    global_lock(Tid, Ts, GlobalKey, LockKind, Nodes);
do_lock_item(LockItem, _Tid, _Ts, _LockKind) ->
    abort({bad_type, LockItem}).

%% Grab a read lock on a whole table
read_lock_table(Tab) ->
    lock({table, Tab}, read),
    ok.

%% Grab a write lock on a whole table
write_lock_table(Tab) ->
    lock({table, Tab}, write),
    ok.

%% Locks the single record {Tab, Key}.  LockKind none takes no lock and
%% returns []; an unknown kind aborts with {bad_type, Tab, LockKind}.
lock_record(Tid, Ts, Tab, Key, LockKind) when is_atom(Tab) ->
    Store = Ts#tidstore.store,
    Oid = {Tab, Key},
    if
        LockKind == read ->
            mnesia_locker:rlock(Tid, Store, Oid);
        LockKind == write ->
            mnesia_locker:wlock(Tid, Store, Oid);
        LockKind == sticky_write ->
            mnesia_locker:sticky_wlock(Tid, Store, Oid);
        LockKind == none ->
            [];
        true ->
            abort({bad_type, Tab, LockKind})
    end;
lock_record(_Tid, _Ts, Tab, _Key, _LockKind) ->
    abort({bad_type, Tab}).

%% Locks the whole table Tab; same LockKind handling as lock_record/5.
lock_table(Tid, Ts, Tab, LockKind) when is_atom(Tab) ->
    Store = Ts#tidstore.store,
    if
        LockKind == read ->
            mnesia_locker:rlock_table(Tid, Store, Tab);
        LockKind == write ->
            mnesia_locker:wlock_table(Tid, Store, Tab);
        LockKind == sticky_write ->
            mnesia_locker:sticky_wlock_table(Tid, Store, Tab);
        LockKind == none ->
            [];
        true ->
            abort({bad_type, Tab, LockKind})
    end;
lock_table(_Tid, _Ts, Tab, _LockKind) ->
    abort({bad_type, Tab}).

%% Acquires a global lock on Item on the usable subset of Nodes (see
%% good_global_nodes/1).  Only the kinds read and write are accepted.
%% Outside a tid-tagged activity nothing is locked and [] is returned.
%% A non-list Nodes argument aborts with {bad_type, Nodes}.
global_lock(Tid, Ts, Item, Kind, Nodes) when is_list(Nodes) ->
    case element(1, Tid) of
        tid ->
            Store = Ts#tidstore.store,
            GoodNs = good_global_nodes(Nodes),
            if
                Kind /= read, Kind /= write ->
                    abort({bad_type, Kind});
                true ->
                    mnesia_locker:global_lock(Tid, Store, Item, Kind, GoodNs)
            end;
        _Protocol ->
            []
    end;
global_lock(_Tid, _Ts, _Item, _Kind, Nodes) ->
    abort({bad_type, Nodes}).

%% Restricts Nodes to those also found in [node() | val(recover_nodes)].
good_global_nodes(Nodes) ->
    Recover = [node() | val(recover_nodes)],
    mnesia_lib:intersect(Nodes, Recover).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Access within an activity - updates

%% write(Record)
%% Writes Record into the table named by its first element, taking a
%% write lock.  Anything that is not a tuple of size > 2 aborts with
%% {bad_type, Val}.
write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
    Tab = element(1, Val),
    write(Tab, Val, write);
write(Val) ->
    abort({bad_type, Val}).

%% s_write(Record)
%% Same as write/1 but with a sticky write lock.  Bad input aborts with
%% {bad_type, Val}, consistent with write/1, s_delete/1 and
%% s_delete_object/1 (previously it crashed with function_clause).
s_write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
    Tab = element(1, Val),
    write(Tab, Val, sticky_write);
s_write(Val) ->
    abort({bad_type, Val}).

%% Routes the write to write/5 or to the access module found in the
%% activity state; aborts with no_transaction when there is none.
write(Tab, Val, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            write(Tid, Ts, Tab, Val, LockKind);
        {Mod, Tid, Ts} ->
            Mod:write(Tid, Ts, Tab, Val, LockKind);
        _ ->
            abort(no_transaction)
    end.

%% write(Tid, Ts, Tab, Val, LockKind)
%% ets activity:  inserts straight into the table.
%% tid activity:  takes the requested (write | sticky_write) lock on
%%                {Tab, Key}, Key being element 2 of Val, and then
%%                records the write in the transaction store.
%% otherwise:     performs a dirty write with the given protocol.
%% The schema table, non-atom tables and records of size <= 2 abort with
%% {bad_type, Tab, Val, LockKind}.
write(Tid, Ts, Tab, Val, LockKind)
  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
    case element(1, Tid) of
        ets ->
            ?ets_insert(Tab, Val),
            ok;
        tid ->
            Store = Ts#tidstore.store,
            Oid = {Tab, element(2, Val)},
            case LockKind of
                write ->
                    mnesia_locker:wlock(Tid, Store, Oid);
                sticky_write ->
                    mnesia_locker:sticky_wlock(Tid, Store, Oid);
                _ ->
                    abort({bad_type, Tab, LockKind})
            end,
            write_to_store(Tab, Store, Oid, Val);
        Protocol ->
            do_dirty_write(Protocol, Tab, Val)
    end;
write(_Tid, _Ts, Tab, Val, LockKind) ->
    abort({bad_type, Tab, Val, LockKind}).

%% Records a write of Val in the transaction store after checking Val
%% against the table's record_validation entry (record name and arity).
%% For non-bag tables earlier store entries for the same Oid are dropped
%% first.  Aborts with {no_exists, Tab} when the validation lookup exits
%% and with {bad_type, Val} when Val does not match.
write_to_store(Tab, Store, Oid, Val) ->
    case ?catch_val({Tab, record_validation}) of
        {RecName, Arity, Type}
          when tuple_size(Val) == Arity, RecName == element(1, Val) ->
            if
                Type == bag -> ok;
                true -> ?ets_delete(Store, Oid)
            end,
            ?ets_insert(Store, {Oid, Val, write}),
            ok;
        {'EXIT', _} ->
            abort({no_exists, Tab});
        _ ->
            abort({bad_type, Val})
    end.

%% delete({Tab, Key}) deletes with a write lock, s_delete/1 with a sticky
%% write lock.  Any other argument aborts with {bad_type, Oid}.
delete({Tab, Key}) -> delete(Tab, Key, write);
delete(Oid)        -> abort({bad_type, Oid}).

s_delete({Tab, Key}) -> delete(Tab, Key, sticky_write);
s_delete(Oid)        -> abort({bad_type, Oid}).

%% Routes the delete to delete/5 or to the access module found in the
%% activity state; aborts with no_transaction when there is none.
delete(Tab, Key, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} -> delete(Tid, Ts, Tab, Key, LockKind);
        {Mod, Tid, Ts}             -> Mod:delete(Tid, Ts, Tab, Key, LockKind);
        _                          -> abort(no_transaction)
    end.

%% delete(Tid, Ts, Tab, Key, LockKind)
%% ets activity:  deletes straight from the table.
%% tid activity:  takes the requested lock, then replaces whatever the
%%                store holds for the Oid with a single delete marker.
%% otherwise:     performs a dirty delete with the given protocol.
delete(Tid, Ts, Tab, Key, LockKind)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            ?ets_delete(Tab, Key),
            ok;
        tid ->
            Store = Ts#tidstore.store,
            Oid = {Tab, Key},
            if
                LockKind == write ->
                    mnesia_locker:wlock(Tid, Store, Oid);
                LockKind == sticky_write ->
                    mnesia_locker:sticky_wlock(Tid, Store, Oid);
                true ->
                    abort({bad_type, Tab, LockKind})
            end,
            ?ets_delete(Store, Oid),
            ?ets_insert(Store, {Oid, Oid, delete}),
            ok;
        Protocol ->
            do_dirty_delete(Protocol, Tab, Key)
    end;
delete(_Tid, _Ts, Tab, _Key, _LockKind) ->
    abort({bad_type, Tab}).

%% delete_object(Record) removes exactly Record from the table named by
%% its first element (write lock); s_delete_object/1 does the same with a
%% sticky write lock.  Non-record input aborts with {bad_type, Val}.
delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
    delete_object(element(1, Val), Val, write);
delete_object(Val) ->
    abort({bad_type, Val}).

s_delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
    delete_object(element(1, Val), Val, sticky_write);
s_delete_object(Val) ->
    abort({bad_type, Val}).

%% Routes the call to delete_object/5 or to the access module found in
%% the activity state; aborts with no_transaction when there is none.
delete_object(Tab, Val, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            delete_object(Tid, Ts, Tab, Val, LockKind);
        {Mod, Tid, Ts} ->
            Mod:delete_object(Tid, Ts, Tab, Val, LockKind);
        _ ->
            abort(no_transaction)
    end.

%% Rejects records containing match variables ('_' or '$<digits>') with
%% {bad_type, Tab, Val}; otherwise continues in do_delete_object/5.
%% NOTE(review): the fallback clause reports only {bad_type, Tab}, even
%% when it is Val that failed the guard.
delete_object(Tid, Ts, Tab, Val, LockKind)
  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
    case has_var(Val) of
        false ->
            do_delete_object(Tid, Ts, Tab, Val, LockKind);
        true ->
            abort({bad_type, Tab, Val})
    end;
delete_object(_Tid, _Ts, Tab, _Key, _LockKind) ->
    abort({bad_type, Tab}).

%% ets activity:  match-deletes Val straight from the table.
%% tid activity:  takes the requested lock on {Tab, Key} and updates the
%%                transaction store (see the comments below).
%% otherwise:     performs a dirty delete_object with the given protocol.
do_delete_object(Tid, Ts, Tab, Val, LockKind) ->
    case element(1, Tid) of
        ets ->
            ?ets_match_delete(Tab, Val),
            ok;
        tid ->
            Store = Ts#tidstore.store,
            Oid = {Tab, element(2, Val)},
            case LockKind of
                write ->
                    mnesia_locker:wlock(Tid, Store, Oid);
                sticky_write ->
                    mnesia_locker:sticky_wlock(Tid, Store, Oid);
                _ ->
                    abort({bad_type, Tab, LockKind})
            end,
            case val({Tab, setorbag}) of
                bag ->
                    %% Drop any store entry for exactly this object and
                    %% record the object deletion.
                    ?ets_match_delete(Store, {Oid, Val, '_'}),
                    ?ets_insert(Store, {Oid, Val, delete_object});
                _ ->
                    case ?ets_match_object(Store, {Oid, '_', write}) of
                        [] ->
                            %% No pending write for this key in the store.
                            ?ets_match_delete(Store, {Oid, Val, '_'}),
                            ?ets_insert(Store, {Oid, Val, delete_object});
                        _  ->
                            %% A write for this key is pending in the
                            %% store: replace everything stored for the
                            %% key with a plain delete marker.
                            ?ets_delete(Store, Oid),
                            ?ets_insert(Store, {Oid, Oid, delete})
                    end
            end,
            ok;
        Protocol ->
            do_dirty_delete_object(Protocol, Tab, Val)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Access within an activity - read

%% read(Tab, Key) and read({Tab, Key}) read with a read lock.
read(Tab, Key) ->
    read(Tab, Key, read).

read({Tab, Key}) ->
    read(Tab, Key, read);
read(Oid) ->
    abort({bad_type, Oid}).

%% wread({Tab, Key}) reads with a write lock.
wread({Tab, Key}) ->
    read(Tab, Key, write);
wread(Oid) ->
    abort({bad_type, Oid}).

%% Routes the read to read/5 or to the access module found in the
%% activity state; aborts with no_transaction when there is none.
read(Tab, Key, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            read(Tid, Ts, Tab, Key, LockKind);
        {Mod, Tid, Ts} ->
            Mod:read(Tid, Ts, Tab, Key, LockKind);
        _ ->
            abort(no_transaction)
    end.

%% read(Tid, Ts, Tab, Key, LockKind)
%% ets activity:  plain lookup in the table.
%% tid activity:  acquires the requested lock (the locker call also
%%                yields the objects) and then corrects the result with
%%                what this transaction has stored for the Oid.
%% otherwise:     dirty read.
read(Tid, Ts, Tab, Key, LockKind)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            ?ets_lookup(Tab, Key);
        tid ->
            Store = Ts#tidstore.store,
            Oid = {Tab, Key},
            Objs =
                if
                    LockKind == read ->
                        mnesia_locker:rlock(Tid, Store, Oid);
                    LockKind == write ->
                        mnesia_locker:rwlock(Tid, Store, Oid);
                    LockKind == sticky_write ->
                        mnesia_locker:sticky_rwlock(Tid, Store, Oid);
                    true ->
                        abort({bad_type, Tab, LockKind})
                end,
            Written = ?ets_lookup(Store, Oid),
            add_written(Written, Tab, Objs);
        _Protocol ->
            dirty_read(Tab, Key)
    end;
read(_Tid, _Ts, Tab, _Key, _LockKind) ->
    abort({bad_type, Tab}).

%% first(Tab): first key of Tab as seen by the current activity.
first(Tab) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} -> first(Tid, Ts, Tab);
        {Mod, Tid, Ts}             -> Mod:first(Tid, Ts, Tab);
        _                          -> abort(no_transaction)
    end.

%% Within a tid activity the table is read locked and fixed, and the
%% dirty result is adjusted by stored_keys/6 for keys this transaction
%% has written or deleted.
first(Tid, Ts, Tab)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            ?ets_first(Tab);
        tid ->
            lock_table(Tid, Ts, Tab, read),
            do_fixtable(Tab,Ts),
            Key = dirty_first(Tab),
            Type = val({Tab, setorbag}),
            stored_keys(Tab,Key,'$end_of_table',Ts,next,Type);
        _Protocol ->
            dirty_first(Tab)
    end;
first(_Tid, _Ts,Tab) ->
    abort({bad_type, Tab}).

%% last(Tab): last key of Tab as seen by the current activity.
last(Tab) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} -> last(Tid, Ts, Tab);
        {Mod, Tid, Ts}             -> Mod:last(Tid, Ts, Tab);
        _                          -> abort(no_transaction)
    end.

%% Mirror image of first/3, iterating with prev.
last(Tid, Ts, Tab)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            ?ets_last(Tab);
        tid ->
            lock_table(Tid, Ts, Tab, read),
            do_fixtable(Tab,Ts),
            Key = dirty_last(Tab),
            Type = val({Tab, setorbag}),
            stored_keys(Tab,Key,'$end_of_table',Ts,prev,Type);
        _Protocol ->
            dirty_last(Tab)
    end;
last(_Tid, _Ts,Tab) ->
    abort({bad_type, Tab}).

%% next(Tab, Key): key following Key as seen by the current activity.
next(Tab,Key) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS,Tid,Ts} -> next(Tid,Ts,Tab,Key);
        {Mod,Tid,Ts}             -> Mod:next(Tid,Ts,Tab,Key);
        _                        -> abort(no_transaction)
    end.
%% next(Tid, Ts, Tab, Key)
%% ets activity:  plain ets next.
%% tid activity:  read locks and fixes the table, then lets
%%                stored_keys/6 adjust the dirty result.  dirty_next is
%%                wrapped in catch on purpose: stored_keys/6 has clauses
%%                that match on the resulting {'EXIT', ...} term.
%% otherwise:     dirty next.
next(Tid,Ts,Tab,Key)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            ?ets_next(Tab,Key);
        tid ->
            lock_table(Tid, Ts, Tab, read),
            do_fixtable(Tab,Ts),
            New = (catch dirty_next(Tab,Key)),
            stored_keys(Tab,New,Key,Ts,next,
                        val({Tab, setorbag}));
        _Protocol ->
            dirty_next(Tab,Key)
    end;
next(_Tid, _Ts,Tab,_) ->
    abort({bad_type, Tab}).

%% prev(Tab, Key): key preceding Key as seen by the current activity.
prev(Tab,Key) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS,Tid,Ts} ->
            prev(Tid,Ts,Tab,Key);
        {Mod,Tid,Ts} ->
            Mod:prev(Tid,Ts,Tab,Key);
        _ ->
            abort(no_transaction)
    end.

%% Mirror image of next/4, iterating with prev.
prev(Tid,Ts,Tab,Key)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            ?ets_prev(Tab,Key);
        tid ->
            lock_table(Tid, Ts, Tab, read),
            do_fixtable(Tab,Ts),
            New = (catch dirty_prev(Tab,Key)),
            stored_keys(Tab,New,Key,Ts,prev,
                        val({Tab, setorbag}));
        _Protocol ->
            dirty_prev(Tab,Key)
    end;
prev(_Tid, _Ts,Tab,_) ->
    abort({bad_type, Tab}).

%% Compensate for transaction written and/or deleted records
%%
%% stored_keys(Tab, DirtyResult, PrevKey, Ts, Op, Type)
%% DirtyResult is what the dirty first/last/next/prev call produced
%% (possibly an {'EXIT', _} term from catch), PrevKey the key the
%% iteration came from, Op is next | prev and Type the table type.
%% The clause order matters: end-of-table and the two 'EXIT' shapes are
%% handled before the generic key clauses.
stored_keys(Tab,'$end_of_table',Prev,Ts,Op,Type) ->
    %% The table itself is exhausted; continue with keys that exist
    %% only in the transaction store.
    case ts_keys(Ts#tidstore.store,Tab,Op,Type,[]) of
        [] -> '$end_of_table';
        Keys when Type == ordered_set->
            get_ordered_tskey(Prev,Keys,Op);
        Keys ->
            get_next_tskey(Prev,Keys,Tab)
    end;
stored_keys(Tab,{'EXIT',{aborted,R={badarg,[Tab,Key]}}},
            Key,#tidstore{store=Store},Op,Type) ->
    %% Had to match on error, ouch..
    %% The dirty call failed with badarg for exactly the key we came
    %% from.  If the store has no entry for that key, or its last
    %% entry is a delete, the abort is re-raised; otherwise iteration
    %% continues among the stored keys.
    case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
        [] ->  abort(R);
        Ops ->
            case lists:last(Ops) of
                [delete] -> abort(R);
                _ ->
                    case ts_keys(Store,Tab,Op,Type,[]) of
                        [] -> '$end_of_table';
                        Keys -> get_next_tskey(Key,Keys,Tab)
                    end
            end
    end;
stored_keys(_,{'EXIT',{aborted,R}},_,_,_,_) ->
    abort(R);
stored_keys(Tab,Key,Prev,#tidstore{store=Store},Op,ordered_set) ->
    case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
        [] ->
            Keys = ts_keys(Store,Tab,Op,ordered_set,[Key]),
            get_ordered_tskey(Prev,Keys,Op);
        Ops ->
            case lists:last(Ops) of
                [delete] ->
                    %% Key was deleted in this transaction: step past it.
                    mnesia:Op(Tab,Key);
                _ ->
                    Keys = ts_keys(Store,Tab,Op,ordered_set,[Key]),
                    get_ordered_tskey(Prev,Keys,Op)
            end
    end;
stored_keys(Tab,Key,_,#tidstore{store=Store},Op,_) ->
    case ?ets_match(Store, {{Tab, Key}, '_', '$1'}) of
        [] ->  Key;
        Ops ->
            case lists:last(Ops) of
                [delete] -> mnesia:Op(Tab,Key);
                _ ->      Key
            end
    end.

%% Picks, from Keys (already ordered for the direction Op), the first key
%% lying strictly beyond Prev; Prev == '$end_of_table' selects the first
%% key.  Returns '$end_of_table' when no key qualifies.
get_ordered_tskey('$end_of_table', [First|_],_) ->    First;
get_ordered_tskey(Prev, [First|_], next) when Prev < First -> First;
get_ordered_tskey(Prev, [First|_], prev) when Prev > First -> First;
get_ordered_tskey(Prev, [_|R],Op) ->  get_ordered_tskey(Prev,R,Op);
get_ordered_tskey(_, [],_) ->    '$end_of_table'.

%% Picks the stored key following Key in Keys (the first stored key when
%% Key is '$end_of_table' or not among Keys).  A candidate that
%% dirty_read/2 finds in the table is skipped, because it has already
%% been returned while iterating over the table itself.
get_next_tskey(Key,Keys,Tab) ->
    Next =
        if Key == '$end_of_table' -> hd(Keys);
           true ->
                case lists:dropwhile(fun(A) -> A /= Key end, Keys) of
                    [] -> hd(Keys); %% First stored key
                    [Key] -> '$end_of_table';
                    [Key,Next2|_] -> Next2
                end
        end,
    case Next of
        '$end_of_table' -> '$end_of_table';
        _ -> %% Really slow anybody got another solution??
            case dirty_read(Tab, Next) of
                [] -> Next;
                _ ->
                    %% Updated value we already returned this key
                    get_next_tskey(Next,Keys,Tab)
            end
    end.

%% ts_keys(Store, Tab, Op, Type, Def)
%% Collects the keys of Tab found in the transaction store, starting
%% from the accumulator Def, and orders them for the requested direction:
%% sorted (reversed for prev) for ordered_set tables, otherwise in
%% accumulation order for prev and reversed for next.
ts_keys(Store, Tab, Op, Type, Def) ->
    Entries = ?ets_match(Store, {{Tab,'$1'},'_','$2'}),
    Keys = ts_keys_1(Entries, Def),
    case {Type, Op} of
        {ordered_set, prev} -> lists:reverse(lists:sort(Keys));
        {ordered_set, _}    -> lists:sort(Keys);
        {_, next}           -> lists:reverse(Keys);
        _                   -> Keys
    end.

%% Folds [Key, Operation] pairs into a key accumulator: a write pushes
%% the key unless it is already on top, a delete pops the key when it is
%% on top, and every other entry is ignored.
ts_keys_1([[Key, write]|Rest], []) ->
    ts_keys_1(Rest, [Key]);
ts_keys_1([[Key, write]|Rest], Acc=[Key|_]) ->
    ts_keys_1(Rest, Acc);
ts_keys_1([[Key, write]|Rest], Acc) ->
    ts_keys_1(Rest, [Key|Acc]);
ts_keys_1([[Key, delete]|Rest], [Key|Acc]) ->
    ts_keys_1(Rest, Acc);
ts_keys_1([_|Rest], Acc) ->
    ts_keys_1(Rest, Acc);
ts_keys_1([], Acc) ->
    Acc.


%%%%%%%%%%%%%%%%%%%%%
%% Iterators

%% foldl(Fun, Acc, Tab) folds over all records of Tab with a read lock.
foldl(Fun, Acc, Tab) ->
    foldl(Fun, Acc, Tab, read).

%% Routes the fold to foldl/6 or to the access module found in the
%% activity state; aborts with no_transaction when there is none.
foldl(Fun, Acc, Tab, LockKind) when is_function(Fun) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} -> foldl(Tid, Ts, Fun, Acc, Tab, LockKind);
        {Mod, Tid, Ts}             -> Mod:foldl(Tid, Ts, Fun, Acc, Tab, LockKind);
        _                          -> abort(no_transaction)
    end.

%% Sets the iteration up, runs the fold under catch and always hands the
%% outcome to close_iteration/2.
foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
    {Type, Prev} = init_iteration(ActivityId, Opaque, Tab, LockKind),
    First = dirty_first(Tab),
    Res = (catch do_foldl(ActivityId, Opaque, Tab, First, Fun, Acc, Type, Prev)),
    close_iteration(Res, Tab).

%% do_foldl(ActivityId, Opaque, Tab, Key, Fun, Acc, Type, Stored)
%% Walks the table keys with dirty_next/2, folding Fun over the records
%% that read/5 returns for each key.  Stored holds the sorted keys
%% written by this transaction (see add_previous/4); for ordered_set
%% tables they are merged into the walk in key order, for other table
%% types each visited key is removed from Stored and whatever remains is
%% folded once the table is exhausted.
%% After every callback the activity state is fetched again from the
%% process dictionary and used for the next step.
%% NOTE(review): presumably because Fun may have changed the activity
%% state - confirm before replacing it with the A/O arguments.
do_foldl(A, O, Tab, '$end_of_table', Fun, RAcc, _Type, Stored) ->
    %% Table exhausted: fold over the keys that only exist in the store.
    lists:foldl(fun(Key, Acc) ->
                        lists:foldl(Fun, Acc, read(A, O, Tab, Key, read))
                end, RAcc, Stored);
do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H == Key ->
    NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
    {_, Tid, Ts} = get(mnesia_activity_state),
    do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, NewAcc, ordered_set, Stored);
do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H < Key ->
    %% A stored key sorts before the current table key: visit it first.
    NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, H, read)),
    {_, Tid, Ts} = get(mnesia_activity_state),
    do_foldl(Tid, Ts, Tab, Key, Fun, NewAcc, ordered_set, Stored);
do_foldl(A, O, Tab, Key, Fun, Acc, ordered_set, [H | Stored]) when H > Key ->
    NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
    {_, Tid, Ts} = get(mnesia_activity_state),
    do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, NewAcc, ordered_set, [H |Stored]);
do_foldl(A, O, Tab, Key, Fun, Acc, Type, Stored) ->  %% Type is set or bag
    NewAcc = lists:foldl(Fun, Acc, read(A, O, Tab, Key, read)),
    NewStored = ordsets:del_element(Key, Stored),
    {_, Tid, Ts} = get(mnesia_activity_state),
    do_foldl(Tid, Ts, Tab, dirty_next(Tab, Key), Fun, NewAcc, Type, NewStored).

%% foldr(Fun, Acc, Tab) folds over all records of Tab, walking the keys
%% backwards, with a read lock.
foldr(Fun, Acc, Tab) ->
    foldr(Fun, Acc, Tab, read).
%% Routes the fold to foldr/6 or to the access module found in the
%% activity state; aborts with no_transaction when there is none.
foldr(Fun, Acc, Tab, LockKind) when is_function(Fun) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            foldr(Tid, Ts, Fun, Acc, Tab, LockKind);
        {Mod, Tid, Ts} ->
            Mod:foldr(Tid, Ts, Fun, Acc, Tab, LockKind);
        _ ->
            abort(no_transaction)
    end.
%% Access-module callback for foldr. The iteration runs inside a catch so
%% that close_iteration/2 is always reached.
foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
    {Type, TempPrev} = init_iteration(ActivityId, Opaque, Tab, LockKind),
    Prev = case Type of
               ordered_set ->
                   lists:reverse(TempPrev);
               _ ->
                   %% Order doesn't matter for set and bag; keep the order
                   %% so we can use ordsets:del_element
                   TempPrev
           end,
    Res = (catch do_foldr(ActivityId, Opaque, Tab, dirty_last(Tab), Fun, Acc, Type, Prev)),
    close_iteration(Res, Tab).

%% Walk the table keys in descending order (dirty_prev/2) while merging in
%% the keys in Stored. Every visited key is read through read/5 and the
%% records found are folded with Fun.
do_foldr(A, O, Tab, '$end_of_table', Fun, RAcc, _Type, Stored) ->
    %% The table is exhausted, fold the stored keys that are left.
    FoldKey = fun(Key, Acc) ->
                      lists:foldl(Fun, Acc, read(A, O, Tab, Key, read))
              end,
    lists:foldl(FoldKey, RAcc, Stored);
do_foldr(A, O, Tab, Key, Fun, Acc0, ordered_set, [H | Rest] = Stored) ->
    %% Exactly one of the three comparisons holds for any pair of terms.
    if
        H == Key ->
            %% Table key and stored key coincide, consume both.
            Acc1 = lists:foldl(Fun, Acc0, read(A, O, Tab, Key, read)),
            {_, Tid, Ts} = get(mnesia_activity_state),
            do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, Acc1, ordered_set, Rest);
        H > Key ->
            %% The stored key comes first; the table position is kept.
            Acc1 = lists:foldl(Fun, Acc0, read(A, O, Tab, H, read)),
            {_, Tid, Ts} = get(mnesia_activity_state),
            do_foldr(Tid, Ts, Tab, Key, Fun, Acc1, ordered_set, Rest);
        H < Key ->
            %% The table key comes first; the stored key is kept for later.
            Acc1 = lists:foldl(Fun, Acc0, read(A, O, Tab, Key, read)),
            {_, Tid, Ts} = get(mnesia_activity_state),
            do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, Acc1, ordered_set, Stored)
    end;
do_foldr(A, O, Tab, Key, Fun, Acc0, Type, Stored) -> %% Type is set or bag
    Acc1 = lists:foldl(Fun, Acc0, read(A, O, Tab, Key, read)),
    Remaining = ordsets:del_element(Key, Stored),
    {_, Tid, Ts} = get(mnesia_activity_state),
    do_foldr(Tid, Ts, Tab, dirty_prev(Tab, Key), Fun, Acc1, Type, Remaining).
%% Prepare an iteration over Tab: take the table lock, look up the table
%% type, collect the keys written by this activity and fix the table unless
%% its storage type is unknown. Returns {Type, Previous}.
init_iteration(ActivityId, Opaque, Tab, LockKind) ->
    lock(ActivityId, Opaque, {table, Tab}, LockKind),
    Type = val({Tab, setorbag}),
    Previous = add_previous(ActivityId, Opaque, Type, Tab),
    case val({Tab, storage_type}) of
        unknown ->
            ignore;
        Storage ->
            mnesia_lib:db_fixtable(Storage, Tab, true)
    end,
    {Type, Previous}.

%% Release the table fix taken by init_iteration/4 and hand back the result
%% of the iteration. A caught 'EXIT' is turned into an abort.
close_iteration(Res, Tab) ->
    Storage = val({Tab, storage_type}),
    if
        Storage == unknown ->
            ignore;
        true ->
            mnesia_lib:db_fixtable(Storage, Tab, false)
    end,
    case Res of
        {'EXIT', {aborted, What}} ->
            abort(What);
        {'EXIT', What} ->
            abort(What);
        _ ->
            Res
    end.

%% Sorted list of the keys of Tab that have a write operation in the
%% transaction store. Empty outside a transaction.
add_previous(_ActivityId, non_transaction, _Type, _Tab) ->
    [];
add_previous(_Tid, Ts, _Type, Tab) ->
    Store = Ts#tidstore.store,
    WrittenKeys = ?ets_match(Store, {{Tab, '$1'}, '_', write}),
    lists:sort(lists:concat(WrittenKeys)).

%% This routine fixes up the return value from read/1 so that
%% it is correct with respect to what this particular transaction
%% has already written, deleted .... etc

add_written([], _Tab, Objs) ->
    Objs;  % standard normal fast case
add_written(Written, Tab, Objs) ->
    case val({Tab, setorbag}) of
        bag ->
            add_written_to_bag(Written, Objs, []);
        _ ->
            add_written_to_set(Written)
    end.

%% For a set only the last operation on the key decides the outcome.
add_written_to_set(Ws) ->
    case lists:last(Ws) of
        {_, Val, write} -> [Val];
        {_, _, delete} -> [];
        {_, _, delete_object} -> []
    end.

%% Replay the operations on a bag key in order. Objs holds what is left of
%% the objects read from the table, Ack the objects written so far (newest
%% first).
add_written_to_bag([], Objs, Ack) ->
    Objs ++ lists:reverse(Ack); %% Oldest write first as in ets
add_written_to_bag([{_, Val, write} | Ops], Objs, Ack) ->
    add_written_to_bag(Ops, lists:delete(Val, Objs), [Val | Ack]);
add_written_to_bag([{_, _, delete} | Ops], _Objs, _Ack) ->
    %% This transaction just deleted all objects
    %% with this key
    add_written_to_bag(Ops, [], []);
add_written_to_bag([{_, Val, delete_object} | Ops], Objs, Ack) ->
    add_written_to_bag(Ops, lists:delete(Val, Objs), lists:delete(Val, Ack)).
%% Match Pat against the table named by its first element, with a read lock.
match_object(Pat) when is_tuple(Pat), tuple_size(Pat) > 2 ->
    match_object(element(1, Pat), Pat, read);
match_object(Pat) ->
    abort({bad_type, Pat}).

%% Route the call to the access module of the current activity; aborts with
%% no_transaction when there is none.
match_object(Tab, Pat, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            match_object(Tid, Ts, Tab, Pat, LockKind);
        {AccessMod, Tid, Ts} ->
            AccessMod:match_object(Tid, Ts, Tab, Pat, LockKind);
        _ ->
            abort(no_transaction)
    end.

%% Access-module callback. Within a transaction the matching record or the
%% whole table is locked, and the result of the dirty match is corrected
%% with the operations found in the transaction store.
match_object(Tid, Ts, Tab, Pat, LockKind)
  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
    case element(1, Tid) of
        ets ->
            mnesia_lib:db_match_object(ram_copies, Tab, Pat);
        tid ->
            KeyPat = element(2, Pat),
            %% A fully bound key only needs a record lock
            case has_var(KeyPat) of
                true -> lock_table(Tid, Ts, Tab, LockKind);
                false -> lock_record(Tid, Ts, Tab, KeyPat, LockKind)
            end,
            Objs = dirty_match_object(Tab, Pat),
            add_written_match(Ts#tidstore.store, Pat, Tab, Objs);
        _Protocol ->
            dirty_match_object(Tab, Pat)
    end;
match_object(_Tid, _Ts, Tab, Pat, _LockKind) ->
    abort({bad_type, Tab, Pat}).

%% Apply the operations in store S that concern Pat to the objects read
%% from the table.
add_written_match(S, Pat, Tab, Objs) ->
    Ops = find_ops(S, Tab, Pat),
    add_match(Ops, Objs, val({Tab, setorbag})).

%% Select the stored operations on Tab: writes and delete_objects whose
%% value matches Pat, and every delete.
find_ops(S, Tab, Pat) ->
    Oid = {Tab, '_'},
    MatchSpec = [{{Oid, Pat, write}, [], ['$_']},
                 {{Oid, '_', delete}, [], ['$_']},
                 {{Oid, Pat, delete_object}, [], ['$_']}],
    ets:select(S, MatchSpec).

%% Replay the stored operations on top of the objects found in the table.
add_match([], Objs, _Type) ->
    Objs;
add_match(Ops, Objs, ordered_set) ->
    %% Must use keysort which is stable
    add_ordered_match(lists:keysort(1, Ops), Objs, []);
add_match([{Oid, _, delete} | Ops], Objs, Type) ->
    add_match(Ops, deloid(Oid, Objs), Type);
add_match([{_Oid, Val, delete_object} | Ops], Objs, Type) ->
    add_match(Ops, lists:delete(Val, Objs), Type);
add_match([{_Oid, Val, write} | Ops], Objs, bag) ->
    add_match(Ops, [Val | lists:delete(Val, Objs)], bag);
add_match([{Oid, Val, write} | Ops], Objs, set) ->
    add_match(Ops, [Val | deloid(Oid, Objs)], set).

%% For ordered_set only !!
%% Merge the key-sorted operations of the transaction into the object list
%% read from the table. Acc collects, in reverse order, the objects that
%% have already been passed. The result is a single object list.
add_ordered_match(Written = [{{_, Key}, _, _}|_], [Obj|Objs], Acc)
  when Key > element(2, Obj) ->
    %% The object sorts before the next operation, keep it unchanged
    add_ordered_match(Written, Objs, [Obj|Acc]);
add_ordered_match([{{_, Key}, Val, write}|Rest], Objs =[Obj|_], Acc)
  when Key < element(2, Obj) ->
    %% Written key sorts before the first object, put it in front
    add_ordered_match(Rest, [Val|Objs],Acc);
add_ordered_match([{{_, Key}, _, _DelOP}|Rest], Objs =[Obj|_], Acc)
  when Key < element(2, Obj) ->
    %% Delete of a key that sorts before the first object, nothing to remove
    add_ordered_match(Rest,Objs,Acc);
%% Greater than last object
add_ordered_match([{_, Val, write}|Rest], [], Acc) ->
    add_ordered_match(Rest, [Val], Acc);
add_ordered_match([_|Rest], [], Acc) ->
    add_ordered_match(Rest, [], Acc);
%% Keys are equal from here
add_ordered_match([{_, Val, write}|Rest], [_Obj|Objs], Acc) ->
    %% The write replaces the object with the same key
    add_ordered_match(Rest, [Val|Objs], Acc);
add_ordered_match([{_, _Val, delete}|Rest], [_Obj|Objs], Acc) ->
    add_ordered_match(Rest, Objs, Acc);
add_ordered_match([{_, Val, delete_object}|Rest], [Val|Objs], Acc) ->
    %% Only removed when the stored value is identical to the object
    add_ordered_match(Rest, Objs, Acc);
add_ordered_match([{_, _, delete_object}|Rest], Objs, Acc) ->
    add_ordered_match(Rest, Objs, Acc);
add_ordered_match([], Objs, Acc) ->
    lists:reverse(Acc, Objs).

%% For select chunk
%% Dispatches on the table type: ordered_set goes to
%% add_sel_ordered_match/3, everything else to add_sel_match/4.
add_sel_match(Sorted, Objs, ordered_set) ->
    add_sel_ordered_match(Sorted, Objs, []);
add_sel_match(Written, Objs, Type) ->
    add_sel_match(Written, Objs, Type, []).
%% Apply the stored operations to one chunk of objects (set and bag).
%% Returns {Objs, RemainingOps}; Acc gathers the operations that are kept
%% for later chunks, in their original order.
add_sel_match([], Objs, _Type, Acc) ->
    {Objs,lists:reverse(Acc)};
add_sel_match([Op={Oid, _, delete}|R], Objs, Type, Acc) ->
    case deloid(Oid, Objs) of
        Objs ->
            %% Nothing was removed from this chunk, keep the operation
            add_sel_match(R, Objs, Type, [Op|Acc]);
        NewObjs when Type == set ->
            add_sel_match(R, NewObjs, Type, Acc);
        NewObjs -> %% If bag we may get more in next chunk
            add_sel_match(R, NewObjs, Type, [Op|Acc])
    end;
add_sel_match([Op = {_Oid, Val, delete_object}|R], Objs, Type, Acc) ->
    case lists:delete(Val, Objs) of
        Objs ->
            %% Nothing was removed from this chunk, keep the operation
            add_sel_match(R, Objs, Type, [Op|Acc]);
        NewObjs when Type == set ->
            add_sel_match(R, NewObjs, Type, Acc);
        NewObjs ->
            add_sel_match(R, NewObjs, Type, [Op|Acc])
    end;
add_sel_match([Op={Oid={_,Key}, Val, write}|R], Objs, bag, Acc) ->
    case lists:keymember(Key, 2, Objs) of
        true ->
            %% The key is present in this chunk: add the written value here
            %% and carry a delete_object for it in the remaining operations
            add_sel_match(R,[Val|lists:delete(Val,Objs)],bag,
                          [{Oid,Val,delete_object}|Acc]);
        false ->
            add_sel_match(R,Objs,bag,[Op|Acc])
    end;
add_sel_match([Op={Oid, Val, write}|R], Objs, set, Acc) ->
    case deloid(Oid,Objs) of
        Objs ->
            %% Key not in this chunk, keep the write for later
            add_sel_match(R, Objs,set, [Op|Acc]);
        NewObjs ->
            add_sel_match(R, [Val | NewObjs],set, Acc)
    end.

%% For ordered_set only !!
%% Merge the key-sorted operations into one chunk of objects. Returns
%% {Objs, RemainingOps}; operations that sort after the last object of the
%% chunk are handed back untouched.
add_sel_ordered_match(Ops = [{{_, Key}, _, _} | _], [Obj | Objs], Passed)
  when Key > element(2, Obj) ->
    add_sel_ordered_match(Ops, Objs, [Obj | Passed]);
add_sel_ordered_match([{{_, Key}, Val, write} | Ops], Objs = [Obj | _], Passed)
  when Key < element(2, Obj) ->
    add_sel_ordered_match(Ops, [Val | Objs], Passed);
add_sel_ordered_match([{{_, Key}, _, _DelOP} | Ops], Objs = [Obj | _], Passed)
  when Key < element(2, Obj) ->
    add_sel_ordered_match(Ops, Objs, Passed);
%% Greater than last object
add_sel_ordered_match(Ops, [], Passed) ->
    {lists:reverse(Passed), Ops};
%% Keys are equal from here
add_sel_ordered_match([{_, Val, write} | Ops], [_Obj | Objs], Passed) ->
    add_sel_ordered_match(Ops, [Val | Objs], Passed);
add_sel_ordered_match([{_, _Val, delete} | Ops], [_Obj | Objs], Passed) ->
    add_sel_ordered_match(Ops, Objs, Passed);
add_sel_ordered_match([{_, Val, delete_object} | Ops], [Val | Objs], Passed) ->
    add_sel_ordered_match(Ops, Objs, Passed);
add_sel_ordered_match([{_, _, delete_object} | Ops], Objs, Passed) ->
    add_sel_ordered_match(Ops, Objs, Passed);
add_sel_ordered_match([], Objs, Passed) ->
    {lists:reverse(Passed, Objs), []}.


%% Remove every object whose second element equals the key of the oid.
deloid(_Oid, []) ->
    [];
deloid({_Tab, Key} = Oid, [Obj | Objs]) when element(2, Obj) == Key ->
    deloid(Oid, Objs);
deloid(Oid, [Obj | Objs]) ->
    [Obj | deloid(Oid, Objs)].

%%%%%%%%%%%%%%%%%%
% select

%% Run the match specification Pat on Tab with a read lock.
select(Tab, Pat) ->
    select(Tab, Pat, read).

%% Run the match specification Pat on Tab, routed to the access module of
%% the current activity. Aborts with no_transaction outside an activity.
select(Tab, Pat, LockKind)
  when is_atom(Tab), Tab /= schema, is_list(Pat) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            select(Tid, Ts, Tab, Pat, LockKind);
        {AccessMod, Tid, Ts} ->
            AccessMod:select(Tid, Ts, Tab, Pat, LockKind);
        _ ->
            abort(no_transaction)
    end;
select(Tab, Pat, _Lock) ->
    abort({badarg, Tab, Pat}).

%% Access-module callback; the table is read through dirty_select/2.
select(Tid, Ts, Tab, Spec, LockKind) ->
    DirtySelect = fun(FixedSpec) -> dirty_select(Tab, FixedSpec) end,
    fun_select(Tid, Ts, Tab, Spec, LockKind, Tab, DirtySelect).
%% Run a select through SelectFun. In a transaction the lock is taken first
%% and, when the store holds operations on TabPat, the table result is
%% corrected with them before the original match specification is applied.
fun_select(Tid, Ts, Tab, Spec, LockKind, TabPat, SelectFun) ->
    case element(1, Tid) of
        ets ->
            mnesia_lib:db_select(ram_copies, Tab, Spec);
        tid ->
            select_lock(Tid, Ts, LockKind, Spec, Tab),
            Store = Ts#tidstore.store,
            case ?ets_match_object(Store, {{TabPat, '_'}, '_', '_'}) of
                [] ->
                    %% Nothing changed in the table during this transaction,
                    %% Simple case get results from [d]ets
                    SelectFun(Spec);
                Written ->
                    %% Hard (slow case) records added or deleted earlier
                    %% in the transaction, have to cope with that.
                    Type = val({Tab, setorbag}),
                    FixedSpec = get_record_pattern(Spec),
                    TabRecs = SelectFun(FixedSpec),
                    FixedRes = add_match(Written, TabRecs, Type),
                    CMS = ets:match_spec_compile(Spec),
                    ets:match_spec_run(FixedRes, CMS)
            end;
        _Protocol ->
            SelectFun(Spec)
    end.

%% Lock what the select needs: a single record when the specification has
%% exactly one clause with a fully bound key, otherwise the whole table.
select_lock(Tid, Ts, LockKind, Spec, Tab) ->
    %% Avoid table lock if possible
    case Spec of
        [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
            KeyPat = element(2, HeadPat),
            case has_var(KeyPat) of
                true -> lock_table(Tid, Ts, Tab, LockKind);
                false -> lock_record(Tid, Ts, Tab, KeyPat, LockKind)
            end;
        _ ->
            lock_table(Tid, Ts, Tab, LockKind)
    end.

%% Breakable Select
select(Tab, Pat, NObjects, LockKind)
  when is_atom(Tab), Tab /= schema, is_list(Pat), is_integer(NObjects) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            select(Tid, Ts, Tab, Pat, NObjects, LockKind);
        {AccessMod, Tid, Ts} ->
            AccessMod:select(Tid, Ts, Tab, Pat, NObjects, LockKind);
        _ ->
            abort(no_transaction)
    end;
select(Tab, Pat, NObjects, _Lock) ->
    abort({badarg, Tab, Pat, NObjects}).

%% Access-module callback for the chunked select. The first chunk is fetched
%% with dirty_sel_init/5 on the node the table is read from.
select(Tid, Ts, Tab, Spec, NObjects, LockKind) ->
    Node = val({Tab, where_to_read}),
    Storage = mnesia_lib:storage_type_at_node(Node, Tab),
    InitFun = fun(FixedSpec) ->
                      dirty_sel_init(Node, Tab, FixedSpec, NObjects, Storage)
              end,
    fun_select(Tid, Ts, Tab, Spec, LockKind, Tab, InitFun, NObjects, Node, Storage).
%% Continuation state of a chunked select.
-record(mnesia_select, {tab,
                        tid,
                        node,
                        storage,
                        cont,
                        written=[],
                        spec,
                        type,
                        orig}).

%% Start a chunked select. Returns what select_state/2 or trans_select/2
%% produce for the first chunk obtained from Init.
fun_select(Tid, Ts, Tab, Spec, LockKind, TabPat, Init, NObjects, Node, Storage) ->
    Def = #mnesia_select{tid=Tid,node=Node,storage=Storage,tab=Tab,orig=Spec},
    case element(1, Tid) of
        ets ->
            FirstChunk = mnesia_lib:db_select_init(ram_copies, Tab, Spec, NObjects),
            select_state(FirstChunk, Def);
        tid ->
            select_lock(Tid, Ts, LockKind, Spec, Tab),
            Store = Ts#tidstore.store,
            do_fixtable(Tab, Store),

            case ?ets_match_object(Store, {{TabPat, '_'}, '_', '_'}) of
                [] ->
                    %% Nothing changed in the table during this transaction,
                    %% Simple case get results from [d]ets
                    select_state(Init(Spec), Def);
                Written0 ->
                    %% Hard (slow case) records added or deleted earlier
                    %% in the transaction, have to cope with that.
                    Type = val({Tab, setorbag}),
                    Written = case Type of
                                  ordered_set ->
                                      %% Sort stable
                                      lists:keysort(1, Written0);
                                  _ ->
                                      Written0
                              end,
                    FixedSpec = get_record_pattern(Spec),
                    CMS = ets:match_spec_compile(Spec),
                    trans_select(Init(FixedSpec),
                                 Def#mnesia_select{written=Written,
                                                   spec=CMS,
                                                   type=Type,
                                                   orig=FixedSpec})
            end;
        _Protocol ->
            select_state(Init(Spec), Def)
    end.

%% Fetch the next chunk of a chunked select, routed to the access module of
%% the current activity. Aborts with no_transaction outside an activity.
select(Cont) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            select_cont(Tid, Ts, Cont);
        {AccessMod, Tid, Ts} ->
            AccessMod:select_cont(Tid, Ts, Cont);
        _ ->
            abort(no_transaction)
    end.
%% Continue a chunked select. The continuation must carry the tid of the
%% calling activity, otherwise the call aborts with wrong_transaction.
select_cont(_Tid, _Ts, '$end_of_table') ->
    '$end_of_table';
select_cont(Tid, _Ts, State = #mnesia_select{tid=Tid, cont=Cont, orig=Ms})
  when element(1, Tid) == ets ->
    case Cont of
        '$end_of_table' ->
            '$end_of_table';
        _ ->
            NextChunk = mnesia_lib:db_select_cont(ram_copies, Cont, Ms),
            select_state(NextChunk, State)
    end;
select_cont(Tid, _, State = #mnesia_select{tid=Tid, written=[]}) ->
    %% No stored operations left to apply
    select_state(dirty_sel_cont(State), State);
select_cont(Tid, _Ts, State = #mnesia_select{tid=Tid}) ->
    trans_select(dirty_sel_cont(State), State);
select_cont(_Tid2, _, #mnesia_select{tid=_Tid1}) -> % Missmatching tids
    abort(wrong_transaction);
select_cont(_, _, Cont) ->
    abort({badarg, Cont}).

%% Combine one chunk with the stored operations and run the compiled match
%% specification on the outcome. At the end of the table the remaining
%% operations are applied to an empty object list.
trans_select('$end_of_table', #mnesia_select{written=Ops, spec=CMS, type=Type}) ->
    Leftover = add_match(Ops, [], Type),
    {ets:match_spec_run(Leftover, CMS), '$end_of_table'};
trans_select({TabRecs, Cont}, State = #mnesia_select{written=Ops, spec=CMS, type=Type}) ->
    {FixedRes, RemainingOps} = add_sel_match(Ops, TabRecs, Type),
    select_state({ets:match_spec_run(FixedRes, CMS), Cont},
                 State#mnesia_select{written=RemainingOps}).

%% Wrap a chunk result; the continuation is stored in the select state.
select_state('$end_of_table', _) ->
    '$end_of_table';
select_state({Matches, Cont}, MS) ->
    {Matches, MS#mnesia_select{cont=Cont}}.

%% Rewrite every clause of a match specification so that it returns the
%% whole object.
get_record_pattern([]) ->
    [];
get_record_pattern([{Head, Conds, _Body} | Clauses]) ->
    [{Head, Conds, ['$_']} | get_record_pattern(Clauses)].

%% All keys of Tab, read with a read lock through the access module of the
%% current activity. Aborts with no_transaction outside an activity.
all_keys(Tab) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            all_keys(Tid, Ts, Tab, read);
        {AccessMod, Tid, Ts} ->
            AccessMod:all_keys(Tid, Ts, Tab, read);
        _ ->
            abort(no_transaction)
    end.

%% Access-module callback. The keys are selected with the wild pattern of
%% the table; for a bag the result is passed through mnesia_lib:uniq/1.
all_keys(Tid, Ts, Tab, LockKind)
  when is_atom(Tab), Tab /= schema ->
    WildPat = val({Tab, wild_pattern}),
    KeyPat = setelement(2, WildPat, '$1'),
    Keys = select(Tid, Ts, Tab, [{KeyPat, [], ['$1']}], LockKind),
    case val({Tab, setorbag}) of
        bag -> mnesia_lib:uniq(Keys);
        _ -> Keys
    end;
all_keys(_Tid, _Ts, Tab, _LockKind) ->
    abort({bad_type, Tab}).
%% Match Pat through the index on Attr; the table is the first element of
%% the pattern and a read lock is used.
index_match_object(Pat, Attr) when is_tuple(Pat), tuple_size(Pat) > 2 ->
    index_match_object(element(1, Pat), Pat, Attr, read);
index_match_object(Pat, _Attr) ->
    abort({bad_type, Pat}).

%% Route the call to the access module of the current activity; aborts with
%% no_transaction when there is none.
index_match_object(Tab, Pat, Attr, LockKind) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind);
        {AccessMod, Tid, Ts} ->
            AccessMod:index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind);
        _ ->
            abort(no_transaction)
    end.

%% Access-module callback. In a transaction only the lock kind `read` is
%% accepted; the table is read-locked and the dirty result is corrected with
%% the operations in the transaction store.
index_match_object(Tid, Ts, Tab, Pat, Attr, LockKind)
  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
    case element(1, Tid) of
        ets ->
            dirty_index_match_object(Tab, Pat, Attr); % Should be optimized?
        tid ->
            case mnesia_schema:attr_tab_to_pos(Tab, Attr) of
                Pos when Pos =< tuple_size(Pat), LockKind == read ->
                    Store = Ts#tidstore.store,
                    mnesia_locker:rlock_table(Tid, Store, Tab),
                    Objs = dirty_index_match_object(Tab, Pat, Attr),
                    add_written_match(Store, Pat, Tab, Objs);
                Pos when Pos =< tuple_size(Pat) ->
                    abort({bad_type, Tab, LockKind});
                BadPos ->
                    abort({bad_type, Tab, BadPos})
            end;
        _Protocol ->
            dirty_index_match_object(Tab, Pat, Attr)
    end;
index_match_object(_Tid, _Ts, Tab, Pat, _Attr, _LockKind) ->
    abort({bad_type, Tab, Pat}).

%% Read through the index on Attr with a read lock, routed to the access
%% module of the current activity.
index_read(Tab, Key, Attr) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            index_read(Tid, Ts, Tab, Key, Attr, read);
        {AccessMod, Tid, Ts} ->
            AccessMod:index_read(Tid, Ts, Tab, Key, Attr, read);
        _ ->
            abort(no_transaction)
    end.

%% Access-module callback. In a transaction only the lock kind `read` and a
%% key without variables are accepted; the index result is corrected with
%% the operations in the transaction store.
index_read(Tid, Ts, Tab, Key, Attr, LockKind)
  when is_atom(Tab), Tab /= schema ->
    case element(1, Tid) of
        ets ->
            dirty_index_read(Tab, Key, Attr); % Should be optimized?
        tid ->
            Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
            case LockKind of
                read ->
                    case has_var(Key) of
                        true ->
                            abort({bad_type, Tab, Attr, Key});
                        false ->
                            Store = Ts#tidstore.store,
                            Objs = mnesia_index:read(Tid, Store, Tab, Key, Pos),
                            Pat = setelement(Pos, val({Tab, wild_pattern}), Key),
                            add_written_match(Store, Pat, Tab, Objs)
                    end;
                _ ->
                    abort({bad_type, Tab, LockKind})
            end;
        _Protocol ->
            dirty_index_read(Tab, Key, Attr)
    end;
index_read(_Tid, _Ts, Tab, _Key, _Attr, _LockKind) ->
    abort({bad_type, Tab}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Dirty access regardless of activities - updates

%% Dirty write of Val into the table named by its first element.
dirty_write(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
    dirty_write(element(1, Val), Val);
dirty_write(Val) ->
    abort({bad_type, Val}).

dirty_write(Tab, Val) ->
    do_dirty_write(async_dirty, Tab, Val).

%% The record must agree with the record_validation entry of the table
%% (record name and arity) before it is handed to mnesia_tm:dirty/2.
do_dirty_write(SyncMode, Tab, Val)
  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
    case ?catch_val({Tab, record_validation}) of
        {RecName, Arity, _Type}
          when tuple_size(Val) == Arity, RecName == element(1, Val) ->
            Key = element(2, Val),
            mnesia_tm:dirty(SyncMode, {{Tab, Key}, Val, write});
        {'EXIT', _} ->
            abort({no_exists, Tab});
        _ ->
            abort({bad_type, Val})
    end;
do_dirty_write(_SyncMode, Tab, Val) ->
    abort({bad_type, Tab, Val}).

%% Dirty delete of everything stored under the oid {Tab, Key}.
dirty_delete({Tab, Key}) ->
    dirty_delete(Tab, Key);
dirty_delete(Oid) ->
    abort({bad_type, Oid}).

dirty_delete(Tab, Key) ->
    do_dirty_delete(async_dirty, Tab, Key).

do_dirty_delete(SyncMode, Tab, Key) when is_atom(Tab), Tab /= schema ->
    Oid = {Tab, Key},
    mnesia_tm:dirty(SyncMode, {Oid, Oid, delete});
do_dirty_delete(_SyncMode, Tab, _Key) ->
    abort({bad_type, Tab}).

%% Dirty delete of exactly the record Val from the table named by its first
%% element.
dirty_delete_object(Val) when is_tuple(Val), tuple_size(Val) > 2 ->
    dirty_delete_object(element(1, Val), Val);
dirty_delete_object(Val) ->
    abort({bad_type, Val}).
dirty_delete_object(Tab, Val) ->
    do_dirty_delete_object(async_dirty, Tab, Val).

%% The record to delete must not contain variables.
do_dirty_delete_object(SyncMode, Tab, Val)
  when is_atom(Tab), Tab /= schema, is_tuple(Val), tuple_size(Val) > 2 ->
    Oid = {Tab, element(2, Val)},
    case has_var(Val) of
        true ->
            abort({bad_type, Tab, Val});
        false ->
            mnesia_tm:dirty(SyncMode, {Oid, Val, delete_object})
    end;

do_dirty_delete_object(_SyncMode, Tab, Val) ->
    abort({bad_type, Tab, Val}).

%% A Counter is an Oid being {CounterTab, CounterName}

dirty_update_counter({Tab, Key}, Incr) ->
    dirty_update_counter(Tab, Key, Incr);
dirty_update_counter(Counter, _Incr) ->
    abort({bad_type, Counter}).

dirty_update_counter(Tab, Key, Incr) ->
    do_dirty_update_counter(async_dirty, Tab, Key, Incr).

%% Only accepted when the record_validation entry of the table is
%% {RecName, 3, set}; anything else aborts with combine_error.
do_dirty_update_counter(SyncMode, Tab, Key, Incr)
  when is_atom(Tab), Tab /= schema, is_integer(Incr) ->
    case ?catch_val({Tab, record_validation}) of
        {RecName, 3, set} ->
            mnesia_tm:dirty(SyncMode, {{Tab, Key}, {RecName, Incr}, update_counter});
        _ ->
            abort({combine_error, Tab, update_counter})
    end;
do_dirty_update_counter(_SyncMode, Tab, _Key, Incr) ->
    abort({bad_type, Tab, Incr}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Dirty access regardless of activities - read

dirty_read({Tab, Key}) ->
    dirty_read(Tab, Key);
dirty_read(Oid) ->
    abort({bad_type, Oid}).

%% Always performs a real lookup through dirty_rpc/4. A shortcut that first
%% tried `catch ?ets_lookup(Tab, Key)` and only fell back to the rpc on
%% 'EXIT' used to sit here as commented-out code.
dirty_read(Tab, Key)
  when is_atom(Tab), Tab /= schema ->
    dirty_rpc(Tab, mnesia_lib, db_get, [Tab, Key]);
dirty_read(Tab, _Key) ->
    abort({bad_type, Tab}).

%% Dirty match of Pat against the table named by its first element.
dirty_match_object(Pat) when is_tuple(Pat), tuple_size(Pat) > 2 ->
    dirty_match_object(element(1, Pat), Pat);
dirty_match_object(Pat) ->
    abort({bad_type, Pat}).
dirty_match_object(Tab, Pat)
  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
    dirty_rpc(Tab, ?MODULE, remote_dirty_match_object, [Tab, Pat]);
dirty_match_object(Tab, Pat) ->
    abort({bad_type, Tab, Pat}).

%% Invoked through dirty_rpc/4 by dirty_match_object/2. With a bound key
%% the table is matched directly, otherwise the indexed positions of the
%% table are tried first.
remote_dirty_match_object(Tab, Pat) ->
    case has_var(element(2, Pat)) of
        true ->
            remote_dirty_match_object(Tab, Pat, val({Tab, index}));
        false ->
            mnesia_lib:db_match_object(Tab, Pat)
    end.

%% Use the first index position whose pattern element has no variables;
%% fall back to a plain match when no position qualifies.
remote_dirty_match_object(Tab, Pat, [Pos | Tail]) when Pos =< tuple_size(Pat) ->
    case has_var(element(Pos, Pat)) of
        true ->
            remote_dirty_match_object(Tab, Pat, Tail);
        false ->
            mnesia_index:dirty_match_object(Tab, Pat, Pos)
    end;
remote_dirty_match_object(Tab, Pat, []) ->
    mnesia_lib:db_match_object(Tab, Pat);
remote_dirty_match_object(Tab, Pat, _PosList) ->
    abort({bad_type, Tab, Pat}).

dirty_select(Tab, Spec) when is_atom(Tab), Tab /= schema, is_list(Spec) ->
    dirty_rpc(Tab, ?MODULE, remote_dirty_select, [Tab, Spec]);
dirty_select(Tab, Spec) ->
    abort({bad_type, Tab, Spec}).

%% Invoked through dirty_rpc/4 by dirty_select/2. Indexes are only
%% considered for a specification with exactly one clause whose key
%% contains variables.
remote_dirty_select(Tab, [{HeadPat, _, _}] = Spec)
  when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
    case has_var(element(2, HeadPat)) of
        true ->
            remote_dirty_select(Tab, Spec, val({Tab, index}));
        false ->
            mnesia_lib:db_select(Tab, Spec)
    end;
remote_dirty_select(Tab, Spec) ->
    mnesia_lib:db_select(Tab, Spec).
%% Try the index positions one by one. The first position whose head
%% pattern element has no variables is used to fetch candidate records,
%% which are then filtered here with the compiled match specification.
remote_dirty_select(Tab, [{HeadPat, _, _}] = Spec, [Pos | Tail])
  when is_tuple(HeadPat), tuple_size(HeadPat) > 2, Pos =< tuple_size(HeadPat) ->
    case has_var(element(Pos, HeadPat)) of
        true ->
            remote_dirty_select(Tab, Spec, Tail);
        false ->
            %% The index lookup returns the records without applying the
            %% match spec, so the filtering is done below
            Recs = mnesia_index:dirty_select(Tab, HeadPat, Pos),
            CMS = ets:match_spec_compile(Spec),
            Candidates = case val({Tab, setorbag}) of
                             ordered_set -> lists:sort(Recs);
                             _ -> Recs
                         end,
            ets:match_spec_run(Candidates, CMS)
    end;
remote_dirty_select(Tab, Spec, _) ->
    mnesia_lib:db_select(Tab, Spec).

%% Fetch the first chunk of a chunked select on Node.
dirty_sel_init(Node, Tab, Spec, NObjects, Type) ->
    do_dirty_rpc(Tab, Node, mnesia_lib, db_select_init, [Type, Tab, Spec, NObjects]).

%% Fetch the next chunk of a chunked select, unless the end was reached.
dirty_sel_cont(#mnesia_select{cont='$end_of_table'}) ->
    '$end_of_table';
dirty_sel_cont(#mnesia_select{node=Node, tab=Tab, storage=Type, cont=Cont, orig=Ms}) ->
    do_dirty_rpc(Tab, Node, mnesia_lib, db_select_cont, [Type, Cont, Ms]).

%% All keys of Tab; for a bag the result is passed through
%% mnesia_lib:uniq/1. Aborts with no_exists when the table has no
%% wild_pattern entry.
dirty_all_keys(Tab) when is_atom(Tab), Tab /= schema ->
    case ?catch_val({Tab, wild_pattern}) of
        {'EXIT', _} ->
            abort({no_exists, Tab});
        WildPat ->
            KeyPat = setelement(2, WildPat, '$1'),
            Keys = dirty_select(Tab, [{KeyPat, [], ['$1']}]),
            case val({Tab, setorbag}) of
                bag -> mnesia_lib:uniq(Keys);
                _ -> Keys
            end
    end;
dirty_all_keys(Tab) ->
    abort({bad_type, Tab}).

%% Dirty index match; the table is the first element of the pattern.
dirty_index_match_object(Pat, Attr) when is_tuple(Pat), tuple_size(Pat) > 2 ->
    dirty_index_match_object(element(1, Pat), Pat, Attr);
dirty_index_match_object(Pat, _Attr) ->
    abort({bad_type, Pat}).
%% Dirty match through the index on Attr. A pattern with a bound key is
%% matched without the index; otherwise the indexed element itself must be
%% free of variables.
dirty_index_match_object(Tab, Pat, Attr)
  when is_atom(Tab), Tab /= schema, is_tuple(Pat), tuple_size(Pat) > 2 ->
    case mnesia_schema:attr_tab_to_pos(Tab, Attr) of
        Pos when Pos =< tuple_size(Pat) ->
            case has_var(element(2, Pat)) of
                true ->
                    IxElem = element(Pos, Pat),
                    case has_var(IxElem) of
                        true ->
                            abort({bad_type, Tab, Attr, IxElem});
                        false ->
                            dirty_rpc(Tab, mnesia_index, dirty_match_object,
                                      [Tab, Pat, Pos])
                    end;
                false ->
                    dirty_match_object(Tab, Pat)
            end;
        BadPos ->
            abort({bad_type, Tab, BadPos})
    end;
dirty_index_match_object(Tab, Pat, _Attr) ->
    abort({bad_type, Tab, Pat}).

%% Dirty read through the index on Attr; the key must not hold variables.
dirty_index_read(Tab, Key, Attr) when is_atom(Tab), Tab /= schema ->
    Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
    case has_var(Key) of
        true ->
            abort({bad_type, Tab, Attr, Key});
        false ->
            mnesia_index:dirty_read(Tab, Key, Pos)
    end;
dirty_index_read(Tab, _Key, _Attr) ->
    abort({bad_type, Tab}).

%% The traversal primitives below forward to the matching mnesia_lib
%% function through dirty_rpc/4.

dirty_slot(Tab, Slot) when is_atom(Tab), Tab /= schema, is_integer(Slot) ->
    dirty_rpc(Tab, mnesia_lib, db_slot, [Tab, Slot]);
dirty_slot(Tab, Slot) ->
    abort({bad_type, Tab, Slot}).

dirty_first(Tab) when is_atom(Tab), Tab /= schema ->
    dirty_rpc(Tab, mnesia_lib, db_first, [Tab]);
dirty_first(Tab) ->
    abort({bad_type, Tab}).

dirty_last(Tab) when is_atom(Tab), Tab /= schema ->
    dirty_rpc(Tab, mnesia_lib, db_last, [Tab]);
dirty_last(Tab) ->
    abort({bad_type, Tab}).

dirty_next(Tab, Key) when is_atom(Tab), Tab /= schema ->
    dirty_rpc(Tab, mnesia_lib, db_next_key, [Tab, Key]);
dirty_next(Tab, _Key) ->
    abort({bad_type, Tab}).

dirty_prev(Tab, Key) when is_atom(Tab), Tab /= schema ->
    dirty_rpc(Tab, mnesia_lib, db_prev_key, [Tab, Key]);
dirty_prev(Tab, _Key) ->
    abort({bad_type, Tab}).


%% Call M:F(Args) on the node that Tab is currently read from.
dirty_rpc(Tab, M, F, Args) ->
    ReadNode = val({Tab, where_to_read}),
    do_dirty_rpc(Tab, ReadNode, M, F, Args).
%% Perform the rpc on Node. After a badrpc mnesia_controller is asked where
%% the table should be read from now: the call aborts when that is still
%% the same node or when a transaction is running, and is retried on the
%% new node otherwise.
do_dirty_rpc(_Tab, nowhere, _, _, Args) ->
    mnesia:abort({no_exists, Args});
do_dirty_rpc(Tab, Node, M, F, Args) ->
    case rpc:call(Node, M, F, Args) of
        {badrpc, Reason} ->
            timer:sleep(20), %% Do not be too eager, and can't use yield on SMP
            %% Sync with mnesia_monitor
            try sys:get_status(mnesia_monitor) catch _:_ -> ok end,
            case mnesia_controller:call({check_w2r, Node, Tab}) of % Sync
                Node ->
                    %% Still the same node, give up
                    ErrorTag = mnesia_lib:dirty_rpc_error_tag(Reason),
                    mnesia:abort({ErrorTag, Args});
                NewNode ->
                    case get(mnesia_activity_state) of
                        {_Mod, Tid, _Ts} when is_record(Tid, tid) ->
                            %% In order to perform a consistent
                            %% retry of a transaction we need
                            %% to acquire the lock on the NewNode.
                            %% In this context we do neither know
                            %% the kind or granularity of the lock.
                            %% --> Abort the transaction
                            mnesia:abort({node_not_running, Node});
                        {error, {node_not_running, _}} ->
                            %% Mnesia is stopping
                            mnesia:abort({no_exists, Args});
                        _ ->
                            %% Splendid! A dirty retry is safe
                            %% 'Node' probably went down now
                            %% Let mnesia_controller get broken link message first
                            do_dirty_rpc(Tab, NewNode, M, F, Args)
                    end
            end;
        Result ->
            Result
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Info

%% Info about one table
table_info(Tab, Item) ->
    case get(mnesia_activity_state) of
        undefined ->
            any_table_info(Tab, Item);
        {?DEFAULT_ACCESS, _Tid, _Ts} ->
            any_table_info(Tab, Item);
        {AccessMod, Tid, Ts} ->
            AccessMod:table_info(Tid, Ts, Tab, Item);
        _ ->
            abort(no_transaction)
    end.

%% Access-module callback; the activity arguments are not used.
table_info(_Tid, _Ts, Tab, Item) ->
    any_table_info(Tab, Item).
%% Look up one information item about Tab. A few items are computed here;
%% every other item is read with ?catch_val({Tab, Item}).
%% (A `checkpoints` item based on {Tab, commit_work} used to be present as
%% commented-out code.)
any_table_info(Tab, Item) when is_atom(Tab) ->
    case Item of
        master_nodes ->
            mnesia_recover:get_master_nodes(Tab);
        size ->
            raw_table_info(Tab, Item);
        memory ->
            raw_table_info(Tab, Item);
        type ->
            case ?catch_val({Tab, setorbag}) of
                {'EXIT', _} -> bad_info_reply(Tab, Item);
                Type -> Type
            end;
        all ->
            case mnesia_schema:get_table_properties(Tab) of
                [] ->
                    abort({no_exists, Tab, Item});
                Props ->
                    %% setorbag is presented as type
                    Rename = fun({setorbag, Type}) -> {type, Type};
                                (Prop) -> Prop
                             end,
                    lists:map(Rename, Props)
            end;
        name ->
            Tab;
        _ ->
            case ?catch_val({Tab, Item}) of
                {'EXIT', _} -> bad_info_reply(Tab, Item);
                Val -> Val
            end
    end;
any_table_info(Tab, _Item) ->
    abort({bad_type, Tab}).

%% Ask the storage backend of the local replica (ets or dets) about Item.
raw_table_info(Tab, Item) ->
    case ?catch_val({Tab, storage_type}) of
        Ets when Ets == ram_copies; Ets == disc_copies ->
            info_reply(catch ?ets_info(Tab, Item), Tab, Item);
        disc_only_copies ->
            info_reply(catch dets:info(Tab, Item), Tab, Item);
        unknown ->
            bad_info_reply(Tab, Item);
        {'EXIT', _} ->
            bad_info_reply(Tab, Item)
    end.

%% Pass a successful backend answer through, map failures to
%% bad_info_reply/2.
info_reply({'EXIT', _Reason}, Tab, Item) ->
    bad_info_reply(Tab, Item);
info_reply({error, _Reason}, Tab, Item) ->
    bad_info_reply(Tab, Item);
info_reply(Val, _Tab, _Item) ->
    Val.

%% size and memory default to 0, anything else aborts with no_exists.
bad_info_reply(_Tab, size) -> 0;
bad_info_reply(_Tab, memory) -> 0;
bad_info_reply(Tab, Item) -> abort({no_exists, Tab, Item}).

%% Raw info about all tables
schema() ->
    mnesia_schema:info().

%% Raw info about one tables
schema(Tab) ->
    mnesia_schema:info(Tab).

error_description(Err) ->
    mnesia_lib:error_desc(Err).
%% Print a system overview. When Mnesia is running this covers locks,
%% transaction manager info, uncertain transactions, controller info and
%% the system summary; otherwise only the short summary is printed.
%% Always returns ok.
info() ->
    case mnesia_lib:is_running() of
        yes ->
            TmInfo = mnesia_tm:get_info(10000),
            Held = system_info(held_locks),
            Queued = system_info(lock_queue),

            io:format("---> Processes holding locks <--- ~n", []),
            ShowLock = fun(L) -> io:format("Lock: ~p~n", [L]) end,
            lists:foreach(ShowLock, Held),

            io:format( "---> Processes waiting for locks <--- ~n", []),
            ShowWaiter = fun({Oid, Op, _Pid, Tid, OwnerTid}) ->
                                 io:format("Tid ~p waits for ~p lock "
                                           "on oid ~p owned by ~p ~n",
                                           [Tid, Op, Oid, OwnerTid])
                         end,
            lists:foreach(ShowWaiter, Queued),
            mnesia_tm:display_info(group_leader(), TmInfo),

            Uncertain = ets:match_object(mnesia_decision, {'_', unclear, '_'}),

            io:format( "---> Uncertain transactions <--- ~n", []),
            ShowUncertain = fun({Tid, _, Nodes}) ->
                                    io:format("Tid ~w waits for decision "
                                              "from ~w~n",
                                              [Tid, Nodes])
                            end,
            lists:foreach(ShowUncertain, Uncertain),

            mnesia_controller:info(),
            display_system_info(Held, Queued, TmInfo, Uncertain);
        _ ->
            mini_info()
    end,
    ok.

%% Print version, debug level, directory usage, fallback status and the
%% running and stopped db nodes.
mini_info() ->
    io:format("===> System info in version ~p, debug level = ~p <===~n",
              [system_info(version), system_info(debug)]),
    Not = case system_info(use_dir) of
              true -> "";
              false -> "NOT "
          end,

    io:format("~w. Directory ~p is ~sused.~n",
              [system_info(schema_location), system_info(directory), Not]),
    io:format("use fallback at restart = ~w~n",
              [system_info(fallback_activated)]),
    Running = system_info(running_db_nodes),
    io:format("running db nodes = ~w~n", [Running]),
    All = mnesia_lib:all_nodes(),
    io:format("stopped db nodes = ~w ~n", [All -- Running]).
%% Print the short summary, the table overview and the transaction, lock
%% and uncertain-transaction counters.
display_system_info(Held, Queued, TmInfo, Uncertain) ->
    mini_info(),
    display_tab_info(),

    Counters = [system_info(I) || I <- [transaction_commits, transaction_failures,
                                        transaction_restarts, transaction_log_writes]],
    io:format("~w transactions committed, ~w aborted, "
              "~w restarted, ~w logged to disc~n",
              Counters),

    {Active, Pending} =
        case TmInfo of
            {timeout, _} -> {infinity, infinity};
            {info, P, A} -> {length(A), length(P)}
        end,
    io:format("~w held locks, ~w in queue; "
              "~w local transactions, ~w remote~n",
              [length(Held), length(Queued), Active, Pending]),

    %% Count the uncertain transactions and gather each node once
    CountNodes = fun({_, _, Ns}, {Count, Seen}) ->
                         Unseen = [N || N <- Ns, not lists:member(N, Seen)],
                         {Count + 1, Unseen ++ Seen}
                 end,
    {Ucount, Unodes} = lists:foldl(CountNodes, {0, []}, Uncertain),
    io:format("~w transactions waits for other nodes: ~p~n",
              [Ucount, Unodes]).

%% Print the master node tables, the tables per storage type and the tables
%% grouped by their replication pattern.
display_tab_info() ->
    MasterTabs = mnesia_recover:get_master_node_tables(),
    io:format("master node tables = ~p~n", [lists:sort(MasterTabs)]),

    Tabs = system_info(tables),

    {Unknown, Ram, Disc, DiscOnly} =
        lists:foldl(fun storage_count/2, {[], [], [], []}, Tabs),

    io:format("remote = ~p~n", [lists:sort(Unknown)]),
    io:format("ram_copies = ~p~n", [lists:sort(Ram)]),
    io:format("disc_copies = ~p~n", [lists:sort(Disc)]),
    io:format("disc_only_copies = ~p~n", [lists:sort(DiscOnly)]),

    %% Group the tables that share a replication pattern
    Group = fun(T, Acc) ->
                    Rpat =
                        case val({T, access_mode}) of
                            read_only ->
                                lists:sort([{A, read_only} || A <- val({T, active_replicas})]);
                            read_write ->
                                table_info(T, where_to_commit)
                        end,
                    case lists:keyfind(Rpat, 1, Acc) of
                        {_, Rtabs} ->
                            lists:keyreplace(Rpat, 1, Acc, {Rpat, [T | Rtabs]});
                        false ->
                            [{Rpat, [T]} | Acc]
                    end
            end,
    Repl = lists:foldl(Group, [], Tabs),
    Show = fun({Rpat, Rtabs}) -> io:format("~p = ~p~n", [Rpat, Rtabs]) end,
    lists:foreach(Show, lists:sort(Repl)).
%% Fold helper: prepends table T to the list that matches its
%% table_info(T, storage_type). The accumulator is
%% {Unknown, RamCopies, DiscCopies, DiscOnlyCopies}.
storage_count(T, {U, R, D, DO}) ->
    case table_info(T, storage_type) of
        unknown -> {[T | U], R, D, DO};
        ram_copies -> {U, [T | R], D, DO};
        disc_copies -> {U, R, [T | D], DO};
        disc_only_copies -> {U, R, D, [T | DO]}
    end.

%% Returns the value of a system information item. Any exit raised
%% while evaluating system_info2/1 (including {badarg, Item} for an
%% unknown item) is converted into abort(Error).
system_info(Item) ->
    case catch system_info2(Item) of
        {'EXIT',Error} -> abort(Error);
        Other -> Other
    end.

%% One clause per supported item. 'all' returns a list of {Item, Value}
%% for every item that is valid in the current running state. The final
%% catch-all clause must stay last: it rejects unknown items.
system_info2(all) ->
    Items = system_info_items(mnesia_lib:is_running()),
    [{I, system_info(I)} || I <- Items];

system_info2(db_nodes) ->
    DiscNs = ?catch_val({schema, disc_copies}),
    RamNs = ?catch_val({schema, ram_copies}),
    if
        is_list(DiscNs), is_list(RamNs) ->
            DiscNs ++ RamNs;
        true ->
            %% The cached values are unavailable; ask the schema module.
            case mnesia_schema:read_nodes() of
                {ok, Nodes} -> Nodes;
                {error,Reason} -> exit(Reason)
            end
    end;
system_info2(running_db_nodes) ->
    case ?catch_val({current, db_nodes}) of
        {'EXIT',_} ->
            %% Ensure that we access the intended Mnesia
            %% directory. This function may not be called
            %% during startup since it will cause the
            %% application_controller to get into deadlock
            load_mnesia_or_abort(),
            mnesia_lib:running_nodes();
        Other ->
            Other
    end;

system_info2(extra_db_nodes) ->
    case ?catch_val(extra_db_nodes) of
        {'EXIT',_} ->
            %% Ensure that we access the intended Mnesia
            %% directory. This function may not be called
            %% during startup since it will cause the
            %% application_controller to get into deadlock
            load_mnesia_or_abort(),
            mnesia_monitor:get_env(extra_db_nodes);
        Other ->
            Other
    end;

system_info2(directory) ->
    case ?catch_val(directory) of
        {'EXIT',_} ->
            %% Ensure that we access the intended Mnesia
            %% directory. This function may not be called
            %% during startup since it will cause the
            %% application_controller to get into deadlock
            load_mnesia_or_abort(),
            mnesia_monitor:get_env(dir);
        Other ->
            Other
    end;

system_info2(use_dir) ->
    case ?catch_val(use_dir) of
        {'EXIT',_} ->
            %% Ensure that we access the intended Mnesia
            %% directory. This function may not be called
            %% during startup since it will cause the
            %% application_controller to get into deadlock
            load_mnesia_or_abort(),
            mnesia_monitor:use_dir();
        Other ->
            Other
    end;

system_info2(schema_location) ->
    case ?catch_val(schema_location) of
        {'EXIT',_} ->
            %% Ensure that we access the intended Mnesia
            %% directory. This function may not be called
            %% during startup since it will cause the
            %% application_controller to get into deadlock
            load_mnesia_or_abort(),
            mnesia_monitor:get_env(schema_location);
        Other ->
            Other
    end;

system_info2(fallback_activated) ->
    case ?catch_val(fallback_activated) of
        {'EXIT',_} ->
            %% Ensure that we access the intended Mnesia
            %% directory. This function may not be called
            %% during startup since it will cause the
            %% application_controller to get into deadlock
            load_mnesia_or_abort(),
            mnesia_bup:fallback_exists();
        Other ->
            Other
    end;

system_info2(version) ->
    case ?catch_val(version) of
        {'EXIT', _} ->
            %% No cached version; look it up among the loaded
            %% applications instead.
            Apps = application:loaded_applications(),
            case lists:keysearch(?APPLICATION, 1, Apps) of
                {value, {_Name, _Desc, Version}} ->
                    Version;
                false ->
                    %% Ensure that it does not match
                    {mnesia_not_loaded, node(), now()}
            end;
        Version ->
            Version
    end;

system_info2(access_module) -> mnesia_monitor:get_env(access_module);
system_info2(auto_repair) -> mnesia_monitor:get_env(auto_repair);
system_info2(is_running) -> mnesia_lib:is_running();
system_info2(backup_module) -> mnesia_monitor:get_env(backup_module);
system_info2(event_module) -> mnesia_monitor:get_env(event_module);
system_info2(debug) -> mnesia_monitor:get_env(debug);
system_info2(dump_log_load_regulation) -> mnesia_monitor:get_env(dump_log_load_regulation);
system_info2(dump_log_write_threshold) -> mnesia_monitor:get_env(dump_log_write_threshold);
system_info2(dump_log_time_threshold) -> mnesia_monitor:get_env(dump_log_time_threshold);
system_info2(dump_log_update_in_place) ->
    mnesia_monitor:get_env(dump_log_update_in_place);
system_info2(max_wait_for_decision) -> mnesia_monitor:get_env(max_wait_for_decision);
system_info2(embedded_mnemosyne) -> mnesia_monitor:get_env(embedded_mnemosyne);
system_info2(ignore_fallback_at_startup) -> mnesia_monitor:get_env(ignore_fallback_at_startup);
system_info2(fallback_error_function) -> mnesia_monitor:get_env(fallback_error_function);
system_info2(log_version) -> mnesia_log:version();
system_info2(protocol_version) -> mnesia_monitor:protocol_version();
system_info2(schema_version) -> mnesia_schema:version(); %backward compatibility
system_info2(tables) -> val({schema, tables});
system_info2(local_tables) -> val({schema, local_tables});
system_info2(master_node_tables) -> mnesia_recover:get_master_node_tables();
system_info2(subscribers) -> mnesia_subscr:subscribers();
system_info2(checkpoints) -> mnesia_checkpoint:checkpoints();
system_info2(held_locks) -> mnesia_locker:get_held_locks();
system_info2(lock_queue) -> mnesia_locker:get_lock_queue();
system_info2(transactions) -> mnesia_tm:get_transactions();
system_info2(transaction_failures) -> mnesia_lib:read_counter(trans_failures);
system_info2(transaction_commits) -> mnesia_lib:read_counter(trans_commits);
system_info2(transaction_restarts) -> mnesia_lib:read_counter(trans_restarts);
system_info2(transaction_log_writes) -> mnesia_dumper:get_log_writes();
system_info2(core_dir) -> mnesia_monitor:get_env(core_dir);
system_info2(no_table_loaders) -> mnesia_monitor:get_env(no_table_loaders);
system_info2(dc_dump_limit) -> mnesia_monitor:get_env(dc_dump_limit);

%% Unknown item.
system_info2(Item) -> exit({badarg, Item}).
%% Lists the items reported by system_info(all). The argument is the
%% value of mnesia_lib:is_running(): 'yes' selects the full list, 'no'
%% the subset used when Mnesia is not running. The order of each list
%% is the order in which the items are reported.
system_info_items(yes) ->
    [
     access_module,
     auto_repair,
     backup_module,
     checkpoints,
     db_nodes,
     debug,
     directory,
     dump_log_load_regulation,
     dump_log_time_threshold,
     dump_log_update_in_place,
     dump_log_write_threshold,
     embedded_mnemosyne,
     event_module,
     extra_db_nodes,
     fallback_activated,
     held_locks,
     ignore_fallback_at_startup,
     fallback_error_function,
     is_running,
     local_tables,
     lock_queue,
     log_version,
     master_node_tables,
     max_wait_for_decision,
     protocol_version,
     running_db_nodes,
     schema_location,
     schema_version,
     subscribers,
     tables,
     transaction_commits,
     transaction_failures,
     transaction_log_writes,
     transaction_restarts,
     transactions,
     use_dir,
     core_dir,
     no_table_loaders,
     dc_dump_limit,
     version
    ];
system_info_items(no) ->
    [
     auto_repair,
     backup_module,
     db_nodes,
     debug,
     directory,
     dump_log_load_regulation,
     dump_log_time_threshold,
     dump_log_update_in_place,
     dump_log_write_threshold,
     event_module,
     extra_db_nodes,
     ignore_fallback_at_startup,
     fallback_error_function,
     is_running,
     log_version,
     max_wait_for_decision,
     protocol_version,
     running_db_nodes,
     schema_location,
     schema_version,
     use_dir,
     core_dir,
     version
    ].

%% Prints the system summary (the full one when Mnesia is running, the
%% short one otherwise) and returns the value of
%% mnesia_lib:is_running().
system_info() ->
    case mnesia_lib:is_running() of
        yes = Running ->
            TmInfo = mnesia_tm:get_info(10000),
            HeldLocks = system_info(held_locks),
            LockQueue = system_info(lock_queue),
            Undecided =
                ets:match_object(mnesia_decision, {'_', unclear, '_'}),
            display_system_info(HeldLocks, LockQueue, TmInfo, Undecided),
            Running;
        NotRunning ->
            mini_info(),
            NotRunning
    end.

%% Makes sure the Mnesia application is loaded; aborts with the
%% reported reason when loading fails.
load_mnesia_or_abort() ->
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        {error, Reason} ->
            abort(Reason);
        ok ->
            ok
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Database mgt

%% Delegates to mnesia_bup:create_schema/1.
create_schema(Nodes) ->
    mnesia_bup:create_schema(Nodes).

%% Delegates to mnesia_schema:delete_schema/1.
delete_schema(Nodes) ->
    mnesia_schema:delete_schema(Nodes).
%% Backup, fallback, checkpoint and restore entry points. Each function
%% forwards its arguments unchanged to the implementing module.

backup(Opaque) ->
    mnesia_log:backup(Opaque).

backup(Opaque, BackupMod) ->
    mnesia_log:backup(Opaque, BackupMod).

traverse_backup(Source, Target, Fun, Acc) ->
    mnesia_bup:traverse_backup(Source, Target, Fun, Acc).

traverse_backup(Source, SourceMod, Target, TargetMod, Fun, Acc) ->
    mnesia_bup:traverse_backup(Source, SourceMod, Target, TargetMod,
                               Fun, Acc).

install_fallback(Opaque) ->
    mnesia_bup:install_fallback(Opaque).

install_fallback(Opaque, BackupMod) ->
    mnesia_bup:install_fallback(Opaque, BackupMod).

uninstall_fallback() ->
    mnesia_bup:uninstall_fallback().

uninstall_fallback(Args) ->
    mnesia_bup:uninstall_fallback(Args).

activate_checkpoint(Args) ->
    mnesia_checkpoint:activate(Args).

deactivate_checkpoint(Name) ->
    mnesia_checkpoint:deactivate(Name).

backup_checkpoint(Name, Opaque) ->
    mnesia_log:backup_checkpoint(Name, Opaque).

backup_checkpoint(Name, Opaque, BackupMod) ->
    mnesia_log:backup_checkpoint(Name, Opaque, BackupMod).

restore(Opaque, Args) ->
    mnesia_schema:restore(Opaque, Args).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Table mgt

create_table(Arg) ->
    mnesia_schema:create_table(Arg).

%% Creates table Name with the option list Opts; the name is passed on
%% as a {name, Name} option placed first in the list.
%% NOTE(review): a non-list second argument yields the 4-tuple
%% {aborted, badarg, Name, Arg} rather than a 2-tuple
%% {aborted, Reason} - confirm that callers expect this shape.
create_table(Name, Opts) when is_list(Opts) ->
    mnesia_schema:create_table([{name, Name} | Opts]);
create_table(Name, BadOpts) ->
    {aborted, badarg, Name, BadOpts}.

delete_table(Tab) ->
    mnesia_schema:delete_table(Tab).

add_table_copy(Tab, Node, Storage) ->
    mnesia_schema:add_table_copy(Tab, Node, Storage).
del_table_copy(Tab, Node) ->
    mnesia_schema:del_table_copy(Tab, Node).

move_table_copy(Tab, FromNode, ToNode) ->
    mnesia_schema:move_table(Tab, FromNode, ToNode).

add_table_index(Tab, Index) ->
    mnesia_schema:add_table_index(Tab, Index).
del_table_index(Tab, Index) ->
    mnesia_schema:del_table_index(Tab, Index).

%% Transforms Tab while keeping its current record name. The name is
%% looked up first; if the lookup exits, the transaction is aborted
%% with the exit reason.
transform_table(Tab, Fun, NewAttrs) ->
    case catch val({Tab, record_name}) of
        {'EXIT', Reason} ->
            mnesia:abort(Reason);
        CurrentRecName ->
            mnesia_schema:transform_table(Tab, Fun, NewAttrs,
                                          CurrentRecName)
    end.

transform_table(Tab, Fun, NewAttrs, NewRecName) ->
    mnesia_schema:transform_table(Tab, Fun, NewAttrs, NewRecName).
change_table_copy_type(Tab, Node, Storage) ->
    mnesia_schema:change_table_copy_type(Tab, Node, Storage).

%% Removes every object in Tab inside a sync transaction.
%% An activity whose id is not tagged 'tid' is reused with its own
%% access module, no activity at all starts one with the default access
%% module, and any other state (an ongoing transaction) is rejected.
clear_table(Tab) ->
    Clear = fun() -> do_clear_table(Tab) end,
    case get(mnesia_activity_state) of
        {Mod, Tid, _Ts} = State when element(1, Tid) =/= tid ->
            transaction(State, Clear, [], infinity, Mod, sync);
        undefined ->
            transaction(undefined, Clear, [], infinity,
                        ?DEFAULT_ACCESS, sync);
        _ -> %% Not allowed for clear_table
            mnesia:abort({aborted, nested_transaction})
    end.

%% Runs inside the transaction started by clear_table/1 and dispatches
%% to the access module recorded in the activity state.
do_clear_table(Tab) ->
    case get(mnesia_activity_state) of
        {?DEFAULT_ACCESS, Tid, Ts} ->
            clear_table(Tid, Ts, Tab, '_');
        {AccessMod, Tid, Ts} ->
            AccessMod:clear_table(Tid, Ts, Tab, '_');
        _ ->
            abort(no_transaction)
    end.

%% Takes a write lock on the whole table and records a clear_table
%% operation for {Tab, '_'} in the transaction store.
clear_table(Tid, Ts, Tab, Obj) when element(1, Tid) =:= tid ->
    TransStore = Ts#tidstore.store,
    mnesia_locker:wlock_table(Tid, TransStore, Tab),
    ?ets_insert(TransStore, {{Tab, '_'}, Obj, clear_table}),
    ok.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Table mgt - user properties

read_table_property(Tab, PropKey) ->
    val({Tab, user_property, PropKey}).

write_table_property(Tab, Prop) ->
    mnesia_schema:write_table_property(Tab, Prop).

delete_table_property(Tab, PropKey) ->
    mnesia_schema:delete_table_property(Tab, PropKey).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Table mgt - fragmentation properties

change_table_frag(Tab, FragProp) ->
    mnesia_schema:change_table_frag(Tab, FragProp).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Table mgt - table load

%% Dump a ram table to disc
dump_tables(Tabs) ->
    mnesia_schema:dump_tables(Tabs).

%% Lets the caller wait until the given tables have been loaded
wait_for_tables(Tabs, Timeout) ->
    mnesia_controller:wait_for_tables(Tabs, Timeout).

%% Forces Tab to be loaded. A successful load is reported as 'yes'
%% (kept for backwards compatibility); other results pass through.
force_load_table(Tab) ->
    case mnesia_controller:force_load_table(Tab) of
        ok -> yes;
        NotOk -> NotOk
    end.
change_table_access_mode(T, Access) ->
    mnesia_schema:change_table_access_mode(T, Access).

change_table_load_order(T, O) ->
    mnesia_schema:change_table_load_order(T, O).

%% Sets the master nodes for all tables. For each table only those of
%% Nodes that actually hold a copy of the table are logged as masters.
%%
%% When Mnesia is running the table definitions are taken from
%% mnesia_gvar. When it is not running and a directory is in use, they
%% are read from disc while the schema table lock is held; without a
%% directory nothing is done and ok is returned.
%%
%% Returns the result of mnesia_recover:log_master_nodes/3, ok,
%% {error, Reason} when the definitions cannot be read from disc, or
%% {error, {bad_type, Nodes}} when Nodes is not a list.
set_master_nodes(Nodes) when is_list(Nodes) ->
    UseDir = system_info(use_dir),
    IsRunning = system_info(is_running),
    case IsRunning of
        yes ->
            CsPat = {{'_', cstruct}, '_'},
            Cstructs0 = ?ets_match_object(mnesia_gvar, CsPat),
            Cstructs = [Cs || {_, Cs} <- Cstructs0],
            log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning);
        _NotRunning ->
            case UseDir of
                true ->
                    mnesia_lib:lock_table(schema),
                    %% Release the schema lock even if reading or
                    %% logging raises; otherwise a caller that traps
                    %% the exception would keep the lock.
                    try
                        case mnesia_schema:read_cstructs_from_disc() of
                            {ok, Cstructs} ->
                                log_valid_master_nodes(Cstructs, Nodes,
                                                       UseDir, IsRunning);
                            {error, Reason} ->
                                {error, Reason}
                        end
                    after
                        mnesia_lib:unlock_table(schema)
                    end;
                false ->
                    ok
            end
    end;
set_master_nodes(Nodes) ->
    {error, {bad_type, Nodes}}.

%% Builds one {TableName, ValidNodes} entry per table definition, where
%% ValidNodes is the intersection of Nodes and the table's copy
%% holders, and hands the list to mnesia_recover:log_master_nodes/3.
log_valid_master_nodes(Cstructs, Nodes, UseDir, IsRunning) ->
    Fun = fun(Cs) ->
                  Copies = mnesia_lib:copy_holders(Cs),
                  Valid = mnesia_lib:intersect(Nodes, Copies),
                  {Cs#cstruct.name, Valid}
          end,
    Args = lists:map(Fun, Cstructs),
    mnesia_recover:log_master_nodes(Args, UseDir, IsRunning).
%% Sets the master nodes for a single table. Every node in Nodes must
%% hold a copy of Tab.
%%
%% When Mnesia is running the table definition is taken from
%% mnesia_gvar. When it is not running and a directory is in use, the
%% definitions are read from disc while the schema table lock is held;
%% without a directory nothing is done and ok is returned.
%%
%% Returns the result of mnesia_recover:log_master_nodes/3, ok,
%% {error, {no_exists, Tab}} for an unknown table,
%% {error, {no_exists, Tab, BadNodes}} for nodes without a copy,
%% {error, Reason} when the definitions cannot be read from disc, or
%% {error, {bad_type, Tab, Nodes}} when Nodes is not a list.
set_master_nodes(Tab, Nodes) when is_list(Nodes) ->
    UseDir = system_info(use_dir),
    IsRunning = system_info(is_running),
    case IsRunning of
        yes ->
            case ?catch_val({Tab, cstruct}) of
                {'EXIT', _} ->
                    {error, {no_exists, Tab}};
                Cs ->
                    log_tab_master_nodes(Tab, Nodes, Cs, UseDir, IsRunning)
            end;
        _NotRunning ->
            case UseDir of
                true ->
                    mnesia_lib:lock_table(schema),
                    %% Release the schema lock even if reading or
                    %% logging raises; otherwise a caller that traps
                    %% the exception would keep the lock.
                    try
                        case mnesia_schema:read_cstructs_from_disc() of
                            {ok, Cstructs} ->
                                %% The table name is the second element
                                %% of a cstruct record tuple.
                                case lists:keysearch(Tab, 2, Cstructs) of
                                    {value, Cs} ->
                                        log_tab_master_nodes(Tab, Nodes, Cs,
                                                             UseDir,
                                                             IsRunning);
                                    false ->
                                        {error, {no_exists, Tab}}
                                end;
                            {error, Reason} ->
                                {error, Reason}
                        end
                    after
                        mnesia_lib:unlock_table(schema)
                    end;
                false ->
                    ok
            end
    end;
set_master_nodes(Tab, Nodes) ->
    {error, {bad_type, Tab, Nodes}}.

%% Logs Nodes as master nodes for Tab, provided that all of them are
%% copy holders according to the table definition Cs.
log_tab_master_nodes(Tab, Nodes, Cs, UseDir, IsRunning) ->
    case Nodes -- mnesia_lib:copy_holders(Cs) of
        [] ->
            Args = [{Tab , Nodes}],
            mnesia_recover:log_master_nodes(Args, UseDir, IsRunning);
        BadNodes ->
            {error, {no_exists, Tab, BadNodes}}
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Misc admin

dump_log() ->
    mnesia_controller:sync_dump_log(user).

%% Subscribes the calling process to events of category What.
subscribe(What) ->
    mnesia_subscr:subscribe(self(), What).

%% Cancels the calling process' subscription to What.
unsubscribe(What) ->
    mnesia_subscr:unsubscribe(self(), What).

%% Reports Event as a system event tagged mnesia_user.
report_event(Event) ->
    mnesia_lib:report_system_event({mnesia_user, Event}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Snmp

snmp_open_table(Tab, Us) ->
    mnesia_schema:add_snmp(Tab, Us).

snmp_close_table(Tab) ->
    mnesia_schema:del_snmp(Tab).
%% Reads the row of Tab identified by the SNMP index RowIndex.
%% Returns {ok, Row} or undefined; aborts with {bad_type, Tab} when the
%% arguments do not satisfy the guard (Tab must be an atom other than
%% schema and RowIndex a list).
%%
%% Inside a transaction the index is first translated to a Mnesia key.
%% If that is not possible, the operations recorded in the transaction
%% store are scanned for a key that maps to RowIndex. Outside a
%% transaction the lookup is a dirty rpc to mnesia_snmp_hook:get_row/2.
snmp_get_row(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
    case get(mnesia_activity_state) of
        {Mod, Tid, Ts=#tidstore{store=Store}} when element(1, Tid) =:= tid ->
            case snmp_oid_to_mnesia_key(RowIndex, Tab) of
                unknown -> %% Arrg contains fix_string
                    Ops = find_ops(Store, Tab, val({Tab, wild_pattern})),
                    SnmpType = val({Tab,snmp}),
                    %% Folding over Ops in order means the last
                    %% operation on a matching key decides the result:
                    %% a write yields the row, anything else undefined.
                    Fix = fun({{_,Key},Row,Op}, Res) ->
                                  case mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) of
                                      RowIndex ->
                                          case Op of
                                              write -> {ok, Row};
                                              _ ->
                                                  undefined
                                          end;
                                      _ ->
                                          Res
                                  end
                          end,
                    lists:foldl(Fix, undefined, Ops);
                Key ->
                    case Mod:read(Tid, Ts, Tab, Key, read) of
                        [Row] ->
                            {ok, Row};
                        _ ->
                            undefined
                    end
            end;
        _ ->
            dirty_rpc(Tab, mnesia_snmp_hook, get_row, [Tab, RowIndex])
    end;
snmp_get_row(Tab, _RowIndex) ->
    abort({bad_type, Tab}).

%%%%%%%%%%%%%

%% Returns {ok, NextIndex} for the SNMP index following RowIndex, or
%% endOfTable. Aborts with {bad_type, Tab} when the arguments do not
%% satisfy the guard.
%%
%% The committed next index is always fetched with a dirty rpc. Inside
%% a transaction the answer is then adjusted using the keys recorded in
%% the transaction store.
snmp_get_next_index(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
    {Next,OrigKey} = dirty_rpc(Tab, mnesia_snmp_hook, get_next_index, [Tab, RowIndex]),
    case get(mnesia_activity_state) of
        {_Mod, Tid, #tidstore{store=Store}} when element(1, Tid) =:= tid ->
            case OrigKey of
                undefined ->
                    snmp_order_keys(Store, Tab, RowIndex, []);
                _ ->
                    case ?ets_match(Store, {{Tab,OrigKey}, '_', '$1'}) of
                        [] -> snmp_order_keys(Store,Tab,RowIndex,[OrigKey]);
                        Ops ->
                            case lists:last(Ops) of
                                %% The candidate's last recorded
                                %% operation is a delete: continue the
                                %% search from it.
                                [delete] -> snmp_get_next_index(Tab, Next);
                                _ -> snmp_order_keys(Store,Tab,RowIndex,[OrigKey])
                            end
                    end
            end;
        _ ->
            case Next of
                endOfTable -> endOfTable;
                _ -> {ok, Next}
            end
    end;
snmp_get_next_index(Tab, _RowIndex) ->
    abort({bad_type, Tab}).

%% Converts the keys produced by ts_keys_1/2 (from the operations the
%% transaction store holds for Tab, with Def as the initial key list)
%% to SNMP indices, sorts them, and returns the first one greater than
%% RowIndex.
snmp_order_keys(Store,Tab,RowIndex,Def) ->
    All = ?ets_match(Store, {{Tab,'$1'},'_','$2'}),
    SnmpType = val({Tab,snmp}),
    Keys0 = [mnesia_snmp_hook:key_to_oid(Tab,Key,SnmpType) ||
                Key <- ts_keys_1(All, Def)],
    Keys = lists:sort(Keys0),
    get_ordered_snmp_key(RowIndex,Keys).
%% Returns {ok, Index} for the first element of the list that is
%% greater than Prev, or endOfTable when there is none.
get_ordered_snmp_key(_, []) ->
    endOfTable;
get_ordered_snmp_key(Prev, [Candidate | _]) when Prev < Candidate ->
    {ok, Candidate};
get_ordered_snmp_key(Prev, [_ | Rest]) ->
    get_ordered_snmp_key(Prev, Rest).

%%%%%%%%%%

%% Translates the SNMP index RowIndex to a Mnesia key for Tab. The
%% lookup is a dirty rpc to mnesia_snmp_hook:get_mnesia_key/2; inside a
%% transaction its result is then checked against the operations
%% recorded in the transaction store. Aborts with {bad_type, Tab} when
%% the arguments do not satisfy the guard.
snmp_get_mnesia_key(Tab, RowIndex) when is_atom(Tab), Tab /= schema, is_list(RowIndex) ->
    case get(mnesia_activity_state) of
        {_Mod, Tid, Ts} when element(1, Tid) =:= tid ->
            Committed =
                dirty_rpc(Tab, mnesia_snmp_hook, get_mnesia_key,
                          [Tab, RowIndex]),
            snmp_filter_key(Committed, RowIndex, Tab, Ts#tidstore.store);
        _ ->
            dirty_rpc(Tab, mnesia_snmp_hook, get_mnesia_key, [Tab, RowIndex])
    end;
snmp_get_mnesia_key(Tab, _RowIndex) ->
    abort({bad_type, Tab}).

%% Returns the Mnesia key for RowIndex, or 'unknown'. The local
%% conversion is tried first; when it cannot decide, the key is looked
%% up with a dirty rpc.
snmp_oid_to_mnesia_key(RowIndex, Tab) ->
    case mnesia_snmp_hook:oid_to_key(RowIndex, Tab) of
        unknown -> %% Contains fix_string needs lookup
            case dirty_rpc(Tab, mnesia_snmp_hook, get_mnesia_key,
                           [Tab, RowIndex]) of
                {ok, Key} -> Key;
                undefined -> unknown
            end;
        Key ->
            Key
    end.

%% Adjusts the committed lookup result using the operations recorded
%% for the key in the transaction store. In each case the last
%% recorded operation decides: a write means the key exists, anything
%% else means it does not.
snmp_filter_key({ok, Key} = Committed, _RowIndex, Tab, Store) ->
    case ?ets_lookup(Store, {Tab, Key}) of
        [] ->
            Committed;
        Recorded ->
            case lists:last(Recorded) of
                {_, _, write} -> Committed;
                _ -> undefined
            end
    end;
snmp_filter_key(undefined, RowIndex, Tab, Store) ->
    case mnesia_snmp_hook:oid_to_key(RowIndex, Tab) of
        unknown -> %% Arrg contains fix_string
            Recorded = find_ops(Store, Tab, val({Tab, wild_pattern})),
            SnmpType = val({Tab, snmp}),
            Pick =
                fun({{_, Key}, _, Op}, Found) ->
                        case mnesia_snmp_hook:key_to_oid(Tab, Key, SnmpType) of
                            RowIndex ->
                                case Op of
                                    write -> {ok, Key};
                                    _ -> undefined
                                end;
                            _ ->
                                Found
                        end
                end,
            lists:foldl(Pick, undefined, Recorded);
        Key ->
            case ?ets_lookup(Store, {Tab, Key}) of
                [] ->
                    undefined;
                Recorded ->
                    case lists:last(Recorded) of
                        {_, _, write} -> {ok, Key};
                        _ -> undefined
                    end
            end
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Textfile access

load_textfile(File) ->
    mnesia_text:load_textfile(File).
dump_to_textfile(F) ->
    mnesia_text:dump_to_textfile(F).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% QLC Handles
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Returns a QLC query handle for Tab using default options.
table(Tab) ->
    table(Tab, []).

%% Returns a QLC query handle for Tab.
%% Recognised options (defaults in parentheses): traverse (select),
%% lock (read) and n_objects (100). Any remaining options are passed on
%% to qlc:table/2. A traverse value other than 'select' or
%% {select, MatchSpec} raises a badarg error.
table(Tab,Opts) ->
    {[Trav,Lock,NObjects],QlcOptions0} =
        qlc_opts(Opts,[{traverse,select},{lock,read},{n_objects,100}]),
    %% Traversal fun: arity 0 when the match spec is fixed up front,
    %% arity 1 when the match spec is supplied by the caller of the fun.
    TF = case Trav of
             {select,Ms} ->
                 fun() -> qlc_select(select(Tab,Ms,NObjects,Lock)) end;
             select ->
                 fun(Ms) -> qlc_select(select(Tab,Ms,NObjects,Lock)) end;
             _ ->
                 erlang:error({badarg, {Trav,[Tab, Opts]}})
         end,
    Pre = fun(Arg) -> pre_qlc(Arg, Tab) end,
    Post = fun() -> post_qlc(Tab) end,
    Info = fun(Tag) -> qlc_info(Tab, Tag) end,
    %% Captures the activity state of the calling process so that
    %% pre_qlc/2 can install it when invoked without one.
    ParentFun = fun() ->
                        {mnesia_activity, mnesia:get_activity_id()}
                end,
    %% Key and index lookups are only offered when no fixed match spec
    %% was given. Position 2 is the key position; any other position is
    %% read through the index.
    Lookup =
        case Trav of
            {select, _} -> [];
            _ ->
                LFun = fun(2, Keys) ->
                               Read = fun(Key) -> read(Tab,Key,Lock) end,
                               lists:flatmap(Read, Keys);
                          (Index,Keys) ->
                               IdxRead = fun(Key) -> index_read(Tab,Key,Index) end,
                               lists:flatmap(IdxRead, Keys)
                       end,
                [{lookup_fun, LFun}]
        end,
    MFA = fun(Type) -> qlc_format(Type, Tab, NObjects, Lock, Opts) end,
    QlcOptions = [{pre_fun, Pre}, {post_fun, Post},
                  {info_fun, Info}, {parent_fun, ParentFun},
                  {format_fun, MFA}|Lookup] ++ QlcOptions0,
    qlc:table(TF, QlcOptions).

%% QLC pre_fun. Makes sure an activity state is available: when the
%% process has none, the state captured by the parent fun is installed
%% together with the stop fun, and the call aborts with no_transaction
%% if the parent had none either. When the activity id is not tagged
%% 'tid' and Tab is not an ordered_set, the table is fixed through
%% mnesia_tm:fixtable/3.
pre_qlc(Opts, Tab) ->
    {_,Tid,_} =
        case get(mnesia_activity_state) of
            undefined ->
                case lists:keysearch(parent_value, 1, Opts) of
                    {value, {parent_value,{mnesia_activity,undefined}}} ->
                        abort(no_transaction);
                    {value, {parent_value,{mnesia_activity,Aid}}} ->
                        {value,{stop_fun,Stop}} =
                            lists:keysearch(stop_fun,1,Opts),
                        put_activity_id(Aid,Stop),
                        Aid;
                    _ ->
                        abort(no_transaction)
                end;
            Else ->
                Else
        end,
    case element(1,Tid) of
        tid -> ok;
        _ ->
            case ?catch_val({Tab, setorbag}) of
                ordered_set -> ok;
                _ ->
                    dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,true,self()]),
                    ok
            end
    end.
%% QLC post_fun. Releases the table fix taken by the pre fun: nothing
%% is done when the activity id is a #tid{} record or when Tab is an
%% ordered_set; otherwise the table is unfixed through
%% mnesia_tm:fixtable/3. Always returns ok.
post_qlc(Tab) ->
    case catch get(mnesia_activity_state) of
        {_, #tid{}, _} ->
            ok;
        _ ->
            case ?catch_val({Tab, setorbag}) of
                ordered_set ->
                    ok;
                _ ->
                    dirty_rpc(Tab, mnesia_tm, fixtable,
                              [Tab, false, self()]),
                    ok
            end
    end.

%% Turns a select result into the lazy list form used by QLC: the
%% objects followed by a fun that fetches the next chunk. Empty chunks
%% are skipped.
qlc_select('$end_of_table') ->
    [];
qlc_select({[], Cont}) ->
    qlc_select(select(Cont));
qlc_select({Objects, Cont}) ->
    Objects ++ fun() -> qlc_select(select(Cont)) end.

%% Splits an option list (or a single option) into the values of the
%% wanted keys and the remaining options. Wanted is a list of
%% {Key, Default}. Returns {Values, RestOpts} with Values in the same
%% order as Wanted.
qlc_opts(Opts, Wanted) when is_list(Opts) ->
    qlc_opts(Opts, Wanted, []);
qlc_opts(SingleOpt, Wanted) ->
    qlc_opts([SingleOpt], Wanted, []).

qlc_opts(Opts, [], Found) ->
    {lists:reverse(Found), Opts};
qlc_opts(Opts, [{Key, Default} | Wanted], Found) ->
    Chosen =
        case lists:keysearch(Key, 1, Opts) of
            {value, {Key, Value}} -> Value;
            false -> Default
        end,
    qlc_opts(lists:keydelete(Key, 1, Opts), Wanted, [Chosen | Found]).

%% QLC info_fun. Answers the supported tags; unknown tags yield
%% undefined.
qlc_info(Tab, num_of_objects) ->
    dirty_rpc(Tab, ?MODULE, raw_table_info, [Tab, size]);
qlc_info(_, keypos) ->
    2;
qlc_info(_, is_unique_objects) ->
    true;
qlc_info(Tab, is_unique_keys) ->
    case val({Tab, type}) of
        set -> true;
        ordered_set -> true;
        _ -> false
    end;
qlc_info(Tab, is_sorted_objects) ->
    case val({Tab, type}) of
        ordered_set ->
            case ?catch_val({Tab, frag_hash}) of
                {'EXIT', _} ->
                    ascending;
                _ -> %% Fragmented tables are not ordered
                    no
            end;
        _ ->
            no
    end;
qlc_info(Tab, indices) ->
    val({Tab, index});
qlc_info(_Tab, _) ->
    undefined.

%% QLC format_fun. Describes how the handle accesses the table, either
%% as a {Module, Function, Args} call to table/2 or as the text of the
%% equivalent lookup expression.
qlc_format(all, Tab, NObjects, Lock, Opts) ->
    TableOpts = [{n_objects, NObjects}, {lock, Lock} | Opts],
    {?MODULE, table, [Tab, TableOpts]};
qlc_format({match_spec, Ms}, Tab, NObjects, Lock, Opts) ->
    TableOpts = [{traverse, {select, Ms}},
                 {n_objects, NObjects},
                 {lock, Lock} | Opts],
    {?MODULE, table, [Tab, TableOpts]};
qlc_format({lookup, 2, Keys}, Tab, _, Lock, _) ->
    io_lib:format("lists:flatmap(fun(V) -> "
                  "~w:read(~w, V, ~w) end, ~w)",
                  [?MODULE, Tab, Lock, Keys]);
qlc_format({lookup, Index, Keys}, Tab, _, _, _) ->
    io_lib:format("lists:flatmap(fun(V) -> "
                  "~w:index_read(~w, V, ~w) end, ~w)",
                  [?MODULE, Tab, Index, Keys]).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Fixes Tab for the current transaction unless it is an ordered_set.
%% The second argument is either the transaction store itself or a
%% #tidstore{} record holding it. The fix is requested at most once per
%% table: a {fixtable, {Tab, Node}} entry in the store records that it
%% has already been done. Returns ok.
do_fixtable(Tab, #tidstore{store=Store}) ->
    do_fixtable(Tab,Store);
do_fixtable(Tab, Store) ->
    case ?catch_val({Tab, setorbag}) of
        ordered_set ->
            ok;
        _ ->
            case ?ets_match_object(Store, {fixtable, {Tab, '_'}}) of
                [] ->
                    Node = dirty_rpc(Tab, mnesia_tm, fixtable, [Tab,true,self()]),
                    ?ets_insert(Store, {fixtable, {Tab, Node}});
                _ ->
                    ignore
            end,
            ok
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Mnemosyne exclusive

%% Returns the activity state stored in the process dictionary
%% (undefined when there is none).
get_activity_id() ->
    get(mnesia_activity_state).

put_activity_id(Activity) ->
    mnesia_tm:put_activity_id(Activity).
put_activity_id(Activity,Fun) ->
    mnesia_tm:put_activity_id(Activity,Fun).
diff --git a/lib/mnesia/src/mnesia.hrl b/lib/mnesia/src/mnesia.hrl
new file mode 100644
index 0000000000..d488d9364a
--- /dev/null
+++ b/lib/mnesia/src/mnesia.hrl
@@ -0,0 +1,121 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%

-define(APPLICATION, mnesia).

%% Thin macro wrappers around the ets API.
-define(ets_lookup(Tab, Key), ets:lookup(Tab, Key)).
-define(ets_lookup_element(Tab, Key, Pos), ets:lookup_element(Tab, Key, Pos)).
-define(ets_insert(Tab, Rec), ets:insert(Tab, Rec)).
-define(ets_delete(Tab, Key), ets:delete(Tab, Key)).
-define(ets_match_delete(Tab, Pat), ets:match_delete(Tab, Pat)).
-define(ets_match_object(Tab, Pat), ets:match_object(Tab, Pat)).
-define(ets_match(Tab, Pat), ets:match(Tab, Pat)).
-define(ets_info(Tab, Item), ets:info(Tab, Item)).
-define(ets_update_counter(Tab, Key, Incr), ets:update_counter(Tab, Key, Incr)).
-define(ets_first(Tab), ets:first(Tab)).
-define(ets_next(Tab, Key), ets:next(Tab, Key)).
-define(ets_last(Tab), ets:last(Tab)).
-define(ets_prev(Tab, Key), ets:prev(Tab, Key)).
-define(ets_slot(Tab, Pos), ets:slot(Tab, Pos)).
-define(ets_new_table(Tab, Props), ets:new(Tab, Props)).
-define(ets_delete_table(Tab), ets:delete(Tab)).
-define(ets_fixtable(Tab, Bool), ets:fixtable(Tab, Bool)).

%% Looks up Var in the mnesia_gvar table. Because of the old-style
%% catch, a failed lookup evaluates to {'EXIT', Reason} instead of
%% raising.
-define(catch_val(Var), (catch ?ets_lookup_element(mnesia_gvar, Var, 2))).

%% It's important that counter is first, since we compare tid's

-record(tid,
        {counter,         %% serial no for tid
         pid}).           %% owner of tid


-record(tidstore,
        {store,           %% current ets table for tid
         up_stores = [],  %% list of upper layer stores for nested trans
         level = 1}).     %% transaction level

-define(unique_cookie, {erlang:now(), node()}).

-record(cstruct, {name,                            % Atom
                  type = set,                      % set | bag
                  ram_copies = [],                 % [Node]
                  disc_copies = [],                % [Node]
                  disc_only_copies = [],           % [Node]
                  load_order = 0,                  % Integer
                  access_mode = read_write,        % read_write | read_only
                  index = [],                      % [Integer]
                  snmp = [],                       % Snmp Ustruct
                  local_content = false,           % true | false
                  record_name = {bad_record_name}, % Atom (Default = Name)
                  attributes = [key, val],         % [Atom]
                  user_properties = [],            % [Record]
                  frag_properties = [],            % [{Key, Val}]
                  cookie = ?unique_cookie,         % Term
                  version = {{2, 0}, []}}).        % {{Integer, Integer}, [Node]}

%% Record for the head structure in Mnesia's log files
%%
%% The definition of this record may *NEVER* be changed
%% since it may be written to very old backup files.
%% By holding this record definition stable we are
%% able to comprehend backups from timepoint 0. It also
%% allows us to use the backup format as an interchange
%% format between Mnesia releases.

-record(log_header,{log_kind,
                    log_version,
                    mnesia_version,
                    node,
                    now}).

%% Commit records stored in the transaction log
-record(commit, {node,
                 decision, % presume_commit | Decision
                 ram_copies = [],
                 disc_copies = [],
                 disc_only_copies = [],
                 snmp = [],
                 schema_ops = []
                }).

-record(decision, {tid,
                   outcome, % presume_abort | committed
                   disc_nodes,
                   ram_nodes}).

%% Maybe cyclic wait
-record(cyclic, {node = node(),
                 oid,  % {Tab, Key}
                 op,   % read | write
                 lock, % read | write
                 lucky
                }).

%% Managing conditional debug functions

-ifdef(debug).
    -define(eval_debug_fun(I, C),
            mnesia_lib:eval_debug_fun(I, C, ?FILE, ?LINE)).
-else.
    -define(eval_debug_fun(I, C), ok).
-endif.

diff --git a/lib/mnesia/src/mnesia_backup.erl b/lib/mnesia/src/mnesia_backup.erl
new file mode 100644
index 0000000000..f372ca0be5
--- /dev/null
+++ b/lib/mnesia/src/mnesia_backup.erl
@@ -0,0 +1,201 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%

%%-behaviour(mnesia_backup).
%0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% This module contains one implementation of callback functions
%% used by Mnesia at backup and restore. The user may however
%% write their own module with the same interface as mnesia_backup and
%% configure Mnesia so the alternate module performs the actual
%% accesses to the backup media. This means that the user may put
%% the backup on media that Mnesia does not know about, possibly
%% on hosts where Erlang is not running.
%%
%% The OpaqueData argument is never interpreted by other parts of
%% Mnesia. It is the property of this module. Alternate implementations
%% of this module may have different interpretations of OpaqueData.
%% The OpaqueData argument given to open_write/1 and open_read/1
%% are forwarded directly from the user.
%%
%% All functions must return {ok, NewOpaqueData} or {error, Reason}.
%%
%% The NewOpaqueData arguments returned by backup callback functions will
%% be given as input when the next backup callback function is invoked.
%% If any return value does not match {ok, _} the backup will be aborted.
%%
%% The NewOpaqueData arguments returned by restore callback functions will
%% be given as input when the next restore callback function is invoked.
%% If any return value does not match {ok, _} the restore will be aborted.
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-module(mnesia_backup).

-include_lib("kernel/include/file.hrl").

-export([
         %% Write access
         open_write/1,
         write/2,
         commit_write/1,
         abort_write/1,

         %% Read access
         open_read/1,
         read/1,
         close_read/1
        ]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Backup callback interface

%% State of an ongoing backup: the temporary file being written, the
%% final file name, and the disk_log handle of the temporary file.
-record(backup, {tmp_file, file, file_desc}).

%% Opens backup media for write
%%
%% OpaqueData is the name of the backup file. Any old backup file and
%% any leftover temporary file are deleted first, then a disk_log is
%% opened on the temporary file "<File>.BUPTMP".
%%
%% Returns {ok, OpaqueData} or {error, Reason}
open_write(OpaqueData) ->
    File = OpaqueData,
    TmpFile = lists:concat([File,".BUPTMP"]),
    file:delete(TmpFile),
    file:delete(File),
    LogArgs = [{name, make_ref()},
               {file, TmpFile},
               {repair, false},
               {linkto, self()}],
    case disk_log:open(LogArgs) of
        {ok, Log} ->
            {ok, #backup{tmp_file = TmpFile, file = File, file_desc = Log}};
        {error, Reason} ->
            {error, Reason}
    end.

%% Writes BackupItems to the backup media
%%
%% On a write error the backup is aborted (the log is closed and the
%% temporary file removed) before the error is returned.
%%
%% Returns {ok, OpaqueData} or {error, Reason}
write(OpaqueData, BackupItems) ->
    Backup = OpaqueData,
    case disk_log:log_terms(Backup#backup.file_desc, BackupItems) of
        ok ->
            {ok, Backup};
        {error, Reason} ->
            abort_write(Backup),
            {error, Reason}
    end.

%% Closes the backup media after a successful backup
%%
%% The log is synced and closed, and the temporary file is then renamed
%% to the final backup file. The first failing step decides the error.
%%
%% Returns {ok, ReturnValueToUser} or {error, Reason}
commit_write(OpaqueData) ->
    Backup = OpaqueData,
    case disk_log:sync(Backup#backup.file_desc) of
        ok ->
            commit_close(Backup);
        {error, Reason} ->
            {error, Reason}
    end.

%% Second commit step: close the log.
commit_close(Backup) ->
    case disk_log:close(Backup#backup.file_desc) of
        ok ->
            commit_rename(Backup);
        {error, Reason} ->
            {error, Reason}
    end.

%% Final commit step: move the temporary file into place.
commit_rename(Backup) ->
    case file:rename(Backup#backup.tmp_file, Backup#backup.file) of
        ok ->
            {ok, Backup#backup.file};
        {error, Reason} ->
            {error, Reason}
    end.

%% Closes the backup media after an interrupted backup
%%
%% The temporary file is deleted whether or not the log could be
%% closed; the result reflects the close.
%%
%% Returns {ok, ReturnValueToUser} or {error, Reason}
abort_write(BackupRef) ->
    CloseResult = disk_log:close(BackupRef#backup.file_desc),
    file:delete(BackupRef#backup.tmp_file),
    case CloseResult of
        ok ->
            {ok, BackupRef#backup.file};
        {error, Reason} ->
            {error, Reason}
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Restore callback interface

-record(restore, {file, file_desc, cont}).

%% Opens backup media for read
%%
%% OpaqueData is the name of the backup file. The file must exist; it
%% is opened as a read-only disk_log.
%%
%% Returns {ok, OpaqueData} or {error, Reason}
open_read(OpaqueData) ->
    File = OpaqueData,
    case file:read_file_info(File) of
        {error, Reason} ->
            {error, Reason};
        _FileInfo -> %% file exists
            LogArgs = [{file, File},
                       {name, make_ref()},
                       {repair, false},
                       {mode, read_only},
                       {linkto, self()}],
            case disk_log:open(LogArgs) of
                {ok, Log} ->
                    {ok, #restore{file = File, file_desc = Log, cont = start}};
                {repaired, Log, _, _} ->
                    %% A repaired log is accepted regardless of how
                    %% many bad bytes were reported.
                    {ok, #restore{file = File, file_desc = Log, cont = start}};
                {error, Reason} ->
                    {error, Reason}
            end
    end.

%% Reads BackupItems from the backup media
%%
%% Empty chunks are skipped, so an empty item list is only returned at
%% the end of the log.
%%
%% Returns {ok, OpaqueData, BackupItems} or {error, Reason}
%%
%% BackupItems == [] is interpreted as eof
read(OpaqueData) ->
    Restore = OpaqueData,
    Log = Restore#restore.file_desc,
    case disk_log:chunk(Log, Restore#restore.cont) of
        {error, Reason} ->
            {error, {"Possibly truncated", Reason}};
        eof ->
            {ok, Restore, []};
        {NextCont, []} ->
            read(Restore#restore{cont = NextCont});
        {NextCont, Items, _BadBytes} ->
            {ok, Restore#restore{cont = NextCont}, Items};
        {NextCont, Items} ->
            {ok, Restore#restore{cont = NextCont}, Items}
    end.

%% Closes the backup media after restore
%%
%% Returns {ok, ReturnValueToUser} or {error, Reason}
close_read(OpaqueData) ->
    Restore = OpaqueData,
    case disk_log:close(Restore#restore.file_desc) of
        {error, Reason} -> {error, Reason};
        ok -> {ok, Restore#restore.file}
    end.
%0

diff --git a/lib/mnesia/src/mnesia_bup.erl b/lib/mnesia/src/mnesia_bup.erl
new file mode 100644
index 0000000000..37a8258d74
--- /dev/null
+++ b/lib/mnesia/src/mnesia_bup.erl
@@ -0,0 +1,1186 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_bup).
-export([
         %% Public interface
         iterate/4,
         read_schema/2,
         fallback_bup/0,
         fallback_exists/0,
         tm_fallback_start/1,
         create_schema/1,
         install_fallback/1,
         install_fallback/2,
         uninstall_fallback/0,
         uninstall_fallback/1,
         traverse_backup/4,
         traverse_backup/6,
         make_initial_backup/3,
         fallback_to_schema/0,
         lookup_schema/2,
         schema2bup/1,
         refresh_cookie/2,

         %% Internal
         fallback_receiver/2,
         install_fallback_master/2,
         uninstall_fallback_master/2,
         local_uninstall_fallback/2,
         do_traverse_backup/7,
         trav_apply/4
        ]).

-include("mnesia.hrl").
-import(mnesia_lib, [verbose/2, dbg_out/2]).

-record(restore, {mode, bup_module, bup_data}).

-record(fallback_args, {opaque,
                        scope = global,
                        module = mnesia_monitor:get_env(backup_module),
                        use_default_dir = true,
                        mnesia_dir,
                        fallback_bup,
                        fallback_tmp,
                        skip_tables = [],
                        keep_tables = [],
                        default_op = keep_tables
                       }).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Backup iterator

%% Reads schema section and iterates over all records in a backup.
%%
%% Fun(BunchOfRecords, Header, Schema, Acc) is applied when a suitable amount
%% of records has been collected.
%%
%% BunchOfRecords will be [] when the iteration is done.
iterate(Mod, Fun, Opaque, Acc) ->
    Initial = #restore{bup_module = Mod, bup_data = Opaque},
    case catch read_schema_section(Initial) of
        {error, Reason} ->
            {error, Reason};
        {Opened, {Header, Schema, Rest}} ->
            Outcome = (catch iter(Opened, Header, Schema, Fun, Acc, Rest)),
            case Outcome of
                {ok, Last, Res} ->
                    close_after_iterate(Last),
                    {ok, Res};
                {error, Reason} ->
                    close_after_iterate(Opened),
                    {error, Reason};
                {'EXIT', Pid, Reason} ->
                    close_after_iterate(Opened),
                    {error, {'EXIT', Pid, Reason}};
                {'EXIT', Reason} ->
                    close_after_iterate(Opened),
                    {error, {'EXIT', Reason}}
            end
    end.

%% Best effort close of the backup media; any failure is ignored.
close_after_iterate(R) ->
    catch safe_apply(R, close_read, [R#restore.bup_data]).

%% Feeds the backup to Fun, one bunch of items at a time. When nothing is
%% pending the next bunch is read from the media; an empty read means eof,
%% which is signalled to Fun with an empty list of items.
%%
%% Returns {ok, R, FinalAcc}
iter(R, Header, Schema, Fun, Acc, []) ->
    case safe_apply(R, read, [R#restore.bup_data]) of
        {AtEof, []} ->
            {ok, AtEof, Fun([], Header, Schema, Acc)};
        {Next, Items} ->
            iter(Next, Header, Schema, Fun, Acc, Items)
    end;
iter(R, Header, Schema, Fun, Acc, Pending) ->
    NextAcc = Fun(Pending, Header, Schema, Acc),
    iter(R, Header, Schema, Fun, NextAcc, []).

%% Invokes callback What in the backup module and stores the opaque data
%% it returns in R. Writing an empty list of items is a no-op. Everything
%% but a proper ok-tuple aborts the restore by throwing {error, Reason}.
%%
%% Returns {R, Items} for read and R for all other callbacks
safe_apply(R, write, [_, []]) ->
    R;
safe_apply(R, What, Args) ->
    case catch apply(R#restore.bup_module, What, Args) of
        {ok, Opaque, Items} when What =:= read ->
            {R#restore{bup_data = Opaque}, Items};
        {ok, Opaque} when What =/= read ->
            R#restore{bup_data = Opaque};
        {error, Reason} ->
            abort_restore(R, What, Args, Reason);
        Unexpected ->
            abort_restore(R, What, Args, Unexpected)
    end.

%% Logs the failed callback, tries to close the media and throws the error.
abort_restore(R, What, Args, Reason) ->
    Callback = R#restore.bup_module,
    Data = R#restore.bup_data,
    dbg_out("Restore aborted. ~p:~p~p -> ~p~n",
            [Callback, What, Args, Reason]),
    catch apply(Callback, close_read, [Data]),
    throw({error, Reason}).

%% Reads the schema from the fallback file of the local Mnesia directory.
fallback_to_schema() ->
    fallback_to_schema(fallback_bup()).
%% Reads the schema definition of the schema table from the backup file Fname.
%%
%% Returns {ok, fallback, CreateList} | {error, Reason}
fallback_to_schema(Fname) ->
    Mod = mnesia_backup,
    case read_schema(Mod, Fname) of
        {error, Reason} ->
            {error, Reason};
        Schema ->
            case catch lookup_schema(schema, Schema) of
                {error, _} ->
                    {error, "No schema in fallback"};
                List ->
                    {ok, fallback, List}
            end
    end.

%% Opens Opaque, reads the schema section and then closes the media.
%%
%% Returns Schema | {error, Reason}
read_schema(Mod, Opaque) ->
    R = #restore{bup_module = Mod, bup_data = Opaque},
    case catch read_schema_section(R) of
        {error, Reason} ->
            {error, Reason};
        {R2, {_Header, Schema, _}} ->
            catch safe_apply(R2, close_read, [R2#restore.bup_data]),
            Schema
    end.

%% Opens the backup media and extracts the schema section. On success
%% the media is left open; on failure it is closed (best effort).
%%
%% Returns {R, {Header, Schema, Rest}} | {error, Reason}
%%
%% Schema has been converted to the current backup format and Rest holds
%% the items that were read after the schema section but not yet consumed.
read_schema_section(R) ->
    case catch do_read_schema_section(R) of
        {'EXIT', Reason} ->
            catch safe_apply(R, close_read, [R#restore.bup_data]),
            {error, {'EXIT', Reason}};
        {error, Reason} ->
            catch safe_apply(R, close_read, [R#restore.bup_data]),
            {error, Reason};
        {R2, {H, Schema, Rest}} ->
            Schema2 = convert_schema(H#log_header.log_version, Schema),
            {R2, {H, Schema2, Rest}}
    end.

do_read_schema_section(R) ->
    R2 = safe_apply(R, open_read, [R#restore.bup_data]),
    {R3, RawSchema} = safe_apply(R2, read, [R2#restore.bup_data]),
    do_read_schema_section(R3, verify_header(RawSchema), []).

%% Collects the leading schema items. Acc is built in reverse order and
%% reversed once when the section ends, so that collecting N schema items
%% costs O(N) instead of the O(N^2) of appending to the end of a list.
do_read_schema_section(R, {ok, B, C, []}, Acc) ->
    %% The current bunch is exhausted, fetch the next one
    case safe_apply(R, read, [R#restore.bup_data]) of
        {R2, []} ->
            %% eof, the backup contains nothing but schema items
            {R2, {B, lists:reverse(Acc), []}};
        {R2, RawSchema} ->
            do_read_schema_section(R2, {ok, B, C, RawSchema}, Acc)
    end;

do_read_schema_section(R, {ok, B, C, [Head | Tail]}, Acc)
  when element(1, Head) =:= schema ->
    do_read_schema_section(R, {ok, B, C, Tail}, [Head | Acc]);

do_read_schema_section(R, {ok, B, _C, Rest}, Acc) ->
    %% First non-schema item, the schema section is complete
    {R, {B, lists:reverse(Acc), Rest}};

do_read_schema_section(_R, {error, Reason}, _Acc) ->
    {error, Reason}.
%% Checks that the first item of a backup is a log header of the backup
%% kind with a supported version.
%%
%% Returns {ok, Header, CurrentHeader, RemainingItems} | {error, Reason}
verify_header([H | RawSchema]) when is_record(H, log_header) ->
    Current = mnesia_log:backup_log_header(),
    Supported = ["0.1", "1.1", Current#log_header.log_version],
    SameKind = H#log_header.log_kind =:= Current#log_header.log_kind,
    KnownVersion = lists:member(H#log_header.log_version, Supported),
    case {SameKind, KnownVersion} of
        {false, _} ->
            {error, {"Bad kind of header. Cannot be used as backup.", H}};
        {true, false} ->
            {error, {"Bad header version. Cannot be used as backup.", H}};
        {true, true} ->
            {ok, H, Current, RawSchema}
    end;
verify_header(RawSchema) ->
    {error, {"Missing header. Cannot be used as backup.", catch hd(RawSchema)}}.

%% Replaces the cookie of the schema table definition found in Schema.
%%
%% Returns the updated schema or throws {error, Reason}
refresh_cookie(Schema, NewCookie) ->
    case lists:keysearch(schema, 2, Schema) of
        false ->
            Reason = "No schema found. Cannot be used as backup.",
            throw({error, {Reason, Schema}});
        {value, {schema, schema, List}} ->
            OldCs = mnesia_schema:list2cs(List),
            NewCs = OldCs#cstruct{cookie = NewCookie},
            NewList = mnesia_schema:cs2list(NewCs),
            lists:keyreplace(schema, 2, Schema, {schema, schema, NewList})
    end.

%% Convert schema items from an external backup
%% If backup format is the latest, no conversion is needed
%% All supported backup formats should have their converters
%% here as separate function clauses.
convert_schema("0.1", Schema) ->
    convert_0_1(Schema);
convert_schema("1.1", Schema) ->
    %% The new backup format is a pure extension of the old one
    Newest = mnesia_log:backup_log_header(),
    convert_schema(Newest#log_header.log_version, Schema);
convert_schema(Latest, Schema) ->
    Newest = mnesia_log:backup_log_header(),
    case Newest#log_header.log_version of
        Latest ->
            Schema;
        _ ->
            Reason = "Bad backup header version. Cannot convert schema.",
            throw({error, {Reason, Newest}})
    end.
%% Backward compatibility for 0.1
%%
%% The old format spreads the schema table definition over several
%% items (cookie, db_nodes, version). They are merged into one cstruct
%% which ends up first in the converted schema.
convert_0_1(Schema) ->
    {Items, CreateList} =
        case lists:keysearch(schema, 2, Schema) of
            {value, {schema, schema, List}} ->
                {lists:keydelete(schema, 2, Schema), List};
            false ->
                {Schema, mnesia_schema:get_initial_schema(disc_copies, [node()])}
        end,
    convert_0_1(Items, [], mnesia_schema:list2cs(CreateList)).

convert_0_1([], Acc, Cs) ->
    [schema2bup({schema, schema, Cs}) | Acc];
convert_0_1([{schema, cookie, Cookie} | Rest], Acc, Cs) ->
    convert_0_1(Rest, Acc, Cs#cstruct{cookie = Cookie});
convert_0_1([{schema, db_nodes, DbNodes} | Rest], Acc, Cs) ->
    convert_0_1(Rest, Acc, Cs#cstruct{disc_copies = DbNodes});
convert_0_1([{schema, version, Version} | Rest], Acc, Cs) ->
    convert_0_1(Rest, Acc, Cs#cstruct{version = Version});
convert_0_1([{schema, Tab, Def} | Rest], Acc, Cs) ->
    convert_0_1(Rest, [{schema, Tab, drop_snmp_index(Def)} | Acc], Cs);
convert_0_1([Other | Rest], Acc, Cs) ->
    convert_0_1(Rest, [Other | Acc], Cs).

%% Remove the snmp "index" from the index positions of a table definition
drop_snmp_index(Def) ->
    case lists:keysearch(index, 1, Def) of
        false ->
            Def;
        {value, {index, PosList}} ->
            lists:keyreplace(index, 1, Def, {index, PosList -- [snmp]})
    end.

%% Returns Val or throw error
lookup_schema(Key, Schema) ->
    case lists:keysearch(Key, 2, Schema) of
        false ->
            throw({error, {"Cannot lookup", Key}});
        {value, {schema, Key, Val}} ->
            Val
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Backup compatibility

%% Convert internal schema items to their backup counterparts
schema2bup({schema, Tab, TableDef}) ->
    {schema, Tab, mnesia_schema:cs2list(TableDef)};
schema2bup({schema, Tab}) ->
    {schema, Tab}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Create schema on the given nodes
%% Requires that old schemas have been deleted
%% Returns ok | {error, Reason}
create_schema([]) ->
    create_schema([node()]);
create_schema(Ns) when is_list(Ns) ->
    case is_set(Ns) of
        true ->
            create_schema(Ns, mnesia_schema:ensure_no_schema(Ns));
        false ->
            {error, {combine_error, Ns}}
    end;
create_schema(Ns) ->
    {error, {badarg, Ns}}.

%% Returns true if List is a list without duplicates
is_set(List) when is_list(List) ->
    ordsets:is_set(lists:sort(List));
is_set(_) ->
    false.

%% Second argument is the outcome of mnesia_schema:ensure_no_schema/1
create_schema(Ns, ok) ->
    %% Ensure that we access the intended Mnesia
    %% directory. This function may not be called
    %% during startup since it will cause the
    %% application_controller to get into deadlock
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        ok ->
            case mnesia_monitor:get_env(schema_location) of
                ram ->
                    {error, {has_no_disc, node()}};
                _ ->
                    create_disc_schema(Ns)
            end;
        {error, Reason} ->
            {error, Reason}
    end;
create_schema(_Ns, {error, Reason}) ->
    {error, Reason};
create_schema(_Ns, Reason) ->
    {error, Reason}.

%% Creates the Mnesia directory, writes an initial backup holding nothing
%% but the schema to a temporary file and installs it as fallback.
%%
%% The temporary file is removed whether or not the installation succeeds,
%% so a failed attempt does not leave a stray *.TMP file behind.
%%
%% Returns ok | {error, Reason}
create_disc_schema(Ns) ->
    case mnesia_schema:opt_create_dir(true, mnesia_lib:dir()) of
        {error, What} ->
            {error, What};
        ok ->
            Mod = mnesia_backup,
            File = mnesia_lib:dir(mk_str()),
            file:delete(File),
            Res =
                case catch make_initial_backup(Ns, File, Mod) of
                    {ok, _Res} ->
                        do_install_fallback(File, Mod);
                    {error, Reason} ->
                        {error, Reason};
                    {'EXIT', Reason} ->
                        %% A crash while building the initial backup is
                        %% reported in the same way as do_apply/4 does
                        {error, {'EXIT', Reason}}
                end,
            file:delete(File),
            Res
    end.

%% Returns a name for the temporary backup file, built from the
%% node name and the current now() timestamp
mk_str() ->
    Now = [integer_to_list(I) || I <- tuple_to_list(now())],
    lists:concat([node()] ++ Now ++ ".TMP").
%% Writes a backup consisting of the backup log header and the initial
%% schema for the nodes Ns, using the callback module Mod.
%%
%% Returns {ok, CommitResult} or throws {error, Reason}
make_initial_backup(Ns, Opaque, Mod) ->
    CreateList = mnesia_schema:get_initial_schema(disc_copies, Ns),
    SchemaItems = [{schema, schema, CreateList}],
    Opened = do_apply(Mod, open_write, [Opaque], Opaque),
    HeaderItems = [mnesia_log:backup_log_header()],
    WithHeader = do_apply(Mod, write, [Opened, HeaderItems], Opened),
    WithSchema = do_apply(Mod, write, [WithHeader, SchemaItems], WithHeader),
    Committed = do_apply(Mod, commit_write, [WithSchema], WithSchema),
    {ok, Committed}.

%% Invokes Mod:What(Args) and returns the new opaque backup data.
%% Writing an empty list of items is a no-op that keeps the old data.
%% Errors and crashes in the callback are thrown as {error, Reason}.
do_apply(_, write, [_, []], Opaque) ->
    Opaque;
do_apply(Mod, What, Args, _Opaque) ->
    case catch apply(Mod, What, Args) of
        {'EXIT', Reason} ->
            throw({error, {'EXIT', Reason}});
        {error, Reason} ->
            throw({error, Reason});
        {ok, NewOpaque} ->
            NewOpaque
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Restore

%% Restore schema and possibly other tables from a backup
%% and replicate them to the necessary nodes
%% Requires that old schemas have been deleted
%% Returns ok | {error, Reason}
install_fallback(Opaque) ->
    install_fallback(Opaque, []).

install_fallback(Opaque, Args) ->
    %% Ensure that we access the intended Mnesia
    %% directory. This function may not be called
    %% during startup since it will cause the
    %% application_controller to get into deadlock
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        {error, Reason} ->
            {error, Reason};
        ok ->
            do_install_fallback(Opaque, Args)
    end.

%% The second argument is either a backup module or a list of options
do_install_fallback(Opaque, Mod) when is_atom(Mod) ->
    do_install_fallback(Opaque, [{module, Mod}]);
do_install_fallback(Opaque, Args) when is_list(Args) ->
    case check_fallback_args(Args, #fallback_args{opaque = Opaque}) of
        {error, Reason} ->
            {error, Reason};
        {ok, FA} ->
            do_install_fallback(FA)
    end;
do_install_fallback(_Opaque, Args) ->
    {error, {badarg, Args}}.

%% Folds the option list into the fallback_args record.
%%
%% Returns {ok, FallbackArgs} | {error, {badarg, Option}}
check_fallback_args([], FA) ->
    {ok, FA};
check_fallback_args([Option | Rest], FA) ->
    case catch check_fallback_arg_type(Option, FA) of
        {'EXIT', _Reason} ->
            {error, {badarg, Option}};
        Updated ->
            check_fallback_args(Rest, Updated)
    end.
%% Stores one fallback option in the fallback_args record.
%% An unknown or malformed option makes the function crash; the
%% caller is expected to catch that and report a badarg.
check_fallback_arg_type({scope, global}, FA) ->
    FA#fallback_args{scope = global};
check_fallback_arg_type({scope, local}, FA) ->
    FA#fallback_args{scope = local};
check_fallback_arg_type({module, Mod}, FA) ->
    Checked = mnesia_monitor:do_check_type(backup_module, Mod),
    FA#fallback_args{module = Checked};
check_fallback_arg_type({mnesia_dir, Dir}, FA) ->
    FA#fallback_args{mnesia_dir = Dir,
                     use_default_dir = false};
check_fallback_arg_type({keep_tables, Tabs}, FA) ->
    atom_list(Tabs),
    FA#fallback_args{keep_tables = Tabs};
check_fallback_arg_type({skip_tables, Tabs}, FA) ->
    atom_list(Tabs),
    FA#fallback_args{skip_tables = Tabs};
check_fallback_arg_type({default_op, keep_tables}, FA) ->
    FA#fallback_args{default_op = keep_tables};
check_fallback_arg_type({default_op, skip_tables}, FA) ->
    FA#fallback_args{default_op = skip_tables}.

%% Returns ok for a proper list of atoms and crashes otherwise
atom_list([]) ->
    ok;
atom_list([Tab | Tabs]) when is_atom(Tab) ->
    atom_list(Tabs).

%% Runs the installation in a linked master process and waits for its
%% verdict.
%%
%% Returns ok | {error, Reason}
do_install_fallback(FA) ->
    Master = spawn_link(?MODULE, install_fallback_master, [self(), FA]),
    receive
        {'EXIT', Master, Reason} -> % if appl has trapped exit
            {error, {'EXIT', Reason}};
        {Master, {ok, _}} ->
            ok;
        {Master, {error, Reason}} ->
            {error, {"Cannot install fallback", Reason}}
    end.

%% Body of the master process: iterates over the backup, feeds it to the
%% fallback receivers and reports the outcome to the client.
install_fallback_master(ClientPid, FA) ->
    process_flag(trap_exit, true),
    Start = {start, FA},
    Source = FA#fallback_args.opaque,
    Callback = FA#fallback_args.module,
    Outcome = (catch iterate(Callback, fun restore_recs/4, Source, Start)),
    unlink(ClientPid),
    ClientPid ! {self(), Outcome},
    exit(shutdown).
%% Iterator fun used by install_fallback_master/2. The accumulator is
%%   {start, FA}  before the fallback receivers have been started,
%%   Pids         while records are distributed to the receivers,
%%   stop         once the receivers have swapped and stopped.
restore_recs(_, _, _, stop) ->
    throw({error, "restore_recs already stopped"});

restore_recs(Recs, Header, Schema, {start, FA}) ->
    %% First call: start one receiver per node and hand them the schema
    Converted = convert_schema(Header#log_header.log_version, Schema),
    CreateList = lookup_schema(schema, Converted),
    case catch mnesia_schema:list2cs(CreateList) of
        {'EXIT', Reason} ->
            throw({error, {"Bad schema in restore_recs", Reason}});
        Cs ->
            Nodes = get_fallback_nodes(FA, Cs#cstruct.disc_copies),
            global:set_lock({{mnesia_table_lock, schema}, self()}, Nodes, infinity),
            StartArgs = [self(), FA],
            Receivers = [spawn_link(Node, ?MODULE, fallback_receiver, StartArgs)
                         || Node <- Nodes],
            send_fallback(Receivers, {start, Header, Converted}),
            Result = restore_recs(Recs, Header, Converted, Receivers),
            global:del_lock({{mnesia_table_lock, schema}, self()}, Nodes),
            Result
    end;

restore_recs([], _Header, _Schema, Pids) ->
    %% End of backup: make the receivers activate the fallback and stop
    send_fallback(Pids, swap),
    send_fallback(Pids, stop),
    stop;

restore_recs(Recs, _, _, Pids) ->
    send_fallback(Pids, {records, Recs}),
    Pids.

%% Returns the nodes on which the fallback is to be installed. The local
%% node must be one of the nodes Ns, otherwise an error is thrown.
get_fallback_nodes(FA, Ns) ->
    Local = node(),
    case lists:member(Local, Ns) of
        false ->
            throw({error, {"No disc resident schema on local node", Ns}});
        true ->
            case FA#fallback_args.scope of
                local -> [Local];
                global -> Ns
            end
    end.

%% Sends Msg to all receivers and waits for their unanimous answer
send_fallback(Pids, Msg) when is_list(Pids), Pids =/= [] ->
    Tagged = {self(), Msg},
    lists:foreach(fun(Receiver) -> Receiver ! Tagged end, Pids),
    rec_answers(Pids, []).
%% Collects one answer from each of the given processes. An error answer
%% or answers that differ between the processes are thrown as an error,
%% otherwise the common answer is returned.
rec_answers([], Acc) ->
    case lists:keysearch(error, 1, Acc) of
        {value, {error, Val}} ->
            throw({error, Val});
        _ ->
            case mnesia_lib:uniq(Acc) of
                [SameAnswer] ->
                    SameAnswer;
                Other ->
                    throw({error, {"Different answers", Other}})
            end
    end;
rec_answers(Pids, Acc) ->
    receive
        {'EXIT', Pid, stopped} ->
            rec_answers(lists:delete(Pid, Pids), [stopped | Acc]);
        {'EXIT', Pid, Reason} ->
            Failure = {error, {'EXIT', Pid, Reason}},
            rec_answers(lists:delete(Pid, Pids), [Failure | Acc]);
        {Pid, Reply} ->
            rec_answers(lists:delete(Pid, Pids), [Reply | Acc])
    end.

%% Returns true if a fallback is installed on the local node
fallback_exists() ->
    fallback_exists(fallback_bup()).

fallback_exists(Fname) ->
    case mnesia_monitor:use_dir() of
        false ->
            %% No directory in use, consult the active_fallback flag
            case ?catch_val(active_fallback) of
                {'EXIT', _} -> false;
                Bool -> Bool
            end;
        true ->
            mnesia_lib:exists(Fname)
    end.

fallback_name() -> "FALLBACK.BUP".
fallback_bup() -> mnesia_lib:dir(fallback_name()).

fallback_tmp_name() -> "FALLBACK.TMP".
%% fallback_full_tmp_name() -> mnesia_lib:dir(fallback_tmp_name()).

%% Body of the process that receives a fallback on one node. It registers
%% itself as mnesia_fallback and refuses to start if that name is taken or
%% if a fallback file already exists.
fallback_receiver(Master, FA) ->
    process_flag(trap_exit, true),

    case catch register(mnesia_fallback, self()) of
        {'EXIT', _} ->
            local_fallback_error(Master, {already_exists, node()});
        true ->
            receive_fallback(Master, check_fallback_dir(Master, FA))
    end.

%% Runs the receiver loop unless a fallback is already installed
receive_fallback(Master, FA) ->
    case mnesia_lib:exists(FA#fallback_args.fallback_bup) of
        true ->
            local_fallback_error(Master, {already_exists, node()});
        false ->
            Tmp = FA#fallback_args.fallback_tmp,
            R = #restore{mode = replace,
                         bup_module = mnesia_backup,
                         bup_data = Tmp},
            file:delete(Tmp),
            case catch fallback_receiver_loop(Master, R, FA, schema) of
                {error, Reason} ->
                    local_fallback_error(Master, Reason);
                Other ->
                    exit(Other)
            end
    end.

%% Reports the error to the master and terminates the calling process
local_fallback_error(Master, Reason) ->
    Master ! {self(), {error, Reason}},
    unlink(Master),
    exit(Reason).
%% Fills in the directory and the fallback file names in FA.
%% Reports an error to Master and exits if the node has no disc
%% or if the directory argument is unusable.
check_fallback_dir(Master, FA) ->
    case mnesia:system_info(schema_location) of
        ram ->
            Reason = {has_no_disc, node()},
            local_fallback_error(Master, Reason);
        _ ->
            Dir = check_fallback_dir_arg(Master, FA),
            Bup = filename:join([Dir, fallback_name()]),
            Tmp = filename:join([Dir, fallback_tmp_name()]),
            FA#fallback_args{fallback_bup = Bup,
                             fallback_tmp = Tmp,
                             mnesia_dir = Dir}
    end.

%% Returns the directory to use. An explicit mnesia_dir is only
%% accepted together with scope local.
check_fallback_dir_arg(Master, FA) ->
    case FA#fallback_args.use_default_dir of
        true ->
            mnesia_lib:dir();
        false when FA#fallback_args.scope =:= local ->
            Dir = FA#fallback_args.mnesia_dir,
            case catch mnesia_monitor:do_check_type(dir, Dir) of
                {'EXIT', _R} ->
                    Reason = {badarg, {dir, Dir}, node()},
                    local_fallback_error(Master, Reason);
                AbsDir->
                    AbsDir
            end;
        false when FA#fallback_args.scope =:= global ->
            Reason = {combine_error, global, dir, node()},
            local_fallback_error(Master, Reason)
    end.

%% Receives the fallback from Master and writes it to the temporary
%% fallback file. State is schema -> records -> stop.
%%
%% Returns stopped, or throws {error, Reason}. All errors are thrown as
%% 2-tuples since that is the only shape fallback_receiver/2 reports back
%% to the master.
fallback_receiver_loop(Master, R, FA, State) ->
    receive
        {Master, {start, Header, Schema}} when State =:= schema ->
            Dir = FA#fallback_args.mnesia_dir,
            throw_bad_res(ok, mnesia_schema:opt_create_dir(true, Dir)),
            R2 = safe_apply(R, open_write, [R#restore.bup_data]),
            R3 = safe_apply(R2, write, [R2#restore.bup_data, [Header]]),
            BupSchema = [schema2bup(S) || S <- Schema],
            R4 = safe_apply(R3, write, [R3#restore.bup_data, BupSchema]),
            Master ! {self(), ok},
            fallback_receiver_loop(Master, R4, FA, records);

        {Master, {records, Recs}} when State =:= records ->
            R2 = safe_apply(R, write, [R#restore.bup_data, Recs]),
            Master ! {self(), ok},
            fallback_receiver_loop(Master, R2, FA, records);

        {Master, swap} when State =/= schema ->
            ?eval_debug_fun({?MODULE, fallback_receiver_loop, pre_swap}, []),
            safe_apply(R, commit_write, [R#restore.bup_data]),
            Bup = FA#fallback_args.fallback_bup,
            Tmp = FA#fallback_args.fallback_tmp,
            %% The committed temporary file becomes the fallback
            throw_bad_res(ok, file:rename(Tmp, Bup)),
            catch mnesia_lib:set(active_fallback, true),
            ?eval_debug_fun({?MODULE, fallback_receiver_loop, post_swap}, []),
            Master ! {self(), ok},
            fallback_receiver_loop(Master, R, FA, stop);

        {Master, stop} when State =:= stop ->
            stopped;

        Msg ->
            %% Best effort cleanup. A failing abort_write (e.g. when the
            %% media has not been opened yet) must neither hide the real
            %% reason nor prevent the temporary file from being removed.
            catch safe_apply(R, abort_write, [R#restore.bup_data]),
            Tmp = FA#fallback_args.fallback_tmp,
            file:delete(Tmp),
            throw({error, {"Unexpected msg fallback_receiver_loop", Msg}})
    end.

%% Returns Actual if it equals Expected, otherwise throws {error, Reason}
throw_bad_res(Expected, Expected) -> Expected;
throw_bad_res(_Expected, {error, Actual}) -> throw({error, Actual});
throw_bad_res(_Expected, Actual) -> throw({error, Actual}).

%% Description of a table that resides on the local node. The open, add,
%% close and swap fields hold funs that perform the storage type specific
%% work when starting from a fallback.
-record(local_tab, {name,
                    storage_type,
                    open,
                    add,
                    close,
                    swap,
                    record_name,
                    opened}).

%% Starts from the fallback, if there is one, while holding the schema lock.
%%
%% Returns ok or exits with the reason of the failure
tm_fallback_start(IgnoreFallback) ->
    mnesia_schema:lock_schema(),
    Res = do_fallback_start(fallback_exists(), IgnoreFallback),
    mnesia_schema:unlock_schema(),
    case Res of
        ok -> ok;
        {error, Reason} -> exit(Reason)
    end.
%% Starts Mnesia from an installed fallback.
%%
%% First argument:  true if a fallback exists, see fallback_exists/0
%% Second argument: true if the fallback is to be ignored at startup
%%
%% Returns ok | {error, Reason}
do_fallback_start(false, _IgnoreFallback) ->
    %% No fallback installed, nothing to do
    ok;
do_fallback_start(true, true) ->
    verbose("Ignoring fallback at startup, but leaving it active...~n", []),
    mnesia_lib:set(active_fallback, true),
    ok;
do_fallback_start(true, false) ->
    verbose("Starting from fallback...~n", []),

    BupFile = fallback_bup(),
    Mod = mnesia_backup,
    %% One local_tab record per local table, keyed on the table name
    LocalTabs = ?ets_new_table(mnesia_local_tables, [set, public, {keypos, 2}]),
    case catch iterate(Mod, fun restore_tables/4, BupFile, {start, LocalTabs}) of
        {ok, _Res} ->
            catch dets:close(schema),
            TmpSchema = mnesia_lib:tab2tmp(schema),
            DatSchema = mnesia_lib:tab2dat(schema),
            %% Fetch the table descriptions before the ets table is dropped
            AllLT = ?ets_match_object(LocalTabs, '_'),
            ?ets_delete_table(LocalTabs),
            %% The schema file is moved into place first, then every other
            %% local table is swapped in by its own swap fun
            case file:rename(TmpSchema, DatSchema) of
                ok ->
                    [(LT#local_tab.swap)(LT#local_tab.name, LT) ||
                        LT <- AllLT, LT#local_tab.name =/= schema],
                    %% The fallback is consumed once all tables are in place
                    file:delete(BupFile),
                    ok;
                {error, Reason} ->
                    file:delete(TmpSchema),
                    {error, {"Cannot start from fallback. Rename error.", Reason}}
            end;
        {error, Reason} ->
            {error, {"Cannot start from fallback", Reason}};
        {'EXIT', Reason} ->
            {error, {"Cannot start from fallback", Reason}}
    end.
%% Iterator fun used by do_fallback_start/2 to load the records of a
%% fallback into the local tables. The accumulator is one of
%%   {start, LocalTabs}              first call, files not yet initiated
%%   {new, LocalTabs}                the table of the next record is unknown
%%   {local, LocalTabs, LT}          records of the local table LT follow
%%   {not_local, LocalTabs, PrevTab} records of PrevTab are skipped
%%
%% Returns the new accumulator
restore_tables(All=[Rec | Recs], Header, Schema, State={local, LocalTabs, LT}) ->
    Tab = element(1, Rec),
    if
        Tab =:= LT#local_tab.name ->
            %% Still the same table, add the record with its add fun
            Key = element(2, Rec),
            (LT#local_tab.add)(Tab, Key, Rec, LT),
            restore_tables(Recs, Header, Schema, State);
        true ->
            %% Another table begins here, look it up again
            NewState = {new, LocalTabs},
            restore_tables(All, Header, Schema, NewState)
    end;
restore_tables(All=[Rec | Recs], Header, Schema, {new, LocalTabs}) ->
    Tab = element(1, Rec),
    case ?ets_lookup(LocalTabs, Tab) of
        [] ->
            %% Not a local table, this record is dropped
            State = {not_local, LocalTabs, Tab},
            restore_tables(Recs, Header, Schema, State);
        [LT] when is_record(LT, local_tab) ->
            State = {local, LocalTabs, LT},
            %% Open the table the first time one of its records shows up
            case LT#local_tab.opened of
                true -> ignore;
                false ->
                    (LT#local_tab.open)(Tab, LT),
                    ?ets_insert(LocalTabs,LT#local_tab{opened=true})
            end,
            %% The record itself is handled by the {local, ...} clause
            restore_tables(All, Header, Schema, State)
    end;
restore_tables(All=[Rec | Recs], Header, Schema, S = {not_local, LocalTabs, PrevTab}) ->
    Tab = element(1, Rec),
    if
        Tab =:= PrevTab ->
            restore_tables(Recs, Header, Schema, S);
        true ->
            State = {new, LocalTabs},
            restore_tables(All, Header, Schema, State)
    end;
restore_tables(Recs, Header, Schema, {start, LocalTabs}) ->
    %% First call: purge the directory, except for the fallback file,
    %% and initiate the table files from the schema of the backup
    Dir = mnesia_lib:dir(),
    OldDir = filename:join([Dir, "OLD_DIR"]),
    mnesia_schema:purge_dir(OldDir, []),
    mnesia_schema:purge_dir(Dir, [fallback_name()]),
    init_dat_files(Schema, LocalTabs),
    State = {new, LocalTabs},
    restore_tables(Recs, Header, Schema, State);
restore_tables([], _Header, _Schema, State) ->
    State.
%% Creates all necessary dat files and inserts
%% the table definitions in the schema table
%%
%% A local_tab record is inserted into the ets table LocalTabs
%% for each local table, including the schema table itself
init_dat_files(Schema, LocalTabs) ->
    TmpFile = mnesia_lib:tab2tmp(schema),
    Args = [{file, TmpFile}, {keypos, 2}, {type, set}],
    case dets:open_file(schema, Args) of % Assume schema lock
        {ok, _} ->
            create_dat_files(Schema, LocalTabs),
            ok = dets:close(schema),
            %% The schema table has no funs, its file is moved into
            %% place by do_fallback_start/2
            LocalTab = #local_tab{name         = schema,
                                  storage_type = disc_copies,
                                  open         = undefined,
                                  add          = undefined,
                                  close        = undefined,
                                  swap         = undefined,
                                  record_name  = schema,
                                  opened       = false},
            ?ets_insert(LocalTabs, LocalTab);
        {error, Reason} ->
            throw({error, {"Cannot open file", schema, Args, Reason}})
    end.

%% Walks through the schema items of the backup. Each table definition
%% is stored in the (temporary) schema dets table, and for each table
%% with a local replica a local_tab record is registered whose funs know
%% how to open, add records to, close and finally swap in the table.
create_dat_files([{schema, schema, TabDef} | Tail], LocalTabs) ->
    ok = dets:insert(schema, {schema, schema, TabDef}),
    create_dat_files(Tail, LocalTabs);
create_dat_files([{schema, Tab, TabDef} | Tail], LocalTabs) ->
    TmpFile = mnesia_lib:tab2tmp(Tab),
    DatFile = mnesia_lib:tab2dat(Tab),
    DclFile = mnesia_lib:tab2dcl(Tab),
    DcdFile = mnesia_lib:tab2dcd(Tab),
    %% Removes all old files of the table
    Expunge = fun() ->
                      file:delete(DatFile),
                      file:delete(DclFile),
                      file:delete(DcdFile)
              end,

    mnesia_lib:dets_sync_close(Tab),
    file:delete(TmpFile),
    Cs = mnesia_schema:list2cs(TabDef),
    ok = dets:insert(schema, {schema, Tab, TabDef}),
    RecName = Cs#cstruct.record_name,
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    if
        Storage =:= unknown ->
            %% No replica on this node
            ok = dets:delete(schema, {schema, Tab}),
            create_dat_files(Tail, LocalTabs);
        Storage =:= disc_only_copies ->
            %% Records are written to a dets file at TmpFile, which
            %% Swap renames to the dat file of the table
            Args = [{file, TmpFile}, {keypos, 2},
                    {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)}],
            Open = fun(T, LT) when T =:= LT#local_tab.name ->
                           case mnesia_lib:dets_sync_open(T, Args) of
                               {ok, _} ->
                                   ok;
                               {error, Reason} ->
                                   throw({error, {"Cannot open file", T, Args, Reason}})
                           end
                   end,
            Add = fun(T, Key, Rec, LT) when T =:= LT#local_tab.name ->
                          case Rec of
                              {_T, Key} ->
                                  %% An oid, i.e. a record to be deleted
                                  ok = dets:delete(T, Key);
                              (Rec) when T =:= RecName ->
                                  ok = dets:insert(Tab, Rec);
                              (Rec) ->
                                  %% Stored under the record name of the table
                                  Rec2 = setelement(1, Rec, RecName),
                                  ok = dets:insert(T, Rec2)
                          end
                  end,
            Close = fun(T, LT) when T =:= LT#local_tab.name ->
                            mnesia_lib:dets_sync_close(T)
                    end,
            Swap = fun(T, LT) when T =:= LT#local_tab.name ->
                           Expunge(),
                           %% A table without records in the backup has
                           %% never been opened; open it to create the file
                           case LT#local_tab.opened of
                               true ->
                                   Close(T,LT);
                               false ->
                                   Open(T,LT),
                                   Close(T,LT)
                           end,
                           case file:rename(TmpFile, DatFile) of
                               ok ->
                                   ok;
                               {error, Reason} ->
                                   mnesia_lib:fatal("Cannot rename file ~p -> ~p: ~p~n",
                                                    [TmpFile, DatFile, Reason])
                           end
                   end,
            LocalTab = #local_tab{name         = Tab,
                                  storage_type = Storage,
                                  open         = Open,
                                  add          = Add,
                                  close        = Close,
                                  swap         = Swap,
                                  record_name  = RecName,
                                  opened       = false},
            ?ets_insert(LocalTabs, LocalTab),
            create_dat_files(Tail, LocalTabs);
        Storage =:= ram_copies; Storage =:= disc_copies ->
            %% Records are appended to a log at TmpFile, which
            %% Swap renames to the dcl file of the table
            Open = fun(T, LT) when T =:= LT#local_tab.name ->
                           mnesia_log:open_log({?MODULE, T},
                                               mnesia_log:dcl_log_header(),
                                               TmpFile,
                                               false,
                                               false,
                                               read_write)
                   end,
            Add = fun(T, Key, Rec, LT) when T =:= LT#local_tab.name ->
                          Log = {?MODULE, T},
                          case Rec of
                              {_T, Key} ->
                                  %% An oid, i.e. a record to be deleted
                                  mnesia_log:append(Log, {{T, Key}, {T, Key}, delete});
                              (Rec) when T =:= RecName ->
                                  mnesia_log:append(Log, {{T, Key}, Rec, write});
                              (Rec) ->
                                  %% Stored under the record name of the table
                                  Rec2 = setelement(1, Rec, RecName),
                                  mnesia_log:append(Log, {{T, Key}, Rec2, write})
                          end
                  end,
            Close = fun(T, LT) when T =:= LT#local_tab.name ->
                            mnesia_log:close_log({?MODULE, T})
                    end,
            Swap = fun(T, LT) when T =:= LT#local_tab.name ->
                           Expunge(),
                           if
                               Storage =:= ram_copies, LT#local_tab.opened =:= false ->
                                   %% A ram table without records in the
                                   %% backup gets no files at all
                                   ok;
                               true ->
                                   %% Open and close a log at DcdFile, then
                                   %% make the logged records the dcl file
                                   Log = mnesia_log:open_log(fallback_tab,
                                                             mnesia_log:dcd_log_header(),
                                                             DcdFile,
                                                             false),
                                   mnesia_log:close_log(Log),
                                   case LT#local_tab.opened of
                                       true ->
                                           Close(T,LT);
                                       false ->
                                           Open(T,LT),
                                           Close(T,LT)
                                   end,
                                   case file:rename(TmpFile, DclFile) of
                                       ok ->
                                           ok;
                                       {error, Reason} ->
                                           mnesia_lib:fatal("Cannot rename file ~p -> ~p: ~p~n",
                                                            [TmpFile, DclFile, Reason])
                                   end
                           end
                   end,
            LocalTab = #local_tab{name         = Tab,
                                  storage_type = Storage,
                                  open         = Open,
                                  add          = Add,
                                  close        = Close,
                                  swap         = Swap,
                                  record_name  = RecName,
                                  opened       = false
                                 },
            ?ets_insert(LocalTabs, LocalTab),
            create_dat_files(Tail, LocalTabs)
    end;
create_dat_files([{schema, Tab} | Tail], LocalTabs) ->
    %% The table is to be deleted, forget everything about it
    ?ets_delete(LocalTabs, Tab),
    ok = dets:delete(schema, {schema, Tab}),
    TmpFile = mnesia_lib:tab2tmp(Tab),
    mnesia_lib:dets_sync_close(Tab),
    file:delete(TmpFile),
    create_dat_files(Tail, LocalTabs);
create_dat_files([], _LocalTabs) ->
    ok.

%% Removes an installed fallback on all nodes with a disc resident schema
uninstall_fallback() ->
    uninstall_fallback([{scope, global}]).

%% Removes an installed fallback. Args is a list of fallback
%% options, see check_fallback_arg_type/2.
%%
%% Returns ok | {error, Reason}
uninstall_fallback(Args) ->
    case check_fallback_args(Args, #fallback_args{}) of
        {ok, FA} ->
            do_uninstall_fallback(FA);
        {error, Reason} ->
            {error, Reason}
    end.

do_uninstall_fallback(FA) ->
    %% Ensure that we access the intended Mnesia
    %% directory. This function may not be called
    %% during startup since it will cause the
    %% application_controller to get into deadlock
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        ok ->
            %% The work is done by a linked master process
            Pid = spawn_link(?MODULE, uninstall_fallback_master, [self(), FA]),
            receive
                {'EXIT', Pid, Reason} -> % if appl has trapped exit
                    {error, {'EXIT', Reason}};
                {Pid, Res} ->
                    Res
            end;
        {error, Reason} ->
            {error, Reason}
    end.

%% Body of the master process of an uninstallation. The nodes holding
%% the fallback are derived from the schema found in the local fallback.
uninstall_fallback_master(ClientPid, FA) ->
    process_flag(trap_exit, true),

    FA2 = check_fallback_dir(ClientPid, FA), % May exit
    Bup = FA2#fallback_args.fallback_bup,
    case fallback_to_schema(Bup) of
        {ok, fallback, List} ->
            Cs = mnesia_schema:list2cs(List),
            case catch get_fallback_nodes(FA, Cs#cstruct.disc_copies) of
                Ns when is_list(Ns) ->
                    do_uninstall(ClientPid, Ns, FA);
                {error, Reason} ->
                    local_fallback_error(ClientPid, Reason)
            end;
        {error, Reason} ->
            local_fallback_error(ClientPid, Reason)
    end.
%% Uninstalls the fallback on the nodes Ns while holding the global
%% schema lock, reports the result to the client and terminates.
%%
%% Note that do_uninstall/5 only returns when some node failed to start;
%% otherwise it ends up in rec_uninstall/3, which replies to the client
%% and terminates the process by itself.
do_uninstall(ClientPid, Ns, FA) ->
    Args = [self(), FA],
    global:set_lock({{mnesia_table_lock, schema}, self()}, Ns, infinity),
    Pids = [spawn_link(N, ?MODULE, local_uninstall_fallback, Args) || N <- Ns],
    Res = do_uninstall(ClientPid, Pids, [], [], ok),
    global:del_lock({{mnesia_table_lock, schema}, self()}, Ns),
    ClientPid ! {self(), Res},
    unlink(ClientPid),
    exit(shutdown).

%% First phase: wait until every local uninstaller has either reported
%% started or failed. The uninstallation is only carried out if all of
%% them have started, otherwise the started ones are shut down.
do_uninstall(ClientPid, [Pid | Pids], GoodPids, BadNodes, Res) ->
    receive
        %% {'EXIT', ClientPid, _} ->
        %% client_exit;
        {'EXIT', Pid, Reason} ->
            BadNode = node(Pid),
            BadRes = {error, {"Uninstall fallback", BadNode, Reason}},
            do_uninstall(ClientPid, Pids, GoodPids, [BadNode | BadNodes], BadRes);
        {Pid, {error, Reason}} ->
            BadNode = node(Pid),
            BadRes = {error, {"Uninstall fallback", BadNode, Reason}},
            do_uninstall(ClientPid, Pids, GoodPids, [BadNode | BadNodes], BadRes);
        {Pid, started} ->
            do_uninstall(ClientPid, Pids, [Pid | GoodPids], BadNodes, Res)
    end;
do_uninstall(ClientPid, [], GoodPids, [], ok) ->
    %% Everybody is ready, go ahead
    lists:foreach(fun(Pid) -> Pid ! {self(), do_uninstall} end, GoodPids),
    rec_uninstall(ClientPid, GoodPids, ok);
do_uninstall(_ClientPid, [], GoodPids, BadNodes, BadRes) ->
    lists:foreach(fun(Pid) -> exit(Pid, shutdown) end, GoodPids),
    {error, {node_not_running, BadNodes, BadRes}}.

%% Body of the process that uninstalls the fallback on one node. It
%% reports started to Master and then waits for the go-ahead before
%% the fallback files are deleted.
local_uninstall_fallback(Master, FA) ->
    %% Don't trap exit

    register(mnesia_fallback, self()),        % May exit
    FA2 = check_fallback_dir(Master, FA), % May exit
    Master ! {self(), started},

    receive
        {Master, do_uninstall} ->
            ?eval_debug_fun({?MODULE, uninstall_fallback2, pre_delete}, []),
            catch mnesia_lib:set(active_fallback, false),
            Tmp = FA2#fallback_args.fallback_tmp,
            Bup = FA2#fallback_args.fallback_bup,
            file:delete(Tmp),
            %% The result of the uninstallation is the outcome of
            %% deleting the fallback file, if there is one
            Res =
                case fallback_exists(Bup) of
                    true -> file:delete(Bup);
                    false -> ok
                end,
            ?eval_debug_fun({?MODULE, uninstall_fallback2, post_delete}, []),
            Master ! {self(), Res},
            unlink(Master),
            exit(normal)
    end.
%% Second phase of the uninstallation: collects the result from each
%% local uninstaller, one process at a time, then replies to the client
%% and terminates. The last non-ok result wins.
rec_uninstall(ClientPid, [], Res) ->
    ClientPid ! {self(), Res},
    unlink(ClientPid),
    exit(normal);
rec_uninstall(ClientPid, [Pid | Pids], AccRes) ->
    %% An exit of ClientPid is deliberately not handled here
    NextRes =
        receive
            {'EXIT', Pid, R} ->
                {error, {node_not_running, {node(Pid), R}}};
            {Pid, ok} ->
                AccRes;
            {Pid, BadRes} ->
                BadRes
        end,
    rec_uninstall(ClientPid, Pids, NextRes).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Backup traversal

%% Iterate over a backup and produce a new backup.
%% Fun(BackupItem, Acc) is applied for each BackupItem.
%%
%% Valid BackupItems are:
%%
%%   {schema, Tab}              Table to be deleted
%%   {schema, Tab, CreateList}  Table to be created, CreateList may be empty
%%   {schema, db_nodes, DbNodes} List of nodes, defaults to [node()] OLD
%%   {schema, version, Version} Schema version                       OLD
%%   {schema, cookie, Cookie}   Unique schema cookie                 OLD
%%   {Tab, Key}                 Oid for record to be deleted
%%   Record                     Record to be inserted.
%%
%% The Fun must return a tuple {BackupItems, NewAcc}
%% where BackupItems is a list of valid BackupItems and
%% NewAcc is a new accumulator value. Once BackupItems
%% that are not schema related have been returned, no more
%% schema items may be returned. The schema related items
%% must always be first in the backup.
%%
%% If TargetMod =:= read_only, no new backup will be created.
%%
%% Opening of the source media will be performed by
%% SourceMod:open_read(Source)
%%
%% Opening of the target media will be performed by
%% TargetMod:open_write(Target)
traverse_backup(Source, Target, Fun, Acc) ->
    BackupMod = mnesia_monitor:get_env(backup_module),
    traverse_backup(Source, BackupMod, Target, BackupMod, Fun, Acc).

%% Runs the traversal in a linked helper process and waits for either
%% its result message or its exit signal.
traverse_backup(Source, SourceMod, Target, TargetMod, Fun, Acc) ->
    Worker = spawn_link(?MODULE, do_traverse_backup,
                        [self(), Source, SourceMod, Target, TargetMod, Fun, Acc]),
    receive
        {iter_done, Worker, Res} ->
            Res;
        {'EXIT', Worker, Reason} ->
            {error, {"Backup traversal crashed", Reason}}
    end.

%% Body of the traversal process. Opens the target (unless TargetMod is
%% read_only), iterates over the source with trav_apply/4, commits or
%% aborts the target and finally reports {iter_done, self(), Res} to
%% ClientPid.
do_traverse_backup(ClientPid, Source, SourceMod, Target, TargetMod, Fun, Acc) ->
    process_flag(trap_exit, true),
    WritesTarget = TargetMod =/= read_only,
    Iter =
        case WritesTarget of
            false ->
                ignore;
            true ->
                case catch do_apply(TargetMod, open_write, [Target], Target) of
                    {error, Error} ->
                        unlink(ClientPid),
                        ClientPid ! {iter_done, self(), {error, Error}},
                        exit(Error);
                    Opened ->
                        Opened
                end
        end,
    Start = {start, Fun, Acc, TargetMod, Iter},
    Res =
        case iterate(SourceMod, fun trav_apply/4, Source, Start) of
            {ok, {iter, _, Acc2, _, Iter2}} when WritesTarget ->
                case catch do_apply(TargetMod, commit_write, [Iter2], Iter2) of
                    {error, CommitReason} ->
                        {error, CommitReason};
                    _ ->
                        {ok, Acc2}
                end;
            {ok, {iter, _, Acc2, _, _}} ->
                {ok, Acc2};
            {error, Reason} when WritesTarget ->
                catch do_apply(TargetMod, abort_write, [Iter], Iter),
                {error, {"Backup traversal failed", Reason}};
            {error, Reason} ->
                {error, {"Backup traversal failed", Reason}}
        end,
    unlink(ClientPid),
    ClientPid ! {iter_done, self(), Res}.

%% Iteration callback. The first invocation (state tagged `start')
%% writes the header to the target, pushes the schema items through Fun
%% and then continues with the records; later invocations (state tagged
%% `iter') filter the records through Fun and write whatever it returns.
trav_apply(Recs, Header, Schema, {start, Fun, Acc, Mod, Iter}) ->
    Iter2 =
        case Mod of
            read_only -> Iter;
            _ -> do_apply(Mod, write, [Iter, [Header]], Iter)
        end,
    AfterSchema = trav_apply(Schema, Header, Schema, {iter, Fun, Acc, Mod, Iter2}),
    trav_apply(Recs, Header, Schema, AfterSchema);
trav_apply(Recs, _Header, _Schema, {iter, Fun, Acc, Mod, Iter}) ->
    {NewRecs, Acc2} = filter_foldl(Fun, Acc, Recs),
    Iter2 =
        case {Mod, NewRecs} of
            {read_only, _} -> Iter;
            {_, []} -> Iter;
            _ -> do_apply(Mod, write, [Iter, NewRecs], Iter)
        end,
    {iter, Fun, Acc2, Mod, Iter2}.
+ +filter_foldl(Fun, Acc, [Head|Tail]) -> + case Fun(Head, Acc) of + {HeadItems, HeadAcc} when is_list(HeadItems) -> + {TailItems, TailAcc} = filter_foldl(Fun, HeadAcc, Tail), + {HeadItems ++ TailItems, TailAcc}; + Other -> + throw({error, {"Fun must return a list", Other}}) + end; +filter_foldl(_Fun, Acc, []) -> + {[], Acc}. + diff --git a/lib/mnesia/src/mnesia_checkpoint.erl b/lib/mnesia/src/mnesia_checkpoint.erl new file mode 100644 index 0000000000..eb8fe38908 --- /dev/null +++ b/lib/mnesia/src/mnesia_checkpoint.erl @@ -0,0 +1,1295 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_checkpoint). + +%% TM callback interface +-export([ + tm_add_copy/2, + tm_change_table_copy_type/3, + tm_del_copy/2, + tm_mnesia_down/1, + tm_prepare/1, + tm_retain/4, + tm_retain/5, + tm_enter_pending/1, + tm_enter_pending/3, + tm_exit_pending/1, + convert_cp_record/1 + ]). + +%% Public interface +-export([ + activate/1, + checkpoints/0, + deactivate/1, + deactivate/2, + iterate/6, + most_local_node/2, + really_retain/2, + stop/0, + stop_iteration/1, + tables_and_cookie/1 + ]). + +%% Internal +-export([ + call/2, + cast/2, + init/1, + remote_deactivate/1, + start/1 + ]). + +%% sys callback interface +-export([ + system_code_change/4, + system_continue/3, + system_terminate/4 + ]). + +-include("mnesia.hrl"). 
-import(mnesia_lib, [add/2, del/2, set/2, unset/1]).
-import(mnesia_lib, [dbg_out/2]).

-record(checkpoint_args, {name = {now(), node()},
                          allow_remote = true,
                          ram_overrides_dump = false,
                          nodes = [],
                          node = node(),
                          now = now(),
                          cookie = ?unique_cookie,
                          min = [],
                          max = [],
                          pending_tab,
                          wait_for_old, % Initially undefined then List
                          is_activated = false,
                          ignore_new = [],
                          retainers = [],
                          iterators = [],
                          supervisor,
                          pid
                         }).

%% Old record definition
-record(checkpoint, {name,
                     allow_remote,
                     ram_overrides_dump,
                     nodes,
                     node,
                     now,
                     min,
                     max,
                     pending_tab,
                     wait_for_old,
                     is_activated,
                     ignore_new,
                     retainers,
                     iterators,
                     supervisor,
                     pid
                    }).

-record(retainer, {cp_name, tab_name, store, writers = [], really_retain = true}).

-record(iter, {tab_name, oid_tab, main_tab, retainer_tab, source, val, pid}).

-record(pending, {tid, disc_nodes = [], ram_nodes = []}).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% TM callback functions

%% Sends `stop' to every locally known checkpoint and returns ok.
%% NOTE(review): retainer_loop/1 has no clause for a `stop' request, so
%% call/2 only returns here once the checkpoint process terminates --
%% confirm whether `deactivate' was intended.
stop() ->
    _ = [call(Name, stop) || Name <- checkpoints()],
    ok.

%% Starts a local retainer process for the checkpoint, unless a
%% checkpoint with the same name is already known on this node.
tm_prepare(#checkpoint_args{name = Name} = Cp) ->
    case lists:member(Name, checkpoints()) of
        true ->
            {error, {already_exists, Name, node()}};
        false ->
            start_retainer(Cp)
    end;
tm_prepare(#checkpoint{} = OldCp) ->
    %% Node with old protocol sent an old checkpoint record
    %% and we have to convert it
    case convert_cp_record(OldCp) of
        {ok, NewCp} ->
            tm_prepare(NewCp);
        {error, _} = Error ->
            Error
    end.

%% Tells every local checkpoint that Node has gone down.
tm_mnesia_down(Node) ->
    _ = [cast(Name, {mnesia_down, Node}) || Name <- checkpoints()],
    ok.

%% Returns pending
tm_enter_pending(Tid, DiscNs, RamNs) ->
    tm_enter_pending(#pending{tid = Tid,
                              disc_nodes = DiscNs,
                              ram_nodes = RamNs}).

tm_enter_pending(Pending) ->
    tm_enter_pending(val(pending_checkpoints), Pending).

%% Inserts Pending into every pending-table in the list (insert
%% failures are ignored) and returns Pending.
tm_enter_pending(Tabs, Pending) ->
    lists:foreach(fun(Tab) -> catch ?ets_insert(Tab, Pending) end, Tabs),
    Pending.

%% Notifies every pending checkpoint process that Tid is no longer
%% pending. Returns Tid.
tm_exit_pending(Tid) ->
    tm_exit_pending(val(pending_checkpoint_pids), Tid).

tm_exit_pending(Pids, Tid) ->
    Sender = self(),
    lists:foreach(fun(Pid) -> Pid ! {Sender, {exit_pending, Tid}} end, Pids),
    Tid.

%% Records each Tid as a #pending{} entry (without node lists) in Tab.
enter_still_pending(Tids, Tab) ->
    lists:foreach(fun(Tid) -> ?ets_insert(Tab, #pending{tid = Tid}) end, Tids),
    ok.


%% Looks up checkpoints for functions in mnesia_tm.
tm_retain(Tid, Tab, Key, Op) ->
    case val({Tab, commit_work}) of
        [{checkpoints, Names} | _] ->
            tm_retain(Tid, Tab, Key, Op, Names);
        _ ->
            undefined
    end.

%% Fetches the records about to be overwritten, hands them to the
%% checkpoints and returns them. clear_table retains the whole table,
%% grouped per key; every other operation retains the records of Key.
tm_retain(Tid, Tab, _Key, clear_table, Checkpoints) ->
    OldRecs = mnesia_lib:db_match_object(Tab, '_'),
    send_group_retain(OldRecs, Checkpoints, Tid, Tab, []),
    OldRecs;
tm_retain(Tid, Tab, Key, _Op, Checkpoints) ->
    OldRecs = mnesia_lib:db_get(Tab, Key),
    send_retain(Checkpoints, {retain, Tid, Tab, Key, OldRecs}),
    OldRecs.

%% Walks the record list and emits one retain message per run of
%% consecutive records sharing the same key (element 2). Group holds the
%% current run in reverse order.
send_group_retain([Rec | Recs], Checkpoints, Tid, Tab, [Prev | _] = Group)
  when element(2, Rec) /= element(2, Prev) ->
    Msg = {retain, Tid, Tab, element(2, Prev), lists:reverse(Group)},
    send_retain(Checkpoints, Msg),
    send_group_retain(Recs, Checkpoints, Tid, Tab, [Rec]);
send_group_retain([Rec | Recs], Checkpoints, Tid, Tab, Group) ->
    send_group_retain(Recs, Checkpoints, Tid, Tab, [Rec | Group]);
send_group_retain([], Checkpoints, Tid, Tab, [Prev | _] = Group) ->
    Msg = {retain, Tid, Tab, element(2, Prev), lists:reverse(Group)},
    send_retain(Checkpoints, Msg),
    ok;
send_group_retain([], _Checkpoints, _Tid, _Tab, []) ->
    ok.

%% Casts Msg to every named checkpoint.
send_retain(Names, Msg) ->
    lists:foreach(fun(Name) -> cast(Name, Msg) end, Names),
    ok.

%% Informs the checkpoints attached to Tab that a replica has been
%% added on a remote Node.
tm_add_copy(Tab, Node) when Node /= node() ->
    case val({Tab, commit_work}) of
        [{checkpoints, Names} | _] ->
            AddCopy = fun(Name) -> call(Name, {add_copy, Tab, Node}) end,
            map_call(AddCopy, Names, ok);
        _ ->
            ok
    end.

%% Informs the checkpoints attached to Tab that the local replica is
%% being removed. Table subscriptions are dropped first.
tm_del_copy(Tab, Node) when Node == node() ->
    mnesia_subscr:unsubscribe_table(Tab),
    case val({Tab, commit_work}) of
        [{checkpoints, Names} | _] ->
            DelCopy = fun(Name) -> call(Name, {del_copy, Tab, Node}) end,
            map_call(DelCopy, Names, ok);
        _ ->
            ok
    end.

%% Informs the checkpoints attached to Tab that the storage type of the
%% local replica changes from From to To.
tm_change_table_copy_type(Tab, From, To) ->
    case val({Tab, commit_work}) of
        [{checkpoints, Names} | _] ->
            ChangeCopy = fun(Name) -> call(Name, {change_copy, Tab, From, To}) end,
            map_call(ChangeCopy, Names, ok);
        _ ->
            ok
    end.

%% Applies Fun to every checkpoint name. A checkpoint that no longer
%% exists is skipped; any other error replaces the accumulated result
%% (so the last error wins), but the remaining names are still visited.
map_call(Fun, Names, Res0) ->
    lists:foldl(
      fun(Name, Res) ->
              case Fun(Name) of
                  ok ->
                      Res;
                  {error, {no_exists, Name}} ->
                      Res;
                  {error, Reason} ->
                      %% BUGBUG: We may end up with some checkpoint retainers
                      %% too much in the add_copy case. How do we remove them?
                      {error, Reason}
              end
      end, Res0, Names).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Public functions

%% Deactivates the named checkpoint on all nodes recorded in it.
deactivate(Name) ->
    case call(Name, get_checkpoint) of
        {error, _} = Error ->
            Error;
        Cp ->
            deactivate(Cp#checkpoint_args.nodes, Name)
    end.

%% Asks every node in Nodes to deactivate the checkpoint. The replies
%% are not inspected; the result is always ok.
deactivate(Nodes, Name) ->
    rpc:multicall(Nodes, ?MODULE, remote_deactivate, [Name]),
    ok.

remote_deactivate(Name) ->
    call(Name, deactivate).

checkpoints() ->
    val(checkpoints).

%% Returns {ok, Tabs, Cookie} for the named checkpoint, where Tabs is
%% the min tables followed by the max tables.
tables_and_cookie(Name) ->
    case call(Name, get_checkpoint) of
        {error, _} = Error ->
            Error;
        Cp ->
            {ok,
             Cp#checkpoint_args.min ++ Cp#checkpoint_args.max,
             Cp#checkpoint_args.cookie}
    end.

%% Picks the node whose retainer for Tab should be used: the local node
%% if it is one of the writers, otherwise the first writer.
most_local_node(Name, Tab) ->
    case ?catch_val({Tab, {retainer, Name}}) of
        {'EXIT', _} ->
            {error, {"No retainer attached to table", [Tab, Name]}};
        R ->
            Writers = R#retainer.writers,
            case {lists:member(node(), Writers), Writers} of
                {true, _} ->
                    {ok, node()};
                {false, [First | _]} ->
                    {ok, First};
                {false, []} ->
                    {error, {"No retainer attached to table", [Tab, Name]}}
            end
    end.

%% Returns the really_retain flag of the retainer for Tab in checkpoint
%% Name.
really_retain(Name, Tab) ->
    Retainer = val({Tab, {retainer, Name}}),
    Retainer#retainer.really_retain.

%% Activate a checkpoint.
%%
%% A checkpoint is a transaction consistent state that may be used to
%% perform a distributed backup or to rollback the involved tables to
%% their old state. Backups may also be used to restore tables to
%% their old state. Args is a list of the following tuples:
%%
%% {name, Name}
%%    Name of checkpoint. Each checkpoint must have a name which
%%    is unique on the reachable nodes. The name may be reused when
%%    the checkpoint has been deactivated.
%%    By default a probably unique name is generated.
%%    Multiple checkpoints may be set on the same table.
%%
%% {allow_remote, Bool}
%%    false means that all retainers must be local. If the
%%    table does not reside locally, the checkpoint fails.
%%    true allows retainers on other nodes.
%%
%% {min, MinTabs}
%%    Minimize redundancy and only keep checkpoint info together with
%%    one replica, preferably at the local node. If any node involved
%%    in the checkpoint goes down, the checkpoint is deactivated.
%%
%% {max, MaxTabs}
%%    Maximize redundancy and keep checkpoint info together with all
%%    replicas. The checkpoint becomes more fault tolerant if the
%%    tables have several replicas. When new replicas are added, they
%%    will also get a retainer attached to them.
%%
%% {ram_overrides_dump, Bool}
%% {ram_overrides_dump, Tabs}
%%    Only applicable for ram_copies. Bool controls which versions of
%%    the records that should be included in the checkpoint state.
%%    true means that the latest committed records in ram (i.e. the
%%    records that the application accesses) should be included
%%    in the checkpoint. false means that the records dumped to
%%    dat-files (the records that will be loaded at startup) should
%%    be included in the checkpoint. Tabs is a list of tables.
%%    Default is false.
%%
%% {ignore_new, TidList}
%%    Normally we wait for all pending transactions to complete
%%    before we allow iteration over the checkpoint. But in order
%%    to cope with checkpoint activation inside a transaction that
%%    currently prepares commit (mnesia_init:get_net_work_copy) we
%%    need to have the ability to ignore the enclosing transaction.
%%    We do not wait for the transactions in TidList to end. The
%%    transactions in TidList are regarded as newer than the checkpoint.

activate(Args) ->
    case args2cp(Args) of
        {error, _} = Error ->
            Error;
        {ok, Cp} ->
            do_activate(Cp)
    end.

%% Turns the option list into a fully populated #checkpoint_args{}.
%% An exit raised while checking an option is converted to {error, _}.
args2cp(Args) when not is_list(Args) ->
    {error, {badarg, Args}};
args2cp(Args) ->
    case catch lists:foldl(fun check_arg/2, #checkpoint_args{}, Args) of
        {'EXIT', Reason} ->
            {error, Reason};
        Cp ->
            case check_tables(Cp) of
                {ok, Overriders, AllTabs} ->
                    arrange_retainers(Cp, Overriders, AllTabs);
                {error, Reason} ->
                    {error, Reason}
            end
    end.

%% Validates one activation option and stores it in the checkpoint
%% record. Exits with {already_exists, Name} or {badarg, Arg} when the
%% option is not acceptable.
check_arg({name, Name}, Cp) ->
    case lists:member(Name, checkpoints()) of
        true ->
            exit({already_exists, Name});
        false ->
            ok
    end,
    %% The name ends up in the retainer file name, so it must be
    %% possible to build one from it.
    case catch tab2retainer({foo, Name}) of
        Fname when is_list(Fname) ->
            Cp#checkpoint_args{name = Name};
        _ ->
            exit({badarg, Name})
    end;
check_arg({allow_remote, Bool}, Cp) when is_boolean(Bool) ->
    Cp#checkpoint_args{allow_remote = Bool};
check_arg({ram_overrides_dump, Bool}, Cp) when is_boolean(Bool) ->
    Cp#checkpoint_args{ram_overrides_dump = Bool};
check_arg({ram_overrides_dump, Tabs}, Cp) when is_list(Tabs) ->
    Cp#checkpoint_args{ram_overrides_dump = Tabs};
check_arg({min, Tabs}, Cp) when is_list(Tabs) ->
    Cp#checkpoint_args{min = Tabs};
check_arg({max, Tabs}, Cp) when is_list(Tabs) ->
    Cp#checkpoint_args{max = Tabs};
check_arg({ignore_new, Tids}, Cp) when is_list(Tids) ->
    Cp#checkpoint_args{ignore_new = Tids};
check_arg(Arg, _Cp) ->
    exit({badarg, Arg}).

%% Checks that the table options of the checkpoint are consistent.
%%
%% Returns {ok, Overriders, AllTabs}, where AllTabs is min ++ max and
%% Overriders is the list of tables for which ram overrides dump, or
%% {error, {combine_error, Name, ConflictingArgs}} when
%%   - a table is listed both as min and as max,
%%   - neither min nor max tables are given, or
%%   - ram_overrides_dump names a table that is neither min nor max.
%%
%% The last check used to demand that every overriding table was a
%% member of both min and max. As min and max must be disjoint, that
%% rejected every non-empty table list.
check_tables(Cp) ->
    Name = Cp#checkpoint_args.name,
    Min = Cp#checkpoint_args.min,
    Max = Cp#checkpoint_args.max,
    AllTabs = Min ++ Max,
    DoubleTabs = [T || T <- Min, lists:member(T, Max)],
    Overriders = Cp#checkpoint_args.ram_overrides_dump,
    if
        DoubleTabs /= [] ->
            {error, {combine_error, Name,
                     [{min, DoubleTabs}, {max, DoubleTabs}]}};
        Min == [], Max == [] ->
            {error, {combine_error, Name,
                     [{min, Min}, {max, Max}]}};
        Overriders == false ->
            {ok, [], AllTabs};
        Overriders == true ->
            {ok, AllTabs, AllTabs};
        is_list(Overriders) ->
            case [T || T <- Overriders, not lists:member(T, AllTabs)] of
                [] ->
                    {ok, Overriders, AllTabs};
                Outsiders ->
                    {error, {combine_error, Name,
                             [{ram_overrides_dump, Outsiders},
                              {min, Min},
                              {max, Max}]}}
            end
    end.

%% Creates one #retainer{} per table, with the writers chosen by
%% select_writers/2, and stores them together with the union of all
%% writer nodes in the checkpoint record. An exit from
%% select_writers/2 is turned into {error, Reason}.
arrange_retainers(Cp, Overriders, AllTabs) ->
    Template = #retainer{cp_name = Cp#checkpoint_args.name},
    case catch [Template#retainer{tab_name = Tab,
                                  writers = select_writers(Cp, Tab)}
                || Tab <- AllTabs] of
        {'EXIT', Reason} ->
            {error, Reason};
        Retainers ->
            {ok, Cp#checkpoint_args{ram_overrides_dump = Overriders,
                                    retainers = Retainers,
                                    nodes = writers(Retainers)}}
    end.

%% Selects the nodes that shall hold a retainer for Tab: every
%% candidate for a max table; for a min table the local node if it is a
%% candidate, otherwise the first candidate. Exits when no candidate
%% replica is available.
select_writers(Cp, Tab) ->
    case filter_remote(Cp, val({Tab, active_replicas})) of
        [] ->
            exit({"Cannot prepare checkpoint (replica not available)",
                  [Tab, Cp#checkpoint_args.name]});
        Writers ->
            This = node(),
            case {lists:member(Tab, Cp#checkpoint_args.max),
                  lists:member(This, Writers)} of
                {true, _} -> Writers; % Max
                {false, true} -> [This];
                {false, false} -> [hd(Writers)]
            end
    end.

%% With allow_remote set to true all writers are acceptable; otherwise
%% only the local node is, and only if it is among the writers.
filter_remote(#checkpoint_args{allow_remote = true}, Writers) ->
    Writers;
filter_remote(_Cp, Writers) ->
    Local = node(),
    case lists:member(Local, Writers) of
        false -> [];
        true -> [Local]
    end.

%% The set of all writer nodes of the given retainers.
writers(Retainers) ->
    %% Later retainers' writers come first, as in a left fold that
    %% prepends each writer list to the accumulator.
    AllWriters = lists:append([R#retainer.writers
                               || R <- lists:reverse(Retainers)]),
    mnesia_lib:uniq(AllWriters).

%% First activation phase: let the transaction manager prepare the
%% checkpoint on all involved nodes.
do_activate(Cp) ->
    #checkpoint_args{name = Name, nodes = Nodes, ignore_new = IgnoreNew} = Cp,
    case mnesia_tm:prepare_checkpoint(Nodes, Cp) of
        {Replies, []} ->
            check_prep(Replies, Name, Nodes, IgnoreNew);
        {_, BadNodes} ->
            {error, {"Cannot prepare checkpoint (bad nodes)",
                     [Name, BadNodes]}}
    end.

%% Verifies that every node answered the prepare request positively
%% before moving on to collecting the pending transactions.
check_prep([], Name, Nodes, IgnoreNew) ->
    collect_pending(Name, Nodes, IgnoreNew);
check_prep([{ok, Name, IgnoreNew, _Node} | Replies], Name, Nodes, IgnoreNew) ->
    check_prep(Replies, Name, Nodes, IgnoreNew);
check_prep([{error, Reason} | _Replies], Name, _Nodes, _IgnoreNew) ->
    {error, {"Cannot prepare checkpoint (bad reply)",
             [Name, Reason]}};
check_prep([{badrpc, Reason} | _Replies], Name, _Nodes, _IgnoreNew) ->
    {error, {"Cannot prepare checkpoint (badrpc)",
             [Name, Reason]}}.

%% Second activation phase: fetch the pending transactions from every
%% node and merge them in a temporary ets bag.
collect_pending(Name, Nodes, IgnoreNew) ->
    case rpc:multicall(Nodes, ?MODULE, call, [Name, collect_pending]) of
        {_, [_ | _] = BadNodes} ->
            deactivate(Nodes, Name),
            {error, {"Cannot collect from pending checkpoint", Name, BadNodes}};
        {Replies, []} ->
            case catch ?ets_new_table(mnesia_union, [bag]) of
                {'EXIT', Reason} -> %% system limit
                    Msg = "Cannot create an ets table pending union",
                    {error, {system_limit, Msg, Reason}};
                UnionTab ->
                    compute_union(Replies, Nodes, Name, UnionTab, IgnoreNew)
            end
    end.

%% Merges the pending transactions reported by each node into UnionTab.
%% Any negative reply aborts the activation: the checkpoint is
%% deactivated on all nodes and the union table is removed.
compute_union([{ok, Pending} | Replies], Nodes, Name, UnionTab, IgnoreNew) ->
    add_pending(Pending, UnionTab),
    compute_union(Replies, Nodes, Name, UnionTab, IgnoreNew);
compute_union([{error, Reason} | _Replies], Nodes, Name, UnionTab, _IgnoreNew) ->
    deactivate(Nodes, Name),
    ?ets_delete_table(UnionTab),
    {error, Reason};
compute_union([{badrpc, Reason} | _Replies], Nodes, Name, UnionTab, _IgnoreNew) ->
    deactivate(Nodes, Name),
    ?ets_delete_table(UnionTab),
    {error, {badrpc, Reason}};
compute_union([], Nodes, Name, UnionTab, IgnoreNew) ->
    send_activate(Nodes, Nodes, Name, UnionTab, IgnoreNew).

%% Registers every pending transaction under each of its disc and ram
%% nodes.
add_pending([P | Pending], UnionTab) ->
    add_pending_node(P#pending.disc_nodes, P#pending.tid, UnionTab),
    add_pending_node(P#pending.ram_nodes, P#pending.tid, UnionTab),
    add_pending(Pending, UnionTab);
add_pending([], _UnionTab) ->
    ok.

%% Inserts one {Node, Tid} entry per node.
add_pending_node(Nodes, Tid, UnionTab) ->
    lists:foreach(fun(Node) -> ?ets_insert(UnionTab, {Node, Tid}) end, Nodes),
    ok.

%% Final activation phase: tell each node, one at a time, which
%% transactions it has to wait for (those registered for the node, minus
%% the ones in IgnoreNew).
%%
%% Returns {ok, Name, AllNodes} when every node has been activated.
%% If a node fails, the checkpoint is deactivated on *all* involved
%% nodes. Deactivating only the not yet visited ones would leave
%% retainers running on the nodes that were already activated.
send_activate([Node | Nodes], AllNodes, Name, UnionTab, IgnoreNew) ->
    Pending = [Tid || {_, Tid} <- ?ets_lookup(UnionTab, Node),
                      not lists:member(Tid, IgnoreNew)],
    case rpc:call(Node, ?MODULE, call, [Name, {activate, Pending}]) of
        activated ->
            send_activate(Nodes, AllNodes, Name, UnionTab, IgnoreNew);
        {badrpc, Reason} ->
            deactivate(AllNodes, Name),
            ?ets_delete_table(UnionTab),
            {error, {"Activation failed (bad node)", Name, Node, Reason}};
        {error, Reason} ->
            deactivate(AllNodes, Name),
            ?ets_delete_table(UnionTab),
            {error, {"Activation failed", Name, Node, Reason}}
    end;
send_activate([], AllNodes, Name, UnionTab, _IgnoreNew) ->
    ?ets_delete_table(UnionTab),
    {ok, Name, AllNodes}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Checkpoint server

%% Asynchronous request to the local checkpoint process.
%% Returns {ok, Pid}, or {error, {no_exists, Name}} if no such
%% checkpoint is registered on this node.
cast(Name, Msg) ->
    case ?catch_val({checkpoint, Name}) of
        {'EXIT', _} ->
            {error, {no_exists, Name}};

        Pid when is_pid(Pid) ->
            Pid ! {self(), Msg},
            {ok, Pid}
    end.

%% Synchronous request to the local checkpoint process.
%% Returns the reply of the server, {error, {no_exists, Name}} if the
%% checkpoint is unknown, or {error, {"Got exit", [Name, Reason]}} if
%% the server terminates before replying.
%%
%% The monitor is always removed with the flush option. The server
%% exits right after replying to `deactivate', so a plain demonitor
%% could leave a stray 'DOWN' message in the caller's mailbox.
call(Name, Msg) ->
    case ?catch_val({checkpoint, Name}) of
        {'EXIT', _} ->
            {error, {no_exists, Name}};

        Pid when is_pid(Pid) ->
            Monitor = erlang:monitor(process, Pid), % Always local
            Pid ! {self(), Msg},
            Self = self(),
            receive
                {'EXIT', Pid, Reason} ->
                    erlang:demonitor(Monitor, [flush]),
                    {error, {"Got exit", [Name, Reason]}};
                {'DOWN', Monitor, _, Pid, Reason} ->
                    {error, {"Got exit", [Name, Reason]}};
                {Name, Self, Reply} ->
                    erlang:demonitor(Monitor, [flush]),
                    Reply
            end;
        Error ->
            Error
    end.

%% Casts Msg to the named checkpoint on every node in Nodes.
abcast(Nodes, Name, Msg) ->
    rpc:eval_everywhere(Nodes, ?MODULE, cast, [Name, Msg]).

%% Sends the reply in the format call/2 waits for.
reply(nopid, _Name, _Reply) ->
    ignore;
reply(ReplyTo, Name, Reply) ->
    ReplyTo ! {Name, ReplyTo, Reply}.

%% Starts the checkpoint process under mnesia_checkpoint_sup.
%% Returns {ok, Name, IgnoreNew, Node} or {error, Reason}
start_retainer(Cp) ->
    % Will never be restarted
    Name = Cp#checkpoint_args.name,
    case supervisor:start_child(mnesia_checkpoint_sup, [Cp]) of
        {ok, _Pid} ->
            {ok, Name, Cp#checkpoint_args.ignore_new, node()};
        {error, Reason} ->
            {error, {"Cannot create checkpoint retainer",
                     Name, node(), Reason}}
    end.

%% Supervisor start function; records the caller as the supervisor of
%% the new checkpoint process.
start(Cp) ->
    Name = Cp#checkpoint_args.name,
    Args = [Cp#checkpoint_args{supervisor = self()}],
    mnesia_monitor:start_proc({?MODULE, Name}, ?MODULE, init, Args).

%% Entry point of the checkpoint process. Creates the table of pending
%% transactions, prepares the retainers, registers the checkpoint in
%% the mnesia_lib store, acknowledges the start to the supervisor and
%% enters the server loop.
init(Cp) ->
    process_flag(trap_exit, true),
    process_flag(priority, high), %% Needed dets files might starve the system
    Name = Cp#checkpoint_args.name,
    TabOpts = [set, public, {keypos, 2}],
    case catch ?ets_new_table(mnesia_pending_checkpoint, TabOpts) of
        {'EXIT', Reason} -> %% system limit
            Msg = "Cannot create an ets table for pending transactions",
            Failure = {error, {system_limit, Name, Msg, Reason}},
            proc_lib:init_ack(Cp#checkpoint_args.supervisor, Failure);
        PendingTab ->
            Self = self(),
            Prepared = [prepare_tab(Cp, R) || R <- Cp#checkpoint_args.retainers],
            Cp2 = Cp#checkpoint_args{retainers = Prepared,
                                     pid = Self,
                                     pending_tab = PendingTab},
            add(pending_checkpoint_pids, Self),
            add(pending_checkpoints, PendingTab),
            set({checkpoint, Name}, Self),
            add(checkpoints, Name),
            dbg_out("Checkpoint ~p (~p) started~n", [Name, Self]),
            proc_lib:init_ack(Cp2#checkpoint_args.supervisor, {ok, Self}),
            retainer_loop(Cp2)
    end.

%% Prepares the retainer using the current storage type of its table.
prepare_tab(Cp, R) ->
    Storage = val({R#retainer.tab_name, storage_type}),
    prepare_tab(Cp, R, Storage).

%% If the local node is one of the writers, a retainer store is created
%% and the checkpoint is registered for the table; otherwise only the
%% retainer record (without a store) is published.
prepare_tab(Cp, R, Storage) ->
    #retainer{tab_name = Tab, cp_name = Name, writers = Writers} = R,
    case lists:member(node(), Writers) of
        false ->
            set({Tab, {retainer, Name}}, R#retainer{store = undefined}),
            R;
        true ->
            R2 = retainer_create(Cp, R, Tab, Name, Storage),
            set({Tab, {retainer, Name}}, R2),
            %% Keep checkpoint info for table_info & mnesia_session
            add({Tab, checkpoints}, Name),
            add_chkp_info(Tab, Name),
            R2
    end.

%% Makes sure Name is listed in the {checkpoints, _} entry that heads
%% the commit_work list of Tab.
add_chkp_info(Tab, Name) ->
    case val({Tab, commit_work}) of
        [{checkpoints, Known} | Work] ->
            case lists:member(Name, Known) of
                true ->
                    ok;
                false ->
                    mnesia_lib:set({Tab, commit_work},
                                   [{checkpoints, [Name | Known]} | Work])
            end;
        Work ->
            %% OBS checkpoints needs to be first in the list!
            mnesia_lib:set({Tab, commit_work},
                           [{checkpoints, [Name]} | Work])
    end.

%% File name of the disc resident retainer for Tab in checkpoint Name.
tab2retainer({Tab, Name}) ->
    PrintedName = lists:flatten(io_lib:write(Name)),
    Base = lists:concat([?MODULE, "_", Tab, "_", PrintedName, ".RET"]),
    mnesia_lib:dir(Base).

%% Creates the retainer store: a dets file for disc_only_copies tables,
%% an ets table for everything else. For the ets case the really_retain
%% flag is decided by prepare_ram_tab/5.
retainer_create(_Cp, R, Tab, Name, disc_only_copies) ->
    Fname = tab2retainer({Tab, Name}),
    file:delete(Fname),
    DetsArgs = [{file, Fname}, {type, set}, {keypos, 2}, {repair, false}],
    {ok, _} = mnesia_lib:dets_sync_open({Tab, Name}, DetsArgs),
    dbg_out("Checkpoint retainer created ~p ~p~n", [Name, Tab]),
    R#retainer{store = {dets, {Tab, Name}}, really_retain = true};
retainer_create(Cp, R, Tab, Name, Storage) ->
    Store = ?ets_new_table(mnesia_retainer, [set, public, {keypos, 2}]),
    RamOverrides = lists:member(Tab, Cp#checkpoint_args.ram_overrides_dump),
    Really = prepare_ram_tab(Tab, Store, Storage,
                             R#retainer.really_retain, RamOverrides),
    dbg_out("Checkpoint retainer created ~p ~p~n", [Name, Tab]),
    R#retainer{store = {ets, Store}, really_retain = Really}.

%% Copy the dumped table into retainer if needed
%% If the really_retain flag already has been set to false,
%% it should remain false even if we change storage type
%% while the checkpoint is activated.
prepare_ram_tab(Tab, T, ram_copies, true, false) ->
    DcdFile = mnesia_lib:tab2dcd(Tab),
    case mnesia_lib:exists(DcdFile) of
        false ->
            ok;
        true ->
            Log = mnesia_log:open_log(prepare_ram_tab,
                                      mnesia_log:dcd_log_header(),
                                      DcdFile, true,
                                      mnesia_monitor:get_env(auto_repair),
                                      read_only),
            %% Accumulates all dumped records with the same key in one
            %% retainer entry.
            Retain =
                fun(Rec) ->
                        Key = element(2, Rec),
                        Earlier =
                            case ?ets_lookup(T, Key) of
                                [{_, _, Old}] -> Old;
                                [] -> []
                            end,
                        ?ets_insert(T, {Tab, Key, [Rec | Earlier]}),
                        continue
                end,
            traverse_dcd(mnesia_log:chunk_log(Log, start), Log, Retain),
            mnesia_log:close_log(Log)
    end,
    false;
prepare_ram_tab(_Tab, _T, _Storage, ReallyRetain, _RamOverrides) ->
    ReallyRetain.

%% Applies Fun to every record in the dcd log, chunk by chunk, skipping
%% a leading dcd log header.
traverse_dcd(eof, _Log, _Fun) ->
    ok;
traverse_dcd({Cont, [LogH | Rest]}, Log, Fun)
  when is_record(LogH, log_header),
       LogH#log_header.log_kind == dcd_log,
       LogH#log_header.log_version >= "1.0" ->
    traverse_dcd({Cont, Rest}, Log, Fun); %% BUGBUG Error handling repaired files
traverse_dcd({Cont, Recs}, Log, Fun) -> %% trashed data??
    lists:foreach(Fun, Recs),
    NextChunk = mnesia_log:chunk_log(Log, Cont),
    traverse_dcd(NextChunk, Log, Fun).

%% Thin access layer over the two kinds of retainer stores.
retainer_get({ets, Tid}, Key) -> ?ets_lookup(Tid, Key);
retainer_get({dets, Name}, Key) -> dets:lookup(Name, Key).

retainer_put({ets, Tid}, Val) -> ?ets_insert(Tid, Val);
retainer_put({dets, Name}, Val) -> dets:insert(Name, Val).

retainer_first({ets, Tid}) -> ?ets_first(Tid);
retainer_first({dets, Name}) -> dets:first(Name).

retainer_next({ets, Tid}, Key) -> ?ets_next(Tid, Key);
retainer_next({dets, Name}, Key) -> dets:next(Name, Key).

%% retainer_next_slot(Tab, Pos) ->
%%     case retainer_slot(Tab, Pos) of
%%         '$end_of_table' ->
%%             '$end_of_table';
%%         [] ->
%%             retainer_next_slot(Tab, Pos + 1);
%%         Recs when is_list(Recs) ->
%%             {Pos, Recs}
%%     end.
%%
%% retainer_slot({ets, Store}, Pos) -> ?ets_next(Store, Pos);
%% retainer_slot({dets, Store}, Pos) -> dets:slot(Store, Pos).

%% Fixes or releases the table that is iterated over; this is either a
%% plain mnesia table (given by name) or a retainer store.
retainer_fixtable(Tab, Bool) when is_atom(Tab) ->
    Storage = val({Tab, storage_type}),
    mnesia_lib:db_fixtable(Storage, Tab, Bool);
retainer_fixtable({ets, Tid}, Bool) ->
    mnesia_lib:db_fixtable(ram_copies, Tid, Bool);
retainer_fixtable({dets, Name}, Bool) ->
    mnesia_lib:db_fixtable(disc_only_copies, Name, Bool).

%% Removes the retainer store, including its file for dets stores.
retainer_delete({ets, Tid}) ->
    ?ets_delete_table(Tid);
retainer_delete({dets, DetsName}) ->
    mnesia_lib:dets_sync_close(DetsName),
    file:delete(tab2retainer(DetsName)).

%% Server loop of the checkpoint process. Cp is the #checkpoint_args{}
%% state. All requests arrive as {From, Request}; synchronous ones are
%% answered through reply/3.
%%
%% Requests guarded by wait_for_old == [] are only accepted once every
%% transaction that was pending at activation time has finished; until
%% then they stay in the mailbox.
retainer_loop(Cp) ->
    Name = Cp#checkpoint_args.name,
    receive
        {_From, {retain, Tid, Tab, Key, OldRecs}}
          when Cp#checkpoint_args.wait_for_old == [] ->
            R = val({Tab, {retainer, Name}}),
            PendingTab = Cp#checkpoint_args.pending_tab,
            case R#retainer.really_retain of
                true when PendingTab =:= undefined ->
                    Store = R#retainer.store,
                    case retainer_get(Store, Key) of
                        [] -> retainer_put(Store, {Tab, Key, OldRecs});
                        _ -> already_retained
                    end;
                true ->
                    %% Updates made by transactions registered in the
                    %% pending table are not retained.
                    case ets:member(PendingTab, Tid) of
                        true -> ignore;
                        false ->
                            Store = R#retainer.store,
                            case retainer_get(Store, Key) of
                                [] -> retainer_put(Store, {Tab, Key, OldRecs});
                                _ -> already_retained
                            end
                    end;
                false ->
                    ignore
            end,
            retainer_loop(Cp);

        %% Adm
        {From, deactivate} ->
            do_stop(Cp),
            reply(From, Name, deactivated),
            unlink(From),
            exit(shutdown);

        {'EXIT', Parent, _} when Parent == Cp#checkpoint_args.supervisor ->
            %% do_stop(Cp),
            %% assume that entire Mnesia is terminating
            exit(shutdown);

        {_From, {mnesia_down, Node}} ->
            Cp2 = do_del_retainers(Cp, Node),
            retainer_loop(Cp2);
        {From, get_checkpoint} ->
            reply(From, Name, Cp),
            retainer_loop(Cp);
        {From, {add_copy, Tab, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
            {Res, Cp2} = do_add_copy(Cp, Tab, Node),
            reply(From, Name, Res),
            retainer_loop(Cp2);
        {From, {del_copy, Tab, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
            Cp2 = do_del_copy(Cp, Tab, Node),
            reply(From, Name, ok),
            retainer_loop(Cp2);
        %% The storage types must not reuse the variable From: a pattern
        %% that binds From both to the sender and to the source storage
        %% type can never match, which leaves the caller waiting for a
        %% reply.
        {From, {change_copy, Tab, FromType, ToType}}
          when Cp#checkpoint_args.wait_for_old == [] ->
            Cp2 = do_change_copy(Cp, Tab, FromType, ToType),
            reply(From, Name, ok),
            retainer_loop(Cp2);
        {_From, {add_retainer, R, Node}} ->
            Cp2 = do_add_retainer(Cp, R, Node),
            retainer_loop(Cp2);
        {_From, {del_retainer, R, Node}} when Cp#checkpoint_args.wait_for_old == [] ->
            Cp2 = do_del_retainer(Cp, R, Node),
            retainer_loop(Cp2);

        %% Iteration
        {From, {iter_begin, Iter}} when Cp#checkpoint_args.wait_for_old == [] ->
            Cp2 = iter_begin(Cp, From, Iter),
            retainer_loop(Cp2);

        {From, {iter_end, Iter}} when Cp#checkpoint_args.wait_for_old == [] ->
            retainer_fixtable(Iter#iter.oid_tab, false),
            Iters = Cp#checkpoint_args.iterators -- [Iter],
            reply(From, Name, ok),
            retainer_loop(Cp#checkpoint_args{iterators = Iters});

        {_From, {exit_pending, Tid}}
          when is_list(Cp#checkpoint_args.wait_for_old) ->
            StillPending = lists:delete(Tid, Cp#checkpoint_args.wait_for_old),
            Cp2 = Cp#checkpoint_args{wait_for_old = StillPending},
            Cp3 = maybe_activate(Cp2),
            retainer_loop(Cp3);

        {From, collect_pending} ->
            PendingTab = Cp#checkpoint_args.pending_tab,
            del(pending_checkpoints, PendingTab),
            Pending = ?ets_match_object(PendingTab, '_'),
            reply(From, Name, {ok, Pending}),
            retainer_loop(Cp);

        {From, {activate, Pending}} ->
            StillPending = mnesia_recover:still_pending(Pending),
            enter_still_pending(StillPending, Cp#checkpoint_args.pending_tab),
            Cp2 = maybe_activate(Cp#checkpoint_args{wait_for_old = StillPending}),
            reply(From, Name, activated),
            retainer_loop(Cp2);

        {'EXIT', From, _Reason} ->
            %% Drop (and unfix) the iterators owned by the dead process.
            Iters = [Iter || Iter <- Cp#checkpoint_args.iterators,
                             check_iter(From, Iter)],
            retainer_loop(Cp#checkpoint_args{iterators = Iters});

        {system, From, Msg} ->
            dbg_out("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
            sys:handle_system_msg(Msg, From, no_parent, ?MODULE, [], Cp)
    end.

%% Marks the checkpoint as activated, and forgets the pending table,
%% once there are no old transactions left to wait for.
maybe_activate(Cp)
  when Cp#checkpoint_args.wait_for_old == [],
       Cp#checkpoint_args.is_activated == false ->
    Cp#checkpoint_args{pending_tab = undefined, is_activated = true};
maybe_activate(Cp) ->
    Cp.

%% Starts an iteration on behalf of From: resolves the tables to use,
%% fixes the table that supplies the keys and remembers the iterator.
iter_begin(Cp, From, Iter) ->
    Name = Cp#checkpoint_args.name,
    Retainer = val({Iter#iter.tab_name, {retainer, Name}}),
    Started = (init_tabs(Retainer, Iter))#iter{pid = From},
    retainer_fixtable(Started#iter.oid_tab, true),
    reply(From, Name, {ok, Started, self()}),
    Cp#checkpoint_args{iterators = [Started | Cp#checkpoint_args.iterators]}.

%% Unregisters the checkpoint from the mnesia_lib store, removes its
%% retainers and releases the tables fixed by ongoing iterations.
do_stop(Cp) ->
    Name = Cp#checkpoint_args.name,
    del(pending_checkpoints, Cp#checkpoint_args.pending_tab),
    del(pending_checkpoint_pids, self()),
    del(checkpoints, Name),
    unset({checkpoint, Name}),
    lists:foreach(fun deactivate_tab/1, Cp#checkpoint_args.retainers),
    Unfix = fun(Iter) -> retainer_fixtable(Iter#iter.oid_tab, false) end,
    lists:foreach(Unfix, Cp#checkpoint_args.iterators).

%% Removes the local traces of one retainer. The store itself is only
%% deleted if the local node is one of its writers. Any failure is
%% ignored.
deactivate_tab(R) ->
    Name = R#retainer.cp_name,
    Tab = R#retainer.tab_name,
    try
        case {R#retainer.store, lists:member(node(), R#retainer.writers)} of
            {undefined, _} ->
                ignore;
            {Store, true} ->
                retainer_delete(Store);
            {_, false} ->
                ignore
        end,
        unset({Tab, {retainer, Name}}),
        del({Tab, checkpoints}, Name), %% Keep checkpoint info for table_info & mnesia_session
        del_chkp_info(Tab, Name)
    catch _:_ -> ignore
    end.

%% Removes Name from the {checkpoints, _} entry heading the commit_work
%% list of Tab; the entry is dropped altogether when it becomes empty.
del_chkp_info(Tab, Name) ->
    case val({Tab, commit_work}) of
        [{checkpoints, Names} | Work] ->
            NewWork =
                case lists:delete(Name, Names) of
                    [] ->
                        %% The only checkpoint was deleted
                        Work;
                    Remaining ->
                        [{checkpoints, Remaining} | Work]
                end,
            mnesia_lib:set({Tab, commit_work}, NewWork);
        _ ->
            ignore
    end.

%% Removes Node as writer from every retainer of the checkpoint.
do_del_retainers(Cp, Node) ->
    Updated = [do_del_retainer2(Cp, R, Node)
               || R <- Cp#checkpoint_args.retainers],
    Cp#checkpoint_args{retainers = Updated, nodes = writers(Updated)}.

%% Removes Node from the writers of one retainer and publishes the
%% updated record. If no writer remains, the whole checkpoint is
%% stopped and the process exits. If the removed writer is the local
%% node, the local retainer is deactivated.
do_del_retainer2(Cp, R, Node) ->
    Writers = R#retainer.writers -- [Node],
    R2 = R#retainer{writers = Writers},
    RetainerKey = {R2#retainer.tab_name, {retainer, R2#retainer.cp_name}},
    set(RetainerKey, R2),
    case Writers of
        [] ->
            Event = {mnesia_checkpoint_deactivated, Cp#checkpoint_args.name},
            mnesia_lib:report_system_event(Event),
            do_stop(Cp),
            exit(shutdown);
        _ when Node == node() ->
            deactivate_tab(R), % Avoids unnecessary tm_retain accesses
            set(RetainerKey, R2),
            R2;
        _ ->
            R2
    end.

%% Removes Node as writer of the retainer matching R0.
do_del_retainer(Cp, R0, Node) ->
    {Found, Others} = find_retainer(R0, Cp#checkpoint_args.retainers, []),
    Retainers = [do_del_retainer2(Cp, Found, Node) | Others],
    Cp#checkpoint_args{retainers = Retainers, nodes = writers(Retainers)}.

%% The local replica of Tab is removed: tell the other nodes of the
%% checkpoint and drop the local node as writer.
do_del_copy(Cp, Tab, ThisNode) when ThisNode == node() ->
    Name = Cp#checkpoint_args.name,
    Retainer = val({Tab, {retainer, Name}}),
    OtherNodes = Cp#checkpoint_args.nodes -- [ThisNode],
    abcast(OtherNodes, Name, {del_retainer, Retainer, ThisNode}),
    do_del_retainer(Cp, Retainer, ThisNode).

%% A replica of Tab has been added on a remote Node. Only max tables
%% get a retainer on the new node. If the node is not yet part of the
%% checkpoint, the checkpoint is first prepared there and, when the
%% schema is one of the max tables, the schema retainer is sent too.
%% Returns {Result, NewCp}.
do_add_copy(Cp, Tab, Node) when Node /= node() ->
    case lists:member(Tab, Cp#checkpoint_args.max) of
        false ->
            {ok, Cp};
        true ->
            Name = Cp#checkpoint_args.name,
            R0 = val({Tab, {retainer, Name}}),
            R = R0#retainer{writers = R0#retainer.writers ++ [Node]},
            case lists:member(Node, Cp#checkpoint_args.nodes) of
                true ->
                    send_retainer(Cp, R, Node);
                false ->
                    case tm_remote_prepare(Node, Cp) of
                        {ok, Name, _IgnoreNew, Node} ->
                            CpWithSchema =
                                case lists:member(schema, Cp#checkpoint_args.max) of
                                    true ->
                                        %% We need to send schema retainer somewhere
                                        RS0 = val({schema, {retainer, Name}}),
                                        RS1 = RS0#retainer{
                                                writers = RS0#retainer.writers ++ [Node]},
                                        {ok, Cp1} = send_retainer(Cp, RS1, Node),
                                        Cp1;
                                    false ->
                                        Cp
                                end,
                            send_retainer(CpWithSchema, R, Node);
                        {badrpc, Reason} ->
                            {{error, {badrpc, Reason}}, Cp};
                        {error, Reason} ->
                            {{error, Reason}, Cp}
                    end
            end
    end.

%% Run tm_prepare/1 for the checkpoint on a remote node via rpc.
tm_remote_prepare(Node, Cp) ->
    rpc:call(Node, ?MODULE, tm_prepare, [Cp]).

%% Install the writers of R0 in the matching retainer of Cp. When Node
%% is the local node the table is prepared locally as well. The updated
%% retainer is stored and the checkpoint state is returned.
do_add_retainer(Cp, R0, Node) ->
    Writers = R0#retainer.writers,
    {Found, Others} = find_retainer(R0, Cp#checkpoint_args.retainers, []),
    Updated = Found#retainer{writers = Writers},
    NewRet =
        case Node == node() of
            true -> prepare_tab(Cp, Updated);
            false -> Updated
        end,
    Rs = [NewRet | Others],
    Key = {NewRet#retainer.tab_name, {retainer, NewRet#retainer.cp_name}},
    set(Key, NewRet),
    Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.

%% Split the retainer list into the retainer that has the same
%% checkpoint name and table name as the wanted one, and all the others.
%% There is no clause for an exhausted list, so a missing retainer
%% raises function_clause.
find_retainer(#retainer{cp_name = CpName, tab_name = Tab},
              [Hit = #retainer{cp_name = CpName, tab_name = Tab} | Tail],
              Skipped) ->
    {Hit, Tail ++ Skipped};
find_retainer(Wanted, [Other | Tail], Skipped) ->
    find_retainer(Wanted, Tail, [Other | Skipped]).

%% Announce retainer R for Node: broadcast add_retainer to the other
%% checkpoint nodes, cast it on Node itself via rpc, stream the current
%% retainer contents to Node and finally update the local state.
send_retainer(Cp, R, Node) ->
    CpName = Cp#checkpoint_args.name,
    Peers = Cp#checkpoint_args.nodes -- [Node, node()],
    Msg = {add_retainer, R, Node},
    abcast(Peers, CpName, Msg),
    {ok, _} = rpc:call(Node, ?MODULE, cast, [CpName, Msg]),
    Store = R#retainer.store,
    send_retainer2(Node, CpName, Store, retainer_first(Store)),
    {ok, do_add_retainer(Cp, R, Node)}.

%% Walk the retainer store key by key and send every entry to Node as a
%% retain message.
send_retainer2(Node, Name, Store, Key) ->
    case Key of
        '$end_of_table' ->
            ok;
        _ ->
            [{Tab, _, Records}] = retainer_get(Store, Key),
            Msg = {retain, {dirty, send_retainer}, Tab, Key, Records},
            abcast([Node], Name, Msg),
            send_retainer2(Node, Name, Store, retainer_next(Store, Key))
    end.

%% The storage type of Tab changes from FromType to ToType: prepare a
%% retainer store of the new type and move the retained data over.
%% - from disc_only_copies: close the dets store, load it into the new
%%   ets store and delete the retainer file;
%% - to disc_only_copies: open a dets store, dump the ets store into it
%%   and delete the ets table;
%% - otherwise nothing has to be moved.
do_change_copy(Cp, Tab, FromType, ToType) ->
    CpName = Cp#checkpoint_args.name,
    OldRet = val({Tab, {retainer, CpName}}),
    NewRet = prepare_tab(Cp, OldRet, ToType),
    {_, Old} = OldRet#retainer.store,
    {_, New} = NewRet#retainer.store,

    Fname = tab2retainer({Tab, CpName}),
    case {FromType, ToType} of
        {disc_only_copies, _} ->
            mnesia_lib:dets_sync_close(Old),
            loaded = mnesia_lib:dets_to_ets(Old, New, Fname, set, no, yes),
            ok = file:delete(Fname);
        {_, disc_only_copies} ->
            TabSize = ?ets_info(Old, size),
            Props = [{file, Fname},
                     {type, set},
                     {keypos, 2},
                     {estimated_no_objects, TabSize + 256},
                     {repair, false}],
            {ok, _} = mnesia_lib:dets_sync_open(New, Props),
            ok = mnesia_dumper:raw_dump_table(New, Old),
            ?ets_delete_table(Old);
        _ ->
            ignore
    end,
    Rs = lists:keyreplace(Tab, #retainer.tab_name,
                          Cp#checkpoint_args.retainers, NewRet),
    Cp#checkpoint_args{retainers = Rs, nodes = writers(Rs)}.

%% Filter predicate over iterators: an iterator owned by From gets its
%% table fix released and yields false; every other term yields true.
check_iter(From, #iter{pid = Owner} = Iter) when Owner == From ->
    retainer_fixtable(Iter#iter.oid_tab, false),
    false;
check_iter(_From, _Iter) ->
    true.

%% Fill in the main, retainer and oid tables of an iterator.
%% The oid table is the main table when the source is 'table' and the
%% retainer store when the source is 'retainer'.
init_tabs(R, Iter) ->
    {Kind, _} = Store = R#retainer.store,
    Main = {Kind, Iter#iter.tab_name},
    OidTab =
        case Iter#iter.source of
            table -> Main;
            retainer -> Store
        end,
    Iter#iter{main_tab = Main, retainer_tab = Store, oid_tab = OidTab}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Iteration
%%
%% Iterates over a table and applies Fun(ListOfRecords, Acc) to a
%% suitable amount of records at a time (the records of up to 500
%% keys). ListOfRecords is [] when the iteration is over.
%%
%% Source selects which internal table the keys are taken from and is
%% either the atom table or the atom retainer. Val selects which
%% values are picked for each key and is either the atom latest or
%% the atom checkpoint.
%%
%% The iteration may be performed either over the main table (which
%% contains the latest values of the records, i.e. the values that
%% are visible to the applications) or over the checkpoint retainer
%% (which contains the values as they looked at the point in time
%% when the checkpoint was activated).
%%
%% It is possible to iterate over the main table and pick values
%% from the retainer and vice versa.

iterate(Name, Tab, Fun, Acc, Source, Val) ->
    Template = #iter{tab_name = Tab, source = Source, val = Val},
    case call(Name, {iter_begin, Template}) of
        {error, Reason} ->
            {error, Reason};
        {ok, Iter, Pid} ->
            link(Pid), % We don't want any pending fixtable's
            Outcome = (catch iter(Fun, Acc, Iter)),
            unlink(Pid),
            call(Name, {iter_end, Iter}),
            iterate_result(Outcome)
    end.

%% Translate the caught outcome of iter/3 into the result of iterate/6.
iterate_result({'EXIT', Reason}) -> {error, Reason};
iterate_result({error, Reason}) -> {error, Reason};
iterate_result(FinalAcc) -> {ok, FinalAcc}.

%% Start at the first key of the oid table.
iter(Fun, Acc, Iter) ->
    iter(Fun, Acc, Iter, retainer_first(Iter#iter.oid_tab)).

%% Feed Fun one batch at a time. Fun is always called a last time with
%% an empty record list once the end of the table has been reached.
iter(Fun, Acc, Iter, Key) ->
    case get_records(Iter, Key) of
        {'$end_of_table', []} ->
            Fun([], Acc);
        {'$end_of_table', Batch} ->
            Fun([], Fun(Batch, Acc));
        {NextKey, Batch} ->
            iter(Fun, Fun(Batch, Acc), Iter, NextKey)
    end.

%% Abort an ongoing iteration by throwing; iterate/6 catches the throw
%% and returns {error, {stopped, Reason}}.
stop_iteration(Reason) ->
    throw({error, {stopped, Reason}}).

%% Collect the records of at most 500 keys starting at Key.
%% Returns {NextKey, Records}.
get_records(Iter, Key) ->
    get_records(Iter, Key, 500, []). % 500 keys

get_records(_Iter, Key, 0, Chunks) ->
    {Key, lists:append(lists:reverse(Chunks))};
get_records(_Iter, '$end_of_table', _Left, Chunks) ->
    {'$end_of_table', lists:append(lists:reverse(Chunks))};
get_records(Iter, Key, Left, Chunks) ->
    Recs = get_val(Iter, Key),
    NextKey = retainer_next(Iter#iter.oid_tab, Key),
    get_records(Iter, NextKey, Left - 1, [Recs | Chunks]).

%% Pick the values for Key as requested by the iterator's val field.
get_val(#iter{val = latest} = Iter, Key) ->
    get_latest_val(Iter, Key);
get_val(#iter{val = checkpoint} = Iter, Key) ->
    get_checkpoint_val(Iter, Key).

%% Latest values for Key, always read from the main table. When
%% iterating over the retainer a delete oid {Tab, Key} is put in front
%% of the records.
get_latest_val(Iter, Key) when Iter#iter.source == table ->
    retainer_get(Iter#iter.main_tab, Key);
get_latest_val(Iter, Key) when Iter#iter.source == retainer ->
    DeleteOid = {Iter#iter.tab_name, Key},
    [DeleteOid | retainer_get(Iter#iter.main_tab, Key)].

%% Checkpoint values for Key. When iterating over the main table the
%% records are read from the main table. When iterating over the
%% retainer the retained records are returned, preceded by a delete
%% oid {Tab, Key}.
get_checkpoint_val(Iter, Key) when Iter#iter.source == table ->
    retainer_get(Iter#iter.main_tab, Key);
get_checkpoint_val(Iter, Key) when Iter#iter.source == retainer ->
    DeleteOid = {Iter#iter.tab_name, Key},
    case retainer_get(Iter#iter.retainer_tab, Key) of
        [{_, _, []}] -> [DeleteOid];
        [{_, _, Records}] -> [DeleteOid | Records]
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% System upgrade

system_continue(_Parent, _Debug, Cp) ->
    retainer_loop(Cp).

system_terminate(_Reason, _Parent,_Debug, Cp) ->
    do_stop(Cp).

system_code_change(Cp, _Module, _OldVsn, _Extra) ->
    {ok, Cp}.

%% Convert between the #checkpoint{} and the #checkpoint_args{}
%% representation of a checkpoint.
%%
%% #checkpoint{} -> {ok, #checkpoint_args{}}
%%   ram_overrides_dump 'true' becomes the list min ++ max, 'false'
%%   becomes []. A fresh cookie is generated.
%% #checkpoint_args{} -> {ok, #checkpoint{}} | {error, Reason}
%%   ram_overrides_dump [] becomes 'false', a list equal to min ++ max
%%   becomes 'true'. Any other list cannot be expressed in the other
%%   record and yields an error.
%%
%% allow_remote is copied from the allow_remote field of the source
%% record (it used to be filled in with the checkpoint name).
convert_cp_record(Cp) when is_record(Cp, checkpoint) ->
    ROD =
        case Cp#checkpoint.ram_overrides_dump of
            true -> Cp#checkpoint.min ++ Cp#checkpoint.max;
            false -> []
        end,

    {ok, #checkpoint_args{name = Cp#checkpoint.name,
                          allow_remote = Cp#checkpoint.allow_remote,
                          ram_overrides_dump = ROD,
                          nodes = Cp#checkpoint.nodes,
                          node = Cp#checkpoint.node,
                          now = Cp#checkpoint.now,
                          cookie = ?unique_cookie,
                          min = Cp#checkpoint.min,
                          max = Cp#checkpoint.max,
                          pending_tab = Cp#checkpoint.pending_tab,
                          wait_for_old = Cp#checkpoint.wait_for_old,
                          is_activated = Cp#checkpoint.is_activated,
                          ignore_new = Cp#checkpoint.ignore_new,
                          retainers = Cp#checkpoint.retainers,
                          iterators = Cp#checkpoint.iterators,
                          supervisor = Cp#checkpoint.supervisor,
                          pid = Cp#checkpoint.pid
                         }};
convert_cp_record(Cp) when is_record(Cp, checkpoint_args) ->
    AllTabs = Cp#checkpoint_args.min ++ Cp#checkpoint_args.max,
    ROD = case Cp#checkpoint_args.ram_overrides_dump of
              [] ->
                  false;
              AllTabs ->
                  true;
              _ ->
                  error
          end,
    if
        ROD == error ->
            {error, {"Old node cannot handle new checkpoint protocol",
                     ram_overrides_dump}};
        true ->
            {ok, #checkpoint{name = Cp#checkpoint_args.name,
                             allow_remote = Cp#checkpoint_args.allow_remote,
                             ram_overrides_dump = ROD,
                             nodes = Cp#checkpoint_args.nodes,
                             node = Cp#checkpoint_args.node,
                             now = Cp#checkpoint_args.now,
                             min = Cp#checkpoint_args.min,
                             max = Cp#checkpoint_args.max,
                             pending_tab = Cp#checkpoint_args.pending_tab,
                             wait_for_old = Cp#checkpoint_args.wait_for_old,
                             is_activated = Cp#checkpoint_args.is_activated,
                             ignore_new = Cp#checkpoint_args.ignore_new,
                             retainers = Cp#checkpoint_args.retainers,
                             iterators = Cp#checkpoint_args.iterators,
                             supervisor = Cp#checkpoint_args.supervisor,
                             pid = Cp#checkpoint_args.pid
                            }}
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Look up a Mnesia variable; on failure defer to mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
        _VaLuE_ -> _VaLuE_
    end.

diff --git a/lib/mnesia/src/mnesia_checkpoint_sup.erl b/lib/mnesia/src/mnesia_checkpoint_sup.erl
new file mode 100644
index 0000000000..2fe8df52f7
--- /dev/null
+++ b/lib/mnesia/src/mnesia_checkpoint_sup.erl
@@ -0,0 +1,42 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_checkpoint_sup).

-behaviour(supervisor).

-export([start/0, init/1]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% top supervisor callback functions

%% Start the checkpoint supervisor, registered locally under the
%% module name.
start() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% sub supervisor callback functions

%% simple_one_for_one supervisor whose children are started with
%% mnesia_checkpoint:start and are of restart type 'transient'.
init([]) ->
    %% Trust the top supervisor
    RestartPolicy = {simple_one_for_one, 0, timer:hours(24)},
    Shutdown = mnesia_kernel_sup:supervisor_timeout(timer:seconds(3)),
    ChildSpec = {?MODULE,
                 {mnesia_checkpoint, start, []},
                 transient,
                 Shutdown,
                 worker,
                 [?MODULE, mnesia_checkpoint, supervisor]},
    {ok, {RestartPolicy, [ChildSpec]}}.

diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
new file mode 100644
index 0000000000..9bc480e619
--- /dev/null
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -0,0 +1,2182 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
%% The mnesia_init process loads tables from local disc or from
%% other nodes. It also coordinates updates of the info about
%% where we can read and write tables.
%%
%% Tables may need to be loaded initially at startup of the local
%% node or when other nodes announce that they already have loaded
%% tables that we also want.
+%% +%% Initially we set the load request queue to those tables that we +%% safely can load locally, i.e. tables where we have the last +%% consistent replica and we have received mnesia_down from all +%% other nodes holding the table. Then we let the mnesia_init +%% process enter its normal working state. +%% +%% When we need to load a table we append a request to the load +%% request queue. All other requests are regarded as high priority +%% and are processed immediately (e.g. update table whereabouts). +%% We process the load request queue as a "background" job. + +-module(mnesia_controller). + +-behaviour(gen_server). + +%% Mnesia internal stuff +-export([ + start/0, + i_have_tab/1, + info/0, + get_info/1, + get_workers/1, + force_load_table/1, + async_dump_log/1, + sync_dump_log/1, + connect_nodes/1, + wait_for_schema_commit_lock/0, + release_schema_commit_lock/0, + create_table/1, + get_disc_copy/1, + get_cstructs/0, + sync_and_block_table_whereabouts/4, + sync_del_table_copy_whereabouts/2, + block_table/1, + unblock_table/1, + block_controller/0, + unblock_controller/0, + unannounce_add_table_copy/2, + master_nodes_updated/2, + mnesia_down/1, + add_active_replica/2, + add_active_replica/3, + add_active_replica/4, + update/1, + change_table_access_mode/1, + del_active_replica/2, + wait_for_tables/2, + get_network_copy/2, + merge_schema/0, + start_remote_sender/4, + schedule_late_disc_load/2 + ]). + +%% gen_server callbacks +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +%% Module internal stuff +-export([call/1, + cast/1, + dump_and_reply/2, + load_and_reply/2, + send_and_reply/2, + wait_for_tables_init/2, + connect_nodes2/2 + ]). + +-import(mnesia_lib, [set/2, add/2]). +-import(mnesia_lib, [fatal/2, error/2, verbose/2, dbg_out/2]). + +-include("mnesia.hrl"). + +-define(SERVER_NAME, ?MODULE). 

%% Server state of the controller process.
-record(state, {supervisor,
                schema_is_merged = false,
                early_msgs = [],
                loader_pid = [],        %% Was Pid is now [{Pid,Work}|..]
                loader_queue,           %% Was list is now gb_tree
                sender_pid = [],        %% Was a pid or undef is now [{Pid,Work}|..]
                sender_queue = [],
                late_loader_queue,      %% Was list is now gb_tree
                dumper_pid,             %% Dumper or schema commit pid
                dumper_queue = [],      %% Dumper or schema commit queue
                others = [],            %% Processes that needs the copier_done msg
                dump_log_timer_ref,
                is_stopping = false
               }).

%% Backwards Comp. Sender_pid is now a list of senders..
get_senders(#state{sender_pid = Senders}) when is_list(Senders) ->
    Senders.

%% Backwards Comp. loader_pid is now a list of loaders..
get_loaders(#state{loader_pid = Loaders}) when is_list(Loaders) ->
    Loaders.

%% Value of the no_table_loaders variable. When the variable cannot be
%% read it is initialised to 1 and 1 is returned.
max_loaders() ->
    case ?catch_val(no_table_loaders) of
        {'EXIT', _} ->
            mnesia_lib:set(no_table_loaders,1),
            1;
        Configured ->
            Configured
    end.

-record(schema_commit_lock, {owner}).
-record(block_controller, {owner}).

-record(dump_log, {initiated_by,
                   opt_reply_to
                  }).

-record(net_load, {table,
                   reason,
                   opt_reply_to,
                   cstruct = unknown
                  }).

-record(send_table, {table,
                     receiver_pid,
                     remote_storage
                    }).

-record(disc_load, {table,
                    reason,
                    opt_reply_to
                   }).

-record(late_load, {table,
                    reason,
                    opt_reply_to,
                    loaders
                   }).

-record(loader_done, {worker_pid,
                      is_loaded,
                      table_name,
                      needs_announce,
                      needs_sync,
                      needs_reply,
                      reply_to,
                      reply}).

-record(sender_done, {worker_pid,
                      worker_res,
                      table_name
                     }).

-record(dumper_done, {worker_pid,
                      worker_res
                     }).

%% Look up a Mnesia variable; on failure defer to mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Why} -> mnesia_lib:other_val(Var, Why);
        Found -> Found
    end.

%% Start the controller as a locally registered gen_server. The calling
%% process is handed to init/1 as the parent.
start() ->
    Options = [{timeout, infinity}],
    gen_server:start_link({local, ?SERVER_NAME}, ?MODULE, [self()], Options).

%% Request a log dump and wait for the reply.
sync_dump_log(InitBy) ->
    call({sync_dump_log, InitBy}).

%% Request a log dump by sending a plain message to the registered
%% controller; nothing is awaited.
async_dump_log(InitBy) ->
    ?SERVER_NAME ! {async_dump_log, InitBy}.

%% Wait for tables to be active
%% If needed, we will wait for Mnesia to start
%% If Mnesia stops, we will wait for Mnesia to restart
%% We will wait even if the list of tables is empty
%%
%% Timeout is 'infinity' or a non-negative integer; any other
%% combination of arguments yields {error, {badarg, Tabs, Timeout}}.
wait_for_tables(Tabs, Timeout)
  when is_list(Tabs), Timeout == infinity;
       is_list(Tabs), is_integer(Timeout), Timeout >= 0 ->
    do_wait_for_tables(Tabs, Timeout);
wait_for_tables(Tabs, Timeout) ->
    {error, {badarg, Tabs, Timeout}}.

%% A zero timeout only inspects the current state. Otherwise a linked
%% helper process does the waiting and reports back with a message.
do_wait_for_tables(Tabs, 0) ->
    reply_wait(Tabs);
do_wait_for_tables(Tabs, Timeout) ->
    Waiter = spawn_link(?MODULE, wait_for_tables_init, [self(), Tabs]),
    receive
        {?SERVER_NAME, Waiter, Result} ->
            Result;
        {'EXIT', Waiter, _} ->
            reply_wait(Tabs)
    after Timeout ->
            unlink(Waiter),
            exit(Waiter, timeout),
            reply_wait(Tabs)
    end.

%% ok when all Tabs are active, {timeout, Missing} when some are not,
%% and an error when the list of active tables cannot be obtained.
reply_wait(Tabs) ->
    case catch mnesia_lib:active_tables() of
        {'EXIT', _} ->
            {error, {node_not_running, node()}};
        Active when is_list(Active) ->
            case Tabs -- Active of
                [] -> ok;
                Missing -> {timeout, Missing}
            end
    end.

%% Body of the helper process spawned by do_wait_for_tables/2.
wait_for_tables_init(From, Tabs) ->
    process_flag(trap_exit, true),
    Controller = whereis(?SERVER_NAME),
    Result = wait_for_init(From, Tabs, Controller),
    From ! {?SERVER_NAME, self(), Result},
    unlink(From),
    exit(normal).

%% Link to the controller, ask it to report when the tables are synced
%% and collect the reports.
wait_for_init(From, Tabs, Init) ->
    case catch link(Init) of
        {'EXIT', _} ->
            %% Mnesia is not started
            {error, {node_not_running, node()}};
        true when is_pid(Init) ->
            cast({sync_tabs, Tabs, self()}),
            rec_tabs(Tabs, Tabs, From, Init)
    end.

%% Tell a waiting process that Tab is synced.
sync_reply(Waiter, Tab) ->
    Waiter ! {?SERVER_NAME, {tab_synced, Tab}}.

%% Receive one tab_synced message per table, in list order. Exits when
%% the requesting process or the controller terminates meanwhile.
rec_tabs([], _AllTabs, _From, Init) ->
    unlink(Init),
    ok;
rec_tabs([Tab | Pending], AllTabs, From, Init) ->
    receive
        {?SERVER_NAME, {tab_synced, Tab}} ->
            rec_tabs(Pending, AllTabs, From, Init);

        {'EXIT', From, _} ->
            %% This will trigger an exit signal
            %% to mnesia_init
            exit(wait_for_tables_timeout);

        {'EXIT', Init, _} ->
            %% Oops, mnesia_init stopped,
            exit(mnesia_stopped)
    end.

%% Ask the controller for the table definitions and the running nodes.
get_cstructs() ->
    call(get_cstructs).

%% Let the controller process evaluate Fun and return its result.
update(Fun) ->
    call({update,Fun}).


%% Report that Mnesia went down on Node. When the controller is not
%% running the event is passed on to mnesia_monitor directly.
mnesia_down(Node) ->
    case cast({mnesia_down, Node}) of
        {error, _} ->
            mnesia_monitor:mnesia_down(?SERVER_NAME, Node);
        _Sent ->
            ok
    end.

%% Link to the controller and wait until the schema commit lock is
%% granted. The link is removed by release_schema_commit_lock/0.
wait_for_schema_commit_lock() ->
    Controller = whereis(?SERVER_NAME),
    link(Controller),
    unsafe_call(wait_for_schema_commit_lock).

block_controller() ->
    call(block_controller).

unblock_controller() ->
    cast(unblock_controller).

%% Hand the schema commit lock back and remove the link that was set
%% up by wait_for_schema_commit_lock/0.
release_schema_commit_lock() ->
    cast({release_schema_commit_lock, self()}),
    Controller = whereis(?SERVER_NAME),
    unlink(Controller).

%% Special for preparation of add table copy
%%
%% We can't let the controller queue this one
%% because that may cause a deadlock between schema_operations
%% and initial table loadings which both take schema locks.
%% But we have to get copier_done msgs when the other side
%% goes down.
get_network_copy(Tab, Cs) ->
    call({add_other, self()}),
    Work = #net_load{table = Tab,
                     reason = {dumper,add_table_copy},
                     cstruct = Cs},
    %% I'll need this because it's linked through the subscriber;
    %% might be solved by using monitor in subscr instead.
    process_flag(trap_exit, true),
    Load = load_table_fun(Work),
    Outcome = (catch Load()),
    process_flag(trap_exit, false),
    call({del_other, self()}),
    network_copy_reply(Tab, Outcome).

%% Turn the outcome of the load fun into the result of
%% get_network_copy/2. A successful load is announced with
%% i_have_tab/1 when the loader asks for it.
network_copy_reply(Tab, #loader_done{is_loaded = true} = Done) ->
    Tab = Done#loader_done.table_name,
    case Done#loader_done.needs_announce of
        true ->
            i_have_tab(Tab);
        false ->
            ignore
    end,
    Done#loader_done.reply;
network_copy_reply(_Tab, #loader_done{} = Done) ->
    Done#loader_done.reply;
network_copy_reply(_Tab, Else) ->
    {not_loaded, Else}.

%% This function is invoked from the dumper
%%
%% There are two cases here:
%% startup ->
%%   no need for sync, since mnesia_controller not started yet
%% schema_trans ->
%%   already synced with mnesia_controller since the dumper
%%   is synchronously started from mnesia_controller

create_table(Tab) ->
    {loaded, ok} = mnesia_loader:disc_load_table(Tab, {dumper,create_table}).

%% Load Tab from disc on behalf of a change_table_copy_type operation.
get_disc_copy(Tab) ->
    disc_load_table(Tab, {dumper,change_table_copy_type}, undefined).

%% Force Tab to be loaded.
%% Returns ok instead of yes
force_load_table(Tab) when is_atom(Tab), Tab /= schema ->
    case ?catch_val({Tab, storage_type}) of
        Storage when Storage == ram_copies;
                     Storage == disc_copies;
                     Storage == disc_only_copies ->
            do_force_load_table(Tab);
        unknown ->
            set({Tab, load_by_force}, true),
            cast({force_load_updated, Tab}),
            wait_for_tables([Tab], infinity);
        {'EXIT', _} ->
            {error, {no_exists, Tab}}
    end;
force_load_table(Tab) ->
    {error, {bad_type, Tab}}.

%% A table whose load reason is 'unknown' or cannot be read is loaded
%% by force; for any other load reason nothing is done.
do_force_load_table(Tab) ->
    case ?catch_val({Tab, load_reason}) of
        unknown ->
            force_late_load(Tab);
        {'EXIT', _} ->
            force_late_load(Tab);
        _ ->
            ok
    end.

%% Mark Tab as loaded by force, schedule a late disc load on this node
%% and wait for the table.
force_late_load(Tab) ->
    set({Tab, load_by_force}, true),
    mnesia_late_loader:async_late_disc_load(node(), [Tab], forced_by_user),
    wait_for_tables([Tab], infinity).

%% Notify the controller about new master nodes for Tab. The schema
%% table is ignored.
master_nodes_updated(schema, _Masters) ->
    ignore;
master_nodes_updated(Tab, Masters) ->
    cast({master_nodes_updated, Tab, Masters}).

schedule_late_disc_load(Tabs, Reason) ->
    try_schedule_late_disc_load(Tabs, Reason, late_disc_load).

%% Collect the load intents of all other running nodes inside a
%% transaction holding a global write lock, and pass them to the
%% controller together with Tabs. The whole procedure is retried for
%% as long as some node fails to answer.
%% Nothing is done for an empty table list unless the message tag is
%% schema_is_merged.
try_schedule_late_disc_load([], _Reason, MsgTag)
  when MsgTag /= schema_is_merged ->
    ignore;
try_schedule_late_disc_load(Tabs, Reason, MsgTag) ->
    GetIntents =
        fun() ->
                Nodes = val({current, db_nodes}),
                mnesia:lock({global, mnesia_late_disc_load, Nodes}, write),
                Others = Nodes -- [node()],
                case multicall(Others, disc_load_intents) of
                    {Replies, []} ->
                        call({MsgTag, Tabs, Reason, Replies}),
                        done;
                    {_, BadNodes} ->
                        %% Some nodes did not respond, lets try again
                        {retry, BadNodes}
                end
        end,
    case mnesia:transaction(GetIntents) of
        {atomic, done} ->
            done;
        {atomic, {retry, BadNodes}} ->
            verbose("Retry late_load_tables because bad nodes: ~p~n",
                    [BadNodes]),
            try_schedule_late_disc_load(Tabs, Reason, MsgTag);
        {aborted, AbortReason} ->
            fatal("Cannot late_load_tables~p: ~p~n",
                  [[Tabs, Reason, MsgTag], AbortReason])
    end.

%% Connect to the nodes Ns and merge schemas with them. The work is
%% done by a linked helper process running connect_nodes2/2.
%% Returns {ok, ConnectedNodes} or {error, Reason}.
connect_nodes(Ns) ->
    case mnesia:system_info(is_running) of
        no ->
            {error, {node_not_running, node()}};
        yes ->
            Helper = spawn_link(?MODULE,connect_nodes2,[self(),Ns]),
            receive
                {?MODULE, Helper, Res, New} ->
                    connect_nodes_reply(Res, New);
                {'EXIT', Helper, Reason} ->
                    {error, Reason}
            end
    end.

%% Translate the merge result reported by the helper process.
connect_nodes_reply(ok, New) ->
    mnesia_lib:add_list(extra_db_nodes, New),
    {ok, New};
connect_nodes_reply({aborted, {throw, Str}}, _New) when is_list(Str) ->
    {error, {merge_schema_failed, lists:flatten(Str)}};
connect_nodes_reply(Else, _New) ->
    {error, Else}.

%% Helper process of connect_nodes/1: connect to Ns, merge the schema
%% with the newly connected nodes and report the merge result together
%% with those of Ns that ended up among the current db_nodes.
connect_nodes2(Father, Ns) ->
    Before = val({current, db_nodes}),
    abcast([node()|Ns], {merging_schema, node()}),
    {NewC, OldC} = mnesia_recover:connect_nodes(Ns),
    Reached = mnesia_lib:intersect(Ns, NewC ++OldC),
    Fresh = Reached -- Before,
    process_flag(trap_exit, true),
    Res = try_merge_schema(Fresh),
    multicall([node()|Ns], {schema_is_merged, [], late_merge, []}),
    After = val({current, db_nodes}),
    Father ! {?MODULE, self(), Res, mnesia_lib:intersect(Ns,After)},
    unlink(Father),
    ok.

%% Merge the local schema with the schema on other nodes.
%% But first we must let all processes that want to force
%% load tables wait until the schema merge is done.

merge_schema() ->
    Result = try_merge_schema(mnesia_lib:all_nodes()),
    case Result of
        ok ->
            schema_is_merged();
        {aborted, {throw, Str}} when is_list(Str) ->
            fatal("Failed to merge schema: ~s~n", [Str]);
        Else ->
            fatal("Failed to merge schema: ~p~n", [Else])
    end.

%% Keep merging until mnesia_schema reports that there is nothing left
%% to merge with. Returns ok, or whatever unexpected result the schema
%% merge produced.
try_merge_schema(Nodes) ->
    case mnesia_schema:merge_schema() of
        {atomic, not_merged} ->
            %% No more nodes that we need to merge the schema with
            ok;
        {atomic, {merged, OldFriends, NewFriends}} ->
            %% Check if new nodes has been added to the schema
            Added = mnesia_lib:all_nodes() -- [node() | Nodes],
            mnesia_recover:connect_nodes(Added),

            %% Tell everybody to adopt orphan tables
            im_running(OldFriends, NewFriends),
            im_running(NewFriends, OldFriends),

            try_merge_schema(Nodes);
        {atomic, {"Cannot get cstructs", Node, Reason}} ->
            dbg_out("Cannot get cstructs, Node ~p ~p~n", [Node, Reason]),
            timer:sleep(1000), % Avoid a endless loop look alike
            try_merge_schema(Nodes);
        Unexpected ->
            Unexpected
    end.

%% Tell the controllers on OldFriends that this node is running and
%% which nodes are its new friends.
im_running(OldFriends, NewFriends) ->
    abcast(OldFriends, {im_running, node(), NewFriends}).

schema_is_merged() ->
    %% At this point we do not know anything about
    %% which tables that the other nodes already
    %% has loaded and therefore we let the normal
    %% processing of the loader_queue take care
    %% of it, since we at that time point will
    %% know the whereabouts. We rely on the fact
    %% that all nodes tells each other directly
    %% when they have loaded a table and are
    %% willing to share it.
    SafeLoads = initial_safe_loads(),
    try_schedule_late_disc_load(SafeLoads, initial, schema_is_merged).


%% Send an asynchronous request to the local controller, or return an
%% error tuple when it is not registered.
cast(Msg) ->
    case whereis(?SERVER_NAME) of
        undefined ->
            {error, {node_not_running, node()}};
        Controller ->
            gen_server:cast(Controller, Msg)
    end.

%% Asynchronous broadcast to the controllers on Nodes.
abcast(Nodes, Msg) ->
    gen_server:abcast(Nodes, ?SERVER_NAME, Msg).

%% Synchronous call to the local controller without any link handling.
unsafe_call(Msg) ->
    case whereis(?SERVER_NAME) of
        undefined ->
            {error, {node_not_running, node()}};
        Controller ->
            gen_server:call(Controller, Msg, infinity)
    end.

%% Synchronous call to the local controller. The caller is linked to
%% the controller for the duration of the call; an 'EXIT' message from
%% the controller found in the mailbox afterwards turns the result into
%% {error, {node_not_running, node()}}.
call(Msg) ->
    case whereis(?SERVER_NAME) of
        undefined ->
            {error, {node_not_running, node()}};
        Controller ->
            link(Controller),
            Reply = gen_server:call(Controller, Msg, infinity),
            unlink(Controller),

            %% We get an exit signal if server dies
            receive
                {'EXIT', Controller, _Reason} ->
                    {error, {node_not_running, node()}}
            after 0 ->
                    Reply
            end
    end.

%% Call the controller on Node with {Func, Args, self()}. A failing
%% call is returned as {error, Error}.
remote_call(Node, Func, Args) ->
    Request = {Func, Args, self()},
    case catch gen_server:call({?MODULE, Node}, Request, infinity) of
        {'EXIT', Error} ->
            {error, Error};
        Reply ->
            Reply
    end.

%% Call the controllers on Nodes. The node names are stripped from the
%% successful replies so that the result looks like that of
%% rpc:multicall. Returns {Replies, BadNodes}.
multicall(Nodes, Msg) ->
    {Answered, BadNodes} = gen_server:multi_call(Nodes, ?MODULE, Msg, infinity),
    Replies = [Reply || {_Node, Reply} <- Answered],
    {Replies, BadNodes}.

%%%----------------------------------------------------------------------
%%% Callback functions from gen_server
%%%----------------------------------------------------------------------

%%----------------------------------------------------------------------
%% Func: init/1
%% Returns: {ok, State}          |
%%          {ok, State, Timeout} |
%%          {stop, Reason}
%%
%% Traps exits, connects to the nodes found in the schema that are not
%% among the original nodes, starts the periodic dump_log timer and the
%% dumper regulator, and returns a state with empty loader queues.
%%----------------------------------------------------------------------
init([Parent]) ->
    process_flag(trap_exit, true),
    mnesia_lib:verbose("~p starting: ~p~n", [?SERVER_NAME, self()]),

    %% Handshake and initialize transaction recovery
    %% for new nodes detected in the schema
    SchemaNodes = mnesia_lib:all_nodes(),
    NewNodes = SchemaNodes -- [node() | val(original_nodes)],
    mnesia_lib:unset(original_nodes),
    mnesia_recover:connect_nodes(NewNodes),

    Threshold = mnesia_monitor:get_env(dump_log_time_threshold),
    {ok, TimerRef} =
        timer:send_interval(Threshold, {async_dump_log, time_threshold}),
    mnesia_dumper:start_regulator(),

    EmptyQueue = gb_trees:empty(),
    State = #state{supervisor = Parent,
                   dump_log_timer_ref = TimerRef,
                   loader_queue = EmptyQueue,
                   late_loader_queue = EmptyQueue},
    {ok, State}.

%%----------------------------------------------------------------------
%% Func: handle_call/3
%% Returns: {reply, Reply, State}          |
%%          {reply, Reply, State, Timeout} |
%%          {noreply, State}               |
%%          {noreply, State, Timeout}      |
%%          {stop, Reason, Reply, State}   | (terminate/2 is called)
%%          {stop, Reason, State}            (terminate/2 is called)
%%
%% Clause order matters: the clauses above the generic
%% "schema_is_merged /= true" clause are handled even before the schema
%% has been merged. Every other request that arrives before the merge
%% is buffered in early_msgs and replayed by handle_early_msgs/2.
%%----------------------------------------------------------------------

%% Queue a synchronous log dump; the reply is sent by the worker.
handle_call({sync_dump_log, InitBy}, From, State) ->
    Worker = #dump_log{initiated_by = InitBy,
                       opt_reply_to = From
                      },
    State2 = add_worker(Worker, State),
    noreply(State2);

%% Queue a request for the schema commit lock.
handle_call(wait_for_schema_commit_lock, From, State) ->
    Worker = #schema_commit_lock{owner = From},
    State2 = add_worker(Worker, State),
    noreply(State2);

%% Queue a request to block the controller.
handle_call(block_controller, From, State) ->
    Worker = #block_controller{owner = From},
    State2 = add_worker(Worker, State),
    noreply(State2);

%% Evaluate Fun in the controller process and reply with its (caught)
%% result.
handle_call({update,Fun}, From, State) ->
    Res = (catch Fun()),
    reply(From, Res),
    noreply(State);

%% Reply with the cstructs of all schema tables and the running nodes.
handle_call(get_cstructs, From, State) ->
    Tabs = val({schema, tables}),
    Cstructs = [val({T, cstruct}) || T <- Tabs],
    Running = val({current, db_nodes}),
    reply(From, {cstructs, Cstructs, Running}),
    noreply(State);

%% A late schema merge has finished. Only honoured when the merge was
%% started by the caller's node, i.e. schema_is_merged is {false, Node}.
handle_call({schema_is_merged, [], late_merge, []}, From,
            State = #state{schema_is_merged = Merged}) ->
    case Merged of
        {false, Node} when Node == node(From) ->
            Msgs = State#state.early_msgs,
            State1 = State#state{early_msgs = [], schema_is_merged = true},
            handle_early_msgs(lists:reverse(Msgs), State1);
        _ ->
            %% Oops, this came too early, before we have merged :-)
            %% or it came too late or from a node we don't care about
            reply(From, ignore),
            noreply(State)
    end;

%% The initial schema merge has finished: schedule the late loads and
%% replay the messages that were buffered meanwhile.
handle_call({schema_is_merged, TabsR, Reason, RemoteLoaders}, From, State) ->
    State2 = late_disc_load(TabsR, Reason, RemoteLoaders, From, State),

    %% Handle early messages
    Msgs = State2#state.early_msgs,
    State3 = State2#state{early_msgs = [], schema_is_merged = true},
    handle_early_msgs(lists:reverse(Msgs), State3);

%% Reply with the tables this node has queued for loading or has
%% already made active.
handle_call(disc_load_intents,From,State = #state{loader_queue=LQ,late_loader_queue=LLQ}) ->
    LQTabs = gb_trees:keys(LQ),
    LLQTabs = gb_trees:keys(LLQ),
    ActiveTabs = lists:sort(mnesia_lib:local_active_tables()),
    reply(From, {ok, node(), ordsets:union([LQTabs,LLQTabs,ActiveTabs])}),
    noreply(State);

%% Add AddNode to where_to_write of Tab, but only for a known node and
%% only once the schema has been merged.
handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State) ->
    Current = val({current, db_nodes}),
    Res =
        case lists:member(AddNode, Current) and
            (State#state.schema_is_merged == true) of
            true ->
                mnesia_lib:add_lsort({Tab, where_to_write}, AddNode);
            false ->
                ignore
        end,
    {reply, Res, State};

handle_call({add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
            ReplyTo, State) ->
    KnownNode = lists:member(ToNode, val({current, db_nodes})),
    Merged = State#state.schema_is_merged,
    if
        KnownNode == false ->
            reply(ReplyTo, ignore),
            noreply(State);
        Merged == true ->
            Res = case ?catch_val({Tab, cstruct}) of
                      {'EXIT', _} -> %% Tab deleted
                          deleted;
                      _ ->
                          add_active_replica(Tab, ToNode, RemoteS, AccessMode)
                  end,
            reply(ReplyTo, Res),
            noreply(State);
        true -> %% Schema is not merged
            Msg = {add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
            Msgs = State#state.early_msgs,
            reply(ReplyTo, ignore), %% Reply ignore and add data after schema merge
            %% The message is buffered with 'undefined' as reply-to,
            %% since the caller has already been answered above.
            noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
    end;

handle_call({unannounce_add_table_copy, [Tab, Node], From}, ReplyTo, State) ->
    KnownNode = lists:member(node(From), val({current, db_nodes})),
    Merged = State#state.schema_is_merged,
    if
        KnownNode == false ->
            reply(ReplyTo, ignore),
            noreply(State);
        Merged == true ->
            Res = unannounce_add_table_copy(Tab, Node),
            reply(ReplyTo, Res),
            noreply(State);
        true -> %% Schema is not merged
            Msg = {unannounce_add_table_copy, [Tab, Node], From},
            Msgs = State#state.early_msgs,
            reply(ReplyTo, ignore), %% Reply ignore and add data after schema merge
            %% Set ReplyTo to undefined so we don't reply twice
            noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
    end;

%% Queue a network load of Tab, or refuse when the schema has not been
%% merged yet.
%% NOTE(review): the late_merge clause above shows that
%% schema_is_merged may also hold {false, Node}; that value matches
%% neither branch of the case below - confirm that it cannot occur here.
handle_call({net_load, Tab, Cs}, From, State) ->
    State2 =
        case State#state.schema_is_merged of
            true ->
                Worker = #net_load{table = Tab,
                                   opt_reply_to = From,
                                   reason = {dumper,add_table_copy},
                                   cstruct = Cs
                                  },
                add_worker(Worker, State);
            false ->
                reply(From, {not_loaded, schema_not_merged}),
                State
        end,
    noreply(State2);

handle_call(Msg, From, State) when State#state.schema_is_merged /= true ->
    %% Buffer early messages
    Msgs = State#state.early_msgs,
    noreply(State#state{early_msgs = [{call, Msg, From} | Msgs]});

handle_call({late_disc_load, Tabs, Reason, RemoteLoaders}, From, State) ->
    State2 = late_disc_load(Tabs, Reason, RemoteLoaders, From, State),
    noreply(State2);

%% Restore where_to_commit of Tab when it is currently marked blocked.
handle_call({unblock_table, Tab}, _Dummy, State) ->
    Var = {Tab, where_to_commit},
    case val(Var) of
        {blocked, List} ->
            set(Var, List); % where_to_commit
        _ ->
            ignore
    end,
    {reply, ok, State};

%% Block Tab, but only on request from a node among the current
%% db_nodes.
handle_call({block_table, [Tab], From}, _Dummy, State) ->
    case lists:member(node(From), val({current, db_nodes})) of
        true ->
            block_table(Tab);
        false ->
            ignore
    end,
    {reply, ok, State};

handle_call({check_w2r, _Node, Tab}, _From, State) ->
    {reply, val({Tab, where_to_read}), State};

%% Maintain the list of processes stored in the 'others' field.
handle_call({add_other, Who}, _From, State = #state{others=Others0}) ->
    Others = [Who|Others0],
    {reply, ok, State#state{others=Others}};
handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
    Others = lists:delete(Who, Others0),
    {reply, ok, State#state{others=Others}};

%% Unknown requests are logged; no reply is sent.
handle_call(Msg, _From, State) ->
    error("~p got unexpected call: ~p~n", [?SERVER_NAME, Msg]),
    noreply(State).

%% late_disc_load(TabsR, Reason, RemoteLoaders, From, State) -> NewState
%%
%% Records the intent to load the tables in TabsR from disc.
%%   TabsR         - list whose elements are either Tab or {Tab, Reason}
%%   Reason        - load reason used for elements given as a bare Tab
%%   RemoteLoaders - list of {ok, Node, Tabs} tuples (load intents of
%%                   other nodes); {error, _} and {badrpc, _} entries
%%                   are skipped by late_load_filter/4
%%   From          - caller to answer; it is told 'queued' right away
%% Returns State with an updated late_loader_queue.
late_disc_load(TabsR, Reason, RemoteLoaders, From,
               #state{loader_queue = LoaderQ, late_loader_queue = LateQ0} = State) ->
    verbose("Intend to load tables: ~p~n", [TabsR]),
    ?eval_debug_fun({?MODULE, late_disc_load},
                    [{tabs, TabsR},
                     {reason, Reason},
                     {loaders, RemoteLoaders}]),

    %% The caller only waits for the request to be queued
    reply(From, queued),

    LocalSet = gb_sets:from_ordset(lists:sort(mnesia_lib:val({schema, local_tables}))),

    %% Normalise every element to a {Tab, Reason} pair
    WithReason = fun({_, _} = Info) -> Info;
                    (Name) -> {Name, Reason}
                 end,

    %% Keep a table only if it is a local table (i.e. not deleted),
    %% it is not already readable locally and it is not already
    %% waiting in the ordinary loader queue
    NeedsLoad = fun({Tab, _}) ->
                        gb_sets:is_member(Tab, LocalSet)
                            andalso (?catch_val({Tab, where_to_read})) /= node()
                            andalso not gb_trees:is_defined(Tab, LoaderQ)
                end,

    %% The tables are handed on in reverse request order
    Wanted = lists:reverse([Info || Info <- lists:map(WithReason, TabsR),
                                    NeedsLoad(Info)]),

    DbNodes = val({current, db_nodes}),
    State#state{late_loader_queue =
                    late_loaders(Wanted, RemoteLoaders, DbNodes, LateQ0)}.

%% late_loaders(Tabs, RemoteLoaders, Nodes, LLQ) -> NewLLQ
%%
%% Inserts a #late_load{} entry into the late loader queue for every
%% {Tab, Reason} in Tabs that is not queued there already. When no
%% remote node qualifies as loader for a table, a disc_load request
%% is cast to the controller straight away.
late_loaders(Tabs, RemoteLoaders, Nodes, LLQ) ->
    Enqueue =
        fun({Tab, Reason}, Queue) ->
                case gb_trees:is_defined(Tab, Queue) of
                    true ->
                        Queue;
                    false ->
                        Loaders = late_load_filter(RemoteLoaders, Tab, Nodes, []),
                        case Loaders of
                            [] -> cast({disc_load, Tab, Reason}); % Ugly cast
                            _ -> ignore
                        end,
                        Entry = #late_load{table = Tab,
                                           loaders = Loaders,
                                           reason = Reason},
                        gb_trees:insert(Tab, Entry, Queue)
                end
        end,
    lists:foldl(Enqueue, LLQ, Tabs).

%% late_load_filter(RemoteLoaders, Tab, Nodes, Acc) -> LoaderNodes
%%
%% Walks the load intents reported by other nodes and collects (in
%% reverse order, prepended to Acc) the nodes whose intent to load
%% Tab is accepted. {error, _} and {badrpc, _} answers are skipped;
%% every other element must be {ok, Node, Intents}.
late_load_filter([{error, _} | Rest], Tab, Nodes, Acc) ->
    late_load_filter(Rest, Tab, Nodes, Acc);
late_load_filter([{badrpc, _} | Rest], Tab, Nodes, Acc) ->
    late_load_filter(Rest, Tab, Nodes, Acc);
late_load_filter([Answer | Rest], Tab, Nodes, Acc) ->
    {ok, Node, Intents} = Answer,
    Access = val({Tab, access_mode}),
    LocalC = val({Tab, local_content}),
    StillActive = lists:member(Node, Nodes),
    RemoteIntent = lists:member(Tab, Intents),
    %% Only a writable, non local_content table that a still running
    %% node intends to load is a candidate at all
    Candidate = Access == read_write
        andalso LocalC == false
        andalso StillActive
        andalso RemoteIntent,
    Acc2 =
        case Candidate of
            true ->
                %% Accept the intent if the table has no master nodes
                %% or if the other node is one of them; ignore it when
                %% someone else is master node for the table
                Masters = mnesia_recover:get_master_nodes(Tab),
                case Masters == [] orelse lists:member(Node, Masters) of
                    true -> [Node | Acc];
                    false -> Acc
                end;
            false ->
                Acc
        end,
    late_load_filter(Rest, Tab, Nodes, Acc2);
late_load_filter([], _Tab, _Nodes, Acc) ->
    Acc.

%%----------------------------------------------------------------------
%% Func: handle_cast/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%
%% Clause order matters: the clauses placed before the one guarded by
%% schema_is_merged /= true are always handled at once, every other
%% cast is buffered in #state.early_msgs until the schema is merged.
%%----------------------------------------------------------------------

%% The schema commit lock is released: drop it from the head of the
%% dumper queue and try to start the next worker.
handle_cast({release_schema_commit_lock, _Owner}, State) ->
    if
        State#state.is_stopping == true ->
            {stop, shutdown, State};
        true ->
            case State#state.dumper_queue of
                [#schema_commit_lock{}|Rest] ->
                    [_Worker | Rest] = State#state.dumper_queue,
                    State2 = State#state{dumper_pid = undefined,
                                         dumper_queue = Rest},
                    State3 = opt_start_worker(State2),
                    noreply(State3);
                _ ->
                    noreply(State)
            end
    end;

%% The block_controller worker at the head of the dumper queue is done.
%% NOTE(review): the 'if' has no fallback branch, so this clause fails
%% with if_clause when the server is not stopping and the queue head
%% is not a #block_controller{} (hd/1 failing on an empty queue makes
%% the guard false as well).
handle_cast(unblock_controller, State) ->
    if
        State#state.is_stopping == true ->
            {stop, shutdown, State};
        is_record(hd(State#state.dumper_queue), block_controller) ->
            [_Worker | Rest] = State#state.dumper_queue,
            State2 = State#state{dumper_pid = undefined,
                                 dumper_queue = Rest},
            State3 = opt_start_worker(State2),
            noreply(State3)
    end;

%% Mnesia went down on Node: update the whereabouts of all tables and
%% purge everything in the state that refers to that node.
handle_cast({mnesia_down, Node}, State) ->
    maybe_log_mnesia_down(Node),
    mnesia_lib:del({current, db_nodes}, Node),
    mnesia_checkpoint:tm_mnesia_down(Node),
    Alltabs = val({schema, tables}),
    reconfigure_tables(Node, Alltabs),
    %% Done from (external point of view)
    mnesia_monitor:mnesia_down(?SERVER_NAME, Node),

    %% Fix if we are late_merging against the node that went down
    case State#state.schema_is_merged of
        {false, Node} ->
            spawn(?MODULE, call, [{schema_is_merged, [], late_merge, []}]);
        _ ->
            ignore
    end,

    %% Fix internal stuff
    LateQ = remove_loaders(Alltabs, Node, State#state.late_loader_queue),

    %% Tell every running sender and loader, and every process
    %% registered in 'others', that the node is gone
    case get_senders(State) ++ get_loaders(State) of
        [] -> ignore;
        Senders ->
            lists:foreach(fun({Pid,_}) -> Pid ! {copier_done, Node} end,
                          Senders)
    end,
    lists:foreach(fun(Pid) -> Pid ! {copier_done,Node} end,
                  State#state.others),

    %% Keep only the queued send requests whose receiver is not on Node
    Remove = fun(ST) ->
                     node(ST#send_table.receiver_pid) /= Node
             end,
    NewSenders = lists:filter(Remove, State#state.sender_queue),
    Early = remove_early_messages(State#state.early_msgs, Node),
    noreply(State#state{sender_queue = NewSenders,
                        early_msgs = Early,
                        late_loader_queue = LateQ
                       });

%% Remember which node we merge our schema with; the mnesia_down
%% clause above checks schema_is_merged for {false, Node}.
handle_cast({merging_schema, Node}, State) ->
    case State#state.schema_is_merged of
        false ->
            %% This comes from dynamic connect_nodes which are made
            %% after mnesia:start() and the schema_merge.
            ImANewKidInTheBlock =
                (val({schema, storage_type}) == ram_copies)
                andalso (mnesia_lib:val({schema, local_tables}) == [schema]),
            case ImANewKidInTheBlock of
                true -> %% I'm newly started ram_node..
                    noreply(State#state{schema_is_merged = {false, Node}});
                false ->
                    noreply(State)
            end;
        _ -> %% Already merging schema.
            noreply(State)
    end;

handle_cast(Msg, State) when State#state.schema_is_merged /= true ->
    %% Buffer early messages (newest first)
    Msgs = State#state.early_msgs,
    noreply(State#state{early_msgs = [{cast, Msg} | Msgs]});

%% This must be done after schema_is_merged, otherwise adopt_orphans
%% might trigger a table load from the wrong nodes, since we do not
%% yet know which tables we can load safely first.
handle_cast({im_running, _Node, NewFriends}, State) ->
    LocalTabs = mnesia_lib:local_active_tables() -- [schema],
    RemoveLocalOnly = fun(Tab) -> not val({Tab, local_content}) end,
    Tabs = lists:filter(RemoveLocalOnly, LocalTabs),
    Ns = mnesia_lib:intersect(NewFriends, val({current, db_nodes})),
    abcast(Ns, {adopt_orphans, node(), Tabs}),
    noreply(State);

%% Queue a load of Tab from the local disc
handle_cast({disc_load, Tab, Reason}, State) ->
    Worker = #disc_load{table = Tab, reason = Reason},
    State2 = add_worker(Worker, State),
    noreply(State2);

%% Queue a request to send a table to another node
handle_cast(Worker = #send_table{}, State) ->
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_cast({sync_tabs, Tabs, From}, State) ->
    %% user initiated wait_for_tables
    handle_sync_tabs(Tabs, From),
    noreply(State);

%% Node announces that it has Tab; ignored unless Node is a current db node
handle_cast({i_have_tab, Tab, Node}, State) ->
    case lists:member(Node, val({current, db_nodes})) of
        true ->
            State2 = node_has_tabs([Tab], Node, State),
            noreply(State2);
        false ->
            noreply(State)
    end;

%% Re-evaluate Tab against the first of its active replicas
handle_cast({force_load_updated, Tab}, State) ->
    case val({Tab, active_replicas}) of
        [] ->
            %% No valid replicas
            noreply(State);
        [SomeNode | _] ->
            State2 = node_has_tabs([Tab], SomeNode, State),
            noreply(State2)
    end;

%% The master nodes of Tab changed. The valid replicas are all active
%% ones when the table is loaded by force or has no masters, otherwise
%% only the active replicas that are also master nodes.
handle_cast({master_nodes_updated, Tab, Masters}, State) ->
    Active = val({Tab, active_replicas}),
    Valid =
        case val({Tab, load_by_force}) of
            true ->
                Active;
            false ->
                if
                    Masters == [] ->
                        Active;
                    true ->
                        mnesia_lib:intersect(Masters, Active)
                end
        end,
    case Valid of
        [] ->
            %% No valid replicas
            noreply(State);
        [SomeNode | _] ->
            State2 = node_has_tabs([Tab], SomeNode, State),
            noreply(State2)
    end;

%% Node is up and has loaded Tabs (sent as a result of im_running)
handle_cast({adopt_orphans, Node, Tabs}, State) ->

    State2 = node_has_tabs(Tabs, Node, State),

    %% Register the other node as up and running
    mnesia_recover:log_mnesia_up(Node),
    verbose("Logging mnesia_up ~w~n",[Node]),
    mnesia_lib:report_system_event({mnesia_up, Node}),

    %% Load orphan tables
    LocalTabs = val({schema, local_tables}) -- [schema],
    Nodes = val({current, db_nodes}),
    {LocalOrphans, RemoteMasters} =
        orphan_tables(LocalTabs, Node, Nodes, [], []),
    Reason = {adopt_orphan, node()},
    mnesia_late_loader:async_late_disc_load(node(), LocalOrphans, Reason),

    %% For every db node, request a load of the orphans that have
    %% that node among their master nodes
    Fun =
        fun(N) ->
                RemoteOrphans =
                    [Tab || {Tab, Ns} <- RemoteMasters,
                            lists:member(N, Ns)],
                mnesia_late_loader:maybe_async_late_disc_load(N, RemoteOrphans, Reason)
        end,
    lists:foreach(Fun, Nodes),
    noreply(State2);

handle_cast(Msg, State) ->
    error("~p got unexpected cast: ~p~n", [?SERVER_NAME, Msg]),
    noreply(State).

%% handle_sync_tabs(Tabs, From) -> ok
%%
%% For each table: if it cannot be read anywhere yet, From is added to
%% the waiters kept in the process dictionary under {sync_tab, Tab};
%% otherwise From is answered at once through sync_reply/2.
handle_sync_tabs([Tab | Tabs], From) ->
    case val({Tab, where_to_read}) of
        nowhere ->
            case get({sync_tab, Tab}) of
                undefined ->
                    put({sync_tab, Tab}, [From]);
                Pids ->
                    put({sync_tab, Tab}, [From | Pids])
            end;
        _ ->
            sync_reply(From, Tab)
    end,
    handle_sync_tabs(Tabs, From);
handle_sync_tabs([], _From) ->
    ok.

%%----------------------------------------------------------------------
%% Func: handle_info/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%
%% As in handle_cast/2, the clauses before the one guarded by
%% schema_is_merged /= true are handled even before the schema is
%% merged; other messages are buffered in #state.early_msgs.
%%----------------------------------------------------------------------

%% Queue an asynchronous dump of the log
handle_info({async_dump_log, InitBy}, State) ->
    Worker = #dump_log{initiated_by = InitBy},
    State2 = add_worker(Worker, State),
    noreply(State2);

%% The dumper process reports its result. Anything but 'dumped' from
%% the current dumper is treated as fatal.
handle_info(#dumper_done{worker_pid=Pid, worker_res=Res}, State) ->
    if
        State#state.is_stopping == true ->
            {stop, shutdown, State};
        Res == dumped, Pid == State#state.dumper_pid ->
            [Worker | Rest] = State#state.dumper_queue,
            reply(Worker#dump_log.opt_reply_to, Res),
            State2 = State#state{dumper_pid = undefined,
                                 dumper_queue = Rest},
            State3 = opt_start_worker(State2),
            noreply(State3);
        true ->
            fatal("Dumper failed: ~p~n state: ~p~n", [Res, State]),
            {stop, fatal, State}
    end;

%% A loader process reports that it has finished with table Tab
handle_info(Done = #loader_done{worker_pid=WPid, table_name=Tab}, State0) ->
    LateQueue0 = State0#state.late_loader_queue,
    %% The loader is no longer running
    State1 = State0#state{loader_pid = lists:keydelete(WPid,1,get_loaders(State0))},

    State2 =
        case Done#loader_done.is_loaded of
            true ->
                %% Optional table announcement
                if
                    Done#loader_done.needs_announce == true,
                    Done#loader_done.needs_reply == true ->
                        i_have_tab(Tab),
                        %% Should be {dumper,add_table_copy} only
                        reply(Done#loader_done.reply_to,
                              Done#loader_done.reply);
                    Done#loader_done.needs_reply == true ->
                        %% Should be {dumper,add_table_copy} only
                        reply(Done#loader_done.reply_to,
                              Done#loader_done.reply);
                    Done#loader_done.needs_announce == true, Tab == schema ->
                        i_have_tab(Tab);
                    Done#loader_done.needs_announce == true ->
                        i_have_tab(Tab),
                        %% Local node needs to perform user_sync_tab/1
                        Ns = val({current, db_nodes}),
                        abcast(Ns, {i_have_tab, Tab, node()});
                    Tab == schema ->
                        ignore;
                    true ->
                        %% Local node needs to perform user_sync_tab/1
                        Ns = val({current, db_nodes}),
                        AlreadyKnows = val({Tab, active_replicas}),
                        abcast(Ns -- AlreadyKnows, {i_have_tab, Tab, node()})
                end,
                %% Optional user sync
                case Done#loader_done.needs_sync of
                    true -> user_sync_tab(Tab);
                    false -> ignore
                end,
                State1#state{late_loader_queue=gb_trees:delete_any(Tab, LateQueue0)};
            false ->
                %% Either the node went down or table was not
                %% loaded remotely yet
                case Done#loader_done.needs_reply of
                    true ->
                        reply(Done#loader_done.reply_to,
                              Done#loader_done.reply);
                    false ->
                        ignore
                end,
                case ?catch_val({Tab, active_replicas}) of
                    [_|_] -> % still available elsewhere
                        %% Put the worker of the finished loader back
                        %% into the loader queue
                        {value,{_,Worker}} = lists:keysearch(WPid,1,get_loaders(State0)),
                        add_loader(Tab,Worker,State1);
                    _ ->
                        State1
                end
        end,
    State3 = opt_start_worker(State2),
    noreply(State3);

%% A sender process reports its result; it must be a registered sender
%% (the keysearch match fails otherwise).
handle_info(#sender_done{worker_pid=Pid, worker_res=Res}, State) ->
    Senders = get_senders(State),
    {value, {Pid,_Worker}} = lists:keysearch(Pid, 1, Senders),
    if
        Res == ok ->
            State2 = State#state{sender_pid = lists:keydelete(Pid, 1, Senders)},
            State3 = opt_start_worker(State2),
            noreply(State3);
        true ->
            %% No need to send any message to the table receiver
            %% since it will soon get a mnesia_down anyway
            fatal("Sender failed: ~p~n state: ~p~n", [Res, State]),
            {stop, fatal, State}
    end;

%% The supervisor exited: stop at once unless a dumper (or schema
%% commit) is registered in dumper_pid, in which case the stop is
%% deferred by setting is_stopping.
handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
    catch set(mnesia_status, stopping),
    case State#state.dumper_pid of
        undefined ->
            dbg_out("~p was ~p~n", [?SERVER_NAME, R]),
            {stop, shutdown, State};
        _ ->
            noreply(State#state{is_stopping = true})
    end;

%% The process registered in dumper_pid exited
handle_info({'EXIT', Pid, R}, State) when Pid == State#state.dumper_pid ->
    case State#state.dumper_queue of
        [#schema_commit_lock{}|Workers] -> %% Schema trans crashed or was killed
            dbg_out("WARNING: Dumper ~p exited ~p~n", [Pid, R]),
            State2 = State#state{dumper_queue = Workers, dumper_pid = undefined},
            State3 = opt_start_worker(State2),
            noreply(State3);
        _Other ->
            fatal("Dumper or schema commit crashed: ~p~n state: ~p~n", [R, State]),
            {stop, fatal, State}
    end;

%% Some other linked process exited (wait_for_tables timeouts are
%% handled by a later clause): fatal if it was a sender or a loader.
handle_info(Msg = {'EXIT', Pid, R}, State) when R /= wait_for_tables_timeout ->
    case lists:keymember(Pid, 1, get_senders(State)) of
        true ->
            %% No need to send any message to the table receiver
            %% since it will soon get a mnesia_down anyway
            fatal("Sender crashed: ~p~n state: ~p~n", [{Pid,R}, State]),
            {stop, fatal, State};
        false ->
            case lists:keymember(Pid, 1, get_loaders(State)) of
                true ->
                    fatal("Loader crashed: ~p~n state: ~p~n", [R, State]),
                    {stop, fatal, State};
                false ->
                    error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
                    noreply(State)
            end
    end;

%% Debug request (see get_info/1 and get_workers/1): send back the state
handle_info({From, get_state}, State) ->
    From ! {?SERVER_NAME, State},
    noreply(State);

%% No real need for buffering
handle_info(Msg, State) when State#state.schema_is_merged /= true ->
    %% Buffer early messages
    Msgs = State#state.early_msgs,
    noreply(State#state{early_msgs = [{info, Msg} | Msgs]});

%% A process that waited for tables gave up: forget it as a waiter
handle_info({'EXIT', Pid, wait_for_tables_timeout}, State) ->
    sync_tab_timeout(Pid, get()),
    noreply(State);

handle_info(Msg, State) ->
    error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
    noreply(State).

%% sync_tab_timeout(Pid, ProcessDictionary) -> ok
%%
%% Removes Pid from every {sync_tab, Tab} waiter list found in the
%% given process dictionary listing; an entry that becomes empty is
%% erased. All other dictionary entries are skipped.
sync_tab_timeout(Pid, [{{sync_tab, Tab}, Pids} | Tail]) ->
    case lists:delete(Pid, Pids) of
        [] ->
            erase({sync_tab, Tab});
        Pids2 ->
            put({sync_tab, Tab}, Pids2)
    end,
    sync_tab_timeout(Pid, Tail);
sync_tab_timeout(Pid, [_ | Tail]) ->
    sync_tab_timeout(Pid, Tail);
sync_tab_timeout(_Pid, []) ->
    ok.

%% Pick the load record that has the highest load order
%% Returns {BestLoad, RemainingQueue}, or {none, gb_trees:empty()}
%% if no load record could be picked (e.g. the queue is empty)
pick_next(Queue) ->
    List = gb_trees:values(Queue),
    case pick_next(List, none, none) of
        none -> {none, gb_trees:empty()};
        {Tab, Worker} -> {Worker, gb_trees:delete(Tab,Queue)}
    end.

%% pick_next(Loads, BestSoFar, BestOrderSoFar) -> none | {Tab, Load}
%% The table name is taken from the second element of the load record.
pick_next([Head = #net_load{table=Tab}| Tail], Load, Order) ->
    select_best(Head, Tail, ?catch_val({Tab, load_order}), Load, Order);
pick_next([Head = #disc_load{table=Tab}| Tail], Load, Order) ->
    select_best(Head, Tail, ?catch_val({Tab, load_order}), Load, Order);
pick_next([], none, _Order) ->
    none;
pick_next([], Load, _Order) ->
    {element(2,Load), Load}.

%% select_best(Load, Tail, LoadOrder, BestSoFar, BestOrderSoFar)
%% Keeps the candidate with the strictly highest load order; on a tie
%% the earlier candidate wins.
select_best(_Head, Tail, {'EXIT', _WHAT}, Load, Order) ->
    %% The table has been deleted, drop it.
    pick_next(Tail, Load, Order);
select_best(Load, Tail, Order, none, none) ->
    pick_next(Tail, Load, Order);
select_best(Load, Tail, Order, _OldLoad, OldOrder) when Order > OldOrder ->
    pick_next(Tail, Load, Order);
select_best(_Load, Tail, _Order, OldLoad, OldOrder) ->
    pick_next(Tail, OldLoad, OldOrder).

%%----------------------------------------------------------------------
%% Func: terminate/2
%% Purpose: Shutdown the server
%% Returns: any (ignored by gen_server)
%%----------------------------------------------------------------------
terminate(Reason, State) ->
    mnesia_monitor:terminate_proc(?SERVER_NAME, Reason, State).

%%----------------------------------------------------------------------
%% Func: code_change/3
%% Purpose: Upgrade process when its code is to be changed
%% Returns: {ok, NewState}
%%
%% Converts a state where loader_pid is a single pid (or undefined)
%% and the loader queues are lists into the representation used by
%% this module: loader_pid as a list of {Pid, Worker} pairs and both
%% queues as gb_trees keyed on the second element of each record.
%%----------------------------------------------------------------------
code_change(_OldVsn, State0, _Extra) ->
    %% Loader Queue
    State1 = case State0#state.loader_pid of
                 Pids when is_list(Pids) -> State0;
                 undefined -> State0#state{loader_pid = [],loader_queue=gb_trees:empty()};
                 Pid when is_pid(Pid) ->
                     %% The head of the old queue is the worker
                     %% that Pid is running
                     [Loader|Rest] = State0#state.loader_queue,
                     LQ0 = [{element(2,Rec),Rec} || Rec <- Rest],
                     LQ1 = lists:sort(LQ0),
                     LQ = gb_trees:from_orddict(LQ1),
                     State0#state{loader_pid=[{Pid,Loader}], loader_queue=LQ}
             end,
    %% LateLoaderQueue
    State = if is_list(State1#state.late_loader_queue) ->
                    LLQ0 = State1#state.late_loader_queue,
                    LLQ1 = lists:sort([{element(2,Rec),Rec} || Rec <- LLQ0]),
                    LLQ = gb_trees:from_orddict(LLQ1),
                    State1#state{late_loader_queue=LLQ};
               true ->
                    State1
            end,
    {ok, State}.

%%%----------------------------------------------------------------------
%%% Internal functions
%%%----------------------------------------------------------------------

%% maybe_log_mnesia_down(Node) -> ok | log_later
%%
%% Logs that mnesia went down on Node, unless the local node is not
%% running yet and none of the local tables shared with Node is still
%% inactive here; in that case nothing is logged and log_later is
%% returned.
maybe_log_mnesia_down(N) ->
    %% We use mnesia_down when deciding which tables to load locally,
    %% so if we are not running (i.e haven't decided which tables
    %% to load locally), don't log mnesia_down yet.
    case mnesia_lib:is_running() of
        yes ->
            verbose("Logging mnesia_down ~w~n", [N]),
            mnesia_recover:log_mnesia_down(N),
            ok;
        _ ->
            Filter = fun(Tab) ->
                             inactive_copy_holders(Tab, N)
                     end,
            HalfLoadedTabs = lists:any(Filter, val({schema, local_tables}) -- [schema]),
            if
                HalfLoadedTabs == true ->
                    verbose("Logging mnesia_down ~w~n", [N]),
                    mnesia_recover:log_mnesia_down(N),
                    ok;
                true ->
                    %% Unfortunately we have not loaded some common
                    %% tables yet, so we cannot rely on the nodedown
                    log_later %% BUGBUG handle this case!!!
            end
    end.

%% inactive_copy_holders(Tab, Node) -> boolean()
%%
%% False when Node holds no copy of Tab (storage type unknown),
%% otherwise the result of mnesia_lib:not_active_here(Tab).
inactive_copy_holders(Tab, Node) ->
    Cs = val({Tab, cstruct}),
    case mnesia_lib:cs_to_storage_type(Node, Cs) of
        unknown ->
            false;
        _Storage ->
            mnesia_lib:not_active_here(Tab)
    end.

%% orphan_tables(Tabs, Node, Ns, Local, Remote) ->
%%     {LocalOrphans, RemoteMasters}
%%
%% Classifies the given local tables now that Node has announced
%% itself (Ns are the current db nodes). Tables that should be loaded
%% locally are prepended to Local; tables that have master nodes are
%% prepended to Remote as {Tab, Masters}. The clauses of the case are
%% selected mainly by their guards and are order dependent.
orphan_tables([Tab | Tabs], Node, Ns, Local, Remote) ->
    Cs = val({Tab, cstruct}),
    CopyHolders = mnesia_lib:copy_holders(Cs),
    RamCopyHolders = Cs#cstruct.ram_copies,
    DiscCopyHolders = CopyHolders -- RamCopyHolders,
    DiscNodes = val({schema, disc_copies}),
    LocalContent = Cs#cstruct.local_content,
    RamCopyHoldersOnDiscNodes = mnesia_lib:intersect(RamCopyHolders, DiscNodes),
    Active = val({Tab, active_replicas}),
    BeingCreated = (?catch_val({Tab, create_table}) == true),
    Read = val({Tab, where_to_read}),
    case lists:member(Node, DiscCopyHolders) of
        _ when BeingCreated == true ->
            orphan_tables(Tabs, Node, Ns, Local, Remote);
        _ when Read == node() -> %% Already loaded
            orphan_tables(Tabs, Node, Ns, Local, Remote);
        true when Active == [] ->
            case DiscCopyHolders -- Ns of
                [] ->
                    %% We're last up and the other nodes have not
                    %% loaded the table. Lets load it if we are
                    %% the smallest node.
                    case lists:min(DiscCopyHolders) of
                        Min when Min == node() ->
                            case mnesia_recover:get_master_nodes(Tab) of
                                [] ->
                                    L = [Tab | Local],
                                    orphan_tables(Tabs, Node, Ns, L, Remote);
                                Masters ->
                                    R = [{Tab, Masters} | Remote],
                                    orphan_tables(Tabs, Node, Ns, Local, R)
                            end;
                        _ ->
                            orphan_tables(Tabs, Node, Ns, Local, Remote)
                    end;
                _ ->
                    orphan_tables(Tabs, Node, Ns, Local, Remote)
            end;
        false when Active == [], DiscCopyHolders == [], RamCopyHoldersOnDiscNodes == [] ->
            %% Special case when all replicas reside on disc less nodes
            orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
        _ when LocalContent == true ->
            orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
        _ ->
            orphan_tables(Tabs, Node, Ns, Local, Remote)
    end;
orphan_tables([], _, _, LocalOrphans, RemoteMasters) ->
    {LocalOrphans, RemoteMasters}.

%% node_has_tabs(Tabs, Node, State) -> NewState
%%
%% Node has the tables Tabs loaded. For a remote node the whereabouts
%% of each table are updated; for the local node only the processes
%% waiting for the table are released.
node_has_tabs([Tab | Tabs], Node, State) when Node /= node() ->
    State2 =
        case catch update_whereabouts(Tab, Node, State) of
            State1 = #state{} -> State1;
            {'EXIT', R} -> %% Tab was just deleted?
                case ?catch_val({Tab, cstruct}) of
                    {'EXIT', _} -> State; % yes
                    _ -> erlang:error(R)
                end
        end,
    node_has_tabs(Tabs, Node, State2);
node_has_tabs([Tab | Tabs], Node, State) ->
    user_sync_tab(Tab),
    node_has_tabs(Tabs, Node, State);
node_has_tabs([], _Node, State) ->
    State.

%% update_whereabouts(Tab, Node, State) -> NewState
%%
%% The remote node Node has Tab loaded. Depending on the local storage
%% type and on where the table is currently read, Node is registered
%% as active replica, the table becomes readable remotely, or a net
%% load of the table is queued.
update_whereabouts(Tab, Node, State) ->
    Storage = val({Tab, storage_type}),
    Read = val({Tab, where_to_read}),
    LocalC = val({Tab, local_content}),
    BeingCreated = (?catch_val({Tab, create_table}) == true),
    Masters = mnesia_recover:get_master_nodes(Tab),
    ByForce = val({Tab, load_by_force}),
    %% Node is a good source for the table if the table is loaded by
    %% force, has no master nodes, or has Node as one of its masters
    GoGetIt =
        if
            ByForce == true ->
                true;
            Masters == [] ->
                true;
            true ->
                lists:member(Node, Masters)
        end,

    dbg_out("Table ~w is loaded on ~w. s=~w, r=~w, lc=~w, f=~w, m=~w~n",
            [Tab, Node, Storage, Read, LocalC, ByForce, GoGetIt]),
    if
        LocalC == true ->
            %% Local contents, don't care about other node
            State;
        BeingCreated == true ->
            %% The table is currently being created
            %% It will be handled elsewhere
            State;
        Storage == unknown, Read == nowhere ->
            %% No own copy, time to read remotely
            %% if the other node is a good node
            add_active_replica(Tab, Node),
            case GoGetIt of
                true ->
                    set({Tab, where_to_read}, Node),
                    user_sync_tab(Tab),
                    State;
                false ->
                    State
            end;
        Storage == unknown ->
            %% No own copy, continue to read remotely
            add_active_replica(Tab, Node),
            NodeST = mnesia_lib:storage_type_at_node(Node, Tab),
            ReadST = mnesia_lib:storage_type_at_node(Read, Tab),
            if   %% Avoid reading from disc_only_copies
                NodeST == disc_only_copies ->
                    ignore;
                ReadST == disc_only_copies ->
                    mnesia_lib:set_remote_where_to_read(Tab);
                true ->
                    ignore
            end,
            user_sync_tab(Tab),
            State;
        Read == nowhere ->
            %% Own copy, go and get a copy of the table
            %% if the other node is master or if there
            %% are no master at all
            add_active_replica(Tab, Node),
            case GoGetIt of
                true ->
                    Worker = #net_load{table = Tab,
                                       reason = {active_remote, Node}},
                    add_worker(Worker, State);
                false ->
                    State
            end;
        true ->
            %% We already have an own copy
            add_active_replica(Tab, Node),
            user_sync_tab(Tab),
            State
    end.

%% initial_safe_loads() -> [{Tab, Reason}]
%%
%% Returns the local tables (schema excluded) for which
%% last_consistent_replica/2 answers {true, {Tab, Reason}}. When the
%% schema is ram_copies no nodes are considered down; when it is
%% disc_copies the nodes logged as down are taken into account. Any
%% other schema storage type fails with case_clause.
initial_safe_loads() ->
    case val({schema, storage_type}) of
        ram_copies ->
            Downs = [],
            Tabs = val({schema, local_tables}) -- [schema],
            LastC = fun(T) -> last_consistent_replica(T, Downs) end,
            lists:zf(LastC, Tabs);

        disc_copies ->
            Downs = mnesia_recover:get_mnesia_downs(),
            dbg_out("mnesia_downs = ~p~n", [Downs]),

            Tabs = val({schema, local_tables}) -- [schema],
            LastC = fun(T) -> last_consistent_replica(T, Downs) end,
            lists:zf(LastC, Tabs)
    end.

%% last_consistent_replica(Tab, Downs) -> {true, {Tab, Reason}} | false
%%
%% Decides whether the local replica of Tab may be loaded right away.
%% Downs are the nodes known to have gone down. Reason is one of
%% local_only, local_content, local_master, ram_only, read_only or
%% initial; false means that the table must be fetched from, or
%% loaded after, some other node.
last_consistent_replica(Tab, Downs) ->
    Cs = val({Tab, cstruct}),
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    Disc = Cs#cstruct.disc_copies,
    DiscOnly = Cs#cstruct.disc_only_copies,
    AccessMode = Cs#cstruct.access_mode,
    LocalContent = Cs#cstruct.local_content,
    Holders = mnesia_lib:copy_holders(Cs),
    %% Remote replicas that are neither on a node known to be down
    %% nor ram_copies
    BetterCopies =
        (mnesia_lib:remote_copy_holders(Cs) -- Downs) -- Cs#cstruct.ram_copies,
    Masters = mnesia_recover:get_master_nodes(Tab),
    %% Who is master for the table: nobody, we, or someone else only
    MasterRole =
        case {Masters, lists:member(node(), Masters)} of
            {[], _} -> none;
            {_, true} -> local;
            {_, false} -> remote
        end,
    if
        Holders == [node()] ->
            %% Only one copy holder and it is local.
            %% It may also be a local contents table
            {true, {Tab, local_only}};
        LocalContent == true ->
            {true, {Tab, local_content}};
        MasterRole == local ->
            %% We have a local master
            {true, {Tab, local_master}};
        MasterRole == remote ->
            %% Wait for remote master copy
            false;
        Storage == ram_copies, Disc == [], DiscOnly == [] ->
            %% Nobody has copy on disc
            {true, {Tab, ram_only}};
        Storage == ram_copies ->
            %% Some other node has copy on disc
            false;
        AccessMode == read_only ->
            %% No one has been able to update the table,
            %% i.e. all disc resident copies are equal
            {true, {Tab, read_only}};
        BetterCopies /= [], Masters /= [node()] ->
            %% There are better copies on other nodes
            %% and we do not have the only master copy
            false;
        true ->
            {true, {Tab, initial}}
    end.

%% reconfigure_tables(Node, Tabs) -> ok
%%
%% Node is gone: remove it as active replica of every table and pick
%% a new node to read from for the tables that were read from it.
reconfigure_tables(N, Tabs) ->
    Reconfigure =
        fun(Tab) ->
                del_active_replica(Tab, N),
                case val({Tab, where_to_read}) of
                    N -> mnesia_lib:set_remote_where_to_read(Tab);
                    _ -> ignore
                end
        end,
    lists:foreach(Reconfigure, Tabs).

%% remove_loaders(Tabs, Node, LateQ) -> NewLateQ
%%
%% Drops Node as loader of every table in Tabs from the late load queue.
remove_loaders(Tabs, N, Loaders) ->
    lists:foldl(fun(Tab, LateQ) -> drop_loaders(Tab, N, LateQ) end,
                Loaders,
                Tabs).

%% remove_early_messages(EarlyMsgs, Node) -> RemainingEarlyMsgs
%%
%% Node went down: discard the buffered early messages that concern
%% it. Dropped messages are add_active_replica calls for Node,
%% block_table calls issued from Node (their caller is answered 'ok'
%% first) and i_have_tab / adopt_orphans casts sent by Node. The
%% relative order of the remaining messages is kept.
remove_early_messages(Msgs, Node) ->
    Keep =
        fun(Msg) ->
                case Msg of
                    {call, {add_active_replica, [_, Node, _, _], _}, _} ->
                        %% Does a reply before queuing
                        false;
                    {call, {block_table, _, From}, ReplyTo}
                      when node(From) == Node ->
                        reply(ReplyTo, ok), %% Remove gen:server waits..
                        false;
                    {cast, {i_have_tab, _Tab, Node}} ->
                        false;
                    {cast, {adopt_orphans, Node, _Tabs}} ->
                        false;
                    _ ->
                        true
                end
        end,
    lists:filter(Keep, Msgs).

%% Drop loader from late load queue and possibly trigger a disc_load
drop_loaders(Tab, Node, LLQ) ->
    case gb_trees:lookup(Tab, LLQ) of
        none ->
            LLQ;
        {value, Late} ->
            Loaders = Late#late_load.loaders,
            %% When the last remaining loader is the node that went
            %% down it is time to issue a disc_load request
            if
                Loaders =:= [Node] ->
                    Why = {Late#late_load.reason, last_loader_down, Node},
                    cast({disc_load, Tab, Why}); % Ugly cast
                true ->
                    ignore
            end,
            %% Drop the node from the list of loaders
            gb_trees:update(Tab, Late#late_load{loaders = Loaders -- [Node]}, LLQ)
    end.

%% add_active_replica(Tab, Node) uses the table definition that is
%% currently stored for Tab.
add_active_replica(Tab, Node) ->
    Cs = val({Tab, cstruct}),
    add_active_replica(Tab, Node, Cs).

%% add_active_replica(Tab, Node, Cs) takes the storage type of Node
%% and the access mode of the table from the table definition Cs.
add_active_replica(Tab, Node, #cstruct{access_mode = AccessMode} = Cs) ->
    add_active_replica(Tab, Node,
                       mnesia_lib:schema_cs_to_storage_type(Node, Cs),
                       AccessMode).

%% Block table primitives

%% Marks Tab as blocked by wrapping its where_to_commit value
block_table(Tab) ->
    Key = {Tab, where_to_commit},
    set(Key, {blocked, val(Key)}). % where_to_commit

%% Asks the controller process to remove the block on Tab
unblock_table(Tab) ->
    call({unblock_table, Tab}).

%% is_tab_blocked(WhereToCommit) -> {IsBlocked, NodeList}
is_tab_blocked({blocked, Nodes}) when is_list(Nodes) ->
    {true, Nodes};
is_tab_blocked(Nodes) when is_list(Nodes) ->
    {false, Nodes}.

%% mark_blocked_tab(IsBlocked, NodeList) -> WhereToCommit
%% The inverse of is_tab_blocked/1.
mark_blocked_tab(false, Nodes) ->
    Nodes;
mark_blocked_tab(true, Nodes) ->
    {blocked, Nodes}.

%% add_active_replica(Tab, Node, Storage, AccessMode) registers Node
%% as an active replica of Tab. For a read_write table the node is
%% (re)inserted in where_to_commit with its storage type and added to
%% where_to_write; for a read_only table it is removed from both. A
%% block on where_to_commit is preserved.

add_active_replica(Tab, Node, Storage, AccessMode) ->
    CommitKey = {Tab, where_to_commit},
    WriteKey = {Tab, where_to_write},
    {IsBlocked, Commit0} = is_tab_blocked(val(CommitKey)),
    Others = lists:keydelete(Node, 1, Commit0),
    case AccessMode of
        read_write ->
            Commit = lists:sort([{Node, Storage} | Others]),
            set(CommitKey, mark_blocked_tab(IsBlocked, Commit)), % where_to_commit
            mnesia_lib:add_lsort(WriteKey, Node);
        read_only ->
            set(CommitKey, mark_blocked_tab(IsBlocked, Others)),
            mnesia_lib:del(WriteKey, Node)
    end,
    add({Tab, active_replicas}, Node).

%% del_active_replica(Tab, Node) removes Node from where_to_commit
%% (preserving a block), active_replicas and where_to_write of Tab.
del_active_replica(Tab, Node) ->
    CommitKey = {Tab, where_to_commit},
    {IsBlocked, Commit0} = is_tab_blocked(val(CommitKey)),
    Commit = lists:sort(lists:keydelete(Node, 1, Commit0)),
    set(CommitKey, mark_blocked_tab(IsBlocked, Commit)), % where_to_commit
    mnesia_lib:del({Tab, active_replicas}, Node),
    mnesia_lib:del({Tab, where_to_write}, Node).

%% change_table_access_mode(Cs) re-registers every active replica of
%% the table described by Cs, so that the access mode found in Cs
%% takes effect. The work is handed to update/1 as a fun.
change_table_access_mode(Cs) ->
    update(fun() ->
                   Tab = Cs#cstruct.name,
                   Replicas = val({Tab, active_replicas}),
                   lists:foreach(fun(Node) ->
                                         add_active_replica(Tab, Node, Cs)
                                 end,
                                 Replicas)
           end).


%% node To now has tab loaded, but this must be undone
%% This code is rpc:call'ed from the tab_copier process
%% when it has *not* released its table lock
unannounce_add_table_copy(Tab, To) ->
    %% Best effort: a failure to remove the replica is ignored
    _ = (catch del_active_replica(Tab, To)),
    ReadFrom = (catch val({Tab, where_to_read})),
    if
        ReadFrom =:= To ->
            mnesia_lib:set_remote_where_to_read(Tab);
        true ->
            ignore
    end.

%% user_sync_tab(Tab) answers every process that waits for Tab (the
%% waiters are kept in the process dictionary under {sync_tab, Tab})
%% and forgets them. When the debug level is 'trace', the
%% mnesia_event process is first subscribed to the table's events.
user_sync_tab(Tab) ->
    case val(debug) of
        trace ->
            mnesia_subscr:subscribe(whereis(mnesia_event), {table, Tab});
        _ ->
            ignore
    end,

    Waiters = erase({sync_tab, Tab}),
    case Waiters of
        undefined ->
            ok;
        _ ->
            lists:foreach(fun(Waiter) -> sync_reply(Waiter, Tab) end, Waiters)
    end.

%% i_have_tab(Tab): the local node has Tab loaded. Make the table
%% readable locally and register the local node as active replica.
i_have_tab(Tab) ->
    Self = node(),
    case val({Tab, local_content}) of
        true ->
            mnesia_lib:set_local_content_whereabouts(Tab);
        false ->
            set({Tab, where_to_read}, Self)
    end,
    add_active_replica(Tab, Self).

%% sync_and_block_table_whereabouts(Tab, ToNode, RemoteS, AccessMode) -> ok
%%
%% Blocks Tab on ToNode and then asks ToNode, followed by every other
%% current db node, to add ToNode as an active replica of Tab.
%% Not applicable to the schema table.
sync_and_block_table_whereabouts(Tab, ToNode, RemoteS, AccessMode) when Tab /= schema ->
    Others = lists:delete(ToNode, val({current, db_nodes})),
    remote_call(ToNode, block_table, [Tab]),
    Args = [Tab, ToNode, RemoteS, AccessMode],
    lists:foreach(fun(Node) -> remote_call(Node, add_active_replica, Args) end,
                  [ToNode | Others]),
    ok.

%% sync_del_table_copy_whereabouts(Tab, ToNode) -> ok
%%
%% Asks every current db node, and ToNode even if it is not one of
%% them, to undo the announcement of ToNode's copy of Tab.
%% Not applicable to the schema table.
sync_del_table_copy_whereabouts(Tab, ToNode) when Tab /= schema ->
    Current = val({current, db_nodes}),
    %% ToNode goes first, but only if it is not already in the list
    Targets = [ToNode || not lists:member(ToNode, Current)] ++ Current,
    lists:foreach(fun(Node) ->
                          remote_call(Node, unannounce_add_table_copy, [Tab, ToNode])
                  end,
                  Targets),
    ok.

%% get_info(Timeout) -> {info, State} | {timeout, Timeout}
%%
%% Fetches the state of the controller process, with the two loader
%% queues converted from gb_trees to lists. {timeout, Timeout} is
%% returned when the controller is not registered or does not answer
%% within Timeout.
get_info(Timeout) ->
    case whereis(?SERVER_NAME) of
        undefined ->
            {timeout, Timeout};
        Controller ->
            Controller ! {self(), get_state},
            receive
                {?SERVER_NAME, #state{loader_queue = LQ,
                                      late_loader_queue = LLQ} = State} ->
                    Printable = State#state{loader_queue = gb_trees:to_list(LQ),
                                            late_loader_queue = gb_trees:to_list(LLQ)},
                    {info, Printable}
            after Timeout ->
                    {timeout, Timeout}
            end
    end.

%% get_workers(Timeout) ->
%%     {workers, Loaders, Senders, DumperPid} | {timeout, Timeout}
%%
%% Fetches the running workers from the controller process.
get_workers(Timeout) ->
    case whereis(?SERVER_NAME) of
        undefined ->
            {timeout, Timeout};
        Controller ->
            Controller ! {self(), get_state},
            receive
                {?SERVER_NAME, #state{} = State} ->
                    Dumper = State#state.dumper_pid,
                    {workers, get_loaders(State), get_senders(State), Dumper}
            after Timeout ->
                    {timeout, Timeout}
            end
    end.

%% info() prints size information about all locally active tables
info() ->
    Active = mnesia_lib:local_active_tables(),
    io:format( "---> Active tables <--- ~n", []),
    info(Active).

%% info(Tabs) prints one line per table: disc_only_copies tables are
%% measured through dets, all other tables through ets.
info(Tabs) ->
    PrintTab =
        fun(Tab) ->
                case val({Tab, storage_type}) of
                    disc_only_copies ->
                        info_format(Tab,
                                    dets:info(Tab, size),
                                    dets:info(Tab, file_size),
                                    "bytes on disc");
                    _ ->
                        info_format(Tab,
                                    ?ets_info(Tab, size),
                                    ?ets_info(Tab, memory),
                                    "words of mem")
                end
        end,
    lists:foreach(PrintTab, Tabs).


%% info_format(Tab, Size, Mem, Media) prints one table line; Size and
%% Mem must be integers, Media is the unit text that follows Mem.
info_format(Tab, Size, Mem, Media) ->
    Name = mnesia_lib:pad_name(atom_to_list(Tab), 15, []),
    [Records, Space] =
        [mnesia_lib:pad_name(integer_to_list(N), 8, []) || N <- [Size, Mem]],
    io:format("~s: with ~s records occupying ~s ~s~n",
              [Name, Records, Space, Media]).

%% Handle early arrived messages
%%
%% Runs the buffered messages, in the order given, through the
%% ordinary handlers and threads the state along. A {reply, ...}
%% result of a call is delivered to the caller that was buffered
%% together with the message. Processing ends at the first {stop, ...}.
%% A {stop, Reason, Reply, State} result is not handled since,
%% according to dialyzer, it will not happen.
handle_early_msgs([], State) ->
    noreply(State);
handle_early_msgs([Msg | Msgs], State) ->
    case handle_early_msg(Msg, State) of
        {stop, _Reason, _State2} = Stop ->
            Stop;
        {noreply, State2} ->
            handle_early_msgs(Msgs, State2);
        {reply, Reply, State2} ->
            {call, _Call, From} = Msg,
            reply(From, Reply),
            handle_early_msgs(Msgs, State2)
    end.

%% Dispatches one buffered message to the handler that it was meant for
handle_early_msg({info, Msg}, State) ->
    handle_info(Msg, State);
handle_early_msg({cast, Msg}, State) ->
    handle_cast(Msg, State);
handle_early_msg({call, Msg, From}, State) ->
    handle_call(Msg, From, State).

noreply(State) ->
    {noreply, State}.

%% reply(ReplyTo, Reply) -> Reply
%% Answers a gen_server caller; nothing is sent when ReplyTo is undefined.
reply(undefined, Reply) ->
    Reply;
reply(ReplyTo, Reply) ->
    gen_server:reply(ReplyTo, Reply),
    Reply.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Worker management

%% Returns new State
%%
%% Dump, schema commit lock and block controller workers go into the
%% dumper queue, send requests into the sender queue and table loads
%% into the loader queue. After queuing, a worker is started if
%% possible.
add_worker(#dump_log{initiated_by = InitBy, opt_reply_to = ReplyTo} = Worker, State) ->
    Pending = State#state.dumper_queue,
    AlreadyQueued = lists:keymember(InitBy, #dump_log.initiated_by, Pending),
    if
        AlreadyQueued, ReplyTo == undefined ->
            %% The same threshold has been exceeded again,
            %% before we have had the possibility to
            %% process the older one.
            mnesia_lib:report_system_event({mnesia_overload, {dump_log, InitBy}});
        true ->
            ignore
    end,
    opt_start_worker(State#state{dumper_queue = Pending ++ [Worker]});
add_worker(#schema_commit_lock{} = Worker, State) ->
    Pending = State#state.dumper_queue,
    opt_start_worker(State#state{dumper_queue = Pending ++ [Worker]});
add_worker(#net_load{table = Tab} = Worker, State) ->
    opt_start_worker(add_loader(Tab, Worker, State));
add_worker(#send_table{} = Worker, State) ->
    Pending = State#state.sender_queue,
    opt_start_worker(State#state{sender_queue = Pending ++ [Worker]});
add_worker(#disc_load{table = Tab} = Worker, State) ->
    opt_start_worker(add_loader(Tab, Worker, State));
% Block controller should be used for upgrading mnesia.
add_worker(#block_controller{} = Worker, State) ->
    %% Unlike the other dumper queue workers it is put first in the queue
    Pending = State#state.dumper_queue,
    opt_start_worker(State#state{dumper_queue = [Worker | Pending]}).

%% add_loader(Tab, Worker, State) -> NewState
%% Queues Worker as loader of Tab unless a loader for Tab is queued already.
add_loader(Tab, Worker, #state{loader_queue = Queue} = State) ->
    case gb_trees:is_defined(Tab, Queue) of
        false ->
            State#state{loader_queue = gb_trees:insert(Tab, Worker, Queue)};
        true ->
            State
    end.

%% Optionally start a worker
%%
%% Dumpers and loaders may run simultaneously
%% but neither of them may run during schema commit.
%% Loaders may not start if a schema commit is enqueued.
opt_start_worker(State) when State#state.is_stopping == true ->
    %% Nothing is started while the controller is stopping
    State;
opt_start_worker(State) ->
    %% Prioritize dumper and schema commit
    %% by checking them first
    case State#state.dumper_queue of
        [First | _] when State#state.dumper_pid == undefined ->
            %% Great, a worker in queue and neither
            %% a schema transaction is being
            %% committed and nor a dumper is running

            %% Start worker but keep him in the queue
            if
                is_record(First, schema_commit_lock) ->
                    LockOwner = First#schema_commit_lock.owner,
                    reply(LockOwner, granted),
                    {LockPid, _LockTag} = LockOwner,
                    opt_start_loader(State#state{dumper_pid = LockPid});

                is_record(First, dump_log) ->
                    Dumper = spawn_link(?MODULE, dump_and_reply, [self(), First]),

                    %% If the worker was a dumper we may
                    %% possibly be able to start a loader
                    %% or sender
                    opt_start_loader(opt_start_sender(State#state{dumper_pid = Dumper}));

                is_record(First, block_controller) ->
                    %% The block is only granted when no sender
                    %% and no loader is running
                    case {get_senders(State), get_loaders(State)} of
                        {[], []} ->
                            BlockOwner = First#block_controller.owner,
                            reply(BlockOwner, granted),
                            {BlockPid, _BlockTag} = BlockOwner,
                            State#state{dumper_pid = BlockPid};
                        _ ->
                            State
                    end
            end;
        _ ->
            %% Bad luck, try with a loader or sender instead
            opt_start_loader(opt_start_sender(State))
    end.

%% Tries to start every queued sender. The state is updated with the
%% resulting list of running senders and the senders that must wait.
opt_start_sender(State) ->
    case State#state.sender_queue of
        [] ->
            State; %% No need
        Queued ->
            {Running, Waiting} =
                opt_start_sender2(Queued, get_senders(State), [], get_loaders(State)),
            State#state{sender_pid = Running, sender_queue = Waiting}
    end.

%% Walks the sender queue. Returns {RunningSenders, KeptSenders} where
%% the kept ones are those that must wait for a local load to finish.
opt_start_sender2([], Pids, Kept, _) ->
    {Pids, Kept};
opt_start_sender2([Sender | Rest], Pids, Kept, LoaderQ) ->
    Tab = Sender#send_table.table,
    Active = val({Tab, active_replicas}),
    IgotIt = lists:member(node(), Active),
    %% Compares the table field of each running loader record
    %% with the table that is about to be sent
    IsLoading = lists:any(fun({_Pid, Loader}) ->
                                  Tab == element(#net_load.table, Loader)
                          end, LoaderQ),
    case {IgotIt, IsLoading} of
        {true, true} ->
            %% I'm currently finishing loading the table let him wait
            opt_start_sender2(Rest, Pids, [Sender | Kept], LoaderQ);
        {true, false} ->
            %% Start worker but keep him in the queue
            Pid = spawn_link(?MODULE, send_and_reply, [self(), Sender]),
            opt_start_sender2(Rest, [{Pid, Sender} | Pids], Kept, LoaderQ);
        {false, _} ->
            verbose("Send table failed ~p not active on this node ~n", [Tab]),
            Sender#send_table.receiver_pid ! {copier_done, node()},
            opt_start_sender2(Rest, Pids, Kept, LoaderQ)
    end.

%% Starts the next queued loader, provided that the queue is not
%% empty, the number of running loaders is below max_loaders() and no
%% schema commit lock is waiting in the dumper queue.
opt_start_loader(State = #state{loader_queue = LoaderQ}) ->
    Running = get_loaders(State),
    Limit = max_loaders(),
    case gb_trees:is_empty(LoaderQ) of
        true ->
            State;
        _ when length(Running) >= Limit ->
            State;
        false ->
            DumperQ = State#state.dumper_queue,
            case lists:keymember(schema_commit_lock, 1, DumperQ) of
                true ->
                    %% Bad luck, we must wait for the schema commit
                    State;
                false ->
                    case pick_next(LoaderQ) of
                        {none, Rest} ->
                            State#state{loader_queue = Rest};
                        {Worker, Rest} ->
                            case already_loading(Worker, get_loaders(State)) of
                                true ->
                                    %% Skip this one and try the next in queue
                                    opt_start_loader(State#state{loader_queue = Rest});
                                false ->
                                    %% Start worker but keep him in the queue
                                    Pid = load_and_reply(self(), Worker),
                                    State#state{loader_pid = [{Pid, Worker} | get_loaders(State)],
                                                loader_queue = Rest}
                            end
                    end
            end
    end.

%% True if some running loader already works on the same table as the
%% given net_load or disc_load worker.
already_loading(#net_load{table = Tab}, Loaders) ->
    already_loading2(Tab, Loaders);
already_loading(#disc_load{table = Tab}, Loaders) ->
    already_loading2(Tab, Loaders).

%% Searches the {Pid, Worker} list for a net_load or disc_load worker
%% whose table is Tab.
already_loading2(_, []) -> false;
already_loading2(Tab, [{_, #net_load{table = Tab}} | _]) -> true;
already_loading2(Tab, [{_, #disc_load{table = Tab}} | _]) -> true;
already_loading2(Tab, [_ | Rest]) -> already_loading2(Tab, Rest).

%% Casts a send_table request to the controller on Node.
start_remote_sender(Node, Tab, Receiver, Storage) ->
    Request = #send_table{table = Tab,
                          receiver_pid = Receiver,
                          remote_storage = Storage},
    gen_server:cast({?SERVER_NAME, Node}, Request).

%% Body of a dumper worker process: dumps the log and reports the
%% result to ReplyTo in a dumper_done record.
dump_and_reply(ReplyTo, Worker) ->
    %% No trap_exit, die intentionally instead
    Outcome = mnesia_dumper:opt_dump_log(Worker#dump_log.initiated_by),
    ReplyTo ! #dumper_done{worker_pid = self(),
                           worker_res = Outcome},
    unlink(ReplyTo),
    exit(normal).

%% Body of a sender worker process: sends the table and reports the
%% result to ReplyTo in a sender_done record.
send_and_reply(ReplyTo, Worker) ->
    %% No trap_exit, die intentionally instead
    #send_table{receiver_pid = Receiver,
                table = Tab,
                remote_storage = Storage} = Worker,
    Outcome = mnesia_loader:send_table(Receiver, Tab, Storage),
    ReplyTo ! #sender_done{worker_pid = self(),
                           worker_res = Outcome},
    unlink(ReplyTo),
    exit(normal).

%% Spawns a linked loader process and returns its pid. The load fun is
%% selected in the calling process; only the load itself runs in the
%% new process, which traps exits while loading.
load_and_reply(ReplyTo, Worker) ->
    Load = load_table_fun(Worker),
    spawn_link(fun() ->
                       process_flag(trap_exit, true),
                       Done = Load(),
                       ReplyTo ! Done#loader_done{worker_pid = self()},
                       unlink(ReplyTo),
                       exit(normal)
               end).

%% Now it is time to load the table
%% but first we must check if it still is necessary
%%
%% Returns a fun of arity 0 which performs the load (if any) and
%% returns a loader_done record describing the outcome.
load_table_fun(#net_load{cstruct=Cs, table=Tab, reason=Reason, opt_reply_to=ReplyTo}) ->
    LocalC = val({Tab, local_content}),
    AccessMode = val({Tab, access_mode}),
    ReadNode = val({Tab, where_to_read}),
    Active = filter_active(Tab),
    Done = #loader_done{is_loaded = true,
                        table_name = Tab,
                        needs_announce = false,
                        needs_sync = false,
                        needs_reply = (ReplyTo /= undefined),
                        reply_to = ReplyTo,
                        reply = {loaded, ok}
                       },
    if
        ReadNode == node() ->
            %% Already loaded locally
            fun() -> Done end;
        LocalC == true ->
            fun() ->
                    LocalRes = mnesia_loader:disc_load_table(Tab, load_local_content),
                    Done#loader_done{reply = LocalRes,
                                     needs_announce = true,
                                     needs_sync = true}
            end;
        AccessMode == read_only, Reason /= {dumper,add_table_copy} ->
            fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
        true ->
            fun() ->
                    %% Either we cannot read the table yet
                    %% or someone is moving a replica between
                    %% two nodes
                    case mnesia_loader:net_load_table(Tab, Reason, Active, Cs) of
                        {loaded, ok} = NetRes ->
                            Done#loader_done{needs_sync = true,
                                             reply = NetRes};
                        {not_loaded, _} = NetRes ->
                            Done#loader_done{is_loaded = false,
                                             reply = NetRes}
                    end
            end
    end;
load_table_fun(#disc_load{table=Tab, reason=Reason, opt_reply_to=ReplyTo}) ->
    ReadNode = val({Tab, where_to_read}),
    Active = filter_active(Tab),
    Done = #loader_done{is_loaded = true,
                        table_name = Tab,
                        needs_announce = false,
                        needs_sync = false,
                        needs_reply = false
                       },
    if
        Active == [], ReadNode == nowhere ->
            %% Not loaded anywhere, lets load it from disc
            fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
        ReadNode == nowhere ->
            %% Already loaded on other node, lets get it
            Cs = val({Tab, cstruct}),
            fun() ->
                    case mnesia_loader:net_load_table(Tab, Reason, Active, Cs) of
                        {loaded, ok} ->
                            Done#loader_done{needs_sync = true};
                        {not_loaded, storage_unknown} ->
                            Done#loader_done{is_loaded = false};
                        {not_loaded, ErrReason} ->
                            Done#loader_done{is_loaded = false,
                                             reply = {not_loaded,ErrReason}}
                    end
            end;
        true ->
            %% Already readable, do not worry be happy
            fun() -> Done end
    end.

%% Loads Tab from disc and returns a loader_done record. A failed load
%% is reported back only when somebody waits for a reply; otherwise it
%% is treated as fatal.
disc_load_table(Tab, Reason, ReplyTo) ->
    Done = #loader_done{is_loaded = true,
                        table_name = Tab,
                        needs_announce = false,
                        needs_sync = false,
                        needs_reply = ReplyTo /= undefined,
                        reply_to = ReplyTo,
                        reply = {loaded, ok}
                       },
    case mnesia_loader:disc_load_table(Tab, Reason) of
        {loaded, ok} = Res ->
            Done#loader_done{needs_announce = true,
                             needs_sync = true,
                             reply = Res};
        Res when ReplyTo /= undefined ->
            Done#loader_done{is_loaded = false,
                             reply = Res};
        Res ->
            fatal("Cannot load table ~p from disc: ~p~n", [Tab, Res])
    end.

%% Returns the nodes that Tab may be loaded from, ordered so that the
%% preferred candidates come first.
filter_active(Tab) ->
    ByForce = val({Tab, load_by_force}),
    Active = val({Tab, active_replicas}),
    Masters = mnesia_recover:get_master_nodes(Tab),
    Ns = do_filter_active(ByForce, Active, Masters),
    %% Reorder them so that we load from fastest first
    LS = ?catch_val({Tab, storage_type}),
    DOC = val({Tab, disc_only_copies}),
    Good =
        case LS of
            disc_only_copies ->
                mnesia_lib:intersect(Ns, DOC);
            _ ->
                Ns -- DOC
        end,
    Worse = Ns -- Good,
    %% Pick a random node of the fastest
    case Good of
        [] ->
            Worse;
        _ ->
            R = erlang:phash(node(), length(Good) + 1),
            random(R-1, Good, Worse)
    end.

%% Moves the first N elements of the list, one at a time, to the front
%% of Acc and then appends Acc after what is left of the list.
random(0, L, Acc) ->
    L ++ Acc;
random(N, [H|R], Acc) when N > 0 ->
    random(N-1,R, [H|Acc]).

%% Restricts the active replicas to the master nodes, unless the load
%% is forced or no master nodes are defined.
do_filter_active(true, Active, _Masters) ->
    Active;
do_filter_active(false, Active, []) ->
    Active;
do_filter_active(false, Active, Masters) ->
    mnesia_lib:intersect(Active, Masters).


diff --git a/lib/mnesia/src/mnesia_dumper.erl b/lib/mnesia/src/mnesia_dumper.erl
new file mode 100644
index 0000000000..f669d009c6
--- /dev/null
+++ b/lib/mnesia/src/mnesia_dumper.erl
@@ -0,0 +1,1218 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_dumper).

%% The InitBy arg may be one of the following:
%% scan_decisions     Initial scan for decisions
%% startup            Initial dump during startup
%% schema_prepare     Dump initiated during schema transaction preparation
%% schema_update      Dump initiated during schema transaction commit
%% fast_schema_update A schema_update, but ignores the log file
%% user               Dump initiated by user
%% write_threshold    Automatic dump caused by too many log writes
%% time_threshold     Automatic dump caused by timeout

%% Public interface
-export([
         get_log_writes/0,
         incr_log_writes/0,
         raw_dump_table/2,
         raw_named_dump_table/2,
         start_regulator/0,
         opt_dump_log/1,
         update/3
        ]).

%% Internal stuff
-export([regulator_init/1]).

-include("mnesia.hrl").
-include_lib("kernel/include/file.hrl").

-import(mnesia_lib, [fatal/2, dbg_out/2]).

-define(REGULATOR_NAME, mnesia_dumper_load_regulator).
-define(DumpToEtsMultiplier, 4).

%% Returns the accumulated trans_log_writes counter plus the part of
%% the current dump_log_write_threshold that has been used up, i.e.
%% the threshold minus the trans_log_writes_left counter.
get_log_writes() ->
    Threshold = mnesia_monitor:get_env(dump_log_write_threshold),
    Accumulated = mnesia_lib:read_counter(trans_log_writes),
    Remaining = mnesia_lib:read_counter(trans_log_writes_left),
    Accumulated + (Threshold - Remaining).

%% Decrements the trans_log_writes_left counter by one. When it is no
%% longer positive, the counters are adjusted and a dump is requested.
incr_log_writes() ->
    case mnesia_lib:incr_counter(trans_log_writes_left, -1) of
        Remaining when Remaining > 0 ->
            ignore;
        _ ->
            adjust_log_writes(true)
    end.

%% Resets the trans_log_writes_left counter to the configured
%% threshold and adds the writes used so far to trans_log_writes.
%% With DoCast == true an asynchronous write_threshold dump is
%% requested first. All of it happens under a node-local global lock;
%% if the lock cannot be taken nothing is done.
adjust_log_writes(DoCast) ->
    Token = {mnesia_adjust_log_writes, self()},
    case global:set_lock(Token, [node()], 1) of
        false ->
            ignore; %% Somebody else is sending a dump request
        true ->
            case DoCast of
                false ->
                    ignore;
                true ->
                    mnesia_controller:async_dump_log(write_threshold)
            end,
            Threshold = mnesia_monitor:get_env(dump_log_write_threshold),
            Remaining = mnesia_lib:read_counter(trans_log_writes_left),
            %% Don't care if we lost a few writes
            mnesia_lib:set_counter(trans_log_writes_left, Threshold),
            mnesia_lib:incr_counter(trans_log_writes, Threshold - Remaining),
            global:del_lock(Token, [node()])
    end.

%% Performs a dump initiated by InitBy. The regulator pid is passed
%% along when the regulator process is registered, else nopid.
%% Returns 'ok' or exits
opt_dump_log(InitBy) ->
    Regulator =
        case whereis(?REGULATOR_NAME) of
            undefined ->
                nopid;
            Pid when is_pid(Pid) ->
                Pid
        end,
    perform_dump(InitBy, Regulator).

%% Scan for decisions
perform_dump(InitBy, Regulator) when InitBy == scan_decisions ->
    ?eval_debug_fun({?MODULE, perform_dump}, [InitBy]),

    dbg_out("Transaction log dump initiated by ~w~n", [InitBy]),
    scan_decisions(mnesia_log:previous_log_file(), InitBy, Regulator),
    scan_decisions(mnesia_log:latest_log_file(), InitBy, Regulator);

%% Propagate the log into the DAT-files
perform_dump(InitBy, Regulator) ->
    ?eval_debug_fun({?MODULE, perform_dump}, [InitBy]),
    LogState = mnesia_log:prepare_log_dump(InitBy),
    dbg_out("Transaction log dump initiated by ~w: ~w~n",
            [InitBy, LogState]),
    adjust_log_writes(false),
    case LogState of
        already_dumped ->
            mnesia_recover:allow_garb(),
            dumped;
        {needs_dump, Diff} ->
            InPlace = mnesia_monitor:get_env(dump_log_update_in_place),
            Cont = mnesia_log:init_log_dump(),
            mnesia_recover:sync(),
            case catch do_perform_dump(Cont, InPlace, InitBy, Regulator, undefined) of
                ok ->
                    ?eval_debug_fun({?MODULE, post_dump}, [InitBy]),
                    case mnesia_monitor:use_dir() of
                        true ->
                            mnesia_recover:dump_decision_tab();
                        false ->
                            mnesia_log:purge_some_logs()
                    end,
                    mnesia_recover:allow_garb(),
                    %% And now to the crucial point...
                    mnesia_log:confirm_log_dump(Diff);
                {error, Reason} ->
                    {error, Reason};
                {'EXIT', {Desc, Reason}} ->
                    %% With auto_repair the failure is reported and
                    %% the dump is confirmed anyway
                    case mnesia_monitor:get_env(auto_repair) of
                        true ->
                            mnesia_lib:important(Desc, Reason),
                            %% Ignore rest of the log
                            mnesia_log:confirm_log_dump(Diff);
                        false ->
                            fatal(Desc, Reason)
                    end
            end;
        {error, Reason} ->
            {error, {"Cannot prepare log dump", Reason}}
    end.

%% Scans the log file Fname (if it exists) for decisions. The file is
%% opened read_only under the log name previous_log and is closed
%% again whether the scan succeeds or not.
scan_decisions(Fname, InitBy, Regulator) ->
    case mnesia_lib:exists(Fname) of
        false ->
            ok;
        true ->
            Header = mnesia_log:trans_log_header(),
            LogName = previous_log,
            mnesia_log:open_log(LogName, Header, Fname, true,
                                mnesia_monitor:get_env(auto_repair), read_only),
            Outcome = (catch do_perform_dump(start, false, InitBy, Regulator, undefined)),
            mnesia_log:close_log(LogName),
            case Outcome of
                ok -> ok;
                {'EXIT', Reason} -> {error, Reason}
            end
    end.

%% Reads the log chunk by chunk and inserts the records of each chunk.
%% The log version found so far is threaded through the iterations.
%% On failure the files are closed and the process exits.
do_perform_dump(Cont, InPlace, InitBy, Regulator, OldVersion) ->
    case mnesia_log:chunk_log(Cont) of
        eof ->
            close_files(InPlace, ok, InitBy),
            erase(mnesia_dumper_dets),
            ok;
        {NextCont, Recs} ->
            case catch insert_recs(Recs, InPlace, InitBy, Regulator, OldVersion) of
                {'EXIT', R} ->
                    Reason = {"Transaction log dump error: ~p~n", [R]},
                    close_files(InPlace, {error, Reason}, InitBy),
                    exit(Reason);
                Version ->
                    do_perform_dump(NextCont, InPlace, InitBy, Regulator, Version)
            end
    end.

%% Inserts the records one by one, calling the regulator before each
%% one. Returns the log version in effect after the last record; a
%% log_header record replaces the version carried so far.
insert_recs([], _InPlace, _InitBy, _Regulator, Version) ->
    Version;
insert_recs([Rec | Recs], InPlace, InitBy, Regulator, LogV) ->
    regulate(Regulator),
    NextV =
        case insert_rec(Rec, InPlace, InitBy, LogV) of
            #log_header{log_version = HeaderV} -> HeaderV;
            _ -> LogV
        end,
    insert_recs(Recs, InPlace, InitBy, Regulator, NextV).

%% Handles one log record.
%% During scan_decisions only records that are neither commit nor
%% log_header records are noted as decisions.
insert_rec(Rec, _InPlace, scan_decisions, _LogV) ->
    if
        is_record(Rec, commit) ->
            ignore;
        is_record(Rec, log_header) ->
            ignore;
        true ->
            mnesia_recover:note_log_decision(Rec, scan_decisions)
    end;
insert_rec(Rec, InPlace, InitBy, LogV) when is_record(Rec, commit) ->
    %% Determine the Outcome of the transaction and recover it
    Decision = Rec#commit.decision,
    case mnesia_recover:wait_for_decision(Decision, InitBy) of
        {Tid, committed} ->
            do_insert_rec(Tid, Rec, InPlace, InitBy, LogV);
        {Tid, aborted} ->
            mnesia_schema:undo_prepare_commit(Tid, Rec)
    end;
insert_rec(H, _InPlace, _InitBy, _LogV) when is_record(H, log_header) ->
    %% Verifies the header and returns it so that the caller can pick
    %% up the log version
    CurrentVersion = mnesia_log:version(),
    Kind = H#log_header.log_kind,
    Version = H#log_header.log_version,
    if
        Kind /= trans_log ->
            exit({"Bad kind of transaction log", H});
        Version == CurrentVersion;
        Version == "4.2";
        Version == "4.1";
        Version == "4.0" ->
            ok;
        true ->
            fatal("Bad version of transaction log: ~p~n", [H])
    end,
    H;

insert_rec(_Rec, _InPlace, _InitBy, _LogV) ->
    ok.

%% Applies the operations of a committed transaction: first the schema
%% operations, then the disc_copies operations and, during startup
%% only, the disc_only_copies operations.
do_insert_rec(Tid, Rec, InPlace, InitBy, LogV) ->
    case Rec#commit.schema_ops of
        [] ->
            ignore;
        SchemaOps ->
            %% A schema kept on disc must have its files opened
            %% before the operations are inserted
            case val({schema, storage_type}) of
                ram_copies ->
                    ok;
                Storage ->
                    true = open_files(schema, Storage, InPlace, InitBy)
            end,
            insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy, LogV)
    end,
    insert_ops(Tid, disc_copies, Rec#commit.disc_copies, InPlace, InitBy, LogV),
    case InitBy of
        startup ->
            insert_ops(Tid, disc_only_copies, Rec#commit.disc_only_copies,
                       InPlace, InitBy, LogV);
        _ ->
            ignore
    end.


%% Performs the schema operations of a transaction and then releases
%% the schema commit lock. An empty list of operations is a no-op
%% which returns 'dumped' without touching the lock.
update(_Tid, [], _DumperMode) ->
    dumped;
update(Tid, SchemaOps, DumperMode) ->
    UseDir = mnesia_monitor:use_dir(),
    Result = perform_update(Tid, SchemaOps, DumperMode, UseDir),
    mnesia_controller:release_schema_commit_lock(),
    Result.

perform_update(_Tid, _SchemaOps, mandatory, true) ->
    %% Force a dump of the transaction log in order to let the
    %% dumper perform needed updates

    InitBy = schema_update,
    ?eval_debug_fun({?MODULE, dump_schema_op}, [InitBy]),
    opt_dump_log(InitBy);
perform_update(Tid, SchemaOps, _DumperMode, _UseDir) ->
    %% No need for a full transaction log dump.
    %% Ignore the log file and perform only
    %% the corresponding updates.

    InitBy = fast_schema_update,
    InPlace = mnesia_monitor:get_env(dump_log_update_in_place),
    ?eval_debug_fun({?MODULE, dump_schema_op}, [InitBy]),
    case catch insert_ops(Tid, schema_ops, SchemaOps, InPlace, InitBy,
                          mnesia_log:version()) of
        {'EXIT', Reason} ->
            Failure = {error, {"Schema update error", Reason}},
            close_files(InPlace, Failure, InitBy),
            fatal("Schema update error ~p ~p", [Reason, SchemaOps]);
        _ ->
            ?eval_debug_fun({?MODULE, post_dump}, [InitBy]),
            close_files(InPlace, ok, InitBy),
            ok
    end.

%% Inserts a list of operations. From log version "4.3" and onwards
%% the operations are applied in list order; for older versions they
%% are applied in the reverse order.
insert_ops(_Tid, _Storage, [], _InPlace, _InitBy, _) -> ok;
insert_ops(Tid, Storage, [Op | Ops], InPlace, InitBy, Ver) when Ver >= "4.3" ->
    %% A single remaining operation ends in the empty list clause
    %% above, hence the result is always ok
    insert_op(Tid, Storage, Op, InPlace, InitBy),
    insert_ops(Tid, Storage, Ops, InPlace, InitBy, Ver);
insert_ops(Tid, Storage, [Op | Ops], InPlace, InitBy, Ver) when Ver < "4.3" ->
    insert_ops(Tid, Storage, Ops, InPlace, InitBy, Ver),
    insert_op(Tid, Storage, Op, InPlace, InitBy).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Normal ops

%% Writes one operation to the disc representation of Tab. Non-schema
%% disc_copies tables get the operation appended to their log; all
%% other tables are updated directly in dets.
%% Returns ignore when open_files/4 returns false.
disc_insert(_Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy) ->
    case open_files(Tab, Storage, InPlace, InitBy) of
        false ->
            ignore;
        true ->
            case Storage of
                disc_copies when Tab /= schema ->
                    mnesia_log:append({?MODULE,Tab}, {{Tab, Key}, Val, Op}),
                    ok;
                _ ->
                    dets_insert(Op,Tab,Key,Val)
            end
    end.

%% To fix update_counter so that it behaves better.
%% i.e. if nothing have changed in tab except update_counter
%% trust that the value in the dets file is correct.
%% Otherwise we will get a double increment.
%% This is perfect but update_counter is a dirty op.

dets_insert(Op,Tab,Key,Val) ->
    case Op of
        write ->
            dets_updated(Tab,Key),
            ok = dets:insert(Tab, Val);
        delete ->
            dets_updated(Tab,Key),
            ok = dets:delete(Tab, Key);
        update_counter ->
            %% Only replayed when the key (or the whole table) has
            %% been touched earlier during this dump
            case dets_incr_counter(Tab,Key) of
                true ->
                    {RecName, Incr} = Val,
                    case catch dets:update_counter(Tab, Key, Incr) of
                        CounterVal when is_integer(CounterVal) ->
                            ok;
                        _ when Incr < 0 ->
                            ok = dets:insert(Tab, {RecName, Key, 0});
                        _ ->
                            ok = dets:insert(Tab, {RecName, Key, Incr})
                    end;
                false -> ok
            end;
        delete_object ->
            dets_updated(Tab,Key),
            ok = dets:delete_object(Tab, Val);
        clear_table ->
            dets_cleared(Tab),
            ok = dets:match_delete(Tab, '_')
    end.

%% Remembers, in the process dictionary, that Key of Tab has been
%% changed during this dump. Nothing is recorded for a table that is
%% already marked as cleared.
dets_updated(Tab,Key) ->
    Touched =
        case get(mnesia_dumper_dets) of
            undefined -> gb_trees:empty();
            Existing -> Existing
        end,
    case gb_trees:lookup(Tab, Touched) of
        {value, cleared} ->
            ignore;
        {value, Keys} ->
            put(mnesia_dumper_dets,
                gb_trees:update(Tab, gb_sets:add(Key, Keys), Touched));
        none ->
            put(mnesia_dumper_dets,
                gb_trees:insert(Tab, gb_sets:singleton(Key), Touched))
    end.

%% True if Key of Tab has been changed, or Tab has been cleared,
%% earlier during this dump.
dets_incr_counter(Tab,Key) ->
    case get(mnesia_dumper_dets) of
        undefined ->
            false;
        Touched ->
            case gb_trees:lookup(Tab, Touched) of
                {value, cleared} -> true;
                {value, Keys} -> gb_sets:is_member(Key, Keys);
                none -> false
            end
    end.

%% Marks Tab as cleared in the process dictionary bookkeeping of
%% tables touched during this dump.
dets_cleared(Tab) ->
    Touched =
        case get(mnesia_dumper_dets) of
            undefined -> gb_trees:empty();
            Existing -> Existing
        end,
    case gb_trees:lookup(Tab, Touched) of
        {value, cleared} ->
            ignore;
        _ ->
            put(mnesia_dumper_dets, gb_trees:enter(Tab, cleared, Touched))
    end.

%% Applies Op for one value or for each value in a list of values.
%% During startup only the disc representation is updated. Otherwise
%% the table is updated through mnesia_tm, including snmp, and for
%% disc_copies the operation is written to disc first.
insert(Tid, Storage, Tab, Key, [Val | Tail], Op, InPlace, InitBy) ->
    insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy),
    insert(Tid, Storage, Tab, Key, Tail, Op, InPlace, InitBy);

insert(_Tid, _Storage, _Tab, _Key, [], _Op, _InPlace, _InitBy) ->
    ok;

insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy) ->
    Item = {{Tab, Key}, Val, Op},
    UpdateTab =
        fun() ->
                mnesia_tm:do_update_op(Tid, Storage, Item),
                Snmp = mnesia_tm:prepare_snmp(Tab, Key, [Item]),
                mnesia_tm:do_snmp(Tid, Snmp)
        end,
    case InitBy of
        startup ->
            disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy);

        _ when Storage == ram_copies ->
            UpdateTab();

        _ when Storage == disc_copies ->
            disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy),
            UpdateTab();

        _ when Storage == disc_only_copies ->
            UpdateTab();

        _ when Storage == unknown ->
            ignore
    end.

%% Removes the disc files of Tab when a mnesia directory is in use.
%% disc_only_copies tables and the schema have a dat file; other
%% tables have a dcl and a dcd file. The process dictionary entry kept
%% by this module for Tab is erased afterwards.
disc_delete_table(Tab, Storage) ->
    case mnesia_monitor:use_dir() of
        false ->
            ignore;
        true ->
            if
                Storage == disc_only_copies; Tab == schema ->
                    mnesia_monitor:unsafe_close_dets(Tab),
                    file:delete(mnesia_lib:tab2dat(Tab));
                true ->
                    DclFile = mnesia_lib:tab2dcl(Tab),
                    %% Close the dcl log first if this dump has it open
                    case get({?MODULE,Tab}) of
                        {opened_dumper, dcl} ->
                            del_opened_tab(Tab),
                            mnesia_log:unsafe_close_log(Tab);
                        _ ->
                            ok
                    end,
                    file:delete(DclFile),
                    DcdFile = mnesia_lib:tab2dcd(Tab),
                    file:delete(DcdFile),
                    ok
            end,
            erase({?MODULE, Tab})
    end.

%% Deletes the transient index tables of a disc_only_copies table.
%% Other storage types are ignored.
disc_delete_indecies(_Tab, _Cs, Storage) when Storage /= disc_only_copies ->
    ignore;
disc_delete_indecies(Tab, Cs, disc_only_copies) ->
    Positions = Cs#cstruct.index,
    mnesia_index:del_transient(Tab, Positions, disc_only_copies).

%% Performs one logged operation. Plain record operations go straight
%% to disc; every {op, ...} tuple is a schema operation with its own
%% clause below.
insert_op(Tid, Storage, {{Tab, Key}, Val, Op}, InPlace, InitBy) ->
    %% Propagate to disc only
    disc_insert(Tid, Storage, Tab, Key, Val, Op, InPlace, InitBy);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% NOTE that all operations below will only
%% be performed if the dump is initiated by
%% startup, schema_update or fast_schema_update
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

insert_op(_Tid, schema_ops, _OP, _InPlace, Initby)
  when Initby /= startup,
       Initby /= fast_schema_update,
       Initby /= schema_update ->
    ignore;

insert_op(Tid, _, {op, rec, Storage, Item}, InPlace, InitBy) ->
    {{Tab, Key}, ValList, Op} = Item,
    insert(Tid, Storage, Tab, Key, ValList, Op, InPlace, InitBy);

insert_op(Tid, _, {op, change_table_copy_type, N, FromS, ToS, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Val = mnesia_schema:insert_cstruct(Tid, Cs, true), % Update ram only
    {schema, Tab, _} = Val,
    case lists:member(N, val({current, db_nodes})) of
        true when InitBy /= startup ->
            mnesia_controller:add_active_replica(Tab, N, Cs);
        _ ->
            ignore
    end,
    %% The files are only converted on the node that owns the replica
    if
        N == node() ->
            Dmp = mnesia_lib:tab2dmp(Tab),
            Dat = mnesia_lib:tab2dat(Tab),
            Dcd = mnesia_lib:tab2dcd(Tab),
            Dcl = mnesia_lib:tab2dcl(Tab),
            case {FromS, ToS} of
                {ram_copies, disc_copies} when Tab == schema ->
                    ok = ensure_rename(Dmp, Dat);
                {ram_copies, disc_copies} ->
                    file:delete(Dcl),
                    ok = ensure_rename(Dmp, Dcd);
                {disc_copies, ram_copies} when Tab == schema ->
                    mnesia_lib:set(use_dir, false),
                    mnesia_monitor:unsafe_close_dets(Tab),
                    file:delete(Dat);
                {disc_copies, ram_copies} ->
                    file:delete(Dcl),
                    file:delete(Dcd);
                {ram_copies, disc_only_copies} ->
                    ok = ensure_rename(Dmp, Dat),
                    true = open_files(Tab, disc_only_copies, InPlace, InitBy),
                    %% ram_delete_table must be done before init_indecies,
                    %% it uses info which is reset in init_indecies,
                    %% it doesn't matter, because init_indecies don't use
                    %% the ram replica of the table when creating the disc
                    %% index; Could be improved :)
                    mnesia_schema:ram_delete_table(Tab, FromS),
                    PosList = Cs#cstruct.index,
                    mnesia_index:init_indecies(Tab, disc_only_copies, PosList);
                {disc_only_copies, ram_copies} ->
                    mnesia_monitor:unsafe_close_dets(Tab),
                    disc_delete_indecies(Tab, Cs, disc_only_copies),
                    case InitBy of
                        startup ->
                            ignore;
                        _ ->
                            mnesia_controller:get_disc_copy(Tab)
                    end,
                    disc_delete_table(Tab, disc_only_copies);
                {disc_copies, disc_only_copies} ->
                    ok = ensure_rename(Dmp, Dat),
                    true = open_files(Tab, disc_only_copies, InPlace, InitBy),
                    mnesia_schema:ram_delete_table(Tab, FromS),
                    PosList = Cs#cstruct.index,
                    mnesia_index:init_indecies(Tab, disc_only_copies, PosList),
                    file:delete(Dcl),
                    file:delete(Dcd);
                {disc_only_copies, disc_copies} ->
                    mnesia_monitor:unsafe_close_dets(Tab),
                    disc_delete_indecies(Tab, Cs, disc_only_copies),
                    case InitBy of
                        startup ->
                            ignore;
                        _ ->
                            mnesia_log:ets2dcd(Tab),
                            mnesia_controller:get_disc_copy(Tab),
                            disc_delete_table(Tab, disc_only_copies)
                    end
            end;
        true ->
            ignore
    end,
    SchemaStorage = val({schema, storage_type}),
    disc_insert(Tid, SchemaStorage, schema, Tab, Val, write, InPlace, InitBy);

insert_op(Tid, _, {op, transform, _Fun, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        disc_copies ->
            open_dcl(Cs#cstruct.name);
        _ ->
            ignore
    end,
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

%%% Operations below this are handled without using the log.

insert_op(Tid, _, {op, restore_recreate, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    Type = Cs#cstruct.type,
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    %% Delete all possibly existing files and tables
    disc_delete_table(Tab, Storage),
    disc_delete_indecies(Tab, Cs, Storage),
    case InitBy of
        startup ->
            ignore;
        _ ->
            case ?catch_val({Tab, cstruct}) of
                {'EXIT', _} -> ignore;
                _ ->
                    mnesia_schema:ram_delete_table(Tab, Storage),
                    mnesia_checkpoint:tm_del_copy(Tab, node())
            end
    end,
    %% And create new ones..
    if
        (InitBy == startup) or (Storage == unknown) ->
            ignore;
        Storage == ram_copies ->
            Args = [{keypos, 2}, public, named_table, Type],
            mnesia_monitor:mktab(Tab, Args);
        Storage == disc_copies ->
            Args = [{keypos, 2}, public, named_table, Type],
            mnesia_monitor:mktab(Tab, Args),
            File = mnesia_lib:tab2dcd(Tab),
            FArg = [{file, File}, {name, {mnesia,create}},
                    {repair, false}, {mode, read_write}],
            {ok, Log} = mnesia_monitor:open_log(FArg),
            mnesia_monitor:unsafe_close_log(Log);
        Storage == disc_only_copies ->
            File = mnesia_lib:tab2dat(Tab),
            file:delete(File),
            Args = [{file, mnesia_lib:tab2dat(Tab)},
                    {type, mnesia_lib:disk_type(Tab, Type)},
                    {keypos, 2},
                    {repair, mnesia_monitor:get_env(auto_repair)}],
            mnesia_monitor:open_dets(Tab, Args)
    end,
    %% The rest is the same as for a freshly created table
    insert_op(Tid, ignore, {op, create_table, TabDef}, InPlace, InitBy);

insert_op(Tid, _, {op, create_table, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    insert_cstruct(Tid, Cs, false, InPlace, InitBy),
    Tab = Cs#cstruct.name,
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    case InitBy of
        startup ->
            %% During startup only the (empty) disc files are created
            case Storage of
                unknown ->
                    ignore;
                ram_copies ->
                    ignore;
                disc_copies ->
                    Dcd = mnesia_lib:tab2dcd(Tab),
                    case mnesia_lib:exists(Dcd) of
                        true -> ignore;
                        false ->
                            mnesia_log:open_log(temp,
                                                mnesia_log:dcl_log_header(),
                                                Dcd,
                                                false,
                                                false,
                                                read_write),
                            mnesia_log:unsafe_close_log(temp)
                    end;
                _ ->
                    Args = [{file, mnesia_lib:tab2dat(Tab)},
                            {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)},
                            {keypos, 2},
                            {repair, mnesia_monitor:get_env(auto_repair)}],
                    case mnesia_monitor:open_dets(Tab, Args) of
                        {ok, _} ->
                            mnesia_monitor:unsafe_close_dets(Tab);
                        {error, Error} ->
                            exit({"Failed to create dets table", Error})
                    end
            end;
        _ ->
            Copies = mnesia_lib:copy_holders(Cs),
            Active = mnesia_lib:intersect(Copies, val({current, db_nodes})),
            [mnesia_controller:add_active_replica(Tab, N, Cs) || N <- Active],

            case Storage of
                unknown ->
                    mnesia_lib:unset({Tab, create_table}),
                    case Cs#cstruct.local_content of
                        true ->
                            ignore;
                        false ->
                            mnesia_lib:set_remote_where_to_read(Tab)
                    end;
                _ ->
                    case Cs#cstruct.local_content of
                        true ->
                            mnesia_lib:set_local_content_whereabouts(Tab);
                        false ->
                            mnesia_lib:set({Tab, where_to_read}, node())
                    end,
                    case Storage of
                        ram_copies ->
                            ignore;
                        _ ->
                            %% Indecies are still created by loader
                            disc_delete_indecies(Tab, Cs, Storage)
                            %% disc_delete_table(Tab, Storage)
                    end,

                    %% Update whereabouts and create table
                    mnesia_controller:create_table(Tab),
                    mnesia_lib:unset({Tab, create_table})
            end
    end;

insert_op(_Tid, _, {op, dump_table, Size, TabDef}, _InPlace, _InitBy) ->
    case Size of
        unknown ->
            ignore;
        _ ->
            Cs = mnesia_schema:list2cs(TabDef),
            Tab = Cs#cstruct.name,
            Dmp = mnesia_lib:tab2dmp(Tab),
            %% Note that Dat holds the name of the dcd file
            Dat = mnesia_lib:tab2dcd(Tab),
            case Size of
                0 ->
                    %% Assume that table files already are closed
                    file:delete(Dmp),
                    file:delete(Dat);
                _ ->
                    ok = ensure_rename(Dmp, Dat)
            end
    end;

insert_op(Tid, _, {op, delete_table, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        unknown ->
            ignore;
        Storage ->
            disc_delete_table(Tab, Storage),
            disc_delete_indecies(Tab, Cs, Storage),
            case InitBy of
                startup ->
                    ignore;
                _ ->
                    mnesia_schema:ram_delete_table(Tab, Storage),
                    mnesia_checkpoint:tm_del_copy(Tab, node())
            end
    end,
    delete_cstruct(Tid, Cs, InPlace, InitBy);

insert_op(Tid, _, {op, clear_table, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        unknown ->
            ignore;
        Storage ->
            Oid = '_', %%val({Tab, wild_pattern}),
            if Storage == disc_copies ->
                    open_dcl(Cs#cstruct.name);
               true ->
                    ignore
            end,
            %% Need to catch this, it crashes on ram_copies if
            %% the op comes before table is loaded at startup.
            catch insert(Tid, Storage, Tab, '_', Oid, clear_table, InPlace, InitBy)
    end;

insert_op(Tid, _, {op, merge_schema, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    case Cs#cstruct.name of
        schema ->
            %% If we bootstrap an empty (diskless) mnesia from another node
            %% we might have changed the storage_type of schema.
            %% I think this is a good place to do it.
            Update = fun(NS = {Node,Storage}) ->
                             case mnesia_lib:cs_to_storage_type(Node, Cs) of
                                 Storage -> NS;
                                 disc_copies when Node == node() ->
                                     Dir = mnesia_lib:dir(),
                                     ok = mnesia_schema:opt_create_dir(true, Dir),
                                     mnesia_schema:purge_dir(Dir, []),
                                     mnesia_log:purge_all_logs(),

                                     mnesia_lib:set(use_dir, true),
                                     mnesia_log:init(),
                                     Ns = val({current, db_nodes}),
                                     F = fun(U) -> mnesia_recover:log_mnesia_up(U) end,
                                     lists:foreach(F, Ns),
                                     raw_named_dump_table(schema, dat),
                                     temp_set_master_nodes(),
                                     {Node,disc_copies};
                                 CSstorage ->
                                     {Node,CSstorage}
                             end
                     end,

            W2C0 = val({schema, where_to_commit}),
            W2C = case W2C0 of
                      {blocked, List} ->
                          {blocked,lists:map(Update,List)};
                      List ->
                          lists:map(Update,List)
                  end,
            %% Only store the commit targets when they have changed
            if W2C == W2C0 -> ignore;
               true -> mnesia_lib:set({schema, where_to_commit}, W2C)
            end;
        _ ->
            ignore
    end,
    insert_cstruct(Tid, Cs, false, InPlace, InitBy);

insert_op(Tid, _, {op, del_table_copy, Storage, Node, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    if
        Tab == schema, Storage == ram_copies ->
            insert_cstruct(Tid, Cs, true, InPlace, InitBy);
        Tab /= schema ->
            mnesia_controller:del_active_replica(Tab, Node),
            mnesia_lib:del({Tab, Storage}, Node),
            if
                Node == node() ->
                    %% The local replica is the one being removed
                    case Cs#cstruct.local_content of
                        true -> mnesia_lib:set({Tab, where_to_read}, nowhere);
                        false -> mnesia_lib:set_remote_where_to_read(Tab)
                    end,
                    mnesia_lib:del({schema, local_tables}, Tab),
                    mnesia_lib:set({Tab, storage_type}, unknown),
                    insert_cstruct(Tid, Cs, true, InPlace, InitBy),
                    disc_delete_table(Tab, Storage),
                    disc_delete_indecies(Tab, Cs, Storage),
                    mnesia_schema:ram_delete_table(Tab, Storage),
                    mnesia_checkpoint:tm_del_copy(Tab, Node);
                true ->
                    %% Pick another node to read from if we used to
                    %% read from the removed replica
                    case val({Tab, where_to_read}) of
                        Node ->
                            mnesia_lib:set_remote_where_to_read(Tab);
                        _ ->
                            ignore
                    end,
                    insert_cstruct(Tid, Cs, true, InPlace, InitBy)
            end
    end;

insert_op(Tid, _, {op, add_table_copy, _Storage, _Node, TabDef}, InPlace, InitBy) ->
    %% During prepare commit, the files was created
    %% and the replica was announced
    Cs = mnesia_schema:list2cs(TabDef),
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, add_snmp, _Us, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, del_snmp, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    if
        InitBy /= startup,
        Storage /= unknown ->
            case ?catch_val({Tab, {index, snmp}}) of
                {'EXIT', _} ->
                    ignore;
                Stab ->
                    mnesia_snmp_hook:delete_table(Tab, Stab),
                    mnesia_lib:unset({Tab, {index, snmp}})
            end;
        true ->
            ignore
    end,
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, add_index, Pos, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = insert_cstruct(Tid, Cs, true, InPlace, InitBy),
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    case InitBy of
        startup when Storage == disc_only_copies ->
            true = open_files(Tab, Storage, InPlace, InitBy),
            mnesia_index:init_indecies(Tab, Storage, [Pos]);
        startup ->
            ignore;
        _ ->
            mnesia_index:init_indecies(Tab, Storage, [Pos])
    end;

insert_op(Tid, _, {op, del_index, Pos, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    case InitBy of
        startup when Storage == disc_only_copies ->
            mnesia_index:del_index_table(Tab, Storage, Pos);
        startup ->
            ignore;
        _ ->
            mnesia_index:del_index_table(Tab, Storage, Pos)
    end,
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, change_table_access_mode,TabDef, _OldAccess, _Access}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    case InitBy of
        startup -> ignore;
        _ -> mnesia_controller:change_table_access_mode(Cs)
    end,
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, change_table_load_order, TabDef, _OldLevel, _Level}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, delete_property, TabDef, PropKey}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    Tab = Cs#cstruct.name,
    mnesia_lib:unset({Tab, user_property, PropKey}),
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, write_property, TabDef, _Prop}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    insert_cstruct(Tid, Cs, true, InPlace, InitBy);

insert_op(Tid, _, {op, change_table_frag, _Change, TabDef}, InPlace, InitBy) ->
    Cs = mnesia_schema:list2cs(TabDef),
    insert_cstruct(Tid, Cs, true, InPlace, InitBy).

%% open_files(Tab, Storage, UpdateInPlace, InitBy) -> boolean()
%%
%% Makes sure the disc file backing Tab is open for the dumper and
%% records that fact in the process dictionary under {?MODULE, Tab}.
%% Returns true when the table is (now) open for writing and false
%% when there is nothing to write to: the storage is unknown or
%% ram_copies, the table type cannot be looked up, or the table has
%% already been dumped in full.
open_files(Tab, Storage, UpdateInPlace, InitBy)
  when Storage /= unknown, Storage /= ram_copies ->
    case get({?MODULE, Tab}) of
        {opened_dumper, _} ->
            true;
        already_dumped ->
            false;
        undefined ->
            case ?catch_val({Tab, setorbag}) of
                {'EXIT', _} ->
                    false;
                _Type when Storage == disc_copies, Tab /= schema ->
                    open_disc_copies(Tab, InitBy);
                Type ->
                    %% dets backed table (or the schema itself)
                    DetsFile = prepare_open(Tab, UpdateInPlace),
                    DetsArgs = [{file, DetsFile},
                                {keypos, 2},
                                {repair, mnesia_monitor:get_env(auto_repair)},
                                {type, mnesia_lib:disk_type(Tab, Type)}],
                    {ok, _} = mnesia_monitor:open_dets(Tab, DetsArgs),
                    put({?MODULE, Tab}, {opened_dumper, dat}),
                    true
            end
    end;
open_files(_Tab, _Storage, _UpdateInPlace, _InitBy) ->
    false.

%% open_disc_copies(Tab, InitBy) -> boolean()
%%
%% Decides how updates to a disc_copies table reach disc. The .DCL log
%% is opened for appending (result true) when there is no .DCL file
%% yet, when the size comparison below says the log is still small
%% relative to the .DCD file, or always during startup. Otherwise the
%% whole ets table is written out with mnesia_log:ets2dcd/1 and the
%% table is marked already_dumped (result false).
open_disc_copies(Tab, InitBy) ->
    DclFile = mnesia_lib:tab2dcl(Tab),
    DumpEts =
        case file:read_file_info(DclFile) of
            {error, enoent} ->
                false;
            {ok, DclInfo} ->
                DcdFile = mnesia_lib:tab2dcd(Tab),
                case file:read_file_info(DcdFile) of
                    {error, Reason} ->
                        mnesia_lib:dbg_out("File ~p info_error ~p ~n",
                                           [DcdFile, Reason]),
                        true;
                    {ok, DcdInfo} ->
                        %% dc_dump_limit overrides the built-in multiplier
                        Multiplier =
                            case ?catch_val(dc_dump_limit) of
                                {'EXIT', _} -> ?DumpToEtsMultiplier;
                                Limit -> Limit
                            end,
                        DcdInfo#file_info.size =<
                            (DclInfo#file_info.size * Multiplier)
                end
        end,
    case (DumpEts == false) orelse (InitBy == startup) of
        true ->
            mnesia_log:open_log({?MODULE, Tab},
                                mnesia_log:dcl_log_header(),
                                DclFile,
                                mnesia_lib:exists(DclFile),
                                mnesia_monitor:get_env(auto_repair),
                                read_write),
            put({?MODULE, Tab}, {opened_dumper, dcl}),
            true;
        false ->
            mnesia_log:ets2dcd(Tab),
            put({?MODULE, Tab}, already_dumped),
            false
    end.

%% Always opens the dcl file for writing, overriding the already_dumped
%% mechanism; used for schema transactions.
open_dcl(Tab) ->
    DictKey = {?MODULE, Tab},
    case get(DictKey) of
        {opened_dumper, _} ->
            true;
        _ ->
            %% undefined or already_dumped
            DclFile = mnesia_lib:tab2dcl(Tab),
            mnesia_log:open_log(DictKey,
                                mnesia_log:dcl_log_header(),
                                DclFile,
                                mnesia_lib:exists(DclFile),
                                mnesia_monitor:get_env(auto_repair),
                                read_write),
            put(DictKey, {opened_dumper, dcl}),
            true
    end.

%% prepare_open(Tab, UpdateInPlace) -> FileName
%%
%% Picks the dets file the dumper will write to: the .DAT file itself
%% when updating in place, otherwise a fresh copy of it under the
%% table's tmp name. A failed copy is reported through fatal/2.
prepare_open(Tab, UpdateInPlace) ->
    DatFile = mnesia_lib:tab2dat(Tab),
    case UpdateInPlace of
        true ->
            DatFile;
        false ->
            TmpFile = mnesia_lib:tab2tmp(Tab),
            case catch mnesia_lib:copy_file(DatFile, TmpFile) of
                ok ->
                    TmpFile;
                Error ->
                    fatal("Cannot copy dets file ~p to ~p: ~p~n",
                          [DatFile, TmpFile, Error])
            end
    end.

%% Forgets that the dumper has Tab open (or already dumped).
del_opened_tab(Tab) ->
    erase({?MODULE, Tab}).

%% close_files(UpdateInPlace, Outcome, InitBy) -> ok
%%
%% Closes every table the dumper registered in the process dictionary.
close_files(UpdateInPlace, Outcome, InitBy) -> % Update in place
    close_files(UpdateInPlace, Outcome, InitBy, get()).

%% Walks a snapshot of the process dictionary. Entries keyed by
%% {?MODULE, Tab} are erased and, if they describe an open file,
%% closed; all other entries are left alone.
close_files(InPlace, Outcome, InitBy, DictEntries) ->
    CloseEntry =
        fun({{?MODULE, Tab}, already_dumped}) ->
                erase({?MODULE, Tab});
           ({{?MODULE, Tab}, {opened_dumper, Type}}) ->
                erase({?MODULE, Tab}),
                case val({Tab, storage_type}) of
                    disc_only_copies when InitBy /= startup ->
                        ignore;
                    disc_copies when Tab /= schema ->
                        mnesia_log:close_log({?MODULE, Tab});
                    Storage ->
                        do_close(InPlace, Outcome, Tab, Type, Storage)
                end;
           (_Unrelated) ->
                ok
        end,
    lists:foreach(CloseEntry, DictEntries).

%% If storage is unknown during close clean up files, this can happen if timing
%% is right and dirty_write conflicts with schema operations.
do_close(_, _, Tab, dcl, unknown) ->
    mnesia_log:close_log({?MODULE, Tab}),
    file:delete(mnesia_lib:tab2dcl(Tab));
do_close(_, _, Tab, dcl, _) ->
    %% To be safe, can it happen?
    mnesia_log:close_log({?MODULE, Tab});
do_close(InPlace, Outcome, Tab, dat, Storage) ->
    mnesia_monitor:close_dets(Tab),
    case {InPlace, Storage, Outcome} of
        {true, unknown, _} ->
            file:delete(mnesia_lib:tab2dat(Tab));
        {true, _, _} ->
            %% Update in place
            ok;
        {_, Known, ok} when Known /= unknown ->
            %% Success: swap tmp files with dat files
            DatFile = mnesia_lib:tab2dat(Tab),
            ok = file:rename(mnesia_lib:tab2tmp(Tab), DatFile);
        _ ->
            file:delete(mnesia_lib:tab2tmp(Tab))
    end.


%% ensure_rename(From, To) -> ok | {error, Reason}
%%
%% Renames From to To. When From does not exist the rename is treated
%% as already done provided To exists; otherwise an error tuple naming
%% both files is returned.
ensure_rename(From, To) ->
    case mnesia_lib:exists(From) of
        true ->
            file:rename(From, To);
        false ->
            case mnesia_lib:exists(To) of
                true ->
                    ok;
                false ->
                    {error, {rename_failed, From, To}}
            end
    end.

%% Stores the cstruct through mnesia_schema and writes the resulting
%% schema record to the schema table's disc storage. Returns the
%% table name.
insert_cstruct(Tid, Cs, KeepWhereabouts, InPlace, InitBy) ->
    {schema, Tab, _} = SchemaRec =
        mnesia_schema:insert_cstruct(Tid, Cs, KeepWhereabouts),
    SchemaStorage = val({schema, storage_type}),
    disc_insert(Tid, SchemaStorage, schema, Tab, SchemaRec, write, InPlace, InitBy),
    Tab.

%% Removes the cstruct through mnesia_schema and deletes the matching
%% schema record from the schema table's disc storage. Returns the
%% table name.
delete_cstruct(Tid, Cs, InPlace, InitBy) ->
    {schema, Tab, _} = SchemaRec = mnesia_schema:delete_cstruct(Tid, Cs),
    SchemaStorage = val({schema, storage_type}),
    disc_insert(Tid, SchemaStorage, schema, Tab, SchemaRec, delete, InPlace, InitBy),
    Tab.


%% Logs, for every local table, all other nodes holding a copy of it
%% as master nodes.
temp_set_master_nodes() ->
    MastersOf =
        fun(Tab) ->
                Holders = val({Tab, disc_copies}) ++
                    val({Tab, ram_copies}) ++
                    val({Tab, disc_only_copies}),
                {Tab, Holders -- [node()]}
        end,
    Masters = lists:map(MastersOf, val({schema, local_tables})),
    %% UseDir = false since we don't want to remember these
    %% masternodes and we are running (really soon anyway) since we want this
    %% to be known during table loading.
    mnesia_recover:log_master_nodes(Masters, false, yes),
    ok.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Raw dump of table. Dumper must have unique access to the ets table.

%% raw_named_dump_table(Tab, Ftype) -> ok
%%
%% Dumps the ets table Tab into a dets file. The data is first written
%% to the table's tmp file, which is renamed to the .DAT (Ftype = dat)
%% or .DMP (Ftype = dmp) file once the dump has succeeded. The table
%% is locked for the duration of the dump.
%%
%% Exits with {has_no_disc, Node} when Mnesia runs without a directory
%% and with a descriptive {String, Reason} tuple when the dets file
%% cannot be opened or the dump fails.
raw_named_dump_table(Tab, Ftype) ->
    case mnesia_monitor:use_dir() of
        true ->
            mnesia_lib:lock_table(Tab),
            TmpFname = mnesia_lib:tab2tmp(Tab),
            Fname =
                case Ftype of
                    dat -> mnesia_lib:tab2dat(Tab);
                    dmp -> mnesia_lib:tab2dmp(Tab)
                end,
            file:delete(TmpFname),
            file:delete(Fname),
            TabSize = ?ets_info(Tab, size),
            TabRef = Tab,
            DiskType = mnesia_lib:disk_type(Tab),
            Args = [{file, TmpFname},
                    {keypos, 2},
                    %% {ram_file, true},
                    {estimated_no_objects, TabSize + 256},
                    {repair, mnesia_monitor:get_env(auto_repair)},
                    {type, DiskType}],
            case mnesia_lib:dets_sync_open(TabRef, Args) of
                {ok, TabRef} ->
                    Storage = ram_copies,
                    mnesia_lib:db_fixtable(Storage, Tab, true),

                    case catch raw_dump_table(TabRef, Tab) of
                        ok ->
                            mnesia_lib:db_fixtable(Storage, Tab, false),
                            mnesia_lib:dets_sync_close(Tab),
                            mnesia_lib:unlock_table(Tab),
                            ok = file:rename(TmpFname, Fname);
                        {'EXIT', DumpReason} ->
                            abort_raw_dump(Tab, Storage, TmpFname, DumpReason);
                        {error, DumpReason} ->
                            %% dets:from_ets/2 reports failures as an
                            %% error tuple; clean up in the same way
                            %% as for a crash instead of leaving the
                            %% table fixed and locked.
                            abort_raw_dump(Tab, Storage, TmpFname, DumpReason)
                    end;
                {error, OpenReason} ->
                    mnesia_lib:unlock_table(Tab),
                    exit({"Open of file before dump to disc failed", OpenReason})
            end;
        false ->
            exit({has_no_disc, node()})
    end.

%% Undoes the preparations made by raw_named_dump_table/2 after a
%% failed dump (unfix, close, remove the tmp file, unlock) and exits.
abort_raw_dump(Tab, Storage, TmpFname, Reason) ->
    mnesia_lib:db_fixtable(Storage, Tab, false),
    mnesia_lib:dets_sync_close(Tab),
    file:delete(TmpFname),
    mnesia_lib:unlock_table(Tab),
    exit({"Dump of table to disc failed", Reason}).

%% Copies the contents of the ets table into the opened dets table.
raw_dump_table(DetsRef, EtsRef) ->
    dets:from_ets(DetsRef, EtsRef).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load regulator
%%
%% This is a poor man's substitute for a fair scheduler algorithm
%% in the Erlang emulator. The mnesia_dumper process performs many
%% costly BIF invocations and must pay for this. But since the
%% Emulator does not handle this properly we must compensate for
%% this with some form of load regulation of ourselves in order to
%% not steal all computation power in the Erlang Emulator and make
%% other processes starve. Hopefully this is a temporary solution.

%% start_regulator() -> pid() | nopid
%%
%% Starts the load regulator process when the dump_log_load_regulation
%% parameter is true; returns nopid when regulation is switched off.
%% A failure to start the process is reported through fatal/2.
start_regulator() ->
    case mnesia_monitor:get_env(dump_log_load_regulation) of
        false ->
            nopid;
        true ->
            N = ?REGULATOR_NAME,
            case mnesia_monitor:start_proc(N, ?MODULE, regulator_init, [self()]) of
                {ok, Pid} ->
                    Pid;
                {error, Reason} ->
                    %% Both arguments need a ~p; ~n consumes none.
                    fatal("Failed to start ~p: ~p~n", [N, Reason])
            end
    end.

%% Entry point of the regulator process: lowers its own priority,
%% registers its name, acknowledges the start to Parent and enters
%% the service loop.
regulator_init(Parent) ->
    %% No need for trapping exits.
    %% Using low priority causes the regulation
    process_flag(priority, low),
    register(?REGULATOR_NAME, self()),
    proc_lib:init_ack(Parent, {ok, self()}),
    regulator_loop().

%% Answers every {regulate, From} request and terminates normally on
%% {stop, From} after acknowledging it.
regulator_loop() ->
    receive
        {regulate, From} ->
            From ! {regulated, self()},
            regulator_loop();
        {stop, From} ->
            From ! {stopped, self()},
            exit(normal)
    end.

%% regulate(RegulatorPid | nopid) -> ok
%%
%% Blocks the caller until the regulator process has answered. Does
%% nothing when regulation is switched off.
regulate(nopid) ->
    ok;
regulate(RegulatorPid) ->
    RegulatorPid ! {regulate, self()},
    receive
        {regulated, RegulatorPid} -> ok
    end.

%% Looks up Var with ?catch_val; a failed lookup is handed over to
%% mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.
diff --git a/lib/mnesia/src/mnesia_event.erl b/lib/mnesia/src/mnesia_event.erl new file mode 100644 index 0000000000..ec6b99ecaa --- /dev/null +++ b/lib/mnesia/src/mnesia_event.erl @@ -0,0 +1,260 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_event). + +-behaviour(gen_event). +%-behaviour(mnesia_event).

%% gen_event callback interface
-export([init/1,
         handle_event/2,
         handle_call/2,
         handle_info/2,
         terminate/2,
         code_change/3]).

-record(state, {nodes = [],
                dumped_core = false, %% only dump fatal core once
                args}).

%%%----------------------------------------------------------------
%%% Callback functions from gen_event
%%%----------------------------------------------------------------

%%-----------------------------------------------------------------
%% init(Args) ->
%%     {ok, State} | Error
%%
%% Remembers the handler arguments in the initial state.
%%-----------------------------------------------------------------

init(Args) ->
    {ok, #state{args = Args}}.

%%-----------------------------------------------------------------
%% handle_event(Event, State) ->
%%     {ok, NewState} | remove_handler |
%%     {swap_handler, Args1, State1, Mod2, Args2}
%%-----------------------------------------------------------------

handle_event(Event, State) ->
    handle_any_event(Event, State).

%%-----------------------------------------------------------------
%% handle_info(Msg, State) ->
%%     {ok, NewState} | remove_handler |
%%     {swap_handler, Args1, State1, Mod2, Args2}
%%
%% Plain messages are treated exactly like events. The state returned
%% by handle_any_event/2 is kept, so that node list and dumped_core
%% updates are not lost for events that arrive as messages.
%%-----------------------------------------------------------------

handle_info(Msg, State) ->
    {ok, _NewState} = handle_any_event(Msg, State).

%%-----------------------------------------------------------------
%% handle_call(Event, State) ->
%%     {ok, Reply, NewState} | {remove_handler, Reply} |
%%     {swap_handler, Reply, Args1, State1, Mod2, Args2}
%%
%% Calls are handled like events and always answered with ok.
%%-----------------------------------------------------------------

handle_call(Msg, State) ->
    Reply = ok,
    {ok, NewState} = handle_any_event(Msg, State),
    {ok, Reply, NewState}.

%%-----------------------------------------------------------------
%% terminate(Reason, State) ->
%%     AnyVal
%%-----------------------------------------------------------------

terminate(_Reason, _State) ->
    ok.

%%----------------------------------------------------------------------
%% Func: code_change/3
%% Purpose: Upgrade process when its code is to be changed
%% Returns: {ok, NewState}
%%----------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%-----------------------------------------------------------------
%% Internal functions
%%-----------------------------------------------------------------

%% Dispatches on the event envelope. Anything that is neither a system
%% event nor a table event is reported as an error and otherwise
%% ignored.
handle_any_event({mnesia_system_event, SysEvent}, State) ->
    handle_system_event(SysEvent, State);
handle_any_event({mnesia_table_event, TabEvent}, State) ->
    handle_table_event(TabEvent, State);
handle_any_event(Unexpected, State) ->
    report_error("~p got unexpected event: ~p~n", [?MODULE, Unexpected]),
    {ok, State}.

%% Table events are only logged.
handle_table_event({Oper, Record, TransId}, State) ->
    report_info("~p performed by ~p on record:~n\t~p~n",
                [Oper, TransId, Record]),
    {ok, State}.

%% handle_system_event(Event, State) -> {ok, NewState}
%%
%% Keeps track of the nodes reported up, reacts to mnesia_down when a
%% fallback is installed and turns the remaining system events into
%% log reports.
handle_system_event({mnesia_checkpoint_activated, _Checkpoint}, State) ->
    {ok, State};

handle_system_event({mnesia_checkpoint_deactivated, _Checkpoint}, State) ->
    {ok, State};

handle_system_event({mnesia_up, Node}, State) ->
    {ok, State#state{nodes = [Node | State#state.nodes]}};

handle_system_event({mnesia_down, Node}, State) ->
    case mnesia:system_info(fallback_activated) of
        false ->
            forget_node(Node, State);
        true ->
            case mnesia_monitor:get_env(fallback_error_function) of
                {mnesia, lkill} ->
                    Msg = "A fallback is installed and Mnesia "
                        "must be restarted. Forcing shutdown "
                        "after mnesia_down from ~p...~n",
                    report_fatal(Msg, [Node], nocore, State#state.dumped_core),
                    mnesia:lkill(),
                    exit(fatal);
                {UserMod, UserFunc} ->
                    Msg = "Warning: A fallback is installed and Mnesia got mnesia_down "
                        "from ~p. ~n",
                    report_info(Msg, [Node]),
                    case catch apply(UserMod, UserFunc, [Node]) of
                        {'EXIT', {undef, _Reason}} ->
                            %% Backward compatibility
                            apply(UserMod, UserFunc, []);
                        {'EXIT', Reason} ->
                            exit(Reason);
                        _ ->
                            ok
                    end,
                    forget_node(Node, State)
            end
    end;

handle_system_event({mnesia_overload, Details}, State) ->
    report_warning("Mnesia is overloaded: ~p~n", [Details]),
    {ok, State};

handle_system_event({mnesia_info, Format, Args}, State) ->
    report_info(Format, Args),
    {ok, State};

handle_system_event({mnesia_warning, Format, Args}, State) ->
    report_warning(Format, Args),
    {ok, State};

handle_system_event({mnesia_error, Format, Args}, State) ->
    report_error(Format, Args),
    {ok, State};

handle_system_event({mnesia_fatal, Format, Args, BinaryCore}, State) ->
    report_fatal(Format, Args, BinaryCore, State#state.dumped_core),
    {ok, State#state{dumped_core = true}};

handle_system_event({inconsistent_database, Reason, Node}, State) ->
    report_error("mnesia_event got {inconsistent_database, ~w, ~w}~n",
                 [Reason, Node]),
    {ok, State};

handle_system_event({mnesia_user, Event}, State) ->
    report_info("User event: ~p~n", [Event]),
    {ok, State};

handle_system_event(Unexpected, State) ->
    report_error("mnesia_event got unexpected system event: ~p~n", [Unexpected]),
    {ok, State}.

%% Drops Node from the list of nodes known to be up.
forget_node(Node, State) ->
    Remaining = lists:delete(Node, State#state.nodes),
    {ok, State#state{nodes = Remaining}}.

%% Prints an informational message, prefixed with the local node name,
%% to the globally registered mnesia_global_logger if there is one and
%% to the default io device otherwise.
report_info(Format0, Args0) ->
    Format = "Mnesia(~p): " ++ Format0,
    Args = [node() | Args0],
    case global:whereis_name(mnesia_global_logger) of
        undefined ->
            io:format(Format, Args);
        Logger ->
            io:format(Logger, Format, Args)
    end.

%% Sends a warning, prefixed with the local node name, to error_logger
%% (as a warning when error_logger supports it) and also to the
%% mnesia_global_logger when one is registered.
report_warning(Format0, Args0) ->
    Format = "Mnesia(~p): ** WARNING ** " ++ Format0,
    Args = [node() | Args0],
    case erlang:function_exported(error_logger, warning_msg, 2) of
        true ->
            error_logger:warning_msg(Format, Args);
        false ->
            error_logger:format(Format, Args)
    end,
    to_global_logger(Format, Args).

%% Sends an error report, prefixed with the local node name, to
%% error_logger and also to the mnesia_global_logger when one is
%% registered.
report_error(Format0, Args0) ->
    Format = "Mnesia(~p): ** ERROR ** " ++ Format0,
    Args = [node() | Args0],
    error_logger:format(Format, Args),
    to_global_logger(Format, Args).

%% Copies a report to the globally registered logger, if any.
to_global_logger(Format, Args) ->
    case global:whereis_name(mnesia_global_logger) of
        undefined ->
            ok;
        Logger ->
            io:format(Logger, Format, Args)
    end.

%% report_fatal(Format, Args, BinaryCore, CoreDumped)
%%
%% Reports a fatal error. A core file is written first when no core
%% has been dumped before, BinaryCore really is a binary, and either a
%% core_dir is configured or Mnesia uses a directory. In every other
%% case the core is ignored and only the error is reported.
report_fatal(Format, Args, BinaryCore, CoreDumped) ->
    UseDir = mnesia_monitor:use_dir(),
    CoreDir = mnesia_monitor:get_env(core_dir),
    if
        is_list(CoreDir), CoreDumped == false, is_binary(BinaryCore);
        UseDir == true, CoreDumped == false, is_binary(BinaryCore) ->
            core_file(CoreDir, BinaryCore, Format, Args);
        true ->
            report_error("(ignoring core) ** FATAL ** " ++ Format, Args)
    end.

%% Writes BinaryCore to a file named "MnesiaCore.<node>" followed by
%% the three now() integers, each preceded by "_" (or "_0" when the
%% value is below 10). The file goes into CoreDir when that is a list
%% and is resolved by filename:absname/1 otherwise. The outcome is
%% reported together with the fatal message itself.
core_file(CoreDir, BinaryCore, Format, Args) ->
    Pad = fun(I) when I < 10 -> ["_0", I];
             (I) -> ["_", I]
          end,
    Stamp = lists:flatmap(Pad, tuple_to_list(now())),
    BaseName = lists:concat(["MnesiaCore.", node()] ++ Stamp),
    CoreFile =
        case is_list(CoreDir) of
            true -> filename:absname(BaseName, CoreDir);
            false -> filename:absname(BaseName)
        end,
    case file:write_file(CoreFile, BinaryCore) of
        ok ->
            report_error("(core dumped to file: ~p)~n ** FATAL ** " ++ Format,
                         [CoreFile] ++ Args);
        {error, Reason} ->
            report_error("(could not write core file: ~p)~n ** FATAL ** " ++ Format,
                         [Reason] ++ Args)
    end.
+ + + diff --git a/lib/mnesia/src/mnesia_frag.erl b/lib/mnesia/src/mnesia_frag.erl new file mode 100644 index 0000000000..a2958ab461 --- /dev/null +++ b/lib/mnesia/src/mnesia_frag.erl @@ -0,0 +1,1361 @@ +%%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1998-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%%% +%%%---------------------------------------------------------------------- +%%% Purpose : Support tables so large that they need +%%% to be divided into several fragments. +%%%---------------------------------------------------------------------- + +%header_doc_include + +-module(mnesia_frag). + +%% Callback functions when accessed within an activity +-export([ + lock/4, + write/5, delete/5, delete_object/5, + read/5, match_object/5, all_keys/4, + select/5,select/6,select_cont/3, + index_match_object/6, index_read/6, + foldl/6, foldr/6, table_info/4, + first/3, next/4, prev/4, last/3, + clear_table/4 + ]). + +%header_doc_include + +%% -behaviour(mnesia_access). + +-export([ + change_table_frag/2, + remove_node/2, + expand_cstruct/1, + lookup_frag_hash/1, + lookup_foreigners/1, + frag_names/1, + set_frag_hash/2, + local_select/4, + remote_select/4 + ]). + +-include("mnesia.hrl"). + +-define(OLD_HASH_MOD, mnesia_frag_old_hash). +-define(DEFAULT_HASH_MOD, mnesia_frag_hash). +%%-define(DEFAULT_HASH_MOD, ?OLD_HASH_MOD). 
%% BUGBUG: New should be default

-record(frag_state,
        {foreign_key,
         n_fragments,
         hash_module,
         hash_state}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Access functions

%impl_doc_include

%% Callback functions which provides transparent
%% access of fragmented tables from any activity
%% access context.

%% A table lock is taken on every fragment of the table; the node
%% lists returned for the individual fragments are merged into one
%% list without duplicates. All other lock items are passed on as is.
lock(ActivityId, Opaque, {table , Tab}, LockKind) ->
    case frag_names(Tab) of
        [Tab] ->
            mnesia:lock(ActivityId, Opaque, {table, Tab}, LockKind);
        Frags ->
            LockFrag =
                fun(Frag) ->
                        mnesia:lock(ActivityId, Opaque, {table, Frag}, LockKind)
                end,
            mnesia_lib:uniq(lists:flatmap(LockFrag, Frags))
    end;

lock(ActivityId, Opaque, LockItem, LockKind) ->
    mnesia:lock(ActivityId, Opaque, LockItem, LockKind).

%% The record is written to the fragment selected from the record.
write(ActivityId, Opaque, Tab, Rec, LockKind) ->
    mnesia:write(ActivityId, Opaque, record_to_frag_name(Tab, Rec), Rec, LockKind).

%% The key is deleted from the fragment selected from the key.
delete(ActivityId, Opaque, Tab, Key, LockKind) ->
    mnesia:delete(ActivityId, Opaque, key_to_frag_name(Tab, Key), Key, LockKind).

%% The record is deleted from the fragment selected from the record.
delete_object(ActivityId, Opaque, Tab, Rec, LockKind) ->
    mnesia:delete_object(ActivityId, Opaque, record_to_frag_name(Tab, Rec), Rec, LockKind).

%% The key is read from the fragment selected from the key.
read(ActivityId, Opaque, Tab, Key, LockKind) ->
    mnesia:read(ActivityId, Opaque, key_to_frag_name(Tab, Key), Key, LockKind).

%% Expressed as a select that returns the whole matching object.
match_object(ActivityId, Opaque, Tab, HeadPat, LockKind) ->
    select(ActivityId, Opaque, Tab, [{HeadPat, [], ['$_']}], LockKind).

select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
    do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind).


select(ActivityId, Opaque, Tab, MatchSpec, Limit, LockKind) ->
    init_select(ActivityId, Opaque, Tab, MatchSpec, Limit, LockKind).


%% The keys of all fragments, concatenated in fragment order.
all_keys(ActivityId, Opaque, Tab, LockKind) ->
    KeysOf = fun(Frag) -> mnesia:all_keys(ActivityId, Opaque, Frag, LockKind) end,
    lists:flatmap(KeysOf, frag_names(Tab)).

%% Clears every fragment of the table.
clear_table(ActivityId, Opaque, Tab, Obj) ->
    ClearFrag = fun(Frag) -> mnesia:clear_table(ActivityId, Opaque, Frag, Obj) end,
    lists:foreach(ClearFrag, frag_names(Tab)),
    ok.

%% Runs the index match in every fragment and concatenates the hits in
%% fragment order.
index_match_object(ActivityId, Opaque, Tab, Pat, Attr, LockKind) ->
    MatchIn =
        fun(Frag) ->
                mnesia:index_match_object(ActivityId, Opaque, Frag, Pat, Attr, LockKind)
        end,
    lists:flatmap(MatchIn, frag_names(Tab)).

%% Runs the index read in every fragment and concatenates the hits in
%% fragment order.
index_read(ActivityId, Opaque, Tab, Key, Attr, LockKind) ->
    ReadIn =
        fun(Frag) ->
                mnesia:index_read(ActivityId, Opaque, Frag, Key, Attr, LockKind)
        end,
    lists:flatmap(ReadIn, frag_names(Tab)).

%% Folds over the fragments from first to last, threading the
%% accumulator through a mnesia:foldl/6 of each fragment.
foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
    FoldFrag =
        fun(Frag, FragAcc) ->
                mnesia:foldl(ActivityId, Opaque, Fun, FragAcc, Frag, LockKind)
        end,
    lists:foldl(FoldFrag, Acc, frag_names(Tab)).

%% Folds over the fragments from last to first, threading the
%% accumulator through a mnesia:foldr/6 of each fragment.
foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
    FoldFrag =
        fun(Frag, FragAcc) ->
                mnesia:foldr(ActivityId, Opaque, Fun, FragAcc, Frag, LockKind)
        end,
    lists:foldr(FoldFrag, Acc, frag_names(Tab)).

%% With {Tab, Key} the fragment selected from Key is the one asked
%% about when the item is not a fragmentation specific one; with a
%% plain table name it is the table itself.
table_info(ActivityId, Opaque, {Tab, Key}, Item) ->
    table_info2(ActivityId, Opaque, Tab, key_to_frag_name(Tab, Key), Item);
table_info(ActivityId, Opaque, Tab, Item) ->
    table_info2(ActivityId, Opaque, Tab, Tab, Item).

%% table_info2(ActivityId, Opaque, Tab, Frag, Item) -> Info
%%
%% Answers the fragmentation related items for the table Tab as a
%% whole: size and memory are summed over all fragments, the remaining
%% ones are looked up for Tab. Any other item is passed on to
%% mnesia:table_info/4 for Frag.
table_info2(ActivityId, Opaque, Tab, Frag, Item) ->
    Total =
        fun(FragValues) ->
                lists:foldl(fun({_, Value}, Acc) -> Acc + Value end, 0, FragValues)
        end,
    case Item of
        size ->
            Total(frag_size(ActivityId, Opaque, Tab));
        memory ->
            Total(frag_memory(ActivityId, Opaque, Tab));
        base_table ->
            lookup_prop(Tab, base_table);
        node_pool ->
            lookup_prop(Tab, node_pool);
        n_fragments ->
            FH = lookup_frag_hash(Tab),
            FH#frag_state.n_fragments;
        foreign_key ->
            FH = lookup_frag_hash(Tab),
            FH#frag_state.foreign_key;
        foreigners ->
            lookup_foreigners(Tab);
        n_ram_copies ->
            length(val({Tab, ram_copies}));
        n_disc_copies ->
            length(val({Tab, disc_copies}));
        n_disc_only_copies ->
            length(val({Tab, disc_only_copies}));

        frag_names ->
            frag_names(Tab);
        frag_dist ->
            frag_dist(Tab);
        frag_size ->
            frag_size(ActivityId, Opaque, Tab);
        frag_memory ->
            frag_memory(ActivityId, Opaque, Tab);
        _ ->
            mnesia:table_info(ActivityId, Opaque, Frag, Item)
    end.

%% first(ActivityId, Opaque, Tab) -> Key | '$end_of_table'
%%
%% Returns the first key of the first non-empty fragment. A table
%% without a frag_hash entry is handled by mnesia:first/3 directly.
first(ActivityId, Opaque, Tab) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:first(ActivityId, Opaque, Tab);
        FH ->
            %% The first fragment carries the name of the table itself
            FirstFrag = Tab,
            case mnesia:first(ActivityId, Opaque, FirstFrag) of
                '$end_of_table' ->
                    search_first(ActivityId, Opaque, Tab, 1, FH);
                Next ->
                    Next
            end
    end.

%% Fragment N turned out to be empty: continue with fragment N + 1.
%% The guard must be a strict "less than", since the fragment that is
%% inspected is number N + 1 and fragment n_fragments is the last one
%% that exists.
search_first(ActivityId, Opaque, Tab, N, FH) when N < FH#frag_state.n_fragments ->
    NextN = N + 1,
    NextFrag = n_to_frag_name(Tab, NextN),
    case mnesia:first(ActivityId, Opaque, NextFrag) of
        '$end_of_table' ->
            search_first(ActivityId, Opaque, Tab, NextN, FH);
        Next ->
            Next
    end;
search_first(_ActivityId, _Opaque, _Tab, _N, _FH) ->
    '$end_of_table'.

%% last(ActivityId, Opaque, Tab) -> Key | '$end_of_table'
%%
%% Returns the last key of the last non-empty fragment. A table
%% without a frag_hash entry is handled by mnesia:last/3 directly.
last(ActivityId, Opaque, Tab) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:last(ActivityId, Opaque, Tab);
        FH ->
            LastN = FH#frag_state.n_fragments,
            search_last(ActivityId, Opaque, Tab, LastN, FH)
    end.

%% Looks for the last key in fragment N and, while the fragments are
%% empty, in the fragments before it, down to fragment 1.
search_last(ActivityId, Opaque, Tab, N, FH) when N >= 1 ->
    case mnesia:last(ActivityId, Opaque, n_to_frag_name(Tab, N)) of
        '$end_of_table' ->
            search_last(ActivityId, Opaque, Tab, N - 1, FH);
        Key ->
            Key
    end;
search_last(_ActivityId, _Opaque, _Tab, _N, _FH) ->
    '$end_of_table'.

%% prev(ActivityId, Opaque, Tab, Key) -> PrevKey | '$end_of_table'
%%
%% Steps backwards within the fragment that Key hashes to. When that
%% fragment has no earlier key, the last key of the nearest preceding
%% non-empty fragment is returned.
prev(ActivityId, Opaque, Tab, Key) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:prev(ActivityId, Opaque, Tab, Key);
        FH ->
            FragNo = key_to_n(FH, Key),
            FragName = n_to_frag_name(Tab, FragNo),
            case mnesia:prev(ActivityId, Opaque, FragName, Key) of
                '$end_of_table' ->
                    search_prev(ActivityId, Opaque, Tab, FragNo);
                PrevKey ->
                    PrevKey
            end
    end.

%% Fragment N is exhausted: take the last key of fragment N - 1, or
%% keep walking towards fragment 1 while the fragments are empty.
search_prev(ActivityId, Opaque, Tab, N) when N > 1 ->
    Before = N - 1,
    case mnesia:last(ActivityId, Opaque, n_to_frag_name(Tab, Before)) of
        '$end_of_table' ->
            search_prev(ActivityId, Opaque, Tab, Before);
        PrevKey ->
            PrevKey
    end;
search_prev(_ActivityId, _Opaque, _Tab, _N) ->
    '$end_of_table'.

%% next(ActivityId, Opaque, Tab, Key) -> NextKey | '$end_of_table'
%%
%% Steps forwards within the fragment that Key hashes to. When that
%% fragment has no later key, the first key of the nearest following
%% non-empty fragment is returned.
next(ActivityId, Opaque, Tab, Key) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:next(ActivityId, Opaque, Tab, Key);
        FH ->
            FragNo = key_to_n(FH, Key),
            FragName = n_to_frag_name(Tab, FragNo),
            case mnesia:next(ActivityId, Opaque, FragName, Key) of
                '$end_of_table' ->
                    search_next(ActivityId, Opaque, Tab, FragNo, FH);
                NextKey ->
                    NextKey
            end
    end.

%% Fragment N is exhausted: take the first key of fragment N + 1, or
%% keep walking towards the last fragment while the fragments are
%% empty.
search_next(ActivityId, Opaque, Tab, N, FH) when N < FH#frag_state.n_fragments ->
    After = N + 1,
    case mnesia:first(ActivityId, Opaque, n_to_frag_name(Tab, After)) of
        '$end_of_table' ->
            search_next(ActivityId, Opaque, Tab, After, FH);
        NextKey ->
            NextKey
    end;
search_next(_ActivityId, _Opaque, _Tab, _N, _FH) ->
    '$end_of_table'.

%impl_doc_include

%% A {FragmentName, Size} pair for every fragment of the table.
frag_size(ActivityId, Opaque, Tab) ->
    SizeOf = fun(Frag) -> {Frag, remote_table_info(ActivityId, Opaque, Frag, size)} end,
    lists:map(SizeOf, frag_names(Tab)).

%% A {FragmentName, Memory} pair for every fragment of the table.
frag_memory(ActivityId, Opaque, Tab) ->
    MemoryOf = fun(Frag) -> {Frag, remote_table_info(ActivityId, Opaque, Frag, memory)} end,
    lists:map(MemoryOf, frag_names(Tab)).

%% Asks the node the table is read from for a table_info item. A
%% failing rpc aborts with {no_exists, Tab, Item}.
remote_table_info(ActivityId, Opaque, Tab, Item) ->
    ReadNode = val({Tab, where_to_read}),
    case rpc:call(ReadNode, mnesia, table_info, [ActivityId, Opaque, Tab, Item]) of
        {badrpc, _} ->
            mnesia:abort({no_exists, Tab, Item});
        Info ->
            Info
    end.

%% init_select(Tid, Opaque, Tab, Pat, Limit, LockKind)
%%
%% Starts a limited select. All fragments that may hold matches are
%% locked up front; the select itself starts in the first of them and
%% the others are kept in the continuation, to be visited one by one
%% by select_cont/3.
init_select(Tid,Opaque,Tab,Pat,Limit,LockKind) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:select(Tid, Opaque, Tab, Pat, Limit,LockKind);
        FH ->
            FragNumbers = verify_numbers(FH,Pat),
            Describe =
                fun(Num) ->
                        Name = n_to_frag_name(Tab, Num),
                        ReadNode = val({Name, where_to_read}),
                        Storage = mnesia_lib:storage_type_at_node(ReadNode, Name),
                        mnesia:lock(Tid, Opaque, {table, Name}, LockKind),
                        {Name, ReadNode, Storage}
                end,
            [{FTab,Node,Type} | Pending] = lists:map(Describe, FragNumbers),
            InitFun =
                fun(FixedSpec) ->
                        mnesia:dirty_sel_init(Node,FTab,FixedSpec,Limit,Type)
                end,
            Res = mnesia:fun_select(Tid,Opaque,FTab,Pat,LockKind,FTab,InitFun,Limit,Node,Type),
            frag_sel_cont(Res, Pending, {Pat,LockKind,Limit})
    end.

%% select_cont(Tid, Ts, Cont)
%%
%% Continues a limited select. When the current fragment is exhausted
%% the select is restarted in the next pending fragment; when no
%% fragment is left the select is finished. Continuations that were
%% not made by this module are passed on to mnesia:select_cont/3.
select_cont(_Tid,_,{frag_cont, '$end_of_table', [],_}) ->
    '$end_of_table';
select_cont(Tid,Ts,{frag_cont, '$end_of_table', [{Tab,Node,Type} | Pending],Args}) ->
    {Spec,LockKind,Limit} = Args,
    InitFun =
        fun(FixedSpec) ->
                mnesia:dirty_sel_init(Node,Tab,FixedSpec,Limit,Type)
        end,
    Res = mnesia:fun_select(Tid,Ts,Tab,Spec,LockKind,Tab,InitFun,Limit,Node,Type),
    frag_sel_cont(Res, Pending, Args);
select_cont(Tid,Ts,{frag_cont, Cont, Pending, Args}) ->
    frag_sel_cont(mnesia:select_cont(Tid,Ts,Cont), Pending, Args);
select_cont(Tid,Ts,Else) ->
    %% Not a fragmented table
    mnesia:select_cont(Tid,Ts,Else).

%% Wraps the result of a select in one fragment into a continuation
%% that also carries the fragments still to be visited.
frag_sel_cont('$end_of_table', [],_) ->
    '$end_of_table';
frag_sel_cont('$end_of_table', Pending,Args) ->
    {[], {frag_cont, '$end_of_table', Pending,Args}};
frag_sel_cont({Recs,Cont}, Pending,Args) ->
    {Recs, {frag_cont, Cont, Pending,Args}}.

%% do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind) -> [Match]
%%
%% Select over all fragments that may hold matches. Every such
%% fragment is locked first. When all of them are read locally they
%% are simply selected one after the other. Otherwise a linked helper
%% process (local_select/4) asks the remote nodes to select in their
%% fragments while the calling process selects in the local ones, and
%% local_collect/5 gathers the results.
do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:select(ActivityId, Opaque, Tab, MatchSpec, LockKind);
        FH ->
            FragNumbers = verify_numbers(FH,MatchSpec),
            LockFrag =
                fun(Num) ->
                        Name = n_to_frag_name(Tab, Num),
                        ReadNode = val({Name, where_to_read}),
                        mnesia:lock(ActivityId, Opaque, {table, Name}, LockKind),
                        {Name, ReadNode}
                end,
            NameNodes = lists:map(LockFrag, FragNumbers),
            SelectEverywhere =
                fun(PatchedMatchSpec) ->
                        lists:flatmap(
                          fun({Name, _Node}) ->
                                  mnesia:dirty_select(Name, PatchedMatchSpec)
                          end,
                          NameNodes)
                end,
            IsRemote = fun({_Name, Node}) -> Node /= node() end,
            case lists:partition(IsRemote, NameNodes) of
                {[], _AllLocal} ->
                    %% All fragments are local
                    mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec,
                                      none, '_', SelectEverywhere);
                {RemoteNameNodes, LocalNameNodes} ->
                    Type = val({Tab,setorbag}),
                    SelectFun =
                        fun(PatchedMatchSpec) ->
                                Ref = make_ref(),
                                Args = [self(), Ref, RemoteNameNodes, PatchedMatchSpec],
                                Pid = spawn_link(?MODULE, local_select, Args),
                                PerFrag = [mnesia:dirty_select(Name, PatchedMatchSpec)
                                           || {Name, _Node} <- LocalNameNodes],
                                LocalMatch =
                                    case Type of
                                        ordered_set -> lists:merge(PerFrag);
                                        _ -> lists:append(PerFrag)
                                    end,
                                OldSelectFun = fun() -> SelectEverywhere(PatchedMatchSpec) end,
                                local_collect(Ref, Pid, Type, LocalMatch, OldSelectFun)
                        end,
                    mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec,
                                      none, '_', SelectFun)
            end
    end.

%% Asks the hash callback module which fragments may hold records that
%% match MatchSpec and checks every answer against the range
%% 1..n_fragments. Returns the callback's answer untouched when all
%% numbers are in range; aborts otherwise (this includes an answer that
%% lists:filter/2 cannot traverse).
verify_numbers(FH,MatchSpec) ->
    State = FH#frag_state.hash_state,
    Mod = FH#frag_state.hash_module,
    Candidates =
        if
            Mod == ?DEFAULT_HASH_MOD ->
                ?DEFAULT_HASH_MOD:match_spec_to_frag_numbers(State, MatchSpec);
            true ->
                Mod:match_spec_to_frag_numbers(State, MatchSpec)
        end,
    Max = FH#frag_state.n_fragments,
    OutOfRange = fun(F) when is_integer(F), F >= 1, F =< Max -> false;
                    (_Other) -> true
                 end,
    case catch lists:filter(OutOfRange, Candidates) of
        [] ->
            Candidates;
        Offenders ->
            mnesia:abort({"match_spec_to_frag_numbers: Fragment numbers out of range",
                          Offenders, {range, 1, Max}})
    end.

%% Body of the helper process spawned by do_select/5.
%% Calls remote_select/4 on every node that holds a remote fragment, then
%% tells ReplyTo whether all nodes answered ok. The matching records
%% themselves are sent to ReplyTo directly by the remote nodes.
local_select(ReplyTo, Ref, RemoteNameNodes, MatchSpec) ->
    Nodes = mnesia_lib:uniq([N || {_Frag, N} <- RemoteNameNodes]),
    {Replies, BadNodes} =
        rpc:multicall(Nodes, ?MODULE, remote_select,
                      [ReplyTo, Ref, RemoteNameNodes, MatchSpec]),
    Failures = mnesia_lib:uniq(Replies) -- [ok],
    Verdict =
        case {Failures, BadNodes} of
            {[], []} ->
                ok;
            {_, [Down | _]} ->
                %% An unreachable node takes precedence over other failures.
                {error, {node_not_running, Down}};
            {[{badrpc, {'EXIT', Why}} | _], _} ->
                {error, Why};
            {[Why | _], _} ->
                {error, Why}
        end,
    ReplyTo ! {local_select, Ref, Verdict},
    unlink(ReplyTo),
    exit(normal).

%% Entry point called via rpc from local_select/4.
remote_select(ReplyTo, Ref, NameNodes, MatchSpec) ->
    do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec).

%% Runs a dirty select on every fragment in NameNodes that is assigned to
%% this node and sends each result to ReplyTo as
%% {remote_select, Ref, Node, {ok, Matches} | CaughtFailure}.
%% Fragments assigned to other nodes are skipped. Returns ok.
do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec) ->
    Here = node(),
    lists:foreach(
      fun({Name, Node}) when Node == Here ->
              Res = (catch {ok, mnesia:dirty_select(Name, MatchSpec)}),
              ReplyTo ! {remote_select, Ref, Node, Res};
         ({_Name, _Node}) ->
              ok
      end,
      NameNodes).

%% Waits for the verdict of the helper process Pid (see local_select/4)
%% and then gathers the per-node results with remote_collect/5.
%% LocalMatch holds the matches already read on this node.
local_collect(Ref, Pid, Type, LocalMatch, OldSelectFun) ->
    receive
        {local_select, Ref, LocalRes} ->
            remote_collect(Ref, Type, LocalRes, LocalMatch, OldSelectFun);
        {'EXIT', Pid, Reason} ->
            %% The helper terminated before reporting; the local matches
            %% are dropped and the exit reason becomes the error.
            remote_collect(Ref, Type, {error, Reason}, [], OldSelectFun)
    end.

%% Drains all {remote_select, Ref, ...} messages that are already in the
%% mailbox (`after 0', so it never blocks).
%%
%% While the state is ok the matches are accumulated: merged for
%% ordered_set, prepended otherwise. The first reply that is not
%% {ok, Matches} switches the state to {error, {node_not_running, Node}}.
%% In the error state the remaining messages are only flushed, and the
%% transaction is aborted with the recorded reason once the mailbox holds
%% no more of them.
%% OldSelectFun is passed through but not called by these clauses.
remote_collect(Ref, Type, LocalRes = ok, Acc, OldSelectFun) ->
    receive
        {remote_select, Ref, Node, RemoteRes} ->
            case RemoteRes of
                {ok, RemoteMatch} ->
                    Matches = case Type of
                                  ordered_set -> lists:merge(RemoteMatch, Acc);
                                  _ -> RemoteMatch ++ Acc
                              end,
                    remote_collect(Ref, Type, LocalRes, Matches, OldSelectFun);
                _ ->
                    remote_collect(Ref, Type, {error, {node_not_running, Node}}, [], OldSelectFun)
            end
    after 0 ->
            Acc
    end;
remote_collect(Ref, Type, LocalRes = {error, Reason}, _Acc, OldSelectFun) ->
    receive
        {remote_select, Ref, _Node, _RemoteRes} ->
            remote_collect(Ref, Type, LocalRes, [], OldSelectFun)
    after 0 ->
            mnesia:abort(Reason)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Returns a list of cstructs

%% Same as expand_cstruct(Cs, create).
expand_cstruct(Cs) ->
    expand_cstruct(Cs, create).

%% Validates the frag_properties of Cs and expands Cs into a list of
%% cstructs, one per fragment.
%%
%% Mode is `create' (new table, N fragments) or `activate' (existing
%% table, exactly one fragment; see verify_n_fragments/3).
%% Every failed check aborts through mnesia_schema:verify/3 or
%% mnesia:abort/1.
expand_cstruct(Cs, Mode) ->
    Tab = Cs#cstruct.name,
    Props = Cs#cstruct.frag_properties,
    mnesia_schema:verify({alt, [nil, list]}, mnesia_lib:etype(Props),
                         {badarg, Tab, Props}),
    %% Verify keys
    ValidKeys = [foreign_key, n_fragments, node_pool,
                 n_ram_copies, n_disc_copies, n_disc_only_copies,
                 hash_module, hash_state],
    Keys = mnesia_schema:check_keys(Tab, Props, ValidKeys),
    mnesia_schema:check_duplicates(Tab, Keys),

    %% Pick fragmentation props
    ForeignKey = mnesia_schema:pick(Tab, foreign_key, Props, undefined),
    {ForeignKey2, N, Pool, DefaultNR, DefaultND, DefaultNDO} =
        pick_props(Tab, Cs, ForeignKey),

    %% Verify node_pool: must be a list that contains only atoms
    BadPool = {bad_type, Tab, {node_pool, Pool}},
    mnesia_schema:verify(list, mnesia_lib:etype(Pool), BadPool),
    NotAtom = fun(A) when is_atom(A) -> false;
                 (_A) -> true
              end,
    mnesia_schema:verify([], [P || P <- Pool, NotAtom(P)], BadPool),

    NR  = mnesia_schema:pick(Tab, n_ram_copies, Props, 0),
    ND  = mnesia_schema:pick(Tab, n_disc_copies, Props, 0),
    NDO = mnesia_schema:pick(Tab, n_disc_only_copies, Props, 0),

    %% Despite its name this accepts zero as well (non-negative integers)
    PosInt = fun(I) when is_integer(I), I >= 0 -> true;
                (_I) -> false
             end,
    mnesia_schema:verify(true, PosInt(NR),
                         {bad_type, Tab, {n_ram_copies, NR}}),
    mnesia_schema:verify(true, PosInt(ND),
                         {bad_type, Tab, {n_disc_copies, ND}}),
    mnesia_schema:verify(true, PosInt(NDO),
                         {bad_type, Tab, {n_disc_only_copies, NDO}}),

    %% Verify n_fragments
    Cs2 = verify_n_fragments(N, Cs, Mode),

    %% Verify hash callback
    HashMod = mnesia_schema:pick(Tab, hash_module, Props, ?DEFAULT_HASH_MOD),
    HashState = mnesia_schema:pick(Tab, hash_state, Props, undefined),
    HashState2 = HashMod:init_state(Tab, HashState), %% BUGBUG: Catch?

    %% Start from a single fragment; expand_frag_cstructs/9 advances the
    %% state with adjust_before_split/1 once per additional fragment.
    FH = #frag_state{foreign_key = ForeignKey2,
                     n_fragments = 1,
                     hash_module = HashMod,
                     hash_state  = HashState2},
    if
        NR == 0, ND == 0, NDO == 0 ->
            %% No replica counts given: use the defaults from pick_props/3
            do_expand_cstruct(Cs2, FH, N, Pool, DefaultNR, DefaultND, DefaultNDO, Mode);
        true ->
            do_expand_cstruct(Cs2, FH, N, Pool, NR, ND, NDO, Mode)
    end.

%% Rejects tables with local_content or snmp settings, replaces the
%% frag_properties by the props common to all fragments and creates the
%% per-fragment cstructs.
do_expand_cstruct(Cs, FH, N, Pool, NR, ND, NDO, Mode) ->
    Tab = Cs#cstruct.name,

    LC = Cs#cstruct.local_content,
    mnesia_schema:verify(false, LC,
                         {combine_error, Tab, {local_content, LC}}),

    Snmp = Cs#cstruct.snmp,
    mnesia_schema:verify([], Snmp,
                         {combine_error, Tab, {snmp, Snmp}}),

    %% Add empty fragments
    CommonProps = [{base_table, Tab}],
    Cs2 = Cs#cstruct{frag_properties = lists:sort(CommonProps)},
    expand_frag_cstructs(N, NR, ND, NDO, Cs2, Pool, Pool, FH, Mode).

%% Checks the requested number of fragments.
%% create:   returns Cs with all replica lists emptied.
%% activate: N must be 1; returns Cs unchanged.
%% Anything that is not an integer >= 1 aborts with bad_type.
verify_n_fragments(N, Cs, Mode) when is_integer(N), N >= 1 ->
    case Mode of
        create ->
            Cs#cstruct{ram_copies = [],
                       disc_copies = [],
                       disc_only_copies = []};
        activate ->
            Reason = {combine_error, Cs#cstruct.name, {n_fragments, N}},
            mnesia_schema:verify(1, N, Reason),
            Cs
    end;
verify_n_fragments(N, Cs, _Mode) ->
    mnesia:abort({bad_type, Cs#cstruct.name, {n_fragments, N}}).

%% Works out the basic fragmentation settings of Tab from its
%% foreign_key property.
%%
%% Returns {ForeignKey, NFragments, NodePool, DefaultNR, DefaultND,
%% DefaultNDO}, where the last three are the replica counts to use when
%% the table definition gives none.
%%
%% With a foreign key {ForeignTab, Attr} the fragment count and node pool
%% must agree with those of ForeignTab, ForeignTab must not itself have a
%% foreign key, and the default replica counts are taken from ForeignTab.
%% Attr is translated into a record position.
pick_props(Tab, Cs, {ForeignTab, Attr}) ->
    mnesia_schema:verify(true, ForeignTab /= Tab,
                         {combine_error, Tab, {ForeignTab, Attr}}),
    FragProps = Cs#cstruct.frag_properties,
    AttrNames = Cs#cstruct.attributes,

    MasterKey  = lookup_prop(ForeignTab, foreign_key),
    MasterN    = lookup_prop(ForeignTab, n_fragments),
    MasterPool = lookup_prop(ForeignTab, node_pool),
    NFrags   = mnesia_schema:pick(Tab, n_fragments, FragProps, MasterN),
    NodePool = mnesia_schema:pick(Tab, node_pool, FragProps, MasterPool),

    mnesia_schema:verify(MasterN, NFrags,
                         {combine_error, Tab, {n_fragments, NFrags},
                          ForeignTab, {n_fragments, MasterN}}),

    mnesia_schema:verify(MasterPool, NodePool,
                         {combine_error, Tab, {node_pool, NodePool},
                          ForeignTab, {node_pool, MasterPool}}),

    mnesia_schema:verify(undefined, MasterKey,
                         {combine_error, Tab,
                          "Multiple levels of foreign_key dependencies",
                          {ForeignTab, Attr}, MasterKey}),

    PosKey = {ForeignTab, mnesia_schema:attr_to_pos(Attr, AttrNames)},
    RamCount      = length(val({ForeignTab, ram_copies})),
    DiscCount     = length(val({ForeignTab, disc_copies})),
    DiscOnlyCount = length(val({ForeignTab, disc_only_copies})),
    {PosKey, NFrags, NodePool, RamCount, DiscCount, DiscOnlyCount};
pick_props(Tab, Cs, undefined) ->
    %% No foreign key: one fragment, all db_nodes as pool and a single
    %% ram copy unless the properties say otherwise.
    FragProps = Cs#cstruct.frag_properties,
    AllNodes = mnesia:system_info(db_nodes),
    NFrags   = mnesia_schema:pick(Tab, n_fragments, FragProps, 1),
    NodePool = mnesia_schema:pick(Tab, node_pool, FragProps, AllNodes),
    {undefined, NFrags, NodePool, 1, 0, 0};
pick_props(Tab, _Cs, BadKey) ->
    mnesia:abort({bad_type, Tab, {foreign_key, BadKey}}).

%% Builds the cstructs of fragments N, N-1, ..., 1 (in that order).
%%
%% Dist is the node distribution to pick replicas from. After each
%% fragment the nodes just used are re-inserted with their updated
%% fragment count (rearrange_dist/4). The base table (fragment 1) also
%% receives the complete set of fragmentation properties.
expand_frag_cstructs(N, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode)
  when N > 1, Mode == create ->
    FragCs0 = CommonCs#cstruct{name = n_to_frag_name(CommonCs#cstruct.name, N)},
    {FragCs, RevUsed, Unused} = set_frag_nodes(NR, ND, NDO, FragCs0, Dist, []),
    NextDist = rearrange_dist(FragCs0, lists:reverse(RevUsed), Unused, Pool),
    %% Adjusts backwards, but it doesn't matter.
    {NextFH, _FromFrags, _AdditionalWriteFrags} = adjust_before_split(FH),
    [FragCs | expand_frag_cstructs(N - 1, NR, ND, NDO, CommonCs,
                                   NextDist, Pool, NextFH, Mode)];
expand_frag_cstructs(1, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode) ->
    HashProps = [{foreign_key, FH#frag_state.foreign_key},
                 {hash_module, FH#frag_state.hash_module},
                 {hash_state,  FH#frag_state.hash_state},
                 {n_fragments, FH#frag_state.n_fragments},
                 {node_pool,   Pool}],
    AllProps = lists:sort(CommonCs#cstruct.frag_properties ++ HashProps),
    BaseCs = CommonCs#cstruct{frag_properties = AllProps},
    case Mode of
        create ->
            {PlacedCs, _, _} = set_frag_nodes(NR, ND, NDO, BaseCs, Dist, []),
            [PlacedCs];
        activate ->
            %% The existing replicas are kept as they are.
            [BaseCs]
    end.

%% Assigns NR ram, ND disc and NDO disc_only replicas of Cs to the nodes
%% at the front of the distribution list, in that order.
%% Returns {Cs2, UsedEntriesInReverseOrder, UnusedEntries}; aborts when
%% the list runs out before all replicas are placed.
set_frag_nodes(NR, ND, NDO, Cs, [Next | Remaining], Taken) when NR > 0 ->
    {Cs2, Used} = set_frag_node(Cs, #cstruct.ram_copies, Next),
    set_frag_nodes(NR - 1, ND, NDO, Cs2, Remaining, [Used | Taken]);
set_frag_nodes(NR, ND, NDO, Cs, [Next | Remaining], Taken) when ND > 0 ->
    {Cs2, Used} = set_frag_node(Cs, #cstruct.disc_copies, Next),
    set_frag_nodes(NR, ND - 1, NDO, Cs2, Remaining, [Used | Taken]);
set_frag_nodes(NR, ND, NDO, Cs, [Next | Remaining], Taken) when NDO > 0 ->
    {Cs2, Used} = set_frag_node(Cs, #cstruct.disc_only_copies, Next),
    set_frag_nodes(NR, ND, NDO - 1, Cs2, Remaining, [Used | Taken]);
set_frag_nodes(0, 0, 0, Cs, Unused, Taken) ->
    {Cs, Taken, Unused};
set_frag_nodes(_, _, _, Cs, [], _) ->
    mnesia:abort({combine_error,  Cs#cstruct.name, "Too few nodes in node_pool"}).

%% Adds the node of the distribution entry Head to the replica list found
%% at record position Pos of Cs.
%% Head is either a bare node name or {Node, FragCount}. The node must be
%% among the currently running db_nodes.
%% Returns {Cs2, {Node, FragCount + 1}} (count 1 for a bare node name).
set_frag_node(Cs, Pos, Head) ->
    Existing = element(Pos, Cs),
    {Node, NewCount} =
        case Head of
            {N, Count} when is_atom(N), is_integer(Count), Count >= 0 ->
                {N, Count + 1};
            N when is_atom(N) ->
                {N, 1};
            BadNode ->
                mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
        end,
    mnesia_schema:verify(true,
                         lists:member(Node, val({current,db_nodes})),
                         {not_active, Cs#cstruct.name, Node}),
    {setelement(Pos, Cs, [Node | Existing]), {Node, NewCount}}.

%% Re-inserts every {Node, Count} entry of ModDist into the distribution
%% Dist, one at a time and in list order.
rearrange_dist(Cs, ModDist, Dist, Pool) ->
    lists:foldl(fun({Node, Count}, Acc) ->
                        insert_dist(Cs, Node, Count, Acc, Pool)
                end,
                Dist,
                ModDist).

%% Inserts {Node, Count} in front of the first entry it compares `less'
%% to (see node_diff/5), or at the end when there is none. Bare node
%% names met on the way are treated as having count 0.
insert_dist(Cs, Node, Count, [{Node2, Count2} = Head | Tail], Pool)
  when is_atom(Node2), is_integer(Count2), Count2 >= 0 ->
    case node_diff(Node, Count, Node2, Count2, Pool) of
        less ->
            [{Node, Count}, Head | Tail];
        greater ->
            [Head | insert_dist(Cs, Node, Count, Tail, Pool)]
    end;
insert_dist(Cs, Node, Count, [Node2 | Tail], Pool) when is_atom(Node2) ->
    insert_dist(Cs, Node, Count, [{Node2, 0} | Tail], Pool);
insert_dist(Cs, _Node, _Count, [BadNode | _Tail], _Pool) ->
    mnesia:abort({bad_type, Cs#cstruct.name, BadNode});
insert_dist(_Cs, Node, Count, [], _Pool) ->
    [{Node, Count}];
insert_dist(_Cs, _Node, _Count, Dist, _Pool) ->
    mnesia:abort({bad_type, Dist}).

%% Orders two distribution entries: primarily on fragment count, and for
%% equal counts on the position of the node in Pool.
%% Returns less | greater.
node_diff(Node, Count, Node2, Count2, Pool) ->
    if
        Count < Count2 ->
            less;
        Count > Count2 ->
            greater;
        Count == Count2 ->
            PosA = list_pos(Node, Pool, 1),
            PosB = list_pos(Node2, Pool, 1),
            if
                PosA < PosB ->
                    less;
                PosA > PosB ->
                    greater
            end
    end.

%% Returns position of element in list
list_pos(Wanted, [Wanted | _Rest], Index) ->
    Index;
list_pos(Wanted, [_Other | Rest], Index) ->
    list_pos(Wanted, Rest, Index + 1).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Switch function for changing of table fragmentation
%%
%% Returns a list of lists of schema ops

%% Dispatches a fragmentation change request to the function that builds
%% the schema ops for it. Unknown requests abort with bad_type.
change_table_frag(Tab, Change) ->
    case Change of
        {activate, FragProps} ->
            make_activate(Tab, FragProps);
        deactivate ->
            make_deactivate(Tab);
        {add_frag, SortedNodes} ->
            make_multi_add_frag(Tab, SortedNodes);
        del_frag ->
            make_multi_del_frag(Tab);
        {add_node, Node} ->
            make_multi_add_node(Tab, Node);
        {del_node, Node} ->
            make_multi_del_node(Tab, Node);
        _ ->
            mnesia:abort({bad_type, Tab, Change})
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Turn a normal table into a fragmented table
%%
%% The storage type must be the same on all nodes

%% Builds the schema op that gives Tab the fragmentation properties Props.
%% Aborts with already_exists when Tab has frag_properties already.
make_activate(Tab, Props) ->
    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
    mnesia_schema:ensure_active(Cs),
    case Cs#cstruct.frag_properties of
        [] ->
            [Expanded] = expand_cstruct(Cs#cstruct{frag_properties = Props}, activate),
            TabDef = mnesia_schema:cs2list(Expanded),
            [[{op, change_table_frag, activate, TabDef}]];
        Existing ->
            mnesia:abort({already_exists, Tab, {frag_properties, Existing}})
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Turn a table into a normal defragmented table

%% Builds the schema op that removes all fragmentation properties from
%% Tab. Only allowed for a base table that has a single fragment and that
%% no other table refers to through a foreign key.
make_deactivate(Tab) ->
    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
    mnesia_schema:ensure_active(Cs),
    Foreigners = lookup_foreigners(Tab),
    BaseTab = lookup_prop(Tab, base_table),
    FH = lookup_frag_hash(Tab),
    case {BaseTab /= Tab, Foreigners /= [], FH#frag_state.n_fragments > 1} of
        {true, _, _} ->
            mnesia:abort({combine_error, Tab, "Not a base table"});
        {false, true, _} ->
            mnesia:abort({combine_error, Tab, "Too many foreigners", Foreigners});
        {false, false, true} ->
            mnesia:abort({combine_error, Tab, "Too many fragments"});
        {false, false, false} ->
            TabDef = mnesia_schema:cs2list(Cs#cstruct{frag_properties = []}),
            [[{op, change_table_frag, deactivate, TabDef}]]
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Add a fragment to a fragmented table and fill it with half of
%% the records from one of the old fragments

%% Builds the add_frag ops for Tab and for every table that uses Tab as
%% its foreign table. Returns one op list per table, Tab first.
make_multi_add_frag(Tab, SortedNs) when is_list(SortedNs) ->
    verify_multi(Tab),
    OwnOps = make_add_frag(Tab, SortedNs),

    %% Propagate to foreigners
    ForeignOps = lists:map(fun(T) -> make_add_frag(T, SortedNs) end,
                           lookup_foreigners(Tab)),
    [OwnOps | ForeignOps];
make_multi_add_frag(Tab, SortedNs) ->
    mnesia:abort({bad_type, Tab, SortedNs}).

%% Makes sure Tab has no foreign key of its own: tables with a foreign
%% key may only be changed through the table they refer to.
verify_multi(Tab) ->
    #frag_state{foreign_key = ForeignKey} = lookup_frag_hash(Tab),
    mnesia_schema:verify(undefined, ForeignKey,
                         {combine_error, Tab,
                          "Op only allowed via foreign table",
                          {foreign_key, ForeignKey}}).

%% Write-locks Tab and the fragments listed in FragIndecies and returns a
%% tuple of size N in which the slots of the listed fragments hold their
%% table names; all other slots stay `undefined'.
%% With DoNotLockN == true fragment number N gets its name filled in but
%% is not locked.
make_frag_names_and_acquire_locks(Tab, N, FragIndecies, DoNotLockN) ->
    mnesia_schema:get_tid_ts_and_lock(Tab, write),
    Register =
        fun(Index, Names) ->
                Name = n_to_frag_name(Tab, Index),
                case DoNotLockN == true andalso Index == N of
                    true ->
                        skip_lock;
                    false ->
                        mnesia_schema:get_tid_ts_and_lock(Name, write)
                end,
                setelement(Index, Names, Name)
        end,
    lists:foldl(Register, erlang:make_tuple(N, undefined), FragIndecies).

%% Builds the schema ops for adding one fragment to Tab:
%% the op that updates the base table, the op that creates the new
%% fragment on nodes taken from SortedNs, and the record ops that move
%% records into their new fragments.
make_add_frag(Tab, SortedNs) ->
    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
    mnesia_schema:ensure_active(Cs),
    OldFH = lookup_frag_hash(Tab),
    {NewFH, FromIndices, WriteIndices} = adjust_before_split(OldFH),
    N = NewFH#frag_state.n_fragments,
    FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndices, true),
    NewFrag = element(N, FragNames),

    %% The new fragment gets as many replicas of each kind as the base table
    NR  = length(Cs#cstruct.ram_copies),
    ND  = length(Cs#cstruct.disc_copies),
    NDO = length(Cs#cstruct.disc_only_copies),
    EmptyCs = Cs#cstruct{name = NewFrag,
                         frag_properties = [{base_table, Tab}],
                         ram_copies = [],
                         disc_copies = [],
                         disc_only_copies = []},

    {PlacedCs, _, _} = set_frag_nodes(NR, ND, NDO, EmptyCs, SortedNs, []),
    [CreateOp] = mnesia_schema:make_create_table(PlacedCs),

    SplitOps = split(Tab, NewFH, FromIndices, FragNames, []),

    TabDef = mnesia_schema:cs2list(replace_frag_hash(Cs, NewFH)),
    BaseOp = {op, change_table_frag, {add_frag, SortedNs}, TabDef},

    [BaseOp, CreateOp | SplitOps].

%% Rewrites the frag_properties of Cs so that n_fragments, hash_module and
%% hash_state reflect FH. The old-style next_n_to_split and n_doubles
%% entries are dropped; everything else is kept.
replace_frag_hash(Cs, FH) when is_record(FH, frag_state) ->
    Rewrite =
        fun({n_fragments, _}) ->
                {true, {n_fragments, FH#frag_state.n_fragments}};
           ({hash_module, _}) ->
                {true, {hash_module, FH#frag_state.hash_module}};
           ({hash_state, _}) ->
                {true, {hash_state, FH#frag_state.hash_state}};
           ({next_n_to_split, _}) ->
                false;
           ({n_doubles, _}) ->
                false;
           (_Other) ->
                true
        end,
    Cs#cstruct{frag_properties = lists:zf(Rewrite, Cs#cstruct.frag_properties)}.

%% Adjust table info before split
%%
%% Lets the hash callback module advance its state by one fragment.
%% Returns {FH2, FromFrags, WriteFrags}: FH2 has n_fragments incremented
%% and the new hash state; FromFrags are the sorted fragment numbers
%% records may have to be moved from; WriteFrags is FromFrags merged with
%% the additional fragments the callback wants write locked.
%% Aborts when any reported number is outside 1..N (new fragment count).
adjust_before_split(FH) ->
    HashState = FH#frag_state.hash_state,
    {HashState2, FromFrags, AdditionalWriteFrags} =
        case FH#frag_state.hash_module of
            HashMod when HashMod == ?DEFAULT_HASH_MOD ->
                ?DEFAULT_HASH_MOD:add_frag(HashState);
            HashMod ->
                HashMod:add_frag(HashState)
        end,
    N = FH#frag_state.n_fragments + 1,
    %% The catches turn a malformed callback answer into an 'EXIT' term,
    %% which then fails the range check below instead of crashing here.
    FromFrags2 = (catch lists:sort(FromFrags)),
    UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
    VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
                   (_F) -> true
                end,
    case catch lists:filter(VerifyFun, UnionFrags) of
        [] ->
            FH2 = FH#frag_state{n_fragments = N,
                                hash_state  = HashState2},
            {FH2, FromFrags2, UnionFrags};
        BadFrags ->
            mnesia:abort({"add_frag: Fragment numbers out of range",
                          BadFrags, {range, 1, N}})
    end.

%% Reads all records of each fragment listed in the third argument and
%% collects the record ops needed to move them (see do_split/5).
%% FragNames is the tuple built by make_frag_names_and_acquire_locks/4.
split(Tab, FH, [SplitN | SplitNs], FragNames, Ops) ->
    SplitFrag = element(SplitN, FragNames),
    Pat = mnesia:table_info(SplitFrag, wild_pattern),
    {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
    Recs = mnesia:match_object(Tid, Ts, SplitFrag, Pat, read),
    Ops2 = do_split(FH, SplitN, FragNames, Recs, Ops),
    split(Tab, FH, SplitNs, FragNames, Ops2);
split(_Tab, _FH, [], _FragNames, Ops) ->
    Ops.

%% Perform the split of the table
%%
%% For every record of fragment OldN the new fragment number is computed
%% from the hash key (position given by key_pos/1). A record that changes
%% fragment yields a write op on the new fragment and a delete op on the
%% old one; both are keyed on element 2 of the record.
do_split(FH, OldN, FragNames, [Rec | Recs], Ops) ->
    Pos = key_pos(FH),
    HashKey = element(Pos, Rec),
    case key_to_n(FH, HashKey) of
        NewN when NewN == OldN ->
            %% Keep record in the same fragment. No need to move it.
            do_split(FH, OldN, FragNames, Recs, Ops);
        NewN ->
            case element(NewN, FragNames) of
                NewFrag when NewFrag /= undefined ->
                    OldFrag = element(OldN, FragNames),
                    Key = element(2, Rec),
                    NewOid = {NewFrag, Key},
                    OldOid = {OldFrag, Key},
                    Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
                            {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
                    do_split(FH, OldN, FragNames, Recs, Ops2);
                _NewFrag ->
                    %% Tried to move record to a fragment that is not locked
                    mnesia:abort({"add_frag: Fragment not locked", NewN})
            end
    end;
do_split(_FH, _OldN, _FragNames, [], Ops) ->
    Ops.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Delete a fragment from a fragmented table
%% and merge its records with an other fragment

%% Builds the del_frag ops for Tab and for every table that uses Tab as
%% its foreign table. Returns one op list per table, Tab first.
make_multi_del_frag(Tab) ->
    verify_multi(Tab),
    Ops = make_del_frag(Tab),

    %% Propagate to foreigners
    MoreOps = [make_del_frag(T) || T <- lookup_foreigners(Tab)],
    [Ops | MoreOps].

%% Builds the schema ops for removing the last fragment of Tab:
%% the op that updates the base table, the op that deletes the last
%% fragment table, and the record ops that move its records elsewhere.
make_del_frag(Tab) ->
    FH = lookup_frag_hash(Tab),
    case FH#frag_state.n_fragments of
        N when N > 1 ->
            Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
            mnesia_schema:ensure_active(Cs),
            {FH2, FromIndecies, WriteIndecies} = adjust_before_merge(FH),
            FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, false),

            MergeOps = merge(Tab, FH2, FromIndecies, FragNames, []),
            LastFrag = element(N, FragNames),
            [LastOp] = mnesia_schema:make_delete_table(LastFrag, single_frag),
            Cs2 = replace_frag_hash(Cs, FH2),
            TabDef = mnesia_schema:cs2list(Cs2),
            BaseOp = {op, change_table_frag, del_frag, TabDef},
            [BaseOp, LastOp | MergeOps];
        _ ->
            %% Cannot remove the last fragment
            mnesia:abort({no_exists, Tab})
    end.

%% Adjust tab info before merge
%%
%% Lets the hash callback module step its state back by one fragment.
%% Returns {FH2, FromFrags, WriteFrags}: FH2 has n_fragments decremented
%% and the new hash state; FromFrags are the sorted fragment numbers
%% records may have to be moved from; WriteFrags is FromFrags merged with
%% the additional fragments the callback wants write locked.
%% Aborts when a reported number is outside 1..N (current fragment count)
%% or when the last fragment N is missing from FromFrags.
adjust_before_merge(FH) ->
    HashState = FH#frag_state.hash_state,
    {HashState2, FromFrags, AdditionalWriteFrags} =
        case FH#frag_state.hash_module of
            HashMod when HashMod == ?DEFAULT_HASH_MOD ->
                ?DEFAULT_HASH_MOD:del_frag(HashState);
            HashMod ->
                HashMod:del_frag(HashState)
        end,
    N = FH#frag_state.n_fragments,
    %% The catches turn a malformed callback answer into an 'EXIT' term,
    %% which then fails the range check below instead of crashing here.
    FromFrags2 = (catch lists:sort(FromFrags)),
    UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
    VerifyFun = fun(F) when is_integer(F), F >= 1, F =< N -> false;
                   (_F) -> true
                end,
    case catch lists:filter(VerifyFun, UnionFrags) of
        [] ->
            case lists:member(N, FromFrags2) of
                true ->
                    FH2 = FH#frag_state{n_fragments = N - 1,
                                        hash_state  = HashState2},
                    {FH2, FromFrags2, UnionFrags};
                false ->
                    mnesia:abort({"del_frag: Last fragment number not included", N})
            end;
        BadFrags ->
            mnesia:abort({"del_frag: Fragment numbers out of range",
                          BadFrags, {range, 1, N}})
    end.

%% Reads all records of each fragment listed in the third argument and
%% collects the record ops needed to move them (see do_merge/5).
%% FragNames is the tuple built by make_frag_names_and_acquire_locks/4.
merge(Tab, FH, [FromN | FromNs], FragNames, Ops) ->
    FromFrag = element(FromN, FragNames),
    Pat = mnesia:table_info(FromFrag, wild_pattern),
    {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
    Recs = mnesia:match_object(Tid, Ts, FromFrag, Pat, read),
    Ops2 = do_merge(FH, FromN, FragNames, Recs, Ops),
    merge(Tab, FH, FromNs, FragNames, Ops2);
merge(_Tab, _FH, [], _FragNames, Ops) ->
    Ops.

%% Perform the merge of the table
%%
%% FH is the already decremented state as passed in by make_del_frag/1,
%% so n_fragments + 1 is the number of the fragment being deleted.
%% Records are re-hashed with the new state. Those leaving the deleted
%% fragment only get a write op, since the whole fragment table is
%% removed anyway. Records moving between surviving fragments get a write
%% op and a delete op.
do_merge(FH, OldN, FragNames, [Rec | Recs], Ops) ->
    Pos = key_pos(FH),
    LastN = FH#frag_state.n_fragments + 1,
    HashKey = element(Pos, Rec),
    case key_to_n(FH, HashKey) of
        NewN when NewN == LastN ->
            %% Tried to leave a record in the fragment that is to be deleted
            mnesia:abort({"del_frag: Fragment number out of range",
                          NewN, {range, 1, LastN}});
        NewN when NewN == OldN ->
            %% Keep record in the same fragment. No need to move it.
            do_merge(FH, OldN, FragNames, Recs, Ops);
        NewN when OldN == LastN ->
            %% Move record from the fragment that is to be deleted
            %% No need to create a delete op for each record.
            case element(NewN, FragNames) of
                NewFrag when NewFrag /= undefined ->
                    Key = element(2, Rec),
                    NewOid = {NewFrag, Key},
                    Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}} | Ops],
                    do_merge(FH, OldN, FragNames, Recs, Ops2);
                _NewFrag ->
                    %% Tried to move record to a fragment that is not locked
                    mnesia:abort({"del_frag: Fragment not locked", NewN})
            end;
        NewN ->
            case element(NewN, FragNames) of
                NewFrag when NewFrag /= undefined ->
                    OldFrag = element(OldN, FragNames),
                    Key = element(2, Rec),
                    NewOid = {NewFrag, Key},
                    OldOid = {OldFrag, Key},
                    Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
                            {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
                    do_merge(FH, OldN, FragNames, Recs, Ops2);
                _NewFrag ->
                    %% Tried to move record to a fragment that is not locked
                    mnesia:abort({"del_frag: Fragment not locked", NewN})
            end
    end;
do_merge(_FH, _OldN, _FragNames, [], Ops) ->
    Ops.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Add a node to the node pool of a fragmented table

%% Builds the add_node ops for Tab and for every table that uses Tab as
%% its foreign table. Returns one op list per table, Tab first.
make_multi_add_node(Tab, Node)  ->
    verify_multi(Tab),
    Ops = make_add_node(Tab, Node),

    %% Propagate to foreigners
    MoreOps = [make_add_node(T, Node) || T <- lookup_foreigners(Tab)],
    [Ops | MoreOps].

%% Builds the schema op that appends Node to the node_pool property of
%% Tab. Aborts when Node is in the pool already or is not an atom.
make_add_node(Tab, Node) when is_atom(Node)  ->
    OldPool = lookup_prop(Tab, node_pool),
    case lists:member(Node, OldPool) of
        true ->
            mnesia:abort({already_exists, Tab, Node});
        false ->
            Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
            NewPool = OldPool ++ [Node],
            NewProps = lists:keyreplace(node_pool, 1,
                                        Cs#cstruct.frag_properties,
                                        {node_pool, NewPool}),
            TabDef = mnesia_schema:cs2list(Cs#cstruct{frag_properties = NewProps}),
            [{op, change_table_frag, {add_node, Node}, TabDef}]
    end;
make_add_node(Tab, Node) ->
    mnesia:abort({bad_type, Tab, Node}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Delete a node from the node pool of a fragmented table

%% Builds the del_node ops for Tab and for every table that uses Tab as
%% its foreign table. Returns one op list per table, Tab first.
make_multi_del_node(Tab, Node)  ->
    verify_multi(Tab),
    OwnOps = make_del_node(Tab, Node),

    %% Propagate to foreigners
    ForeignOps = lists:map(fun(T) -> make_del_node(T, Node) end,
                           lookup_foreigners(Tab)),
    [OwnOps | ForeignOps].

%% Builds the schema op that removes Node from the node_pool property of
%% Tab. Aborts when Node is not in the pool or is not an atom.
make_del_node(Tab, Node) when is_atom(Node) ->
    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
    mnesia_schema:ensure_active(Cs),
    OldPool = lookup_prop(Tab, node_pool),
    case lists:member(Node, OldPool) of
        false ->
            mnesia:abort({no_exists, Tab, Node});
        true ->
            NewPool = OldPool -- [Node],
            NewProps = lists:keyreplace(node_pool, 1,
                                        Cs#cstruct.frag_properties,
                                        {node_pool, NewPool}),
            TabDef = mnesia_schema:cs2list(Cs#cstruct{frag_properties = NewProps}),
            [{op, change_table_frag, {del_node, Node}, TabDef}]
    end;
make_del_node(Tab, Node) ->
    mnesia:abort({bad_type, Tab, Node}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Special case used to remove all references to a node during
%% mnesia:del_table_copy(schema, Node)

%% Returns {Cs2, Changed}. Changed is true only when the table is
%% fragmented and Node was removed from its node_pool property.
remove_node(Node, Cs) ->
    Tab = Cs#cstruct.name,
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            {Cs, false};
        _ ->
            OldPool = lookup_prop(Tab, node_pool),
            case lists:member(Node, OldPool) of
                false ->
                    {Cs, false};
                true ->
                    NewProps = lists:keyreplace(node_pool, 1,
                                                Cs#cstruct.frag_properties,
                                                {node_pool, OldPool -- [Node]}),
                    {Cs#cstruct{frag_properties = NewProps}, true}
            end
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Helpers

%% Looks up Var; a failed lookup is passed on to mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%% Stores the frag_state derived from Props under {Tab, frag_hash}, or
%% removes that entry when Props describe no base table.
set_frag_hash(Tab, Props) ->
    case props_to_frag_hash(Tab, Props) of
        no_hash ->
            mnesia_lib:unset({Tab, frag_hash});
        #frag_state{} = FH ->
            mnesia_lib:set({Tab, frag_hash}, FH)
    end.

%% Converts frag_properties into a #frag_state{} record.
%% Returns no_hash for an empty property list and for tables whose
%% base_table property is not the table itself.
props_to_frag_hash(_Tab, []) ->
    no_hash;
props_to_frag_hash(Tab, Props) ->
    case mnesia_schema:pick(Tab, base_table, Props, undefined) == Tab of
        true ->
            Foreign = mnesia_schema:pick(Tab, foreign_key, Props, must),
            N = mnesia_schema:pick(Tab, n_fragments, Props, must),

            {Mod, State} =
                case mnesia_schema:pick(Tab, hash_module, Props, undefined) of
                    undefined ->
                        %% Old style. Kept for backwards compatibility.
                        Split = mnesia_schema:pick(Tab, next_n_to_split, Props, must),
                        Doubles = mnesia_schema:pick(Tab, n_doubles, Props, must),
                        OldFH = {frag_hash, Foreign, N, Split, Doubles},
                        {?OLD_HASH_MOD, ?OLD_HASH_MOD:init_state(Tab, OldFH)};
                    HashMod ->
                        {HashMod, mnesia_schema:pick(Tab, hash_state, Props, must)}
                end,
            #frag_state{foreign_key = Foreign,
                        n_fragments = N,
                        hash_module = Mod,
                        hash_state  = State};
        false ->
            no_hash
    end.

%% Returns the value of the fragmentation property Prop of Tab.
%% Aborts with no_exists when the property is missing.
lookup_prop(Tab, Prop) ->
    Props = val({Tab, frag_properties}),
    case lists:keyfind(Prop, 1, Props) of
        {Prop, Val} ->
            Val;
        false ->
            mnesia:abort({no_exists, Tab, Prop, {frag_properties, Props}})
    end.

%% Returns the #frag_state{} of Tab, converting an old-style frag_hash
%% tuple on the fly. Aborts when Tab has no frag_hash entry.
lookup_frag_hash(Tab) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            mnesia:abort({no_exists, Tab, frag_properties, frag_hash});
        {frag_hash, K, N, _S, _D} = OldFH ->
            %% Old style. Kept for backwards compatibility.
            #frag_state{foreign_key = K,
                        n_fragments = N,
                        hash_module = ?OLD_HASH_MOD,
                        hash_state  = ?OLD_HASH_MOD:init_state(Tab, OldFH)};
        FH when is_record(FH, frag_state) ->
            FH
    end.

%% Returns a list of tables
%% (those whose stored frag_hash has Tab as foreign table)
lookup_foreigners(Tab) ->
    %% First field in HashPat is either frag_hash or frag_state
    HashPat = {'_', {Tab, '_'}, '_', '_', '_'},
    Hits = ?ets_match(mnesia_gvar, {{'$1', frag_hash}, HashPat}),
    [Foreigner || [Foreigner] <- Hits].

%% Returns name of fragment table
%% (Tab itself when the table is not fragmented)
record_to_frag_name(Tab, Rec) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            Tab;
        FH ->
            HashKey = element(key_pos(FH), Rec),
            n_to_frag_name(Tab, key_to_n(FH, HashKey))
    end.

%% Returns the record position of the key used for hashing: 2 without a
%% foreign key, otherwise the position stored in the foreign key.
key_pos(FH) ->
    case FH#frag_state.foreign_key of
        {_ForeignTab, Pos} ->
            Pos;
        undefined ->
            2
    end.

%% Returns name of fragment table
key_to_frag_name({BaseTab, _} = Tab, Key) ->
    n_to_frag_name(BaseTab, key_to_frag_number(Tab, Key));
key_to_frag_name(Tab, Key) ->
    n_to_frag_name(Tab, key_to_frag_number(Tab, Key)).

%% Returns name of fragment table
%% Fragment 1 is the base table itself; fragment N is <Tab>_frag<N>.
n_to_frag_name(Tab, 1) ->
    Tab;
n_to_frag_name(Tab, N) when is_atom(Tab), is_integer(N) ->
    list_to_atom(lists:concat([Tab, "_frag", N]));
n_to_frag_name(Tab, N) ->
    mnesia:abort({bad_type, Tab, N}).

%% Returns fragment number
%% A first argument of the form {Tab, ForeignKey} hashes on ForeignKey and
%% requires Tab to have a foreign key; a plain table name hashes on Key.
%% A table without frag_hash always yields 1.
key_to_frag_number({Tab, ForeignKey}, _Key) ->
    FH = val({Tab, frag_hash}),
    case FH#frag_state.foreign_key of
        undefined ->
            mnesia:abort({combine_error, Tab, frag_properties,
                          {foreign_key, undefined}});
        {_ForeignTab, _Pos} ->
            key_to_n(FH, ForeignKey)
    end;
key_to_frag_number(Tab, Key) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            1;
        FH ->
            key_to_n(FH, Key)
    end.

%% Returns fragment number
%% The hash callback's answer must be an integer in 1..n_fragments,
%% otherwise the transaction is aborted.
key_to_n(FH, Key) ->
    State = FH#frag_state.hash_state,
    Mod = FH#frag_state.hash_module,
    FragNo =
        if
            Mod == ?DEFAULT_HASH_MOD ->
                ?DEFAULT_HASH_MOD:key_to_frag_number(State, Key);
            true ->
                Mod:key_to_frag_number(State, Key)
        end,
    Max = FH#frag_state.n_fragments,
    case is_integer(FragNo) andalso FragNo >= 1 andalso FragNo =< Max of
        true ->
            FragNo;
        false ->
            mnesia:abort({"key_to_frag_number: Fragment number out of range",
                          FragNo, {range, 1, Max}})
    end.

%% Returns a list of fragment table names
frag_names(Tab) ->
    case ?catch_val({Tab, frag_hash}) of
        {'EXIT', _} ->
            [Tab];
        FH ->
            frag_names(Tab, FH#frag_state.n_fragments, [])
    end.

%% Counts down from fragment N to fragment 1, so the result is ordered
%% by fragment number with the base table first.
frag_names(Tab, 1, Names) ->
    [Tab | Names];
frag_names(Tab, N, Names) ->
    frag_names(Tab, N - 1, [n_to_frag_name(Tab, N) | Names]).

%% Returns a list of {Node, FragCount} tuples
%% sorted on FragCounts
frag_dist(Tab) ->
    Pool = lookup_prop(Tab, node_pool),
    Empty = [{good, Node, 0} || Node <- Pool],
    sort_dist(count_frag(frag_names(Tab), Empty)).

%% Adds every replica (ram, disc and disc_only) of every fragment in the
%% first argument to the per-node counters in Dist.
count_frag(Frags, Dist) ->
    lists:foldl(fun(Frag, Acc) ->
                        Acc1 = incr_nodes(val({Frag, ram_copies}), Acc),
                        Acc2 = incr_nodes(val({Frag, disc_copies}), Acc1),
                        incr_nodes(val({Frag, disc_only_copies}), Acc2)
                end,
                Dist,
                Frags).

%% Bumps the counter of each node in the first argument.
incr_nodes(Nodes, Dist) ->
    lists:foldl(fun(Node, Acc) -> incr_node(Node, Acc) end, Dist, Nodes).

%% Increments the counter of Node in a list of {Kind, Node, Count}
%% entries. A node that is not in the list is appended as {bad, Node, 1}.
incr_node(Node, [{Kind, Node, Count} | Tail]) ->
    [{Kind, Node, Count + 1} | Tail];
incr_node(Node, [Head | Tail]) ->
    [Head | incr_node(Node, Tail)];
incr_node(Node, []) ->
    [{bad, Node, 1}].

%% Sorts dist in increasing count order.
%% Entries are grouped per count and the groups are key-sorted on the
%% count. Entries of kind `bad' are grouped under the atom `bad', which
%% sorts after every number in Erlang term order, so they come last.
%% Returns a flat list of {Node, Count} tuples.
sort_dist(Dist) ->
    Dist2 = deep_dist(Dist, []),
    Dist3 = lists:keysort(1, Dist2),
    shallow_dist(Dist3).

%% Splits the list into {Tag, [{Node, Count}]} groups, one group per
%% call to pick_count/3, seeded by the head entry.
deep_dist([Head | Tail], Deep) ->
    {Kind, _Node, Count} = Head,
    {Tag, Same, Other} = pick_count(Kind, Count, [Head | Tail]),
    deep_dist(Other, [{Tag, Same} | Deep]);
deep_dist([], Deep) ->
    Deep.

%% Returns {Tag, Same, Other}.
%% When Kind is `bad' every entry of the list ends up in Same and the tag
%% is `bad'. Otherwise Same holds the non-bad entries whose count equals
%% Count (as {Node, Count}), Other holds the remaining triples, and the
%% tag is Count.
pick_count(Kind, Count, [{Kind2, Node2, Count2} | Tail]) ->
    Head = {Node2, Count2},
    {_, Same, Other} = pick_count(Kind, Count, Tail),
    if
        Kind == bad ->
            {bad, [Head | Same], Other};
        Kind2 == bad ->
            {Count, Same, [{Kind2, Node2, Count2} | Other]};
        Count == Count2 ->
            {Count, [Head | Same], Other};
        true ->
            {Count, Same, [{Kind2, Node2, Count2} | Other]}
    end;
pick_count(_Kind, Count, []) ->
    {Count, [], []}.

%% Concatenates the entry lists of all groups, dropping the tags.
shallow_dist([{_Tag, Shallow} | Deep]) ->
    Shallow ++ shallow_dist(Deep);
shallow_dist([]) ->
    [].
diff --git a/lib/mnesia/src/mnesia_frag_hash.erl b/lib/mnesia/src/mnesia_frag_hash.erl new file mode 100644 index 0000000000..610ba2535c --- /dev/null +++ b/lib/mnesia/src/mnesia_frag_hash.erl @@ -0,0 +1,151 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License.
%%
%% %CopyrightEnd%
%%

%%
%%%----------------------------------------------------------------------
%%% Purpose : Implements hashing functionality for fragmented tables
%%%----------------------------------------------------------------------

%header_doc_include
-module(mnesia_frag_hash).

%% Fragmented Table Hashing callback functions
-export([
         init_state/2,
         add_frag/1,
         del_frag/1,
         key_to_frag_number/2,
         match_spec_to_frag_numbers/2
        ]).

%header_doc_include
%%-behaviour(mnesia_frag_hash).

%impl_doc_include
-record(hash_state,
        {n_fragments,
         next_n_to_split,
         n_doubles,
         function}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Returns the hash state of a table with a single fragment, hashing
%% with phash2. Only an `undefined' initial state is accepted.
init_state(_Tab, undefined) ->
    #hash_state{n_fragments     = 1,
                next_n_to_split = 1,
                n_doubles       = 0,
                function        = phash2}.

%% Converts a four-element hash_state tuple (no function field) into the
%% current record; such states hash with phash.
convert_old_state({hash_state, NFrags, SplitN, Doubles}) ->
    #hash_state{n_fragments     = NFrags,
                next_n_to_split = SplitN,
                n_doubles       = Doubles,
                function        = phash}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Advances the state by one fragment.
%% Returns {NewState, [FragmentToSplit], [NewFragment]}.
%% When the fragment after the split one would be 2^n_doubles + 1, the
%% split pointer wraps to 1 and n_doubles is incremented.
add_frag(#hash_state{next_n_to_split = SplitN, n_doubles = L, n_fragments = N} = State) ->
    NextSplit = SplitN + 1,
    Added = N + 1,
    WrapAt = power2(L) + 1,
    Grown =
        if
            WrapAt == NextSplit ->
                State#hash_state{n_fragments     = Added,
                                 n_doubles       = L + 1,
                                 next_n_to_split = 1};
            true ->
                State#hash_state{n_fragments     = Added,
                                 next_n_to_split = NextSplit}
        end,
    {Grown, [SplitN], [Added]};
add_frag(OldState) ->
    add_frag(convert_old_state(OldState)).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Returns {NewState, [N], [MergeN]} where N is the current number of
%% fragments and MergeN becomes the new split pointer.
%% When the split pointer would drop below 1, n_doubles is decremented
%% and the pointer is set to power2 of the new n_doubles.
del_frag(#hash_state{next_n_to_split = Split,
                     n_doubles       = Doubles,
                     n_fragments     = Count} = Current) ->
    case Split - 1 of
        Prev when Prev < 1 ->
            Halved = Doubles - 1,
            Target = power2(Halved),
            Shrunk = Current#hash_state{n_fragments     = Count - 1,
                                        next_n_to_split = Target,
                                        n_doubles       = Halved},
            {Shrunk, [Count], [Target]};
        Prev ->
            Shrunk = Current#hash_state{n_fragments     = Count - 1,
                                        next_n_to_split = Prev},
            {Shrunk, [Count], [Prev]}
    end;
del_frag(Legacy) ->
    del_frag(convert_old_state(Legacy)).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Hash Key over power2(n_doubles) buckets; if the result is below the
%% split pointer, hash again over twice as many buckets.
%% The phash2 result has 1 added to it; the phash result is used as is.
key_to_frag_number(#hash_state{function = phash,
                               next_n_to_split = Split,
                               n_doubles = Doubles}, Key) ->
    case erlang:phash(Key, power2(Doubles)) of
        Frag when Frag < Split ->
            erlang:phash(Key, power2(Doubles + 1));
        Frag ->
            Frag
    end;
key_to_frag_number(#hash_state{function = phash2,
                               next_n_to_split = Split,
                               n_doubles = Doubles}, Key) ->
    case erlang:phash2(Key, power2(Doubles)) + 1 of
        Frag when Frag < Split ->
            erlang:phash2(Key, power2(Doubles + 1)) + 1;
        Frag ->
            Frag
    end;
key_to_frag_number(Legacy, Key) ->
    key_to_frag_number(convert_old_state(Legacy), Key).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% A match spec with exactly one clause whose head is a tuple of more than
%% two elements and whose key pattern (element 2) has no variables maps to
%% a single fragment. Every other match spec maps to all fragments 1..N.
match_spec_to_frag_numbers(#hash_state{n_fragments = Count} = State,
                           [{HeadPat, _, _}])
  when is_tuple(HeadPat), tuple_size(HeadPat) > 2 ->
    KeyPat = element(2, HeadPat),
    case has_var(KeyPat) of
        true ->
            lists:seq(1, Count);
        false ->
            [key_to_frag_number(State, KeyPat)]
    end;
match_spec_to_frag_numbers(#hash_state{n_fragments = Count}, _MatchSpec) ->
    lists:seq(1, Count);
match_spec_to_frag_numbers(Legacy, MatchSpec) ->
    match_spec_to_frag_numbers(convert_old_state(Legacy), MatchSpec).

%% 2 raised to Y, computed with a bit shift.
power2(Y) ->
    1 bsl Y. % trunc(math:pow(2, Y)).

%impl_doc_include

%% Delegates to mnesia:has_var/1.
has_var(Pat) ->
    mnesia:has_var(Pat).
diff --git a/lib/mnesia/src/mnesia_frag_old_hash.erl b/lib/mnesia/src/mnesia_frag_old_hash.erl new file mode 100644 index 0000000000..817bb54eb1 --- /dev/null +++ b/lib/mnesia/src/mnesia_frag_old_hash.erl @@ -0,0 +1,132 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%%%---------------------------------------------------------------------- +%%% Purpose : Implements hashing functionality for fragmented tables +%%%---------------------------------------------------------------------- + +-module(mnesia_frag_old_hash). +%%-behaviour(mnesia_frag_hash). + +-compile({nowarn_deprecated_function, {erlang,hash,2}}). + +%% Hashing callback functions +-export([ + init_state/2, + add_frag/1, + del_frag/1, + key_to_frag_number/2, + match_spec_to_frag_numbers/2 + ]). + +-record(old_hash_state, + {n_fragments, + next_n_to_split, + n_doubles}). + +%% Old style. Kept for backwards compatibility. +-record(frag_hash, + {foreign_key, + n_fragments, + next_n_to_split, + n_doubles}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init_state(_Tab, InitialState) when InitialState == undefined -> + #old_hash_state{n_fragments = 1, + next_n_to_split = 1, + n_doubles = 0}; +init_state(_Tab, FH) when is_record(FH, frag_hash) -> + %% Old style. Kept for backwards compatibility. 
+ #old_hash_state{n_fragments = FH#frag_hash.n_fragments, + next_n_to_split = FH#frag_hash.next_n_to_split, + n_doubles = FH#frag_hash.n_doubles}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +add_frag(State) when is_record(State, old_hash_state) -> + SplitN = State#old_hash_state.next_n_to_split, + P = SplitN + 1, + L = State#old_hash_state.n_doubles, + NewN = State#old_hash_state.n_fragments + 1, + State2 = case trunc(math:pow(2, L)) + 1 of + P2 when P2 == P -> + State#old_hash_state{n_fragments = NewN, + next_n_to_split = 1, + n_doubles = L + 1}; + _ -> + State#old_hash_state{n_fragments = NewN, + next_n_to_split = P} + end, + {State2, [SplitN], [NewN]}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +del_frag(State) when is_record(State, old_hash_state) -> + P = State#old_hash_state.next_n_to_split - 1, + L = State#old_hash_state.n_doubles, + N = State#old_hash_state.n_fragments, + if + P < 1 -> + L2 = L - 1, + MergeN = trunc(math:pow(2, L2)), + State2 = State#old_hash_state{n_fragments = N - 1, + next_n_to_split = MergeN, + n_doubles = L2}, + {State2, [N], [MergeN]}; + true -> + MergeN = P, + State2 = State#old_hash_state{n_fragments = N - 1, + next_n_to_split = MergeN}, + {State2, [N], [MergeN]} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +key_to_frag_number(State, Key) when is_record(State, old_hash_state) -> + L = State#old_hash_state.n_doubles, + A = erlang:hash(Key, trunc(math:pow(2, L))), + P = State#old_hash_state.next_n_to_split, + if + A < P -> + erlang:hash(Key, trunc(math:pow(2, L + 1))); + true -> + A + end. 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +match_spec_to_frag_numbers(State, MatchSpec) when is_record(State, old_hash_state) -> + case MatchSpec of + [{HeadPat, _, _}] when is_tuple(HeadPat), tuple_size(HeadPat) > 2 -> + KeyPat = element(2, HeadPat), + case has_var(KeyPat) of + false -> + [key_to_frag_number(State, KeyPat)]; + true -> + lists:seq(1, State#old_hash_state.n_fragments) + end; + _ -> + lists:seq(1, State#old_hash_state.n_fragments) + end. + +has_var(Pat) -> + mnesia:has_var(Pat). diff --git a/lib/mnesia/src/mnesia_index.erl b/lib/mnesia/src/mnesia_index.erl new file mode 100644 index 0000000000..4e6e8a997c --- /dev/null +++ b/lib/mnesia/src/mnesia_index.erl @@ -0,0 +1,384 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%% Purpose: Handles index functionality in mnesia + +-module(mnesia_index). +-export([read/5, + add_index/5, + delete_index/3, + del_object_index/5, + clear_index/4, + dirty_match_object/3, + dirty_select/3, + dirty_read/3, + dirty_read2/3, + + db_put/2, + db_get/2, + db_match_erase/2, + get_index_table/2, + get_index_table/3, + + tab2filename/2, + tab2tmp_filename/2, + init_index/2, + init_indecies/3, + del_transient/2, + del_transient/3, + del_index_table/3]). + +-import(mnesia_lib, [verbose/2]). +-include("mnesia.hrl"). 
-record(index, {setorbag, pos_list}).

%% Look Var up through the ?catch_val macro; when that yields an 'EXIT'
%% tuple, let mnesia_lib:other_val/2 decide what to return or raise.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%% Read an object list through its index table.
%% We assume that table Tab has an index on attribute number Pos.

read(Tid, Store, Tab, IxKey, Pos) ->
    Found = mnesia_locker:ixrlock(Tid, Store, Tab, IxKey, Pos),
    case val({Tab, setorbag}) of
        bag ->
            %% Remove all tuples which don't include IxKey,
            %% happens when Tab is a bag
            mnesia_lib:key_search_all(IxKey, Pos, Found);
        _ ->
            Found
    end.

%% Insert {IndexValue, Key} for Obj into every index table of Index.
add_index(Index, Tab, Key, Obj, Old) ->
    add_index2(Index#index.pos_list, Index#index.setorbag, Tab, Key, Obj, Old).

%% For bags the new entry is simply added. For other table types the index
%% entries of the old records are erased first; when OldRecs is 'undefined'
%% the old records are read from Tab.
add_index2(PosList, Type, Tab, K, Obj, OldRecs) ->
    Update =
        fun({Pos, Ixt}) ->
                case Type of
                    bag ->
                        ok;
                    _ ->
                        Old = case OldRecs of
                                  undefined -> mnesia_lib:db_get(Tab, K);
                                  _ -> OldRecs
                              end,
                        del_ixes(Ixt, Old, Pos, K)
                end,
                db_put(Ixt, {element(Pos, Obj), K})
        end,
    lists:foreach(Update, PosList).

%% Erase the index entries of all records currently stored under key K.
delete_index(Index, Tab, K) ->
    delete_index2(Index#index.pos_list, Tab, K).

delete_index2(PosList, Tab, K) ->
    lists:foreach(fun({Pos, Ixt}) ->
                          Stored = mnesia_lib:db_get(Tab, K),
                          del_ixes(Ixt, Stored, Pos, K)
                  end, PosList).

%% Erase {element(Pos, Obj), Key} from Ixt for every Obj in the list.
del_ixes(Ixt, Objs, Pos, Key) ->
    lists:foreach(fun(Obj) ->
                          db_match_erase(Ixt, {element(Pos, Obj), Key})
                  end, Objs).

%% Remove the index entries belonging to one deleted object.
del_object_index(Index, Tab, K, Obj, Old) ->
    del_object_index2(Index#index.pos_list, Index#index.setorbag, Tab, K, Obj, Old).
%% Remove the index entries of Obj from every index table in the list.
%% Bags need the extra bookkeeping in del_object_bag/6; for other table
%% types the tuple is removed from the index table directly.
del_object_index2([], _, _Tab, _K, _Obj, _Old) -> ok;
del_object_index2([{Pos, Ixt} | Tail], SoB, Tab, K, Obj, Old) ->
    case SoB of
        bag ->
            del_object_bag(Tab, K, Obj, Pos, Ixt, Old);
        _ -> %% If set remove the tuple in index table
            del_ixes(Ixt, [Obj], Pos, K)
    end,
    del_object_index2(Tail, SoB, Tab, K, Obj, Old).

%% When Old is 'undefined' it is computed as the records stored under Key
%% whose element Pos is exactly equal to that of Obj.
del_object_bag(Tab, Key, Obj, Pos, Ixt, undefined) ->
    IxKey = element(Pos, Obj),
    Old = [X || X <- mnesia_lib:db_get(Tab, Key), element(Pos, X) =:= IxKey],
    del_object_bag(Tab, Key, Obj, Pos, Ixt, Old);
%% If Tab type is bag we need to remove the index identifier if Old
%% contains less than 2 elements.
%% The two list patterns below replace a length(Old) < 2 guard, so the
%% list is never traversed just to learn that it is short.
del_object_bag(_Tab, Key, Obj, Pos, Ixt, []) ->
    del_ixes(Ixt, [Obj], Pos, Key);
del_object_bag(_Tab, Key, Obj, Pos, Ixt, [_]) ->
    del_ixes(Ixt, [Obj], Pos, Key);
del_object_bag(_Tab, _Key, _Obj, _Pos, _Ixt, _Old) -> ok.

%% Erase everything matching Obj from all index tables of Index.
clear_index(Index, Tab, K, Obj) ->
    clear_index2(Index#index.pos_list, Tab, K, Obj).

clear_index2([], _Tab, _K, _Obj) -> ok;
clear_index2([{_Pos, Ixt} | Tail], Tab, K, Obj) ->
    db_match_erase(Ixt, Obj),
    clear_index2(Tail, Tab, K, Obj).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Match Pat against Tab. When the key position of Pat is the wildcard
%% '_', the real keys are fetched from the index on Pos and one match is
%% done per real key; otherwise the table is matched directly.
dirty_match_object(Tab, Pat, Pos) ->
    %% Assume that we are on the node where the replica is
    case element(2, Pat) of
        '_' ->
            IxKey = element(Pos, Pat),
            RealKeys = realkeys(Tab, Pos, IxKey),
            merge(RealKeys, Tab, Pat, []);
        _Else ->
            mnesia_lib:db_match_object(Tab, Pat)
    end.

%% For every {IxKey, RealKey} put RealKey into the key position of Pat,
%% match, and prepend the result to the accumulator.
merge(RealKeys, Tab, Pat, Ack) ->
    %% Assume that we are on the node where the replica is
    lists:foldl(fun({_IxKey, RealKey}, Acc) ->
                        Pat2 = setelement(2, Pat, RealKey),
                        mnesia_lib:db_match_object(Tab, Pat2) ++ Acc
                end, Ack, RealKeys).

%% Returns a list of the form [{IxKey, RealKey1}, ...]
realkeys(Tab, Pos, IxKey) ->
    Index = get_index_table(Tab, Pos),
    db_get(Index, IxKey).
%% Returns the records whose index value on Pos equals element Pos of Spec.
dirty_select(Tab, Spec, Pos) ->
    %% Assume that we are on the node where the replica is
    %% Returns the records without applying the match spec
    %% The actual filtering is handled by the caller
    Storage = val({Tab, storage_type}),
    KeyPairs = realkeys(Tab, Pos, element(Pos, Spec)),
    PerKey = [mnesia_lib:db_get(Storage, Tab, RealKey) || {_, RealKey} <- KeyPairs],
    lists:append(PerKey).

%% Index read performed through mnesia:dirty_rpc/4 on dirty_read2/3.
dirty_read(Tab, IxKey, Pos) ->
    Found = mnesia:dirty_rpc(Tab, ?MODULE, dirty_read2, [Tab, IxKey, Pos]),
    case val({Tab, setorbag}) of
        bag ->
            %% Remove all tuples which don't include IxKey
            mnesia_lib:key_search_all(IxKey, Pos, Found);
        _ ->
            Found
    end.

%% Collect the real keys stored under IxKey in the index table and read
%% the corresponding records from Tab.
dirty_read2(Tab, IxKey, Pos) ->
    IxTab = get_index_table(Tab, Pos),
    r_keys(db_match(IxTab, {IxKey, '$1'}), Tab, []).

%% Each match result is a one element list [Key]; the records read for a
%% key are prepended to the accumulator.
r_keys(Matches, Tab, Ack) ->
    lists:foldl(fun([Key], Acc) ->
                        mnesia_lib:db_get(Tab, Key) ++ Acc
                end, Ack, Matches).


%%%%%%% Creation, Init and deletion routines for index tables
%% We can have several indexes on the same table
%% this can be a fairly costly operation if table is *very* large

%% File name of the index on Pos: <dir of Tab>_<Pos>.DAT
tab2filename(Tab, Pos) ->
    lists:append([mnesia_lib:dir(Tab), "_", integer_to_list(Pos), ".DAT"]).

%% Same as tab2filename/2 but with the .TMP extension.
tab2tmp_filename(Tab, Pos) ->
    lists:append([mnesia_lib:dir(Tab), "_", integer_to_list(Pos), ".TMP"]).

%% Initialise all indexes listed in {Tab, index}.
init_index(Tab, Storage) ->
    init_indecies(Tab, Storage, val({Tab, index})).

%% disc_only_copies get disc based indexes, ram_copies and disc_copies
%% get ram indexes and nothing is done for unknown storage.
init_indecies(Tab, Storage, PosList) ->
    case Storage of
        disc_only_copies ->
            init_disc_index(Tab, PosList);
        disc_copies ->
            make_ram_index(Tab, PosList);
        ram_copies ->
            make_ram_index(Tab, PosList);
        unknown ->
            ignore
    end.

%% works for both ram and disc indexes

del_index_table(Tab, Storage, Pos) ->
    case Storage of
        unknown ->
            ignore;
        _ ->
            delete_transient_index(Tab, Pos, Storage),
            mnesia_lib:del({Tab, index}, Pos)
    end.

%% Delete the transient index data of every index listed in {Tab, index}.
del_transient(Tab, Storage) ->
    del_transient(Tab, val({Tab, index}), Storage).
%% Delete the transient index of every position in the list.
del_transient(Tab, PosList, Storage) ->
    lists:foreach(fun(Pos) ->
                          delete_transient_index(Tab, Pos, Storage)
                  end, PosList),
    done.

%% For disc_only_copies the dets table is closed and its file deleted;
%% for every other storage type the ets table is deleted. In both cases
%% the index is then removed from commit_work and {Tab, {index, Pos}}
%% is unset.
delete_transient_index(Tab, Pos, Storage) ->
    case Storage of
        disc_only_copies ->
            mnesia_monitor:unsafe_close_dets({Tab, index, Pos}),
            file:delete(tab2filename(Tab, Pos));
        _ ->
            EtsTab = val({Tab, {index, Pos}}),
            ?ets_delete_table(EtsTab)
    end,
    del_index_info(Tab, Pos), %% Uses val(..)
    mnesia_lib:unset({Tab, {index, Pos}}).

%%%%% misc functions for the index create/init/delete functions above

%% (Re)build the dets based index for every position in the list.
%% Any existing index file is deleted first. The chunk size is derived
%% from the external size of the records under the first key, aiming at
%% roughly 4000 bytes per chunk.
init_disc_index(_Tab, []) ->
    done;
init_disc_index(Tab, [Pos | Rest]) when is_integer(Pos) ->
    File = tab2filename(Tab, Pos),
    Tag = {Tab, index, Pos},
    file:delete(File),
    mnesia_monitor:open_dets(Tag, [{file, File}, {keypos, 1}, {type, bag}]),
    Storage = disc_only_copies,
    FirstKey = mnesia_lib:db_first(Storage, Tab),
    Sample = mnesia_lib:db_get(Storage, Tab, FirstKey),
    KeysPerChunk = (4000 div byte_size(term_to_binary(Sample))) + 1,
    mnesia_lib:db_fixtable(Storage, Tab, true),
    ok = dets:init_table(Tag, create_fun({start, KeysPerChunk}, Tab, Pos)),
    mnesia_lib:db_fixtable(Storage, Tab, false),
    mnesia_lib:set({Tab, {index, Pos}}, Tag),
    add_index_info(Tab, val({Tab, setorbag}), {Pos, {dets, Tag}}),
    init_disc_index(Tab, Rest).

%% Input fun for dets:init_table/2. Each 'read' fetches the next chunk of
%% records from Tab and returns {IndexValue, Key} pairs together with a fun
%% for the following chunk, or end_of_input when the table is exhausted.
create_fun(Cont, Tab, Pos) ->
    fun(close) ->
            ok;
       (read) ->
            Chunk =
                case Cont of
                    '$end_of_table' ->
                        '$end_of_table';
                    {start, KeysPerChunk} ->
                        mnesia_lib:db_init_chunk(disc_only_copies, Tab, KeysPerChunk);
                    _Continuation ->
                        mnesia_lib:db_chunk(disc_only_copies, Cont)
                end,
            case Chunk of
                {Recs, Next} ->
                    Pairs = [{element(Pos, Rec), element(2, Rec)} || Rec <- Recs],
                    {Pairs, create_fun(Next, Tab, Pos)};
                '$end_of_table' ->
                    end_of_input
            end
    end.
%% Create a ram index for every position in the list.
make_ram_index(Tab, PosList) ->
    lists:foreach(fun(Pos) -> add_ram_index(Tab, Pos) end, PosList),
    done.

%% Build an ets bag holding {IndexValue, Key} for every record in Tab and
%% register it as the index on Pos. Nothing is built for the 'snmp' entry.
add_ram_index(_Tab, snmp) ->
    ok;
add_ram_index(Tab, Pos) when is_integer(Pos) ->
    verbose("Creating index for ~w ~n", [Tab]),
    IxTab = mnesia_monitor:mktab(mnesia_index, [bag, public]),
    AddEntry = fun(Rec, _Acc) ->
                       true = ?ets_insert(IxTab, {element(Pos, Rec), element(2, Rec)})
               end,
    mnesia_lib:db_fixtable(ram_copies, Tab, true),
    true = ets:foldl(AddEntry, true, Tab),
    mnesia_lib:db_fixtable(ram_copies, Tab, false),
    mnesia_lib:set({Tab, {index, Pos}}, IxTab),
    add_index_info(Tab, val({Tab, setorbag}), {Pos, {ram, IxTab}}).

%% Add IxElem to the #index{} entry of {Tab, commit_work}, creating the
%% entry when there is none yet.
add_index_info(Tab, Type, IxElem) ->
    Commit = val({Tab, commit_work}),
    Updated =
        case lists:keyfind(index, 1, Commit) of
            false ->
                %% Check later if mnesia_tm is sensitive about the order
                [#index{setorbag = Type, pos_list = [IxElem]} | Commit];
            Old ->
                %% We could check for consistency here
                Index = Old#index{pos_list = [IxElem | Old#index.pos_list]},
                lists:keyreplace(index, 1, Commit, Index)
        end,
    mnesia_lib:set({Tab, commit_work}, mnesia_lib:sort_commit(Updated)).

%% Remove the index on Pos from the #index{} entry of {Tab, commit_work};
%% the whole entry is dropped when its position list becomes empty.
del_index_info(Tab, Pos) ->
    Commit = val({Tab, commit_work}),
    case lists:keyfind(index, 1, Commit) of
        false ->
            %% Something is wrong, ignore
            skip;
        Old ->
            Updated =
                case lists:keydelete(Pos, 1, Old#index.pos_list) of
                    [] ->
                        lists:keydelete(index, 1, Commit);
                    Remaining ->
                        Index = Old#index{pos_list = Remaining},
                        lists:keyreplace(index, 1, Commit, Index)
                end,
            mnesia_lib:set({Tab, commit_work}, mnesia_lib:sort_commit(Updated))
    end.

%% Insert V into a ram (ets) or dets index table; crashes on failure.
db_put({ram, Ixt}, V) ->
    true = ?ets_insert(Ixt, V);
db_put({dets, Ixt}, V) ->
    ok = dets:insert(Ixt, V).
+ +db_get({ram, Ixt}, K) -> + ?ets_lookup(Ixt, K); +db_get({dets, Ixt}, K) -> + dets:lookup(Ixt, K). + +db_match_erase({ram, Ixt}, Pat) -> + true = ?ets_match_delete(Ixt, Pat); +db_match_erase({dets, Ixt}, Pat) -> + ok = dets:match_delete(Ixt, Pat). + +db_match({ram, Ixt}, Pat) -> + ?ets_match(Ixt, Pat); +db_match({dets, Ixt}, Pat) -> + dets:match(Ixt, Pat). + +get_index_table(Tab, Pos) -> + get_index_table(Tab, val({Tab, storage_type}), Pos). + +get_index_table(Tab, ram_copies, Pos) -> + {ram, val({Tab, {index, Pos}})}; +get_index_table(Tab, disc_copies, Pos) -> + {ram, val({Tab, {index, Pos}})}; +get_index_table(Tab, disc_only_copies, Pos) -> + {dets, val({Tab, {index, Pos}})}; +get_index_table(_Tab, unknown, _Pos) -> + unknown. + diff --git a/lib/mnesia/src/mnesia_kernel_sup.erl b/lib/mnesia/src/mnesia_kernel_sup.erl new file mode 100644 index 0000000000..08f6129fc0 --- /dev/null +++ b/lib/mnesia/src/mnesia_kernel_sup.erl @@ -0,0 +1,65 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_kernel_sup). + +-behaviour(supervisor). + +-export([start/0, init/1, supervisor_timeout/1]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% top supervisor callback functions + +start() -> + supervisor:start_link({local, mnesia_kernel_sup}, ?MODULE, []). 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% sub supervisor callback functions + +init([]) -> + ProcLib = [mnesia_monitor, proc_lib], + Flags = {one_for_all, 0, timer:hours(24)}, % Trust the top supervisor + Workers = [worker_spec(mnesia_monitor, timer:seconds(3), [gen_server]), + worker_spec(mnesia_subscr, timer:seconds(3), [gen_server]), + worker_spec(mnesia_locker, timer:seconds(3), ProcLib), + worker_spec(mnesia_recover, timer:minutes(3), [gen_server]), + worker_spec(mnesia_tm, timer:seconds(30), ProcLib), + supervisor_spec(mnesia_checkpoint_sup), + supervisor_spec(mnesia_snmp_sup), + worker_spec(mnesia_controller, timer:seconds(3), [gen_server]), + worker_spec(mnesia_late_loader, timer:seconds(3), ProcLib) + ], + {ok, {Flags, Workers}}. + +worker_spec(Name, KillAfter, Modules) -> + KA = supervisor_timeout(KillAfter), + {Name, {Name, start, []}, permanent, KA, worker, [Name] ++ Modules}. + +supervisor_spec(Name) -> + {Name, {Name, start, []}, permanent, infinity, supervisor, + [Name, supervisor]}. + +-ifdef(debug_shutdown). +supervisor_timeout(_KillAfter) -> timer:hours(24). +-else. +supervisor_timeout(KillAfter) -> KillAfter. +-endif. + + diff --git a/lib/mnesia/src/mnesia_late_loader.erl b/lib/mnesia/src/mnesia_late_loader.erl new file mode 100644 index 0000000000..d09de3ca66 --- /dev/null +++ b/lib/mnesia/src/mnesia_late_loader.erl @@ -0,0 +1,108 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1998-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_late_loader). + +-export([ + async_late_disc_load/3, + maybe_async_late_disc_load/3, + init/1, + start/0 + ]). + +%% sys callback functions +-export([ + system_continue/3, + system_terminate/4, + system_code_change/4 + ]). + +-define(SERVER_NAME, ?MODULE). + +-record(state, {supervisor}). + +async_late_disc_load(_, [], _) -> ok; +async_late_disc_load(Node, Tabs, Reason) -> + Msg = {async_late_disc_load, Tabs, Reason}, + catch ({?SERVER_NAME, Node} ! {self(), Msg}). + +maybe_async_late_disc_load(_, [], _) -> ok; +maybe_async_late_disc_load(Node, Tabs, Reason) -> + Msg = {maybe_async_late_disc_load, Tabs, Reason}, + catch ({?SERVER_NAME, Node} ! {self(), Msg}). + +start() -> + mnesia_monitor:start_proc(?SERVER_NAME, ?MODULE, init, [self()]). + +init(Parent) -> + %% Trap exit omitted intentionally + register(?SERVER_NAME, self()), + link(whereis(mnesia_controller)), %% We may not hang + mnesia_controller:merge_schema(), + unlink(whereis(mnesia_controller)), + mnesia_lib:set(mnesia_status, running), + proc_lib:init_ack(Parent, {ok, self()}), + loop(#state{supervisor = Parent}). 
+ +loop(State) -> + receive + {_From, {async_late_disc_load, Tabs, Reason}} -> + mnesia_controller:schedule_late_disc_load(Tabs, Reason), + loop(State); + + {_From, {maybe_async_late_disc_load, Tabs, Reason}} -> + CheckMaster = + fun(Tab, Good) -> + case mnesia_recover:get_master_nodes(Tab) of + [] -> [Tab|Good]; + Masters -> + case lists:member(node(),Masters) of + true -> [Tab|Good]; + false -> Good + end + end + end, + GoodTabs = lists:foldl(CheckMaster, [], Tabs), + mnesia_controller:schedule_late_disc_load(GoodTabs, Reason), + loop(State); + + {system, From, Msg} -> + mnesia_lib:dbg_out("~p got {system, ~p, ~p}~n", + [?SERVER_NAME, From, Msg]), + Parent = State#state.supervisor, + sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State); + + Msg -> + mnesia_lib:error("~p got unexpected message: ~p~n", + [?SERVER_NAME, Msg]), + loop(State) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% System upgrade + +system_continue(_Parent, _Debug, State) -> + loop(State). + +system_terminate(Reason, _Parent, _Debug, _State) -> + exit(Reason). + +system_code_change(State, _Module, _OldVsn, _Extra) -> + {ok, State}. diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl new file mode 100644 index 0000000000..dba808e66e --- /dev/null +++ b/lib/mnesia/src/mnesia_lib.erl @@ -0,0 +1,1306 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
%%
%% %CopyrightEnd%
%%

%%
%% This module contains all sorts of various functions which don't fit
%% anywhere else. Basically everything is exported.

-module(mnesia_lib).

-include("mnesia.hrl").
-include_lib("kernel/include/file.hrl").

-export([core_file/0]).

-export([
         active_tables/0,
         add/2,
         add_list/2,
         add_lsort/2,
         all_nodes/0,
%%       catch_val/1,
         copy_file/2,
         copy_holders/1,
         coredump/0,
         coredump/1,
         create_counter/1,
         cs_to_nodes/1,
         cs_to_storage_type/2,
         dets_to_ets/6,
         db_chunk/2,
         db_init_chunk/1,
         db_init_chunk/2,
         db_init_chunk/3,
         db_erase/2,
         db_erase/3,
         db_erase_tab/1,
         db_erase_tab/2,
         db_first/1,
         db_first/2,
         db_last/1,
         db_last/2,
         db_fixtable/3,
         db_get/2,
         db_get/3,
         db_match_erase/2,
         db_match_erase/3,
         db_match_object/2,
         db_match_object/3,
         db_next_key/2,
         db_next_key/3,
         db_prev_key/2,
         db_prev_key/3,
         db_put/2,
         db_put/3,
         db_select/2,
         db_select/3,
         db_select_init/4,
         db_select_cont/3,
         db_slot/2,
         db_slot/3,
         db_update_counter/3,
         db_update_counter/4,
         dbg_out/2,
         del/2,
         dets_sync_close/1,
         dets_sync_open/2,
         dets_sync_open/3,
         dir/0,
         dir/1,
         dir_info/0,
         dirty_rpc_error_tag/1,
         dist_coredump/0,
         disk_type/1,
         disk_type/2,
         elems/2,
         ensure_loaded/1,
         error/2,
         error_desc/1,
         etype/1,
         exists/1,
         fatal/2,
         get_node_number/0,
         fix_error/1,
         important/2,
         incr_counter/1,
         incr_counter/2,
         intersect/2,
         is_running/0,
         is_running/1,
         is_running_remote/0,
         is_string/1,
         key_search_delete/3,
         key_search_all/3,
         last_error/0,
         local_active_tables/0,
         lock_table/1,
         mkcore/1,
         not_active_here/1,
         other_val/2,
         pad_name/3,
         random_time/2,
         read_counter/1,
         readable_indecies/1,
         remote_copy_holders/1,
         report_fatal/2,
         report_system_event/1,
         running_nodes/0,
         running_nodes/1,
         schema_cs_to_storage_type/2,
         search_delete/2,
         set/2,
         set_counter/2,
         set_local_content_whereabouts/1,
         set_remote_where_to_read/1,
         set_remote_where_to_read/2,
         show/1,
         show/2,
         sort_commit/1,
         storage_type_at_node/2,
         tab2dat/1,
         tab2dmp/1,
         tab2tmp/1,
         tab2dcd/1,
         tab2dcl/1,
         to_list/1,
         union/2,
         uniq/1,
         unlock_table/1,
         unset/1,
         %% update_counter/2,
         val/1,
         vcore/0,
         vcore/1,
         verbose/2,
         view/0,
         view/1,
         view/2,
         warning/2,

         is_debug_compiled/0,
         activate_debug_fun/5,
         deactivate_debug_fun/3,
         eval_debug_fun/4,
         scratch_debug_fun/0
        ]).


%% Remove every element that matches Obj exactly from List.
%% Returns {Obj | none, Rest} with Rest in reversed order.
search_delete(Obj, List) ->
    Step = fun(Elem, {_Found, Rest}) when Elem =:= Obj ->
                   {Obj, Rest};
              (Elem, {Found, Rest}) ->
                   {Found, [Elem | Rest]}
           end,
    lists:foldl(Step, {none, []}, List).

%% Remove every tuple whose element Pos compares equal (==) to Key.
%% Returns {LastRemovedTuple | none, Rest} with Rest in reversed order.
key_search_delete(Key, Pos, TupleList) ->
    Step = fun(Tuple, {_Found, Rest}) when element(Pos, Tuple) == Key ->
                   {Tuple, Rest};
              (Other, {Found, Rest}) ->
                   {Found, [Other | Rest]}
           end,
    lists:foldl(Step, {none, []}, TupleList).

%% Return, in reversed order, every tuple whose element Pos compares
%% equal (==) to Key.
key_search_all(Key, Pos, TupleList) ->
    Step = fun(Tuple, Hits) when element(Pos, Tuple) == Key ->
                   [Tuple | Hits];
              (_Other, Hits) ->
                   Hits
           end,
    lists:foldl(Step, [], TupleList).

%% Elements of L2 that also occur in L1, in the order of L2.
intersect(L1, L2) ->
    OnlyInL2 = L2 -- L1,
    L2 -- OnlyInL2.

%% Element number I of every tuple in the list.
elems(I, Tuples) ->
    [element(I, Tuple) || Tuple <- Tuples].

%% sort_commit sees to it that the checkpoint info is always first in the
%% commit_work structure; the other info doesn't need to be sorted.
%% Everything before the first {checkpoints, _} entry ends up, reversed,
%% after the remainder of the list.
sort_commit(List) ->
    NotCheckpoints = fun({checkpoints, _}) -> false;
                        (_) -> true
                     end,
    {Before, FromCheckpoints} = lists:splitwith(NotCheckpoints, List),
    FromCheckpoints ++ lists:reverse(Before).
%% true when every element of the list is an integer in 0..255.
is_string([H | T]) when 0 =< H, H < 256, is_integer(H) ->
    is_string(T);
is_string([_ | _]) ->
    false;
is_string([]) ->
    true.

%%%

%% The elements of L1 that are not members of L2, followed by L2.
union(L1, L2) ->
    [Elem || Elem <- L1, not lists:member(Elem, L2)] ++ L2.

%% Sort the list and drop adjacent exact duplicates.
%% The result is in descending order.
uniq([]) ->
    [];
uniq(List) ->
    [First | Rest] = lists:sort(List),
    lists:foldl(fun(Elem, [Elem | _] = Seen) -> Seen;
                   (Elem, Seen) -> [Elem | Seen]
                end, [First], Rest).

%% Lists are returned as is; anything else is passed to atom_to_list/1.
to_list(X) when is_list(X) -> X;
to_list(X) -> atom_to_list(X).

%% db_nodes and extra_db_nodes without duplicates.
all_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    ExtraNodes = mnesia:system_info(extra_db_nodes),
    mnesia_lib:uniq(DbNodes ++ ExtraNodes).

running_nodes() ->
    running_nodes(all_nodes()).

%% The nodes among Ns whose is_running_remote/0 replied {true, Node}.
running_nodes(Ns) ->
    {Replies, _BadNs} = rpc:multicall(Ns, ?MODULE, is_running_remote, []),
    [Node || {true, Node} <- Replies].

is_running_remote() ->
    {is_running() == yes, node()}.

%% Status of Mnesia on Node; 'no' when the rpc itself fails.
is_running(Node) when is_atom(Node) ->
    case rpc:call(Node, ?MODULE, is_running, []) of
        {badrpc, _} -> no;
        Status -> Status
    end.

%% Maps mnesia_status to yes | no | starting | stopping.
is_running() ->
    case ?catch_val(mnesia_status) of
        running -> yes;
        starting -> starting;
        stopping -> stopping;
        {'EXIT', _} -> no
    end.

show(X) ->
    show(X, []).
%% Formatted output on the 'user' device.
show(F, A) ->
    io:format(user, F, A).


%% Copy the characters, then add one space for each position still left
%% of Len, and finally append Tail.
pad_name([], Left, Tail) when Left =< 0 ->
    Tail;
pad_name([], Left, Tail) ->
    [$\s | pad_name([], Left - 1, Tail)];
pad_name([Char | Chars], Left, Tail) ->
    [Char | pad_name(Chars, Left - 1, Tail)].

%% Some utility functions .....

%% true when {Tab, where_to_read} is the local node.
active_here(Tab) ->
    val({Tab, where_to_read}) == node().

not_active_here(Tab) ->
    not active_here(Tab).

%% true when Fname can be opened for reading.
exists(Fname) ->
    case file:open(Fname, [raw,read]) of
        {ok, Fd} ->
            file:close(Fd),
            true;
        _ ->
            false
    end.

dir() -> mnesia_monitor:get_env(dir).

%% Fname placed inside the directory returned by dir/0.
dir(Fname) ->
    filename:join([dir(), to_list(Fname)]).

tab2dat(Tab) ->  %% DETS files
    DatName = lists:concat([Tab, ".DAT"]),
    dir(DatName).
tab2tmp(Tab) ->
    TmpName = lists:concat([Tab, ".TMP"]),
    dir(TmpName).

tab2dmp(Tab) ->  %% Dumped ets tables
    DmpName = lists:concat([Tab, ".DMP"]),
    dir(DmpName).

tab2dcd(Tab) ->  %% Disc copies data
    DcdName = lists:concat([Tab, ".DCD"]),
    dir(DcdName).

tab2dcl(Tab) ->  %% Disc copies log
    DclName = lists:concat([Tab, ".DCL"]),
    dir(DclName).

%% Storage type of Tab at Node according to the {Tab, StorageType}
%% variables, or 'unknown' when Node is in none of the node lists.
storage_type_at_node(Node, Tab) ->
    DiscCopies = val({Tab, disc_copies}),
    RamCopies = val({Tab, ram_copies}),
    DiscOnlyCopies = val({Tab, disc_only_copies}),
    search_key(Node, [{disc_copies, DiscCopies},
                      {ram_copies, RamCopies},
                      {disc_only_copies, DiscOnlyCopies}]).

%% Same as storage_type_at_node/2 but reads the node lists from a cstruct.
cs_to_storage_type(Node, Cs) ->
    search_key(Node, [{disc_copies, Cs#cstruct.disc_copies},
                      {ram_copies, Cs#cstruct.ram_copies},
                      {disc_only_copies, Cs#cstruct.disc_only_copies}]).

%% Like cs_to_storage_type/2, except that an 'unknown' result for the
%% schema table is reported as ram_copies.
schema_cs_to_storage_type(Node, Cs) ->
    case {cs_to_storage_type(Node, Cs), Cs#cstruct.name} of
        {unknown, schema} -> ram_copies;
        {Type, _Name} -> Type
    end.


%% Return the tag of the first {Tag, List} pair whose List contains Key,
%% or 'unknown' when there is no such pair.
search_key(_Key, []) ->
    unknown;
search_key(Key, [{Val, List} | Tail]) ->
    Found = lists:member(Key, List),
    if
        Found -> Val;
        true -> search_key(Key, Tail)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Oops, we've got some global variables here :-)

%% They are
%%
%%   {Tab, setorbag}           -> set | bag
%%   {Tab, storage_type}       -> disc_copies | ram_copies | unknown (**)
%%   {Tab, disc_copies}        -> node list  (from schema)
%%   {Tab, ram_copies}         -> node list  (from schema)
%%   {Tab, arity}              -> number
%%   {Tab, attributes}         -> atom list
%%   {Tab, wild_pattern}       -> record tuple with '_'s
%%   {Tab, {index, Pos}}       -> ets table
%%   {Tab, index}              -> integer list
%%   {Tab, cstruct}            -> cstruct structure
%%

%%   The following fields are dynamic according to
%%   the current node/table situation

%%   {Tab, where_to_write}     -> node list
%%   {Tab, where_to_read}      -> node | nowhere
%%
%%   {schema, tables}          -> tab list
%%   {schema, local_tables}    -> tab list  (**)
%%
%%   {current, db_nodes}       -> node list
%%
%%   dir                       -> directory path (**)
%%   mnesia_status             -> starting | running | stopping (**)
%%   (**) == (Different on all nodes)
%%
%% Look Var up through the ?catch_val macro; when that yields an 'EXIT'
%% tuple, let other_val/2 decide what to return or raise.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', _ReASoN_} -> mnesia_lib:other_val(Var, _ReASoN_);
        _VaLuE_ -> _VaLuE_
    end.

%% Store Val under Var in the mnesia_gvar table.
set(Var, Val) ->
    ?ets_insert(mnesia_gvar, {Var, Val}).

%% Remove Var from the mnesia_gvar table.
unset(Var) ->
    ?ets_delete(mnesia_gvar, Var).

%% Fallback used when a variable could not be read: a few well known
%% variables get a default value, everything else is reported and raised
%% by pr_other/2.
other_val(Var, Other) ->
    case Var of
        {_, where_to_read} -> nowhere;
        {_, where_to_write} -> [];
        {_, active_replicas} -> [];
        _ ->
            pr_other(Var, Other)
    end.

%% Written with the regular spec syntax; the older
%% -spec(Name/Arity :: Type) form is obsolete.
-spec pr_other(_, _) -> no_return().

%% Log the failed lookup and terminate. The reason is
%% {node_not_running, Node} when Mnesia is not running here and
%% {no_exists, Var} otherwise. A badarg raised in ets:lookup_element
%% becomes an exit, anything else an error.
pr_other(Var, Other) ->
    Why =
        case is_running() of
            no -> {node_not_running, node()};
            _ -> {no_exists, Var}
        end,
    verbose("~p (~p) val(mnesia_gvar, ~w) -> ~p ~p ~n",
            [self(), process_info(self(), registered_name),
             Var, Other, Why]),
    case Other of
        {badarg, [{ets, lookup_element, _} | _]} ->
            exit(Why);
        %% Stack frames may also carry a fourth (location) element;
        %% treat those the same way.
        {badarg, [{ets, lookup_element, _, _} | _]} ->
            exit(Why);
        _ ->
            erlang:error(Why)
    end.

%% Some functions for list valued variables

%% Put Val first in the list stored under Var, removing one older
%% occurrence of it.
add(Var, Val) ->
    L = val(Var),
    set(Var, [Val | lists:delete(Val, L)]).

%% Store union(Current, List) under Var.
add_list(Var, List) ->
    L = val(Var),
    set(Var, union(L, List)).

%% Delete the first occurrence of Val from the list stored under Var.
del(Var, Val) ->
    L = val(Var),
    set(Var, lists:delete(Val, L)).

%% LSort -> [node()| Sorted] == Locker sorted

%% The local node is always placed first; other values are inserted into
%% the part of the list that follows a leading local node, or into the
%% whole list when the local node is not first.
add_lsort(Var, Val) when node() == Val ->
    L = val(Var),
    set(Var, [Val | lists:delete(Val, L)]);
add_lsort(Var, Val) ->
    case val(Var) of
        [Head | Rest] when Head == node() ->
            set(Var, [Head | lsort_add(Val, Rest)]);
        List ->
            set(Var, lsort_add(Val, List))
    end.

%% Add Val with ordsets:add_element/2 unless ordsets:is_element/2
%% already finds it.
lsort_add(Val, List) ->
    case ordsets:is_element(Val, List) of
        true -> List;
        false -> ordsets:add_element(Val, List)
    end.

%% This function is needed due to the fact
%% that the application_controller enters
%% a deadlock now and then. ac is implemented
%% as a rather naive server.
%% Returns ok when Appl is (or already was) loaded and
%% {error, {application_load_error, Reason}} otherwise.
ensure_loaded(Appl) ->
    case application_controller:get_loaded(Appl) of
        {true, _} ->
            ok;
        false ->
            case application:load(Appl) of
                ok ->
                    ok;
                {error, {already_loaded, Appl}} ->
                    ok;
                {error, Reason} ->
                    {error, {application_load_error, Reason}}
            end
    end.
%% Filters {schema, local_tables} through active_here/1.
local_active_tables() ->
    LocalTabs = val({schema, local_tables}),
    Keep = fun(Tab) -> active_here(Tab) end,
    lists:zf(Keep, LocalTabs).

%% Tables in {schema, tables} whose where_to_read is not 'nowhere'.
active_tables() ->
    [Tab || Tab <- val({schema, tables}),
            val({Tab, where_to_read}) /= nowhere].

%% Coarse classification of a term.
etype(X) ->
    if
        is_integer(X) -> integer;
        X =:= [] -> nil;
        is_list(X) -> list;
        is_tuple(X) -> tuple;
        is_atom(X) -> atom;
        true -> othertype
    end.

%% copy_holders/1 without (one occurrence of) the local node.
remote_copy_holders(Cs) ->
    lists:delete(node(), copy_holders(Cs)).

%% Nodes holding a copy of the table described by Cs. For a
%% local_content table only the local node is ever reported.
copy_holders(Cs) when Cs#cstruct.local_content == true ->
    Here = node(),
    case lists:member(Here, cs_to_nodes(Cs)) of
        false -> [];
        true -> [Here]
    end;
copy_holders(Cs) when Cs#cstruct.local_content == false ->
    cs_to_nodes(Cs).

set_remote_where_to_read(Tab) ->
    set_remote_where_to_read(Tab, []).

%% Chooses the node to read Tab from and stores it in
%% {Tab, where_to_read}. Candidates are the active replicas
%% (restricted to the master nodes when any are configured), minus
%% Ignore, that are also current db_nodes. Nodes not listed in
%% {Tab, disc_only_copies} are preferred; 'nowhere' is stored when no
%% candidate is left.
set_remote_where_to_read(Tab, Ignore) ->
    Active = val({Tab, active_replicas}),
    Valid =
        case mnesia_recover:get_master_nodes(Tab) of
            [] -> Active;
            Masters -> mnesia_lib:intersect(Masters, Active)
        end,
    Available = mnesia_lib:intersect(val({current, db_nodes}), Valid -- Ignore),
    DiscOnly = val({Tab, disc_only_copies}),
    ReadNode =
        case {Available -- DiscOnly, Available} of
            {[Preferred | _], _} -> Preferred;
            {[], [Fallback | _]} -> Fallback;
            {[], []} -> nowhere
        end,
    set({Tab, where_to_read}, ReadNode).

%%% Local only
set_local_content_whereabouts(Tab) ->
    Here = node(),
    add({schema, local_tables}, Tab),
    add({Tab, active_replicas}, Here),
    set({Tab, where_to_write}, [Here]),
    set({Tab, where_to_read}, Here).

%%% counter routines

%% Creates counter Name in mnesia_stats with the value 0.
create_counter(Name) ->
    set_counter(Name, 0).

set_counter(Name, Val) ->
    ?ets_insert(mnesia_stats, {Name, Val}).

%% Increments counter Name by one.
incr_counter(Name) ->
    incr_counter(Name, 1).

%% Increments counter Name by I.
incr_counter(Name, I) ->
    ?ets_update_counter(mnesia_stats, Name, I).
%% update_counter(Name, Val) ->
%%    ?ets_update_counter(mnesia_stats, Name, Val).

%% Current value of counter Name in mnesia_stats.
read_counter(Name) ->
    ?ets_lookup_element(mnesia_stats, Name, 2).

%% All nodes listed in Cs, in the order disc_only_copies, disc_copies,
%% ram_copies.
cs_to_nodes(Cs) ->
    DiscOnly = Cs#cstruct.disc_only_copies,
    Disc = Cs#cstruct.disc_copies,
    Ram = Cs#cstruct.ram_copies,
    DiscOnly ++ Disc ++ Ram.

%% Runs coredump/0 on every node returned by all_nodes/0.
dist_coredump() ->
    dist_coredump(all_nodes()).

%% Runs coredump/0 on the nodes Ns and returns the replies of the
%% nodes that answered.
dist_coredump(Ns) ->
    {Replies, _BadNodes} = rpc:multicall(Ns, ?MODULE, coredump, []),
    Replies.

coredump() ->
    coredump({crashinfo, {"user initiated~n", []}}).

%% Builds a core with mkcore/1, writes it to the file named by
%% core_file/0 and returns that file name. The result of the write is
%% not checked.
coredump(CrashInfo) ->
    CoreBin = mkcore(CrashInfo),
    FileName = core_file(),
    important("Writing Mnesia core to file: ~p...~p~n", [FileName, CrashInfo]),
    file:write_file(FileName, CoreBin),
    FileName.

%% Core file name "MnesiaCore.<node>_<Y>_<M>_<D>_<h>_<m>_<s>", where
%% every component below 10 is zero padded. The name is made absolute
%% relative to the core_dir environment value when that is a list,
%% otherwise relative to the current working directory.
core_file() ->
    Stamp = tuple_to_list(date()) ++ tuple_to_list(time()),
    Pad = fun(I) when I < 10 -> ["_0", I];
             (I) -> ["_", I]
          end,
    Name = lists:concat(["MnesiaCore.", node()] ++ lists:flatmap(Pad, Stamp)),
    case mnesia_monitor:get_env(core_dir) of
        Dir when is_list(Dir) -> filename:absname(Name, Dir);
        _ -> filename:absname(Name)
    end.
%% Collects a snapshot of node and Mnesia state and returns it as a
%% binary (term_to_binary of a list). The first element is CrashInfo;
%% the rest are {Item, Info} pairs. Almost every item is wrapped in
%% 'catch', so a failing probe ends up as an {'EXIT', _} value in the
%% core instead of aborting the dump.
mkcore(CrashInfo) ->
%   dbg_out("Making a Mnesia core dump...~p~n", [CrashInfo]),
    Nodes = [node() |nodes()],
    %%TidLocks = (catch ets:tab2list(mnesia_tid_locks)),
    %% Fetched once up front because it is used twice below
    %% (locking_procs and held_locks).
    HeldLocks = (catch mnesia:system_info(held_locks)),
    Core = [
            CrashInfo,
            {time, {date(), time()}},
            {self, catch process_info(self())},
            {nodes, catch rpc:multicall(Nodes, ?MODULE, get_node_number, [])},
            {applications, catch lists:sort(application:loaded_applications())},
            {flags, catch init:get_arguments()},
            {code_path, catch code:get_path()},
            {code_loaded, catch lists:sort(code:all_loaded())},
            {etsinfo, catch ets_info(ets:all())},

            {version, catch mnesia:system_info(version)},
            {schema, catch ets:tab2list(schema)},
            {gvar, catch ets:tab2list(mnesia_gvar)},
            {master_nodes, catch mnesia_recover:get_master_node_info()},

            {processes, catch procs()},
            {relatives, catch relatives()},
            {workers, catch workers(mnesia_controller:get_workers(2000))},
            {locking_procs, catch locking_procs(HeldLocks)},

            {held_locks, HeldLocks},
            {lock_queue, catch mnesia:system_info(lock_queue)},
            {load_info, catch mnesia_controller:get_info(2000)},
            {trans_info, catch mnesia_tm:get_info(2000)},

            {schema_file, catch file:read_file(tab2dat(schema))},
            {dir_info, catch dir_info()},
            {logfile, catch {ok, read_log_files()}}
           ],
    term_to_binary(Core).

%% One {Pid, Summary} pair per process on this node. Summary holds the
%% values selected by proc_info/1, or the caught failure (for example
%% when process_info/1 returns 'undefined' for a process that has
%% already exited).
procs() ->
    Fun = fun(P) -> {P, (catch lists:zf(fun proc_info/1, process_info(P)))} end,
    lists:map(Fun, processes()).

%% lists:zf/2 filter: keeps only the value of the process_info items
%% of interest. Note that the item tag itself is dropped.
proc_info({registered_name, Val}) -> {true, Val};
proc_info({message_queue_len, Val}) -> {true, Val};
proc_info({status, Val}) -> {true, Val};
proc_info({current_function, Val}) -> {true, Val};
proc_info(_) -> false.

%% Called through rpc:multicall from mkcore/1; identifies the node and
%% the process that served the call.
get_node_number() ->
    {node(), self()}.

%% {FileName, ReadResult} for every file reported by
%% mnesia_log:log_files/0.
read_log_files() ->
    [{F, catch file:read_file(F)} || F <- mnesia_log:log_files()].
%% File information about the current working directory, the Mnesia
%% directory and every file in the Mnesia directory. When the
%% directory cannot be listed, the list_dir result is included as the
%% last element instead.
dir_info() ->
    {ok, Cwd} = file:get_cwd(),
    MnesiaDir = dir(),
    Dirs = [{cwd, Cwd, file:read_file_info(Cwd)},
            {mnesia_dir, MnesiaDir, file:read_file_info(MnesiaDir)}],
    Contents =
        case file:list_dir(MnesiaDir) of
            {ok, Files} ->
                [{mnesia_file, F, catch file:read_file_info(dir(F))} || F <- Files];
            Other ->
                [Other]
        end,
    Dirs ++ Contents.

%% {table, Tab, Info} for every ets table in the given list.
ets_info(Tabs) ->
    [{table, Tab, mk_info_tuple(ets:info(Tab))} || Tab <- Tabs].

%% Turns a list into a tuple; anything else is passed through.
mk_info_tuple(T) ->
    if
        is_list(T) -> list_to_tuple(T);
        true -> T
    end.

%% {Name, Pid, ProcessInfo} for every name in mnesia:ms() that is
%% currently registered.
relatives() ->
    [{Name, Pid, catch process_info(Pid)}
     || Name <- mnesia:ms(),
        Pid <- [whereis(Name)],
        Pid =/= undefined].

%% Process information for the loader, sender and dumper workers.
%% Entries whose pid is 'undefined' are left out.
workers({workers, Loaders, Senders, Dumper}) ->
    Describe =
        fun({undefined, {send_table, _Tab, _Receiver, _St}}) ->
                false;
           ({Pid, {send_table, Tab, _Receiver, _St}}) ->
                {true, {Pid, Tab, catch process_info(Pid)}};
           ({Pid, What}) when is_pid(Pid) ->
                {true, {Pid, What, catch process_info(Pid)}};
           ({_Name, undefined}) ->
                false;
           ({Name, Pid}) ->
                {true, {Name, Pid, catch process_info(Pid)}}
        end,
    SenderInfo = lists:zf(Describe, Senders),
    LoaderInfo = lists:zf(Describe, Loaders),
    DumperInfo = lists:zf(Describe, [{dumper, Dumper}]),
    [{senders, SenderInfo}, {loader, LoaderInfo} | DumperInfo].

%% {Pid, ProcessInfo} for every distinct transaction id (third element
%% of each lock) whose owning process lives on this node.
locking_procs(LockList) when is_list(LockList) ->
    UniqueTids = uniq([element(3, Lock) || Lock <- LockList]),
    Local =
        fun(Tid) ->
                Pid = Tid#tid.pid,
                if
                    node(Pid) == node() ->
                        {true, {Pid, catch process_info(Pid)}};
                    true ->
                        false
                end
        end,
    lists:zf(Local, UniqueTids).

%% Builds a core of the running system and displays it.
view() ->
    vcore(mkcore({crashinfo, {"view only~n", []}})).

%% Displays a Mnesia file on the tty. The file may be repaired.
%% The viewer is chosen from the file name: a .DAT/.RET/.DMP/.TMP
%% suffix selects the dets viewer, a .LOG/.BUP/.ETS suffix the log
%% viewer, and a "MnesiaCore." prefix the core viewer.
view(File) ->
    IsDat = suffix([".DAT", ".RET", ".DMP", ".TMP"], File),
    IsLog = (not IsDat) andalso suffix([".LOG", ".BUP", ".ETS"], File),
    IsCore = (not IsDat) andalso (not IsLog)
        andalso lists:prefix("MnesiaCore.", File),
    if
        IsDat -> view(File, dat);
        IsLog -> view(File, log);
        IsCore -> view(File, core);
        true -> {error, "Unknown file name"}
    end.
%% Displays File with the viewer selected by the second argument.
view(File, dat) ->
    dets:view(File);
view(File, log) ->
    mnesia_log:view(File);
view(File, core) ->
    vcore(File).

%% true when File ends with any of the strings in Suffixes.
suffix(Suffixes, File) ->
    Fun = fun(S) -> lists:suffix(S, File) end,
    lists:any(Fun, Suffixes).

%% View a core file

%% Displays the last (in sort order) core file of this node found in
%% the current working directory. Returns 'nocore' when there is none,
%% or the error from file:list_dir/1.
vcore() ->
    Prefix = lists:concat(["MnesiaCore.", node()]),
    Filter = fun(F) -> lists:prefix(Prefix, F) end,
    {ok, Cwd} = file:get_cwd(),
    case file:list_dir(Cwd) of
        {ok, Files}->
            CoreFiles = lists:sort(lists:zf(Filter, Files)),
            show("Mnesia core files: ~p~n", [CoreFiles]),
            case CoreFiles of
                [] ->
                    %% lists:last/1 has no clause for the empty list;
                    %% answer as vcore/1 does for an unreadable file.
                    nocore;
                _ ->
                    vcore(lists:last(CoreFiles))
            end;
        Error ->
            Error
    end.

%% vcore(Bin) displays a core given as the binary built by mkcore/1;
%% vcore(File) reads the named file first and returns 'nocore' when it
%% cannot be read. A failure while displaying one element is printed
%% and does not stop the remaining elements.
%% NOTE(review): binary_to_term/1 is applied to the file contents as
%% is; only view core files from a trusted source.
vcore(Bin) when is_binary(Bin) ->
    Core = binary_to_term(Bin),
    Fun = fun({Item, Info}) ->
                  show("***** ~p *****~n", [Item]),
                  case catch vcore_elem({Item, Info}) of
                      {'EXIT', Reason} ->
                          show("{'EXIT', ~p}~n", [Reason]);
                      _ -> ok
                  end
          end,
    lists:foreach(Fun, Core);

vcore(File) ->
    show("~n***** Mnesia core: ~p *****~n", [File]),
    case file:read_file(File) of
        {ok, Bin} ->
            vcore(Bin);
        _ ->
            nocore
    end.

%% Displays one {Item, Info} element of a core. The schema file and
%% the log files are written to a scratch file so that the ordinary
%% file viewers can be used, and the scratch file is deleted after.
%% NOTE(review): the scratch files use fixed names under "/tmp", so
%% concurrent viewers would overwrite each other's files.
vcore_elem({schema_file, {ok, B}}) ->
    Fname = "/tmp/schema.DAT",
    file:write_file(Fname, B),
    dets:view(Fname),
    file:delete(Fname);

vcore_elem({logfile, {ok, BinList}}) ->
    Fun = fun({F, Info}) ->
                  show("----- logfile: ~p -----~n", [F]),
                  case Info of
                      {ok, B} ->
                          Fname = "/tmp/mnesia_vcore_elem.TMP",
                          file:write_file(Fname, B),
                          mnesia_log:view(Fname),
                          file:delete(Fname);
                      _ ->
                          show("~p~n", [Info])
                  end
          end,
    lists:foreach(Fun, BinList);

vcore_elem({crashinfo, {Format, Args}}) ->
    show(Format, Args);
vcore_elem({gvar, L}) ->
    show("~p~n", [lists:sort(L)]);
vcore_elem({transactions, Info}) ->
    mnesia_tm:display_info(user, Info);

vcore_elem({_Item, Info}) ->
    show("~p~n", [Info]).
%% Normalises an error term. The original term is always remembered
%% under the global variable last_error. {aborted, R} and {abort, R}
%% are unwrapped, atoms are returned as is, and an 'EXIT' raised in a
%% module whose name starts with "mne" is saved and reported as badarg.
fix_error(X) ->
    set(last_error, X), %% for debugabililty
    case X of
        {aborted, Reason} -> Reason;
        {abort, Reason} -> Reason;
        Y when is_atom(Y) -> Y;
        {'EXIT', {_Reason, {Mod, _, _}}} when is_atom(Mod) ->
            save(X),
            case atom_to_list(Mod) of
                [$m, $n, $e|_] -> badarg;
                _ -> X
            end;
        _ -> X
    end.

%% The term most recently passed to fix_error/1.
last_error() ->
    val(last_error).

%% The following is a list of possible mnesia errors and what they
%% actually mean.
%% {'EXIT', R}, {error, R} and {aborted, R} are described by R. In any
%% other non-empty tuple only the first element is replaced by its
%% description. Unknown terms are returned unchanged.

error_desc(nested_transaction) -> "Nested transactions are not allowed";
error_desc(badarg) -> "Bad or invalid argument, possibly bad type";
error_desc(no_transaction) -> "Operation not allowed outside transactions";
error_desc(combine_error) -> "Table options were ilegally combined";
error_desc(bad_index) -> "Index already exists or was out of bounds";
error_desc(already_exists) -> "Some schema option we try to set is already on";
error_desc(index_exists)-> "Some ops can not be performed on tabs with index";
error_desc(no_exists)-> "Tried to perform op on non-existing (non alive) item";
error_desc(system_limit) -> "Some system_limit was exhausted";
%% Adjacent string literals are concatenated without a separator, so
%% every fragment but the last ends with a space.
error_desc(mnesia_down) -> "A transaction involving objects at some remote "
                           "node which died while transaction was executing "
                           "*and* object(s) are no longer available elsewhere "
                           "in the network";
error_desc(not_a_db_node) -> "A node which is non existant in "
                              "the schema was mentioned";
error_desc(bad_type)            -> "Bad type on some provided arguments";
error_desc(node_not_running)    -> "Node not running";
error_desc(truncated_binary_file) -> "Truncated binary in file";
error_desc(active)     -> "Some delete ops require that "
                           "all active objects are removed";
error_desc(illegal) -> "Operation not supported on object";
error_desc({'EXIT', Reason}) ->
    error_desc(Reason);
error_desc({error, Reason}) ->
    error_desc(Reason);
error_desc({aborted, Reason}) ->
    error_desc(Reason);
error_desc(Reason) when is_tuple(Reason), tuple_size(Reason) > 0 ->
    setelement(1, Reason, error_desc(element(1, Reason)));
error_desc(Reason) ->
    Reason.

%% Error tag for a failed dirty rpc: exits and missing variables are
%% reported as badarg, everything else as no_exists.
dirty_rpc_error_tag(Reason) ->
    case Reason of
        {'EXIT', _} -> badarg;
        no_variable -> badarg;
        _ -> no_exists
    end.

%% Reports a fatal error together with a core, waits, kills Mnesia
%% locally and exits the calling process with reason 'fatal'.
fatal(Format, Args) ->
    catch set(mnesia_status, stopping),
    Core = mkcore({crashinfo, {Format, Args}}),
    report_fatal(Format, Args, Core),
    timer:sleep(10000), % Enough to write the core dump to disc?
    mnesia:lkill(),
    exit(fatal).

report_fatal(Format, Args) ->
    report_fatal(Format, Args, nocore).

%% Sends a mnesia_fatal system event and then an exit signal with
%% reason 'fatal' to mnesia_monitor. The exit call is wrapped in catch
%% because whereis/1 returns 'undefined' when the monitor is not
%% registered.
report_fatal(Format, Args, Core) ->
    report_system_event({mnesia_fatal, Format, Args, Core}),
    catch exit(whereis(mnesia_monitor), fatal).

%% We sleep longer and longer the more we try
%% Made some testing and came up with the following constants
%%
%% Returns Retries*Retries plus a random integer in 1..MaxIntv, where
%% MaxIntv approaches 500 as Retries grows. The second argument is not
%% used.
random_time(Retries, _Counter0) ->
%    UpperLimit = 2000,
%    MaxIntv = trunc(UpperLimit * (1-(4/((Retries*Retries)+4)))),
    UpperLimit = 500,
    Dup = Retries * Retries,
    Computed = trunc(UpperLimit * (1-(50/((Dup)+50)))),
    %% random:uniform/1 only accepts N >= 1, and Computed is 0 when
    %% Retries is 0.
    MaxIntv = if
                  Computed < 1 -> 1;
                  true -> Computed
              end,
    %% Seed the generator the first time it is used in this process.
    case get(random_seed) of
        undefined ->
            {X, Y, Z} = erlang:now(), %% time()
            random:seed(X, Y, Z);
        _ ->
            ok
    end,
    %% dbg_out("---random_test rs ~w max ~w val ~w---~n", [Retries, MaxIntv, Time]),
    Dup + random:uniform(MaxIntv).

%% Wraps Event0 as {mnesia_system_event, Event0}, notifies the
%% mnesia_event handler (falling back to report_system_event/2 when it
%% is not registered) and sends the event to every pid stored in the
%% global variable 'subscribers'. Always returns ok.
report_system_event(Event0) ->
    Event = {mnesia_system_event, Event0},
    report_system_event(catch_notify(Event), Event),
    case ?catch_val(subscribers) of
        {'EXIT', _} -> ignore;
        Pids -> lists:foreach(fun(Pid) -> Pid ! Event end, Pids)
    end,
    ok.

%% gen_event:notify/2 towards mnesia_event; returns an 'EXIT' tuple
%% instead of raising when the process is not registered.
catch_notify(Event) ->
    case whereis(mnesia_event) of
        undefined ->
            {'EXIT', {badarg, {mnesia_event, Event}}};
        Pid ->
            gen_event:notify(Pid, Event)
    end.
%% Second stage of report_system_event/1. The first argument is the
%% result of catch_notify/1: when it is an 'EXIT' tuple (no event
%% manager could be notified) a temporary event manager is started
%% through mnesia_sup:start_event/0, the event is delivered to the
%% configured event_module with a synchronous call, and the manager is
%% stopped again. Any other result means the notification was sent and
%% nothing more is done.
report_system_event({'EXIT', Reason}, Event) ->
    Mod = mnesia_monitor:get_env(event_module),
    case mnesia_sup:start_event() of
        {ok, Pid} ->
            %% Linked only for the duration of the call.
            link(Pid),
            gen_event:call(mnesia_event, Mod, Event, infinity),
            unlink(Pid),

            %% We get an exit signal if server dies
            %% (delivered as a message only when the caller traps
            %% exits; 'after 0' makes this a non-blocking check)
            receive
                {'EXIT', Pid, _Reason} ->
                    {error, {node_not_running, node()}}
            after 0 ->
                    gen_event:stop(mnesia_event),
                    ok
            end;

        Error ->
            Msg = "Mnesia(~p): Cannot report event ~p: ~p (~p)~n",
            error_logger:format(Msg, [node(), Event, Reason, Error])
    end;
report_system_event(_Res, _Event) ->
    ignore.

%% important messages are reported regardless of debug level
important(Format, Args) ->
    save({Format, Args}),
    report_system_event({mnesia_info, Format, Args}).

%% Warning messages are reported regardless of debug level
warning(Format, Args) ->
    save({Format, Args}),
    report_system_event({mnesia_warning, Format, Args}).

%% error messages are reported regardless of debug level
error(Format, Args) ->
    save({Format, Args}),
    report_system_event({mnesia_error, Format, Args}).

%% verbose messages are reported if the debug level is verbose, debug
%% or trace; at level none they are only saved (see save/1)
verbose(Format, Args) ->
    case mnesia_monitor:get_env(debug) of
        none ->    save({Format, Args});
        verbose -> important(Format, Args);
        debug ->   important(Format, Args);
        trace ->   important(Format, Args)
    end.

%% debug messages are dropped at debug level none, only saved at level
%% verbose, and reported as mnesia_info events at any other level
dbg_out(Format, Args) ->
    case mnesia_monitor:get_env(debug) of
        none ->    ignore;
        verbose -> save({Format, Args});
        _ ->  report_system_event({mnesia_info, Format, Args})
    end.

%% Keep the most recent debug print outs in a small ring buffer.
%% Failures (e.g. when the position variable does not exist) are
%% ignored.
save(DbgInfo) ->
    catch save2(DbgInfo).

%% The ring buffer lives in mnesia_gvar: {'$$$_report', current_pos}
%% holds the slot written last and {'$$$_report', N} holds
%% {Date, Time, DbgInfo}. The position wraps from 30 back to 0, so the
%% slots 0..30 are reused.
save2(DbgInfo) ->
    Key = {'$$$_report', current_pos},
    P =
        case ?ets_lookup_element(mnesia_gvar, Key, 2) of
            30 -> -1;
            I -> I
        end,
    set({'$$$_report', current_pos}, P+1),
    set({'$$$_report', P+1}, {date(), time(), DbgInfo}).
%% Copies the file From to the file To in chunks of 8000 bytes.
%% Returns ok, or {error, Reason} from the first open, read or write
%% that fails. Every file that was opened is closed again, also when
%% the destination cannot be opened or the copy loop raises.
copy_file(From, To) ->
    case file:open(From, [raw, binary, read]) of
        {ok, F} ->
            try file:open(To, [raw, binary, write]) of
                {ok, T} ->
                    try
                        copy_file_loop(F, T, 8000)
                    after
                        file:close(T)
                    end;
                {error, Reason} ->
                    {error, Reason}
            after
                %% Runs on every path, so the source handle no longer
                %% leaks when the destination cannot be opened.
                file:close(F)
            end;
        {error, Reason} ->
            {error, Reason}
    end.

%% Copies from F to T until end of file. A failed write stops the
%% copy and is returned, instead of being ignored and reported as ok.
copy_file_loop(F, T, ChunkSize) ->
    case file:read(F, ChunkSize) of
        {ok, Bin} ->
            case file:write(T, Bin) of
                ok ->
                    copy_file_loop(F, T, ChunkSize);
                {error, Reason} ->
                    {error, Reason}
            end;
        eof ->
            ok;
        {error, Reason} ->
            {error, Reason}
    end.


%%%%%%%%%%%%
%% versions of all the lowlevel db funcs that determine whether we
%% shall go to disc or ram to do the actual operation.
%% The variants without a storage argument look the storage type up
%% in the global variable {Tab, storage_type}.

%% Objects stored under Key.
db_get(Tab, Key) ->
    db_get(val({Tab, storage_type}), Tab, Key).
db_get(ram_copies, Tab, Key) -> ?ets_lookup(Tab, Key);
db_get(disc_copies, Tab, Key) -> ?ets_lookup(Tab, Key);
db_get(disc_only_copies, Tab, Key) -> dets:lookup(Tab, Key).

%% Starts a chunked traversal of all objects in Tab, N objects per
%% chunk (1000 when N is not given). Continue with db_chunk/2.
db_init_chunk(Tab) ->
    db_init_chunk(val({Tab, storage_type}), Tab, 1000).
db_init_chunk(Tab, N) ->
    db_init_chunk(val({Tab, storage_type}), Tab, N).

db_init_chunk(disc_only_copies, Tab, N) ->
    dets:select(Tab, [{'_', [], ['$_']}], N);
db_init_chunk(_, Tab, N) ->
    ets:select(Tab, [{'_', [], ['$_']}], N).

%% Next chunk of a traversal started with db_init_chunk.
db_chunk(disc_only_copies, State) ->
    dets:select(State);
db_chunk(_, State) ->
    ets:select(State).

%% Inserts Val into Tab.
db_put(Tab, Val) ->
    db_put(val({Tab, storage_type}), Tab, Val).

db_put(ram_copies, Tab, Val) -> ?ets_insert(Tab, Val), ok;
db_put(disc_copies, Tab, Val) -> ?ets_insert(Tab, Val), ok;
db_put(disc_only_copies, Tab, Val) -> dets:insert(Tab, Val).

%% All objects matching Pat. The table is fixed while matching; a
%% failure is caught so that the table is always released, and is then
%% turned into an exit with the same reason.
db_match_object(Tab, Pat) ->
    db_match_object(val({Tab, storage_type}), Tab, Pat).
db_match_object(Storage, Tab, Pat) ->
    db_fixtable(Storage, Tab, true),
    Res = catch_match_object(Storage, Tab, Pat),
    db_fixtable(Storage, Tab, false),
    case Res of
        {'EXIT', Reason} -> exit(Reason);
        _ -> Res
    end.
%% match_object on the dets or ets table, with any failure returned
%% as an {'EXIT', _} value.
catch_match_object(disc_only_copies, Tab, Pat) ->
    catch dets:match_object(Tab, Pat);
catch_match_object(_Storage, Tab, Pat) ->
    catch ets:match_object(Tab, Pat).

%% Runs the match specification Pat on Tab, taking the storage type
%% from the global variable {Tab, storage_type}.
db_select(Tab, Pat) ->
    Storage = val({Tab, storage_type}),
    db_select(Storage, Tab, Pat).

%% Runs the match specification with the table fixed. The table is
%% released before a caught failure is turned into an exit.
db_select(Storage, Tab, Pat) ->
    db_fixtable(Storage, Tab, true),
    Outcome = catch_select(Storage, Tab, Pat),
    db_fixtable(Storage, Tab, false),
    case Outcome of
        {'EXIT', Why} -> exit(Why);
        Selected -> Selected
    end.

catch_select(disc_only_copies, Tab, Pat) ->
    catch dets:select(Tab, Pat);
catch_select(_Storage, Tab, Pat) ->
    catch ets:select(Tab, Pat).

%% First chunk of at most Limit results.
db_select_init(disc_only_copies, Tab, Pat, Limit) ->
    dets:select(Tab, Pat, Limit);
db_select_init(_Storage, Tab, Pat, Limit) ->
    ets:select(Tab, Pat, Limit).

%% Next chunk; the continuation is repaired with the match
%% specification Ms before it is used.
db_select_cont(disc_only_copies, Cont0, Ms) ->
    dets:select(dets:repair_continuation(Cont0, Ms));
db_select_cont(_Storage, Cont0, Ms) ->
    ets:select(ets:repair_continuation(Cont0, Ms)).

%% Fixes (Bool = true) or releases (Bool = false) the table. Accepts
%% the Mnesia storage types as well as the plain tags ets and dets.
db_fixtable(Storage, Tab, Bool)
  when Storage =:= ets; Storage =:= ram_copies; Storage =:= disc_copies ->
    ets:safe_fixtable(Tab, Bool);
db_fixtable(Storage, Tab, Bool)
  when Storage =:= dets; Storage =:= disc_only_copies ->
    dets:safe_fixtable(Tab, Bool).

%% Deletes the objects stored under Key.
db_erase(Tab, Key) ->
    Storage = val({Tab, storage_type}),
    db_erase(Storage, Tab, Key).

db_erase(Storage, Tab, Key)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_delete(Tab, Key),
    ok;
db_erase(disc_only_copies, Tab, Key) ->
    dets:delete(Tab, Key).

%% Deletes all objects matching Pat.
db_match_erase(Tab, Pat) ->
    Storage = val({Tab, storage_type}),
    db_match_erase(Storage, Tab, Pat).

db_match_erase(Storage, Tab, Pat)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_match_delete(Tab, Pat),
    ok;
db_match_erase(disc_only_copies, Tab, Pat) ->
    dets:match_delete(Tab, Pat).

%% First key of Tab.
db_first(Tab) ->
    Storage = val({Tab, storage_type}),
    db_first(Storage, Tab).
%% The functions below dispatch on the storage type: ram_copies and
%% disc_copies go to ets, disc_only_copies goes to dets. The variants
%% without a storage argument read it from {Tab, storage_type}.

db_first(Storage, Tab)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_first(Tab);
db_first(disc_only_copies, Tab) ->
    dets:first(Tab).

%% Key following Key.
db_next_key(Tab, Key) ->
    Storage = val({Tab, storage_type}),
    db_next_key(Storage, Tab, Key).

db_next_key(Storage, Tab, Key)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_next(Tab, Key);
db_next_key(disc_only_copies, Tab, Key) ->
    dets:next(Tab, Key).

%% Last key of Tab.
db_last(Tab) ->
    Storage = val({Tab, storage_type}),
    db_last(Storage, Tab).

db_last(Storage, Tab)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_last(Tab);
db_last(disc_only_copies, Tab) ->
    %% Dets don't have order
    dets:first(Tab).

%% Key preceding Key.
db_prev_key(Tab, Key) ->
    Storage = val({Tab, storage_type}),
    db_prev_key(Storage, Tab, Key).

db_prev_key(Storage, Tab, Key)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_prev(Tab, Key);
db_prev_key(disc_only_copies, Tab, Key) ->
    %% Dets don't have order
    dets:next(Tab, Key).

%% Objects in slot number Pos.
db_slot(Tab, Pos) ->
    Storage = val({Tab, storage_type}),
    db_slot(Storage, Tab, Pos).

db_slot(Storage, Tab, Pos)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_slot(Tab, Pos);
db_slot(disc_only_copies, Tab, Pos) ->
    dets:slot(Tab, Pos).

%% Adds Val to the counter object with key C.
db_update_counter(Tab, C, Val) ->
    Storage = val({Tab, storage_type}),
    db_update_counter(Storage, Tab, C, Val).

db_update_counter(Storage, Tab, C, Val)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_update_counter(Tab, C, Val);
db_update_counter(disc_only_copies, Tab, C, Val) ->
    dets:update_counter(Tab, C, Val).

%% Deletes the whole ets table; nothing is done for disc_only_copies.
db_erase_tab(Tab) ->
    Storage = val({Tab, storage_type}),
    db_erase_tab(Storage, Tab).

db_erase_tab(Storage, Tab)
  when Storage =:= ram_copies; Storage =:= disc_copies ->
    ?ets_delete_table(Tab);
db_erase_tab(disc_only_copies, _Tab) ->
    ignore.
%% Loads the dets file File into the ets table Tab.
%% assuming that Tab is a valid ets-table
%% The dets table is opened under the name Tabname, with or without
%% the table lock depending on Lock (yes | no), and is closed again
%% after the transfer. Returns 'loaded' when dets:to_ets/2 returns
%% Tab, otherwise whatever dets:to_ets/2 or the open call returned.
dets_to_ets(Tabname, Tab, File, Type, Rep, Lock) ->
    {Open, Close} = mkfuns(Lock),
    OpenArgs = [{file, File}, {type, disk_type(Tab, Type)},
                {keypos, 2}, {repair, Rep}],
    case Open(Tabname, OpenArgs) of
        {ok, Tabname} ->
            Transferred = dets:to_ets(Tabname, Tab),
            Close(Tabname),
            trav_ret(Transferred, Tab);
        Failed ->
            Failed
    end.

trav_ret(Tabname, Tabname) -> loaded;
trav_ret(Other, _Tabname) -> Other.

%% {Open, Close} funs: the locking variants for 'yes', plain dets for
%% 'no'.
mkfuns(yes) ->
    {fun dets_sync_open/2, fun dets_sync_close/1};
mkfuns(no) ->
    {fun dets:open_file/2, fun dets:close/1}.

%% Dets table type for Tab, taken from {Tab, setorbag}.
disk_type(Tab) ->
    disk_type(Tab, val({Tab, setorbag})).

%% ordered_set is stored as a plain set; every other type is kept.
disk_type(_Tab, Type) ->
    case Type of
        ordered_set -> set;
        _ -> Type
    end.

%% Opens File as the dets table Ref with Mnesia's standard options;
%% the table type is the one configured for Tab.
dets_sync_open(Tab, Ref, File) ->
    Repair = mnesia_monitor:get_env(auto_repair),
    dets_sync_open(Ref, [{file, File},
                         {keypos, 2},
                         {repair, Repair},
                         {type, disk_type(Tab)}]).

%% Takes the node-local global lock {mnesia_table_lock, Tab} for the
%% calling process, retrying without limit.
lock_table(Tab) ->
    LockId = {{mnesia_table_lock, Tab}, self()},
    global:set_lock(LockId, [node()], infinity).
%    dbg_out("dets_sync_open: ~p ~p~n", [T, self()]),

%% Releases the lock taken by lock_table/1.
unlock_table(Tab) ->
    LockId = {{mnesia_table_lock, Tab}, self()},
    global:del_lock(LockId, [node()]).
%    dbg_out("unlock_table: ~p ~p~n", [T, self()]),

%% Opens the dets table Tab while holding its table lock. On success
%% the lock stays held until dets_sync_close/1; on failure it is
%% released here.
dets_sync_open(Tab, Args) ->
    lock_table(Tab),
    case dets:open_file(Tab, Args) of
        {ok, Tab} = Opened ->
            Opened;
        Failed ->
            dets_sync_close(Tab),
            Failed
    end.

%% Closes the dets table (ignoring failures) and releases its lock.
dets_sync_close(Tab) ->
    catch dets:close(Tab),
    unlock_table(Tab),
    ok.

readable_indecies(Tab) ->
    val({Tab, index}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Managing conditional debug functions
%%
%% The main idea with the debug_fun's is to allow test programs
%% to control the internal behaviour of Mnesia. This is needed
%% to make the test programs independent of system load, swapping
%% and other circumstances that may affect the behaviour of Mnesia.
%%
%% First, calls to ?eval_debug_fun should be inserted at well
%% defined places in Mnesia's code. E.g. in critical situations
%% of startup, transaction commit, backups etc.
%%
%% Then compile Mnesia with the compiler option 'debug'.
%%
%% In test programs ?activate_debug_fun should be called
%% in order to bind a fun to the debug identifier stated
%% in the call to ?eval_debug_fun.
%%
%% If eval_debug_fun finds that the fun is activated it
%% invokes the fun as NewContext = Fun(PreviousContext, EvalContext)
%% and replaces the PreviousContext with the NewContext.
%% The initial context of a debug_fun is given as argument to
%% activate_debug_fun.

-define(DEBUG_TAB, mnesia_debug).
-record(debug_info, {id, function, context, file, line}).

%% Drops the debug table if it exists (a failure to delete is
%% ignored) and creates it again, empty.
scratch_debug_fun() ->
    dbg_out("scratch_debug_fun(): ~p~n", [?DEBUG_TAB]),
    (catch ?ets_delete_table(?DEBUG_TAB)),
    ?ets_new_table(?DEBUG_TAB, [set, public, named_table, {keypos, 2}]).

%% Binds Fun and its InitialContext to the identifier FunId. File and
%% Line are stored with the binding.
activate_debug_fun(FunId, Fun, InitialContext, File, Line) ->
    update_debug_info(#debug_info{id = FunId,
                                  function = Fun,
                                  context = InitialContext,
                                  file = File,
                                  line = Line}).

%% Stores Info in the debug table. When the insert fails the table is
%% recreated with scratch_debug_fun/0 and the insert is tried once
%% more. Returns ok.
update_debug_info(Info) ->
    FirstTry = (catch ?ets_insert(?DEBUG_TAB, Info)),
    case FirstTry of
        {'EXIT', _} ->
            scratch_debug_fun(),
            ?ets_insert(?DEBUG_TAB, Info);
        _ ->
            ok
    end,
    dbg_out("update_debug_info(~p)~n", [Info]),
    ok.

%% Removes the binding for FunId; a failure to delete is ignored.
deactivate_debug_fun(FunId, _File, _Line) ->
    catch ?ets_delete(?DEBUG_TAB, FunId),
    ok.
+ +eval_debug_fun(FunId, EvalContext, EvalFile, EvalLine) -> + case catch ?ets_lookup(?DEBUG_TAB, FunId) of + [] -> + ok; + [Info] -> + OldContext = Info#debug_info.context, + dbg_out("~s(~p): ~w " + "activated in ~s(~p)~n " + "eval_debug_fun(~w, ~w)~n", + [filename:basename(EvalFile), EvalLine, Info#debug_info.id, + filename:basename(Info#debug_info.file), Info#debug_info.line, + OldContext, EvalContext]), + Fun = Info#debug_info.function, + NewContext = Fun(OldContext, EvalContext), + + case catch ?ets_lookup(?DEBUG_TAB, FunId) of + [Info] when NewContext /= OldContext -> + NewInfo = Info#debug_info{context = NewContext}, + update_debug_info(NewInfo); + _ -> + ok + end; + {'EXIT', _} -> ok + end. + +-ifdef(debug). + is_debug_compiled() -> true. +-else. + is_debug_compiled() -> false. +-endif. + + diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl new file mode 100644 index 0000000000..77c317abc5 --- /dev/null +++ b/lib/mnesia/src/mnesia_loader.erl @@ -0,0 +1,828 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1998-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%%% Purpose : Loads tables from local disc or from remote node + +-module(mnesia_loader). + +%% Mnesia internal stuff +-export([disc_load_table/2, + net_load_table/4, + send_table/3]). + +-export([old_node_init_table/6]). 
%% ^ old_node_init_table/6: spawned old node protocol conversion hack
-export([spawned_receiver/8]). %% Spawned lock taking process

-import(mnesia_lib, [set/2, fatal/2, verbose/2, dbg_out/2]).

-include("mnesia.hrl").

%% Reads the global variable Var; a failed lookup is handled by
%% mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load a table from local disc

%% Looks up the storage type and table type of Tab, gives test
%% programs a hook through ?eval_debug_fun, and loads the table with
%% do_get_disc_copy2/4.
%% Returns {loaded, ok} or {not_loaded, Reason}.
disc_load_table(Tab, Reason) ->
    Storage = val({Tab, storage_type}),
    Type = val({Tab, setorbag}),
    dbg_out("Getting table ~p (~p) from disc: ~p~n",
            [Tab, Storage, Reason]),
    ?eval_debug_fun({?MODULE, do_get_disc_copy},
                    [{tab, Tab},
                     {reason, Reason},
                     {storage, Storage},
                     {type, Type}]),
    do_get_disc_copy2(Tab, Reason, Storage, Type).

%% One clause per storage type. Every clause that loads something
%% finishes the same way: indexes are initiated, the snmp hook is set
%% up, and {Tab, load_node} / {Tab, load_reason} are recorded.
do_get_disc_copy2(Tab, _Reason, Storage, _Type) when Storage == unknown ->
    verbose("Local table copy of ~p has recently been deleted, ignored.~n",
            [Tab]),
    {loaded, ok};  %% ?
do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == disc_copies ->
    %% NOW we create the actual table
    Repair = mnesia_monitor:get_env(auto_repair),
    Args = [{keypos, 2}, public, named_table, Type],
    case Reason of
        {dumper, _} -> %% Resources already allocated
            ignore;
        _ ->
            mnesia_monitor:mktab(Tab, Args),
            Count = mnesia_log:dcd2ets(Tab, Repair),
            %% Write the table back with ets2dcd when it holds fewer
            %% objects than four times the Count returned by dcd2ets.
            case ets:info(Tab, size) of
                X when X < Count * 4 ->
                    ok = mnesia_log:ets2dcd(Tab);
                _ ->
                    ignore
            end
    end,
    mnesia_index:init_index(Tab, Storage),
    snmpify(Tab, Storage),
    set({Tab, load_node}, node()),
    set({Tab, load_reason}, Reason),
    {loaded, ok};

do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == ram_copies ->
    Args = [{keypos, 2}, public, named_table, Type],
    case Reason of
        {dumper, _} -> %% Resources already allocated
            ignore;
        _ ->
            mnesia_monitor:mktab(Tab, Args),
            Fname = mnesia_lib:tab2dcd(Tab),
            Datname = mnesia_lib:tab2dat(Tab),
            Repair = mnesia_monitor:get_env(auto_repair),
            %% When a directory is in use, initial contents are taken
            %% from the .DCD file if it exists, else from the .DAT
            %% file if that exists; otherwise the table starts empty.
            case mnesia_monitor:use_dir() of
                true ->
                    case mnesia_lib:exists(Fname) of
                        true -> mnesia_log:dcd2ets(Tab, Repair);
                        false ->
                            case mnesia_lib:exists(Datname) of
                                true ->
                                    mnesia_lib:dets_to_ets(Tab, Tab, Datname,
                                                           Type, Repair, no);
                                false ->
                                    false
                            end
                    end;
                false ->
                    false
            end
    end,
    mnesia_index:init_index(Tab, Storage),
    snmpify(Tab, Storage),
    set({Tab, load_node}, node()),
    set({Tab, load_reason}, Reason),
    {loaded, ok};

do_get_disc_copy2(Tab, Reason, Storage, Type) when Storage == disc_only_copies ->
    Args = [{file, mnesia_lib:tab2dat(Tab)},
            {type, mnesia_lib:disk_type(Tab, Type)},
            {keypos, 2},
            {repair, mnesia_monitor:get_env(auto_repair)}],
    case Reason of
        {dumper, _} ->
            %% The dets table is not opened here in this case.
            mnesia_index:init_index(Tab, Storage),
            snmpify(Tab, Storage),
            set({Tab, load_node}, node()),
            set({Tab, load_reason}, Reason),
            {loaded, ok};
        _ ->
            case mnesia_monitor:open_dets(Tab, Args) of
                {ok, _} ->
                    mnesia_index:init_index(Tab, Storage),
                    snmpify(Tab, Storage),
                    set({Tab, load_node}, node()),
                    set({Tab, load_reason}, Reason),
                    {loaded, ok};
                {error, Error} ->
                    {not_loaded, {"Failed to create dets table", Error}}
            end
    end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load a table from a remote node
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Steps of the transfer, in order. Each step belongs to either the
%% Receiver or the Sender.
%% NOTE(review): the two-column (Receiver | Sender) alignment of this
%% table is not recoverable from this copy of the file; confirm which
%% side performs each step.
%%
%% Receiver Sender
%% -------- ------
%% Grab schema lock on table
%% Determine table size
%% Create empty pre-grown table
%% Grab read lock on table
%% Let receiver subscribe on updates done on sender node
%% Disable rehashing of table
%% Release read lock on table
%% Send table to receiver in chunks
%%
%% Grab read lock on table
%% Block dirty updates
%% Update whereabouts
%%
%% Cancel the update subscription
%% Process the subscription events
%% Optionally dump to disc
%% Unblock dirty updates
%% Release read lock on table
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-define(MAX_TRANSFER_SIZE, 7500).
-define(MAX_RAM_FILE_SIZE, 1000000).
-define(MAX_RAM_TRANSFERS, (?MAX_RAM_FILE_SIZE div ?MAX_TRANSFER_SIZE) + 1).
-define(MAX_NOPACKETS, 20).

%% Loads Tab from one of the nodes Ns. The cstruct passed by the
%% caller is used only when the reason is {dumper, add_table_copy};
%% for every other reason it is read from {Tab, cstruct}.
net_load_table(Tab, {dumper, add_table_copy} = Reason, Ns, Cs) ->
    try_net_load_table(Tab, Reason, Ns, Cs);
net_load_table(Tab, Reason, Ns, _Cs) ->
    CurrentCs = val({Tab, cstruct}),
    try_net_load_table(Tab, Reason, Ns, CurrentCs).

%% Tries the candidate nodes in turn. Gives up with
%% {not_loaded, none_active} when the list is empty.
try_net_load_table(Tab, _Reason, [], _Cs) ->
    verbose("Copy failed. No active replicas of ~p are available.~n", [Tab]),
    {not_loaded, none_active};
try_net_load_table(Tab, Reason, Ns, Cs) ->
    LocalStorage = mnesia_lib:cs_to_storage_type(node(), Cs),
    do_get_network_copy(Tab, Reason, Ns, LocalStorage, Cs).
%% Copies Tab from the first node in Ns. Nodes that are not among the
%% current db_nodes are skipped. The next step depends on the result
%% of init_receiver/5: 'restart' moves the node to the end of the list
%% and tries again, 'down' drops the node and continues with the rest.
do_get_network_copy(Tab, _Reason, _Ns, unknown, _Cs) ->
    verbose("Local table copy of ~p has recently been deleted, ignored.~n", [Tab]),
    {not_loaded, storage_unknown};
do_get_network_copy(Tab, Reason, Ns, Storage, Cs) ->
    [Node | Tail] = Ns,
    case lists:member(Node,val({current, db_nodes})) of
        true ->
            dbg_out("Getting table ~p (~p) from node ~p: ~p~n",
                    [Tab, Storage, Node, Reason]),
            ?eval_debug_fun({?MODULE, do_get_network_copy},
                            [{tab, Tab}, {reason, Reason},
                             {nodes, Ns}, {storage, Storage}]),
            case init_receiver(Node, Tab, Storage, Cs, Reason) of
                ok ->
                    set({Tab, load_node}, Node),
                    set({Tab, load_reason}, Reason),
                    mnesia_controller:i_have_tab(Tab),
                    dbg_out("Table ~p copied from ~p to ~p~n", [Tab, Node, node()]),
                    {loaded, ok};
                %% Errors are only returned to the caller when the
                %% load was requested by the dumper.
                Err = {error, _} when element(1, Reason) == dumper ->
                    {not_loaded,Err};
                restart ->
                    try_net_load_table(Tab, Reason, Tail ++ [Node], Cs);
                down ->
                    try_net_load_table(Tab, Reason, Tail, Cs)
            end;
        false ->
            try_net_load_table(Tab, Reason, Tail, Cs)
    end.

%% Creates the snmp hook for Tab when {Tab, snmp} is a non-empty
%% value, and stores the result under {Tab, {index, snmp}}.
snmpify(Tab, Storage) ->
    do_snmpify(Tab, val({Tab, snmp}), Storage).

do_snmpify(_Tab, [], _Storage) ->
    ignore;
do_snmpify(Tab, Us, Storage) ->
    Snmp = mnesia_snmp_hook:create_table(Us, Tab, Storage),
    set({Tab, {index, snmp}}, Snmp).

%% Start the receiver
%% For {dumper, add_table_copy} the receiver runs directly in the
%% calling process. For all other reasons it runs inside a transaction
%% that first takes a read lock on {schema, Tab}, and the actual
%% loading is done by a spawned, linked process.
init_receiver(Node, Tab, Storage, Cs, Reas={dumper,add_table_copy}) ->
    case start_remote_sender(Node, Tab, Storage) of
        {SenderPid, TabSize, DetsData} ->
            start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,Reas);
        Else ->
            Else
    end;
init_receiver(Node, Tab,Storage,Cs,Reason) ->
    %% Grab a schema lock to avoid deadlock between table_loader and schema_commit dumping.
    %% Both may grab tables-locks in different order.
    Load =
        fun() ->
                {_,Tid,Ts} = get(mnesia_activity_state),
                mnesia_locker:rlock(Tid, Ts#tidstore.store, {schema, Tab}),
                %% Check that table still exists
                Active = val({Tab, active_replicas}),
                %% Check that we haven't loaded it already
                case val({Tab,where_to_read}) == node() of
                    true -> ok;
                    _ ->
                        %% And that sender still got a copy
                        %% (something might have happened while
                        %% we were waiting for the lock)
                        true = lists:member(Node, Active),
                        {SenderPid, TabSize, DetsData} =
                            start_remote_sender(Node,Tab,Storage),
                        Init = table_init_fun(SenderPid),
                        Args = [self(),Tab,Storage,Cs,SenderPid,
                                TabSize,DetsData,Init],
                        Pid = spawn_link(?MODULE, spawned_receiver, Args),
                        put(mnesia_real_loader, Pid),
                        wait_on_load_complete(Pid)
                end
        end,
    Res =
        case mnesia:transaction(Load, 20) of
            {atomic, {error,Result}} when
                  element(1,Reason) == dumper ->
                {error,Result};
            {atomic, {error,Result}} ->
                fatal("Cannot create table ~p: ~p~n",
                      [[Tab, Storage], Result]);
            {atomic,  Result} -> Result;
            {aborted, nomore} -> restart;
            {aborted, _Reas} ->
                verbose("Receiver failed on ~p from ~p:~nReason: ~p~n",
                        [Tab,Node,_Reas]),
                down  %% either this node or sender is dying
        end,
    unlink(whereis(mnesia_tm)),  %% Avoid late unlink from tm
    Res.

%% Asks mnesia_controller to start a sender for Tab on Node and waits
%% for its first message. Returns {SenderPid, TabSize, DetsData}, with
%% DetsData = false when the sender did not supply any, or the result
%% of down/2 when {copier_done, Node} arrives instead.
start_remote_sender(Node,Tab,Storage) ->
    mnesia_controller:start_remote_sender(Node, Tab, self(), Storage),
    put(mnesia_table_sender_node, {Tab, Node}),
    receive
        {SenderPid, {first, TabSize}} ->
            {SenderPid, TabSize, false};
        {SenderPid, {first, TabSize, DetsData}} ->
            {SenderPid, TabSize, DetsData};
        %% Protocol conversion hack
        {copier_done, Node} ->
            verbose("Sender of table ~p crashed on node ~p ~n", [Tab, Node]),
            down(Tab, Storage)
    end.

%% Returns the fun used as data source when the table is initialised:
%% every call with 'read' asks the sender for more data and returns
%% the result of get_data/2. When the sender's node needs protocol
%% conversion, the process that created the fun is used as the receiver
%% address and is told which process actually reads.
table_init_fun(SenderPid) ->
    PConv = mnesia_monitor:needs_protocol_conversion(node(SenderPid)),
    MeMyselfAndI = self(),
    fun(read) ->
            Receiver =
                if
                    PConv == true ->
                        MeMyselfAndI ! {actual_tabrec, self()},
                        MeMyselfAndI; %% Old mnesia
                    PConv == false -> self()
                end,
            SenderPid ! {Receiver, more},
            get_data(SenderPid, Receiver)
    end.

%% Add_table_copy gets its own locks.
%% Runs do_init_table/8 in the calling process; on {error, _} the
%% sender is told that the copier is done.
start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,{dumper,add_table_copy}) ->
    Init = table_init_fun(SenderPid),
    case do_init_table(Tab,Storage,Cs,SenderPid,TabSize,DetsData,self(), Init) of
        Err = {error, _} ->
            SenderPid ! {copier_done, node()},
            Err;
        Else ->
            Else
    end.

%% Entry point of the process spawned by init_receiver/5. Loads the
%% table, sends {self(), Result} to ReplyTo, unlinks from ReplyTo and
%% mnesia_controller, and exits normally.
spawned_receiver(ReplyTo,Tab,Storage,Cs, SenderPid,TabSize,DetsData, Init) ->
    process_flag(trap_exit, true),
    Done = do_init_table(Tab,Storage,Cs,
                         SenderPid,TabSize,DetsData,
                         ReplyTo, Init),
    ReplyTo ! {self(),Done},
    unlink(ReplyTo),
    unlink(whereis(mnesia_controller)),
    exit(normal).

%% Waits for the result from the loader process Pid. Every other
%% message received meanwhile is forwarded to Pid. An 'EXIT' message
%% from Pid makes the caller exit with the same reason.
wait_on_load_complete(Pid) ->
    receive
        {Pid, Res} ->
            Res;
        {'EXIT', Pid, Reason} ->
            exit(Reason);
        Else ->
            Pid ! Else,
            wait_on_load_complete(Pid)
    end.

%% Creates the local table, blocks it in mnesia_tm, fills it from the
%% sender with init_table/6 and then runs tab_receiver/6. A failed
%% init_table/6 is logged and answered with down/2; a failed table
%% creation is returned as is.
do_init_table(Tab,Storage,Cs,SenderPid,
              TabSize,DetsInfo,OrigTabRec,Init) ->
    case create_table(Tab, TabSize, Storage, Cs) of
        {Storage,Tab} ->
            %% Debug info
            Node = node(SenderPid),
            put(mnesia_table_receiver, {Tab, Node, SenderPid}),
            mnesia_tm:block_tab(Tab),
            PConv = mnesia_monitor:needs_protocol_conversion(Node),

            case init_table(Tab,Storage,Init,PConv,DetsInfo,SenderPid) of
                ok ->
                    tab_receiver(Node,Tab,Storage,Cs,PConv,OrigTabRec);
                Reason ->
                    Msg = "[d]ets:init table failed",
                    verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
                    down(Tab, Storage)
            end;
        Error ->
            Error
    end.
%% Create the (still empty) local table that will receive the copy.
%% disc_only_copies are built in a temporary dets file, the other
%% storage types in a named ets table.
%% Returns {Storage, Tab} on success, otherwise the failing result.
create_table(Tab, TabSize, Storage, Cs) ->
    if
        Storage == disc_only_copies ->
            mnesia_lib:lock_table(Tab),
            Tmp = mnesia_lib:tab2tmp(Tab),
            Size = lists:max([TabSize, 256]),
            Args = [{file, Tmp},
                    {keypos, 2},
                    %% {ram_file, true},
                    {estimated_no_objects, Size},
                    {repair, mnesia_monitor:get_env(auto_repair)},
                    {type, mnesia_lib:disk_type(Tab, Cs#cstruct.type)}],
            file:delete(Tmp),
            OpenRes = mnesia_lib:dets_sync_open(Tab, Args),
            mnesia_lib:unlock_table(Tab),
            case OpenRes of
                {ok, _} ->
                    {Storage, Tab};
                Else ->
                    Else
            end;
        (Storage == ram_copies) or (Storage == disc_copies) ->
            Args = [{keypos, 2}, public, named_table, Cs#cstruct.type],
            case mnesia_monitor:unsafe_mktab(Tab, Args) of
                Tab ->
                    {Storage, Tab};
                Else ->
                    Else
            end
    end.

%% Message loop of the table receiver once init_table has been started.
%% PConv is false for current senders, true until an old-protocol init
%% process has announced itself, and thereafter the pid of that process.
tab_receiver(Node, Tab, Storage, Cs, PConv, OrigTabRec) ->
    receive
        {SenderPid, {no_more, DatBin}} when PConv == false ->
            finish_copy(Storage, Tab, Cs, SenderPid, DatBin, OrigTabRec);

        %% Protocol conversion hack
        {SenderPid, {no_more, DatBin}} when is_pid(PConv) ->
            PConv ! {SenderPid, no_more},
            receive
                {old_init_table_complete, ok} ->
                    finish_copy(Storage, Tab, Cs, SenderPid, DatBin, OrigTabRec);
                {old_init_table_complete, Reason} ->
                    Msg = "OLD: [d]ets:init table failed",
                    verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
                    down(Tab, Storage)
            end;

        {actual_tabrec, Pid} ->
            tab_receiver(Node, Tab, Storage, Cs, Pid, OrigTabRec);

        {SenderPid, {more, [Recs]}} when is_pid(PConv) ->
            PConv ! {SenderPid, {more, Recs}},  %% Forward Msg to OldNodes
            tab_receiver(Node, Tab, Storage, Cs, PConv, OrigTabRec);

        {'EXIT', PConv, Reason} ->  %% [d]ets:init process crashed
            Msg = "Receiver crashed",
            verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
            down(Tab, Storage);

        %% Protocol conversion hack
        {copier_done, Node} ->
            verbose("Sender of table ~p crashed on node ~p ~n", [Tab, Node]),
            down(Tab, Storage);

        {'EXIT', Pid, Reason} ->
            handle_exit(Pid, Reason),
            tab_receiver(Node, Tab, Storage, Cs, PConv, OrigTabRec)
    end.

%% Continuation fun returned together with each chunk of records.
make_table_fun(Pid, TabRec) ->
    fun(close) ->
            ok;
       (read) ->
            get_data(Pid, TabRec)
    end.

%% Wait for the next chunk from the sender Pid. Each received chunk is
%% acknowledged with a request for more on behalf of TabRec.
get_data(Pid, TabRec) ->
    receive
        {Pid, {more, Recs}} ->
            Pid ! {TabRec, more},
            {Recs, make_table_fun(Pid, TabRec)};
        {Pid, no_more} ->
            end_of_input;
        {copier_done, Node} ->
            case node(Pid) of
                Node ->
                    {copier_done, Node};
                _ ->
                    %% Concerns some other node, keep waiting
                    get_data(Pid, TabRec)
            end;
        {'EXIT', Pid, Reason} ->
            handle_exit(Pid, Reason),
            get_data(Pid, TabRec)
    end.

%% Fill the freshly created table using the read fun Fun.
%% The fourth argument tells whether protocol conversion is needed;
%% when it is, the work is done by a linked helper process instead.
init_table(Tab, disc_only_copies, Fun, false, DetsInfo, Sender) ->
    ErtsVer = erlang:system_info(version),
    case DetsInfo of
        {ErtsVer, DetsData} ->
            Res = (catch dets:is_compatible_bchunk_format(Tab, DetsData)),
            case Res of
                {'EXIT', {undef, [{dets, _, _} | _]}} ->
                    Sender ! {self(), {old_protocol, Tab}},
                    dets:init_table(Tab, Fun);  %% Old dets version
                {'EXIT', What} ->
                    exit(What);
                false ->
                    Sender ! {self(), {old_protocol, Tab}},
                    dets:init_table(Tab, Fun);  %% Old dets version
                true ->
                    dets:init_table(Tab, Fun, [{format, bchunk}])
            end;
        Old when Old /= false ->
            Sender ! {self(), {old_protocol, Tab}},
            dets:init_table(Tab, Fun);  %% Old dets version
        _ ->
            dets:init_table(Tab, Fun)
    end;
init_table(Tab, _, Fun, false, _DetsInfo, _) ->
    case catch ets:init_table(Tab, Fun) of
        true ->
            ok;
        {'EXIT', Else} ->
            Else
    end;
init_table(Tab, Storage, Fun, true, _DetsInfo, Sender) ->  %% Old Nodes
    spawn_link(?MODULE, old_node_init_table,
               [Tab, Storage, Fun, self(), false, Sender]),
    ok.

%% Runs in the helper process started for old nodes; reports the
%% result of init_table back to the table receiver.
old_node_init_table(Tab, Storage, Fun, TabReceiver, DetsInfo, Sender) ->
    Res = init_table(Tab, Storage, Fun, false, DetsInfo, Sender),
    TabReceiver ! {old_init_table_complete, Res},
    unlink(TabReceiver),
    ok.

%% Receiver side: apply pending subscription events, finalize the
%% storage, build indices and acknowledge to the sender.
finish_copy(Storage, Tab, Cs, SenderPid, DatBin, OrigTabRec) ->
    TabRef = {Storage, Tab},
    subscr_receiver(TabRef, Cs#cstruct.record_name),
    case handle_last(TabRef, Cs#cstruct.type, DatBin) of
        ok ->
            mnesia_index:init_index(Tab, Storage),
            snmpify(Tab, Storage),
            %% OrigTabRec must not be the spawned tab-receiver
            %% due to old protocol.
            SenderPid ! {OrigTabRec, no_more},
            mnesia_tm:unblock_tab(Tab),
            ok;
        {error, Reason} ->
            Msg = "Failed to handle last",
            verbose("~s: ~p: ~p~n", [Msg, Tab, Reason]),
            down(Tab, Storage)
    end.

%% Apply every table event already in the mailbox to the new copy.
%% Returns ok as soon as no more such messages are queued.
subscr_receiver(TabRef = {_, Tab}, RecName) ->
    receive
        {mnesia_table_event, {Op, Val, _Tid}} ->
            if
                Tab == RecName ->
                    handle_event(TabRef, Op, Val);
                true ->
                    %% Event records are tagged with the table name,
                    %% store them under the real record name
                    handle_event(TabRef, Op, setelement(1, Val, RecName))
            end,
            subscr_receiver(TabRef, RecName);

        {'EXIT', Pid, Reason} ->
            handle_exit(Pid, Reason),
            subscr_receiver(TabRef, RecName)
    after 0 ->
            ok
    end.

%% Apply one subscription event to the table being loaded.
handle_event(TabRef, write, Rec) ->
    db_put(TabRef, Rec);
handle_event(TabRef, delete, {_Tab, Key}) ->
    db_erase(TabRef, Key);
handle_event(TabRef, delete_object, OldRec) ->
    db_match_erase(TabRef, OldRec);
handle_event(TabRef, clear_table, {_Tab, _Key}) ->
    db_match_erase(TabRef, '_').
%% Final, storage specific step of a table copy.
%% Returns ok or {error, Reason}.
handle_last({disc_copies, Tab}, _Type, nobin) ->
    Ret = mnesia_log:ets2dcd(Tab),
    Fname = mnesia_lib:tab2dat(Tab),
    case mnesia_lib:exists(Fname) of
        true ->  %% Remove old .DAT files.
            file:delete(Fname);
        false ->
            ok
    end,
    Ret;

handle_last({disc_only_copies, Tab}, Type, nobin) ->
    %% The table was built in a temporary file; close it, move the
    %% file into place and reopen it under its real name.
    mnesia_lib:dets_sync_close(Tab),
    Tmp = mnesia_lib:tab2tmp(Tab),
    Dat = mnesia_lib:tab2dat(Tab),
    case file:rename(Tmp, Dat) of
        ok ->
            Args = [{file, mnesia_lib:tab2dat(Tab)},
                    {type, mnesia_lib:disk_type(Tab, Type)},
                    {keypos, 2},
                    {repair, mnesia_monitor:get_env(auto_repair)}],
            mnesia_monitor:open_dets(Tab, Args),
            ok;
        {error, Reason} ->
            {error, {"Cannot swap tmp files", Tab, Reason}}
    end;

handle_last({ram_copies, _Tab}, _Type, nobin) ->
    ok;
handle_last({ram_copies, Tab}, _Type, DatBin) ->
    case mnesia_monitor:use_dir() of
        true ->
            mnesia_lib:lock_table(Tab),
            %% Always release the table lock, also when writing or
            %% renaming fails. The failure itself still propagates
            %% as before (badmatch).
            try
                Tmp = mnesia_lib:tab2tmp(Tab),
                ok = file:write_file(Tmp, DatBin),
                ok = file:rename(Tmp, mnesia_lib:tab2dcd(Tab))
            after
                mnesia_lib:unlock_table(Tab)
            end,
            ok;
        false ->
            ok
    end.

%% Abandon a failed table load: remove the partial local copy, retract
%% this node as a replica, unblock the table and drop queued events.
%% Always returns the atom down.
down(Tab, Storage) ->
    case Storage of
        ram_copies ->
            catch ?ets_delete_table(Tab);
        disc_copies ->
            catch ?ets_delete_table(Tab);
        disc_only_copies ->
            TmpFile = mnesia_lib:tab2tmp(Tab),
            mnesia_lib:dets_sync_close(Tab),
            file:delete(TmpFile)
    end,
    mnesia_checkpoint:tm_del_copy(Tab, node()),
    mnesia_controller:sync_del_table_copy_whereabouts(Tab, node()),
    mnesia_tm:unblock_tab(Tab),
    flush_subcrs(),
    down.

%% Throw away every table event currently in the mailbox.
flush_subcrs() ->
    receive
        {mnesia_table_event, _} ->
            flush_subcrs();

        {'EXIT', Pid, Reason} ->
            handle_exit(Pid, Reason),
            flush_subcrs()
    after 0 ->
            done
    end.

%% Delete Key from the table being loaded.
db_erase({ram_copies, Tab}, Key) ->
    true = ?ets_delete(Tab, Key);
db_erase({disc_copies, Tab}, Key) ->
    true = ?ets_delete(Tab, Key);
db_erase({disc_only_copies, Tab}, Key) ->
    ok = dets:delete(Tab, Key).
%% Delete all objects matching Pat from the table being loaded.
db_match_erase({ram_copies, Tab}, Pat) ->
    true = ?ets_match_delete(Tab, Pat);
db_match_erase({disc_copies, Tab}, Pat) ->
    true = ?ets_match_delete(Tab, Pat);
db_match_erase({disc_only_copies, Tab}, Pat) ->
    ok = dets:match_delete(Tab, Pat).

%% Insert Val into the table being loaded.
db_put({ram_copies, Tab}, Val) ->
    true = ?ets_insert(Tab, Val);
db_put({disc_copies, Tab}, Val) ->
    true = ?ets_insert(Tab, Val);
db_put({disc_only_copies, Tab}, Val) ->
    ok = dets:insert(Tab, Val).

%% The code below executes at the remote site where the data is;
%% it runs in a special copier process.

%% Calculate the number of keys per transfer, based on the external
%% size of the objects stored under the first key of the table.
calc_nokeys(Storage, Tab) ->
    FirstKey = mnesia_lib:db_first(Storage, Tab),
    Sample = mnesia_lib:db_get(Storage, Tab, FirstKey),
    SampleSize = byte_size(term_to_binary(Sample)),
    (?MAX_TRANSFER_SIZE div SampleSize) + 1.

%% Send the contents of Tab to the receiver process Pid, whose copy
%% will have storage type RemoteS.
%% Returns ok or {error, Reason}.
send_table(Pid, Tab, RemoteS) ->
    case ?catch_val({Tab, storage_type}) of
        {'EXIT', _} ->
            {error, {no_exists, Tab}};
        unknown ->
            {error, {no_exists, Tab}};
        Storage ->
            %% Send first
            TabSize = mnesia:table_info(Tab, size),
            Pconvert = mnesia_monitor:needs_protocol_conversion(node(Pid)),
            KeysPerTransfer = calc_nokeys(Storage, Tab),
            ChunkData = dets:info(Tab, bchunk_format),

            UseDetsChunk =
                Storage == RemoteS andalso
                Storage == disc_only_copies andalso
                ChunkData /= undefined andalso
                Pconvert == false,
            case UseDetsChunk of
                true ->
                    DetsInfo = erlang:system_info(version),
                    Pid ! {self(), {first, TabSize, {DetsInfo, ChunkData}}};
                false ->
                    Pid ! {self(), {first, TabSize}}
            end,

            %% Debug info
            put(mnesia_table_sender, {Tab, node(Pid), Pid}),
            {Init, Chunk} = reader_funcs(UseDetsChunk, Tab, Storage, KeysPerTransfer),

            SendIt = fun() ->
                             prepare_copy(Pid, Tab, Storage),
                             send_more(Pid, 1, Chunk, Init(), Tab, Pconvert),
                             finish_copy(Pid, Tab, Storage, RemoteS)
                     end,

            Outcome =
                case catch SendIt() of
                    receiver_died ->
                        cleanup_tab_copier(Pid, Storage, Tab),
                        ok;
                    {_, receiver_died} ->
                        ok;
                    {atomic, no_more} ->
                        ok;
                    Reason ->
                        cleanup_tab_copier(Pid, Storage, Tab),
                        {error, Reason}
                end,
            unlink(whereis(mnesia_tm)),
            Outcome
    end.

%% In one transaction: write lock the table, subscribe the receiver to
%% its events, add the receiving node to where_to_write and fix the
%% table. Exits if the transaction is aborted.
prepare_copy(Pid, Tab, Storage) ->
    Trans =
        fun() ->
                mnesia:write_lock_table(Tab),
                mnesia_subscr:subscribe(Pid, {table, Tab}),
                update_where_to_write(Tab, node(Pid)),
                mnesia_lib:db_fixtable(Storage, Tab, true),
                ok
        end,
    case mnesia:transaction(Trans) of
        {atomic, ok} ->
            ok;
        {aborted, Reason} ->
            exit({tab_copier_prepare, Tab, Reason})
    end.

%% Make all current db nodes, and Node itself, add Node to the
%% where_to_write list of Tab. Nothing is done for read_only tables.
update_where_to_write(Tab, Node) ->
    case val({Tab, access_mode}) of
        read_only ->
            ignore;
        read_write ->
            Current = val({current, db_nodes}),
            Ns =
                case lists:member(Node, Current) of
                    true -> Current;
                    false -> [Node | Current]
                end,
            update_where_to_write(Ns, Tab, Node)
    end.

%% Ask the mnesia_controller on each of the nodes, one at a time, to
%% add AddNode to where_to_write for Tab.
update_where_to_write(Nodes, Tab, AddNode) ->
    Request = {update_where_to_write, [add, Tab, AddNode], self()},
    lists:foreach(fun(N) ->
                          rpc:call(N, mnesia_controller, call, [Request])
                  end,
                  Nodes).
%% Sender loop: wait for the receiver to ask for more and answer with
%% a burst of packets, until the table has been sent completely.
send_more(Pid, N, Chunk, DataState, Tab, OldNode) ->
    receive
        {NewPid, more} ->
            case send_packet(N - 1, NewPid, Chunk, DataState, OldNode) of
                New when is_integer(New) ->
                    %% End of table reached
                    New - 1;
                NewData ->
                    send_more(NewPid, ?MAX_NOPACKETS, Chunk, NewData, Tab, OldNode)
            end;
        {_NewPid, {old_protocol, Tab}} ->
            %% The receiver cannot use the current format; start over
            %% with the ordinary chunk reader.
            Storage = val({Tab, storage_type}),
            {Init, NewChunk} =
                reader_funcs(false, Tab, Storage, calc_nokeys(Storage, Tab)),
            send_more(Pid, 1, NewChunk, Init(), Tab, OldNode);

        {copier_done, Node} when Node == node(Pid) ->
            verbose("Receiver of table ~p crashed on ~p (more)~n", [Tab, Node]),
            throw(receiver_died)
    end.

%% Returns {InitFun, ChunkFun}: InitFun() reads the first chunk and
%% ChunkFun(Cont) every following one.
reader_funcs(UseDetsChunk, Tab, Storage, KeysPerTransfer) ->
    case UseDetsChunk of
        false ->
            {fun() -> mnesia_lib:db_init_chunk(Storage, Tab, KeysPerTransfer) end,
             fun(Cont) -> mnesia_lib:db_chunk(Storage, Cont) end};
        true ->
            {fun() -> dets_bchunk(Tab, start) end,
             fun(Cont) -> dets_bchunk(Tab, Cont) end}
    end.

%% dets:bchunk/2 returns {Continuation, Data}; swap the elements so
%% that the result has the same {Data, Continuation} shape that
%% send_packet/5 expects. Other results are passed through.
dets_bchunk(Tab, Chunk) ->
    case dets:bchunk(Tab, Chunk) of
        {Cont, Data} -> {Data, Cont};
        Else -> Else
    end.

%% Send chunks to Pid until either the table is exhausted (returns the
%% packet counter N) or ?MAX_NOPACKETS have been sent (returns the
%% data state to continue from). Empty chunks are skipped.
send_packet(N, Pid, _Chunk, '$end_of_table', OldNode) ->
    case OldNode of
        true -> ignore;  %% Old nodes can't handle the new no_more
        false -> Pid ! {self(), no_more}
    end,
    N;
send_packet(N, Pid, Chunk, {[], Cont}, OldNode) ->
    send_packet(N, Pid, Chunk, Chunk(Cont), OldNode);
send_packet(N, Pid, Chunk, {Recs, Cont}, OldNode) when N < ?MAX_NOPACKETS ->
    case OldNode of
        true -> Pid ! {self(), {more, [Recs]}};  %% Old nodes need a wrapping list
        false -> Pid ! {self(), {more, Recs}}
    end,
    send_packet(N + 1, Pid, Chunk, Chunk(Cont), OldNode);
send_packet(_N, _Pid, _Chunk, DataState, _OldNode) ->
    DataState.
+ +finish_copy(Pid, Tab, Storage, RemoteS) -> + RecNode = node(Pid), + DatBin = dat2bin(Tab, Storage, RemoteS), + Trans = + fun() -> + mnesia:read_lock_table(Tab), + A = val({Tab, access_mode}), + mnesia_controller:sync_and_block_table_whereabouts(Tab, RecNode, RemoteS, A), + cleanup_tab_copier(Pid, Storage, Tab), + mnesia_checkpoint:tm_add_copy(Tab, RecNode), + Pid ! {self(), {no_more, DatBin}}, + receive + {Pid, no_more} -> % Dont bother about the spurious 'more' message + no_more; + {copier_done, Node} when Node == node(Pid)-> + verbose("Tab receiver ~p crashed (more): ~p~n", [Tab, Node]), + receiver_died + end + end, + mnesia:transaction(Trans). + +cleanup_tab_copier(Pid, Storage, Tab) -> + mnesia_lib:db_fixtable(Storage, Tab, false), + mnesia_subscr:unsubscribe(Pid, {table, Tab}). + +dat2bin(Tab, ram_copies, ram_copies) -> + mnesia_lib:lock_table(Tab), + Res = file:read_file(mnesia_lib:tab2dcd(Tab)), + mnesia_lib:unlock_table(Tab), + case Res of + {ok, DatBin} -> DatBin; + _ -> nobin + end; +dat2bin(_Tab, _LocalS, _RemoteS) -> + nobin. + +handle_exit(Pid, Reason) when node(Pid) == node() -> + exit(Reason); +handle_exit(_Pid, _Reason) -> %% Not from our node, this will be handled by + ignore. %% mnesia_down soon. diff --git a/lib/mnesia/src/mnesia_locker.erl b/lib/mnesia/src/mnesia_locker.erl new file mode 100644 index 0000000000..cfa3f171b2 --- /dev/null +++ b/lib/mnesia/src/mnesia_locker.erl @@ -0,0 +1,1196 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. 
+%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_locker). + +-export([ + get_held_locks/0, + get_lock_queue/0, + global_lock/5, + ixrlock/5, + init/1, + mnesia_down/2, + release_tid/1, + async_release_tid/2, + send_release_tid/2, + receive_release_tid_acc/2, + rlock/3, + rlock_table/3, + rwlock/3, + sticky_rwlock/3, + start/0, + sticky_wlock/3, + sticky_wlock_table/3, + wlock/3, + wlock_no_exist/4, + wlock_table/3 + ]). + +%% sys callback functions +-export([system_continue/3, + system_terminate/4, + system_code_change/4 + ]). + +-include("mnesia.hrl"). +-import(mnesia_lib, [dbg_out/2, error/2, verbose/2]). + +-define(dbg(S,V), ok). +%-define(dbg(S,V), dbg_out("~p:~p: " ++ S, [?MODULE, ?LINE] ++ V)). + +-define(ALL, '______WHOLETABLE_____'). +-define(STICK, '______STICK_____'). +-define(GLOBAL, '______GLOBAL_____'). + +-record(state, {supervisor}). + +-record(queue, {oid, tid, op, pid, lucky}). + +%% mnesia_held_locks: contain {Oid, Op, Tid} entries (bag) +-define(match_oid_held_locks(Oid), {Oid, '_', '_'}). +%% mnesia_tid_locks: contain {Tid, Oid, Op} entries (bag) +-define(match_oid_tid_locks(Tid), {Tid, '_', '_'}). +%% mnesia_sticky_locks: contain {Oid, Node} entries and {Tab, Node} entries (set) +-define(match_oid_sticky_locks(Oid),{Oid, '_'}). +%% mnesia_lock_queue: contain {queue, Oid, Tid, Op, ReplyTo, WaitForTid} entries (bag) +-define(match_oid_lock_queue(Oid), #queue{oid=Oid, tid='_', op = '_', pid = '_', lucky = '_'}). +%% mnesia_lock_counter: {{write, Tab}, Number} && +%% {{read, Tab}, Number} entries (set) + +start() -> + mnesia_monitor:start_proc(?MODULE, ?MODULE, init, [self()]). 
%% Entry point of the lock manager process: register, create the
%% private lock tables, acknowledge the start and enter the loop.
init(Parent) ->
    register(?MODULE, self()),
    process_flag(trap_exit, true),
    ?ets_new_table(mnesia_held_locks, [bag, private, named_table]),
    ?ets_new_table(mnesia_tid_locks, [bag, private, named_table]),
    ?ets_new_table(mnesia_sticky_locks, [set, private, named_table]),
    ?ets_new_table(mnesia_lock_queue, [bag, private, named_table, {keypos, 2}]),

    proc_lib:init_ack(Parent, {ok, self()}),
    %% Cache the configured pid sort order in the process dictionary
    case ?catch_val(pid_sort_order) of
        r9b_plain -> put(pid_sort_order, r9b_plain);
        standard -> put(pid_sort_order, standard);
        _ -> ignore
    end,
    loop(#state{supervisor = Parent}).

%% Look up Var, falling back to mnesia_lib:other_val/2 on failure.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%% Send the answer R of the lock manager to the client From.
reply(From, R) ->
    From ! {?MODULE, node(), R}.

%% Send the request X to the lock manager on Node and wait for the
%% answer.
l_request(Node, X, Store) ->
    {?MODULE, Node} ! {self(), X},
    l_req_rec(Node, Store).

%% Record Node as involved in the transaction and wait for its answer,
%% or for the information that mnesia went down on that node.
l_req_rec(Node, Store) ->
    ?ets_insert(Store, {nodes, Node}),
    receive
        {?MODULE, Node, Reply} ->
            Reply;
        {mnesia_down, Node} ->
            {not_granted, {node_not_running, Node}}
    end.

%% Ask the local lock manager to release everything held by Tid.
release_tid(Tid) ->
    ?MODULE ! {release_tid, Tid}.

%% As release_tid/1 but on all of Nodes; no answers are sent.
async_release_tid(Nodes, Tid) ->
    rpc:abcast(Nodes, ?MODULE, {release_tid, Tid}).

%% As async_release_tid/2, but each lock manager acknowledges; collect
%% the answers with receive_release_tid_acc/2.
send_release_tid(Nodes, Tid) ->
    rpc:abcast(Nodes, ?MODULE, {self(), {sync_release_tid, Tid}}).

%% Wait, node by node, for the acknowledgement requested with
%% send_release_tid/2 or for a mnesia_down from that node.
receive_release_tid_acc([Node | Nodes], Tid) ->
    receive
        {?MODULE, Node, {tid_released, Tid}} ->
            receive_release_tid_acc(Nodes, Tid);
        {mnesia_down, Node} ->
            receive_release_tid_acc(Nodes, Tid)
    end;
receive_release_tid_acc([], _Tid) ->
    ok.

%% Main loop of the lock manager.
loop(State) ->
    receive
        %% Ordinary lock requests.
        %% If the key of Oid is ?ALL it's a request to lock the entire
        %% table. read_write really does a read, but gets hold of a
        %% write lock; used by mnesia:wread(Oid).
        {From, {Op, Tid, Oid}} when Op =:= write;
                                    Op =:= read;
                                    Op =:= read_write ->
            try_sticky_lock(Tid, Op, From, Oid),
            loop(State);

        %% Tid has somehow terminated, clear up everything
        %% and pass locks on to queued processes.
        %% This is the purpose of the mnesia_tid_locks table

        {release_tid, Tid} ->
            do_release_tid(Tid),
            loop(State);

        %% stick lock, first tries this to the where_to_read Node
        {From, {test_set_sticky, Tid, {Tab, _} = Oid, Lock}} ->
            case ?ets_lookup(mnesia_sticky_locks, Tab) of
                [] ->
                    reply(From, not_stuck);
                [{_, Node}] when Node == node() ->
                    %% Lock is stuck here, see now if we can just set
                    %% a regular write lock
                    try_lock(Tid, Lock, From, Oid);
                [{_, Node}] ->
                    reply(From, {stuck_elsewhere, Node})
            end,
            loop(State);

        %% If test_set_sticky fails, we send this to all nodes
        %% after acquiring a real write lock on Oid

        {stick, {Tab, _}, N} ->
            ?ets_insert(mnesia_sticky_locks, {Tab, N}),
            loop(State);

        %% The caller which sends this message, must have first
        %% acquired a write lock on the entire table
        {unstick, Tab} ->
            ?ets_delete(mnesia_sticky_locks, Tab),
            loop(State);

        {From, {ix_read, Tid, Tab, IxKey, Pos}} ->
            case ?ets_lookup(mnesia_sticky_locks, Tab) of
                [] ->
                    set_read_lock_on_all_keys(Tid, From, Tab, IxKey, Pos);
                [{_, N}] when N == node() ->
                    set_read_lock_on_all_keys(Tid, From, Tab, IxKey, Pos);
                [{_, N}] ->
                    %% Stuck on another node, tell the client to
                    %% switch to that node
                    Req = {From, {ix_read, Tid, Tab, IxKey, Pos}},
                    From ! {?MODULE, node(), {switch, N, Req}}
            end,
            loop(State);

        {From, {sync_release_tid, Tid}} ->
            do_release_tid(Tid),
            reply(From, {tid_released, Tid}),
            loop(State);

        {release_remote_non_pending, Node, Pending} ->
            release_remote_non_pending(Node, Pending),
            mnesia_monitor:mnesia_down(?MODULE, Node),
            loop(State);

        {'EXIT', Pid, _} when Pid == State#state.supervisor ->
            do_stop();

        {system, From, Msg} ->
            verbose("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
            Parent = State#state.supervisor,
            sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State);

        {get_table, From, LockTable} ->
            From ! {LockTable, ?ets_match_object(LockTable, '_')},
            loop(State);

        Msg ->
            error("~p got unexpected message: ~p~n", [?MODULE, Msg]),
            loop(State)
    end.

%% Register that Tid holds the lock Op on Oid.
set_lock(Tid, Oid, Op) ->
    ?dbg("Granted ~p ~p ~p~n", [Tid, Oid, Op]),
    ?ets_insert(mnesia_held_locks, {Oid, Op, Tid}),
    ?ets_insert(mnesia_tid_locks, {Tid, Oid, Op}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Acquire locks

%% Handle a lock request unless the table is stuck on another node;
%% in that case the client is told to switch to that node.
try_sticky_lock(Tid, Op, Pid, {Tab, _} = Oid) ->
    case ?ets_lookup(mnesia_sticky_locks, Tab) of
        [] ->
            try_lock(Tid, Op, Pid, Oid);
        [{_, N}] when N == node() ->
            try_lock(Tid, Op, Pid, Oid);
        [{_, N}] ->
            Req = {Pid, {Op, Tid, Oid}},
            Pid ! {?MODULE, node(), {switch, N, Req}}
    end.

%% A read_write request reads the object but takes a write lock; for
%% all other requests operation and lock type coincide.
try_lock(Tid, read_write, Pid, Oid) ->
    try_lock(Tid, read_write, read, write, Pid, Oid);
try_lock(Tid, Op, Pid, Oid) ->
    try_lock(Tid, Op, Op, Op, Pid, Oid).
%% Grant, reject or queue the lock request.
%%   Op       - the operation as requested, stored with a queued request
%%   SimpleOp - the operation performed when the lock is granted
%%   Lock     - the type of lock to take (read | write)
%%   Pid      - the client to answer
try_lock(Tid, Op, SimpleOp, Lock, Pid, Oid) ->
    case can_lock(Tid, Lock, Oid, {no, bad_luck}) of
        yes ->
            Reply = grant_lock(Tid, SimpleOp, Lock, Oid),
            reply(Pid, Reply);
        {no, Lucky} ->
            C = #cyclic{op = SimpleOp, lock = Lock, oid = Oid, lucky = Lucky},
            ?dbg("Rejected ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
            reply(Pid, {not_granted, C});
        {queue, Lucky} ->
            ?dbg("Queued ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
            %% Append to queue: Nice place for trace output
            Waiter = #queue{oid = Oid, tid = Tid, op = Op,
                            pid = Pid, lucky = Lucky},
            ?ets_insert(mnesia_lock_queue, Waiter),
            ?ets_insert(mnesia_tid_locks, {Tid, Oid, {queued, Op}})
    end.

%% Set the lock and build the answer to the client.
grant_lock(Tid, read, Lock, Oid = {Tab, Key})
  when Key /= ?ALL, Tab /= ?GLOBAL ->
    case node(Tid#tid.pid) == node() of
        true ->
            %% Local client, it reads the object itself
            set_lock(Tid, Oid, Lock),
            {granted, lookup_in_client};
        false ->
            try
                Val = mnesia_lib:db_get(Tab, Key),  %% lookup as well
                set_lock(Tid, Oid, Lock),
                {granted, Val}
            catch _:_Reason ->
                    %% Table has been deleted from this node,
                    %% restart the transaction.
                    C = #cyclic{op = read, lock = Lock, oid = Oid,
                                lucky = nowhere},
                    {not_granted, C}
            end
    end;
grant_lock(Tid, {ix_read, IxKey, Pos}, Lock, Oid = {Tab, _}) ->
    try
        Res = ix_read_res(Tab, IxKey, Pos),
        set_lock(Tid, Oid, Lock),
        {granted, Res, [?ALL]}
    catch _:_ ->
            {not_granted, {no_exists, Tab, {index, [Pos]}}}
    end;
grant_lock(Tid, read, Lock, Oid) ->
    set_lock(Tid, Oid, Lock),
    {granted, ok};
grant_lock(Tid, write, Lock, Oid) ->
    set_lock(Tid, Oid, Lock),
    granted.

%% 1) Impose an ordering on all transactions, favour old (low tid)
%%    transactions; newer (higher tid) transactions may never wait on
%%    older ones.
%% 2) When releasing the tids from the queue always begin with the
%%    youngest (high tid); because of 1) this avoids the deadlocks.
%% 3) TabLocks are the problem :-) They should not starve and not
%%    deadlock; handle tablocks in queue as if they had locks on
%%    unlocked records.
%% Decide whether Tid may take the lock on the object or table.
%% Returns yes | {queue, WaitForTid} | {no, WaitForTid}.
can_lock(Tid, read, {Tab, Key}, AlreadyQ) when Key /= ?ALL ->
    %% The key is bound, no need for the other BIF
    Oid = {Tab, Key},
    ObjLocks = ?ets_match_object(mnesia_held_locks, {Oid, write, '_'}),
    TabLocks = ?ets_match_object(mnesia_held_locks, {{Tab, ?ALL}, write, '_'}),
    check_lock(Tid, Oid, ObjLocks, TabLocks, yes, AlreadyQ, read);

can_lock(Tid, read, Oid, AlreadyQ) ->  % Whole tab
    Tab = element(1, Oid),
    ObjLocks = ?ets_match_object(mnesia_held_locks, {{Tab, '_'}, write, '_'}),
    check_lock(Tid, Oid, ObjLocks, [], yes, AlreadyQ, read);

can_lock(Tid, write, {Tab, Key}, AlreadyQ) when Key /= ?ALL ->
    Oid = {Tab, Key},
    ObjLocks = ?ets_lookup(mnesia_held_locks, Oid),
    TabLocks = ?ets_lookup(mnesia_held_locks, {Tab, ?ALL}),
    check_lock(Tid, Oid, ObjLocks, TabLocks, yes, AlreadyQ, write);

can_lock(Tid, write, Oid, AlreadyQ) ->  % Whole tab
    Tab = element(1, Oid),
    ObjLocks = ?ets_match_object(mnesia_held_locks, ?match_oid_held_locks({Tab, '_'})),
    check_lock(Tid, Oid, ObjLocks, [], yes, AlreadyQ, write).

%% Check held locks for conflicting locks.
%% First the object locks are examined, then the table locks and at
%% last the lock queue.
check_lock(Tid, Oid, [Lock | Locks], TabLocks, X, AlreadyQ, Type) ->
    case element(3, Lock) of
        Tid ->
            %% Our own lock, no conflict
            check_lock(Tid, Oid, Locks, TabLocks, X, AlreadyQ, Type);
        WaitForTid ->
            Queue = allowed_to_be_queued(WaitForTid, Tid),
            if
                Queue == true ->
                    check_lock(Tid, Oid, Locks, TabLocks, {queue, WaitForTid}, AlreadyQ, Type);
                Tid#tid.pid == WaitForTid#tid.pid ->
                    dbg_out("Spurious lock conflict ~w ~w: ~w -> ~w~n",
                            [Oid, Lock, Tid, WaitForTid]),
                    %% Test..
                    {Tab, _Key} = Oid,
                    HaveQ = (ets:lookup(mnesia_lock_queue, Oid) /= [])
                        orelse (ets:lookup(mnesia_lock_queue, {Tab, ?ALL}) /= []),
                    if
                        HaveQ ->
                            {no, WaitForTid};
                        true ->
                            check_lock(Tid, Oid, Locks, TabLocks, {queue, WaitForTid}, AlreadyQ, Type)
                    end;
                %%{no, WaitForTid}; Safe solution
                true ->
                    {no, WaitForTid}
            end
    end;

check_lock(_, _, [], [], X, {queue, bad_luck}, _) ->
    X;  %% The queue should be correct already no need to check it again

check_lock(_, _, [], [], X = {queue, _Tid}, _AlreadyQ, _) ->
    X;

check_lock(Tid, Oid, [], [], X, AlreadyQ, Type) ->
    {Tab, Key} = Oid,
    if
        Type == write ->
            check_queue(Tid, Tab, X, AlreadyQ);
        Key == ?ALL ->
            %% hmm should be solvable by a clever select expr but not today...
            check_queue(Tid, Tab, X, AlreadyQ);
        true ->
            %% If there is a queue on that object, read_lock shouldn't be granted
            ObjLocks = ets:lookup(mnesia_lock_queue, Oid),
            case max(ObjLocks) of
                empty ->
                    check_queue(Tid, Tab, X, AlreadyQ);
                ObjL ->
                    case allowed_to_be_queued(ObjL, Tid) of
                        false ->
                            %% Starvation Preemption (write waits for read)
                            {no, ObjL};
                        true ->
                            check_queue(Tid, Tab, {queue, ObjL}, AlreadyQ)
                    end
            end
    end;

check_lock(Tid, Oid, [], TabLocks, X, AlreadyQ, Type) ->
    %% Object locks are done, continue with the table locks
    check_lock(Tid, Oid, TabLocks, [], X, AlreadyQ, Type).

%% True if WaitForTid > Tid -> % Important order
%% The comparison used depends on the pid_sort_order stored in the
%% process dictionary by init/1.
allowed_to_be_queued(WaitForTid, Tid) ->
    case get(pid_sort_order) of
        undefined ->
            WaitForTid > Tid;
        r9b_plain ->
            cmp_tid(true, WaitForTid, Tid) =:= 1;
        standard ->
            cmp_tid(false, WaitForTid, Tid) =:= 1
    end.
%% Check queue for conflicting locks
%% Assume that all queued locks belong to other tids.
%% X is the verdict reached so far; it is returned unchanged unless a
%% queued table lock request of another tid is in the way.
check_queue(Tid, Tab, X, AlreadyQ) ->
    TabLocks = ets:lookup(mnesia_lock_queue, {Tab, ?ALL}),
    Greatest = max(TabLocks),
    case Greatest of
        empty -> X;
        Tid -> X;
        WaitForTid ->
            case allowed_to_be_queued(WaitForTid, Tid) of
                true ->
                    {queue, WaitForTid};
                false when AlreadyQ =:= {no, bad_luck} ->
                    {no, WaitForTid}
            end
    end.

%% Sort queue entries on their tid in descending order, using the
%% comparison selected by pid_sort_order.
sort_queue(QL) ->
    case get(pid_sort_order) of
        undefined ->
            lists:reverse(lists:keysort(#queue.tid, QL));
        r9b_plain ->
            lists:sort(fun(#queue{tid = X}, #queue{tid = Y}) ->
                               cmp_tid(true, X, Y) == 1
                       end, QL);
        standard ->
            lists:sort(fun(#queue{tid = X}, #queue{tid = Y}) ->
                               cmp_tid(false, X, Y) == 1
                       end, QL)
    end.

%% The tid that comes first in sort_queue/1 order, or the atom empty
%% if the list of queue entries is empty.
max([]) -> empty;
max([#queue{tid = Max}]) -> Max;
max(L) ->
    [#queue{tid = Max} | _] = sort_queue(L),
    Max.

%% Index read: take a read lock on the whole table and answer From
%% with the result of the index lookup.
%% The grant/reject/queue handling is exactly the one in try_lock/6,
%% with the ix_read operation used both as the requested and as the
%% performed operation, so delegate instead of duplicating it.
set_read_lock_on_all_keys(Tid, From, Tab, IxKey, Pos) ->
    Oid = {Tab, ?ALL},
    Op = {ix_read, IxKey, Pos},
    try_lock(Tid, Op, Op, read, From, Oid).
%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Release of locks

%% Release remote non-pending nodes
release_remote_non_pending(Node, Pending) ->
    %% Clear the mnesia_sticky_locks table first, to avoid
    %% unnecessary requests to the failing node
    ?ets_match_delete(mnesia_sticky_locks, {'_', Node}),

    %% Then we have to release all locks held by processes
    %% running at the failed node and also simply remove all
    %% queued requests back to the failed node

    AllTids = ?ets_match(mnesia_tid_locks, {'$1', '_', '_'}),
    Tids = [T || [T] <- AllTids,
                 Node == node(T#tid.pid),
                 not lists:member(T, Pending)],
    do_release_tids(Tids).

%% Release the locks of each of the given tids, in list order.
do_release_tids(Tids) ->
    lists:foreach(fun(Tid) -> do_release_tid(Tid) end, Tids).

%% Release all locks and queued requests of Tid and let waiting
%% requests on the affected objects have a new try.
do_release_tid(Tid) ->
    Locks = ?ets_lookup(mnesia_tid_locks, Tid),
    ?dbg("Release ~p ~p ~n", [Tid, Locks]),
    ?ets_delete(mnesia_tid_locks, Tid),
    release_locks(Locks),
    %% Remove queued locks which have had locks
    UniqueLocks = keyunique(lists:sort(Locks), []),
    rearrange_queue(UniqueLocks).

%% Keep only the first of consecutive entries with the same Oid.
%% The result is in reverse order relative to the input.
keyunique([{_Tid, Oid, _Op} | R], Acc = [{_, Oid, _} | _]) ->
    keyunique(R, Acc);
keyunique([H | R], Acc) ->
    keyunique(R, [H | Acc]);
keyunique([], Acc) ->
    Acc.

%% Release each of the given mnesia_tid_locks entries, in list order.
release_locks(Locks) ->
    lists:foreach(fun(Lock) -> release_lock(Lock) end, Locks).

%% Remove one held lock, or one queued request, of a tid.
release_lock({Tid, Oid, {queued, _}}) ->
    ?ets_match_delete(mnesia_lock_queue, #queue{oid = Oid, tid = Tid, op = '_',
                                                pid = '_', lucky = '_'});
release_lock({Tid, Oid, Op}) ->
    if
        Op == write ->
            ?ets_delete(mnesia_held_locks, Oid);
        Op == read ->
            ets:delete_object(mnesia_held_locks, {Oid, Op, Tid})
    end.
%% For each released lock, give the requests queued on the same
%% object, or table, a new try.
rearrange_queue([{_Tid, {Tab, Key}, _} | Locks]) ->
    if
        Key /= ?ALL ->
            %% An object lock was released: waiters on the whole table
            %% and on this object are candidates
            Queue =
                ets:lookup(mnesia_lock_queue, {Tab, ?ALL}) ++
                ets:lookup(mnesia_lock_queue, {Tab, Key}),
            case Queue of
                [] ->
                    ok;
                _ ->
                    Sorted = sort_queue(Queue),
                    try_waiters_obj(Sorted)
            end;
        true ->
            %% A table lock was released: every waiter on the table
            %% is a candidate
            Pat = ?match_oid_lock_queue({Tab, '_'}),
            Queue = ?ets_match_object(mnesia_lock_queue, Pat),
            Sorted = sort_queue(Queue),
            try_waiters_tab(Sorted)
    end,
    ?dbg("RearrQ ~p~n", [Queue]),
    rearrange_queue(Locks);
rearrange_queue([]) ->
    ok.

%% Try the waiters in order; stop at the first one that has to stay
%% in the queue.
try_waiters_obj([W | Waiters]) ->
    case try_waiter(W) of
        queued ->
            no;
        _ ->
            try_waiters_obj(Waiters)
    end;
try_waiters_obj([]) ->
    ok.

%% Try the waiters in order. A table lock request that has to stay
%% queued stops the traversal; an object lock request that has to stay
%% queued only excludes the remaining waiters on that same object.
try_waiters_tab([W | Waiters]) ->
    case W#queue.oid of
        {_Tab, ?ALL} ->
            case try_waiter(W) of
                queued ->
                    no;
                _ ->
                    try_waiters_tab(Waiters)
            end;
        Oid ->
            case try_waiter(W) of
                queued ->
                    Rest = key_delete_all(Oid, #queue.oid, Waiters),
                    try_waiters_tab(Rest);
                _ ->
                    try_waiters_tab(Waiters)
            end
    end;
try_waiters_tab([]) ->
    ok.

%% Translate the queued operation into the operation to perform and
%% the lock type to take, then retry the request.
try_waiter(#queue{oid = Oid, tid = Tid, op = read_write, pid = ReplyTo}) ->
    try_waiter(Oid, read_write, read, write, ReplyTo, Tid);
try_waiter(#queue{oid = Oid, tid = Tid, op = {ix_read, _, _} = IXR, pid = ReplyTo}) ->
    try_waiter(Oid, IXR, IXR, read, ReplyTo, Tid);
try_waiter(#queue{oid = Oid, tid = Tid, op = Op, pid = ReplyTo}) ->
    try_waiter(Oid, Op, Op, Op, ReplyTo, Tid).
%% Retry a queued lock request.
%% Returns locked (granted and answered), queued (kept in the queue)
%% or removed (rejected and answered).
try_waiter(Oid, Op, SimpleOp, Lock, ReplyTo, Tid) ->
    QueuePat = #queue{oid = Oid, tid = Tid, op = Op,
                      pid = ReplyTo, lucky = '_'},
    case can_lock(Tid, Lock, Oid, {queue, bad_luck}) of
        yes ->
            %% Delete from queue: Nice place for trace output
            ?ets_match_delete(mnesia_lock_queue, QueuePat),
            Reply = grant_lock(Tid, SimpleOp, Lock, Oid),
            reply(ReplyTo, Reply),
            locked;
        {queue, _Why} ->
            ?dbg("Keep ~p ~p ~p ~p~n", [Tid, Oid, Lock, _Why]),
            queued;  % Keep waiter in queue
        {no, Lucky} ->
            C = #cyclic{op = SimpleOp, lock = Lock, oid = Oid, lucky = Lucky},
            verbose("** WARNING ** Restarted transaction, possible deadlock in lock queue ~w: cyclic = ~w~n",
                    [Tid, C]),
            ?ets_match_delete(mnesia_lock_queue, QueuePat),
            Reply = {not_granted, C},
            reply(ReplyTo, Reply),
            removed
    end.

%% Remove every tuple whose element at position Pos equals Key.
%% The order of the remaining elements is preserved.
key_delete_all(Key, Pos, TupleList) ->
    key_delete_all(Key, Pos, TupleList, []).

key_delete_all(Key, Pos, [H | T], Kept) when element(Pos, H) == Key ->
    key_delete_all(Key, Pos, T, Kept);
key_delete_all(Key, Pos, [H | T], Kept) ->
    key_delete_all(Key, Pos, T, [H | Kept]);
key_delete_all(_, _, [], Kept) ->
    lists:reverse(Kept).

%% Look up IxKey in the index on position Pos and read the objects
%% stored under each of the real keys found.
ix_read_res(Tab, IxKey, Pos) ->
    Index = mnesia_index:get_index_table(Tab, Pos),
    RealKeys = mnesia_lib:elems(2, mnesia_index:db_get(Index, IxKey)),
    lists:append([mnesia_lib:db_get(Tab, Real) || Real <- RealKeys]).

%% ********************* end server code ********************
%% The following code executes at the client side of a transaction

mnesia_down(N, Pending) ->
    case whereis(?MODULE) of
        undefined ->
            %% Takes care of mnesia_down's in early startup
            mnesia_monitor:mnesia_down(?MODULE, N);
        Pid ->
            %% Synchronous call needed in order to avoid
            %% race with mnesia_tm's coordinator processes
            %% that may restart and acquire new locks.
            %% mnesia_monitor ensures the sync.
            Pid ! {release_remote_non_pending, N, Pending}
    end.

%% Acquire a write lock, but do a read, used by
%% mnesia:wread/1

rwlock(Tid, Store, Oid) ->
    {Tab, Key} = Oid,
    case val({Tab, where_to_read}) of
        nowhere ->
            mnesia:abort({no_exists, Tab});
        ReadNode ->
            case need_lock(Store, Tab, Key, write) of
                yes ->
                    WriteNodes = w_nodes(Tab),
                    Value = get_rwlocks_on_nodes(WriteNodes, rwlock, ReadNode,
                                                 Store, Tid, Oid),
                    ?ets_insert(Store, {{locks, Tab, Key}, write}),
                    Value;
                no when Key == ?ALL; Tab == ?GLOBAL ->
                    w_nodes(Tab);
                no ->
                    %% Lock already held: just read the object
                    dirty_rpc(ReadNode, Tab, Key, write)
            end
    end.

%% Return the non-empty where_to_write node list of Tab,
%% or abort the transaction with {no_exists, Tab}.

w_nodes(Tab) ->
    Found = ?catch_val({Tab, where_to_write}),
    case Found of
        [_ | _] -> Found;
        _ -> mnesia:abort({no_exists, Tab})
    end.

%% Acquire a sticky wlock, a sticky lock is a lock
%% which remains at this node after the termination of the
%% transaction.

sticky_wlock(Tid, Store, Oid) ->
    sticky_lock(Tid, Store, Oid, write).

sticky_rwlock(Tid, Store, Oid) ->
    sticky_lock(Tid, Store, Oid, read_write).

%% Sticky locks are only taken when the table is read from the local
%% node; otherwise the transaction is aborted with {not_local, Tab}.
sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
    ReadNode = val({Tab, where_to_read}),
    case node() == ReadNode of
        true ->
            case need_lock(Store, Tab, Key, write) of
                yes ->
                    do_sticky_lock(Tid, Store, Oid, Lock);
                no ->
                    dirty_sticky_lock(Tab, Key, [ReadNode], Lock)
            end;
        false ->
            mnesia:abort({not_local, Tab})
    end.

%% Sends a test_set_sticky request to the local lock manager and acts
%% on its answer.
do_sticky_lock(Tid, Store, {Tab, Key} = Oid, Lock) ->
    ?MODULE ! {self(), {test_set_sticky, Tid, Oid, Lock}},
    Here = node(),
    receive
        {?MODULE, Here, granted} ->
            remember_sticky_wlock(Store, Tab, Key),
            granted;
        {?MODULE, Here, {granted, Raw}} -> %% for rwlocks
            case opt_lookup_in_client(Raw, Oid, write) of
                #cyclic{} = Cyclic ->
                    exit({aborted, Cyclic});
                Value ->
                    remember_sticky_wlock(Store, Tab, Key),
                    Value
            end;
        {?MODULE, Here, {not_granted, Reason}} ->
            exit({aborted, Reason});
        {?MODULE, Here, not_stuck} ->
            not_stuck(Tid, Store, Tab, Key, Oid, Lock, Here),
            dirty_sticky_lock(Tab, Key, [Here], Lock);
        {mnesia_down, DownNode} ->
            flush_remaining([Here], DownNode,
                            {aborted, {node_not_running, DownNode}});
        {?MODULE, Here, {stuck_elsewhere, _N2}} ->
            stuck_elsewhere(Tid, Store, Tab, Key, Oid, Lock),
            dirty_sticky_lock(Tab, Key, [Here], Lock)
    end.

%% Records the write lock and all where_to_write nodes of Tab in Store.
remember_sticky_wlock(Store, Tab, Key) ->
    ?ets_insert(Store, {{locks, Tab, Key}, write}),
    lists:foreach(fun(Node) -> ?ets_insert(Store, {nodes, Node}) end,
                  w_nodes(Tab)).

%% Takes the ordinary locks and then asks all write nodes to stick Oid to N.
not_stuck(Tid, Store, Tab, _Key, Oid, _Lock, N) ->
    Ns = sticky_sync_locks(Tid, Store, Tab, Oid),
    rpc:abcast(Ns, ?MODULE, {stick, Oid, N}).

%% Takes the ordinary locks and then asks all write nodes to unstick Tab.
stuck_elsewhere(Tid, Store, Tab, _Key, Oid, _Lock) ->
    Ns = sticky_sync_locks(Tid, Store, Tab, Oid),
    rpc:abcast(Ns, ?MODULE, {unstick, Tab}).

%% Locks shared by not_stuck/7 and stuck_elsewhere/6; returns the
%% where_to_write nodes of Tab.
sticky_sync_locks(Tid, Store, Tab, Oid) ->
    rlock(Tid, Store, {Tab, ?ALL}),   %% needed?
    wlock(Tid, Store, Oid),           %% perfect sync
    wlock(Tid, Store, {Tab, ?STICK}), %% max one sticker/table
    val({Tab, where_to_write}).

%% Result of a sticky lock request that needs no further locking.
dirty_sticky_lock(Tab, Key, Nodes, Lock) ->
    if
        Lock == read_write ->
            mnesia_lib:db_get(Tab, Key);
        Key == ?ALL; Tab == ?GLOBAL ->
            Nodes;
        true ->
            ok
    end.

sticky_wlock_table(Tid, Store, Tab) ->
    sticky_lock(Tid, Store, {Tab, ?ALL}, write).

%% Acquire a wlock on Oid
%% We store a {Tabname, write, Tid} in all locktables
%% on all nodes containing a copy of Tabname
%% We also store an item {{locks, Tab, Key}, write} in the
%% local store when we have acquired the lock.
%%
wlock(Tid, Store, Oid) ->
    {Tab, Key} = Oid,
    case need_lock(Store, Tab, Key, write) of
        yes ->
            WriteNodes = w_nodes(Tab),
            Request = {self(), {write, Tid, Oid}},
            ?ets_insert(Store, {{locks, Tab, Key}, write}),
            get_wlocks_on_nodes(WriteNodes, WriteNodes, Store, Request, Oid);
        no when Key /= ?ALL, Tab /= ?GLOBAL ->
            [];
        no ->
            w_nodes(Tab)
    end.

wlock_table(Tid, Store, Tab) ->
    wlock(Tid, Store, {Tab, ?ALL}).

%% Write lock even if the table does not exist

wlock_no_exist(Tid, Store, Tab, Ns) ->
    Oid = {Tab, ?ALL},
    get_wlocks_on_nodes(Ns, Ns, Store, {self(), {write, Tid, Oid}}, Oid).

%% Returns 'no' when Store already holds a lock matching LockPattern on
%% the whole table or on Key, otherwise 'yes'.
need_lock(Store, Tab, Key, LockPattern) ->
    case ?ets_match_object(Store, {{locks, Tab, ?ALL}, LockPattern}) of
        [] ->
            case ?ets_match_object(Store, {{locks, Tab, Key}, LockPattern}) of
                [] -> yes;
                _ -> no
            end;
        _ ->
            no
    end.

add_debug(Nodes) ->  % Use process dictionary for debug info
    put(mnesia_wlock_nodes, Nodes).

del_debug() ->
    erase(mnesia_wlock_nodes).

%% We first send lock request to the local node if it is part of the lockers
%% then the first sorted node then to the rest of the lockmanagers on all
%% nodes holding a copy of the table

get_wlocks_on_nodes([], Orig, _Store, _Request, _Oid) ->
    Orig;
get_wlocks_on_nodes([First | Others], Orig, Store, Request, Oid) ->
    {?MODULE, First} ! Request,
    ?ets_insert(Store, {nodes, First}),
    receive_wlocks([First], undefined, Store, Oid),
    case node() =:= First of
        true ->
            %% Local done try one more
            get_wlocks_on_nodes(Others, Orig, Store, Request, Oid);
        false ->
            %% The first succeeded, continue with the rest
            get_wlocks_on_nodes(Others, Store, Request),
            receive_wlocks(Others, Orig, Store, Oid)
    end.

%% Sends Request to the lock manager on every node and notes each node
%% in Store; the replies are collected by the caller.
get_wlocks_on_nodes(Nodes, Store, Request) ->
    lists:foreach(fun(Node) ->
                          {?MODULE, Node} ! Request,
                          ?ets_insert(Store, {nodes, Node})
                  end,
                  Nodes).

%% Requests write locks on all nodes; the node ReadNode gets a
%% read_write request so that its reply carries the value read.
%% Res is the value collected so far ('rwlock' until one arrives).
get_rwlocks_on_nodes([ReadNode | Others], _Res, ReadNode, Store, Tid, Oid) ->
    {?MODULE, ReadNode} ! {self(), {read_write, Tid, Oid}},
    ?ets_insert(Store, {nodes, ReadNode}),
    Value = receive_wlocks([ReadNode], undefined, Store, Oid),
    case node() of
        ReadNode ->
            get_rwlocks_on_nodes(Others, Value, ReadNode, Store, Tid, Oid);
        _ ->
            get_wlocks_on_nodes(Others, Store, {self(), {write, Tid, Oid}}),
            receive_wlocks(Others, Value, Store, Oid)
    end;
get_rwlocks_on_nodes([First | Others], Res, ReadNode, Store, Tid, Oid) ->
    WriteReq = {self(), {write, Tid, Oid}},
    {?MODULE, First} ! WriteReq,
    ?ets_insert(Store, {nodes, First}),
    receive_wlocks([First], undefined, Store, Oid),
    if
        node() == First ->
            get_rwlocks_on_nodes(Others, Res, ReadNode, Store, Tid, Oid);
        Res == rwlock -> %% Hmm
            %% No value read yet: ask ReadNode next, plain write on the rest
            Rest = lists:delete(ReadNode, Others),
            {?MODULE, ReadNode} ! {self(), {read_write, Tid, Oid}},
            ?ets_insert(Store, {nodes, ReadNode}),
            get_wlocks_on_nodes(Rest, Store, {self(), {write, Tid, Oid}}),
            receive_wlocks([ReadNode | Rest], undefined, Store, Oid);
        true ->
            get_wlocks_on_nodes(Others, Store, {self(), {write, Tid, Oid}}),
            receive_wlocks(Others, Res, Store, Oid)
    end;
get_rwlocks_on_nodes([], Res, _, _, _, _) ->
    Res.

%% Collects one reply per node in the list.  Returns Res (replaced by the
%% value of a {granted, Val} reply) or exits via flush_remaining/3.
receive_wlocks([], Res, _Store, _Oid) ->
    del_debug(),
    Res;
receive_wlocks(Pending = [First | Others], Res, Store, Oid) ->
    add_debug(Pending),
    receive
        {?MODULE, From, granted} ->
            receive_wlocks(lists:delete(From, Pending), Res, Store, Oid);
        {?MODULE, From, {granted, Raw}} -> %% for rwlocks
            case opt_lookup_in_client(Raw, Oid, write) of
                #cyclic{} = Cyclic ->
                    flush_remaining(Pending, From, {aborted, Cyclic});
                Value ->
                    receive_wlocks(lists:delete(From, Pending), Value, Store, Oid)
            end;
        {?MODULE, From, {not_granted, Reason}} ->
            flush_remaining(Pending, From, {aborted, Reason});
        {?MODULE, From, {switch, Sticky, _Req}} -> %% for rwlocks
            Remaining = lists:delete(From, Pending),
            Nonstuck = lists:delete(Sticky, Remaining),
            lists:foreach(fun(NSNode) -> ?ets_insert(Store, {nodes, NSNode}) end,
                          Nonstuck),
            StickyPending = lists:member(Sticky, Remaining),
            sticky_flush(Nonstuck, Store),
            case StickyPending of
                true ->
                    receive_wlocks([Sticky], Res, Store, Oid);
                false ->
                    Res
            end;
        {mnesia_down, First} -> % Only look for down from Nodes in list
            flush_remaining(Others, First, {aborted, {node_not_running, First}})
    end.

%% Consumes one lock manager reply from each listed node, in list order.
sticky_flush([], _) ->
    del_debug(),
    ok;
sticky_flush(Pending = [Next | Others], Store) ->
    add_debug(Pending),
    receive
        {?MODULE, Next, _} ->
            sticky_flush(Others, Store);
        {mnesia_down, Next} ->
            flush_remaining(Others, Next, {aborted, {node_not_running, Next}})
    end.

%% Consumes the outstanding replies from every listed node except
%% SkipNode and then exits with Res (replaced by node_not_running if one
%% of the awaited nodes goes down).
flush_remaining([], _SkipNode, Res) ->
    del_debug(),
    exit(Res);
flush_remaining([SkipNode | Others], SkipNode, Res) ->
    flush_remaining(Others, SkipNode, Res);
flush_remaining(Pending = [Next | Others], SkipNode, Res) ->
    add_debug(Pending),
    receive
        {?MODULE, Next, _} ->
            flush_remaining(Others, SkipNode, Res);
        {mnesia_down, Next} ->
            flush_remaining(Others, SkipNode, {aborted, {node_not_running, Next}})
    end.

%% When the lock manager answered 'lookup_in_client' the object is read
%% here; any other value is returned untouched.
opt_lookup_in_client(lookup_in_client, Oid, Lock) ->
    {Tab, Key} = Oid,
    case catch mnesia_lib:db_get(Tab, Key) of
        {'EXIT', _} ->
            %% Table has been deleted from this node,
            %% restart the transaction.
            #cyclic{op = read, lock = Lock, oid = Oid, lucky = nowhere};
        Found ->
            Found
    end;
opt_lookup_in_client(Val, _Oid, _Lock) ->
    Val.

%% Whole-table and global locks return the node list, all others 'granted'.
return_granted_or_nodes(Oid, Nodes) ->
    case Oid of
        {_, ?ALL} -> Nodes;
        {?GLOBAL, _} -> Nodes;
        _ -> granted
    end.

%% We store a {Tab, read, From} item in the
%% locks table on the node where we actually do pick up the object
%% and we also store an item {lock, Oid, read} in our local store
%% so that we can release any locks we hold when we commit.
%% This function not only acquires a read lock, but also reads the object

%% Oid's are always {Tab, Key} tuples
rlock(Tid, Store, Oid) ->
    {Tab, Key} = Oid,
    case val({Tab, where_to_read}) of
        nowhere ->
            mnesia:abort({no_exists, Tab});
        ReadNode ->
            case need_lock(Store, Tab, Key, '_') of
                yes ->
                    Reply = l_request(ReadNode, {read, Tid, Oid}, Store),
                    rlock_get_reply(ReadNode, Store, Oid, Reply);
                no when Key == ?ALL; Tab == ?GLOBAL ->
                    [ReadNode];
                no ->
                    dirty_rpc(ReadNode, Tab, Key, read)
            end
    end.

%% Reads {Tab, Key} on Node without taking a lock.
dirty_rpc(nowhere, Tab, Key, _Lock) ->
    mnesia:abort({no_exists, {Tab, Key}});
dirty_rpc(Node, _Tab, ?ALL, _Lock) ->
    [Node];
dirty_rpc(Node, ?GLOBAL, _Key, _Lock) ->
    [Node];
dirty_rpc(Node, Tab, Key, Lock) ->
    Args = [Tab, Key],
    case rpc:call(Node, mnesia_lib, db_get, Args) of
        {badrpc, Reason} ->
            StillReadNode = val({Tab, where_to_read}),
            if
                StillReadNode =:= Node ->
                    ErrorTag = mnesia_lib:dirty_rpc_error_tag(Reason),
                    mnesia:abort({ErrorTag, Args});
                true ->
                    %% Table has been deleted from the node,
                    %% restart the transaction.
                    exit({aborted, #cyclic{op = read, lock = Lock,
                                           oid = {Tab, Key}, lucky = nowhere}})
            end;
        Result ->
            Result
    end.

%% Interprets the lock manager's answer to a read request.
rlock_get_reply(Node, Store, Oid, {granted, V}) ->
    {Tab, Key} = Oid,
    ?ets_insert(Store, {{locks, Tab, Key}, read}),
    ?ets_insert(Store, {nodes, Node}),
    case opt_lookup_in_client(V, Oid, read) of
        #cyclic{} = Cyclic ->
            mnesia:abort(Cyclic);
        Value ->
            Value
    end;
rlock_get_reply(Node, Store, Oid, granted) ->
    {Tab, Key} = Oid,
    ?ets_insert(Store, {{locks, Tab, Key}, read}),
    ?ets_insert(Store, {nodes, Node}),
    return_granted_or_nodes(Oid, [Node]);
rlock_get_reply(Node, Store, Tab, {granted, V, RealKeys}) ->
    %% Kept for backwards compatibility, keep until no old nodes
    %% are available
    lists:foreach(fun(K) -> ?ets_insert(Store, {{locks, Tab, K}, read}) end,
                  RealKeys),
    ?ets_insert(Store, {nodes, Node}),
    V;
rlock_get_reply(_Node, _Store, _Oid, {not_granted, Reason}) ->
    exit({aborted, Reason});
rlock_get_reply(_Node, Store, Oid, {switch, N2, Req}) ->
    %% Redirected: repeat the request on node N2
    ?ets_insert(Store, {nodes, N2}),
    {?MODULE, N2} ! Req,
    rlock_get_reply(N2, Store, Oid, l_req_rec(N2, Store)).

rlock_table(Tid, Store, Tab) ->
    rlock(Tid, Store, {Tab, ?ALL}).

%% Index read.  When the whole table is already read locked and the
%% table is read locally the index is consulted directly; otherwise the
%% request goes through the lock manager on the read node.
ixrlock(Tid, Store, Tab, IxKey, Pos) ->
    case val({Tab, where_to_read}) of
        nowhere ->
            mnesia:abort({no_exists, Tab});
        ReadNode ->
            case need_lock(Store, Tab, ?ALL, read) of
                no when ReadNode =:= node() ->
                    ix_read_res(Tab, IxKey, Pos);
                _ -> %% yes or need to get the result from other node
                    Reply = l_request(ReadNode, {ix_read, Tid, Tab, IxKey, Pos}, Store),
                    rlock_get_reply(ReadNode, Store, Tab, Reply)
            end
    end.

%% Grabs the locks or exits
global_lock(Tid, Store, Item, write, Ns) ->
    Oid = {?GLOBAL, Item},
    get_wlocks_on_nodes(Ns, Ns, Store, {self(), {write, Tid, Oid}}, Oid);
global_lock(Tid, Store, Item, read, Ns) ->
    Oid = {?GLOBAL, Item},
    send_requests(Ns, {read, Tid, Oid}),
    rec_requests(Ns, Oid, Store),
    Ns.

%% Sends {self(), X} to the lock manager on every node.
send_requests(Nodes, X) ->
    lists:foreach(fun(Node) -> {?MODULE, Node} ! {self(), X} end, Nodes).

%% Collects the answers to send_requests/2 in node order.  On failure the
%% replies still outstanding are consumed by flush_remaining/3, which exits.
rec_requests([], _Oid, _Store) ->
    ok;
rec_requests([Node | Others], Oid, Store) ->
    Reply = l_req_rec(Node, Store),
    case catch rlock_get_reply(Node, Store, Oid, Reply) of
        {'EXIT', Reason} ->
            flush_remaining(Others, Node, Reason);
        _ ->
            rec_requests(Others, Oid, Store)
    end.

get_held_locks() ->
    ?MODULE ! {get_table, self(), mnesia_held_locks},
    receive
        {mnesia_held_locks, Locks} -> Locks
    end.

get_lock_queue() ->
    ?MODULE ! {get_table, self(), mnesia_lock_queue},
    Queue = receive
                {mnesia_lock_queue, Locks} -> Locks
            end,
    [{Oid, Op, Pid, Tid, WFT} || {queue, Oid, Tid, Op, Pid, WFT} <- Queue].

do_stop() ->
    exit(shutdown).

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% System upgrade

system_continue(_Parent, _Debug, State) ->
    loop(State).

system_terminate(_Reason, _Parent, _Debug, _State) ->
    do_stop().

system_code_change(State, _Module, _OldVsn, _Extra) ->
    {ok, State}.


%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% AXD301 patch sort pids according to R9B sort order
%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% If R9B == true, the comparison is done as in plain R9B.
%% If R9B == false, the comparison is done as in all other releases.
%% cmp_tid(R9B, T1, T2) returns -1 if T1 < T2, 0 if T1 = T2 and 1 if T1 > T2.

-define(VERSION_MAGIC,       131).
-define(ATOM_EXT,            100).
-define(PID_EXT,             103).

-record(pid_info, {serial, number, nodename, creation}).

cmp_tid(R9B, #tid{} = T, #tid{} = T) when is_boolean(R9B) ->
    0;
cmp_tid(R9B, #tid{counter = C, pid = Pid1}, #tid{counter = C, pid = Pid2})
  when is_boolean(R9B) ->
    %% Same counter: the owning pids decide the order
    cmp_pid_info(R9B, pid_to_pid_info(Pid1), pid_to_pid_info(Pid2));
cmp_tid(R9B, #tid{counter = C1}, #tid{counter = C2}) when is_boolean(R9B) ->
    cmp(C1, C2).

%% Compares two #pid_info{} records field by field and returns -1, 0 or 1.
%% The first argument selects the order in which the fields are compared:
%%   false: serial, number, nodename, creation
%%   true : nodename, creation, serial, number
cmp_pid_info(_, #pid_info{} = PI, #pid_info{} = PI) ->
    0;
cmp_pid_info(R9B, #pid_info{} = PI1, #pid_info{} = PI2) ->
    cmp_pid_fields(pid_info_fields(R9B, PI1), pid_info_fields(R9B, PI2)).

%% Fields of a #pid_info{} in order of decreasing significance.
pid_info_fields(false, #pid_info{serial = S, number = N, nodename = NN, creation = C}) ->
    [S, N, NN, C];
pid_info_fields(true, #pid_info{serial = S, number = N, nodename = NN, creation = C}) ->
    [NN, C, S, N].

%% The first pair of fields that is not identical decides the result.
cmp_pid_fields([Same | Xs], [Same | Ys]) ->
    cmp_pid_fields(Xs, Ys);
cmp_pid_fields([X | _], [Y | _]) ->
    cmp(X, Y);
cmp_pid_fields([], []) ->
    0.

%% Three-way comparison in the standard term order.
cmp(Same, Same) -> 0;
cmp(A, B) when A < B -> -1;
cmp(_A, _B) -> 1.

%% Extracts serial, number and creation from the external term format of
%% Pid.  Only the PID_EXT/ATOM_EXT encoding matched below is accepted.
%% NOTE(review): other pid or atom encodings produced by term_to_binary/1
%% would fail the match - confirm which emulator versions this must run on.
pid_to_pid_info(Pid) when is_pid(Pid) ->
    [?VERSION_MAGIC, ?PID_EXT, ?ATOM_EXT, NNL1, NNL0 | Rest]
        = binary_to_list(term_to_binary(Pid)),
    %% Skip the node name, whose length is given by the two bytes NNL1, NNL0
    AfterName = drop(bytes2int(NNL1, NNL0), Rest),
    [N3, N2, N1, N0, S3, S2, S1, S0, Creation] = AfterName,
    #pid_info{serial = bytes2int(S3, S2, S1, S0),
              number = bytes2int(N3, N2, N1, N0),
              nodename = node(Pid),
              creation = Creation}.

%% Drops the first Count elements of a list; a list that is too short
%% yields [].
drop(0, List) -> List;
drop(Count, [_ | Rest]) when is_integer(Count), Count > 0 -> drop(Count - 1, Rest);
drop(Count, []) when is_integer(Count), Count > 0 -> [].

%% Big-endian combination of two or four bytes into one integer.
bytes2int(Hi, Lo) when 0 =< Hi, Hi =< 255,
                       0 =< Lo, Lo =< 255 ->
    (Hi bsl 8) bor Lo.
bytes2int(B3, B2, B1, B0) when 0 =< B3, B3 =< 255,
                               0 =< B2, B2 =< 255,
                               0 =< B1, B1 =< 255,
                               0 =< B0, B0 =< 255 ->
    (B3 bsl 24) bor (B2 bsl 16) bor (B1 bsl 8) bor B0.
+ diff --git a/lib/mnesia/src/mnesia_log.erl b/lib/mnesia/src/mnesia_log.erl new file mode 100644 index 0000000000..00ec4740ee --- /dev/null +++ b/lib/mnesia/src/mnesia_log.erl @@ -0,0 +1,1025 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% This module administers three kinds of log files: +%% +%% 1 The transaction log +%% mnesia_tm appends to the log (via mnesia_log) at the +%% end of each transaction (or dirty write) and +%% mnesia_dumper reads the log and performs the ops in +%% the dat files. The dump_log is done at startup and +%% at intervals controlled by the user. +%% +%% 2 The mnesia_down log +%% mnesia_tm appends to the log (via mnesia_log) when it +%% realizes that mnesia goes up or down on another node. +%% mnesia_init reads the log (via mnesia_log) at startup. +%% +%% 3 The backup log +%% mnesia_schema produces one tiny log when the schema is +%% initially created. mnesia_schema also reads the log +%% when the user wants tables (possibly incl the schema) +%% to be restored. mnesia_log appends to the log when the +%% user wants to produce a real backup. +%% +%% The actual access to the backup media is performed via the +%% mnesia_backup module for both read and write. 
mnesia_backup +%% uses the disk_log (*), BUT the user may write their own module +%% with the same interface as mnesia_backup and configure +%% Mnesia so the alternate module performs the actual accesses +%% to the backup media. This means that the user may put the +%% backup on media that Mnesia does not know about, possibly on +%% hosts where Erlang is not running. +%% +%% All these logs have to some extent a common structure. +%% They are all using the disk_log module (*) for the basic +%% file structure. The disk_log has a repair feature that +%% can be used to skip erroneous log records if one comes to +%% the conclusion that it is more important to reuse some +%% of the log records than the risk of obtaining inconsistent +%% data. If the data becomes inconsistent it is solely up to the +%% application to make it consistent again. The automatic +%% repair of the disk_log is very powerful, but use it +%% with extreme care. +%% +%% First in every Mnesia log file is a mnesia log header. +%% It contains a list with a log_header record as single +%% element. The structure of the log_header may never be +%% changed since it may be written to very old backup files. +%% By holding this record definition stable we are +%% able to comprehend backups from timepoint 0. It also +%% allows us to use the backup format as an interchange +%% format between Mnesia releases. +%% +%% An op-list is a list of tuples with arity 3. Each tuple +%% has this structure: {Oid, Recs, Op} where Oid is the tuple +%% {Tab, Key}, Recs is a (possibly empty) list of records and +%% Op is an atom. +%% +%% The log file structure for the transaction log is as follows. +%% +%% After the mnesia log section follows an extended record section +%% containing op-lists. There are several values that Op may +%% have, such as write, delete, update_counter, delete_object, +%% and replace. There is no special end of section marker. 
+%% +%% +-----------------+ +%% | mnesia log head | +%% +-----------------+ +%% | extended record | +%% | section | +%% +-----------------+ +%% +%% The log file structure for the mnesia_down log is as follows. +%% +%% After the mnesia log section follows a mnesia_down section +%% containing lists with yoyo records as single element. +%% +%% +-----------------+ +%% | mnesia log head | +%% +-----------------+ +%% | mnesia_down | +%% | section | +%% +-----------------+ +%% +%% The log file structure for the backup log is as follows. +%% +%% After the mnesia log section follows a schema section +%% containing record lists. A record list is a list of tuples +%% where {schema, Tab} is interpreted as a delete_table(Tab) and +%% {schema, Tab, CreateList} is interpreted as create_table. +%% +%% The record section also contains record lists. In this section +%% {Tab, Key} is interpreted as delete({Tab, Key}) and other tuples +%% as write(Tuple). There is no special end of section marker. +%% +%% +-----------------+ +%% | mnesia log head | +%% +-----------------+ +%% | schema section | +%% +-----------------+ +%% | record section | +%% +-----------------+ +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(mnesia_log). 

-export([
	 append/2,
	 backup/1,
	 backup/2,
	 backup_checkpoint/2,
	 backup_checkpoint/3,
	 backup_log_header/0,
	 backup_master/2,
	 chunk_decision_log/1,
	 chunk_decision_tab/1,
	 chunk_log/1,
	 chunk_log/2,
	 close_decision_log/0,
	 close_decision_tab/0,
	 close_log/1,
	 unsafe_close_log/1,
	 confirm_log_dump/1,
	 confirm_decision_log_dump/0,
	 previous_log_file/0,
	 previous_decision_log_file/0,
	 latest_log_file/0,
	 decision_log_version/0,
	 decision_log_file/0,
	 decision_tab_file/0,
	 decision_tab_version/0,
	 dcl_version/0,
	 dcd_version/0,
	 ets2dcd/1,
	 ets2dcd/2,
	 dcd2ets/1,
	 dcd2ets/2,
	 init/0,
	 init_log_dump/0,
	 log/1,
	 slog/1,
	 log_decision/1,
	 log_files/0,
	 open_decision_log/0,
	 trans_log_header/0,
	 open_decision_tab/0,
	 dcl_log_header/0,
	 dcd_log_header/0,
	 open_log/4,
	 open_log/6,
	 prepare_decision_log_dump/0,
	 prepare_log_dump/1,
	 save_decision_tab/1,
	 purge_all_logs/0,
	 purge_some_logs/0,
	 stop/0,
	 tab_copier/3,
	 version/0,
	 view/0,
	 view/1,
	 write_trans_log_header/0
	]).


-include("mnesia.hrl").
-import(mnesia_lib, [val/1, dir/1]).
-import(mnesia_lib, [exists/1, fatal/2, error/2, dbg_out/2]).

%% One header constructor per kind of log; each pairs the log kind with
%% its format version.
trans_log_header()    -> log_header(trans_log, version()).
backup_log_header()   -> log_header(backup_log, "1.2").
decision_log_header() -> log_header(decision_log, decision_log_version()).
decision_tab_header() -> log_header(decision_tab, decision_tab_version()).
dcl_log_header()      -> log_header(dcl_log, dcl_version()).
dcd_log_header()      -> log_header(dcd_log, dcd_version()).

%% Builds the #log_header{} record for a log of the given kind, stamped
%% with the running Mnesia version, the local node and the current time.
log_header(Kind, Version) ->
    MnesiaVsn = mnesia:system_info(version),
    Created = now(),
    #log_header{log_version = Version,
                log_kind = Kind,
                mnesia_version = MnesiaVsn,
                node = node(),
                now = Created}.

%% Format versions of the different logs.
version() -> "4.3".

decision_log_version() -> "3.0".

decision_tab_version() -> "1.0".

dcl_version() -> "1.0".
dcd_version() -> "1.0".

%% Asynchronous append; binaries are logged with balog, terms with alog.
append(Log, Data) ->
    case is_binary(Data) of
        true ->
            disk_log:balog(Log, Data);
        false ->
            disk_log:alog(Log, Data)
    end.

%% Synced append; crashes with badmatch unless disk_log answers ok.
sappend(Log, Data) ->
    case is_binary(Data) of
        true ->
            ok = disk_log:blog(Log, Data);
        false ->
            ok = disk_log:log(Log, Data)
    end.

%% Write commit records to the latest_log
log(C) when C#commit.disc_copies == [],
            C#commit.disc_only_copies == [],
            C#commit.schema_ops == [] ->
    %% Nothing that has to reach the disc
    ignore;
log(C) ->
    case mnesia_monitor:use_dir() of
        true ->
            append(latest_log, strip_ram_part(C)),
            mnesia_dumper:incr_log_writes();
        false ->
            ignore
    end.

%% Synced

slog(C) when C#commit.disc_copies == [],
             C#commit.disc_only_copies == [],
             C#commit.schema_ops == [] ->
    ignore;
slog(C) ->
    case mnesia_monitor:use_dir() of
        true ->
            sappend(latest_log, strip_ram_part(C)),
            mnesia_dumper:incr_log_writes();
        false ->
            ignore
    end.

%% A commit record is logged without its ram_copies and snmp parts.
%% Anything else (a commit record as binary, or some decision related
%% info) is logged unchanged.
strip_ram_part(C) when is_record(C, commit) ->
    C#commit{ram_copies = [], snmp = []};
strip_ram_part(Other) ->
    Other.


%% Stuff related to the file LOG

%% Returns a list of logfiles. The oldest is first.
log_files() ->
    [previous_log_file(), latest_log_file(), decision_tab_file()].

latest_log_file() -> dir(latest_log_name()).

previous_log_file() -> dir("PREVIOUS.LOG").

decision_log_file() -> dir(decision_log_name()).

decision_tab_file() -> dir(decision_tab_name()).

previous_decision_log_file() -> dir("PDECISION.LOG").

latest_log_name() -> "LATEST.LOG".

decision_log_name() -> "DECISION.LOG".

decision_tab_name() -> "DECISION_TAB.LOG".

%% When a Mnesia directory is used: verifies that neither the previous
%% nor the latest transaction log exists and opens a new latest_log.
init() ->
    case mnesia_monitor:use_dir() of
        true ->
            verify_no_exists(previous_log_file()),
            LatestFile = latest_log_file(),
            verify_no_exists(LatestFile),
            open_log(latest_log, trans_log_header(), LatestFile);
        false ->
            ok
    end.

%% Reports a fatal error if the file already exists.
verify_no_exists(Fname) ->
    case exists(Fname) of
        true ->
            fatal("Log file exists: ~p~n", [Fname]);
        false ->
            ok
    end.

%% open_log/3..6: each arity fills in one more default
%%   Exists - whether Fname exists
%%   Repair - the auto_repair setting
%%   Mode   - read_only for previous_log, read_write otherwise
open_log(Name, Header, Fname) ->
    open_log(Name, Header, Fname, exists(Fname)).

open_log(Name, Header, Fname, Exists) ->
    open_log(Name, Header, Fname, Exists, mnesia_monitor:get_env(auto_repair)).

open_log(Name, Header, Fname, Exists, Repair) ->
    Mode = case Name == previous_log of
               true -> read_only;
               false -> read_write
           end,
    open_log(Name, Header, Fname, Exists, Repair, Mode).

%% Opens the log and returns it.  Header is written when the file did
%% not exist before.
open_log(Name, Header, Fname, Exists, Repair, Mode) ->
    Args = [{file, Fname}, {name, Name}, {repair, Repair}, {mode, Mode}],
    case mnesia_monitor:open_log(Args) of
        {ok, Log} ->
            header_unless_existing(Exists, Log, Header);
        {repaired, Log, _, {badbytes, 0}} ->
            header_unless_existing(Exists, Log, Header);
        {repaired, Log, _Recover, BadBytes} ->
            mnesia_lib:important("Data may be missing, log ~p repaired: Lost ~p bytes~n",
                                 [Fname, BadBytes]),
            Log;
        {error, Reason} when Repair == true ->
            file:delete(Fname),
            mnesia_lib:important("Data may be missing, Corrupt logfile deleted: ~p, ~p ~n",
                                 [Fname, Reason]),
            %% Create a new
            open_log(Name, Header, Fname, false, false, read_write);
        {error, Reason} ->
            fatal("Cannot open log file ~p: ~p~n", [Fname, Reason])
    end.

%% Writes Header to a log whose file did not exist before; returns Log.
header_unless_existing(true, Log, _Header) ->
    Log;
header_unless_existing(_, Log, Header) ->
    write_header(Log, Header),
    Log.

write_header(Log, Header) ->
    append(Log, Header).

write_trans_log_header() ->
    write_header(latest_log, trans_log_header()).

%% Closes latest_log when a Mnesia directory is used.
stop() ->
    case mnesia_monitor:use_dir() of
        true ->
            close_log(latest_log);
        false ->
            ok
    end.

%% Syncs the log to disk and closes it through mnesia_monitor.  A failed
%% sync is reported but does not prevent the close; a read only log
%% needs no sync.
close_log(Log) ->
    SyncResult = disk_log:sync(Log),
    case SyncResult of
        ok ->
            ok;
        {error, {read_only_mode, Log}} ->
            ok;
        {error, Reason} ->
            mnesia_lib:important("Failed syncing ~p to_disk reason ~p ~n",
                                 [Log, Reason])
    end,
    mnesia_monitor:close_log(Log).

unsafe_close_log(Log) ->
    mnesia_monitor:unsafe_close_log(Log).


%% Closes latest_log and deletes the latest log and decision tab files.
purge_some_logs() ->
    mnesia_monitor:unsafe_close_log(latest_log),
    file:delete(latest_log_file()),
    file:delete(decision_tab_file()).

%% Deletes the previous log, latest log and decision tab files.
purge_all_logs() ->
    file:delete(previous_log_file()),
    file:delete(latest_log_file()),
    file:delete(decision_tab_file()).

%% Prepare dump by renaming the open logfile if possible
%% Returns one of the following:
%%   already_dumped, {needs_dump, Diff} or {error, Reason}
%% where Diff is the number of log writes since the previous dump.
prepare_log_dump(InitBy) ->
    Diff = mnesia_dumper:get_log_writes() -
           mnesia_lib:read_counter(trans_log_writes_prev),
    NothingNew = (Diff == 0) andalso (InitBy /= startup),
    case NothingNew of
        true ->
            already_dumped;
        false ->
            case mnesia_monitor:use_dir() of
                true ->
                    PrevFile = previous_log_file(),
                    prepare_prev(Diff, InitBy, PrevFile, exists(PrevFile));
                false ->
                    already_dumped
            end
    end.

%% Last argument: whether the previous log file already exists.
prepare_prev(Diff, _, _, true) ->
    %% A previous log is still waiting to be dumped
    {needs_dump, Diff};
prepare_prev(Diff, startup, Prev, false) ->
    %% At startup the latest log file, if any, is renamed to the previous log
    LatestFile = latest_log_file(),
    case exists(LatestFile) of
        true ->
            case file:rename(LatestFile, Prev) of
                ok ->
                    {needs_dump, Diff};
                {error, Reason} ->
                    {error, Reason}
            end;
        false ->
            already_dumped
    end;
prepare_prev(Diff, _InitBy, Prev, false) ->
    case mnesia_monitor:reopen_log(latest_log, Prev, trans_log_header()) of
        ok ->
            {needs_dump, Diff};
        {error, Reason} ->
            {error, {"Cannot rename log file",
                     [latest_log_file(), Prev, Reason]}}
    end.

%% Init dump and return PrevLogFileDesc or exit.
init_log_dump() ->
    open_log(previous_log, trans_log_header(), previous_log_file()),
    start.


chunk_log(Cont) ->
    chunk_log(previous_log, Cont).

%% Reads the next chunk of Log.  Returns eof, {Cont2, Terms}, or whatever
%% else disk_log:chunk/2 produced; a read error is fatal.
chunk_log(_Log, eof) ->
    eof;
chunk_log(Log, Cont) ->
    case catch disk_log:chunk(Log, Cont) of
        {error, Reason} ->
            fatal("Possibly truncated ~p file: ~p~n",
                  [Log, Reason]);
        {Cont2, Terms, BadBytes} ->
            %% Read_only case, should we warn about the bad log file?
            %% BUGBUG Should we crash if Repair == false ??
            %% We got to check this !!
            mnesia_lib:important("~p repaired, lost ~p bad bytes~n", [Log, BadBytes]),
            {Cont2, Terms};
        Other ->
            Other
    end.

%% Confirms the dump by closing prev log and delete the file
confirm_log_dump(Updates) ->
    case mnesia_monitor:close_log(previous_log) of
        {error, Reason} ->
            {error, Reason};
        ok ->
            file:delete(previous_log_file()),
            mnesia_lib:incr_counter(trans_log_writes_prev, Updates),
            dumped
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Decision log

open_decision_log() ->
    open_log(decision_log, decision_log_header(), decision_log_file()),
    start.

prepare_decision_log_dump() ->
    PrevFile = previous_decision_log_file(),
    prepare_decision_log_dump(exists(PrevFile), PrevFile).

%% First argument: whether the previous decision log file exists.  When
%% it does not, the decision log is first reopened under that name.
prepare_decision_log_dump(true, Prev) ->
    open_log(previous_decision_log, decision_log_header(), Prev),
    start;
prepare_decision_log_dump(false, Prev) ->
    case mnesia_monitor:reopen_log(decision_log, Prev, decision_log_header()) of
        ok ->
            prepare_decision_log_dump(true, Prev);
        {error, Reason} ->
            fatal("Cannot rename decision log file ~p -> ~p: ~p~n",
                  [decision_log_file(), Prev, Reason])
    end.

chunk_decision_log(Cont) ->
    chunk_log(previous_decision_log, Cont).

%% Confirms dump of the decision log
confirm_decision_log_dump() ->
    case mnesia_monitor:close_log(previous_decision_log) of
	ok ->
	    file:delete(previous_decision_log_file());
	{error, Reason} ->
	    fatal("Cannot confirm decision log dump: ~p~n",
		  [Reason])
    end.

%% Writes Decisions to a temporary log file which is then renamed to the
%% decision tab file.
save_decision_tab(Decisions) ->
    Log = decision_tab,
    Tmp = mnesia_lib:dir("DECISION_TAB.TMP"),
    file:delete(Tmp),
    open_log(Log, decision_tab_header(), Tmp),
    append(Log, Decisions),
    close_log(Log),
    TabFile = decision_tab_file(),
    ok = file:rename(Tmp, TabFile).

open_decision_tab() ->
    TabFile = decision_tab_file(),
    open_log(decision_tab, decision_tab_header(), TabFile),
    start.

close_decision_tab() ->
    close_log(decision_tab).

chunk_decision_tab(Cont) ->
    %% dbg_out("chunk tab ~p~n", [Cont]),
    chunk_log(decision_tab, Cont).

close_decision_log() ->
    close_log(decision_log).

log_decision(Decision) ->
    append(decision_log, Decision).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Debug functions

%% Shows the contents of all files returned by log_files/0.
view() ->
    lists:foreach(fun(F) -> view(F) end, log_files()).

%% Shows the contents of one log file.
%% Returns nolog if the file does not exist, eof when the whole file has
%% been shown, error if reading failed, or the result of reporting a
%% failed open.
view(File) ->
    mnesia_lib:show("*****  ~p ***** ~n", [File]),
    case exists(File) of
	false ->
	    nolog;
	true ->
	    N = view_only,
	    Args = [{file, File}, {name, N}, {mode, read_only}],
	    case disk_log:open(Args) of
		{ok, N} ->
		    view_file(start, N);
		{repaired, _, _, _} ->
		    view_file(start, N);
		{error, Reason} ->
		    %% Module-qualified: a bare error/2 is ambiguous with the
		    %% auto-imported BIF erlang:error/2
		    mnesia_lib:error("Cannot open log ~p: ~p~n", [File, Reason])
	    end
    end.

%% Shows the terms of Log chunk by chunk, starting at continuation C.
%% The log is closed both at end of file and when a chunk cannot be
%% read, so the view_only log is never left open.
view_file(C, Log) ->
    case disk_log:chunk(Log, C) of
	{error, Reason} ->
	    mnesia_lib:error("** Possibly truncated FILE ~p~n", [Reason]),
	    %% Close on the error path too; otherwise the log stays open
	    %% under the name view_only after a failed read
	    disk_log:close(Log),
	    error;
	eof ->
	    disk_log:close(Log),
	    eof;
	{C2, Terms, BadBytes} ->
	    dbg_out("Lost ~p bytes in ~p ~n", [BadBytes, Log]),
	    lists:foreach(fun(X) -> mnesia_lib:show("~p~n", [X]) end,
			  Terms),
	    view_file(C2, Log);
	{C2, Terms} ->
	    lists:foreach(fun(X) -> mnesia_lib:show("~p~n", [X]) end,
			  Terms),
	    view_file(C2, Log)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Backup

-record(backup_args, {name, module, opaque, scope, prev_name, tables, cookie}).

%% Backs up all tables to Opaque using default arguments.
backup(Opaque) ->
    backup(Opaque, []).

%% Backs up all tables. The second argument is either a callback
%% module or a list of backup arguments. A temporary checkpoint is
%% activated for the duration of the backup.
backup(Opaque, Mod) when is_atom(Mod) ->
    backup(Opaque, [{module, Mod}]);
backup(Opaque, Args) when is_list(Args) ->
    %% Backup all tables with max redundancy
    AllTabs = val({schema, tables}),
    CheckpointArgs = [{ram_overrides_dump, false}, {max, AllTabs}],
    case mnesia_checkpoint:activate(CheckpointArgs) of
        {ok, Checkpoint, _Nodes} ->
            Outcome = backup_checkpoint(Checkpoint, Opaque, Args),
            mnesia_checkpoint:deactivate(Checkpoint),
            Outcome;
        {error, _Why} = Failure ->
            Failure
    end.

%% Backs up the checkpoint Name to Opaque using default arguments.
backup_checkpoint(Name, Opaque) ->
    backup_checkpoint(Name, Opaque, []).

%% Backs up the checkpoint Name. The work is done by a linked
%% backup_master process; this function waits for its answer.
backup_checkpoint(Name, Opaque, Mod) when is_atom(Mod) ->
    backup_checkpoint(Name, Opaque, [{module, Mod}]);
backup_checkpoint(Name, Opaque, Args) when is_list(Args) ->
    Defaults = #backup_args{name = Name,
                            module = mnesia_monitor:get_env(backup_module),
                            opaque = Opaque,
                            scope = global,
                            tables = all,
                            prev_name = Name},
    case check_backup_args(Args, Defaults) of
        {ok, BackupArgs} ->
            %% Decentralized backup
            %% Incremental
            Client = self(),
            Master = spawn_link(?MODULE, backup_master, [Client, BackupArgs]),
            receive
                {Master, Client, Outcome} -> Outcome
            end;
        {error, _Why} = Failure ->
            Failure
    end.

%% Applies each user supplied argument to the #backup_args{} record.
%% Returns {ok, Record}, or {error, {badarg, Arg}} for the first
%% argument that is not accepted.
check_backup_args([], B) ->
    {ok, B};
check_backup_args([Arg | Rest], B) ->
    case catch check_backup_arg_type(Arg, B) of
        {'EXIT', _Why} ->
            {error, {badarg, Arg}};
        Updated ->
            check_backup_args(Rest, Updated)
    end.

%% Stores one backup argument in the record. Crashes on anything
%% unknown; check_backup_args/2 catches that.
check_backup_arg_type({scope, global}, B) ->
    B#backup_args{scope = global};
check_backup_arg_type({scope, local}, B) ->
    B#backup_args{scope = local};
check_backup_arg_type({module, Mod}, B) ->
    Checked = mnesia_monitor:do_check_type(backup_module, Mod),
    B#backup_args{module = Checked};
check_backup_arg_type({incremental, PrevName}, B) ->
    B#backup_args{prev_name = PrevName};
check_backup_arg_type({tables, Tabs}, B) when is_list(Tabs) ->
    B#backup_args{tables = Tabs}.

%% Entry point of the backup master process, spawned (linked) by
%% backup_checkpoint/3. Runs the backup and sends the outcome to
%% ClientPid as {self(), ClientPid, Result}, then unlinks and exits
%% normally. A crash in do_backup_master/1 is reported as
%% {error, {'EXIT', Reason}}.
backup_master(ClientPid, B) ->
    %% Exit signals are turned into messages; safe_apply/3 polls for them.
    process_flag(trap_exit, true),
    case catch do_backup_master(B) of
        {'EXIT', Reason} ->
            ClientPid ! {self(), ClientPid, {error, {'EXIT', Reason}}};
        Res ->
            %% Includes {error, Reason} thrown by abort_write/4.
            ClientPid ! {self(), ClientPid, Res}
    end,
    unlink(ClientPid),
    exit(normal).

%% Performs the backup described by B: opens the backup media,
%% writes the log header, the schema and then every selected table,
%% and finally commits. Returns ok; failures are routed through
%% abort_write/4.
do_backup_master(B) ->
    Name = B#backup_args.name,
    B2 = safe_apply(B, open_write, [B#backup_args.opaque]),
    B3 = safe_write(B2, [backup_log_header()]),
    case mnesia_checkpoint:tables_and_cookie(Name) of
        {ok, AllTabs, Cookie} ->
            Tabs = select_tables(AllTabs, B3),
            B4 = B3#backup_args{cookie = Cookie},
            %% Always put schema first in backup file
            B5 = backup_schema(B4, Tabs),
            B6 = lists:foldl(fun backup_tab/2, B5, Tabs -- [schema]),
            safe_apply(B6, commit_write, [B6#backup_args.opaque]),
            ok;
        {error, Reason} ->
            abort_write(B3, {?MODULE, backup_master}, [B], {error, Reason})
    end.

%% Returns the tables to back up: either all tables of the
%% checkpoint or the explicitly requested ones. With scope local,
%% only tables whose most local node is this node are kept.
select_tables(AllTabs, B) ->
    Tabs =
        case B#backup_args.tables of
            all -> AllTabs;
            SomeTabs when is_list(SomeTabs) -> SomeTabs
        end,
    case B#backup_args.scope of
        global ->
            Tabs;
        local ->
            Name = B#backup_args.name,
            [T || T <- Tabs, mnesia_checkpoint:most_local_node(Name, T) == {ok, node()}]
    end.

%% Writes Recs through the backup callback module; an empty list is
%% a no-op. Returns the (possibly updated) #backup_args{}.
safe_write(B, []) ->
    B;
safe_write(B, Recs) ->
    safe_apply(B, write, [B#backup_args.opaque, Recs]).

%% Puts the schema information first in the backup. If schema is
%% among Tabs the table itself is backed up, otherwise a schema
%% record is written for each table in Tabs.
backup_schema(B, Tabs) ->
    case lists:member(schema, Tabs) of
        true ->
            backup_tab(schema, B);
        false ->
            Defs = [{schema, T, mnesia_schema:get_create_list(T)} || T <- Tabs],
            safe_write(B, Defs)
    end.

%% Calls Mod:What(Args...) on the backup callback module and stores
%% the new opaque value on success. Before the call, the mailbox is
%% checked (without waiting) for an 'EXIT' message; if one is found
%% the backup is aborted. Any result other than {ok, Opaque} also
%% aborts via abort_write/4.
safe_apply(B, write, [_, Items]) when Items == [] ->
    B;
safe_apply(B, What, Args) ->
    Abort = fun(R) -> abort_write(B, What, Args, R) end,
    receive
        {'EXIT', Pid, R} -> Abort({'EXIT', Pid, R})
    after 0 ->
            Mod = B#backup_args.module,
            case catch apply(Mod, What, Args) of
                {ok, Opaque} -> B#backup_args{opaque=Opaque};
                {error, R} -> Abort(R);
                R -> Abort(R)
            end
    end.

%% Aborts an ongoing backup: tells the callback module to abort the
%% write and then throws {error, Reason}. This function never
%% returns normally. If the callback's abort_write does not answer
%% {ok, _}, that is logged before throwing.
abort_write(B, What, Args, Reason) ->
    Mod = B#backup_args.module,
    Opaque = B#backup_args.opaque,
    dbg_out("Failed to perform backup. M=~p:F=~p:A=~p -> ~p~n",
            [Mod, What, Args, Reason]),
    case catch apply(Mod, abort_write, [Opaque]) of
        {ok, _Res} ->
            throw({error, Reason});
        Other ->
            error("Failed to abort backup. ~p:~p~p -> ~p~n",
                  [Mod, abort_write, [Opaque], Other]),
            throw({error, Reason})
    end.

%% Backs up one table. If the most local node of the checkpoint for
%% Tab is this node, the table is copied in this process; otherwise
%% a linked tab_copier is spawned on that node and its records are
%% received here. Returns the updated #backup_args{}.
backup_tab(Tab, B) ->
    Name = B#backup_args.name,
    case mnesia_checkpoint:most_local_node(Name, Tab) of
        {ok, Node} when Node == node() ->
            tab_copier(self(), B, Tab);
        {ok, Node} ->
            RemoteB = B,
            Pid = spawn_link(Node, ?MODULE, tab_copier, [self(), RemoteB, Tab]),
            RecName = val({Tab, record_name}),
            tab_receiver(Pid, B, Tab, RecName, 0);
        {error, Reason} ->
            abort_write(B, {?MODULE, backup_tab}, [Tab, B], {error, Reason})
    end.

%% Copies the contents of Tab, either by writing them directly
%% (when Pid is this process) or by sending them to Pid. The source
%% of the first pass is chosen by select_source/3.
tab_copier(Pid, B, Tab) when is_record(B, backup_args) ->
    %% Intentional crash at exit
    Name = B#backup_args.name,
    PrevName = B#backup_args.prev_name,
    {FirstName, FirstSource} = select_source(Tab, Name, PrevName),

    ?eval_debug_fun({?MODULE, tab_copier, pre}, [{name, Name}, {tab, Tab}]),
    Res = handle_more(Pid, B, Tab, FirstName, FirstSource, Name),
    ?eval_debug_fun({?MODULE, tab_copier, post}, [{name, Name}, {tab, Tab}]),

    handle_last(Pid, Res).

%% Decides where the first pass reads from. Returns
%% {CheckpointName, table | retainer}. The schema, and any backup
%% where Name == PrevName, is always a full backup. An incremental
%% backup is only accepted when the previous checkpoint's most local
%% node for Tab is this node.
select_source(Tab, Name, PrevName) ->
    if
        Tab == schema ->
            %% Always full backup of schema
            {Name, table};
        Name == PrevName ->
            %% Full backup
            {Name, table};
        true ->
            %% Wants incremental backup
            case mnesia_checkpoint:most_local_node(PrevName, Tab) of
                {ok, Node} when Node == node() ->
                    %% Accept incremental backup
                    {PrevName, retainer};
                _ ->
                    %% Do a full backup anyway
                    dbg_out("Incremental backup escalated to full backup: ~p~n", [Tab]),
                    {Name, table}
            end
    end.

%% Runs the copy passes for Tab and returns the accumulator
%% {Count, B}. When both checkpoints really retain the table, two
%% passes are made (first source, then the retainer of Name);
%% when neither does, only the retainer pass is made. A mismatch
%% aborts the backup.
handle_more(Pid, B, Tab, FirstName, FirstSource, Name) ->
    Acc = {0, B},
    case {mnesia_checkpoint:really_retain(Name, Tab),
          mnesia_checkpoint:really_retain(FirstName, Tab)} of
        {true, true} ->
            Acc2 = iterate(B, FirstName, Tab, Pid, FirstSource, latest, first, Acc),
            iterate(B, Name, Tab, Pid, retainer, checkpoint, last, Acc2);
        {false, false}->
            %% Put the dumped file in the backup
            %% instead of the ram table. Does
            %% only apply to ram_copies.
            iterate(B, Name, Tab, Pid, retainer, checkpoint, last, Acc);
        Bad ->
            Reason = {"Checkpoints for incremental backup must have same "
                      "setting of ram_overrides_dump",
                      Tab, Name, FirstName, Bad},
            abort_write(B, {?MODULE, backup_tab}, [Tab, B], {error, Reason})
    end.

%% Finishes a table copy. A local copier returns the updated
%% #backup_args{}; a remote copier tells the receiver that it is
%% done and exits normally.
handle_last(Pid, {_Count, B}) when Pid == self() ->
    B;
handle_last(Pid, _Acc) ->
    unlink(Pid),
    Pid ! {self(), {last, {ok, dummy}}},
    exit(normal).

%% Iterates over Tab in checkpoint Name. Each batch of records is
%% written locally (when Pid is this process) or sent to Pid.
%% Returns the new accumulator; an iteration error aborts the backup.
iterate(B, Name, Tab, Pid, Source, Age, Pass, Acc) ->
    Fun =
        if
            Pid == self() ->
                RecName = val({Tab, record_name}),
                fun(Recs, A) -> copy_records(RecName, Tab, Recs, A) end;
            true ->
                fun(Recs, A) -> send_records(Pid, Tab, Recs, Pass, A) end
        end,
    case mnesia_checkpoint:iterate(Name, Tab, Fun, Acc, Source, Age) of
        {ok, Acc2} ->
            Acc2;
        {error, Reason} ->
            R = {error, {"Tab copier iteration failed", Reason}},
            abort_write(B, {?MODULE, iterate}, [self(), B, Tab], R)
    end.

%% Local copy: filters one batch of records and writes it to the
%% backup. An empty batch leaves the accumulator untouched.
copy_records(_RecName, _Tab, [], Acc) ->
    Acc;
copy_records(RecName, Tab, Recs, {Count, B}) ->
    Recs2 = rec_filter(B, Tab, RecName, Recs),
    B2 = safe_write(B, Recs2),
    {Count + 1, B2}.

%% Remote copy: waits until the receiver asks for batch number
%% Count, then sends the batch tagged with Count + 1. An empty batch
%% in the last pass is not sent. Any other message makes the copier
%% exit.
send_records(Pid, Tab, Recs, Pass, {Count, B}) ->
    receive
        {Pid, more, Count} ->
            if
                Pass == last, Recs == [] ->
                    {Count, B};
                true ->
                    Next = Count + 1,
                    Pid ! {self(), {more, Next, Recs}},
                    {Next, B}
            end;
        Msg ->
            exit({send_records_unexpected_msg, Tab, Msg})
    end.

%% Receiving side of a remote table copy. Requests batch Slot from
%% the copier Pid, writes each received batch, and returns the
%% updated #backup_args{} when the copier reports {last, {ok, _}}.
%% An exit of the copier or any unexpected message aborts the backup.
tab_receiver(Pid, B, Tab, RecName, Slot) ->
    Pid ! {self(), more, Slot},
    receive
        {Pid, {more, Next, Recs}} ->
            Recs2 = rec_filter(B, Tab, RecName, Recs),
            B2 = safe_write(B, Recs2),
            tab_receiver(Pid, B2, Tab, RecName, Next);

        {Pid, {last, {ok,_}}} ->
            B;

        {'EXIT', Pid, {error, R}} ->
            Reason = {error, {"Tab copier crashed", R}},
            abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], Reason);
        {'EXIT', Pid, R} ->
            Reason = {error, {"Tab copier crashed", {'EXIT', R}}},
            abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], Reason);
        Msg ->
            R = {error, {"Tab receiver got unexpected msg", Msg}},
            abort_write(B, {?MODULE, remote_tab_sender}, [self(), B, Tab], R)
    end.

%% Adjusts records before they are written to the backup.
%% Schema records get their cookie refreshed. For other tables the
%% first element of each record is set to the table name, unless the
%% record name already equals the table name.
rec_filter(B, schema, _RecName, Recs) ->
    case catch mnesia_bup:refresh_cookie(Recs, B#backup_args.cookie) of
        Recs2 when is_list(Recs2) ->
            Recs2;
        {error, _Reason} ->
            %% No schema table cookie
            Recs
    end;
rec_filter(_B, Tab, Tab, Recs) ->
    Recs;
rec_filter(_B, Tab, _RecName, Recs) ->
    [setelement(1, Rec, Tab) || Rec <- Recs].

%% Dumps the ram_copies table Tab to its dcd file.
ets2dcd(Tab) ->
    ets2dcd(Tab, dcd).

%% Dumps the ram_copies table Tab to its dcd or dmp file, depending
%% on Ftype. The data is first written to a temporary file which is
%% then renamed, and the table's dcl file is deleted afterwards.
ets2dcd(Tab, Ftype) ->
    Fname =
        case Ftype of
            dcd -> mnesia_lib:tab2dcd(Tab);
            dmp -> mnesia_lib:tab2dmp(Tab)
        end,
    TmpF = mnesia_lib:tab2tmp(Tab),
    file:delete(TmpF),
    Log = open_log({Tab, ets2dcd}, dcd_log_header(), TmpF, false),
    %% The table is fixed while it is traversed in chunks of 1000.
    mnesia_lib:db_fixtable(ram_copies, Tab, true),
    ok = ets2dcd(mnesia_lib:db_init_chunk(ram_copies, Tab, 1000), Tab, Log),
    mnesia_lib:db_fixtable(ram_copies, Tab, false),
    close_log(Log),
    ok = file:rename(TmpF, Fname),
    %% Remove old log data which is now in the new dcd.
    %% No one else should be accessing this file!
    file:delete(mnesia_lib:tab2dcl(Tab)),
    ok.

%% Writes one chunk of records to Log and continues with the next
%% chunk until the end of the table.
ets2dcd('$end_of_table', _Tab, _Log) ->
    ok;
ets2dcd({Recs, Cont}, Tab, Log) ->
    ok = disk_log:alog_terms(Log, Recs),
    ets2dcd(mnesia_lib:db_chunk(ram_copies, Cont), Tab, Log).

%% Loads Tab from disc into ets, using the configured auto_repair
%% setting.
dcd2ets(Tab) ->
    dcd2ets(Tab, mnesia_monitor:get_env(auto_repair)).

%% Loads Tab from disc into its ets table.
%% If a dcd file exists, its records are inserted and the dcl log is
%% then replayed; the number of replayed dcl entries is returned.
%% Otherwise the table is loaded from an old dets (dat) file,
%% converted to a dcd file, and 0 is returned.
%% Rep is the repair option passed on when the files are opened.
dcd2ets(Tab, Rep) ->
    Dcd = mnesia_lib:tab2dcd(Tab),
    case mnesia_lib:exists(Dcd) of
        true ->
            Log = open_log({Tab, dcd2ets}, dcd_log_header(), Dcd,
                           true, Rep, read_only),
            Data = chunk_log(Log, start),
            ok = insert_dcdchunk(Data, Log, Tab),
            close_log(Log),
            load_dcl(Tab, Rep);
        false -> %% Handle old dets files, and conversion from disc_only to disc.
            Fname = mnesia_lib:tab2dat(Tab),
            Type = val({Tab, setorbag}),
            case mnesia_lib:dets_to_ets(Tab, Tab, Fname, Type, Rep, yes) of
                loaded ->
                    ets2dcd(Tab),
                    file:delete(Fname),
                    0;
                {error, Error} ->
                    erlang:error({"Failed to load table from disc", [Tab, Error]})
            end
    end.

%% Inserts the records of a dcd log into the ets table Tab, chunk
%% by chunk. A leading dcd log header is skipped.
insert_dcdchunk({Cont, [LogH | Rest]}, Log, Tab)
  when is_record(LogH, log_header),
       LogH#log_header.log_kind == dcd_log,
       LogH#log_header.log_version >= "1.0" ->
    insert_dcdchunk({Cont, Rest}, Log, Tab);

insert_dcdchunk({Cont, Recs}, Log, Tab) ->
    true = ets:insert(Tab, Recs),
    insert_dcdchunk(chunk_log(Log, Cont), Log, Tab);
insert_dcdchunk(eof, _Log, _Tab) ->
    ok.

%% Replays the dcl log of Tab, if there is one, and returns the
%% number of log entries applied (0 when no dcl file exists).
load_dcl(Tab, Rep) ->
    FName = mnesia_lib:tab2dcl(Tab),
    case mnesia_lib:exists(FName) of
        true ->
            Name = {load_dcl,Tab},
            open_log(Name,
                     dcl_log_header(),
                     FName,
                     true,
                     Rep,
                     read_only),
            FirstChunk = chunk_log(Name, start),
            N = insert_logchunk(FirstChunk, Name, 0),
            close_log(Name),
            N;
        false ->
            0
    end.

%% Applies every chunk of the log named Log and returns the total
%% number of entries applied, starting from the count Count.
%% add_recs/2 returns the running total (it starts counting at the
%% value it is given), so that total is passed on unchanged. Adding
%% it to Count again, as was done before, counted all earlier
%% chunks twice.
insert_logchunk({C2, Recs}, Log, Count) ->
    Total = add_recs(Recs, Count),
    insert_logchunk(chunk_log(Log, C2), Log, Total);
insert_logchunk(eof, _Log, Count) ->
    Count.

%% Applies a list of dcl log entries to their ets tables and
%% returns N plus the number of entries accounted for.
%% Each entry is {{Tab, Key}, Val, Op}. A dcl log header is skipped
%% without being counted.
add_recs([{{Tab, _Key}, Val, write} | Rest], N) ->
    true = ets:insert(Tab, Val),
    add_recs(Rest, N+1);
add_recs([{{Tab, Key}, _Val, delete} | Rest], N) ->
    true = ets:delete(Tab, Key),
    add_recs(Rest, N+1);
add_recs([{{Tab, _Key}, Val, delete_object} | Rest], N) ->
    true = ets:match_delete(Tab, Val),
    add_recs(Rest, N+1);
add_recs([{{Tab, Key}, Val, update_counter} | Rest], N) ->
    {RecName, Incr} = Val,
    case catch ets:update_counter(Tab, Key, Incr) of
        CounterVal when is_integer(CounterVal) ->
            ok;
        _ when Incr < 0 ->
            %% No counter to update and a negative increment:
            %% the counter is created with the value 0.
            Zero = {RecName, Key, 0},
            true = ets:insert(Tab, Zero);
        _ ->
            %% No counter to update: create it with the increment.
            Zero = {RecName, Key, Incr},
            true = ets:insert(Tab, Zero)
    end,
    add_recs(Rest, N+1);
add_recs([LogH|Rest], N)
  when is_record(LogH, log_header),
       LogH#log_header.log_kind == dcl_log,
       LogH#log_header.log_version >= "1.0" ->
    add_recs(Rest, N);
add_recs([{{Tab, _Key}, _Val, clear_table} | Rest], N) ->
    %% The size must be read before the table is emptied; reading
    %% it afterwards, as was done before, always added 0.
    Size = ets:info(Tab, size),
    true = ets:match_delete(Tab, '_'),
    add_recs(Rest, N+Size);
add_recs([], N) ->
    N.

diff --git a/lib/mnesia/src/mnesia_monitor.erl b/lib/mnesia/src/mnesia_monitor.erl new file mode 100644 index 0000000000..05ae943e3b --- /dev/null +++ b/lib/mnesia/src/mnesia_monitor.erl @@ -0,0 +1,823 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_monitor).

-behaviour(gen_server).

%% Public exports
-export([
         close_dets/1,
         close_log/1,
         detect_inconcistency/2,
         get_env/1,
         init/0,
         mktab/2,
         unsafe_mktab/2,
         mnesia_down/2,
         needs_protocol_conversion/1,
         negotiate_protocol/1,
         disconnect/1,
         open_dets/2,
         unsafe_open_dets/2,
         open_log/1,
         patch_env/2,
         protocol_version/0,
         reopen_log/3,
         set_env/2,
         start/0,
         start_proc/4,
         terminate_proc/3,
         unsafe_close_dets/1,
         unsafe_close_log/1,
         use_dir/0,
         do_check_type/2
        ]).

%% gen_server callbacks
-export([
         init/1,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         terminate/2,
         code_change/3
        ]).

%% Internal exports
-export([
         call/1,
         cast/1,
         detect_partitioned_network/2,
         has_remote_mnesia_down/1,
         negotiate_protocol_impl/2
        ]).

-import(mnesia_lib, [dbg_out/2, verbose/2, error/2, fatal/2, set/2]).

-include("mnesia.hrl").

-record(state, {supervisor, pending_negotiators = [],
                going_down = [], tm_started = false, early_connects = [],
                connecting, mq = []}).

-define(current_protocol_version, {7,6}).

-define(previous_protocol_version, {7,5}).

%% Starts the monitor as a locally registered gen_server, linked to
%% the caller. The caller's pid is handed to init/1.
start() ->
    ServerName = {local, ?MODULE},
    InitArgs = [self()],
    %% Add {debug, [trace]} to the options to trace the server.
    Options = [{timeout, infinity}],
    gen_server:start_link(ServerName, ?MODULE, InitArgs, Options).

%% Sends the init request to the monitor (see handle_call/3).
init() ->
    call(init).

%% Tells the monitor that the process From has handled the mnesia
%% down of Node.
mnesia_down(From, Node) ->
    cast({mnesia_down, From, Node}).

%% The functions below forward table and log operations to the
%% monitor process, which performs them on behalf of the caller.

mktab(Name, Options) ->
    unsafe_call({mktab, Name, Options}).
unsafe_mktab(Name, Options) ->
    unsafe_call({unsafe_mktab, Name, Options}).

open_dets(Name, Options) ->
    unsafe_call({open_dets, Name, Options}).
unsafe_open_dets(Name, Options) ->
    unsafe_call({unsafe_open_dets, Name, Options}).

close_dets(Name) ->
    unsafe_call({close_dets, Name}).

unsafe_close_dets(Name) ->
    unsafe_call({unsafe_close_dets, Name}).

open_log(Options) ->
    unsafe_call({open_log, Options}).

reopen_log(Log, File, Header) ->
    unsafe_call({reopen_log, Log, File, Header}).

close_log(Log) ->
    unsafe_call({close_log, Log}).

unsafe_close_log(Log) ->
    unsafe_call({unsafe_close_log, Log}).


%% Asks the monitor to unlink from the monitor on Node.
disconnect(Node) ->
    cast({disconnect, Node}).

%% Returns GoodNodes
%% Creates a link to each compatible monitor and sets
%% protocol_version to the agreed version upon success

negotiate_protocol([]) ->
    [];
negotiate_protocol(Nodes) ->
    call({negotiate_protocol, Nodes}).

%% Runs in a helper process spawned by the monitor. Asks the
%% monitors on Nodes to negotiate, evaluates their replies and
%% sends the result back to the local monitor together with
%% Requester.
negotiate_protocol_impl(Nodes, Requester) ->
    MyVersion = mnesia:system_info(version),
    Accepted = acceptable_protocol_versions(),
    Monitor = whereis(?MODULE),
    Request = {negotiate_protocol, Monitor, MyVersion, Accepted},
    {Replies, _BadNodes} = multicall(Nodes, Request),
    GoodNodes = check_protocol(Replies, Accepted),
    ?MODULE ! {protocol_negotiated,Requester,GoodNodes},
    unlink(whereis(?MODULE)),
    ok.

%% Walks through the negotiation replies and returns the nodes
%% whose monitors accepted one of our protocols. The protocol
%% agreed with each such node is stored; once all replies have been
%% handled, protocol_version is set to the first of Protocols.
check_protocol([{Node, {accept, Mon, Version, Protocol}} | Rest], Protocols) ->
    case lists:member(Protocol, Protocols) of
        true ->
            %% The flag tells whether the agreed protocol differs
            %% from our own.
            Differs = Protocol /= protocol_version(),
            set({protocol, Node}, {Protocol, Differs}),
            [node(Mon) | check_protocol(Rest, Protocols)];
        false ->
            verbose("Failed to connect with ~p. ~p protocols rejected. "
                    "expected version = ~p, expected protocol = ~p~n",
                    [Node, Protocols, Version, Protocol]),
            unlink(Mon), % Get rid of unnecessary link
            check_protocol(Rest, Protocols)
    end;
check_protocol([{Node, {reject, _Mon, Version, Protocol}} | Rest], Protocols) ->
    verbose("Failed to connect with ~p. ~p protocols rejected. "
            "expected version = ~p, expected protocol = ~p~n",
            [Node, Protocols, Version, Protocol]),
    check_protocol(Rest, Protocols);
check_protocol([{error, Why} | Rest], Protocols) ->
    dbg_out("~p connect failed error: ~p~n", [?MODULE, Why]),
    check_protocol(Rest, Protocols);
check_protocol([{badrpc, Why} | Rest], Protocols) ->
    dbg_out("~p connect failed badrpc: ~p~n", [?MODULE, Why]),
    check_protocol(Rest, Protocols);
check_protocol([], [Preferred | _Others]) ->
    set(protocol_version, Preferred),
    [].

%% Returns the stored protocol version, or the current protocol
%% version when none has been stored yet.
protocol_version() ->
    case ?catch_val(protocol_version) of
        {'EXIT', _} ->
            ?current_protocol_version;
        Stored ->
            Stored
    end.

%% A sorted list of acceptable protocols the
%% preferred protocols are first in the list
acceptable_protocol_versions() ->
    Preferred = protocol_version(),
    [Preferred, ?previous_protocol_version].

%% Tells whether messages exchanged with Node need protocol
%% conversion. Nodes without a stored protocol never do.
needs_protocol_conversion(Node) ->
    Stored = ?catch_val({protocol, Node}),
    case {Stored, protocol_version()} of
        {{'EXIT', _}, _} ->
            false;
        {{_, Flag}, ?current_protocol_version} ->
            Flag;
        {{_, Flag}, _} ->
            not Flag
    end.

%% Sends an asynchronous request to the monitor, if it is running.
cast(Request) ->
    case whereis(?MODULE) of
        undefined ->
            ignore;
        Monitor ->
            gen_server:cast(Monitor, Request)
    end.

%% Synchronous request to the monitor without linking to it.
unsafe_call(Request) ->
    case whereis(?MODULE) of
        undefined ->
            {error, {node_not_running, node()}};
        Monitor ->
            gen_server:call(Monitor, Request, infinity)
    end.

%% Synchronous request to the monitor. The caller is linked to the
%% monitor for the duration of the call.
call(Request) ->
    case whereis(?MODULE) of
        undefined ->
            {error, {node_not_running, node()}};
        Monitor ->
            linked_call(Monitor, Request)
    end.

%% Performs the call while linked to Monitor. If an exit message
%% from the monitor is already in the mailbox afterwards, the reply
%% is discarded in favour of a node_not_running error.
linked_call(Monitor, Request) ->
    link(Monitor),
    Reply = gen_server:call(Monitor, Request, infinity),
    unlink(Monitor),
    %% We get an exit signal if server dies
    receive
        {'EXIT', Monitor, _Reason} ->
            {error, {node_not_running, node()}}
    after 0 ->
            Reply
    end.

%% Runs call(Msg) on every node in Nodes.
multicall(Nodes, Msg) ->
    rpc:multicall(Nodes, ?MODULE, call, [Msg]).

%% Starts a linked process through mnesia_sp:init_proc/4.
start_proc(Who, Mod, Fun, Args) ->
    InitArgs = [Who, Mod, Fun, Args],
    proc_lib:start_link(mnesia_sp, init_proc, InitArgs, infinity).

%% Common termination handling: anything but shutdown or killed is
%% treated as a crash and reported as fatal.
terminate_proc(Who, Reason, State) when Reason /= shutdown, Reason /= killed ->
    fatal("~p crashed: ~p state: ~p~n", [Who, Reason, State]);

terminate_proc(Who, Reason, _State) ->
    mnesia_lib:verbose("~p terminated: ~p~n", [Who, Reason]),
    ok.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Callback functions from gen_server

%%----------------------------------------------------------------------
%% Func: init/1
%% Returns: {ok, State} |
%%          {ok, State, Timeout} |
%%          {stop, Reason}
%%
%% Creates the mnesia_gvar and mnesia_stats tables, reads and
%% checks the configuration parameters and initialises the
%% transaction counters. Parent is stored as the supervisor in the
%% state. A bad configuration value stops the server with
%% {bad_config, Reason}.
%%----------------------------------------------------------------------
init([Parent]) ->
    process_flag(trap_exit, true),
    %% The tables must exist before any set/2 call below.
    ?ets_new_table(mnesia_gvar, [set, public, named_table]),
    ?ets_new_table(mnesia_stats, [set, public, named_table]),
    set(subscribers, []),
    mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
    Version = mnesia:system_info(version),
    set(version, Version),
    dbg_out("Version: ~p~n", [Version]),

    %% process_config_args/1 exits on a badly typed parameter
    %% (see check_type/2), hence the catch.
    case catch process_config_args(env()) of
        ok ->
            mnesia_lib:set({'$$$_report', current_pos}, 0),
            Level = mnesia_lib:val(debug),
            mnesia_lib:verbose("Mnesia debug level set to ~p\n", [Level]),
            set(mnesia_status, starting), %% set start status
            set({current, db_nodes}, [node()]),
            set(use_dir, use_dir()),
            mnesia_lib:create_counter(trans_aborts),
            mnesia_lib:create_counter(trans_commits),
            mnesia_lib:create_counter(trans_log_writes),
            Left = get_env(dump_log_write_threshold),
            mnesia_lib:set_counter(trans_log_writes_left, Left),
            mnesia_lib:create_counter(trans_log_writes_prev),
            mnesia_lib:create_counter(trans_restarts),
            mnesia_lib:create_counter(trans_failures),
            set(checkpoints, []),
            set(pending_checkpoints, []),
            set(pending_checkpoint_pids, []),

            {ok, #state{supervisor = Parent}};
        {'EXIT', Reason} ->
            mnesia_lib:report_fatal("Bad configuration: ~p~n", [Reason]),
            {stop, {bad_config, Reason}}
    end.

%% Tells whether Mnesia uses its directory. A stored value wins;
%% otherwise the answer follows from the schema_location parameter:
%% disc -> true, ram -> false, opt_disc -> true only if the
%% directory already contains important files.
use_dir() ->
    case ?catch_val(use_dir) of
        {'EXIT', _} ->
            case get_env(schema_location) of
                disc -> true;
                opt_disc -> non_empty_dir();
                ram -> false
            end;
        Bool ->
            Bool
    end.

%% Returns true if the Mnesia directory contains
%% important files
non_empty_dir() ->
    mnesia_lib:exists(mnesia_bup:fallback_bup()) or
        mnesia_lib:exists(mnesia_lib:tab2dmp(schema)) or
        mnesia_lib:exists(mnesia_lib:tab2dat(schema)).

%%----------------------------------------------------------------------
%% Func: handle_call/3
%% Returns: {reply, Reply, State}          |
%%          {reply, Reply, State, Timeout} |
%%          {noreply, State}               |
%%          {noreply, State, Timeout}      |
%%          {stop, Reason, Reply, State}   | (terminate/2 is called)
%%
%% The "unsafe_" variants of the table and log requests return
%% errors to the caller (or ignore them), whereas the plain
%% variants report a failure through fatal/2.
%%----------------------------------------------------------------------

%% Create an ets table; failure is fatal.
handle_call({mktab, Tab, Args}, _From, State) ->
    case catch ?ets_new_table(Tab, Args) of
        {'EXIT', ExitReason} ->
            Msg = "Cannot create ets table",
            Reason = {system_limit, Msg, Tab, Args, ExitReason},
            fatal("~p~n", [Reason]),
            {noreply, State};
        Reply ->
            {reply, Reply, State}
    end;

%% Create an ets table; failure is returned as {error, Reason}.
handle_call({unsafe_mktab, Tab, Args}, _From, State) ->
    case catch ?ets_new_table(Tab, Args) of
        {'EXIT', ExitReason} ->
            {reply, {error, ExitReason}, State};
        Reply ->
            {reply, Reply, State}
    end;


%% Open a dets table; failure is fatal.
handle_call({open_dets, Tab, Args}, _From, State) ->
    case mnesia_lib:dets_sync_open(Tab, Args) of
        {ok, Tab} ->
            {reply, {ok, Tab}, State};

        {error, Reason} ->
            Msg = "Cannot open dets table",
            Error = {error, {Msg, Tab, Args, Reason}},
            fatal("~p~n", [Error]),
            {noreply, State}
    end;

%% Open a dets table; failure is returned to the caller.
handle_call({unsafe_open_dets, Tab, Args}, _From, State) ->
    case mnesia_lib:dets_sync_open(Tab, Args) of
        {ok, Tab} ->
            {reply, {ok, Tab}, State};
        {error, Reason} ->
            {reply, {error,Reason}, State}
    end;

%% Close a dets table; anything but ok crashes the server.
handle_call({close_dets, Tab}, _From, State) ->
    ok = mnesia_lib:dets_sync_close(Tab),
    {reply, ok, State};

%% Close a dets table, ignoring the result.
handle_call({unsafe_close_dets, Tab}, _From, State) ->
    mnesia_lib:dets_sync_close(Tab),
    {reply, ok, State};

%% Open a disk_log with notifications enabled; the result of
%% disk_log:open/1 is returned as is.
handle_call({open_log, Args}, _From, State) ->
    Res = disk_log:open([{notify, true}|Args]),
    {reply, Res, State};

%% Reopen a disk_log under a new file name; failure is fatal.
handle_call({reopen_log, Name, Fname, Head}, _From, State) ->
    case disk_log:reopen(Name, Fname, Head) of
        ok ->
            {reply, ok, State};

        {error, Reason} ->
            Msg = "Cannot rename disk_log file",
            Error = {error, {Msg, Name, Fname, Head, Reason}},
            fatal("~p~n", [Error]),
            {noreply, State}
    end;

%% Close a disk_log; failure is fatal.
handle_call({close_log, Name}, _From, State) ->
    case disk_log:close(Name) of
        ok ->
            {reply, ok, State};

        {error, Reason} ->
            Msg = "Cannot close disk_log file",
            Error = {error, {Msg, Name, Reason}},
            fatal("~p~n", [Error]),
            {noreply, State}
    end;

%% Close a disk_log, ignoring the result.
handle_call({unsafe_close_log, Name}, _From, State) ->
    disk_log:close(Name),
    {reply, ok, State};

%% Negotiation request received before the init call has set
%% tm_started: reject it, but remember the node in early_connects
%% (returned later by the init call).
handle_call({negotiate_protocol, Mon, _Version, _Protocols}, _From, State)
  when State#state.tm_started == false ->
    State2 = State#state{early_connects = [node(Mon) | State#state.early_connects]},
    {reply, {node(), {reject, self(), uninitialized, uninitialized}}, State2};

%% From remote monitor..
handle_call({negotiate_protocol, Mon, Version, Protocols}, From, State)
  when node(Mon) /= node() ->
    Protocol = protocol_version(),
    MyVersion = mnesia:system_info(version),
    case lists:member(Protocol, Protocols) of
        true ->
            accept_protocol(Mon, MyVersion, Protocol, From, State);
        false ->
            %% in this release we should be able to handle the previous
            %% protocol
            case hd(Protocols) of
                ?previous_protocol_version ->
                    accept_protocol(Mon, MyVersion, ?previous_protocol_version, From, State);
                _ ->
                    verbose("Connection with ~p rejected. "
                            "version = ~p, protocols = ~p, "
                            "expected version = ~p, expected protocol = ~p~n",
                            [node(Mon), Version, Protocols, MyVersion, Protocol]),
                    {reply, {node(), {reject, self(), MyVersion, Protocol}}, State}
            end
    end;

%% Local request to negotiate with other monitors (nodes).
%% The negotiation runs in a linked helper process; the reply is
%% sent when its {protocol_negotiated, ...} message arrives
%% (see handle_info/2).
handle_call({negotiate_protocol, Nodes}, From, State) ->
    case mnesia_lib:intersect(State#state.going_down, Nodes) of
        [] ->
            spawn_link(?MODULE, negotiate_protocol_impl, [Nodes, From]),
            {noreply, State#state{connecting={From,Nodes}}};
        _ -> %% Cannot connect now, still processing mnesia down
            {reply, busy, State}
    end;

%% Starts monitoring of node up/down events, marks the server as
%% started and returns the nodes that tried to connect too early.
handle_call(init, _From, State) ->
    net_kernel:monitor_nodes(true),
    EarlyNodes = State#state.early_connects,
    State2 = State#state{tm_started = true},
    {reply, EarlyNodes, State2};

%% Unknown request: logged, and no reply is sent.
%% NOTE(review): the caller gets no answer here; confirm that this
%% is intended.
handle_call(Msg, _From, State) ->
    error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%% Accepts a negotiation request from the remote monitor Mon.
%% If the remote node is still in going_down, the reply is queued
%% in pending_negotiators and sent when the mnesia down has been
%% processed (see handle_cast/2). Otherwise the monitors are
%% linked, the agreed protocol is stored and the reply is sent at
%% once.
accept_protocol(Mon, Version, Protocol, From, State) ->
    Reply = {node(), {accept, self(), Version, Protocol}},
    Node = node(Mon),
    Pending0 = State#state.pending_negotiators,
    %% Any older pending request from the same node is dropped.
    Pending = lists:keydelete(Node, 1, Pending0),
    case lists:member(Node, State#state.going_down) of
        true ->
            %% Wait for the mnesia_down to be processed,
            %% before we reply
            P = Pending ++ [{Node, Mon, From, Reply}],
            {noreply, State#state{pending_negotiators = P}};
        false ->
            %% No need for wait
            link(Mon), %% link to remote Monitor
            case Protocol == protocol_version() of
                true ->
                    set({protocol, Node}, {Protocol, false});
                false ->
                    set({protocol, Node}, {Protocol, true})
            end,
            {reply, Reply, State#state{pending_negotiators = Pending}}
    end.

%%----------------------------------------------------------------------
%% Func: handle_cast/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%
%% The three mnesia_down clauses pass a node-down notification along
%% a chain: mnesia_controller -> mnesia_tm -> mnesia_locker. When
%% the locker has reported, the node is no longer "going down".
%%----------------------------------------------------------------------

handle_cast({mnesia_down, mnesia_controller, Node}, State) ->
    mnesia_tm:mnesia_down(Node),
    {noreply, State};

handle_cast({mnesia_down, mnesia_tm, {Node, Pending}}, State) ->
    mnesia_locker:mnesia_down(Node, Pending),
    {noreply, State};

handle_cast({mnesia_down, mnesia_locker, Node}, State) ->
    Down = {mnesia_down, Node},
    mnesia_lib:report_system_event(Down),
    GoingDown = lists:delete(Node, State#state.going_down),
    State2 = State#state{going_down = GoingDown},
    Pending = State#state.pending_negotiators,
    case lists:keysearch(Node, 1, Pending) of
        {value, {Node, Mon, ReplyTo, Reply}} ->
            %% Late reply to remote monitor
            %% (queued by accept_protocol/5 while the node was going down)
            link(Mon), %% link to remote Monitor
            gen_server:reply(ReplyTo, Reply),
            P2 = lists:keydelete(Node, 1,Pending),
            State3 = State2#state{pending_negotiators = P2},
            process_q(State3);
        false ->
            %% No pending remote monitors
            {noreply, State2}
    end;

%% Unlinks from the monitor process on Node, if it can be found.
handle_cast({disconnect, Node}, State) ->
    case rpc:call(Node, erlang, whereis, [?MODULE]) of
        {badrpc, _} ->
            ignore;
        undefined ->
            ignore;
        RemoteMon when is_pid(RemoteMon) ->
            unlink(RemoteMon)
    end,
    {noreply, State};

%% Forwards an inconsistent_database notification as a system event.
handle_cast({inconsistent_database, Context, Node}, State) ->
    Msg = {inconsistent_database, Context, Node},
    mnesia_lib:report_system_event(Msg),
    {noreply, State};

%% Unknown request: logged and ignored.
handle_cast(Msg, State) ->
    error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%%----------------------------------------------------------------------
%% Func: handle_info/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%----------------------------------------------------------------------

%% The supervisor died or told us to stop.
handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
    dbg_out("~p was ~p by supervisor~n",[?MODULE, R]),
    {stop, R, State};

%% A local process reported a fatal error: shut the supervisor down.
handle_info({'EXIT', Pid, fatal}, State) when node(Pid) == node() ->
    dbg_out("~p got FATAL ERROR from: ~p~n",[?MODULE, Pid]),
    exit(State#state.supervisor, shutdown),
    {noreply, State};

%% Any other exit signal. A remote exit is taken as a mnesia down
%% on that node, unless a negotiation is in progress, in which case
%% the message is queued and handled afterwards by process_q/1.
handle_info(Msg = {'EXIT',Pid,_}, State) ->
    Node = node(Pid),
    if
        Node /= node(), State#state.connecting == undefined ->
            %% Remotely linked process died, assume that it was a mnesia_monitor
            mnesia_recover:mnesia_down(Node),
            mnesia_controller:mnesia_down(Node),
            {noreply, State#state{going_down = [Node | State#state.going_down]}};
        Node /= node() ->
            {noreply, State#state{mq = State#state.mq ++ [{info, Msg}]}};
        true ->
            %% We have probably got an exit signal from
            %% disk_log or dets
            Hint = "Hint: check that the disk still is writable",
            fatal("~p got unexpected info: ~p; ~p~n",
                  [?MODULE, Msg, Hint])
    end;

%% The negotiation helper is done: reply to the waiting caller and
%% handle whatever was queued in the meantime. From must be the
%% caller stored in `connecting'.
handle_info({protocol_negotiated, From,Res}, State) ->
    From = element(1,State#state.connecting),
    gen_server:reply(From, Res),
    process_q(State#state{connecting = undefined});

handle_info({nodeup, Node}, State) ->
    %% Ok, we are connected to yet another Erlang node
    %% Let's check if Mnesia is running there in order
    %% to detect if the network has been partitioned
    %% due to communication failure.

    HasDown = mnesia_recover:has_mnesia_down(Node),
    ImRunning = mnesia_lib:is_running(),

    if
        %% If I'm not running the test will be made later.
        HasDown == true, ImRunning == yes ->
            spawn_link(?MODULE, detect_partitioned_network, [self(), Node]);
        true ->
            ignore
    end,
    {noreply, State};

handle_info({nodedown, _Node}, State) ->
    %% Ignore, we are only caring about nodeup's
    {noreply, State};

%% Notification from disk_log: everything but truncation is logged
%% as a warning.
handle_info({disk_log, _Node, Log, Info}, State) ->
    case Info of
        {truncated, _No} ->
            ok;
        _ ->
            mnesia_lib:important("Warning Log file ~p error reason ~s~n",
                                 [Log, disk_log:format_error(Info)])
    end,
    {noreply, State};

%% Unknown message: logged and ignored. The clause has to end with
%% a gen_server return tuple, as the corresponding clauses of
%% handle_call/3 and handle_cast/2 do; previously the result of
%% error/2 was returned instead.
handle_info(Msg, State) ->
    error("~p got unexpected info (~p): ~p~n", [?MODULE, State, Msg]),
    {noreply, State}.

%% Handles the first queued message, if any, by calling the
%% callback it was originally meant for. That callback's return
%% value is the return value of process_q/1.
process_q(State = #state{mq=[]}) -> {noreply,State};
process_q(State = #state{mq=[{info,Msg}|R]}) ->
    handle_info(Msg, State#state{mq=R});
process_q(State = #state{mq=[{cast,Msg}|R]}) ->
    handle_cast(Msg, State#state{mq=R});
process_q(State = #state{mq=[{call,From,Msg}|R]}) ->
    handle_call(Msg, From, State#state{mq=R}).

%%----------------------------------------------------------------------
%% Func: terminate/2
%% Purpose: Shutdown the server
%% Returns: any (ignored by gen_server)
%%----------------------------------------------------------------------
terminate(Reason, State) ->
    terminate_proc(?MODULE, Reason, State).

%%----------------------------------------------------------------------
%% Func: code_change/3
%% Purpose: Upgrade process when its code is to be changed
%% Returns: {ok, NewState}
%%----------------------------------------------------------------------


%% Converts the old six element state tuple to the current record;
%% the fields it lacks (connecting, mq) get their default values.
code_change(_, {state, SUP, PN, GD, TMS, EC}, _) ->
    {ok, #state{supervisor=SUP, pending_negotiators=PN,
                going_down = GD, tm_started =TMS, early_connects = EC}};

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%----------------------------------------------------------------------
%%% Internal functions
%%%----------------------------------------------------------------------

%% Reads every configuration parameter in Params and stores its
%% checked value. Returns ok.
process_config_args(Params) ->
    lists:foreach(fun(Param) ->
                          Value = get_env(Param),
                          dbg_out("Env ~p: ~p~n", [Param, Value]),
                          mnesia_lib:set(Param, Value)
                  end,
                  Params),
    ok.

%% Stores a new value for the parameter E after type checking it.
set_env(E,Val) ->
    Checked = check_type(E,Val),
    mnesia_lib:set(E, Checked),
    ok.

%% Returns the value of the parameter E. A stored value is used as
%% is; otherwise the application environment, or failing that the
%% built-in default, is type checked and returned.
get_env(E) ->
    case ?catch_val(E) of
        {'EXIT', _} ->
            Raw = case application:get_env(mnesia, E) of
                      {ok, Configured} -> Configured;
                      undefined -> default_env(E)
                  end,
            check_type(E, Raw);
        Stored ->
            Stored
    end.

%% All configuration parameters known to Mnesia.
env() ->
    [
     access_module,
     auto_repair,
     backup_module,
     debug,
     dir,
     dump_log_load_regulation,
     dump_log_time_threshold,
     dump_log_update_in_place,
     dump_log_write_threshold,
     embedded_mnemosyne,
     event_module,
     extra_db_nodes,
     ignore_fallback_at_startup,
     fallback_error_function,
     max_wait_for_decision,
     schema_location,
     core_dir,
     pid_sort_order,
     no_table_loaders,
     dc_dump_limit
    ].

%% Built-in default value of each configuration parameter.
default_env(access_module)              -> mnesia;
default_env(auto_repair)                -> true;
default_env(backup_module)              -> mnesia_backup;
default_env(debug)                      -> none;
default_env(dir) ->
    %% "Mnesia.<node name>" relative to the current directory.
    filename:absname(lists:concat(["Mnesia.", node()]));
default_env(dump_log_load_regulation)   -> false;
default_env(dump_log_time_threshold)    -> timer:minutes(3);
default_env(dump_log_update_in_place)   -> true;
default_env(dump_log_write_threshold)   -> 1000;
default_env(embedded_mnemosyne)         -> false;
default_env(event_module)               -> mnesia_event;
default_env(extra_db_nodes)             -> [];
default_env(ignore_fallback_at_startup) -> false;
default_env(fallback_error_function)    -> {mnesia, lkill};
default_env(max_wait_for_decision)      -> infinity;
default_env(schema_location)            -> opt_disc;
default_env(core_dir)                   -> false;
default_env(pid_sort_order)             -> false;
default_env(no_table_loaders)           -> 2;
default_env(dc_dump_limit)              -> 4.
%% Validate and normalise the value of one configuration parameter.
%% Exits with {bad_config, Env, Val} when do_check_type/2 fails.
check_type(Env, Val) ->
    Checked = (catch do_check_type(Env, Val)),
    case Checked of
        {'EXIT', _Reason} ->
            exit({bad_config, Env, Val});
        _ ->
            Checked
    end.

%% do_check_type(Parameter, Value) -> NormalisedValue
%% Fails (function_clause or similar) when Value is not acceptable.

%% Callback module names: any atom.
do_check_type(Key, Mod)
  when (Key =:= access_module orelse
        Key =:= backup_module orelse
        Key =:= event_module),
       is_atom(Mod) ->
    Mod;
%% Boolean switches.
do_check_type(Key, Flag)
  when Key =:= auto_repair;
       Key =:= dump_log_load_regulation;
       Key =:= dump_log_update_in_place;
       Key =:= ignore_fallback_at_startup;
       Key =:= embedded_mnemosyne ->
    bool(Flag);
%% Strictly positive integers.
do_check_type(Key, Int)
  when (Key =:= dump_log_time_threshold orelse
        Key =:= dump_log_write_threshold orelse
        Key =:= max_wait_for_decision orelse
        Key =:= no_table_loaders),
       is_integer(Int), Int > 0 ->
    Int;
do_check_type(max_wait_for_decision, infinity) ->
    infinity;
do_check_type(dc_dump_limit, Num) when is_number(Num), Num > 0 ->
    Num;
%% Debug level; the booleans are accepted as aliases.
do_check_type(debug, Level) when Level =:= debug; Level =:= true ->
    debug;
do_check_type(debug, Level) when Level =:= none; Level =:= false ->
    none;
do_check_type(debug, trace) ->
    trace;
do_check_type(debug, verbose) ->
    verbose;
do_check_type(dir, Dir) ->
    filename:absname(Dir);
do_check_type(fallback_error_function, {Mod, Func})
  when is_atom(Mod), is_atom(Func) ->
    {Mod, Func};
do_check_type(extra_db_nodes, Nodes) when is_list(Nodes) ->
    %% Drop the local node; anything that is not an atom is an error
    IsOtherNode = fun(N) when N == node() -> false;
                     (A) when is_atom(A) -> true
                  end,
    [N || N <- Nodes, IsOtherNode(N)];
do_check_type(schema_location, Media) ->
    media(Media);
do_check_type(core_dir, Off) when Off =:= false; Off =:= "false" ->
    false;
do_check_type(core_dir, Dir) when is_list(Dir) ->
    Dir;
do_check_type(pid_sort_order, Order)
  when Order =:= r9b_plain; Order =:= "r9b_plain" ->
    r9b_plain;
do_check_type(pid_sort_order, Order)
  when Order =:= standard; Order =:= "standard" ->
    standard;
do_check_type(pid_sort_order, _) ->
    false.
%% Accept only the two boolean atoms.
bool(Flag) when is_boolean(Flag) -> Flag.

%% Accept only the known schema locations.
media(Media) when Media =:= disc; Media =:= opt_disc; Media =:= ram -> Media.

%% Type check Val and, when acceptable, store it in the application
%% environment of mnesia.  Returns the normalised value or
%% {error, {bad_type, Env, Val}}.
patch_env(Env, Val) ->
    Checked = (catch do_check_type(Env, Val)),
    case Checked of
        {'EXIT', _Reason} ->
            {error, {bad_type, Env, Val}};
        _ ->
            application_controller:set_env(mnesia, Env, Checked),
            Checked
    end.

%% Body of the helper process spawned on nodeup: check Node for a
%% partitioned network, then detach from Mon and terminate normally.
detect_partitioned_network(Mon, Node) ->
    detect_inconcistency([Node], running_partitioned_network),
    unlink(Mon),
    exit(normal).

%% Ask every node in Nodes that we have noted as down whether it in turn
%% regards us as down, and report an inconsistency for each that does.
%% Returns ok or inconsistent_database.
detect_inconcistency([], _Context) ->
    ok;
detect_inconcistency(Nodes, Context) ->
    Downs = lists:filter(fun(N) -> mnesia_recover:has_mnesia_down(N) end,
                         Nodes),
    {Replies, _BadNodes} =
        rpc:multicall(Downs, ?MODULE, has_remote_mnesia_down, [node()]),
    report_inconsistency(Replies, Context, ok).

%% Called via rpc from detect_inconcistency/2 on another node.
%% Returns {true, node()} when this node has noted Node as down and has
%% no master nodes set for the schema, otherwise {false, node()}.
has_remote_mnesia_down(Node) ->
    HasDown = mnesia_recover:has_mnesia_down(Node),
    Master = mnesia_recover:get_master_nodes(schema),
    Inconsistent = (HasDown == true) andalso (Master == []),
    {Inconsistent, node()}.

%% Walk the rpc replies.  For each {true, Node} a system event
%% {inconsistent_database, Context, Node} is reported and the status
%% becomes inconsistent_database; other replies leave the status as is.
report_inconsistency(Replies, Context, Status) ->
    lists:foldl(fun({true, Node}, _Acc) ->
                        %% Mnesia is already running on the other node
                        %% AND both nodes regard each other as down.  The
                        %% database is potentially inconsistent, so the
                        %% applications must be told and may perform some
                        %% clever recovery action.
                        Msg = {inconsistent_database, Context, Node},
                        mnesia_lib:report_system_event(Msg),
                        inconsistent_database;
                   ({false, _Node}, Acc) ->
                        Acc;
                   ({badrpc, _Reason}, Acc) ->
                        Acc
                end,
                Status,
                Replies).
diff --git a/lib/mnesia/src/mnesia_recover.erl b/lib/mnesia/src/mnesia_recover.erl new file mode 100644 index 0000000000..6c53c2e752 --- /dev/null +++ b/lib/mnesia/src/mnesia_recover.erl @@ -0,0 +1,1196 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_recover). + +-behaviour(gen_server). + +-export([ + allow_garb/0, + call/1, + connect_nodes/1, + disconnect/1, + dump_decision_tab/0, + get_master_node_info/0, + get_master_node_tables/0, + get_master_nodes/1, + get_mnesia_downs/0, + has_mnesia_down/1, + incr_trans_tid_serial/0, + init/0, + log_decision/1, + log_master_nodes/3, + log_mnesia_down/1, + log_mnesia_up/1, + mnesia_down/1, + note_decision/2, + note_log_decision/2, + outcome/2, + start/0, + start_garb/0, + still_pending/1, + sync_trans_tid_serial/1, + sync/0, + wait_for_decision/2, + what_happened/3 + ]). + +%% gen_server callbacks +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 + ]). + + +-include("mnesia.hrl"). +-import(mnesia_lib, [set/2, verbose/2, error/2, fatal/2]). + +-record(state, {supervisor, + unclear_pid, + unclear_decision, + unclear_waitfor, + tm_queue_len = 0, + initiated = false, + early_msgs = [] + }). + +%%-define(DBG(F, A), mnesia:report_event(list_to_atom(lists:flatten(io_lib:format(F, A))))). 
%%-define(DBG(F, A), io:format("DBG: " ++ F, A)).

-record(transient_decision, {tid, outcome}).

%% Start the recover server, registered locally under the module name.
start() ->
    Opts = [{timeout, infinity}
            %%, {debug, [trace]}
           ],
    gen_server:start_link({local, ?MODULE}, ?MODULE, [self()], Opts).

%% Ask the server to initiate itself (synchronous).
init() ->
    call(init).

%% Start the two periodic timers of the server: decision garbage
%% collection every two minutes and an overload check every ten seconds.
start_garb() ->
    Server = whereis(mnesia_recover),
    {ok, _} = timer:send_interval(timer:minutes(2), Server, garb_decisions),
    {ok, _} = timer:send_interval(timer:seconds(10), Server, check_overload).

allow_garb() ->
    cast(allow_garb).


%% The transaction log has either been switched (latest -> previous) or
%% there is nothing to be dumped. This means that the previous
%% transaction log only may contain commit records which refer to
%% transactions noted in the last two of the 'Prev' tables. All other
%% tables may now be garbed by 'garb_decisions' (after 2 minutes).
%% Max 10 tables are kept.
do_allow_garb() ->
    %% The order of the following steps is important!
    Latest = val(latest_transient_decision),
    %% Small tables are not rotated: they are created on every dump_log
    %% and may be small (empty) for schema transactions, which are
    %% dumped twice.
    case ets:info(Latest, size) > 20 of
        false ->
            ignore;
        true ->
            rotate_transient_decisions(Latest)
    end.

%% Make a fresh table the latest one, keep at most ten older tables and
%% delete the rest.
rotate_transient_decisions(Latest) ->
    Older = val(previous_transient_decisions),
    Fresh = create_transient_decision(),
    {Kept, Expired} = sublist([Latest | Older], 10, []),
    lists:foreach(fun(Tab) -> ?ets_delete_table(Tab) end, Expired),
    set(previous_transient_decisions, Kept),
    set(latest_transient_decision, Fresh).

%% Split a list after at most N elements: {FirstN, Remainder}.
sublist([Elem | More], N, Taken) when N > 0 ->
    sublist(More, N - 1, [Elem | Taken]);
sublist(Remainder, _N, Taken) ->
    {lists:reverse(Taken), Remainder}.

%% Keep only the two most recent of the previous transient decision
%% tables and delete all older ones.
do_garb_decisions() ->
    case val(previous_transient_decisions) of
        [Newest, NextNewest | Obsolete] ->
            set(previous_transient_decisions, [Newest, NextNewest]),
            [?ets_delete_table(Tab) || Tab <- Obsolete];
        _ ->
            ignore
    end.

connect_nodes(Ns) ->
    call({connect_nodes, Ns}).

disconnect(Node) ->
    call({disconnect, Node}).

log_decision(D) ->
    cast({log_decision, D}).
%% Read a stored mnesia variable; failures are passed on to
%% mnesia_lib:other_val/2.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} ->
            mnesia_lib:other_val(Var, Reason);
        Value ->
            Value
    end.

%% Synchronous request to the local recover server.
%% Returns {error, {node_not_running, node()}} when the server is not
%% registered or an exit message from it is found after the call.
call(Msg) ->
    case whereis(?MODULE) of
        undefined ->
            {error, {node_not_running, node()}};
        Pid ->
            linked_call(Pid, Msg)
    end.

%% Perform the call while linked to the server.
linked_call(Pid, Msg) ->
    link(Pid),
    Reply = gen_server:call(Pid, Msg, infinity),
    unlink(Pid),
    %% An exit signal from the server may already be in the mailbox
    receive
        {'EXIT', Pid, _Reason} ->
            {error, {node_not_running, node()}}
    after 0 ->
            Reply
    end.

%% Perform call/1 on every node in Nodes.
multicall(Nodes, Msg) ->
    rpc:multicall(Nodes, ?MODULE, call, [Msg]).

%% Asynchronous request to the local recover server, if it is registered.
cast(Msg) ->
    case whereis(?MODULE) of
        undefined ->
            ignore;
        Pid ->
            gen_server:cast(Pid, Msg)
    end.

%% Asynchronous request to the recover servers on Nodes.
abcast(Nodes, Msg) ->
    gen_server:abcast(Nodes, ?MODULE, Msg).

%% Remember the outcome of Tid in the latest transient decision table.
note_decision(Tid, Outcome) ->
    Latest = val(latest_transient_decision),
    Entry = #transient_decision{tid = Tid, outcome = Outcome},
    ?ets_insert(Latest, Entry).

%% Forget that Node has been noted as down.
note_up(Node, _Date, _Time) ->
    ?ets_delete(mnesia_decision, Node).

%% Remember that Node went down at Date/Time.
note_down(Node, Date, Time) ->
    ?ets_insert(mnesia_decision, {mnesia_down, Node, Date, Time}).

%% Remember the master nodes of Tab; an empty list removes the entry.
note_master_nodes(Tab, []) ->
    ?ets_delete(mnesia_decision, Tab);
note_master_nodes(Tab, Nodes) when is_list(Nodes) ->
    ?ets_insert(mnesia_decision, {master_nodes, Tab, Nodes}).

%% Store a decision.  Without disc nodes only the filtered outcome is
%% kept, in the transient table; otherwise the whole decision record is
%% stored in mnesia_decision.
note_outcome(#decision{disc_nodes = [], tid = Tid, outcome = Outcome}) ->
%%    ?DBG("~w: note_tmp_decision: ~w~n", [node(), D]),
    note_decision(Tid, filter_outcome(Outcome)),
    ?ets_delete(mnesia_decision, Tid);
note_outcome(#decision{} = D) ->
%%    ?DBG("~w: note_decision: ~w~n", [node(), D]),
    ?ets_insert(mnesia_decision, D).

%% Log a locally produced decision.  A decision whose outcome is not
%% unclear is first merged with what is already known about the
%% transaction, and the nodes of the original decision are told.
do_log_decision(D) when D#decision.outcome /= unclear ->
    Known = decision(D#decision.tid),
    Merged = merge_decisions(node(), Known, D),
    do_log_decision(Merged, true, D);
do_log_decision(D) ->
    do_log_decision(D, false, undefined).
%% Note decision D and, when a mnesia directory is in use, append it to
%% the latest log.  With DoTell == true and an outcome other than unclear
%% the remote disc and ram nodes of NodeD are told about the decision.
do_log_decision(D, DoTell, NodeD) ->
    RemoteDisc = D#decision.disc_nodes -- [node()],
    Outcome = D#decision.outcome,
    D2 = without_local_disc(Outcome, D, RemoteDisc),
    note_outcome(D2),
    case mnesia_monitor:use_dir() of
        true ->
            mnesia_log:append(latest_log, D2),
            case (DoTell == true) andalso (Outcome /= unclear) of
                true ->
                    tell_im_certain(NodeD#decision.disc_nodes -- [node()], D2),
                    tell_im_certain(NodeD#decision.ram_nodes -- [node()], D2);
                false ->
                    ignore
            end;
        false ->
            ignore
    end.

%% For aborted and committed decisions the local node is removed from the
%% disc nodes; other decisions are left untouched.
without_local_disc(Outcome, D, RemoteDisc)
  when Outcome =:= aborted; Outcome =:= committed ->
    D#decision{disc_nodes = RemoteDisc};
without_local_disc(_Outcome, D, _RemoteDisc) ->
    D.

%% Tell the recover servers on Nodes that we are certain about D.
tell_im_certain([], _D) ->
    ignore;
tell_im_certain(Nodes, D) ->
    %% mnesia_lib:verbose("~w: tell: ~w~n", [Msg, Nodes]),
    abcast(Nodes, {im_certain, node(), D}).

sync() ->
    call(sync).

log_mnesia_up(Node) ->
    call({log_mnesia_up, Node}).

log_mnesia_down(Node) ->
    call({log_mnesia_down, Node}).

%% All nodes currently noted as down.
get_mnesia_downs() ->
    Pattern = {mnesia_down, '_', '_', '_'},
    [Node || {mnesia_down, Node, _Date, _Time}
                 <- ?ets_match_object(mnesia_decision, Pattern)].

%% Check if we have got a mnesia_down from Node
has_mnesia_down(Node) ->
    case ?ets_lookup(mnesia_decision, Node) of
        [] ->
            false;
        [{mnesia_down, Node, _Date, _Time}] ->
            true
    end.

%% Node has gone down: drop it from the recover nodes and notify the
%% server.  Ignored while the recover nodes are not yet set up.
mnesia_down(Node) ->
    case ?catch_val(recover_nodes) of
        {'EXIT', _} ->
            %% Not started yet
            ignore;
        _ ->
            mnesia_lib:del(recover_nodes, Node),
            cast({mnesia_down, Node})
    end.
%% Log the master nodes given as [{Tab, Nodes}].
%% - Mnesia running: go through the recover server.
%% - Not running and no directory in use: nothing to do.
%% - Not running but a directory in use: open the latest log, write
%%   directly to it and close it afterwards.
log_master_nodes(Args, UseDir, yes) ->
    log_master_nodes2(Args, UseDir, yes, ok);
log_master_nodes(_Args, false, _IsRunning) ->
    ok;
log_master_nodes(Args, UseDir, IsRunning) ->
    Name = latest_log,
    Fname = mnesia_log:latest_log_file(),
    Exists = mnesia_lib:exists(Fname),
    Repair = mnesia:system_info(auto_repair),
    case disk_log:open([{file, Fname}, {name, Name}, {repair, Repair}]) of
        {ok, Name} ->
            log_master_nodes2(Args, UseDir, IsRunning, ok);
        {repaired, Name, {recovered, _R}, {badbytes, _B}}
          when Exists == true ->
            log_master_nodes2(Args, UseDir, IsRunning, ok);
        {repaired, Name, {recovered, _R}, {badbytes, _B}}
          when Exists == false ->
            %% A brand new log file needs its header first
            mnesia_log:write_trans_log_header(),
            log_master_nodes2(Args, UseDir, IsRunning, ok);
        {error, Reason} ->
            {error, Reason}
    end.

%% Log every {Tab, Nodes} pair.  The result is ok, or the last error
%% encountered.  When mnesia is not running the log is closed at the end.
log_master_nodes2([{Tab, Nodes} | Tail], UseDir, IsRunning, WorstRes) ->
    case log_one_master(Tab, Nodes, UseDir, IsRunning) of
        ok ->
            log_master_nodes2(Tail, UseDir, IsRunning, WorstRes);
        {error, Reason} ->
            log_master_nodes2(Tail, UseDir, IsRunning, {error, Reason})
    end;
log_master_nodes2([], _UseDir, yes, WorstRes) ->
    WorstRes;
log_master_nodes2([], _UseDir, _NotRunning, WorstRes) ->
    disk_log:close(latest_log),
    WorstRes.

%% Log the master nodes of a single table.
log_one_master(Tab, Nodes, UseDir, yes) ->
    Reply = call({log_master_nodes, Tab, Nodes, UseDir, yes}),
    mnesia_controller:master_nodes_updated(Tab, Nodes),
    Reply;
log_one_master(Tab, Nodes, UseDir, IsRunning) ->
    do_log_master_nodes(Tab, Nodes, UseDir, IsRunning).

%% All {master_nodes, Tab, Nodes} entries; [] when the lookup fails.
get_master_node_info() ->
    Pattern = {master_nodes, '_', '_'},
    Found = (catch mnesia_lib:db_match_object(ram_copies,
                                              mnesia_decision,
                                              Pattern)),
    case Found of
        {'EXIT', _} -> [];
        _ -> Found
    end.

%% The tables that have master nodes set.
get_master_node_tables() ->
    [Tab || {master_nodes, Tab, _Nodes} <- get_master_node_info()].

%% The master nodes of Tab; [] when none are set.
get_master_nodes(Tab) ->
    Found = (catch ?ets_lookup_element(mnesia_decision, Tab, 3)),
    case Found of
        {'EXIT', _} -> [];
        _ -> Found
    end.
%% Determine what has happened to the transaction
what_happened(Tid, Protocol, Nodes) ->
    Default = default_outcome(Protocol),
    This = node(),
    case lists:member(This, Nodes) of
        true ->
            %% Ask the local server first, the others only if needed
            {ok, Outcome} = call({what_happened, Default, Tid}),
            Others = lists:delete(This, Nodes),
            case filter_outcome(Outcome) of
                unclear ->
                    what_happened_remotely(Tid, Default, Others);
                Certain ->
                    %% aborted or committed
                    Certain
            end;
        false ->
            what_happened_remotely(Tid, Default, Nodes)
    end.

%% Outcome to assume for a transaction nobody knows about.
default_outcome(asym_trans) -> aborted;
default_outcome(_) -> unclear.  %% sym_trans and sync_sym_trans

%% Ask the recover servers on Nodes and weigh their answers together.
what_happened_remotely(Tid, Default, Nodes) ->
    {Replies, _} = multicall(Nodes, {what_happened, Default, Tid}),
    check_what_happened(Replies, 0, 0).

%% Count aborts and commits among the replies (added to the given start
%% values) and decide: committed only if somebody has committed and
%% nobody has aborted, otherwise aborted.
check_what_happened(Replies, Aborts0, Commits0) ->
    Tally =
        fun(Reply, {Aborts, Commits}) ->
                case Reply of
                    {ok, R} ->
                        case filter_outcome(R) of
                            committed -> {Aborts, Commits + 1};
                            aborted -> {Aborts + 1, Commits};
                            unclear -> {Aborts, Commits}
                        end;
                    {error, _} ->
                        {Aborts, Commits};
                    {badrpc, _} ->
                        {Aborts, Commits}
                end
        end,
    case lists:foldl(Tally, {Aborts0, Commits0}, Replies) of
        {0, Commits} when Commits > 0 ->
            committed;          % All that know have committed
        _ ->
            aborted             % Somebody has aborted, or nobody knows
    end.

%% Determine what has happened to the transaction
%% and possibly wait forever for the decision.
wait_for_decision(presume_commit, _InitBy) ->
    %% sym_trans
    {{presume_commit, self()}, committed};

wait_for_decision(D, InitBy) when D#decision.outcome == presume_abort ->
    wait_for_decision(D, InitBy, 0).
%% asym_trans: poll the decision tables until the outcome is known.
%% N counts the retries made while the outcome is still presume_abort.
wait_for_decision(D, InitBy, N) ->
    Tid = D#decision.tid,
    case outcome(Tid, D#decision.outcome) of
        committed ->
            {Tid, committed};
        aborted ->
            {Tid, aborted};
        presume_abort when N > 10 ->
            %% Retried long enough
            {Tid, aborted};
        presume_abort ->
            %% busy loop for ets decision moving
            timer:sleep(10),
            wait_for_decision(D, InitBy, N + 1);
        _ when InitBy /= startup ->
            %% Wait a while for active transactions
            %% to end and try again
            timer:sleep(100),
            wait_for_decision(D, InitBy, N);
        _ ->
            %% At startup the recover server resolves it for us
            {ok, Res} = call({wait_for_decision, D}),
            {Tid, Res}
    end.

%% The transactions in Pending whose outcome is still unclear.
still_pending(Pending) ->
    [Tid || Tid <- Pending,
            filter_outcome(outcome(Tid, unclear)) =:= unclear].

%% Read the saved decision table, chunk by chunk, into memory.
load_decision_tab() ->
    Cont = mnesia_log:open_decision_tab(),
    load_decision_tab(Cont, load_decision_tab),
    mnesia_log:close_decision_tab().

load_decision_tab(eof, _InitBy) ->
    ok;
load_decision_tab(Cont, InitBy) ->
    case mnesia_log:chunk_decision_tab(Cont) of
        eof ->
            ok;
        {NextCont, Decisions} ->
            note_log_decisions(Decisions, InitBy),
            load_decision_tab(NextCont, InitBy)
    end.

%% Dumps DECISION.LOG and PDECISION.LOG and removes them.
%% From now on all decisions are logged in the transaction log file
convert_old() ->
    HasPrevious = mnesia_lib:exists(mnesia_log:previous_decision_log_file()),
    HasLatest = mnesia_lib:exists(mnesia_log:decision_log_file()),
    case HasPrevious or HasLatest of
        true ->
            mnesia_log:open_decision_log(),
            dump_decision_log(startup),
            dump_decision_log(startup),
            mnesia_log:close_decision_log(),
            ok = file:delete(mnesia_log:decision_log_file());
        false ->
            ignore
    end.

dump_decision_log(InitBy) ->
    %% Assumed to be run in transaction log dumper process
    perform_dump_decision_log(mnesia_log:prepare_decision_log_dump(), InitBy).
%% Go through the decision log.  Its contents are only noted when the
%% dump is made at startup; in every case the dump is confirmed.
perform_dump_decision_log(eof, _InitBy) ->
    confirm_decision_log_dump();
perform_dump_decision_log(Cont, startup) ->
    case mnesia_log:chunk_decision_log(Cont) of
        eof ->
            confirm_decision_log_dump();
        {NextCont, Decisions} ->
            note_log_decisions(Decisions, startup),
            perform_dump_decision_log(NextCont, startup)
    end;
perform_dump_decision_log(_Cont, _InitBy) ->
    confirm_decision_log_dump().

confirm_decision_log_dump() ->
    dump_decision_tab(),
    mnesia_log:confirm_decision_log_dump().

%% Save the whole mnesia_decision table as one decision list.
dump_decision_tab() ->
    Everything = mnesia_lib:db_match_object(ram_copies, mnesia_decision, '_'),
    mnesia_log:save_decision_tab({decision_list, Everything}).

%% Note each item read from a decision log or decision tab, in order.
note_log_decisions(Items, InitBy) ->
    lists:foreach(fun(Item) -> note_log_decision(Item, InitBy) end, Items).

%% Note one logged item.  Exits on items of unknown format.
note_log_decision(#decision{outcome = pre_commit} = NewD, InitBy) ->
    %% A logged pre_commit is treated as unclear
    note_log_decision(NewD#decision{outcome = unclear}, InitBy);

note_log_decision(#decision{tid = Tid} = NewD, _InitBy) ->
    sync_trans_tid_serial(Tid),
    note_outcome(NewD);
note_log_decision({trans_tid, serial, _Serial}, startup) ->
    ignore;
note_log_decision({trans_tid, serial, Serial}, _InitBy) ->
    sync_trans_tid_serial(Serial);
note_log_decision({mnesia_up, Node, Date, Time}, _InitBy) ->
    note_up(Node, Date, Time);
note_log_decision({mnesia_down, Node, Date, Time}, _InitBy) ->
    note_down(Node, Date, Time);
note_log_decision({master_nodes, Tab, Nodes}, _InitBy) ->
    note_master_nodes(Tab, Nodes);
note_log_decision(#log_header{log_kind = decision_log} = H, _InitBy) ->
    V = mnesia_log:decision_log_version(),
    case H#log_header.log_version of
        Found when Found == V ->
            ok;
        Found when Found == "2.0" ->
            verbose("Accepting an old version format of decision log: ~p~n",
                    [V]),
            ok;
        _ ->
            fatal("Bad version of decision log: ~p~n", [H])
    end;
note_log_decision(#log_header{log_kind = decision_tab} = H, _InitBy) ->
    V = mnesia_log:decision_tab_version(),
    case H#log_header.log_version of
        Found when V == Found ->
            ok;
        _ ->
            fatal("Bad version of decision tab: ~p~n", [H])
    end;
note_log_decision({decision_list, ItemList}, InitBy) ->
    note_log_decisions(ItemList, InitBy);
note_log_decision(BadItem, InitBy) ->
    exit({"Bad decision log item", BadItem, InitBy}).

%% The transaction serial counter is kept in mnesia_decision as
%% {trans_tid, serial, Counter}.
trans_tid_serial() ->
    ?ets_lookup_element(mnesia_decision, serial, 3).

set_trans_tid_serial(Val) ->
    ?ets_insert(mnesia_decision, {trans_tid, serial, Val}).

incr_trans_tid_serial() ->
    ?ets_update_counter(mnesia_decision, serial, 1).

%% Make sure that the local serial counter is ahead of a counter seen
%% elsewhere, given either as an integer or as a tid.
sync_trans_tid_serial(ThatCounter) when is_integer(ThatCounter) ->
    case ThatCounter > trans_tid_serial() of
        true ->
            set_trans_tid_serial(ThatCounter + 1);
        false ->
            ignore
    end;
sync_trans_tid_serial(Tid) ->
    sync_trans_tid_serial(Tid#tid.counter).



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Callback functions from gen_server

%%----------------------------------------------------------------------
%% Func: init/1
%% Returns: {ok, State}          |
%%          {ok, State, Timeout} |
%%          {stop, Reason}
%%----------------------------------------------------------------------
init([Parent]) ->
    process_flag(trap_exit, true),
    mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
    %% Start with one empty transient table and no known recover nodes
    set(latest_transient_decision, create_transient_decision()),
    set(previous_transient_decisions, []),
    set(recover_nodes, []),
    {ok, #state{supervisor = Parent}}.

%% Create a new, empty table for transient decisions.
create_transient_decision() ->
    Opts = [{keypos, 2}, set, public],
    ?ets_new_table(mnesia_transient_decision, Opts).
%%----------------------------------------------------------------------
%% Func: handle_call/3
%% Returns: {reply, Reply, State}          |
%%          {reply, Reply, State, Timeout} |
%%          {noreply, State}               |
%%          {noreply, State, Timeout}      |
%%          {stop, Reason, Reply, State}   | (terminate/2 is called)
%%----------------------------------------------------------------------

handle_call(init, From, State) when State#state.initiated == false ->
    case mnesia_monitor:use_dir() of
        true ->
            new_decision_tab(),
            %% Bring in what has been saved on disc
            case mnesia_lib:exists(mnesia_log:decision_tab_file()) of
                true ->
                    load_decision_tab();
                false ->
                    ignore
            end,
            convert_old(),
            mnesia_dumper:opt_dump_log(scan_decisions);
        false ->
            new_decision_tab()
    end,
    handle_early_msgs(State, From);

handle_call(Msg, From, State) when State#state.initiated == false ->
    %% Buffer early messages
    Buffered = [{call, Msg, From} | State#state.early_msgs],
    {noreply, State#state{early_msgs = Buffered}};

handle_call({disconnect, Node}, _From, State) ->
    mnesia_monitor:disconnect(Node),
    mnesia_lib:del(recover_nodes, Node),
    {reply, ok, State};

handle_call({connect_nodes, Ns}, From, State) ->
    %% Determine which nodes we should try to connect
    AlreadyConnected = val(recover_nodes),
    {_, Nodes} = mnesia_lib:search_delete(node(), Ns),
    Candidates = Nodes -- AlreadyConnected,
    %% gen_server:reply/2 is used for the answers below since this
    %% clause is also invoked from handle_info
    case mnesia_monitor:negotiate_protocol(Candidates) of
        busy ->
            %% The monitor is disconnecting some nodes; retry
            %% the request (to avoid deadlock).
            erlang:send_after(2, self(), {connect_nodes,Ns,From}),
            {noreply, State};
        [] ->
            %% No good nodes to connect to!
            gen_server:reply(From, {[], AlreadyConnected}),
            {noreply, State};
        GoodNodes ->
            %% Now we have agreed upon a protocol with some new nodes
            %% and we may use them when we recover transactions
            mnesia_lib:add_list(recover_nodes, GoodNodes),
            cast({announce_all, GoodNodes}),
            check_partitioning(GoodNodes),
            gen_server:reply(From, {GoodNodes, AlreadyConnected}),
            {noreply,State}
    end;

handle_call({what_happened, Default, Tid}, _From, State) ->
    sync_trans_tid_serial(Tid),
    {reply, {ok, outcome(Tid, Default)}, State};

handle_call({wait_for_decision, D}, From, State) ->
    Recov = val(recover_nodes),
    AliveRam = (mnesia_lib:intersect(D#decision.ram_nodes, Recov) -- [node()]),
    RemoteDisc = D#decision.disc_nodes -- [node()],
    case {AliveRam, RemoteDisc} of
        {[], []} ->
            %% Nobody else to wait for and we may safely abort
            {reply, {ok, aborted}, State};
        _ ->
            verbose("Transaction ~p is unclear. "
                    "Wait for disc nodes: ~w ram: ~w~n",
                    [D#decision.tid, RemoteDisc, AliveRam]),
            AliveDisc = mnesia_lib:intersect(RemoteDisc, Recov),
            Question = {what_decision, node(), D},
            abcast(AliveRam, Question),
            abcast(AliveDisc, Question),
            case val(max_wait_for_decision) of
                infinity ->
                    ignore;
                MaxWait ->
                    %% Enforce a decision when the time is up
                    ForceMsg = {force_decision, D#decision.tid},
                    {ok, _} = timer:send_after(MaxWait, ForceMsg)
            end,
            {noreply, State#state{unclear_pid = From,
                                  unclear_decision = D,
                                  unclear_waitfor = (RemoteDisc ++ AliveRam)}}
    end;

handle_call({log_mnesia_up, Node}, _From, State) ->
    do_log_mnesia_up(Node),
    {reply, ok, State};

handle_call({log_mnesia_down, Node}, _From, State) ->
    do_log_mnesia_down(Node),
    {reply, ok, State};

handle_call({log_master_nodes, Tab, Nodes, UseDir, IsRunning}, _From, State) ->
    do_log_master_nodes(Tab, Nodes, UseDir, IsRunning),
    {reply, ok, State};

handle_call(sync, _From, State) ->
    {reply, ok, State};

handle_call(Msg, _From, State) ->
    error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%% Create the mnesia_decision table and reset the serial counter.
new_decision_tab() ->
    ?ets_new_table(mnesia_decision, [{keypos, 2}, set, public, named_table]),
    set_trans_tid_serial(0).

%% Look for a partitioned network among newly connected nodes, unless
%% master nodes are set for the schema.
check_partitioning(GoodNodes) ->
    case get_master_nodes(schema) of
        [] ->
            Context = starting_partitioned_network,
            mnesia_monitor:detect_inconcistency(GoodNodes, Context);
        _ -> %% If master_nodes is set ignore old inconsistencies
            ignore
    end.

%% Log (when a directory is in use) and note that Node is up.
do_log_mnesia_up(Node) ->
    Date = date(),
    Time = time(),
    log_and_sync({mnesia_up, Node, Date, Time}),
    note_up(Node, Date, Time).

%% Log (when a directory is in use) and note that Node is down.
do_log_mnesia_down(Node) ->
    Date = date(),
    Time = time(),
    log_and_sync({mnesia_down, Node, Date, Time}),
    note_down(Node, Date, Time).

%% Append Item to the latest log and sync it, if a directory is in use.
log_and_sync(Item) ->
    case mnesia_monitor:use_dir() of
        true ->
            mnesia_log:append(latest_log, Item),
            disk_log:sync(latest_log);
        false ->
            ignore
    end.

%% Log the master nodes of Tab when UseDir is true, and note them in
%% memory when mnesia is running.  Returns the result of the logging.
do_log_master_nodes(Tab, Nodes, UseDir, IsRunning) ->
    Res =
        case UseDir of
            true ->
                LogRes = mnesia_log:append(latest_log,
                                           {master_nodes, Tab, Nodes}),
                disk_log:sync(latest_log),
                LogRes;
            false ->
                ok
        end,
    case IsRunning of
        yes ->
            note_master_nodes(Tab, Nodes);
        _NotRunning ->
            ignore
    end,
    Res.
%%----------------------------------------------------------------------
%% Func: handle_cast/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%----------------------------------------------------------------------

handle_cast(Msg, State) when State#state.initiated == false ->
    %% Buffer early messages
    Buffered = [{cast, Msg} | State#state.early_msgs],
    {noreply, State#state{early_msgs = Buffered}};

handle_cast({im_certain, Node, NewD}, State) ->
    %% Another node is certain: merge with what we know and log it
    Known = decision(NewD#decision.tid),
    Merged = merge_decisions(Node, Known, NewD),
    do_log_decision(Merged, false, undefined),
    {noreply, State};

handle_cast({log_decision, D}, State) ->
    do_log_decision(D),
    {noreply, State};

handle_cast(allow_garb, State) ->
    do_allow_garb(),
    {noreply, State};

handle_cast({decisions, Node, Decisions}, State) ->
    mnesia_lib:add(recover_nodes, Node),
    {noreply, add_remote_decisions(Node, Decisions, State)};

handle_cast({what_decision, Node, OtherD}, State) ->
    Tid = OtherD#decision.tid,
    sync_trans_tid_serial(Tid),
    %% Answer with our own decision, or echo the question if we have none
    Answer =
        case decision(Tid) of
            no_decision -> OtherD;
            MyD when is_record(MyD, decision) -> MyD
        end,
    announce([Node], [Answer], [], true),
    {noreply, State};

handle_cast({mnesia_down, Node}, State) ->
    {noreply, note_node_down(Node, State#state.unclear_decision, State)};

handle_cast({announce_all, Nodes}, State) ->
    announce_all(Nodes),
    {noreply, State};

handle_cast(Msg, State) ->
    error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%% A node went down.  If it is one of the ram nodes of the decision we
%% are waiting for, the unclear decision itself is added as that node's
%% answer.  Returns the new state.
note_node_down(_Node, undefined, State) ->
    State;
note_node_down(Node, D, State) ->
    case lists:member(Node, D#decision.ram_nodes) of
        false ->
            State;
        true ->
            add_remote_decision(Node, D, State)
    end.
%%----------------------------------------------------------------------
%% Func: handle_info/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%----------------------------------------------------------------------

%% No need for buffering
%% handle_info(Msg, State) when State#state.initiated == false ->
%%     %% Buffer early messages
%%     Msgs = State#state.early_msgs,
%%     {noreply, State#state{early_msgs = [{info, Msg} | Msgs]}};

handle_info({connect_nodes, Ns, From}, State) ->
    %% Retry of a connect_nodes call that found the monitor busy
    handle_call({connect_nodes,Ns},From,State);

handle_info(check_overload, S) ->
    %% Time to check if mnesia_tm is overloaded.  An overload event is
    %% reported when the message queue of mnesia_tm exceeds the threshold
    %% at two consecutive checks.
    case whereis(mnesia_tm) of
        Pid when is_pid(Pid) ->
            Threshold = 100,
            Prev = S#state.tm_queue_len,
            case process_info(Pid, message_queue_len) of
                {message_queue_len, Len}
                  when Len > Threshold, Prev > Threshold ->
                    What = {mnesia_tm, message_queue_len, [Prev, Len]},
                    mnesia_lib:report_system_event({mnesia_overload, What}),
                    {noreply, S#state{tm_queue_len = 0}};

                {message_queue_len, Len} when Len > Threshold ->
                    {noreply, S#state{tm_queue_len = Len}};

                {message_queue_len, _Len} ->
                    {noreply, S#state{tm_queue_len = 0}};

                undefined ->
                    %% mnesia_tm died after whereis/1; treat it the
                    %% same way as when it is not registered at all
                    {noreply, S}
            end;
        undefined ->
            {noreply, S}
    end;

handle_info(garb_decisions, State) ->
    do_garb_decisions(),
    {noreply, State};

handle_info({force_decision, Tid}, State) ->
    %% Enforce a transaction recovery decision,
    %% if we still are waiting for the outcome

    case State#state.unclear_decision of
        U when U#decision.tid == Tid ->
            verbose("Decided to abort transaction ~p since "
                    "max_wait_for_decision has been exceeded~n",
                    [Tid]),
            D = U#decision{outcome = aborted},
            State2 = add_remote_decision(node(), D, State),
            {noreply, State2};
        _ ->
            {noreply, State}
    end;

handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
    %% The supervisor has terminated; so do we
    mnesia_lib:dbg_out("~p was ~p~n",[?MODULE, R]),
    {stop, shutdown, State};

handle_info(Msg, State) ->
    error("~p got unexpected info: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%%----------------------------------------------------------------------
%% Func: terminate/2
%% Purpose: Shutdown the server
%% Returns: any (ignored by gen_server)
%%----------------------------------------------------------------------

terminate(Reason, State) ->
    mnesia_monitor:terminate_proc(?MODULE, Reason, State).

%%----------------------------------------------------------------------
%% Func: code_change/3
%% Purpose: Upgrade process when its code is to be changed
%% Returns: {ok, NewState}
%%----------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%----------------------------------------------------------------------
%%% Internal functions
%%%----------------------------------------------------------------------

%% Mark the server as initiated, process the messages that were buffered
%% before the init call and finally answer the init call (From) with ok.
%% Returns the gen_server result of processing the buffered messages.
handle_early_msgs(State, From) ->
    Res = do_handle_early_msgs(State#state.early_msgs,
                               State#state{early_msgs = [],
                                           initiated = true}),
    gen_server:reply(From, ok),
    Res.

do_handle_early_msgs([Msg | Msgs], State) ->
    %% The messages are buffered in reverse order, so the tail is
    %% processed before the head
    case do_handle_early_msgs(Msgs, State) of
%%      {stop, Reason, Reply, State2} ->
%%          {stop, Reason, Reply, State2};
        {stop, Reason, State2} ->
            {stop, Reason, State2};
        {noreply, State2} ->
            handle_early_msg(Msg, State2)
    end;

do_handle_early_msgs([], State) ->
    {noreply, State}.

%% Run one buffered message through the ordinary callback.  A reply to a
%% buffered call is sent explicitly since the call is no longer the
%% request being served by gen_server.
handle_early_msg({call, Msg, From}, State) ->
    case handle_call(Msg, From, State) of
        {reply, R, S} ->
            gen_server:reply(From, R),
            {noreply, S};
        Other ->
            Other
    end;
handle_early_msg({cast, Msg}, State) ->
    handle_cast(Msg, State);
handle_early_msg({info, Msg}, State) ->
    handle_info(Msg, State).

%% All tables that may hold a decision.
tabs() ->
    Curr = val(latest_transient_decision),    % Do not miss any trans even
    Prev = val(previous_transient_decisions), % if the tabs are switched
    [Curr, mnesia_decision | Prev].           % Ordered by hit probability

%% The decision known for Tid, as a decision record, or no_decision.
decision(Tid) ->
    decision(Tid, tabs()).
%% Search the given tables, in order, for a decision about Tid.
%% A transient decision is returned as a decision record without nodes.
decision(Tid, [Tab | Tabs]) ->
    case catch ?ets_lookup(Tab, Tid) of
        [D] when is_record(D, decision) ->
            D;
        [C] when is_record(C, transient_decision) ->
            transient_to_decision(C);
        [] ->
            decision(Tid, Tabs);
        {'EXIT', _} ->
            %% Recently switched transient decision table
            decision(Tid, Tabs)
    end;
decision(_Tid, []) ->
    no_decision.

%% Turn a transient decision into a decision record without any nodes.
transient_to_decision(C) ->
    #decision{tid = C#transient_decision.tid,
              outcome = C#transient_decision.outcome,
              disc_nodes = [],
              ram_nodes = []
             }.

%% The outcome noted for Tid, or Default when no table has an entry.
outcome(Tid, Default) ->
    outcome(Tid, Default, tabs()).

outcome(_Tid, Default, []) ->
    Default;
outcome(Tid, Default, [Tab | Tabs]) ->
    Found = (catch ?ets_lookup_element(Tab, Tid, 3)),
    case Found of
        {'EXIT', _} ->
            outcome(Tid, Default, Tabs);
        _ ->
            Found
    end.

%% Reduce an outcome to one of unclear, aborted and committed.
filter_outcome(Val) ->
    case Val of
        committed -> committed;
        aborted -> aborted;
        presume_abort -> aborted;
        unclear -> unclear;
        pre_commit -> unclear
    end.

%% A decision with the outcome presume_abort counts as aborted.
filter_aborted(D) when D#decision.outcome == presume_abort ->
    D#decision{outcome = aborted};
filter_aborted(D) ->
    D.

%% Merge old decision D with new (probably remote) decision
merge_decisions(Node, D, NewD0) ->
    NewD = filter_aborted(NewD0),
    if
        D == no_decision, node() /= Node ->
            %% We did not know anything about this txn
            NewD#decision{disc_nodes = []};
        D == no_decision ->
            NewD;
        is_record(D, decision) ->
            RemainingDisc = D#decision.disc_nodes -- ([node(), Node]),
            OldD = filter_aborted(D#decision{disc_nodes = RemainingDisc}),
%%          mnesia_lib:dbg_out("merge ~w: NewD = ~w~n D = ~w~n OldD = ~w~n",
%%                             [Node, NewD, D, OldD]),
            merge_outcomes(Node, D, OldD, NewD)
    end.

%% Weigh the outcome of the old decision against the new one.
%% D is the old decision as it was stored, OldD the same decision with
%% the local node and Node removed from its disc nodes.
merge_outcomes(Node, D, OldD, NewD) ->
    Old = OldD#decision.outcome,
    New = NewD#decision.outcome,
    if
        Old == unclear, New == unclear ->
            D;

        Old == New ->
            %% We have come to the same decision
            OldD;

        Old == committed, New == aborted ->
            %% Interesting! We have already committed,
            %% but someone else has aborted. Now we
            %% have a nice little inconsistency. The
            %% other guy (or someone else) has
            %% enforced a recovery decision when
            %% max_wait_for_decision was exceeded.
            %% We will pretend that we have obeyed
            %% the forced recovery decision, but we
            %% will also generate an event in case the
            %% application wants to do something clever.
            Msg = {inconsistent_database, bad_decision, Node},
            mnesia_lib:report_system_event(Msg),
            OldD#decision{outcome = aborted};

        Old == aborted ->
            %% aborted overrides anything
            OldD#decision{outcome = aborted};

        New == aborted ->
            %% aborted overrides anything
            OldD#decision{outcome = aborted};

        Old == committed, New == unclear ->
            %% committed overrides unclear
            OldD#decision{outcome = committed};

        Old == unclear, New == committed ->
            %% committed overrides unclear
            OldD#decision{outcome = committed}
    end.

%% Take care of a list of items announced by Node.  Returns the new state.
add_remote_decisions(_Node, [], State) ->
    State;

add_remote_decisions(Node, [D | Tail], State) when is_record(D, decision) ->
    add_remote_decisions(Node, Tail, add_remote_decision(Node, D, State));

add_remote_decisions(Node, [C | Tail], State)
  when is_record(C, transient_decision) ->
    D = transient_to_decision(C),
    add_remote_decisions(Node, Tail, add_remote_decision(Node, D, State));

add_remote_decisions(Node, [{mnesia_down, _, _, _} | Tail], State) ->
    add_remote_decisions(Node, Tail, State);

add_remote_decisions(Node, [{trans_tid, serial, Serial} | Tail], State) ->
    sync_trans_tid_serial(Serial),
    %% If we are waiting for a decision that does not have Node among
    %% its ram nodes, ask Node what it knows about it
    case State#state.unclear_decision of
        undefined ->
            ignored;
        D ->
            case lists:member(Node, D#decision.ram_nodes) of
                true ->
                    ignore;
                false ->
                    abcast([Node], {what_decision, node(), D})
            end
    end,
    add_remote_decisions(Node, Tail, State).
%% Merge the decision NewD received from Node with what is known
%% locally, log the merged decision and, when it settles the
%% transaction we are currently unclear about, reply to the waiting
%% client. Returns the (possibly updated) server state.
add_remote_decision(Node, NewD, State) ->
    Tid = NewD#decision.tid,
    OldD = decision(Tid),
    Merged = merge_decisions(Node, OldD, NewD),
    do_log_decision(Merged, false, undefined),
    Outcome = Merged#decision.outcome,

    %% Tell Node that we are certain, but only if we already had a
    %% decision, the merged outcome is settled and this node is one of
    %% the nodes listed in the remote decision.
    ShouldTell =
        OldD /= no_decision
        andalso Outcome /= unclear
        andalso (lists:member(node(), NewD#decision.disc_nodes)
                 orelse lists:member(node(), NewD#decision.ram_nodes)),
    case ShouldTell of
        true -> tell_im_certain([Node], Merged);
        false -> ignore
    end,

    case State#state.unclear_decision of
        Unclear when Unclear#decision.tid == Tid ->
            StillWaitingFor = State#state.unclear_waitfor -- [Node],
            Settled = State#state{unclear_pid = undefined,
                                  unclear_decision = undefined,
                                  unclear_waitfor = undefined},
            case {Outcome, StillWaitingFor} of
                {unclear, []} ->
                    %% Everybody is uncertain, let us abort
                    NewOutcome = aborted,
                    CertainD = Merged#decision{outcome = NewOutcome,
                                               disc_nodes = [],
                                               ram_nodes = []},
                    tell_im_certain(Merged#decision.disc_nodes, CertainD),
                    tell_im_certain(Merged#decision.ram_nodes, CertainD),
                    do_log_decision(CertainD, false, undefined),
                    verbose("Decided to abort transaction ~p "
                            "since everybody are uncertain ~p~n",
                            [Tid, CertainD]),
                    gen_server:reply(State#state.unclear_pid, {ok, NewOutcome}),
                    Settled;
                {unclear, _} ->
                    %% Keep waiting for the remaining nodes
                    State#state{unclear_waitfor = StillWaitingFor};
                {_, _} ->
                    verbose("~p told us that transaction ~p was ~p~n",
                            [Node, Tid, Outcome]),
                    gen_server:reply(State#state.unclear_pid, {ok, Outcome}),
                    Settled
            end;
        _ ->
            %% Not the transaction we are waiting for (if any)
            State
    end.

%% Send our current transaction serial to all of ToNodes.
%% Does nothing for an empty node list.
announce_all([]) ->
    ok;
announce_all(ToNodes) ->
    Serial = trans_tid_serial(),
    announce(ToNodes, [{trans_tid, serial, Serial}], [], false).

%% Arrange every item per destination node (see arrange/4), starting
%% from the accumulator Acc, and send the result with send_decisions/1.
announce(ToNodes, Items, Acc, ForceSend) ->
    Arranged = lists:foldl(fun(Item, PerNode) ->
                                   arrange(ToNodes, Item, PerNode, ForceSend)
                           end,
                           Acc,
                           Items),
    send_decisions(Arranged).
+ +send_decisions([{Node, Decisions} | Tail]) -> + abcast([Node], {decisions, node(), Decisions}), + send_decisions(Tail); +send_decisions([]) -> + ok. + +arrange([To | ToNodes], D, Acc, ForceSend) when is_record(D, decision) -> + NeedsAdd = (ForceSend or + lists:member(To, D#decision.disc_nodes) or + lists:member(To, D#decision.ram_nodes)), + case NeedsAdd of + true -> + Acc2 = add_decision(To, D, Acc), + arrange(ToNodes, D, Acc2, ForceSend); + false -> + arrange(ToNodes, D, Acc, ForceSend) + end; + +arrange([To | ToNodes], {trans_tid, serial, Serial}, Acc, ForceSend) -> + %% Do the lamport thing plus release the others + %% from uncertainity. + Acc2 = add_decision(To, {trans_tid, serial, Serial}, Acc), + arrange(ToNodes, {trans_tid, serial, Serial}, Acc2, ForceSend); + +arrange([], _Decision, Acc, _ForceSend) -> + Acc. + +add_decision(Node, Decision, [{Node, Decisions} | Tail]) -> + [{Node, [Decision | Decisions]} | Tail]; +add_decision(Node, Decision, [Head | Tail]) -> + [Head | add_decision(Node, Decision, Tail)]; +add_decision(Node, Decision, []) -> + [{Node, [Decision]}]. + diff --git a/lib/mnesia/src/mnesia_registry.erl b/lib/mnesia/src/mnesia_registry.erl new file mode 100644 index 0000000000..9805d48697 --- /dev/null +++ b/lib/mnesia/src/mnesia_registry.erl @@ -0,0 +1,280 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1998-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
+%% +%% %CopyrightEnd% +%% + +%% +-module(mnesia_registry). + +%%%---------------------------------------------------------------------- +%%% File : mnesia_registry.erl +%%% Purpose : Support dump and restore of a registry on a C-node +%%% This is an OTP internal module and is not public available. +%%% +%%% Example : Dump some hardcoded records into the Mnesia table Tab +%%% +%%% case rpc:call(Node, mnesia_registry, start_dump, [Tab, self()]) of +%%% Pid when pid(Pid) -> +%%% Pid ! {write, key1, key_size1, val_type1, val_size1, val1}, +%%% Pid ! {delete, key3}, +%%% Pid ! {write, key2, key_size2, val_type2, val_size2, val2}, +%%% Pid ! {write, key4, key_size4, val_type4, val_size4, val4}, +%%% Pid ! {commit, self()}, +%%% receive +%%% {ok, Pid} -> +%%% ok; +%%% {'EXIT', Pid, Reason} -> +%%% exit(Reason) +%%% end; +%%% {badrpc, Reason} -> +%%% exit(Reason) +%%% end. +%%% +%%% Example : Restore the corresponding Mnesia table Tab +%%% +%%% case rpc:call(Node, mnesia_registry, start_restore, [Tab, self()]) of +%%% {size, Pid, N, LargestKey, LargestVal} -> +%%% Pid ! {send_records, self()}, +%%% Fun = fun() -> +%%% receive +%%% {restore, KeySize, ValSize, ValType, Key, Val} -> +%%% {Key, Val}; +%%% {'EXIT', Pid, Reason} -> +%%% exit(Reason) +%%% end +%%% end, +%%% lists:map(Fun, lists:seq(1, N)); +%%% {badrpc, Reason} -> +%%% exit(Reason) +%%% end. +%%% +%%%---------------------------------------------------------------------- + +%% External exports +-export([start_dump/2, start_restore/2]). +-export([create_table/1, create_table/2]). + +%% Internal exports +-export([init/4]). + +-record(state, {table, ops = [], link_to}). + +-record(registry_entry, {key, key_size, val_type, val_size, val}). + +-record(size, {pid = self(), n_values = 0, largest_key = 0, largest_val = 0}). 
%%%----------------------------------------------------------------------
%%% Client
%%%----------------------------------------------------------------------

%% Spawn a linked worker process of the given Type (dump | restore)
%% for the table Tab and wait for its first {ok, Result} message,
%% whose Result is returned. If an exit message from the worker is
%% received instead, and LinkTo is the calling process, exit with the
%% same reason.
start(Type, Tab, LinkTo) ->
    Self = self(),
    Worker = spawn_link(?MODULE, init, [Type, Self, LinkTo, Tab]),
    %% The worker may unlink the current process (see init/4)
    receive
        {ok, Result} ->
            Result;
        {'EXIT', Worker, Reason} when LinkTo == Self ->
            exit(Reason)
    end.

%% Starts a receiver process and returns its Pid.
%%
%% The receiver process accumulates Mnesia operations and performs
%% all of them, or none, at commit. The table is created with suitable
%% default values at commit time if needed. Understood messages:
%%
%%    {write, Key, KeySize, ValType, ValSize, Val} ->
%%        accumulates mnesia:write({Tab, Key, KeySize, ValType, ValSize, Val})
%%        (no reply)
%%    {delete, Key} ->
%%        accumulates mnesia:delete({Tab, Key}) (no reply)
%%    {commit, ReplyTo} ->
%%        commits all accumulated operations
%%        and stops the process (replies {ok, Pid})
%%    abort ->
%%        stops the process (no reply)
%%
%% The receiver process is linked to the process LinkTo. If an error
%% occurs the receiver process invokes exit(Reason), and it is up to
%% the LinkTo process to act properly on the resulting exit signal.

start_dump(Tab, LinkTo) ->
    start(dump, Tab, LinkTo).

%% Starts a sender process which sends restore messages back to the
%% requesting process. First some statistics about the table are
%% determined and returned as a 5-tuple:
%%
%%    {size, SenderPid, N, LargestKeySize, LargestValSize}
%%
%% where N is the number of records in the table. The sender process
%% then waits for a 2-tuple message:
%%
%%    {send_records, ReplyTo}
%%
%% Finally N 6-tuple messages are sent to the ReplyTo process:
%%
%%    ReplyTo ! {restore, KeySize, ValSize, ValType, Key, Val}
%%
%% If an error occurs the sender process invokes exit(Reason), and it
%% is up to the LinkTo process to act properly on the resulting exit
%% signal.

start_restore(Tab, LinkTo) ->
    start(restore, Tab, LinkTo).


%% Creates the Mnesia table Tab on the local node, using the storage
%% type of the local schema table, unless the table already exists.
%% Returns ok or exits.
create_table(Tab) ->
    create_table(Tab, [{mnesia:table_info(schema, storage_type), [node()]}]).

%% As create_table/1, but with the caller's table definition TabDef.
%% The attributes are always those of #registry_entry{}.
create_table(Tab, TabDef) ->
    Opts = [{attributes, record_info(fields, registry_entry)} | TabDef],
    case mnesia:create_table(Tab, Opts) of
        {aborted, {already_exists, Tab}} ->
            %% Somebody created it before us, which is fine
            ok;
        {aborted, Reason} ->
            exit(Reason);
        {atomic, ok} ->
            ok
    end.

%%%----------------------------------------------------------------------
%%% Server
%%%----------------------------------------------------------------------

%% Entry point of the worker process spawned by start/3.
%% Moves the link from Starter to LinkTo when they differ and then
%% enters the dump loop or performs the restore, depending on Type.
init(Type, Starter, LinkTo, Tab) ->
    case LinkTo of
        Starter ->
            %% Already linked to the right process by spawn_link
            ignore;
        _ ->
            link(LinkTo),
            unlink(Starter)
    end,
    case Type of
        restore ->
            restore_table(Tab, Starter, LinkTo);
        dump ->
            Starter ! {ok, self()},
            dump_loop(#state{table = Tab, link_to = LinkTo})
    end.
%%%----------------------------------------------------------------------
%%% Dump loop
%%%----------------------------------------------------------------------

%% Receive loop of a dump process. S is a #state{} holding the table
%% name, the operations accumulated so far (most recent first) and the
%% process we are linked to.
%%
%%    {write, Key, KeySize, ValType, ValSize, Val} - accumulate a write
%%    {delete, Key}                                - accumulate a delete
%%    {commit, ReplyTo} - create the table if needed, apply all
%%                        accumulated operations in one transaction,
%%                        send {ok, self()} to ReplyTo and stop.
%%                        Exits with {aborted, Reason} on failure.
%%    abort             - stop without applying anything
%%    anything else     - exit with {bad_message, Msg}
dump_loop(S) ->
    Tab = S#state.table,
    Ops = S#state.ops,
    receive
        {write, Key, KeySize, ValType, ValSize, Val} ->
            RE = #registry_entry{key = Key,
                                 key_size = KeySize,
                                 val_type = ValType,
                                 val_size = ValSize,
                                 val = Val},
            dump_loop(S#state{ops = [{write, RE} | Ops]});
        {delete, Key} ->
            dump_loop(S#state{ops = [{delete, Key} | Ops]});
        {commit, ReplyTo} ->
            create_table(Tab),
            RecName = mnesia:table_info(Tab, record_name),
            %% Ops was built by prepending, so it is in reverse arrival
            %% order. Replay it in arrival order; otherwise an earlier
            %% operation would override a later one on the same key
            %% (e.g. write followed by delete would leave the record).
            OrderedOps = lists:reverse(Ops),
            case mnesia:transaction(fun handle_ops/3, [Tab, RecName, OrderedOps]) of
                {atomic, ok} ->
                    ReplyTo ! {ok, self()},
                    stop(S#state.link_to);
                {aborted, Reason} ->
                    exit({aborted, Reason})
            end;
        abort ->
            stop(S#state.link_to);
        BadMsg ->
            exit({bad_message, BadMsg})
    end.

%% Unlink from LinkTo and terminate normally, so that LinkTo does not
%% get an exit signal from us.
stop(LinkTo) ->
    unlink(LinkTo),
    exit(normal).

%% Transaction body: grab a write lock for the entire table and apply
%% the operations in the order given. Returns ok.
handle_ops(Tab, RecName, Ops) ->
    mnesia:write_lock_table(Tab),
    do_handle_ops(Tab, RecName, Ops).

%% Apply one operation at a time. A stored #registry_entry{} gets its
%% record tag replaced by the record name of the table before writing.
do_handle_ops(Tab, RecName, [{write, RegEntry} | Ops]) ->
    Record = setelement(1, RegEntry, RecName),
    mnesia:write(Tab, Record, write),
    do_handle_ops(Tab, RecName, Ops);
do_handle_ops(Tab, RecName, [{delete, Key} | Ops]) ->
    mnesia:delete(Tab, Key, write),
    do_handle_ops(Tab, RecName, Ops);
do_handle_ops(_Tab, _RecName, []) ->
    ok.
+ +%%%---------------------------------------------------------------------- +%%% Restore table +%%%---------------------------------------------------------------------- + +restore_table(Tab, Starter, LinkTo) -> + Pat = mnesia:table_info(Tab, wild_pattern), + Fun = fun() -> mnesia:match_object(Tab, Pat, read) end, + case mnesia:transaction(Fun) of + {atomic, AllRecords} -> + Size = calc_size(AllRecords, #size{}), + Starter ! {ok, Size}, + receive + {send_records, ReplyTo} -> + send_records(AllRecords, ReplyTo), + unlink(LinkTo), + exit(normal); + BadMsg -> + exit({bad_message, BadMsg}) + end; + {aborted, Reason} -> + exit(Reason) + end. + +calc_size([H | T], S) -> + KeySize = max(element(#registry_entry.key_size, H), S#size.largest_key), + ValSize = max(element(#registry_entry.val_size, H), S#size.largest_val), + N = S#size.n_values + 1, + calc_size(T, S#size{n_values = N, largest_key = KeySize, largest_val = ValSize}); +calc_size([], Size) -> + Size. + +max(New, Old) when New > Old -> New; +max(_New, Old) -> Old. + +send_records([H | T], ReplyTo) -> + KeySize = element(#registry_entry.key_size, H), + ValSize = element(#registry_entry.val_size, H), + ValType = element(#registry_entry.val_type, H), + Key = element(#registry_entry.key, H), + Val = element(#registry_entry.val, H), + ReplyTo ! {restore, KeySize, ValSize, ValType, Key, Val}, + send_records(T, ReplyTo); +send_records([], _ReplyTo) -> + ok. + diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl new file mode 100644 index 0000000000..354431a296 --- /dev/null +++ b/lib/mnesia/src/mnesia_schema.erl @@ -0,0 +1,3027 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. 
If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%% In this module we provide a number of explicit functions +%% to maninpulate the schema. All these functions are called +%% within a special schema transaction. +%% +%% We also have an init/1 function defined here, this func is +%% used by mnesia:start() to initialize the entire schema. + +-module(mnesia_schema). + +-export([ + add_snmp/2, + add_table_copy/3, + add_table_index/2, + arrange_restore/3, + attr_tab_to_pos/2, + attr_to_pos/2, + change_table_copy_type/3, + change_table_access_mode/2, + change_table_load_order/2, + change_table_frag/2, + clear_table/1, + create_table/1, + cs2list/1, + del_snmp/1, + del_table_copy/2, + del_table_index/2, + delete_cstruct/2, + delete_schema/1, + delete_schema2/0, + delete_table/1, + delete_table_property/2, + dump_tables/1, + ensure_no_schema/1, + get_create_list/1, + get_initial_schema/2, + get_table_properties/1, + info/0, + info/1, + init/1, + insert_cstruct/3, + is_remote_member/1, + list2cs/1, + lock_schema/0, + merge_schema/0, + move_table/3, + opt_create_dir/2, + prepare_commit/3, + purge_dir/2, + purge_tmp_files/0, + ram_delete_table/2, +% ram_delete_table/3, + read_cstructs_from_disc/0, + read_nodes/0, + remote_read_schema/0, + restore/1, + restore/2, + restore/3, + schema_coordinator/3, + set_where_to_read/3, + transform_table/4, + undo_prepare_commit/2, + unlock_schema/0, + version/0, + write_table_property/2 + ]). + +%% Exports for mnesia_frag +-export([ + get_tid_ts_and_lock/2, + make_create_table/1, + ensure_active/1, + pick/4, + verify/3, + incr_version/1, + check_keys/3, + check_duplicates/2, + make_delete_table/2 + ]). 
%% Needed outside to be able to use/set table_properties
%% from user (not supported)
-export([schema_transaction/1,
         insert_schema_ops/2,
         do_create_table/1,
         do_delete_table/1,
         do_read_table_property/2,
         do_delete_table_property/2,
         do_write_table_property/2]).

-include("mnesia.hrl").
-include_lib("kernel/include/file.hrl").

-import(mnesia_lib, [set/2, del/2, verbose/2, dbg_out/2]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Here comes the init function which also resides in
%% this module, it is called upon by the trans server
%% at startup of the system
%%
%% We have a meta table which looks like
%% {table, schema,
%%    {type, set},
%%    {disc_copies, all},
%%    {arity, 2}
%%    {attributes, [key, val]}
%%
%% This means that we have a series of {schema, Name, Cs} tuples
%% in a table called schema !!

%% Read the schema (keeping the ets table named schema), publish the
%% settings of every table found in it and mark the local node as an
%% active replica of the schema table. Exits if the schema cannot be
%% read.
init(IgnoreFallback) ->
    {ok, Source, _CreateList} =
        exit_on_error(read_schema(true, IgnoreFallback)),
    verbose("Schema initiated from: ~p~n", [Source]),
    set({schema, tables}, []),
    set({schema, local_tables}, []),
    Tabs = set_schema(?ets_first(schema)),
    lists:foreach(fun clear_whereabouts/1, Tabs),
    set({schema, where_to_read}, node()),
    set({schema, load_node}, node()),
    set({schema, load_reason}, initial),
    mnesia_controller:add_active_replica(schema, node()).

%% Turn an {error, Reason} result into an exit; pass anything else on.
exit_on_error(Res) ->
    case Res of
        {error, Reason} -> exit(Reason);
        _ -> Res
    end.

%% Read the variable Var, delegating to mnesia_lib:other_val/2 when
%% the plain lookup fails.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} ->
            mnesia_lib:other_val(Var, Reason);
        Value ->
            Value
    end.

%% This function traverses all cstructs in the schema, starting at the
%% given key, and sets all values in mnesia_gvar accordingly for each
%% table/cstruct. Returns the list of visited tables.
set_schema(Key) ->
    case Key of
        '$end_of_table' ->
            [];
        Tab ->
            do_set_schema(Tab),
            [Tab | set_schema(?ets_next(schema, Tab))]
    end.

%% Fetch element 3 of the schema entry for Tab, i.e. the table
%% definition list of a {schema, Tab, TabDef} tuple.
get_create_list(Tab) ->
    ?ets_lookup_element(schema, Tab, 3).

%% Publish the settings of Tab as found in the schema table.
do_set_schema(Tab) ->
    do_set_schema(Tab, list2cs(get_create_list(Tab))).
%% Publish every setting of the cstruct Cs as a {Tab, Item} variable
%% and register Tab in the list of known tables, and in the list of
%% local tables when this node holds a copy (or Tab is the schema).
do_set_schema(Tab, Cs) ->
    Put = fun(Item, Value) -> set({Tab, Item}, Value) end,
    Type = Cs#cstruct.type,
    Put(setorbag, Type),
    Put(local_content, Cs#cstruct.local_content),
    Put(ram_copies, Cs#cstruct.ram_copies),
    Put(disc_copies, Cs#cstruct.disc_copies),
    Put(disc_only_copies, Cs#cstruct.disc_only_copies),
    Put(load_order, Cs#cstruct.load_order),
    Put(access_mode, Cs#cstruct.access_mode),
    Put(snmp, Cs#cstruct.snmp),
    Put(user_properties, Cs#cstruct.user_properties),
    %% Each user property is also reachable by its own tag
    lists:foreach(fun(Prop) ->
                          set({Tab, user_property, element(1, Prop)}, Prop)
                  end,
                  Cs#cstruct.user_properties),
    Put(frag_properties, Cs#cstruct.frag_properties),
    mnesia_frag:set_frag_hash(Tab, Cs#cstruct.frag_properties),
    Put(attributes, Cs#cstruct.attributes),
    %% The record tag counts as one element
    Arity = length(Cs#cstruct.attributes) + 1,
    Put(arity, Arity),
    RecName = Cs#cstruct.record_name,
    Put(record_name, RecName),
    Put(record_validation, {RecName, Arity, Type}),
    Put(wild_pattern, wild(RecName, Arity)),
    Put(index, Cs#cstruct.index),
    %% create actual index tabs later
    Put(cookie, Cs#cstruct.cookie),
    Put(version, Cs#cstruct.version),
    Put(cstruct, Cs),
    Put(storage_type, mnesia_lib:schema_cs_to_storage_type(node(), Cs)),
    mnesia_lib:add({schema, tables}, Tab),
    IsLocal = lists:member(node(), mnesia_lib:cs_to_nodes(Cs))
        orelse Tab == schema,
    case IsLocal of
        true -> mnesia_lib:add({schema, local_tables}, Tab);
        false -> ignore
    end.

%% Build a tuple of size Arity with RecName as its first element and
%% '_' in all other positions.
wild(RecName, Arity) ->
    setelement(1, erlang:make_tuple(Arity, '_'), RecName).

%% Temporarily read the local schema and return a list
%% of all nodes mentioned in the schema.DAT file
read_nodes() ->
    %% Ensure that we access the intended Mnesia
    %% directory. This function may not be called
    %% during startup since it will cause the
    %% application_controller to get into deadlock
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        {error, _} = NotLoaded ->
            NotLoaded;
        ok ->
            case read_schema(false) of
                {error, _} = NoSchema ->
                    NoSchema;
                {ok, _Source, CreateList} ->
                    Cs = list2cs(CreateList),
                    {ok, Cs#cstruct.disc_copies ++ Cs#cstruct.ram_copies}
            end
    end.

%% Returns Version from the tuple {Version,MasterNodes}
%% When no schema other than the default one can be read, {1,0} is
%% returned if the Mnesia directory exists and {0,0} otherwise.
version() ->
    case read_schema(false) of
        {ok, Source, CreateList} when Source /= default ->
            Cs = list2cs(CreateList),
            {Version, _Details} = Cs#cstruct.version,
            Version;
        _ ->
            case dir_exists(mnesia_lib:dir()) of
                false -> {0,0};
                true -> {1,0}
            end
    end.

%% Calculate next table version from old cstruct
%% The major number is bumped when all disc resident replicas of the
%% table are active, otherwise only the minor number is bumped.
incr_version(Cs) ->
    {{Major, Minor}, _} = Cs#cstruct.version,
    DiscReplicas = mnesia_lib:intersect(val({schema, disc_copies}),
                                        mnesia_lib:cs_to_nodes(Cs)),
    Inactive = DiscReplicas -- val({Cs#cstruct.name, active_replicas}),
    Next =
        case Inactive of
            [] -> {Major + 1, 0};     % All replicas are active
            _ -> {Major, Minor + 1}   % Some replicas are inactive
        end,
    Cs#cstruct{version = {Next, {node(), now()}}}.

%% Insert the cstruct Cs into the schema table on behalf of the
%% transaction Tid and publish its settings. Unless KeepWhereabouts
%% is true, the whereabouts of a table without active replicas are
%% cleared first.
%% Returns the inserted {schema, Tab, TabDef} tuple.
insert_cstruct(Tid, Cs, KeepWhereabouts) ->
    Tab = Cs#cstruct.name,
    SchemaEntry = {schema, Tab, cs2list(Cs)},
    mnesia_checkpoint:tm_retain(Tid, schema, Tab, write),
    mnesia_subscr:report_table_event(schema, Tid, SchemaEntry, write),
    Active = val({Tab, active_replicas}),

    case KeepWhereabouts of
        false when Active == [] ->
            clear_whereabouts(Tab);
        false ->
            %% Someone else has initiated table
            ignore;
        true ->
            ignore
    end,
    set({Tab, cstruct}, Cs),
    ?ets_insert(schema, SchemaEntry),
    do_set_schema(Tab, Cs),
    SchemaEntry.
%% Reset the location and load related variables of Tab to their
%% initial values.
clear_whereabouts(Tab) ->
    Initial = [{checkpoints, []},
               {subscribers, []},
               {where_to_read, nowhere},
               {active_replicas, []},
               {commit_work, []},
               {where_to_write, []},
               {where_to_commit, []},
               {load_by_force, false},
               {load_node, unknown}],
    lists:foreach(fun({Item, Value}) -> set({Tab, Item}, Value) end, Initial),
    set({Tab, load_reason}, unknown).

%% Remove the cstruct Cs from the schema table on behalf of the
%% transaction Tid, together with every variable published for the
%% table. Returns the removed {schema, Tab, TabDef} tuple.
delete_cstruct(Tid, Cs) ->
    Tab = Cs#cstruct.name,
    SchemaEntry = {schema, Tab, cs2list(Cs)},
    mnesia_checkpoint:tm_retain(Tid, schema, Tab, delete),
    mnesia_subscr:report_table_event(schema, Tid, SchemaEntry, delete),
    Purge =
        fun() ->
                ?ets_match_delete(mnesia_gvar, {{Tab, '_'}, '_'}),
                ?ets_match_delete(mnesia_gvar, {{Tab, '_', '_'}, '_'}),
                del({schema, local_tables}, Tab),
                del({schema, tables}, Tab),
                ?ets_delete(schema, Tab)
        end,
    mnesia_controller:update(Purge),
    SchemaEntry.

%% Delete the Mnesia directory on all given nodes
%% Requires that Mnesia is not running anywhere
%% Returns ok | {error,Reason}
delete_schema([_ | _] = Ns) ->
    Reason = "Cannot delete schema on all nodes",
    case mnesia_lib:running_nodes(Ns) of
        [] ->
            case rpc:multicall(Ns, ?MODULE, delete_schema2, []) of
                {Replies, []} ->
                    %% Every node answered; check what they said
                    case [R || R <- Replies, R /= ok] of
                        [] ->
                            ok;
                        BadReplies ->
                            verbose("~s: ~p~n", [Reason, BadReplies]),
                            {error, {"All nodes not running", BadReplies}}
                    end;
                {_Replies, BadNs} ->
                    verbose("~s: ~p~n", [Reason, BadNs]),
                    {error, {"All nodes not running", BadNs}}
            end;
        RunningNs ->
            verbose("~s: ~p~n", [Reason, RunningNs]),
            {error, {"Mnesia is not stopped everywhere", RunningNs}}
    end;
delete_schema(Ns) ->
    {error, {badarg, Ns}}.

%% Purge the local Mnesia directory, provided that Mnesia is not
%% running on this node. Returns ok | {error, Reason}.
delete_schema2() ->
    %% Ensure that we access the intended Mnesia
    %% directory. This function may not be called
    %% during startup since it will cause the
    %% application_controller to get into deadlock
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        {error, Reason} ->
            {error, Reason};
        ok ->
            case mnesia_lib:is_running() of
                no ->
                    purge_dir(mnesia_lib:dir(), []),
                    ok;
                _ ->
                    {error, {"Mnesia still running", node()}}
            end
    end.

%% Check that none of the given nodes already has a schema.
%% Returns ok, {Node, Reason} for the first offending node, or
%% {error, {badarg, X}} if X in the list is not an atom.
ensure_no_schema([]) ->
    ok;
ensure_no_schema([Node | Rest]) when is_atom(Node) ->
    case rpc:call(Node, ?MODULE, remote_read_schema, []) of
        {badrpc, Reason} ->
            {Node, {"All nodes not running", Node, Reason}};
        {ok, Source, _} when Source /= default ->
            {Node, {already_exists, Node}};
        _ ->
            ensure_no_schema(Rest)
    end;
ensure_no_schema([NotANode | _]) ->
    {error, {badarg, NotANode}}.

%% Read the schema of the local node without keeping the ets table.
%% Meant to be called remotely, see ensure_no_schema/1.
remote_read_schema() ->
    %% Ensure that we access the intended Mnesia
    %% directory. This function may not be called
    %% during startup since it will cause the
    %% application_controller to get into deadlock
    case mnesia_lib:ensure_loaded(?APPLICATION) of
        {error, Reason} ->
            {error, Reason};
        ok ->
            %% The schema is read the same way regardless of the
            %% configured schema_location; the parameter is still
            %% looked up, as before.
            _ = mnesia_monitor:get_env(schema_location),
            read_schema(false)
    end.

%% Tell whether Dir exists. Always false when Mnesia is configured
%% not to use a directory.
dir_exists(Dir) ->
    dir_exists(Dir, mnesia_monitor:use_dir()).

dir_exists(_Dir, false) ->
    false;
dir_exists(Dir, true) ->
    case file:read_file_info(Dir) of
        {ok, _} -> true;
        _ -> false
    end.

%% Make sure that the Mnesia directory Dir exists and is writable,
%% creating it if needed. Fails with has_no_disc when Mnesia is
%% configured not to use a directory.
opt_create_dir(false, _) ->
    {error, {has_no_disc, node()}};
opt_create_dir(UseDir, Dir) when UseDir == true ->
    case dir_exists(Dir, UseDir) of
        true ->
            check_can_write(Dir);
        false ->
            case file:make_dir(Dir) of
                {error, Reason} ->
                    verbose("Cannot create mnesia dir ~p~n", [Reason]),
                    {error, {"Cannot create Mnesia dir", Dir, Reason}};
                ok ->
                    verbose("Create Directory ~p~n", [Dir]),
                    ok
            end
    end.
%% Check that Dir is a directory we may both read and write.
%% Returns ok | {error, {Description, Dir}}.
%%
%% The error is a regular 2-tuple, like the other errors passed on by
%% opt_create_dir/2, so that it is caught by callers matching on
%% {error, Reason}.
check_can_write(Dir) ->
    case file:read_file_info(Dir) of
        {ok, FI} when FI#file_info.type == directory,
                      FI#file_info.access == read_write ->
            ok;
        {ok, _} ->
            {error, {"Not allowed to write in Mnesia dir", Dir}};
        _ ->
            {error, {"Non existent Mnesia dir", Dir}}
    end.

%% Take the lock on the schema table, see mnesia_lib:lock_table/1.
lock_schema() ->
    mnesia_lib:lock_table(schema).

%% Release the lock on the schema table, see mnesia_lib:unlock_table/1.
unlock_schema() ->
    mnesia_lib:unlock_table(schema).

%% Same as read_schema(Keep, false), i.e. a fallback is not ignored.
read_schema(Keep) ->
    read_schema(Keep, false).

%% The schema may be read for several reasons.
%% If Mnesia is not already started, we normally do not
%% want the ets table named schema to be left around.
%% If Keep == true, the ets table schema is kept
%% If Keep == false, the ets table schema is removed
%%
%% Returns {ok, Source, SchemaCstruct} or {error, Reason}
%% Source may be: default | ram | disc | fallback

read_schema(Keep, IgnoreFallback) ->
    lock_schema(),
    Res =
        case mnesia:system_info(is_running) of
            yes ->
                %% Mnesia is up, the schema is already in ram
                {ok, ram, get_create_list(schema)};
            _IsRunning ->
                case mnesia_monitor:use_dir() of
                    true ->
                        read_disc_schema(Keep, IgnoreFallback);
                    false when Keep == true ->
                        %% No directory: create a default ram schema
                        %% and leave it in the ets table
                        Args = [{keypos, 2}, public, named_table, set],
                        mnesia_monitor:mktab(schema, Args),
                        CreateList = get_initial_schema(ram_copies, []),
                        ?ets_insert(schema, {schema, schema, CreateList}),
                        {ok, default, CreateList};
                    false when Keep == false ->
                        CreateList = get_initial_schema(ram_copies, []),
                        {ok, default, CreateList}
                end
        end,
    unlock_schema(),
    Res.
%% Read the schema from disc. An existing fallback takes precedence
%% unless it is to be ignored or Mnesia is running; otherwise the
%% schema .DAT file is read, or the schema .DMP file if only that one
%% exists.
read_disc_schema(Keep, IgnoreFallback) ->
    Running = mnesia:system_info(is_running),
    case mnesia_bup:fallback_exists() of
        true when IgnoreFallback == false, Running /= yes ->
            mnesia_bup:fallback_to_schema();
        _ ->
            %% If we're running, we read the schema file even
            %% if fallback exists
            DatFile = mnesia_lib:tab2dat(schema),
            case mnesia_lib:exists(DatFile) of
                true ->
                    do_read_disc_schema(DatFile, Keep);
                false ->
                    DmpFile = mnesia_lib:tab2dmp(schema),
                    case mnesia_lib:exists(DmpFile) of
                        false ->
                            {error, "No schema file exists"};
                        true ->
                            %% May only happen when toggling of
                            %% schema storage type has been
                            %% interrupted
                            do_read_disc_schema(DmpFile, Keep)
                    end
            end
    end.

%% Load the schema file Fname into an ets table and return
%% {ok, disc, CreateList} for the schema table itself, or an error.
%% With Keep == true the table is created as the named table schema
%% and left in place; otherwise a temporary table is used and deleted
%% again before returning.
do_read_disc_schema(Fname, Keep) ->
    EtsTab =
        case Keep of
            true ->
                mnesia_monitor:mktab(schema,
                                     [{keypos, 2}, public, named_table, set]);
            false ->
                ?ets_new_table(schema, [{keypos, 2}, public, set])
        end,
    Repair = mnesia_monitor:get_env(auto_repair),
    %% BUGBUG (original note, translated from Swedish): fix this up
    %% for dcl!
    Res =
        case mnesia_lib:dets_to_ets(schema, EtsTab, Fname, set, Repair, no) of
            loaded ->
                {ok, disc, ?ets_lookup_element(EtsTab, schema, 3)};
            Other ->
                {error, {"Cannot read schema", Fname, Other}}
        end,
    case Keep of
        false -> ?ets_delete_table(EtsTab);
        true -> ignore
    end,
    Res.

%% Build the table definition list of a brand new schema table that
%% resides on Nodes with the storage type SchemaStorage
%% (ram_copies | disc_copies).
get_initial_schema(SchemaStorage, Nodes) ->
    Base = #cstruct{name = schema,
                    record_name = schema,
                    attributes = [table, cstruct]},
    Placed =
        case SchemaStorage of
            disc_copies -> Base#cstruct{disc_copies = Nodes};
            ram_copies -> Base#cstruct{ram_copies = Nodes}
        end,
    cs2list(Placed).
%% Read all cstructs straight from the schema .DAT file.
%% Returns {ok, Cstructs} | {error, Reason}.
read_cstructs_from_disc() ->
    %% Assumptions:
    %% - local schema lock in global
    %% - use_dir is true
    %% - Mnesia is not running
    %% - Ignore fallback

    Fname = mnesia_lib:tab2dat(schema),
    case mnesia_lib:exists(Fname) of
        false ->
            {error, "No schema file exists"};
        true ->
            DetsArgs = [{file, Fname},
                        {keypos, 2},
                        {repair, mnesia_monitor:get_env(auto_repair)},
                        {type, set}],
            case dets:open_file(make_ref(), DetsArgs) of
                {error, Reason} ->
                    {error, Reason};
                {ok, Dets} ->
                    %% Collect one cstruct per stored schema entry
                    ToCstruct = fun({_, _, List}) ->
                                        {continue, list2cs(List)}
                                end,
                    Cstructs = dets:traverse(Dets, ToCstruct),
                    dets:close(Dets),
                    {ok, Cstructs}
            end
    end.

%% We run a very special type of transaction when
%% we want to manipulate the schema.

%% Return the activity state {Mod, Tid, Ts} of the calling process
%% after taking the table lock on Tab that Intent asks for
%% (read | write | none). Aborts with no_transaction when the caller
%% is not inside a transaction with a tid store.
get_tid_ts_and_lock(Tab, Intent) ->
    case get(mnesia_activity_state) of
        {_Mod, Tid, Ts} = TidTs when is_record(Ts, tidstore) ->
            Store = Ts#tidstore.store,
            case Intent of
                none -> ignore;
                read -> mnesia_locker:rlock_table(Tid, Store, Tab);
                write -> mnesia_locker:wlock_table(Tid, Store, Tab)
            end,
            TidTs;
        _ ->
            mnesia:abort(no_transaction)
    end.

%% Run Fun as a schema transaction in a separate coordinator process
%% and return its result. Nested use from within another activity is
%% refused with {aborted, nested_transaction}.
schema_transaction(Fun) ->
    case get(mnesia_activity_state) of
        undefined ->
            Controller = whereis(mnesia_controller),
            Coordinator = spawn_link(?MODULE, schema_coordinator,
                                     [self(), Fun, Controller]),
            receive
                {transaction_done, Res, Coordinator} ->
                    Res;
                {'EXIT', Coordinator, Reason} ->
                    {aborted, {transaction_crashed, Reason}}
            end;
        _ ->
            {aborted, nested_transaction}
    end.

%% This process may dump the transaction log, and should
%% therefore not be run in an application process
%%
schema_coordinator(Client, _Fun, undefined) ->
    %% There is no mnesia_controller, so Mnesia is not running here
    Client ! {transaction_done, {aborted, {node_not_running, node()}}, self()},
    unlink(Client);

schema_coordinator(Client, Fun, Controller) when is_pid(Controller) ->
    %% Do not trap exit in order to automatically die
    %% when the controller dies

    link(Controller),
    unlink(Client),

    %% Fulfill the transaction even if the client dies
    Outcome = mnesia:transaction(Fun),
    Client ! {transaction_done, Outcome, self()},
    unlink(Controller),         % Avoids spurious exit message
    unlink(whereis(mnesia_tm)), % Avoids spurious exit message
    exit(normal).

%% The make* routines return a list of ops, this function
%% inserts them all in the Store and maintains the local order
%% of ops.

insert_schema_ops({_Mod, _Tid, Ts}, SchemaIOps) ->
    do_insert_schema_ops(Ts#tidstore.store, SchemaIOps).

%% Insert the ops into Store one by one, first op first.
do_insert_schema_ops(Store, SchemaIOps) ->
    lists:foreach(fun(Op) -> ?ets_insert(Store, Op) end, SchemaIOps).

%% Convert a #cstruct{} to a table definition list of {Field, Value}
%% pairs. A list is assumed to be converted already and is returned
%% as is.
cs2list(CreateList) when is_list(CreateList) ->
    CreateList;
cs2list(Cs) when is_record(Cs, cstruct) ->
    rec2list(record_info(fields, cstruct), 2, Cs).

%% Pair each tag with the element of Rec found at the corresponding
%% position, the first tag going with position Pos.
rec2list(Tags, Pos, Rec) ->
    {Pairs, _NextPos} =
        lists:mapfoldl(fun(Tag, At) -> {{Tag, element(At, Rec)}, At + 1} end,
                       Pos,
                       Tags),
    Pairs.
%% Convert a table definition list of {Key, Value} pairs to a
%% #cstruct{}, filling in defaults for the keys that are left out.
%% Only name is mandatory. Aborts the transaction on unknown or
%% duplicated keys, on malformed items and on anything but a list.
list2cs(List) when is_list(List) ->
    Name = pick(unknown, name, List, must),
    Type = pick(Name, type, List, set),

    %% Replica placement. A table without any replica at all
    %% gets a ram copy on the local node.
    Rc0 = pick(Name, ram_copies, List, []),
    Dc = pick(Name, disc_copies, List, []),
    Doc = pick(Name, disc_only_copies, List, []),
    Rc = if
             Rc0 == [], Dc == [], Doc == [] -> [node()];
             true -> Rc0
         end,

    LC = pick(Name, local_content, List, false),
    RecName = pick(Name, record_name, List, Name),
    Attrs = pick(Name, attributes, List, [key, val]),
    Snmp = pick(Name, snmp, List, []),
    LoadOrder = pick(Name, load_order, List, 0),
    AccessMode = pick(Name, access_mode, List, read_write),

    UserProps = pick(Name, user_properties, List, []),
    verify({alt, [nil, list]}, mnesia_lib:etype(UserProps),
           {bad_type, Name, {user_properties, UserProps}}),

    Cookie = pick(Name, cookie, List, ?unique_cookie),
    Version = pick(Name, version, List, {{2, 0}, []}),

    %% Indices may be given as attribute names or as positions;
    %% they are stored as positions.
    Ix = pick(Name, index, List, []),
    verify({alt, [nil, list]}, mnesia_lib:etype(Ix),
           {bad_type, Name, {index, [Ix]}}),
    IxPositions = [attr_to_pos(I, Attrs) || I <- Ix],

    Frag = pick(Name, frag_properties, List, []),
    verify({alt, [nil, list]}, mnesia_lib:etype(Frag),
           {badarg, Name, {frag_properties, Frag}}),

    %% Finally make sure that only known keys are used, each at most once
    Keys = check_keys(Name, List, record_info(fields, cstruct)),
    check_duplicates(Name, Keys),

    #cstruct{name = Name,
             type = Type,
             ram_copies = Rc,
             disc_copies = Dc,
             disc_only_copies = Doc,
             load_order = LoadOrder,
             access_mode = AccessMode,
             index = IxPositions,
             snmp = Snmp,
             local_content = LC,
             record_name = RecName,
             attributes = Attrs,
             user_properties = lists:sort(UserProps),
             frag_properties = lists:sort(Frag),
             cookie = Cookie,
             version = Version};
list2cs(Other) ->
    mnesia:abort({badarg, Other}).

%% Fetch the value stored under Key in the {Key, Value} list List.
%% Returns Default when the key is missing, unless Default is the atom
%% `must', in which case the transaction is aborted. An item with the
%% right key but which is not a 2-tuple also aborts the transaction.
pick(Tab, Key, List, Default) ->
    case lists:keysearch(Key, 1, List) of
        false when Default == must ->
            mnesia:abort({badarg, Tab, "Missing key", Key, List});
        false ->
            Default;
        {value, {Key, Value}} ->
            Value;
        {value, BadArg} ->
            mnesia:abort({bad_type, Tab, BadArg})
    end.

%% Convert attribute name to integer if necessary,
%% using the attributes registered for the table Tab
attr_tab_to_pos(_Tab, Pos) when is_integer(Pos) ->
    Pos;
attr_tab_to_pos(Tab, Attr) ->
    attr_to_pos(Attr, val({Tab, attributes})).

%% Convert attribute name to integer if necessary.
%% The first attribute has position 2 (position 1 is the record name).
attr_to_pos(Pos, _Attrs) when is_integer(Pos) ->
    Pos;
attr_to_pos(Attr, Attrs) when is_atom(Attr) ->
    attr_to_pos(Attr, Attrs, 2);
attr_to_pos(Attr, _) ->
    mnesia:abort({bad_type, Attr}).

attr_to_pos(Attr, [Attr | _Attrs], Pos) ->
    Pos;
attr_to_pos(Attr, [_ | Attrs], Pos) ->
    attr_to_pos(Attr, Attrs, Pos + 1);
attr_to_pos(Attr, _, _) ->
    mnesia:abort({bad_type, Attr}).

%% Return the keys of the {Key, Value} list, aborting on the first
%% key that is not a member of Items or on a malformed item.
check_keys(Tab, [{Key, _Val} | Tail], Items) ->
    case lists:member(Key, Items) of
        true  -> [Key | check_keys(Tab, Tail, Items)];
        false -> mnesia:abort({badarg, Tab, Key})
    end;
check_keys(_, [], _) ->
    [];
check_keys(Tab, Arg, _) ->
    mnesia:abort({badarg, Tab, Arg}).

check_duplicates(Tab, Keys) ->
    case has_duplicates(Keys) of
        false -> ok;
        true  -> mnesia:abort({badarg, Tab, "Duplicate keys", Keys})
    end.

has_duplicates([H | T]) ->
    case lists:member(H, T) of
        true  -> true;
        false -> has_duplicates(T)
    end;
has_duplicates([]) ->
    false.

%% This is the only place where we check the validity of data.
%% Returns ok or aborts the transaction with a reason that names
%% the offending item.
verify_cstruct(Cs) when is_record(Cs, cstruct) ->
    verify_nodes(Cs),

    Tab = Cs#cstruct.name,
    verify(atom, mnesia_lib:etype(Tab), {bad_type, Tab}),
    Type = Cs#cstruct.type,
    verify(true, lists:member(Type, [set, bag, ordered_set]),
           {bad_type, Tab, {type, Type}}),

    %% Currently ordered_set is not supported for disc_only_copies.
    if
        Type == ordered_set, Cs#cstruct.disc_only_copies /= [] ->
            mnesia:abort({bad_type, Tab, {not_supported, Type, disc_only_copies}});
        true ->
            ok
    end,

    RecName = Cs#cstruct.record_name,
    verify(atom, mnesia_lib:etype(RecName),
           {bad_type, Tab, {record_name, RecName}}),

    Attrs = Cs#cstruct.attributes,
    verify(list, mnesia_lib:etype(Attrs),
           {bad_type, Tab, {attributes, Attrs}}),

    %% At least a key and one more attribute are required
    Arity = length(Attrs) + 1,
    verify(true, Arity > 2, {bad_type, Tab, {attributes, Attrs}}),

    %% Every attribute must be an atom other than snmp, and unique
    lists:foldl(fun(Attr, _Other) when Attr == snmp ->
                        mnesia:abort({bad_type, Tab, {attributes, [Attr]}});
                   (Attr, Other) ->
                        verify(atom, mnesia_lib:etype(Attr),
                               {bad_type, Tab, {attributes, [Attr]}}),
                        verify(false, lists:member(Attr, Other),
                               {combine_error, Tab, {attributes, [Attr | Other]}}),
                        [Attr | Other]
                end,
                [],
                Attrs),

    Index = Cs#cstruct.index,
    verify({alt, [nil, list]}, mnesia_lib:etype(Index),
           {bad_type, Tab, {index, Index}}),

    %% An index position must refer to a non-key attribute
    IxFun =
        fun(Pos) ->
                verify(true, fun() ->
                                     if
                                         is_integer(Pos),
                                         Pos > 2,
                                         Pos =< Arity ->
                                             true;
                                         true -> false
                                     end
                             end,
                       {bad_type, Tab, {index, [Pos]}})
        end,
    lists:foreach(IxFun, Index),

    LC = Cs#cstruct.local_content,
    verify({alt, [true, false]}, LC,
           {bad_type, Tab, {local_content, LC}}),
    Access = Cs#cstruct.access_mode,
    verify({alt, [read_write, read_only]}, Access,
           {bad_type, Tab, {access_mode, Access}}),

    Snmp = Cs#cstruct.snmp,
    verify(true, mnesia_snmp_hook:check_ustruct(Snmp),
           {badarg, Tab, {snmp, Snmp}}),

    CheckProp = fun(Prop) when is_tuple(Prop), size(Prop) >= 1 -> ok;
                   (Prop) -> mnesia:abort({bad_type, Tab, {user_properties, [Prop]}})
                end,
    lists:foreach(CheckProp, Cs#cstruct.user_properties),

    case Cs#cstruct.cookie of
        %% The node part must be checked through the bound variable;
        %% testing the literal atom `node' would always succeed.
        {{MegaSecs, Secs, MicroSecs}, Node}
          when is_integer(MegaSecs), is_integer(Secs),
               is_integer(MicroSecs), is_atom(Node) ->
            ok;
        Cookie ->
            mnesia:abort({bad_type, Tab, {cookie, Cookie}})
    end,
    case Cs#cstruct.version of
        {{Major, Minor}, _Detail}
          when is_integer(Major), is_integer(Minor) ->
            ok;
        Version ->
            mnesia:abort({bad_type, Tab, {version, Version}})
    end.

%% Check the replica node lists and the load order of a cstruct.
%% The schema table is not allowed to have disc_only_copies.
verify_nodes(Cs) ->
    Tab = Cs#cstruct.name,
    Ram = Cs#cstruct.ram_copies,
    Disc = Cs#cstruct.disc_copies,
    DiscOnly = Cs#cstruct.disc_only_copies,
    LoadOrder = Cs#cstruct.load_order,

    verify({alt, [nil, list]}, mnesia_lib:etype(Ram),
           {bad_type, Tab, {ram_copies, Ram}}),
    verify({alt, [nil, list]}, mnesia_lib:etype(Disc),
           {bad_type, Tab, {disc_copies, Disc}}),
    case Tab of
        schema ->
            verify([], DiscOnly, {bad_type, Tab, {disc_only_copies, DiscOnly}});
        _ ->
            verify({alt, [nil, list]},
                   mnesia_lib:etype(DiscOnly),
                   {bad_type, Tab, {disc_only_copies, DiscOnly}})
    end,
    verify(integer, mnesia_lib:etype(LoadOrder),
           {bad_type, Tab, {load_order, LoadOrder}}),

    %% At least one replica is required and a node may hold only one
    Nodes = Ram ++ Disc ++ DiscOnly,
    verify(list, mnesia_lib:etype(Nodes),
           {combine_error, Tab,
            [{ram_copies, []}, {disc_copies, []}, {disc_only_copies, []}]}),
    verify(false, has_duplicates(Nodes), {combine_error, Tab, Nodes}),
    AtomCheck = fun(N) -> verify(atom, mnesia_lib:etype(N), {bad_type, Tab, N}) end,
    lists:foreach(AtomCheck, Nodes).

%% Abort the transaction with Error unless the actual value equals
%% Expected, or is one of Values when Expected is {alt, Values}.
%% The actual value may be given as a fun, which is then evaluated
%% under a catch.
verify(Expected, Fun, Error) when is_function(Fun) ->
    do_verify(Expected, catch Fun(), Error);
verify(Expected, Actual, Error) ->
    do_verify(Expected, Actual, Error).

do_verify({alt, Values}, Value, Error) ->
    case lists:member(Value, Values) of
        true  -> ok;
        false -> mnesia:abort(Error)
    end;
do_verify(Value, Value, _) ->
    ok;
do_verify(_Value, _, Error) ->
    mnesia:abort(Error).

%% Abort unless there is somewhere to write the table
ensure_writable(Tab) ->
    case val({Tab, where_to_write}) of
        []  -> mnesia:abort({read_only, Tab});
        _ -> ok
    end.

%% Ensure that all replicas on disk full nodes are active
ensure_active(Cs) ->
    ensure_active(Cs, active_replicas).

%% Ensure that every replica of the table that resides on a disc
%% resident node is listed in the {Tab, What} item. For tables with
%% local content the missing nodes are asked themselves. Aborts the
%% transaction with not_active otherwise.
ensure_active(Cs, What) ->
    Tab = Cs#cstruct.name,
    Item = {Tab, What},
    ensure_non_empty(Item),
    DiscNodes = mnesia_lib:intersect(val({schema, disc_copies}),
                                     mnesia_lib:cs_to_nodes(Cs)),
    case DiscNodes -- val(Item) of
        [] ->
            ok;
        Missing ->
            Expl = "All replicas on diskfull nodes are not active yet",
            case val({Tab, local_content}) of
                true ->
                    case rpc:multicall(Missing, ?MODULE, is_remote_member, [Item]) of
                        {Replies, []} ->
                            check_active(Replies, Expl, Tab);
                        {_Replies, BadNodes} ->
                            mnesia:abort({not_active, Expl, Tab, BadNodes})
                    end;
                false ->
                    mnesia:abort({not_active, Expl, Tab, Missing})
            end
    end.

%% Abort with no_exists if the given table item is the empty list
ensure_non_empty({Tab, Vhat}) ->
    case val({Tab, Vhat}) of
        [] -> mnesia:abort({no_exists, Tab});
        _  -> ok
    end.

%% Only defined for the schema table: ok if Node is not among the
%% active schema replicas, provided that there are any at all.
ensure_not_active(Tab = schema, Node) ->
    Active = val({Tab, active_replicas}),
    case {Active, lists:member(Node, Active)} of
        {_, true} ->
            Expl = "Mnesia is running",
            mnesia:abort({active, Expl, Node});
        {[], false} ->
            mnesia:abort({no_exists, Tab});
        {_, false} ->
            ok
    end.

%% Called via rpc from ensure_active/2. Tells whether the local node
%% is a member of the list stored under Key.
is_remote_member(Key) ->
    Here = node(),
    {lists:member(Here, val(Key)), Here}.

%% Walk through the replies from is_remote_member/1 and abort
%% on the first node that is not active or could not be reached.
check_active([], _Expl, _Tab) ->
    ok;
check_active([Reply | Replies], Expl, Tab) ->
    case Reply of
        {true, _Node} ->
            check_active(Replies, Expl, Tab);
        {false, Node} ->
            mnesia:abort({not_active, Expl, Tab, [Node]});
        {badrpc, Reason} ->
            mnesia:abort({not_active, Expl, Tab, Reason})
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Here's the real interface function to create a table

create_table(TabDef) ->
    Create = fun() -> do_multi_create_table(TabDef) end,
    schema_transaction(Create).

%% And the corresponding do routines ....

%% Create one table, or all fragments of a fragmented table.
do_multi_create_table(TabDef) ->
    get_tid_ts_and_lock(schema, write),
    ensure_writable(schema),
    Cs = list2cs(TabDef),
    case Cs#cstruct.frag_properties of
        [] ->
            do_create_table(Cs);
        _Props ->
            Fragments = mnesia_frag:expand_cstruct(Cs),
            lists:foreach(fun(FragCs) -> do_create_table(FragCs) end, Fragments)
    end,
    ok.

%% Put the ops needed to create the table in the transaction store
do_create_table(Cs) ->
    {_Mod, _Tid, Ts} = get_tid_ts_and_lock(schema, none),
    Ops = make_create_table(Cs),
    do_insert_schema_ops(Ts#tidstore.store, Ops).

%% As unsafe_make_create_table/1, but aborts if the table already exists
make_create_table(Cs) ->
    Tab = Cs#cstruct.name,
    verify(false, check_if_exists(Tab), {already_exists, Tab}),
    unsafe_make_create_table(Cs).

% unsafe_do_create_table(Cs) ->
%     {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, none),
%     Store = Ts#tidstore.store,
%     do_insert_schema_ops(Store, unsafe_make_create_table(Cs)).

%% Verify the cstruct, lock the new table on its running replica
%% nodes and return the create_table op.
unsafe_make_create_table(Cs) ->
    {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, none),
    verify_cstruct(Cs),
    Tab = Cs#cstruct.name,

    %% Check that we have all disc replica nodes running
    Running = val({current, db_nodes}),
    DiscNodes = Cs#cstruct.disc_copies ++ Cs#cstruct.disc_only_copies,
    lists:foreach(fun(N) ->
                          verify(true, lists:member(N, Running),
                                 {not_active, Tab, N})
                  end,
                  DiscNodes),

    LockNodes = mnesia_lib:intersect(mnesia_lib:cs_to_nodes(Cs), Running),
    mnesia_locker:wlock_no_exist(Tid, Ts#tidstore.store, Tab, LockNodes),
    [{op, create_table, cs2list(Cs)}].

%% Tells whether the table exists, taking the create_table and
%% delete_table ops already found in the transaction store into account.
check_if_exists(Tab) ->
    {_, _, Ts} = get_tid_ts_and_lock(schema, write),
    Scan = fun({op, create_table, [{name, T} | _]}, _Acc) when T == Tab ->
                   true;
              ({op, delete_table, [{name, T} | _]}, _Acc) when T == Tab ->
                   false;
              (_Other, Acc) ->
                   Acc
           end,
    ets:foldl(Scan, existed_before(Tab), Ts#tidstore.store).

%% Tells whether the table has a cstruct registered, i.e. whether
%% it existed before the current schema transaction.
existed_before(Tab) ->
    ('EXIT' =/= element(1, ?catch_val({Tab, cstruct}))).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Delete a table entirely on all nodes.

delete_table(Tab) ->
    schema_transaction(fun() -> do_delete_table(Tab) end).

%% The schema table itself can not be deleted
do_delete_table(schema) ->
    mnesia:abort({bad_type, schema});
do_delete_table(Tab) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    ensure_writable(schema),
    insert_schema_ops(TidTs, make_delete_table(Tab, whole_table)).

%% Return the ops needed to delete the table Tab.
%%
%% Mode is whole_table or single_frag. With whole_table all fragments
%% of a fragmented base table are deleted, with single_frag only Tab.
%% A table created earlier in the same schema transaction is deleted
%% by removing its ops from the transaction store, in which case the
%% returned list of ops is empty.
make_delete_table(Tab, Mode) ->
    case existed_before(Tab) of
        false ->
            %% Deleting a table that was created in this very
            %% schema transaction. Delete all ops in the Store
            %% that operate on this table. We cannot run a normal
            %% delete operation, since that involves checking live
            %% nodes etc.
            TidTs = get_tid_ts_and_lock(schema, write),
            {_, _, Ts} = TidTs,
            Store = Ts#tidstore.store,
            Deleted = ets:select_delete(
                        Store, [{{op,'$1',[{name,Tab}|'_']},
                                 [{'or',
                                   {'==','$1',create_table},
                                   {'==','$1',delete_table}}], [true]}]),
            %% The property ops are tagged write_property and
            %% delete_property when they are put in the store,
            %% so these are the tags that must be matched here.
            ets:select_delete(
              Store, [{{op,'$1',[{name,Tab}|'_'],'_'},
                       [{'or',
                         {'==','$1',write_property},
                         {'==','$1',delete_property}}],
                       [true]}]),
            case Deleted of
                0 -> mnesia:abort({no_exists, Tab});
                _ -> []
            end;
        true ->
            case Mode of
                whole_table ->
                    case val({Tab, frag_properties}) of
                        [] ->
                            [make_delete_table2(Tab)];
                        _Props ->
                            %% Check if it is a base table
                            mnesia_frag:lookup_frag_hash(Tab),

                            %% Check for foreigners
                            F = mnesia_frag:lookup_foreigners(Tab),
                            verify([], F, {combine_error,
                                           Tab, "Too many foreigners", F}),
                            [make_delete_table2(T) ||
                                T <- mnesia_frag:frag_names(Tab)]
                    end;
                single_frag ->
                    [make_delete_table2(Tab)]
            end
    end.

%% Lock one existing table and return its delete_table op
make_delete_table2(Tab) ->
    get_tid_ts_and_lock(Tab, write),
    Cs = val({Tab, cstruct}),
    ensure_active(Cs),
    ensure_writable(Tab),
    {op, delete_table, cs2list(Cs)}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Change fragmentation of a table

change_table_frag(Tab, Change) ->
    schema_transaction(fun() -> do_change_table_frag(Tab, Change) end).

%% mnesia_frag returns a list of op lists, each of them is
%% put in the transaction store in turn.
do_change_table_frag(Tab, Change) when is_atom(Tab), Tab /= schema ->
    TidTs = get_tid_ts_and_lock(schema, write),
    OpLists = mnesia_frag:change_table_frag(Tab, Change),
    lists:foreach(fun(OpList) -> insert_schema_ops(TidTs, OpList) end,
                  OpLists),
    ok;
do_change_table_frag(Tab, _Change) ->
    mnesia:abort({bad_type, Tab}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Clear a table

%% Runs as a schema transaction
clear_table(Tab) ->
    Clear = fun() -> do_clear_table(Tab) end,
    schema_transaction(Clear).

do_clear_table(schema) ->
    mnesia:abort({bad_type, schema});
do_clear_table(Tab) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, write),
    Ops = make_clear_table(Tab),
    insert_schema_ops(TidTs, Ops).

make_clear_table(Tab) ->
    Cs = val({Tab, cstruct}),
    ensure_writable(Tab),
    [{op, clear_table, cs2list(Cs)}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Add a replica of the table Tab at Node with the given storage type
add_table_copy(Tab, Node, Storage) ->
    Add = fun() -> do_add_table_copy(Tab, Node, Storage) end,
    schema_transaction(Add).

do_add_table_copy(Tab, Node, Storage) when is_atom(Tab), is_atom(Node) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    Ops = make_add_table_copy(Tab, Node, Storage),
    insert_schema_ops(TidTs, Ops);
do_add_table_copy(Tab, Node, _) ->
    mnesia:abort({badarg, Tab, Node}).

%% Return the op that adds a replica of Tab at Node, or abort if
%% the replica already exists or the node is in the wrong state.
make_add_table_copy(Tab, Node, Storage) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    OldNodes = mnesia_lib:cs_to_nodes(Cs),
    verify(false, lists:member(Node, OldNodes), {already_exists, Tab, Node}),
    Cs2 = new_cs(Cs, Node, Storage, add),
    verify_cstruct(Cs2),

    %% Check storage and if node is running
    IsRunning = lists:member(Node, val({current, db_nodes})),
    case {Tab, Storage, IsRunning} of
        {schema, ram_copies, false} ->
            ignore;
        {schema, ram_copies, true} ->
            mnesia:abort({already_exists, Tab, Node});
        {schema, _, _} ->
            mnesia:abort({badarg, Tab, Storage});
        {_, ram_copies, _} ->
            ignore;
        {_, _, true} ->
            ignore;
        {_, _, false} ->
            mnesia:abort({not_active, schema, Node})
    end,
    [{op, add_table_copy, Storage, Node, cs2list(Cs2)}].

%% Delete the replica of the table Tab at Node
del_table_copy(Tab, Node) ->
    Del = fun() -> do_del_table_copy(Tab, Node) end,
    schema_transaction(Del).

do_del_table_copy(Tab, Node) when is_atom(Node) ->
    TidTs = get_tid_ts_and_lock(schema, write),
%%    get_tid_ts_and_lock(Tab, write),
    Ops = make_del_table_copy(Tab, Node),
    insert_schema_ops(TidTs, Ops);
do_del_table_copy(Tab, Node) ->
    mnesia:abort({badarg, Tab, Node}).

%% Return the ops that remove the replica of Tab at Node. Removing
%% the last replica of an ordinary table deletes the whole table.
%% Removing a schema replica removes the node from all other tables.
make_del_table_copy(Tab, Node) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    Storage = mnesia_lib:schema_cs_to_storage_type(Node, Cs),
    Cs2 = new_cs(Cs, Node, Storage, del),
    case {Tab, mnesia_lib:cs_to_nodes(Cs2)} of
        {schema, []} ->
            mnesia:abort({combine_error, Tab, "Last replica"});
        {_, []} ->
            ensure_active(Cs),
            dbg_out("Last replica deleted in table ~p~n", [Tab]),
            make_delete_table(Tab, whole_table);
        {schema, _} ->
            %% ensure_active(Cs2),
            ensure_not_active(Tab, Node),
            verify_cstruct(Cs2),
            TabOps = remove_node_from_tabs(val({schema, tables}), Node),
            [{op, del_table_copy, ram_copies, Node, cs2list(Cs2)} | TabOps];
        {_, _} ->
            ensure_active(Cs),
            verify_cstruct(Cs2),
            [{op, del_table_copy, Storage, Node, cs2list(Cs2)}]
    end.

%% Return the ops needed to remove Node from all given tables,
%% except the schema table which is skipped. A table that loses
%% its last replica is deleted.
remove_node_from_tabs([], _Node) ->
    [];
remove_node_from_tabs([schema | Tabs], Node) ->
    remove_node_from_tabs(Tabs, Node);
remove_node_from_tabs([Tab | Tabs], Node) ->
    {Cs, IsFragModified} =
        mnesia_frag:remove_node(Node, incr_version(val({Tab, cstruct}))),
    TabOps =
        case mnesia_lib:schema_cs_to_storage_type(Node, Cs) of
            unknown ->
                %% No replica here, but the frag properties may
                %% still have referred to the node
                case IsFragModified of
                    true ->
                        [{op, change_table_frag, {del_node, Node}, cs2list(Cs)}];
                    false ->
                        []
                end;
            Storage ->
                Cs2 = new_cs(Cs, Node, Storage, del),
                case mnesia_lib:cs_to_nodes(Cs2) of
                    [] ->
                        [{op, delete_table, cs2list(Cs)}];
                    _Ns ->
                        verify_cstruct(Cs2),
                        [{op, del_table_copy, ram_copies, Node, cs2list(Cs2)}]
                end
        end,
    TabOps ++ remove_node_from_tabs(Tabs, Node).

%% Return a copy of the cstruct where Node is added to or deleted
%% from the replica list of the given storage type. Any other
%% storage type aborts the transaction.
new_cs(Cs, Node, ram_copies, add) ->
    Old = Cs#cstruct.ram_copies,
    Cs#cstruct{ram_copies = opt_add(Node, Old)};
new_cs(Cs, Node, disc_copies, add) ->
    Old = Cs#cstruct.disc_copies,
    Cs#cstruct{disc_copies = opt_add(Node, Old)};
new_cs(Cs, Node, disc_only_copies, add) ->
    Old = Cs#cstruct.disc_only_copies,
    Cs#cstruct{disc_only_copies = opt_add(Node, Old)};
new_cs(Cs, Node, ram_copies, del) ->
    Old = Cs#cstruct.ram_copies,
    Cs#cstruct{ram_copies = lists:delete(Node, Old)};
new_cs(Cs, Node, disc_copies, del) ->
    Old = Cs#cstruct.disc_copies,
    Cs#cstruct{disc_copies = lists:delete(Node, Old)};
new_cs(Cs, Node, disc_only_copies, del) ->
    Old = Cs#cstruct.disc_only_copies,
    Cs#cstruct{disc_only_copies = lists:delete(Node, Old)};
new_cs(Cs, _Node, Storage, _Op) ->
    mnesia:abort({badarg, Cs#cstruct.name, Storage}).


%% Put N first in the list, after removing one earlier occurrence
opt_add(N, L) ->
    Rest = lists:delete(N, L),
    [N | Rest].

%% Move the replica of the table Tab from FromNode to ToNode
move_table(Tab, FromNode, ToNode) ->
    Move = fun() -> do_move_table(Tab, FromNode, ToNode) end,
    schema_transaction(Move).

%% The schema table can not be moved
do_move_table(schema, _FromNode, _ToNode) ->
    mnesia:abort({bad_type, schema});
do_move_table(Tab, FromNode, ToNode) when is_atom(FromNode), is_atom(ToNode) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    Ops = make_move_table(Tab, FromNode, ToNode),
    insert_schema_ops(TidTs, Ops);
do_move_table(Tab, FromNode, ToNode) ->
    mnesia:abort({badarg, Tab, FromNode, ToNode}).

%% A move is an add at ToNode followed by a delete at FromNode,
%% with a sync_trans op in between. The storage type is kept.
make_move_table(Tab, FromNode, ToNode) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    OldNodes = mnesia_lib:cs_to_nodes(Cs),
    verify(false, lists:member(ToNode, OldNodes),
           {already_exists, Tab, ToNode}),
    verify(true, lists:member(FromNode, val({Tab, where_to_write})),
           {not_active, Tab, FromNode}),
    verify(false, val({Tab, local_content}),
           {"Cannot move table with local content", Tab}),
    ensure_active(Cs),
    Running = val({current, db_nodes}),
    Storage = mnesia_lib:schema_cs_to_storage_type(FromNode, Cs),
    verify(true, lists:member(ToNode, Running), {not_active, schema, ToNode}),

    AddedCs = new_cs(Cs, ToNode, Storage, add),
    MovedCs = new_cs(AddedCs, FromNode, Storage, del),
    verify_cstruct(MovedCs),
    AddOp = {op, add_table_copy, Storage, ToNode, cs2list(AddedCs)},
    DelOp = {op, del_table_copy, Storage, FromNode, cs2list(MovedCs)},
    [AddOp, {op, sync_trans}, DelOp].

%% end of functions to add and delete nodes to tables
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%

%% Change the storage type of the replica of Tab at Node to ToS
change_table_copy_type(Tab, Node, ToS) ->
    Change = fun() -> do_change_table_copy_type(Tab, Node, ToS) end,
    schema_transaction(Change).

do_change_table_copy_type(Tab, Node, ToS) when is_atom(Node) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, write),      % ensure global sync
    %% get_tid_ts_and_lock(Tab, read),
    Ops = make_change_table_copy_type(Tab, Node, ToS),
    insert_schema_ops(TidTs, Ops);
do_change_table_copy_type(Tab, Node, _ToS) ->
    mnesia:abort({badarg, Tab, Node}).

%% Changing the storage type to unknown is the same as deleting
%% the replica. Changing to the current type aborts.
make_change_table_copy_type(Tab, Node, unknown) ->
    make_del_table_copy(Tab, Node);
make_change_table_copy_type(Tab, Node, ToS) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    FromS = mnesia_lib:storage_type_at_node(Node, Tab),

    case compare_storage_type(false, FromS, ToS) of
        {same, _} ->
            mnesia:abort({already_exists, Tab, Node, ToS});
        {diff, _} ->
            ignore;
        incompatible ->
            ensure_active(Cs)
    end,

    WithoutOld = new_cs(Cs, Node, FromS, del),
    WithNew = new_cs(WithoutOld, Node, ToS, add),
    verify_cstruct(WithNew),

    [{op, change_table_copy_type, Node, FromS, ToS, cs2list(WithNew)}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% change index functions ....
%% Pos is already incremented by 1 in both of these functions

add_table_index(Tab, Pos) ->
    Add = fun() -> do_add_table_index(Tab, Pos) end,
    schema_transaction(Add).

do_add_table_index(schema, _Attr) ->
    mnesia:abort({bad_type, schema});
do_add_table_index(Tab, Attr) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, read),
    Pos = attr_tab_to_pos(Tab, Attr),
    Ops = make_add_table_index(Tab, Pos),
    insert_schema_ops(TidTs, Ops).

%% The index positions of a table are kept sorted
make_add_table_index(Tab, Pos) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    OldIndex = Cs#cstruct.index,
    verify(false, lists:member(Pos, OldIndex), {already_exists, Tab, Pos}),
    NewCs = Cs#cstruct{index = lists:sort([Pos | OldIndex])},
    verify_cstruct(NewCs),
    [{op, add_index, Pos, cs2list(NewCs)}].

del_table_index(Tab, Pos) ->
    Del = fun() -> do_del_table_index(Tab, Pos) end,
    schema_transaction(Del).

do_del_table_index(schema, _Attr) ->
    mnesia:abort({bad_type, schema});
do_del_table_index(Tab, Attr) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, read),
    Pos = attr_tab_to_pos(Tab, Attr),
    Ops = make_del_table_index(Tab, Pos),
    insert_schema_ops(TidTs, Ops).

%% Abort with no_exists unless there is an index at Pos
make_del_table_index(Tab, Pos) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    OldIndex = Cs#cstruct.index,
    verify(true, lists:member(Pos, OldIndex), {no_exists, Tab, Pos}),
    NewCs = Cs#cstruct{index = lists:delete(Pos, OldIndex)},
    verify_cstruct(NewCs),
    [{op, del_index, Pos, cs2list(NewCs)}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

add_snmp(Tab, Ustruct) ->
    Add = fun() -> do_add_snmp(Tab, Ustruct) end,
    schema_transaction(Add).

do_add_snmp(schema, _Ustruct) ->
    mnesia:abort({bad_type, schema});
do_add_snmp(Tab, Ustruct) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, read),
    Ops = make_add_snmp(Tab, Ustruct),
    insert_schema_ops(TidTs, Ops).

%% A table may only have one snmp ustruct at a time
make_add_snmp(Tab, Ustruct) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    verify([], Cs#cstruct.snmp, {already_exists, Tab, snmp}),
    BadUstruct = {badarg, Tab, snmp, Ustruct},
    verify(true, mnesia_snmp_hook:check_ustruct(Ustruct), BadUstruct),
    NewCs = Cs#cstruct{snmp = Ustruct},
    verify_cstruct(NewCs),
    [{op, add_snmp, Ustruct, cs2list(NewCs)}].

del_snmp(Tab) ->
    Del = fun() -> do_del_snmp(Tab) end,
    schema_transaction(Del).

do_del_snmp(schema) ->
    mnesia:abort({bad_type, schema});
do_del_snmp(Tab) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, read),
    Ops = make_del_snmp(Tab),
    insert_schema_ops(TidTs, Ops).

make_del_snmp(Tab) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    NewCs = Cs#cstruct{snmp = []},
    verify_cstruct(NewCs),
    [{op, del_snmp, cs2list(NewCs)}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%

%% Change the attributes and the record name of a table. Fun is
%% either a fun or the atom ignore. Badly typed arguments give
%% an aborted result without a schema transaction being started.
transform_table(Tab, Fun, NewAttrs, NewRecName)
  when (is_function(Fun) orelse Fun =:= ignore),
       is_list(NewAttrs), is_atom(NewRecName) ->
    Transform = fun() -> do_transform_table(Tab, Fun, NewAttrs, NewRecName) end,
    schema_transaction(Transform);
transform_table(Tab, Fun, NewAttrs, NewRecName) ->
    {aborted, {bad_type, Tab, Fun, NewAttrs, NewRecName}}.

do_transform_table(schema, _Fun, _NewAttrs, _NewRecName) ->
    mnesia:abort({bad_type, schema});
do_transform_table(Tab, Fun, NewAttrs, NewRecName) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, write),
    Ops = make_transform(Tab, Fun, NewAttrs, NewRecName),
    insert_schema_ops(TidTs, Ops).

%% For a table with indexes, the indexes are deleted before the
%% transform op and added again after it.
make_transform(Tab, Fun, NewAttrs, NewRecName) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    ensure_writable(Tab),
    case mnesia_lib:val({Tab, index}) of
        [] ->
            NewCs = Cs#cstruct{attributes = NewAttrs,
                               record_name = NewRecName},
            verify_cstruct(NewCs),
            [{op, transform, Fun, cs2list(NewCs)}];
        PosList ->
            DelIdx =
                fun(Pos, Acc) ->
                        Acc1 = Acc#cstruct{
                                 index = lists:delete(Pos, Acc#cstruct.index)},
                        {{op, del_index, Pos, cs2list(Acc1)}, Acc1}
                end,
            AddIdx =
                fun(Pos, Acc) ->
                        Acc1 = Acc#cstruct{
                                 index = lists:sort([Pos | Acc#cstruct.index])},
                        {{op, add_index, Pos, cs2list(Acc1)}, Acc1}
                end,
            {DelOps, NoIndexCs} = lists:mapfoldl(DelIdx, Cs, PosList),
            NewCs = NoIndexCs#cstruct{attributes = NewAttrs,
                                      record_name = NewRecName},
            {AddOps, FinalCs} = lists:mapfoldl(AddIdx, NewCs, PosList),
            verify_cstruct(FinalCs),
            TransformOp = {op, transform, Fun, cs2list(NewCs)},
            DelOps ++ [TransformOp | AddOps]
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%

change_table_access_mode(Tab, Mode) ->
    Change = fun() -> do_change_table_access_mode(Tab, Mode) end,
    schema_transaction(Change).

%% Both the schema and the table are write locked
%% on all their active replicas.
do_change_table_access_mode(Tab, Mode) ->
    {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, write),
    Store = Ts#tidstore.store,
    mnesia_locker:wlock_no_exist(Tid, Store, schema,
                                 val({schema, active_replicas})),
    mnesia_locker:wlock_no_exist(Tid, Store, Tab,
                                 val({Tab, active_replicas})),
    Ops = make_change_table_access_mode(Tab, Mode),
    do_insert_schema_ops(Store, Ops).

%% Changing to the current access mode aborts with already_exists
make_change_table_access_mode(Tab, Mode) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    OldMode = Cs#cstruct.access_mode,
    verify(false, OldMode == Mode, {already_exists, Tab, Mode}),
    NewCs = Cs#cstruct{access_mode = Mode},
    verify_cstruct(NewCs),
    [{op, change_table_access_mode, cs2list(NewCs), OldMode, Mode}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

change_table_load_order(Tab, LoadOrder) ->
    Change = fun() -> do_change_table_load_order(Tab, LoadOrder) end,
    schema_transaction(Change).

do_change_table_load_order(schema, _LoadOrder) ->
    mnesia:abort({bad_type, schema});
do_change_table_load_order(Tab, LoadOrder) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    get_tid_ts_and_lock(Tab, none),
    Ops = make_change_table_load_order(Tab, LoadOrder),
    insert_schema_ops(TidTs, Ops).

make_change_table_load_order(Tab, LoadOrder) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    OldLoadOrder = Cs#cstruct.load_order,
    NewCs = Cs#cstruct{load_order = LoadOrder},
    verify_cstruct(NewCs),
    [{op, change_table_load_order, cs2list(NewCs), OldLoadOrder, LoadOrder}].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% A table property is a tuple with at least one element,
%% the first element being the key of the property.
write_table_property(Tab, Prop) when is_tuple(Prop), size(Prop) >= 1 ->
    Write = fun() -> do_write_table_property(Tab, Prop) end,
    schema_transaction(Write);
write_table_property(Tab, Prop) ->
    {aborted, {bad_type, Tab, Prop}}.
%% Write a user property of a table. If the transaction store
%% already holds an op that carries the definition of the table,
%% the property is merged into that op instead of adding a new one.
do_write_table_property(Tab, Prop) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    {_, _, Ts} = TidTs,
    Store = Ts#tidstore.store,
    case change_prop_in_existing_op(Tab, Prop, write_property, Store) of
        true ->
            dbg_out("change_prop_in_existing_op"
                    "(~p,~p,write_property,Store) -> true~n",
                    [Tab,Prop]),
            %% we have merged the table prop into the create_table op
            ok;
        false ->
            dbg_out("change_prop_in_existing_op"
                    "(~p,~p,write_property,Store) -> false~n",
                    [Tab,Prop]),
            %% this must be an existing table
            get_tid_ts_and_lock(Tab, none),
            insert_schema_ops(TidTs, make_write_table_properties(Tab, [Prop]))
    end.

%% Return one write_property op per property, each op carrying
%% the table definition as it looks after that property is written.
make_write_table_properties(Tab, Props) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    make_write_table_properties(Tab, Props, Cs).

make_write_table_properties(Tab, [Prop | Props], Cs) ->
    OldProps = Cs#cstruct.user_properties,
    PropKey = element(1, Prop),
    DelProps = lists:keydelete(PropKey, 1, OldProps),
    MergedProps = lists:merge(DelProps, [Prop]),
    Cs2 = Cs#cstruct{user_properties = MergedProps},
    verify_cstruct(Cs2),
    [{op, write_property, cs2list(Cs2), Prop} |
     make_write_table_properties(Tab, Props, Cs2)];
make_write_table_properties(_Tab, [], _Cs) ->
    [].

%% Try to apply a property change to an op that is already in the
%% transaction store. Returns true if such an op was found, in which
%% case the contents of the store are replaced, and false otherwise.
change_prop_in_existing_op(Tab, Prop, How, Store) ->
    Ops = ets:match_object(Store, '_'),
    case update_existing_op(Ops, Tab, Prop, How, []) of
        {true, Ops1} ->
            ets:match_delete(Store, '_'),
            [ets:insert(Store, Op) || Op <- Ops1],
            true;
        false ->
            false
    end.

%% Find the first op that defines or changes the properties of Tab
%% and update it. Returns {true, NewOps} or false if there is none.
update_existing_op([{op, Op, L = [{name,Tab}|_], _OldProp}|Ops],
                   Tab, Prop, How, Acc) when Op == write_property;
                                             Op == delete_property ->
    %% Apparently, mnesia_dumper doesn't care about OldProp here -- just L,
    %% so we will throw away OldProp (not that it matters...) and insert Prop
    %% as the last element of the op.
    L1 = insert_prop(Prop, L, How),
    NewOp = {op, How, L1, Prop},
    {true, lists:reverse(Acc) ++ [NewOp|Ops]};
update_existing_op([Op = {op, create_table, L}|Ops], Tab, Prop, How, Acc) ->
    case lists:keysearch(name, 1, L) of
        {value, {_, Tab}} ->
            %% Tab is being created here -- insert Prop into L
            L1 = insert_prop(Prop, L, How),
            {true, lists:reverse(Acc) ++ [{op, create_table, L1}|Ops]};
        _ ->
            update_existing_op(Ops, Tab, Prop, How, [Op|Acc])
    end;
update_existing_op([Op|Ops], Tab, Prop, How, Acc) ->
    update_existing_op(Ops, Tab, Prop, How, [Op|Acc]);
update_existing_op([], _, _, _, _) ->
    false.

%% Read a user property of a table from the ops found in the
%% transaction store. Returns the property tuple, or undefined
%% if the store holds no such property for the table.
do_read_table_property(Tab, Key) ->
    TidTs = get_tid_ts_and_lock(schema, read),
    {_, _, Ts} = TidTs,
    Store = Ts#tidstore.store,
    Props = ets:foldl(
              fun({op, create_table, [{name, T}|Opts]}, _Acc)
                    when T==Tab ->
                      find_props(Opts);
                 %% The table name must match in both guard alternatives,
                 %% otherwise a delete_property op that belongs to some
                 %% other table would be picked up as well.
                 ({op, Op, [{name,T}|Opts], _Prop}, _Acc)
                    when T==Tab, Op==write_property;
                         T==Tab, Op==delete_property ->
                      find_props(Opts);
                 ({op, delete_table, [{name,T}|_]}, _Acc)
                    when T==Tab ->
                      [];
                 (_Other, Acc) ->
                      Acc
              end, [], Store),
    case lists:keysearch(Key, 1, Props) of
        {value, Property} ->
            Property;
        false ->
            undefined
    end.


%% perhaps a misnomer. How could also be delete_property... never mind.
%% Returns the modified L.
insert_prop(Prop, L, How) ->
    Prev = find_props(L),
    MergedProps = merge_with_previous(How, Prop, Prev),
    replace_props(L, MergedProps).

find_props([{user_properties, P}|_]) -> P;
find_props([_H|T]) -> find_props(T).
%% we shouldn't reach []

replace_props([{user_properties, _}|T], P) -> [{user_properties, P}|T];
replace_props([H|T], P) -> [H|replace_props(T, P)].
%% again, we shouldn't reach []

%% For write_property the second argument is the whole property,
%% for delete_property it is only the key of the property.
merge_with_previous(write_property, Prop, Prev) ->
    Key = element(1, Prop),
    Prev1 = lists:keydelete(Key, 1, Prev),
    lists:sort([Prop|Prev1]);
merge_with_previous(delete_property, PropKey, Prev) ->
    lists:keydelete(PropKey, 1, Prev).

delete_table_property(Tab, PropKey) ->
    Del = fun() -> do_delete_table_property(Tab, PropKey) end,
    schema_transaction(Del).

%% Delete a user property of a table. If the transaction store
%% already holds an op that carries the definition of the table,
%% the property is removed from that op instead of adding a new one.
do_delete_table_property(Tab, PropKey) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    {_, _, Ts} = TidTs,
    Merged = change_prop_in_existing_op(Tab, PropKey, delete_property,
                                        Ts#tidstore.store),
    case Merged of
        true ->
            dbg_out("change_prop_in_existing_op"
                    "(~p,~p,delete_property,Store) -> true~n",
                    [Tab,PropKey]),
            %% we have merged the table prop into the create_table op
            ok;
        false ->
            dbg_out("change_prop_in_existing_op"
                    "(~p,~p,delete_property,Store) -> false~n",
                    [Tab,PropKey]),
            %% this must be an existing table
            get_tid_ts_and_lock(Tab, none),
            Ops = make_delete_table_properties(Tab, [PropKey]),
            insert_schema_ops(TidTs, Ops)
    end.

%% Return one delete_property op per key, each op carrying the
%% table definition as it looks after that property is deleted.
make_delete_table_properties(Tab, PropKeys) ->
    ensure_writable(schema),
    Cs = incr_version(val({Tab, cstruct})),
    ensure_active(Cs),
    make_delete_table_properties(Tab, PropKeys, Cs).

make_delete_table_properties(_Tab, [], _Cs) ->
    [];
make_delete_table_properties(Tab, [PropKey | PropKeys], Cs) ->
    Remaining = lists:keydelete(PropKey, 1, Cs#cstruct.user_properties),
    NewCs = Cs#cstruct{user_properties = Remaining},
    verify_cstruct(NewCs),
    Op = {op, delete_property, cs2list(NewCs), PropKey},
    [Op | make_delete_table_properties(Tab, PropKeys, NewCs)].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Ensure that the transaction can be committed even
%% if the node crashes and Mnesia is restarted.
%%
%% Returns {Modified, NewCommit, DumperMode}. When one of the ops
%% makes the dumper mode mandatory, the transaction log is dumped
%% before returning.
prepare_commit(Tid, Commit, WaitFor) ->
    case Commit#commit.schema_ops of
        [] ->
            {false, Commit, optional};
        OrigOps ->
            {Modified, RevOps, DumperMode} =
                prepare_ops(Tid, OrigOps, WaitFor, false, [], optional),
            InitBy = schema_prepare,
            %% prepare_ops/6 collects the ops in reverse order
            NewCommit = Commit#commit{schema_ops = lists:reverse(RevOps)},
            Result = {Modified, NewCommit, DumperMode},
            case DumperMode of
                optional ->
                    dbg_out("Transaction log dump skipped (~p): ~w~n",
                            [DumperMode, InitBy]);
                mandatory ->
                    case mnesia_controller:sync_dump_log(InitBy) of
                        dumped ->
                            ok;
                        {error, Reason} ->
                            mnesia:abort(Reason)
                    end
            end,
            case RevOps of
                [] ->
                    ignore;
                _ ->
                    %% We need to grab a dumper lock here, the log may not
                    %% be dumped by others, during the schema commit phase.
                    mnesia_controller:wait_for_schema_commit_lock()
            end,
            Result
    end.

%% Prepare the ops one by one. An op may be kept as is, be replaced
%% by other ops or be dropped. Once an op has asked for a mandatory
%% dump the dumper mode stays mandatory.
prepare_ops(_Tid, [], _WaitFor, Changed, Acc, DumperMode) ->
    {Changed, Acc, DumperMode};
prepare_ops(Tid, [Op | Ops], WaitFor, Changed, Acc, DumperMode) ->
    case prepare_op(Tid, Op, WaitFor) of
        {true, mandatory} ->
            prepare_ops(Tid, Ops, WaitFor, Changed, [Op | Acc], mandatory);
        {true, optional} ->
            prepare_ops(Tid, Ops, WaitFor, Changed, [Op | Acc], DumperMode);
        {true, NewOps, mandatory} ->
            prepare_ops(Tid, Ops, WaitFor, true, NewOps ++ Acc, mandatory);
        {true, NewOps, optional} ->
            prepare_ops(Tid, Ops, WaitFor, true, NewOps ++ Acc, DumperMode);
        {false, optional} ->
            prepare_ops(Tid, Ops, WaitFor, true, Acc, DumperMode)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Prepare for commit
%% returns true if Op should be included, i.e. unmodified
%%         {true, Operation} if NewRecs should be included, i.e. modified
%%         false if Op should NOT be included, i.e.
%% modified
%%
%% Every clause answers with {Include, DumperMode} or
%% {true, ReplacementOps, DumperMode}; see prepare_ops/6 for how the
%% answers are folded together.

%% A record op whose storage type was unknown when it was created:
%% resolve the storage type on this node, or drop the op if the table
%% has no replica here.
prepare_op(_Tid, {op, rec, unknown, Rec}, _WaitFor) ->
    {{Tab, Key}, Items, _Op} = Rec,
    case val({Tab, storage_type}) of
        unknown ->
            {false, optional};
        Storage ->
            mnesia_tm:prepare_snmp(Tab, Key, Items), % May exit
            {true, [{op, rec, Storage, Rec}], optional}
    end;

prepare_op(_Tid, {op, announce_im_running, Node, SchemaDef, Running, RemoteRunning}, _WaitFor) ->
    SchemaCs = list2cs(SchemaDef),
    case Node == node() of
        true ->
            %% Announce has already run on local node
            %% from do_merge_schema
            ignore;
        false ->
            Known = val({current,db_nodes}),
            NewNodes = mnesia_lib:uniq(Running++RemoteRunning) -- Known,
            %% Remembered so that undo_prepare_op/2 can revert the announce
            mnesia_lib:set(prepare_op, {announce_im_running,NewNodes}),
            announce_im_running(NewNodes, SchemaCs)
    end,
    {false, optional};

%% Participant side of a sync_trans: report to the coordinator and wait
%% for its go-ahead.
prepare_op(_Tid, {op, sync_trans}, {part, CoordPid}) ->
    CoordPid ! {sync_trans, self()},
    receive
        {sync_trans, CoordPid} ->
            {false, optional};
        {mnesia_down, _Node} = Down ->
            mnesia_lib:verbose("sync_op terminated due to ~p~n", [Down]),
            mnesia:abort(Down);
        {'EXIT', _, _} = Exit ->
            mnesia_lib:verbose("sync_op terminated due to ~p~n", [Exit]),
            mnesia:abort(Exit)
    end;

%% Coordinator side of a sync_trans: collect one message per node, then
%% release all participants.
prepare_op(_Tid, {op, sync_trans}, {coord, Nodes}) ->
    case receive_sync(Nodes, []) of
        {abort, Reason} ->
            mnesia_lib:verbose("sync_op terminated due to ~p~n", [Reason]),
            mnesia:abort(Reason);
        Pids ->
            lists:foreach(fun(Pid) -> Pid ! {sync_trans, self()} end, Pids),
            {false, optional}
    end;

%% Create the local storage for a new table (if this node holds a replica)
%% and insert its cstruct.
prepare_op(Tid, {op, create_table, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    UseDir = mnesia_monitor:use_dir(),
    Tab = Cs#cstruct.name,
    case Storage of
        _ when UseDir == false, Storage == disc_copies;
               UseDir == false, Storage == disc_only_copies ->
            %% Disc based replicas need a Mnesia directory
            mnesia:abort({bad_type, Tab, Storage, node()});
        ram_copies ->
            mnesia_lib:set({Tab, create_table},true),
            create_ram_table(Tab, Cs#cstruct.type),
            insert_cstruct(Tid, Cs, false),
            {true, optional};
        disc_copies ->
            mnesia_lib:set({Tab, create_table},true),
            create_ram_table(Tab, Cs#cstruct.type),
            create_disc_table(Tab),
            insert_cstruct(Tid, Cs, false),
            {true, optional};
        disc_only_copies ->
            mnesia_lib:set({Tab, create_table},true),
            create_disc_only_table(Tab,Cs#cstruct.type),
            insert_cstruct(Tid, Cs, false),
            {true, optional};
        unknown -> %% No replica on this node
            mnesia_lib:set({Tab, create_table},true),
            insert_cstruct(Tid, Cs, false),
            {true, optional}
    end;

prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,

    if
        Tab == schema ->
            {true, optional};

        Node == node() ->
            %% The new replica is ours
            case mnesia_lib:val({schema, storage_type}) of
                ram_copies when Storage /= ram_copies ->
                    mnesia:abort({combine_error, Tab, "has no disc", Node});
                _ ->
                    ok
            end,
            %% Tables are created by mnesia_loader get_network code
            insert_cstruct(Tid, Cs, true),
            case mnesia_controller:get_network_copy(Tab, Cs) of
                {loaded, ok} ->
                    {true, optional};
                {not_loaded, ErrReason} ->
                    mnesia:abort({system_limit, Tab, {Node, ErrReason}})
            end;

        Node /= node() ->
            %% Verify that ram table not has been dumped to disc
            if
                Storage /= ram_copies ->
                    case mnesia_lib:schema_cs_to_storage_type(node(), Cs) of
                        ram_copies ->
                            DcdFile = mnesia_lib:tab2dcd(Tab),
                            case mnesia_lib:exists(DcdFile) of
                                true ->
                                    mnesia:abort({combine_error, Tab, Storage,
                                                  "Table dumped to disc", node()});
                                false ->
                                    ok
                            end;
                        _ ->
                            ok
                    end;
                true ->
                    ok
            end,
            insert_cstruct(Tid, Cs, true),
            {true, optional}
    end;

prepare_op(Tid, {op, del_table_copy, _Storage, Node, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,

    if
        %% Schema table lock is always required to run a schema op.
        %% No need to lock it.
        node(Tid#tid.pid) == node(), Tab /= schema ->
            Parent = self(),
            Locker = spawn_link(fun() ->
                                        lock_del_table(Tab, Node, Cs, Parent)
                                end),
            put(mnesia_lock, Locker),
            receive
                {Locker, updated} ->
                    {true, optional};
                {Locker, FailReason} ->
                    mnesia:abort(FailReason);
                {'EXIT', Locker, ExitReason} ->
                    mnesia:abort(ExitReason)
            end;
        true ->
            {true, optional}
    end;

%% Change of storage type for the local replica: prepare the new storage
%% (dump to disc or load into ram) before the commit.
prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef}, _WaitFor)
  when N == node() ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,

    NotActive = mnesia_lib:not_active_here(Tab),

    if
        NotActive == true ->
            mnesia:abort({not_active, Tab, node()});

        Tab == schema ->
            case {FromS, ToS} of
                {ram_copies, disc_copies} ->
                    case mnesia:system_info(schema_location) of
                        opt_disc ->
                            ignore;
                        _ ->
                            mnesia:abort({combine_error, Tab, node(),
                                          "schema_location must be opt_disc"})
                    end,
                    Dir = mnesia_lib:dir(),
                    case opt_create_dir(true, Dir) of
                        ok ->
                            purge_dir(Dir, []),
                            mnesia_log:purge_all_logs(),
                            set(use_dir, true),
                            mnesia_log:init(),
                            Ns = val({current, db_nodes}), %mnesia_lib:running_nodes(),
                            LogUp = fun(U) -> mnesia_recover:log_mnesia_up(U) end,
                            lists:foreach(LogUp, Ns),

                            mnesia_dumper:raw_named_dump_table(Tab, dmp),
                            mnesia_checkpoint:tm_change_table_copy_type(Tab, FromS, ToS);
                        {error, DirReason} ->
                            mnesia:abort(DirReason)
                    end;
                {disc_copies, ram_copies} ->
                    %% Only allowed when no other local table resides on disc
                    Ltabs = val({schema, local_tables}) -- [schema],
                    Dtabs = [L || L <- Ltabs,
                                  val({L, storage_type}) /= ram_copies],
                    verify([], Dtabs, {"Disc resident tables", Dtabs, N});
                _ ->
                    mnesia:abort({combine_error, Tab, ToS})
            end;

        FromS == ram_copies ->
            case mnesia_monitor:use_dir() of
                true ->
                    DcdFile = mnesia_lib:tab2dcd(Tab),
                    case mnesia_lib:exists(DcdFile) of
                        true ->
                            mnesia:abort({combine_error, Tab, node(),
                                          "Table dump exists"});
                        false ->
                            case ToS of
                                disc_copies ->
                                    mnesia_log:ets2dcd(Tab, dmp);
                                disc_only_copies ->
                                    mnesia_dumper:raw_named_dump_table(Tab, dmp)
                            end,
                            mnesia_checkpoint:tm_change_table_copy_type(Tab, FromS, ToS)
                    end;
                false ->
                    mnesia:abort({has_no_disc, node()})
            end;

        FromS == disc_copies, ToS == disc_only_copies ->
            mnesia_dumper:raw_named_dump_table(Tab, dmp);

        FromS == disc_only_copies ->
            Type = Cs#cstruct.type,
            create_ram_table(Tab, Type),
            DatFile = mnesia_lib:tab2dat(Tab),
            Repair = mnesia_monitor:get_env(auto_repair),
            case mnesia_lib:dets_to_ets(Tab, Tab, DatFile, Type, Repair, no) of
                loaded ->
                    ok;
                LoadReason ->
                    Err = "Failed to copy disc data to ram",
                    mnesia:abort({system_limit, Tab, {Err,LoadReason}})
            end;

        true ->
            ignore
    end,
    {true, mandatory};

%% Storage type change of a replica on another node: nothing to prepare
%% here, but the log must be dumped.
prepare_op(_Tid, {op, change_table_copy_type, N, _FromS, _ToS, _TabDef}, _WaitFor)
  when N /= node() ->
    {true, mandatory};

prepare_op(_Tid, {op, delete_table, _TabDef}, _WaitFor) ->
    {true, mandatory};

%% Dump a ram_copies table to disc; the op is replaced by one that
%% carries the table size instead of 'unknown'.
prepare_op(_Tid, {op, dump_table, unknown, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,
    case lists:member(node(), Cs#cstruct.ram_copies) of
        true ->
            case mnesia_monitor:use_dir() of
                true ->
                    mnesia_log:ets2dcd(Tab, dmp),
                    Size = mnesia:table_info(Tab, size),
                    {true, [{op, dump_table, Size, TabDef}], optional};
                false ->
                    mnesia:abort({has_no_disc, node()})
            end;
        false ->
            {false, optional}
    end;

prepare_op(_Tid, {op, add_snmp, Ustruct, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        unknown ->
            {true, optional};
        Storage ->
            Tab = Cs#cstruct.name,
            Stab = mnesia_snmp_hook:create_table(Ustruct, Tab, Storage),
            mnesia_lib:set({Tab, {index, snmp}}, Stab),
            {true, optional}
    end;

prepare_op(_Tid, {op, transform, ignore, _TabDef}, _WaitFor) ->
    {true, mandatory}; %% Apply schema changes only.
%% Run the transform fun over every object of the local replica and
%% replace the op with the resulting record ops (see transform_objs/8).
prepare_op(_Tid, {op, transform, Fun, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        unknown ->
            {true, mandatory};
        Storage ->
            Tab = Cs#cstruct.name,
            RecName = Cs#cstruct.record_name,
            Type = Cs#cstruct.type,
            NewArity = length(Cs#cstruct.attributes) + 1,
            mnesia_lib:db_fixtable(Storage, Tab, true),
            FirstKey = mnesia_lib:db_first(Tab),
            Op = {op, transform, Fun, TabDef},
            Outcome = (catch transform_objs(Fun, Tab, RecName, FirstKey,
                                            NewArity, Storage, Type, [Op])),
            %% The table is released whatever the outcome was
            mnesia_lib:db_fixtable(Storage, Tab, false),
            case Outcome of
                {'EXIT', Reason} ->
                    exit({"Bad transform function", Tab, Fun, node(), Reason});
                Objs ->
                    {true, Objs, mandatory}
            end
    end;

prepare_op(Tid, {op, merge_schema, TabDef}, _WaitFor) ->
    Cs = list2cs(TabDef),
    case verify_merge(Cs) of
        ok ->
            {true, optional};
        Error ->
            verbose("Merge_Schema ~p failed on ~p: ~p~n", [Tid,node(),Error]),
            mnesia:abort({bad_commit, Error})
    end;
%% Any other op needs no preparation.
prepare_op(_Tid, _Op, _WaitFor) ->
    {true, optional}.

%% Create the named, public ets table backing a ram resident replica.
%% Aborts the transaction with a system_limit reason on failure.
create_ram_table(Tab, Type) ->
    EtsOpts = [{keypos, 2}, public, named_table, Type],
    case mnesia_monitor:unsafe_mktab(Tab, EtsOpts) of
        Tab ->
            ok;
        {error,Reason} ->
            Err = "Failed to create ets table",
            mnesia:abort({system_limit, Tab, {Err,Reason}})
    end.
%% Create an empty .DCD log file for a disc_copies table. Any old file
%% with the same name is removed first. Aborts with system_limit on failure.
create_disc_table(Tab) ->
    File = mnesia_lib:tab2dcd(Tab),
    file:delete(File),
    LogArgs = [{file, File}, {name, {mnesia,create}},
               {repair, false}, {mode, read_write}],
    case mnesia_monitor:open_log(LogArgs) of
        {ok,Log} ->
            mnesia_monitor:unsafe_close_log(Log),
            ok;
        {error,Reason} ->
            Err = "Failed to create disc table",
            mnesia:abort({system_limit, Tab, {Err,Reason}})
    end.

%% Create and open the dets file for a disc_only_copies table. Any old
%% file with the same name is removed first. Aborts with system_limit on
%% failure.
create_disc_only_table(Tab,Type) ->
    OldFile = mnesia_lib:tab2dat(Tab),
    file:delete(OldFile),
    DetsArgs = [{file, mnesia_lib:tab2dat(Tab)},
                {type, mnesia_lib:disk_type(Tab, Type)},
                {keypos, 2},
                {repair, mnesia_monitor:get_env(auto_repair)}],
    case mnesia_monitor:unsafe_open_dets(Tab, DetsArgs) of
        {ok, _} ->
            ok;
        {error,Reason} ->
            Err = "Failed to create disc table",
            mnesia:abort({system_limit, Tab, {Err,Reason}})
    end.


%% Wait for a {sync_trans, Pid} message from every node in the list.
%% Returns the collected pids, or {abort, Msg} as soon as any other
%% message arrives.
receive_sync([], Pids) ->
    Pids;
receive_sync(Nodes, Pids) ->
    receive
        {sync_trans, Pid} ->
            Remaining = lists:delete(node(Pid), Nodes),
            receive_sync(Remaining, [Pid | Pids]);
        Other ->
            {abort, Other}
    end.

%% Runs in a helper process spawned by prepare_op/3 (del_table_copy).
%% Takes a write lock on Tab, asks all active schema replicas to update
%% their where_to_read and reports the outcome to Father. On success the
%% process keeps the lock until it receives any message.
lock_del_table(Tab, Node, Cs, Father) ->
    Ns = val({schema, active_replicas}),
    process_flag(trap_exit,true),
    IsFailure =
        fun(ok) ->
                false;
           ({badrpc, {'EXIT', {undef, _}}}) ->
                %% This will be the case we talks with elder nodes
                %% than 3.8.2, they will set where_to_read without
                %% getting a lock.
                false;
           (_) ->
                true
        end,
    Lock =
        fun() ->
                mnesia:write_lock_table(Tab),
                {Replies, []} =
                    rpc:multicall(Ns, ?MODULE, set_where_to_read, [Tab, Node, Cs]),
                case lists:filter(IsFailure, Replies) of
                    [] ->
                        Father ! {self(), updated},
                        %% When transaction is commited the process dies
                        %% and the lock is released.
                        receive _ -> ok end;
                    Failures ->
                        Father ! {self(), {bad_commit, Failures}}
                end,
                ok
        end,
    case mnesia:transaction(Lock) of
        {atomic, ok} ->
            ok;
        {aborted, Reason} ->
            Father ! {self(), Reason}
    end,
    unlink(Father),
    unlink(whereis(mnesia_tm)),
    exit(normal).

%% Called (via rpc from lock_del_table/4) when the replica on Node is
%% about to be deleted. If this node currently reads Tab from Node, and
%% the table does not have local content, a new where_to_read is chosen
%% with Node excluded. Always returns ok.
set_where_to_read(Tab, Node, Cs) ->
    case mnesia_lib:val({Tab, where_to_read}) of
        Node ->
            case Cs#cstruct.local_content of
                true ->
                    ok;
                false ->
                    mnesia_lib:set_remote_where_to_read(Tab, [Node]),
                    ok
            end;
        _ ->
            ok
    end.

%% Walk the table key by key, starting at Key, and collect the record
%% ops that turn the stored objects into their transformed versions.
%% Build up the list in reverse order.
%%
%% For each key transform_obj/9 yields {Ws, Ds}: the objects to write and
%% one entry per object that the transform fun asked to delete.
transform_objs(_Fun, _Tab, _RT, '$end_of_table', _NewArity, _Storage, _Type, Acc) ->
    Acc;
transform_objs(Fun, Tab, RecName, Key, A, Storage, Type, Acc) ->
    Objs = mnesia_lib:db_get(Tab, Key),
    NextKey = mnesia_lib:db_next_key(Tab, Key),
    Oid = {Tab, Key},
    NewObjs = {Ws, Ds} = transform_obj(Tab, RecName, Key, Fun, Objs, A, Type, [], []),
    if
        NewObjs == {[], []} ->
            %% Nothing changed for this key
            transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type, Acc);
        Type == bag ->
            %% Acc is reversed later, so the delete runs before the write
            transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
                           [{op, rec, Storage, {Oid, Ws, write}},
                            {op, rec, Storage, {Oid, [Oid], delete}} | Acc]);
        Ds == [] ->
            %% Type is set or ordered_set, no need to delete the record first
            transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
                           [{op, rec, Storage, {Oid, Ws, write}} | Acc]);
        Ws == [] ->
            %% Only deletions for this key. Ds merely holds 'delete'
            %% markers, so it must never be handed to a write op; emit a
            %% delete op for the key instead.
            transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
                           [{op, rec, Storage, {Oid, [Oid], delete}} | Acc]);
        true ->
            transform_objs(Fun, Tab, RecName, NextKey, A, Storage, Type,
                           [{op, rec, Storage, {Oid, Ws, write}},
                            {op, rec, Storage, {Oid, Ds, delete}} | Acc])
    end.

%% Apply Fun to every object stored under one key and sort the results
%% into objects to write (Ws) and deletion markers (Ds). Both lists are
%% returned in the order the objects were visited.
%% Exits if Fun returns a tuple of the wrong size, or changes the record
%% name or the key.
transform_obj(Tab, RecName, Key, Fun, [Obj|Rest], NewArity, Type, Ws, Ds) ->
    Transformed = Fun(Obj),
    %% NOTE: the order of the tests matters. For the atom 'delete' the
    %% size/1 and element/2 guards fail (count as false), which is what
    %% lets control reach the 'delete' test.
    if
        size(Transformed) /= NewArity ->
            exit({"Bad arity", Obj, Transformed});
        Transformed == Obj ->
            %% Unchanged, nothing to write
            transform_obj(Tab, RecName, Key, Fun, Rest, NewArity, Type, Ws, Ds);
        RecName == element(1, Transformed), Key == element(2, Transformed) ->
            transform_obj(Tab, RecName, Key, Fun, Rest, NewArity,
                          Type, [Transformed | Ws], Ds);
        Transformed == delete ->
            case Type of
                bag -> %% Just don't write that object
                    transform_obj(Tab, RecName, Key, Fun, Rest,
                                  NewArity, Type, Ws, Ds);
                _ ->
                    transform_obj(Tab, RecName, Key, Fun, Rest, NewArity,
                                  Type, Ws, [Transformed | Ds])
            end;
        true ->
            exit({"Bad key or Record Name", Obj, Transformed})
    end;
transform_obj(_Tab, _RecName, _Key, _Fun, [], _NewArity, _Type, Ws, Ds) ->
    {lists:reverse(Ws), lists:reverse(Ds)}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Undo prepare of commit
%% Releases the schema commit lock and reverts whatever the schema ops
%% of the commit record prepared. The commit record is returned as is.
undo_prepare_commit(Tid, Commit) ->
    case Commit#commit.schema_ops of
        [] ->
            ignore;
        SchemaOps ->
            %% Catch to allow failure mnesia_controller may not be started
            catch mnesia_controller:release_schema_commit_lock(),
            undo_prepare_ops(Tid, SchemaOps)
    end,
    Commit.

%% Undo in reverse order
%% Only 'op' and 'restore_op' tuples are handed to undo_prepare_op/2;
%% every other element is skipped.
undo_prepare_ops(Tid, [Op | Ops]) ->
    Tag = element(1, Op),
    case Tag == op orelse Tag == restore_op of
        true ->
            %% Later ops first
            undo_prepare_ops(Tid, Ops),
            undo_prepare_op(Tid, Op);
        false ->
            undo_prepare_ops(Tid, Ops)
    end;
undo_prepare_ops(_Tid, []) ->
    [].

%% Revert the effects of the matching prepare_op/3 clause.

undo_prepare_op(_Tid, {op, announce_im_running, _Node, _, _Running, _RemoteRunning}) ->
    %% prepare_op is only set if the announce was actually performed
    case ?catch_val(prepare_op) of
        {announce_im_running, NewNodes} ->
            unannounce_im_running(NewNodes);
        _Else ->
            ok
    end;

undo_prepare_op(_Tid, {op, sync_trans}) ->
    ok;

undo_prepare_op(Tid, {op, create_table, TabDef}) ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,
    mnesia_lib:unset({Tab, create_table}),
    delete_cstruct(Tid, Cs),
    %% Remove whatever local storage was created for the table
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        unknown ->
            ok;
        ram_copies ->
            ram_delete_table(Tab, ram_copies);
        disc_copies ->
            ram_delete_table(Tab, disc_copies),
            %% disc_delete_table(Tab, Storage),
            file:delete(mnesia_lib:tab2dcd(Tab));
        disc_only_copies ->
            mnesia_monitor:unsafe_close_dets(Tab),
            %% disc_delete_table(Tab, Storage),
            file:delete(mnesia_lib:tab2dat(Tab))
    end;

undo_prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}) ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,
    if
        Tab == schema ->
            true; % Nothing to prepare
        Node == node() ->
            mnesia_checkpoint:tm_del_copy(Tab, Node),
            mnesia_controller:unannounce_add_table_copy(Tab, Node),
            if
                Storage == disc_only_copies; Tab == schema ->
                    mnesia_monitor:close_dets(Tab),
                    file:delete(mnesia_lib:tab2dat(Tab));
                true ->
                    file:delete(mnesia_lib:tab2dcd(Tab))
            end,
            ram_delete_table(Tab, Storage),
            WithoutCopy = new_cs(Cs, Node, Storage, del),
            insert_cstruct(Tid, WithoutCopy, true); % Don't care about the version
        Node /= node() ->
            mnesia_controller:unannounce_add_table_copy(Tab, Node),
            WithoutCopy = new_cs(Cs, Node, Storage, del),
            insert_cstruct(Tid, WithoutCopy, true) % Don't care about the version
    end;

undo_prepare_op(_Tid, {op, del_table_copy, _, Node, TabDef})
  when Node == node() ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,
    mnesia_lib:set({Tab, where_to_read}, Node);


undo_prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef})
  when N == node() ->
    Cs = list2cs(TabDef),
    Tab = Cs#cstruct.name,
    %% Note the swapped storage types: back from ToS to FromS
    mnesia_checkpoint:tm_change_table_copy_type(Tab, ToS, FromS),
    Dmp = mnesia_lib:tab2dmp(Tab),

    case {FromS, ToS} of
        {ram_copies, disc_copies} when Tab == schema ->
            file:delete(Dmp),
            mnesia_log:purge_some_logs(),
            set(use_dir, false);
        {ram_copies, disc_copies} ->
            file:delete(Dmp);
        {ram_copies, disc_only_copies} ->
            file:delete(Dmp);
        {disc_only_copies, _} ->
            ram_delete_table(Tab, ram_copies);
        _ ->
            ignore
    end;

undo_prepare_op(_Tid, {op, dump_table, _Size, TabDef}) ->
    Cs = list2cs(TabDef),
    case lists:member(node(), Cs#cstruct.ram_copies) of
        true ->
            Tab = Cs#cstruct.name,
            file:delete(mnesia_lib:tab2dmp(Tab));
        false ->
            ignore
    end;

undo_prepare_op(_Tid, {op, add_snmp, _Ustruct, TabDef}) ->
    Cs = list2cs(TabDef),
    case mnesia_lib:cs_to_storage_type(node(), Cs) of
        unknown ->
            true;
        _Storage ->
            Tab = Cs#cstruct.name,
            case ?catch_val({Tab, {index, snmp}}) of
                {'EXIT',_} ->
                    ignore;
                Stab ->
                    mnesia_snmp_hook:delete_table(Tab, Stab),
                    mnesia_lib:unset({Tab, {index, snmp}})
            end
    end;

%% Nothing to undo for the remaining ops.
undo_prepare_op(_Tid, _Op) ->
    ignore.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Remove the ram resident parts of a table replica: transient indexes,
%% a possible snmp table and the ets table itself. Nothing is done for
%% storage types 'unknown' and 'disc_only_copies'.
ram_delete_table(_Tab, unknown) ->
    ignore;
ram_delete_table(_Tab, disc_only_copies) ->
    ignore;
ram_delete_table(Tab, Storage) ->
    %% delete possible index files and data .....
    %% Got to catch this since if no info has been set in the
    %% mnesia_gvar it will crash
    catch mnesia_index:del_transient(Tab, Storage),
    case ?catch_val({Tab, {index, snmp}}) of
        {'EXIT', _} ->
            ignore;
        Etab ->
            catch mnesia_snmp_hook:delete_table(Tab, Etab)
    end,
    catch ?ets_delete_table(Tab).

%% Delete all files in Dir that carry one of the known Mnesia suffixes,
%% except those listed in KeepFiles.
purge_dir(Dir, KeepFiles) ->
    purge_dir(Dir, KeepFiles, known_suffixes()).

%% As purge_dir/2, but with an explicit list of suffixes. A directory
%% that does not exist is silently accepted.
purge_dir(Dir, KeepFiles, Suffixes) ->
    case dir_exists(Dir) of
        true ->
            {ok, AllFiles} = file:list_dir(Dir),
            purge_known_files(AllFiles, KeepFiles, Dir, Suffixes);
        false ->
            ok
    end.

%% Clean up the Mnesia directory. As long as the schema .DAT file exists
%% only temporary files are removed; if it is missing, every file with a
%% known suffix is removed and use_dir is switched off.
purge_tmp_files() ->
    case mnesia_monitor:use_dir() of
        true ->
            Dir = mnesia_lib:dir(),
            KeepFiles = [],
            SchemaDat = mnesia_lib:tab2dat(schema),
            case mnesia_lib:exists(SchemaDat) of
                true ->
                    purge_dir(Dir, KeepFiles, tmp_suffixes());
                false ->
                    %% Interrupted change of storage type
                    %% for schema table
                    purge_dir(Dir, KeepFiles, known_suffixes()),
                    mnesia_lib:set(use_dir, false)
            end;

        false ->
            ok
    end.

%% Delete every file in the list that has one of the given suffixes and
%% is not a member of KeepFiles. The outcome of each delete is ignored.
purge_known_files(Files, KeepFiles, Dir, Suffixes) ->
    Purge =
        fun(File) ->
                Keep = lists:member(File, KeepFiles),
                case (not Keep) andalso has_known_suffix(File, Suffixes, false) of
                    true ->
                        file:delete(filename:join([Dir, File]));
                    false ->
                        ignore
                end
        end,
    lists:foreach(Purge, Files),
    ok.

%% True if File ends with any of the suffixes. The third argument is the
%% result so far: once it is true no more suffixes are examined.
has_known_suffix(_File, _Suffixes, true) ->
    true;
has_known_suffix(_File, [], Bool) ->
    Bool;
has_known_suffix(File, Suffixes, false) ->
    lists:any(fun(Suffix) -> lists:suffix(Suffix, File) end, Suffixes).

%% All file suffixes handled by the purge functions.
known_suffixes() ->
    real_suffixes() ++ tmp_suffixes().

real_suffixes() ->
    [".DAT", ".LOG", ".BUP", ".DCL", ".DCD"].

tmp_suffixes() ->
    [".TMP", ".BUPTMP", ".RET", ".DMP"].

%% Print the properties of all tables, in table name order.
info() ->
    Tabs = lists:sort(val({schema, tables})),
    lists:foreach(fun info/1, Tabs),
    ok.

%% Print the properties of one table.
info(Tab) ->
    Props = get_table_properties(Tab),
    io:format("-- Properties for ~w table --- ~n",[Tab]),
    info2(Tab, Props).

%% Print one line per property, leaving out cstruct and frag_hash, and
%% finish with an empty line.
info2(Tab, [{cstruct, _V} | Tail]) -> % Ignore cstruct
    info2(Tab, Tail);
info2(Tab, [{frag_hash, _V} | Tail]) -> % Ignore frag_hash
    info2(Tab, Tail);
info2(Tab, [{Prop, Val} | Tail]) ->
    io:format("~-20w -> ~p~n",[Prop,Val]),
    info2(Tab, Tail);
info2(_, []) ->
    io:format("~n", []).

%% Collect the {Item, Value} pairs stored for Tab in mnesia_gvar and add
%% size, memory and master_nodes. The result is sorted. A table without
%% any entries gives [].
get_table_properties(Tab) ->
    Pattern = {{Tab, '_'}, '_'},
    case catch mnesia_lib:db_match_object(ram_copies, mnesia_gvar, Pattern) of
        {'EXIT', _} ->
            mnesia:abort({no_exists, Tab, all});
        RawGvar ->
            case [{Item, Val} || {{_Tab, Item}, Val} <- RawGvar] of
                [] ->
                    [];
                Gvar ->
                    Extra = [{size, mnesia:table_info(Tab, size)},
                             {memory, mnesia:table_info(Tab, memory)},
                             {master_nodes, mnesia:table_info(Tab, master_nodes)}],
                    lists:sort(Extra ++ Gvar)
            end
    end.

%%%%%%%%%%% RESTORE %%%%%%%%%%%

%% State threaded through a restore.
-record(r, {iter = schema,
            module,                     % backup callback module
            table_options = [],         % [{Tab, RestoreOp}] from the arguments
            default_op = clear_tables,  % op for tables not in table_options
            tables = [],                % [{Tab, Where, Snmp, RecName}]
            opaque,                     % handed to the backup module
            insert_op = error_fun,
            recs = error_recs
           }).

%% Restore tables from the backup identified by Opaque.
%% Returns the result of the schema transaction, or {aborted, Reason}
%% if the arguments are bad or the backup schema cannot be read.
restore(Opaque) ->
    restore(Opaque, [], mnesia_monitor:get_env(backup_module)).

restore(Opaque, Args) when is_list(Args) ->
    restore(Opaque, Args, mnesia_monitor:get_env(backup_module));
restore(_Opaque, BadArg) ->
    {aborted, {badarg, BadArg}}.

restore(Opaque, Args, Module) when is_list(Args), is_atom(Module) ->
    R0 = #r{opaque = Opaque, module = Module},
    %% check_restore_arg/2 exits on a bad argument
    case catch lists:foldl(fun check_restore_arg/2, R0, Args) of
        R when is_record(R, r) ->
            case mnesia_bup:read_schema(R#r.module, Opaque) of
                {error, Reason} ->
                    {aborted, Reason};
                BupSchema ->
                    schema_transaction(fun() -> do_restore(R, BupSchema) end)
            end;
        {'EXIT', Reason} ->
            {aborted, Reason}
    end;
restore(_Opaque, Args, Module) ->
    {aborted, {badarg, Args, Module}}.

%% Fold function over the restore arguments: validates one argument and
%% records it in the #r{} state. Exits with {badarg, _} on anything
%% unknown. The schema table may neither be cleared nor recreated.
check_restore_arg({module, Mod}, R) when is_atom(Mod) ->
    R#r{module = Mod};

check_restore_arg({clear_tables, List}, R) when is_list(List) ->
    case lists:member(schema, List) of
        true ->
            exit({badarg, {clear_tables, schema}});
        false ->
            Options = [{Tab, clear_tables} || Tab <- List],
            R#r{table_options = R#r.table_options ++ Options}
    end;
check_restore_arg({recreate_tables, List}, R) when is_list(List) ->
    case lists:member(schema, List) of
        true ->
            exit({badarg, {recreate_tables, schema}});
        false ->
            Options = [{Tab, recreate_tables} || Tab <- List],
            R#r{table_options = R#r.table_options ++ Options}
    end;
check_restore_arg({keep_tables, List}, R) when is_list(List) ->
    Options = [{Tab, keep_tables} || Tab <- List],
    R#r{table_options = R#r.table_options ++ Options};
check_restore_arg({skip_tables, List}, R) when is_list(List) ->
    Options = [{Tab, skip_tables} || Tab <- List],
    R#r{table_options = R#r.table_options ++ Options};
check_restore_arg({default_op, Op}, R) ->
    Allowed = [clear_tables, recreate_tables, keep_tables, skip_tables],
    case lists:member(Op, Allowed) of
        true ->
            ok;
        false ->
            exit({badarg, {bad_default_op, Op}})
    end,
    R#r{default_op = Op};

check_restore_arg(BadArg,_) ->
    exit({badarg, BadArg}).

%% Body of the restore schema transaction: work through the backup
%% schema, queue a restore_op and return the names of the tables that
%% take part in the restore.
do_restore(R, BupSchema) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    R2 = restore_schema(BupSchema, R),
    insert_schema_ops(TidTs, [{restore_op, R2}]),
    [element(1, TabStruct) || TabStruct <- R2#r.tables].

%% Iterate over the backup with Fun as insert operation and Recs as the
%% initial accumulator. Returns the final accumulator, or aborts the
%% transaction if the iteration fails.
arrange_restore(R, Fun, Recs) ->
    R2 = R#r{insert_op = Fun, recs = Recs},
    Result = mnesia_bup:iterate(R#r.module, fun restore_items/4,
                                R#r.opaque, R2),
    case Result of
        {ok, R3} ->
            R3#r.recs;
        {error, Reason} ->
            mnesia:abort(Reason)
    end.

%% Callback for mnesia_bup:iterate/4. Records are handled in runs that
%% belong to the same table: runs for tables taking part in the restore
%% are fed to the insert operation, all other runs are skipped.
restore_items([Rec | Recs], Header, Schema, R) ->
    Tab = element(1, Rec),
    case lists:keysearch(Tab, 1, R#r.tables) of
        {value, {Tab, Where0, Snmp, RecName}} ->
            Where = case Where0 of
                        undefined ->
                            val({Tab, where_to_commit});
                        _ ->
                            Where0
                    end,
            {Rest, NRecs} = restore_tab_items([Rec | Recs], Tab,
                                              RecName, Where, Snmp,
                                              R#r.recs, R#r.insert_op),
            restore_items(Rest, Header, Schema, R#r{recs = NRecs});
        false ->
            Rest = skip_tab_items(Recs, Tab),
            restore_items(Rest, Header, Schema, R)
    end;

restore_items([], _Header, _Schema, R) ->
    R.

%% The restore operation to use for Tab: the one given explicitly in the
%% arguments, or else the default operation.
restore_func(Tab, R) ->
    case lists:keysearch(Tab, 1, R#r.table_options) of
        {value, {Tab, OP}} ->
            OP;
        false ->
            R#r.default_op
    end.

%% Build a [{Node, StorageType}] list from a table definition list.
where_to_commit(Tab, CsList) ->
    Ram =   [{N, ram_copies} || N <- pick(Tab, ram_copies, CsList, [])],
    Disc =  [{N, disc_copies} || N <- pick(Tab, disc_copies, CsList, [])],
    DiscO = [{N, disc_only_copies} || N <- pick(Tab, disc_only_copies, CsList, [])],
    Ram ++ Disc ++ DiscO.

%% Walk the schema section of the backup and decide, table by table,
%% what the restore shall do. R#r.tables collects one
%% {Tab, Where, Snmp, RecName} entry for every table whose records are
%% to be restored.
%%
%% Changes of the Meta info of schema itself is not allowed
restore_schema([{schema, schema, _List} | Schema], R) ->
    restore_schema(Schema, R);
restore_schema([{schema, Tab, List} | Schema], R) ->
    case restore_func(Tab, R) of
        clear_tables ->
            do_clear_table(Tab),
            Snmp = val({Tab, snmp}),
            RecName = val({Tab, record_name}),
            R2 = R#r{tables = [{Tab, undefined, Snmp, RecName} | R#r.tables]},
            restore_schema(Schema, R2);
        recreate_tables ->
            case ?catch_val({Tab, cstruct}) of
                {'EXIT', _} ->
                    %% The table does not exist here; lock it on the
                    %% running nodes named in the backup definition.
                    TidTs = {_Mod, Tid, Ts} = get(mnesia_activity_state),
                    RunningNodes = val({current, db_nodes}),
                    Nodes = mnesia_lib:intersect(mnesia_lib:cs_to_nodes(list2cs(List)),
                                                 RunningNodes),
                    mnesia_locker:wlock_no_exist(Tid, Ts#tidstore.store, Tab, Nodes),
                    TidTs;
                _ ->
                    TidTs = get_tid_ts_and_lock(Tab, write)
            end,
            %% The recreated table gets a cookie of its own
            NC = {cookie, ?unique_cookie},
            List2 = lists:keyreplace(cookie, 1, List, NC),
            Where = where_to_commit(Tab, List2),
            Snmp = pick(Tab, snmp, List2, []),
            RecName = pick(Tab, record_name, List2, Tab),
            insert_schema_ops(TidTs, [{op, restore_recreate, List2}]),
            R2 = R#r{tables = [{Tab, Where, Snmp, RecName} | R#r.tables]},
            restore_schema(Schema, R2);
        keep_tables ->
            get_tid_ts_and_lock(Tab, write),
            Snmp = val({Tab, snmp}),
            RecName = val({Tab, record_name}),
            R2 = R#r{tables = [{Tab, undefined, Snmp, RecName} | R#r.tables]},
            restore_schema(Schema, R2);
        skip_tables ->
            restore_schema(Schema, R)
    end;

restore_schema([{schema, Tab} | Schema], R) ->
    %% The backup says that the table was deleted.
    do_delete_table(Tab),
    %% The entries of R#r.tables are tuples keyed on the table name, so
    %% the entry has to be removed by key. (A plain lists:delete/2 with
    %% the table name never matches any entry.)
    Tabs = lists:keydelete(Tab, 1, R#r.tables),
    restore_schema(Schema, R#r{tables = Tabs});
restore_schema([], R) ->
    R.

%% Feed the leading run of records that belong to Tab to the insert
%% operation Op. Returns {RemainingRecords, NewAccumulator}.
restore_tab_items([Rec | Rest], Tab, RecName, Where, Snmp, Recs, Op)
  when element(1, Rec) == Tab ->
    NewRecs = Op(Rec, Recs, RecName, Where, Snmp),
    restore_tab_items(Rest, Tab, RecName, Where, Snmp, NewRecs, Op);

restore_tab_items(Rest, _Tab, _RecName, _Where, _Snmp, Recs, _Op) ->
    {Rest, Recs}.

%% Drop the leading run of records that belong to Tab.
skip_tab_items([Rec| Rest], Tab)
  when element(1, Rec) == Tab ->
    skip_tab_items(Rest, Tab);
skip_tab_items(Recs, _) ->
    Recs.

%%%%%%%%% Dump tables %%%%%%%%%%%%%
%% Dump the given ram_copies tables to disc in a schema transaction.
dump_tables(Tabs) when is_list(Tabs) ->
    schema_transaction(fun() -> do_dump_tables(Tabs) end);
dump_tables(Tabs) ->
    {aborted, {bad_type, Tabs}}.

do_dump_tables(Tabs) ->
    TidTs = get_tid_ts_and_lock(schema, write),
    insert_schema_ops(TidTs, make_dump_tables(Tabs)).

%% One dump_table op per table. The schema table cannot be dumped, and
%% neither can a table that has any disc resident replica.
make_dump_tables([schema | _Tabs]) ->
    mnesia:abort({bad_type, schema});
make_dump_tables([Tab | Tabs]) ->
    get_tid_ts_and_lock(Tab, read),
    TabDef = get_create_list(Tab),
    DiscResident = val({Tab, disc_copies}) ++ val({Tab, disc_only_copies}),
    verify([], DiscResident,
           {"Only allowed on ram_copies", Tab, DiscResident}),
    [{op, dump_table, unknown, TabDef} | make_dump_tables(Tabs)];
make_dump_tables([]) ->
    [].

%% Merge the local schema with the schema on other nodes
merge_schema() ->
    schema_transaction(fun() -> do_merge_schema() end).

%% Body of the merge_schema transaction. Picks one connected node that is
%% not yet running together with this one, fetches its table definitions
%% and queues the schema ops needed to merge the two schemas.
%% Returns {merged, Running, RemoteRunning}, not_merged when there is no
%% node left to merge with, or a {"Cannot get cstructs", Node, Reason}
%% tuple when the definitions could not be fetched.
do_merge_schema() ->
    {_Mod, Tid, Ts} = get_tid_ts_and_lock(schema, write),
    Connected = val(recover_nodes),
    Running = val({current, db_nodes}),
    Store = Ts#tidstore.store,
    %% Verify that all nodes are locked that might not be the
    %% case, if this trans where queued when new nodes where added.
    LockedNodes = ets:lookup_element(Store, nodes, 2),
    case Running -- LockedNodes of
        [] ->
            ok; %% All known nodes are locked
        Miss ->
            %% Abort! We don't want the sideeffects below to be executed
            mnesia:abort({bad_commit, {missing_lock, Miss}})
    end,
    case Connected -- Running of
        [] ->
            %% No more nodes to merge schema with
            not_merged;
        [Node | _] ->
            %% Time for a schema merging party!
            mnesia_locker:wlock_no_exist(Tid, Store, schema, [Node]),
            case rpc:call(Node, mnesia_controller, get_cstructs, []) of
                {cstructs, Cstructs, RemoteRunning1} ->
                    LockedAlready = Running ++ [Node],
                    {New, Old} = mnesia_recover:connect_nodes(RemoteRunning1),
                    RemoteRunning = mnesia_lib:intersect(New ++ Old, RemoteRunning1),
                    case RemoteRunning /= RemoteRunning1 of
                        true ->
                            mnesia_lib:error("Mnesia on ~p could not connect to node(s) ~p~n",
                                             [node(), RemoteRunning1 -- RemoteRunning]);
                        false ->
                            ok
                    end,
                    NeedsLock = RemoteRunning -- LockedAlready,
                    mnesia_locker:wlock_no_exist(Tid, Store, schema, NeedsLock),
                    {value, SchemaCs} =
                        lists:keysearch(schema, #cstruct.name, Cstructs),

                    %% Announce that Node is running
                    Announce = [{op, announce_im_running, node(),
                                 cs2list(SchemaCs), Running, RemoteRunning}],
                    do_insert_schema_ops(Store, Announce),

                    %% Introduce remote tables to local node
                    do_insert_schema_ops(Store, make_merge_schema(Node, Cstructs)),

                    %% Introduce local tables to remote nodes
                    Tabs = val({schema, tables}),
                    LocalOnlyOps =
                        [{op, merge_schema, get_create_list(T)}
                         || T <- Tabs,
                            not lists:keymember(T, #cstruct.name, Cstructs)],
                    do_insert_schema_ops(Store, LocalOnlyOps),

                    %% Ensure that the txn will be committed on all nodes
                    NewNodes = RemoteRunning -- Running,
                    mnesia_lib:set(prepare_op, {announce_im_running,NewNodes}),
                    announce_im_running(NewNodes, SchemaCs),
                    {merged, Running, RemoteRunning};
                {error, Reason} ->
                    {"Cannot get cstructs", Node, Reason};
                {badrpc, Reason} ->
                    {"Cannot get cstructs", Node, {badrpc, Reason}}
            end
    end.

%% The merge ops for all remote table definitions, in list order.
make_merge_schema(Node, Cstructs) ->
    lists:append([do_make_merge_schema(Node, Cs) || Cs <- Cstructs]).

%% Merge definitions of schema table
%% Returns a list holding zero or one merge_schema op, or throws a
%% message when the two definitions cannot be reconciled.
do_make_merge_schema(Node, RemoteCs)
  when RemoteCs#cstruct.name == schema ->
    Cs = val({schema, cstruct}),
    Masters = mnesia_recover:get_master_nodes(schema),
    HasRemoteMaster = lists:member(Node, Masters),
    HasLocalMaster = lists:member(node(), Masters),
    Force = HasLocalMaster or HasRemoteMaster,
    %% What is the storage types opinions?
    StCsLocal = mnesia_lib:cs_to_storage_type(node(), Cs),
    StRcsLocal = mnesia_lib:cs_to_storage_type(node(), RemoteCs),
    StCsRemote = mnesia_lib:cs_to_storage_type(Node, Cs),
    StRcsRemote = mnesia_lib:cs_to_storage_type(Node, RemoteCs),

    if
        Cs#cstruct.cookie == RemoteCs#cstruct.cookie,
        Cs#cstruct.version == RemoteCs#cstruct.version ->
            %% Great, we have the same cookie and version
            %% and do not need to merge cstructs
            [];

        Cs#cstruct.cookie /= RemoteCs#cstruct.cookie,
        Cs#cstruct.disc_copies /= [],
        RemoteCs#cstruct.disc_copies /= [] ->
            %% Both cstructs involves disc nodes
            %% and we cannot merge them
            if
                HasLocalMaster == true,
                HasRemoteMaster == false ->
                    %% Choose local cstruct,
                    %% since it's the master
                    [{op, merge_schema, cs2list(Cs)}];

                HasRemoteMaster == true,
                HasLocalMaster == false ->
                    %% Choose remote cstruct,
                    %% since it's the master
                    [{op, merge_schema, cs2list(RemoteCs)}];

                true ->
                    CookieMsg =
                        io_lib:format("Incompatible schema cookies. "
                                      "Please, restart from old backup."
                                      "~w = ~w, ~w = ~w~n",
                                      [Node, cs2list(RemoteCs), node(), cs2list(Cs)]),
                    throw(CookieMsg)
            end;

        StCsLocal /= StRcsLocal, StRcsLocal /= unknown, StCsLocal /= ram_copies ->
            LocalMsg =
                io_lib:format("Incompatible schema storage types (local). "
                              "on ~w storage ~w, on ~w storage ~w~n",
                              [node(), StCsLocal, Node, StRcsLocal]),
            throw(LocalMsg);
        StCsRemote /= StRcsRemote, StCsRemote /= unknown, StRcsRemote /= ram_copies ->
            RemoteMsg =
                io_lib:format("Incompatible schema storage types (remote). "
                              "on ~w cs ~w, on ~w rcs ~w~n",
                              [node(), cs2list(Cs), Node, cs2list(RemoteCs)]),
            throw(RemoteMsg);

        Cs#cstruct.disc_copies /= [] ->
            %% Choose local cstruct,
            %% since it involves disc nodes
            [{op, merge_schema, cs2list(merge_cstructs(Cs, RemoteCs, Force))}];

        RemoteCs#cstruct.disc_copies /= [] ->
            %% Choose remote cstruct,
            %% since it involves disc nodes
            [{op, merge_schema, cs2list(merge_cstructs(RemoteCs, Cs, Force))}];

        Cs > RemoteCs ->
            %% Choose remote cstruct
            [{op, merge_schema, cs2list(merge_cstructs(RemoteCs, Cs, Force))}];

        true ->
            %% Choose local cstruct
            [{op, merge_schema, cs2list(merge_cstructs(Cs, RemoteCs, Force))}]
    end;

%% Merge definitions of normal table
do_make_merge_schema(Node, RemoteCs) ->
    Tab = RemoteCs#cstruct.name,
    Masters = mnesia_recover:get_master_nodes(schema),
    HasRemoteMaster = lists:member(Node, Masters),
    HasLocalMaster = lists:member(node(), Masters),
    Force = HasLocalMaster or HasRemoteMaster,
    case ?catch_val({Tab, cstruct}) of
        {'EXIT', _} ->
            %% A completely new table, created while Node was down
            [{op, merge_schema, cs2list(RemoteCs)}];
        Cs when Cs#cstruct.cookie == RemoteCs#cstruct.cookie ->
            if
                Cs#cstruct.version == RemoteCs#cstruct.version ->
                    %% We have exactly the same version of the
                    %% table def
                    [];

                Cs#cstruct.version > RemoteCs#cstruct.version ->
                    %% Oops, we have different versions
                    %% of the table def, lets merge them.
                    %% The only changes that may have occurred
                    %% is that new replicas may have been added.
                    Merged = merge_cstructs(Cs, RemoteCs, Force),
                    [{op, merge_schema, cs2list(Merged)}];

                Cs#cstruct.version < RemoteCs#cstruct.version ->
                    %% Oops, we have different versions
                    %% of the table def, lets merge them
                    Merged = merge_cstructs(RemoteCs, Cs, Force),
                    [{op, merge_schema, cs2list(Merged)}]
            end;
        Cs ->
            %% Different cookies, not possible to merge
            if
                HasLocalMaster == true,
                HasRemoteMaster == false ->
                    %% Choose local cstruct,
                    %% since it's the master
                    [{op, merge_schema, cs2list(Cs)}];

                HasRemoteMaster == true,
                HasLocalMaster == false ->
                    %% Choose remote cstruct,
                    %% since it's the master
                    [{op, merge_schema, cs2list(RemoteCs)}];

                true ->
                    Msg = io_lib:format("Bad cookie in table definition"
                                        " ~w: ~w = ~w, ~w = ~w~n",
                                        [Tab, node(), Cs, Node, RemoteCs]),
                    throw(Msg)
            end
    end.

%% Change of table definitions (cstructs) requires all replicas
%% of the table to be active. New replicas, db_nodes and tables
%% may however be added even if some replica is inactive. These
%% invariants must be enforced in order to allow merge of cstructs.
%%
%% Returns a new cstruct or issues a fatal error
merge_cstructs(Cs, RemoteCs, Force) ->
    verify_cstruct(Cs),
    case catch do_merge_cstructs(Cs, RemoteCs, Force) of
        {'EXIT', {aborted, _Reason}} when Force == true ->
            %% With Force set an aborted merge falls back to Cs
            Cs;
        {'EXIT', Reason} ->
            exit(Reason);
        MergedCs when is_record(MergedCs, cstruct) ->
            MergedCs;
        Thrown ->
            %% Re-throw whatever the merge threw
            throw(Thrown)
    end.

%% Merge the storage types node by node, then the versions, and verify
%% the outcome.
do_merge_cstructs(Cs, RemoteCs, Force) ->
    verify_cstruct(RemoteCs),
    LocalNodes = mnesia_lib:cs_to_nodes(Cs),
    RemoteNodes = mnesia_lib:cs_to_nodes(RemoteCs),
    Ns = mnesia_lib:uniq(LocalNodes ++ RemoteNodes),
    {AnythingNew, MergedCs} =
        merge_storage_type(Ns, false, Cs, RemoteCs, Force),
    MergedCs2 = merge_versions(AnythingNew, MergedCs, RemoteCs, Force),
    verify_cstruct(MergedCs2),
    MergedCs2.
%% Reconcile, node by node, the storage type that the local cstruct (Cs)
%% and the remote cstruct (RemoteCs) record for every node in the list.
%%
%% Returns {AnythingNew, MergedCs}. AnythingNew turns true as soon as one
%% node's storage type had to be changed in the local cstruct. A pair of
%% storage types that cannot be reconciled is skipped when Force is true;
%% otherwise a formatted error string is thrown.
merge_storage_type([], Changed, MergedCs, _RemoteCs, _Force) ->
    {Changed, MergedCs};
merge_storage_type([Node | Rest], Changed, Cs, RemoteCs, Force) ->
    LocalType = mnesia_lib:cs_to_storage_type(Node, Cs),
    RemoteType = mnesia_lib:cs_to_storage_type(Node, RemoteCs),
    case compare_storage_type(true, LocalType, RemoteType) of
        {same, _Type} ->
            merge_storage_type(Rest, Changed, Cs, RemoteCs, Force);
        {diff, NewType} ->
            Updated = change_storage_type(Node, NewType, Cs),
            merge_storage_type(Rest, true, Updated, RemoteCs, Force);
        incompatible when Force == true ->
            merge_storage_type(Rest, Changed, Cs, RemoteCs, Force);
        Verdict ->
            throw(io_lib:format("Cannot merge storage type for node ~w "
                                "in cstruct ~w with remote cstruct ~w (~w)~n",
                                [Node, Cs, RemoteCs, Verdict]))
    end.

%% Compare two storage types.
%%
%% Returns {same, Type} when they are equal, {diff, Winner} when one of
%% the accepted transitions applies (unknown -> anything,
%% ram_copies -> disc_copies, disc_copies -> disc_only_copies), and
%% incompatible otherwise. The first argument says whether the pair may
%% still be retried with the operands swapped.
compare_storage_type(_Retry, Type, Type) ->
    {same, Type};
compare_storage_type(_Retry, unknown, Known) ->
    {diff, Known};
compare_storage_type(_Retry, ram_copies, disc_copies) ->
    {diff, disc_copies};
compare_storage_type(_Retry, disc_copies, disc_only_copies) ->
    {diff, disc_only_copies};
compare_storage_type(true, Left, Right) ->
    %% Nothing matched in this direction; try once more, swapped.
    compare_storage_type(false, Right, Left);
compare_storage_type(false, _Left, _Right) ->
    incompatible.

%% Add Node to the replica list of the given storage type in Cs.
%% The list is passed through mnesia_lib:uniq/1 afterwards. Only the list
%% for the new storage type is touched.
change_storage_type(Node, ram_copies, Cs) ->
    Cs#cstruct{ram_copies =
                   mnesia_lib:uniq([Node | Cs#cstruct.ram_copies])};
change_storage_type(Node, disc_copies, Cs) ->
    Cs#cstruct{disc_copies =
                   mnesia_lib:uniq([Node | Cs#cstruct.disc_copies])};
change_storage_type(Node, disc_only_copies, Cs) ->
    Cs#cstruct{disc_only_copies =
                   mnesia_lib:uniq([Node | Cs#cstruct.disc_only_copies])}.

%% BUGBUG: Verify match of frag info; equality demanded for all but add_node

%% Check that two table definitions may be merged and, if so, compute the
%% version of the merged definition.
%%
%% Step 1: unless the table is the schema or Force is true, the cookies
%%         must be equal; otherwise a "Bad cookies" string is thrown.
%% Step 2: unless Force is true, the listed properties must be equal in
%%         both definitions; otherwise a "Cannot merge" string is thrown.
merge_versions(AnythingNew, Cs, RemoteCs, Force) ->
    if
        Cs#cstruct.name == schema ->
            ok;
        Cs#cstruct.cookie == RemoteCs#cstruct.cookie ->
            %% Not the schema (handled by the clause above), same cookie.
            ok;
        Force == true ->
            ok;
        true ->
            CookieMsg =
                io_lib:format("Bad cookies. Cannot merge definitions of "
                              "table ~w. Local = ~w, Remote = ~w~n",
                              [Cs#cstruct.name, Cs, RemoteCs]),
            throw(CookieMsg)
    end,
    if
        Cs#cstruct.name == RemoteCs#cstruct.name,
        Cs#cstruct.type == RemoteCs#cstruct.type,
        Cs#cstruct.local_content == RemoteCs#cstruct.local_content,
        Cs#cstruct.attributes == RemoteCs#cstruct.attributes,
        Cs#cstruct.index == RemoteCs#cstruct.index,
        Cs#cstruct.snmp == RemoteCs#cstruct.snmp,
        Cs#cstruct.access_mode == RemoteCs#cstruct.access_mode,
        Cs#cstruct.load_order == RemoteCs#cstruct.load_order,
        Cs#cstruct.user_properties == RemoteCs#cstruct.user_properties ->
            do_merge_versions(AnythingNew, Cs, RemoteCs);
        Force == true ->
            do_merge_versions(AnythingNew, Cs, RemoteCs);
        true ->
            MergeMsg =
                io_lib:format("Cannot merge definitions of "
                              "table ~w. Local = ~w, Remote = ~w~n",
                              [Cs#cstruct.name, Cs, RemoteCs]),
            throw(MergeMsg)
    end.

%% Pick the version for the merged cstruct.
%%
%% When nothing changed during the storage-type merge, MergedCs is returned
%% untouched. Otherwise a base {Major, Minor} is selected from the two
%% versions and handed to incr_version/1:
%%   - identical versions   -> the local {Major, Minor}
%%   - same major           -> that major with the larger minor
%%   - different majors     -> the larger major with minor 0
do_merge_versions(AnythingNew, MergedCs, RemoteCs) ->
    LocalVsn = MergedCs#cstruct.version,
    RemoteVsn = RemoteCs#cstruct.version,
    {{LocalMajor, LocalMinor}, _LocalDetail} = LocalVsn,
    {{RemoteMajor, RemoteMinor}, _RemoteDetail} = RemoteVsn,
    if
        AnythingNew == false ->
            MergedCs;
        true ->
            Base =
                if
                    LocalVsn == RemoteVsn ->
                        {LocalMajor, LocalMinor};
                    LocalMajor == RemoteMajor ->
                        {LocalMajor, lists:max([LocalMinor, RemoteMinor])};
                    true ->
                        {lists:max([LocalMajor, RemoteMajor]), 0}
                end,
            incr_version(MergedCs#cstruct{version = {Base, dummy}})
    end.
%% Basic sanity check of a remote table definition.
%%
%% Returns ok when the table is unknown locally, when the local and remote
%% definitions agree on this node's storage type, or when the local
%% definition has no storage type for this node. Returns
%% {merge_error, Cs, RemoteCs} when only the remote definition lacks a
%% storage type for this node and no schema master nodes are configured.
%% Everything else is left to the merger.
verify_merge(RemoteCs) ->
    Tab = RemoteCs#cstruct.name,
    MasterNodes = mnesia_recover:get_master_nodes(schema),
    HasRemoteMaster = MasterNodes /= [],
    case ?catch_val({Tab, cstruct}) of
        {'EXIT', _} ->
            ok;
        Cs ->
            Here = mnesia_lib:cs_to_storage_type(node(), Cs),
            There = mnesia_lib:cs_to_storage_type(node(), RemoteCs),
            if
                Here == There ->
                    ok;
                Here == unknown ->
                    ok;
                There == unknown, HasRemoteMaster == false ->
                    {merge_error, Cs, RemoteCs};
                true ->
                    %% Trust the merger
                    ok
            end
    end.

%% For each node, try to connect to it; when the node shows up in either
%% list returned by mnesia_recover:connect_nodes/1, register it as a
%% current db_node and as an active replica of the schema.
%% Always returns [].
announce_im_running([], _SchemaCs) ->
    [];
announce_im_running([Node | Rest], SchemaCs) ->
    {ListA, ListB} = mnesia_recover:connect_nodes([Node]),
    case lists:member(Node, ListA) or lists:member(Node, ListB) of
        true ->
            mnesia_lib:add({current, db_nodes}, Node),
            mnesia_controller:add_active_replica(schema, Node, SchemaCs);
        false ->
            ignore
    end,
    announce_im_running(Rest, SchemaCs).

%% Undo announce_im_running/2 for the given nodes: drop each one from the
%% current db_nodes and from the active schema replicas. Returns ok.
unannounce_im_running(Nodes) ->
    lists:foreach(fun(Node) ->
                          mnesia_lib:del({current, db_nodes}, Node),
                          mnesia_controller:del_active_replica(schema, Node)
                  end,
                  Nodes).

diff --git a/lib/mnesia/src/mnesia_snmp_hook.erl b/lib/mnesia/src/mnesia_snmp_hook.erl
new file mode 100644
index 0000000000..8b4b5231e1
--- /dev/null
+++ b/lib/mnesia/src/mnesia_snmp_hook.erl
@@ -0,0 +1,259 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_snmp_hook).

%% Hooks (called from mnesia)
-export([check_ustruct/1, create_table/3, delete_table/2,
         key_to_oid/2, key_to_oid/3, oid_to_key/2,
         update/1,
         get_row/2, get_next_index/2, get_mnesia_key/2]).

-export([key_to_oid_i/2, oid_to_key_1/2]). %% Test

-include("mnesia.hrl").

%% Read a mnesia system value; when the plain lookup fails, fall back to
%% mnesia_lib:other_val/2 with the failure reason.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%% Validate an SNMP user structure.
%% [] is the default value (table not SNMP'ified) and is accepted.
%% [{key, Types}] is accepted when every type is integer, string or
%% fix_string. Anything else is rejected.
check_ustruct([]) ->
    true;
check_ustruct([{key, Types}]) ->
    is_snmp_type(to_list(Types));
check_ustruct(_Other) ->
    false.

%% A tuple becomes the list of its elements; any other term becomes a
%% one-element list.
to_list(Term) when is_tuple(Term) -> tuple_to_list(Term);
to_list(Term) -> [Term].

%% True when every element of the list is a supported SNMP key type.
is_snmp_type([]) ->
    true;
is_snmp_type([Type | Rest])
  when Type =:= integer; Type =:= string; Type =:= fix_string ->
    is_snmp_type(Rest);
is_snmp_type(_Other) ->
    false.

%% Build the SNMP index for MnesiaTab.
%% An empty SNMP structure aborts the transaction. Otherwise a new index
%% tree is created and, with the table fixed for the duration of the
%% traversal, every existing key is inserted into it. Returns the tree.
create_table([], MnesiaTab, _Storage) ->
    mnesia:abort({badarg, MnesiaTab, {snmp, empty_snmpstruct}});
create_table([{key, Us}], MnesiaTab, Storage) ->
    Tree = b_new(MnesiaTab, Us),
    mnesia_lib:db_fixtable(Storage, MnesiaTab, true),
    FirstKey = mnesia_lib:db_first(Storage, MnesiaTab),
    build_table(FirstKey, MnesiaTab, Tree, Us, Storage),
    mnesia_lib:db_fixtable(Storage, MnesiaTab, false),
    Tree.

%% Walk the table key by key, inserting {Oid, MnesiaKey} into the tree,
%% until '$end_of_table' is reached.
build_table('$end_of_table', _MnesiaTab, _Tree, _Us, _Storage) ->
    ok;
build_table(MnesiaKey, MnesiaTab, Tree, Us, Storage) ->
    Oid = key_to_oid_i(MnesiaKey, Us),
    b_insert(Tree, Oid, MnesiaKey),
    NextKey = mnesia_lib:db_next_key(Storage, MnesiaTab, MnesiaKey),
    build_table(NextKey, MnesiaTab, Tree, Us, Storage).

%% Dispose of the index tree. Returns ok.
delete_table(_MnesiaTab, Tree) ->
    b_delete_tree(Tree),
    ok.
%%-----------------------------------------------------------------
%% update({clear_table, MnesiaTab})
%% update({Op, MnesiaTab, MnesiaKey, SnmpKey})
%%
%% Keep the SNMP index of MnesiaTab in step with a table operation.
%% Always returns ok.
%%-----------------------------------------------------------------
update({clear_table, MnesiaTab}) ->
    b_clear(val({MnesiaTab, {index, snmp}})),
    ok;
update({Op, MnesiaTab, MnesiaKey, SnmpKey}) ->
    update(Op, val({MnesiaTab, {index, snmp}}), MnesiaKey, SnmpKey).

%% Apply one operation to the index tree:
%%   write                  -> store SnmpKey -> MnesiaKey
%%   update_counter         -> nothing to do
%%   delete | delete_object -> remove SnmpKey
update(Op, Tree, MnesiaKey, SnmpKey) ->
    case Op of
        write ->
            b_insert(Tree, SnmpKey, MnesiaKey);
        update_counter ->
            ignore;
        Removal when Removal =:= delete; Removal =:= delete_object ->
            b_delete(Tree, SnmpKey)
    end,
    ok.

%%-----------------------------------------------------------------
%% key_to_oid(Tab, Key [, Ustruct])
%%
%% Turn a mnesia key into an OBJECT IDENTIFIER (a list of integers).
%%   Key  ::= int() | string() | tuple of int() | string()
%%   Type ::= integer | string | fix_string | tuple of those
%% A `string` component is preceded by its length, a `fix_string`
%% component is not.
%%   Key = {"pelle", 42}, Type = {string, integer}
%%       => [5, $p, $e, $l, $l, $e, 42]
%%   Key = {"pelle", 42}, Type = {fix_string, integer}
%%       => [$p, $e, $l, $l, $e, 42]
%% Any failure during the conversion aborts with
%% {bad_snmp_key, {Tab, Key}, Types}.
%%-----------------------------------------------------------------
key_to_oid(Tab, Key) ->
    key_to_oid(Tab, Key, val({Tab, snmp})).

key_to_oid(Tab, Key, [{key, Types}]) ->
    try
        key_to_oid_i(Key, Types)
    catch
        _:_ ->
            mnesia:abort({bad_snmp_key, {Tab, Key}, Types})
    end.

%% Convert one key according to its type description.
key_to_oid_i(Key, integer) when is_integer(Key) ->
    [Key];
key_to_oid_i(Key, fix_string) when is_list(Key) ->
    Key;
key_to_oid_i(Key, string) when is_list(Key) ->
    [length(Key) | Key];
key_to_oid_i(Key, Types) ->
    %% Composite key: convert element by element.
    keys_to_oid(size(Key), Key, [], Types).

%% Convert elements N, N-1, ..., 1 of a composite key, prepending each
%% converted part to the accumulated OID.
keys_to_oid(0, _Key, Acc, _Types) ->
    Acc;
keys_to_oid(N, Key, Acc, Types) ->
    Part = key_to_oid_i(element(N, Key), element(N, Types)),
    keys_to_oid(N - 1, Key, Part ++ Acc, Types).
%%--------------------------------------------------
%% The reverse of key_to_oid: SNMP OID to mnesia key.
%% The key could also be looked up in the index tree, but the tree may
%% reside on a remote node. Looking it up is probably faster, so use this
%% function when the tree might be remote.
oid_to_key(Oid, Tab) ->
    [{key, Types}] = val({Tab, snmp}),
    oid_to_key_1(Types, Oid).

%% Decode an OID according to the type description.
%% Scalar types are decoded directly. For a tuple of types the OID is
%% decoded component by component; any failure yields `unknown`.
oid_to_key_1(integer, [Key]) ->
    Key;
oid_to_key_1(fix_string, Key) ->
    Key;
oid_to_key_1(string, [_Len | Key]) ->
    Key;
oid_to_key_1(Types, Oid) ->
    try
        Parts = oid_to_key_2(1, size(Types), Types, Oid),
        list_to_tuple(Parts)
    catch
        _:_ ->
            unknown
    end.

%% Decode component N of Sz.
%%   integer    - consumes one OID element
%%   string     - consumes a length element followed by that many elements
%%   fix_string - has no length prefix, so it is only decodable as the
%%                last component, where it takes the rest of the OID
%% The recursion ends once every component is decoded and the OID is
%% exhausted.
oid_to_key_2(N, Sz, _Types, []) when N =:= (Sz + 1) ->
    [];
oid_to_key_2(N, Sz, Types, Oid) when N =< Sz ->
    Type = element(N, Types),
    case Type of
        integer ->
            [Int | Rest] = Oid,
            [Int | oid_to_key_2(N + 1, Sz, Types, Rest)];
        fix_string when N =:= Sz ->
            [Oid];
        fix_string ->
            throw(fix_string);
        string ->
            [Len | Tail] = Oid,
            {Str, Rest} = lists:split(Len, Tail),
            [Str | oid_to_key_2(N + 1, Sz, Types, Rest)]
    end.

%%-----------------------------------------------------------------
%% Func: get_row/2
%% Args: Name is the name of the table (atom)
%%       RowIndex is an Oid
%% Returns: {ok, Row} | undefined
%% Note that the Row returned might contain columns that
%% are not visible via SNMP, e.g. the first column may be
%% ifIndex, and the last MFA ({ifIndex, col1, col2, MFA}),
%% where ifIndex is used only as index (not as a real col),
%% and MFA as extra info, used by the application.
%%-----------------------------------------------------------------
get_row(Name, RowIndex) ->
    Tree = mnesia_lib:val({Name, {index, snmp}}),
    case b_lookup(Tree, RowIndex) of
        {ok, {_Oid, MnesiaKey}} ->
            [Row] = mnesia:dirty_read({Name, MnesiaKey}),
            {ok, Row};
        _NotFound ->
            undefined
    end.
%%-----------------------------------------------------------------
%% Func: get_next_index/2
%% Args: Name is the name of the table (atom)
%%       RowIndex is an Oid
%% Returns: {NextIndex, MnesiaKey} | {endOfTable, undefined}
%%-----------------------------------------------------------------
get_next_index(Name, RowIndex) ->
    Tree = mnesia_lib:val({Name, {index, snmp}}),
    case b_lookup_next(Tree, RowIndex) of
        {ok, Entry} -> Entry;
        _NoNext -> {endOfTable, undefined}
    end.

%%-----------------------------------------------------------------
%% Func: get_mnesia_key/2
%% Purpose: Get the mnesia key corresponding to the RowIndex.
%% Args: Name is the name of the table (atom)
%%       RowIndex is an Oid
%% Returns: {ok, Key} | undefined
%%-----------------------------------------------------------------
get_mnesia_key(Name, RowIndex) ->
    Tree = mnesia_lib:val({Name, {index, snmp}}),
    case b_lookup(Tree, RowIndex) of
        {ok, {_Oid, MnesiaKey}} -> {ok, MnesiaKey};
        _NotFound -> undefined
    end.


%%-----------------------------------------------------------------
%% Internal implementation: the index is an ordered_set ets table
%% holding {SnmpKey, MnesiaKey} objects.

%% Create the index table through mnesia_monitor.
b_new(_Tab, _Us) ->
    mnesia_monitor:unsafe_mktab(?MODULE, [public, ordered_set]).

%% Delete the whole index table.
b_delete_tree(Tree) ->
    ets:delete(Tree). %% Close via mnesia_monitor ?

%% Remove every entry but keep the table.
b_clear(Tree) ->
    ets:delete_all_objects(Tree).

%% Store the mapping SnmpKey -> MnesiaKey.
b_insert(Tree, SnmpKey, MnesiaKey) ->
    ets:insert(Tree, {SnmpKey, MnesiaKey}).

%% Remove the entry stored under SnmpKey.
b_delete(Tree, SnmpKey) ->
    ets:delete(Tree, SnmpKey).

%% Returns {ok, {SnmpKey, MnesiaKey}} when exactly one object is stored
%% under RowIndex, otherwise undefined.
b_lookup(Tree, RowIndex) ->
    case ets:lookup(Tree, RowIndex) of
        [Entry] -> {ok, Entry};
        _ -> undefined
    end.

%% Returns the entry following RowIndex in key order, or undefined when
%% RowIndex is followed by the end of the table.
b_lookup_next(Tree, RowIndex) ->
    case ets:next(Tree, RowIndex) of
        '$end_of_table' -> undefined;
        NextKey -> b_lookup(Tree, NextKey)
    end.
diff --git a/lib/mnesia/src/mnesia_snmp_sup.erl b/lib/mnesia/src/mnesia_snmp_sup.erl
new file mode 100644
index 0000000000..7e86281428
--- /dev/null
+++ b/lib/mnesia/src/mnesia_snmp_sup.erl
@@ -0,0 +1,42 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_snmp_sup).

-behaviour(supervisor).

-export([start/0, init/1]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% top supervisor callback functions

%% Start the supervisor, registered locally under the module name.
start() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% sub supervisor callback functions

%% Supervisor callback: a simple_one_for_one supervisor with a single
%% transient worker specification.
%% NOTE(review): the child start function is mnesia_snmp_hook:start/0;
%% confirm that this function exists before relying on this supervisor.
init([]) ->
    Shutdown = mnesia_kernel_sup:supervisor_timeout(timer:seconds(3)),
    ChildSpec = {?MODULE,
                 {mnesia_snmp_hook, start, []},
                 transient,
                 Shutdown,
                 worker,
                 [?MODULE, mnesia_snmp_hook, supervisor]},
    %% Zero restarts allowed: trust the top supervisor
    RestartStrategy = {simple_one_for_one, 0, timer:hours(24)},
    {ok, {RestartStrategy, [ChildSpec]}}.
diff --git a/lib/mnesia/src/mnesia_sp.erl b/lib/mnesia/src/mnesia_sp.erl
new file mode 100644
index 0000000000..58a177513f
--- /dev/null
+++ b/lib/mnesia/src/mnesia_sp.erl
@@ -0,0 +1,42 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1999-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%

%% To be able to generate nice crash reports we need a catch at the
%% highest level. This code cannot be purged, so a code change is not
%% possible, and hence this is kept as a simple module.

-module(mnesia_sp).

-export([init_proc/4]).

%% Run Mod:Fun(Args) on behalf of the process called Who.
%% When the call exits, the termination is reported through
%% mnesia_monitor:terminate_proc/3 and the process exits with the same
%% reason. Any other outcome is returned as is.
init_proc(Who, Mod, Fun, Args) ->
    mnesia_lib:verbose("~p starting: ~p~n", [Who, self()]),
    Outcome = (catch apply(Mod, Fun, Args)),
    case Outcome of
        {'EXIT', Reason} ->
            mnesia_monitor:terminate_proc(Who, Reason, Args),
            exit(Reason);
        _ ->
            Outcome
    end.




diff --git a/lib/mnesia/src/mnesia_subscr.erl b/lib/mnesia/src/mnesia_subscr.erl
new file mode 100644
index 0000000000..afd1704dec
--- /dev/null
+++ b/lib/mnesia/src/mnesia_subscr.erl
@@ -0,0 +1,494 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_subscr).

-behaviour(gen_server).

-export([start/0,
         set_debug_level/1,
         subscribe/2,
         unsubscribe/2,
         unsubscribe_table/1,
         subscribers/0,
         report_table_event/4,
         report_table_event/5,
         report_table_event/6
        ]).

%% gen_server callbacks
-export([init/1,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         terminate/2,
         code_change/3
        ]).

-include("mnesia.hrl").

-import(mnesia_lib, [error/2]).
-record(state, {supervisor, pid_tab}).

%% Start the subscription server, registered locally under the module
%% name. The calling process is passed to init/1 as the parent.
start() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [self()],
                          [{timeout, infinity}]).

%% Change the debug level.
%% The new level is first patched into the environment; a failure there is
%% returned unchanged. Otherwise the previous level is returned.
set_debug_level(Level) ->
    OldEnv = application:get_env(mnesia, debug),
    case mnesia_monitor:patch_env(debug, Level) of
        {error, _Reason} = Error ->
            Error;
        NewLevel ->
            set_debug_level(NewLevel, OldEnv)
    end.

%% When mnesia is not running, only the previous environment setting is
%% reported (`none` when there was none). When it is running, the
%% mnesia_event process is subscribed to / unsubscribed from the local
%% tables as the level demands:
%%   none | verbose - no table events
%%   debug          - events for the schema table only
%%   trace          - events for every local table
set_debug_level(Level, OldEnv) ->
    case mnesia:system_info(is_running) of
        no when OldEnv == undefined ->
            none;
        no ->
            {ok, Previous} = OldEnv,
            Previous;
        _ ->
            Previous = mnesia_lib:val(debug),
            LocalTabs = mnesia:system_info(local_tables),
            EventPid = whereis(mnesia_event),
            Subscribe =
                fun(Tab) -> subscribe(EventPid, {table, Tab}) end,
            Unsubscribe =
                fun(Tab) -> unsubscribe(EventPid, {table, Tab}) end,
            case Level of
                Quiet when Quiet =:= none; Quiet =:= verbose ->
                    lists:foreach(Unsubscribe, LocalTabs);
                debug ->
                    lists:foreach(Unsubscribe, LocalTabs -- [schema]),
                    Subscribe(schema);
                trace ->
                    lists:foreach(Subscribe, LocalTabs)
            end,
            mnesia_lib:set(debug, Level),
            Previous
    end.

%% Activate a subscription for ClientPid.
%% What is `system`, {table, Tab} (shorthand for simple) or
%% {table, Tab, simple | detailed}; anything else gives
%% {error, {badarg, What}}.
subscribe(ClientPid, system) ->
    change_subscr(activate, ClientPid, system);
subscribe(ClientPid, {table, Tab}) ->
    change_subscr(activate, ClientPid, {table, Tab, simple});
subscribe(ClientPid, {table, _Tab, Level} = What)
  when Level =:= simple; Level =:= detailed ->
    change_subscr(activate, ClientPid, What);
subscribe(_ClientPid, What) ->
    {error, {badarg, What}}.
%% Deactivate a subscription for ClientPid.
%% What is `system`, {table, Tab} (shorthand for simple) or
%% {table, Tab, simple | detailed}; anything else gives
%% {error, {badarg, What}}.
unsubscribe(ClientPid, system) ->
    change_subscr(deactivate, ClientPid, system);
unsubscribe(ClientPid, {table, Tab}) ->
    change_subscr(deactivate, ClientPid, {table, Tab, simple});
unsubscribe(ClientPid, {table, _Tab, Level} = What)
  when Level =:= simple; Level =:= detailed ->
    change_subscr(deactivate, ClientPid, What);
unsubscribe(_ClientPid, What) ->
    {error, {badarg, What}}.

%% Ask the server to drop every subscription on Tab.
unsubscribe_table(Tab) ->
    call({change, {deactivate_table, Tab}}).

%% Forward one activate/deactivate request to the server.
change_subscr(Kind, ClientPid, What) ->
    call({change, {Kind, ClientPid, What}}).

%% The mnesia_event process followed by the registered system subscribers.
subscribers() ->
    [whereis(mnesia_event) | mnesia_lib:val(subscribers)].

%% Report a table event to the subscribers recorded in the table's
%% commit_work. Nothing is done when the table has no commit_work value or
%% no subscribers entry.
report_table_event(Tab, Tid, Obj, Op) ->
    case ?catch_val({Tab, commit_work}) of
        {'EXIT', _} ->
            ok;
        Commit ->
            case lists:keyfind(subscribers, 1, Commit) of
                false ->
                    ok;
                Subs ->
                    report_table_event(Subs, Tab, Tid, Obj, Op, undefined)
            end
    end.

%% Backwards compatible for the moment, until mnesia_tm gets updated!
report_table_event(Subscr, Tab, Tid, Obj, Op) ->
    report_table_event(Subscr, Tab, Tid, Obj, Op, undefined).
%% Deliver a table event to the subscribers.
%%
%% The first argument is the subscribers entry: {subscribers, Simple,
%% Detailed}, or the older {subscribers, Simple}. Simple subscribers get
%% {mnesia_table_event, {Op, Record, Tid}}; detailed subscribers get the
%% extended event built by what/5.
%%
%% clear_table is reported as a delete followed by a write of the table's
%% schema record, to both kinds of subscribers.
report_table_event({subscribers, S1, S2}, Tab, Tid, _Obj, clear_table, _Old) ->
    What = {delete, {schema, Tab}, Tid},
    deliver(S1, {mnesia_table_event, What}),
    TabDef = mnesia_schema:cs2list(?catch_val({Tab, cstruct})),
    What2 = {write, {schema, Tab, TabDef}, Tid},
    deliver(S1, {mnesia_table_event, What2}),
    What3 = {delete, schema, {schema, Tab}, [{schema, Tab, TabDef}], Tid},
    deliver(S2, {mnesia_table_event, What3}),
    What4 = {write, schema, {schema, Tab, TabDef}, [], Tid},
    deliver(S2, {mnesia_table_event, What4});

report_table_event({subscribers, Subscr, []}, Tab, Tid, Obj, Op, _Old) ->
    %% No detailed subscribers: only the standard event is needed.
    What = {Op, patch_record(Tab, Obj), Tid},
    deliver(Subscr, {mnesia_table_event, What});

report_table_event({subscribers, S1, S2}, Tab, Tid, Obj, Op, Old) ->
    Standard = {Op, patch_record(Tab, Obj), Tid},
    deliver(S1, {mnesia_table_event, Standard}),
    Extended = what(Tab, Tid, Obj, Op, Old),
    deliver(S2, Extended);

%% Backwards compatible for the moment, until mnesia_tm gets updated!
report_table_event({subscribers, Subscr}, Tab, Tid, Obj, Op, Old) ->
    report_table_event({subscribers, Subscr, []}, Tab, Tid, Obj, Op, Old).


%% Make sure the first element of the record is the table name.
patch_record(Tab, Obj) ->
    case Tab == element(1, Obj) of
        true ->
            Obj;
        false ->
            setelement(1, Obj, Tab)
    end.

%% Build the extended (detailed) event for an operation, or `ignore` when
%% no event should be sent.
%%
%% When the old records are not supplied (Old == undefined) they are read
%% from the table; when they are supplied they are used as is. This holds
%% for both delete and write.
what(Tab, Tid, {RecName, Key}, delete, undefined) ->
    case catch mnesia_lib:db_get(Tab, Key) of
        Old when is_list(Old) -> %% Op only allowed for set table.
            {mnesia_table_event, {delete, Tab, {RecName, Key}, Old, Tid}};
        _ ->
            %% Record just deleted by a dirty_op or
            %% the whole table has been deleted
            ignore
    end;
what(Tab, Tid, Obj, delete, Old) ->
    {mnesia_table_event, {delete, Tab, Obj, Old, Tid}};
what(Tab, Tid, Obj, delete_object, _Old) ->
    {mnesia_table_event, {delete, Tab, Obj, [Obj], Tid}};
what(Tab, Tid, Obj, write, undefined) ->
    case catch mnesia_lib:db_get(Tab, element(2, Obj)) of
        Old when is_list(Old) ->
            {mnesia_table_event, {write, Tab, Obj, Old, Tid}};
        {'EXIT', _} ->
            ignore
    end;
what(Tab, Tid, Obj, write, Old) ->
    %% The caller already knows the old records. Without this clause a
    %% write reported together with its old records raised function_clause.
    {mnesia_table_event, {write, Tab, Obj, Old, Tid}}.

%% Send Msg to every pid in the list; the message `ignore` is never sent.
deliver(_, ignore) ->
    ok;
deliver([Pid | Pids], Msg) ->
    Pid ! Msg,
    deliver(Pids, Msg);
deliver([], _Msg) ->
    ok.

%% Synchronous request to the subscription server.
%% Returns {error, {node_not_running, node()}} when the server is not
%% registered, or when an exit message from the server is found in the
%% mailbox right after the call.
call(Msg) ->
    Pid = whereis(?MODULE),
    case Pid of
        undefined ->
            {error, {node_not_running, node()}};
        Pid ->
            Res = gen_server:call(Pid, Msg, infinity),
            %% We get an exit signal if server dies.
            %% Match on the server's pid only: a caller that traps exits
            %% may have unrelated 'EXIT' messages queued, and those must
            %% neither be consumed nor be mistaken for a dead server.
            receive
                {'EXIT', Pid, _Reason} ->
                    {error, {node_not_running, node()}}
            after 0 ->
                    Res
            end
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Callback functions from gen_server

%%----------------------------------------------------------------------
%% Func: init/1
%% Returns: {ok, State}          |
%%          {ok, State, Timeout} |
%%          {stop, Reason}
%%
%% Traps exits, links to the mnesia_event process and registers it as the
%% first system subscriber in a fresh private subscription table.
%%----------------------------------------------------------------------
init([Parent]) ->
    process_flag(trap_exit, true),
    ClientPid = whereis(mnesia_event),
    link(ClientPid),
    mnesia_lib:verbose("~p starting: ~p~n", [?MODULE, self()]),
    Tab = ?ets_new_table(mnesia_subscr, [duplicate_bag, private]),
    ?ets_insert(Tab, {ClientPid, system}),
    {ok, #state{supervisor = Parent, pid_tab = Tab}}.
%%----------------------------------------------------------------------
%% Func: handle_call/3
%% Returns: {reply, Reply, State}          |
%%          {reply, Reply, State, Timeout} |
%%          {noreply, State}               |
%%          {noreply, State, Timeout}      |
%%          {stop, Reason, Reply, State}   | (terminate/2 is called)
%%
%% {change, How} requests are applied to the subscription table and
%% answered; any other request is logged and left unanswered.
%%----------------------------------------------------------------------
handle_call({change, How}, _From, #state{pid_tab = SubscrTab} = State) ->
    {reply, do_change(How, SubscrTab), State};

handle_call(Msg, _From, State) ->
    mnesia_lib:error("~p got unexpected call: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%%----------------------------------------------------------------------
%% Func: handle_cast/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%
%% No casts are expected; whatever arrives is logged.
%%----------------------------------------------------------------------
handle_cast(Msg, State) ->
    mnesia_lib:error("~p got unexpected cast: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%%----------------------------------------------------------------------
%% Func: handle_info/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%
%% An exit from the supervisor stops the server; an exit from any other
%% linked process removes that process' subscriptions. Everything else is
%% logged.
%%----------------------------------------------------------------------

handle_info({'EXIT', Pid, _R}, #state{supervisor = Pid} = State) ->
    {stop, shutdown, State};

handle_info({'EXIT', Pid, _Reason}, State) ->
    handle_exit(Pid, State#state.pid_tab),
    {noreply, State};

handle_info(Msg, State) ->
    mnesia_lib:error("~p got unexpected info: ~p~n", [?MODULE, Msg]),
    {noreply, State}.

%%----------------------------------------------------------------------
%% Func: terminate/2
%% Purpose: Shutdown the server
%% Returns: any (ignored by gen_server)
%%----------------------------------------------------------------------
terminate(Reason, #state{pid_tab = SubscrTab} = State) ->
    prepare_stop(SubscrTab),
    mnesia_monitor:terminate_proc(?MODULE, Reason, State).
%%----------------------------------------------------------------------
%% Func: code_change/3
%% Purpose: Upgrade process when its code is to be changed
%% Returns: {ok, NewState}
%%----------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%----------------------------------------------------------------------
%%% Internal functions
%%%----------------------------------------------------------------------

%% Carry out one subscription change.
%%   {activate, Pid, system}
%%   {activate, Pid, {table, Tab, How}}   - only when Tab is read locally
%%   {deactivate, Pid, system}
%%   {deactivate, Pid, {table, Tab, How}}
%%   {deactivate_table, Tab}              - drop every subscriber of Tab
%% Any other request gives {error, badarg}.
do_change({activate, ClientPid, system}, SubscrTab)
  when is_pid(ClientPid) ->
    activate(ClientPid, system, subscribers, subscribers(), SubscrTab);
do_change({activate, ClientPid, {table, Tab, How}}, SubscrTab)
  when is_pid(ClientPid) ->
    case ?catch_val({Tab, where_to_read}) of
        Node when Node == node() ->
            Var = {Tab, commit_work},
            activate(ClientPid, {table, Tab, How}, Var,
                     mnesia_lib:val(Var), SubscrTab);
        {'EXIT', _} ->
            {error, {no_exists, Tab}};
        _OtherNode ->
            {error, {not_active_local, Tab}}
    end;
do_change({deactivate, ClientPid, system}, SubscrTab) ->
    deactivate(ClientPid, system, subscribers, SubscrTab);
do_change({deactivate, ClientPid, {table, Tab, How}}, SubscrTab) ->
    deactivate(ClientPid, {table, Tab, How}, {Tab, commit_work}, SubscrTab);
do_change({deactivate_table, Tab}, SubscrTab) ->
    Var = {Tab, commit_work},
    case ?catch_val(Var) of
        {'EXIT', _} ->
            {error, {no_exists, Tab}};
        CommitWork ->
            DropSimple =
                fun(Pid) ->
                        deactivate(Pid, {table, Tab, simple}, Var, SubscrTab)
                end,
            DropDetailed =
                fun(Pid) ->
                        deactivate(Pid, {table, Tab, detailed}, Var, SubscrTab)
                end,
            case lists:keyfind(subscribers, 1, CommitWork) of
                false ->
                    ok;
                {subscribers, Simple, Detailed} ->
                    lists:foreach(DropSimple, Simple),
                    lists:foreach(DropDetailed, Detailed);
                {subscribers, Simple} ->
                    %% Old format without detailed subscribers
                    lists:foreach(DropSimple, Simple)
            end,
            {ok, node()}
    end;
do_change(_, _) ->
    {error, badarg}.
%% Register ClientPid as a subscriber of What.
%%
%% OldSubscribers is either the list of system subscribers (when Var is
%% `subscribers`) or the commit_work list of a table. A pid that already
%% subscribes gets {error, {already_exists, What}}; a pid that cannot be
%% linked gets {error, {no_exists, ClientPid}}. Otherwise the pid is
%% linked, recorded in the subscription table and added to Var.
activate(ClientPid, What, Var, OldSubscribers, SubscrTab) ->
    Current =
        case Var of
            subscribers ->
                OldSubscribers;
            _ ->
                case lists:keyfind(subscribers, 1, OldSubscribers) of
                    false ->
                        [];
                    {subscribers, Simple, Detailed} ->
                        Simple ++ Detailed;
                    {subscribers, Simple} ->
                        Simple
                end
        end,
    case lists:member(ClientPid, Current) of
        true ->
            {error, {already_exists, What}};
        false ->
            %% Don't care about checking old links
            case catch link(ClientPid) of
                true ->
                    ?ets_insert(SubscrTab, {ClientPid, What}),
                    add_subscr(Var, What, ClientPid),
                    {ok, node()};
                {'EXIT', _Reason} ->
                    {error, {no_exists, ClientPid}}
            end
    end.

%% The old subscribers record, -record(subscribers, {pids = []}), has been
%% removed. The {subscribers, Pids} form is still accepted below as a
%% kludge for backward compatibility.
%%
%% Add Pid to the system subscribers, or to the simple/detailed subscriber
%% list kept in the table's commit_work (creating the entry when the table
%% has none yet).
add_subscr(subscribers, _What, Pid) ->
    mnesia_lib:add(subscribers, Pid),
    {ok, node()};
add_subscr({Tab, commit_work}, What, Pid) ->
    Commit = mnesia_lib:val({Tab, commit_work}),
    case lists:keyfind(subscribers, 1, Commit) of
        false ->
            Fresh =
                case What of
                    {table, _, simple} -> {subscribers, [Pid], []};
                    {table, _, detailed} -> {subscribers, [], [Pid]}
                end,
            mnesia_lib:add({Tab, subscribers}, Pid),
            mnesia_lib:set({Tab, commit_work},
                           mnesia_lib:sort_commit([Fresh | Commit]));
        Existing ->
            {Simple, Detailed} =
                case Existing of
                    {subscribers, Pids} -> %% Old Way
                        {Pids, []};
                    {subscribers, SimplePids, DetailedPids} ->
                        {SimplePids, DetailedPids}
                end,
            Extended =
                case What of
                    {table, _, simple} ->
                        {subscribers, [Pid | Simple], Detailed};
                    {table, _, detailed} ->
                        {subscribers, Simple, [Pid | Detailed]}
                end,
            NewCommit = lists:keyreplace(subscribers, 1, Commit, Extended),
            mnesia_lib:set({Tab, commit_work},
                           mnesia_lib:sort_commit(NewCommit)),
            mnesia_lib:add({Tab, subscribers}, Pid)
    end.
%% Remove the subscription {ClientPid, What}.
%% The pid is unlinked when the lookup of its remaining subscriptions
%% fails, i.e. when it has none left. Always returns {ok, node()}.
deactivate(ClientPid, What, Var, SubscrTab) ->
    ?ets_match_delete(SubscrTab, {ClientPid, What}),
    case catch ?ets_lookup_element(SubscrTab, ClientPid, 1) of
        Remaining when is_list(Remaining) ->
            ignore;
        {'EXIT', _} ->
            unlink(ClientPid)
    end,
    del_subscr(Var, What, ClientPid),
    {ok, node()}.

%% Remove Pid from the system subscribers, or from the subscriber lists in
%% the table's commit_work. When both lists become empty the subscribers
%% entry is dropped from commit_work altogether. Returns false when the
%% table has no subscribers entry.
del_subscr(subscribers, _What, Pid) ->
    mnesia_lib:del(subscribers, Pid);
del_subscr({Tab, commit_work}, What, Pid) ->
    Commit = mnesia_lib:val({Tab, commit_work}),
    case lists:keyfind(subscribers, 1, Commit) of
        false ->
            false;
        Existing ->
            {Simple, Detailed} =
                case Existing of
                    {subscribers, Pids} -> %% Old Way
                        {Pids, []};
                    {subscribers, SimplePids, DetailedPids} ->
                        {SimplePids, DetailedPids}
                end,
            Remaining =
                case What of
                    %% Ignore user error: delete the subscriber from
                    %% both lists, whichever level was asked for
                    {table, _, Level}
                      when Level =:= simple; Level =:= detailed ->
                        {subscribers,
                         lists:delete(Pid, Simple),
                         lists:delete(Pid, Detailed)}
                end,
            NewCommit =
                case Remaining of
                    {subscribers, [], []} ->
                        lists:keydelete(subscribers, 1, Commit);
                    _ ->
                        lists:keyreplace(subscribers, 1, Commit, Remaining)
                end,
            mnesia_lib:del({Tab, subscribers}, Pid),
            mnesia_lib:set({Tab, commit_work},
                           mnesia_lib:sort_commit(NewCommit))
    end.

%% A subscriber has gone away: cancel all of its subscriptions and forget
%% it in the subscription table.
handle_exit(ClientPid, SubscrTab) ->
    do_handle_exit(?ets_lookup(SubscrTab, ClientPid)),
    ?ets_delete(SubscrTab, ClientPid).

%% Cancel each {ClientPid, What} subscription in the list.
do_handle_exit([]) ->
    ok;
do_handle_exit([{ClientPid, What} | Rest]) ->
    case What of
        system ->
            del_subscr(subscribers, What, ClientPid);
        {_, Tab, _Level} ->
            del_subscr({Tab, commit_work}, What, ClientPid)
    end,
    do_handle_exit(Rest).

%% Report mnesia_down for this node, then cancel every subscription.
prepare_stop(SubscrTab) ->
    mnesia_lib:report_system_event({mnesia_down, node()}),
    do_prepare_stop(?ets_first(SubscrTab), SubscrTab).

%% Walk SubscrTab key by key, removing and unlinking each client.
do_prepare_stop('$end_of_table', _SubscrTab) ->
    ok;
do_prepare_stop(ClientPid, SubscrTab) ->
    %% The successor is fetched before handle_exit/2 deletes ClientPid
    Following = ?ets_next(SubscrTab, ClientPid),
    handle_exit(ClientPid, SubscrTab),
    unlink(ClientPid),
    do_prepare_stop(Following, SubscrTab).

%% diff --git a/lib/mnesia/src/mnesia_sup.erl b/lib/mnesia/src/mnesia_sup.erl new file mode 100644 index 0000000000..9ee4086f50 --- /dev/null +++ b/lib/mnesia/src/mnesia_sup.erl @@ -0,0 +1,131 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
%% Supervisor for the entire Mnesia application

-module(mnesia_sup).

-behaviour(application).
-behaviour(supervisor).

-export([start/0, start/2, init/1, stop/1, start_event/0, kill/0]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% application and supervisor callback functions

%% Application start callback; only the normal start type is accepted.
start(normal, Args) ->
    case supervisor:start_link({local, ?MODULE}, ?MODULE, [Args]) of
        {ok, Pid} ->
            {ok, Pid, {normal, Args}};
        Failure ->
            Failure
    end;
start(_, _) ->
    {error, badarg}.

%% Start the supervisor directly, outside of the application controller.
start() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

stop(_StartArgs) ->
    ok.

%% [] comes from start/0, [[]] from start/2 with empty start arguments.
init([]) ->     % Supervisor
    init();
init([[]]) ->   % Application
    init();
init(BadArg) ->
    {error, {badarg, BadArg}}.

%% Build the supervisor specification: restart strategy plus child specs.
init() ->
    Flags = {one_for_all, 0, 3600}, % Should be rest_for_one policy
    EventSpecs = event_procs(),
    KernelSpecs = kernel_procs(),
    MnemosyneSpecs = mnemosyne_procs(),
    {ok, {Flags, lists:append([EventSpecs, KernelSpecs, MnemosyneSpecs])}}.

%% Child spec for the mnesia_event gen_event process.
event_procs() ->
    Shutdown = mnesia_kernel_sup:supervisor_timeout(timer:seconds(30)),
    [{mnesia_event,
      {?MODULE, start_event, []},
      permanent, Shutdown, worker,
      [mnesia_event, gen_event]}].

%% Child spec for the kernel supervisor.
kernel_procs() ->
    [{mnesia_kernel_sup,
      {mnesia_kernel_sup, start, []},
      permanent, infinity, supervisor,
      [mnesia_kernel_sup, supervisor]}].

%% Child spec for mnemosyne, present only when embedded_mnemosyne is true.
mnemosyne_procs() ->
    case mnesia_monitor:get_env(embedded_mnemosyne) of
        false ->
            [];
        true ->
            [{mnemosyne_sup,
              {mnemosyne_sup, start, []},
              permanent, infinity, supervisor,
              [mnemosyne_sup, supervisor]}]
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% event handler

%% Start the event manager and install the configured handler module.
start_event() ->
    case gen_event:start_link({local, mnesia_event}) of
        {ok, Pid} ->
            case add_event_handler() of
                ok ->
                    {ok, Pid};
                HandlerError ->
                    HandlerError
            end;
        StartError ->
            StartError
    end.

add_event_handler() ->
    gen_event:add_handler(mnesia_event,
                          mnesia_monitor:get_env(event_module),
                          []).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% debug functions

%% Kill every registered Mnesia process and repeat until the mnesia
%% application is no longer listed as running.
kill() ->
    Names = [mnesia_fallback | mnesia:ms()],
    lists:foreach(fun(Name) -> catch exit(whereis(Name), kill) end, Names),
    lists:foreach(fun ensure_dead/1, Names),
    timer:sleep(10),
    Running = application:which_applications(),
    case lists:keymember(mnesia, 1, Running) of
        false -> ok;
        true -> kill()
    end.

%% Keep killing the process registered as Name until the name is free.
ensure_dead(Name) ->
    case whereis(Name) of
        Pid when is_pid(Pid) ->
            exit(Pid, kill),
            timer:sleep(10),
            ensure_dead(Name);
        undefined ->
            ok
    end.

%% diff --git a/lib/mnesia/src/mnesia_text.erl b/lib/mnesia/src/mnesia_text.erl new file mode 100644 index 0000000000..f1a28bf43d --- /dev/null +++ b/lib/mnesia/src/mnesia_text.erl @@ -0,0 +1,194 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_text).

%% This module defines its own error/1 (it throws {error, What}); make
%% sure unqualified calls resolve to it and not to the erlang:error/1 BIF.
-compile({no_auto_import,[error/1]}).

-export([parse/1, file/1, load_textfile/1, dump_to_textfile/1]).

%% Load table definitions and records from the text file File.
%% Starts Mnesia if needed, creates the tables that do not exist yet and
%% writes the data belonging to every table that could be used.
%% Returns the result of the loading transaction, or whatever parse/1
%% returned when the file could not be parsed.
load_textfile(File) ->
    ensure_started(),
    case parse(File) of
        {ok, {Tabs, Data}} ->
            Badtabs = make_tabs(lists:map(fun validate_tab/1, Tabs)),
            load_data(del_data(Badtabs, Data, []));
        Other ->
            Other
    end.

%% Dump all locally active tables (except schema) to the text file File.
%% Returns the result of file:close/1 on success and the atom error when
%% Mnesia is not running or the file cannot be opened.
%% The running check is made before the file is opened, so a failed call
%% neither truncates an existing file nor leaves a file handle open.
dump_to_textfile(File) ->
    case mnesia_lib:is_running() of
        yes ->
            dump_to_textfile(yes, file:open(File, [write]));
        _ ->
            error
    end.

dump_to_textfile(yes, {ok, F}) ->
    Tabs = lists:delete(schema, mnesia_lib:local_active_tables()),
    Defs = lists:map(fun(T) -> {T, [{record_name, mnesia_lib:val({T, record_name})},
                                    {attributes, mnesia_lib:val({T, attributes})}]}
                     end,
                     Tabs),
    io:format(F, "~p.~n", [{tables, Defs}]),
    lists:foreach(fun(T) -> dump_tab(F, T) end, Tabs),
    file:close(F);
dump_to_textfile(_, {ok, F}) ->
    %% Not running but the file was opened: do not leak the handle
    file:close(F),
    error;
dump_to_textfile(_, _) ->
    error.

%% Write every record of table T to the io device F, one term per line.
%% The record tag is replaced by the table name so the file can be read
%% back by load_textfile/1.
dump_tab(F, T) ->
    W = mnesia_lib:val({T, wild_pattern}),
    {atomic,All} = mnesia:transaction(fun() -> mnesia:match_object(T, W, read) end),
    lists:foreach(fun(Term) -> io:format(F,"~p.~n", [setelement(1, Term, T)]) end, All).


%% Start Mnesia unless it already runs; a schema is created first when
%% no schema.DAT file exists in the Mnesia directory.
ensure_started() ->
    case mnesia_lib:is_running() of
        yes ->
            yes;
        no ->
            SchemaFile = mnesia_lib:dir("schema.DAT"),
            case mnesia_lib:exists(SchemaFile) of
                false -> mnesia:create_schema([node()]);
                true -> ok
            end,
            mnesia:start()
    end.

%% Drop every object whose first element names a table listed in Bad.
%% Ack holds already accepted objects in reverse order.
del_data(Bad, Objects, Ack) ->
    Kept = [Obj || Obj <- Objects, not lists:member(element(1, Obj), Bad)],
    lists:reverse(Ack, Kept).

%% This is the place to call the validate function in mnesia_schema
validate_tab({_Tabname, _List} = Def) ->
    Def;
validate_tab({_Tabname, _RecName, _List} = Def) ->
    Def;
validate_tab(_) ->
    error(badtab).

%% Create the tables that do not exist yet.
%% Returns the names of the tables that could not be created.
make_tabs(Defs) ->
    lists:flatmap(
      fun({Tab, Def}) ->
              case catch mnesia:table_info(Tab, where_to_read) of
                  {'EXIT', _} -> %% non-existing table
                      case mnesia:create_table(Tab, Def) of
                          {aborted, Reason} ->
                              io:format("** Failed to create table ~w ~n"
                                        "** Reason = ~w, Args = ~p~n",
                                        [Tab, Reason, Def]),
                              [Tab];
                          _ ->
                              io:format("New table ~w~n", [Tab]),
                              []
                      end;
                  Node ->
                      io:format("** Table ~w already exists on ~p, just entering data~n",
                                [Tab, Node]),
                      []
              end
      end,
      Defs).

%% Write all objects in one transaction. The first element of each
%% object is the table name; it is replaced by the table's record name
%% before the write.
load_data(Objects) ->
    WriteOne =
        fun(Obj) ->
                Tab = element(1, Obj),
                RecName = mnesia:table_info(Tab, record_name),
                mnesia:write(Tab, setelement(1, Obj, RecName), write)
        end,
    mnesia:transaction(fun() -> lists:foreach(WriteOne, Objects) end).

%% Read File and split it into table definitions and data objects.
%% Returns {ok, {Tabs, Data}}, {error, Reason}, or the error from file/1.
parse(File) ->
    case file(File) of
        {ok, Terms} ->
            case catch collect(Terms) of
                {error, Reason} ->
                    {error, Reason};
                Collected ->
                    {ok, Collected}
            end;
        ReadError ->
            ReadError
    end.

%% The first term of the file must be the {tables, Tabs} header.
collect([{_, {tables, Tabs}} | Rest]) ->
    {Tabs, collect_data(Tabs, Rest)};

collect(_) ->
    io:format("No tables found\n", []),
    error(bad_header).

%% Return the data terms of the {Line, Term} list. Every term must be a
%% tuple tagged with one of the table names in Tabs; anything else is
%% reported and aborts the parse with undefined_object.
collect_data(Tabs, [{Line, Term} | Rest]) when is_tuple(Term) ->
    case lists:keymember(element(1, Term), 1, Tabs) of
        true ->
            [Term | collect_data(Tabs, Rest)];
        false ->
            io:format("Object:~p at line ~w unknown\n", [Term,Line]),
            error(undefined_object)
    end;
collect_data(_Tabs, []) ->
    [];
collect_data(_Tabs, [Unknown | _]) ->
    io:format("Object:~p unknown\n", [Unknown]),
    error(undefined_object).

%% Local error/1: a non-local return that parse/1 catches.
error(What) ->
    throw({error, What}).

%% Read all terms of File.
%% Returns {ok, [{Line, Term}]}, {error, read} or {error, open}.
file(File) ->
    case file:open(File, [read]) of
        {ok, Stream} ->
            Result = read_terms(Stream, File, 1, []),
            file:close(Stream),
            Result;
        _OpenError ->
            {error, open}
    end.

%% Collect terms until end of file; Acc is kept in reverse order.
read_terms(Stream, File, Line, Acc) ->
    case read_term_from_stream(Stream, File, Line) of
        eof ->
            {ok, lists:reverse(Acc)};
        error ->
            {error, read};
        {ok, Term, NextLine} ->
            read_terms(Stream, File, NextLine, [Term | Acc])
    end.

%% Scan and parse one term starting at Line.
%% Returns {ok, {Line, Term}, EndLine}, eof, or error after printing a
%% diagnostic.
read_term_from_stream(Stream, File, Line) ->
    case io:request(Stream, {get_until,'',erl_scan,tokens,[Line]}) of
        {ok, Tokens, EndLine} ->
            case erl_parse:parse_term(Tokens) of
                {ok, Term} ->
                    {ok, {Line, Term}, EndLine};
                {error, {ErrLine, Mod, What}} ->
                    Text = Mod:format_error(What),
                    io:format("Error in line:~p of:~p ~s\n",
                              [ErrLine, File, Text]),
                    error;
                Unexpected ->
                    io:format("Error2 **~p~n",[Unexpected]),
                    error
            end;
        {eof, _EndLine} ->
            eof;
        ScanError ->
            io:format("Error1 **~p~n",[ScanError]),
            error
    end.


%% diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl new file mode 100644 index 0000000000..3f3a10a9c1 --- /dev/null +++ b/lib/mnesia/src/mnesia_tm.erl @@ -0,0 +1,2301 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
-module(mnesia_tm).

-export([start/0,
         init/1,
         non_transaction/5,
         transaction/6,
         commit_participant/5,
         dirty/2,
         display_info/2,
         do_update_op/3,
         get_info/1,
         get_transactions/0,
         info/1,
         mnesia_down/1,
         prepare_checkpoint/2,
         prepare_checkpoint/1,   % Internal
         prepare_snmp/3,
         do_snmp/2,
         put_activity_id/1,
         put_activity_id/2,
         block_tab/1,
         unblock_tab/1,
         fixtable/3]).

%% sys callback functions
-export([system_continue/3,
         system_terminate/4,
         system_code_change/4]).

-include("mnesia.hrl").
-import(mnesia_lib, [set/2, fatal/2, verbose/2, dbg_out/2]).

%% Loop state of the transaction manager process.
%% Format on coordinators is [{Tid, EtsTabList} .....
-record(state, {coordinators = gb_trees:empty(),
                participants = gb_trees:empty(),
                supervisor,
                blocked_tabs = [],
                dirty_queue = [],
                fixed_tabs = []}).

-record(prep, {protocol = sym_trans,
               %% async_dirty | sync_dirty | sym_trans | sync_sym_trans | asym_trans
               records = [],
               prev_tab = [],   % initiate to a non valid table name
               prev_types,
               prev_snmp,
               types}).

-record(participant, {tid,
                      pid,
                      commit,
                      disc_nodes = [],
                      ram_nodes = [],
                      protocol = sym_trans}).

%% Start the transaction manager through mnesia_monitor.
start() ->
    Parent = self(),
    mnesia_monitor:start_proc(?MODULE, ?MODULE, init, [Parent]).

%% Entry point of the transaction manager process: registers the process,
%% brings up schema, recovery and logging, acknowledges the start to
%% Parent and enters the message loop.
init(Parent) ->
    register(?MODULE, self()),
    process_flag(trap_exit, true),

    %% Initialize the schema
    IgnoreFallback = mnesia_monitor:get_env(ignore_fallback_at_startup),
    mnesia_bup:tm_fallback_start(IgnoreFallback),
    mnesia_schema:init(IgnoreFallback),

    %% Handshake and initialize transaction recovery
    mnesia_recover:init(),
    EarlyNodes = mnesia_monitor:init(),
    OtherNodes = mnesia_lib:uniq(EarlyNodes ++ mnesia_lib:all_nodes()) -- [node()],
    mnesia_lib:set(original_nodes, OtherNodes),
    mnesia_recover:connect_nodes(OtherNodes),

    %% Recover transactions, may wait for decision
    case mnesia_monitor:use_dir() of
        false ->
            ignore;
        true ->
            Previous = mnesia_dumper:opt_dump_log(startup), % previous log
            Latest = mnesia_dumper:opt_dump_log(startup),   % latest log
            mnesia_lib:verbose("Initial dump of log during startup: ~p~n",
                               [[Previous, Latest]]),
            mnesia_log:init()
    end,

    mnesia_schema:purge_tmp_files(),
    mnesia_recover:start_garb(),

    ?eval_debug_fun({?MODULE, init}, [{nodes, OtherNodes}]),

    %% Schema events are only subscribed to at debug level debug or trace
    case val(debug) of
        Level when Level == debug; Level == trace ->
            mnesia_subscr:subscribe(whereis(mnesia_event), {table, schema});
        _ ->
            ignore
    end,
    proc_lib:init_ack(Parent, {ok, self()}),
    doit_loop(#state{supervisor = Parent}).

%% Look up Var, falling back to mnesia_lib:other_val/2 when that fails.
val(Var) ->
    case ?catch_val(Var) of
        {'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
        Value -> Value
    end.

%% Answer a request. Callers that sent {Pid, Ref} get the reference
%% echoed back, all others get the node name in its place.
reply({Pid, Ref}, Answer) ->
    Pid ! {?MODULE, Ref, Answer};
reply(Pid, Answer) ->
    Pid ! {?MODULE, node(), Answer}.

%% Answer a request and continue the message loop with State.
reply(From, Answer, State) ->
    reply(From, Answer),
    doit_loop(State).

%% Synchronous request to the local transaction manager.
req(Request) ->
    case whereis(?MODULE) of
        undefined ->
            {error, {node_not_running, node()}};
        TmPid ->
            Ref = make_ref(),
            TmPid ! {{self(), Ref}, Request},
            rec(TmPid, Ref)
    end.

%% Wait for any reply from the local transaction manager.
rec() ->
    rec(whereis(?MODULE)).

rec(undefined) ->
    {error, {node_not_running, node()}};
rec(TmPid) when is_pid(TmPid) ->
    receive
        {?MODULE, _, Answer} ->
            Answer;

        {'EXIT', TmPid, _} ->
            {error, {node_not_running, node()}}
    end.

%% Wait for the reply tagged with Ref, or for the exit of Pid.
rec(Pid, Ref) ->
    receive
        {?MODULE, Ref, Answer} ->
            Answer;
        {'EXIT', Pid, _} ->
            {error, {node_not_running, node()}}
    end.

%% Link to the requesting process, given either {Pid, Ref} or a plain pid.
tmlink({Pid, Ref}) when is_reference(Ref) ->
    link(Pid);
tmlink(Pid) ->
    link(Pid).

%% Extract the pid of the requesting process.
tmpid({Pid, _Ref}) when is_pid(Pid) ->
    Pid;
tmpid(Pid) ->
    Pid.

%% Returns a list of participant transaction Tid's
mnesia_down(Node) ->
    %% A synchronous call is needed in order to avoid a
    %% race with mnesia_tm's coordinator processes
    %% that may restart and acquire new locks.
    %% mnesia_monitor takes care of the sync
    case whereis(?MODULE) of
        undefined ->
            mnesia_monitor:mnesia_down(?MODULE, {Node, []});
        TmPid ->
            TmPid ! {mnesia_down, Node}
    end.

prepare_checkpoint(Nodes, Cp) ->
    rpc:multicall(Nodes, ?MODULE, prepare_checkpoint, [Cp]).

prepare_checkpoint(Cp) ->
    req({prepare_checkpoint, Cp}).

block_tab(Tab) ->
    req({block_tab, Tab}).

unblock_tab(Tab) ->
    req({unblock_tab, Tab}).

%% Message loop of the transaction manager. Every clause ends by
%% re-entering the loop, directly or through reply/3, handle_exit/3 or
%% the sys module.
doit_loop(#state{coordinators = Coords,
                 participants = Parts,
                 supervisor = Sup,
                 blocked_tabs = Blocked,
                 dirty_queue = DirtyQ} = State) ->
    receive
        {_From, {async_dirty, Tid, Commit, Tab}} ->
            case lists:member(Tab, Blocked) of
                true ->
                    Queued = [{async_dirty, Tid, Commit, Tab} | DirtyQ],
                    doit_loop(State#state{dirty_queue = Queued});
                false ->
                    do_async_dirty(Tid, Commit, Tab),
                    doit_loop(State)
            end;

        {From, {sync_dirty, Tid, Commit, Tab}} ->
            case lists:member(Tab, Blocked) of
                true ->
                    Queued = [{sync_dirty, From, Tid, Commit, Tab} | DirtyQ],
                    doit_loop(State#state{dirty_queue = Queued});
                false ->
                    do_sync_dirty(From, Tid, Commit, Tab),
                    doit_loop(State)
            end;

        {From, start_outer} -> %% Create and associate ets_tab with Tid
            case catch ?ets_new_table(mnesia_trans_store, [bag, public]) of
                {'EXIT', Reason} -> %% system limit
                    Msg = "Cannot create an ets table for the "
                        "local transaction store",
                    reply(From, {error, {system_limit, Msg, Reason}}, State);
                Etab ->
                    tmlink(From),
                    Serial = mnesia_recover:incr_trans_tid_serial(),
                    ?ets_insert(Etab, {nodes, node()}),
                    Tid = #tid{pid = tmpid(From), counter = Serial},
                    NewCoords = gb_trees:insert(Tid, [Etab], Coords),
                    reply(From, {new_tid, Tid, Etab},
                          State#state{coordinators = NewCoords})
            end;

        {From, {ask_commit, Protocol, Tid, Commit, DiscNs, RamNs}} ->
            ?eval_debug_fun({?MODULE, doit_ask_commit},
                            [{tid, Tid}, {prot, Protocol}]),
            mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
            Pid =
                case Protocol of
                    asym_trans when node(Tid#tid.pid) /= node() ->
                        Args = [tmpid(From), Tid, Commit, DiscNs, RamNs],
                        spawn_link(?MODULE, commit_participant, Args);
                    _ when node(Tid#tid.pid) /= node() -> %% *_sym_trans
                        reply(From, {vote_yes, Tid}),
                        nopid
                end,
            Participant = #participant{tid = Tid,
                                       pid = Pid,
                                       commit = Commit,
                                       disc_nodes = DiscNs,
                                       ram_nodes = RamNs,
                                       protocol = Protocol},
            NewParts = gb_trees:insert(Tid, Participant, Parts),
            doit_loop(State#state{participants = NewParts});

        {Tid, do_commit} ->
            case gb_trees:lookup(Tid, Parts) of
                none ->
                    mnesia_lib:verbose("Tried to commit a non participant transaction ~p~n",
                                       [Tid]),
                    doit_loop(State);
                {value, P} ->
                    ?eval_debug_fun({?MODULE, do_commit, pre},
                                    [{tid, Tid}, {participant, P}]),
                    case P#participant.pid of
                        nopid ->
                            Commit = P#participant.commit,
                            Member = lists:member(node(), P#participant.disc_nodes),
                            if
                                Member == false ->
                                    ignore;
                                P#participant.protocol == sym_trans ->
                                    mnesia_log:log(Commit);
                                P#participant.protocol == sync_sym_trans ->
                                    mnesia_log:slog(Commit)
                            end,
                            mnesia_recover:note_decision(Tid, committed),
                            do_commit(Tid, Commit),
                            if
                                P#participant.protocol == sync_sym_trans ->
                                    Tid#tid.pid ! {?MODULE, node(), {committed, Tid}};
                                true ->
                                    ignore
                            end,
                            mnesia_locker:release_tid(Tid),
                            transaction_terminated(Tid),
                            ?eval_debug_fun({?MODULE, do_commit, post},
                                            [{tid, Tid}, {pid, nopid}]),
                            doit_loop(State#state{participants =
                                                      gb_trees:delete(Tid, Parts)});
                        Pid when is_pid(Pid) ->
                            Pid ! {Tid, committed},
                            ?eval_debug_fun({?MODULE, do_commit, post},
                                            [{tid, Tid}, {pid, Pid}]),
                            doit_loop(State)
                    end
            end;

        {Tid, simple_commit} ->
            mnesia_recover:note_decision(Tid, committed),
            mnesia_locker:release_tid(Tid),
            transaction_terminated(Tid),
            doit_loop(State);

        {Tid, {do_abort, Reason}} ->
            ?eval_debug_fun({?MODULE, do_abort, pre}, [{tid, Tid}]),
            case gb_trees:lookup(Tid, Parts) of
                none ->
                    mnesia_lib:verbose("Tried to abort a non participant transaction ~p: ~p~n",
                                       [Tid, Reason]),
                    mnesia_locker:release_tid(Tid),
                    doit_loop(State);
                {value, P} ->
                    case P#participant.pid of
                        nopid ->
                            Commit = P#participant.commit,
                            mnesia_recover:note_decision(Tid, aborted),
                            do_abort(Tid, Commit),
                            if
                                P#participant.protocol == sync_sym_trans ->
                                    Tid#tid.pid ! {?MODULE, node(), {aborted, Tid}};
                                true ->
                                    ignore
                            end,
                            transaction_terminated(Tid),
                            mnesia_locker:release_tid(Tid),
                            ?eval_debug_fun({?MODULE, do_abort, post},
                                            [{tid, Tid}, {pid, nopid}]),
                            doit_loop(State#state{participants =
                                                      gb_trees:delete(Tid, Parts)});
                        Pid when is_pid(Pid) ->
                            Pid ! {Tid, {do_abort, Reason}},
                            ?eval_debug_fun({?MODULE, do_abort, post},
                                            [{tid, Tid}, {pid, Pid}]),
                            doit_loop(State)
                    end
            end;

        {From, {add_store, Tid}} -> %% new store for nested transaction
            case catch ?ets_new_table(mnesia_trans_store, [bag, public]) of
                {'EXIT', Reason} -> %% system limit
                    Msg = "Cannot create an ets table for a nested "
                        "local transaction store",
                    reply(From, {error, {system_limit, Msg, Reason}}, State);
                Etab ->
                    NewCoords = add_coord_store(Coords, Tid, Etab),
                    reply(From, {new_store, Etab},
                          State#state{coordinators = NewCoords})
            end;

        {From, {del_store, Tid, Current, Obsolete, PropagateStore}} ->
            opt_propagate_store(Current, Obsolete, PropagateStore),
            NewCoords = del_coord_store(Coords, Tid, Current, Obsolete),
            reply(From, store_erased, State#state{coordinators = NewCoords});

        {'EXIT', Pid, Reason} ->
            handle_exit(Pid, Reason, State);

        {From, {restart, Tid, Store}} ->
            NewCoords = restore_stores(Coords, Tid, Store),
            clear_fixtable([Store]),
            ?ets_match_delete(Store, '_'),
            ?ets_insert(Store, {nodes, node()}),
            reply(From, {restarted, Tid}, State#state{coordinators = NewCoords});

        {delete_transaction, Tid} ->
            %% used to clear transactions which are committed
            %% in coordinator or participant processes
            case gb_trees:is_defined(Tid, Parts) of
                true ->
                    transaction_terminated(Tid),
                    doit_loop(State#state{participants =
                                              gb_trees:delete(Tid, Parts)});
                false ->
                    case gb_trees:lookup(Tid, Coords) of
                        none ->
                            mnesia_lib:verbose("** ERROR ** Tried to delete a non transaction ~p~n",
                                               [Tid]),
                            doit_loop(State);
                        {value, Etabs} ->
                            clear_fixtable(Etabs),
                            erase_ets_tabs(Etabs),
                            transaction_terminated(Tid),
                            doit_loop(State#state{coordinators =
                                                      gb_trees:delete(Tid, Coords)})
                    end
            end;

        {sync_trans_serial, Tid} ->
            %% Do the Lamport thing here
            mnesia_recover:sync_trans_tid_serial(Tid),
            doit_loop(State);

        {From, info} ->
            Info = {info, gb_trees:values(Parts), gb_trees:to_list(Coords)},
            reply(From, Info, State);

        {mnesia_down, N} ->
            mnesia_lib:verbose("Got mnesia_down from ~p, reconfiguring...~n", [N]),
            reconfigure_coordinators(N, gb_trees:to_list(Coords)),

            Tids = gb_trees:keys(Parts),
            reconfigure_participants(N, gb_trees:values(Parts)),
            Cleared = clear_fixtable(N, State),
            mnesia_monitor:mnesia_down(?MODULE, {N, Tids}),
            doit_loop(Cleared);

        {From, {unblock_me, Tab}} ->
            case lists:member(Tab, Blocked) of
                true ->
                    Queued = [{Tab, unblock_me, From} | DirtyQ],
                    doit_loop(State#state{dirty_queue = Queued});
                false ->
                    mnesia_lib:verbose("Wrong dirty Op blocked on ~p ~p ~p",
                                       [node(), Tab, From]),
                    reply(From, unblocked),
                    doit_loop(State)
            end;

        {From, {block_tab, Tab}} ->
            reply(From, ok, State#state{blocked_tabs = [Tab | Blocked]});

        {From, {unblock_tab, Tab}} ->
            StillBlocked = Blocked -- [Tab],
            %% The table may have been blocked more than once; queued
            %% operations are only run when no block remains.
            case lists:member(Tab, StillBlocked) of
                true ->
                    reply(From, ok, State#state{blocked_tabs = StillBlocked});
                false ->
                    mnesia_controller:unblock_table(Tab),
                    Remaining = process_dirty_queue(Tab, DirtyQ),
                    reply(From, ok, State#state{blocked_tabs = StillBlocked,
                                                dirty_queue = Remaining})
            end;

        {From, {prepare_checkpoint, Cp}} ->
            Res = mnesia_checkpoint:tm_prepare(Cp),
            case Res of
                {ok, _Name, IgnoreNew, _Node} ->
                    prepare_pending_coordinators(gb_trees:to_list(Coords), IgnoreNew),
                    prepare_pending_participants(gb_trees:values(Parts), IgnoreNew);
                {error, _Reason} ->
                    ignore
            end,
            reply(From, Res, State);

        {From, {fixtable, [Tab, Lock, Requester]}} ->
            case ?catch_val({Tab, storage_type}) of
                {'EXIT', _} ->
                    reply(From, error, State);
                Storage ->
                    mnesia_lib:db_fixtable(Storage, Tab, Lock),
                    Fixed = manage_fixtable(Tab, Lock, Requester, State),
                    reply(From, node(), Fixed)
            end;

        {system, From, Msg} ->
            mnesia_lib:dbg_out("~p got {system, ~p, ~p}~n", [?MODULE, From, Msg]),
            sys:handle_system_msg(Msg, From, Sup, ?MODULE, [], State);

        Msg ->
            mnesia_lib:verbose("** ERROR ** ~p got unexpected message: ~p~n",
                               [?MODULE, Msg]),
            doit_loop(State)
    end.

%% Perform a dirty operation and send the caught result back to From.
do_sync_dirty(From, Tid, Commit, _Tab) ->
    ?eval_debug_fun({?MODULE, sync_dirty, pre}, [{tid, Tid}]),
    Outcome = (catch do_dirty(Tid, Commit)),
    ?eval_debug_fun({?MODULE, sync_dirty, post}, [{tid, Tid}]),
    From ! {?MODULE, node(), {dirty_res, Outcome}}.

%% Perform a dirty operation; its result, including failures, is dropped.
do_async_dirty(Tid, Commit, _Tab) ->
    ?eval_debug_fun({?MODULE, async_dirty, pre}, [{tid, Tid}]),
    catch do_dirty(Tid, Commit),
    ?eval_debug_fun({?MODULE, async_dirty, post}, [{tid, Tid}]).


%% Process items in fifo order.
%% The queue is built by prepending, so folding from the right handles
%% the oldest item first. Items for other tables are kept in place.
process_dirty_queue(Tab, Queue) ->
    lists:foldr(
      fun(Item, Kept) ->
              case Item of
                  {async_dirty, Tid, Commit, Tab} ->
                      do_async_dirty(Tid, Commit, Tab),
                      Kept;
                  {sync_dirty, From, Tid, Commit, Tab} ->
                      do_sync_dirty(From, Tid, Commit, Tab),
                      Kept;
                  {Tab, unblock_me, From} ->
                      reply(From, unblocked),
                      Kept;
                  _ ->
                      [Item | Kept]
              end
      end,
      [],
      Queue).

%% Enter the pending entry found in each coordinator's first store into
%% the checkpoint, unless the transaction is listed in IgnoreNew.
prepare_pending_coordinators([], _IgnoreNew) ->
    ok;
prepare_pending_coordinators([{Tid, [Store | _Etabs]} | Rest], IgnoreNew) ->
    case catch ?ets_lookup(Store, pending) of
        [Pending] ->
            case lists:member(Tid, IgnoreNew) of
                true ->
                    ignore;
                false ->
                    mnesia_checkpoint:tm_enter_pending(Pending)
            end;
        [] ->
            ok;
        {'EXIT', _} ->
            ok
    end,
    prepare_pending_coordinators(Rest, IgnoreNew).

%% Enter every participant transaction into the checkpoint, unless the
%% transaction is listed in IgnoreNew.
prepare_pending_participants([], _IgnoreNew) ->
    ok;
prepare_pending_participants([Part | Rest], IgnoreNew) ->
    Tid = Part#participant.tid,
    DiscNs = Part#participant.disc_nodes,
    RamNs = Part#participant.ram_nodes,
    case lists:member(Tid, IgnoreNew) of
        true ->
            ignore;
        false ->
            mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs)
    end,
    prepare_pending_participants(Rest, IgnoreNew).

%% React to an 'EXIT' message received by the transaction manager.
handle_exit(Pid, _Reason, State) when node(Pid) /= node() ->
    %% We got exit from a remote fool
    doit_loop(State);

handle_exit(Pid, _Reason, State) when Pid == State#state.supervisor ->
    %% Our supervisor has died, time to stop
    do_stop(State);

handle_exit(Pid, Reason, State) ->
    Coordinators = gb_trees:to_list(State#state.coordinators),
    %% Check if it is a coordinator
    case pid_search_delete(Pid, Coordinators) of
        {{Tid, Etabs}, RemainingCoords} ->
            %% A local coordinator has died and
            %% we must determine the outcome of the
            %% transaction and tell mnesia_tm on the
            %% other nodes about it and then recover
            %% locally.
            recover_coordinator(Tid, Etabs),
            doit_loop(State#state{coordinators = RemainingCoords});

        {none, _} ->
            %% Check if it is a participant
            Participants = gb_trees:values(State#state.participants),
            case mnesia_lib:key_search_delete(Pid, #participant.pid, Participants) of
                {Dead = #participant{}, _Others} ->
                    mnesia_lib:fatal("Participant ~p in transaction ~p died ~p~n",
                                     [Dead#participant.pid, Dead#participant.tid, Reason]),
                    Pruned = gb_trees:delete(Dead#participant.tid,
                                             State#state.participants),
                    doit_loop(State#state{participants = Pruned});
                {none, _} ->
                    %% We got exit from a local fool
                    doit_loop(State)
            end
    end.

%% Clean up after a local coordinator process that died.
%%   Tid   - the transaction id of the dead coordinator
%%   Etabs - the transaction's ets stores; the first one is used
%% The outcome is determined and told to the other involved nodes, the
%% local part of the transaction is recovered accordingly, and finally
%% the stores are erased and the locks of Tid released.
%% Returns the result of mnesia_locker:release_tid/1.
recover_coordinator(Tid, Etabs) ->
    verbose("Coordinator ~p in transaction ~p died.~n", [Tid#tid.pid, Tid]),

    Store = hd(Etabs),
    CheckNodes = get_elements(nodes,Store),
    TellNodes = CheckNodes -- [node()],
    case catch arrange(Tid, Store, async) of
        {'EXIT', Reason} ->
            %% The format string needs one directive per argument; it used
            %% to carry a single ~p for [Tid, Reason], so the reason could
            %% not be rendered.
            dbg_out("Recovery of coordinator ~p failed: ~p~n", [Tid, Reason]),
            %% The commit records could not be arranged, so only the
            %% outcome is settled and announced; nothing is applied locally.
            Protocol = asym_trans,
            tell_outcome(Tid, Protocol, node(), CheckNodes, TellNodes);
        {_N, Prep} ->
            %% Tell the participants about the outcome
            Protocol = Prep#prep.protocol,
            Outcome = tell_outcome(Tid, Protocol, node(), CheckNodes, TellNodes),

            %% Recover locally
            CR = Prep#prep.records,
            {DiscNs, RamNs} = commit_nodes(CR, [], []),
            case lists:keysearch(node(), #commit.node, CR) of
                {value, Local} ->
                    ?eval_debug_fun({?MODULE, recover_coordinator, pre},
                                    [{tid, Tid}, {outcome, Outcome}, {prot, Protocol}]),
                    recover_coordinator(Tid, Protocol, Outcome, Local, DiscNs, RamNs),
                    ?eval_debug_fun({?MODULE, recover_coordinator, post},
                                    [{tid, Tid}, {outcome, Outcome}, {prot, Protocol}]);
                false ->  %% Killed before the store had been copied
                    ok    %% to the new nested transaction store.
            end
    end,
    erase_ets_tabs(Etabs),
    transaction_terminated(Tid),
    mnesia_locker:release_tid(Tid).

%% Apply the decided Outcome locally, depending on the commit protocol.
%% The symmetric protocols only note the decision; asym_trans logs a
%% full decision record including the involved nodes.
recover_coordinator(Tid, Protocol, committed, Local, _, _)
  when Protocol == sym_trans; Protocol == sync_sym_trans ->
    mnesia_recover:note_decision(Tid, committed),
    do_dirty(Tid, Local);
recover_coordinator(Tid, Protocol, aborted, _Local, _, _)
  when Protocol == sym_trans; Protocol == sync_sym_trans ->
    mnesia_recover:note_decision(Tid, aborted);

recover_coordinator(Tid, asym_trans, committed, Local, DiscNs, RamNs) ->
    Decision = #decision{tid = Tid,
                         outcome = committed,
                         disc_nodes = DiscNs,
                         ram_nodes = RamNs},
    mnesia_recover:log_decision(Decision),
    do_commit(Tid, Local);
recover_coordinator(Tid, asym_trans, aborted, Local, DiscNs, RamNs) ->
    Decision = #decision{tid = Tid,
                         outcome = aborted,
                         disc_nodes = DiscNs,
                         ram_nodes = RamNs},
    mnesia_recover:log_decision(Decision),
    do_abort(Tid, Local).

%% Keep only Store for transaction Tid and erase its other stores.
restore_stores(Coords, Tid, Store) ->
    Others = lists:delete(Store, gb_trees:get(Tid, Coords)),
    erase_ets_tabs(Others),
    gb_trees:update(Tid, [Store], Coords).

%% Push a new store on top of the store list of transaction Tid.
add_coord_store(Coords, Tid, Etab) ->
    Existing = gb_trees:get(Tid, Coords),
    gb_trees:update(Tid, [Etab | Existing], Coords).

%% Delete the Obsolete store; Current and Obsolete must be the two
%% topmost stores of Tid, in either order.
del_coord_store(Coords, Tid, Current, Obsolete) ->
    Below =
        case gb_trees:get(Tid, Coords) of
            [Obsolete, Current | Tail] -> Tail;
            [Current, Obsolete | Tail] -> Tail
        end,
    ?ets_delete_table(Obsolete),
    gb_trees:update(Tid, [Current | Below], Coords).

%% Delete every ets table in the list.
erase_ets_tabs([]) ->
    ok;
erase_ets_tabs([Tab | Rest]) ->
    ?ets_delete_table(Tab),
    erase_ets_tabs(Rest).

%% Clear all fixtables of one transaction
clear_fixtable([Store | _]) ->
    Release =
        fun({Tab, Node}) ->
                rpc:cast(Node, ?MODULE, fixtable, [Tab, false, self()])
        end,
    lists:foreach(Release, get_elements(fixtable, Store)).

%% Release every table fixed on behalf of Node and forget the node.
clear_fixtable(Node, State = #state{fixed_tabs = FixedTabs}) ->
    case mnesia_lib:key_search_delete(Node, 1, FixedTabs) of
        {{Node, Tabs}, Remaining} ->
            Unfix =
                fun(Tab) ->
                        case ?catch_val({Tab, storage_type}) of
                            {'EXIT', _} ->
                                ignore;
                            Storage ->
                                mnesia_lib:db_fixtable(Storage, Tab, false)
                        end
                end,
            lists:foreach(Unfix, Tabs),
            State#state{fixed_tabs = Remaining};
        {none, _} ->
            State
    end.

%% Record (Lock = true) or forget (Lock = false) that Tab is fixed on
%% behalf of the node where Requester runs.
manage_fixtable(Tab, true, Requester, State = #state{fixed_tabs = FixedTabs}) ->
    Node = node(Requester),
    Entry =
        case mnesia_lib:key_search_delete(Node, 1, FixedTabs) of
            {{Node, Tabs}, Remaining} -> {Node, [Tab | Tabs]};
            {none, Remaining} -> {Node, [Tab]}
        end,
    State#state{fixed_tabs = [Entry | Remaining]};
manage_fixtable(Tab, false, Requester, State = #state{fixed_tabs = FixedTabs}) ->
    Node = node(Requester),
    case mnesia_lib:key_search_delete(Node, 1, FixedTabs) of
        {{Node, Tabs}, Remaining} ->
            case lists:delete(Tab, Tabs) of
                [] -> State#state{fixed_tabs = Remaining};
                Left -> State#state{fixed_tabs = [{Node, Left} | Remaining]}
            end;
        {none, _} ->
            State   % Hmm? Safeguard
    end.

%% Search a list of {Tid, Stores} coordinator entries for transactions
%% owned by Pid. Returns {Entry, Rest} or {none, Rest}, where Rest is a
%% gb_tree built from the entries not owned by Pid. When several entries
%% match, the last one is returned and all of them are left out of Rest.
pid_search_delete(Pid, Entries) ->
    pid_search_delete(Pid, Entries, none, []).

pid_search_delete(_Pid, [], Found, Kept) ->
    {Found, gb_trees:from_orddict(lists:reverse(Kept))};
pid_search_delete(Pid, [Entry = {Tid, _Stores} | Rest], _Found, Kept)
  when Tid#tid.pid == Pid ->
    pid_search_delete(Pid, Rest, Entry, Kept);
pid_search_delete(Pid, [Entry | Rest], Found, Kept) ->
    pid_search_delete(Pid, Rest, Found, [Entry | Kept]).

%% Book-keeping common to every way a transaction ends.
transaction_terminated(Tid) ->
    mnesia_checkpoint:tm_exit_pending(Tid),
    Owner = Tid#tid.pid,
    case node(Owner) == node() of
        true ->
            unlink(Owner);
        false -> %% Do the Lamport thing here
            mnesia_recover:sync_trans_tid_serial(Tid)
    end.

%% Run Fun with Args as a dirty activity of kind ActivityKind.
%% If there is a surrounding transaction, we inherit its context and the
%% function is run as a nested transaction instead.
non_transaction(OldState = {_, _, Trans}, Fun, Args, ActivityKind, Mod)
  when Trans /= non_transaction ->
    Kind =
        case ActivityKind of
            sync_dirty -> sync;
            _ -> async
        end,
    case transaction(OldState, Fun, Args, infinity, Mod, Kind) of
        {aborted, Why} ->
            exit(Why);
        {atomic, Value} ->
            Value
    end;
non_transaction(OldState, Fun, Args, ActivityKind, Mod) ->
    put(mnesia_activity_state, {Mod, {ActivityKind, self()}, non_transaction}),
    %% I want something unique here, references are expensive
    Tag = mNeSia_nOn_TrAnSacTioN,
    Tagged = (catch {Tag, apply(Fun, Args)}),
    %% Put back whatever activity state the caller had
    case OldState of
        undefined -> erase(mnesia_activity_state);
        _ -> put(mnesia_activity_state, OldState)
    end,
    case Tagged of
        {Tag, {'EXIT', Reason}} ->
            exit(Reason);
        {Tag, {aborted, Reason}} ->
            mnesia:abort(Reason);
        {Tag, Value} ->
            Value;
        {'EXIT', Reason} ->
            exit(Reason);
        Thrown ->
            throw(Thrown)
    end.

%% Run Fun with Args as a transaction. OldTidTs is the activity state of
%% the calling process and decides whether this becomes an outer or a
%% nested transaction.
transaction(OldTidTs, Fun, Args, Retries, Mod, Type) ->
    Factor = 1,
    case OldTidTs of
        {_, _, non_transaction} ->    % Transaction inside ?sync_dirty
            Result = execute_outer(Mod, Fun, Args, Factor, Retries, Type),
            put(mnesia_activity_state, OldTidTs),
            Result;
        {OldMod, Tid, Ts} ->          % Nested
            execute_inner(Mod, Tid, OldMod, Ts, Fun, Args, Factor, Retries, Type);
        undefined ->                  % Outer
            execute_outer(Mod, Fun, Args, Factor, Retries, Type);
        _ ->                          % Bad nesting
            {aborted, nested_transaction}
    end.

%% Ask the transaction manager for a new transaction id and store, make
%% them the current activity state and run the transaction.
execute_outer(Mod, Fun, Args, Factor, Retries, Type) ->
    case req(start_outer) of
        {new_tid, Tid, Store} ->
            put(mnesia_activity_state, {Mod, Tid, #tidstore{store = Store}}),
            execute_transaction(Fun, Args, Factor, Retries, Type);
        {error, Reason} ->
            {aborted, Reason}
    end.
%% Start a nested transaction: a fresh store is requested, filled with
%% a copy of the parent's store and pushed on top of the store stack
%% before the fun is run.
execute_inner(Mod, Tid, OldMod, Ts, Fun, Args, Factor, Retries, Type) ->
    case req({add_store, Tid}) of
        {error, Reason} ->
            {aborted, Reason};
        {new_store, NewStore} ->
            ParentStore = Ts#tidstore.store,
            copy_ets(ParentStore, NewStore),
            UpStores = [{OldMod, ParentStore} | Ts#tidstore.up_stores],
            NestedTs = Ts#tidstore{level = 1 + Ts#tidstore.level,
                                   store = NewStore,
                                   up_stores = UpStores},
            put(mnesia_activity_state, {Mod, Tid, NestedTs}),
            execute_transaction(Fun, Args, Factor, Retries, Type)
    end.

%% Copy every object of the ets table From into To, key by key.
copy_ets(From, To) ->
    do_copy_ets(?ets_first(From), From, To).

do_copy_ets('$end_of_table', _From, _To) ->
    ok;
do_copy_ets(Key, From, To) ->
    insert_objs(?ets_lookup(From, Key), To),
    do_copy_ets(?ets_next(From, Key), From, To).

%% Insert each object of the list into Tab.
insert_objs(Objs, Tab) ->
    lists:foreach(fun(Obj) -> ?ets_insert(Tab, Obj) end, Objs).

%% Run the fun and commit.  Exits are handed to check_exit/6 (which
%% may restart the transaction), a thrown value aborts the
%% transaction with {throw, Value}.
execute_transaction(Fun, Args, Factor, Retries, Type) ->
    case catch apply_fun(Fun, Args, Type) of
        {'EXIT', Reason} ->
            check_exit(Fun, Args, Factor, Retries, Reason, Type);
        {atomic, Value} ->
            mnesia_lib:incr_counter(trans_commits),
            erase(mnesia_activity_state),
            %% no need to clear locks, already done by commit ...
            %% Flush any unprocessed mnesia_down messages we might have
            flush_downs(),
            catch unlink(whereis(?MODULE)),
            {atomic, Value};
        {nested_atomic, Value} ->
            mnesia_lib:incr_counter(trans_commits),
            {atomic, Value};
        Thrown ->
            %% User called throw
            return_abort(Fun, Args, {aborted, {throw, Thrown}})
    end.

%% Apply the fun and try to commit.  A refused commit is reported in
%% the same shape as a caught exit so that the caller handles both
%% alike.
apply_fun(Fun, Args, Type) ->
    Result = apply(Fun, Args),
    case t_commit(Type) of
        do_commit ->
            {atomic, Result};
        do_commit_nested ->
            {nested_atomic, Result};
        {do_abort, {aborted, Reason}} ->
            {'EXIT', {aborted, Reason}};
        {do_abort, Reason} ->
            {'EXIT', {aborted, Reason}}
    end.
%% Decide what to do with an exit caught while running a transaction:
%% deadlocks, dead nodes and bad commits may be retried, anything
%% else aborts the transaction.
check_exit(Fun, Args, Factor, Retries, Reason, Type) ->
    case Reason of
        {aborted, #cyclic{} = Cyclic} ->
            maybe_restart(Fun, Args, Factor, Retries, Type, Cyclic);
        {aborted, {node_not_running, _} = Why} ->
            maybe_restart(Fun, Args, Factor, Retries, Type, Why);
        {aborted, {bad_commit, _} = Why} ->
            maybe_restart(Fun, Args, Factor, Retries, Type, Why);
        _ ->
            return_abort(Fun, Args, Reason)
    end.

%% Restart only when retries remain and we are at the top level;
%% nested transactions are aborted so that the failure propagates
%% outwards.
maybe_restart(Fun, Args, Factor, Retries, Type, Why) ->
    {Mod, Tid, Ts} = get(mnesia_activity_state),
    case try_again(Retries) of
        yes when Ts#tidstore.level == 1 ->
            restart(Mod, Tid, Ts, Fun, Args, Factor, Retries, Type, Why);
        yes ->
            return_abort(Fun, Args, Why);
        no ->
            return_abort(Fun, Args, {aborted, nomore})
    end.

try_again(infinity) -> yes;
try_again(Left) when is_number(Left), Left > 1 -> yes;
try_again(_) -> no.

%% We can only restart toplevel transactions.
%% If a deadlock situation occurs in a nested transaction
%% the whole thing including all nested transactions needs to be
%% restarted.  The stack is thus popped by a consecutive series of
%% exit({aborted, #cyclic{}}) calls

restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
    mnesia_lib:incr_counter(trans_restarts),
    Retries = decr(Retries0),
    case Why of
        {bad_commit, _N} ->
            abort_and_rerun_outer(Mod, Tid, Fun, Args, Retries, Type, Why);
        {node_not_running, _N} ->
            %% Avoids hanging in receive_release_tid_ack
            abort_and_rerun_outer(Mod, Tid, Fun, Args, Retries, Type, Why);
        _ ->
            SleepTime = mnesia_lib:random_time(Factor0, Tid#tid.counter),
            dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
            if
                Factor0 /= 10 ->
                    ignore;
                true ->
                    %% Our serial may be much larger than other nodes ditto
                    AllNodes = val({current, db_nodes}),
                    verbose("Sync serial ~p~n", [Tid]),
                    rpc:abcast(AllNodes, ?MODULE, {sync_trans_serial, Tid})
            end,
            intercept_friends(Tid, Ts),
            Store = Ts#tidstore.store,
            Nodes = get_elements(nodes, Store),
            ?MODULE ! {self(), {restart, Tid, Store}},
            mnesia_locker:send_release_tid(Nodes, Tid),
            timer:sleep(SleepTime),
            mnesia_locker:receive_release_tid_acc(Nodes, Tid),
            case get_restarted(Tid) of
                {restarted, Tid} ->
                    execute_transaction(Fun, Args, Factor0 + 1,
                                        Retries, Type);
                {error, Reason} ->
                    mnesia:abort(Reason)
            end
    end.

%% Abort the current attempt completely, back off for a random time
%% and start over as a brand new outer transaction.
abort_and_rerun_outer(Mod, Tid, Fun, Args, Retries, Type, Why) ->
    return_abort(Fun, Args, Why),
    Factor = 1,
    SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
    dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
    timer:sleep(SleepTime),
    execute_outer(Mod, Fun, Args, Factor, Retries, Type).

%% Wait for the reply to the restart request sent in restart/9.
get_restarted(Tid) ->
    case rec() of
        {restarted, Tid} = Reply ->
            Reply;
        {error, _} = Reply ->
            Reply;
        _ ->
            %% We could get a couple of aborts too many.
            get_restarted(Tid)
    end.

decr(infinity) -> infinity;
decr(Left) when is_integer(Left), Left > 1 -> Left - 1;
decr(_Left) -> 0.
%% Abort the current (possibly nested) transaction of the calling
%% process.  At the top level the locks are released and the activity
%% state erased; for a nested transaction the parent's store is
%% reinstated and restartable reasons are re-raised as exits so that
%% the outer levels are unwound as well.
return_abort(Fun, Args, Reason) ->
    {_Mod, Tid, Ts} = get(mnesia_activity_state),
    dbg_out("Transaction ~p calling ~p with ~p failed: ~n ~p~n",
            [Tid, Fun, Args, Reason]),
    CurrentStore = Ts#tidstore.store,
    Nodes = get_elements(nodes, CurrentStore),
    intercept_friends(Tid, Ts),
    catch mnesia_lib:incr_counter(trans_failures),
    case Ts#tidstore.level of
        1 ->
            mnesia_locker:async_release_tid(Nodes, Tid),
            ?MODULE ! {delete_transaction, Tid},
            erase(mnesia_activity_state),
            flush_downs(),
            catch unlink(whereis(?MODULE)),
            {aborted, mnesia_lib:fix_error(Reason)};
        Level ->
            %% Nested transaction
            [{OldMod, ParentStore} | UpStores] = Ts#tidstore.up_stores,
            req({del_store, Tid, ParentStore, CurrentStore, true}),
            ParentTs = Ts#tidstore{store = ParentStore,
                                   up_stores = UpStores,
                                   level = Level - 1},
            put(mnesia_activity_state, {OldMod, Tid, ParentTs}),
            case Reason of
                #cyclic{} ->
                    exit({aborted, Reason});
                {node_not_running, _N} ->
                    exit({aborted, Reason});
                {bad_commit, _N} ->
                    exit({aborted, Reason});
                _ ->
                    {aborted, mnesia_lib:fix_error(Reason)}
            end
    end.

%% Throw away votes and mnesia_down messages that are still waiting
%% in the mailbox of the calling process.
flush_downs() ->
    receive
        {?MODULE, _, _} -> flush_downs(); % Votes
        {mnesia_down, _} -> flush_downs()
    after 0 ->
            flushed
    end.

%% Install an activity state in the calling process.  For a real
%% transaction state the process (or a stop fun) is also registered
%% as a friend in the transaction store.
put_activity_id(MTT) ->
    put_activity_id(MTT, undefined).

put_activity_id(undefined, _) ->
    erase_activity_id();
put_activity_id({Mod, Tid = #tid{}, Ts = #tidstore{}}, Fun) ->
    flush_downs(),
    Friend =
        case is_function(Fun) of
            true -> {stop, Fun};
            false -> self()
        end,
    ?ets_insert(Ts#tidstore.store, {friends, Friend}),
    put(mnesia_activity_state, {Mod, Tid, Ts});
put_activity_id(SimpleState, _) ->
    put(mnesia_activity_state, SimpleState).

erase_activity_id() ->
    flush_downs(),
    erase(mnesia_activity_state).

%% Values of all {Type, Val} objects in Store; [] when the lookup
%% fails.
get_elements(Type, Store) ->
    case catch ?ets_lookup(Store, Type) of
        {'EXIT', _} ->
            [];
        Objs ->
            [Val || {_, Val} <- Objs]
    end.
%% When the last argument is `true', carry the nodes, fixtable and
%% friends entries of the Obsolete store over to Current.
opt_propagate_store(_Current, _Obsolete, false) ->
    ok;
opt_propagate_store(Current, Obsolete, true) ->
    propagate_store(Current, nodes, get_elements(nodes, Obsolete)),
    propagate_store(Current, fixtable, get_elements(fixtable, Obsolete)),
    propagate_store(Current, friends, get_elements(friends, Obsolete)).

propagate_store(Store, Var, Vals) ->
    lists:foreach(fun(Val) -> ?ets_insert(Store, {Var, Val}) end, Vals).

%% Tell all processes that are cooperating with the current transaction
intercept_friends(_Tid, Ts) ->
    intercept_best_friend(get_elements(friends, Ts#tidstore.store), false).

%% Stop funs are always run.  Only the first pid in the list is
%% notified and waited for; later pids are skipped.
intercept_best_friend([], _Notified) ->
    ok;
intercept_best_friend([{stop, StopFun} | Friends], Notified) ->
    catch StopFun(),
    intercept_best_friend(Friends, Notified);
intercept_best_friend([Pid | Friends], false) ->
    Pid ! {activity_ended, undefined, self()},
    wait_for_best_friend(Pid, 0),
    intercept_best_friend(Friends, true);
intercept_best_friend([_ | Friends], true) ->
    intercept_best_friend(Friends, true).

%% Wait until Pid has acknowledged or died, polling its liveness
%% each time the receive times out.
wait_for_best_friend(Pid, Timeout) ->
    receive
        {'EXIT', Pid, _} -> ok;
        {activity_ended, _, Pid} -> ok
    after Timeout ->
            case my_process_is_alive(Pid) of
                true -> wait_for_best_friend(Pid, 1000);
                false -> ok
            end
    end.

my_process_is_alive(Pid) ->
    case catch erlang:is_process_alive(Pid) of % New BIF in R5
        {'EXIT', _} ->
            %% Pre R5 backward compatibility
            case process_info(Pid, message_queue_len) of
                undefined -> false;
                _ -> true
            end;
        true ->
            true;
        false ->
            false
    end.

%% Perform one dirty operation on behalf of the calling process.
dirty(Protocol, Item) ->
    {{Tab, Key}, _Val, _Op} = Item,
    Tid = {dirty, self()},
    Prepared = prepare_items(Tid, Tab, Key, [Item], #prep{protocol = Protocol}),
    CommitRecs = Prepared#prep.records,
    case Protocol of
        async_dirty ->
            %% Send commit records to the other involved nodes,
            %% but do only wait for one node to complete.
            %% Preferably, the local node if possible.
            ReadNode = val({Tab, where_to_read}),
            {WaitFor, FirstRes} = async_send_dirty(Tid, CommitRecs, Tab, ReadNode),
            rec_dirty(WaitFor, FirstRes);
        sync_dirty ->
            %% Send commit records to the other involved nodes,
            %% and wait for all nodes to complete
            {WaitFor, FirstRes} = sync_send_dirty(Tid, CommitRecs, Tab, []),
            rec_dirty(WaitFor, FirstRes);
        _ ->
            mnesia:abort({bad_activity, Protocol})
    end.

%% This is the commit function.  The first thing it does
%% is to find out which nodes have been participating
%% in this particular transaction; all of the mnesia_locker:lock*
%% functions insert the names of the nodes where they acquire locks
%% into the local shadow Store.
%% This function executes in the context of the user process.
t_commit(Type) ->
    {_Mod, Tid, Ts} = get(mnesia_activity_state),
    Store = Ts#tidstore.store,
    case Ts#tidstore.level of
        1 ->
            intercept_friends(Tid, Ts),
            %% N is number of updates
            {Protocol, Records} =
                case arrange(Tid, Store, Type) of
                    {N, Prep} when N > 0 ->
                        {Prep#prep.protocol, Prep#prep.records};
                    {0, Prep} ->
                        {read_only, Prep#prep.records}
                end,
            multi_commit(Protocol, Tid, Records, Store);
        Level ->
            %% nested commit
            [{OldMod, Obsolete} | UpStores] = Ts#tidstore.up_stores,
            req({del_store, Tid, Store, Obsolete, false}),
            ParentTs = Ts#tidstore{store = Store,
                                   up_stores = UpStores,
                                   level = Level - 1},
            put(mnesia_activity_state, {OldMod, Tid, ParentTs}),
            do_commit_nested
    end.

%% This function arranges for all objects we shall write in S to be
%% in a list of {Node, CommitRecord}
%% Important function for the performance of mnesia.
%% Walks the transaction store and builds the commit records.
%% Returns {NumberOfUpdates, #prep{}}; a failure while arranging
%% aborts the transaction.
arrange(Tid, Store, Type) ->
    %% The local node is always included
    Nodes = get_elements(nodes, Store),
    EmptyRecs = prep_recs(Nodes, []),
    FirstKey = ?ets_first(Store),
    Prep0 =
        case Type of
            async -> #prep{protocol = sym_trans, records = EmptyRecs};
            sync -> #prep{protocol = sync_sym_trans, records = EmptyRecs}
        end,
    case catch do_arrange(Tid, Store, FirstKey, Prep0, 0) of
        {'EXIT', Reason} ->
            dbg_out("do_arrange failed ~p ~p~n", [Reason, Tid]),
            AbortReason =
                case Reason of
                    {aborted, Inner} -> Inner;
                    _ -> Reason
                end,
            mnesia:abort(AbortReason);
        {Count, Prep} ->
            {Count, Prep#prep{records = reverse(Prep#prep.records)}}
    end.

%% The per-storage item lists are accumulated back to front while
%% arranging; put them in the order the operations were issued.
reverse(CommitRecs) ->
    lists:map(
      fun(C = #commit{ram_copies = Ram, disc_copies = DC,
                      disc_only_copies = DOC, snmp = Snmp}) ->
              C#commit{ram_copies = lists:reverse(Ram),
                       disc_copies = lists:reverse(DC),
                       disc_only_copies = lists:reverse(DOC),
                       snmp = lists:reverse(Snmp)}
      end,
      CommitRecs).

%% Prepend one empty presume_commit record per node to Recs.
prep_recs(Nodes, Recs) ->
    lists:foldl(
      fun(Node, Acc) ->
              [#commit{decision = presume_commit, node = Node} | Acc]
      end,
      Recs, Nodes).
%% storage_types is a list of {Node, Storage} tuples
%% where each tuple represents an active replica
%%
%% Iterates over the keys of the transaction store.  {Tab, Key} keys
%% hold ordinary operations, `op' holds schema operations and
%% `restore_op' a pending restore; every other key is skipped without
%% being counted.
do_arrange(Tid, Store, {Tab, Key} = Oid, Prep, N) ->
    Items = ?ets_lookup(Store, Oid), %% Store is a bag
    Prep2 = prepare_items(Tid, Tab, Key, Items, Prep),
    do_arrange(Tid, Store, ?ets_next(Store, Oid), Prep2, N + 1);
do_arrange(Tid, Store, op, Prep, N) ->
    Items = ?ets_lookup(Store, op), %% Store is a bag
    Prep2 = prepare_schema_items(Tid, Items, Prep),
    do_arrange(Tid, Store, ?ets_next(Store, op), Prep2, N + 1);
do_arrange(Tid, Store, restore_op, Prep, N) ->
    [{restore_op, R}] = ?ets_lookup(Store, restore_op),
    Fun = fun({Tab, Key}, CommitRecs, _RecName, Where, Snmp) ->
                  Item = [{{Tab, Key}, {Tab, Key}, delete}],
                  do_prepare_items(Tid, Tab, Key, Where, Snmp, Item, CommitRecs);
             (BupRec, CommitRecs, RecName, Where, Snmp) ->
                  Tab = element(1, BupRec),
                  Key = element(2, BupRec),
                  %% The record is written under the table's record
                  %% name, which may differ from the table name.
                  Rec = case Tab == RecName of
                            true -> BupRec;
                            false -> setelement(1, BupRec, RecName)
                        end,
                  Item = [{{Tab, Key}, Rec, write}],
                  do_prepare_items(Tid, Tab, Key, Where, Snmp, Item, CommitRecs)
          end,
    Recs2 = mnesia_schema:arrange_restore(R, Fun, Prep#prep.records),
    Prep2 = Prep#prep{protocol = asym_trans, records = Recs2},
    do_arrange(Tid, Store, ?ets_next(Store, restore_op), Prep2, N + 1);
do_arrange(_Tid, _Store, '$end_of_table', Prep, N) ->
    {N, Prep};
do_arrange(Tid, Store, IgnoredKey, Prep, N) ->
    %% locks, nodes ... local atoms...
    do_arrange(Tid, Store, ?ets_next(Store, IgnoredKey), Prep, N).

%% Returns a prep record with all items in reverse order
prepare_schema_items(Tid, Items, Prep) ->
    DbNodes = val({current, db_nodes}),
    Types = [{Node, schema_ops} || Node <- DbNodes],
    Recs = prepare_nodes(Tid, Types, Items, Prep#prep.records, schema),
    Prep#prep{protocol = asym_trans, records = Recs}.
%% Returns a prep record with all items in reverse order
%%
%% The replica and snmp information of the previously handled table
%% is cached in the prep record, so consecutive items for the same
%% table avoid the lookups.
prepare_items(Tid, Tab, Key, Items, Prep) when Prep#prep.prev_tab == Tab ->
    Recs = do_prepare_items(Tid, Tab, Key,
                            Prep#prep.prev_types, Prep#prep.prev_snmp,
                            Items, Prep#prep.records),
    Prep#prep{records = Recs};

prepare_items(Tid, Tab, Key, Items, Prep) ->
    case val({Tab, where_to_commit}) of
        [] ->
            mnesia:abort({no_exists, Tab});
        {blocked, _} ->
            %% Wait until the table is unblocked, then try again
            unblocked = req({unblock_me, Tab}),
            prepare_items(Tid, Tab, Key, Items, Prep);
        Types ->
            Snmp = val({Tab, snmp}),
            Recs = do_prepare_items(Tid, Tab, Key, Types,
                                    Snmp, Items, Prep#prep.records),
            Prep2 = Prep#prep{records = Recs, prev_tab = Tab,
                              prev_types = Types, prev_snmp = Snmp},
            check_prep(Prep2, Types)
    end.

do_prepare_items(Tid, Tab, Key, Types, Snmp, Items, Recs) ->
    WithSnmp = prepare_snmp(Tid, Tab, Key, Types, Snmp, Items, Recs), % May exit
    prepare_nodes(Tid, Types, Items, WithSnmp, normal).

%% Builds the snmp operation list for one key; [] when the table has
%% no snmp structure.
prepare_snmp(Tab, Key, Items) ->
    case val({Tab, snmp}) of
        [] ->
            [];
        Ustruct when Key /= '_' ->
            {_Oid, _Val, Op} = hd(Items),
            %% Still making snmp oid (not used) because we want to catch errors here
            %% And also it keeps backwards comp. with old nodes.
            SnmpOid = mnesia_snmp_hook:key_to_oid(Tab, Key, Ustruct), % May exit
            [{Op, Tab, Key, SnmpOid}];
        _ ->
            [{clear_table, Tab}]
    end.

%% Adds the snmp operation matching the first item to the commit
%% records; a no-op for tables without an snmp structure.
prepare_snmp(_Tid, _Tab, _Key, _Types, [], _Items, Recs) ->
    Recs;

prepare_snmp(Tid, Tab, Key, Types, Us, Items, Recs) ->
    case Key of
        '_' ->
            prepare_nodes(Tid, Types, [{clear_table, Tab}], Recs, snmp);
        _ ->
            {_Oid, _Val, Op} = hd(Items),
            SnmpOid = mnesia_snmp_hook:key_to_oid(Tab, Key, Us), % May exit
            prepare_nodes(Tid, Types, [{Op, Tab, Key, SnmpOid}], Recs, snmp)
    end.
%% Tracks whether every table touched so far has the same replica
%% layout.  The first layout seen is remembered in #prep.types; as
%% soon as a different one shows up the protocol is switched to
%% asym_trans.
check_prep(Prep, Types) when Prep#prep.types == Types ->
    Prep;
check_prep(Prep, Types) when Prep#prep.types == undefined ->
    Prep#prep{types = Types};
check_prep(Prep, _Types) ->
    Prep#prep{protocol = asym_trans}.

%% Returns a list of commit records
%% For each {Node, Storage} the node's commit record is picked out of
%% the list C, extended with Items and put in the result; records of
%% nodes that are not mentioned are kept at the end.
prepare_nodes(Tid, [{Node, Storage} | Rest], Items, C, Kind) ->
    {Rec, C2} = pick_node(Tid, Node, C, []),
    Rec2 = prepare_node(Node, Storage, Items, Rec, Kind),
    [Rec2 | prepare_nodes(Tid, Rest, Items, C2, Kind)];
prepare_nodes(_Tid, [], _Items, CommitRecords, _Kind) ->
    CommitRecords.

%% Returns {Rec, Others} where Rec is the commit record of Node.
%% A dirty operation gets a fresh record when none exists; for a
%% transaction a missing record aborts with missing_lock.
pick_node(Tid, Node, [Rec | Rest], Done) ->
    if
        Rec#commit.node == Node ->
            {Rec, Done ++ Rest};
        true ->
            pick_node(Tid, Node, Rest, [Rec | Done])
    end;
pick_node({dirty,_}, Node, [], Done) ->
    {#commit{decision = presume_commit, node = Node}, Done};
pick_node(_Tid, Node, [], _Done) ->
    mnesia:abort({bad_commit, {missing_lock, Node}}).

%% Pushes the items onto the field of the commit record selected by
%% Kind and Storage.  Schema items are only stored when the record
%% has no schema_ops yet.
prepare_node(Node, Storage, [Item | Items], Rec, Kind) when Kind == snmp ->
    Rec2 = Rec#commit{snmp = [Item | Rec#commit.snmp]},
    prepare_node(Node, Storage, Items, Rec2, Kind);
prepare_node(Node, Storage, [Item | Items], Rec, Kind) when Kind /= schema ->
    Rec2 =
        case Storage of
            ram_copies ->
                Rec#commit{ram_copies = [Item | Rec#commit.ram_copies]};
            disc_copies ->
                Rec#commit{disc_copies = [Item | Rec#commit.disc_copies]};
            disc_only_copies ->
                Rec#commit{disc_only_copies =
                           [Item | Rec#commit.disc_only_copies]}
        end,
    prepare_node(Node, Storage, Items, Rec2, Kind);
prepare_node(_Node, _Storage, Items, Rec, Kind)
  when Kind == schema, Rec#commit.schema_ops == [] ->
    Rec#commit{schema_ops = Items};
prepare_node(_Node, _Storage, [], Rec, _Kind) ->
    Rec.

%% multi_commit(Protocol, Tid, CommitRecords, Store)
%% Local work is always performed in users process
%% Returns do_commit or {do_abort, Reason}.
multi_commit(read_only, Tid, CR, _Store) ->
    %% This featherweight commit protocol is used when no
    %% updates have been performed in the transaction.

    {DiscNs, RamNs} = commit_nodes(CR, [], []),
    Msg = {Tid, simple_commit},
    rpc:abcast(DiscNs -- [node()], ?MODULE, Msg),
    rpc:abcast(RamNs -- [node()], ?MODULE, Msg),
    mnesia_recover:note_decision(Tid, committed),
    mnesia_locker:release_tid(Tid),
    ?MODULE ! {delete_transaction, Tid},
    do_commit;

multi_commit(sym_trans, Tid, CR, Store) ->
    %% This lightweight commit protocol is used when all
    %% the involved tables are replicated symmetrically.
    %% Their storage types must match on each node.
    %%
    %% 1  Ask the other involved nodes if they want to commit
    %%    All involved nodes votes yes if they are up
    %% 2a Somebody has voted no
    %%    Tell all yes voters to do_abort
    %% 2b Everybody has voted yes
    %%    Tell everybody to do_commit. I.e. that they should
    %%    prepare the commit, log the commit record and
    %%    perform the updates.
    %%
    %%    The outcome is kept 3 minutes in the transient decision table.
    %%
    %% Recovery:
    %%    If somebody dies before the coordinator has
    %%    broadcasted do_commit, the transaction is aborted.
    %%
    %%    If a participant dies, the table load algorithm
    %%    ensures that the contents of the involved tables
    %%    are picked from another node.
    %%
    %%    If the coordinator dies, each participants checks
    %%    the outcome with all the others. If all are uncertain
    %%    about the outcome, the transaction is aborted. If
    %%    somebody knows the outcome the others will follow.

    {DiscNs, RamNs} = commit_nodes(CR, [], []),
    Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
    ?ets_insert(Store, Pending),

    {WaitFor, Local} = ask_commit(sym_trans, Tid, CR, DiscNs, RamNs),
    {Outcome, []} = rec_all(WaitFor, Tid, do_commit, []),
    ?eval_debug_fun({?MODULE, multi_commit_sym},
                    [{tid, Tid}, {outcome, Outcome}]),
    rpc:abcast(DiscNs -- [node()], ?MODULE, {Tid, Outcome}),
    rpc:abcast(RamNs -- [node()], ?MODULE, {Tid, Outcome}),
    case Outcome of
        do_commit ->
            mnesia_recover:note_decision(Tid, committed),
            do_dirty(Tid, Local),
            mnesia_locker:release_tid(Tid),
            ?MODULE ! {delete_transaction, Tid};
        {do_abort, _Reason} ->
            mnesia_recover:note_decision(Tid, aborted)
    end,
    ?eval_debug_fun({?MODULE, multi_commit_sym, post},
                    [{tid, Tid}, {outcome, Outcome}]),
    Outcome;

multi_commit(sync_sym_trans, Tid, CR, Store) ->
    %% This protocol is the same as sym_trans except that it
    %% uses synchronized calls to disk_log and synchronized commits
    %% when several nodes are involved.

    {DiscNs, RamNs} = commit_nodes(CR, [], []),
    Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
    ?ets_insert(Store, Pending),

    {WaitFor, Local} = ask_commit(sync_sym_trans, Tid, CR, DiscNs, RamNs),
    {Outcome, []} = rec_all(WaitFor, Tid, do_commit, []),
    ?eval_debug_fun({?MODULE, multi_commit_sym_sync},
                    [{tid, Tid}, {outcome, Outcome}]),
    rpc:abcast(DiscNs -- [node()], ?MODULE, {Tid, Outcome}),
    rpc:abcast(RamNs -- [node()], ?MODULE, {Tid, Outcome}),
    case Outcome of
        do_commit ->
            mnesia_recover:note_decision(Tid, committed),
            mnesia_log:slog(Local),
            do_commit(Tid, Local),
            %% Just wait for completion, the result is ignored.
            rec_all(WaitFor, Tid, ignore, []),
            mnesia_locker:release_tid(Tid),
            ?MODULE ! {delete_transaction, Tid};
        {do_abort, _Reason} ->
            mnesia_recover:note_decision(Tid, aborted)
    end,
    %% NOTE(review): this debug point carries the same label as the
    %% one in the sym_trans clause - confirm whether that is intended.
    ?eval_debug_fun({?MODULE, multi_commit_sym, post},
                    [{tid, Tid}, {outcome, Outcome}]),
    Outcome;

multi_commit(asym_trans, Tid, CR, Store) ->
    %% This more expensive commit protocol is used when
    %% table definitions are changed (schema transactions).
    %% It is also used when the involved tables are
    %% replicated asymmetrically. If the storage type differs
    %% on at least one node this protocol is used.
    %%
    %% 1  Ask the other involved nodes if they want to commit.
    %%    All involved nodes prepares the commit, logs a presume_abort
    %%    commit record and votes yes or no depending of the
    %%    outcome of the prepare. The preparation is also performed
    %%    by the coordinator.
    %%
    %% 2a Somebody has died or voted no
    %%    Tell all yes voters to do_abort
    %% 2b Everybody has voted yes
    %%    Put a unclear marker in the log.
    %%    Tell the others to pre_commit. I.e. that they should
    %%    put a unclear marker in the log and reply
    %%    acc_pre_commit when they are done.
    %%
    %% 3a Somebody died
    %%    Tell the remaining participants to do_abort
    %% 3b Everybody has replied acc_pre_commit
    %%    Tell everybody to committed. I.e that they should
    %%    put a committed marker in the log, perform the updates
    %%    and reply done_commit when they are done. The coordinator
    %%    must wait with putting his committed marker into the log
    %%    until the committed has been sent to all the others.
    %%    Then he performs local commit before collecting replies.
    %%
    %% 4  Everybody has either died or replied done_commit
    %%    Return to the caller.
    %%
    %% Recovery:
    %%    If the coordinator dies, the participants (and
    %%    the coordinator when he starts again) must do
    %%    the following:
    %%
    %%    If we have no unclear marker in the log we may
    %%    safely abort, since we know that nobody may have
    %%    decided to commit yet.
    %%
    %%    If we have a committed marker in the log we may
    %%    safely commit since we know that everybody else
    %%    also will come to this conclusion.
    %%
    %%    If we have a unclear marker but no committed
    %%    in the log we are uncertain about the real outcome
    %%    of the transaction and must ask the others before
    %%    we can decide what to do. If someone knows the
    %%    outcome we will do the same. If nobody knows, we
    %%    will wait for the remaining involved nodes to come
    %%    up. When all involved nodes are up and uncertain,
    %%    we decide to commit (first put a committed marker
    %%    in the log, then do the updates).

    D = #decision{tid = Tid, outcome = presume_abort},
    {D2, CR2} = commit_decision(D, CR, [], []),
    DiscNs = D2#decision.disc_nodes,
    RamNs = D2#decision.ram_nodes,
    Pending = mnesia_checkpoint:tm_enter_pending(Tid, DiscNs, RamNs),
    ?ets_insert(Store, Pending),
    {WaitFor, Local} = ask_commit(asym_trans, Tid, CR2, DiscNs, RamNs),
    %% The local prepare runs before the votes are collected; its
    %% result is only inspected once everybody has voted yes.
    SchemaPrep = (catch mnesia_schema:prepare_commit(Tid, Local, {coord, WaitFor})),
    {Votes, Pids} = rec_all(WaitFor, Tid, do_commit, []),

    ?eval_debug_fun({?MODULE, multi_commit_asym_got_votes},
                    [{tid, Tid}, {votes, Votes}]),
    case Votes of
        do_commit ->
            case SchemaPrep of
                {_Modified, C = #commit{}, DumperMode} ->
                    mnesia_log:log(C), % C is not a binary
                    ?eval_debug_fun({?MODULE, multi_commit_asym_log_commit_rec},
                                    [{tid, Tid}]),

                    D3 = C#commit.decision,
                    D4 = D3#decision{outcome = unclear},
                    mnesia_recover:log_decision(D4),
                    ?eval_debug_fun({?MODULE, multi_commit_asym_log_commit_dec},
                                    [{tid, Tid}]),
                    tell_participants(Pids, {Tid, pre_commit}),
                    %% Now we are uncertain and we do not know
                    %% if all participants have logged that
                    %% they are uncertain or not
                    rec_acc_pre_commit(Pids, Tid, Store, {C,Local},
                                       do_commit, DumperMode, [], []);
                {'EXIT', Reason} ->
                    %% The others have logged the commit
                    %% record but they are not uncertain
                    mnesia_recover:note_decision(Tid, aborted),
                    ?eval_debug_fun({?MODULE, multi_commit_asym_prepare_exit},
                                    [{tid, Tid}]),
                    tell_participants(Pids, {Tid, {do_abort, Reason}}),
                    do_abort(Tid, Local),
                    {do_abort, Reason}
            end;

        {do_abort, Reason} ->
            %% The others have logged the commit
            %% record but they are not uncertain
            mnesia_recover:note_decision(Tid, aborted),
            ?eval_debug_fun({?MODULE, multi_commit_asym_do_abort}, [{tid, Tid}]),
            tell_participants(Pids, {Tid, {do_abort, Reason}}),
            do_abort(Tid, Local),
            {do_abort, Reason}
    end.

%% Returns do_commit or {do_abort, Reason}
%% Collects the replies to pre_commit, one participant at a time.
%% GoodPids are the participants that acknowledged; SchemaAckPids
%% are those from which a schema_commit reply is awaited afterwards.
rec_acc_pre_commit([Pid | Tail], Tid, Store, Commit, Res, DumperMode,
                   GoodPids, SchemaAckPids) ->
    receive
        {?MODULE, _, {acc_pre_commit, Tid, Pid, true}} ->
            rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
                               [Pid | GoodPids], [Pid | SchemaAckPids]);

        {?MODULE, _, {acc_pre_commit, Tid, Pid, false}} ->
            rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
                               [Pid | GoodPids], SchemaAckPids);

        {?MODULE, _, {acc_pre_commit, Tid, Pid}} ->
            %% Kept for backwards compatibility. Remove after Mnesia 4.x
            rec_acc_pre_commit(Tail, Tid, Store, Commit, Res, DumperMode,
                               [Pid | GoodPids], [Pid | SchemaAckPids]);
        {?MODULE, _, {do_abort, Tid, Pid, _Reason}} ->
            AbortRes = {do_abort, {bad_commit, node(Pid)}},
            rec_acc_pre_commit(Tail, Tid, Store, Commit, AbortRes, DumperMode,
                               GoodPids, SchemaAckPids);
        {mnesia_down, Node} when Node == node(Pid) ->
            AbortRes = {do_abort, {bad_commit, Node}},
            catch Pid ! {Tid, AbortRes},  %% Tell him that he has died
            rec_acc_pre_commit(Tail, Tid, Store, Commit, AbortRes, DumperMode,
                               GoodPids, SchemaAckPids)
    end;
rec_acc_pre_commit([], Tid, Store, {Commit,OrigC}, Res, DumperMode, GoodPids, SchemaAckPids) ->
    D = Commit#commit.decision,
    case Res of
        do_commit ->
            %% Now everybody knows that the others
            %% has voted yes. We also know that
            %% everybody are uncertain.
            prepare_sync_schema_commit(Store, SchemaAckPids),
            tell_participants(GoodPids, {Tid, committed}),
            D2 = D#decision{outcome = committed},
            mnesia_recover:log_decision(D2),
            ?eval_debug_fun({?MODULE, rec_acc_pre_commit_log_commit},
                            [{tid, Tid}]),

            %% Now we have safely logged committed
            %% and we can recover without asking others
            do_commit(Tid, Commit, DumperMode),
            ?eval_debug_fun({?MODULE, rec_acc_pre_commit_done_commit},
                            [{tid, Tid}]),
            sync_schema_commit(Tid, Store, SchemaAckPids),
            mnesia_locker:release_tid(Tid),
            ?MODULE ! {delete_transaction, Tid};

        {do_abort, Reason} ->
            tell_participants(GoodPids, {Tid, {do_abort, Reason}}),
            D2 = D#decision{outcome = aborted},
            mnesia_recover:log_decision(D2),
            ?eval_debug_fun({?MODULE, rec_acc_pre_commit_log_abort},
                            [{tid, Tid}]),
            do_abort(Tid, OrigC),
            ?eval_debug_fun({?MODULE, rec_acc_pre_commit_done_abort},
                            [{tid, Tid}])
    end,
    Res.

%% Note all nodes in case of mnesia_down mgt
prepare_sync_schema_commit(_Store, []) ->
    ok;
prepare_sync_schema_commit(Store, [Pid | Pids]) ->
    ?ets_insert(Store, {waiting_for_commit_ack, node(Pid)}),
    prepare_sync_schema_commit(Store, Pids).

%% Waits for a schema_commit reply (or a mnesia_down for the node)
%% from each pid in turn and removes the corresponding
%% waiting_for_commit_ack marker from the store.
sync_schema_commit(_Tid, _Store, []) ->
    ok;
sync_schema_commit(Tid, Store, [Pid | Tail]) ->
    receive
        {?MODULE, _, {schema_commit, Tid, Pid}} ->
            ?ets_match_delete(Store, {waiting_for_commit_ack, node(Pid)}),
            sync_schema_commit(Tid, Store, Tail);

        {mnesia_down, Node} when Node == node(Pid) ->
            ?ets_match_delete(Store, {waiting_for_commit_ack, Node}),
            sync_schema_commit(Tid, Store, Tail)
    end.

%% Sends Msg to every pid in the list.
tell_participants([Pid | Pids], Msg) ->
    Pid ! Msg,
    tell_participants(Pids, Msg);
tell_participants([], _Msg) ->
    ok.
%% Participant side of the asym_trans commit protocol.
%% Trap exit because we can get a shutdown from application manager
%% The commit record arrives either as a binary or as a #commit{}
%% record; the received form is kept so that it can be logged as is
%% when the prepare step leaves it unmodified.
commit_participant(Coord, Tid, Bin, DiscNs, RamNs) when is_binary(Bin) ->
    process_flag(trap_exit, true),
    Commit = binary_to_term(Bin),
    commit_participant(Coord, Tid, Bin, Commit, DiscNs, RamNs);
commit_participant(Coord, Tid, C = #commit{}, DiscNs, RamNs) ->
    process_flag(trap_exit, true),
    commit_participant(Coord, Tid, C, C, DiscNs, RamNs).

%% Prepares the commit, votes, and then follows the coordinator's
%% pre_commit / committed / do_abort messages.  Whatever the outcome,
%% the locks are released and the process exits with reason normal.
commit_participant(Coord, Tid, Bin, C0, DiscNs, _RamNs) ->
    ?eval_debug_fun({?MODULE, commit_participant, pre}, [{tid, Tid}]),
    case catch mnesia_schema:prepare_commit(Tid, C0, {part, Coord}) of
        {Modified, C = #commit{}, DumperMode} ->
            %% If we can not find any local unclear decision
            %% we should presume abort at startup recovery
            case lists:member(node(), DiscNs) of
                false ->
                    ignore;
                true ->
                    case Modified of
                        false -> mnesia_log:log(Bin);
                        true  -> mnesia_log:log(C)
                    end
            end,
            ?eval_debug_fun({?MODULE, commit_participant, vote_yes},
                            [{tid, Tid}]),
            reply(Coord, {vote_yes, Tid, self()}),

            receive
                {Tid, pre_commit} ->
                    D = C#commit.decision,
                    mnesia_recover:log_decision(D#decision{outcome = unclear}),
                    ?eval_debug_fun({?MODULE, commit_participant, pre_commit},
                                    [{tid, Tid}]),
                    %% The coordinator is told whether it should
                    %% expect a schema_commit reply from us later.
                    Expect_schema_ack = C#commit.schema_ops /= [],
                    reply(Coord, {acc_pre_commit, Tid, self(), Expect_schema_ack}),

                    %% Now we are vulnerable for failures, since
                    %% we cannot decide without asking others
                    receive
                        {Tid, committed} ->
                            mnesia_recover:log_decision(D#decision{outcome = committed}),
                            ?eval_debug_fun({?MODULE, commit_participant, log_commit},
                                            [{tid, Tid}]),
                            do_commit(Tid, C, DumperMode),
                            case Expect_schema_ack of
                                false -> ignore;
                                true -> reply(Coord, {schema_commit, Tid, self()})
                            end,
                            ?eval_debug_fun({?MODULE, commit_participant, do_commit},
                                            [{tid, Tid}]);

                        {Tid, {do_abort, _Reason}} ->
                            mnesia_recover:log_decision(D#decision{outcome = aborted}),
                            ?eval_debug_fun({?MODULE, commit_participant, log_abort},
                                            [{tid, Tid}]),
                            mnesia_schema:undo_prepare_commit(Tid, C0),
                            ?eval_debug_fun({?MODULE, commit_participant, undo_prepare},
                                            [{tid, Tid}]);

                        {'EXIT', _, _} ->
                            mnesia_recover:log_decision(D#decision{outcome = aborted}),
                            ?eval_debug_fun({?MODULE, commit_participant, exit_log_abort},
                                            [{tid, Tid}]),
                            mnesia_schema:undo_prepare_commit(Tid, C0),
                            ?eval_debug_fun({?MODULE, commit_participant, exit_undo_prepare},
                                            [{tid, Tid}]);

                        Msg ->
                            verbose("** ERROR ** commit_participant ~p, got unexpected msg: ~p~n",
                                    [Tid, Msg])
                    end;
                {Tid, {do_abort, Reason}} ->
                    reply(Coord, {do_abort, Tid, self(), Reason}),
                    mnesia_schema:undo_prepare_commit(Tid, C0),
                    ?eval_debug_fun({?MODULE, commit_participant, pre_commit_undo_prepare},
                                    [{tid, Tid}]);

                {'EXIT', _, Reason} ->
                    reply(Coord, {do_abort, Tid, self(), {bad_commit,Reason}}),
                    mnesia_schema:undo_prepare_commit(Tid, C0),
                    ?eval_debug_fun({?MODULE, commit_participant, pre_commit_undo_prepare}, [{tid, Tid}]);

                Msg ->
                    reply(Coord, {do_abort, Tid, self(), {bad_commit,internal}}),
                    verbose("** ERROR ** commit_participant ~p, got unexpected msg: ~p~n",
                            [Tid, Msg])
            end;

        {'EXIT', Reason} ->
            ?eval_debug_fun({?MODULE, commit_participant, vote_no},
                            [{tid, Tid}]),
            reply(Coord, {vote_no, Tid, Reason}),
            mnesia_schema:undo_prepare_commit(Tid, C0)
    end,
    mnesia_locker:release_tid(Tid),
    ?MODULE ! {delete_transaction, Tid},
    unlink(whereis(?MODULE)),
    exit(normal).

%% Undo a prepared commit.  Accepts the commit record either as a
%% binary or as a term and returns the decoded record.
do_abort(Tid, Bin) when is_binary(Bin) ->
    %% Possible optimization:
    %% If we want we could pass around a flag
    %% that tells us whether the binary contains
    %% schema ops or not. Only if the binary
    %% contains schema ops is it meaningful to
    %% unpack the binary and perform
    %% mnesia_schema:undo_prepare_commit/1.
    do_abort(Tid, binary_to_term(Bin));
do_abort(Tid, Commit) ->
    mnesia_schema:undo_prepare_commit(Tid, Commit),
    Commit.
%% Log and perform a commit that carries no schema operations.
do_dirty(Tid, Commit) when Commit#commit.schema_ops == [] ->
    mnesia_log:log(Commit),
    do_commit(Tid, Commit).

%% do_commit(Tid, CommitRecord)
%% do_commit(Tid, CommitRecord, DumperMode)
%% Applies a commit record locally: schema operations first, then
%% snmp, ram_copies, disc_copies and disc_only_copies in that order.
%% The record may be passed as a binary.
do_commit(Tid, Bin) when is_binary(Bin) ->
    do_commit(Tid, binary_to_term(Bin));
do_commit(Tid, Commit) ->
    do_commit(Tid, Commit, optional).

do_commit(Tid, Bin, DumperMode) when is_binary(Bin) ->
    do_commit(Tid, binary_to_term(Bin), DumperMode);
do_commit(Tid, Commit, DumperMode) ->
    mnesia_dumper:update(Tid, Commit#commit.schema_ops, DumperMode),
    SnmpRes = do_snmp(Tid, Commit#commit.snmp),
    RamRes = do_update(Tid, ram_copies, Commit#commit.ram_copies, SnmpRes),
    DiscRes = do_update(Tid, disc_copies, Commit#commit.disc_copies, RamRes),
    do_update(Tid, disc_only_copies, Commit#commit.disc_only_copies, DiscRes).

%% Update the items
%% The running result is replaced by whatever an operation returns,
%% unless it returns ok or fails.
do_update(Tid, Storage, [Op | Ops], OldRes) ->
    Res =
        case catch do_update_op(Tid, Storage, Op) of
            ok ->
                OldRes;
            {'EXIT', Reason} ->
                %% This may only happen when we recently have
                %% deleted our local replica, changed storage_type
                %% or transformed table
                %% BUGBUG: Updates may be lost if storage_type is changed.
                %%         Determine actual storage type and try again.
                %% BUGBUG: Updates may be lost if table is transformed.
                verbose("do_update in ~w failed: ~p -> {'EXIT', ~p}~n",
                        [Tid, Op, Reason]),
                OldRes;
            NewRes ->
                NewRes
        end,
    do_update(Tid, Storage, Ops, Res);
do_update(_Tid, _Storage, [], Res) ->
    Res.

%% Apply one committed operation to the local replica of Tab held in
%% Storage. The commit work registered for the table (checkpoints,
%% subscribers, indexes) is run first, then the storage itself is updated.
do_update_op(Tid, Storage, {{Tab, K}, Obj, write}) ->
    Work = ?catch_val({Tab, commit_work}),
    commit_write(Work, Tid, Tab, K, Obj, undefined),
    mnesia_lib:db_put(Storage, Tab, Obj);

do_update_op(Tid, Storage, {{Tab, K}, Val, delete}) ->
    Work = ?catch_val({Tab, commit_work}),
    commit_delete(Work, Tid, Tab, K, Val, undefined),
    mnesia_lib:db_erase(Storage, Tab, K);

do_update_op(Tid, Storage, {{Tab, K}, {RecName, Incr}, update_counter}) ->
    {NewObj, OldObjs} =
        case catch mnesia_lib:db_update_counter(Storage, Tab, K, Incr) of
            NewVal when is_integer(NewVal), NewVal >= 0 ->
                {{RecName, K, NewVal}, [{RecName, K, NewVal - Incr}]};
            _ ->
                %% The counter could not be updated in place. Store a
                %% fresh counter instead: the increment itself when it
                %% is positive, otherwise zero.
                Start = if
                            Incr > 0 -> Incr;
                            true -> 0
                        end,
                Fresh = {RecName, K, Start},
                mnesia_lib:db_put(Storage, Tab, Fresh),
                {Fresh, []}
        end,
    Work = ?catch_val({Tab, commit_work}),
    commit_update(Work, Tid, Tab, K, NewObj, OldObjs),
    %% The result of a counter update is the new counter value.
    element(3, NewObj);

do_update_op(Tid, Storage, {{Tab, Key}, Obj, delete_object}) ->
    Work = ?catch_val({Tab, commit_work}),
    commit_del_object(Work, Tid, Tab, Key, Obj, undefined),
    mnesia_lib:db_match_erase(Storage, Tab, Obj);

do_update_op(Tid, Storage, {{Tab, Key}, Obj, clear_table}) ->
    Work = ?catch_val({Tab, commit_work}),
    commit_clear(Work, Tid, Tab, Key, Obj),
    mnesia_lib:db_match_erase(Storage, Tab, Obj).

%% Run the commit work items of a table for a write operation, one item
%% at a time: retain the key in the checkpoints, report the event to the
%% subscribers and add the object to the indexes.
commit_write([{checkpoints, CpList} | Rest], Tid, Tab, K, Obj, Old) ->
    mnesia_checkpoint:tm_retain(Tid, Tab, K, write, CpList),
    commit_write(Rest, Tid, Tab, K, Obj, Old);
commit_write([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= subscribers ->
    mnesia_subscr:report_table_event(Item, Tab, Tid, Obj, write, Old),
    commit_write(Rest, Tid, Tab, K, Obj, Old);
commit_write([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= index ->
    mnesia_index:add_index(Item, Tab, K, Obj, Old),
    commit_write(Rest, Tid, Tab, K, Obj, Old);
commit_write([], _Tid, _Tab, _K, _Obj, _Old) ->
    ok.

%% Run the commit work items of a table for a counter update. The value
%% returned by the checkpoint retainer replaces the old objects that are
%% passed on to the remaining work items.
commit_update([{checkpoints, CpList} | Rest], Tid, Tab, K, Obj, _Prev) ->
    Retained = mnesia_checkpoint:tm_retain(Tid, Tab, K, write, CpList),
    commit_update(Rest, Tid, Tab, K, Obj, Retained);
commit_update([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= subscribers ->
    mnesia_subscr:report_table_event(Item, Tab, Tid, Obj, write, Old),
    commit_update(Rest, Tid, Tab, K, Obj, Old);
commit_update([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= index ->
    mnesia_index:add_index(Item, Tab, K, Obj, Old),
    commit_update(Rest, Tid, Tab, K, Obj, Old);
commit_update([], _Tid, _Tab, _K, _Obj, _Old) ->
    ok.

%% Run the commit work items of a table for a delete of the key K.
commit_delete([{checkpoints, CpList} | Rest], Tid, Tab, K, Obj, _Prev) ->
    Retained = mnesia_checkpoint:tm_retain(Tid, Tab, K, delete, CpList),
    commit_delete(Rest, Tid, Tab, K, Obj, Retained);
commit_delete([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= subscribers ->
    mnesia_subscr:report_table_event(Item, Tab, Tid, Obj, delete, Old),
    commit_delete(Rest, Tid, Tab, K, Obj, Old);
commit_delete([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= index ->
    mnesia_index:delete_index(Item, Tab, K),
    commit_delete(Rest, Tid, Tab, K, Obj, Old);
commit_delete([], _Tid, _Tab, _K, _Obj, _Old) ->
    ok.

%% Run the commit work items of a table for a delete of one specific
%% object (delete_object).
commit_del_object([{checkpoints, CpList} | Rest], Tid, Tab, K, Obj, _Prev) ->
    Retained = mnesia_checkpoint:tm_retain(Tid, Tab, K, delete_object, CpList),
    commit_del_object(Rest, Tid, Tab, K, Obj, Retained);
commit_del_object([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= subscribers ->
    mnesia_subscr:report_table_event(Item, Tab, Tid, Obj, delete_object, Old),
    commit_del_object(Rest, Tid, Tab, K, Obj, Old);
commit_del_object([Item | Rest], Tid, Tab, K, Obj, Old)
  when element(1, Item) =:= index ->
    mnesia_index:del_object_index(Item, Tab, K, Obj, Old),
    commit_del_object(Rest, Tid, Tab, K, Obj, Old);
commit_del_object([], _Tid, _Tab, _K, _Obj, _Old) ->
    ok.

%% Run the commit work items of a table for a clear_table operation.
commit_clear([{checkpoints, CpList} | Rest], Tid, Tab, K, Obj) ->
    mnesia_checkpoint:tm_retain(Tid, Tab, K, clear_table, CpList),
    commit_clear(Rest, Tid, Tab, K, Obj);
commit_clear([Item | Rest], Tid, Tab, K, Obj)
  when element(1, Item) =:= subscribers ->
    mnesia_subscr:report_table_event(Item, Tab, Tid, Obj, clear_table, undefined),
    commit_clear(Rest, Tid, Tab, K, Obj);
commit_clear([Item | Rest], Tid, Tab, K, Obj)
  when element(1, Item) =:= index ->
    mnesia_index:clear_index(Item, Tab, K, Obj),
    commit_clear(Rest, Tid, Tab, K, Obj);
commit_clear([], _Tid, _Tab, _K, _Obj) ->
    ok.

%% Feed every snmp operation of a commit record to the snmp hook.
%% A failing hook is reported with verbose/2 and otherwise ignored.
%% Always returns ok.
do_snmp(_Tid, []) ->
    ok;
do_snmp(Tid, [SnmpOp | Rest]) ->
    case catch mnesia_snmp_hook:update(SnmpOp) of
        ok ->
            ignore;
        {'EXIT', Reason} ->
            %% This should only happen when we recently have
            %% deleted our local replica or recently detached
            %% the snmp table
            verbose("do_snmp in ~w failed: ~p -> {'EXIT', ~p}~n",
                    [Tid, SnmpOp, Reason])
    end,
    do_snmp(Tid, Rest).

%% Split the nodes of a list of commit records into {DiscNodes, RamNodes}.
%% A node ends up among the ram nodes when its commit record carries no
%% disc_copies, no disc_only_copies and no schema operations.
commit_nodes([#commit{disc_copies = [],
                      disc_only_copies = [],
                      schema_ops = [],
                      node = Node} | Tail], AccD, AccR) ->
    commit_nodes(Tail, AccD, [Node | AccR]);
commit_nodes([C | Tail], AccD, AccR) ->
    commit_nodes(Tail, [C#commit.node | AccD], AccR);
commit_nodes([], AccD, AccR) ->
    {AccD, AccR}.

%% Classify every involved node as a disc node or a ram node, store both
%% lists in the decision record and attach the completed decision to
%% each commit record. Returns {Decision, CommitRecords}.
commit_decision(D, [C | Tail], AccD, AccR) ->
    N = C#commit.node,
    RamOnly =
        case C#commit.schema_ops of
            [] ->
                C#commit.disc_copies == []
                    andalso C#commit.disc_only_copies == [];
            Ops ->
                ram_only_ops(N, Ops)
        end,
    {D2, Tail2} =
        case RamOnly of
            true ->
                commit_decision(D, Tail, AccD, [N | AccR]);
            false ->
                commit_decision(D, Tail, [N | AccD], AccR)
        end,
    {D2, [C#commit{decision = D2} | Tail2]};
commit_decision(D, [], AccD, AccR) ->
    {D#decision{disc_nodes = AccD, ram_nodes = AccR}, []}.

%% Decide whether the schema operations of node N can be treated as ram
%% only. That is the case when N is not among the disc_copies holders of
%% the schema table, except that a change of the copy type of the schema
%% table itself on N always counts as a disc operation.
ram_only_ops(N, [{op, change_table_copy_type, N, _FromS, _ToS, Cs} | _Ops]) ->
    %% We always use disk if change type of the schema
    (not lists:member({name, schema}, Cs))
        andalso (not lists:member(N, val({schema, disc_copies})));
ram_only_ops(N, _Ops) ->
    not lists:member(N, val({schema, disc_copies})).

%% Returns {WaitFor, Res}
%% Send a sync_dirty request to every remote node in the list of commit
%% records and perform the local part, if there is one, with do_dirty/2.
%% The remote nodes are sent to before the local part is performed.
%% WaitFor collects the remote nodes that were sent a request.
sync_send_dirty(Tid, [Head | Tail], Tab, WaitFor) ->
    Node = Head#commit.node,
    case Node =:= node() of
        true ->
            {Pending, _} = sync_send_dirty(Tid, Tail, Tab, WaitFor),
            LocalRes = do_dirty(Tid, Head),
            {Pending, LocalRes};
        false ->
            {?MODULE, Node} ! {self(), {sync_dirty, Tid, Head, Tab}},
            sync_send_dirty(Tid, Tail, Tab, [Node | WaitFor])
    end;
sync_send_dirty(_Tid, [], _Tab, WaitFor) ->
    {WaitFor, {'EXIT', {aborted, {node_not_running, WaitFor}}}}.

%% Returns {WaitFor, Res}
%% Only the node the table is read from is handled synchronously; all
%% other nodes get an async_dirty request and are not waited for.
async_send_dirty(_Tid, _Nodes, Tab, nowhere) ->
    {[], {'EXIT', {aborted, {no_exists, Tab}}}};
async_send_dirty(Tid, Nodes, Tab, ReadNode) ->
    async_send_dirty(Tid, Nodes, Tab, ReadNode, [], ok).

async_send_dirty(Tid, [Head | Tail], Tab, ReadNode, WaitFor, Res) ->
    Node = Head#commit.node,
    case {Node =:= ReadNode, Node =:= node()} of
        {true, true} ->
            %% The read node is the local node: do the work right here.
            LocalRes = do_dirty(Tid, Head),
            async_send_dirty(Tid, Tail, Tab, ReadNode, WaitFor, LocalRes);
        {true, false} ->
            %% The read node is remote: ask it synchronously. The result
            %% below is a placeholder for the time being.
            {?MODULE, Node} ! {self(), {sync_dirty, Tid, Head, Tab}},
            Pending = {'EXIT', {aborted, {node_not_running, Node}}},
            async_send_dirty(Tid, Tail, Tab, ReadNode, [Node | WaitFor], Pending);
        {false, _} ->
            {?MODULE, Node} ! {self(), {async_dirty, Tid, Head, Tab}},
            async_send_dirty(Tid, Tail, Tab, ReadNode, WaitFor, Res)
    end;
async_send_dirty(_Tid, [], _Tab, _ReadNode, WaitFor, Res) ->
    {WaitFor, Res}.

%% Collect the dirty replies from the given remote nodes, one node at a
%% time, and return the combined result.
rec_dirty([], Res) ->
    Res;
rec_dirty([Node | Tail], Res) when Node /= node() ->
    rec_dirty(Tail, get_dirty_reply(Node, Res)).

%% Wait for the dirty reply from Node. Res is the result collected so
%% far and is what is returned when Node has nothing better to offer.
%% The wait is resumed every second for as long as Node is still listed
%% among the current db_nodes.
get_dirty_reply(Node, Res) ->
    receive
        {?MODULE, Node, {'EXIT', Reason}} ->
            {'EXIT', {aborted, {badarg, Reason}}};
        {?MODULE, Node, {dirty_res, ok}} ->
            case Res of
                {'EXIT', {aborted, {node_not_running, _Node}}} ->
                    ok;
                _Other ->
                    %% Prioritize bad results, but node_not_running
                    Res
            end;
        {?MODULE, Node, {dirty_res, Reply}} ->
            Reply;
        {mnesia_down, Node} ->
            case get(mnesia_activity_state) of
                {_, Tid, _Ts} when element(1, Tid) == tid ->
                    %% Hmm dirty called inside a transaction, to avoid
                    %% hanging transaction we need to restart the transaction
                    mnesia:abort({node_not_running, Node});
                _NoTransaction ->
                    %% It's ok to ignore mnesia_down's since we will make
                    %% the replicas consistent again when Node is started
                    Res
            end
    after 1000 ->
            StillRunning = lists:member(Node, val({current, db_nodes})),
            case StillRunning of
                true ->
                    get_dirty_reply(Node, Res);
                false ->
                    Res
            end
    end.

%% Assume that CommitRecord is no binary
%% Return {WaitFor, Local}: the remote nodes that were asked to commit
%% and the commit record of the local node (no_local if there is none).
ask_commit(Protocol, Tid, CR, DiscNs, RamNs) ->
    ask_commit(Protocol, Tid, CR, DiscNs, RamNs, [], no_local).

ask_commit(Protocol, Tid, [Head | Tail], DiscNs, RamNs, WaitFor, Local) ->
    Node = Head#commit.node,
    case Node =:= node() of
        true ->
            %% Our own commit record is not sent anywhere, just remembered.
            ask_commit(Protocol, Tid, Tail, DiscNs, RamNs, WaitFor, Head);
        false ->
            Bin = opt_term_to_binary(Protocol, Head, DiscNs ++ RamNs),
            Request = {ask_commit, Protocol, Tid, Bin, DiscNs, RamNs},
            {?MODULE, Node} ! {self(), Request},
            ask_commit(Protocol, Tid, Tail, DiscNs, RamNs, [Node | WaitFor], Local)
    end;
ask_commit(_Protocol, _Tid, [], _DiscNs, _RamNs, WaitFor, Local) ->
    {WaitFor, Local}.

%% This used to test protocol conversion between mnesia nodes,
%% but the conversion really depends on the emulator version on the
%% two nodes (if funs are sent, which they are in the transform table op).
%% To be safe we let erts do the translation (maybe many times, and thus
%% slower, but it works).
% opt_term_to_binary(asym_trans, Head, Nodes) ->
%     opt_term_to_binary(Nodes, Head);
%% The commit record is passed on unchanged, whatever the protocol and
%% the receiving nodes are.
opt_term_to_binary(_Protocol, Head, _Nodes) ->
    Head.

%% Collect one answer from each of the given nodes for transaction Tid.
%% Returns {Res, Pids}: Res is the incoming result unless some node
%% voted no or went down, in which case it is a do_abort tuple; Pids
%% holds the pids reported in the vote_yes answers.
rec_all([], _Tid, Res, Pids) ->
    {Res, Pids};
rec_all([Node | Tail], Tid, Res, Pids) ->
    receive
        {?MODULE, Node, {vote_yes, Tid}} ->
            rec_all(Tail, Tid, Res, Pids);
        {?MODULE, Node, {vote_yes, Tid, Pid}} ->
            rec_all(Tail, Tid, Res, [Pid | Pids]);
        {?MODULE, Node, {vote_no, Tid, Reason}} ->
            rec_all(Tail, Tid, {do_abort, Reason}, Pids);
        {?MODULE, Node, {committed, Tid}} ->
            rec_all(Tail, Tid, Res, Pids);
        {?MODULE, Node, {aborted, Tid}} ->
            rec_all(Tail, Tid, Res, Pids);

        {mnesia_down, Node} ->
            %% Make sure that mnesia_tm knows it has died
            %% it may have been restarted
            AbortRes = {do_abort, {bad_commit, Node}},
            catch {?MODULE, Node} ! {Tid, AbortRes},
            rec_all(Tail, Tid, AbortRes, Pids)
    end.

%% Return one {Counter, Pid, Status} tuple for every transaction in the
%% coordinator list obtained from req(info).
get_transactions() ->
    {info, Participant, Coordinator} = req(info),
    Describe =
        fun({Tid, _Tabs}) ->
                Status = tr_status(Tid, Participant),
                {Tid#tid.counter, Tid#tid.pid, Status}
        end,
    lists:map(Describe, Coordinator).

%% participant if Tid is the first element of some tuple in the
%% participant list, coordinator otherwise.
tr_status(Tid, Participant) ->
    IsParticipant = lists:keymember(Tid, 1, Participant),
    if
        IsParticipant -> participant;
        true -> coordinator
    end.

%% Ask the process registered under this module's name for its
%% transaction info. Returns {info, Participants, Coordinators}, or
%% {timeout, Timeout} when no such process is registered or no answer
%% arrives within Timeout.
get_info(Timeout) ->
    case whereis(?MODULE) of
        undefined ->
            {timeout, Timeout};
        TmPid ->
            TmPid ! {self(), info},
            receive
                {?MODULE, _, {info, Part, Coord}} ->
                    {info, Part, Coord}
            after Timeout ->
                    {timeout, Timeout}
            end
    end.

%% Print the outcome of get_info/1 on Stream.
display_info(Stream, {timeout, T}) ->
    io:format(Stream, "---> No info about coordinator and participant transactions, "
              "timeout ~p <--- ~n", [T]);

display_info(Stream, {info, Part, Coord}) ->
    io:format(Stream, "---> Participant transactions <--- ~n", []),
    lists:foreach(fun(P) -> pr_participant(Stream, P) end, Part),
    io:format(Stream, "---> Coordinator transactions <---~n", []),
    lists:foreach(fun({Tid, _Tabs}) -> pr_tid(Stream, Tid) end, Coord).

%% Print the tid of a participant record followed by its commit record.
%% A commit record stored as a binary is unpacked before anything is
%% printed.
pr_participant(Stream, P) ->
    Commit =
        case P#participant.commit of
            Packed when is_binary(Packed) -> binary_to_term(Packed);
            Plain -> Plain
        end,
    pr_tid(Stream, P#participant.tid),
    io:format(Stream, "with participant objects ~p~n", [Commit]).


%% Print the counter and the owner pid of a tid.
pr_tid(Stream, Tid) ->
    io:format(Stream, "Tid: ~p (owned by ~p) ~n",
              [Tid#tid.counter, Tid#tid.pid]).

%% Print what is known about the transaction with the given serial
%% number, first among the participants and then among the coordinators.
info(Serial) ->
    io:format( "Info about transaction with serial == ~p~n", [Serial]),
    {info, Participant, Trs} = req(info),
    search_pr_participant(Serial, Participant),
    search_pr_coordinator(Serial, Trs).


%% Print the owner of every coordinator transaction whose counter
%% matches S. Always returns no.
search_pr_coordinator(_S, []) ->
    no;
search_pr_coordinator(S, [{Tid, _Ts} | Tail]) ->
    case Tid#tid.counter of
        S ->
            io:format( "Tid is coordinator, owner == \n", []),
            display_pid_info(Tid#tid.pid);
        _ ->
            skip
    end,
    search_pr_coordinator(S, Tail).

%% Print the owner and the commit record of every participant
%% transaction whose counter equals S. Always returns false.
search_pr_participant(_S, []) ->
    false;
search_pr_participant(S, [P | Tail]) ->
    Tid = P#participant.tid,
    Commit0 = P#participant.commit,
    if
        Tid#tid.counter == S ->
            io:format( "Tid is participant to commit, owner == \n", []),
            Owner = Tid#tid.pid,
            display_pid_info(Owner),
            io:format( "Tid wants to write objects \n",[]),
            Commit =
                case is_binary(Commit0) of
                    true -> binary_to_term(Commit0);
                    false -> Commit0
                end,
            io:format("~p~n", [Commit]);
        true ->
            skip
    end,
    %% Keep searching in both cases; there may be more matches.
    search_pr_participant(S, Tail).

%% Print one line describing a process: pid, initial call, current
%% function, reductions and message queue length.
display_pid_info(Pid) ->
    case rpc:pinfo(Pid) of
        undefined ->
            io:format( "Dead process \n");
        Info ->
            Call = fetch(initial_call, Info),
            Curr =
                case fetch(current_function, Info) of
                    {Mod, Fun, Args} when is_list(Args) ->
                        %% Show the arity rather than the argument list.
                        {Mod, Fun, length(Args)};
                    Other ->
                        Other
                end,
            Reds = fetch(reductions, Info),
            QueueLen = length(fetch(messages, Info)),
            pformat(io_lib:format("~p", [Pid]),
                    io_lib:format("~p", [Call]),
                    io_lib:format("~p", [Curr]), Reds, QueueLen)
    end.

%% Print the five columns of a process info line.
pformat(A1, A2, A3, A4, A5) ->
    io:format( "~-12s ~-21s ~-21s ~9w ~4w~n", [A1, A2, A3, A4, A5]).

%% Look up Key in a list of {Key, Value} tuples.
%% Returns the value, or 0 when there is no two-element tuple for Key.
fetch(Key, Info) ->
    case lists:keyfind(Key, 1, Info) of
        {_, Val} ->
            Val;
        _ ->
            0
    end.


%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%  reconfigure stuff comes here ......
%%%%%%%%%%%%%%%%%%%%%

%% Node N has gone down: notify the coordinators that need to know.
%% A transaction whose outcome is committed is notified only when N is
%% found among its waiting_for_commit_ack entries, and an aborted one is
%% not notified at all.
reconfigure_coordinators(N, [{Tid, [Store | _]} | Coordinators]) ->
    case mnesia_recover:outcome(Tid, unknown) of
        committed ->
            Waiting = ?ets_lookup(Store, waiting_for_commit_ack),
            case lists:keymember(N, 2, Waiting) of
                true ->
                    send_mnesia_down(Tid, Store, N);
                false ->
                    ignore % avoid spurious mnesia_down messages
            end;
        aborted ->
            ignore; % avoid spurious mnesia_down messages
        _ ->
            %% Tell the coordinator about the mnesia_down
            send_mnesia_down(Tid, Store, N)
    end,
    reconfigure_coordinators(N, Coordinators);
reconfigure_coordinators(_N, []) ->
    ok.

%% Send {mnesia_down, Node} to the owner of the transaction and to the
%% friends registered in its store.
send_mnesia_down(Tid, Store, Node) ->
    Receivers = [Tid#tid.pid | get_elements(friends, Store)],
    send_to_pids(Receivers, {mnesia_down, Node}).

%% Send Msg to every pid in the list; elements that are not pids are
%% skipped.
send_to_pids([], _Msg) ->
    ok;
send_to_pids([Elem | Rest], Msg) ->
    case is_pid(Elem) of
        true -> Elem ! Msg;
        false -> skip
    end,
    send_to_pids(Rest, Msg).

%% Node N has gone down: go through the transactions we participate in.
%% Only transactions that involve N and whose coordinator lived on N
%% need action; for those the outcome is determined and announced.
%% Always returns [].
reconfigure_participants(N, [P | Tail]) ->
    Involved = lists:member(N, P#participant.disc_nodes) or
        lists:member(N, P#participant.ram_nodes),
    case Involved of
        false ->
            %% Ignore, since we are not a participant
            %% in the transaction.
            ignore;
        true ->
            %% We are on a participant node, lets
            %% check if the dead one was a
            %% participant or a coordinator.
            Tid = P#participant.tid,
            if
                node(Tid#tid.pid) /= N ->
                    %% Another participant node died. Ignore.
                    ignore;
                true ->
                    %% The coordinator node has died and
                    %% we must determine the outcome of the
                    %% transaction and tell mnesia_tm on all
                    %% nodes (including the local node) about it
                    verbose("Coordinator ~p in transaction ~p died~n",
                            [Tid#tid.pid, Tid]),
                    Nodes = P#participant.disc_nodes ++
                        P#participant.ram_nodes,
                    AliveNodes = Nodes -- [N],
                    Protocol = P#participant.protocol,
                    tell_outcome(Tid, Protocol, N, AliveNodes, AliveNodes)
            end
    end,
    reconfigure_participants(N, Tail);
reconfigure_participants(_, []) ->
    [].

%% We need to determine the outcome of the transaction and
%% tell mnesia_tm on all involved nodes (including the local node)
%% about the outcome.
%% Returns the outcome (aborted or committed).
tell_outcome(Tid, Protocol, Node, CheckNodes, TellNodes) ->
    Outcome = mnesia_recover:what_happened(Tid, Protocol, CheckNodes),
    Verdict =
        case Outcome of
            aborted ->
                {Tid, {do_abort, {mnesia_down, Node}}};
            committed ->
                {Tid, do_commit}
        end,
    rpc:abcast(TellNodes, ?MODULE, Verdict),
    Outcome.

%% Tell the owner of every coordinated transaction that the local node
%% goes down, stop the checkpoints and the log, then exit with shutdown.
do_stop(#state{coordinators = Coordinators}) ->
    Down = {mnesia_down, node()},
    lists:foreach(fun(Tid) -> Tid#tid.pid ! Down end,
                  gb_trees:keys(Coordinators)),
    mnesia_checkpoint:stop(),
    mnesia_log:stop(),
    exit(shutdown).

%% Forward a fixtable request through req/1. Exits with {no_exists, Tab}
%% when the answer is error; any other answer is returned as it is.
fixtable(Tab, Lock, Me) ->
    case req({fixtable, [Tab, Lock, Me]}) of
        error ->
            exit({no_exists, Tab});
        Answer ->
            Answer
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% System upgrade

%% Resume the main loop with the given state.
system_continue(_Parent, _Debug, State) ->
    doit_loop(State).

%% Stop in the same way as an ordinary stop, see do_stop/1.
system_terminate(_Reason, _Parent, _Debug, State) ->
    do_stop(State).
+ +system_code_change(State=#state{coordinators=Cs0,participants=Ps0},_Module,_OldVsn,downgrade) -> + case is_tuple(Cs0) of + true -> + Cs = gb_trees:to_list(Cs0), + Ps = gb_trees:values(Ps0), + {ok, State#state{coordinators=Cs,participants=Ps}}; + false -> + {ok, State} + end; + +system_code_change(State=#state{coordinators=Cs0,participants=Ps0},_Module,_OldVsn,_Extra) -> + case is_list(Cs0) of + true -> + Cs = gb_trees:from_orddict(lists:sort(Cs0)), + Ps1 = [{P#participant.tid,P}|| P <- Ps0], + Ps = gb_trees:from_orddict(lists:sort(Ps1)), + {ok, State#state{coordinators=Cs,participants=Ps}}; + false -> + {ok, State} + end. diff --git a/lib/mnesia/vsn.mk b/lib/mnesia/vsn.mk new file mode 100644 index 0000000000..2de3658bf3 --- /dev/null +++ b/lib/mnesia/vsn.mk @@ -0,0 +1,15 @@ + +MNESIA_VSN = 4.4.12 + +TICKETS = OTP-8250 +#TICKETS_4.4.11 = OTP-8074 +#TICKETS_4.4.10 = OTP-7928 OTP-7968 OTP-8002 +#TICKETS_4.4.9 = OTP-7911 +#TICKETS_4.4.8 = OTP-7753 OTP-7835 +#TICKETS_4.4.7 = OTP-7524 OTP-7625 +#TICKETS_4.4.6 = OTP-7585 +#TICKETS_4.4.5 = OTP-7466 +#TICKETS_4.4.4 = OTP-7419 +#TICKETS_4.4.3 = OTP-7340 OTP-7378 OTP-7383 +#TICKETS_4.4.2 = OTP-7205 OTP-7208 +#TICKETS_4.4.1 = OTP-7170 |